diff --git a/gdb/ChangeLog b/gdb/ChangeLog index bd00e826d3..e78bfcb879 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,70 @@ +2018-03-27 Simon Marchi + + * Makefile.in (COMMON_SFILES): Add dwarf-index-common.c and + dwarf-index-write.c + (HFILES_NO_SRCDIR): Add dwarf-index-common.h and dwarf2read.h. + * dwarf-index-common.c: New file. + * dwarf-index-common.h: New file. + * dwarf-index-write.c: New file. + * dwarf2read.c: Include dwarf2read.h and dwarf-index-common.h. + (struct dwarf2_section_info): Move from here. + (dwarf2_section_info_def): Likewise. + (DEF_VEC_O (dwarf2_section_info_def)): Likewise. + (offset_type): Likewise. + (DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE): Likewise. + (DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE): Likewise. + (DW2_GDB_INDEX_CU_SET_VALUE): Likewise. + (byte_swap): Likewise. + (MAYBE_SWAP): Likewise. + (dwarf2_per_cu_ptr): Likewise. + (DEF_VEC_P (dwarf2_per_cu_ptr)): Likewise. + (struct tu_stats): Likewise. + (struct dwarf2_per_objfile): Likewise. + (struct dwarf2_per_cu_data): Likewise. + (struct signatured_type): Likewise. + (sig_type_ptr): Likewise. + (DEF_VEC_P (sig_type_ptr)): Likewise. + (INDEX4_SUFFIX): Likewise. + (INDEX5_SUFFIX): Likewise. + (DEBUG_STR_SUFFIX): Likewise. + (dwarf2_read_section): Make non-static. + (mapped_index_string_hash): Move from here. + (dwarf5_djb_hash): Likewise. + (file_write): Likewise. + (class data_buf): Likewise. + (struct symtab_index_entry): Likewise. + (struct mapped_symtab): Likewise. + (find_slot): Likewise. + (hash_expand): Likewise. + (add_index_entry): Likewise. + (uniquify_cu_indices): Likewise. + (class c_str_view): Likewise. + (class c_str_view_hasher): Likewise. + (class vector_hasher): Likewise. + (write_hash_table): Likewise. + (psym_index_map): Likewise. + (struct addrmap_index_data): Likewise. + (add_address_entry): Likewise. + (add_address_entry_worker): Likewise. + (write_address_map): Likewise. + (symbol_kind): Likewise. + (write_psymbols): Likewise. 
+ (struct signatured_type_index_data): Likewise. + (write_one_signatured_type): Likewise. + (recursively_count_psymbols): Likewise. + (recursively_write_psymbols): Likewise. + (class debug_names): Likewise. + (check_dwarf64_offsets): Likewise. + (psyms_seen_size): Likewise. + (write_gdbindex): Likewise. + (write_debug_names): Likewise. + (assert_file_size): Likewise. + (write_psymtabs_to_index): Likewise. + (save_gdb_index_command): Likewise. + (_initialize_dwarf2_read): Don't register the "save gdb-index" + command. + * dwarf2read.h: New file. + 2018-03-27 Joel Brobecker PR gdb/22670 diff --git a/gdb/Makefile.in b/gdb/Makefile.in index b1ba00583f..0a07cabb43 100644 --- a/gdb/Makefile.in +++ b/gdb/Makefile.in @@ -992,6 +992,8 @@ COMMON_SFILES = \ disasm.c \ disasm-selftests.c \ dummy-frame.c \ + dwarf-index-common.c \ + dwarf-index-write.c \ dwarf2-frame.c \ dwarf2-frame-tailcall.c \ dwarf2expr.c \ @@ -1213,10 +1215,12 @@ HFILES_NO_SRCDIR = \ dictionary.h \ disasm.h \ dummy-frame.h \ + dwarf-index-common.h \ dwarf2-frame.h \ dwarf2-frame-tailcall.h \ dwarf2expr.h \ dwarf2loc.h \ + dwarf2read.h \ event-loop.h \ event-top.h \ exceptions.h \ diff --git a/gdb/dwarf-index-common.c b/gdb/dwarf-index-common.c new file mode 100644 index 0000000000..991f640cd4 --- /dev/null +++ b/gdb/dwarf-index-common.c @@ -0,0 +1,56 @@ +/* Things needed for both reading and writing DWARF indices. + + Copyright (C) 1994-2018 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include "defs.h" +#include "dwarf-index-common.h" + +/* See dwarf-index-common.h. */ + +hashval_t +mapped_index_string_hash (int index_version, const void *p) +{ + const unsigned char *str = (const unsigned char *) p; + hashval_t r = 0; + unsigned char c; + + while ((c = *str++) != 0) + { + if (index_version >= 5) + c = tolower (c); + r = r * 67 + c - 113; + } + + return r; +} + +/* See dwarf-index-common.h. */ + +uint32_t +dwarf5_djb_hash (const char *str_) +{ + const unsigned char *str = (const unsigned char *) str_; + + /* Note: tolower here ignores UTF-8, which isn't fully compliant. + See http://dwarfstd.org/ShowIssue.php?issue=161027.1. */ + + uint32_t hash = 5381; + while (int c = *str++) + hash = hash * 33 + tolower (c); + return hash; +} diff --git a/gdb/dwarf-index-common.h b/gdb/dwarf-index-common.h new file mode 100644 index 0000000000..32774c7bf6 --- /dev/null +++ b/gdb/dwarf-index-common.h @@ -0,0 +1,63 @@ +/* Things needed for both reading and writing DWARF indices. + + Copyright (C) 1994-2018 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef DWARF_INDEX_COMMON_H +#define DWARF_INDEX_COMMON_H + +/* All offsets in the index are of this type. It must be + architecture-independent. 
*/ +typedef uint32_t offset_type; + +#if WORDS_BIGENDIAN + +/* Convert VALUE between big- and little-endian. */ + +static inline offset_type +byte_swap (offset_type value) +{ + offset_type result; + + result = (value & 0xff) << 24; + result |= (value & 0xff00) << 8; + result |= (value & 0xff0000) >> 8; + result |= (value & 0xff000000) >> 24; + return result; +} + +#define MAYBE_SWAP(V) byte_swap (V) + +#else +#define MAYBE_SWAP(V) static_cast (V) +#endif /* WORDS_BIGENDIAN */ + +/* The hash function for strings in the mapped index. This is the same as + SYMBOL_HASH_NEXT, but we keep a separate copy to maintain control over the + implementation. This is necessary because the hash function is tied to the + format of the mapped index file. The hash values do not have to match with + SYMBOL_HASH_NEXT. + + Use INT_MAX for INDEX_VERSION if you generate the current index format. */ + +hashval_t mapped_index_string_hash (int index_version, const void *p); + +/* Symbol name hashing function as specified by DWARF-5. */ + +uint32_t dwarf5_djb_hash (const char *str_); + +#endif /* DWARF_INDEX_COMMON_H */ diff --git a/gdb/dwarf-index-write.c b/gdb/dwarf-index-write.c new file mode 100644 index 0000000000..3059e0b776 --- /dev/null +++ b/gdb/dwarf-index-write.c @@ -0,0 +1,1684 @@ +/* DWARF index writing support for GDB. + + Copyright (C) 1994-2018 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include "defs.h" + +#include "addrmap.h" +#include "cli/cli-decode.h" +#include "common/byte-vector.h" +#include "common/filestuff.h" +#include "common/gdb_unlinker.h" +#include "complaints.h" +#include "dwarf-index-common.h" +#include "dwarf2.h" +#include "dwarf2read.h" +#include "gdb/gdb-index.h" +#include "gdbcmd.h" +#include "objfiles.h" +#include "psympriv.h" + +#include +#include +#include +#include + +/* The suffix for an index file. */ +#define INDEX4_SUFFIX ".gdb-index" +#define INDEX5_SUFFIX ".debug_names" +#define DEBUG_STR_SUFFIX ".debug_str" + +/* Ensure only legit values are used. */ +#define DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE(cu_index, value) \ + do { \ + gdb_assert ((unsigned int) (value) <= 1); \ + GDB_INDEX_SYMBOL_STATIC_SET_VALUE((cu_index), (value)); \ + } while (0) + +/* Ensure only legit values are used. */ +#define DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE(cu_index, value) \ + do { \ + gdb_assert ((value) >= GDB_INDEX_SYMBOL_KIND_TYPE \ + && (value) <= GDB_INDEX_SYMBOL_KIND_OTHER); \ + GDB_INDEX_SYMBOL_KIND_SET_VALUE((cu_index), (value)); \ + } while (0) + +/* Ensure we don't use more than the allotted number of bits for the CU. */ +#define DW2_GDB_INDEX_CU_SET_VALUE(cu_index, value) \ + do { \ + gdb_assert (((value) & ~GDB_INDEX_CU_MASK) == 0); \ + GDB_INDEX_CU_SET_VALUE((cu_index), (value)); \ + } while (0) + +/* The "save gdb-index" command. */ + +/* Write SIZE bytes from the buffer pointed to by DATA to FILE, with + error checking. */ + +static void +file_write (FILE *file, const void *data, size_t size) +{ + if (fwrite (data, 1, size, file) != size) + error (_("couldn't data write to file")); +} + +/* Write the contents of VEC to FILE, with error checking. 
*/ + +template +static void +file_write (FILE *file, const std::vector &vec) +{ + file_write (file, vec.data (), vec.size () * sizeof (vec[0])); +} + +/* In-memory buffer to prepare data to be written later to a file. */ +class data_buf +{ +public: + /* Copy DATA to the end of the buffer. */ + template + void append_data (const T &data) + { + std::copy (reinterpret_cast (&data), + reinterpret_cast (&data + 1), + grow (sizeof (data))); + } + + /* Copy CSTR (a zero-terminated string) to the end of buffer. The + terminating zero is appended too. */ + void append_cstr0 (const char *cstr) + { + const size_t size = strlen (cstr) + 1; + std::copy (cstr, cstr + size, grow (size)); + } + + /* Store INPUT as ULEB128 to the end of buffer. */ + void append_unsigned_leb128 (ULONGEST input) + { + for (;;) + { + gdb_byte output = input & 0x7f; + input >>= 7; + if (input) + output |= 0x80; + append_data (output); + if (input == 0) + break; + } + } + + /* Accept a host-format integer in VAL and append it to the buffer + as a target-format integer which is LEN bytes long. */ + void append_uint (size_t len, bfd_endian byte_order, ULONGEST val) + { + ::store_unsigned_integer (grow (len), len, byte_order, val); + } + + /* Return the size of the buffer. */ + size_t size () const + { + return m_vec.size (); + } + + /* Return true iff the buffer is empty. */ + bool empty () const + { + return m_vec.empty (); + } + + /* Write the buffer to FILE. */ + void file_write (FILE *file) const + { + ::file_write (file, m_vec); + } + +private: + /* Grow SIZE bytes at the end of the buffer. Returns a pointer to + the start of the new block. */ + gdb_byte *grow (size_t size) + { + m_vec.resize (m_vec.size () + size); + return &*m_vec.end () - size; + } + + gdb::byte_vector m_vec; +}; + +/* An entry in the symbol table. */ +struct symtab_index_entry +{ + /* The name of the symbol. */ + const char *name; + /* The offset of the name in the constant pool. 
*/ + offset_type index_offset; + /* A sorted vector of the indices of all the CUs that hold an object + of this name. */ + std::vector cu_indices; +}; + +/* The symbol table. This is a power-of-2-sized hash table. */ +struct mapped_symtab +{ + mapped_symtab () + { + data.resize (1024); + } + + offset_type n_elements = 0; + std::vector data; +}; + +/* Find a slot in SYMTAB for the symbol NAME. Returns a reference to + the slot. + + Function is used only during write_hash_table so no index format backward + compatibility is needed. */ + +static symtab_index_entry & +find_slot (struct mapped_symtab *symtab, const char *name) +{ + offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name); + + index = hash & (symtab->data.size () - 1); + step = ((hash * 17) & (symtab->data.size () - 1)) | 1; + + for (;;) + { + if (symtab->data[index].name == NULL + || strcmp (name, symtab->data[index].name) == 0) + return symtab->data[index]; + index = (index + step) & (symtab->data.size () - 1); + } +} + +/* Expand SYMTAB's hash table. */ + +static void +hash_expand (struct mapped_symtab *symtab) +{ + auto old_entries = std::move (symtab->data); + + symtab->data.clear (); + symtab->data.resize (old_entries.size () * 2); + + for (auto &it : old_entries) + if (it.name != NULL) + { + auto &ref = find_slot (symtab, it.name); + ref = std::move (it); + } +} + +/* Add an entry to SYMTAB. NAME is the name of the symbol. + CU_INDEX is the index of the CU in which the symbol appears. + IS_STATIC is one if the symbol is static, otherwise zero (global). */ + +static void +add_index_entry (struct mapped_symtab *symtab, const char *name, + int is_static, gdb_index_symbol_kind kind, + offset_type cu_index) +{ + offset_type cu_index_and_attrs; + + ++symtab->n_elements; + if (4 * symtab->n_elements / 3 >= symtab->data.size ()) + hash_expand (symtab); + + symtab_index_entry &slot = find_slot (symtab, name); + if (slot.name == NULL) + { + slot.name = name; + /* index_offset is set later. 
*/ + } + + cu_index_and_attrs = 0; + DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index); + DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static); + DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind); + + /* We don't want to record an index value twice as we want to avoid the + duplication. + We process all global symbols and then all static symbols + (which would allow us to avoid the duplication by only having to check + the last entry pushed), but a symbol could have multiple kinds in one CU. + To keep things simple we don't worry about the duplication here and + sort and uniquify the list after we've processed all symbols. */ + slot.cu_indices.push_back (cu_index_and_attrs); +} + +/* Sort and remove duplicates of all symbols' cu_indices lists. */ + +static void +uniquify_cu_indices (struct mapped_symtab *symtab) +{ + for (auto &entry : symtab->data) + { + if (entry.name != NULL && !entry.cu_indices.empty ()) + { + auto &cu_indices = entry.cu_indices; + std::sort (cu_indices.begin (), cu_indices.end ()); + auto from = std::unique (cu_indices.begin (), cu_indices.end ()); + cu_indices.erase (from, cu_indices.end ()); + } + } +} + +/* A form of 'const char *' suitable for container keys. Only the + pointer is stored. The strings themselves are compared, not the + pointers. */ +class c_str_view +{ +public: + c_str_view (const char *cstr) + : m_cstr (cstr) + {} + + bool operator== (const c_str_view &other) const + { + return strcmp (m_cstr, other.m_cstr) == 0; + } + + /* Return the underlying C string. Note, the returned string is + only a reference with lifetime of this object. */ + const char *c_str () const + { + return m_cstr; + } + +private: + friend class c_str_view_hasher; + const char *const m_cstr; +}; + +/* A std::unordered_map::hasher for c_str_view that uses the right + hash function for strings in a mapped index. 
*/ +class c_str_view_hasher +{ +public: + size_t operator () (const c_str_view &x) const + { + return mapped_index_string_hash (INT_MAX, x.m_cstr); + } +}; + +/* A std::unordered_map::hasher for std::vector<>. */ +template +class vector_hasher +{ +public: + size_t operator () (const std::vector &key) const + { + return iterative_hash (key.data (), + sizeof (key.front ()) * key.size (), 0); + } +}; + +/* Write the mapped hash table SYMTAB to the data buffer OUTPUT, with + constant pool entries going into the data buffer CPOOL. */ + +static void +write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool) +{ + { + /* Elements are sorted vectors of the indices of all the CUs that + hold an object of this name. */ + std::unordered_map, offset_type, + vector_hasher> + symbol_hash_table; + + /* We add all the index vectors to the constant pool first, to + ensure alignment is ok. */ + for (symtab_index_entry &entry : symtab->data) + { + if (entry.name == NULL) + continue; + gdb_assert (entry.index_offset == 0); + + /* Finding before inserting is faster than always trying to + insert, because inserting always allocates a node, does the + lookup, and then destroys the new node if another node + already had the same key. C++17 try_emplace will avoid + this. */ + const auto found + = symbol_hash_table.find (entry.cu_indices); + if (found != symbol_hash_table.end ()) + { + entry.index_offset = found->second; + continue; + } + + symbol_hash_table.emplace (entry.cu_indices, cpool.size ()); + entry.index_offset = cpool.size (); + cpool.append_data (MAYBE_SWAP (entry.cu_indices.size ())); + for (const auto index : entry.cu_indices) + cpool.append_data (MAYBE_SWAP (index)); + } + } + + /* Now write out the hash table. 
*/ + std::unordered_map str_table; + for (const auto &entry : symtab->data) + { + offset_type str_off, vec_off; + + if (entry.name != NULL) + { + const auto insertpair = str_table.emplace (entry.name, cpool.size ()); + if (insertpair.second) + cpool.append_cstr0 (entry.name); + str_off = insertpair.first->second; + vec_off = entry.index_offset; + } + else + { + /* While 0 is a valid constant pool index, it is not valid + to have 0 for both offsets. */ + str_off = 0; + vec_off = 0; + } + + output.append_data (MAYBE_SWAP (str_off)); + output.append_data (MAYBE_SWAP (vec_off)); + } +} + +typedef std::unordered_map psym_index_map; + +/* Helper struct for building the address table. */ +struct addrmap_index_data +{ + addrmap_index_data (data_buf &addr_vec_, psym_index_map &cu_index_htab_) + : addr_vec (addr_vec_), cu_index_htab (cu_index_htab_) + {} + + struct objfile *objfile; + data_buf &addr_vec; + psym_index_map &cu_index_htab; + + /* Non-zero if the previous_* fields are valid. + We can't write an entry until we see the next entry (since it is only then + that we know the end of the entry). */ + int previous_valid; + /* Index of the CU in the table of all CUs in the index file. */ + unsigned int previous_cu_index; + /* Start address of the CU. */ + CORE_ADDR previous_cu_start; +}; + +/* Write an address entry to ADDR_VEC. */ + +static void +add_address_entry (struct objfile *objfile, data_buf &addr_vec, + CORE_ADDR start, CORE_ADDR end, unsigned int cu_index) +{ + CORE_ADDR baseaddr; + + baseaddr = ANOFFSET (objfile->section_offsets, SECT_OFF_TEXT (objfile)); + + addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start - baseaddr); + addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end - baseaddr); + addr_vec.append_data (MAYBE_SWAP (cu_index)); +} + +/* Worker function for traversing an addrmap to build the address table. 
*/ + +static int +add_address_entry_worker (void *datap, CORE_ADDR start_addr, void *obj) +{ + struct addrmap_index_data *data = (struct addrmap_index_data *) datap; + struct partial_symtab *pst = (struct partial_symtab *) obj; + + if (data->previous_valid) + add_address_entry (data->objfile, data->addr_vec, + data->previous_cu_start, start_addr, + data->previous_cu_index); + + data->previous_cu_start = start_addr; + if (pst != NULL) + { + const auto it = data->cu_index_htab.find (pst); + gdb_assert (it != data->cu_index_htab.cend ()); + data->previous_cu_index = it->second; + data->previous_valid = 1; + } + else + data->previous_valid = 0; + + return 0; +} + +/* Write OBJFILE's address map to ADDR_VEC. + CU_INDEX_HTAB is used to map addrmap entries to their CU indices + in the index file. */ + +static void +write_address_map (struct objfile *objfile, data_buf &addr_vec, + psym_index_map &cu_index_htab) +{ + struct addrmap_index_data addrmap_index_data (addr_vec, cu_index_htab); + + /* When writing the address table, we have to cope with the fact that + the addrmap iterator only provides the start of a region; we have to + wait until the next invocation to get the start of the next region. */ + + addrmap_index_data.objfile = objfile; + addrmap_index_data.previous_valid = 0; + + addrmap_foreach (objfile->psymtabs_addrmap, add_address_entry_worker, + &addrmap_index_data); + + /* It's highly unlikely the last entry (end address = 0xff...ff) + is valid, but we should still handle it. + The end address is recorded as the start of the next region, but that + doesn't work here. To cope we pass 0xff...ff, this is a rare situation + anyway. */ + if (addrmap_index_data.previous_valid) + add_address_entry (objfile, addr_vec, + addrmap_index_data.previous_cu_start, (CORE_ADDR) -1, + addrmap_index_data.previous_cu_index); +} + +/* Return the symbol kind of PSYM. 
*/ + +static gdb_index_symbol_kind +symbol_kind (struct partial_symbol *psym) +{ + domain_enum domain = PSYMBOL_DOMAIN (psym); + enum address_class aclass = PSYMBOL_CLASS (psym); + + switch (domain) + { + case VAR_DOMAIN: + switch (aclass) + { + case LOC_BLOCK: + return GDB_INDEX_SYMBOL_KIND_FUNCTION; + case LOC_TYPEDEF: + return GDB_INDEX_SYMBOL_KIND_TYPE; + case LOC_COMPUTED: + case LOC_CONST_BYTES: + case LOC_OPTIMIZED_OUT: + case LOC_STATIC: + return GDB_INDEX_SYMBOL_KIND_VARIABLE; + case LOC_CONST: + /* Note: It's currently impossible to recognize psyms as enum values + short of reading the type info. For now punt. */ + return GDB_INDEX_SYMBOL_KIND_VARIABLE; + default: + /* There are other LOC_FOO values that one might want to classify + as variables, but dwarf2read.c doesn't currently use them. */ + return GDB_INDEX_SYMBOL_KIND_OTHER; + } + case STRUCT_DOMAIN: + return GDB_INDEX_SYMBOL_KIND_TYPE; + default: + return GDB_INDEX_SYMBOL_KIND_OTHER; + } +} + +/* Add a list of partial symbols to SYMTAB. */ + +static void +write_psymbols (struct mapped_symtab *symtab, + std::unordered_set &psyms_seen, + struct partial_symbol **psymp, + int count, + offset_type cu_index, + int is_static) +{ + for (; count-- > 0; ++psymp) + { + struct partial_symbol *psym = *psymp; + + if (SYMBOL_LANGUAGE (psym) == language_ada) + error (_("Ada is not currently supported by the index")); + + /* Only add a given psymbol once. */ + if (psyms_seen.insert (psym).second) + { + gdb_index_symbol_kind kind = symbol_kind (psym); + + add_index_entry (symtab, SYMBOL_SEARCH_NAME (psym), + is_static, kind, cu_index); + } + } +} + +/* A helper struct used when iterating over debug_types. 
*/ +struct signatured_type_index_data +{ + signatured_type_index_data (data_buf &types_list_, + std::unordered_set &psyms_seen_) + : types_list (types_list_), psyms_seen (psyms_seen_) + {} + + struct objfile *objfile; + struct mapped_symtab *symtab; + data_buf &types_list; + std::unordered_set &psyms_seen; + int cu_index; +}; + +/* A helper function that writes a single signatured_type to an + obstack. */ + +static int +write_one_signatured_type (void **slot, void *d) +{ + struct signatured_type_index_data *info + = (struct signatured_type_index_data *) d; + struct signatured_type *entry = (struct signatured_type *) *slot; + struct partial_symtab *psymtab = entry->per_cu.v.psymtab; + + write_psymbols (info->symtab, + info->psyms_seen, + &info->objfile->global_psymbols[psymtab->globals_offset], + psymtab->n_global_syms, info->cu_index, + 0); + write_psymbols (info->symtab, + info->psyms_seen, + &info->objfile->static_psymbols[psymtab->statics_offset], + psymtab->n_static_syms, info->cu_index, + 1); + + info->types_list.append_uint (8, BFD_ENDIAN_LITTLE, + to_underlying (entry->per_cu.sect_off)); + info->types_list.append_uint (8, BFD_ENDIAN_LITTLE, + to_underlying (entry->type_offset_in_tu)); + info->types_list.append_uint (8, BFD_ENDIAN_LITTLE, entry->signature); + + ++info->cu_index; + + return 1; +} + +/* Recurse into all "included" dependencies and count their symbols as + if they appeared in this psymtab. */ + +static void +recursively_count_psymbols (struct partial_symtab *psymtab, + size_t &psyms_seen) +{ + for (int i = 0; i < psymtab->number_of_dependencies; ++i) + if (psymtab->dependencies[i]->user != NULL) + recursively_count_psymbols (psymtab->dependencies[i], + psyms_seen); + + psyms_seen += psymtab->n_global_syms; + psyms_seen += psymtab->n_static_syms; +} + +/* Recurse into all "included" dependencies and write their symbols as + if they appeared in this psymtab. 
*/ + +static void +recursively_write_psymbols (struct objfile *objfile, + struct partial_symtab *psymtab, + struct mapped_symtab *symtab, + std::unordered_set &psyms_seen, + offset_type cu_index) +{ + int i; + + for (i = 0; i < psymtab->number_of_dependencies; ++i) + if (psymtab->dependencies[i]->user != NULL) + recursively_write_psymbols (objfile, psymtab->dependencies[i], + symtab, psyms_seen, cu_index); + + write_psymbols (symtab, + psyms_seen, + &objfile->global_psymbols[psymtab->globals_offset], + psymtab->n_global_syms, cu_index, + 0); + write_psymbols (symtab, + psyms_seen, + &objfile->static_psymbols[psymtab->statics_offset], + psymtab->n_static_syms, cu_index, + 1); +} + +/* DWARF-5 .debug_names builder. */ +class debug_names +{ +public: + debug_names (struct dwarf2_per_objfile *dwarf2_per_objfile, bool is_dwarf64, + bfd_endian dwarf5_byte_order) + : m_dwarf5_byte_order (dwarf5_byte_order), + m_dwarf32 (dwarf5_byte_order), + m_dwarf64 (dwarf5_byte_order), + m_dwarf (is_dwarf64 + ? static_cast (m_dwarf64) + : static_cast (m_dwarf32)), + m_name_table_string_offs (m_dwarf.name_table_string_offs), + m_name_table_entry_offs (m_dwarf.name_table_entry_offs), + m_debugstrlookup (dwarf2_per_objfile) + {} + + int dwarf5_offset_size () const + { + const bool dwarf5_is_dwarf64 = &m_dwarf == &m_dwarf64; + return dwarf5_is_dwarf64 ? 8 : 4; + } + + /* Is this symbol from DW_TAG_compile_unit or DW_TAG_type_unit? */ + enum class unit_kind { cu, tu }; + + /* Insert one symbol. */ + void insert (const partial_symbol *psym, int cu_index, bool is_static, + unit_kind kind) + { + const int dwarf_tag = psymbol_tag (psym); + if (dwarf_tag == 0) + return; + const char *const name = SYMBOL_SEARCH_NAME (psym); + const auto insertpair + = m_name_to_value_set.emplace (c_str_view (name), + std::set ()); + std::set &value_set = insertpair.first->second; + value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static, kind)); + } + + /* Build all the tables. 
All symbols must be already inserted. + This function does not call file_write, caller has to do it + afterwards. */ + void build () + { + /* Verify the build method has not been called twice. */ + gdb_assert (m_abbrev_table.empty ()); + const size_t name_count = m_name_to_value_set.size (); + m_bucket_table.resize + (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3)))); + m_hash_table.reserve (name_count); + m_name_table_string_offs.reserve (name_count); + m_name_table_entry_offs.reserve (name_count); + + /* Map each hash of symbol to its name and value. */ + struct hash_it_pair + { + uint32_t hash; + decltype (m_name_to_value_set)::const_iterator it; + }; + std::vector> bucket_hash; + bucket_hash.resize (m_bucket_table.size ()); + for (decltype (m_name_to_value_set)::const_iterator it + = m_name_to_value_set.cbegin (); + it != m_name_to_value_set.cend (); + ++it) + { + const char *const name = it->first.c_str (); + const uint32_t hash = dwarf5_djb_hash (name); + hash_it_pair hashitpair; + hashitpair.hash = hash; + hashitpair.it = it; + auto &slot = bucket_hash[hash % bucket_hash.size()]; + slot.push_front (std::move (hashitpair)); + } + for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix) + { + const std::forward_list &hashitlist + = bucket_hash[bucket_ix]; + if (hashitlist.empty ()) + continue; + uint32_t &bucket_slot = m_bucket_table[bucket_ix]; + /* The hashes array is indexed starting at 1. 
*/ + store_unsigned_integer (reinterpret_cast (&bucket_slot), + sizeof (bucket_slot), m_dwarf5_byte_order, + m_hash_table.size () + 1); + for (const hash_it_pair &hashitpair : hashitlist) + { + m_hash_table.push_back (0); + store_unsigned_integer (reinterpret_cast + (&m_hash_table.back ()), + sizeof (m_hash_table.back ()), + m_dwarf5_byte_order, hashitpair.hash); + const c_str_view &name = hashitpair.it->first; + const std::set &value_set = hashitpair.it->second; + m_name_table_string_offs.push_back_reorder + (m_debugstrlookup.lookup (name.c_str ())); + m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ()); + gdb_assert (!value_set.empty ()); + for (const symbol_value &value : value_set) + { + int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag, + value.is_static, + value.kind)]; + if (idx == 0) + { + idx = m_idx_next++; + m_abbrev_table.append_unsigned_leb128 (idx); + m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag); + m_abbrev_table.append_unsigned_leb128 + (value.kind == unit_kind::cu ? DW_IDX_compile_unit + : DW_IDX_type_unit); + m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata); + m_abbrev_table.append_unsigned_leb128 (value.is_static + ? DW_IDX_GNU_internal + : DW_IDX_GNU_external); + m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present); + + /* Terminate attributes list. */ + m_abbrev_table.append_unsigned_leb128 (0); + m_abbrev_table.append_unsigned_leb128 (0); + } + + m_entry_pool.append_unsigned_leb128 (idx); + m_entry_pool.append_unsigned_leb128 (value.cu_index); + } + + /* Terminate the list of CUs. */ + m_entry_pool.append_unsigned_leb128 (0); + } + } + gdb_assert (m_hash_table.size () == name_count); + + /* Terminate tags list. */ + m_abbrev_table.append_unsigned_leb128 (0); + } + + /* Return .debug_names bucket count. This must be called only after + calling the build method. */ + uint32_t bucket_count () const + { + /* Verify the build method has been already called. 
*/ + gdb_assert (!m_abbrev_table.empty ()); + const uint32_t retval = m_bucket_table.size (); + + /* Check for overflow. */ + gdb_assert (retval == m_bucket_table.size ()); + return retval; + } + + /* Return .debug_names names count. This must be called only after + calling the build method. */ + uint32_t name_count () const + { + /* Verify the build method has been already called. */ + gdb_assert (!m_abbrev_table.empty ()); + const uint32_t retval = m_hash_table.size (); + + /* Check for overflow. */ + gdb_assert (retval == m_hash_table.size ()); + return retval; + } + + /* Return number of bytes of .debug_names abbreviation table. This + must be called only after calling the build method. */ + uint32_t abbrev_table_bytes () const + { + gdb_assert (!m_abbrev_table.empty ()); + return m_abbrev_table.size (); + } + + /* Recurse into all "included" dependencies and store their symbols + as if they appeared in this psymtab. */ + void recursively_write_psymbols + (struct objfile *objfile, + struct partial_symtab *psymtab, + std::unordered_set &psyms_seen, + int cu_index) + { + for (int i = 0; i < psymtab->number_of_dependencies; ++i) + if (psymtab->dependencies[i]->user != NULL) + recursively_write_psymbols (objfile, psymtab->dependencies[i], + psyms_seen, cu_index); + + write_psymbols (psyms_seen, + &objfile->global_psymbols[psymtab->globals_offset], + psymtab->n_global_syms, cu_index, false, unit_kind::cu); + write_psymbols (psyms_seen, + &objfile->static_psymbols[psymtab->statics_offset], + psymtab->n_static_syms, cu_index, true, unit_kind::cu); + } + + /* Return number of bytes the .debug_names section will have. This + must be called only after calling the build method. */ + size_t bytes () const + { + /* Verify the build method has been already called. 
*/ + gdb_assert (!m_abbrev_table.empty ()); + size_t expected_bytes = 0; + expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]); + expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]); + expected_bytes += m_name_table_string_offs.bytes (); + expected_bytes += m_name_table_entry_offs.bytes (); + expected_bytes += m_abbrev_table.size (); + expected_bytes += m_entry_pool.size (); + return expected_bytes; + } + + /* Write .debug_names to FILE_NAMES and .debug_str addition to + FILE_STR. This must be called only after calling the build + method. */ + void file_write (FILE *file_names, FILE *file_str) const + { + /* Verify the build method has been already called. */ + gdb_assert (!m_abbrev_table.empty ()); + ::file_write (file_names, m_bucket_table); + ::file_write (file_names, m_hash_table); + m_name_table_string_offs.file_write (file_names); + m_name_table_entry_offs.file_write (file_names); + m_abbrev_table.file_write (file_names); + m_entry_pool.file_write (file_names); + m_debugstrlookup.file_write (file_str); + } + + /* A helper user data for write_one_signatured_type. */ + class write_one_signatured_type_data + { + public: + write_one_signatured_type_data (debug_names &nametable_, + signatured_type_index_data &&info_) + : nametable (nametable_), info (std::move (info_)) + {} + debug_names &nametable; + struct signatured_type_index_data info; + }; + + /* A helper function to pass write_one_signatured_type to + htab_traverse_noresize. */ + static int + write_one_signatured_type (void **slot, void *d) + { + write_one_signatured_type_data *data = (write_one_signatured_type_data *) d; + struct signatured_type_index_data *info = &data->info; + struct signatured_type *entry = (struct signatured_type *) *slot; + + data->nametable.write_one_signatured_type (entry, info); + + return 1; + } + +private: + + /* Storage for symbol names mapping them to their .debug_str section + offsets. 
*/ + class debug_str_lookup + { + public: + + /* Object constructor to be called for current DWARF2_PER_OBJFILE. + All .debug_str section strings are automatically stored together with their offsets in the section; a complaint is issued for every duplicate string. Nothing is stored if the section has no buffer. */ + debug_str_lookup (struct dwarf2_per_objfile *dwarf2_per_objfile) + : m_abfd (dwarf2_per_objfile->objfile->obfd), + m_dwarf2_per_objfile (dwarf2_per_objfile) + { + dwarf2_read_section (dwarf2_per_objfile->objfile, + &dwarf2_per_objfile->str); + if (dwarf2_per_objfile->str.buffer == NULL) + return; + for (const gdb_byte *data = dwarf2_per_objfile->str.buffer; + data < (dwarf2_per_objfile->str.buffer + + dwarf2_per_objfile->str.size);) + { + const char *const s = reinterpret_cast (data); + const auto insertpair + = m_str_table.emplace (c_str_view (s), + data - dwarf2_per_objfile->str.buffer); + if (!insertpair.second) + complaint (&symfile_complaints, + _("Duplicate string \"%s\" in " + ".debug_str section [in module %s]"), + s, bfd_get_filename (m_abfd)); + data += strlen (s) + 1; + } + } + + /* Return offset of symbol name S in the .debug_str section. Add + such symbol to the section's end if it does not exist there + yet; its offset is then the original section size plus the bytes already queued for appending. */ + size_t lookup (const char *s) + { + const auto it = m_str_table.find (c_str_view (s)); + if (it != m_str_table.end ()) + return it->second; + const size_t offset = (m_dwarf2_per_objfile->str.size + + m_str_add_buf.size ()); + m_str_table.emplace (c_str_view (s), offset); + m_str_add_buf.append_cstr0 (s); + return offset; + } + + /* Write to FILE the strings added by lookup, i.e. the data to be appended at the end of the .debug_str section. */ + void file_write (FILE *file) const + { + m_str_add_buf.file_write (file); + } + + private: + std::unordered_map m_str_table; + bfd *const m_abfd; + struct dwarf2_per_objfile *m_dwarf2_per_objfile; + + /* Data to add at the end of .debug_str for new needed symbol names. */ + data_buf m_str_add_buf; + }; + + /* Container to map used DWARF tags to their .debug_names abbreviation + tags.
*/ + class index_key + { + public: + index_key (int dwarf_tag_, bool is_static_, unit_kind kind_) + : dwarf_tag (dwarf_tag_), is_static (is_static_), kind (kind_) + { + } + + bool + operator== (const index_key &other) const + { + return (dwarf_tag == other.dwarf_tag && is_static == other.is_static + && kind == other.kind); + } + + const int dwarf_tag; + const bool is_static; + const unit_kind kind; + }; + + /* Provide std::unordered_map::hasher for index_key. Only DWARF_TAG and IS_STATIC contribute to the hash value; keys that differ only in KIND hash alike and are told apart by index_key::operator==. */ + class index_key_hasher + { + public: + size_t + operator () (const index_key &key) const + { + return (std::hash() (key.dwarf_tag) << 1) | key.is_static; + } + }; + + /* Parameters of one symbol entry. operator< orders entries lexicographically by DWARF_TAG, IS_STATIC, KIND and finally CU_INDEX. */ + class symbol_value + { + public: + const int dwarf_tag, cu_index; + const bool is_static; + const unit_kind kind; + + symbol_value (int dwarf_tag_, int cu_index_, bool is_static_, + unit_kind kind_) + : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_), + kind (kind_) + {} + + bool + operator< (const symbol_value &other) const + { +#define X(n) \ + do \ + { \ + if (n < other.n) \ + return true; \ + if (n > other.n) \ + return false; \ + } \ + while (0) + X (dwarf_tag); + X (is_static); + X (kind); + X (cu_index); +#undef X + return false; + } + }; + + /* Abstract base class to unify DWARF-32 and DWARF-64 name table + output. */ + class offset_vec + { + protected: + const bfd_endian dwarf5_byte_order; + public: + explicit offset_vec (bfd_endian dwarf5_byte_order_) + : dwarf5_byte_order (dwarf5_byte_order_) + {} + + /* Call std::vector::reserve for NELEM elements. */ + virtual void reserve (size_t nelem) = 0; + + /* Call std::vector::push_back with store_unsigned_integer byte + reordering for ELEM. */ + virtual void push_back_reorder (size_t elem) = 0; + + /* Return the expected output size in bytes. */ + virtual size_t bytes () const = 0; + + /* Write the name table to FILE. */ + virtual void file_write (FILE *file) const = 0; + }; + + /* Template to unify DWARF-32 and DWARF-64 output.
*/ + template + class offset_vec_tmpl : public offset_vec + { + public: + explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_) + : offset_vec (dwarf5_byte_order_) + {} + + /* Implement offset_vec::reserve. */ + void reserve (size_t nelem) override + { + m_vec.reserve (nelem); + } + + /* Implement offset_vec::push_back_reorder. ELEM is appended first and then rewritten in place in DWARF5_BYTE_ORDER. */ + void push_back_reorder (size_t elem) override + { + m_vec.push_back (elem); + /* Check for overflow: ELEM must fit in the vector's element type. */ + gdb_assert (m_vec.back () == elem); + store_unsigned_integer (reinterpret_cast (&m_vec.back ()), + sizeof (m_vec.back ()), dwarf5_byte_order, elem); + } + + /* Implement offset_vec::bytes. */ + size_t bytes () const override + { + return m_vec.size () * sizeof (m_vec[0]); + } + + /* Implement offset_vec::file_write. */ + void file_write (FILE *file) const override + { + ::file_write (file, m_vec); + } + + private: + std::vector m_vec; + }; + + /* Base class to unify DWARF-32 and DWARF-64 .debug_names output + respecting name table width. It only holds references to the two name table offset vectors. */ + class dwarf + { + public: + offset_vec &name_table_string_offs, &name_table_entry_offs; + + dwarf (offset_vec &name_table_string_offs_, + offset_vec &name_table_entry_offs_) + : name_table_string_offs (name_table_string_offs_), + name_table_entry_offs (name_table_entry_offs_) + { + } + }; + + /* Template to unify DWARF-32 and DWARF-64 .debug_names output + respecting name table width. The dwarf base class is handed references to the two members declared below before those members are constructed; that works because the base constructor only binds the references. */ + template + class dwarf_tmpl : public dwarf + { + public: + explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_) + : dwarf (m_name_table_string_offs, m_name_table_entry_offs), + m_name_table_string_offs (dwarf5_byte_order_), + m_name_table_entry_offs (dwarf5_byte_order_) + {} + + private: + offset_vec_tmpl m_name_table_string_offs; + offset_vec_tmpl m_name_table_entry_offs; + }; + + /* Try to reconstruct original DWARF tag for given partial_symbol. + This function is not DWARF-5 compliant but it is sufficient for + GDB as a DWARF-5 index consumer.
*/ + static int psymbol_tag (const struct partial_symbol *psym) + { + domain_enum domain = PSYMBOL_DOMAIN (psym); + enum address_class aclass = PSYMBOL_CLASS (psym); + + switch (domain) + { + case VAR_DOMAIN: + switch (aclass) + { + case LOC_BLOCK: + return DW_TAG_subprogram; + case LOC_TYPEDEF: + return DW_TAG_typedef; + case LOC_COMPUTED: + case LOC_CONST_BYTES: + case LOC_OPTIMIZED_OUT: + case LOC_STATIC: + return DW_TAG_variable; + case LOC_CONST: + /* Note: It's currently impossible to recognize psyms as enum values + short of reading the type info. For now punt and report a variable. */ + return DW_TAG_variable; + default: + /* There are other LOC_FOO values that one might want to classify + as variables, but dwarf2read.c doesn't currently use them. */ + return DW_TAG_variable; + } + case STRUCT_DOMAIN: + return DW_TAG_structure_type; + default: + return 0; + } + } + + /* Call insert for each of the COUNT partial symbols starting at PSYMP that is not yet in PSYMS_SEEN, and mark it in PSYMS_SEEN. CU_INDEX, IS_STATIC and KIND are passed through to insert. Throws an error as soon as an Ada symbol is encountered. */ + void write_psymbols (std::unordered_set &psyms_seen, + struct partial_symbol **psymp, int count, int cu_index, + bool is_static, unit_kind kind) + { + for (; count-- > 0; ++psymp) + { + struct partial_symbol *psym = *psymp; + + if (SYMBOL_LANGUAGE (psym) == language_ada) + error (_("Ada is not currently supported by the index")); + + /* Only add a given psymbol once. */ + if (psyms_seen.insert (psym).second) + insert (psym, cu_index, is_static, kind); + } + } + + /* A helper function that writes a single signatured_type + to a debug_names.
*/ + void + write_one_signatured_type (struct signatured_type *entry, + struct signatured_type_index_data *info) + { + struct partial_symtab *psymtab = entry->per_cu.v.psymtab; + + write_psymbols (info->psyms_seen, + &info->objfile->global_psymbols[psymtab->globals_offset], + psymtab->n_global_syms, info->cu_index, false, + unit_kind::tu); + write_psymbols (info->psyms_seen, + &info->objfile->static_psymbols[psymtab->statics_offset], + psymtab->n_static_syms, info->cu_index, true, + unit_kind::tu); + + info->types_list.append_uint (dwarf5_offset_size (), m_dwarf5_byte_order, + to_underlying (entry->per_cu.sect_off)); + + ++info->cu_index; + } + + /* Values recorded for each symbol name. */ + std::unordered_map, c_str_view_hasher> + m_name_to_value_set; + + /* Tables of DWARF-5 .debug_names. They are in object file byte + order. */ + std::vector m_bucket_table; + std::vector m_hash_table; + + const bfd_endian m_dwarf5_byte_order; + dwarf_tmpl m_dwarf32; + dwarf_tmpl m_dwarf64; + dwarf &m_dwarf; + offset_vec &m_name_table_string_offs, &m_name_table_entry_offs; + debug_str_lookup m_debugstrlookup; + + /* Map each used .debug_names abbreviation tag parameter to its + index value. */ + std::unordered_map m_indexkey_to_idx; + + /* Next unused .debug_names abbreviation tag for + m_indexkey_to_idx. */ + int m_idx_next = 1; + + /* .debug_names abbreviation table. Several methods also use its non-emptiness as the sign that build has been called. */ + data_buf m_abbrev_table; + + /* .debug_names entry pool. */ + data_buf m_entry_pool; +}; + +/* Return true iff any of the needed offsets does not fit into a 32-bit + .debug_names section, i.e. iff some compilation unit or type unit starts at a section offset of 2^32 or above.
*/ + +static bool +check_dwarf64_offsets (struct dwarf2_per_objfile *dwarf2_per_objfile) +{ + for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) + { + const dwarf2_per_cu_data &per_cu = *dwarf2_per_objfile->all_comp_units[i]; + + if (to_underlying (per_cu.sect_off) >= (static_cast (1) << 32)) + return true; + } + for (int i = 0; i < dwarf2_per_objfile->n_type_units; ++i) + { + const signatured_type &sigtype = *dwarf2_per_objfile->all_type_units[i]; + const dwarf2_per_cu_data &per_cu = sigtype.per_cu; + + if (to_underlying (per_cu.sect_off) >= (static_cast (1) << 32)) + return true; + } + return false; +} + +/* The psyms_seen set is potentially going to be largish (~40k + elements when indexing a -g3 build of GDB itself). Estimate the + number of elements in order to avoid too many rehashes, which + require rebuilding buckets and thus many trips to + malloc/free. Only compilation units that have a psymtab whose USER field is NULL are counted, via recursively_count_psymbols. */ + +static size_t +psyms_seen_size (struct dwarf2_per_objfile *dwarf2_per_objfile) +{ + size_t psyms_count = 0; + for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) + { + struct dwarf2_per_cu_data *per_cu + = dwarf2_per_objfile->all_comp_units[i]; + struct partial_symtab *psymtab = per_cu->v.psymtab; + + if (psymtab != NULL && psymtab->user == NULL) + recursively_count_psymbols (psymtab, psyms_count); + } + /* Generating an index for gdb itself shows a ratio of + TOTAL_SEEN_SYMS/UNIQUE_SYMS of ~5. 4 seems like a good bet. */ + return psyms_count / 4; +} + +/* Write a new .gdb_index section for the objfile of DWARF2_PER_OBJFILE into OUT_FILE. + Return how many bytes were expected to be written into OUT_FILE. */ + +static size_t +write_gdbindex (struct dwarf2_per_objfile *dwarf2_per_objfile, FILE *out_file) +{ + struct objfile *objfile = dwarf2_per_objfile->objfile; + mapped_symtab symtab; + data_buf cu_list; + + /* While we're scanning CU's create a table that maps a psymtab pointer + (which is what addrmap records) to its index (which is what is recorded + in the index file).
This will later be needed to write the address + table. */ + psym_index_map cu_index_htab; + cu_index_htab.reserve (dwarf2_per_objfile->n_comp_units); + + /* The CU list is already sorted, so we don't need to do additional + work here. Also, the debug_types entries do not appear in + all_comp_units, but only in their own hash table. Each CU that is kept contributes its section offset and its length to CU_LIST as two 8-byte little-endian values. */ + + std::unordered_set psyms_seen + (psyms_seen_size (dwarf2_per_objfile)); + for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) + { + struct dwarf2_per_cu_data *per_cu + = dwarf2_per_objfile->all_comp_units[i]; + struct partial_symtab *psymtab = per_cu->v.psymtab; + + /* A CU of a shared file from 'dwz -m' may be unused by this main file. + It may be referenced from a local scope but in such a case it does not + need to be present in .gdb_index. */ + if (psymtab == NULL) + continue; + + if (psymtab->user == NULL) + recursively_write_psymbols (objfile, psymtab, &symtab, + psyms_seen, i); + + const auto insertpair = cu_index_htab.emplace (psymtab, i); + gdb_assert (insertpair.second); + + cu_list.append_uint (8, BFD_ENDIAN_LITTLE, + to_underlying (per_cu->sect_off)); + cu_list.append_uint (8, BFD_ENDIAN_LITTLE, per_cu->length); + } + + /* Dump the address map. */ + data_buf addr_vec; + write_address_map (objfile, addr_vec, cu_index_htab); + + /* Write out the .debug_types entries, if any. Their CU indices start right after those of the compilation units, at n_comp_units. */ + data_buf types_cu_list; + if (dwarf2_per_objfile->signatured_types) + { + signatured_type_index_data sig_data (types_cu_list, + psyms_seen); + + sig_data.objfile = objfile; + sig_data.symtab = &symtab; + sig_data.cu_index = dwarf2_per_objfile->n_comp_units; + htab_traverse_noresize (dwarf2_per_objfile->signatured_types, + write_one_signatured_type, &sig_data); + } + + /* Now that we've processed all symbols we can shrink their cu_indices + lists.
*/ + uniquify_cu_indices (&symtab); + + data_buf symtab_vec, constant_pool; + write_hash_table (&symtab, symtab_vec, constant_pool); + + data_buf contents; + const offset_type size_of_contents = 6 * sizeof (offset_type); + offset_type total_len = size_of_contents; + + /* The version number (8). It is the first of the six offset_type fields of the header; the other five are the offsets written below. */ + contents.append_data (MAYBE_SWAP (8)); + + /* The offset of the CU list from the start of the file. */ + contents.append_data (MAYBE_SWAP (total_len)); + total_len += cu_list.size (); + + /* The offset of the types CU list from the start of the file. */ + contents.append_data (MAYBE_SWAP (total_len)); + total_len += types_cu_list.size (); + + /* The offset of the address table from the start of the file. */ + contents.append_data (MAYBE_SWAP (total_len)); + total_len += addr_vec.size (); + + /* The offset of the symbol table from the start of the file. */ + contents.append_data (MAYBE_SWAP (total_len)); + total_len += symtab_vec.size (); + + /* The offset of the constant pool from the start of the file. After adding its size TOTAL_LEN is the size of the whole index. */ + contents.append_data (MAYBE_SWAP (total_len)); + total_len += constant_pool.size (); + + gdb_assert (contents.size () == size_of_contents); + + contents.file_write (out_file); + cu_list.file_write (out_file); + types_cu_list.file_write (out_file); + addr_vec.file_write (out_file); + symtab_vec.file_write (out_file); + constant_pool.file_write (out_file); + + return total_len; +} + +/* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension. It is four bytes long including the terminating zero; write_debug_names statically asserts that the size is a multiple of 4. */ +static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 }; + +/* Write a new .debug_names section for OBJFILE into OUT_FILE, write + needed addition to .debug_str section to OUT_FILE_STR. Return how + many bytes were expected to be written into OUT_FILE.
*/ + +static size_t +write_debug_names (struct dwarf2_per_objfile *dwarf2_per_objfile, + FILE *out_file, FILE *out_file_str) +{ + const bool dwarf5_is_dwarf64 = check_dwarf64_offsets (dwarf2_per_objfile); + struct objfile *objfile = dwarf2_per_objfile->objfile; + const enum bfd_endian dwarf5_byte_order + = gdbarch_byte_order (get_objfile_arch (objfile)); + + /* The CU list is already sorted, so we don't need to do additional + work here. Also, the debug_types entries do not appear in + all_comp_units, but only in their own hash table. */ + data_buf cu_list; + debug_names nametable (dwarf2_per_objfile, dwarf5_is_dwarf64, + dwarf5_byte_order); + std::unordered_set + psyms_seen (psyms_seen_size (dwarf2_per_objfile)); + for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) + { + const dwarf2_per_cu_data *per_cu = dwarf2_per_objfile->all_comp_units[i]; + partial_symtab *psymtab = per_cu->v.psymtab; + + /* A CU of a shared file from 'dwz -m' may be unused by this main + file. It may be referenced from a local scope but in such a + case it does not need to be present in .debug_names. + NOTE(review): a CU skipped here is not appended to CU_LIST, yet the header below still reports n_comp_units as comp_unit_count, and later CUs keep their original index I -- confirm that this cannot produce an inconsistent index. */ + if (psymtab == NULL) + continue; + + if (psymtab->user == NULL) + nametable.recursively_write_psymbols (objfile, psymtab, psyms_seen, i); + + cu_list.append_uint (nametable.dwarf5_offset_size (), dwarf5_byte_order, + to_underlying (per_cu->sect_off)); + } + + /* Write out the .debug_types entries, if any. Unlike in write_gdbindex, their indices start at 0 here. */ + data_buf types_cu_list; + if (dwarf2_per_objfile->signatured_types) + { + debug_names::write_one_signatured_type_data sig_data (nametable, + signatured_type_index_data (types_cu_list, psyms_seen)); + + sig_data.info.objfile = objfile; + /* The symtab member is used only for gdb_index. */ + sig_data.info.symtab = nullptr; + sig_data.info.cu_index = 0; + htab_traverse_noresize (dwarf2_per_objfile->signatured_types, + debug_names::write_one_signatured_type, + &sig_data); + } + + nametable.build (); + + /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC. The header computed next consists of the unit length (4 bytes, or 12 bytes for DWARF-64), a 2-byte version, 2 bytes of padding, seven 4-byte fields and the augmentation string.
*/ + + const offset_type bytes_of_header + = ((dwarf5_is_dwarf64 ? 12 : 4) + + 2 + 2 + 7 * 4 + + sizeof (dwarf5_gdb_augmentation)); + size_t expected_bytes = 0; + expected_bytes += bytes_of_header; + expected_bytes += cu_list.size (); + expected_bytes += types_cu_list.size (); + expected_bytes += nametable.bytes (); + data_buf header; + + if (!dwarf5_is_dwarf64) + { + const uint64_t size64 = expected_bytes - 4; + gdb_assert (size64 < 0xfffffff0); + header.append_uint (4, dwarf5_byte_order, size64); + } + else + { + header.append_uint (4, dwarf5_byte_order, 0xffffffff); + header.append_uint (8, dwarf5_byte_order, expected_bytes - 12); + } + + /* The version number (5). */ + header.append_uint (2, dwarf5_byte_order, 5); + + /* Padding. */ + header.append_uint (2, dwarf5_byte_order, 0); + + /* comp_unit_count - The number of CUs in the CU list. NOTE(review): this writes n_comp_units even though the loop above leaves CUs without a psymtab out of CU_LIST -- confirm the two always agree. */ + header.append_uint (4, dwarf5_byte_order, dwarf2_per_objfile->n_comp_units); + + /* local_type_unit_count - The number of TUs in the local TU + list. */ + header.append_uint (4, dwarf5_byte_order, dwarf2_per_objfile->n_type_units); + + /* foreign_type_unit_count - The number of TUs in the foreign TU + list. Always written as zero here. */ + header.append_uint (4, dwarf5_byte_order, 0); + + /* bucket_count - The number of hash buckets in the hash lookup + table. */ + header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ()); + + /* name_count - The number of unique names in the index. */ + header.append_uint (4, dwarf5_byte_order, nametable.name_count ()); + + /* abbrev_table_size - The size in bytes of the abbreviations + table. */ + header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ()); + + /* augmentation_string_size - The size in bytes of the augmentation + string. This value is rounded up to a multiple of 4.
*/ + static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, ""); + header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation)); + header.append_data (dwarf5_gdb_augmentation); + + gdb_assert (header.size () == bytes_of_header); + + header.file_write (out_file); + cu_list.file_write (out_file); + types_cu_list.file_write (out_file); + nametable.file_write (out_file, out_file_str); + + return expected_bytes; +} + +/* Assert that FILE's size is EXPECTED_SIZE. Assumes file's seek + position is at the end of the file. FILENAME is used only in the error thrown when the position cannot be obtained. */ + +static void +assert_file_size (FILE *file, const char *filename, size_t expected_size) +{ + const auto file_size = ftell (file); + if (file_size == -1) + error (_("Can't get `%s' size"), filename); + gdb_assert (file_size == expected_size); +} + +/* Create an index file of kind INDEX_KIND for the objfile of DWARF2_PER_OBJFILE in the directory DIR. Returns without writing anything if the objfile has no psymtabs or no psymtabs_addrmap. For dw_index_kind::DEBUG_NAMES a second file holding the .debug_str additions is created as well. */ + +static void +write_psymtabs_to_index (struct dwarf2_per_objfile *dwarf2_per_objfile, + const char *dir, + dw_index_kind index_kind) +{ + struct objfile *objfile = dwarf2_per_objfile->objfile; + + if (dwarf2_per_objfile->using_index) + error (_("Cannot use an index to create the index")); + + if (VEC_length (dwarf2_section_info_def, dwarf2_per_objfile->types) > 1) + error (_("Cannot make an index when the file has multiple .debug_types sections")); + + if (!objfile->psymtabs || !objfile->psymtabs_addrmap) + return; + + struct stat st; + if (stat (objfile_name (objfile), &st) < 0) + perror_with_name (objfile_name (objfile)); + + std::string filename (std::string (dir) + SLASH_STRING + + lbasename (objfile_name (objfile)) + + (index_kind == dw_index_kind::DEBUG_NAMES + ? INDEX5_SUFFIX : INDEX4_SUFFIX)); + + FILE *out_file = gdb_fopen_cloexec (filename.c_str (), "wb").release (); + if (!out_file) + error (_("Can't open `%s' for writing"), filename.c_str ()); + + /* Order matters here; we want FILE to be closed before FILENAME is + unlinked, because on MS-Windows one cannot delete a file that is + still open. The unlinker is therefore declared first, so that it is destroyed last.
(Don't call anything here that might throw until + close_out_file is created.) */ + gdb::unlinker unlink_file (filename.c_str ()); + gdb_file_up close_out_file (out_file); + + if (index_kind == dw_index_kind::DEBUG_NAMES) + { + std::string filename_str (std::string (dir) + SLASH_STRING + + lbasename (objfile_name (objfile)) + + DEBUG_STR_SUFFIX); + FILE *out_file_str + = gdb_fopen_cloexec (filename_str.c_str (), "wb").release (); + if (!out_file_str) + error (_("Can't open `%s' for writing"), filename_str.c_str ()); + gdb::unlinker unlink_file_str (filename_str.c_str ()); + gdb_file_up close_out_file_str (out_file_str); + + const size_t total_len + = write_debug_names (dwarf2_per_objfile, out_file, out_file_str); + assert_file_size (out_file, filename.c_str (), total_len); + + /* We want to keep the .debug_str file too. */ + unlink_file_str.keep (); + } + else + { + const size_t total_len + = write_gdbindex (dwarf2_per_objfile, out_file); + assert_file_size (out_file, filename.c_str (), total_len); + } + + /* No error was thrown while writing, so we want to keep the file. */ + unlink_file.keep (); +} + +/* Implementation of the `save gdb-index' command. ARG is an optional "-dwarf-5 " option (the trailing space is part of what is matched) followed by the output directory. + + Note that the .gdb_index file format used by this command is + documented in the GDB manual. Any changes here must be documented + there. */ + +static void +save_gdb_index_command (const char *arg, int from_tty) +{ + struct objfile *objfile; + const char dwarf5space[] = "-dwarf-5 "; + dw_index_kind index_kind = dw_index_kind::GDB_INDEX; + + if (!arg) + arg = ""; + + arg = skip_spaces (arg); + if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0) + { + index_kind = dw_index_kind::DEBUG_NAMES; + arg += strlen (dwarf5space); + arg = skip_spaces (arg); + } + + if (!*arg) + error (_("usage: save gdb-index [-dwarf-5] DIRECTORY")); + + ALL_OBJFILES (objfile) + { + struct stat st; + + /* If the objfile does not correspond to an actual file, skip it.
*/ + if (stat (objfile_name (objfile), &st) < 0) + continue; + + struct dwarf2_per_objfile *dwarf2_per_objfile + = get_dwarf2_per_objfile (objfile); + + if (dwarf2_per_objfile != NULL) + { + TRY + { + write_psymtabs_to_index (dwarf2_per_objfile, arg, index_kind); + } + CATCH (except, RETURN_MASK_ERROR) + { + exception_fprintf (gdb_stderr, except, + _("Error while writing index for `%s': "), + objfile_name (objfile)); + } + END_CATCH + } + + } +} + +void +_initialize_dwarf_index_write () +{ + cmd_list_element *c = add_cmd ("gdb-index", class_files, + save_gdb_index_command, _("\ +Save a gdb-index file.\n\ +Usage: save gdb-index [-dwarf-5] DIRECTORY\n\ +\n\ +No options create one file with .gdb-index extension for pre-DWARF-5\n\ +compatible .gdb_index section. With -dwarf-5 creates two files with\n\ +extension .debug_names and .debug_str for DWARF-5 .debug_names section."), + &save_cmdlist); + set_cmd_completer (c, filename_completer); +} diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c index c3a502ec44..dfa69d1dbb 100644 --- a/gdb/dwarf2read.c +++ b/gdb/dwarf2read.c @@ -29,6 +29,8 @@ E.g., load_partial_dies, read_partial_die. */ #include "defs.h" +#include "dwarf2read.h" +#include "dwarf-index-common.h" #include "bfd.h" #include "elf-bfd.h" #include "symtab.h" @@ -115,97 +117,6 @@ static int dwarf2_loclist_index; static int dwarf2_locexpr_block_index; static int dwarf2_loclist_block_index; -/* A descriptor for dwarf sections. - - S.ASECTION, SIZE are typically initialized when the objfile is first - scanned. BUFFER, READIN are filled in later when the section is read. - If the section contained compressed data then SIZE is updated to record - the uncompressed size of the section. - - DWP file format V2 introduces a wrinkle that is easiest to handle by - creating the concept of virtual sections contained within a real section. 
- In DWP V2 the sections of the input DWO files are concatenated together - into one section, but section offsets are kept relative to the original - input section. - If this is a virtual dwp-v2 section, S.CONTAINING_SECTION is a backlink to - the real section this "virtual" section is contained in, and BUFFER,SIZE - describe the virtual section. */ - -struct dwarf2_section_info -{ - union - { - /* If this is a real section, the bfd section. */ - asection *section; - /* If this is a virtual section, pointer to the containing ("real") - section. */ - struct dwarf2_section_info *containing_section; - } s; - /* Pointer to section data, only valid if readin. */ - const gdb_byte *buffer; - /* The size of the section, real or virtual. */ - bfd_size_type size; - /* If this is a virtual section, the offset in the real section. - Only valid if is_virtual. */ - bfd_size_type virtual_offset; - /* True if we have tried to read this section. */ - char readin; - /* True if this is a virtual section, False otherwise. - This specifies which of s.section and s.containing_section to use. */ - char is_virtual; -}; - -typedef struct dwarf2_section_info dwarf2_section_info_def; -DEF_VEC_O (dwarf2_section_info_def); - -/* All offsets in the index are of this type. It must be - architecture-independent. */ -typedef uint32_t offset_type; - -/* Ensure only legit values are used. */ -#define DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE(cu_index, value) \ - do { \ - gdb_assert ((unsigned int) (value) <= 1); \ - GDB_INDEX_SYMBOL_STATIC_SET_VALUE((cu_index), (value)); \ - } while (0) - -/* Ensure only legit values are used. */ -#define DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE(cu_index, value) \ - do { \ - gdb_assert ((value) >= GDB_INDEX_SYMBOL_KIND_TYPE \ - && (value) <= GDB_INDEX_SYMBOL_KIND_OTHER); \ - GDB_INDEX_SYMBOL_KIND_SET_VALUE((cu_index), (value)); \ - } while (0) - -/* Ensure we don't use more than the alloted nuber of bits for the CU. 
*/ -#define DW2_GDB_INDEX_CU_SET_VALUE(cu_index, value) \ - do { \ - gdb_assert (((value) & ~GDB_INDEX_CU_MASK) == 0); \ - GDB_INDEX_CU_SET_VALUE((cu_index), (value)); \ - } while (0) - -#if WORDS_BIGENDIAN - -/* Convert VALUE between big- and little-endian. */ - -static offset_type -byte_swap (offset_type value) -{ - offset_type result; - - result = (value & 0xff) << 24; - result |= (value & 0xff00) << 8; - result |= (value & 0xff0000) >> 8; - result |= (value & 0xff000000) >> 24; - return result; -} - -#define MAYBE_SWAP(V) byte_swap (V) - -#else -#define MAYBE_SWAP(V) static_cast (V) -#endif /* WORDS_BIGENDIAN */ - /* An index into a (C++) symbol name component in a symbol name as recorded in the mapped_index's symbol table. For each C++ symbol in the symbol table, we record one entry for the start of each @@ -365,160 +276,9 @@ struct mapped_debug_names final : public mapped_index_base { return this->name_count; } }; -typedef struct dwarf2_per_cu_data *dwarf2_per_cu_ptr; -DEF_VEC_P (dwarf2_per_cu_ptr); +/* See dwarf2read.h. */ -struct tu_stats -{ - int nr_uniq_abbrev_tables; - int nr_symtabs; - int nr_symtab_sharers; - int nr_stmt_less_type_units; - int nr_all_type_units_reallocs; -}; - -/* Collection of data recorded per objfile. - This hangs off of dwarf2_objfile_data_key. */ - -struct dwarf2_per_objfile : public allocate_on_obstack -{ - /* Construct a dwarf2_per_objfile for OBJFILE. NAMES points to the - dwarf2 section names, or is NULL if the standard ELF names are - used. */ - dwarf2_per_objfile (struct objfile *objfile, - const dwarf2_debug_sections *names); - - ~dwarf2_per_objfile (); - - DISABLE_COPY_AND_ASSIGN (dwarf2_per_objfile); - - /* Free all cached compilation units. */ - void free_cached_comp_units (); -private: - /* This function is mapped across the sections and remembers the - offset and size of each of the debugging sections we are - interested in. 
*/ - void locate_sections (bfd *abfd, asection *sectp, - const dwarf2_debug_sections &names); - -public: - dwarf2_section_info info {}; - dwarf2_section_info abbrev {}; - dwarf2_section_info line {}; - dwarf2_section_info loc {}; - dwarf2_section_info loclists {}; - dwarf2_section_info macinfo {}; - dwarf2_section_info macro {}; - dwarf2_section_info str {}; - dwarf2_section_info line_str {}; - dwarf2_section_info ranges {}; - dwarf2_section_info rnglists {}; - dwarf2_section_info addr {}; - dwarf2_section_info frame {}; - dwarf2_section_info eh_frame {}; - dwarf2_section_info gdb_index {}; - dwarf2_section_info debug_names {}; - dwarf2_section_info debug_aranges {}; - - VEC (dwarf2_section_info_def) *types = NULL; - - /* Back link. */ - struct objfile *objfile = NULL; - - /* Table of all the compilation units. This is used to locate - the target compilation unit of a particular reference. */ - struct dwarf2_per_cu_data **all_comp_units = NULL; - - /* The number of compilation units in ALL_COMP_UNITS. */ - int n_comp_units = 0; - - /* The number of .debug_types-related CUs. */ - int n_type_units = 0; - - /* The number of elements allocated in all_type_units. - If there are skeleton-less TUs, we add them to all_type_units lazily. */ - int n_allocated_type_units = 0; - - /* The .debug_types-related CUs (TUs). - This is stored in malloc space because we may realloc it. */ - struct signatured_type **all_type_units = NULL; - - /* Table of struct type_unit_group objects. - The hash key is the DW_AT_stmt_list value. */ - htab_t type_unit_groups {}; - - /* A table mapping .debug_types signatures to its signatured_type entry. - This is NULL if the .debug_types section hasn't been read in yet. */ - htab_t signatured_types {}; - - /* Type unit statistics, to see how well the scaling improvements - are doing. */ - struct tu_stats tu_stats {}; - - /* A chain of compilation units that are currently read in, so that - they can be freed later. 
*/ - dwarf2_per_cu_data *read_in_chain = NULL; - - /* A table mapping DW_AT_dwo_name values to struct dwo_file objects. - This is NULL if the table hasn't been allocated yet. */ - htab_t dwo_files {}; - - /* True if we've checked for whether there is a DWP file. */ - bool dwp_checked = false; - - /* The DWP file if there is one, or NULL. */ - struct dwp_file *dwp_file = NULL; - - /* The shared '.dwz' file, if one exists. This is used when the - original data was compressed using 'dwz -m'. */ - struct dwz_file *dwz_file = NULL; - - /* A flag indicating whether this objfile has a section loaded at a - VMA of 0. */ - bool has_section_at_zero = false; - - /* True if we are using the mapped index, - or we are faking it for OBJF_READNOW's sake. */ - bool using_index = false; - - /* The mapped index, or NULL if .gdb_index is missing or not being used. */ - mapped_index *index_table = NULL; - - /* The mapped index, or NULL if .debug_names is missing or not being used. */ - std::unique_ptr debug_names_table; - - /* When using index_table, this keeps track of all quick_file_names entries. - TUs typically share line table entries with a CU, so we maintain a - separate table of all line table entries to support the sharing. - Note that while there can be way more TUs than CUs, we've already - sorted all the TUs into "type unit groups", grouped by their - DW_AT_stmt_list value. Therefore the only sharing done here is with a - CU and its associated TU group if there is one. */ - htab_t quick_file_names_table {}; - - /* Set during partial symbol reading, to prevent queueing of full - symbols. */ - bool reading_partial_symbols = false; - - /* Table mapping type DIEs to their struct type *. - This is NULL if not allocated yet. - The mapping is done via (CU/TU + DIE offset) -> type. */ - htab_t die_type_hash {}; - - /* The CUs we recently read. */ - VEC (dwarf2_per_cu_ptr) *just_read_cus = NULL; - - /* Table containing line_header indexed by offset and offset_in_dwz. 
*/ - htab_t line_header_hash {}; - - /* Table containing all filenames. This is an optional because the - table is lazily constructed on first access. */ - gdb::optional filenames_cache; -}; - -/* Get the dwarf2_per_objfile associated to OBJFILE. */ - -struct dwarf2_per_objfile * +dwarf2_per_objfile * get_dwarf2_per_objfile (struct objfile *objfile) { return ((struct dwarf2_per_objfile *) @@ -797,145 +557,6 @@ struct dwarf2_cu struct partial_die_info *find_partial_die (sect_offset sect_off); }; -/* Persistent data held for a compilation unit, even when not - processing it. We put a pointer to this structure in the - read_symtab_private field of the psymtab. */ - -struct dwarf2_per_cu_data -{ - /* The start offset and length of this compilation unit. - NOTE: Unlike comp_unit_head.length, this length includes - initial_length_size. - If the DIE refers to a DWO file, this is always of the original die, - not the DWO file. */ - sect_offset sect_off; - unsigned int length; - - /* DWARF standard version this data has been read from (such as 4 or 5). */ - short dwarf_version; - - /* Flag indicating this compilation unit will be read in before - any of the current compilation units are processed. */ - unsigned int queued : 1; - - /* This flag will be set when reading partial DIEs if we need to load - absolutely all DIEs for this compilation unit, instead of just the ones - we think are interesting. It gets set if we look for a DIE in the - hash table and don't find it. */ - unsigned int load_all_dies : 1; - - /* Non-zero if this CU is from .debug_types. - Struct dwarf2_per_cu_data is contained in struct signatured_type iff - this is non-zero. */ - unsigned int is_debug_types : 1; - - /* Non-zero if this CU is from the .dwz file. */ - unsigned int is_dwz : 1; - - /* Non-zero if reading a TU directly from a DWO file, bypassing the stub. - This flag is only valid if is_debug_types is true. 
- We can't read a CU directly from a DWO file: There are required - attributes in the stub. */ - unsigned int reading_dwo_directly : 1; - - /* Non-zero if the TU has been read. - This is used to assist the "Stay in DWO Optimization" for Fission: - When reading a DWO, it's faster to read TUs from the DWO instead of - fetching them from random other DWOs (due to comdat folding). - If the TU has already been read, the optimization is unnecessary - (and unwise - we don't want to change where gdb thinks the TU lives - "midflight"). - This flag is only valid if is_debug_types is true. */ - unsigned int tu_read : 1; - - /* The section this CU/TU lives in. - If the DIE refers to a DWO file, this is always the original die, - not the DWO file. */ - struct dwarf2_section_info *section; - - /* Set to non-NULL iff this CU is currently loaded. When it gets freed out - of the CU cache it gets reset to NULL again. This is left as NULL for - dummy CUs (a CU header, but nothing else). */ - struct dwarf2_cu *cu; - - /* The corresponding dwarf2_per_objfile. */ - struct dwarf2_per_objfile *dwarf2_per_objfile; - - /* When dwarf2_per_objfile->using_index is true, the 'quick' field - is active. Otherwise, the 'psymtab' field is active. */ - union - { - /* The partial symbol table associated with this compilation unit, - or NULL for unread partial units. */ - struct partial_symtab *psymtab; - - /* Data needed by the "quick" functions. */ - struct dwarf2_per_cu_quick_data *quick; - } v; - - /* The CUs we import using DW_TAG_imported_unit. This is filled in - while reading psymtabs, used to compute the psymtab dependencies, - and then cleared. Then it is filled in again while reading full - symbols, and only deleted when the objfile is destroyed. - - This is also used to work around a difference between the way gold - generates .gdb_index version <=7 and the way gdb does. Arguably this - is a gold bug. 
For symbols coming from TUs, gold records in the index - the CU that includes the TU instead of the TU itself. This breaks - dw2_lookup_symbol: It assumes that if the index says symbol X lives - in CU/TU Y, then one need only expand Y and a subsequent lookup in Y - will find X. Alas TUs live in their own symtab, so after expanding CU Y - we need to look in TU Z to find X. Fortunately, this is akin to - DW_TAG_imported_unit, so we just use the same mechanism: For - .gdb_index version <=7 this also records the TUs that the CU referred - to. Concurrently with this change gdb was modified to emit version 8 - indices so we only pay a price for gold generated indices. - http://sourceware.org/bugzilla/show_bug.cgi?id=15021. */ - VEC (dwarf2_per_cu_ptr) *imported_symtabs; -}; - -/* Entry in the signatured_types hash table. */ - -struct signatured_type -{ - /* The "per_cu" object of this type. - This struct is used iff per_cu.is_debug_types. - N.B.: This is the first member so that it's easy to convert pointers - between them. */ - struct dwarf2_per_cu_data per_cu; - - /* The type's signature. */ - ULONGEST signature; - - /* Offset in the TU of the type's DIE, as read from the TU header. - If this TU is a DWO stub and the definition lives in a DWO file - (specified by DW_AT_GNU_dwo_name), this value is unusable. */ - cu_offset type_offset_in_tu; - - /* Offset in the section of the type's DIE. - If the definition lives in a DWO file, this is the offset in the - .debug_types.dwo section. - The value is zero until the actual value is known. - Zero is otherwise not a valid section offset. */ - sect_offset type_offset_in_section; - - /* Type units are grouped by their DW_AT_stmt_list entry so that they - can share them. This points to the containing symtab. */ - struct type_unit_group *type_unit_group; - - /* The type. - The first time we encounter this type we fully read it in and install it - in the symbol tables. Subsequent times we only need the type. 
*/ - struct type *type; - - /* Containing DWO unit. - This field is valid iff per_cu.reading_dwo_directly. */ - struct dwo_unit *dwo_unit; -}; - -typedef struct signatured_type *sig_type_ptr; -DEF_VEC_P (sig_type_ptr); - /* A struct that can be used as a hash key for tables based on DW_AT_stmt_list. This includes type_unit_group and quick_file_names. */ @@ -2482,11 +2103,6 @@ attr_value_as_address (struct attribute *attr) return addr; } -/* The suffix for an index file. */ -#define INDEX4_SUFFIX ".gdb-index" -#define INDEX5_SUFFIX ".debug_names" -#define DEBUG_STR_SUFFIX ".debug_str" - /* See declaration. */ dwarf2_per_objfile::dwarf2_per_objfile (struct objfile *objfile_, @@ -2807,14 +2423,10 @@ dwarf2_section_empty_p (const struct dwarf2_section_info *section) return section->s.section == NULL || section->size == 0; } -/* Read the contents of the section INFO. - OBJFILE is the main object file, but not necessarily the file where - the section comes from. E.g., for DWO files the bfd of INFO is the bfd - of the DWO file. - If the section is compressed, uncompress it before returning. */ +/* See dwarf2read.h. */ -static void -dwarf2_read_section (struct objfile *objfile, struct dwarf2_section_info *info) +void +dwarf2_read_section (struct objfile *objfile, dwarf2_section_info *info) { asection *sectp; bfd *abfd; @@ -3733,31 +3345,6 @@ create_addrmap_from_aranges (struct dwarf2_per_objfile *dwarf2_per_objfile, &objfile->objfile_obstack); } -/* The hash function for strings in the mapped index. This is the same as - SYMBOL_HASH_NEXT, but we keep a separate copy to maintain control over the - implementation. This is necessary because the hash function is tied to the - format of the mapped index file. The hash values do not have to match with - SYMBOL_HASH_NEXT. - - Use INT_MAX for INDEX_VERSION if you generate the current index format. 
*/ - -static hashval_t -mapped_index_string_hash (int index_version, const void *p) -{ - const unsigned char *str = (const unsigned char *) p; - hashval_t r = 0; - unsigned char c; - - while ((c = *str++) != 0) - { - if (index_version >= 5) - c = tolower (c); - r = r * 67 + c - 113; - } - - return r; -} - /* Find a slot in the mapped index INDEX for the object named NAME. If NAME is found, set *VEC_OUT to point to the CU vector in the constant pool and return true. If NAME cannot be found, return @@ -6099,22 +5686,6 @@ dwarf2_read_debug_names (struct dwarf2_per_objfile *dwarf2_per_objfile) return true; } -/* Symbol name hashing function as specified by DWARF-5. */ - -static uint32_t -dwarf5_djb_hash (const char *str_) -{ - const unsigned char *str = (const unsigned char *) str_; - - /* Note: tolower here ignores UTF-8, which isn't fully compliant. - See http://dwarfstd.org/ShowIssue.php?issue=161027.1. */ - - uint32_t hash = 5381; - while (int c = *str++) - hash = hash * 33 + tolower (c); - return hash; -} - /* Type used to manage iterating over all CUs looking for a symbol for .debug_names. */ @@ -25888,1611 +25459,6 @@ show_dwarf_cmd (const char *args, int from_tty) cmd_show_list (show_dwarf_cmdlist, from_tty, ""); } -/* The "save gdb-index" command. */ - -/* Write SIZE bytes from the buffer pointed to by DATA to FILE, with - error checking. */ - -static void -file_write (FILE *file, const void *data, size_t size) -{ - if (fwrite (data, 1, size, file) != size) - error (_("couldn't data write to file")); -} - -/* Write the contents of VEC to FILE, with error checking. */ - -template -static void -file_write (FILE *file, const std::vector &vec) -{ - file_write (file, vec.data (), vec.size () * sizeof (vec[0])); -} - -/* In-memory buffer to prepare data to be written later to a file. */ -class data_buf -{ -public: - /* Copy DATA to the end of the buffer. 
*/ - template - void append_data (const T &data) - { - std::copy (reinterpret_cast (&data), - reinterpret_cast (&data + 1), - grow (sizeof (data))); - } - - /* Copy CSTR (a zero-terminated string) to the end of buffer. The - terminating zero is appended too. */ - void append_cstr0 (const char *cstr) - { - const size_t size = strlen (cstr) + 1; - std::copy (cstr, cstr + size, grow (size)); - } - - /* Store INPUT as ULEB128 to the end of buffer. */ - void append_unsigned_leb128 (ULONGEST input) - { - for (;;) - { - gdb_byte output = input & 0x7f; - input >>= 7; - if (input) - output |= 0x80; - append_data (output); - if (input == 0) - break; - } - } - - /* Accept a host-format integer in VAL and append it to the buffer - as a target-format integer which is LEN bytes long. */ - void append_uint (size_t len, bfd_endian byte_order, ULONGEST val) - { - ::store_unsigned_integer (grow (len), len, byte_order, val); - } - - /* Return the size of the buffer. */ - size_t size () const - { - return m_vec.size (); - } - - /* Return true iff the buffer is empty. */ - bool empty () const - { - return m_vec.empty (); - } - - /* Write the buffer to FILE. */ - void file_write (FILE *file) const - { - ::file_write (file, m_vec); - } - -private: - /* Grow SIZE bytes at the end of the buffer. Returns a pointer to - the start of the new block. */ - gdb_byte *grow (size_t size) - { - m_vec.resize (m_vec.size () + size); - return &*m_vec.end () - size; - } - - gdb::byte_vector m_vec; -}; - -/* An entry in the symbol table. */ -struct symtab_index_entry -{ - /* The name of the symbol. */ - const char *name; - /* The offset of the name in the constant pool. */ - offset_type index_offset; - /* A sorted vector of the indices of all the CUs that hold an object - of this name. */ - std::vector cu_indices; -}; - -/* The symbol table. This is a power-of-2-sized hash table. 
*/ -struct mapped_symtab -{ - mapped_symtab () - { - data.resize (1024); - } - - offset_type n_elements = 0; - std::vector data; -}; - -/* Find a slot in SYMTAB for the symbol NAME. Returns a reference to - the slot. - - Function is used only during write_hash_table so no index format backward - compatibility is needed. */ - -static symtab_index_entry & -find_slot (struct mapped_symtab *symtab, const char *name) -{ - offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name); - - index = hash & (symtab->data.size () - 1); - step = ((hash * 17) & (symtab->data.size () - 1)) | 1; - - for (;;) - { - if (symtab->data[index].name == NULL - || strcmp (name, symtab->data[index].name) == 0) - return symtab->data[index]; - index = (index + step) & (symtab->data.size () - 1); - } -} - -/* Expand SYMTAB's hash table. */ - -static void -hash_expand (struct mapped_symtab *symtab) -{ - auto old_entries = std::move (symtab->data); - - symtab->data.clear (); - symtab->data.resize (old_entries.size () * 2); - - for (auto &it : old_entries) - if (it.name != NULL) - { - auto &ref = find_slot (symtab, it.name); - ref = std::move (it); - } -} - -/* Add an entry to SYMTAB. NAME is the name of the symbol. - CU_INDEX is the index of the CU in which the symbol appears. - IS_STATIC is one if the symbol is static, otherwise zero (global). */ - -static void -add_index_entry (struct mapped_symtab *symtab, const char *name, - int is_static, gdb_index_symbol_kind kind, - offset_type cu_index) -{ - offset_type cu_index_and_attrs; - - ++symtab->n_elements; - if (4 * symtab->n_elements / 3 >= symtab->data.size ()) - hash_expand (symtab); - - symtab_index_entry &slot = find_slot (symtab, name); - if (slot.name == NULL) - { - slot.name = name; - /* index_offset is set later. 
*/ - } - - cu_index_and_attrs = 0; - DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index); - DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static); - DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind); - - /* We don't want to record an index value twice as we want to avoid the - duplication. - We process all global symbols and then all static symbols - (which would allow us to avoid the duplication by only having to check - the last entry pushed), but a symbol could have multiple kinds in one CU. - To keep things simple we don't worry about the duplication here and - sort and uniqufy the list after we've processed all symbols. */ - slot.cu_indices.push_back (cu_index_and_attrs); -} - -/* Sort and remove duplicates of all symbols' cu_indices lists. */ - -static void -uniquify_cu_indices (struct mapped_symtab *symtab) -{ - for (auto &entry : symtab->data) - { - if (entry.name != NULL && !entry.cu_indices.empty ()) - { - auto &cu_indices = entry.cu_indices; - std::sort (cu_indices.begin (), cu_indices.end ()); - auto from = std::unique (cu_indices.begin (), cu_indices.end ()); - cu_indices.erase (from, cu_indices.end ()); - } - } -} - -/* A form of 'const char *' suitable for container keys. Only the - pointer is stored. The strings themselves are compared, not the - pointers. */ -class c_str_view -{ -public: - c_str_view (const char *cstr) - : m_cstr (cstr) - {} - - bool operator== (const c_str_view &other) const - { - return strcmp (m_cstr, other.m_cstr) == 0; - } - - /* Return the underlying C string. Note, the returned string is - only a reference with lifetime of this object. */ - const char *c_str () const - { - return m_cstr; - } - -private: - friend class c_str_view_hasher; - const char *const m_cstr; -}; - -/* A std::unordered_map::hasher for c_str_view that uses the right - hash function for strings in a mapped index. 
*/ -class c_str_view_hasher -{ -public: - size_t operator () (const c_str_view &x) const - { - return mapped_index_string_hash (INT_MAX, x.m_cstr); - } -}; - -/* A std::unordered_map::hasher for std::vector<>. */ -template -class vector_hasher -{ -public: - size_t operator () (const std::vector &key) const - { - return iterative_hash (key.data (), - sizeof (key.front ()) * key.size (), 0); - } -}; - -/* Write the mapped hash table SYMTAB to the data buffer OUTPUT, with - constant pool entries going into the data buffer CPOOL. */ - -static void -write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool) -{ - { - /* Elements are sorted vectors of the indices of all the CUs that - hold an object of this name. */ - std::unordered_map, offset_type, - vector_hasher> - symbol_hash_table; - - /* We add all the index vectors to the constant pool first, to - ensure alignment is ok. */ - for (symtab_index_entry &entry : symtab->data) - { - if (entry.name == NULL) - continue; - gdb_assert (entry.index_offset == 0); - - /* Finding before inserting is faster than always trying to - insert, because inserting always allocates a node, does the - lookup, and then destroys the new node if another node - already had the same key. C++17 try_emplace will avoid - this. */ - const auto found - = symbol_hash_table.find (entry.cu_indices); - if (found != symbol_hash_table.end ()) - { - entry.index_offset = found->second; - continue; - } - - symbol_hash_table.emplace (entry.cu_indices, cpool.size ()); - entry.index_offset = cpool.size (); - cpool.append_data (MAYBE_SWAP (entry.cu_indices.size ())); - for (const auto index : entry.cu_indices) - cpool.append_data (MAYBE_SWAP (index)); - } - } - - /* Now write out the hash table. 
*/ - std::unordered_map str_table; - for (const auto &entry : symtab->data) - { - offset_type str_off, vec_off; - - if (entry.name != NULL) - { - const auto insertpair = str_table.emplace (entry.name, cpool.size ()); - if (insertpair.second) - cpool.append_cstr0 (entry.name); - str_off = insertpair.first->second; - vec_off = entry.index_offset; - } - else - { - /* While 0 is a valid constant pool index, it is not valid - to have 0 for both offsets. */ - str_off = 0; - vec_off = 0; - } - - output.append_data (MAYBE_SWAP (str_off)); - output.append_data (MAYBE_SWAP (vec_off)); - } -} - -typedef std::unordered_map psym_index_map; - -/* Helper struct for building the address table. */ -struct addrmap_index_data -{ - addrmap_index_data (data_buf &addr_vec_, psym_index_map &cu_index_htab_) - : addr_vec (addr_vec_), cu_index_htab (cu_index_htab_) - {} - - struct objfile *objfile; - data_buf &addr_vec; - psym_index_map &cu_index_htab; - - /* Non-zero if the previous_* fields are valid. - We can't write an entry until we see the next entry (since it is only then - that we know the end of the entry). */ - int previous_valid; - /* Index of the CU in the table of all CUs in the index file. */ - unsigned int previous_cu_index; - /* Start address of the CU. */ - CORE_ADDR previous_cu_start; -}; - -/* Write an address entry to ADDR_VEC. */ - -static void -add_address_entry (struct objfile *objfile, data_buf &addr_vec, - CORE_ADDR start, CORE_ADDR end, unsigned int cu_index) -{ - CORE_ADDR baseaddr; - - baseaddr = ANOFFSET (objfile->section_offsets, SECT_OFF_TEXT (objfile)); - - addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start - baseaddr); - addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end - baseaddr); - addr_vec.append_data (MAYBE_SWAP (cu_index)); -} - -/* Worker function for traversing an addrmap to build the address table. 
*/ - -static int -add_address_entry_worker (void *datap, CORE_ADDR start_addr, void *obj) -{ - struct addrmap_index_data *data = (struct addrmap_index_data *) datap; - struct partial_symtab *pst = (struct partial_symtab *) obj; - - if (data->previous_valid) - add_address_entry (data->objfile, data->addr_vec, - data->previous_cu_start, start_addr, - data->previous_cu_index); - - data->previous_cu_start = start_addr; - if (pst != NULL) - { - const auto it = data->cu_index_htab.find (pst); - gdb_assert (it != data->cu_index_htab.cend ()); - data->previous_cu_index = it->second; - data->previous_valid = 1; - } - else - data->previous_valid = 0; - - return 0; -} - -/* Write OBJFILE's address map to ADDR_VEC. - CU_INDEX_HTAB is used to map addrmap entries to their CU indices - in the index file. */ - -static void -write_address_map (struct objfile *objfile, data_buf &addr_vec, - psym_index_map &cu_index_htab) -{ - struct addrmap_index_data addrmap_index_data (addr_vec, cu_index_htab); - - /* When writing the address table, we have to cope with the fact that - the addrmap iterator only provides the start of a region; we have to - wait until the next invocation to get the start of the next region. */ - - addrmap_index_data.objfile = objfile; - addrmap_index_data.previous_valid = 0; - - addrmap_foreach (objfile->psymtabs_addrmap, add_address_entry_worker, - &addrmap_index_data); - - /* It's highly unlikely the last entry (end address = 0xff...ff) - is valid, but we should still handle it. - The end address is recorded as the start of the next region, but that - doesn't work here. To cope we pass 0xff...ff, this is a rare situation - anyway. */ - if (addrmap_index_data.previous_valid) - add_address_entry (objfile, addr_vec, - addrmap_index_data.previous_cu_start, (CORE_ADDR) -1, - addrmap_index_data.previous_cu_index); -} - -/* Return the symbol kind of PSYM. 
*/ - -static gdb_index_symbol_kind -symbol_kind (struct partial_symbol *psym) -{ - domain_enum domain = PSYMBOL_DOMAIN (psym); - enum address_class aclass = PSYMBOL_CLASS (psym); - - switch (domain) - { - case VAR_DOMAIN: - switch (aclass) - { - case LOC_BLOCK: - return GDB_INDEX_SYMBOL_KIND_FUNCTION; - case LOC_TYPEDEF: - return GDB_INDEX_SYMBOL_KIND_TYPE; - case LOC_COMPUTED: - case LOC_CONST_BYTES: - case LOC_OPTIMIZED_OUT: - case LOC_STATIC: - return GDB_INDEX_SYMBOL_KIND_VARIABLE; - case LOC_CONST: - /* Note: It's currently impossible to recognize psyms as enum values - short of reading the type info. For now punt. */ - return GDB_INDEX_SYMBOL_KIND_VARIABLE; - default: - /* There are other LOC_FOO values that one might want to classify - as variables, but dwarf2read.c doesn't currently use them. */ - return GDB_INDEX_SYMBOL_KIND_OTHER; - } - case STRUCT_DOMAIN: - return GDB_INDEX_SYMBOL_KIND_TYPE; - default: - return GDB_INDEX_SYMBOL_KIND_OTHER; - } -} - -/* Add a list of partial symbols to SYMTAB. */ - -static void -write_psymbols (struct mapped_symtab *symtab, - std::unordered_set &psyms_seen, - struct partial_symbol **psymp, - int count, - offset_type cu_index, - int is_static) -{ - for (; count-- > 0; ++psymp) - { - struct partial_symbol *psym = *psymp; - - if (SYMBOL_LANGUAGE (psym) == language_ada) - error (_("Ada is not currently supported by the index")); - - /* Only add a given psymbol once. */ - if (psyms_seen.insert (psym).second) - { - gdb_index_symbol_kind kind = symbol_kind (psym); - - add_index_entry (symtab, SYMBOL_SEARCH_NAME (psym), - is_static, kind, cu_index); - } - } -} - -/* A helper struct used when iterating over debug_types. 
*/ -struct signatured_type_index_data -{ - signatured_type_index_data (data_buf &types_list_, - std::unordered_set &psyms_seen_) - : types_list (types_list_), psyms_seen (psyms_seen_) - {} - - struct objfile *objfile; - struct mapped_symtab *symtab; - data_buf &types_list; - std::unordered_set &psyms_seen; - int cu_index; -}; - -/* A helper function that writes a single signatured_type to an - obstack. */ - -static int -write_one_signatured_type (void **slot, void *d) -{ - struct signatured_type_index_data *info - = (struct signatured_type_index_data *) d; - struct signatured_type *entry = (struct signatured_type *) *slot; - struct partial_symtab *psymtab = entry->per_cu.v.psymtab; - - write_psymbols (info->symtab, - info->psyms_seen, - &info->objfile->global_psymbols[psymtab->globals_offset], - psymtab->n_global_syms, info->cu_index, - 0); - write_psymbols (info->symtab, - info->psyms_seen, - &info->objfile->static_psymbols[psymtab->statics_offset], - psymtab->n_static_syms, info->cu_index, - 1); - - info->types_list.append_uint (8, BFD_ENDIAN_LITTLE, - to_underlying (entry->per_cu.sect_off)); - info->types_list.append_uint (8, BFD_ENDIAN_LITTLE, - to_underlying (entry->type_offset_in_tu)); - info->types_list.append_uint (8, BFD_ENDIAN_LITTLE, entry->signature); - - ++info->cu_index; - - return 1; -} - -/* Recurse into all "included" dependencies and count their symbols as - if they appeared in this psymtab. */ - -static void -recursively_count_psymbols (struct partial_symtab *psymtab, - size_t &psyms_seen) -{ - for (int i = 0; i < psymtab->number_of_dependencies; ++i) - if (psymtab->dependencies[i]->user != NULL) - recursively_count_psymbols (psymtab->dependencies[i], - psyms_seen); - - psyms_seen += psymtab->n_global_syms; - psyms_seen += psymtab->n_static_syms; -} - -/* Recurse into all "included" dependencies and write their symbols as - if they appeared in this psymtab. 
*/ - -static void -recursively_write_psymbols (struct objfile *objfile, - struct partial_symtab *psymtab, - struct mapped_symtab *symtab, - std::unordered_set &psyms_seen, - offset_type cu_index) -{ - int i; - - for (i = 0; i < psymtab->number_of_dependencies; ++i) - if (psymtab->dependencies[i]->user != NULL) - recursively_write_psymbols (objfile, psymtab->dependencies[i], - symtab, psyms_seen, cu_index); - - write_psymbols (symtab, - psyms_seen, - &objfile->global_psymbols[psymtab->globals_offset], - psymtab->n_global_syms, cu_index, - 0); - write_psymbols (symtab, - psyms_seen, - &objfile->static_psymbols[psymtab->statics_offset], - psymtab->n_static_syms, cu_index, - 1); -} - -/* DWARF-5 .debug_names builder. */ -class debug_names -{ -public: - debug_names (struct dwarf2_per_objfile *dwarf2_per_objfile, bool is_dwarf64, - bfd_endian dwarf5_byte_order) - : m_dwarf5_byte_order (dwarf5_byte_order), - m_dwarf32 (dwarf5_byte_order), - m_dwarf64 (dwarf5_byte_order), - m_dwarf (is_dwarf64 - ? static_cast (m_dwarf64) - : static_cast (m_dwarf32)), - m_name_table_string_offs (m_dwarf.name_table_string_offs), - m_name_table_entry_offs (m_dwarf.name_table_entry_offs), - m_debugstrlookup (dwarf2_per_objfile) - {} - - int dwarf5_offset_size () const - { - const bool dwarf5_is_dwarf64 = &m_dwarf == &m_dwarf64; - return dwarf5_is_dwarf64 ? 8 : 4; - } - - /* Is this symbol from DW_TAG_compile_unit or DW_TAG_type_unit? */ - enum class unit_kind { cu, tu }; - - /* Insert one symbol. */ - void insert (const partial_symbol *psym, int cu_index, bool is_static, - unit_kind kind) - { - const int dwarf_tag = psymbol_tag (psym); - if (dwarf_tag == 0) - return; - const char *const name = SYMBOL_SEARCH_NAME (psym); - const auto insertpair - = m_name_to_value_set.emplace (c_str_view (name), - std::set ()); - std::set &value_set = insertpair.first->second; - value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static, kind)); - } - - /* Build all the tables. 
All symbols must be already inserted. - This function does not call file_write, caller has to do it - afterwards. */ - void build () - { - /* Verify the build method has not be called twice. */ - gdb_assert (m_abbrev_table.empty ()); - const size_t name_count = m_name_to_value_set.size (); - m_bucket_table.resize - (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3)))); - m_hash_table.reserve (name_count); - m_name_table_string_offs.reserve (name_count); - m_name_table_entry_offs.reserve (name_count); - - /* Map each hash of symbol to its name and value. */ - struct hash_it_pair - { - uint32_t hash; - decltype (m_name_to_value_set)::const_iterator it; - }; - std::vector> bucket_hash; - bucket_hash.resize (m_bucket_table.size ()); - for (decltype (m_name_to_value_set)::const_iterator it - = m_name_to_value_set.cbegin (); - it != m_name_to_value_set.cend (); - ++it) - { - const char *const name = it->first.c_str (); - const uint32_t hash = dwarf5_djb_hash (name); - hash_it_pair hashitpair; - hashitpair.hash = hash; - hashitpair.it = it; - auto &slot = bucket_hash[hash % bucket_hash.size()]; - slot.push_front (std::move (hashitpair)); - } - for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix) - { - const std::forward_list &hashitlist - = bucket_hash[bucket_ix]; - if (hashitlist.empty ()) - continue; - uint32_t &bucket_slot = m_bucket_table[bucket_ix]; - /* The hashes array is indexed starting at 1. 
*/ - store_unsigned_integer (reinterpret_cast (&bucket_slot), - sizeof (bucket_slot), m_dwarf5_byte_order, - m_hash_table.size () + 1); - for (const hash_it_pair &hashitpair : hashitlist) - { - m_hash_table.push_back (0); - store_unsigned_integer (reinterpret_cast - (&m_hash_table.back ()), - sizeof (m_hash_table.back ()), - m_dwarf5_byte_order, hashitpair.hash); - const c_str_view &name = hashitpair.it->first; - const std::set &value_set = hashitpair.it->second; - m_name_table_string_offs.push_back_reorder - (m_debugstrlookup.lookup (name.c_str ())); - m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ()); - gdb_assert (!value_set.empty ()); - for (const symbol_value &value : value_set) - { - int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag, - value.is_static, - value.kind)]; - if (idx == 0) - { - idx = m_idx_next++; - m_abbrev_table.append_unsigned_leb128 (idx); - m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag); - m_abbrev_table.append_unsigned_leb128 - (value.kind == unit_kind::cu ? DW_IDX_compile_unit - : DW_IDX_type_unit); - m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata); - m_abbrev_table.append_unsigned_leb128 (value.is_static - ? DW_IDX_GNU_internal - : DW_IDX_GNU_external); - m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present); - - /* Terminate attributes list. */ - m_abbrev_table.append_unsigned_leb128 (0); - m_abbrev_table.append_unsigned_leb128 (0); - } - - m_entry_pool.append_unsigned_leb128 (idx); - m_entry_pool.append_unsigned_leb128 (value.cu_index); - } - - /* Terminate the list of CUs. */ - m_entry_pool.append_unsigned_leb128 (0); - } - } - gdb_assert (m_hash_table.size () == name_count); - - /* Terminate tags list. */ - m_abbrev_table.append_unsigned_leb128 (0); - } - - /* Return .debug_names bucket count. This must be called only after - calling the build method. */ - uint32_t bucket_count () const - { - /* Verify the build method has been already called. 
*/ - gdb_assert (!m_abbrev_table.empty ()); - const uint32_t retval = m_bucket_table.size (); - - /* Check for overflow. */ - gdb_assert (retval == m_bucket_table.size ()); - return retval; - } - - /* Return .debug_names names count. This must be called only after - calling the build method. */ - uint32_t name_count () const - { - /* Verify the build method has been already called. */ - gdb_assert (!m_abbrev_table.empty ()); - const uint32_t retval = m_hash_table.size (); - - /* Check for overflow. */ - gdb_assert (retval == m_hash_table.size ()); - return retval; - } - - /* Return number of bytes of .debug_names abbreviation table. This - must be called only after calling the build method. */ - uint32_t abbrev_table_bytes () const - { - gdb_assert (!m_abbrev_table.empty ()); - return m_abbrev_table.size (); - } - - /* Recurse into all "included" dependencies and store their symbols - as if they appeared in this psymtab. */ - void recursively_write_psymbols - (struct objfile *objfile, - struct partial_symtab *psymtab, - std::unordered_set &psyms_seen, - int cu_index) - { - for (int i = 0; i < psymtab->number_of_dependencies; ++i) - if (psymtab->dependencies[i]->user != NULL) - recursively_write_psymbols (objfile, psymtab->dependencies[i], - psyms_seen, cu_index); - - write_psymbols (psyms_seen, - &objfile->global_psymbols[psymtab->globals_offset], - psymtab->n_global_syms, cu_index, false, unit_kind::cu); - write_psymbols (psyms_seen, - &objfile->static_psymbols[psymtab->statics_offset], - psymtab->n_static_syms, cu_index, true, unit_kind::cu); - } - - /* Return number of bytes the .debug_names section will have. This - must be called only after calling the build method. */ - size_t bytes () const - { - /* Verify the build method has been already called. 
*/ - gdb_assert (!m_abbrev_table.empty ()); - size_t expected_bytes = 0; - expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]); - expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]); - expected_bytes += m_name_table_string_offs.bytes (); - expected_bytes += m_name_table_entry_offs.bytes (); - expected_bytes += m_abbrev_table.size (); - expected_bytes += m_entry_pool.size (); - return expected_bytes; - } - - /* Write .debug_names to FILE_NAMES and .debug_str addition to - FILE_STR. This must be called only after calling the build - method. */ - void file_write (FILE *file_names, FILE *file_str) const - { - /* Verify the build method has been already called. */ - gdb_assert (!m_abbrev_table.empty ()); - ::file_write (file_names, m_bucket_table); - ::file_write (file_names, m_hash_table); - m_name_table_string_offs.file_write (file_names); - m_name_table_entry_offs.file_write (file_names); - m_abbrev_table.file_write (file_names); - m_entry_pool.file_write (file_names); - m_debugstrlookup.file_write (file_str); - } - - /* A helper user data for write_one_signatured_type. */ - class write_one_signatured_type_data - { - public: - write_one_signatured_type_data (debug_names &nametable_, - signatured_type_index_data &&info_) - : nametable (nametable_), info (std::move (info_)) - {} - debug_names &nametable; - struct signatured_type_index_data info; - }; - - /* A helper function to pass write_one_signatured_type to - htab_traverse_noresize. */ - static int - write_one_signatured_type (void **slot, void *d) - { - write_one_signatured_type_data *data = (write_one_signatured_type_data *) d; - struct signatured_type_index_data *info = &data->info; - struct signatured_type *entry = (struct signatured_type *) *slot; - - data->nametable.write_one_signatured_type (entry, info); - - return 1; - } - -private: - - /* Storage for symbol names mapping them to their .debug_str section - offsets. 
*/ - class debug_str_lookup - { - public: - - /* Object costructor to be called for current DWARF2_PER_OBJFILE. - All .debug_str section strings are automatically stored. */ - debug_str_lookup (struct dwarf2_per_objfile *dwarf2_per_objfile) - : m_abfd (dwarf2_per_objfile->objfile->obfd), - m_dwarf2_per_objfile (dwarf2_per_objfile) - { - dwarf2_read_section (dwarf2_per_objfile->objfile, - &dwarf2_per_objfile->str); - if (dwarf2_per_objfile->str.buffer == NULL) - return; - for (const gdb_byte *data = dwarf2_per_objfile->str.buffer; - data < (dwarf2_per_objfile->str.buffer - + dwarf2_per_objfile->str.size);) - { - const char *const s = reinterpret_cast (data); - const auto insertpair - = m_str_table.emplace (c_str_view (s), - data - dwarf2_per_objfile->str.buffer); - if (!insertpair.second) - complaint (&symfile_complaints, - _("Duplicate string \"%s\" in " - ".debug_str section [in module %s]"), - s, bfd_get_filename (m_abfd)); - data += strlen (s) + 1; - } - } - - /* Return offset of symbol name S in the .debug_str section. Add - such symbol to the section's end if it does not exist there - yet. */ - size_t lookup (const char *s) - { - const auto it = m_str_table.find (c_str_view (s)); - if (it != m_str_table.end ()) - return it->second; - const size_t offset = (m_dwarf2_per_objfile->str.size - + m_str_add_buf.size ()); - m_str_table.emplace (c_str_view (s), offset); - m_str_add_buf.append_cstr0 (s); - return offset; - } - - /* Append the end of the .debug_str section to FILE. */ - void file_write (FILE *file) const - { - m_str_add_buf.file_write (file); - } - - private: - std::unordered_map m_str_table; - bfd *const m_abfd; - struct dwarf2_per_objfile *m_dwarf2_per_objfile; - - /* Data to add at the end of .debug_str for new needed symbol names. */ - data_buf m_str_add_buf; - }; - - /* Container to map used DWARF tags to their .debug_names abbreviation - tags. 
*/ - class index_key - { - public: - index_key (int dwarf_tag_, bool is_static_, unit_kind kind_) - : dwarf_tag (dwarf_tag_), is_static (is_static_), kind (kind_) - { - } - - bool - operator== (const index_key &other) const - { - return (dwarf_tag == other.dwarf_tag && is_static == other.is_static - && kind == other.kind); - } - - const int dwarf_tag; - const bool is_static; - const unit_kind kind; - }; - - /* Provide std::unordered_map::hasher for index_key. */ - class index_key_hasher - { - public: - size_t - operator () (const index_key &key) const - { - return (std::hash() (key.dwarf_tag) << 1) | key.is_static; - } - }; - - /* Parameters of one symbol entry. */ - class symbol_value - { - public: - const int dwarf_tag, cu_index; - const bool is_static; - const unit_kind kind; - - symbol_value (int dwarf_tag_, int cu_index_, bool is_static_, - unit_kind kind_) - : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_), - kind (kind_) - {} - - bool - operator< (const symbol_value &other) const - { -#define X(n) \ - do \ - { \ - if (n < other.n) \ - return true; \ - if (n > other.n) \ - return false; \ - } \ - while (0) - X (dwarf_tag); - X (is_static); - X (kind); - X (cu_index); -#undef X - return false; - } - }; - - /* Abstract base class to unify DWARF-32 and DWARF-64 name table - output. */ - class offset_vec - { - protected: - const bfd_endian dwarf5_byte_order; - public: - explicit offset_vec (bfd_endian dwarf5_byte_order_) - : dwarf5_byte_order (dwarf5_byte_order_) - {} - - /* Call std::vector::reserve for NELEM elements. */ - virtual void reserve (size_t nelem) = 0; - - /* Call std::vector::push_back with store_unsigned_integer byte - reordering for ELEM. */ - virtual void push_back_reorder (size_t elem) = 0; - - /* Return expected output size in bytes. */ - virtual size_t bytes () const = 0; - - /* Write name table to FILE. */ - virtual void file_write (FILE *file) const = 0; - }; - - /* Template to unify DWARF-32 and DWARF-64 output. 
*/ - template - class offset_vec_tmpl : public offset_vec - { - public: - explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_) - : offset_vec (dwarf5_byte_order_) - {} - - /* Implement offset_vec::reserve. */ - void reserve (size_t nelem) override - { - m_vec.reserve (nelem); - } - - /* Implement offset_vec::push_back_reorder. */ - void push_back_reorder (size_t elem) override - { - m_vec.push_back (elem); - /* Check for overflow. */ - gdb_assert (m_vec.back () == elem); - store_unsigned_integer (reinterpret_cast (&m_vec.back ()), - sizeof (m_vec.back ()), dwarf5_byte_order, elem); - } - - /* Implement offset_vec::bytes. */ - size_t bytes () const override - { - return m_vec.size () * sizeof (m_vec[0]); - } - - /* Implement offset_vec::file_write. */ - void file_write (FILE *file) const override - { - ::file_write (file, m_vec); - } - - private: - std::vector m_vec; - }; - - /* Base class to unify DWARF-32 and DWARF-64 .debug_names output - respecting name table width. */ - class dwarf - { - public: - offset_vec &name_table_string_offs, &name_table_entry_offs; - - dwarf (offset_vec &name_table_string_offs_, - offset_vec &name_table_entry_offs_) - : name_table_string_offs (name_table_string_offs_), - name_table_entry_offs (name_table_entry_offs_) - { - } - }; - - /* Template to unify DWARF-32 and DWARF-64 .debug_names output - respecting name table width. */ - template - class dwarf_tmpl : public dwarf - { - public: - explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_) - : dwarf (m_name_table_string_offs, m_name_table_entry_offs), - m_name_table_string_offs (dwarf5_byte_order_), - m_name_table_entry_offs (dwarf5_byte_order_) - {} - - private: - offset_vec_tmpl m_name_table_string_offs; - offset_vec_tmpl m_name_table_entry_offs; - }; - - /* Try to reconstruct original DWARF tag for given partial_symbol. - This function is not DWARF-5 compliant but it is sufficient for - GDB as a DWARF-5 index consumer. 
*/ - static int psymbol_tag (const struct partial_symbol *psym) - { - domain_enum domain = PSYMBOL_DOMAIN (psym); - enum address_class aclass = PSYMBOL_CLASS (psym); - - switch (domain) - { - case VAR_DOMAIN: - switch (aclass) - { - case LOC_BLOCK: - return DW_TAG_subprogram; - case LOC_TYPEDEF: - return DW_TAG_typedef; - case LOC_COMPUTED: - case LOC_CONST_BYTES: - case LOC_OPTIMIZED_OUT: - case LOC_STATIC: - return DW_TAG_variable; - case LOC_CONST: - /* Note: It's currently impossible to recognize psyms as enum values - short of reading the type info. For now punt. */ - return DW_TAG_variable; - default: - /* There are other LOC_FOO values that one might want to classify - as variables, but dwarf2read.c doesn't currently use them. */ - return DW_TAG_variable; - } - case STRUCT_DOMAIN: - return DW_TAG_structure_type; - default: - return 0; - } - } - - /* Call insert for all partial symbols and mark them in PSYMS_SEEN. */ - void write_psymbols (std::unordered_set &psyms_seen, - struct partial_symbol **psymp, int count, int cu_index, - bool is_static, unit_kind kind) - { - for (; count-- > 0; ++psymp) - { - struct partial_symbol *psym = *psymp; - - if (SYMBOL_LANGUAGE (psym) == language_ada) - error (_("Ada is not currently supported by the index")); - - /* Only add a given psymbol once. */ - if (psyms_seen.insert (psym).second) - insert (psym, cu_index, is_static, kind); - } - } - - /* A helper function that writes a single signatured_type - to a debug_names. 
*/ - void - write_one_signatured_type (struct signatured_type *entry, - struct signatured_type_index_data *info) - { - struct partial_symtab *psymtab = entry->per_cu.v.psymtab; - - write_psymbols (info->psyms_seen, - &info->objfile->global_psymbols[psymtab->globals_offset], - psymtab->n_global_syms, info->cu_index, false, - unit_kind::tu); - write_psymbols (info->psyms_seen, - &info->objfile->static_psymbols[psymtab->statics_offset], - psymtab->n_static_syms, info->cu_index, true, - unit_kind::tu); - - info->types_list.append_uint (dwarf5_offset_size (), m_dwarf5_byte_order, - to_underlying (entry->per_cu.sect_off)); - - ++info->cu_index; - } - - /* Store value of each symbol. */ - std::unordered_map, c_str_view_hasher> - m_name_to_value_set; - - /* Tables of DWARF-5 .debug_names. They are in object file byte - order. */ - std::vector m_bucket_table; - std::vector m_hash_table; - - const bfd_endian m_dwarf5_byte_order; - dwarf_tmpl m_dwarf32; - dwarf_tmpl m_dwarf64; - dwarf &m_dwarf; - offset_vec &m_name_table_string_offs, &m_name_table_entry_offs; - debug_str_lookup m_debugstrlookup; - - /* Map each used .debug_names abbreviation tag parameter to its - index value. */ - std::unordered_map m_indexkey_to_idx; - - /* Next unused .debug_names abbreviation tag for - m_indexkey_to_idx. */ - int m_idx_next = 1; - - /* .debug_names abbreviation table. */ - data_buf m_abbrev_table; - - /* .debug_names entry pool. */ - data_buf m_entry_pool; -}; - -/* Return iff any of the needed offsets does not fit into 32-bit - .debug_names section. 
*/ - -static bool -check_dwarf64_offsets (struct dwarf2_per_objfile *dwarf2_per_objfile) -{ - for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) - { - const dwarf2_per_cu_data &per_cu = *dwarf2_per_objfile->all_comp_units[i]; - - if (to_underlying (per_cu.sect_off) >= (static_cast (1) << 32)) - return true; - } - for (int i = 0; i < dwarf2_per_objfile->n_type_units; ++i) - { - const signatured_type &sigtype = *dwarf2_per_objfile->all_type_units[i]; - const dwarf2_per_cu_data &per_cu = sigtype.per_cu; - - if (to_underlying (per_cu.sect_off) >= (static_cast (1) << 32)) - return true; - } - return false; -} - -/* The psyms_seen set is potentially going to be largish (~40k - elements when indexing a -g3 build of GDB itself). Estimate the - number of elements in order to avoid too many rehashes, which - require rebuilding buckets and thus many trips to - malloc/free. */ - -static size_t -psyms_seen_size (struct dwarf2_per_objfile *dwarf2_per_objfile) -{ - size_t psyms_count = 0; - for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) - { - struct dwarf2_per_cu_data *per_cu - = dwarf2_per_objfile->all_comp_units[i]; - struct partial_symtab *psymtab = per_cu->v.psymtab; - - if (psymtab != NULL && psymtab->user == NULL) - recursively_count_psymbols (psymtab, psyms_count); - } - /* Generating an index for gdb itself shows a ratio of - TOTAL_SEEN_SYMS/UNIQUE_SYMS or ~5. 4 seems like a good bet. */ - return psyms_count / 4; -} - -/* Write new .gdb_index section for OBJFILE into OUT_FILE. - Return how many bytes were expected to be written into OUT_FILE. */ - -static size_t -write_gdbindex (struct dwarf2_per_objfile *dwarf2_per_objfile, FILE *out_file) -{ - struct objfile *objfile = dwarf2_per_objfile->objfile; - mapped_symtab symtab; - data_buf cu_list; - - /* While we're scanning CU's create a table that maps a psymtab pointer - (which is what addrmap records) to its index (which is what is recorded - in the index file). 
This will later be needed to write the address - table. */ - psym_index_map cu_index_htab; - cu_index_htab.reserve (dwarf2_per_objfile->n_comp_units); - - /* The CU list is already sorted, so we don't need to do additional - work here. Also, the debug_types entries do not appear in - all_comp_units, but only in their own hash table. */ - - std::unordered_set psyms_seen - (psyms_seen_size (dwarf2_per_objfile)); - for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) - { - struct dwarf2_per_cu_data *per_cu - = dwarf2_per_objfile->all_comp_units[i]; - struct partial_symtab *psymtab = per_cu->v.psymtab; - - /* CU of a shared file from 'dwz -m' may be unused by this main file. - It may be referenced from a local scope but in such case it does not - need to be present in .gdb_index. */ - if (psymtab == NULL) - continue; - - if (psymtab->user == NULL) - recursively_write_psymbols (objfile, psymtab, &symtab, - psyms_seen, i); - - const auto insertpair = cu_index_htab.emplace (psymtab, i); - gdb_assert (insertpair.second); - - cu_list.append_uint (8, BFD_ENDIAN_LITTLE, - to_underlying (per_cu->sect_off)); - cu_list.append_uint (8, BFD_ENDIAN_LITTLE, per_cu->length); - } - - /* Dump the address map. */ - data_buf addr_vec; - write_address_map (objfile, addr_vec, cu_index_htab); - - /* Write out the .debug_type entries, if any. */ - data_buf types_cu_list; - if (dwarf2_per_objfile->signatured_types) - { - signatured_type_index_data sig_data (types_cu_list, - psyms_seen); - - sig_data.objfile = objfile; - sig_data.symtab = &symtab; - sig_data.cu_index = dwarf2_per_objfile->n_comp_units; - htab_traverse_noresize (dwarf2_per_objfile->signatured_types, - write_one_signatured_type, &sig_data); - } - - /* Now that we've processed all symbols we can shrink their cu_indices - lists. 
*/ - uniquify_cu_indices (&symtab); - - data_buf symtab_vec, constant_pool; - write_hash_table (&symtab, symtab_vec, constant_pool); - - data_buf contents; - const offset_type size_of_contents = 6 * sizeof (offset_type); - offset_type total_len = size_of_contents; - - /* The version number. */ - contents.append_data (MAYBE_SWAP (8)); - - /* The offset of the CU list from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); - total_len += cu_list.size (); - - /* The offset of the types CU list from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); - total_len += types_cu_list.size (); - - /* The offset of the address table from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); - total_len += addr_vec.size (); - - /* The offset of the symbol table from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); - total_len += symtab_vec.size (); - - /* The offset of the constant pool from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); - total_len += constant_pool.size (); - - gdb_assert (contents.size () == size_of_contents); - - contents.file_write (out_file); - cu_list.file_write (out_file); - types_cu_list.file_write (out_file); - addr_vec.file_write (out_file); - symtab_vec.file_write (out_file); - constant_pool.file_write (out_file); - - return total_len; -} - -/* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension. */ -static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 }; - -/* Write a new .debug_names section for OBJFILE into OUT_FILE, write - needed addition to .debug_str section to OUT_FILE_STR. Return how - many bytes were expected to be written into OUT_FILE. 
*/ - -static size_t -write_debug_names (struct dwarf2_per_objfile *dwarf2_per_objfile, - FILE *out_file, FILE *out_file_str) -{ - const bool dwarf5_is_dwarf64 = check_dwarf64_offsets (dwarf2_per_objfile); - struct objfile *objfile = dwarf2_per_objfile->objfile; - const enum bfd_endian dwarf5_byte_order - = gdbarch_byte_order (get_objfile_arch (objfile)); - - /* The CU list is already sorted, so we don't need to do additional - work here. Also, the debug_types entries do not appear in - all_comp_units, but only in their own hash table. */ - data_buf cu_list; - debug_names nametable (dwarf2_per_objfile, dwarf5_is_dwarf64, - dwarf5_byte_order); - std::unordered_set - psyms_seen (psyms_seen_size (dwarf2_per_objfile)); - for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) - { - const dwarf2_per_cu_data *per_cu = dwarf2_per_objfile->all_comp_units[i]; - partial_symtab *psymtab = per_cu->v.psymtab; - - /* CU of a shared file from 'dwz -m' may be unused by this main - file. It may be referenced from a local scope but in such - case it does not need to be present in .debug_names. */ - if (psymtab == NULL) - continue; - - if (psymtab->user == NULL) - nametable.recursively_write_psymbols (objfile, psymtab, psyms_seen, i); - - cu_list.append_uint (nametable.dwarf5_offset_size (), dwarf5_byte_order, - to_underlying (per_cu->sect_off)); - } - - /* Write out the .debug_type entries, if any. */ - data_buf types_cu_list; - if (dwarf2_per_objfile->signatured_types) - { - debug_names::write_one_signatured_type_data sig_data (nametable, - signatured_type_index_data (types_cu_list, psyms_seen)); - - sig_data.info.objfile = objfile; - /* It is used only for gdb_index. */ - sig_data.info.symtab = nullptr; - sig_data.info.cu_index = 0; - htab_traverse_noresize (dwarf2_per_objfile->signatured_types, - debug_names::write_one_signatured_type, - &sig_data); - } - - nametable.build (); - - /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC. 
*/ - - const offset_type bytes_of_header - = ((dwarf5_is_dwarf64 ? 12 : 4) - + 2 + 2 + 7 * 4 - + sizeof (dwarf5_gdb_augmentation)); - size_t expected_bytes = 0; - expected_bytes += bytes_of_header; - expected_bytes += cu_list.size (); - expected_bytes += types_cu_list.size (); - expected_bytes += nametable.bytes (); - data_buf header; - - if (!dwarf5_is_dwarf64) - { - const uint64_t size64 = expected_bytes - 4; - gdb_assert (size64 < 0xfffffff0); - header.append_uint (4, dwarf5_byte_order, size64); - } - else - { - header.append_uint (4, dwarf5_byte_order, 0xffffffff); - header.append_uint (8, dwarf5_byte_order, expected_bytes - 12); - } - - /* The version number. */ - header.append_uint (2, dwarf5_byte_order, 5); - - /* Padding. */ - header.append_uint (2, dwarf5_byte_order, 0); - - /* comp_unit_count - The number of CUs in the CU list. */ - header.append_uint (4, dwarf5_byte_order, dwarf2_per_objfile->n_comp_units); - - /* local_type_unit_count - The number of TUs in the local TU - list. */ - header.append_uint (4, dwarf5_byte_order, dwarf2_per_objfile->n_type_units); - - /* foreign_type_unit_count - The number of TUs in the foreign TU - list. */ - header.append_uint (4, dwarf5_byte_order, 0); - - /* bucket_count - The number of hash buckets in the hash lookup - table. */ - header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ()); - - /* name_count - The number of unique names in the index. */ - header.append_uint (4, dwarf5_byte_order, nametable.name_count ()); - - /* abbrev_table_size - The size in bytes of the abbreviations - table. */ - header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ()); - - /* augmentation_string_size - The size in bytes of the augmentation - string. This value is rounded up to a multiple of 4. 
*/ - static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, ""); - header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation)); - header.append_data (dwarf5_gdb_augmentation); - - gdb_assert (header.size () == bytes_of_header); - - header.file_write (out_file); - cu_list.file_write (out_file); - types_cu_list.file_write (out_file); - nametable.file_write (out_file, out_file_str); - - return expected_bytes; -} - -/* Assert that FILE's size is EXPECTED_SIZE. Assumes file's seek - position is at the end of the file. */ - -static void -assert_file_size (FILE *file, const char *filename, size_t expected_size) -{ - const auto file_size = ftell (file); - if (file_size == -1) - error (_("Can't get `%s' size"), filename); - gdb_assert (file_size == expected_size); -} - -/* Create an index file for OBJFILE in the directory DIR. */ - -static void -write_psymtabs_to_index (struct dwarf2_per_objfile *dwarf2_per_objfile, - const char *dir, - dw_index_kind index_kind) -{ - struct objfile *objfile = dwarf2_per_objfile->objfile; - - if (dwarf2_per_objfile->using_index) - error (_("Cannot use an index to create the index")); - - if (VEC_length (dwarf2_section_info_def, dwarf2_per_objfile->types) > 1) - error (_("Cannot make an index when the file has multiple .debug_types sections")); - - if (!objfile->psymtabs || !objfile->psymtabs_addrmap) - return; - - struct stat st; - if (stat (objfile_name (objfile), &st) < 0) - perror_with_name (objfile_name (objfile)); - - std::string filename (std::string (dir) + SLASH_STRING - + lbasename (objfile_name (objfile)) - + (index_kind == dw_index_kind::DEBUG_NAMES - ? INDEX5_SUFFIX : INDEX4_SUFFIX)); - - FILE *out_file = gdb_fopen_cloexec (filename.c_str (), "wb").release (); - if (!out_file) - error (_("Can't open `%s' for writing"), filename.c_str ()); - - /* Order matters here; we want FILE to be closed before FILENAME is - unlinked, because on MS-Windows one cannot delete a file that is - still open. 
(Don't call anything here that might throw until - file_closer is created.) */ - gdb::unlinker unlink_file (filename.c_str ()); - gdb_file_up close_out_file (out_file); - - if (index_kind == dw_index_kind::DEBUG_NAMES) - { - std::string filename_str (std::string (dir) + SLASH_STRING - + lbasename (objfile_name (objfile)) - + DEBUG_STR_SUFFIX); - FILE *out_file_str - = gdb_fopen_cloexec (filename_str.c_str (), "wb").release (); - if (!out_file_str) - error (_("Can't open `%s' for writing"), filename_str.c_str ()); - gdb::unlinker unlink_file_str (filename_str.c_str ()); - gdb_file_up close_out_file_str (out_file_str); - - const size_t total_len - = write_debug_names (dwarf2_per_objfile, out_file, out_file_str); - assert_file_size (out_file, filename.c_str (), total_len); - - /* We want to keep the file .debug_str file too. */ - unlink_file_str.keep (); - } - else - { - const size_t total_len - = write_gdbindex (dwarf2_per_objfile, out_file); - assert_file_size (out_file, filename.c_str (), total_len); - } - - /* We want to keep the file. */ - unlink_file.keep (); -} - -/* Implementation of the `save gdb-index' command. - - Note that the .gdb_index file format used by this command is - documented in the GDB manual. Any changes here must be documented - there. */ - -static void -save_gdb_index_command (const char *arg, int from_tty) -{ - struct objfile *objfile; - const char dwarf5space[] = "-dwarf-5 "; - dw_index_kind index_kind = dw_index_kind::GDB_INDEX; - - if (!arg) - arg = ""; - - arg = skip_spaces (arg); - if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0) - { - index_kind = dw_index_kind::DEBUG_NAMES; - arg += strlen (dwarf5space); - arg = skip_spaces (arg); - } - - if (!*arg) - error (_("usage: save gdb-index [-dwarf-5] DIRECTORY")); - - ALL_OBJFILES (objfile) - { - struct stat st; - - /* If the objfile does not correspond to an actual file, skip it. 
*/ - if (stat (objfile_name (objfile), &st) < 0) - continue; - - struct dwarf2_per_objfile *dwarf2_per_objfile - = get_dwarf2_per_objfile (objfile); - - if (dwarf2_per_objfile != NULL) - { - TRY - { - write_psymtabs_to_index (dwarf2_per_objfile, arg, index_kind); - } - CATCH (except, RETURN_MASK_ERROR) - { - exception_fprintf (gdb_stderr, except, - _("Error while writing index for `%s': "), - objfile_name (objfile)); - } - END_CATCH - } - - } -} - - - int dwarf_always_disassemble; static void @@ -27517,7 +25483,6 @@ show_check_physname (struct ui_file *file, int from_tty, void _initialize_dwarf2_read (void) { - struct cmd_list_element *c; dwarf2_objfile_data_key = register_objfile_data (); @@ -27606,17 +25571,6 @@ Warning: This option must be enabled before gdb reads the file."), NULL, &setlist, &showlist); - c = add_cmd ("gdb-index", class_files, save_gdb_index_command, - _("\ -Save a gdb-index file.\n\ -Usage: save gdb-index [-dwarf-5] DIRECTORY\n\ -\n\ -No options create one file with .gdb-index extension for pre-DWARF-5\n\ -compatible .gdb_index section. With -dwarf-5 creates two files with\n\ -extension .debug_names and .debug_str for DWARF-5 .debug_names section."), - &save_cmdlist); - set_cmd_completer (c, filename_completer); - dwarf2_locexpr_index = register_symbol_computed_impl (LOC_COMPUTED, &dwarf2_locexpr_funcs); dwarf2_loclist_index = register_symbol_computed_impl (LOC_COMPUTED, diff --git a/gdb/dwarf2read.h b/gdb/dwarf2read.h new file mode 100644 index 0000000000..46a10520ec --- /dev/null +++ b/gdb/dwarf2read.h @@ -0,0 +1,375 @@ +/* DWARF 2 debugging format support for GDB. + + Copyright (C) 1994-2018 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef DWARF2READ_H +#define DWARF2READ_H + +#include "filename-seen-cache.h" +#include "gdb_obstack.h" + +typedef struct dwarf2_per_cu_data *dwarf2_per_cu_ptr; +DEF_VEC_P (dwarf2_per_cu_ptr); + +/* A descriptor for dwarf sections. + + S.ASECTION, SIZE are typically initialized when the objfile is first + scanned. BUFFER, READIN are filled in later when the section is read. + If the section contained compressed data then SIZE is updated to record + the uncompressed size of the section. + + DWP file format V2 introduces a wrinkle that is easiest to handle by + creating the concept of virtual sections contained within a real section. + In DWP V2 the sections of the input DWO files are concatenated together + into one section, but section offsets are kept relative to the original + input section. + If this is a virtual dwp-v2 section, S.CONTAINING_SECTION is a backlink to + the real section this "virtual" section is contained in, and BUFFER,SIZE + describe the virtual section. */ + +struct dwarf2_section_info +{ + union + { + /* If this is a real section, the bfd section. */ + asection *section; + /* If this is a virtual section, pointer to the containing ("real") + section. */ + struct dwarf2_section_info *containing_section; + } s; + /* Pointer to section data, only valid if readin. */ + const gdb_byte *buffer; + /* The size of the section, real or virtual. */ + bfd_size_type size; + /* If this is a virtual section, the offset in the real section. + Only valid if is_virtual. */ + bfd_size_type virtual_offset; + /* True if we have tried to read this section.
*/ + char readin; + /* True if this is a virtual section, False otherwise. + This specifies which of s.section and s.containing_section to use. */ + char is_virtual; +}; + +typedef struct dwarf2_section_info dwarf2_section_info_def; +DEF_VEC_O (dwarf2_section_info_def); + +/* Read the contents of the section INFO. + OBJFILE is the main object file, but not necessarily the file where + the section comes from. E.g., for DWO files the bfd of INFO is the bfd + of the DWO file. + If the section is compressed, uncompress it before returning. */ + +void dwarf2_read_section (struct objfile *objfile, dwarf2_section_info *info); + +struct tu_stats +{ + int nr_uniq_abbrev_tables; + int nr_symtabs; + int nr_symtab_sharers; + int nr_stmt_less_type_units; + int nr_all_type_units_reallocs; +}; + +struct dwarf2_debug_sections; +struct mapped_index; +struct mapped_debug_names; + +/* Collection of data recorded per objfile. + This hangs off of dwarf2_objfile_data_key. */ + +struct dwarf2_per_objfile : public allocate_on_obstack +{ + /* Construct a dwarf2_per_objfile for OBJFILE. NAMES points to the + dwarf2 section names, or is NULL if the standard ELF names are + used. */ + dwarf2_per_objfile (struct objfile *objfile, + const dwarf2_debug_sections *names); + + ~dwarf2_per_objfile (); + + DISABLE_COPY_AND_ASSIGN (dwarf2_per_objfile); + + /* Free all cached compilation units. */ + void free_cached_comp_units (); +private: + /* This function is mapped across the sections and remembers the + offset and size of each of the debugging sections we are + interested in. 
*/ + void locate_sections (bfd *abfd, asection *sectp, + const dwarf2_debug_sections &names); + +public: + dwarf2_section_info info {}; + dwarf2_section_info abbrev {}; + dwarf2_section_info line {}; + dwarf2_section_info loc {}; + dwarf2_section_info loclists {}; + dwarf2_section_info macinfo {}; + dwarf2_section_info macro {}; + dwarf2_section_info str {}; + dwarf2_section_info line_str {}; + dwarf2_section_info ranges {}; + dwarf2_section_info rnglists {}; + dwarf2_section_info addr {}; + dwarf2_section_info frame {}; + dwarf2_section_info eh_frame {}; + dwarf2_section_info gdb_index {}; + dwarf2_section_info debug_names {}; + dwarf2_section_info debug_aranges {}; + + VEC (dwarf2_section_info_def) *types = NULL; + + /* Back link. */ + struct objfile *objfile = NULL; + + /* Table of all the compilation units. This is used to locate + the target compilation unit of a particular reference. */ + struct dwarf2_per_cu_data **all_comp_units = NULL; + + /* The number of compilation units in ALL_COMP_UNITS. */ + int n_comp_units = 0; + + /* The number of .debug_types-related CUs. */ + int n_type_units = 0; + + /* The number of elements allocated in all_type_units. + If there are skeleton-less TUs, we add them to all_type_units lazily. */ + int n_allocated_type_units = 0; + + /* The .debug_types-related CUs (TUs). + This is stored in malloc space because we may realloc it. */ + struct signatured_type **all_type_units = NULL; + + /* Table of struct type_unit_group objects. + The hash key is the DW_AT_stmt_list value. */ + htab_t type_unit_groups {}; + + /* A table mapping .debug_types signatures to its signatured_type entry. + This is NULL if the .debug_types section hasn't been read in yet. */ + htab_t signatured_types {}; + + /* Type unit statistics, to see how well the scaling improvements + are doing. */ + struct tu_stats tu_stats {}; + + /* A chain of compilation units that are currently read in, so that + they can be freed later. 
*/ + dwarf2_per_cu_data *read_in_chain = NULL; + + /* A table mapping DW_AT_dwo_name values to struct dwo_file objects. + This is NULL if the table hasn't been allocated yet. */ + htab_t dwo_files {}; + + /* True if we've checked for whether there is a DWP file. */ + bool dwp_checked = false; + + /* The DWP file if there is one, or NULL. */ + struct dwp_file *dwp_file = NULL; + + /* The shared '.dwz' file, if one exists. This is used when the + original data was compressed using 'dwz -m'. */ + struct dwz_file *dwz_file = NULL; + + /* A flag indicating whether this objfile has a section loaded at a + VMA of 0. */ + bool has_section_at_zero = false; + + /* True if we are using the mapped index, + or we are faking it for OBJF_READNOW's sake. */ + bool using_index = false; + + /* The mapped index, or NULL if .gdb_index is missing or not being used. */ + mapped_index *index_table = NULL; + + /* The mapped index, or NULL if .debug_names is missing or not being used. */ + std::unique_ptr debug_names_table; + + /* When using index_table, this keeps track of all quick_file_names entries. + TUs typically share line table entries with a CU, so we maintain a + separate table of all line table entries to support the sharing. + Note that while there can be way more TUs than CUs, we've already + sorted all the TUs into "type unit groups", grouped by their + DW_AT_stmt_list value. Therefore the only sharing done here is with a + CU and its associated TU group if there is one. */ + htab_t quick_file_names_table {}; + + /* Set during partial symbol reading, to prevent queueing of full + symbols. */ + bool reading_partial_symbols = false; + + /* Table mapping type DIEs to their struct type *. + This is NULL if not allocated yet. + The mapping is done via (CU/TU + DIE offset) -> type. */ + htab_t die_type_hash {}; + + /* The CUs we recently read. */ + VEC (dwarf2_per_cu_ptr) *just_read_cus = NULL; + + /* Table containing line_header indexed by offset and offset_in_dwz. 
*/
+  htab_t line_header_hash {};
+
+  /* Table containing all filenames.  This is an optional because the
+     table is lazily constructed on first access.
+     NOTE(review): gdb::optional appears here without a template
+     argument; it was presumably lost when this patch text was
+     extracted - confirm against the upstream commit.  */
+  gdb::optional filenames_cache;
+};
+
+/* Get the dwarf2_per_objfile associated with OBJFILE.  */
+
+dwarf2_per_objfile *get_dwarf2_per_objfile (struct objfile *objfile);
+
+/* Persistent data held for a compilation unit, even when not
+   processing it.  We put a pointer to this structure in the
+   read_symtab_private field of the psymtab.  */
+
+struct dwarf2_per_cu_data
+{
+  /* The start offset and length of this compilation unit.
+     NOTE: Unlike comp_unit_head.length, this length includes
+     initial_length_size.
+     If the DIE refers to a DWO file, these are always those of the
+     original DIE, not of the DWO file.  */
+  sect_offset sect_off;
+  unsigned int length;
+
+  /* DWARF standard version this data has been read from (such as 4 or 5).  */
+  short dwarf_version;
+
+  /* Flag indicating this compilation unit will be read in before
+     any of the current compilation units are processed.  */
+  unsigned int queued : 1;
+
+  /* This flag will be set when reading partial DIEs if we need to load
+     absolutely all DIEs for this compilation unit, instead of just the ones
+     we think are interesting.  It gets set if we look for a DIE in the
+     hash table and don't find it.  */
+  unsigned int load_all_dies : 1;
+
+  /* Non-zero if this CU is from .debug_types.
+     Struct dwarf2_per_cu_data is contained in struct signatured_type iff
+     this is non-zero.  */
+  unsigned int is_debug_types : 1;
+
+  /* Non-zero if this CU is from the .dwz file.  */
+  unsigned int is_dwz : 1;
+
+  /* Non-zero if reading a TU directly from a DWO file, bypassing the stub.
+     This flag is only valid if is_debug_types is true.
+     We can't read a CU directly from a DWO file: There are required
+     attributes in the stub.  */
+  unsigned int reading_dwo_directly : 1;
+
+  /* Non-zero if the TU has been read.
+     This is used to assist the "Stay in DWO Optimization" for Fission:
+     When reading a DWO, it's faster to read TUs from the DWO instead of
+     fetching them from random other DWOs (due to comdat folding).
+     If the TU has already been read, the optimization is unnecessary
+     (and unwise - we don't want to change where gdb thinks the TU lives
+     "midflight").
+     This flag is only valid if is_debug_types is true.  */
+  unsigned int tu_read : 1;
+
+  /* The section this CU/TU lives in.
+     If the DIE refers to a DWO file, this is always the section of the
+     original DIE, not of the DWO file.  */
+  struct dwarf2_section_info *section;
+
+  /* Set to non-NULL iff this CU is currently loaded.  When it gets freed out
+     of the CU cache it gets reset to NULL again.  This is left as NULL for
+     dummy CUs (a CU header, but nothing else).  */
+  struct dwarf2_cu *cu;
+
+  /* The corresponding dwarf2_per_objfile.  */
+  struct dwarf2_per_objfile *dwarf2_per_objfile;
+
+  /* When dwarf2_per_objfile->using_index is true, the 'quick' field
+     is active.  Otherwise, the 'psymtab' field is active.  */
+  union
+  {
+    /* The partial symbol table associated with this compilation unit,
+       or NULL for unread partial units.  */
+    struct partial_symtab *psymtab;
+
+    /* Data needed by the "quick" functions.  */
+    struct dwarf2_per_cu_quick_data *quick;
+  } v;
+
+  /* The CUs we import using DW_TAG_imported_unit.  This is filled in
+     while reading psymtabs, used to compute the psymtab dependencies,
+     and then cleared.  Then it is filled in again while reading full
+     symbols, and only deleted when the objfile is destroyed.
+
+     This is also used to work around a difference between the way gold
+     generates .gdb_index version <=7 and the way gdb does.  Arguably this
+     is a gold bug.  For symbols coming from TUs, gold records in the index
+     the CU that includes the TU instead of the TU itself.  This breaks
+     dw2_lookup_symbol: It assumes that if the index says symbol X lives
+     in CU/TU Y, then one need only expand Y and a subsequent lookup in Y
+     will find X.  Alas TUs live in their own symtab, so after expanding CU Y
+     we need to look in TU Z to find X.  Fortunately, this is akin to
+     DW_TAG_imported_unit, so we just use the same mechanism: For
+     .gdb_index version <=7 this also records the TUs that the CU referred
+     to.  Concurrently with this change gdb was modified to emit version 8
+     indices so we only pay a price for gold-generated indices.
+     http://sourceware.org/bugzilla/show_bug.cgi?id=15021.  */
+  VEC (dwarf2_per_cu_ptr) *imported_symtabs;
+};
+
+/* Entry in the signatured_types hash table.
+   Each entry embeds the generic per-CU data (per_cu) as its first
+   member and adds the fields that only apply to a type unit.  */
+
+struct signatured_type
+{
+  /* The "per_cu" object of this type.
+     This struct is used iff per_cu.is_debug_types.
+     N.B.: This is the first member so that it's easy to convert pointers
+     between them.  */
+  struct dwarf2_per_cu_data per_cu;
+
+  /* The type's signature.  */
+  ULONGEST signature;
+
+  /* Offset in the TU of the type's DIE, as read from the TU header.
+     If this TU is a DWO stub and the definition lives in a DWO file
+     (specified by DW_AT_GNU_dwo_name), this value is unusable.  */
+  cu_offset type_offset_in_tu;
+
+  /* Offset in the section of the type's DIE.
+     If the definition lives in a DWO file, this is the offset in the
+     .debug_types.dwo section.
+     The value is zero until the actual value is known.
+     Zero is otherwise not a valid section offset.  */
+  sect_offset type_offset_in_section;
+
+  /* Type units are grouped by their DW_AT_stmt_list entry so that they
+     can share them.  This points to the containing symtab.  */
+  struct type_unit_group *type_unit_group;
+
+  /* The type.
+     The first time we encounter this type we fully read it in and install it
+     in the symbol tables.  Subsequent times we only need the type.  */
+  struct type *type;
+
+  /* Containing DWO unit.
+     This field is valid iff per_cu.reading_dwo_directly.  */
+  struct dwo_unit *dwo_unit;
+};
+
+typedef struct signatured_type *sig_type_ptr;
+DEF_VEC_P (sig_type_ptr);
+
+#endif /* DWARF2READ_H */