Fix performance regression due to ld -r memmove

The idea here is, instead of using memmove to shuffle the relocs
array every time one is deleted, to add a "wrel" write pointer and
copy from rel[0] to wrel[0] as we go.

	* elf64-ppc.c (ppc64_elf_relocate_section): Use read and write
	pointers to reloc array, rather than memmove when deleting a
	reloc.  Don't use RELOC_AGAINST_DISCARDED_SECTION.  Adjust
	reloc counts at end of loop.
	* elf32-ppc.c (ppc_elf_relocate_section): Likewise.
This commit is contained in:
Alan Modra 2015-11-09 15:03:29 +10:30
parent 1057567ea7
commit c316a17c40
3 changed files with 157 additions and 69 deletions

View File

@ -1,3 +1,11 @@
2015-11-10 Alan Modra <amodra@gmail.com>
* elf64-ppc.c (ppc64_elf_relocate_section): Use read and write
pointers to reloc array, rather than memmove when deleting a
reloc. Don't use RELOC_AGAINST_DISCARDED_SECTION. Adjust
reloc counts at end of loop.
* elf32-ppc.c (ppc_elf_relocate_section): Likewise.
2015-11-09 Dominik Vogt <vogt@linux.vnet.ibm.com>
* elf64-ppc.c (ppc64_elf_size_stubs, ppc64_elf_build_stubs): Fix left

View File

@ -7650,6 +7650,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
struct elf_link_hash_entry **sym_hashes;
struct ppc_elf_link_hash_table *htab;
Elf_Internal_Rela *rel;
Elf_Internal_Rela *wrel;
Elf_Internal_Rela *relend;
Elf_Internal_Rela outrel;
asection *got2;
@ -7685,9 +7686,9 @@ ppc_elf_relocate_section (bfd *output_bfd,
".tls_vars"));
if (input_section->sec_info_type == SEC_INFO_TYPE_TARGET)
relax_info = elf_section_data (input_section)->sec_info;
rel = relocs;
rel = wrel = relocs;
relend = relocs + input_section->reloc_count;
for (; rel < relend; rel++)
for (; rel < relend; wrel++, rel++)
{
enum elf_ppc_reloc_type r_type;
bfd_vma addend;
@ -7706,6 +7707,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
struct plt_entry **ifunc;
struct reloc_howto_struct alt_howto;
again:
r_type = ELF32_R_TYPE (rel->r_info);
sym = NULL;
sec = NULL;
@ -7742,8 +7744,22 @@ ppc_elf_relocate_section (bfd *output_bfd,
howto = NULL;
if (r_type < R_PPC_max)
howto = ppc_elf_howto_table[r_type];
RELOC_AGAINST_DISCARDED_SECTION (info, input_bfd, input_section,
rel, 1, relend, howto, 0, contents);
_bfd_clear_contents (howto, input_bfd, input_section,
contents + rel->r_offset);
wrel->r_offset = rel->r_offset;
wrel->r_info = 0;
wrel->r_addend = 0;
/* For ld -r, remove relocations in debug sections against
sections defined in discarded sections. Not done for
non-debug to preserve relocs in .eh_frame which the
eh_frame editing code expects to be present. */
if (bfd_link_relocatable (info)
&& (input_section->flags & SEC_DEBUGGING))
wrel--;
continue;
}
if (bfd_link_relocatable (info))
@ -7759,7 +7775,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
if (r_type != R_PPC_RELAX_PLT
&& r_type != R_PPC_RELAX_PLTREL24
&& r_type != R_PPC_RELAX)
continue;
goto copy_reloc;
}
/* TLS optimizations. Replace instruction sequences and relocs
@ -7802,10 +7818,12 @@ ppc_elf_relocate_section (bfd *output_bfd,
{
bfd_vma insn;
insn = bfd_get_32 (output_bfd, contents + rel->r_offset - d_offset);
insn = bfd_get_32 (output_bfd,
contents + rel->r_offset - d_offset);
insn &= 31 << 21;
insn |= 0x3c020000; /* addis 0,2,0 */
bfd_put_32 (output_bfd, insn, contents + rel->r_offset - d_offset);
bfd_put_32 (output_bfd, insn,
contents + rel->r_offset - d_offset);
r_type = R_PPC_TPREL16_HA;
rel->r_info = ELF32_R_INFO (r_symndx, r_type);
}
@ -7941,8 +7959,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
{
/* We changed the symbol on an LD reloc. Start over
in order to get h, sym, sec etc. right. */
rel--;
continue;
goto again;
}
}
break;
@ -8000,8 +8017,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
/* Zap the reloc on the _tls_get_addr call too. */
BFD_ASSERT (rel->r_offset - d_offset == rel[1].r_offset);
rel[1].r_info = ELF32_R_INFO (STN_UNDEF, R_PPC_NONE);
rel--;
continue;
goto again;
}
break;
}
@ -8080,9 +8096,9 @@ ppc_elf_relocate_section (bfd *output_bfd,
got_addr = (htab->got->output_section->vma
+ htab->got->output_offset
+ (h->got.offset & ~1));
rel->r_info = ELF32_R_INFO (0, R_PPC_ADDR16_HA);
rel->r_addend = got_addr;
rel->r_offset = (p - contents) + d_offset;
wrel->r_offset = (p - contents) + d_offset;
wrel->r_info = ELF32_R_INFO (0, R_PPC_ADDR16_HA);
wrel->r_addend = got_addr;
insn &= ~0xffff;
insn |= ((unsigned int )(got_addr + 0x8000) >> 16) & 0xffff;
bfd_put_32 (output_bfd, insn, p);
@ -8100,9 +8116,10 @@ ppc_elf_relocate_section (bfd *output_bfd,
/* Use one of the spare relocs, so --emit-relocs
output is reasonable. */
memmove (rel + 1, rel, (relend - rel - 1) * sizeof (*rel));
rel++;
wrel++, rel++;
rel->r_offset = wrel[-1].r_offset + 4;
rel->r_info = ELF32_R_INFO (0, R_PPC_ADDR16_LO);
rel->r_offset += 4;
rel->r_addend = wrel[-1].r_addend;
/* Continue on as if we had a got reloc, to output
dynamic reloc. */
@ -8236,7 +8253,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
bfd_set_error (bfd_error_bad_value);
ret = FALSE;
continue;
goto copy_reloc;
case R_PPC_NONE:
case R_PPC_TLS:
@ -8245,7 +8262,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
case R_PPC_EMB_MRKREF:
case R_PPC_GNU_VTINHERIT:
case R_PPC_GNU_VTENTRY:
continue;
goto copy_reloc;
/* GOT16 relocations. Like an ADDR16 using the symbol's
address in the GOT as relocation value instead of the
@ -8496,7 +8513,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
/* If here for a picfixup, we're done. */
if (r_type != ELF32_R_TYPE (rel->r_info))
continue;
goto copy_reloc;
relocation = (htab->got->output_section->vma
+ htab->got->output_offset
@ -8529,7 +8546,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
rel->r_offset,
TRUE))
return FALSE;
continue;
goto copy_reloc;
}
break;
@ -8768,7 +8785,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
bfd_elf32_swap_reloca_out (output_bfd, &outrel, loc);
if (skip == -1)
continue;
goto copy_reloc;
/* This reloc will be computed at runtime. We clear the memory
so that it contains predictable value. */
@ -8861,12 +8878,13 @@ ppc_elf_relocate_section (bfd *output_bfd,
relocs to describe this relocation. */
BFD_ASSERT (ELF32_R_TYPE (relend[-1].r_info) == R_PPC_NONE);
/* The relocs are at the bottom 2 bytes */
rel[0].r_offset += d_offset;
memmove (rel + 1, rel, (relend - rel - 1) * sizeof (*rel));
rel[0].r_info = ELF32_R_INFO (r_symndx, R_PPC_ADDR16_HA);
rel[1].r_offset += 4;
rel[1].r_info = ELF32_R_INFO (r_symndx, R_PPC_ADDR16_LO);
rel++;
wrel->r_offset = rel->r_offset + d_offset;
wrel->r_info = ELF32_R_INFO (r_symndx, R_PPC_ADDR16_HA);
wrel->r_addend = rel->r_addend;
memmove (wrel + 1, wrel, (relend - wrel - 1) * sizeof (*wrel));
wrel++, rel++;
wrel->r_offset += 4;
wrel->r_info = ELF32_R_INFO (r_symndx, R_PPC_ADDR16_LO);
}
continue;
@ -9014,37 +9032,37 @@ ppc_elf_relocate_section (bfd *output_bfd,
relocation = relocation + addend;
ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset,
relocation, split16a_type);
continue;
goto copy_reloc;
case R_PPC_VLE_LO16D:
relocation = relocation + addend;
ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset,
relocation, split16d_type);
continue;
goto copy_reloc;
case R_PPC_VLE_HI16A:
relocation = (relocation + addend) >> 16;
ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset,
relocation, split16a_type);
continue;
goto copy_reloc;
case R_PPC_VLE_HI16D:
relocation = (relocation + addend) >> 16;
ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset,
relocation, split16d_type);
continue;
goto copy_reloc;
case R_PPC_VLE_HA16A:
relocation = (relocation + addend + 0x8000) >> 16;
ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset,
relocation, split16a_type);
continue;
goto copy_reloc;
case R_PPC_VLE_HA16D:
relocation = (relocation + addend + 0x8000) >> 16;
ppc_elf_vle_split16 (output_bfd, contents + rel->r_offset,
relocation, split16d_type);
continue;
goto copy_reloc;
/* Relocate against either _SDA_BASE_, _SDA2_BASE_, or 0. */
case R_PPC_EMB_SDA21:
@ -9093,7 +9111,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
bfd_set_error (bfd_error_bad_value);
ret = FALSE;
continue;
goto copy_reloc;
}
if (sda != NULL)
@ -9131,7 +9149,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
if (r_type == R_PPC_VLE_SDA21
&& ((relocation + 0x80000) & 0xffffffff) > 0x100000)
goto overflow;
continue;
goto copy_reloc;
}
else if (r_type == R_PPC_EMB_SDA21
|| r_type == R_PPC_VLE_SDA21
@ -9187,7 +9205,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
bfd_set_error (bfd_error_bad_value);
ret = FALSE;
continue;
goto copy_reloc;
}
if (sda != NULL)
@ -9234,7 +9252,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
value, split16d_type);
}
}
continue;
goto copy_reloc;
/* Relocate against the beginning of the section. */
case R_PPC_SECTOFF:
@ -9282,7 +9300,7 @@ ppc_elf_relocate_section (bfd *output_bfd,
bfd_set_error (bfd_error_invalid_operation);
ret = FALSE;
continue;
goto copy_reloc;
}
/* Do any further special processing. */
@ -9342,7 +9360,8 @@ ppc_elf_relocate_section (bfd *output_bfd,
that make up part of the insn opcode. */
unsigned int insn, mask, lobit;
insn = bfd_get_32 (output_bfd, contents + rel->r_offset - d_offset);
insn = bfd_get_32 (output_bfd,
contents + rel->r_offset - d_offset);
mask = 0;
if (is_insn_ds_form (insn))
mask = 3;
@ -9452,6 +9471,31 @@ ppc_elf_relocate_section (bfd *output_bfd,
ret = FALSE;
}
}
copy_reloc:
if (wrel != rel)
*wrel = *rel;
}
if (wrel != rel)
{
Elf_Internal_Shdr *rel_hdr;
size_t deleted = rel - wrel;
rel_hdr = _bfd_elf_single_rel_hdr (input_section->output_section);
rel_hdr->sh_size -= rel_hdr->sh_entsize * deleted;
if (rel_hdr->sh_size == 0)
{
/* It is too late to remove an empty reloc section. Leave
one NONE reloc.
??? What is wrong with an empty section??? */
rel_hdr->sh_size = rel_hdr->sh_entsize;
deleted -= 1;
wrel++;
}
relend = wrel;
rel_hdr = _bfd_elf_single_rel_hdr (input_section);
rel_hdr->sh_size -= rel_hdr->sh_entsize * deleted;
input_section->reloc_count -= deleted;
}
#ifdef DEBUG

View File

@ -13162,6 +13162,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
Elf_Internal_Shdr *symtab_hdr;
struct elf_link_hash_entry **sym_hashes;
Elf_Internal_Rela *rel;
Elf_Internal_Rela *wrel;
Elf_Internal_Rela *relend;
Elf_Internal_Rela outrel;
bfd_byte *loc;
@ -13193,9 +13194,9 @@ ppc64_elf_relocate_section (bfd *output_bfd,
sym_hashes = elf_sym_hashes (input_bfd);
is_opd = ppc64_elf_section_data (input_section)->sec_type == sec_opd;
rel = relocs;
rel = wrel = relocs;
relend = relocs + input_section->reloc_count;
for (; rel < relend; rel++)
for (; rel < relend; wrel++, rel++)
{
enum elf_ppc64_reloc_type r_type;
bfd_vma addend;
@ -13219,10 +13220,13 @@ ppc64_elf_relocate_section (bfd *output_bfd,
struct ppc_stub_hash_entry *stub_entry;
bfd_vma max_br_offset;
bfd_vma from;
const Elf_Internal_Rela orig_rel = *rel;
Elf_Internal_Rela orig_rel;
reloc_howto_type *howto;
struct reloc_howto_struct alt_howto;
again:
orig_rel = *rel;
r_type = ELF64_R_TYPE (rel->r_info);
r_symndx = ELF64_R_SYM (rel->r_info);
@ -13230,10 +13234,10 @@ ppc64_elf_relocate_section (bfd *output_bfd,
symbol of the previous ADDR64 reloc. The symbol gives us the
proper TOC base to use. */
if (rel->r_info == ELF64_R_INFO (0, R_PPC64_TOC)
&& rel != relocs
&& ELF64_R_TYPE (rel[-1].r_info) == R_PPC64_ADDR64
&& wrel != relocs
&& ELF64_R_TYPE (wrel[-1].r_info) == R_PPC64_ADDR64
&& is_opd)
r_symndx = ELF64_R_SYM (rel[-1].r_info);
r_symndx = ELF64_R_SYM (wrel[-1].r_info);
sym = NULL;
sec = NULL;
@ -13314,13 +13318,27 @@ ppc64_elf_relocate_section (bfd *output_bfd,
h = (struct ppc_link_hash_entry *) h_elf;
if (sec != NULL && discarded_section (sec))
RELOC_AGAINST_DISCARDED_SECTION (info, input_bfd, input_section,
rel, 1, relend,
ppc64_elf_howto_table[r_type], 0,
contents);
{
_bfd_clear_contents (ppc64_elf_howto_table[r_type],
input_bfd, input_section,
contents + rel->r_offset);
wrel->r_offset = rel->r_offset;
wrel->r_info = 0;
wrel->r_addend = 0;
/* For ld -r, remove relocations in debug sections against
sections defined in discarded sections. Not done for
non-debug to preserve relocs in .eh_frame which the
eh_frame editing code expects to be present. */
if (bfd_link_relocatable (info)
&& (input_section->flags & SEC_DEBUGGING))
wrel--;
continue;
}
if (bfd_link_relocatable (info))
continue;
goto copy_reloc;
if (h != NULL && &h->elf == htab->elf.hgot)
{
@ -13480,10 +13498,12 @@ ppc64_elf_relocate_section (bfd *output_bfd,
&& (tls_mask & TLS_TPREL) == 0)
{
toctprel:
insn = bfd_get_32 (output_bfd, contents + rel->r_offset - d_offset);
insn = bfd_get_32 (output_bfd,
contents + rel->r_offset - d_offset);
insn &= 31 << 21;
insn |= 0x3c0d0000; /* addis 0,13,0 */
bfd_put_32 (output_bfd, insn, contents + rel->r_offset - d_offset);
bfd_put_32 (output_bfd, insn,
contents + rel->r_offset - d_offset);
r_type = R_PPC64_TPREL16_HA;
if (toc_symndx != 0)
{
@ -13491,8 +13511,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
rel->r_addend = toc_addend;
/* We changed the symbol. Start over in order to
get h, sym, sec etc. right. */
rel--;
continue;
goto again;
}
else
rel->r_info = ELF64_R_INFO (r_symndx, r_type);
@ -13518,8 +13537,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
rel->r_addend = toc_addend;
/* We changed the symbol. Start over in order to
get h, sym, sec etc. right. */
rel--;
continue;
goto again;
}
else
rel->r_info = ELF64_R_INFO (r_symndx, r_type);
@ -13658,8 +13676,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
{
/* We changed the symbol. Start over in order
to get h, sym, sec etc. right. */
rel--;
continue;
goto again;
}
}
break;
@ -13703,10 +13720,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
}
bfd_put_32 (output_bfd, insn2, contents + offset);
if ((tls_mask & TLS_TPRELGD) == 0 && toc_symndx != 0)
{
rel--;
continue;
}
goto again;
}
break;
@ -13748,8 +13762,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
insn2 = NOP;
}
bfd_put_32 (output_bfd, insn2, contents + offset);
rel--;
continue;
goto again;
}
break;
@ -14091,7 +14104,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
&& addend == 0)
{
bfd_put_32 (output_bfd, NOP, contents + rel->r_offset);
continue;
goto copy_reloc;
}
break;
}
@ -14107,7 +14120,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
bfd_set_error (bfd_error_bad_value);
ret = FALSE;
continue;
goto copy_reloc;
case R_PPC64_NONE:
case R_PPC64_TLS:
@ -14116,7 +14129,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
case R_PPC64_TOCSAVE:
case R_PPC64_GNU_VTINHERIT:
case R_PPC64_GNU_VTENTRY:
continue;
goto copy_reloc;
/* GOT16 relocations. Like an ADDR16 using the symbol's
address in the GOT as relocation value instead of the
@ -14752,7 +14765,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
bfd_set_error (bfd_error_invalid_operation);
ret = FALSE;
continue;
goto copy_reloc;
}
/* Multi-instruction sequences that access the TOC can be
@ -14901,7 +14914,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
mask + 1);
bfd_set_error (bfd_error_bad_value);
ret = FALSE;
continue;
goto copy_reloc;
}
break;
}
@ -15000,6 +15013,29 @@ ppc64_elf_relocate_section (bfd *output_bfd,
if (more_info != NULL)
free (more_info);
}
copy_reloc:
if (wrel != rel)
*wrel = *rel;
}
if (wrel != rel)
{
Elf_Internal_Shdr *rel_hdr;
size_t deleted = rel - wrel;
rel_hdr = _bfd_elf_single_rel_hdr (input_section->output_section);
rel_hdr->sh_size -= rel_hdr->sh_entsize * deleted;
if (rel_hdr->sh_size == 0)
{
/* It is too late to remove an empty reloc section. Leave
one NONE reloc.
??? What is wrong with an empty section??? */
rel_hdr->sh_size = rel_hdr->sh_entsize;
deleted -= 1;
}
rel_hdr = _bfd_elf_single_rel_hdr (input_section);
rel_hdr->sh_size -= rel_hdr->sh_entsize * deleted;
input_section->reloc_count -= deleted;
}
/* If we're emitting relocations, then shortly after this function