PR ld/13235

bfd/
	* elf64-ppc.c (struct ppc64_elf_obj_tdata): Add ha_relocs_not_using_r2.
	(ppc64_elf_edit_toc): Check HA relocs.
	(ha_reloc_match): Delete function.
	(ppc64_elf_relocate_section): Remove delayed HA nop optimization.
	Instead do it and low part optimization based on
	ha_relocs_not_using_r2.
ld/testsuite/
	* ld-powerpc/tocopt.d: Update.
	* ld-powerpc/tocopt5.d, * ld-powerpc/tocopt5.s: New test.
	* ld-powerpc/powerpc.exp: Run new test.
This commit is contained in:
Alan Modra 2011-09-30 05:11:04 +00:00
parent 723b724b17
commit 9852805258
7 changed files with 126 additions and 112 deletions

View File

@ -1,3 +1,13 @@
2011-09-30 Alan Modra <amodra@gmail.com>
PR ld/13235
* elf64-ppc.c (struct ppc64_elf_obj_tdata): Add ha_relocs_not_using_r2.
(ppc64_elf_edit_toc): Check HA relocs.
(ha_reloc_match): Delete function.
(ppc64_elf_relocate_section): Remove delayed HA nop optimization.
Instead do it and low part optimization based on
ha_relocs_not_using_r2.
2011-09-29 Alan Modra <amodra@gmail.com>
PR ld/13233

View File

@ -2598,7 +2598,10 @@ struct ppc64_elf_obj_tdata
/* Nonzero if this bfd has small toc/got relocs, ie. that expect
the reloc to be in the range -32768 to 32767. */
unsigned int has_small_toc_reloc;
unsigned int has_small_toc_reloc : 1;
/* Set if a toc/got HA reloc is found on an insn other than addis rt,2,imm. */
unsigned int ha_relocs_not_using_r2 : 1;
};
#define ppc64_elf_tdata(bfd) \
@ -8171,6 +8174,35 @@ ppc64_elf_edit_toc (struct bfd_link_info *info)
bfd_vma val;
r_type = ELF64_R_TYPE (rel->r_info);
switch (r_type)
{
default:
break;
case R_PPC64_GOT_TLSLD16_HA:
case R_PPC64_GOT_TLSGD16_HA:
case R_PPC64_GOT_TPREL16_HA:
case R_PPC64_GOT_DTPREL16_HA:
case R_PPC64_GOT16_HA:
case R_PPC64_TOC16_HA:
{
bfd_vma off = rel->r_offset & ~3;
unsigned char buf[4];
unsigned int insn;
if (!bfd_get_section_contents (ibfd, sec, buf, off, 4))
{
free (used);
goto error_ret;
}
insn = bfd_get_32 (ibfd, buf);
if ((insn & ((0x3f << 26) | 0x1f << 16))
!= ((15u << 26) | (2 << 16)) /* addis rt,2,imm */)
ppc64_elf_tdata (ibfd)->ha_relocs_not_using_r2 = 1;
}
break;
}
switch (r_type)
{
case R_PPC64_TOC16:
@ -8220,7 +8252,10 @@ ppc64_elf_edit_toc (struct bfd_link_info *info)
case R_PPC64_TOC16_LO_DS:
off = rel->r_offset + (bfd_big_endian (ibfd) ? -2 : 3);
if (!bfd_get_section_contents (ibfd, sec, &opc, off, 1))
return FALSE;
{
free (used);
goto error_ret;
}
if ((opc & (0x3f << 2)) == (58u << 2))
break;
/* Fall thru */
@ -11673,63 +11708,6 @@ ppc64_elf_action_discarded (asection *sec)
return _bfd_elf_default_action_discarded (sec);
}
/* REL points to a low-part reloc on a largetoc instruction sequence.
   Search backwards from REL towards RELOCS for the matching high-part
   reloc: one against the same symbol, with the corresponding _HA
   reloc type and, when MATCH_ADDEND is set, the same addend.  Only
   the nearest such reloc is examined.

   On entry *REG is the register the high-part instruction is
   expected to write.  If the instruction at the high-part reloc is
   addis *REG,x,imm then set *REG to x and return a pointer to the
   high-part reloc.  Otherwise, or if no candidate reloc precedes
   REL, return NULL and leave *REG unchanged.

   CONTENTS is the section contents that the reloc offsets index, and
   INPUT_BFD is used to read the instruction word from it.  */

static const Elf_Internal_Rela *
ha_reloc_match (const Elf_Internal_Rela *relocs,
		const Elf_Internal_Rela *rel,
		unsigned int *reg,
		bfd_boolean match_addend,
		const bfd *input_bfd,
		const bfd_byte *contents)
{
  enum elf_ppc64_reloc_type r_type, r_type_ha;
  bfd_vma r_info_ha, r_addend;

  /* Map the low-part reloc type to its high-adjusted partner.  */
  r_type = ELF64_R_TYPE (rel->r_info);
  switch (r_type)
    {
    case R_PPC64_GOT_TLSLD16_LO:
    case R_PPC64_GOT_TLSGD16_LO:
    case R_PPC64_GOT_TPREL16_LO_DS:
    case R_PPC64_GOT_DTPREL16_LO_DS:
    case R_PPC64_GOT16_LO:
    case R_PPC64_TOC16_LO:
      /* This relies on the _HA reloc number being two more than the
	 low-part reloc number for each of the types above.  */
      r_type_ha = r_type + 2;
      break;

    case R_PPC64_GOT16_LO_DS:
      r_type_ha = R_PPC64_GOT16_HA;
      break;

    case R_PPC64_TOC16_LO_DS:
      r_type_ha = R_PPC64_TOC16_HA;
      break;

    default:
      /* Callers must only pass the low-part reloc types above.  */
      abort ();
    }

  r_info_ha = ELF64_R_INFO (ELF64_R_SYM (rel->r_info), r_type_ha);
  r_addend = rel->r_addend;

  /* Walk backwards without ever forming a pointer before RELOCS;
     decrementing past the first array element is undefined.  */
  while (rel > relocs)
    {
      --rel;
      if (rel->r_info == r_info_ha
	  && (!match_addend
	      || rel->r_addend == r_addend))
	{
	  /* The reloc applies to a 16-bit field; round the offset down
	     to the start of the containing 4-byte instruction.  */
	  const bfd_byte *p = contents + (rel->r_offset & ~3);
	  unsigned int insn = bfd_get_32 (input_bfd, p);

	  /* Unsigned masks: 0x3f << 26 would overflow a signed int.  */
	  if ((insn & (0x3fu << 26)) == (15u << 26) /* addis rt,x,imm */
	      && (insn & (0x1fu << 21)) == (*reg << 21))
	    {
	      *reg = (insn >> 16) & 0x1f;
	      return rel;
	    }

	  /* The nearest candidate is not the expected addis; do not
	     look any further back.  */
	  break;
	}
    }
  return NULL;
}
/* The RELOCATE_SECTION function is called by the ELF backend linker
to handle the relocations for a section.
@ -11777,9 +11755,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
Elf_Internal_Rela outrel;
bfd_byte *loc;
struct got_entry **local_got_ents;
unsigned char *ha_opt;
bfd_vma TOCstart;
bfd_boolean no_ha_opt;
bfd_boolean ret = TRUE;
bfd_boolean is_opd;
/* Disabled until we sort out how ld should choose 'y' vs 'at'. */
@ -11805,8 +11781,6 @@ ppc64_elf_relocate_section (bfd *output_bfd,
symtab_hdr = &elf_symtab_hdr (input_bfd);
sym_hashes = elf_sym_hashes (input_bfd);
is_opd = ppc64_elf_section_data (input_section)->sec_type == sec_opd;
ha_opt = NULL;
no_ha_opt = FALSE;
rel = relocs;
relend = relocs + input_section->reloc_count;
@ -13218,7 +13192,12 @@ ppc64_elf_relocate_section (bfd *output_bfd,
case R_PPC64_GOT_DTPREL16_HA:
case R_PPC64_GOT16_HA:
case R_PPC64_TOC16_HA:
/* nop is done later. */
if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000
&& !ppc64_elf_tdata (input_bfd)->ha_relocs_not_using_r2)
{
bfd_byte *p = contents + (rel->r_offset & ~3);
bfd_put_32 (input_bfd, NOP, p);
}
break;
case R_PPC64_GOT_TLSLD16_LO:
@ -13229,7 +13208,8 @@ ppc64_elf_relocate_section (bfd *output_bfd,
case R_PPC64_GOT16_LO_DS:
case R_PPC64_TOC16_LO:
case R_PPC64_TOC16_LO_DS:
if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000)
if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000
&& !ppc64_elf_tdata (input_bfd)->ha_relocs_not_using_r2)
{
bfd_byte *p = contents + (rel->r_offset & ~3);
insn = bfd_get_32 (input_bfd, p);
@ -13252,32 +13232,9 @@ ppc64_elf_relocate_section (bfd *output_bfd,
|| ((insn & (0x3f << 26)) == 62u << 26 /* std, stmd */
&& ((insn & 3) == 0 || (insn & 3) == 3)))
{
unsigned int reg = (insn >> 16) & 0x1f;
const Elf_Internal_Rela *ha;
bfd_boolean match_addend;
match_addend = (sym != NULL
&& ELF_ST_TYPE (sym->st_info) == STT_SECTION);
ha = ha_reloc_match (relocs, rel, &reg, match_addend,
input_bfd, contents);
if (ha != NULL)
{
insn &= ~(0x1f << 16);
insn |= reg << 16;
bfd_put_32 (input_bfd, insn, p);
if (ha_opt == NULL)
{
ha_opt = bfd_zmalloc (input_section->reloc_count);
if (ha_opt == NULL)
return FALSE;
}
ha_opt[ha - relocs] = 1;
}
else
/* If we don't find a matching high part insn,
something is fishy. Refuse to nop any high
part insn in this section. */
no_ha_opt = TRUE;
insn &= ~(0x1f << 16);
insn |= 2 << 16;
bfd_put_32 (input_bfd, insn, p);
}
}
break;
@ -13431,23 +13388,6 @@ ppc64_elf_relocate_section (bfd *output_bfd,
}
}
if (ha_opt != NULL)
{
if (!no_ha_opt)
{
unsigned char *opt = ha_opt;
rel = relocs;
relend = relocs + input_section->reloc_count;
for (; rel < relend; opt++, rel++)
if (*opt != 0)
{
bfd_byte *p = contents + (rel->r_offset & ~3);
bfd_put_32 (input_bfd, NOP, p);
}
}
free (ha_opt);
}
/* If we're emitting relocations, then shortly after this function
returns, reloc offsets and addends for this section will be
adjusted. Worse, reloc symbol indices will be for the output

View File

@ -1,3 +1,9 @@
2011-09-30 Alan Modra <amodra@gmail.com>
* ld-powerpc/tocopt.d: Update.
* ld-powerpc/tocopt5.d, * ld-powerpc/tocopt5.s: New test.
* ld-powerpc/powerpc.exp: Run new test.
2011-09-16 H.J. Lu <hongjiu.lu@intel.com>
* ld-elf/pr12975.d: Only run for *-*-linux* and *-*-gnu*

View File

@ -211,6 +211,8 @@ set ppc64elftests {
{{objdump -s tocopt3.d}} "tocopt3"}
{"TOC opt4" "-melf64ppc -no-keep-memory --defsym x=2" "-a64"
{tocopt4a.s tocopt4b.s} {{objdump -s tocopt4.d}} "tocopt4"}
{"TOC opt5" "-melf64ppc" "-a64" {tocopt5.s}
{{objdump -s tocopt5.d}} "tocopt5"}
}

View File

@ -2,10 +2,10 @@
.*: file format .*
Contents of section \.text:
100000b0 60000000 e9228018 60000000 38a28020 .*
100000c0 e8c50000 60000000 3ba08028 7c62e82a .*
100000d0 60000000 39228033 60000000 38a28008 .*
100000e0 e8c50000 60000000 3ba08010 7c62e82a .*
100000b0 3d220000 e9298018 3c820000 38a48020 .*
100000c0 e8c50000 3fa00000 3bbd8028 7c62e82a .*
100000d0 3d220000 39298033 3c820000 38a48008 .*
100000e0 e8c50000 3fa00000 3bbd8010 7c62e82a .*
Contents of section \.got:
100100f0 00000000 100180f0 00000000 10010124 .*
10010100 00000000 10010125 00000000 10010120 .*

View File

@ -0,0 +1,13 @@
.*: file format .*
Contents of section \.text:
100000b0 60000000 e9228018 60000000 38a28020 .*
100000c0 e8c50000 60000000 3922802b 60000000 .*
100000d0 38a28008 e8c50000 .*
Contents of section \.got:
100100d8 00000000 100180d8 00000000 10010104 .*
100100e8 00000000 10010105 00000000 10010100 .*
100100f8 00000000 10010101 .*
Contents of section \.sdata:
10010100 01020304 0506 .*

View File

@ -0,0 +1,43 @@
.section .toc,"aw"
x4t:
.quad x4
x5t:
.quad x5
x6t:
.quad x6
.section .sdata,"aw"
x1:
.byte 1
x2:
.byte 2
x3:
.byte 3
x4:
.byte 4
x5:
.byte 5
x6:
.byte 6
.globl _start
.text
_start:
# no need for got entry, optimise to nop,addi
# note: ld doesn't yet do got optimisation, so we get nop,ld
addis 9,2,x1@got@ha
ld 9,x1@got@l(9)
# must keep got entry, optimise to nop,addi,ld
addis 4,2,x2@got@ha
addi 5,4,x2@got@l
ld 6,0(5)
# no need for toc entry, optimise to nop,addi
addis 9,2,x4t@toc@ha
ld 9,x4t@toc@l(9)
# must keep toc entry, optimise to nop,addi,ld
# if we had a reloc tying the ld to x5/x5t then we could throw away
# the toc entry and optimise to nop,nop,addi
addis 4,2,x5t@toc@ha
addi 5,4,x5t@toc@l
ld 6,0(5)