Convert indirect calls to direct when possible.

Please see patch discussion:
https://www.sourceware.org/ml/binutils/2016-05/msg00322.html

2016-06-28  Sriraman Tallam  <tmsriram@google.com>

	* x86_64.cc (Lazy_view): New class.
	(can_convert_mov_to_lea): Templatize function.  Make the function
	check for appropriate relocation types and use the view parameter
	to get section contents.
	(can_convert_callq_to_direct): New function.
	(Target_x86_64<size>::Scan::global): Refactor.
	(Target_x86_64<size>::Relocate::relocate): Refactor. Change any indirect
	call via GOT that can be converted.
	* testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
	* testsuite/Makefile.in: Regenerate.
	* testsuite/x86_64_indirect_call_to_direct1.s: New file.
	* testsuite/x86_64_indirect_jump_to_direct1.s: New file.
This commit is contained in:
Sriraman Tallam 2016-06-28 15:42:33 -07:00
parent 8032ac0339
commit 3a4f096e5f
7 changed files with 250 additions and 28 deletions

View File

@ -1,3 +1,18 @@
2016-06-28 Sriraman Tallam <tmsriram@google.com>
* x86_64.cc (Lazy_view): New class.
(can_convert_mov_to_lea): Templatize function. Make the function
check for appropriate relocation types and use the view parameter
to get section contents.
(can_convert_callq_to_direct): New function.
(Target_x86_64<size>::Scan::global): Refactor.
(Target_x86_64<size>::Relocate::relocate): Refactor. Change any indirect
call via GOT that can be converted.
* testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
* testsuite/Makefile.in: Regenerate.
* testsuite/x86_64_indirect_call_to_direct1.s: New file.
* testsuite/x86_64_indirect_jump_to_direct1.s: New file.
2016-06-28 Igor Kudrin <ikudrin@accesssoftek.com>
* aarch64.cc (Target_aarch64::Scan::local): Move the call to got_section

View File

@ -1096,6 +1096,25 @@ x86_64_mov_to_lea13.stdout: x86_64_mov_to_lea13
x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14
$(TEST_OBJDUMP) -dw $< > $@
# Test that an indirect call/jump through the GOT is turned into a direct
# one.  Each assembly input is assembled with -mrelax-relocations=yes
# (presumably so that the assembler emits the relaxable GOTPCRELX
# relocation -- confirm against the assembler documentation), linked with
# gcctestdir/ld, and disassembled into a .stdout file.  The
# x86_64_indirect_call_to_direct.sh script then greps those .stdout files.
check_SCRIPTS += x86_64_indirect_call_to_direct.sh
check_DATA += x86_64_indirect_call_to_direct1.stdout \
x86_64_indirect_jump_to_direct1.stdout
MOSTLYCLEANFILES += x86_64_indirect_call_to_direct1 \
x86_64_indirect_jump_to_direct1
x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s
$(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld
gcctestdir/ld -o $@ $<
x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1
$(TEST_OBJDUMP) -dw $< > $@
x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s
$(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld
gcctestdir/ld -o $@ $<
x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1
$(TEST_OBJDUMP) -dw $< > $@
check_SCRIPTS += x86_64_overflow_pc32.sh
check_DATA += x86_64_overflow_pc32.err
MOSTLYCLEANFILES += x86_64_overflow_pc32.err

View File

@ -223,6 +223,7 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
@GCC_TRUE@@HAVE_STATIC_TRUE@@NATIVE_LINKER_TRUE@@STATIC_TLS_TRUE@@TLS_TRUE@ tls_static_pic_test
@FN_PTRS_IN_SO_WITHOUT_PIC_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@@TLS_TRUE@am__append_26 = tls_shared_nonpic_test
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_27 = x86_64_mov_to_lea.sh \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_call_to_direct.sh \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_overflow_pc32.sh \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x32_overflow_pc32.sh
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_28 = x86_64_mov_to_lea1.stdout \
@ -239,6 +240,8 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea12.stdout \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea13.stdout \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea14.stdout \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_call_to_direct1.stdout \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_jump_to_direct1.stdout \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_overflow_pc32.err \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x32_overflow_pc32.err
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_29 = x86_64_mov_to_lea1 \
@ -255,6 +258,8 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea12 \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea13 \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_mov_to_lea14 \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_call_to_direct1 \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_indirect_jump_to_direct1 \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x86_64_overflow_pc32.err \
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ x32_overflow_pc32.err
@DEFAULT_TARGET_I386_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_30 = i386_mov_to_lea.sh
@ -4564,6 +4569,8 @@ tls_pie_test.sh.log: tls_pie_test.sh
@p='tls_pie_test.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
x86_64_mov_to_lea.sh.log: x86_64_mov_to_lea.sh
@p='x86_64_mov_to_lea.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
x86_64_indirect_call_to_direct.sh.log: x86_64_indirect_call_to_direct.sh
@p='x86_64_indirect_call_to_direct.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
x86_64_overflow_pc32.sh.log: x86_64_overflow_pc32.sh
@p='x86_64_overflow_pc32.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
x32_overflow_pc32.sh.log: x32_overflow_pc32.sh
@ -5641,6 +5648,19 @@ uninstall-am:
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_OBJDUMP) -dw $< > $@
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_OBJDUMP) -dw $< > $@
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ gcctestdir/ld -o $@ $<
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_OBJDUMP) -dw $< > $@
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ gcctestdir/ld -o $@ $<
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_OBJDUMP) -dw $< > $@
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_overflow_pc32.o: x86_64_overflow_pc32.s
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@ $(TEST_AS) -o $@ $<
@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_overflow_pc32.err: x86_64_overflow_pc32.o gcctestdir/ld

View File

@ -0,0 +1,29 @@
#!/bin/sh
# x86_64_indirect_call_to_direct.sh -- a test for indirect call(jump) to direct
# conversion.
# Copyright (C) 2016 Free Software Foundation, Inc.
# Written by Sriraman Tallam <tmsriram@google.com>
# This file is part of gold.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
# MA 02110-1301, USA.
# Abort with a non-zero status as soon as one of the checks below fails.
set -e

# Each disassembly must contain a direct branch to foo, i.e. a mnemonic
# followed by a hexadecimal target address and the symbol name.  The "q"
# suffix is optional: older objdump releases print "callq"/"jmpq", while
# newer ones print plain "call"/"jmp" for the same instructions.  An
# unconverted indirect branch has a "*" operand after the mnemonic and so
# cannot match either pattern.
grep -q "callq\?[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_call_to_direct1.stdout
grep -q "jmpq\?[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_jump_to_direct1.stdout

View File

@ -0,0 +1,12 @@
# Test input for the indirect-call-to-direct-call conversion.
# foo is a global function defined in this same file; main reaches it
# with an indirect call through foo's GOT entry.
.text
.globl foo
.type foo, @function
foo:
ret
.size foo, .-foo
.globl main
.type main, @function
main:
# Indirect call via the GOT.  The accompanying test script expects the
# linked output to contain a direct call to <foo> instead.
call *foo@GOTPCREL(%rip)
ret
.size main, .-main

View File

@ -0,0 +1,11 @@
# Test input for the indirect-jump-to-direct-jump conversion.
# foo is a global function defined in this same file; main reaches it
# with an indirect jump through foo's GOT entry.
.text
.globl foo
.type foo, @function
foo:
ret
.size foo, .-foo
.globl main
.type main, @function
main:
# Indirect jump via the GOT.  The accompanying test script expects the
# linked output to contain a direct jump to <foo> instead.
jmp *foo@GOTPCREL(%rip)
.size main, .-main

View File

@ -403,6 +403,33 @@ class Output_data_plt_x86_64_standard : public Output_data_plt_x86_64<size>
static const unsigned char plt_eh_frame_fde[plt_eh_frame_fde_size];
};
// A bounds-checked, read-only accessor for the bytes of one section of an
// input object.  The section contents are requested from the object on
// the first indexing operation rather than at construction, so a
// Lazy_view that is never indexed never asks for the section data.

template<int size>
class Lazy_view
{
 public:
  // OBJECT is the input file; DATA_SHNDX is the index of the section
  // whose bytes are served by operator[].
  Lazy_view(Sized_relobj_file<size, false>* object, unsigned int data_shndx)
    : object_(object), data_shndx_(data_shndx), view_(NULL), view_size_(0)
  { }

  // Return the byte at OFFSET within the section.  An OFFSET at or past
  // the size reported for the section yields 0 instead of reading out
  // of bounds.
  inline unsigned char
  operator[](size_t offset)
  {
    this->load_if_needed();
    if (offset < this->view_size_)
      return this->view_[offset];
    return 0;
  }

 private:
  // Ask the object for the section contents unless a view is already
  // held; this also records the section size in view_size_.
  void
  load_if_needed()
  {
    if (this->view_ != NULL)
      return;
    this->view_ = this->object_->section_contents(this->data_shndx_,
                                                  &this->view_size_,
                                                  true);
  }

  // The object that owns the section.
  Sized_relobj_file<size, false>* object_;
  // Index of the section within object_.
  unsigned int data_shndx_;
  // Section contents, or NULL while they have not been requested yet.
  const unsigned char* view_;
  // Size of the data view_ points to; 0 until the contents are loaded.
  section_size_type view_size_;
};
// The x86_64 target class.
// See the ABI at
// http://www.x86-64.org/documentation/abi.pdf
@ -876,19 +903,62 @@ class Target_x86_64 : public Sized_target<size, false>
// conversion from
// mov foo@GOTPCREL(%rip), %reg
// to lea foo(%rip), %reg.
static bool
can_convert_mov_to_lea(const Symbol* gsym)
template<class View_type>
static inline bool
can_convert_mov_to_lea(const Symbol* gsym, unsigned int r_type,
size_t r_offset, View_type* view)
{
gold_assert(gsym != NULL);
return (gsym->type() != elfcpp::STT_GNU_IFUNC
&& !gsym->is_undefined ()
&& !gsym->is_from_dynobj()
&& !gsym->is_preemptible()
&& (!parameters->options().shared()
|| (gsym->visibility() != elfcpp::STV_DEFAULT
&& gsym->visibility() != elfcpp::STV_PROTECTED)
|| parameters->options().Bsymbolic())
&& strcmp(gsym->name(), "_DYNAMIC") != 0);
// We cannot do the conversion unless it's one of these relocations.
if (r_type != elfcpp::R_X86_64_GOTPCREL
&& r_type != elfcpp::R_X86_64_GOTPCRELX
&& r_type != elfcpp::R_X86_64_REX_GOTPCRELX)
return false;
// We cannot convert references to IFUNC symbols, or to symbols that
// are not local to the current module.
if (gsym->type() == elfcpp::STT_GNU_IFUNC
|| gsym->is_undefined ()
|| gsym->is_from_dynobj()
|| gsym->is_preemptible())
return false;
// If we are building a shared object and the symbol is protected, we may
// need to go through the GOT.
if (parameters->options().shared()
&& gsym->visibility() == elfcpp::STV_PROTECTED)
return false;
// We cannot convert references to the _DYNAMIC symbol.
if (strcmp(gsym->name(), "_DYNAMIC") == 0)
return false;
// Check for a MOV opcode.
return (*view)[r_offset - 2] == 0x8b;
}
// Return whether an indirect branch through the GOT may be rewritten as
// a direct branch, i.e. whether
//   callq *foo@GOTPCRELX(%rip)   can become   addr32 callq foo
// or
//   jmpq *foo@GOTPCRELX(%rip)    can become   jmpq foo
//                                             nop
// GSYM is the (non-NULL) target symbol and R_TYPE the relocation type.
// The two bytes of VIEW immediately before index R_OFFSET are examined,
// so the caller must make sure that they exist.
template<class View_type>
static inline bool
can_convert_callq_to_direct(const Symbol* gsym, unsigned int r_type,
                            size_t r_offset, View_type* view)
{
  gold_assert(gsym != NULL);

  // Only a GOTPCRELX relocation is eligible for this conversion.
  if (r_type != elfcpp::R_X86_64_GOTPCRELX)
    return false;

  // References to IFUNC symbols, and to symbols that are not local to
  // the current module, must be left alone.
  if (gsym->type() == elfcpp::STT_GNU_IFUNC)
    return false;
  if (gsym->is_undefined ())
    return false;
  if (gsym->is_from_dynobj())
    return false;
  if (gsym->is_preemptible())
    return false;

  // The instruction must be an indirect CALLQ or JMPQ: a first byte of
  // 0xff followed by 0x15 (callq) or 0x25 (jmpq).
  if ((*view)[r_offset - 2] != 0xff)
    return false;
  const unsigned char second_byte = (*view)[r_offset - 1];
  return second_byte == 0x15 || second_byte == 0x25;
}
// Adjust TLS relocation type based on the options and whether this
@ -2935,19 +3005,24 @@ Target_x86_64<size>::Scan::global(Symbol_table* symtab,
// If we convert this from
// mov foo@GOTPCREL(%rip), %reg
// to lea foo(%rip), %reg.
// OR
// if we convert
// (callq|jmpq) *foo@GOTPCRELX(%rip) to
// (callq|jmpq) foo
// in Relocate::relocate, then there is nothing to do here.
if ((r_type == elfcpp::R_X86_64_GOTPCREL
|| r_type == elfcpp::R_X86_64_GOTPCRELX
|| r_type == elfcpp::R_X86_64_REX_GOTPCRELX)
&& reloc.get_r_offset() >= 2
&& Target_x86_64<size>::can_convert_mov_to_lea(gsym))
{
section_size_type stype;
const unsigned char* view = object->section_contents(data_shndx,
&stype, true);
if (view[reloc.get_r_offset() - 2] == 0x8b)
break;
}
Lazy_view<size> view(object, data_shndx);
size_t r_offset = reloc.get_r_offset();
if (r_offset >= 2
&& Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
r_offset, &view))
break;
if (r_offset >= 2
&& Target_x86_64<size>::can_convert_callq_to_direct(gsym, r_type,
r_offset,
&view))
break;
if (gsym->final_value_is_known())
{
@ -3629,15 +3704,56 @@ Target_x86_64<size>::Relocate::relocate(
// mov foo@GOTPCREL(%rip), %reg
// to lea foo(%rip), %reg.
// if possible.
if (rela.get_r_offset() >= 2
&& view[-2] == 0x8b
&& ((gsym == NULL && !psymval->is_ifunc_symbol())
|| (gsym != NULL
&& Target_x86_64<size>::can_convert_mov_to_lea(gsym))))
if ((gsym == NULL
&& rela.get_r_offset() >= 2
&& view[-2] == 0x8b
&& !psymval->is_ifunc_symbol())
|| (gsym != NULL
&& rela.get_r_offset() >= 2
&& Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
0, &view)))
{
view[-2] = 0x8d;
Reloc_funcs::pcrela32(view, object, psymval, addend, address);
}
// Convert
// callq *foo@GOTPCRELX(%rip) to
// addr32 callq foo
// and jmpq *foo@GOTPCRELX(%rip) to
// jmpq foo
// nop
else if (gsym != NULL
&& rela.get_r_offset() >= 2
&& Target_x86_64<size>::can_convert_callq_to_direct(gsym,
r_type,
0, &view))
{
if (view[-1] == 0x15)
{
// Convert callq *foo@GOTPCRELX(%rip) to addr32 callq.
// Opcode of addr32 is 0x67 and opcode of direct callq is 0xe8.
view[-2] = 0x67;
view[-1] = 0xe8;
// Convert GOTPCRELX to 32-bit pc relative reloc.
Reloc_funcs::pcrela32(view, object, psymval, addend, address);
}
else
{
// Convert jmpq *foo@GOTPCRELX(%rip) to
// jmpq foo
// nop
// The opcode of direct jmpq is 0xe9.
view[-2] = 0xe9;
// The opcode of nop is 0x90.
view[3] = 0x90;
// Convert GOTPCRELX to 32-bit pc relative reloc. jmpq is rip
// relative and since the instruction following the jmpq is now
// the nop, offset the address by 1 byte. The start of the
// relocation also moves ahead by 1 byte.
Reloc_funcs::pcrela32(&view[-1], object, psymval, addend,
address - 1);
}
}
else
{
if (gsym != NULL)