H.J. Lu e2cbcd9156 Support x86-64 TLS code sequences without PLT
We can generate x86-64 TLS code sequences for general and local dynamic
models without PLT, which uses indirect call via GOT:

call *__tls_get_addr@GOTPCREL(%rip)

instead of direct call:

call __tls_get_addr[@PLT]

Since direct call is 4-byte long and indirect call, is 5-byte long, the
extra one byte must be handled properly.

For general dynamic model, one 0x66 prefix before call instruction is
removed to make room for indirect call.  For local dynamic model, we
simply use 5-byte indirect call.

TLS linker optimization is updated to recognize new instruction patterns.
For local dynamic model to local exec model transition, we generate
4 0x66 prefixes, instead of 3, before mov instruction in 64-bit and
generate a 5-byte nop, instead of 4-byte, before mov instruction in
32-bit.  Since linker may convert

call *__tls_get_addr@GOTPCREL(%rip)

to

addr32 call __tls_get_addr

when producing static executable, both patterns are recognized.

bfd/

	* elf64-x86-64.c (elf_x86_64_link_hash_entry): Add tls_get_addr.
	(elf_x86_64_link_hash_newfunc): Initialize tls_get_addr to 2.
	(elf_x86_64_check_tls_transition): Check indirect call and
	direct call with the addr32 prefix for general and local dynamic
	models.  Set the tls_get_addr feild.
	(elf_x86_64_convert_load_reloc): Always use addr32 prefix for
	indirect __tls_get_addr call via GOT.
	(elf_x86_64_relocate_section): Handle GD->LE, GD->IE and LD->LE
	transitions with indirect call and direct call with the addr32
	prefix.

ld/

	* testsuite/ld-x86-64/pass.out: New file.
	* testsuite/ld-x86-64/tls-def1.c: Likewise.
	* testsuite/ld-x86-64/tls-gd1.S: Likewise.
	* testsuite/ld-x86-64/tls-ld1.S: Likewise.
	* testsuite/ld-x86-64/tls-main1.c: Likewise.
	* testsuite/ld-x86-64/tls.exp: Likewise.
	* testsuite/ld-x86-64/tlsbin2-nacl.rd: Likewise.
	* testsuite/ld-x86-64/tlsbin2.dd: Likewise.
	* testsuite/ld-x86-64/tlsbin2.rd: Likewise.
	* testsuite/ld-x86-64/tlsbin2.sd: Likewise.
	* testsuite/ld-x86-64/tlsbin2.td: Likewise.
	* testsuite/ld-x86-64/tlsbinpic2.s: Likewise.
	* testsuite/ld-x86-64/tlsgd10.dd: Likewise.
	* testsuite/ld-x86-64/tlsgd10.s: Likewise.
	* testsuite/ld-x86-64/tlsgd11.dd: Likewise.
	* testsuite/ld-x86-64/tlsgd11.s: Likewise.
	* testsuite/ld-x86-64/tlsgd12.d: Likewise.
	* testsuite/ld-x86-64/tlsgd12.s: Likewise.
	* testsuite/ld-x86-64/tlsgd13.d: Likewise.
	* testsuite/ld-x86-64/tlsgd13.s: Likewise.
	* testsuite/ld-x86-64/tlsgd14.dd: Likewise.
	* testsuite/ld-x86-64/tlsgd14.s: Likewise.
	* testsuite/ld-x86-64/tlsgd5c.s: Likewise.
	* testsuite/ld-x86-64/tlsgd6c.s: Likewise.
	* testsuite/ld-x86-64/tlsgd9.dd: Likewise.
	* testsuite/ld-x86-64/tlsgd9.s: Likewise.
	* testsuite/ld-x86-64/tlsld4.dd: Likewise.
	* testsuite/ld-x86-64/tlsld4.s: Likewise.
	* testsuite/ld-x86-64/tlsld5.dd: Likewise.
	* testsuite/ld-x86-64/tlsld5.s: Likewise.
	* testsuite/ld-x86-64/tlsld6.dd: Likewise.
	* testsuite/ld-x86-64/tlsld6.s: Likewise.
	* testsuite/ld-x86-64/tlspic2-nacl.rd: Likewise.
	* testsuite/ld-x86-64/tlspic2.dd: Likewise.
	* testsuite/ld-x86-64/tlspic2.rd: Likewise.
	* testsuite/ld-x86-64/tlspic2.sd: Likewise.
	* testsuite/ld-x86-64/tlspic2.td: Likewise.
	* testsuite/ld-x86-64/tlspic3.s: Likewise.
	* testsuite/ld-x86-64/tlspie2.s: Likewise.
	* testsuite/ld-x86-64/tlspie2a.d: Likewise.
	* testsuite/ld-x86-64/tlspie2b.d: Likewise.
	* testsuite/ld-x86-64/tlspie2c.d: Likewise.
	* testsuite/ld-x86-64/tlsgd5.dd: Updated.
	* testsuite/ld-x86-64/tlsgd6.dd: Likewise.
	* testsuite/ld-x86-64/x86-64.exp: Run libtlspic2.so, tlsbin2,
	tlsgd5b, tlsgd6b, tlsld4, tlsld5, tlsld6, tlsgd9, tlsgd10,
	tlsgd11, tlsgd14, tlsgd12, tlsgd13, tlspie2a, tlspie2b and
	tlspie2c.
2016-06-06 11:07:16 -07:00

147 lines
2.9 KiB
ArmAsm

/* Force .data aligned to 4K, so that .got very likely gets at
0x5021a0 (0x60 bytes .tdata and 0x140 bytes .dynamic) */
.data
.balign 4096
.section ".tdata", "awT", @progbits
.globl sg1, sg2, sg3, sg4, sg5, sg6, sg7, sg8
.globl sh1, sh2, sh3, sh4, sh5, sh6, sh7, sh8
.hidden sh1, sh2, sh3, sh4, sh5, sh6, sh7, sh8
sg1: .long 17
sg2: .long 18
sg3: .long 19
sg4: .long 20
sg5: .long 21
sg6: .long 22
sg7: .long 23
sg8: .long 24
sl1: .long 65
sl2: .long 66
sl3: .long 67
sl4: .long 68
sl5: .long 69
sl6: .long 70
sl7: .long 71
sl8: .long 72
sh1: .long 257
sh2: .long 258
sh3: .long 259
sh4: .long 260
sh5: .long 261
sh6: .long 262
sh7: .long 263
sh8: .long 264
/* Force .text aligned to 4K, so it very likely gets at 0x401000. */
.text
.balign 4096
.globl fn2
.type fn2,@function
fn2:
pushq %rbp
movq %rsp, %rbp
/* GD -> IE because variable is not defined in executable */
.byte 0x66
leaq sG1@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD -> IE because variable is not defined in executable where
the variable is referenced through IE too */
.byte 0x66
leaq sG2@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD -> LE with global variable defined in executable */
.byte 0x66
leaq sg1@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD -> LE with local variable defined in executable */
.byte 0x66
leaq sl1@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD -> LE with hidden variable defined in executable */
.byte 0x66
leaq sh1@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* LD -> LE */
leaq sl1@tlsld(%rip), %rdi
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop
leaq 1+sl1@dtpoff(%rax), %rdx
nop;nop
leaq sl2@dtpoff+2(%rax), %r9
nop;nop;nop;nop
/* LD -> LE against hidden variables */
leaq sh1@tlsld(%rip), %rdi
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop
leaq sh1@dtpoff(%rax), %rdx
nop;nop
leaq 3+sh2@dtpoff(%rax), %rcx
nop;nop;nop;nop
/* IE against global var */
movq %fs:0, %r9
nop;nop
addq sG2@gottpoff(%rip), %r9
nop;nop;nop;nop
/* IE -> LE against global var defined in exec */
movq %fs:0, %r10
nop;nop
addq sg1@gottpoff(%rip), %r10
nop;nop;nop;nop
/* IE -> LE against local var */
movq %fs:0, %rax
nop;nop
addq sl1@gottpoff(%rip), %rax
nop;nop;nop;nop
/* IE -> LE against hidden var */
movq %fs:0, %rcx
nop;nop
addq sh1@gottpoff(%rip), %rcx
nop;nop;nop;nop
/* Direct access through %fs */
/* IE against global var */
movq sG5@gottpoff(%rip), %rcx
nop;nop
movq %fs:(%rcx), %rdx
nop;nop;nop;nop
/* IE->LE against local var */
movq sl5@gottpoff(%rip), %r11
nop;nop
movq %fs:(%r11), %r12
nop;nop;nop;nop
/* IE->LE against hidden var */
movq sh5@gottpoff(%rip), %rdx
nop;nop
movq %fs:(%rdx), %rdx
nop;nop;nop;nop
leave
ret