9f0d7b6df9
consider_profiling always to zero. Don't count of compiler to remove unreached if block. * sysdeps/x86_64/dl-trampoline.S [PROF] (_dl_runtime_profile): Don't compile. * sysdeps/i386/dl-trampoline.S [PROF] (_dl_runtime_profile): Likewise. * sysdeps/ia64/dl-trampoline.S [PROF] (_dl_runtime_profile): Likewise. * sysdeps/s390/s390-64/dl-trampoline.S [PROF] (_dl_runtime_profile): Likewise. * sysdeps/s390/s390-32/dl-trampoline.S [PROF] (_dl_runtime_profile): Likewise. * sysdeps/powerpc/powerpc64/dl-trampoline.S [PROF] (_dl_profile_resolve): Likewise. * sysdeps/powerpc/powerpc32/dl-trampoline.S [PROF] (_dl_profile_resolve): Likewise. * gmon/Makefile: Add rules to build and run tst-profile-static. * gmon/tst-profile-static.c: New file. * Makeconfig (+link-static): Allow passing program-specific flags.
540 lines
11 KiB
ArmAsm
540 lines
11 KiB
ArmAsm
/* PLT trampolines. ia64 version.
|
|
Copyright (C) 2005 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, write to the Free
|
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307 USA. */
|
|
|
|
#include <sysdep.h>
|
|
#undef ret
|
|
|
|
/*
   This code is used in dl-runtime.c to call the `_dl_fixup' function
   and then redirect to the address it returns.  `_dl_fixup()' takes two
   arguments, however _dl_profile_fixup() takes five.

   The ABI specifies that we will never see more than 8 input
   registers to a function call, thus it is safe to simply allocate
   those, and simpler than playing stack games.  */
|
|
|
|
/* Used to save and restore 8 incoming fp registers (f8-f15, 16 bytes
   each).  */
#define RESOLVE_FRAME_SIZE (16*8)

/* _dl_runtime_resolve

   Calls _dl_fixup and then branches to the address it returns.

   On entry this code reads:
     r16	passed unchanged as the first argument of _dl_fixup
     r15	multiplied by 24 (the relocation record size) and passed
		as the second argument of _dl_fixup
     in0-in7, f8-f15, r8-r11, b0
		state of the interrupted call; all of it is preserved

   After _dl_fixup returns, ret0 is moved to b6 (the branch target) and
   ret1 is moved to gp.  b0 and ar.pfs are restored from loc1/loc0, sp
   is restored, and control is transferred with a plain branch (not a
   call).  r2 and r3 are used as scratch pointers and are not
   preserved.

   Stack use: sp is lowered by RESOLVE_FRAME_SIZE.  f8-f14 are spilled
   at new sp + 16 ... new sp + 112 and f15 at new sp + 128, i.e. in the
   16 bytes at the incoming sp.  The 16 bytes at the new sp are left
   free for _dl_fixup.  */
ENTRY(_dl_runtime_resolve)
	{ .mmi
	  .prologue
	  .save ar.pfs, r40
	  /* 8 inputs, 6 locals (loc0-loc5), 2 outputs.  loc0 is r40.  */
	  alloc loc0 = ar.pfs, 8, 6, 2, 0
	  /* Use the 16 byte scratch area.  r2 will start at f8 and
	     r3 will start at f9.  */
	  adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
	  adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
	}
	{ .mii
	  .fframe RESOLVE_FRAME_SIZE
	  adds r12 = -RESOLVE_FRAME_SIZE, r12
	  .save rp, loc1
	  mov loc1 = b0
	  .body
	  mov loc2 = r8		/* preserve struct value register */
	  ;;
	}
	{ .mii
	  mov loc3 = r9		/* preserve language specific register */
	  mov loc4 = r10	/* preserve language specific register */
	  mov loc5 = r11	/* preserve language specific register */
	}
	/* Spill f8-f15.  r2 walks the even registers and r3 the odd
	   ones; each pointer advances by 32 so the two streams
	   interleave into consecutive 16 byte slots.  */
	{ .mmi
	  stf.spill [r2] = f8, 32
	  stf.spill [r3] = f9, 32
	  mov out0 = r16	/* first argument of _dl_fixup */
	  ;;
	}
	{ .mmi
	  stf.spill [r2] = f10, 32
	  stf.spill [r3] = f11, 32
	  shl out1 = r15, 4	/* out1 = r15 * 16 */
	  ;;
	}
	{ .mmi
	  stf.spill [r2] = f12, 32
	  stf.spill [r3] = f13, 32
	  /* Relocation record is 24 byte: out1 = r15 * 8 + r15 * 16.  */
	  shladd out1 = r15, 3, out1
	  ;;
	}
	{ .mmb
	  stf.spill [r2] = f14
	  stf.spill [r3] = f15
	  br.call.sptk.many b0 = _dl_fixup
	}
	{ .mii
	  /* Skip the 16byte scratch area.  */
	  adds r2 = 16, r12
	  adds r3 = 32, r12
	  mov b6 = ret0		/* address to branch to at the end */
	  ;;
	}
	{ .mmi
	  ldf.fill f8 = [r2], 32
	  ldf.fill f9 = [r3], 32
	  mov b0 = loc1		/* restore the original return address */
	  ;;
	}
	{ .mmi
	  ldf.fill f10 = [r2], 32
	  ldf.fill f11 = [r3], 32
	  mov gp = ret1		/* second value returned by _dl_fixup */
	  ;;
	}
	{ .mmi
	  ldf.fill f12 = [r2], 32
	  ldf.fill f13 = [r3], 32
	  mov ar.pfs = loc0
	  ;;
	}
	{ .mmi
	  ldf.fill f14 = [r2], 32
	  ldf.fill f15 = [r3], 32
	  .restore sp		/* pop the unwind frame state */
	  adds r12 = RESOLVE_FRAME_SIZE, r12
	  ;;
	}
	{ .mii
	  mov r9 = loc3		/* restore language specific register */
	  mov r10 = loc4	/* restore language specific register */
	  mov r11 = loc5	/* restore language specific register */
	}
	{ .mii
	  mov r8 = loc2		/* restore struct value register */
	  ;;
	}
	/* An alloc is needed for the break system call to work.
	   We don't care about the old value of the pfs register.
	   The new frame keeps only 8 output registers, so loc0-loc5
	   must not be referenced after this point.  */
	{ .mmb
	  .prologue
	  .body
	  alloc r2 = ar.pfs, 0, 0, 8, 0
	  br.sptk.many b6
	  ;;
	}
END(_dl_runtime_resolve)
|
|
|
|
|
|
/* The fourth argument to _dl_profile_fixup and the third one to
   _dl_call_pltexit are a pointer to La_ia64_regs:

   8byte	r8
   8byte	r9
   8byte	r10
   8byte	r11
   8byte	in0
   8byte	in1
   8byte	in2
   8byte	in3
   8byte	in4
   8byte	in5
   8byte	in6
   8byte	in7
   16byte	f8
   16byte	f9
   16byte	f10
   16byte	f11
   16byte	f12
   16byte	f13
   16byte	f14
   16byte	f15
   8byte	ar.unat
   8byte	sp

   The fifth argument to _dl_profile_fixup is a pointer to long int.
   The fourth argument to _dl_call_pltexit is a pointer to
   La_ia64_retval:

   8byte	r8
   8byte	r9
   8byte	r10
   8byte	r11
   16byte	f8
   16byte	f9
   16byte	f10
   16byte	f11
   16byte	f12
   16byte	f13
   16byte	f14
   16byte	f15

   Since the stack has to be 16 byte aligned, the stack allocation is
   in 16 byte increments.  Before calling _dl_profile_fixup, the stack
   will look like

   psp	new frame_size
   +16	La_ia64_regs
   sp	scratch

 */
|
|
|
|
/* PLTENTER_FRAME_SIZE: 4 GRs (r8-r11) + 8 incoming GRs + 8 FRs +
   ar.unat and sp (together these form La_ia64_regs) + the 16 byte
   scratch area at the bottom of the frame.
   PLTEXIT_FRAME_SIZE: the above plus room for La_ia64_retval
   (r8-r11 and f8-f15), which is placed directly below La_ia64_regs.  */
#define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
#define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)

#ifndef PROF
/* _dl_runtime_profile

   Profiling/auditing variant of the lazy resolver.  Outline:

   1. Save b0, ar.pfs, ar.lc, gp and the incoming sp in locals, lower
      sp by PLTENTER_FRAME_SIZE and fill in a La_ia64_regs at sp + 16.
   2. Call _dl_profile_fixup with
	out0 = r16
	out1 = r15 * 24 (relocation record size)
	out2 = b0 on entry
	out3 = pointer to La_ia64_regs
	out4 = the incoming sp; the long int "frame size" is read back
	       from this address after the call.
      ret0 becomes the branch target (b6) and ret1 the new gp.
   3. Restore f8-f15 and r8-r11, then look at the frame size:
	-1	restore b0/ar.lc/ar.pfs/sp and branch to b6 without
		calling _dl_call_pltexit;
	 0	call b6 on the current stack;
	else	round it up to a multiple of 16, copy that many bytes
		from incoming sp + 16 to below the current frame, and
		call b6.
   4. After the call, store r8-r11 and f8-f15 in a La_ia64_retval,
      call _dl_call_pltexit (loc8, loc9, La_ia64_regs, La_ia64_retval),
      reload the return registers from the buffer and return to the
      original caller.

   Locals: loc0 ar.pfs, loc1 b0, loc2-loc5 r8-r11, loc6 ar.lc,
   loc7 pointer to La_ia64_regs, loc8 r16, loc9 r15 * 24,
   loc10 incoming sp, loc11 incoming gp.  */
ENTRY(_dl_runtime_profile)
	{ .mii
	  .prologue
	  .save ar.pfs, r40
	  /* 8 inputs, 12 locals (loc0-loc11), 8 outputs.  loc0 is r40.  */
	  alloc loc0 = ar.pfs, 8, 12, 8, 0
	  .vframe loc10
	  mov loc10 = r12
	  .save rp, loc1
	  mov loc1 = b0
	}
	{ .mii
	  .save ar.unat, r17
	  mov r17 = ar.unat
	  .save ar.lc, loc6
	  mov loc6 = ar.lc
	  mov loc11 = gp
	}
	{ .mii
	  .body
	  /* There is a 16 byte scratch area.  r2 will start at r8 and
	     r3 will start at r9 for La_ia64_regs.  */
	  adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
	  adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
	  adds r12 = -PLTENTER_FRAME_SIZE, r12
	  ;;
	}
	{ .mmi
	  st8 [r2] = r8, 16
	  st8 [r3] = r9, 16
	  mov out2 = b0		/* needed by _dl_profile_fixup */
	  ;;
	}
	{ .mmi
	  st8 [r2] = r10, 16
	  st8 [r3] = r11, 16
	  adds out3 = 16, r12	/* pointer to La_ia64_regs */
	  ;;
	}
	{ .mmi
	  .mem.offset 0, 0
	  st8.spill [r2] = in0, 16
	  .mem.offset 8, 0
	  st8.spill [r3] = in1, 16
	  mov out4 = loc10	/* pointer to new frame size */
	  ;;
	}
	{ .mmi
	  .mem.offset 0, 0
	  st8.spill [r2] = in2, 16
	  .mem.offset 8, 0
	  st8.spill [r3] = in3, 16
	  mov loc2 = r8		/* preserve struct value register */
	  ;;
	}
	{ .mmi
	  .mem.offset 0, 0
	  st8.spill [r2] = in4, 16
	  .mem.offset 8, 0
	  st8.spill [r3] = in5, 16
	  mov loc3 = r9		/* preserve language specific register */
	  ;;
	}
	{ .mmi
	  /* in6/in7 are spilled like in0-in5: a plain st8 would raise a
	     NaT consumption fault if an unused incoming register has
	     its NaT bit set, and the ar.unat value recorded below
	     would not cover these two slots.  */
	  .mem.offset 0, 0
	  st8.spill [r2] = in6, 16
	  .mem.offset 8, 0
	  st8.spill [r3] = in7, 24	/* adjust for f9 */
	  mov loc4 = r10	/* preserve language specific register */
	  ;;
	}
	{ .mii
	  mov r18 = ar.unat	/* save it in La_ia64_regs */
	  mov loc7 = out3	/* save it for _dl_call_pltexit */
	  mov loc5 = r11	/* preserve language specific register */
	}
	{ .mmi
	  stf.spill [r2] = f8, 32
	  stf.spill [r3] = f9, 32
	  mov out0 = r16	/* needed by _dl_profile_fixup */
	  ;;
	}
	{ .mii
	  mov ar.unat = r17	/* restore it for function call */
	  mov loc8 = r16	/* save it for _dl_call_pltexit */
	  nop.i 0x0
	}
	{ .mmi
	  stf.spill [r2] = f10, 32
	  stf.spill [r3] = f11, 32
	  shl out1 = r15, 4	/* out1 = r15 * 16 */
	  ;;
	}
	{ .mmi
	  stf.spill [r2] = f12, 32
	  stf.spill [r3] = f13, 32
	  /* Relocation record is 24 byte: out1 = r15 * 8 + r15 * 16.  */
	  shladd out1 = r15, 3, out1
	  ;;
	}
	{ .mmi
	  stf.spill [r2] = f14, 32
	  stf.spill [r3] = f15, 24	/* r3 now points at the sp slot */
	  mov loc9 = out1	/* save it for _dl_call_pltexit */
	  ;;
	}
	{ .mmb
	  st8 [r2] = r18	/* store ar.unat */
	  st8 [r3] = loc10	/* store sp */
	  br.call.sptk.many b0 = _dl_profile_fixup
	}
	{ .mii
	  /* Skip the 16byte scratch area, 4 language specific GRs and
	     8 incoming GRs to restore incoming fp registers.  */
	  adds r2 = (4*8 + 8*8 + 16), r12
	  adds r3 = (4*8 + 8*8 + 32), r12
	  mov b6 = ret0		/* address of the resolved function */
	  ;;
	}
	{ .mmi
	  ldf.fill f8 = [r2], 32
	  ldf.fill f9 = [r3], 32
	  mov gp = ret1		/* second value returned by the fixup */
	  ;;
	}
	{ .mmi
	  ldf.fill f10 = [r2], 32
	  ldf.fill f11 = [r3], 32
	  mov r8 = loc2		/* restore struct value register */
	  ;;
	}
	{ .mmi
	  ldf.fill f12 = [r2], 32
	  ldf.fill f13 = [r3], 32
	  mov r9 = loc3		/* restore language specific register */
	  ;;
	}
	{ .mmi
	  ldf.fill f14 = [r2], 32
	  ldf.fill f15 = [r3], 32
	  mov r10 = loc4	/* restore language specific register */
	  ;;
	}
	{ .mii
	  ld8 r15 = [loc10]	/* load the new frame size */
	  mov r11 = loc5	/* restore language specific register */
	  ;;
	  /* p6: frame size is -1, p7: anything else.  */
	  cmp.eq p6, p7 = -1, r15
	  ;;
	}
	{ .mii
	  /* Only when the size is not -1: p8 if it is 0, p9 if a frame
	     has to be copied.  */
(p7)	  cmp.eq p8, p9 = 0, r15
(p6)	  mov b0 = loc1
(p6)	  mov ar.lc = loc6
	}
	{ .mib
	  nop.m 0x0
(p6)	  mov ar.pfs = loc0
(p6)	  br.cond.dptk.many .Lresolved
	  ;;
	}

	/* At this point, the stack looks like

	   +psp	free
	   +16	La_ia64_regs
	   sp	scratch

	   We need to keep the current stack and call the resolved
	   function by copying r15 bytes from sp + PLTENTER_FRAME_SIZE
	   + 16 (scratch area) to sp + 16 (scratch area).  Since the
	   stack has to be 16 byte aligned, we round r15 up to a
	   multiple of 16.  */

	{ .mbb
(p9)	  adds r15 = 15, r15
(p8)	  br.cond.dptk.many .Lno_new_frame
	  nop.b 0x0
	  ;;
	}
	{ .mmi
	  and r15 = -16, r15
	  ;;
	  /* We don't copy the 16byte scratch area.  Prepare r16/r17 as
	     destination.  */
	  sub r16 = r12, r15
	  sub r17 = r12, r15
	  ;;
	}
	{ .mii
	  adds r16 = 16, r16
	  adds r17 = 24, r17
	  sub r12 = r12, r15		/* Adjust stack */
	  ;;
	}
	{ .mii
	  nop.m 0x0
	  shr r15 = r15, 4		/* number of 16 byte chunks */
	  ;;
	  adds r15 = -1, r15		/* loop count for br.cloop */
	  ;;
	}
	{ .mii
	  /* Skip the 16byte scratch area.  Prepare r2/r3 as source.  */
	  adds r2 = 16, loc10
	  adds r3 = 24, loc10
	  mov ar.lc = r15
	  ;;
	}
	/* Copy 16 bytes per iteration, as two 8 byte words.  */
.Lcopy:
	{ .mmi
	  ld8 r18 = [r2], 16
	  ld8 r19 = [r3], 16
	  nop.i 0x0
	  ;;
	}
	{ .mmb
	  st8 [r16] = r18, 16
	  st8 [r17] = r19, 16
	  br.cloop.sptk.few .Lcopy
	}
.Lno_new_frame:
	/* Forward all eight incoming argument registers.  */
	{ .mii
	  mov out0 = in0
	  mov out1 = in1
	  mov out2 = in2
	}
	{ .mii
	  mov out3 = in3
	  mov out4 = in4
	  mov out5 = in5
	}
	{ .mib
	  mov out6 = in6
	  mov out7 = in7
	  /* Call the resolved function */
	  br.call.sptk.many b0 = b6
	}
	{ .mii
	  /* Prepare stack for _dl_call_pltexit.  Loc10 has the original
	     stack pointer.  */
	  adds r12 = -PLTEXIT_FRAME_SIZE, loc10
	  adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
	  adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
	  ;;
	}
	{ .mmi
	  /* Store all possible return values into the La_ia64_retval
	     buffer.  */
	  st8 [r2] = r8, 16
	  st8 [r3] = r9, 16
	  mov out0 = loc8	/* r16 as seen on entry */
	  ;;
	}
	{ .mmi
	  st8 [r2] = r10, 16
	  st8 [r3] = r11, 24	/* adjust for f9 */
	  mov out1 = loc9	/* r15 * 24 as passed to the fixup */
	  ;;
	}
	{ .mmi
	  stf.spill [r2] = f8, 32
	  stf.spill [r3] = f9, 32
	  mov out2 = loc7		/* Pointer to La_ia64_regs */
	  ;;
	}
	{ .mmi
	  stf.spill [r2] = f10, 32
	  stf.spill [r3] = f11, 32
	  adds out3 = 16, r12		/* Pointer to La_ia64_retval */
	  ;;
	}
	{ .mmi
	  stf.spill [r2] = f12, 32
	  stf.spill [r3] = f13, 32
	  /* We need to restore gp for _dl_call_pltexit.  */
	  mov gp = loc11
	  ;;
	}
	{ .mmb
	  stf.spill [r2] = f14
	  stf.spill [r3] = f15
	  br.call.sptk.many b0 = _dl_call_pltexit
	}
	{ .mmi
	  /* Load all the non-floating and floating return values.  Skip
	     the 16byte scratch area.  */
	  adds r2 = 16, r12
	  adds r3 = 24, r12
	  nop.i 0x0
	  ;;
	}
	{ .mmi
	  ld8 r8 = [r2], 16
	  ld8 r9 = [r3], 16
	  nop.i 0x0
	  ;;
	}
	{ .mmi
	  ld8 r10 = [r2], 16
	  ld8 r11 = [r3], 24	/* adjust for f9 */
	  nop.i 0x0
	  ;;
	}
	{ .mmi
	  ldf.fill f8 = [r2], 32
	  ldf.fill f9 = [r3], 32
	  mov ar.lc = loc6
	  ;;
	}
	{ .mmi
	  ldf.fill f10 = [r2], 32
	  ldf.fill f11 = [r3], 32
	  mov ar.pfs = loc0
	  ;;
	}
	{ .mmi
	  ldf.fill f12 = [r2], 32
	  ldf.fill f13 = [r3], 32
	  mov b0 = loc1
	  ;;
	}
	{ .mmi
	  ldf.fill f14 = [r2]
	  ldf.fill f15 = [r3]
	  /* We know that the previous stack pointer, loc10, isn't 0.
	     We use it to reload p7.  */
	  cmp.ne p7, p0 = 0, loc10
	  ;;
	}
	/* Reached with p7 set after _dl_call_pltexit (return to the
	   original caller), or with p7 clear from the frame size == -1
	   path (fall through and branch to the resolved function).  */
.Lresolved:
	{ .mmb
	  .restore sp
	  mov r12 = loc10
(p7)	  br.ret.sptk.many b0
	  ;;
	}
	/* An alloc is needed for the break system call to work.  We
	   don't care about the old value of the pfs register.  After
	   this alloc, we can't use any rotating registers.  Otherwise
	   assembler won't be happy.  This has to be at the end.  */
	{ .mmb
	  .prologue
	  .body
	  alloc r2 = ar.pfs, 0, 0, 8, 0
	  br.sptk.many b6
	  ;;
	}
END(_dl_runtime_profile)
#endif
|