aix.S (ffi_call_AIX): Convert to more standard register usage.

        * src/powerpc/aix.S (ffi_call_AIX): Convert to more standard
        register usage.  Call ffi_prep_args directly.  Add long double
        return value support.
        * src/powerpc/ffi_darwin.c (ffi_prep_args): Double arg increment
        applies to FFI_TYPE_DOUBLE.  Correct fpr_base increment typo.
        Separate FFI_TYPE_SINT32 and FFI_TYPE_UINT32 cases.
        (ffi_prep_cif_machdep): Only 16 byte stack alignment in 64 bit
        mode.
        (ffi_closure_helper_DARWIN): Remove nf and ng counters.  Move temp
        into case.
        * src/powerpc/aix_closure.S: Maintain 16 byte stack alignment.
        Allocate result area between params and FPRs.

From-SVN: r154892
This commit is contained in:
David Edelsohn 2009-12-02 04:21:08 +00:00 committed by David Edelsohn
parent 5e87bf4a14
commit 6d518d3b72
4 changed files with 100 additions and 115 deletions

View File

@ -1,3 +1,18 @@
2009-11-30 David Edelsohn <edelsohn@gnu.org>

* src/powerpc/aix.S (ffi_call_AIX): Convert to more standard
register usage. Call ffi_prep_args directly. Add long double
return value support.
* src/powerpc/ffi_darwin.c (ffi_prep_args): Double arg increment
applies to FFI_TYPE_DOUBLE. Correct fpr_base increment typo.
Separate FFI_TYPE_SINT32 and FFI_TYPE_UINT32 cases.
(ffi_prep_cif_machdep): Only 16 byte stack alignment in 64 bit
mode.
(ffi_closure_helper_DARWIN): Remove nf and ng counters. Move temp
into case.
* src/powerpc/aix_closure.S: Maintain 16 byte stack alignment.
Allocate result area between params and FPRs.

2009-11-30 David Edelsohn <edelsohn@gnu.org>

PR target/35484

View File

@ -104,47 +104,34 @@ ffi_call_AIX:
.llong .ffi_call_AIX, TOC[tc0], 0
.csect .text[PR]
.ffi_call_AIX:
mr r12,r8 // We only need r12 until the call, so it doesn't have to be saved...
/* Save the old stack pointer as AP. */
mr r8,r1
/* Allocate the stack space we need. */
stdux r1,r1,r4
/* Save registers we use. */
mflr r9
mflr r0
std r28,-32(r8)
std r29,-24(r8)
std r30,-16(r8)
std r31, -8(r8)
std r28,-32(r1)
std r29,-24(r1)
std r30,-16(r1)
std r31, -8(r1)
std r9, 16(r8)
std r2, 40(r1)
std r0, 16(r1)
mr r28, r1 /* our AP. */
stdux r1, r1, r4
/* Save arguments over call... */
mr r31,r5 /* flags, */
mr r30,r6 /* rvalue, */
mr r29,r7 /* function address, */
mr r28,r8 /* our AP. */
mr r31, r5 /* flags, */
mr r30, r6 /* rvalue, */
mr r29, r7 /* function address. */
std r2, 40(r1)
/* Call ffi_prep_args. */
mr r4,r1
li r9,0
ld r2,8(r12)
ld r12,0(r12)
mtctr r12 // r12 holds address of _ffi_prep_args
bctrl
ld r2,40(r1)
mr r4, r1
bl .ffi_prep_args
/* Now do the call. */
ld r12,0(r29)
ld r0, 0(r29)
ld r2, 8(r29)
/* Set up cr1 with bits 4-7 of the flags. */
mtcrf 0x40,r31
std r2,40(r1)
mtctr r12
ld r2,8(r29)
mtcrf 0x40, r31
mtctr r0
/* Load all those argument registers. */
// We have set up a nice stack frame, just load it into registers.
ld r3, 40+(1*8)(r1)
@ -180,33 +167,33 @@ L1:
L2:
/* Make the call. */
bctrl
ld r2,40(r1)
ld r2, 40(r1)
/* Now, deal with the return value. */
mtcrf 0x01,r31
mtcrf 0x01, r31
bt 30,L(done_return_value)
bt 29,L(fp_return_value)
std r3,0(r30)
bf 28,L(done_return_value)
std r4,4(r30)
bt 30, L(done_return_value)
bt 29, L(fp_return_value)
std r3, 0(r30)
/* Fall through... */
L(done_return_value):
/* Restore the registers we used and return. */
ld r9,16(r28)
ld r31,-8(r28)
mtlr r9
ld r30,-16(r28)
ld r29,-24(r28)
ld r28,-32(r28)
ld r1,0(r1)
mr r1, r28
ld r0, 16(r28)
ld r28,-32(r1)
mtlr r0
ld r29,-24(r1)
ld r30,-16(r1)
ld r31,-8(r1)
blr
L(fp_return_value):
bf 28,L(float_return_value)
stfd f1,0(r30)
bf 31,L(done_return_value)
stfd f2,8(r30)
b L(done_return_value)
L(float_return_value):
stfs f1,0(r30)

View File

@ -105,11 +105,12 @@ ffi_closure_ASM:
/* 48 Bytes (Linkage Area) */
/* 64 Bytes (params) */
/* 16 Bytes (result) */
/* 104 Bytes (13*8 from FPR) */
/* 32 Bytes (result) */
/* 248 Bytes */
/* 8 Bytes (alignment) */
/* 240 Bytes */
stdu r1,-248(r1) /* skip over caller save area
stdu r1,-240(r1) /* skip over caller save area
keep stack aligned to 16 */
/* we want to build up an area for the parameters passed */
@ -117,42 +118,42 @@ ffi_closure_ASM:
/* we store gpr 3 to gpr 10 (aligned to 4)
in the parents outgoing area */
std r3, (304+0*8)(r1)
std r4, (304+1*8)(r1)
std r5, (304+2*8)(r1)
std r6, (304+3*8)(r1)
std r7, (304+4*8)(r1)
std r8, (304+5*8)(r1)
std r9, (304+6*8)(r1)
std r10, (304+7*8)(r1)
std r3, 288+(0*8)(r1)
std r4, 288+(1*8)(r1)
std r5, 288+(2*8)(r1)
std r6, 288+(3*8)(r1)
std r7, 288+(4*8)(r1)
std r8, 288+(5*8)(r1)
std r9, 288+(6*8)(r1)
std r10, 288+(7*8)(r1)
/* next save fpr 1 to fpr 13 (aligned to 8) */
stfd f1, (112+0*8)(r1)
stfd f2, (112+1*8)(r1)
stfd f3, (112+2*8)(r1)
stfd f4, (112+3*8)(r1)
stfd f5, (112+4*8)(r1)
stfd f6, (112+5*8)(r1)
stfd f7, (112+6*8)(r1)
stfd f8, (112+7*8)(r1)
stfd f9, (112+8*8)(r1)
stfd f10, (112+9*8)(r1)
stfd f11, (112+10*8)(r1)
stfd f12, (112+11*8)(r1)
stfd f13, (112+12*8)(r1)
stfd f1, 128+(0*8)(r1)
stfd f2, 128+(1*8)(r1)
stfd f3, 128+(2*8)(r1)
stfd f4, 128+(3*8)(r1)
stfd f5, 128+(4*8)(r1)
stfd f6, 128+(5*8)(r1)
stfd f7, 128+(6*8)(r1)
stfd f8, 128+(7*8)(r1)
stfd f9, 128+(8*8)(r1)
stfd f10, 128+(9*8)(r1)
stfd f11, 128+(10*8)(r1)
stfd f12, 128+(11*8)(r1)
stfd f13, 128+(12*8)(r1)
/* set up registers for the routine that actually does the work */
/* get the context pointer from the trampoline */
mr r3,r11
/* now load up the pointer to the result storage */
addi r4,r1,216
addi r4,r1,112
/* now load up the pointer to the saved gpr registers */
addi r5,r1,304
addi r5,r1,288
/* now load up the pointer to the saved fpr registers */
addi r6,r1,112
addi r6,r1,128
/* make the call */
bl .ffi_closure_helper_DARWIN
@ -164,7 +165,7 @@ ffi_closure_ASM:
/* look up the proper starting point in table */
/* by using return type as offset */
addi r5,r1,216 /* get pointer to results area */
addi r5,r1,112 /* get pointer to results area */
ld r4,LC..60(2) /* get address of jump table */
sldi r3,r3,2 /* now multiply return type by 4 */
lwzx r3,r4,r3 /* get the contents of that table value */
@ -243,7 +244,7 @@ L..58:
/* case void / done */
L..44:
addi r1,r1,248 /* restore stack pointer */
addi r1,r1,240 /* restore stack pointer */
ld r0,16(r1) /* get return address */
mtlr r0 /* reset link register */
blr

View File

@ -132,11 +132,7 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack)
*(double *)next_arg = double_tmp;
else
*fpr_base++ = double_tmp;
#ifdef POWERPC64
next_arg++;
#else
next_arg += 2;
#endif
fparg_count++;
FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
break;
@ -147,7 +143,11 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack)
*(double *)next_arg = double_tmp;
else
*fpr_base++ = double_tmp;
#ifdef POWERPC64
next_arg++;
#else
next_arg += 2;
#endif
fparg_count++;
FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
break;
@ -157,7 +157,7 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack)
case FFI_TYPE_LONGDOUBLE:
#ifdef POWERPC64
if (fparg_count < NUM_FPR_ARG_REGISTERS)
*((long double *) fpr_base)++ = *(long double *) *p_argv;
*(long double *) fpr_base++ = *(long double *) *p_argv;
else
*(long double *) next_arg = *(long double *) *p_argv;
next_arg += 2;
@ -238,9 +238,12 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack)
break;
case FFI_TYPE_INT:
case FFI_TYPE_UINT32:
case FFI_TYPE_SINT32:
gprvalue = *(unsigned *)*p_argv;
gprvalue = *(signed int *) *p_argv;
goto putgpr;
case FFI_TYPE_UINT32:
gprvalue = *(unsigned int *) *p_argv;
putgpr:
*next_arg++ = gprvalue;
break;
@ -457,11 +460,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
bytes += NUM_GPR_ARG_REGISTERS * sizeof(long);
/* The stack space allocated needs to be a multiple of 16 bytes. */
#ifdef POWERPC64
bytes = (bytes + 31) & -0x1F;
#else
bytes = (bytes + 15) & ~0xF;
#endif
cif->flags = flags;
cif->bytes = bytes;
@ -690,25 +689,19 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
void ** avalue;
ffi_type ** arg_types;
long i, avn;
long nf; /* number of floating registers already used. */
long ng; /* number of general registers already used. */
ffi_cif * cif;
double temp;
ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS;
unsigned size_al;
cif = closure->cif;
avalue = alloca(cif->nargs * sizeof(void *));
nf = 0;
ng = 0;
/* Copy the caller's structure return value address so that the closure
returns the data directly to the caller. */
if (cif->rtype->type == FFI_TYPE_STRUCT)
{
rvalue = (void *) *pgr;
pgr++;
ng++;
}
i = 0;
@ -727,7 +720,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
#else
avalue[i] = (char *) pgr + 3;
#endif
ng++;
pgr++;
break;
@ -738,7 +730,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
#else
avalue[i] = (char *) pgr + 2;
#endif
ng++;
pgr++;
break;
@ -750,7 +741,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
case FFI_TYPE_POINTER:
avalue[i] = pgr;
#endif
ng++;
pgr++;
break;
@ -763,7 +753,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
avalue[i] = (void *) pgr + 8 - size_al;
else
avalue[i] = (void *) pgr;
ng += (size_al + 7) / 8;
pgr += (size_al + 7) / 8;
#else
/* Structures that match the basic modes (QI 1 byte, HI 2 bytes,
@ -777,7 +766,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
avalue[i] = (void*) pgr + 4 - size_al;
else
avalue[i] = (void*) pgr;
ng += (size_al + 3) / 4;
pgr += (size_al + 3) / 4;
#endif
break;
@ -787,13 +775,11 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
#ifdef POWERPC64
case FFI_TYPE_POINTER:
avalue[i] = pgr;
ng++;
pgr++;
break;
#else
/* Long long ints are passed in two gpr's. */
avalue[i] = pgr;
ng += 2;
pgr += 2;
break;
#endif
@ -801,10 +787,10 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
case FFI_TYPE_FLOAT:
/* A float value consumes a GPR.
There are 13 64bit floating point registers. */
if (nf < NUM_FPR_ARG_REGISTERS)
if (pfr < end_pfr)
{
temp = pfr->d;
pfr->f = (float)temp;
double temp = pfr->d;
pfr->f = (float) temp;
avalue[i] = pfr;
pfr++;
}
@ -812,15 +798,13 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
{
avalue[i] = pgr;
}
nf++;
ng++;
pgr++;
break;
case FFI_TYPE_DOUBLE:
/* A double value consumes two GPRs.
There are 13 64bit floating point registers. */
if (nf < NUM_FPR_ARG_REGISTERS)
if (pfr < end_pfr)
{
avalue[i] = pfr;
pfr++;
@ -829,12 +813,9 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
{
avalue[i] = pgr;
}
nf++;
#ifdef POWERPC64
ng++;
pgr++;
#else
ng += 2;
pgr += 2;
#endif
break;
@ -843,22 +824,25 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
case FFI_TYPE_LONGDOUBLE:
#ifdef POWERPC64
if (nf < NUM_FPR_ARG_REGISTERS)
if (pfr + 1 < end_pfr)
{
avalue[i] = pfr;
pfr += 2;
}
else
{
if (pfr < end_pfr)
{
*pgr = *(unsigned long *) pfr;
pfr++;
}
avalue[i] = pgr;
}
nf += 2;
ng += 2;
pgr += 2;
#else /* POWERPC64 */
/* A long double value consumes four GPRs and two FPRs.
There are 13 64bit floating point registers. */
if (nf < NUM_FPR_ARG_REGISTERS - 1)
if (pfr + 1 < end_pfr)
{
avalue[i] = pfr;
pfr += 2;
@ -866,7 +850,7 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
/* Here we have the situation where one part of the long double
is stored in fpr13 and the other part is already on the stack.
We use a union to pass the long double to avalue[i]. */
else if (nf == NUM_FPR_ARG_REGISTERS - 1)
else if (pfr + 1 == end_pfr)
{
union ldu temp_ld;
memcpy (&temp_ld.lb[0], pfr, sizeof(ldbits));
@ -877,8 +861,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
{
avalue[i] = pgr;
}
nf += 2;
ng += 4;
pgr += 4;
#endif /* POWERPC64 */
break;