From 6d518d3b72692a2874122c903e5994db27bf5500 Mon Sep 17 00:00:00 2001 From: David Edelsohn Date: Wed, 2 Dec 2009 04:21:08 +0000 Subject: [PATCH] aix.S (ffi_call_AIX): Convert to more standard register usage. * src/powerpc/aix.S (ffi_call_AIX): Convert to more standard register usage. Call ffi_prep_args directly. Add long double return value support. * src/powerpc/ffi_darwin.c (ffi_prep_args): Double arg increment applies to FFI_TYPE_DOUBLE. Correct fpr_base increment typo. Separate FFI_TYPE_SINT32 and FFI_TYPE_UINT32 cases. (ffi_prep_cif_machdep): Only 16 byte stack alignment in 64 bit mode. (ffi_closure_helper_DARWIN): Remove nf and ng counters. Move temp into case. * src/powerpc/aix_closure.S: Maintain 16 byte stack alignment. Allocate result area between params and FPRs. From-SVN: r154892 --- libffi/ChangeLog | 15 +++++++ libffi/src/powerpc/aix.S | 77 +++++++++++++------------------- libffi/src/powerpc/aix_closure.S | 59 ++++++++++++------------ libffi/src/powerpc/ffi_darwin.c | 64 ++++++++++---------------- 4 files changed, 100 insertions(+), 115 deletions(-) diff --git a/libffi/ChangeLog b/libffi/ChangeLog index c93d0b73574..c0adaa9226b 100644 --- a/libffi/ChangeLog +++ b/libffi/ChangeLog @@ -1,3 +1,18 @@ +2009-11-30 David Edelsohn + + * src/powerpc/aix.S (ffi_call_AIX): Convert to more standard + register usage. Call ffi_prep_args directly. Add long double + return value support. + * src/powerpc/ffi_darwin.c (ffi_prep_args): Double arg increment + applies to FFI_TYPE_DOUBLE. Correct fpr_base increment typo. + Separate FFI_TYPE_SINT32 and FFI_TYPE_UINT32 cases. + (ffi_prep_cif_machdep): Only 16 byte stack alignment in 64 bit + mode. + (ffi_closure_helper_DARWIN): Remove nf and ng counters. Move temp + into case. + * src/powerpc/aix_closure.S: Maintain 16 byte stack alignment. + Allocate result area between params and FPRs. + 2009-11-30 David Edelsohn PR target/35484 diff --git a/libffi/src/powerpc/aix.S b/libffi/src/powerpc/aix.S index 7b73999f2a7..5b477cad5e8 100644 --- a/libffi/src/powerpc/aix.S +++ b/libffi/src/powerpc/aix.S @@ -104,47 +104,34 @@ ffi_call_AIX: .llong .ffi_call_AIX, TOC[tc0], 0 .csect .text[PR] .ffi_call_AIX: - mr r12,r8 // We only need r12 until the call, so it doesn't have to be saved... - /* Save the old stack pointer as AP. */ - mr r8,r1 - - /* Allocate the stack space we need. */ - stdux r1,r1,r4 - /* Save registers we use. */ - mflr r9 + mflr r0 - std r28,-32(r8) - std r29,-24(r8) - std r30,-16(r8) - std r31, -8(r8) + std r28,-32(r1) + std r29,-24(r1) + std r30,-16(r1) + std r31, -8(r1) - std r9, 16(r8) - std r2, 40(r1) + std r0, 16(r1) + mr r28, r1 /* our AP. */ + stdux r1, r1, r4 /* Save arguments over call... */ - mr r31,r5 /* flags, */ - mr r30,r6 /* rvalue, */ - mr r29,r7 /* function address, */ - mr r28,r8 /* our AP. */ + mr r31, r5 /* flags, */ + mr r30, r6 /* rvalue, */ + mr r29, r7 /* function address. */ + std r2, 40(r1) /* Call ffi_prep_args. */ - mr r4,r1 - li r9,0 - - ld r2,8(r12) - ld r12,0(r12) - mtctr r12 // r12 holds address of _ffi_prep_args - bctrl - ld r2,40(r1) + mr r4, r1 + bl .ffi_prep_args /* Now do the call. */ - ld r12,0(r29) + ld r0, 0(r29) + ld r2, 8(r29) /* Set up cr1 with bits 4-7 of the flags. */ - mtcrf 0x40,r31 - std r2,40(r1) - mtctr r12 - ld r2,8(r29) + mtcrf 0x40, r31 + mtctr r0 /* Load all those argument registers. */ // We have set up a nice stack frame, just load it into registers. ld r3, 40+(1*8)(r1) @@ -180,33 +167,33 @@ L1: L2: /* Make the call. */ bctrl - ld r2,40(r1) + ld r2, 40(r1) /* Now, deal with the return value. */ - mtcrf 0x01,r31 + mtcrf 0x01, r31 - bt 30,L(done_return_value) - bt 29,L(fp_return_value) - std r3,0(r30) - bf 28,L(done_return_value) - std r4,4(r30) + bt 30, L(done_return_value) + bt 29, L(fp_return_value) + std r3, 0(r30) /* Fall through... */ L(done_return_value): /* Restore the registers we used and return. */ - ld r9,16(r28) - ld r31,-8(r28) - mtlr r9 - ld r30,-16(r28) - ld r29,-24(r28) - ld r28,-32(r28) - ld r1,0(r1) + mr r1, r28 + ld r0, 16(r28) + ld r28,-32(r1) + mtlr r0 + ld r29,-24(r1) + ld r30,-16(r1) + ld r31,-8(r1) blr L(fp_return_value): bf 28,L(float_return_value) stfd f1,0(r30) + bf 31,L(done_return_value) + stfd f2,8(r30) b L(done_return_value) L(float_return_value): stfs f1,0(r30) diff --git a/libffi/src/powerpc/aix_closure.S b/libffi/src/powerpc/aix_closure.S index e1a60b46832..70456188092 100644 --- a/libffi/src/powerpc/aix_closure.S +++ b/libffi/src/powerpc/aix_closure.S @@ -105,11 +105,12 @@ ffi_closure_ASM: /* 48 Bytes (Linkage Area) */ /* 64 Bytes (params) */ + /* 16 Bytes (result) */ /* 104 Bytes (13*8 from FPR) */ - /* 32 Bytes (result) */ - /* 248 Bytes */ + /* 8 Bytes (alignment) */ + /* 240 Bytes */ - stdu r1,-248(r1) /* skip over caller save area + stdu r1,-240(r1) /* skip over caller save area keep stack aligned to 16 */ /* we want to build up an area for the parameters passed */ @@ -117,42 +118,42 @@ ffi_closure_ASM: /* we store gpr 3 to gpr 10 (aligned to 4) in the parents outgoing area */ - std r3, (304+0*8)(r1) - std r4, (304+1*8)(r1) - std r5, (304+2*8)(r1) - std r6, (304+3*8)(r1) - std r7, (304+4*8)(r1) - std r8, (304+5*8)(r1) - std r9, (304+6*8)(r1) - std r10, (304+7*8)(r1) + std r3, 288+(0*8)(r1) + std r4, 288+(1*8)(r1) + std r5, 288+(2*8)(r1) + std r6, 288+(3*8)(r1) + std r7, 288+(4*8)(r1) + std r8, 288+(5*8)(r1) + std r9, 288+(6*8)(r1) + std r10, 288+(7*8)(r1) /* next save fpr 1 to fpr 13 (aligned to 8) */ - stfd f1, (112+0*8)(r1) - stfd f2, (112+1*8)(r1) - stfd f3, (112+2*8)(r1) - stfd f4, (112+3*8)(r1) - stfd f5, (112+4*8)(r1) - stfd f6, (112+5*8)(r1) - stfd f7, (112+6*8)(r1) - stfd f8, (112+7*8)(r1) - stfd f9, (112+8*8)(r1) - stfd f10, (112+9*8)(r1) - stfd f11, (112+10*8)(r1) - stfd f12, (112+11*8)(r1) - stfd f13, (112+12*8)(r1) + stfd f1, 128+(0*8)(r1) + stfd f2, 128+(1*8)(r1) + stfd f3, 128+(2*8)(r1) + stfd f4, 128+(3*8)(r1) + stfd f5, 128+(4*8)(r1) + stfd f6, 128+(5*8)(r1) + stfd f7, 128+(6*8)(r1) + stfd f8, 128+(7*8)(r1) + stfd f9, 128+(8*8)(r1) + stfd f10, 128+(9*8)(r1) + stfd f11, 128+(10*8)(r1) + stfd f12, 128+(11*8)(r1) + stfd f13, 128+(12*8)(r1) /* set up registers for the routine that actually does the work */ /* get the context pointer from the trampoline */ mr r3,r11 /* now load up the pointer to the result storage */ - addi r4,r1,216 + addi r4,r1,112 /* now load up the pointer to the saved gpr registers */ - addi r5,r1,304 + addi r5,r1,288 /* now load up the pointer to the saved fpr registers */ - addi r6,r1,112 + addi r6,r1,128 /* make the call */ bl .ffi_closure_helper_DARWIN @@ -164,7 +165,7 @@ ffi_closure_ASM: /* look up the proper starting point in table */ /* by using return type as offset */ - addi r5,r1,216 /* get pointer to results area */ + addi r5,r1,112 /* get pointer to results area */ ld r4,LC..60(2) /* get address of jump table */ sldi r3,r3,2 /* now multiply return type by 4 */ lwzx r3,r4,r3 /* get the contents of that table value */ @@ -243,7 +244,7 @@ L..58: /* case void / done */ L..44: - addi r1,r1,248 /* restore stack pointer */ + addi r1,r1,240 /* restore stack pointer */ ld r0,16(r1) /* get return address */ mtlr r0 /* reset link register */ blr diff --git a/libffi/src/powerpc/ffi_darwin.c b/libffi/src/powerpc/ffi_darwin.c index fd2a3710434..53dbdb2b244 100644 --- a/libffi/src/powerpc/ffi_darwin.c +++ b/libffi/src/powerpc/ffi_darwin.c @@ -132,11 +132,7 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack) *(double *)next_arg = double_tmp; else *fpr_base++ = double_tmp; -#ifdef POWERPC64 next_arg++; -#else - next_arg += 2; -#endif fparg_count++; FFI_ASSERT(flags & FLAG_FP_ARGUMENTS); break; @@ -147,7 +143,11 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack) *(double *)next_arg = double_tmp; else *fpr_base++ = double_tmp; +#ifdef POWERPC64 + next_arg++; +#else next_arg += 2; +#endif fparg_count++; FFI_ASSERT(flags & FLAG_FP_ARGUMENTS); break; @@ -157,7 +157,7 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack) case FFI_TYPE_LONGDOUBLE: #ifdef POWERPC64 if (fparg_count < NUM_FPR_ARG_REGISTERS) - *((long double *) fpr_base)++ = *(long double *) *p_argv; + *(long double *) fpr_base++ = *(long double *) *p_argv; else *(long double *) next_arg = *(long double *) *p_argv; next_arg += 2; @@ -238,9 +238,12 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack) break; case FFI_TYPE_INT: - case FFI_TYPE_UINT32: case FFI_TYPE_SINT32: - gprvalue = *(unsigned *)*p_argv; + gprvalue = *(signed int *) *p_argv; + goto putgpr; + + case FFI_TYPE_UINT32: + gprvalue = *(unsigned int *) *p_argv; putgpr: *next_arg++ = gprvalue; break; @@ -457,11 +460,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif) bytes += NUM_GPR_ARG_REGISTERS * sizeof(long); /* The stack space allocated needs to be a multiple of 16 bytes. */ -#ifdef POWERPC64 - bytes = (bytes + 31) & -0x1F; -#else bytes = (bytes + 15) & ~0xF; -#endif cif->flags = flags; cif->bytes = bytes; @@ -690,25 +689,19 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, void ** avalue; ffi_type ** arg_types; long i, avn; - long nf; /* number of floating registers already used. */ - long ng; /* number of general registers already used. */ ffi_cif * cif; - double temp; + ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS; unsigned size_al; cif = closure->cif; avalue = alloca(cif->nargs * sizeof(void *)); - nf = 0; - ng = 0; - /* Copy the caller's structure return value address so that the closure returns the data directly to the caller. */ if (cif->rtype->type == FFI_TYPE_STRUCT) { rvalue = (void *) *pgr; pgr++; - ng++; } i = 0; @@ -727,7 +720,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, #else avalue[i] = (char *) pgr + 3; #endif - ng++; pgr++; break; @@ -738,7 +730,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, #else avalue[i] = (char *) pgr + 2; #endif - ng++; pgr++; break; @@ -750,7 +741,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, case FFI_TYPE_POINTER: avalue[i] = pgr; #endif - ng++; pgr++; break; @@ -763,7 +753,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, avalue[i] = (void *) pgr + 8 - size_al; else avalue[i] = (void *) pgr; - ng += (size_al + 7) / 8; pgr += (size_al + 7) / 8; #else /* Structures that match the basic modes (QI 1 byte, HI 2 bytes, @@ -777,7 +766,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, avalue[i] = (void*) pgr + 4 - size_al; else avalue[i] = (void*) pgr; - ng += (size_al + 3) / 4; pgr += (size_al + 3) / 4; #endif break; @@ -787,13 +775,11 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, #ifdef POWERPC64 case FFI_TYPE_POINTER: avalue[i] = pgr; - ng++; pgr++; break; #else /* Long long ints are passed in two gpr's. */ avalue[i] = pgr; - ng += 2; pgr += 2; break; #endif @@ -801,10 +787,10 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, case FFI_TYPE_FLOAT: /* A float value consumes a GPR. There are 13 64bit floating point registers. */ - if (nf < NUM_FPR_ARG_REGISTERS) + if (pfr < end_pfr) { - temp = pfr->d; - pfr->f = (float)temp; + double temp = pfr->d; + pfr->f = (float) temp; avalue[i] = pfr; pfr++; } @@ -812,15 +798,13 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, { avalue[i] = pgr; } - nf++; - ng++; pgr++; break; case FFI_TYPE_DOUBLE: /* A double value consumes two GPRs. There are 13 64bit floating point registers. */ - if (nf < NUM_FPR_ARG_REGISTERS) + if (pfr < end_pfr) { avalue[i] = pfr; pfr++; @@ -829,12 +813,9 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, { avalue[i] = pgr; } - nf++; #ifdef POWERPC64 - ng++; pgr++; #else - ng += 2; pgr += 2; #endif break; @@ -843,22 +824,25 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, case FFI_TYPE_LONGDOUBLE: #ifdef POWERPC64 - if (nf < NUM_FPR_ARG_REGISTERS) + if (pfr + 1 < end_pfr) { avalue[i] = pfr; pfr += 2; } else { + if (pfr < end_pfr) + { + *pgr = *(unsigned long *) pfr; + pfr++; + } avalue[i] = pgr; } - nf += 2; - ng += 2; pgr += 2; #else /* POWERPC64 */ /* A long double value consumes four GPRs and two FPRs. There are 13 64bit floating point registers. */ - if (nf < NUM_FPR_ARG_REGISTERS - 1) + if (pfr + 1 < end_pfr) { avalue[i] = pfr; pfr += 2; @@ -866,7 +850,7 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, /* Here we have the situation where one part of the long double is stored in fpr13 and the other part is already on the stack. We use a union to pass the long double to avalue[i]. */ - else if (nf == NUM_FPR_ARG_REGISTERS - 1) + else if (pfr + 1 == end_pfr) { union ldu temp_ld; memcpy (&temp_ld.lb[0], pfr, sizeof(ldbits)); @@ -877,8 +861,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue, { avalue[i] = pgr; } - nf += 2; - ng += 4; pgr += 4; #endif /* POWERPC64 */ break;