aix.S (ffi_call_AIX): Convert to more standard register usage.

* src/powerpc/aix.S (ffi_call_AIX): Convert to more standard register usage. Call ffi_prep_args directly. Add long double return value support. * src/powerpc/ffi_darwin.c (ffi_prep_args): Double arg increment applies to FFI_TYPE_DOUBLE. Correct fpr_base increment typo. Separate FFI_TYPE_SINT32 and FFI_TYPE_UINT32 cases. (ffi_prep_cif_machdep): Only 16 byte stack alignment in 64 bit mode. (ffi_closure_helper_DARWIN): Remove nf and ng counters. Move temp into case. * src/powerpc/aix_closure.S: Maintain 16 byte stack alignment. Allocate result area between params and FPRs. From-SVN: r154892
2009-12-02 04:21:08 +00:00 · 2009-12-02 04:21:08 +00:00 · 6d518d3b72
parent 5e87bf4a14
commit 6d518d3b72
4 changed files with 100 additions and 115 deletions
--- a/libffi/ChangeLog
+++ b/libffi/ChangeLog
@@ -1,3 +1,18 @@
+2009-11-30  David Edelsohn  <edelsohn@gnu.org>
+
+	* src/powerpc/aix.S (ffi_call_AIX): Convert to more standard
+	register usage.  Call ffi_prep_args directly.  Add long double
+	return value support.
+	* src/powerpc/ffi_darwin.c (ffi_prep_args): Double arg increment
+	applies to FFI_TYPE_DOUBLE.  Correct fpr_base increment typo.
+	Separate FFI_TYPE_SINT32 and FFI_TYPE_UINT32 cases.
+	(ffi_prep_cif_machdep): Only 16 byte stack alignment in 64 bit
+	mode.
+	(ffi_closure_helper_DARWIN): Remove nf and ng counters.  Move temp
+	into case.
+	* src/powerpc/aix_closure.S: Maintain 16 byte stack alignment.
+	Allocate result area between params and FPRs.
+
 2009-11-30  David Edelsohn  <edelsohn@gnu.org>

 	PR target/35484
--- a/libffi/src/powerpc/aix.S
+++ b/libffi/src/powerpc/aix.S
@@ -104,47 +104,34 @@ ffi_call_AIX:
 	.llong .ffi_call_AIX, TOC[tc0], 0
 	.csect .text[PR]
 .ffi_call_AIX:
-	mr	r12,r8 // We only need r12 until the call, so it doesn't have to be saved...
-	/* Save the old stack pointer as AP.  */
-	mr	r8,r1
-
-	/* Allocate the stack space we need.  */
-	stdux	r1,r1,r4
-
 	/* Save registers we use.  */
-	mflr	r9
+	mflr	r0

-	std	r28,-32(r8)
-	std	r29,-24(r8)
-	std	r30,-16(r8)
-	std	r31, -8(r8)
+	std	r28,-32(r1)
+	std	r29,-24(r1)
+	std	r30,-16(r1)
+	std	r31, -8(r1)

-	std	r9, 16(r8)
-	std	r2, 40(r1)
+	std	r0, 16(r1)
+	mr	r28, r1		/* our AP.  */
+	stdux	r1, r1, r4

 	/* Save arguments over call...  */
-	mr	r31,r5	/* flags, */
-	mr	r30,r6	/* rvalue, */
-	mr	r29,r7	/* function address, */
-	mr	r28,r8	/* our AP. */
+	mr	r31, r5	/* flags, */
+	mr	r30, r6	/* rvalue, */
+	mr	r29, r7	/* function address.  */
+	std	r2, 40(r1)

 	/* Call ffi_prep_args.  */
-	mr	r4,r1
-	li	r9,0
-
-	ld	r2,8(r12)
-	ld	r12,0(r12)
-	mtctr	r12 // r12 holds address of _ffi_prep_args
-	bctrl
-	ld	r2,40(r1)
+	mr	r4, r1
+	bl .ffi_prep_args

 	/* Now do the call.  */
-	ld	r12,0(r29)
+	ld	r0, 0(r29)
+	ld	r2, 8(r29)
 	/* Set up cr1 with bits 4-7 of the flags.  */
-	mtcrf	0x40,r31
-	std	r2,40(r1)
-	mtctr	r12
-	ld	r2,8(r29)
+	mtcrf	0x40, r31
+	mtctr	r0
 	/* Load all those argument registers.  */
 	// We have set up a nice stack frame, just load it into registers.
 	ld	r3, 40+(1*8)(r1)
@@ -180,33 +167,33 @@ L1:
 L2:
 	/* Make the call.  */
 	bctrl
-	ld	r2,40(r1)
+	ld	r2, 40(r1)

 	/* Now, deal with the return value.  */
-	mtcrf	0x01,r31
+	mtcrf	0x01, r31

-	bt	30,L(done_return_value)
-	bt	29,L(fp_return_value)
-	std	r3,0(r30)
-	bf	28,L(done_return_value)
-	std	r4,4(r30)
+	bt	30, L(done_return_value)
+	bt	29, L(fp_return_value)
+	std	r3, 0(r30)

 	/* Fall through...  */

 L(done_return_value):
 	/* Restore the registers we used and return.  */
-	ld	r9,16(r28)
-	ld	r31,-8(r28)
-	mtlr	r9
-	ld	r30,-16(r28)
-	ld	r29,-24(r28)
-	ld	r28,-32(r28)
-	ld	r1,0(r1)
+	mr	r1, r28
+	ld	r0, 16(r28)
+	ld	r28,-32(r1)
+	mtlr	r0
+	ld	r29,-24(r1)
+	ld	r30,-16(r1)
+	ld	r31,-8(r1)
 	blr

 L(fp_return_value):
 	bf	28,L(float_return_value)
 	stfd	f1,0(r30)
+	bf	31,L(done_return_value)
+	stfd	f2,8(r30)
 	b	L(done_return_value)
 L(float_return_value):
 	stfs	f1,0(r30)
--- a/libffi/src/powerpc/aix_closure.S
+++ b/libffi/src/powerpc/aix_closure.S
@@ -105,11 +105,12 @@ ffi_closure_ASM:

 	/* 48  Bytes (Linkage Area) */
 	/* 64  Bytes (params) */
+	/* 16  Bytes (result) */
 	/* 104 Bytes (13*8 from FPR) */
-	/* 32  Bytes (result) */
-	/* 248 Bytes */
+	/* 8   Bytes (alignment) */
+	/* 240 Bytes */

-	stdu r1,-248(r1)	/* skip over caller save area
+	stdu r1,-240(r1)	/* skip over caller save area
 				   keep stack aligned to 16  */

 /* we want to build up an area for the parameters passed */
@@ -117,42 +118,42 @@ ffi_closure_ASM:

 	/* we store gpr 3 to gpr 10 (aligned to 4)
 	in the parents outgoing area  */
-	std   r3, (304+0*8)(r1)
-	std   r4, (304+1*8)(r1)
-	std   r5, (304+2*8)(r1)
-	std   r6, (304+3*8)(r1)
-	std   r7, (304+4*8)(r1)
-	std   r8, (304+5*8)(r1)
-	std   r9, (304+6*8)(r1)
-	std   r10, (304+7*8)(r1)
+	std   r3, 288+(0*8)(r1)
+	std   r4, 288+(1*8)(r1)
+	std   r5, 288+(2*8)(r1)
+	std   r6, 288+(3*8)(r1)
+	std   r7, 288+(4*8)(r1)
+	std   r8, 288+(5*8)(r1)
+	std   r9, 288+(6*8)(r1)
+	std   r10, 288+(7*8)(r1)

 	/* next save fpr 1 to fpr 13 (aligned to 8) */
-	stfd  f1, (112+0*8)(r1)
-	stfd  f2, (112+1*8)(r1)
-	stfd  f3, (112+2*8)(r1)
-	stfd  f4, (112+3*8)(r1)
-	stfd  f5, (112+4*8)(r1)
-	stfd  f6, (112+5*8)(r1)
-	stfd  f7, (112+6*8)(r1)
-	stfd  f8, (112+7*8)(r1)
-	stfd  f9, (112+8*8)(r1)
-	stfd  f10, (112+9*8)(r1)
-	stfd  f11, (112+10*8)(r1)
-	stfd  f12, (112+11*8)(r1)
-	stfd  f13, (112+12*8)(r1)
+	stfd  f1, 128+(0*8)(r1)
+	stfd  f2, 128+(1*8)(r1)
+	stfd  f3, 128+(2*8)(r1)
+	stfd  f4, 128+(3*8)(r1)
+	stfd  f5, 128+(4*8)(r1)
+	stfd  f6, 128+(5*8)(r1)
+	stfd  f7, 128+(6*8)(r1)
+	stfd  f8, 128+(7*8)(r1)
+	stfd  f9, 128+(8*8)(r1)
+	stfd  f10, 128+(9*8)(r1)
+	stfd  f11, 128+(10*8)(r1)
+	stfd  f12, 128+(11*8)(r1)
+	stfd  f13, 128+(12*8)(r1)

 	/* set up registers for the routine that actually does the work */
 	/* get the context pointer from the trampoline */
 	mr r3,r11

 	/* now load up the pointer to the result storage */
-	addi r4,r1,216
+	addi r4,r1,112

 	/* now load up the pointer to the saved gpr registers */
-	addi r5,r1,304
+	addi r5,r1,288

 	/* now load up the pointer to the saved fpr registers */
-	addi r6,r1,112
+	addi r6,r1,128

 	/* make the call */
 	bl .ffi_closure_helper_DARWIN
@@ -164,7 +165,7 @@ ffi_closure_ASM:

 	/* look up the proper starting point in table  */
 	/* by using return type as offset */
-	addi r5,r1,216		/* get pointer to results area */
+	addi r5,r1,112		/* get pointer to results area */
 	ld r4,LC..60(2)		/* get address of jump table */
 	sldi r3,r3,2		/* now multiply return type by 4 */
 	lwzx r3,r4,r3		/* get the contents of that table value */
@@ -243,7 +244,7 @@ L..58:

 /* case void / done	 */
 L..44:
-	addi r1,r1,248		/* restore stack pointer */
+	addi r1,r1,240		/* restore stack pointer */
 	ld r0,16(r1)		/* get return address */
 	mtlr r0			/* reset link register */
 	blr
--- a/libffi/src/powerpc/ffi_darwin.c
+++ b/libffi/src/powerpc/ffi_darwin.c
@@ -132,11 +132,7 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack)
 	    *(double *)next_arg = double_tmp;
 	  else
 	    *fpr_base++ = double_tmp;
-#ifdef POWERPC64
 	  next_arg++;
-#else
-	  next_arg += 2;
-#endif
 	  fparg_count++;
 	  FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
 	  break;
@@ -147,7 +143,11 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack)
 	    *(double *)next_arg = double_tmp;
 	  else
 	    *fpr_base++ = double_tmp;
+#ifdef POWERPC64
+	  next_arg++;
+#else
 	  next_arg += 2;
+#endif
 	  fparg_count++;
 	  FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
 	  break;
@@ -157,7 +157,7 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack)
 	case FFI_TYPE_LONGDOUBLE:
 #ifdef POWERPC64
 	  if (fparg_count < NUM_FPR_ARG_REGISTERS)
-	    *((long double *) fpr_base)++ = *(long double *) *p_argv;
+	    *(long double *) fpr_base++ = *(long double *) *p_argv;
 	  else
 	    *(long double *) next_arg = *(long double *) *p_argv;
 	  next_arg += 2;
@@ -238,9 +238,12 @@ void ffi_prep_args(extended_cif *ecif, unsigned long *const stack)
 	  break;

 	case FFI_TYPE_INT:
-	case FFI_TYPE_UINT32:
 	case FFI_TYPE_SINT32:
-	  gprvalue = *(unsigned *)*p_argv;
+	  gprvalue = *(signed int *) *p_argv;
+	  goto putgpr;
+
+	case FFI_TYPE_UINT32:
+	  gprvalue = *(unsigned int *) *p_argv;
 	putgpr:
 	  *next_arg++ = gprvalue;
 	  break;
@@ -457,11 +460,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
    bytes += NUM_GPR_ARG_REGISTERS * sizeof(long);

  /* The stack space allocated needs to be a multiple of 16 bytes.  */
-#ifdef POWERPC64
-  bytes = (bytes + 31) & -0x1F;
-#else
  bytes = (bytes + 15) & ~0xF;
-#endif

  cif->flags = flags;
  cif->bytes = bytes;
@@ -690,25 +689,19 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
  void **          avalue;
  ffi_type **      arg_types;
  long             i, avn;
-  long             nf;   /* number of floating registers already used.  */
-  long             ng;   /* number of general registers already used.  */
  ffi_cif *        cif;
-  double           temp;
+  ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS;
  unsigned         size_al;

  cif = closure->cif;
  avalue = alloca(cif->nargs * sizeof(void *));

-  nf = 0;
-  ng = 0;
-
  /* Copy the caller's structure return value address so that the closure
     returns the data directly to the caller.  */
  if (cif->rtype->type == FFI_TYPE_STRUCT)
    {
      rvalue = (void *) *pgr;
      pgr++;
-      ng++;
    }

  i = 0;
@@ -727,7 +720,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 #else
 	  avalue[i] = (char *) pgr + 3;
 #endif
-	  ng++;
 	  pgr++;
 	  break;

@@ -738,7 +730,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 #else
 	  avalue[i] = (char *) pgr + 2;
 #endif
-	  ng++;
 	  pgr++;
 	  break;

@@ -750,7 +741,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 	case FFI_TYPE_POINTER:
 	  avalue[i] = pgr;
 #endif
-	  ng++;
 	  pgr++;
 	  break;

@@ -763,7 +753,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 	    avalue[i] = (void *) pgr + 8 - size_al;
 	  else
 	    avalue[i] = (void *) pgr;
-	  ng += (size_al + 7) / 8;
 	  pgr += (size_al + 7) / 8;
 #else
 	  /* Structures that match the basic modes (QI 1 byte, HI 2 bytes,
@@ -777,7 +766,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 	    avalue[i] = (void*) pgr + 4 - size_al;
 	  else
 	    avalue[i] = (void*) pgr;
-	  ng += (size_al + 3) / 4;
 	  pgr += (size_al + 3) / 4;
 #endif
 	  break;
@@ -787,13 +775,11 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 #ifdef POWERPC64
 	case FFI_TYPE_POINTER:
 	  avalue[i] = pgr;
-	  ng++;
 	  pgr++;
 	  break;
 #else
 	  /* Long long ints are passed in two gpr's.  */
 	  avalue[i] = pgr;
-	  ng += 2;
 	  pgr += 2;
 	  break;
 #endif
@@ -801,10 +787,10 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 	case FFI_TYPE_FLOAT:
 	  /* A float value consumes a GPR.
 	     There are 13 64bit floating point registers.  */
-	  if (nf < NUM_FPR_ARG_REGISTERS)
+	  if (pfr < end_pfr)
 	    {
-	      temp = pfr->d;
-	      pfr->f = (float)temp;
+	      double temp = pfr->d;
+	      pfr->f = (float) temp;
 	      avalue[i] = pfr;
 	      pfr++;
 	    }
@@ -812,15 +798,13 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 	    {
 	      avalue[i] = pgr;
 	    }
-	  nf++;
-	  ng++;
 	  pgr++;
 	  break;

 	case FFI_TYPE_DOUBLE:
 	  /* A double value consumes two GPRs.
 	     There are 13 64bit floating point registers.  */
-	  if (nf < NUM_FPR_ARG_REGISTERS)
+	  if (pfr < end_pfr)
 	    {
 	      avalue[i] = pfr;
 	      pfr++;
@@ -829,12 +813,9 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 	    {
 	      avalue[i] = pgr;
 	    }
-	  nf++;
 #ifdef POWERPC64
-	  ng++;
 	  pgr++;
 #else
-	  ng += 2;
 	  pgr += 2;
 #endif
 	  break;
@@ -843,22 +824,25 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,

 	case FFI_TYPE_LONGDOUBLE:
 #ifdef POWERPC64
-	  if (nf < NUM_FPR_ARG_REGISTERS)
+	  if (pfr + 1 < end_pfr)
 	    {
 	      avalue[i] = pfr;
 	      pfr += 2;
 	    }
 	  else
 	    {
+	      if (pfr < end_pfr)
+		{
+		  *pgr = *(unsigned long *) pfr;
+		  pfr++;
+		}
 	      avalue[i] = pgr;
 	    }
-	  nf += 2;
-	  ng += 2;
 	  pgr += 2;
 #else  /* POWERPC64 */
 	  /* A long double value consumes four GPRs and two FPRs.
 	     There are 13 64bit floating point registers.  */
-	  if (nf < NUM_FPR_ARG_REGISTERS - 1)
+	  if (pfr + 1 < end_pfr)
 	    {
 	      avalue[i] = pfr;
 	      pfr += 2;
@@ -866,7 +850,7 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 	  /* Here we have the situation where one part of the long double
 	     is stored in fpr13 and the other part is already on the stack.
 	     We use a union to pass the long double to avalue[i].  */
-	  else if (nf == NUM_FPR_ARG_REGISTERS - 1)
+	  else if (pfr + 1 == end_pfr)
 	    {
 	      union ldu temp_ld;
 	      memcpy (&temp_ld.lb[0], pfr, sizeof(ldbits));
@@ -877,8 +861,6 @@ int ffi_closure_helper_DARWIN (ffi_closure* closure, void * rvalue,
 	    {
 	      avalue[i] = pgr;
 	    }
-	  nf += 2;
-	  ng += 4;
 	  pgr += 4;
 #endif  /* POWERPC64 */
 	  break;