From 29fe0479819d1212b18f7e577fb3ebcfad3d4ce5 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@redhat.com>
Date: Fri, 8 Dec 2000 19:41:15 +0000
Subject: [PATCH] libffi closures for Alpha

From-SVN: r38136
---
 libffi/ChangeLog        |  16 ++
 libffi/include/ffi.h.in |   8 +-
 libffi/src/alpha/ffi.c  | 320 +++++++++++++++++++++++-----------------
 libffi/src/alpha/osf.S  | 194 ++++++++++++++++--------
 4 files changed, 337 insertions(+), 201 deletions(-)

diff --git a/libffi/ChangeLog b/libffi/ChangeLog
index c409192e83d..71dad419174 100644
--- a/libffi/ChangeLog
+++ b/libffi/ChangeLog
@@ -1,3 +1,19 @@
+2000-12-07  Richard Henderson  <rth@redhat.com>
+
+	* src/raw_api.c (ffi_translate_args): Fix typo.
+	(ffi_prep_closure): Likewise.
+
+	* include/ffi.h.in [ALPHA]: Define FFI_CLOSURES and
+	FFI_TRAMPOLINE_SIZE.
+	* src/alpha/ffi.c (ffi_prep_cif_machdep): Adjust minimal
+	cif->bytes for new ffi_call_osf implementation.
+	(ffi_prep_args): Absorb into ...
+	(ffi_call): ... here.  Do all stack allocation here and
+	avoid a callback function.
+	(ffi_prep_closure, ffi_closure_osf_inner): New.
+	* src/alpha/osf.S (ffi_call_osf): Reimplement with no callback.
+	(ffi_closure_osf): New.
+
 2000-09-10  Alexandre Oliva  <aoliva@redhat.com>
 
 	* config.guess, config.sub, install-sh: Removed.
diff --git a/libffi/include/ffi.h.in b/libffi/include/ffi.h.in
index 6be7e23c727..0d8e70788c9 100644
--- a/libffi/include/ffi.h.in
+++ b/libffi/include/ffi.h.in
@@ -1,7 +1,7 @@
 /* -----------------------------------------------------------------*-C-*-
    libffi @VERSION@ - Copyright (c) 1996-1999  Cygnus Solutions
 
-   $Id: ffi.h.in,v 1.4 2000/02/25 19:13:44 tromey Exp $
+   $Id: ffi.h.in,v 1.5 2000/04/17 02:15:31 green Exp $
 
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
@@ -368,6 +368,12 @@ struct ffi_ia64_trampoline_struct {
 };
 #define FFI_NATIVE_RAW_API 0
 
+#elif defined(ALPHA)
+
+#define FFI_CLOSURES 1
+#define FFI_TRAMPOLINE_SIZE 24
+#define FFI_NATIVE_RAW_API 0
+
 #else 
 
 #define FFI_CLOSURES 0
diff --git a/libffi/src/alpha/ffi.c b/libffi/src/alpha/ffi.c
index e3d807ab196..84ee8494e3d 100644
--- a/libffi/src/alpha/ffi.c
+++ b/libffi/src/alpha/ffi.c
@@ -30,131 +30,25 @@
 
 #include <stdlib.h>
 
-/* ffi_prep_args is called by the assembly routine once stack space
-   has been allocated for the function's arguments */
+extern void ffi_call_osf(void *, unsigned long, unsigned, void *, void (*)());
+extern void ffi_closure_osf(void);
 
-static void
-ffi_prep_args(char *stack, extended_cif *ecif, int bytes, int flags)
-{
-  register long i, avn;
-  register void **p_argv;
-  register char *argp;
-  register ffi_type **p_arg;
 
-  /* To streamline things in the assembly code, we always allocate 12
-     words for loading up the int and fp argument registers.  The layout
-     is as when processing varargs: the 6 fp args, the 6 int args, then
-     the incoming stack.  ARGP points to the first int slot.  */
-  argp = stack + 6 * SIZEOF_ARG;
-  memset (stack, 0, 12 * SIZEOF_ARG);
-
-  if ( ecif->cif->rtype->type == FFI_TYPE_STRUCT )
-    {
-      *(void **) argp = ecif->rvalue;
-      argp += sizeof(void *);
-    }
-
-  i = 0;
-  avn = ecif->cif->nargs;
-  p_arg = ecif->cif->arg_types;
-  p_argv = ecif->avalue;
-  while (i < avn)
-    {
-      size_t z = ALIGN((*p_arg)->size, SIZEOF_ARG);
-
-      switch ((*p_arg)->type)
-	{
-	case FFI_TYPE_SINT8:
-	  *(SINT64 *) argp = *(SINT8 *)(* p_argv);
-	  break;
-		  
-	case FFI_TYPE_UINT8:
-	  *(UINT64 *) argp = *(UINT8 *)(* p_argv);
-	  break;
-		  
-	case FFI_TYPE_SINT16:
-	  *(SINT64 *) argp = *(SINT16 *)(* p_argv);
-	  break;
-		  
-	case FFI_TYPE_UINT16:
-	  *(UINT64 *) argp = *(UINT16 *)(* p_argv);
-	  break;
-		  
-	case FFI_TYPE_SINT32:
-	  *(SINT64 *) argp = *(SINT32 *)(* p_argv);
-	  break;
-		  
-	case FFI_TYPE_UINT32:
-	  *(UINT64 *) argp = *(UINT32 *)(* p_argv);
-	  break;
-
-	case FFI_TYPE_SINT64:
-	case FFI_TYPE_UINT64:
-	case FFI_TYPE_POINTER:
-	  *(UINT64 *) argp = *(UINT64 *)(* p_argv);
-	  break;
-
-	case FFI_TYPE_FLOAT:
-	  if (argp - stack < 12 * SIZEOF_ARG)
-	    {
-	      /* Note the conversion -- all the fp regs are loaded as
-		 doubles.  The in-register format is the same.  */
-	      *(double *) (argp - 6 * SIZEOF_ARG) = *(float *)(* p_argv);
-	    }
-	  else
-	    *(float *) argp = *(float *)(* p_argv);
-	  break;
-
-	case FFI_TYPE_DOUBLE:
-	  if (argp - stack < 12 * SIZEOF_ARG)
-	    *(double *) (argp - 6 * SIZEOF_ARG) = *(double *)(* p_argv);
-	  else
-	    *(double *) argp = *(double *)(* p_argv);
-	  break;
-
-	case FFI_TYPE_STRUCT:
-	  memcpy(argp, *p_argv, (*p_arg)->size);
-	  break;
-
-	default:
-	  FFI_ASSERT(0);
-	}
-
-      argp += z;
-      i++, p_arg++, p_argv++;
-    }
-}
-
-/* Perform machine dependent cif processing */
 ffi_status
 ffi_prep_cif_machdep(ffi_cif *cif)
 {
-  /* Adjust cif->bytes. to include 12 words for the temporary register
-     argument loading area.  This will be removed before the call.  */
-
-  cif->bytes += 6*SIZEOF_ARG;
-  if (cif->bytes < 12*SIZEOF_ARG)
-    cif->bytes = 12*SIZEOF_ARG;
-
-  /* The stack must be double word aligned, so round bytes up
-     appropriately. */
-
-  cif->bytes = ALIGN(cif->bytes, 2*sizeof(void*));
+  /* Adjust cif->bytes to represent a minimum 6 words for the temporary
+     register argument loading area.  */
+  if (cif->bytes < 6*SIZEOF_ARG)
+    cif->bytes = 6*SIZEOF_ARG;
 
   /* Set the return type flag */
   switch (cif->rtype->type)
     {
-    case FFI_TYPE_VOID:
     case FFI_TYPE_STRUCT:
-      cif->flags = cif->rtype->type;
-      break;
-
     case FFI_TYPE_FLOAT:
-      cif->flags = FFI_TYPE_FLOAT;
-      break;
-
     case FFI_TYPE_DOUBLE:
-      cif->flags = FFI_TYPE_DOUBLE;
+      cif->flags = cif->rtype->type;
       break;
 
     default:
@@ -165,35 +59,191 @@ ffi_prep_cif_machdep(ffi_cif *cif)
   return FFI_OK;
 }
 
-extern int ffi_call_osf(void (*)(char *, extended_cif *, int, int), 
-			extended_cif *, unsigned, 
-			unsigned, unsigned *, void (*)());
-
 void
 ffi_call(ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
 {
-  extended_cif ecif;
-
-  ecif.cif = cif;
-  ecif.avalue = avalue;
+  unsigned long *stack, *argp;
+  long i, avn;
+  ffi_type **arg_types;
   
+  FFI_ASSERT (cif->abi == FFI_OSF);
+
   /* If the return value is a struct and we don't have a return
      value address then we need to make one.  */
-  
   if (rvalue == NULL && cif->rtype->type == FFI_TYPE_STRUCT)
-    ecif.rvalue = alloca(cif->rtype->size);
-  else
-    ecif.rvalue = rvalue;
-    
-  switch (cif->abi) 
-    {
-    case FFI_OSF:
-      ffi_call_osf(ffi_prep_args, &ecif, cif->bytes, 
-		   cif->flags, rvalue, fn);
-      break;
+    rvalue = alloca(cif->rtype->size);
 
-    default:
-      FFI_ASSERT(0);
-      break;
+  /* Allocate the argument area, plus 4 words directly above it in
+     which ffi_call_osf saves $26, $15, the flags and rvalue.  */
+  argp = stack = alloca(cif->bytes + 4*SIZEOF_ARG);
+
+  if (cif->flags == FFI_TYPE_STRUCT)
+    *(void **) argp++ = rvalue;	/* return buffer takes the first slot */
+
+  i = 0;
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  while (i < avn)
+    {
+      switch ((*arg_types)->type)
+	{
+	case FFI_TYPE_SINT8:
+	  *(SINT64 *) argp = *(SINT8 *)(* avalue);
+	  break;
+		  
+	case FFI_TYPE_UINT8:
+	  *(SINT64 *) argp = *(UINT8 *)(* avalue);
+	  break;
+		  
+	case FFI_TYPE_SINT16:
+	  *(SINT64 *) argp = *(SINT16 *)(* avalue);
+	  break;
+		  
+	case FFI_TYPE_UINT16:
+	  *(SINT64 *) argp = *(UINT16 *)(* avalue);
+	  break;
+		  
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT32:
+	  /* Note that unsigned 32-bit quantities are sign extended.  */
+	  *(SINT64 *) argp = *(SINT32 *)(* avalue);
+	  break;
+		  
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_POINTER:
+	  *(UINT64 *) argp = *(UINT64 *)(* avalue);
+	  break;
+
+	case FFI_TYPE_FLOAT:
+	  if (argp - stack < 6)	/* one of the six register slots */
+	    {
+	      /* Note the conversion -- all the fp regs are loaded as
+		 doubles.  The in-register format is the same.  */
+	      *(double *) argp = *(float *)(* avalue);
+	    }
+	  else
+	    *(float *) argp = *(float *)(* avalue);
+	  break;
+
+	case FFI_TYPE_DOUBLE:
+	  *(double *) argp = *(double *)(* avalue);
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  memcpy(argp, *avalue, (*arg_types)->size);
+	  break;
+
+	default:
+	  FFI_ASSERT(0);
+	}
+
+      /* Advance by the argument size rounded up to whole words.  */
+      argp += ALIGN((*arg_types)->size, SIZEOF_ARG) / SIZEOF_ARG;
+      i++, arg_types++, avalue++;
     }
+
+  ffi_call_osf(stack, cif->bytes, cif->flags, rvalue, fn);
+}
+
+
+ffi_status
+ffi_prep_closure (ffi_closure* closure,
+		  ffi_cif* cif,
+		  void (*fun)(ffi_cif*, void*, void**, void*),
+		  void *user_data)
+{
+  unsigned int *insn = (unsigned int *) &closure->tramp[0];
+
+  FFI_ASSERT (cif->abi == FFI_OSF);
+
+  closure->user_data = user_data;
+  closure->fun = fun;
+  closure->cif = cif;
+
+  /* Four instruction words, then the 64-bit address that word 1 loads.  */
+  insn[0] = 0x47fb0401;		/* mov $27,$1		*/
+  insn[1] = 0xa77b0010;		/* ldq $27,16($27)	*/
+  insn[2] = 0x6bfb0000;		/* jmp $31,($27),0	*/
+  insn[3] = 0x47ff041f;		/* nop			*/
+  *(void **) (insn + 4) = ffi_closure_osf;
+
+  /* The trampoline was written as data; flush the Icache.  */
+  asm volatile ("imb" : : : "memory");
+
+  return FFI_OK;
+}
+
+/* C half of ffi_closure_osf.  ARGP addresses the saved integer argument
+   registers; the saved fp argument registers sit six words below it.
+   Returns cif->flags so the caller can pick the return register.  */
+int
+ffi_closure_osf_inner(ffi_closure *closure, void *rvalue, unsigned long *argp)
+{
+  ffi_cif *cif;
+  void **avalue;
+  ffi_type **arg_types;
+  long i, avn, argn = 0;
+
+  cif = closure->cif;
+  avalue = alloca(cif->nargs * sizeof(void *));
+
+  /* Copy the caller's structure return address so that the closure
+     returns the data directly to the caller.  */
+  if (cif->flags == FFI_TYPE_STRUCT)
+    {
+      rvalue = (void *) argp[0];
+      argn = 1;
+    }
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  /* Grab the addresses of the arguments from the stack frame.  AVALUE is
+     indexed, never advanced: the user function needs its first element.  */
+  for (i = 0; i < avn; i++)
+    {
+      switch (arg_types[i]->type)
+	{
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_POINTER:
+	case FFI_TYPE_STRUCT:
+	  avalue[i] = &argp[argn];
+	  break;
+
+	case FFI_TYPE_FLOAT:
+	  if (argn < 6)
+	    {
+	      /* Floats coming from registers need conversion from double
+	         back to float format.  */
+	      *(float *)&argp[argn - 6] = *(double *)&argp[argn - 6];
+	      avalue[i] = &argp[argn - 6];
+	    }
+	  else
+	    avalue[i] = &argp[argn];
+	  break;
+
+	case FFI_TYPE_DOUBLE:
+	  avalue[i] = &argp[argn - (argn < 6 ? 6 : 0)];
+	  break;
+
+	default:
+	  FFI_ASSERT(0);
+	}
+
+      argn += ALIGN(arg_types[i]->size, SIZEOF_ARG) / SIZEOF_ARG;
+    }
+
+  /* Invoke the closure.  */
+  (closure->fun) (cif, rvalue, avalue, closure->user_data);
+
+  /* Tell ffi_closure_osf what register to put the return value in.  */
+  return cif->flags;
 }
diff --git a/libffi/src/alpha/osf.S b/libffi/src/alpha/osf.S
index 2078683cfb3..9ed37cbb00e 100644
--- a/libffi/src/alpha/osf.S
+++ b/libffi/src/alpha/osf.S
@@ -28,91 +28,155 @@
 #define LIBFFI_ASM	
 #include <ffi.h>
 
-#define callback $16
-#define ecifp	 $17
-#define bytes	 $18
-#define flags	 $19
-#define raddr    $20
-#define fn       $21
-
-#define flags_ofs	16
-#define raddr_ofs	24
-#define fn_ofs		32
-
-#define SIZEOF_FRAME	(6*8)
-
 	.text
-	.align	4
+
+/* ffi_call_osf (void *args, unsigned long bytes, unsigned flags,
+		 void *raddr, void (*fnaddr)());
+
+   A bit of trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.  */
+
+	.align	3
 	.globl	ffi_call_osf
 	.ent	ffi_call_osf
-	
 ffi_call_osf:
-	lda	$30, -SIZEOF_FRAME($30)
-	stq	$26, 0($30)
-	stq	$15, 8($30)
-	stq	flags, flags_ofs($30)
-	stq	raddr, raddr_ofs($30)
-	stq	fn, fn_ofs($30)
-	mov	$30, $15
-	.frame	$15, SIZEOF_FRAME, $26, 0
-        .mask   0x4008000, -SIZEOF_FRAME
+	.frame	$15, 32, $26, 0
+	.mask   0x4008000, -32
+	addq	$16,$17,$1		# $1 = args + bytes, the save area
+	mov	$16, $30		# sp = args
+	stq	$26, 0($1)
+	stq	$15, 8($1)
+	stq	$18, 16($1)		# flags
+	mov	$1, $15			# $15 addresses the save area
 	.prologue 0
 
-	mov	callback, $27		# mov callback into place
-	subq	$30, bytes, $30		# allocate stack space
-	
-	# Call ffi_prep_args; ecif, bytes and flags are already in place.
-	mov	$30, $16		# push stack arg
-	jsr	$26, ($27), 0
+	stq	$19, 24($1)		# raddr
+	mov	$20, $27		# fnaddr, the call target
 
 	# Load up all of the (potential) argument registers.
+	ldq	$16, 0($30)
 	ldt	$f16, 0($30)
 	ldt	$f17, 8($30)
+	ldq	$17, 8($30)
 	ldt	$f18, 16($30)
+	ldq	$18, 16($30)
 	ldt	$f19, 24($30)
+	ldq	$19, 24($30)
 	ldt	$f20, 32($30)
+	ldq	$20, 32($30)
 	ldt	$f21, 40($30)
-	ldq	$16, 48($30)
-	ldq	$17, 56($30)
-	ldq	$18, 64($30)
-	ldq	$19, 72($30)
-	ldq	$20, 80($30)
-	ldq	$21, 88($30)
+	ldq	$21, 40($30)
+
+	# Deallocate the six-word register argument area.
+	lda	$30, 48($30)
 
-	# Get rid of the arg reg temp space and call the function.
-	ldq	$27, fn_ofs($15)
-	lda	$30, 12*8($30)
 	jsr	$26, ($27), 0
+	ldgp	$29, 0($26)
 
 	# If the return value pointer is NULL, assume no return value.
-	ldq	raddr, raddr_ofs($15)
-	beq	raddr, $noretval
-
-	ldq	flags, flags_ofs($15)
-	cmpeq	flags, FFI_TYPE_INT, $1
-	bne	$1, $retint
-	cmpeq	flags, FFI_TYPE_FLOAT, $2
-	bne	$2, $retfloat
-	cmpeq	flags, FFI_TYPE_DOUBLE, $3
-	bne	$3, $retdouble
-	br	$retstruct
-	
-	.align 3
-$retint:
-	stq	$0, 0(raddr)
-	br	$noretval
-$retfloat:
-	sts	$f0, 0(raddr)
-	br	$noretval
-$retdouble:
-	stt	$f0, 0(raddr)
-
-$retstruct:
-$noretval:
-	mov	$15, $30
+	ldq	$19, 24($15)		# saved raddr
+	ldq	$18, 16($15)		# saved flags
 	ldq	$26, 0($15)
+	beq	$19, $noretval
+
+	# Store the return value out in the proper type; others store nothing.
+	cmpeq	$18, FFI_TYPE_INT, $1
+	bne	$1, $retint
+	cmpeq	$18, FFI_TYPE_FLOAT, $2
+	bne	$2, $retfloat
+	cmpeq	$18, FFI_TYPE_DOUBLE, $3
+	bne	$3, $retdouble
+
+$noretval:
+	ldq	$15, 8($15)
+	ret
+
+$retint:
+	stq	$0, 0($19)
+	nop
+	ldq	$15, 8($15)
+	ret
+
+$retfloat:
+	sts	$f0, 0($19)
+	nop
+	ldq	$15, 8($15)
+	ret
+
+$retdouble:
+	stt	$f0, 0($19)
+	nop
 	ldq	$15, 8($15)
-	lda	$30, SIZEOF_FRAME($30)
 	ret
 
 	.end	ffi_call_osf
+
+/* ffi_closure_osf(...)
+   Receives the closure argument in $1.  Frame, in 8-byte words: 0 = $26,
+   2 = return value temp, 4-9 = $f16-$f21, 10-15 = $16-$21.  */
+
+	.align	3
+	.globl	ffi_closure_osf
+	.ent	ffi_closure_osf
+ffi_closure_osf:
+	.frame	$30, 16*8, $26, 0
+	.mask	0x4000000, -16*8
+	ldgp	$29, 0($27)
+	subq	$30, 16*8, $30		# all 16 words; the epilogues pop 16*8
+	stq	$26, 0($30)
+	.prologue 1
+
+	# Store all of the potential argument registers in va_list format.
+	stt	$f16, 4*8($30)
+	stt	$f17, 5*8($30)
+	stt	$f18, 6*8($30)
+	stt	$f19, 7*8($30)
+	stt	$f20, 8*8($30)
+	stt	$f21, 9*8($30)
+	stq	$16, 10*8($30)
+	stq	$17, 11*8($30)
+	stq	$18, 12*8($30)
+	stq	$19, 13*8($30)
+	stq	$20, 14*8($30)
+	stq	$21, 15*8($30)
+
+	# Call ffi_closure_osf_inner to do the bulk of the work.
+	mov	$1, $16			# closure
+	lda	$17, 2*8($30)		# rvalue temp
+	lda	$18, 10*8($30)		# argp, the saved integer registers
+	jsr	$26, ffi_closure_osf_inner
+	ldgp	$29, 0($26)
+	ldq	$26, 0($30)
+
+	# Load up the return value in the proper type.  The type code is in $0.
+	cmpeq	$0, FFI_TYPE_INT, $1
+	bne	$1, $loadint
+	cmpeq	$0, FFI_TYPE_FLOAT, $2
+	bne	$2, $loadfloat
+	cmpeq	$0, FFI_TYPE_DOUBLE, $3	# not $18, which the call clobbers
+	bne	$3, $loaddouble
+
+	addq	$30, 16*8, $30
+	ret
+
+	.align 3
+$loadint:
+	ldq	$0, 16($30)
+	nop
+	addq	$30, 16*8, $30
+	ret
+
+$loadfloat:
+	lds	$f0, 16($30)
+	nop
+	addq	$30, 16*8, $30
+	ret
+
+$loaddouble:
+	ldt	$f0, 16($30)
+	nop
+	addq	$30, 16*8, $30
+	ret
+
+	.end	ffi_closure_osf