From 29fe0479819d1212b18f7e577fb3ebcfad3d4ce5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 8 Dec 2000 19:41:15 +0000 Subject: [PATCH] libffi closures for Alpha From-SVN: r38136 --- libffi/ChangeLog | 16 ++ libffi/include/ffi.h.in | 8 +- libffi/src/alpha/ffi.c | 320 +++++++++++++++++++++++----------------- libffi/src/alpha/osf.S | 194 ++++++++++++++++-------- 4 files changed, 337 insertions(+), 201 deletions(-) diff --git a/libffi/ChangeLog b/libffi/ChangeLog index c409192e83d..71dad419174 100644 --- a/libffi/ChangeLog +++ b/libffi/ChangeLog @@ -1,3 +1,19 @@ +2000-12-07 Dec 8 11:23:29 2000 Richard Henderson + + * src/raw_api.c (ffi_translate_args): Fix typo. + (ffi_prep_closure): Likewise. + + * include/ffi.h.in [ALPHA]: Define FFI_CLOSURES and + FFI_TRAMPOLINE_SIZE. + * src/alpha/ffi.c (ffi_prep_cif_machdep): Adjust minimal + cif->bytes for new ffi_call_osf implementation. + (ffi_prep_args): Absorb into ... + (ffi_call): ... here. Do all stack allocation here and + avoid a callback function. + (ffi_prep_closure, ffi_closure_osf_inner): New. + * src/alpha/osf.S (ffi_call_osf): Reimplement with no callback. + (ffi_closure_osf): New. + 2000-09-10 Alexandre Oliva * config.guess, config.sub, install-sh: Removed. 
diff --git a/libffi/include/ffi.h.in b/libffi/include/ffi.h.in index 6be7e23c727..0d8e70788c9 100644 --- a/libffi/include/ffi.h.in +++ b/libffi/include/ffi.h.in @@ -1,7 +1,7 @@ /* -----------------------------------------------------------------*-C-*- libffi @VERSION@ - Copyright (c) 1996-1999 Cygnus Solutions - $Id: ffi.h.in,v 1.4 2000/02/25 19:13:44 tromey Exp $ + $Id: ffi.h.in,v 1.5 2000/04/17 02:15:31 green Exp $ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -368,6 +368,12 @@ struct ffi_ia64_trampoline_struct { }; #define FFI_NATIVE_RAW_API 0 +#elif defined(ALPHA) + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 24 +#define FFI_NATIVE_RAW_API 0 + #else #define FFI_CLOSURES 0 diff --git a/libffi/src/alpha/ffi.c b/libffi/src/alpha/ffi.c index e3d807ab196..84ee8494e3d 100644 --- a/libffi/src/alpha/ffi.c +++ b/libffi/src/alpha/ffi.c @@ -30,131 +30,25 @@ #include -/* ffi_prep_args is called by the assembly routine once stack space - has been allocated for the function's arguments */ +extern void ffi_call_osf(void *, unsigned long, unsigned, void *, void (*)()); +extern void ffi_closure_osf(void); -static void -ffi_prep_args(char *stack, extended_cif *ecif, int bytes, int flags) -{ - register long i, avn; - register void **p_argv; - register char *argp; - register ffi_type **p_arg; - /* To streamline things in the assembly code, we always allocate 12 - words for loading up the int and fp argument registers. The layout - is as when processing varargs: the 6 fp args, the 6 int args, then - the incoming stack. ARGP points to the first int slot. 
*/ - argp = stack + 6 * SIZEOF_ARG; - memset (stack, 0, 12 * SIZEOF_ARG); - - if ( ecif->cif->rtype->type == FFI_TYPE_STRUCT ) - { - *(void **) argp = ecif->rvalue; - argp += sizeof(void *); - } - - i = 0; - avn = ecif->cif->nargs; - p_arg = ecif->cif->arg_types; - p_argv = ecif->avalue; - while (i < avn) - { - size_t z = ALIGN((*p_arg)->size, SIZEOF_ARG); - - switch ((*p_arg)->type) - { - case FFI_TYPE_SINT8: - *(SINT64 *) argp = *(SINT8 *)(* p_argv); - break; - - case FFI_TYPE_UINT8: - *(UINT64 *) argp = *(UINT8 *)(* p_argv); - break; - - case FFI_TYPE_SINT16: - *(SINT64 *) argp = *(SINT16 *)(* p_argv); - break; - - case FFI_TYPE_UINT16: - *(UINT64 *) argp = *(UINT16 *)(* p_argv); - break; - - case FFI_TYPE_SINT32: - *(SINT64 *) argp = *(SINT32 *)(* p_argv); - break; - - case FFI_TYPE_UINT32: - *(UINT64 *) argp = *(UINT32 *)(* p_argv); - break; - - case FFI_TYPE_SINT64: - case FFI_TYPE_UINT64: - case FFI_TYPE_POINTER: - *(UINT64 *) argp = *(UINT64 *)(* p_argv); - break; - - case FFI_TYPE_FLOAT: - if (argp - stack < 12 * SIZEOF_ARG) - { - /* Note the conversion -- all the fp regs are loaded as - doubles. The in-register format is the same. */ - *(double *) (argp - 6 * SIZEOF_ARG) = *(float *)(* p_argv); - } - else - *(float *) argp = *(float *)(* p_argv); - break; - - case FFI_TYPE_DOUBLE: - if (argp - stack < 12 * SIZEOF_ARG) - *(double *) (argp - 6 * SIZEOF_ARG) = *(double *)(* p_argv); - else - *(double *) argp = *(double *)(* p_argv); - break; - - case FFI_TYPE_STRUCT: - memcpy(argp, *p_argv, (*p_arg)->size); - break; - - default: - FFI_ASSERT(0); - } - - argp += z; - i++, p_arg++, p_argv++; - } -} - -/* Perform machine dependent cif processing */ ffi_status ffi_prep_cif_machdep(ffi_cif *cif) { - /* Adjust cif->bytes. to include 12 words for the temporary register - argument loading area. This will be removed before the call. 
*/ - - cif->bytes += 6*SIZEOF_ARG; - if (cif->bytes < 12*SIZEOF_ARG) - cif->bytes = 12*SIZEOF_ARG; - - /* The stack must be double word aligned, so round bytes up - appropriately. */ - - cif->bytes = ALIGN(cif->bytes, 2*sizeof(void*)); + /* Adjust cif->bytes to represent a minimum 6 words for the temporary + register argument loading area. */ + if (cif->bytes < 6*SIZEOF_ARG) + cif->bytes = 6*SIZEOF_ARG; /* Set the return type flag */ switch (cif->rtype->type) { - case FFI_TYPE_VOID: case FFI_TYPE_STRUCT: - cif->flags = cif->rtype->type; - break; - case FFI_TYPE_FLOAT: - cif->flags = FFI_TYPE_FLOAT; - break; - case FFI_TYPE_DOUBLE: - cif->flags = FFI_TYPE_DOUBLE; + cif->flags = cif->rtype->type; break; default: @@ -165,35 +59,191 @@ ffi_prep_cif_machdep(ffi_cif *cif) return FFI_OK; } -extern int ffi_call_osf(void (*)(char *, extended_cif *, int, int), - extended_cif *, unsigned, - unsigned, unsigned *, void (*)()); - void ffi_call(ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue) { - extended_cif ecif; - - ecif.cif = cif; - ecif.avalue = avalue; + unsigned long *stack, *argp; + long i, avn; + ffi_type **arg_types; + FFI_ASSERT (cif->abi == FFI_OSF); + /* If the return value is a struct and we don't have a return value address then we need to make one. */ - if (rvalue == NULL && cif->rtype->type == FFI_TYPE_STRUCT) - ecif.rvalue = alloca(cif->rtype->size); - else - ecif.rvalue = rvalue; - - switch (cif->abi) - { - case FFI_OSF: - ffi_call_osf(ffi_prep_args, &ecif, cif->bytes, - cif->flags, rvalue, fn); - break; + rvalue = alloca(cif->rtype->size); - default: - FFI_ASSERT(0); - break; + /* Allocate the space for the arguments, plus 4 words of temp + space for ffi_call_osf. 
*/ + argp = stack = alloca(cif->bytes + 4*SIZEOF_ARG); + + if (cif->flags == FFI_TYPE_STRUCT) + *(void **) argp++ = rvalue; + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + while (i < avn) + { + switch ((*arg_types)->type) + { + case FFI_TYPE_SINT8: + *(SINT64 *) argp = *(SINT8 *)(* avalue); + break; + + case FFI_TYPE_UINT8: + *(SINT64 *) argp = *(UINT8 *)(* avalue); + break; + + case FFI_TYPE_SINT16: + *(SINT64 *) argp = *(SINT16 *)(* avalue); + break; + + case FFI_TYPE_UINT16: + *(SINT64 *) argp = *(UINT16 *)(* avalue); + break; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + /* Note that unsigned 32-bit quantities are sign extended. */ + *(SINT64 *) argp = *(SINT32 *)(* avalue); + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + *(UINT64 *) argp = *(UINT64 *)(* avalue); + break; + + case FFI_TYPE_FLOAT: + if (argp - stack < 6) + { + /* Note the conversion -- all the fp regs are loaded as + doubles. The in-register format is the same. 
*/ + *(double *) argp = *(float *)(* avalue); + } + else + *(float *) argp = *(float *)(* avalue); + break; + + case FFI_TYPE_DOUBLE: + *(double *) argp = *(double *)(* avalue); + break; + + case FFI_TYPE_STRUCT: + memcpy(argp, *avalue, (*arg_types)->size); + break; + + default: + FFI_ASSERT(0); + } + + argp += ALIGN((*arg_types)->size, SIZEOF_ARG) / SIZEOF_ARG; + i++, arg_types++, avalue++; } + + ffi_call_osf(stack, cif->bytes, cif->flags, rvalue, fn); +} + + +ffi_status +ffi_prep_closure (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data) +{ + unsigned int *tramp; + + FFI_ASSERT (cif->abi == FFI_OSF); + + tramp = (unsigned int *) &closure->tramp[0]; + tramp[0] = 0x47fb0401; /* mov $27,$1 */ + tramp[1] = 0xa77b0010; /* ldq $27,16($27) */ + tramp[2] = 0x6bfb0000; /* jmp $31,($27),0 */ + tramp[3] = 0x47ff041f; /* nop */ + *(void **) &tramp[4] = ffi_closure_osf; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + /* Flush the Icache. */ + asm volatile ("imb" : : : "memory"); + + return FFI_OK; +} + +int +ffi_closure_osf_inner(ffi_closure *closure, void *rvalue, unsigned long *argp) +{ + ffi_cif *cif; + void **avalue; + ffi_type **arg_types; + long i, avn, argn; + + cif = closure->cif; + avalue = alloca(cif->nargs * sizeof(void *)); + + argn = 0; + + /* Copy the caller's structure return address so that the closure + returns the data directly to the caller. */ + if (cif->flags == FFI_TYPE_STRUCT) + { + rvalue = (void *) argp[0]; + argn = 1; + } + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + /* Grab the addresses of the arguments from the stack frame. 
*/ + while (i < avn) + { + switch ((*arg_types)->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + case FFI_TYPE_STRUCT: + avalue[i] = &argp[argn]; + break; + + case FFI_TYPE_FLOAT: + if (argn < 6) + { + /* Floats coming from registers need conversion from double + back to float format. */ + *(float *)&argp[argn - 6] = *(double *)&argp[argn - 6]; + avalue[i] = &argp[argn - 6]; + } + else + avalue[i] = &argp[argn]; + break; + + case FFI_TYPE_DOUBLE: + avalue[i] = &argp[argn - (argn < 6 ? 6 : 0)]; + break; + + default: + FFI_ASSERT(0); + } + + argn += ALIGN((*arg_types)->size, SIZEOF_ARG) / SIZEOF_ARG; + i++, arg_types++; + } + + /* Invoke the closure. AVALUE is stored by index above, so it still points at the start of the argument array here. */ + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Tell ffi_closure_osf what register to put the return value in. */ + return cif->flags; } diff --git a/libffi/src/alpha/osf.S b/libffi/src/alpha/osf.S index 2078683cfb3..9ed37cbb00e 100644 --- a/libffi/src/alpha/osf.S +++ b/libffi/src/alpha/osf.S @@ -28,91 +28,155 @@ #define LIBFFI_ASM #include -#define callback $16 -#define ecifp $17 -#define bytes $18 -#define flags $19 -#define raddr $20 -#define fn $21 - -#define flags_ofs 16 -#define raddr_ofs 24 -#define fn_ofs 32 - -#define SIZEOF_FRAME (6*8) - .text - .align 4 + +/* ffi_call_osf (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)()); + + Bit o trickiness here -- ARGS+BYTES is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. 
*/ + + .align 3 .globl ffi_call_osf .ent ffi_call_osf - ffi_call_osf: - lda $30, -SIZEOF_FRAME($30) - stq $26, 0($30) - stq $15, 8($30) - stq flags, flags_ofs($30) - stq raddr, raddr_ofs($30) - stq fn, fn_ofs($30) - mov $30, $15 - .frame $15, SIZEOF_FRAME, $26, 0 - .mask 0x4008000, -SIZEOF_FRAME + .frame $15, 32, $26, 0 + .mask 0x4008000, -32 + addq $16,$17,$1 + mov $16, $30 + stq $26, 0($1) + stq $15, 8($1) + stq $18, 16($1) + mov $1, $15 .prologue 0 - mov callback, $27 # mov callback into place - subq $30, bytes, $30 # allocate stack space - - # Call ffi_prep_args; ecif, bytes and flags are already in place. - mov $30, $16 # push stack arg - jsr $26, ($27), 0 + stq $19, 24($1) + mov $20, $27 # Load up all of the (potential) argument registers. + ldq $16, 0($30) ldt $f16, 0($30) ldt $f17, 8($30) + ldq $17, 8($30) ldt $f18, 16($30) + ldq $18, 16($30) ldt $f19, 24($30) + ldq $19, 24($30) ldt $f20, 32($30) + ldq $20, 32($30) ldt $f21, 40($30) - ldq $16, 48($30) - ldq $17, 56($30) - ldq $18, 64($30) - ldq $19, 72($30) - ldq $20, 80($30) - ldq $21, 88($30) + ldq $21, 40($30) + + # Deallocate the register argument area. + lda $30, 48($30) - # Get rid of the arg reg temp space and call the function. - ldq $27, fn_ofs($15) - lda $30, 12*8($30) jsr $26, ($27), 0 + ldgp $29, 0($26) # If the return value pointer is NULL, assume no return value. - ldq raddr, raddr_ofs($15) - beq raddr, $noretval - - ldq flags, flags_ofs($15) - cmpeq flags, FFI_TYPE_INT, $1 - bne $1, $retint - cmpeq flags, FFI_TYPE_FLOAT, $2 - bne $2, $retfloat - cmpeq flags, FFI_TYPE_DOUBLE, $3 - bne $3, $retdouble - br $retstruct - - .align 3 -$retint: - stq $0, 0(raddr) - br $noretval -$retfloat: - sts $f0, 0(raddr) - br $noretval -$retdouble: - stt $f0, 0(raddr) - -$retstruct: -$noretval: - mov $15, $30 + ldq $19, 24($15) + ldq $18, 16($15) ldq $26, 0($15) + beq $19, $noretval + + # Store the return value out in the proper type. 
+ cmpeq $18, FFI_TYPE_INT, $1 + bne $1, $retint + cmpeq $18, FFI_TYPE_FLOAT, $2 + bne $2, $retfloat + cmpeq $18, FFI_TYPE_DOUBLE, $3 + bne $3, $retdouble + +$noretval: + ldq $15, 8($15) + ret + +$retint: + stq $0, 0($19) + nop + ldq $15, 8($15) + ret + +$retfloat: + sts $f0, 0($19) + nop + ldq $15, 8($15) + ret + +$retdouble: + stt $f0, 0($19) + nop ldq $15, 8($15) - lda $30, SIZEOF_FRAME($30) ret .end ffi_call_osf + +/* ffi_closure_osf(...) + + Receives the closure argument in $1. */ + + .align 3 + .globl ffi_closure_osf + .ent ffi_closure_osf +ffi_closure_osf: + .frame $30, 16*8, $26, 0 + .mask 0x4000000, -16*8 + ldgp $29, 0($27) + subq $30, 16*8, $30 + stq $26, 0($30) + .prologue 1 + + # Store all of the potential argument registers in va_list format. + stt $f16, 4*8($30) + stt $f17, 5*8($30) + stt $f18, 6*8($30) + stt $f19, 7*8($30) + stt $f20, 8*8($30) + stt $f21, 9*8($30) + stq $16, 10*8($30) + stq $17, 11*8($30) + stq $18, 12*8($30) + stq $19, 13*8($30) + stq $20, 14*8($30) + stq $21, 15*8($30) + + # Call ffi_closure_osf_inner to do the bulk of the work. + mov $1, $16 + lda $17, 2*8($30) + lda $18, 10*8($30) + jsr $26, ffi_closure_osf_inner + ldgp $29, 0($26) + ldq $26, 0($30) + + # Load up the return value in the proper type; the type code comes back in $0. + cmpeq $0, FFI_TYPE_INT, $1 + bne $1, $loadint + cmpeq $0, FFI_TYPE_FLOAT, $2 + bne $2, $loadfloat + cmpeq $0, FFI_TYPE_DOUBLE, $3 + bne $3, $loaddouble + + addq $30, 16*8, $30 + ret + + .align 3 +$loadint: + ldq $0, 16($30) + nop + addq $30, 16*8, $30 + ret + +$loadfloat: + lds $f0, 16($30) + nop + addq $30, 16*8, $30 + ret + +$loaddouble: + ldt $f0, 16($30) + nop + addq $30, 16*8, $30 + ret + + .end ffi_closure_osf