ffi64.c (struct register_args): Rename from stackLayout.
* src/x86/ffi64.c (struct register_args): Rename from stackLayout.
(enum x86_64_reg_class): Add X86_64_COMPLEX_X87_CLASS.
(merge_classes): Check for it.
(SSE_CLASS_P): New.
(classify_argument): Pass byte_offset by value; perform all updates
inside struct case.
(examine_argument): Add classes argument; handle
X86_64_COMPLEX_X87_CLASS.
(ffi_prep_args): Merge into ...
(ffi_call): ... here.  Share stack frame with ffi_call_unix64.
(ffi_prep_cif_machdep): Setup cif->flags for proper structure return.
(ffi_fill_return_value): Remove.
(ffi_prep_closure): Remove dead assert.
(ffi_closure_unix64_inner): Rename from ffi_closure_UNIX64_inner.
Rewrite to use struct register_args instead of va_list.  Create
flags for handling structure returns.
* src/x86/unix64.S: Remove dead strings.
(ffi_call_unix64): Rename from ffi_call_UNIX64.  Rewrite to share
stack frame with ffi_call.  Handle structure returns properly.
(float2sse, floatfloat2sse, double2sse): Remove.
(sse2float, sse2double, sse2floatfloat): Remove.
(ffi_closure_unix64): Rename from ffi_closure_UNIX64.  Rewrite
to handle structure returns properly.

From-SVN: r92602
parent fa54a7a743
commit 1a0f488c32
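For context, a minimal caller-side sketch of the case the new structure-return handling in this patch covers: calling a function whose return value is a small structure with one SSE and one integer eightbyte. This is illustrative only, not part of the commit, and assumes only the standard public libffi API (ffi_prep_cif, ffi_call, FFI_FN).

    /* Illustrative only: small-struct return through libffi on x86-64. */
    #include <ffi.h>
    #include <stdio.h>

    typedef struct { double d; int i; } pair;

    static pair make_pair (void) { pair p = { 2.5, 42 }; return p; }

    int main (void)
    {
      ffi_type *elems[] = { &ffi_type_double, &ffi_type_sint, NULL };
      ffi_type pair_type = { 0, 0, FFI_TYPE_STRUCT, elems };
      ffi_cif cif;
      pair result;

      /* No arguments; the interesting part is the struct return type. */
      if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 0, &pair_type, NULL) == FFI_OK)
        {
          ffi_call (&cif, FFI_FN (make_pair), &result, NULL);
          printf ("%g %d\n", result.d, result.i);
        }
      return 0;
    }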
ChangeLog
@@ -1,3 +1,29 @@
2004-12-25  Richard Henderson  <rth@redhat.com>

	* src/x86/ffi64.c (struct register_args): Rename from stackLayout.
	(enum x86_64_reg_class): Add X86_64_COMPLEX_X87_CLASS.
	(merge_classes): Check for it.
	(SSE_CLASS_P): New.
	(classify_argument): Pass byte_offset by value; perform all updates
	inside struct case.
	(examine_argument): Add classes argument; handle
	X86_64_COMPLEX_X87_CLASS.
	(ffi_prep_args): Merge into ...
	(ffi_call): ... here.  Share stack frame with ffi_call_unix64.
	(ffi_prep_cif_machdep): Setup cif->flags for proper structure return.
	(ffi_fill_return_value): Remove.
	(ffi_prep_closure): Remove dead assert.
	(ffi_closure_unix64_inner): Rename from ffi_closure_UNIX64_inner.
	Rewrite to use struct register_args instead of va_list.  Create
	flags for handling structure returns.
	* src/x86/unix64.S: Remove dead strings.
	(ffi_call_unix64): Rename from ffi_call_UNIX64.  Rewrite to share
	stack frame with ffi_call.  Handle structure returns properly.
	(float2sse, floatfloat2sse, double2sse): Remove.
	(sse2float, sse2double, sse2floatfloat): Remove.
	(ffi_closure_unix64): Rename from ffi_closure_UNIX64.  Rewrite
	to handle structure returns properly.

2004-12-08  David Edelsohn  <edelsohn@gnu.org>

	* Makefile.am (AM_MAKEFLAGS): Remove duplicate LIBCFLAGS and
src/x86/ffi64.c
@@ -29,22 +29,20 @@
#include <stdlib.h>
#include <stdarg.h>

/* ffi_prep_args is called by the assembly routine once stack space
has been allocated for the function's arguments */

#ifdef __x86_64__

#define MAX_GPR_REGS 6
#define MAX_SSE_REGS 8
typedef struct

struct register_args
{
/* Registers for argument passing. */
long gpr[MAX_GPR_REGS];
UINT64 gpr[MAX_GPR_REGS];
__int128_t sse[MAX_SSE_REGS];
};

/* Stack space for arguments. */
char argspace[0];
} stackLayout;
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)());

/* All reference to register classes here is identical to the code in
gcc/config/i386/i386.c. Do *not* change one without the other. */
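A small sketch (illustrative only; the struct name is made up and not in the sources) of the register save area that struct register_args describes and that the unix64.S half of the patch indexes: six 8-byte GPRs followed by eight 16-byte SSE slots, 176 bytes in total, which is exactly the constant the assembly adds to step past the register area.

    /* Illustrative only: layout of the register save area. */
    #include <assert.h>
    #include <stdint.h>

    struct register_args_sketch
    {
      uint64_t   gpr[6];   /* %rdi, %rsi, %rdx, %rcx, %r8, %r9 */
      __int128_t sse[8];   /* %xmm0 ... %xmm7 */
    };

    int main (void)
    {
      /* 6*8 + 8*16 = 176 bytes, the size unix64.S deallocates. */
      assert (sizeof (struct register_args_sketch) == 176);
      return 0;
    }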
@@ -55,8 +53,7 @@ typedef struct
use SF or DFmode move instead of DImode to avoid reformating penalties.

Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
whenever possible (upper half does contain padding).
*/
whenever possible (upper half does contain padding). */
enum x86_64_reg_class
{
X86_64_NO_CLASS,
@@ -68,11 +65,14 @@ enum x86_64_reg_class
X86_64_SSEUP_CLASS,
X86_64_X87_CLASS,
X86_64_X87UP_CLASS,
X86_64_COMPLEX_X87_CLASS,
X86_64_MEMORY_CLASS
};

#define MAX_CLASSES 4

#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)

/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
of this code is to classify each 8bytes of incoming argument by the register
class and assign registers accordingly. */
@@ -106,9 +106,14 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
|| class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
return X86_64_INTEGER_CLASS;

/* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
|| class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
/* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
MEMORY is used. */
if (class1 == X86_64_X87_CLASS
|| class1 == X86_64_X87UP_CLASS
|| class1 == X86_64_COMPLEX_X87_CLASS
|| class2 == X86_64_X87_CLASS
|| class2 == X86_64_X87UP_CLASS
|| class2 == X86_64_COMPLEX_X87_CLASS)
return X86_64_MEMORY_CLASS;

/* Rule #6: Otherwise class SSE is used. */
@@ -125,11 +130,8 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
*/
static int
classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
int *byte_offset)
size_t byte_offset)
{
/* First, align to the right place. */
*byte_offset = ALIGN(*byte_offset, type->alignment);

switch (type->type)
{
case FFI_TYPE_UINT8:
@@ -141,13 +143,13 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
case FFI_TYPE_POINTER:
if (((*byte_offset) % 8 + type->size) <= 4)
if (byte_offset + type->size <= 4)
classes[0] = X86_64_INTEGERSI_CLASS;
else
classes[0] = X86_64_INTEGER_CLASS;
return 1;
case FFI_TYPE_FLOAT:
if (((*byte_offset) % 8) == 0)
if (byte_offset == 0)
classes[0] = X86_64_SSESF_CLASS;
else
classes[0] = X86_64_SSE_CLASS;
@@ -175,22 +177,23 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
classes[i] = X86_64_NO_CLASS;

/* Merge the fields of structure. */
for (ptr=type->elements; (*ptr)!=NULL; ptr++)
for (ptr = type->elements; *ptr != NULL; ptr++)
{
int num;

num = classify_argument (*ptr, subclasses, byte_offset);
byte_offset = ALIGN (byte_offset, (*ptr)->alignment);

num = classify_argument (*ptr, subclasses, byte_offset % 8);
if (num == 0)
return 0;
for (i = 0; i < num; i++)
{
int pos = *byte_offset / 8;
int pos = byte_offset / 8;
classes[i + pos] =
merge_classes (subclasses[i], classes[i + pos]);
}

if ((*ptr)->type != FFI_TYPE_STRUCT)
*byte_offset += (*ptr)->size;
byte_offset += (*ptr)->size;
}

/* Final merger cleanup. */
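A brief worked example of the classification these routines implement (illustrative annotation, not taken from the sources):

    /* Worked example of classify_argument / examine_argument. */
    struct example { double d; int i; };   /* 16 bytes, two eightbytes */
    /* classify_argument yields:
         classes[0] = X86_64_SSEDF_CLASS      (the double)
         classes[1] = X86_64_INTEGERSI_CLASS  (the int plus padding)
       so examine_argument reports one SSE and one integer register,
       and the value is passed or returned in registers rather than
       in memory.  */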
@@ -222,155 +225,196 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
}

/* Examine the argument and return set number of register required in each
class. Return 0 iff parameter should be passed in memory. */
class. Return zero iff parameter should be passed in memory, otherwise
the number of registers. */

static int
examine_argument (ffi_type *type, int in_return, int *int_nregs,int *sse_nregs)
examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
_Bool in_return, int *pngpr, int *pnsse)
{
enum x86_64_reg_class class[MAX_CLASSES];
int offset = 0;
int n;

n = classify_argument (type, class, &offset);
int i, n, ngpr, nsse;

n = classify_argument (type, classes, 0);
if (n == 0)
return 0;

*int_nregs = 0;
*sse_nregs = 0;
for (n--; n>=0; n--)
switch (class[n])
ngpr = nsse = 0;
for (i = 0; i < n; ++i)
switch (classes[i])
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
(*int_nregs)++;
ngpr++;
break;
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
(*sse_nregs)++;
nsse++;
break;
case X86_64_NO_CLASS:
case X86_64_SSEUP_CLASS:
break;
case X86_64_X87_CLASS:
case X86_64_X87UP_CLASS:
if (!in_return)
return 0;
break;
case X86_64_COMPLEX_X87_CLASS:
return in_return != 0;
default:
abort ();
}
return 1;

*pngpr = ngpr;
*pnsse = nsse;

return n;
}

/* Functions to load floats and double to an SSE register placeholder. */
extern void float2sse (float, __int128_t *);
extern void double2sse (double, __int128_t *);
extern void floatfloat2sse (void *, __int128_t *);
/* Perform machine dependent cif processing. */

/* Functions to put the floats and doubles back. */
extern float sse2float (__int128_t *);
extern double sse2double (__int128_t *);
extern void sse2floatfloat(__int128_t *, void *);

/*@-exportheader@*/
void
ffi_prep_args (stackLayout *stack, extended_cif *ecif)
/*@=exportheader@*/
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
int gprcount, ssecount, i, g, s;
void **p_argv;
void *argp = &stack->argspace;
ffi_type **p_arg;
int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
enum x86_64_reg_class classes[MAX_CLASSES];
size_t bytes;

/* First check if the return value should be passed in memory. If so,
pass the pointer as the first argument. */
gprcount = ssecount = 0;
if (ecif->cif->rtype->type != FFI_TYPE_VOID
&& examine_argument (ecif->cif->rtype, 1, &g, &s) == 0)
stack->gpr[gprcount++] = (long) ecif->rvalue;

for (i=ecif->cif->nargs, p_arg=ecif->cif->arg_types, p_argv = ecif->avalue;
i!=0; i--, p_arg++, p_argv++)
flags = cif->rtype->type;
if (flags != FFI_TYPE_VOID)
{
int in_register = 0;

switch ((*p_arg)->type)
n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
if (n == 0)
{
case FFI_TYPE_SINT8:
case FFI_TYPE_SINT16:
case FFI_TYPE_SINT32:
case FFI_TYPE_SINT64:
case FFI_TYPE_UINT8:
case FFI_TYPE_UINT16:
case FFI_TYPE_UINT32:
case FFI_TYPE_UINT64:
case FFI_TYPE_POINTER:
if (gprcount < MAX_GPR_REGS)
{
stack->gpr[gprcount] = 0;
stack->gpr[gprcount++] = *(long long *)(*p_argv);
in_register = 1;
}
break;

case FFI_TYPE_FLOAT:
if (ssecount < MAX_SSE_REGS)
{
float2sse (*(float *)(*p_argv), &stack->sse[ssecount++]);
in_register = 1;
}
break;

case FFI_TYPE_DOUBLE:
if (ssecount < MAX_SSE_REGS)
{
double2sse (*(double *)(*p_argv), &stack->sse[ssecount++]);
in_register = 1;
}
break;
/* The return value is passed in memory. A pointer to that
memory is the first argument. Allocate a register for it. */
gprcount++;
/* We don't have to do anything in asm for the return. */
flags = FFI_TYPE_VOID;
}

if (in_register)
continue;

/* Either all places in registers where filled, or this is a
type that potentially goes into a memory slot. */
if (examine_argument (*p_arg, 0, &g, &s) == 0
|| gprcount + g > MAX_GPR_REGS || ssecount + s > MAX_SSE_REGS)
else if (flags == FFI_TYPE_STRUCT)
{
/* Pass this argument in memory. */
argp = (void *)ALIGN(argp, (*p_arg)->alignment);
/* Stack arguments are *always* at least 8 byte aligned. */
argp = (void *)ALIGN(argp, 8);
memcpy (argp, *p_argv, (*p_arg)->size);
argp += (*p_arg)->size;
/* Mark which registers the result appears in. */
_Bool sse0 = SSE_CLASS_P (classes[0]);
_Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
if (sse0 && !sse1)
flags |= 1 << 8;
else if (!sse0 && sse1)
flags |= 1 << 9;
else if (sse0 && sse1)
flags |= 1 << 10;
/* Mark the true size of the structure. */
flags |= cif->rtype->size << 11;
}
}
cif->flags = flags;

/* Go over all arguments and determine the way they should be passed.
If it's in a register and there is space for it, let that be so. If
not, add it's size to the stack byte count. */
for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
{
if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
|| gprcount + ngpr > MAX_GPR_REGS
|| ssecount + nsse > MAX_SSE_REGS)
{
long align = cif->arg_types[i]->alignment;

if (align < 8)
align = 8;

bytes = ALIGN(bytes, align);
bytes += cif->arg_types[i]->size;
}
else
{
/* All easy cases are eliminated. Now fire the big guns. */
gprcount += ngpr;
ssecount += nsse;
}
}
cif->bytes = bytes;

enum x86_64_reg_class classes[MAX_CLASSES];
int offset = 0, j, num;
void *a;
return FFI_OK;
}
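A sketch of the cif->flags layout that ffi_prep_cif_machdep now produces for structure returns (illustrative annotation only; these names do not appear in the sources):

    /* cif->flags packing for struct returns, as set above:
         bits  0-7   FFI_TYPE_* code of the return value
         bit   8     first eightbyte in SSE, second in an integer register
         bit   9     first eightbyte in an integer register, second in SSE
         bit  10     both eightbytes in SSE registers
         bits 11-31  size of the returned structure in bytes
       unix64.S tests 0x100 / 0x200 / 0x400 and shifts right by 11 to
       recover the size for its final rep movsb copy.  */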

num = classify_argument (*p_arg, classes, &offset);
for (j=0, a=*p_argv; j<num; j++, a+=8)
void
ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
{
enum x86_64_reg_class classes[MAX_CLASSES];
char *stack, *argp;
ffi_type **arg_types;
int gprcount, ssecount, ngpr, nsse, i, avn;
_Bool ret_in_memory;
struct register_args *reg_args;

/* Can't call 32-bit mode from 64-bit mode. */
FFI_ASSERT (cif->abi == FFI_UNIX64);

/* If the return value is a struct and we don't have a return value
address then we need to make one. Note the setting of flags to
VOID above in ffi_prep_cif_machdep. */
ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
&& cif->flags == FFI_TYPE_VOID);
if (rvalue == NULL && ret_in_memory)
rvalue = alloca (cif->rtype->size);

/* Allocate the space for the arguments, plus 4 words of temp space. */
stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
reg_args = (struct register_args *) stack;
argp = stack + sizeof (struct register_args);

gprcount = ssecount = 0;

/* If the return value is passed in memory, add the pointer as the
first integer argument. */
if (ret_in_memory)
reg_args->gpr[gprcount++] = (long) rvalue;

avn = cif->nargs;
arg_types = cif->arg_types;

for (i = 0; i < avn; ++i)
{
size_t size = arg_types[i]->size;
int n;

n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
if (n == 0
|| gprcount + ngpr > MAX_GPR_REGS
|| ssecount + nsse > MAX_SSE_REGS)
{
long align = arg_types[i]->alignment;

/* Stack arguments are *always* at least 8 byte aligned. */
if (align < 8)
align = 8;

/* Pass this argument in memory. */
argp = (void *) ALIGN (argp, align);
memcpy (argp, avalue[i], size);
argp += size;
}
else
{
/* The argument is passed entirely in registers. */
char *a = (char *) avalue[i];
int j;

for (j = 0; j < n; j++, a += 8, size -= 8)
{
switch (classes[j])
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
stack->gpr[gprcount++] = *(long long *)a;
reg_args->gpr[gprcount] = 0;
memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
gprcount++;
break;
case X86_64_SSE_CLASS:
floatfloat2sse (a, &stack->sse[ssecount++]);
case X86_64_SSEDF_CLASS:
reg_args->sse[ssecount++] = *(UINT64 *) a;
break;
case X86_64_SSESF_CLASS:
float2sse (*(float *)a, &stack->sse[ssecount++]);
break;
case X86_64_SSEDF_CLASS:
double2sse (*(double *)a, &stack->sse[ssecount++]);
reg_args->sse[ssecount++] = *(UINT32 *) a;
break;
default:
abort();
@@ -378,203 +422,13 @@ ffi_prep_args (stackLayout *stack, extended_cif *ecif)
}
}
}

ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
cif->flags, rvalue, fn);
}
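An illustrative sketch (the struct name is made up; the real code just uses alloca and pointer arithmetic) of the block ffi_call allocates and hands to ffi_call_unix64, which then treats args+bytes as the base of its own local frame:

    /* Illustrative only: the shared ffi_call / ffi_call_unix64 frame. */
    struct unix64_frame_sketch
    {
      struct register_args regs;  /* loaded into the GPRs and %xmm0-%xmm7 */
      char stack_args[];          /* cif->bytes of memory-passed arguments */
      /* followed by the 4 temp words the assembly fills with flags,
         raddr, the old %rbp and the relocated return address.  */
    };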

/* Perform machine dependent cif processing. */
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
int gprcount, ssecount, i, g, s;

gprcount = ssecount = 0;

/* Reset the byte count. We handle this size estimation here. */
cif->bytes = 0;

/* If the return value should be passed in memory, pass the pointer
as the first argument. The actual memory isn't allocated here. */
if (cif->rtype->type != FFI_TYPE_VOID
&& examine_argument (cif->rtype, 1, &g, &s) == 0)
gprcount = 1;

/* Go over all arguments and determine the way they should be passed.
If it's in a register and there is space for it, let that be so. If
not, add it's size to the stack byte count. */
for (i=0; i<cif->nargs; i++)
{
if (examine_argument (cif->arg_types[i], 0, &g, &s) == 0
|| gprcount + g > MAX_GPR_REGS || ssecount + s > MAX_SSE_REGS)
{
/* This is passed in memory. First align to the basic type. */
cif->bytes = ALIGN(cif->bytes, cif->arg_types[i]->alignment);

/* Stack arguments are *always* at least 8 byte aligned. */
cif->bytes = ALIGN(cif->bytes, 8);

/* Now add the size of this argument. */
cif->bytes += cif->arg_types[i]->size;
}
else
{
gprcount += g;
ssecount += s;
}
}

/* Set the flag for the closures return. */
switch (cif->rtype->type)
{
case FFI_TYPE_VOID:
case FFI_TYPE_STRUCT:
case FFI_TYPE_SINT64:
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
case FFI_TYPE_LONGDOUBLE:
cif->flags = (unsigned) cif->rtype->type;
break;

case FFI_TYPE_UINT64:
cif->flags = FFI_TYPE_SINT64;
break;

default:
cif->flags = FFI_TYPE_INT;
break;
}

return FFI_OK;
}

typedef struct
{
long gpr[2];
__int128_t sse[2];
long double st0;
} return_value;

void
ffi_fill_return_value (return_value *rv, extended_cif *ecif)
{
enum x86_64_reg_class classes[MAX_CLASSES];
int i = 0, num;
long *gpr = rv->gpr;
__int128_t *sse = rv->sse;
signed char sc;
signed short ss;

/* This is needed because of the way x86-64 handles signed short
integers. */
switch (ecif->cif->rtype->type)
{
case FFI_TYPE_SINT8:
sc = *(signed char *)gpr;
*(long long *)ecif->rvalue = (long long)sc;
return;
case FFI_TYPE_SINT16:
ss = *(signed short *)gpr;
*(long long *)ecif->rvalue = (long long)ss;
return;
default:
/* Just continue. */
;
}

num = classify_argument (ecif->cif->rtype, classes, &i);

if (num == 0)
/* Return in memory. */
ecif->rvalue = (void *) rv->gpr[0];
else if (num == 2 && classes[0] == X86_64_X87_CLASS &&
classes[1] == X86_64_X87UP_CLASS)
/* This is a long double (this is easiest to handle this way instead
of an eightbyte at a time as in the loop below. */
*((long double *)ecif->rvalue) = rv->st0;
else
{
void *a;

for (i=0, a=ecif->rvalue; i<num; i++, a+=8)
{
switch (classes[i])
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
*(long long *)a = *gpr;
gpr++;
break;
case X86_64_SSE_CLASS:
sse2floatfloat (sse++, a);
break;
case X86_64_SSESF_CLASS:
*(float *)a = sse2float (sse++);
break;
case X86_64_SSEDF_CLASS:
*(double *)a = sse2double (sse++);
break;
default:
abort();
}
}
}
}

/*@-declundef@*/
/*@-exportheader@*/
extern void ffi_call_UNIX64(void (*)(stackLayout *, extended_cif *),
void (*) (return_value *, extended_cif *),
/*@out@*/ extended_cif *,
unsigned, /*@out@*/ unsigned *, void (*fn)());
/*@=declundef@*/
/*@=exportheader@*/

void ffi_call(/*@dependent@*/ ffi_cif *cif,
void (*fn)(),
/*@out@*/ void *rvalue,
/*@dependent@*/ void **avalue)
{
extended_cif ecif;
int dummy;

ecif.cif = cif;
ecif.avalue = avalue;

/* If the return value is a struct and we don't have a return */
/* value address then we need to make one */

if ((rvalue == NULL) &&
(examine_argument (cif->rtype, 1, &dummy, &dummy) == 0))
{
/*@-sysunrecog@*/
ecif.rvalue = alloca(cif->rtype->size);
/*@=sysunrecog@*/
}
else
ecif.rvalue = rvalue;

/* Stack must always be 16byte aligned. Make it so. */
cif->bytes = ALIGN(cif->bytes, 16);

switch (cif->abi)
{
case FFI_SYSV:
/* Calling 32bit code from 64bit is not possible */
FFI_ASSERT(0);
break;

case FFI_UNIX64:
/*@-usedef@*/
ffi_call_UNIX64 (ffi_prep_args, ffi_fill_return_value, &ecif,
cif->bytes, ecif.rvalue, fn);
/*@=usedef@*/
break;

default:
FFI_ASSERT(0);
break;
}
}

extern void ffi_closure_UNIX64(void);
extern void ffi_closure_unix64(void);

ffi_status
ffi_prep_closure (ffi_closure* closure,
@@ -584,14 +438,12 @@ ffi_prep_closure (ffi_closure* closure,
{
volatile unsigned short *tramp;

/* FFI_ASSERT (cif->abi == FFI_OSF); */

tramp = (volatile unsigned short *) &closure->tramp[0];
tramp[0] = 0xbb49; /* mov <code>, %r11 */
tramp[5] = 0xba49; /* mov <data>, %r10 */
tramp[10] = 0xff49; /* jmp *%r11 */
tramp[11] = 0x00e3;
*(void * volatile *) &tramp[1] = ffi_closure_UNIX64;
*(void * volatile *) &tramp[1] = ffi_closure_unix64;
*(void * volatile *) &tramp[6] = closure;

closure->cif = cif;
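Decoded, the trampoline bytes laid down above form the following sequence (illustrative annotation only):

    /* byte  0: 49 bb <imm64>   movabs $ffi_closure_unix64, %r11
       byte 10: 49 ba <imm64>   movabs $closure, %r10
       byte 20: 49 ff e3        jmp    *%r11
       so the entry stub jumps to ffi_closure_unix64 with the address of
       the ffi_closure object in %r10, which the assembly moves to %rdi
       before calling ffi_closure_unix64_inner.  */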
@@ -602,107 +454,109 @@ ffi_prep_closure (ffi_closure* closure,
}

int
ffi_closure_UNIX64_inner(ffi_closure *closure, va_list l, void *rp)
ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
struct register_args *reg_args, char *argp)
{
ffi_cif *cif;
void **avalue;
ffi_type **arg_types;
long i, avn, argn;
long i, avn;
int gprcount, ssecount, ngpr, nsse;
int ret;

cif = closure->cif;
avalue = alloca(cif->nargs * sizeof(void *));
gprcount = ssecount = 0;

argn = 0;
ret = cif->rtype->type;
if (ret != FFI_TYPE_VOID)
{
enum x86_64_reg_class classes[MAX_CLASSES];
int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
if (n == 0)
{
/* The return value goes in memory. Arrange for the closure
return value to go directly back to the original caller. */
rvalue = (void *) reg_args->gpr[gprcount++];
/* We don't have to do anything in asm for the return. */
ret = FFI_TYPE_VOID;
}
else if (ret == FFI_TYPE_STRUCT && n == 2)
{
/* Mark which register the second word of the structure goes in. */
_Bool sse0 = SSE_CLASS_P (classes[0]);
_Bool sse1 = SSE_CLASS_P (classes[1]);
if (!sse0 && sse1)
ret |= 1 << 8;
else if (sse0 && !sse1)
ret |= 1 << 9;
}
}

i = 0;
avn = cif->nargs;
arg_types = cif->arg_types;

/* Grab the addresses of the arguments from the stack frame. */
while (i < avn)
for (i = 0; i < avn; ++i)
{
switch (arg_types[i]->type)
enum x86_64_reg_class classes[MAX_CLASSES];
int n;

n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
if (n == 0
|| gprcount + ngpr > MAX_GPR_REGS
|| ssecount + nsse > MAX_SSE_REGS)
{
case FFI_TYPE_SINT8:
case FFI_TYPE_UINT8:
case FFI_TYPE_SINT16:
case FFI_TYPE_UINT16:
case FFI_TYPE_SINT32:
case FFI_TYPE_UINT32:
case FFI_TYPE_SINT64:
case FFI_TYPE_UINT64:
case FFI_TYPE_POINTER:
{
if (l->gp_offset > 48-8)
{
avalue[i] = l->overflow_arg_area;
l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
}
else
{
avalue[i] = (char *)l->reg_save_area + l->gp_offset;
l->gp_offset += 8;
}
}
break;
long align = arg_types[i]->alignment;

case FFI_TYPE_STRUCT:
/* FIXME */
FFI_ASSERT(0);
break;
/* Stack arguments are *always* at least 8 byte aligned. */
if (align < 8)
align = 8;

case FFI_TYPE_DOUBLE:
{
if (l->fp_offset > 176-16)
{
avalue[i] = l->overflow_arg_area;
l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
}
else
{
avalue[i] = (char *)l->reg_save_area + l->fp_offset;
l->fp_offset += 16;
}
}
#if DEBUG_FFI
fprintf (stderr, "double arg %d = %g\n", i, *(double *)avalue[i]);
#endif
break;

case FFI_TYPE_FLOAT:
{
if (l->fp_offset > 176-16)
{
avalue[i] = l->overflow_arg_area;
l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
}
else
{
avalue[i] = (char *)l->reg_save_area + l->fp_offset;
l->fp_offset += 16;
}
}
#if DEBUG_FFI
fprintf (stderr, "float arg %d = %g\n", i, *(float *)avalue[i]);
#endif
break;

default:
FFI_ASSERT(0);
/* Pass this argument in memory. */
argp = (void *) ALIGN (argp, align);
avalue[i] = argp;
argp += arg_types[i]->size;
}
/* If the argument is in a single register, or two consecutive
registers, then we can use that address directly. */
else if (n == 1
|| (n == 2
&& SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
{
/* The argument is in a single register. */
if (SSE_CLASS_P (classes[0]))
{
avalue[i] = &reg_args->sse[ssecount];
ssecount += n;
}
else
{
avalue[i] = &reg_args->gpr[gprcount];
gprcount += n;
}
}
/* Otherwise, allocate space to make them consecutive. */
else
{
char *a = alloca (16);
int j;

argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
i++;
avalue[i] = a;
for (j = 0; j < n; j++, a += 8)
{
if (SSE_CLASS_P (classes[j]))
memcpy (a, &reg_args->sse[ssecount++], 8);
else
memcpy (a, &reg_args->gpr[gprcount++], 8);
}
}
}

/* Invoke the closure. */
(closure->fun) (cif, rp, avalue, closure->user_data);
closure->fun (cif, rvalue, avalue, closure->user_data);

/* FIXME: Structs not supported. */
FFI_ASSERT(cif->rtype->type != FFI_TYPE_STRUCT);

/* Tell ffi_closure_UNIX64 how to perform return type promotions. */

return cif->rtype->type;
/* Tell assembly how to perform return type promotions. */
return ret;
}
#endif /* ifndef __x86_64__ */

#endif /* __x86_64__ */
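Before the assembly half of the patch, a minimal closure-side sketch of the path that ends in ffi_closure_unix64_inner. It is illustrative only and assumes the libffi API of this era, where the caller supplies the ffi_closure storage itself and must ensure it is executable.

    /* Illustrative only: driving a closure through the new entry path. */
    #include <ffi.h>
    #include <stdio.h>

    static void handler (ffi_cif *cif, void *ret, void **args, void *userdata)
    {
      (void) cif; (void) userdata;
      *(ffi_arg *) ret = *(int *) args[0] + 1;   /* promote to a full word */
    }

    int main (void)
    {
      static ffi_closure closure;   /* must live in executable memory */
      ffi_type *argtypes[] = { &ffi_type_sint };
      ffi_cif cif;
      int (*fn) (int);

      if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, argtypes) == FFI_OK
          && ffi_prep_closure (&closure, &cif, handler, NULL) == FFI_OK)
        {
          fn = (int (*) (int)) &closure;        /* enters via the trampoline */
          printf ("%d\n", fn (41));
        }
      return 0;
    }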
src/x86/unix64.S
@@ -28,276 +28,348 @@
#include <fficonfig.h>
#include <ffi.h>

.section .rodata
.LC0:
.string "asm in progress %lld\n"
.LC1:
.string "asm in progress\n"
.text

/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)());

Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
for this function. This has been allocated by ffi_call. We also
deallocate some of the stack that has been alloca'd. */

.align 2
.globl ffi_call_UNIX64
.type ffi_call_UNIX64,@function
.globl ffi_call_unix64
.type ffi_call_unix64,@function

ffi_call_UNIX64:
.LFB1:
pushq %rbp
.LCFI0:
movq %rsp, %rbp
.LCFI1:
/* Save all arguments */
subq $48, %rsp
.LCFI2:
movq %rdi, -8(%rbp) /* ffi_prep_args */
movq %rsi, -16(%rbp) /* ffi_fill_return_value */
movq %rdx, -24(%rbp) /* ecif */
movq %rcx, -32(%rbp) /* cif->bytes */
movq %r8, -40(%rbp) /* ecif.rvalue */
movq %r9, -48(%rbp) /* fn */
ffi_call_unix64:
.LUW0:
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
movq %rcx, 8(%rax) /* Save raddr. */
movq %rbp, 16(%rax) /* Save old frame pointer. */
movq %r10, 24(%rax) /* Relocate return address. */
movq %rax, %rbp /* Finalize local stack frame. */
.LUW1:
movq %rdi, %r10 /* Save a copy of the register area. */
movq %r8, %r11 /* Save a copy of the target fn. */

/* Make room for all of the new args and the register args */
addl $176, %ecx
.LCFI3:
subq %rcx, %rsp
.LCFI4:
/* Setup the call to ffi_prep_args. */
movq %rdi, %rax /* &ffi_prep_args */
movq %rsp, %rdi /* stackLayout */
movq %rdx, %rsi /* ecif */
call *%rax /* ffi_prep_args(stackLayout, ecif);*/
/* Load up all argument registers. */
movq (%r10), %rdi
movq 8(%r10), %rsi
movq 16(%r10), %rdx
movq 24(%r10), %rcx
movq 32(%r10), %r8
movq 40(%r10), %r9
movdqa 48(%r10), %xmm0
movdqa 64(%r10), %xmm1
movdqa 80(%r10), %xmm2
movdqa 96(%r10), %xmm3
movdqa 112(%r10), %xmm4
movdqa 128(%r10), %xmm5
movdqa 144(%r10), %xmm6
movdqa 160(%r10), %xmm7

/* ffi_prep_args have put all the register contents into the */
/* stackLayout struct. Now put the register values in place. */
movq (%rsp), %rdi
movq 8(%rsp), %rsi
movq 16(%rsp), %rdx
movq 24(%rsp), %rcx
movq 32(%rsp), %r8
movq 40(%rsp), %r9
movaps 48(%rsp), %xmm0
movaps 64(%rsp), %xmm1
movaps 80(%rsp), %xmm2
movaps 96(%rsp), %xmm3
movaps 112(%rsp), %xmm4
movaps 128(%rsp), %xmm5
movaps 144(%rsp), %xmm6
movaps 160(%rsp), %xmm7
/* Deallocate the reg arg area. */
leaq 176(%r10), %rsp

/* Remove space for stackLayout so stack arguments are placed
correctly for the call. */
.LCFI5:
addq $176, %rsp
.LCFI6:
/* Call the user function. */
call *-48(%rbp)
call *%r11

/* Make stack space for the return_value struct. */
subq $64, %rsp
/* Deallocate stack arg area; local stack frame in redzone. */
leaq 24(%rbp), %rsp

/* Fill in all potential return values to this struct. */
movq %rax, (%rsp)
movq %rdx, 8(%rsp)
movaps %xmm0, 16(%rsp)
movaps %xmm1, 32(%rsp)
fstpt 48(%rsp)
movq 0(%rbp), %rcx /* Reload flags. */
movq 8(%rbp), %rdi /* Reload raddr. */
movq 16(%rbp), %rbp /* Reload old frame pointer. */
.LUW2:

/* Now call ffi_fill_return_value. */
movq %rsp, %rdi /* struct return_value */
movq -24(%rbp), %rsi /* ecif */
movq -16(%rbp), %rax /* &ffi_fill_return_value */
call *%rax /* call it */
/* The first byte of the flags contains the FFI_TYPE. */
movzbl %cl, %r10d
leaq .Lstore_table(%rip), %r11
movslq (%r11, %r10, 4), %r10
addq %r11, %r10
jmp *%r10

/* And the work is done. */
leave
ret
.LFE1:
.ffi_call_UNIX64_end:
.size ffi_call_UNIX64,.ffi_call_UNIX64_end-ffi_call_UNIX64
.section .rodata
.Lstore_table:
.long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */
.long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */
.long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */
.long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */
.long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
.long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */
.long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */
.long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */
.long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */
.long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */
.long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */
.long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */
.long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */
.long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */
.long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */

.text
.align 2
.globl float2sse
.type float2sse,@function
float2sse:
/* Save the contents of this sse-float in a pointer. */
movaps %xmm0, (%rdi)
.text
.align 2
.Lst_void:
ret
.align 2

.Lst_uint8:
movzbq %al, %rax
movq %rax, (%rdi)
ret
.align 2
.Lst_sint8:
movsbq %al, %rax
movq %rax, (%rdi)
ret
.align 2
.Lst_uint16:
movzwq %ax, %rax
movq %rax, (%rdi)
.align 2
.Lst_sint16:
movswq %ax, %rax
movq %rax, (%rdi)
ret
.align 2
.Lst_uint32:
movl %eax, %eax
movq %rax, (%rdi)
.align 2
.Lst_sint32:
cltq
movq %rax, (%rdi)
ret
.align 2
.Lst_int64:
movq %rax, (%rdi)
ret

.align 2
.globl floatfloat2sse
.type floatfloat2sse,@function
floatfloat2sse:
/* Save the contents of these two sse-floats in a pointer. */
movq (%rdi), %xmm0
movaps %xmm0, (%rsi)
.align 2
.Lst_float:
movss %xmm0, (%rdi)
ret
.align 2
.Lst_double:
movsd %xmm0, (%rdi)
ret
.Lst_ldouble:
fstpt (%rdi)
ret

.align 2
.globl double2sse
.type double2sse,@function
double2sse:
/* Save the contents of this sse-double in a pointer. */
movaps %xmm0, (%rdi)
.align 2
.Lst_struct:
leaq -20(%rsp), %rsi /* Scratch area in redzone. */

/* We have to locate the values now, and since we don't want to
write too much data into the user's return value, we spill the
value to a 16 byte scratch area first. Bits 8, 9, and 10
control where the values are located. Only one of the three
bits will be set; see ffi_prep_cif_machdep for the pattern. */
movd %xmm0, %r10
movd %xmm1, %r11
testl $0x100, %ecx
cmovnz %rax, %rdx
cmovnz %r10, %rax
testl $0x200, %ecx
cmovnz %r10, %rdx
testl $0x400, %ecx
cmovnz %r10, %rax
cmovnz %r11, %rdx
movq %rax, (%rsi)
movq %rdx, 8(%rsi)

/* Bits 11-31 contain the true size of the structure. Copy from
the scratch area to the true destination. */
shrl $11, %ecx
rep movsb
ret
.LUW3:
.size ffi_call_unix64,.-ffi_call_unix64

.align 2
.globl sse2float
.type sse2float,@function
sse2float:
/* Save the contents of this sse-float in a pointer. */
movaps (%rdi), %xmm0
ret
.globl ffi_closure_unix64
.type ffi_closure_unix64,@function

.align 2
.globl sse2double
.type sse2double,@function
sse2double:
/* Save the contents of this pointer in a sse-double. */
movaps (%rdi), %xmm0
ret
ffi_closure_unix64:
.LUW4:
subq $200, %rsp
.LUW5:

.align 2
.globl sse2floatfloat
.type sse2floatfloat,@function
sse2floatfloat:
/* Save the contents of this pointer in two sse-floats. */
movaps (%rdi), %xmm0
movq %xmm0, (%rsi)
ret
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rcx, 24(%rsp)
movq %r8, 32(%rsp)
movq %r9, 40(%rsp)
movdqa %xmm0, 48(%rsp)
movdqa %xmm1, 64(%rsp)
movdqa %xmm2, 80(%rsp)
movdqa %xmm3, 96(%rsp)
movdqa %xmm4, 112(%rsp)
movdqa %xmm5, 128(%rsp)
movdqa %xmm6, 144(%rsp)
movdqa %xmm7, 160(%rsp)

.align 2
.globl ffi_closure_UNIX64
.type ffi_closure_UNIX64,@function

ffi_closure_UNIX64:
.LFB2:
pushq %rbp
.LCFI10:
movq %rsp, %rbp
.LCFI11:
subq $240, %rsp
.LCFI12:
movq %rdi, -176(%rbp)
movq %rsi, -168(%rbp)
movq %rdx, -160(%rbp)
movq %rcx, -152(%rbp)
movq %r8, -144(%rbp)
movq %r9, -136(%rbp)
/* FIXME: We can avoid all this stashing of XMM registers by
(in ffi_prep_closure) computing the number of
floating-point args and moving it into %rax before calling
this function. Once this is done, uncomment the next few
lines and only the essential XMM registers will be written
to memory. This is a significant saving. */
/* movzbl %al, %eax */
/* movq %rax, %rdx */
/* leaq 0(,%rdx,4), %rax */
/* leaq 2f(%rip), %rdx */
/* subq %rax, %rdx */
leaq -1(%rbp), %rax
/* jmp *%rdx */
movaps %xmm7, -15(%rax)
movaps %xmm6, -31(%rax)
movaps %xmm5, -47(%rax)
movaps %xmm4, -63(%rax)
movaps %xmm3, -79(%rax)
movaps %xmm2, -95(%rax)
movaps %xmm1, -111(%rax)
movaps %xmm0, -127(%rax)
2:
movl %edi, -180(%rbp)
movl $0, -224(%rbp)
movl $48, -220(%rbp)
leaq 16(%rbp), %rax
movq %rax, -216(%rbp)
leaq -176(%rbp), %rdx
movq %rdx, -208(%rbp)
leaq -224(%rbp), %rsi
movq %r10, %rdi
leaq 176(%rsp), %rsi
movq %rsp, %rdx
call ffi_closure_UNIX64_inner@PLT
leaq 208(%rsp), %rcx
call ffi_closure_unix64_inner@PLT

cmpl $FFI_TYPE_FLOAT, %eax
je 1f
cmpl $FFI_TYPE_DOUBLE, %eax
je 2f
cmpl $FFI_TYPE_LONGDOUBLE, %eax
je 3f
cmpl $FFI_TYPE_STRUCT, %eax
je 4f
popq %rax
leave
ret
1:
2:
3:
movaps -240(%rbp), %xmm0
leave
ret
4:
leave
/* Deallocate stack frame early; return value is now in redzone. */
addq $200, %rsp
.LUW6:

/* The first byte of the return value contains the FFI_TYPE. */
movzbl %al, %r10d
leaq .Lload_table(%rip), %r11
movslq (%r11, %r10, 4), %r10
addq %r11, %r10
jmp *%r10

.section .rodata
.Lload_table:
.long .Lld_void-.Lload_table /* FFI_TYPE_VOID */
.long .Lld_int32-.Lload_table /* FFI_TYPE_INT */
.long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */
.long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */
.long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */
.long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */
.long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */
.long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */
.long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */
.long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */
.long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */
.long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */
.long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */
.long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */
.long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */

.text
.align 2
.Lld_void:
ret
.LFE2:

.section .eh_frame,EH_FRAME_FLAGS,@progbits
.Lframe0:
.long .LECIE1-.LSCIE1

.align 2
.Lld_int8:
movzbl -24(%rsp), %eax
ret
.align 2
.Lld_int16:
movzwl -24(%rsp), %eax
ret
.align 2
.Lld_int32:
movl -24(%rsp), %eax
ret
.align 2
.Lld_int64:
movq -24(%rsp), %rax
ret

.align 2
.Lld_float:
movss -24(%rsp), %xmm0
ret
.align 2
.Lld_double:
movsd -24(%rsp), %xmm0
ret
.align 2
.Lld_ldouble:
fldt -24(%rsp)
ret

.align 2
.Lld_struct:
/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
%rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
both rdx and xmm1 with the second word. For the remaining,
bit 8 set means xmm0 gets the second word, and bit 9 means
that rax gets the second word. */
movq -24(%rsp), %rcx
movq -16(%rsp), %rdx
movq -16(%rsp), %xmm1
testl $0x100, %eax
cmovnz %rdx, %rcx
movd %rcx, %xmm0
testl $0x200, %eax
movq -24(%rsp), %rax
cmovnz %rdx, %rax
ret
.LUW7:
.size ffi_closure_unix64,.-ffi_closure_unix64

.section .eh_frame,"a",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1 /* CIE Length */
.LSCIE1:
.long 0x0
.byte 0x1
.string "zR"
.uleb128 0x1
.sleb128 -8
.byte 0x10
.uleb128 0x1
.byte 0x1b
.byte 0xc
.uleb128 0x7
.uleb128 0x8
.byte 0x90
.uleb128 0x1
.align 8
.long 0 /* CIE Identifier Tag */
.byte 1 /* CIE Version */
.ascii "zR\0" /* CIE Augmentation */
.uleb128 1 /* CIE Code Alignment Factor */
.sleb128 -8 /* CIE Data Alignment Factor */
.byte 0x10 /* CIE RA Column */
.uleb128 1 /* Augmentation size */
.byte 0x1b /* FDE Encoding (pcrel sdata4) */
.byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
.uleb128 7
.uleb128 8
.byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */
.uleb128 1
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.long .LEFDE1-.LASFDE1 /* FDE Length */
.LASFDE1:
.long .LASFDE1-.Lframe0
.long .LASFDE1-.Lframe1 /* FDE CIE offset */
.long .LUW0-. /* FDE initial location */
.long .LUW3-.LUW0 /* FDE address range */
.uleb128 0x0 /* Augmentation size */

.long .LFB1-.
.long .LFE1-.LFB1
.uleb128 0x0
.byte 0x4 # DW_CFA_advance_loc4
.long .LCFI0-.LFB1
.byte 0xe # DW_CFA_def_cfa_offset
.uleb128 0x10
.byte 0x86 # DW_CFA_offset: r6 at cfa-16
.uleb128 0x2
.byte 0x4 # DW_CFA_advance_loc4
.long .LCFI1-.LCFI0
.byte 0x86 # DW_CFA_offset: r6 at cfa-16
.uleb128 0x2
.byte 0xd # DW_CFA_def_cfa_reg: r6
.uleb128 0x6
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW1-.LUW0

/* New stack frame based off rbp. This is a itty bit of unwind
trickery in that the CFA *has* changed. There is no easy way
to describe it correctly on entry to the function. Fortunately,
it doesn't matter too much since at all points we can correctly
unwind back to ffi_call. Note that the location to which we
moved the return address is (the new) CFA-8, so from the
perspective of the unwind info, it hasn't moved. */
.byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
.uleb128 6
.uleb128 32
.byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
.uleb128 2

.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW2-.LUW3
.byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
.uleb128 7
.uleb128 8
.byte 0xc0+6 /* DW_CFA_restore, %rbp */
.align 8
.LEFDE1:
.LSFDE3:
.long .LEFDE3-.LASFDE3 # FDE Length
.long .LEFDE3-.LASFDE3 /* FDE Length */
.LASFDE3:
.long .LASFDE3-.Lframe0 # FDE CIE offset

.long .LFB2-. # FDE initial location
.long .LFE2-.LFB2 # FDE address range
.uleb128 0x0 # Augmentation size
.byte 0x4 # DW_CFA_advance_loc4
.long .LCFI10-.LFB2
.byte 0xe # DW_CFA_def_cfa_offset
.uleb128 0x10
.byte 0x86 # DW_CFA_offset, column 0x6
.uleb128 0x2
.byte 0x4 # DW_CFA_advance_loc4
.long .LCFI11-.LCFI10
.byte 0xd # DW_CFA_def_cfa_register
.uleb128 0x6
.align 8
.long .LASFDE3-.Lframe1 /* FDE CIE offset */
.long .LUW4-. /* FDE initial location */
.long .LUW7-.LUW4 /* FDE address range */
.uleb128 0x0 /* Augmentation size */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW5-.LUW4
.byte 0xe /* DW_CFA_def_cfa_offset */
.uleb128 208
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW6-.LUW5
.byte 0xe /* DW_CFA_def_cfa_offset */
.uleb128 8
.align 8
.LEFDE3:

#endif /* __x86_64__ */
#endif /* __x86_64__ */