amdgcn: Vector procedure call ABI

Adjust the (unofficial) procedure calling ABI such that vector arguments are
passed in vector registers, not on the stack.  Scalar arguments continue to
be passed in scalar registers, making a total of 12 argument registers.

The return value is also moved to a vector register (even for scalars; it
would be possible to retain the scalar location, using untyped_call, but
there's no obvious advantage in doing so).

After this change the ABI is as follows:

s0-s13  : Reserved for kernel launch parameters.
s14-s15 : Frame pointer.
s16-s17 : Stack pointer.
s18-s19 : Link register.
s20-s21 : Exec Save.
s22-s23 : CC Save.
s24-s25 : Scalar arguments.          NO LONGER RETURN VALUE.
s26-s29 : Additional scalar arguments (makes 6 total).
s30-s31 : Static Chain.
v0      : Prologue/epilogue scratch.
v1      : Constant 0, 1, 2, 3, 4, ... 63.
v2-v7   : Prologue/epilogue scratch.
v8-v9   : Return value & vector arguments.              NEW.
v10-v13 : Additional vector arguments (makes 6 total).  NEW.

gcc/ChangeLog:

	* config/gcn/gcn.cc (gcn_function_value): Allow vector return values.
	(num_arg_regs): Allow vector arguments.
	(gcn_function_arg): Likewise.
	(gcn_function_arg_advance): Likewise.
	(gcn_arg_partial_bytes): Likewise.
	(gcn_return_in_memory): Likewise.
	(gcn_expand_epilogue): Get return value from v8.
	* config/gcn/gcn.h (RETURN_VALUE_REG): Set to v8.
	(FIRST_PARM_REG): Use FIRST_SGPR_REG for clarity.
	(FIRST_VPARM_REG): New.
	(FUNCTION_ARG_REGNO_P): Allow vector parameters.
	(struct gcn_args): Add vnum field.
	(LIBCALL_VALUE): Allow vector return values.
	* config/gcn/gcn.md (gcn_call_value): Add vector constraints.
	(gcn_call_value_indirect): Likewise.
This commit is contained in:
Andrew Stubbs 2022-07-15 09:47:36 +01:00
parent 9aa08cd484
commit 4e1914625d
3 changed files with 42 additions and 31 deletions

View File

@ -2284,7 +2284,7 @@ gcn_function_value (const_tree valtype, const_tree, bool)
&& GET_MODE_SIZE (mode) < 4)
mode = SImode;
return gen_rtx_REG (mode, SGPR_REGNO (RETURN_VALUE_REG));
return gen_rtx_REG (mode, RETURN_VALUE_REG);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
@ -2308,7 +2308,9 @@ num_arg_regs (const function_arg_info &arg)
return 0;
int size = arg.promoted_size_in_bytes ();
return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
int regsize = UNITS_PER_WORD * (VECTOR_MODE_P (arg.mode)
? GET_MODE_NUNITS (arg.mode) : 1);
return (size + regsize - 1) / regsize;
}
/* Implement TARGET_STRICT_ARGUMENT_NAMING.
@ -2358,16 +2360,16 @@ gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
if (targetm.calls.must_pass_in_stack (arg))
return 0;
/* Vector parameters are not supported yet. */
if (VECTOR_MODE_P (arg.mode))
return 0;
int reg_num = FIRST_PARM_REG + cum->num;
int first_reg = (VECTOR_MODE_P (arg.mode)
? FIRST_VPARM_REG : FIRST_PARM_REG);
int cum_num = (VECTOR_MODE_P (arg.mode)
? cum->vnum : cum->num);
int reg_num = first_reg + cum_num;
int num_regs = num_arg_regs (arg);
if (num_regs > 0)
while (reg_num % num_regs != 0)
reg_num++;
if (reg_num + num_regs <= FIRST_PARM_REG + NUM_PARM_REGS)
if (reg_num + num_regs <= first_reg + NUM_PARM_REGS)
return gen_rtx_REG (arg.mode, reg_num);
}
else
@ -2419,11 +2421,15 @@ gcn_function_arg_advance (cumulative_args_t cum_v,
if (!arg.named)
return;
int first_reg = (VECTOR_MODE_P (arg.mode)
? FIRST_VPARM_REG : FIRST_PARM_REG);
int *cum_num = (VECTOR_MODE_P (arg.mode)
? &cum->vnum : &cum->num);
int num_regs = num_arg_regs (arg);
if (num_regs > 0)
while ((FIRST_PARM_REG + cum->num) % num_regs != 0)
cum->num++;
cum->num += num_regs;
while ((first_reg + *cum_num) % num_regs != 0)
(*cum_num)++;
*cum_num += num_regs;
}
else
{
@ -2454,14 +2460,18 @@ gcn_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
if (targetm.calls.must_pass_in_stack (arg))
return 0;
if (cum->num >= NUM_PARM_REGS)
int cum_num = (VECTOR_MODE_P (arg.mode) ? cum->vnum : cum->num);
int regsize = UNITS_PER_WORD * (VECTOR_MODE_P (arg.mode)
? GET_MODE_NUNITS (arg.mode) : 1);
if (cum_num >= NUM_PARM_REGS)
return 0;
/* If the argument fits entirely in registers, return 0. */
if (cum->num + num_arg_regs (arg) <= NUM_PARM_REGS)
if (cum_num + num_arg_regs (arg) <= NUM_PARM_REGS)
return 0;
return (NUM_PARM_REGS - cum->num) * UNITS_PER_WORD;
return (NUM_PARM_REGS - cum_num) * regsize;
}
/* A normal function which takes a pointer argument (to a scalar) may be
@ -2549,14 +2559,11 @@ gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
if (AGGREGATE_TYPE_P (type))
return true;
/* Vector return values are not supported yet. */
if (VECTOR_TYPE_P (type))
return true;
if (mode == BLKmode)
return true;
if (size > 2 * UNITS_PER_WORD)
if ((!VECTOR_TYPE_P (type) && size > 2 * UNITS_PER_WORD)
|| size > 2 * UNITS_PER_WORD * 64)
return true;
return false;
@ -3199,9 +3206,10 @@ gcn_expand_epilogue (void)
emit_move_insn (kernarg_reg, retptr_mem);
rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
rtx scalar_retval = gen_rtx_REG (SImode, FIRST_PARM_REG);
set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
emit_move_insn (retval_mem,
gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
emit_move_insn (scalar_retval, gen_rtx_REG (SImode, RETURN_VALUE_REG));
emit_move_insn (retval_mem, scalar_retval);
}
emit_jump_insn (gen_gcn_return ());

View File

@ -138,7 +138,7 @@
#define LINK_REGNUM 18
#define EXEC_SAVE_REG 20
#define CC_SAVE_REG 22
#define RETURN_VALUE_REG 24 /* Must be divisible by 4. */
#define RETURN_VALUE_REG 168 /* Must be divisible by 4. */
#define STATIC_CHAIN_REGNUM 30
#define WORK_ITEM_ID_Z_REG 162
#define SOFT_ARG_REG 416
@ -146,7 +146,8 @@
#define DWARF_LINK_REGISTER 420
#define FIRST_PSEUDO_REGISTER 421
#define FIRST_PARM_REG 24
#define FIRST_PARM_REG (FIRST_SGPR_REG + 24)
#define FIRST_VPARM_REG (FIRST_VGPR_REG + 8)
#define NUM_PARM_REGS 6
/* There is no arg pointer. Just choose random fixed register that does
@ -164,7 +165,8 @@
#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
#define CC_REGNO_P(X) ((X) == SCC_REG || (X) == VCC_REG)
#define FUNCTION_ARG_REGNO_P(N) \
((N) >= FIRST_PARM_REG && (N) < (FIRST_PARM_REG + NUM_PARM_REGS))
(((N) >= FIRST_PARM_REG && (N) < (FIRST_PARM_REG + NUM_PARM_REGS)) \
|| ((N) >= FIRST_VPARM_REG && (N) < (FIRST_VPARM_REG + NUM_PARM_REGS)))
#define FIXED_REGISTERS { \
@ -550,6 +552,7 @@ typedef struct gcn_args
tree fntype;
struct gcn_kernel_args args;
int num;
int vnum;
int offset;
int alignment;
} CUMULATIVE_ARGS;
@ -653,7 +656,7 @@ enum gcn_builtin_codes
}
/* This needs to match gcn_function_value. */
#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, SGPR_REGNO (RETURN_VALUE_REG))
#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, RETURN_VALUE_REG)
/* The s_ff0 and s_flbit instructions return -1 if no input bits are set. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 2)

View File

@ -908,11 +908,11 @@
{})
(define_insn "gcn_call_value"
[(set (match_operand 0 "register_operand" "=Sg,Sg")
(call (mem (match_operand 1 "immediate_operand" "Y,B"))
[(set (match_operand 0 "register_operand" "=Sgv,Sgv")
(call (mem (match_operand 1 "immediate_operand" " Y, B"))
(match_operand 2 "const_int_operand")))
(clobber (reg:DI LR_REGNUM))
(clobber (match_scratch:DI 3 "=&Sg,X"))]
(clobber (match_scratch:DI 3 "=&Sg, X"))]
""
"@
s_getpc_b64\t%3\;s_add_u32\t%L3, %L3, %1@rel32@lo+4\;s_addc_u32\t%H3, %H3, %1@rel32@hi+4\;s_swappc_b64\ts[18:19], %3
@ -921,11 +921,11 @@
(set_attr "length" "24")])
(define_insn "gcn_call_value_indirect"
[(set (match_operand 0 "register_operand" "=Sg")
(call (mem (match_operand:DI 1 "register_operand" "Sg"))
[(set (match_operand 0 "register_operand" "=Sgv")
(call (mem (match_operand:DI 1 "register_operand" " Sg"))
(match_operand 2 "" "")))
(clobber (reg:DI LR_REGNUM))
(clobber (match_scratch:DI 3 "=X"))]
(clobber (match_scratch:DI 3 "= X"))]
""
"s_swappc_b64\ts[18:19], %1"
[(set_attr "type" "sop1")