amdgcn: Support XNACK mode

The XNACK feature allows memory load instructions to restart safely following
a page-miss interrupt.  This is useful for shared-memory devices, like APUs,
and to implement OpenMP Unified Shared Memory.

To support the feature we must be able to set the appropriate meta-data and
set the load instructions to early-clobber.  When the port supports scheduling
of s_waitcnt instructions there will be further requirements.

gcc/ChangeLog:

	* config/gcn/gcn-hsa.h (XNACKOPT): New macro.
	(ASM_SPEC): Use XNACKOPT.
	* config/gcn/gcn-opts.h (enum sram_ecc_type): Rename to ...
	(enum hsaco_attr_type): ... this, and generalize the names.
	(TARGET_XNACK): New macro.
	* config/gcn/gcn-valu.md (gather<mode>_insn_1offset<exec>):
	Add xnack compatible alternatives.
	(gather<mode>_insn_2offsets<exec>): Likewise.
	* config/gcn/gcn.c (gcn_option_override): Permit -mxnack for devices
	other than Fiji.
	(gcn_expand_epilogue): Remove early-clobber problems.
	(output_file_start): Emit xnack attributes.
	(gcn_hsa_declare_function_name): Obey -mxnack setting.
	* config/gcn/gcn.md (xnack): New attribute.
	(enabled): Rework to include "xnack" attribute.
	(*movbi): Add xnack compatible alternatives.
	(*mov<mode>_insn): Likewise.
	(*mov<mode>_insn): Likewise.
	(*mov<mode>_insn): Likewise.
	(*movti_insn): Likewise.
	* config/gcn/gcn.opt (-mxnack): Add the "on/off/any" syntax.
	(sram_ecc_type): Rename to ...
	(hsaco_attr_type): ... this.
	* config/gcn/mkoffload.c (SET_XNACK_ANY): New macro.
	(TEST_XNACK): Delete.
	(TEST_XNACK_ANY): New macro.
	(TEST_XNACK_ON): New macro.
	(main): Support the new -mxnack=on/off/any syntax.
This commit is contained in:
Andrew Stubbs 2022-06-10 15:15:49 +01:00
parent 88c73dbc2d
commit 9aefadc9e6
7 changed files with 140 additions and 86 deletions

View File

@ -81,12 +81,13 @@ extern unsigned int gcn_local_sym_hash (const char *name);
/* In HSACOv4 no attribute setting means the binary supports "any" hardware
configuration. The name of the attribute also changed. */
#define SRAMOPT "msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc"
#define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack"
/* Use LLVM assembler and linker options. */
#define ASM_SPEC "-triple=amdgcn--amdhsa " \
"%:last_arg(%{march=*:-mcpu=%*}) " \
"%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
"%{" NO_XNACK "mxnack:-mattr=+xnack;:-mattr=-xnack} " \
"%{" NO_XNACK XNACKOPT "}" \
"%{" NO_SRAM_ECC SRAMOPT "} " \
"-filetype=obj"
#define LINK_SPEC "--pie --export-dynamic"

View File

@ -48,11 +48,13 @@ extern enum gcn_isa {
#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
enum sram_ecc_type
#define TARGET_XNACK (flag_xnack != HSACO_ATTR_OFF)
enum hsaco_attr_type
{
SRAM_ECC_OFF,
SRAM_ECC_ON,
SRAM_ECC_ANY
HSACO_ATTR_OFF,
HSACO_ATTR_ON,
HSACO_ATTR_ANY
};
#endif

View File

@ -741,13 +741,13 @@
{})
(define_insn "gather<mode>_insn_1offset<exec>"
[(set (match_operand:V_ALL 0 "register_operand" "=v")
[(set (match_operand:V_ALL 0 "register_operand" "=v,&v")
(unspec:V_ALL
[(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
[(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v, v")
(vec_duplicate:<VnDI>
(match_operand 2 "immediate_operand" " n")))
(match_operand 3 "immediate_operand" " n")
(match_operand 4 "immediate_operand" " n")
(match_operand 2 "immediate_operand" " n, n")))
(match_operand 3 "immediate_operand" " n, n")
(match_operand 4 "immediate_operand" " n, n")
(mem:BLK (scratch))]
UNSPEC_GATHER))]
"(AS_FLAT_P (INTVAL (operands[3]))
@ -777,7 +777,8 @@
return buf;
}
[(set_attr "type" "flat")
(set_attr "length" "12")])
(set_attr "length" "12")
(set_attr "xnack" "off,on")])
(define_insn "gather<mode>_insn_1offset_ds<exec>"
[(set (match_operand:V_ALL 0 "register_operand" "=v")
@ -802,17 +803,18 @@
(set_attr "length" "12")])
(define_insn "gather<mode>_insn_2offsets<exec>"
[(set (match_operand:V_ALL 0 "register_operand" "=v")
[(set (match_operand:V_ALL 0 "register_operand" "=v,&v")
(unspec:V_ALL
[(plus:<VnDI>
(plus:<VnDI>
(vec_duplicate:<VnDI>
(match_operand:DI 1 "register_operand" "Sv"))
(match_operand:DI 1 "register_operand" "Sv,Sv"))
(sign_extend:<VnDI>
(match_operand:<VnSI> 2 "register_operand" " v")))
(vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
(match_operand 4 "immediate_operand" " n")
(match_operand 5 "immediate_operand" " n")
(match_operand:<VnSI> 2 "register_operand" " v, v")))
(vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
" n, n")))
(match_operand 4 "immediate_operand" " n, n")
(match_operand 5 "immediate_operand" " n, n")
(mem:BLK (scratch))]
UNSPEC_GATHER))]
"(AS_GLOBAL_P (INTVAL (operands[4]))
@ -831,7 +833,8 @@
return buf;
}
[(set_attr "type" "flat")
(set_attr "length" "12")])
(set_attr "length" "12")
(set_attr "xnack" "off,on")])
(define_expand "scatter_store<mode><vnsi>"
[(match_operand:DI 0 "register_operand")

View File

@ -170,9 +170,14 @@ gcn_option_override (void)
acc_lds_size = 32768;
}
/* The xnack option is a placeholder, for now. */
if (flag_xnack)
sorry ("XNACK support");
/* gfx803 "Fiji" does not support XNACK. */
if (gcn_arch == PROCESSOR_FIJI)
{
if (flag_xnack == HSACO_ATTR_ON)
error ("-mxnack=on is incompatible with -march=fiji");
/* Allow HSACO_ATTR_ANY silently because that's the default. */
flag_xnack = HSACO_ATTR_OFF;
}
}
/* }}} */
@ -3187,17 +3192,19 @@ gcn_expand_epilogue (void)
/* Assume that an exit value compatible with gcn-run is expected.
That is, the third input parameter is an int*.
We can't allocate any new registers, but the kernarg_reg is
dead after this, so we'll use that. */
We can't allocate any new registers, but the dispatch_ptr and
kernarg_reg are dead after this, so we'll use those. */
rtx dispatch_ptr_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
[DISPATCH_PTR_ARG]);
rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
[KERNARG_SEGMENT_PTR_ARG]);
rtx retptr_mem = gen_rtx_MEM (DImode,
gen_rtx_PLUS (DImode, kernarg_reg,
GEN_INT (16)));
set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
emit_move_insn (kernarg_reg, retptr_mem);
emit_move_insn (dispatch_ptr_reg, retptr_mem);
rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
rtx retval_mem = gen_rtx_MEM (SImode, dispatch_ptr_reg);
set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
emit_move_insn (retval_mem,
gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
@ -5222,11 +5229,12 @@ static void
output_file_start (void)
{
/* In HSACOv4 no attribute setting means the binary supports "any" hardware
configuration. In GCC binaries, this is true for SRAM ECC, but not
XNACK. */
const char *xnack = (flag_xnack ? ":xnack+" : ":xnack-");
const char *sram_ecc = (flag_sram_ecc == SRAM_ECC_ON ? ":sramecc+"
: flag_sram_ecc == SRAM_ECC_OFF ? ":sramecc-"
configuration. */
const char *xnack = (flag_xnack == HSACO_ATTR_ON ? ":xnack+"
: flag_xnack == HSACO_ATTR_OFF ? ":xnack-"
: "");
const char *sram_ecc = (flag_sram_ecc == HSACO_ATTR_ON ? ":sramecc+"
: flag_sram_ecc == HSACO_ATTR_OFF ? ":sramecc-"
: "");
const char *cpu;
@ -5270,7 +5278,7 @@ void
gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
{
int sgpr, vgpr;
bool xnack_enabled = false;
bool xnack_enabled = TARGET_XNACK;
fputs ("\n\n", file);

View File

@ -276,12 +276,19 @@
(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))
(define_attr "xnack" "na,off,on" (const_string "na"))
(define_attr "enabled" ""
(cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
(and (eq_attr "gcn_version" "gcn5")
(ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
(const_int 1)]
(const_int 0)))
(cond [(and (eq_attr "gcn_version" "gcn5")
(eq (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
(const_int 0)
(and (eq_attr "xnack" "off")
(ne (symbol_ref "TARGET_XNACK") (const_int 0)))
(const_int 0)
(and (eq_attr "xnack" "on")
(eq (symbol_ref "TARGET_XNACK") (const_int 0)))
(const_int 0)]
(const_int 1)))
; We need to be able to identify v_readlane and v_writelane with
; SGPR lane selection in order to handle "Manually Inserted Wait States".
@ -470,9 +477,9 @@
(define_insn "*movbi"
[(set (match_operand:BI 0 "nonimmediate_operand"
"=Sg, v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
"=Sg, v,Sg,cs,cV,cV,Sm,&Sm,RS, v,&v,RF, v,&v,RM")
(match_operand:BI 1 "gcn_load_operand"
"SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
"SSA,vSvA, v,SS, v,SS,RS, RS,Sm,RF,RF, v,RM,RM, v"))]
""
{
/* SCC as an operand is currently not accepted by the LLVM assembler, so
@ -514,66 +521,77 @@
return "s_mov_b32\tvcc_lo, %1\;"
"s_mov_b32\tvcc_hi, 0";
case 6:
return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
case 7:
return "s_store_dword\t%1, %A0";
return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
case 8:
return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
return "s_store_dword\t%1, %A0";
case 9:
return "flat_store_dword\t%A0, %1%O0%g0";
case 10:
return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
case 11:
return "flat_store_dword\t%A0, %1%O0%g0";
case 12:
case 13:
return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
case 14:
return "global_store_dword\t%A0, %1%O0%g0";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
flat,flat")
(set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
(set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])
[(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
flat,flat,flat,flat")
(set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
(set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
(set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")])
; 32bit move pattern
(define_insn "*mov<mode>_insn"
[(set (match_operand:SISF 0 "nonimmediate_operand"
"=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG, v,SD, v,RM")
"=SD,SD,SD,SD,&SD,RB,Sm,&Sm,RS,v,Sg, v, v,&v,RF,v,RLRG, v,SD, v,&v,RM")
(match_operand:SISF 1 "gcn_load_operand"
"SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B, v,RLRG, Y,RM, v"))]
"SSA, J, B,RB, RB,Sm,RS, RS,Sm,v, v,Sv,RF,RF, v,B, v,RLRG, Y,RM,RM, v"))]
""
"@
s_mov_b32\t%0, %1
s_movk_i32\t%0, %1
s_mov_b32\t%0, %1
s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
s_buffer_store%s1\t%1, s[0:3], %0
s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
s_store_dword\t%1, %A0
v_mov_b32\t%0, %1
v_readlane_b32\t%0, %1, 0
v_writelane_b32\t%0, %1, 0
flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dword\t%A0, %1%O0%g0
v_mov_b32\t%0, %1
ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
s_mov_b32\t%0, %1
global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store_dword\t%A0, %1%O0%g0"
[(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
flat,vop1,ds,ds,sop1,flat,flat")
(set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
(set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
[(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,smem,smem,vop1,vop3a,
vop3a,flat,flat,flat,vop1,ds,ds,sop1,flat,flat,flat")
(set_attr "exec" "*,*,*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*,*,*")
(set_attr "length"
"4,4,8,12,12,12,12,12,12,4,8,8,12,12,12,8,12,12,8,12,12,12")
(set_attr "xnack"
"*,*,*,off,on,*,off,on,*,*,*,*,off,on,*,*,*,*,*,off,on,*")])
; 8/16bit move pattern
; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
(define_insn "*mov<mode>_insn"
[(set (match_operand:QIHI 0 "nonimmediate_operand"
"=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM")
"=SD,SD,SD,v,Sg, v, v,&v,RF,v,RLRG, v, v,&v,RM")
(match_operand:QIHI 1 "gcn_load_operand"
"SSA, J, B,v, v,Sv,RF, v,B, v,RLRG,RM, v"))]
"SSA, J, B,v, v,Sv,RF,RF, v,B, v,RLRG,RM,RM, v"))]
"gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
"@
s_mov_b32\t%0, %1
@ -583,24 +601,27 @@
v_readlane_b32\t%0, %1, 0
v_writelane_b32\t%0, %1, 0
flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store%s0\t%A0, %1%O0%g0
v_mov_b32\t%0, %1
ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store%s0\t%A0, %1%O0%g0"
[(set_attr "type"
"sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
(set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
(set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
[(set_attr "type" "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,flat,vop1,ds,ds,
flat,flat,flat")
(set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*,*,*")
(set_attr "length" "4,4,8,4,4,4,12,12,12,8,12,12,12,12,12")
(set_attr "xnack" "*,*,*,*,*,*,off,on,*,*,*,*,off,on,*")])
; 64bit move pattern
(define_insn_and_split "*mov<mode>_insn"
[(set (match_operand:DIDF 0 "nonimmediate_operand"
"=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM")
"=SD,SD,SD,RS,Sm,&Sm,v, v,Sg, v, v,&v,RF,RLRG, v, v,&v,RM")
(match_operand:DIDF 1 "general_operand"
"SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v, v,RLRG,RM, v"))]
"SSA, C,DB,Sm,RS, RS,v,DB, v,Sv,RF,RF, v, v,RLRG,RM,RM, v"))]
"GET_CODE(operands[1]) != SYMBOL_REF"
"@
s_mov_b64\t%0, %1
@ -608,15 +629,18 @@
#
s_store_dwordx2\t%1, %A0
s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
#
#
#
#
flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dwordx2\t%A0, %1%O0%g0
ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store_dwordx2\t%A0, %1%O0%g0"
"reload_completed
&& ((!MEM_P (operands[0]) && !MEM_P (operands[1])
@ -647,29 +671,33 @@
operands[3] = inhi;
}
}
[(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
flat,ds,ds,flat,flat")
(set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
[(set_attr "type" "sop1,sop1,mult,smem,smem,smem,vmult,vmult,vmult,vmult,
flat,flat,flat,ds,ds,flat,flat,flat")
(set_attr "length" "4,8,*,12,12,12,*,*,*,*,12,12,12,12,12,12,12,12")
(set_attr "xnack" "*,*,*,*,off,on,*,*,*,*,off,on,*,*,*,off,on,*")])
; 128-bit move.
(define_insn_and_split "*movti_insn"
[(set (match_operand:TI 0 "nonimmediate_operand"
"=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
(match_operand:TI 1 "general_operand"
"SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
"=SD,RS,Sm,&Sm,RF, v,&v,v, v,SD,RM, v,&v,RL, v")
(match_operand:TI 1 "general_operand"
"SSB,Sm,RS, RS, v,RF,RF,v,Sv, v, v,RM,RM, v,RL"))]
""
"@
#
s_store_dwordx4\t%1, %A0
s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
flat_store_dwordx4\t%A0, %1%O0%g0
flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
#
#
#
global_store_dwordx4\t%A0, %1%O0%g0
global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
"reload_completed
@ -691,10 +719,11 @@
operands[0] = gcn_operand_part (TImode, operands[0], 0);
operands[1] = gcn_operand_part (TImode, operands[1], 0);
}
[(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
ds,ds")
(set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
(set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
[(set_attr "type" "mult,smem,smem,smem,flat,flat,flat,vmult,vmult,vmult,flat,
flat,flat,ds,ds")
(set_attr "delayeduse" "*,*,yes,yes,*,*,*,*,*,*,*,yes,*,*,*")
(set_attr "length" "*,12,12,12,12,12,12,*,*,*,12,12,12,12,12")
(set_attr "xnack" "*,*,off,on,*,off,on,*,*,*,*,off,on,*,*")])
;; }}}
;; {{{ Prologue/Epilogue
@ -862,6 +891,8 @@
(clobber (reg:BI SCC_REG))]
"GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
{
/* This s_load may not be XNACK-safe on devices where the GOT may fault.
DGPUs are most likely fine. */
if (SYMBOL_REF_P (operands[1])
&& SYMBOL_REF_WEAK (operands[1]))
return "s_getpc_b64\t%0\;"
@ -886,6 +917,8 @@
{
/* !!! These sequences clobber CC_SAVE_REG. */
/* This s_load may not be XNACK-safe on devices where the GOT may fault.
DGPUs are most likely fine. */
if (SYMBOL_REF_P (operands[1])
&& SYMBOL_REF_WEAK (operands[1]))
return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"

View File

@ -86,23 +86,23 @@ Wopenacc-dims
Target Var(warn_openacc_dims) Warning
Warn about invalid OpenACC dimensions.
mxnack
Target Var(flag_xnack) Init(0)
Compile for devices requiring XNACK enabled. Default off.
Enum
Name(sram_ecc_type) Type(enum sram_ecc_type)
Name(hsaco_attr_type) Type(enum hsaco_attr_type)
SRAM-ECC modes:
EnumValue
Enum(sram_ecc_type) String(off) Value(SRAM_ECC_OFF)
Enum(hsaco_attr_type) String(off) Value(HSACO_ATTR_OFF)
EnumValue
Enum(sram_ecc_type) String(on) Value(SRAM_ECC_ON)
Enum(hsaco_attr_type) String(on) Value(HSACO_ATTR_ON)
EnumValue
Enum(sram_ecc_type) String(any) Value(SRAM_ECC_ANY)
Enum(hsaco_attr_type) String(any) Value(HSACO_ATTR_ANY)
mxnack=
Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_ANY)
Compile for devices requiring XNACK enabled. Default \"any\".
msram-ecc=
Target RejectNegative Joined ToLower Enum(sram_ecc_type) Var(flag_sram_ecc) Init(SRAM_ECC_ANY)
Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY)
Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\".

View File

@ -72,10 +72,14 @@
#define SET_XNACK_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
| EF_AMDGPU_FEATURE_XNACK_ON_V4)
#define SET_XNACK_ANY(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
| EF_AMDGPU_FEATURE_XNACK_ANY_V4)
#define SET_XNACK_OFF(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
| EF_AMDGPU_FEATURE_XNACK_OFF_V4)
#define TEST_XNACK(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
== EF_AMDGPU_FEATURE_XNACK_ON_V4)
#define TEST_XNACK_ANY(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
== EF_AMDGPU_FEATURE_XNACK_ANY_V4)
#define TEST_XNACK_ON(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
== EF_AMDGPU_FEATURE_XNACK_ON_V4)
#define SET_SRAM_ECC_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
| EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
@ -881,9 +885,11 @@ main (int argc, char **argv)
fPIC = true;
else if (strcmp (argv[i], "-fpic") == 0)
fpic = true;
else if (strcmp (argv[i], "-mxnack") == 0)
else if (strcmp (argv[i], "-mxnack=on") == 0)
SET_XNACK_ON (elf_flags);
else if (strcmp (argv[i], "-mno-xnack") == 0)
else if (strcmp (argv[i], "-mxnack=any") == 0)
SET_XNACK_ANY (elf_flags);
else if (strcmp (argv[i], "-mxnack=off") == 0)
SET_XNACK_OFF (elf_flags);
else if (strcmp (argv[i], "-msram-ecc=on") == 0)
SET_SRAM_ECC_ON (elf_flags);
@ -1042,8 +1048,9 @@ main (int argc, char **argv)
obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
obstack_ptr_grow (&ld_argv_obstack,
(TEST_XNACK (elf_flags)
? "-mxnack" : "-mno-xnack"));
(TEST_XNACK_ON (elf_flags) ? "-mxnack=on"
: TEST_XNACK_ANY (elf_flags) ? "-mxnack=any"
: "-mxnack=off"));
obstack_ptr_grow (&ld_argv_obstack,
(TEST_SRAM_ECC_ON (elf_flags) ? "-msram-ecc=on"
: TEST_SRAM_ECC_ANY (elf_flags) ? "-msram-ecc=any"