amdgcn: Support XNACK mode
The XNACK feature allows memory load instructions to restart safely following a page-miss interrupt. This is useful for shared-memory devices, like APUs, and to implement OpenMP Unified Shared Memory. To support the feature we must be able to set the appropriate meta-data and set the load instructions to early-clobber. When the port supports scheduling of s_waitcnt instructions there will be further requirements. gcc/ChangeLog: * config/gcn/gcn-hsa.h (XNACKOPT): New macro. (ASM_SPEC): Use XNACKOPT. * config/gcn/gcn-opts.h (enum sram_ecc_type): Rename to ... (enum hsaco_attr_type): ... this, and generalize the names. (TARGET_XNACK): New macro. * config/gcn/gcn-valu.md (gather<mode>_insn_1offset<exec>): Add xnack compatible alternatives. (gather<mode>_insn_2offsets<exec>): Likewise. * config/gcn/gcn.c (gcn_option_override): Permit -mxnack for devices other than Fiji. (gcn_expand_epilogue): Remove early-clobber problems. (output_file_start): Emit xnack attributes. (gcn_hsa_declare_function_name): Obey -mxnack setting. * config/gcn/gcn.md (xnack): New attribute. (enabled): Rework to include "xnack" attribute. (*movbi): Add xnack compatible alternatives. (*mov<mode>_insn): Likewise. (*mov<mode>_insn): Likewise. (*mov<mode>_insn): Likewise. (*movti_insn): Likewise. * config/gcn/gcn.opt (-mxnack): Add the "on/off/any" syntax. (sram_ecc_type): Rename to ... (hsaco_attr_type): ... this. * config/gcn/mkoffload.c (SET_XNACK_ANY): New macro. (TEST_XNACK): Delete. (TEST_XNACK_ANY): New macro. (TEST_XNACK_ON): New macro. (main): Support the new -mxnack=on/off/any syntax.
This commit is contained in:
parent
88c73dbc2d
commit
9aefadc9e6
|
@ -81,12 +81,13 @@ extern unsigned int gcn_local_sym_hash (const char *name);
|
|||
/* In HSACOv4 no attribute setting means the binary supports "any" hardware
|
||||
configuration. The name of the attribute also changed. */
|
||||
#define SRAMOPT "msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc"
|
||||
#define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack"
|
||||
|
||||
/* Use LLVM assembler and linker options.
   Every "%{...}" group in ASM_SPEC must be followed by a space: the driver
   only terminates an argument at whitespace, so two adjacent groups that
   both emit text would be pasted into one malformed -mattr argument. */
|
||||
#define ASM_SPEC "-triple=amdgcn--amdhsa " \
|
||||
"%:last_arg(%{march=*:-mcpu=%*}) " \
|
||||
"%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
|
||||
"%{" NO_XNACK "mxnack:-mattr=+xnack;:-mattr=-xnack} " \
|
||||
"%{" NO_XNACK XNACKOPT "} " \
|
||||
"%{" NO_SRAM_ECC SRAMOPT "} " \
|
||||
"-filetype=obj"
|
||||
#define LINK_SPEC "--pie --export-dynamic"
|
||||
|
|
|
@ -48,11 +48,13 @@ extern enum gcn_isa {
|
|||
#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
|
||||
#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
|
||||
|
||||
enum sram_ecc_type
|
||||
#define TARGET_XNACK (flag_xnack != HSACO_ATTR_OFF)
|
||||
|
||||
enum hsaco_attr_type
|
||||
{
|
||||
SRAM_ECC_OFF,
|
||||
SRAM_ECC_ON,
|
||||
SRAM_ECC_ANY
|
||||
HSACO_ATTR_OFF,
|
||||
HSACO_ATTR_ON,
|
||||
HSACO_ATTR_ANY
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -741,13 +741,13 @@
|
|||
{})
|
||||
|
||||
(define_insn "gather<mode>_insn_1offset<exec>"
|
||||
[(set (match_operand:V_ALL 0 "register_operand" "=v")
|
||||
[(set (match_operand:V_ALL 0 "register_operand" "=v,&v")
|
||||
(unspec:V_ALL
|
||||
[(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
|
||||
[(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v, v")
|
||||
(vec_duplicate:<VnDI>
|
||||
(match_operand 2 "immediate_operand" " n")))
|
||||
(match_operand 3 "immediate_operand" " n")
|
||||
(match_operand 4 "immediate_operand" " n")
|
||||
(match_operand 2 "immediate_operand" " n, n")))
|
||||
(match_operand 3 "immediate_operand" " n, n")
|
||||
(match_operand 4 "immediate_operand" " n, n")
|
||||
(mem:BLK (scratch))]
|
||||
UNSPEC_GATHER))]
|
||||
"(AS_FLAT_P (INTVAL (operands[3]))
|
||||
|
@ -777,7 +777,8 @@
|
|||
return buf;
|
||||
}
|
||||
[(set_attr "type" "flat")
|
||||
(set_attr "length" "12")])
|
||||
(set_attr "length" "12")
|
||||
(set_attr "xnack" "off,on")])
|
||||
|
||||
(define_insn "gather<mode>_insn_1offset_ds<exec>"
|
||||
[(set (match_operand:V_ALL 0 "register_operand" "=v")
|
||||
|
@ -802,17 +803,18 @@
|
|||
(set_attr "length" "12")])
|
||||
|
||||
(define_insn "gather<mode>_insn_2offsets<exec>"
|
||||
[(set (match_operand:V_ALL 0 "register_operand" "=v")
|
||||
[(set (match_operand:V_ALL 0 "register_operand" "=v,&v")
|
||||
(unspec:V_ALL
|
||||
[(plus:<VnDI>
|
||||
(plus:<VnDI>
|
||||
(vec_duplicate:<VnDI>
|
||||
(match_operand:DI 1 "register_operand" "Sv"))
|
||||
(match_operand:DI 1 "register_operand" "Sv,Sv"))
|
||||
(sign_extend:<VnDI>
|
||||
(match_operand:<VnSI> 2 "register_operand" " v")))
|
||||
(vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
|
||||
(match_operand 4 "immediate_operand" " n")
|
||||
(match_operand 5 "immediate_operand" " n")
|
||||
(match_operand:<VnSI> 2 "register_operand" " v, v")))
|
||||
(vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
|
||||
" n, n")))
|
||||
(match_operand 4 "immediate_operand" " n, n")
|
||||
(match_operand 5 "immediate_operand" " n, n")
|
||||
(mem:BLK (scratch))]
|
||||
UNSPEC_GATHER))]
|
||||
"(AS_GLOBAL_P (INTVAL (operands[4]))
|
||||
|
@ -831,7 +833,8 @@
|
|||
return buf;
|
||||
}
|
||||
[(set_attr "type" "flat")
|
||||
(set_attr "length" "12")])
|
||||
(set_attr "length" "12")
|
||||
(set_attr "xnack" "off,on")])
|
||||
|
||||
(define_expand "scatter_store<mode><vnsi>"
|
||||
[(match_operand:DI 0 "register_operand")
|
||||
|
|
|
@ -170,9 +170,14 @@ gcn_option_override (void)
|
|||
acc_lds_size = 32768;
|
||||
}
|
||||
|
||||
/* The xnack option is a placeholder, for now. */
|
||||
if (flag_xnack)
|
||||
sorry ("XNACK support");
|
||||
/* gfx803 "Fiji" does not support XNACK. */
|
||||
if (gcn_arch == PROCESSOR_FIJI)
|
||||
{
|
||||
if (flag_xnack == HSACO_ATTR_ON)
|
||||
error ("-mxnack=on is incompatible with -march=fiji");
|
||||
/* Allow HSACO_ATTR_ANY silently because that's the default. */
|
||||
flag_xnack = HSACO_ATTR_OFF;
|
||||
}
|
||||
}
|
||||
|
||||
/* }}} */
|
||||
|
@ -3187,17 +3192,19 @@ gcn_expand_epilogue (void)
|
|||
/* Assume that an exit value compatible with gcn-run is expected.
|
||||
That is, the third input parameter is an int*.
|
||||
|
||||
We can't allocate any new registers, but the kernarg_reg is
|
||||
dead after this, so we'll use that. */
|
||||
We can't allocate any new registers, but the dispatch_ptr and
|
||||
kernarg_reg are dead after this, so we'll use those. */
|
||||
rtx dispatch_ptr_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
|
||||
[DISPATCH_PTR_ARG]);
|
||||
rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
|
||||
[KERNARG_SEGMENT_PTR_ARG]);
|
||||
rtx retptr_mem = gen_rtx_MEM (DImode,
|
||||
gen_rtx_PLUS (DImode, kernarg_reg,
|
||||
GEN_INT (16)));
|
||||
set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
|
||||
emit_move_insn (kernarg_reg, retptr_mem);
|
||||
emit_move_insn (dispatch_ptr_reg, retptr_mem);
|
||||
|
||||
rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
|
||||
rtx retval_mem = gen_rtx_MEM (SImode, dispatch_ptr_reg);
|
||||
set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
|
||||
emit_move_insn (retval_mem,
|
||||
gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
|
||||
|
@ -5222,11 +5229,12 @@ static void
|
|||
output_file_start (void)
|
||||
{
|
||||
/* In HSACOv4 no attribute setting means the binary supports "any" hardware
|
||||
configuration. In GCC binaries, this is true for SRAM ECC, but not
|
||||
XNACK. */
|
||||
const char *xnack = (flag_xnack ? ":xnack+" : ":xnack-");
|
||||
const char *sram_ecc = (flag_sram_ecc == SRAM_ECC_ON ? ":sramecc+"
|
||||
: flag_sram_ecc == SRAM_ECC_OFF ? ":sramecc-"
|
||||
configuration. */
|
||||
const char *xnack = (flag_xnack == HSACO_ATTR_ON ? ":xnack+"
|
||||
: flag_xnack == HSACO_ATTR_OFF ? ":xnack-"
|
||||
: "");
|
||||
const char *sram_ecc = (flag_sram_ecc == HSACO_ATTR_ON ? ":sramecc+"
|
||||
: flag_sram_ecc == HSACO_ATTR_OFF ? ":sramecc-"
|
||||
: "");
|
||||
|
||||
const char *cpu;
|
||||
|
@ -5270,7 +5278,7 @@ void
|
|||
gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
|
||||
{
|
||||
int sgpr, vgpr;
|
||||
bool xnack_enabled = false;
|
||||
bool xnack_enabled = TARGET_XNACK;
|
||||
|
||||
fputs ("\n\n", file);
|
||||
|
||||
|
|
|
@ -276,12 +276,19 @@
|
|||
|
||||
(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))
|
||||
|
||||
(define_attr "xnack" "na,off,on" (const_string "na"))
|
||||
|
||||
(define_attr "enabled" ""
|
||||
(cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
|
||||
(and (eq_attr "gcn_version" "gcn5")
|
||||
(ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
|
||||
(const_int 1)]
|
||||
(const_int 0)))
|
||||
(cond [(and (eq_attr "gcn_version" "gcn5")
|
||||
(eq (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
|
||||
(const_int 0)
|
||||
(and (eq_attr "xnack" "off")
|
||||
(ne (symbol_ref "TARGET_XNACK") (const_int 0)))
|
||||
(const_int 0)
|
||||
(and (eq_attr "xnack" "on")
|
||||
(eq (symbol_ref "TARGET_XNACK") (const_int 0)))
|
||||
(const_int 0)]
|
||||
(const_int 1)))
|
||||
|
||||
; We need to be able to identify v_readlane and v_writelane with
|
||||
; SGPR lane selection in order to handle "Manually Inserted Wait States".
|
||||
|
@ -470,9 +477,9 @@
|
|||
|
||||
(define_insn "*movbi"
|
||||
[(set (match_operand:BI 0 "nonimmediate_operand"
|
||||
"=Sg, v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
|
||||
"=Sg, v,Sg,cs,cV,cV,Sm,&Sm,RS, v,&v,RF, v,&v,RM")
|
||||
(match_operand:BI 1 "gcn_load_operand"
|
||||
"SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
|
||||
"SSA,vSvA, v,SS, v,SS,RS, RS,Sm,RF,RF, v,RM,RM, v"))]
|
||||
""
|
||||
{
|
||||
/* SCC as an operand is currently not accepted by the LLVM assembler, so
|
||||
|
@ -514,66 +521,77 @@
|
|||
return "s_mov_b32\tvcc_lo, %1\;"
|
||||
"s_mov_b32\tvcc_hi, 0";
|
||||
case 6:
|
||||
return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
|
||||
case 7:
|
||||
return "s_store_dword\t%1, %A0";
|
||||
return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
|
||||
case 8:
|
||||
return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
|
||||
return "s_store_dword\t%1, %A0";
|
||||
case 9:
|
||||
return "flat_store_dword\t%A0, %1%O0%g0";
|
||||
case 10:
|
||||
return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
|
||||
return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
|
||||
case 11:
|
||||
return "flat_store_dword\t%A0, %1%O0%g0";
|
||||
case 12:
|
||||
case 13:
|
||||
return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
|
||||
case 14:
|
||||
return "global_store_dword\t%A0, %1%O0%g0";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
|
||||
flat,flat")
|
||||
(set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
|
||||
(set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])
|
||||
[(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
|
||||
flat,flat,flat,flat")
|
||||
(set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
|
||||
(set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
|
||||
(set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")])
|
||||
|
||||
; 32bit move pattern
|
||||
|
||||
(define_insn "*mov<mode>_insn"
|
||||
[(set (match_operand:SISF 0 "nonimmediate_operand"
|
||||
"=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG, v,SD, v,RM")
|
||||
"=SD,SD,SD,SD,&SD,RB,Sm,&Sm,RS,v,Sg, v, v,&v,RF,v,RLRG, v,SD, v,&v,RM")
|
||||
(match_operand:SISF 1 "gcn_load_operand"
|
||||
"SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B, v,RLRG, Y,RM, v"))]
|
||||
"SSA, J, B,RB, RB,Sm,RS, RS,Sm,v, v,Sv,RF,RF, v,B, v,RLRG, Y,RM,RM, v"))]
|
||||
""
|
||||
"@
|
||||
s_mov_b32\t%0, %1
|
||||
s_movk_i32\t%0, %1
|
||||
s_mov_b32\t%0, %1
|
||||
s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
|
||||
s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
|
||||
s_buffer_store%s1\t%1, s[0:3], %0
|
||||
s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
s_store_dword\t%1, %A0
|
||||
v_mov_b32\t%0, %1
|
||||
v_readlane_b32\t%0, %1, 0
|
||||
v_writelane_b32\t%0, %1, 0
|
||||
flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
flat_store_dword\t%A0, %1%O0%g0
|
||||
v_mov_b32\t%0, %1
|
||||
ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
s_mov_b32\t%0, %1
|
||||
global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
global_store_dword\t%A0, %1%O0%g0"
|
||||
[(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
|
||||
flat,vop1,ds,ds,sop1,flat,flat")
|
||||
(set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
|
||||
(set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
|
||||
[(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,smem,smem,vop1,vop3a,
|
||||
vop3a,flat,flat,flat,vop1,ds,ds,sop1,flat,flat,flat")
|
||||
(set_attr "exec" "*,*,*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*,*,*")
|
||||
(set_attr "length"
|
||||
"4,4,8,12,12,12,12,12,12,4,8,8,12,12,12,8,12,12,8,12,12,12")
|
||||
(set_attr "xnack"
|
||||
"*,*,*,off,on,*,off,on,*,*,*,*,off,on,*,*,*,*,*,off,on,*")])
|
||||
|
||||
; 8/16bit move pattern
|
||||
; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
|
||||
|
||||
(define_insn "*mov<mode>_insn"
|
||||
[(set (match_operand:QIHI 0 "nonimmediate_operand"
|
||||
"=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM")
|
||||
"=SD,SD,SD,v,Sg, v, v,&v,RF,v,RLRG, v, v,&v,RM")
|
||||
(match_operand:QIHI 1 "gcn_load_operand"
|
||||
"SSA, J, B,v, v,Sv,RF, v,B, v,RLRG,RM, v"))]
|
||||
"SSA, J, B,v, v,Sv,RF,RF, v,B, v,RLRG,RM,RM, v"))]
|
||||
"gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
|
||||
"@
|
||||
s_mov_b32\t%0, %1
|
||||
|
@ -583,24 +601,27 @@
|
|||
v_readlane_b32\t%0, %1, 0
|
||||
v_writelane_b32\t%0, %1, 0
|
||||
flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
flat_store%s0\t%A0, %1%O0%g0
|
||||
v_mov_b32\t%0, %1
|
||||
ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
global_store%s0\t%A0, %1%O0%g0"
|
||||
[(set_attr "type"
|
||||
"sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
|
||||
(set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
|
||||
(set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
|
||||
[(set_attr "type" "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,flat,vop1,ds,ds,
|
||||
flat,flat,flat")
|
||||
(set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*,*,*")
|
||||
(set_attr "length" "4,4,8,4,4,4,12,12,12,8,12,12,12,12,12")
|
||||
(set_attr "xnack" "*,*,*,*,*,*,off,on,*,*,*,*,off,on,*")])
|
||||
|
||||
; 64bit move pattern
|
||||
|
||||
(define_insn_and_split "*mov<mode>_insn"
|
||||
[(set (match_operand:DIDF 0 "nonimmediate_operand"
|
||||
"=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM")
|
||||
"=SD,SD,SD,RS,Sm,&Sm,v, v,Sg, v, v,&v,RF,RLRG, v, v,&v,RM")
|
||||
(match_operand:DIDF 1 "general_operand"
|
||||
"SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v, v,RLRG,RM, v"))]
|
||||
"SSA, C,DB,Sm,RS, RS,v,DB, v,Sv,RF,RF, v, v,RLRG,RM,RM, v"))]
|
||||
"GET_CODE(operands[1]) != SYMBOL_REF"
|
||||
"@
|
||||
s_mov_b64\t%0, %1
|
||||
|
@ -608,15 +629,18 @@
|
|||
#
|
||||
s_store_dwordx2\t%1, %A0
|
||||
s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
flat_store_dwordx2\t%A0, %1%O0%g0
|
||||
ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
global_store_dwordx2\t%A0, %1%O0%g0"
|
||||
"reload_completed
|
||||
&& ((!MEM_P (operands[0]) && !MEM_P (operands[1])
|
||||
|
@ -647,29 +671,33 @@
|
|||
operands[3] = inhi;
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
|
||||
flat,ds,ds,flat,flat")
|
||||
(set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
|
||||
[(set_attr "type" "sop1,sop1,mult,smem,smem,smem,vmult,vmult,vmult,vmult,
|
||||
flat,flat,flat,ds,ds,flat,flat,flat")
|
||||
(set_attr "length" "4,8,*,12,12,12,*,*,*,*,12,12,12,12,12,12,12,12")
|
||||
(set_attr "xnack" "*,*,*,*,off,on,*,*,*,*,off,on,*,*,*,off,on,*")])
|
||||
|
||||
; 128-bit move.
|
||||
|
||||
(define_insn_and_split "*movti_insn"
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand"
|
||||
"=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
|
||||
(match_operand:TI 1 "general_operand"
|
||||
"SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
|
||||
"=SD,RS,Sm,&Sm,RF, v,&v,v, v,SD,RM, v,&v,RL, v")
|
||||
(match_operand:TI 1 "general_operand"
|
||||
"SSB,Sm,RS, RS, v,RF,RF,v,Sv, v, v,RM,RM, v,RL"))]
|
||||
""
|
||||
"@
|
||||
#
|
||||
s_store_dwordx4\t%1, %A0
|
||||
s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
flat_store_dwordx4\t%A0, %1%O0%g0
|
||||
flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
#
|
||||
#
|
||||
#
|
||||
global_store_dwordx4\t%A0, %1%O0%g0
|
||||
global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
|
||||
"reload_completed
|
||||
|
@ -691,10 +719,11 @@
|
|||
operands[0] = gcn_operand_part (TImode, operands[0], 0);
|
||||
operands[1] = gcn_operand_part (TImode, operands[1], 0);
|
||||
}
|
||||
[(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
|
||||
ds,ds")
|
||||
(set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
|
||||
(set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
|
||||
[(set_attr "type" "mult,smem,smem,smem,flat,flat,flat,vmult,vmult,vmult,flat,
|
||||
flat,flat,ds,ds")
|
||||
(set_attr "delayeduse" "*,*,yes,yes,*,*,*,*,*,*,*,yes,*,*,*")
|
||||
(set_attr "length" "*,12,12,12,12,12,12,*,*,*,12,12,12,12,12")
|
||||
(set_attr "xnack" "*,*,off,on,*,off,on,*,*,*,*,off,on,*,*")])
|
||||
|
||||
;; }}}
|
||||
;; {{{ Prologue/Epilogue
|
||||
|
@ -862,6 +891,8 @@
|
|||
(clobber (reg:BI SCC_REG))]
|
||||
"GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
|
||||
{
|
||||
/* This s_load may not be XNACK-safe on devices where the GOT may fault.
|
||||
DGPUs are most likely fine. */
|
||||
if (SYMBOL_REF_P (operands[1])
|
||||
&& SYMBOL_REF_WEAK (operands[1]))
|
||||
return "s_getpc_b64\t%0\;"
|
||||
|
@ -886,6 +917,8 @@
|
|||
{
|
||||
/* !!! These sequences clobber CC_SAVE_REG. */
|
||||
|
||||
/* This s_load may not be XNACK-safe on devices where the GOT may fault.
|
||||
DGPUs are most likely fine. */
|
||||
if (SYMBOL_REF_P (operands[1])
|
||||
&& SYMBOL_REF_WEAK (operands[1]))
|
||||
return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
|
||||
|
|
|
@ -86,23 +86,23 @@ Wopenacc-dims
|
|||
Target Var(warn_openacc_dims) Warning
|
||||
Warn about invalid OpenACC dimensions.
|
||||
|
||||
mxnack
|
||||
Target Var(flag_xnack) Init(0)
|
||||
Compile for devices requiring XNACK enabled. Default off.
|
||||
|
||||
Enum
|
||||
Name(sram_ecc_type) Type(enum sram_ecc_type)
|
||||
Name(hsaco_attr_type) Type(enum hsaco_attr_type)
|
||||
SRAM-ECC modes:
|
||||
|
||||
EnumValue
|
||||
Enum(sram_ecc_type) String(off) Value(SRAM_ECC_OFF)
|
||||
Enum(hsaco_attr_type) String(off) Value(HSACO_ATTR_OFF)
|
||||
|
||||
EnumValue
|
||||
Enum(sram_ecc_type) String(on) Value(SRAM_ECC_ON)
|
||||
Enum(hsaco_attr_type) String(on) Value(HSACO_ATTR_ON)
|
||||
|
||||
EnumValue
|
||||
Enum(sram_ecc_type) String(any) Value(SRAM_ECC_ANY)
|
||||
Enum(hsaco_attr_type) String(any) Value(HSACO_ATTR_ANY)
|
||||
|
||||
mxnack=
|
||||
Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_ANY)
|
||||
Compile for devices requiring XNACK enabled. Default \"any\".
|
||||
|
||||
msram-ecc=
|
||||
Target RejectNegative Joined ToLower Enum(sram_ecc_type) Var(flag_sram_ecc) Init(SRAM_ECC_ANY)
|
||||
Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY)
|
||||
Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\".
|
||||
|
|
|
@ -72,10 +72,14 @@
|
|||
|
||||
#define SET_XNACK_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
|
||||
| EF_AMDGPU_FEATURE_XNACK_ON_V4)
|
||||
#define SET_XNACK_ANY(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
|
||||
| EF_AMDGPU_FEATURE_XNACK_ANY_V4)
|
||||
#define SET_XNACK_OFF(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
|
||||
| EF_AMDGPU_FEATURE_XNACK_OFF_V4)
|
||||
#define TEST_XNACK(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
|
||||
== EF_AMDGPU_FEATURE_XNACK_ON_V4)
|
||||
#define TEST_XNACK_ANY(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
|
||||
== EF_AMDGPU_FEATURE_XNACK_ANY_V4)
|
||||
#define TEST_XNACK_ON(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
|
||||
== EF_AMDGPU_FEATURE_XNACK_ON_V4)
|
||||
|
||||
#define SET_SRAM_ECC_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
|
||||
| EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
|
||||
|
@ -881,9 +885,11 @@ main (int argc, char **argv)
|
|||
fPIC = true;
|
||||
else if (strcmp (argv[i], "-fpic") == 0)
|
||||
fpic = true;
|
||||
else if (strcmp (argv[i], "-mxnack") == 0)
|
||||
else if (strcmp (argv[i], "-mxnack=on") == 0)
|
||||
SET_XNACK_ON (elf_flags);
|
||||
else if (strcmp (argv[i], "-mno-xnack") == 0)
|
||||
else if (strcmp (argv[i], "-mxnack=any") == 0)
|
||||
SET_XNACK_ANY (elf_flags);
|
||||
else if (strcmp (argv[i], "-mxnack=off") == 0)
|
||||
SET_XNACK_OFF (elf_flags);
|
||||
else if (strcmp (argv[i], "-msram-ecc=on") == 0)
|
||||
SET_SRAM_ECC_ON (elf_flags);
|
||||
|
@ -1042,8 +1048,9 @@ main (int argc, char **argv)
|
|||
obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
|
||||
obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
|
||||
obstack_ptr_grow (&ld_argv_obstack,
|
||||
(TEST_XNACK (elf_flags)
|
||||
? "-mxnack" : "-mno-xnack"));
|
||||
(TEST_XNACK_ON (elf_flags) ? "-mxnack=on"
|
||||
: TEST_XNACK_ANY (elf_flags) ? "-mxnack=any"
|
||||
: "-mxnack=off"));
|
||||
obstack_ptr_grow (&ld_argv_obstack,
|
||||
(TEST_SRAM_ECC_ON (elf_flags) ? "-msram-ecc=on"
|
||||
: TEST_SRAM_ECC_ANY (elf_flags) ? "-msram-ecc=any"
|
||||
|
|
Loading…
Reference in New Issue