[nvptx] Handle sm_7x shared atomic store more optimally
For sm_7x atomic stores we fall back on expand_atomic_store, but this results in using membar.sys for shared stores. Fix this by adding an nvptx_atomic_store insn that adds a membar.cta for a shared store. Tested on x86_64 with nvptx accelerator. gcc/ChangeLog: 2022-02-02 Tom de Vries <tdevries@suse.de> * config/nvptx/nvptx.md (define_insn "nvptx_atomic_store<mode>"): New define_insn. (define_expand "atomic_store<mode>"): Use nvptx_atomic_store<mode> for TARGET_SM70. (define_c_enum "unspecv"): Add UNSPECV_ST. gcc/testsuite/ChangeLog: 2022-02-02 Tom de Vries <tdevries@suse.de> * gcc.target/nvptx/atomic-store-2.c: New test.
This commit is contained in:
parent
3e7d4e82dc
commit
19a13d5a1d
|
@ -57,6 +57,7 @@
|
|||
UNSPECV_CAS
|
||||
UNSPECV_CAS_LOCAL
|
||||
UNSPECV_XCHG
|
||||
UNSPECV_ST
|
||||
UNSPECV_BARSYNC
|
||||
UNSPECV_WARPSYNC
|
||||
UNSPECV_UNIFORM_WARP_CHECK
|
||||
|
@ -2067,8 +2068,11 @@
|
|||
}
|
||||
|
||||
if (TARGET_SM70)
|
||||
/* Fall back to expand_atomic_store. */
|
||||
FAIL;
|
||||
{
|
||||
emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1],
|
||||
operands[2]));
|
||||
DONE;
|
||||
}
|
||||
|
||||
bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]);
|
||||
if (!maybe_shared_p)
|
||||
|
@ -2081,6 +2085,20 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; Atomic store insn: stores operand 1 (the input value) to memory operand 0
;; under the memory model given by constant operand 2.  The pattern is only
;; enabled when TARGET_SM70 holds and is represented as an unspec_volatile
;; tagged UNSPECV_ST.
;; The output code builds a plain "st" template and hands it to
;; nvptx_output_atomic_insn; the trailing arguments 0 and 2 presumably
;; identify the memory and memory-model operands (confirm against that
;; helper's definition).  Per the commit description, going through this
;; insn makes a store to shared memory use membar.cta instead of the
;; membar.sys produced by the generic expand_atomic_store fallback.
(define_insn "nvptx_atomic_store<mode>"
|
||||
[(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory
|
||||
(unspec_volatile:SDIM
|
||||
[(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
|
||||
(match_operand:SI 2 "const_int_operand")] ;; model
|
||||
UNSPECV_ST))]
|
||||
"TARGET_SM70"
|
||||
{
|
||||
const char *t
|
||||
= "%.\tst%A0.b%T0\t%0, %1;";
|
||||
return nvptx_output_atomic_insn (t, operands, 0, 2);
|
||||
}
|
||||
[(set_attr "atomic" "true")])
|
||||
|
||||
(define_insn "atomic_fetch_add<mode>"
|
||||
[(set (match_operand:SDIM 1 "memory_operand" "+m")
|
||||
(unspec_volatile:SDIM
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
/* Test the atomic store expansion for sm > sm_6x targets,
|
||||
shared state space. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-misa=sm_75" } */
|
||||
|
||||
/* Test-local memory-model constant, passed as the memory-order argument of
   the __atomic_store_n calls in main.  The value 5 is presumably chosen to
   match GCC's __ATOMIC_SEQ_CST (confirm against the GCC __atomic builtins
   documentation).  */
enum memmodel
|
||||
{
|
||||
MEMMODEL_SEQ_CST = 5
|
||||
};
|
||||
|
||||
/* Store targets for the test: one 32-bit and one 64-bit object, both marked
   with the "shared" attribute so that the stores are emitted as st.shared
   (which is what the dg-final scans look for).  */
unsigned int u32 __attribute__((shared));
|
||||
unsigned long long int u64 __attribute__((shared));
|
||||
|
||||
/* Perform one atomic store with MEMMODEL_SEQ_CST to each of the two shared
   variables.  The test's dg-final directives check the assembly generated
   for exactly these two statements: one st.shared.b32, one st.shared.b64,
   and four membar.cta in total.  */
int
|
||||
main()
|
||||
{
|
||||
__atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST);
|
||||
__atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "st.shared.b32" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "st.shared.b64" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "membar.cta" 4 } } */
|
Loading…
Reference in New Issue