[nvptx] Handle sm_7x shared atomic store more optimally

For sm_7x atomic stores we fall back on expand_atomic_store, but this
results in using membar.sys for shared stores.

Fix this by adding an nvptx_atomic_store insn that adds a membar.cta for a
shared store.

Tested on x86_64 with nvptx accelerator.

gcc/ChangeLog:

2022-02-02  Tom de Vries  <tdevries@suse.de>

	* config/nvptx/nvptx.md (define_insn "nvptx_atomic_store<mode>"): New
	define_insn.
	(define_expand "atomic_store<mode>"): Use nvptx_atomic_store<mode> for
	TARGET_SM70.
	(define_c_enum "unspecv"): Add UNSPECV_ST.

gcc/testsuite/ChangeLog:

2022-02-02  Tom de Vries  <tdevries@suse.de>

	* gcc.target/nvptx/atomic-store-2.c: New test.
This commit is contained in:
Tom de Vries 2022-02-02 16:23:37 +01:00
parent 3e7d4e82dc
commit 19a13d5a1d
2 changed files with 46 additions and 2 deletions

View File

@ -57,6 +57,7 @@
UNSPECV_CAS
UNSPECV_CAS_LOCAL
UNSPECV_XCHG
UNSPECV_ST
UNSPECV_BARSYNC
UNSPECV_WARPSYNC
UNSPECV_UNIFORM_WARP_CHECK
@ -2067,8 +2068,11 @@
}
if (TARGET_SM70)
/* Fall back to expand_atomic_store. */
FAIL;
{
emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1],
operands[2]));
DONE;
}
bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]);
if (!maybe_shared_p)
@ -2081,6 +2085,20 @@
DONE;
})
;; Atomic store pattern for the SDIM modes, enabled only when TARGET_SM70
;; holds.  Operand 0 is the destination memory, operand 1 is the value to
;; store (register or immediate, per the "Ri" constraint) and operand 2 is
;; the memory model, given as a const_int.  The stored value is wrapped in
;; an unspec_volatile tagged UNSPECV_ST.
;; The output code hands the "st" template, the operand array and the
;; numbers 0 and 2 to nvptx_output_atomic_insn.
;; NOTE(review): 0 and 2 presumably name the memory and memory-model
;; operands, and any fences around the store are presumably emitted by
;; nvptx_output_atomic_insn, which is defined elsewhere -- confirm there.
(define_insn "nvptx_atomic_store<mode>"
[(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory
(unspec_volatile:SDIM
[(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_ST))]
"TARGET_SM70"
{
const char *t
= "%.\tst%A0.b%T0\t%0, %1;";
return nvptx_output_atomic_insn (t, operands, 0, 2);
}
[(set_attr "atomic" "true")])
(define_insn "atomic_fetch_add<mode>"
[(set (match_operand:SDIM 1 "memory_operand" "+m")
(unspec_volatile:SDIM

View File

@ -0,0 +1,26 @@
/* Test the atomic store expansion for sm > sm_6x targets,
shared state space. */
/* { dg-do compile } */
/* { dg-options "-misa=sm_75" } */
/* Local definition of the memory-model constant passed to the atomic
   stores below; only the sequentially consistent model is used here.
   NOTE(review): the value 5 presumably matches the compiler's
   __ATOMIC_SEQ_CST -- confirm.  */
enum memmodel
{
MEMMODEL_SEQ_CST = 5
};
/* Store targets for the test.  Both carry the "shared" attribute so that
   the stores are expected to use the shared state space (st.shared.*).  */
unsigned int u32 __attribute__((shared));
unsigned long long int u64 __attribute__((shared));
/* Perform one sequentially consistent atomic store of zero to each of the
   shared variables.  The result is not inspected at run time; the dg-final
   directives that follow check the emitted assembly instead.  */
int
main()
{
/* Expected by the dg-final scan to yield exactly one st.shared.b32.  */
__atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST);
/* Expected by the dg-final scan to yield exactly one st.shared.b64.  */
__atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST);
return 0;
}
/* { dg-final { scan-assembler-times "st.shared.b32" 1 } } */
/* { dg-final { scan-assembler-times "st.shared.b64" 1 } } */
/* { dg-final { scan-assembler-times "membar.cta" 4 } } */