i386: Use lock prefixed insn instead of MFENCE [PR95750]

Currently, __atomic_thread_fence(seq_cst) on x86 and x86-64 generates an
mfence instruction. A dummy atomic instruction (a lock-prefixed instruction
or xchg with a memory operand) would provide the same sequential consistency
guarantees while being more efficient on most current CPUs. The mfence
instruction additionally orders non-temporal stores, but these are not relevant
for atomic operations and are not ordered by seq_cst atomic operations anyway.

2020-07-20  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:
	PR target/95750
	* config/i386/i386.h (TARGET_AVOID_MFENCE):
	Rename from TARGET_USE_XCHG_FOR_ATOMIC_STORE.
	* config/i386/sync.md (mfence_sse2): Disable for TARGET_AVOID_MFENCE.
	(mfence_nosse): Enable also for TARGET_AVOID_MFENCE. Emit the
	stack memory reference in word_mode.
	(mem_thread_fence): Do not generate mfence_sse2 pattern when
	TARGET_AVOID_MFENCE is true.
	(atomic_store<mode>): Update for rename.
	* config/i386/x86-tune.def (X86_TUNE_AVOID_MFENCE):
	Rename from X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE.

gcc/testsuite/ChangeLog:
	PR target/95750
	* gcc.target/i386/pr95750.c: New test.
This commit is contained in:
Uros Bizjak 2020-07-20 20:34:46 +02:00
parent d5803b9876
commit 3c5e83d5b3
4 changed files with 36 additions and 11 deletions

View File

@ -598,8 +598,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
#define TARGET_ONE_IF_CONV_INSN \
ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
#define TARGET_USE_XCHG_FOR_ATOMIC_STORE \
ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE]
#define TARGET_AVOID_MFENCE ix86_tune_features[X86_TUNE_AVOID_MFENCE]
#define TARGET_EMIT_VZEROUPPER \
ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
#define TARGET_EXPAND_ABS \

View File

@ -89,7 +89,8 @@
(define_insn "mfence_sse2"
[(set (match_operand:BLK 0)
(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
"TARGET_64BIT || TARGET_SSE2"
"(TARGET_64BIT || TARGET_SSE2)
&& !TARGET_AVOID_MFENCE"
"mfence"
[(set_attr "type" "sse")
(set_attr "length_address" "0")
@ -100,8 +101,14 @@
[(set (match_operand:BLK 0)
(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
(clobber (reg:CC FLAGS_REG))]
"!(TARGET_64BIT || TARGET_SSE2)"
"lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
"!(TARGET_64BIT || TARGET_SSE2)
|| TARGET_AVOID_MFENCE"
{
rtx mem = gen_rtx_MEM (word_mode, stack_pointer_rtx);
output_asm_insn ("lock{%;} or%z0\t{$0, %0|%0, 0}", &mem);
return "";
}
[(set_attr "memory" "unknown")])
(define_expand "mem_thread_fence"
@ -117,7 +124,8 @@
rtx (*mfence_insn)(rtx);
rtx mem;
if (TARGET_64BIT || TARGET_SSE2)
if ((TARGET_64BIT || TARGET_SSE2)
&& !TARGET_AVOID_MFENCE)
mfence_insn = gen_mfence_sse2;
else
mfence_insn = gen_mfence_nosse;
@ -306,11 +314,10 @@
{
operands[1] = force_reg (<MODE>mode, operands[1]);
/* For seq-cst stores, use XCHG when we lack MFENCE
or when target prefers XCHG. */
/* For seq-cst stores, use XCHG when we lack MFENCE. */
if (is_mm_seq_cst (model)
&& (!(TARGET_64BIT || TARGET_SSE2)
|| TARGET_USE_XCHG_FOR_ATOMIC_STORE))
|| TARGET_AVOID_MFENCE))
{
emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
operands[0], operands[1],

View File

@ -313,8 +313,8 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
| m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
/* X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE: Use xchg instead of mov+mfence. */
DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store",
/* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */
DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC)
/* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by

View File

@ -0,0 +1,19 @@
/* PR target/95750 */
/* { dg-do compile } */
/* { dg-options "-O2 -march=core2" } */
/* Issue a sequentially consistent thread fence.  The scan-assembler-not
   check at the end of this test requires that the assembly generated for
   this file contains no "mfence".  */
void
foo (void)
{
__atomic_thread_fence (__ATOMIC_SEQ_CST);
}
/* Global object that bar stores to atomically.  */
int x;
/* Perform a sequentially consistent atomic store of -1 to x.  The
   scan-assembler-not check at the end of this test requires that the
   assembly generated for this file contains no "mfence".  */
void
bar (void)
{
__atomic_store_n (&x, -1, __ATOMIC_SEQ_CST);
}
/* { dg-final { scan-assembler-not "mfence" } } */