[AArch64] Use atomic load-operate instructions for fetch-update patterns.
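
At the source level, the fetch-update patterns in question are the
__atomic_fetch_* builtins.  A minimal example of the kind of code this
patch affects (illustrative, not part of the patch):

    #include <stdint.h>

    /* With -O2 -march=armv8-a+lse this is expected to compile to a
       single LDADD instruction instead of an LDXR/STXR retry loop.  */
    uint32_t
    fetch_add_relaxed (uint32_t *p, uint32_t n)
    {
      return __atomic_fetch_add (p, n, __ATOMIC_RELAXED);
    }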

gcc/
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* config/aarch64/aarch64-protos.h
	(aarch64_atomic_ldop_supported_p): Declare.
	* config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New.
	(enum aarch64_atomic_load_op_code): New.
	(aarch64_emit_atomic_load_op): New.
	(aarch64_gen_atomic_ldop): Update to support load-operate
	patterns.
	* config/aarch64/atomics.md (atomic_<atomic_optab><mode>): Change
	to an expander.
	(aarch64_atomic_<atomic_optab><mode>): New.
	(aarch64_atomic_<atomic_optab><mode>_lse): New.
	(atomic_fetch_<atomic_optab><mode>): Change to an expander.
	(aarch64_atomic_fetch_<atomic_optab><mode>): New.
	(aarch64_atomic_fetch_<atomic_optab><mode>_lse): New.

gcc/testsuite/
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/aarch64/atomic-inst-ldadd.c: New.
	* gcc.target/aarch64/atomic-inst-ldlogic.c: New.

From-SVN: r228001
Author: Matthew Wahab <matthew.wahab@arm.com>, 2015-09-22 09:35:17 +00:00
commit 641c2f8b69 (parent 6380d2bc38)
7 changed files with 457 additions and 14 deletions

gcc/ChangeLog

@@ -1,3 +1,20 @@
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* config/aarch64/aarch64-protos.h
	(aarch64_atomic_ldop_supported_p): Declare.
	* config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New.
	(enum aarch64_atomic_load_op_code): New.
	(aarch64_emit_atomic_load_op): New.
	(aarch64_gen_atomic_ldop): Update to support load-operate
	patterns.
	* config/aarch64/atomics.md (atomic_<atomic_optab><mode>): Change
	to an expander.
	(aarch64_atomic_<atomic_optab><mode>): New.
	(aarch64_atomic_<atomic_optab><mode>_lse): New.
	(atomic_fetch_<atomic_optab><mode>): Change to an expander.
	(aarch64_atomic_fetch_<atomic_optab><mode>): New.
	(aarch64_atomic_fetch_<atomic_optab><mode>_lse): New.

2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* config/aarch64/atomics.md (UNSPECV_ATOMIC_LDOP): New.

gcc/config/aarch64/aarch64-protos.h

@@ -378,6 +378,8 @@ rtx aarch64_load_tp (rtx);
void aarch64_expand_compare_and_swap (rtx op[]);
void aarch64_split_compare_and_swap (rtx op[]);
void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
bool aarch64_atomic_ldop_supported_p (enum rtx_code);
void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);

gcc/config/aarch64/aarch64.c

@@ -10871,6 +10871,32 @@ aarch64_expand_compare_and_swap (rtx operands[])
  emit_insn (gen_rtx_SET (bval, x));
}
/* Test whether the target supports using an atomic load-operate
   instruction.  CODE is the operation.  Returns FALSE if the operation
   isn't supported by the architecture.  */
bool
aarch64_atomic_ldop_supported_p (enum rtx_code code)
{
  if (!TARGET_LSE)
    return false;

  switch (code)
    {
    case SET:
    case AND:
    case IOR:
    case XOR:
    case MINUS:
    case PLUS:
      return true;
    default:
      return false;
    }
}
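
For reference, the supported codes map onto the __atomic builtins roughly
as follows: PLUS/MINUS serve __atomic_fetch_add/sub, AND/IOR/XOR serve
__atomic_fetch_and/or/xor, and SET serves exchange.  NAND is deliberately
absent, so a NAND fetch-update keeps the exclusive-loop expansion (see
atomic_fetch_nand<mode> below).  An illustrative case:

    /* LSE has no load-NAND instruction, so this still expands to an
       LDXR/STXR loop even with -march=armv8-a+lse.  */
    int
    fetch_nand_seq_cst (int *p, int n)
    {
      return __atomic_fetch_nand (p, n, __ATOMIC_SEQ_CST);
    }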
/* Emit a barrier that is appropriate for memory model MODEL, at the end of
   a sequence implementing an atomic operation.  */
@@ -11013,26 +11039,169 @@ aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
  emit_insn (gen (dst, mem, value, model));
}
/* Operations supported by aarch64_emit_atomic_load_op.  */
enum aarch64_atomic_load_op_code
{
  AARCH64_LDOP_PLUS,  /* A + B.  */
  AARCH64_LDOP_XOR,   /* A ^ B.  */
  AARCH64_LDOP_OR,    /* A | B.  */
  AARCH64_LDOP_BIC    /* A & ~B.  */
};
/* Emit an atomic load-operate. */
static void
aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
                             machine_mode mode, rtx dst, rtx src,
                             rtx mem, rtx model)
{
  typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
  const aarch64_atomic_load_op_fn plus[] =
  {
    gen_aarch64_atomic_loadaddqi,
    gen_aarch64_atomic_loadaddhi,
    gen_aarch64_atomic_loadaddsi,
    gen_aarch64_atomic_loadadddi
  };
  const aarch64_atomic_load_op_fn eor[] =
  {
    gen_aarch64_atomic_loadeorqi,
    gen_aarch64_atomic_loadeorhi,
    gen_aarch64_atomic_loadeorsi,
    gen_aarch64_atomic_loadeordi
  };
  const aarch64_atomic_load_op_fn ior[] =
  {
    gen_aarch64_atomic_loadsetqi,
    gen_aarch64_atomic_loadsethi,
    gen_aarch64_atomic_loadsetsi,
    gen_aarch64_atomic_loadsetdi
  };
  const aarch64_atomic_load_op_fn bic[] =
  {
    gen_aarch64_atomic_loadclrqi,
    gen_aarch64_atomic_loadclrhi,
    gen_aarch64_atomic_loadclrsi,
    gen_aarch64_atomic_loadclrdi
  };
  aarch64_atomic_load_op_fn gen;
  int idx = 0;

  switch (mode)
    {
    case QImode: idx = 0; break;
    case HImode: idx = 1; break;
    case SImode: idx = 2; break;
    case DImode: idx = 3; break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
    case AARCH64_LDOP_XOR: gen = eor[idx]; break;
    case AARCH64_LDOP_OR: gen = ior[idx]; break;
    case AARCH64_LDOP_BIC: gen = bic[idx]; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (dst, mem, src, model));
}
/* Emit an atomic load-operate.  CODE is the operation.  OUT_DATA is the
   location to store the data read from memory.  MEM is the memory location
   to read and modify.  MODEL_RTX is the memory ordering to use.  VALUE is
   the second operand for the operation.  */
void
aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
                         rtx mem, rtx value, rtx model_rtx)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  const bool short_mode = (mode < SImode);
  aarch64_atomic_load_op_code ldop_code;
  rtx src;

  if (out_data)
    out_data = gen_lowpart (mode, out_data);

  /* Make sure the value is in a register, putting it into a destination
     register if it needs to be manipulated.  */
  if (!register_operand (value, mode)
      || code == AND || code == MINUS)
    {
      src = out_data;
      emit_move_insn (src, gen_lowpart (mode, value));
    }
  else
    src = value;
  gcc_assert (register_operand (src, mode));

  /* Preprocess the data for the operation as necessary.  If the operation is
     a SET then emit a swap instruction and finish.  */
  switch (code)
    {
    case SET:
      aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
      return;

    case MINUS:
      /* Negate the value and treat it as a PLUS.  */
      {
        rtx neg_src;

        /* Resize the value if necessary.  */
        if (short_mode)
          src = gen_lowpart (wmode, src);

        neg_src = gen_rtx_NEG (wmode, src);
        emit_insn (gen_rtx_SET (src, neg_src));

        if (short_mode)
          src = gen_lowpart (mode, src);
      }
      /* Fall-through.  */
    case PLUS:
      ldop_code = AARCH64_LDOP_PLUS;
      break;

    case IOR:
      ldop_code = AARCH64_LDOP_OR;
      break;

    case XOR:
      ldop_code = AARCH64_LDOP_XOR;
      break;

    case AND:
      {
        rtx not_src;

        /* Resize the value if necessary.  */
        if (short_mode)
          src = gen_lowpart (wmode, src);

        not_src = gen_rtx_NOT (wmode, src);
        emit_insn (gen_rtx_SET (src, not_src));

        if (short_mode)
          src = gen_lowpart (mode, src);
      }
      ldop_code = AARCH64_LDOP_BIC;
      break;

    default:
      /* The operation can't be done with atomic instructions.  */
      gcc_unreachable ();
    }

  aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
}
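
A note on the MINUS and AND cases above: LSE provides no load-subtract or
load-and instruction, so the operand is negated or complemented and LDADD
or LDCLR is used instead.  The underlying identities, as a quick
stand-alone (non-atomic) check:

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      uint32_t a = 0x1234, b = 0xff;
      /* MINUS as PLUS: A - B == A + (-B) in modular arithmetic.  */
      assert (a - b == a + (0u - b));
      /* AND as BIC: LDCLR computes A & ~src, so passing src = ~B
         yields A & B.  */
      assert ((a & b) == (a & ~(~b)));
      return 0;
    }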
/* Split an atomic operation. */

gcc/config/aarch64/atomics.md

@@ -225,16 +225,37 @@
}
)
(define_insn_and_split "atomic_<atomic_optab><mode>"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_ATOMIC_OP))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:ALLI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
(define_expand "atomic_<atomic_optab><mode>"
[(match_operand:ALLI 0 "aarch64_sync_memory_operand" "")
(atomic_op:ALLI
(match_operand:ALLI 1 "<atomic_op_operand>" "")
(match_operand:SI 2 "const_int_operand"))]
""
{
rtx (*gen) (rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
if (aarch64_atomic_ldop_supported_p (<CODE>))
gen = gen_aarch64_atomic_<atomic_optab><mode>_lse;
else
gen = gen_aarch64_atomic_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2]));
DONE;
}
)
(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 2 "const_int_operand")]
UNSPECV_ATOMIC_OP))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:ALLI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
""
"#"
"&& reload_completed"
@@ -246,6 +267,25 @@
}
)
(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 2 "const_int_operand")]
UNSPECV_ATOMIC_OP))
(clobber (match_scratch:ALLI 3 "=&r"))]
"TARGET_LSE"
"#"
"&& reload_completed"
[(const_int 0)]
{
aarch64_gen_atomic_ldop (<CODE>, operands[3], operands[0],
operands[1], operands[2]);
DONE;
}
)
(define_insn_and_split "atomic_nand<mode>"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
@@ -268,7 +308,30 @@
}
)
(define_insn_and_split "atomic_fetch_<atomic_optab><mode>"
;; Load-operate-store, returning the updated memory data.
(define_expand "atomic_fetch_<atomic_optab><mode>"
[(match_operand:ALLI 0 "register_operand" "")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
(atomic_op:ALLI
(match_operand:ALLI 2 "<atomic_op_operand>" "")
(match_operand:SI 3 "const_int_operand"))]
""
{
rtx (*gen) (rtx, rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
if (aarch64_atomic_ldop_supported_p (<CODE>))
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse;
else
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
DONE;
})
(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
(set (match_dup 1)
@@ -291,6 +354,26 @@
}
)
(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
(set (match_dup 1)
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 1)
(match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 3 "const_int_operand")]
UNSPECV_ATOMIC_LDOP))]
"TARGET_LSE"
"#"
"&& reload_completed"
[(const_int 0)]
{
aarch64_gen_atomic_ldop (<CODE>, operands[0], operands[1],
operands[2], operands[3]);
DONE;
}
)
(define_insn_and_split "atomic_fetch_nand<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))

gcc/testsuite/ChangeLog

@@ -1,3 +1,8 @@
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/aarch64/atomic-inst-ldadd.c: New.
	* gcc.target/aarch64/atomic-inst-ldlogic.c: New.

2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/aarch64/atomic-inst-ops.inc (TEST_MODEL): New.

gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c

@@ -0,0 +1,58 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+lse" } */
/* Test ARMv8.1-A Load-ADD instruction. */
#include "atomic-inst-ops.inc"
#define TEST TEST_ONE
#define LOAD_ADD(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_add (val, foo, MODEL); \
}
#define LOAD_ADD_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_add (val, foo, MODEL); \
}
#define LOAD_SUB(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_sub (val, foo, MODEL); \
}
#define LOAD_SUB_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_sub (val, foo, MODEL); \
}
TEST (load_add, LOAD_ADD)
TEST (load_add_notreturn, LOAD_ADD_NORETURN)
TEST (load_sub, LOAD_SUB)
TEST (load_sub_notreturn, LOAD_SUB_NORETURN)
/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddab\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddalb\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddah\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddalh\t" 16} } */
/* { dg-final { scan-assembler-times "ldadd\t" 16} } */
/* { dg-final { scan-assembler-times "ldadda\t" 32} } */
/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddal\t" 32} } */
/* { dg-final { scan-assembler-not "ldaxr\t" } } */
/* { dg-final { scan-assembler-not "stlxr\t" } } */
/* { dg-final { scan-assembler-not "dmb" } } */

gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c

@@ -0,0 +1,109 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+lse" } */
/* Test ARMv8.1-A LD<logic-op> instruction. */
#include "atomic-inst-ops.inc"
#define TEST TEST_ONE
#define LOAD_OR(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_or (val, foo, MODEL); \
}
#define LOAD_OR_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_or (val, foo, MODEL); \
}
#define LOAD_AND(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_and (val, foo, MODEL); \
}
#define LOAD_AND_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_and (val, foo, MODEL); \
}
#define LOAD_XOR(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_xor (val, foo, MODEL); \
}
#define LOAD_XOR_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_xor (val, foo, MODEL); \
}
TEST (load_or, LOAD_OR)
TEST (load_or_notreturn, LOAD_OR_NORETURN)
TEST (load_and, LOAD_AND)
TEST (load_and_notreturn, LOAD_AND_NORETURN)
TEST (load_xor, LOAD_XOR)
TEST (load_xor_notreturn, LOAD_XOR_NORETURN)
/* Load-OR. */
/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetab\t" 8} } */
/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetalb\t" 8} } */
/* { dg-final { scan-assembler-times "ldseth\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetah\t" 8} } */
/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetalh\t" 8} } */
/* { dg-final { scan-assembler-times "ldset\t" 8} } */
/* { dg-final { scan-assembler-times "ldseta\t" 16} } */
/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */
/* { dg-final { scan-assembler-times "ldsetal\t" 16} } */
/* Load-AND. */
/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */
/* { dg-final { scan-assembler-times "ldclrab\t" 8} } */
/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */
/* { dg-final { scan-assembler-times "ldclralb\t" 8} } */
/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */
/* { dg-final { scan-assembler-times "ldclrah\t" 8} } */
/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */
/* { dg-final { scan-assembler-times "ldclralh\t" 8} } */
/* { dg-final { scan-assembler-times "ldclr\t" 8} */
/* { dg-final { scan-assembler-times "ldclra\t" 16} } */
/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */
/* { dg-final { scan-assembler-times "ldclral\t" 16} } */
/* Load-XOR. */
/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */
/* { dg-final { scan-assembler-times "ldeorab\t" 8} } */
/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */
/* { dg-final { scan-assembler-times "ldeoralb\t" 8} } */
/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */
/* { dg-final { scan-assembler-times "ldeorah\t" 8} } */
/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */
/* { dg-final { scan-assembler-times "ldeoralh\t" 8} } */
/* { dg-final { scan-assembler-times "ldeor\t" 8} */
/* { dg-final { scan-assembler-times "ldeora\t" 16} } */
/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */
/* { dg-final { scan-assembler-times "ldeoral\t" 16} } */
/* { dg-final { scan-assembler-not "ldaxr\t" } } */
/* { dg-final { scan-assembler-not "stlxr\t" } } */
/* { dg-final { scan-assembler-not "dmb" } } */