Check in patch/merge from cxx-mem-model Branch

From-SVN: r181031
Andrew Macleod 2011-11-06 14:55:48 +00:00
parent a8a058f652
commit 86951993f8
118 changed files with 10142 additions and 2897 deletions


@@ -1,3 +1,111 @@
2011-11-06 Andrew Macleod <amacleod@redhat.com>
Richard Henderson <rth@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
Merged from cxx-mem-model.
* cppbuiltin.c (define__GNUC__): Define __ATOMIC memory models.
* coretypes.h (enum memmodel): New. Enumerated memory model type.
* Makefile.in (cppbuiltin.o): Add missing dependency on $(TREE_H).
* genopinit.c (optabs): Add atomic direct optab handlers.
* sync-builtins.def (BUILT_IN_ATOMIC_*): New builtins.
* builtin-types.def (BT_CONST_VOLATILE_PTR,
BT_FN_I{1,2,4,8,16}_CONST_VPTR_INT, BT_FN_VOID_VPTR_INT,
BT_FN_BOOL_VPTR_INT, BT_FN_BOOL_SIZE_CONST_VPTR,
BT_FN_I{1,2,4,8,16}_VPTR_I{1,2,4,8,16}_INT,
BT_FN_VOID_VPTR_I{1,2,4,8,16}_INT, BT_FN_VOID_SIZE_VPTR_PTR_INT,
BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT,
BT_FN_BOOL_VPTR_PTR_I{1,2,4,8,16}_BOOL_INT_INT): New builtin types.
* expr.h (expand_atomic_*): Add prototypes.
(expand_{bool,val}_compare_and_swap): Remove prototypes.
* c-typeck.c (build_function_call_vec): Don't reprocess __atomic
parameters.
* common.opt (Winvalid-memory-model): New warning flag.
(finline-atomics): New. Flag to disable atomic inlining.
* params.h (ALLOW_LOAD_DATA_RACES): New.
(ALLOW_PACKED_LOAD_DATA_RACES): New.
(ALLOW_PACKED_STORE_DATA_RACES): New.
* params.def (PARAM_ALLOW_LOAD_DATA_RACES): New.
(PARAM_ALLOW_PACKED_LOAD_DATA_RACES): New.
(PARAM_ALLOW_PACKED_STORE_DATA_RACES): New.
* builtins.c (is_builtin_name): Handle __atomic.
(get_memmodel): New. Extract memory model.
(expand_expr_force_mode): New. Factor out common code for ensuring an
integer argument is in the proper mode.
(expand_builtin_sync_operation): Remove ignore param. Always call
expand_atomic_fetch_op instead of the old expanders.
(expand_builtin_compare_and_swap,
expand_builtin_sync_lock_test_and_set): Use expand_expr_force_mode,
call atomic expanders instead of sync expanders.
(expand_builtin_sync_lock_release): Call atomic_store expander.
(expand_builtin_atomic_compare_exchange, expand_builtin_atomic_load,
expand_builtin_atomic_store, expand_builtin_atomic_fetch_op): New.
(expand_builtin_atomic_exchange): New.
(fold_builtin_atomic_always_lock_free,
expand_builtin_atomic_always_lock_free,
fold_builtin_atomic_is_lock_free, expand_builtin_atomic_is_lock_free):
New.
(expand_builtin_mem_thread_fence, expand_builtin_atomic_thread_fence,
expand_builtin_atomic_signal_fence): New.
(expand_builtin_mem_signal_fence): New.
(expand_builtin): Add cases for BUILT_IN_ATOMIC_*.
(fold_builtin_2): Add cases for BUILT_IN_ATOMIC_{IS,ALWAYS}_LOCK_FREE.
* optabs.h (DOI_atomic_*): Define new atomics.
(atomic_*_optab): Define.
(can_compare_and_swap_p, expand_atomic_compare_and_swap): New
prototypes.
* optabs.c (expand_sync_operation, expand_sync_fetch_operation): Remove.
(expand_sync_lock_test_and_set): Remove.
(expand_atomic_load, expand_atomic_store): New.
(expand_atomic_exchange): New.
(expand_atomic_compare_and_swap): New. Implements
atomic_compare_exchange via compare and swap.
(struct atomic_op_functions): Opcode table struct for fetch ops.
(get_atomic_op_for_code): New. Return an opcode table entry.
(maybe_emit_op): New. Try to emit a fetch op.
(expand_atomic_fetch_op): New.
(expand_val_compare_and_swap_1): Remove.
(expand_val_compare_and_swap, expand_bool_compare_and_swap): Remove.
(expand_atomic_compare_and_swap): Rename from
expand_atomic_compare_exchange. Rewrite to return both success and
oldval return values; expand via both atomic and sync optabs.
(can_compare_and_swap_p): New.
(expand_compare_and_swap_loop): Use expand_atomic_compare_and_swap.
(maybe_gen_insn): Handle 7 and 8 operands.
* omp-low.c (expand_omp_atomic_fetch_op): Don't test individual
fetch_op optabs, only test can_compare_and_swap_p. Use __atomic
builtins instead of __sync builtins.
(expand_omp_atomic_pipeline): Use can_compare_and_swap_p.
* doc/extend.texi: Document __atomic built-in functions.
* doc/invoke.texi: Document data race parameters.
* doc/md.texi: Document atomic patterns.
* config/i386/i386.md (UNSPEC_MOVA): New.
(UNSPECV_CMPXCHG): Split into ...
(UNSPECV_CMPXCHG_1, UNSPECV_CMPXCHG_2,
UNSPECV_CMPXCHG_3, UNSPECV_CMPXCHG_4): New.
* config/i386/sync.md (ATOMIC): New mode iterator.
(atomic_load<ATOMIC>, atomic_store<ATOMIC>): New.
(atomic_loaddi_fpu, atomic_storedi_fpu, movdi_via_fpu): New.
(mem_thread_fence): Rename from memory_barrier.
Handle the added memory model parameter.
(mfence_nosse): Rename from memory_barrier_nosse.
(sync_compare_and_swap<CASMODE>): Split into ...
(atomic_compare_and_swap<SWI124>): this and ...
(atomic_compare_and_swap<CASMODE>): this. Handle the new parameters.
(atomic_compare_and_swap_single<SWI>): Rename from
sync_compare_and_swap<SWI>; rewrite to use split unspecs.
(atomic_compare_and_swap_double<DCASMODE>): Rename from
sync_double_compare_and_swap<DCASMODE>; rewrite to use split unspecs.
(*atomic_compare_and_swap_doubledi_pic): Rename from
sync_double_compare_and_swapdi_pic; rewrite to use split unspecs.
(atomic_fetch_add<SWI>): Rename from sync_old_add<SWI>; add memory
model parameter.
(*atomic_fetch_add_cmp<SWI>): Similarly.
(atomic_add<SWI>, atomic<any_logic><SWI>): Similarly.
(atomic_sub<SWI>): Similarly. Use x86_maybe_negate_const_int.
(sync_lock_test_and_set<SWI>): Merge with ...
(atomic_exchange<SWI>): ... this.
2011-11-06 Richard Guenther <rguenther@suse.de>
* ipa-prop.c (ipa_modify_call_arguments): Re-compute
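For orientation, here is a minimal user-level sketch of the __atomic interface this merge introduces. The built-ins and __ATOMIC_* macros are the documented ones; the variable and function names are made up for illustration:

  #include <stdbool.h>

  static int counter, flag;

  int
  bump_and_publish (void)
  {
    /* Atomic read-modify-write with an explicit memory model.  */
    int old = __atomic_fetch_add (&counter, 1, __ATOMIC_RELAXED);

    /* Release store, to be paired with an acquire load elsewhere.  */
    __atomic_store_n (&flag, 1, __ATOMIC_RELEASE);

    /* Strong compare-and-swap; on failure EXPECTED is updated in place.  */
    int expected = old + 1;
    bool ok = __atomic_compare_exchange_n (&counter, &expected, old + 2,
                                           false, __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
    return ok ? old : expected;
  }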


@@ -3978,7 +3978,7 @@ PREPROCESSOR_DEFINES = \
CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s)
cppbuiltin.o: cppbuiltin.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
cppbuiltin.h Makefile
$(TREE_H) cppbuiltin.h Makefile
CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
cppdefault.o: cppdefault.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \


@@ -95,6 +95,10 @@ DEF_PRIMITIVE_TYPE (BT_VOLATILE_PTR,
build_pointer_type
(build_qualified_type (void_type_node,
TYPE_QUAL_VOLATILE)))
DEF_PRIMITIVE_TYPE (BT_CONST_VOLATILE_PTR,
build_pointer_type
(build_qualified_type (void_type_node,
TYPE_QUAL_VOLATILE|TYPE_QUAL_CONST)))
DEF_PRIMITIVE_TYPE (BT_PTRMODE, (*lang_hooks.types.type_for_mode)(ptr_mode, 0))
DEF_PRIMITIVE_TYPE (BT_INT_PTR, integer_ptr_type_node)
DEF_PRIMITIVE_TYPE (BT_FLOAT_PTR, float_ptr_type_node)
@@ -315,6 +319,20 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_LONGPTR_LONGPTR,
BT_BOOL, BT_PTR_LONG, BT_PTR_LONG)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR,
BT_BOOL, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
DEF_FUNCTION_TYPE_2 (BT_FN_I1_CONST_VPTR_INT, BT_I1, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I2_CONST_VPTR_INT, BT_I2, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I4_CONST_VPTR_INT, BT_I4, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I8_CONST_VPTR_INT, BT_I8, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I16_CONST_VPTR_INT, BT_I16, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_VPTR_INT, BT_VOID, BT_VOLATILE_PTR, BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
BT_CONST_VOLATILE_PTR)
DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR)
@@ -383,6 +401,16 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_OMPFN_PTR_UINT, BT_VOID, BT_PTR_FN_VOID_PTR,
BT_PTR, BT_UINT)
DEF_FUNCTION_TYPE_3 (BT_FN_PTR_CONST_PTR_INT_SIZE, BT_PTR,
BT_CONST_PTR, BT_INT, BT_SIZE)
DEF_FUNCTION_TYPE_3 (BT_FN_I1_VPTR_I1_INT, BT_I1, BT_VOLATILE_PTR, BT_I1, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I2_VPTR_I2_INT, BT_I2, BT_VOLATILE_PTR, BT_I2, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I4_VPTR_I4_INT, BT_I4, BT_VOLATILE_PTR, BT_I4, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I8_VPTR_I8_INT, BT_I8, BT_VOLATILE_PTR, BT_I8, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_INT, BT_I16, BT_VOLATILE_PTR, BT_I16, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I1_INT, BT_VOID, BT_VOLATILE_PTR, BT_I1, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT)
DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR,
BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR)
@@ -402,6 +430,10 @@ DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR,
BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_VOID, BT_SIZE,
BT_CONST_VOLATILE_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING,
@@ -409,6 +441,9 @@ DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
DEF_FUNCTION_TYPE_5 (BT_FN_BOOL_LONG_LONG_LONG_LONGPTR_LONGPTR,
BT_BOOL, BT_LONG, BT_LONG, BT_LONG,
BT_PTR_LONG, BT_PTR_LONG)
DEF_FUNCTION_TYPE_5 (BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, BT_VOID, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_INT_STRING_SIZE_INT_SIZE_CONST_STRING_VALIST_ARG,
BT_INT, BT_STRING, BT_SIZE, BT_INT, BT_SIZE,
@@ -422,6 +457,24 @@ DEF_FUNCTION_TYPE_6 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG,
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
BT_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I1_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I1, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I2_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I2, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I4_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I4, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I8_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I8, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I16_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I16, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_SIZE_VPTR_PTR_PTR_INT_INT, BT_BOOL, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT)
DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,


@@ -223,6 +223,7 @@ static tree do_mpfr_bessel_n (tree, tree, tree,
const REAL_VALUE_TYPE *, bool);
static tree do_mpfr_remquo (tree, tree, tree);
static tree do_mpfr_lgamma_r (tree, tree, tree);
static void expand_builtin_sync_synchronize (void);
/* Return true if NAME starts with __builtin_ or __sync_. */
@@ -233,6 +234,8 @@ is_builtin_name (const char *name)
return true;
if (strncmp (name, "__sync_", 7) == 0)
return true;
if (strncmp (name, "__atomic_", 9) == 0)
return true;
return false;
}
@@ -5090,21 +5093,41 @@ get_builtin_sync_mem (tree loc, enum machine_mode mode)
return mem;
}
/* Make sure an argument is in the right mode.
EXP is the tree argument.
MODE is the mode it should be in. */
static rtx
expand_expr_force_mode (tree exp, enum machine_mode mode)
{
rtx val;
enum machine_mode old_mode;
val = expand_expr (exp, NULL_RTX, mode, EXPAND_NORMAL);
/* If VAL is promoted to a wider mode, convert it back to MODE. Take care
of CONST_INTs, where we know the old_mode only from the call argument. */
old_mode = GET_MODE (val);
if (old_mode == VOIDmode)
old_mode = TYPE_MODE (TREE_TYPE (exp));
val = convert_modes (mode, old_mode, val, 1);
return val;
}
/* Expand the __sync_xxx_and_fetch and __sync_fetch_and_xxx intrinsics.
EXP is the CALL_EXPR. CODE is the rtx code
that corresponds to the arithmetic or logical operation from the name;
an exception here is that NOT actually means NAND. TARGET is an optional
place for us to store the results; AFTER is true if this is the
fetch_and_xxx form. IGNORE is true if we don't actually care about
the result of the operation at all. */
fetch_and_xxx form. */
static rtx
expand_builtin_sync_operation (enum machine_mode mode, tree exp,
enum rtx_code code, bool after,
rtx target, bool ignore)
rtx target)
{
rtx val, mem;
enum machine_mode old_mode;
location_t loc = EXPR_LOCATION (exp);
if (code == NOT && warn_sync_nand)
@@ -5151,19 +5174,10 @@ expand_builtin_sync_operation (enum machine_mode mode, tree exp,
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
val = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, mode, EXPAND_NORMAL);
/* If VAL is promoted to a wider mode, convert it back to MODE. Take care
of CONST_INTs, where we know the old_mode only from the call argument. */
old_mode = GET_MODE (val);
if (old_mode == VOIDmode)
old_mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1)));
val = convert_modes (mode, old_mode, val, 1);
if (ignore)
return expand_sync_operation (mem, val, code);
else
return expand_sync_fetch_operation (mem, val, code, after, target);
return expand_atomic_fetch_op (target, mem, val, code, MEMMODEL_SEQ_CST,
after);
}
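The net effect of the change above is that the legacy __sync fetch built-ins now go through the new atomic expander with a sequentially consistent memory model; a rough user-level sketch of the equivalence (hypothetical function name):

  unsigned int
  old_and_new (unsigned int *p, unsigned int v)
  {
    /* Both calls are now expanded through expand_atomic_fetch_op.  */
    unsigned int a = __sync_fetch_and_add (p, v);
    unsigned int b = __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
    return a + b;
  }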
/* Expand the __sync_val_compare_and_swap and __sync_bool_compare_and_swap
@@ -5176,34 +5190,19 @@ expand_builtin_compare_and_swap (enum machine_mode mode, tree exp,
bool is_bool, rtx target)
{
rtx old_val, new_val, mem;
enum machine_mode old_mode;
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
old_val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
new_val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 2), mode);
if (!expand_atomic_compare_and_swap ((is_bool ? &target : NULL),
(is_bool ? NULL : &target),
mem, old_val, new_val, false,
MEMMODEL_SEQ_CST, MEMMODEL_SEQ_CST))
return NULL_RTX;
old_val = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
mode, EXPAND_NORMAL);
/* If VAL is promoted to a wider mode, convert it back to MODE. Take care
of CONST_INTs, where we know the old_mode only from the call argument. */
old_mode = GET_MODE (old_val);
if (old_mode == VOIDmode)
old_mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1)));
old_val = convert_modes (mode, old_mode, old_val, 1);
new_val = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX,
mode, EXPAND_NORMAL);
/* If VAL is promoted to a wider mode, convert it back to MODE. Take care
of CONST_INTs, where we know the old_mode only from the call argument. */
old_mode = GET_MODE (new_val);
if (old_mode == VOIDmode)
old_mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 2)));
new_val = convert_modes (mode, old_mode, new_val, 1);
if (is_bool)
return expand_bool_compare_and_swap (mem, old_val, new_val, target);
else
return expand_val_compare_and_swap (mem, old_val, new_val, target);
return target;
}
/* Expand the __sync_lock_test_and_set intrinsic. Note that the most
@@ -5214,22 +5213,461 @@ expand_builtin_compare_and_swap (enum machine_mode mode, tree exp,
static rtx
expand_builtin_sync_lock_test_and_set (enum machine_mode mode, tree exp,
rtx target)
rtx target)
{
rtx val, mem;
enum machine_mode old_mode;
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, mode, EXPAND_NORMAL);
/* If VAL is promoted to a wider mode, convert it back to MODE. Take care
of CONST_INTs, where we know the old_mode only from the call argument. */
old_mode = GET_MODE (val);
if (old_mode == VOIDmode)
old_mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1)));
val = convert_modes (mode, old_mode, val, 1);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
return expand_sync_lock_test_and_set (mem, val, target);
return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE);
}
/* Expand the __sync_lock_release intrinsic. EXP is the CALL_EXPR. */
static void
expand_builtin_sync_lock_release (enum machine_mode mode, tree exp)
{
rtx mem;
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
expand_atomic_store (mem, const0_rtx, MEMMODEL_RELEASE);
}
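In user-level terms the two legacy lock built-ins now map onto the new expanders roughly as follows (a sketch, not the literal expansion; the lock variable is hypothetical):

  static int lock;

  void
  spin_and_release (void)
  {
    /* __sync_lock_test_and_set: an acquire-ordered exchange,
       ~ __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE).  */
    while (__sync_lock_test_and_set (&lock, 1))
      ;
    /* __sync_lock_release: a release-ordered store of zero,
       ~ __atomic_store_n (&lock, 0, __ATOMIC_RELEASE).  */
    __sync_lock_release (&lock);
  }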
/* Given an integer representing an ``enum memmodel'', verify its
correctness and return the memory model enum. */
static enum memmodel
get_memmodel (tree exp)
{
rtx op;
/* If the parameter is not a constant, it's a run time value so we'll just
convert it to MEMMODEL_SEQ_CST to avoid annoying runtime checking. */
if (TREE_CODE (exp) != INTEGER_CST)
return MEMMODEL_SEQ_CST;
op = expand_normal (exp);
if (INTVAL (op) < 0 || INTVAL (op) >= MEMMODEL_LAST)
{
warning (OPT_Winvalid_memory_model,
"invalid memory model argument to builtin");
return MEMMODEL_SEQ_CST;
}
return (enum memmodel) INTVAL (op);
}
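The practical consequence of get_memmodel is that only constant, in-range memory-model arguments are honoured: a run-time value is treated as sequentially consistent, and an out-of-range constant only draws a -Winvalid-memory-model warning. A small sketch (hypothetical names):

  int
  load_with_model (int *p, int model)
  {
    /* MODEL is not an INTEGER_CST here, so the expander behaves as if
       __ATOMIC_SEQ_CST had been passed rather than checking at run time.  */
    return __atomic_load_n (p, model);
  }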
/* Expand the __atomic_exchange intrinsic:
TYPE __atomic_exchange (TYPE *object, TYPE desired, enum memmodel)
EXP is the CALL_EXPR.
TARGET is an optional place for us to store the results. */
static rtx
expand_builtin_atomic_exchange (enum machine_mode mode, tree exp, rtx target)
{
rtx val, mem;
enum memmodel model;
model = get_memmodel (CALL_EXPR_ARG (exp, 2));
if (model == MEMMODEL_CONSUME)
{
error ("invalid memory model for %<__atomic_exchange%>");
return NULL_RTX;
}
if (!flag_inline_atomics)
return NULL_RTX;
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
return expand_atomic_exchange (target, mem, val, model);
}
/* Expand the __atomic_compare_exchange intrinsic:
bool __atomic_compare_exchange (TYPE *object, TYPE *expect,
TYPE desired, BOOL weak,
enum memmodel success,
enum memmodel failure)
EXP is the CALL_EXPR.
TARGET is an optional place for us to store the results. */
static rtx
expand_builtin_atomic_compare_exchange (enum machine_mode mode, tree exp,
rtx target)
{
rtx expect, desired, mem, oldval;
enum memmodel success, failure;
tree weak;
bool is_weak;
success = get_memmodel (CALL_EXPR_ARG (exp, 4));
failure = get_memmodel (CALL_EXPR_ARG (exp, 5));
if (failure == MEMMODEL_RELEASE || failure == MEMMODEL_ACQ_REL)
{
error ("invalid failure memory model for %<__atomic_compare_exchange%>");
return NULL_RTX;
}
if (failure > success)
{
error ("failure memory model cannot be stronger than success "
"memory model for %<__atomic_compare_exchange%>");
return NULL_RTX;
}
if (!flag_inline_atomics)
return NULL_RTX;
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
expect = expand_normal (CALL_EXPR_ARG (exp, 1));
expect = convert_memory_address (Pmode, expect);
desired = expand_expr_force_mode (CALL_EXPR_ARG (exp, 2), mode);
weak = CALL_EXPR_ARG (exp, 3);
is_weak = false;
if (host_integerp (weak, 0) && tree_low_cst (weak, 0) != 0)
is_weak = true;
oldval = copy_to_reg (gen_rtx_MEM (mode, expect));
if (!expand_atomic_compare_and_swap (&target, &oldval, mem, oldval,
desired, is_weak, success, failure))
return NULL_RTX;
emit_move_insn (gen_rtx_MEM (mode, expect), oldval);
return target;
}
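At the source level, the checks above constrain the two model arguments of __atomic_compare_exchange: the failure model may not be RELEASE or ACQ_REL and may not be stronger than the success model. A valid use looks like this (a sketch; the names are made up):

  _Bool
  try_update (int *p, int *expected, int desired)
  {
    /* ACQ_REL on success, ACQUIRE on failure is accepted; passing
       __ATOMIC_RELEASE as the failure model would be rejected above.  */
    return __atomic_compare_exchange_n (p, expected, desired, /*weak=*/0,
                                        __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
  }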
/* Expand the __atomic_load intrinsic:
TYPE __atomic_load (TYPE *object, enum memmodel)
EXP is the CALL_EXPR.
TARGET is an optional place for us to store the results. */
static rtx
expand_builtin_atomic_load (enum machine_mode mode, tree exp, rtx target)
{
rtx mem;
enum memmodel model;
model = get_memmodel (CALL_EXPR_ARG (exp, 1));
if (model == MEMMODEL_RELEASE
|| model == MEMMODEL_ACQ_REL)
{
error ("invalid memory model for %<__atomic_load%>");
return NULL_RTX;
}
if (!flag_inline_atomics)
return NULL_RTX;
/* Expand the operand. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
return expand_atomic_load (target, mem, model);
}
/* Expand the __atomic_store intrinsic:
void __atomic_store (TYPE *object, TYPE desired, enum memmodel)
EXP is the CALL_EXPR.
TARGET is an optional place for us to store the results. */
static rtx
expand_builtin_atomic_store (enum machine_mode mode, tree exp)
{
rtx mem, val;
enum memmodel model;
model = get_memmodel (CALL_EXPR_ARG (exp, 2));
if (model != MEMMODEL_RELAXED
&& model != MEMMODEL_SEQ_CST
&& model != MEMMODEL_RELEASE)
{
error ("invalid memory model for %<__atomic_store%>");
return NULL_RTX;
}
if (!flag_inline_atomics)
return NULL_RTX;
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
return expand_atomic_store (mem, val, model);
}
/* Expand the __atomic_fetch_XXX intrinsic:
TYPE __atomic_fetch_XXX (TYPE *object, TYPE val, enum memmodel)
EXP is the CALL_EXPR.
TARGET is an optional place for us to store the results.
CODE is the operation: PLUS, MINUS, AND, XOR, IOR, or NOT (meaning NAND).
FETCH_AFTER is true if returning the result of the operation.
FETCH_AFTER is false if returning the value before the operation.
IGNORE is true if the result is not used.
EXT_CALL is the correct builtin for an external call if this cannot be
resolved to an instruction sequence. */
static rtx
expand_builtin_atomic_fetch_op (enum machine_mode mode, tree exp, rtx target,
enum rtx_code code, bool fetch_after,
bool ignore, enum built_in_function ext_call)
{
rtx val, mem, ret;
enum memmodel model;
tree fndecl;
tree addr;
model = get_memmodel (CALL_EXPR_ARG (exp, 2));
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
/* Only try generating instructions if inlining is turned on. */
if (flag_inline_atomics)
{
ret = expand_atomic_fetch_op (target, mem, val, code, model, fetch_after);
if (ret)
return ret;
}
/* Return if a different routine isn't needed for the library call. */
if (ext_call == BUILT_IN_NONE)
return NULL_RTX;
/* Change the call to the specified function. */
fndecl = get_callee_fndecl (exp);
addr = CALL_EXPR_FN (exp);
STRIP_NOPS (addr);
gcc_assert (TREE_OPERAND (addr, 0) == fndecl);
TREE_OPERAND (addr, 0) = builtin_decl_explicit(ext_call);
/* Expand the call here so we can emit trailing code. */
ret = expand_call (exp, target, ignore);
/* Replace the original function just in case it matters. */
TREE_OPERAND (addr, 0) = fndecl;
/* Then issue the arithmetic correction to return the right result. */
if (!ignore)
ret = expand_simple_binop (mode, code, ret, val, NULL_RTX, true,
OPTAB_LIB_WIDEN);
return ret;
}
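The trailing "arithmetic correction" relies on the usual identity between the two fetch flavours; when, say, __atomic_add_fetch cannot be inlined, the call behaves as if it had been written like this (hypothetical helper name):

  static inline unsigned int
  add_fetch_fallback (unsigned int *p, unsigned int val, int model)
  {
    /* The library routine returns the old value; replaying the addition
       yields the post-operation value that __atomic_add_fetch must return.  */
    return __atomic_fetch_add (p, val, model) + val;
  }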
/* Return true if (optional) argument ARG1 of size ARG0 is always lock free on
this architecture. If ARG1 is NULL, use typical alignment for size ARG0. */
static tree
fold_builtin_atomic_always_lock_free (tree arg0, tree arg1)
{
int size;
enum machine_mode mode;
unsigned int mode_align, type_align;
if (TREE_CODE (arg0) != INTEGER_CST)
return NULL_TREE;
size = INTVAL (expand_normal (arg0)) * BITS_PER_UNIT;
mode = mode_for_size (size, MODE_INT, 0);
mode_align = GET_MODE_ALIGNMENT (mode);
if (TREE_CODE (arg1) == INTEGER_CST && INTVAL (expand_normal (arg1)) == 0)
type_align = mode_align;
else
{
tree ttype = TREE_TYPE (arg1);
/* This function is usually invoked and folded immediately by the front
end before anything else has a chance to look at it. The pointer
parameter at this point is usually cast to a void *, so check for that
and look past the cast. */
if (TREE_CODE (arg1) == NOP_EXPR && POINTER_TYPE_P (ttype)
&& VOID_TYPE_P (TREE_TYPE (ttype)))
arg1 = TREE_OPERAND (arg1, 0);
ttype = TREE_TYPE (arg1);
gcc_assert (POINTER_TYPE_P (ttype));
/* Get the underlying type of the object. */
ttype = TREE_TYPE (ttype);
type_align = TYPE_ALIGN (ttype);
}
/* If the object has smaller alignment, the lock free routines cannot
be used. */
if (type_align < mode_align)
return integer_zero_node;
/* Check if a compare_and_swap pattern exists for the mode which represents
the required size. The pattern is not allowed to fail, so the existence
of the pattern indicates support is present. */
if (can_compare_and_swap_p (mode))
return integer_one_node;
else
return integer_zero_node;
}
/* Return true if the parameters to call EXP represent an object which will
always generate lock free instructions. The first argument represents the
size of the object, and the second parameter is a pointer to the object
itself. If NULL is passed for the object, then the result is based on
typical alignment for an object of the specified size. Otherwise return
false. */
static rtx
expand_builtin_atomic_always_lock_free (tree exp)
{
tree size;
tree arg0 = CALL_EXPR_ARG (exp, 0);
tree arg1 = CALL_EXPR_ARG (exp, 1);
if (TREE_CODE (arg0) != INTEGER_CST)
{
error ("non-constant argument 1 to __atomic_always_lock_free");
return const0_rtx;
}
size = fold_builtin_atomic_always_lock_free (arg0, arg1);
if (size == integer_one_node)
return const1_rtx;
return const0_rtx;
}
/* Return one or zero if it can be determined that object ARG1 of size ARG0
is lock free on this architecture. */
static tree
fold_builtin_atomic_is_lock_free (tree arg0, tree arg1)
{
if (!flag_inline_atomics)
return NULL_TREE;
/* If it isn't always lock free, don't generate a result. */
if (fold_builtin_atomic_always_lock_free (arg0, arg1) == integer_one_node)
return integer_one_node;
return NULL_TREE;
}
/* Return const1_rtx if it can be determined that the object described by the
parameters to call EXP is lock free on this architecture. The first argument
is the size of the object, and the second parameter is a pointer to the object
itself. If NULL is passed for the object, then the result is based on
typical alignment for an object of the specified size. Otherwise return
NULL_RTX. */
static rtx
expand_builtin_atomic_is_lock_free (tree exp)
{
tree size;
tree arg0 = CALL_EXPR_ARG (exp, 0);
tree arg1 = CALL_EXPR_ARG (exp, 1);
if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
{
error ("non-integer argument 1 to __atomic_is_lock_free");
return NULL_RTX;
}
if (!flag_inline_atomics)
return NULL_RTX;
/* If the value is known at compile time, return the RTX for it. */
size = fold_builtin_atomic_is_lock_free (arg0, arg1);
if (size == integer_one_node)
return const1_rtx;
return NULL_RTX;
}
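A user-level sketch of the two lock-free queries handled above: the first argument is a size, the second an optional pointer that supplies alignment context (the struct and function names are made up):

  struct pair { int a, b; };

  _Bool
  pair_lock_free (struct pair *p)
  {
    /* Compile-time answer based on typical alignment for the size ...  */
    _Bool always = __atomic_always_lock_free (sizeof (struct pair), 0);
    /* ... or a query that may also take the actual object into account.  */
    return always || __atomic_is_lock_free (sizeof (struct pair), p);
  }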
/* This routine will either emit the mem_thread_fence pattern or issue a
sync_synchronize to generate a fence for memory model MEMMODEL. */
#ifndef HAVE_mem_thread_fence
# define HAVE_mem_thread_fence 0
# define gen_mem_thread_fence(x) (gcc_unreachable (), NULL_RTX)
#endif
void
expand_builtin_mem_thread_fence (enum memmodel model)
{
if (HAVE_mem_thread_fence)
emit_insn (gen_mem_thread_fence (GEN_INT (model)));
else if (model != MEMMODEL_RELAXED)
expand_builtin_sync_synchronize ();
}
/* Expand the __atomic_thread_fence intrinsic:
void __atomic_thread_fence (enum memmodel)
EXP is the CALL_EXPR. */
static void
expand_builtin_atomic_thread_fence (tree exp)
{
enum memmodel model;
model = get_memmodel (CALL_EXPR_ARG (exp, 0));
expand_builtin_mem_thread_fence (model);
}
/* This routine will either emit the mem_signal_fence pattern or issue a
sync_synchronize to generate a fence for memory model MEMMODEL. */
#ifndef HAVE_mem_signal_fence
# define HAVE_mem_signal_fence 0
# define gen_mem_signal_fence(x) (gcc_unreachable (), NULL_RTX)
#endif
static void
expand_builtin_mem_signal_fence (enum memmodel model)
{
if (HAVE_mem_signal_fence)
emit_insn (gen_mem_signal_fence (GEN_INT (model)));
else if (model != MEMMODEL_RELAXED)
{
rtx asm_op, clob;
/* By default targets are coherent between a thread and the signal
handler running on the same thread. Thus this really becomes a
compiler barrier, in that stores must not be sunk past
(or raised above) a given point. */
/* Generate asm volatile("" : : : "memory") as the memory barrier. */
asm_op = gen_rtx_ASM_OPERANDS (VOIDmode, empty_string, empty_string, 0,
rtvec_alloc (0), rtvec_alloc (0),
rtvec_alloc (0), UNKNOWN_LOCATION);
MEM_VOLATILE_P (asm_op) = 1;
clob = gen_rtx_SCRATCH (VOIDmode);
clob = gen_rtx_MEM (BLKmode, clob);
clob = gen_rtx_CLOBBER (VOIDmode, clob);
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, asm_op, clob)));
}
}
/* Expand the __atomic_signal_fence intrinsic:
void __atomic_signal_fence (enum memmodel)
EXP is the CALL_EXPR. */
static void
expand_builtin_atomic_signal_fence (tree exp)
{
enum memmodel model;
model = get_memmodel (CALL_EXPR_ARG (exp, 0));
expand_builtin_mem_signal_fence (model);
}
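Because a signal handler runs on the thread it interrupts, the fallback above only needs to stop the compiler from moving memory accesses across the fence. A sketch of the intended use (hypothetical variables):

  static int data;
  static volatile int ready;

  void
  publish_for_handler (int value)
  {
    data = value;
    /* Compiler barrier only: roughly asm volatile ("" ::: "memory") when no
       mem_signal_fence pattern exists and the model is not relaxed.  */
    __atomic_signal_fence (__ATOMIC_RELEASE);
    ready = 1;
  }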
/* Expand the __sync_synchronize intrinsic. */
@@ -5264,33 +5702,6 @@ expand_builtin_sync_synchronize (void)
expand_asm_stmt (x);
}
/* Expand the __sync_lock_release intrinsic. EXP is the CALL_EXPR. */
static void
expand_builtin_sync_lock_release (enum machine_mode mode, tree exp)
{
struct expand_operand ops[2];
enum insn_code icode;
rtx mem;
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
/* If there is an explicit operation in the md file, use it. */
icode = direct_optab_handler (sync_lock_release_optab, mode);
if (icode != CODE_FOR_nothing)
{
create_fixed_operand (&ops[0], mem);
create_input_operand (&ops[1], const0_rtx, mode);
if (maybe_expand_insn (icode, 2, ops))
return;
}
/* Otherwise we can implement this operation by emitting a barrier
followed by a store of zero. */
expand_builtin_sync_synchronize ();
emit_move_insn (mem, const0_rtx);
}
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
@@ -5891,8 +6302,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_FETCH_AND_ADD_8:
case BUILT_IN_SYNC_FETCH_AND_ADD_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_ADD_1);
target = expand_builtin_sync_operation (mode, exp, PLUS,
false, target, ignore);
target = expand_builtin_sync_operation (mode, exp, PLUS, false, target);
if (target)
return target;
break;
@@ -5903,8 +6313,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_FETCH_AND_SUB_8:
case BUILT_IN_SYNC_FETCH_AND_SUB_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_SUB_1);
target = expand_builtin_sync_operation (mode, exp, MINUS,
false, target, ignore);
target = expand_builtin_sync_operation (mode, exp, MINUS, false, target);
if (target)
return target;
break;
@@ -5915,8 +6324,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_FETCH_AND_OR_8:
case BUILT_IN_SYNC_FETCH_AND_OR_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_OR_1);
target = expand_builtin_sync_operation (mode, exp, IOR,
false, target, ignore);
target = expand_builtin_sync_operation (mode, exp, IOR, false, target);
if (target)
return target;
break;
@@ -5927,8 +6335,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_FETCH_AND_AND_8:
case BUILT_IN_SYNC_FETCH_AND_AND_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_AND_1);
target = expand_builtin_sync_operation (mode, exp, AND,
false, target, ignore);
target = expand_builtin_sync_operation (mode, exp, AND, false, target);
if (target)
return target;
break;
@@ -5939,8 +6346,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_FETCH_AND_XOR_8:
case BUILT_IN_SYNC_FETCH_AND_XOR_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_XOR_1);
target = expand_builtin_sync_operation (mode, exp, XOR,
false, target, ignore);
target = expand_builtin_sync_operation (mode, exp, XOR, false, target);
if (target)
return target;
break;
@@ -5951,8 +6357,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_FETCH_AND_NAND_8:
case BUILT_IN_SYNC_FETCH_AND_NAND_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_NAND_1);
target = expand_builtin_sync_operation (mode, exp, NOT,
false, target, ignore);
target = expand_builtin_sync_operation (mode, exp, NOT, false, target);
if (target)
return target;
break;
@@ -5963,8 +6368,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_ADD_AND_FETCH_8:
case BUILT_IN_SYNC_ADD_AND_FETCH_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_ADD_AND_FETCH_1);
target = expand_builtin_sync_operation (mode, exp, PLUS,
true, target, ignore);
target = expand_builtin_sync_operation (mode, exp, PLUS, true, target);
if (target)
return target;
break;
@@ -5975,8 +6379,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_SUB_AND_FETCH_8:
case BUILT_IN_SYNC_SUB_AND_FETCH_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_SUB_AND_FETCH_1);
target = expand_builtin_sync_operation (mode, exp, MINUS,
true, target, ignore);
target = expand_builtin_sync_operation (mode, exp, MINUS, true, target);
if (target)
return target;
break;
@@ -5987,8 +6390,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_OR_AND_FETCH_8:
case BUILT_IN_SYNC_OR_AND_FETCH_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_OR_AND_FETCH_1);
target = expand_builtin_sync_operation (mode, exp, IOR,
true, target, ignore);
target = expand_builtin_sync_operation (mode, exp, IOR, true, target);
if (target)
return target;
break;
@@ -5999,8 +6401,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_AND_AND_FETCH_8:
case BUILT_IN_SYNC_AND_AND_FETCH_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_AND_AND_FETCH_1);
target = expand_builtin_sync_operation (mode, exp, AND,
true, target, ignore);
target = expand_builtin_sync_operation (mode, exp, AND, true, target);
if (target)
return target;
break;
@@ -6011,8 +6412,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_XOR_AND_FETCH_8:
case BUILT_IN_SYNC_XOR_AND_FETCH_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_XOR_AND_FETCH_1);
target = expand_builtin_sync_operation (mode, exp, XOR,
true, target, ignore);
target = expand_builtin_sync_operation (mode, exp, XOR, true, target);
if (target)
return target;
break;
@@ -6023,8 +6423,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
case BUILT_IN_SYNC_NAND_AND_FETCH_8:
case BUILT_IN_SYNC_NAND_AND_FETCH_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_NAND_AND_FETCH_1);
target = expand_builtin_sync_operation (mode, exp, NOT,
true, target, ignore);
target = expand_builtin_sync_operation (mode, exp, NOT, true, target);
if (target)
return target;
break;
@@ -6082,6 +6481,236 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
expand_builtin_sync_synchronize ();
return const0_rtx;
case BUILT_IN_ATOMIC_EXCHANGE_1:
case BUILT_IN_ATOMIC_EXCHANGE_2:
case BUILT_IN_ATOMIC_EXCHANGE_4:
case BUILT_IN_ATOMIC_EXCHANGE_8:
case BUILT_IN_ATOMIC_EXCHANGE_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_EXCHANGE_1);
target = expand_builtin_atomic_exchange (mode, exp, target);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
mode =
get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1);
target = expand_builtin_atomic_compare_exchange (mode, exp, target);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_LOAD_1:
case BUILT_IN_ATOMIC_LOAD_2:
case BUILT_IN_ATOMIC_LOAD_4:
case BUILT_IN_ATOMIC_LOAD_8:
case BUILT_IN_ATOMIC_LOAD_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_LOAD_1);
target = expand_builtin_atomic_load (mode, exp, target);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_STORE_1:
case BUILT_IN_ATOMIC_STORE_2:
case BUILT_IN_ATOMIC_STORE_4:
case BUILT_IN_ATOMIC_STORE_8:
case BUILT_IN_ATOMIC_STORE_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_STORE_1);
target = expand_builtin_atomic_store (mode, exp);
if (target)
return const0_rtx;
break;
case BUILT_IN_ATOMIC_ADD_FETCH_1:
case BUILT_IN_ATOMIC_ADD_FETCH_2:
case BUILT_IN_ATOMIC_ADD_FETCH_4:
case BUILT_IN_ATOMIC_ADD_FETCH_8:
case BUILT_IN_ATOMIC_ADD_FETCH_16:
{
enum built_in_function lib;
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_ADD_FETCH_1);
lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_ADD_1 +
(fcode - BUILT_IN_ATOMIC_ADD_FETCH_1));
target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, true,
ignore, lib);
if (target)
return target;
break;
}
case BUILT_IN_ATOMIC_SUB_FETCH_1:
case BUILT_IN_ATOMIC_SUB_FETCH_2:
case BUILT_IN_ATOMIC_SUB_FETCH_4:
case BUILT_IN_ATOMIC_SUB_FETCH_8:
case BUILT_IN_ATOMIC_SUB_FETCH_16:
{
enum built_in_function lib;
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_SUB_FETCH_1);
lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_SUB_1 +
(fcode - BUILT_IN_ATOMIC_SUB_FETCH_1));
target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, true,
ignore, lib);
if (target)
return target;
break;
}
case BUILT_IN_ATOMIC_AND_FETCH_1:
case BUILT_IN_ATOMIC_AND_FETCH_2:
case BUILT_IN_ATOMIC_AND_FETCH_4:
case BUILT_IN_ATOMIC_AND_FETCH_8:
case BUILT_IN_ATOMIC_AND_FETCH_16:
{
enum built_in_function lib;
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_AND_FETCH_1);
lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_AND_1 +
(fcode - BUILT_IN_ATOMIC_AND_FETCH_1));
target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, true,
ignore, lib);
if (target)
return target;
break;
}
case BUILT_IN_ATOMIC_NAND_FETCH_1:
case BUILT_IN_ATOMIC_NAND_FETCH_2:
case BUILT_IN_ATOMIC_NAND_FETCH_4:
case BUILT_IN_ATOMIC_NAND_FETCH_8:
case BUILT_IN_ATOMIC_NAND_FETCH_16:
{
enum built_in_function lib;
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_NAND_FETCH_1);
lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_NAND_1 +
(fcode - BUILT_IN_ATOMIC_NAND_FETCH_1));
target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, true,
ignore, lib);
if (target)
return target;
break;
}
case BUILT_IN_ATOMIC_XOR_FETCH_1:
case BUILT_IN_ATOMIC_XOR_FETCH_2:
case BUILT_IN_ATOMIC_XOR_FETCH_4:
case BUILT_IN_ATOMIC_XOR_FETCH_8:
case BUILT_IN_ATOMIC_XOR_FETCH_16:
{
enum built_in_function lib;
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_XOR_FETCH_1);
lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_XOR_1 +
(fcode - BUILT_IN_ATOMIC_XOR_FETCH_1));
target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, true,
ignore, lib);
if (target)
return target;
break;
}
case BUILT_IN_ATOMIC_OR_FETCH_1:
case BUILT_IN_ATOMIC_OR_FETCH_2:
case BUILT_IN_ATOMIC_OR_FETCH_4:
case BUILT_IN_ATOMIC_OR_FETCH_8:
case BUILT_IN_ATOMIC_OR_FETCH_16:
{
enum built_in_function lib;
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_OR_FETCH_1);
lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_OR_1 +
(fcode - BUILT_IN_ATOMIC_OR_FETCH_1));
target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, true,
ignore, lib);
if (target)
return target;
break;
}
case BUILT_IN_ATOMIC_FETCH_ADD_1:
case BUILT_IN_ATOMIC_FETCH_ADD_2:
case BUILT_IN_ATOMIC_FETCH_ADD_4:
case BUILT_IN_ATOMIC_FETCH_ADD_8:
case BUILT_IN_ATOMIC_FETCH_ADD_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_ADD_1);
target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, false,
ignore, BUILT_IN_NONE);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_FETCH_SUB_1:
case BUILT_IN_ATOMIC_FETCH_SUB_2:
case BUILT_IN_ATOMIC_FETCH_SUB_4:
case BUILT_IN_ATOMIC_FETCH_SUB_8:
case BUILT_IN_ATOMIC_FETCH_SUB_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_SUB_1);
target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, false,
ignore, BUILT_IN_NONE);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_FETCH_AND_1:
case BUILT_IN_ATOMIC_FETCH_AND_2:
case BUILT_IN_ATOMIC_FETCH_AND_4:
case BUILT_IN_ATOMIC_FETCH_AND_8:
case BUILT_IN_ATOMIC_FETCH_AND_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_AND_1);
target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, false,
ignore, BUILT_IN_NONE);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_FETCH_NAND_1:
case BUILT_IN_ATOMIC_FETCH_NAND_2:
case BUILT_IN_ATOMIC_FETCH_NAND_4:
case BUILT_IN_ATOMIC_FETCH_NAND_8:
case BUILT_IN_ATOMIC_FETCH_NAND_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_NAND_1);
target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, false,
ignore, BUILT_IN_NONE);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_FETCH_XOR_1:
case BUILT_IN_ATOMIC_FETCH_XOR_2:
case BUILT_IN_ATOMIC_FETCH_XOR_4:
case BUILT_IN_ATOMIC_FETCH_XOR_8:
case BUILT_IN_ATOMIC_FETCH_XOR_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_XOR_1);
target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, false,
ignore, BUILT_IN_NONE);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_FETCH_OR_1:
case BUILT_IN_ATOMIC_FETCH_OR_2:
case BUILT_IN_ATOMIC_FETCH_OR_4:
case BUILT_IN_ATOMIC_FETCH_OR_8:
case BUILT_IN_ATOMIC_FETCH_OR_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_OR_1);
target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, false,
ignore, BUILT_IN_NONE);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_ALWAYS_LOCK_FREE:
return expand_builtin_atomic_always_lock_free (exp);
case BUILT_IN_ATOMIC_IS_LOCK_FREE:
target = expand_builtin_atomic_is_lock_free (exp);
if (target)
return target;
break;
case BUILT_IN_ATOMIC_THREAD_FENCE:
expand_builtin_atomic_thread_fence (exp);
return const0_rtx;
case BUILT_IN_ATOMIC_SIGNAL_FENCE:
expand_builtin_atomic_signal_fence (exp);
return const0_rtx;
case BUILT_IN_OBJECT_SIZE:
return expand_builtin_object_size (exp);
@@ -10121,6 +10750,12 @@ fold_builtin_2 (location_t loc, tree fndecl, tree arg0, tree arg1, bool ignore)
return fold_builtin_fprintf (loc, fndecl, arg0, arg1, NULL_TREE,
ignore, fcode);
case BUILT_IN_ATOMIC_ALWAYS_LOCK_FREE:
return fold_builtin_atomic_always_lock_free (arg0, arg1);
case BUILT_IN_ATOMIC_IS_LOCK_FREE:
return fold_builtin_atomic_is_lock_free (arg0, arg1);
default:
break;
}


@@ -1,3 +1,24 @@
2011-11-06 Andrew MacLeod <amacleod@redhat.com>
Richard Henderson <rth@redhat.com>
Merged from cxx-mem-model.
* c-cppbuiltin.c (c_cpp_builtins): Test both atomic and sync patterns.
* c-common.c (sync_resolve_params, sync_resolve_return): Only tweak
parameters that are the same type size.
(get_atomic_generic_size): New. Find size of generic
atomic function parameters and do typechecking.
(add_atomic_size_parameter): New. Insert size into parameter list.
(resolve_overloaded_atomic_exchange): Restructure __atomic_exchange to
either __atomic_exchange_n or external library call.
(resolve_overloaded_atomic_compare_exchange): Restructure
__atomic_compare_exchange to either _n variant or external library call.
(resolve_overloaded_atomic_load): Restructure __atomic_load to either
__atomic_load_n or an external library call.
(resolve_overloaded_atomic_store): Restructure __atomic_store to either
__atomic_store_n or an external library call.
(resolve_overloaded_builtin): Handle new __atomic builtins.
2011-11-04 Eric Botcazou <ebotcazou@adacore.com>
PR c++/50608


@@ -9007,7 +9007,8 @@ sync_resolve_size (tree function, VEC(tree,gc) *params)
was encountered; true on success. */
static bool
sync_resolve_params (tree orig_function, tree function, VEC(tree, gc) *params)
sync_resolve_params (location_t loc, tree orig_function, tree function,
VEC(tree, gc) *params, bool orig_format)
{
function_args_iterator iter;
tree ptype;
@@ -9035,21 +9036,34 @@ sync_resolve_params (tree orig_function, tree function, VEC(tree, gc) *params)
++parmnum;
if (VEC_length (tree, params) <= parmnum)
{
error ("too few arguments to function %qE", orig_function);
error_at (loc, "too few arguments to function %qE", orig_function);
return false;
}
/* ??? Ideally for the first conversion we'd use convert_for_assignment
so that we get warnings for anything that doesn't match the pointer
type. This isn't portable across the C and C++ front ends atm. */
val = VEC_index (tree, params, parmnum);
val = convert (ptype, val);
val = convert (arg_type, val);
VEC_replace (tree, params, parmnum, val);
/* Only convert parameters if the size is appropriate with new format
sync routines. */
if (orig_format
|| tree_int_cst_equal (TYPE_SIZE (ptype), TYPE_SIZE (arg_type)))
{
/* Ideally for the first conversion we'd use convert_for_assignment
so that we get warnings for anything that doesn't match the pointer
type. This isn't portable across the C and C++ front ends atm. */
val = VEC_index (tree, params, parmnum);
val = convert (ptype, val);
val = convert (arg_type, val);
VEC_replace (tree, params, parmnum, val);
}
function_args_iter_next (&iter);
}
/* __atomic routines are not variadic. */
if (!orig_format && VEC_length (tree, params) != parmnum + 1)
{
error_at (loc, "too many arguments to function %qE", orig_function);
return false;
}
/* The definition of these primitives is variadic, with the remaining
being "an optional list of variables protected by the memory barrier".
No clue what that's supposed to mean, precisely, but we consider all
@@ -9064,13 +9078,388 @@ sync_resolve_params (tree orig_function, tree function, VEC(tree, gc) *params)
PARAMS. */
static tree
sync_resolve_return (tree first_param, tree result)
sync_resolve_return (tree first_param, tree result, bool orig_format)
{
tree ptype = TREE_TYPE (TREE_TYPE (first_param));
tree rtype = TREE_TYPE (result);
ptype = TYPE_MAIN_VARIANT (ptype);
return convert (ptype, result);
/* New format doesn't require casting unless the types are the same size. */
if (orig_format || tree_int_cst_equal (TYPE_SIZE (ptype), TYPE_SIZE (rtype)))
return convert (ptype, result);
else
return result;
}
/* This function verifies the PARAMS to generic atomic FUNCTION.
It returns the size of the objects if all the data parameters are the same
size, and 0 if the parameters are invalid. */
static int
get_atomic_generic_size (location_t loc, tree function, VEC(tree,gc) *params)
{
unsigned int n_param;
unsigned int n_model;
unsigned int x;
int size_0;
tree type_0;
/* Determine the parameter makeup. */
switch (DECL_FUNCTION_CODE (function))
{
case BUILT_IN_ATOMIC_EXCHANGE:
n_param = 4;
n_model = 1;
break;
case BUILT_IN_ATOMIC_LOAD:
case BUILT_IN_ATOMIC_STORE:
n_param = 3;
n_model = 1;
break;
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
n_param = 6;
n_model = 2;
break;
default:
return 0;
}
if (VEC_length (tree, params) != n_param)
{
error_at (loc, "incorrect number of arguments to function %qE", function);
return 0;
}
/* Get type of first parameter, and determine its size. */
type_0 = TREE_TYPE (VEC_index (tree, params, 0));
if (TREE_CODE (type_0) != POINTER_TYPE)
{
error_at (loc, "argument 1 of %qE must be a pointer type", function);
return 0;
}
size_0 = tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type_0)), 1);
/* Check each other parameter is a pointer and the same size. */
for (x = 0; x < n_param - n_model; x++)
{
int size;
tree type = TREE_TYPE (VEC_index (tree, params, x));
/* __atomic_compare_exchange has a bool in the 4th position, skip it. */
if (n_param == 6 && x == 3)
continue;
if (!POINTER_TYPE_P (type))
{
error_at (loc, "argument %d of %qE must be a pointer type", x + 1,
function);
return 0;
}
size = tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
if (size != size_0)
{
error_at (loc, "size mismatch in argument %d of %qE", x + 1,
function);
return 0;
}
}
/* Check memory model parameters for validity. */
for (x = n_param - n_model ; x < n_param; x++)
{
tree p = VEC_index (tree, params, x);
if (TREE_CODE (p) == INTEGER_CST)
{
int i = tree_low_cst (p, 1);
if (i < 0 || i >= MEMMODEL_LAST)
{
warning_at (loc, OPT_Winvalid_memory_model,
"invalid memory model argument %d of %qE", x + 1,
function);
return MEMMODEL_SEQ_CST;
}
}
else
if (!INTEGRAL_TYPE_P (TREE_TYPE (p)))
{
error_at (loc, "non-integer memory model argument %d of %qE", x + 1,
function);
return 0;
}
}
return size_0;
}
/* This will take an __atomic_ generic FUNCTION call, and add a size parameter N
at the beginning of the parameter list PARAMS representing the size of the
objects. This is to match the library ABI requirement. LOC is the location
of the function call.
The new function is returned if it needed rebuilding, otherwise NULL_TREE is
returned to allow the external call to be constructed. */
static tree
add_atomic_size_parameter (unsigned n, location_t loc, tree function,
VEC(tree,gc) *params)
{
tree size_node;
/* Insert a SIZE_T parameter as the first param. If there isn't
enough space, allocate a new vector and recursively re-build with that. */
if (!VEC_space (tree, params, 1))
{
unsigned int z, len;
VEC(tree,gc) *vec;
tree f;
len = VEC_length (tree, params);
vec = VEC_alloc (tree, gc, len + 1);
for (z = 0; z < len; z++)
VEC_quick_push (tree, vec, VEC_index (tree, params, z));
f = build_function_call_vec (loc, function, vec, NULL);
VEC_free (tree, gc, vec);
return f;
}
/* Add the size parameter and leave as a function call for processing. */
size_node = build_int_cst (size_type_node, n);
VEC_quick_insert (tree, params, 0, size_node);
return NULL_TREE;
}
/* This will process an __atomic_exchange function call, determine whether it
needs to be mapped to the _N variation, or turned into a library call.
LOC is the location of the builtin call.
FUNCTION is the DECL that has been invoked;
PARAMS is the argument list for the call.
TRUE is returned if the call is translated into the proper format for a call
to the external library, and NEW_RETURN is set to the tree for that call.
FALSE is returned if processing for the _N variation is required, and
NEW_RETURN is set to the return value the result is copied into. */
static bool
resolve_overloaded_atomic_exchange (location_t loc, tree function,
VEC(tree,gc) *params, tree *new_return)
{
tree p0, p1, p2, p3;
tree I_type, I_type_ptr;
int n = get_atomic_generic_size (loc, function, params);
/* If not a lock-free size, change to the library generic format. */
if (n != 1 && n != 2 && n != 4 && n != 8 && n != 16)
{
*new_return = add_atomic_size_parameter (n, loc, function, params);
return true;
}
/* Otherwise there is a lockfree match, transform the call from:
void fn(T* mem, T* desired, T* return, model)
into
*return = (T) (fn (In* mem, (In) *desired, model)) */
p0 = VEC_index (tree, params, 0);
p1 = VEC_index (tree, params, 1);
p2 = VEC_index (tree, params, 2);
p3 = VEC_index (tree, params, 3);
/* Create pointer to appropriate size. */
I_type = builtin_type_for_size (BITS_PER_UNIT * n, 1);
I_type_ptr = build_pointer_type (I_type);
/* Convert object pointer to required type. */
p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0);
VEC_replace (tree, params, 0, p0);
/* Convert new value to required type, and dereference it. */
p1 = build_indirect_ref (loc, p1, RO_UNARY_STAR);
p1 = build1 (VIEW_CONVERT_EXPR, I_type, p1);
VEC_replace (tree, params, 1, p1);
/* Move memory model to the 3rd position, and end param list. */
VEC_replace (tree, params, 2, p3);
VEC_truncate (tree, params, 3);
/* Convert return pointer and dereference it for later assignment. */
*new_return = build_indirect_ref (loc, p2, RO_UNARY_STAR);
return false;
}
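Seen from the source level, the rewrite implemented above maps the generic form onto the _N built-in for lock-free sizes; a sketch with made-up variables:

  void
  swap_value (int *cur)
  {
    int next = 42, prev;
    /* Generic form: all three data arguments are passed by pointer.  */
    __atomic_exchange (cur, &next, &prev, __ATOMIC_SEQ_CST);
    /* For this lock-free size the front end rewrites it roughly as
         prev = __atomic_exchange_n (cur, next, __ATOMIC_SEQ_CST);
       while a non-lock-free size gets a leading size_t argument and a call
       to the external __atomic_exchange library routine instead.  */
    (void) prev;
  }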
/* This will process an __atomic_compare_exchange function call, determine
whether it needs to be mapped to the _N variation, or turned into a lib call.
LOC is the location of the builtin call.
FUNCTION is the DECL that has been invoked;
PARAMS is the argument list for the call.
TRUE is returned if the call is translated into the proper format for a call
to the external library, and NEW_RETURN is set to the tree for that call.
FALSE is returned if processing for the _N variation is required. */
static bool
resolve_overloaded_atomic_compare_exchange (location_t loc, tree function,
VEC(tree,gc) *params,
tree *new_return)
{
tree p0, p1, p2;
tree I_type, I_type_ptr;
int n = get_atomic_generic_size (loc, function, params);
/* If not a lock-free size, change to the library generic format. */
if (n != 1 && n != 2 && n != 4 && n != 8 && n != 16)
{
/* The library generic format does not have the weak parameter, so
remove it from the param list. Since a parameter has been removed,
we can be sure that there is room for the SIZE_T parameter, meaning
there will not be a recursive rebuilding of the parameter list, so
there is no danger this will be done twice. */
if (n > 0)
{
VEC_replace (tree, params, 3, VEC_index (tree, params, 4));
VEC_replace (tree, params, 4, VEC_index (tree, params, 5));
VEC_truncate (tree, params, 5);
}
*new_return = add_atomic_size_parameter (n, loc, function, params);
return true;
}
/* Otherwise, there is a match, so the call needs to be transformed from:
bool fn(T* mem, T* expected, T* desired, weak, success, failure)
into
bool fn ((In *)mem, (In *)expected, (In) *desired, weak, succ, fail) */
p0 = VEC_index (tree, params, 0);
p1 = VEC_index (tree, params, 1);
p2 = VEC_index (tree, params, 2);
/* Create pointer to appropriate size. */
I_type = builtin_type_for_size (BITS_PER_UNIT * n, 1);
I_type_ptr = build_pointer_type (I_type);
/* Convert object pointer to required type. */
p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0);
VEC_replace (tree, params, 0, p0);
/* Convert expected pointer to required type. */
p1 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p1);
VEC_replace (tree, params, 1, p1);
/* Convert desired value to required type, and dereference it. */
p2 = build_indirect_ref (loc, p2, RO_UNARY_STAR);
p2 = build1 (VIEW_CONVERT_EXPR, I_type, p2);
VEC_replace (tree, params, 2, p2);
/* The rest of the parameters are fine. NULL means no special return value
processing. */
*new_return = NULL;
return false;
}
/* This will process an __atomic_load function call, determine whether it
needs to be mapped to the _N variation, or turned into a library call.
LOC is the location of the builtin call.
FUNCTION is the DECL that has been invoked;
PARAMS is the argument list for the call.
TRUE is returned if the call is translated into the proper format for a call
to the external library, and NEW_RETURN is set to the tree for that call.
FALSE is returned if processing for the _N variation is required, and
NEW_RETURN is set to the return value the result is copied into. */
static bool
resolve_overloaded_atomic_load (location_t loc, tree function,
VEC(tree,gc) *params, tree *new_return)
{
tree p0, p1, p2;
tree I_type, I_type_ptr;
int n = get_atomic_generic_size (loc, function, params);
/* If not a lock-free size, change to the library generic format. */
if (n != 1 && n != 2 && n != 4 && n != 8 && n != 16)
{
*new_return = add_atomic_size_parameter (n, loc, function, params);
return true;
}
/* Otherwise, there is a match, so the call needs to be transformed from:
void fn(T* mem, T* return, model)
into
*return = (T) (fn ((In *) mem, model)) */
p0 = VEC_index (tree, params, 0);
p1 = VEC_index (tree, params, 1);
p2 = VEC_index (tree, params, 2);
/* Create pointer to appropriate size. */
I_type = builtin_type_for_size (BITS_PER_UNIT * n, 1);
I_type_ptr = build_pointer_type (I_type);
/* Convert object pointer to required type. */
p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0);
VEC_replace (tree, params, 0, p0);
/* Move memory model to the 2nd position, and end param list. */
VEC_replace (tree, params, 1, p2);
VEC_truncate (tree, params, 2);
/* Convert return pointer and dereference it for later assignment. */
*new_return = build_indirect_ref (loc, p1, RO_UNARY_STAR);
return false;
}
/* This will process an __atomic_store function call, determine whether it
needs to be mapped to the _N variation, or turned into a library call.
LOC is the location of the builtin call.
FUNCTION is the DECL that has been invoked;
PARAMS is the argument list for the call.
TRUE is returned if the call is translated into the proper format for a call
to the external library, and NEW_RETURN is set to the tree for that call.
FALSE is returned if processing for the _N variation is required, and
NEW_RETURN is set to the return value the result is copied into. */
static bool
resolve_overloaded_atomic_store (location_t loc, tree function,
VEC(tree,gc) *params, tree *new_return)
{
tree p0, p1;
tree I_type, I_type_ptr;
int n = get_atomic_generic_size (loc, function, params);
/* If not a lock-free size, change to the library generic format. */
if (n != 1 && n != 2 && n != 4 && n != 8 && n != 16)
{
*new_return = add_atomic_size_parameter (n, loc, function, params);
return true;
}
/* Otherwise, there is a match, so the call needs to be transformed from:
void fn(T* mem, T* value, model)
into
fn ((In *) mem, (In) *value, model) */
p0 = VEC_index (tree, params, 0);
p1 = VEC_index (tree, params, 1);
/* Create pointer to appropriate size. */
I_type = builtin_type_for_size (BITS_PER_UNIT * n, 1);
I_type_ptr = build_pointer_type (I_type);
/* Convert object pointer to required type. */
p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0);
VEC_replace (tree, params, 0, p0);
/* Convert new value to required type, and dereference it. */
p1 = build_indirect_ref (loc, p1, RO_UNARY_STAR);
p1 = build1 (VIEW_CONVERT_EXPR, I_type, p1);
VEC_replace (tree, params, 1, p1);
/* The memory model is in the right spot already. Return is void. */
*new_return = NULL_TREE;
return false;
}
/* Some builtin functions are placeholders for other expressions. This
function should be called immediately after parsing the call expression
before surrounding code has committed to the type of the expression.
@ -9086,6 +9475,9 @@ tree
resolve_overloaded_builtin (location_t loc, tree function, VEC(tree,gc) *params)
{
enum built_in_function orig_code = DECL_FUNCTION_CODE (function);
bool orig_format = true;
tree new_return = NULL_TREE;
switch (DECL_BUILT_IN_CLASS (function))
{
case BUILT_IN_NORMAL:
@ -9102,6 +9494,78 @@ resolve_overloaded_builtin (location_t loc, tree function, VEC(tree,gc) *params)
/* Handle BUILT_IN_NORMAL here. */
switch (orig_code)
{
case BUILT_IN_ATOMIC_EXCHANGE:
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
case BUILT_IN_ATOMIC_LOAD:
case BUILT_IN_ATOMIC_STORE:
{
/* Handle these 4 together so that they can fall through to the next
case if the call is transformed to an _N variant. */
switch (orig_code)
{
case BUILT_IN_ATOMIC_EXCHANGE:
{
if (resolve_overloaded_atomic_exchange (loc, function, params,
&new_return))
return new_return;
/* Change to the _N variant. */
orig_code = BUILT_IN_ATOMIC_EXCHANGE_N;
break;
}
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
{
if (resolve_overloaded_atomic_compare_exchange (loc, function,
params,
&new_return))
return new_return;
/* Change to the _N variant. */
orig_code = BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N;
break;
}
case BUILT_IN_ATOMIC_LOAD:
{
if (resolve_overloaded_atomic_load (loc, function, params,
&new_return))
return new_return;
/* Change to the _N variant. */
orig_code = BUILT_IN_ATOMIC_LOAD_N;
break;
}
case BUILT_IN_ATOMIC_STORE:
{
if (resolve_overloaded_atomic_store (loc, function, params,
&new_return))
return new_return;
/* Change to the _N variant. */
orig_code = BUILT_IN_ATOMIC_STORE_N;
break;
}
default:
gcc_unreachable ();
}
/* Fallthrough to the normal processing. */
}
case BUILT_IN_ATOMIC_EXCHANGE_N:
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
case BUILT_IN_ATOMIC_LOAD_N:
case BUILT_IN_ATOMIC_STORE_N:
case BUILT_IN_ATOMIC_ADD_FETCH_N:
case BUILT_IN_ATOMIC_SUB_FETCH_N:
case BUILT_IN_ATOMIC_AND_FETCH_N:
case BUILT_IN_ATOMIC_NAND_FETCH_N:
case BUILT_IN_ATOMIC_XOR_FETCH_N:
case BUILT_IN_ATOMIC_OR_FETCH_N:
case BUILT_IN_ATOMIC_FETCH_ADD_N:
case BUILT_IN_ATOMIC_FETCH_SUB_N:
case BUILT_IN_ATOMIC_FETCH_AND_N:
case BUILT_IN_ATOMIC_FETCH_NAND_N:
case BUILT_IN_ATOMIC_FETCH_XOR_N:
case BUILT_IN_ATOMIC_FETCH_OR_N:
{
orig_format = false;
/* Fallthru for parameter processing. */
}
case BUILT_IN_SYNC_FETCH_AND_ADD_N:
case BUILT_IN_SYNC_FETCH_AND_SUB_N:
case BUILT_IN_SYNC_FETCH_AND_OR_N:
@ -9128,15 +9592,31 @@ resolve_overloaded_builtin (location_t loc, tree function, VEC(tree,gc) *params)
fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1);
new_function = builtin_decl_explicit (fncode);
if (!sync_resolve_params (function, new_function, params))
if (!sync_resolve_params (loc, function, new_function, params,
orig_format))
return error_mark_node;
first_param = VEC_index (tree, params, 0);
result = build_function_call_vec (loc, new_function, params, NULL);
if (result == error_mark_node)
return result;
if (orig_code != BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
&& orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N)
result = sync_resolve_return (first_param, result);
&& orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N
&& orig_code != BUILT_IN_ATOMIC_STORE_N)
result = sync_resolve_return (first_param, result, orig_format);
/* If new_return is set, assign function to that expr and cast the
result to void since the generic interface returned void. */
if (new_return)
{
/* Cast function result from I{1,2,4,8,16} to the required type. */
result = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (new_return), result);
result = build2 (MODIFY_EXPR, TREE_TYPE (new_return), new_return,
result);
TREE_SIDE_EFFECTS (result) = 1;
protected_set_expr_location (result, loc);
result = convert (void_type_node, result);
}
return result;
}

View File

@ -758,30 +758,50 @@ c_cpp_builtins (cpp_reader *pfile)
/* Tell source code if the compiler makes sync_compare_and_swap
builtins available. */
#ifdef HAVE_sync_compare_and_swapqi
if (HAVE_sync_compare_and_swapqi)
#ifndef HAVE_sync_compare_and_swapqi
#define HAVE_sync_compare_and_swapqi 0
#endif
#ifndef HAVE_atomic_compare_and_swapqi
#define HAVE_atomic_compare_and_swapqi 0
#endif
if (HAVE_sync_compare_and_swapqi || HAVE_atomic_compare_and_swapqi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
#endif
#ifdef HAVE_sync_compare_and_swaphi
if (HAVE_sync_compare_and_swaphi)
#ifndef HAVE_sync_compare_and_swaphi
#define HAVE_sync_compare_and_swaphi 0
#endif
#ifndef HAVE_atomic_compare_and_swaphi
#define HAVE_atomic_compare_and_swaphi 0
#endif
if (HAVE_sync_compare_and_swaphi || HAVE_atomic_compare_and_swaphi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
#endif
#ifdef HAVE_sync_compare_and_swapsi
if (HAVE_sync_compare_and_swapsi)
#ifndef HAVE_sync_compare_and_swapsi
#define HAVE_sync_compare_and_swapsi 0
#endif
#ifndef HAVE_atomic_compare_and_swapsi
#define HAVE_atomic_compare_and_swapsi 0
#endif
if (HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
#endif
#ifdef HAVE_sync_compare_and_swapdi
if (HAVE_sync_compare_and_swapdi)
#ifndef HAVE_sync_compare_and_swapdi
#define HAVE_sync_compare_and_swapdi 0
#endif
#ifndef HAVE_atomic_compare_and_swapdi
#define HAVE_atomic_compare_and_swapdi 0
#endif
if (HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
#endif
#ifdef HAVE_sync_compare_and_swapti
if (HAVE_sync_compare_and_swapti)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
#ifndef HAVE_sync_compare_and_swapti
#define HAVE_sync_compare_and_swapti 0
#endif
#ifndef HAVE_atomic_compare_and_swapti
#define HAVE_atomic_compare_and_swapti 0
#endif
if (HAVE_sync_compare_and_swapti || HAVE_atomic_compare_and_swapti)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
#ifdef DWARF2_UNWIND_INFO
if (dwarf2out_do_cfi_asm ())

View File

@ -2717,6 +2717,10 @@ build_function_call_vec (location_t loc, tree function, VEC(tree,gc) *params,
name = DECL_NAME (function);
fundecl = function;
/* Atomic functions have type checking/casting already done. They are
often rewritten and don't match the original parameter list. */
if (name && !strncmp (IDENTIFIER_POINTER (name), "__atomic_", 9))
origtypes = NULL;
}
if (TREE_CODE (TREE_TYPE (function)) == FUNCTION_TYPE)
function = function_to_pointer_conversion (loc, function);

View File

@ -551,6 +551,10 @@ Winline
Common Var(warn_inline) Warning
Warn when an inlined function cannot be inlined
Winvalid-memory-model
Common Var(warn_invalid_memory_model) Init(1) Warning
Warn when an atomic memory model parameter is known to be outside the valid range.
Wlarger-than-
Common RejectNegative Joined Warning Undocumented Alias(Wlarger-than=)
@ -1266,6 +1270,10 @@ finline-limit=
Common RejectNegative Joined UInteger
-finline-limit=<number> Limit the size of inlined functions to <number>
finline-atomics
Common Report Var(flag_inline_atomics) Init(1) Optimization
Inline __atomic operations when a lock free instruction sequence is available.
finstrument-functions
Common Report Var(flag_instrument_function_entry_exit)
Instrument function entry and exit with profiling calls

View File

@ -248,6 +248,9 @@
;; For BMI2 support
UNSPEC_PDEP
UNSPEC_PEXT
;; For __atomic support
UNSPEC_MOVA
])
(define_c_enum "unspecv" [
@ -262,7 +265,10 @@
UNSPECV_ALIGN
UNSPECV_MONITOR
UNSPECV_MWAIT
UNSPECV_CMPXCHG
UNSPECV_CMPXCHG_1
UNSPECV_CMPXCHG_2
UNSPECV_CMPXCHG_3
UNSPECV_CMPXCHG_4
UNSPECV_XCHG
UNSPECV_LOCK
UNSPECV_PROLOGUE_USE

View File

@ -18,31 +18,27 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
(define_mode_iterator CASMODE
[QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B")
(TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_iterator DCASMODE
[(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
(TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
(define_expand "memory_barrier"
[(set (match_dup 0)
(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
(define_expand "mem_thread_fence"
[(match_operand:SI 0 "const_int_operand" "")] ;; model
""
{
operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
MEM_VOLATILE_P (operands[0]) = 1;
/* Unless this is a SEQ_CST fence, the i386 memory model is strong
enough not to require barriers of any kind. */
if (INTVAL (operands[0]) != MEMMODEL_SEQ_CST)
DONE;
if (!(TARGET_64BIT || TARGET_SSE2))
if (TARGET_64BIT || TARGET_SSE2)
emit_insn (gen_sse2_mfence ());
else
{
emit_insn (gen_memory_barrier_nosse (operands[0]));
DONE;
rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
MEM_VOLATILE_P (mem) = 1;
emit_insn (gen_mfence_nosse (mem));
}
DONE;
})
(define_insn "memory_barrier_nosse"
(define_insn "mfence_nosse"
[(set (match_operand:BLK 0 "" "")
(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
(clobber (reg:CC FLAGS_REG))]
@ -50,127 +46,315 @@
"lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
[(set_attr "memory" "unknown")])
;; ??? It would be possible to use cmpxchg8b on pentium for DImode
;; changes. It's complicated because the insn uses ecx:ebx as the
;; new value; note that the registers are reversed from the order
;; that they'd be in with (reg:DI 2 ecx). Similarly for TImode
;; data in 64-bit mode.
;; ??? From volume 3 section 7.1.1 Guaranteed Atomic Operations,
;; Only beginning at Pentium family processors do we get any guarantee of
;; atomicity in aligned 64-bit quantities. Beginning at P6, we get a
;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
;;
;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
;;
;; Importantly, *no* processor makes atomicity guarantees for larger
;; accesses. In particular, there's no way to perform an atomic TImode
;; move, despite the apparent applicability of MOVDQA et al.
(define_expand "sync_compare_and_swap<mode>"
[(parallel
[(set (match_operand:CASMODE 0 "register_operand" "")
(match_operand:CASMODE 1 "memory_operand" ""))
(set (match_dup 1)
(unspec_volatile:CASMODE
[(match_dup 1)
(match_operand:CASMODE 2 "register_operand" "")
(match_operand:CASMODE 3 "register_operand" "")]
UNSPECV_CMPXCHG))
(set (reg:CCZ FLAGS_REG)
(compare:CCZ
(unspec_volatile:CASMODE
[(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
(match_dup 2)))])]
"TARGET_CMPXCHG"
(define_mode_iterator ATOMIC
[QI HI SI
(DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
])
(define_expand "atomic_load<mode>"
[(set (match_operand:ATOMIC 0 "register_operand" "")
(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "")
(match_operand:SI 2 "const_int_operand" "")]
UNSPEC_MOVA))]
""
{
if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode)
{
enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode;
rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0);
rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode,
GET_MODE_SIZE (hmode));
low = force_reg (hmode, low);
high = force_reg (hmode, high);
if (<MODE>mode == DImode)
{
if (flag_pic && !cmpxchg8b_pic_memory_operand (operands[1], DImode))
operands[1] = replace_equiv_address (operands[1],
force_reg (Pmode,
XEXP (operands[1],
0)));
emit_insn (gen_sync_double_compare_and_swapdi
(operands[0], operands[1], operands[2], low, high));
}
else if (<MODE>mode == TImode)
emit_insn (gen_sync_double_compare_and_swapti
(operands[0], operands[1], operands[2], low, high));
else
gcc_unreachable ();
DONE;
}
/* For DImode on 32-bit, we can use the FPU to perform the load. */
if (<MODE>mode == DImode && !TARGET_64BIT)
emit_insn (gen_atomic_loaddi_fpu
(operands[0], operands[1],
assign_386_stack_local (DImode,
(virtuals_instantiated
? SLOT_TEMP : SLOT_VIRTUAL))));
else
emit_move_insn (operands[0], operands[1]);
DONE;
})
(define_insn "*sync_compare_and_swap<mode>"
(define_insn_and_split "atomic_loaddi_fpu"
[(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
(unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
UNSPEC_MOVA))
(clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
(clobber (match_scratch:DF 3 "=X,xf,xf"))]
"!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
"#"
"&& reload_completed"
[(const_int 0)]
{
rtx dst = operands[0], src = operands[1];
rtx mem = operands[2], tmp = operands[3];
if (SSE_REG_P (dst))
emit_move_insn (dst, src);
else
{
if (MEM_P (dst))
mem = dst;
if (FP_REG_P (tmp))
emit_insn (gen_movdi_via_fpu (mem, src, tmp));
else
{
adjust_reg_mode (tmp, DImode);
emit_move_insn (tmp, src);
emit_move_insn (mem, tmp);
}
if (mem != dst)
emit_move_insn (dst, mem);
}
DONE;
})
(define_expand "atomic_store<mode>"
[(set (match_operand:ATOMIC 0 "memory_operand" "")
(unspec:ATOMIC [(match_operand:ATOMIC 1 "register_operand" "")
(match_operand:SI 2 "const_int_operand" "")]
UNSPEC_MOVA))]
""
{
enum memmodel model = (enum memmodel) INTVAL (operands[2]);
if (<MODE>mode == DImode && !TARGET_64BIT)
{
/* For DImode on 32-bit, we can use the FPU to perform the store. */
/* Note that while we could perform a cmpxchg8b loop, that turns
out to be significantly larger than this plus a barrier. */
emit_insn (gen_atomic_storedi_fpu
(operands[0], operands[1],
assign_386_stack_local (DImode,
(virtuals_instantiated
? SLOT_TEMP : SLOT_VIRTUAL))));
}
else
{
/* For seq-cst stores, when we lack MFENCE, use XCHG. */
if (model == MEMMODEL_SEQ_CST && !(TARGET_64BIT || TARGET_SSE2))
{
emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
operands[0], operands[1],
operands[2]));
DONE;
}
/* Otherwise use a normal store. */
emit_move_insn (operands[0], operands[1]);
}
/* ... followed by an MFENCE, if required. */
if (model == MEMMODEL_SEQ_CST)
emit_insn (gen_mem_thread_fence (operands[2]));
DONE;
})
(define_insn_and_split "atomic_storedi_fpu"
[(set (match_operand:DI 0 "memory_operand" "=m,m,m")
(unspec:DI [(match_operand:DI 1 "register_operand" "x,m,?r")]
UNSPEC_MOVA))
(clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
(clobber (match_scratch:DF 3 "=X,xf,xf"))]
"!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
"#"
"&& reload_completed"
[(const_int 0)]
{
rtx dst = operands[0], src = operands[1];
rtx mem = operands[2], tmp = operands[3];
if (!SSE_REG_P (src))
{
if (REG_P (src))
{
emit_move_insn (mem, src);
src = mem;
}
if (FP_REG_P (tmp))
{
emit_insn (gen_movdi_via_fpu (dst, src, tmp));
DONE;
}
else
{
adjust_reg_mode (tmp, DImode);
emit_move_insn (tmp, mem);
src = tmp;
}
}
emit_move_insn (dst, src);
DONE;
})
;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
;; operations. But the fix_trunc patterns want way more setup than we want
;; to provide. Note that the scratch is DFmode instead of XFmode in order
;; to make it easy to allocate a scratch in either SSE or FP_REGs above.
(define_insn "movdi_via_fpu"
[(set (match_operand:DI 0 "memory_operand" "=m")
(unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_MOVA))
(clobber (match_operand:DF 2 "register_operand" "=f"))]
"TARGET_80387"
"fild\t%1\;fistp\t%0"
[(set_attr "type" "multi")
;; Worst case based on full sib+offset32 addressing modes
(set_attr "length" "14")])
(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:QI 0 "register_operand" "") ;; bool success output
(match_operand:SWI124 1 "register_operand" "") ;; oldval output
(match_operand:SWI124 2 "memory_operand" "") ;; memory
(match_operand:SWI124 3 "register_operand" "") ;; expected input
(match_operand:SWI124 4 "register_operand" "") ;; newval input
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
(match_operand:SI 6 "const_int_operand" "") ;; success model
(match_operand:SI 7 "const_int_operand" "")] ;; failure model
"TARGET_CMPXCHG"
{
emit_insn (gen_atomic_compare_and_swap_single<mode>
(operands[1], operands[2], operands[3], operands[4]));
ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
const0_rtx);
DONE;
})
(define_mode_iterator CASMODE
[(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
(TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_iterator DCASMODE
[(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
(TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:QI 0 "register_operand" "") ;; bool success output
(match_operand:CASMODE 1 "register_operand" "") ;; oldval output
(match_operand:CASMODE 2 "memory_operand" "") ;; memory
(match_operand:CASMODE 3 "register_operand" "") ;; expected input
(match_operand:CASMODE 4 "register_operand" "") ;; newval input
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
(match_operand:SI 6 "const_int_operand" "") ;; success model
(match_operand:SI 7 "const_int_operand" "")] ;; failure model
"TARGET_CMPXCHG"
{
if (<MODE>mode == DImode && TARGET_64BIT)
{
emit_insn (gen_atomic_compare_and_swap_singledi
(operands[1], operands[2], operands[3], operands[4]));
}
else
{
enum machine_mode hmode = <DCASHMODE>mode;
rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n, mem;
lo_o = operands[1];
mem = operands[2];
lo_e = operands[3];
lo_n = operands[4];
hi_o = gen_highpart (hmode, lo_o);
hi_e = gen_highpart (hmode, lo_e);
hi_n = gen_highpart (hmode, lo_n);
lo_o = gen_lowpart (hmode, lo_o);
lo_e = gen_lowpart (hmode, lo_e);
lo_n = gen_lowpart (hmode, lo_n);
if (<MODE>mode == DImode
&& !TARGET_64BIT
&& flag_pic
&& !cmpxchg8b_pic_memory_operand (mem, DImode))
mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
emit_insn (gen_atomic_compare_and_swap_double<mode>
(lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n));
}
ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
const0_rtx);
DONE;
})
(define_insn "atomic_compare_and_swap_single<mode>"
[(set (match_operand:SWI 0 "register_operand" "=a")
(match_operand:SWI 1 "memory_operand" "+m"))
(set (match_dup 1)
(unspec_volatile:SWI
[(match_dup 1)
(match_operand:SWI 2 "register_operand" "a")
[(match_operand:SWI 1 "memory_operand" "+m")
(match_operand:SWI 2 "register_operand" "0")
(match_operand:SWI 3 "register_operand" "<r>")]
UNSPECV_CMPXCHG))
UNSPECV_CMPXCHG_1))
(set (match_dup 1)
(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2))
(set (reg:CCZ FLAGS_REG)
(compare:CCZ
(unspec_volatile:SWI
[(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
(match_dup 2)))]
(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))]
"TARGET_CMPXCHG"
"lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
(define_insn "sync_double_compare_and_swap<mode>"
[(set (match_operand:DCASMODE 0 "register_operand" "=A")
(match_operand:DCASMODE 1 "memory_operand" "+m"))
(set (match_dup 1)
(unspec_volatile:DCASMODE
[(match_dup 1)
(match_operand:DCASMODE 2 "register_operand" "A")
(match_operand:<DCASHMODE> 3 "register_operand" "b")
(match_operand:<DCASHMODE> 4 "register_operand" "c")]
UNSPECV_CMPXCHG))
;; For double-word compare and swap, we are obliged to play tricks with
;; the input newval (op5:op6) because the Intel register numbering does
;; not match the gcc register numbering, so the pair must be CX:BX.
;; That said, in order to take advantage of possible lower-subreg opts,
;; treat all of the integral operands in the same way.
(define_insn "atomic_compare_and_swap_double<mode>"
[(set (match_operand:<DCASHMODE> 0 "register_operand" "=a")
(unspec_volatile:<DCASHMODE>
[(match_operand:DCASMODE 2 "memory_operand" "+m")
(match_operand:<DCASHMODE> 3 "register_operand" "0")
(match_operand:<DCASHMODE> 4 "register_operand" "1")
(match_operand:<DCASHMODE> 5 "register_operand" "b")
(match_operand:<DCASHMODE> 6 "register_operand" "c")]
UNSPECV_CMPXCHG_1))
(set (match_operand:<DCASHMODE> 1 "register_operand" "=d")
(unspec_volatile:<DCASHMODE> [(const_int 0)] UNSPECV_CMPXCHG_2))
(set (match_dup 2)
(unspec_volatile:DCASMODE [(const_int 0)] UNSPECV_CMPXCHG_3))
(set (reg:CCZ FLAGS_REG)
(compare:CCZ
(unspec_volatile:DCASMODE
[(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
UNSPECV_CMPXCHG)
(match_dup 2)))]
(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
""
"lock{%;} cmpxchg<doublemodesuffix>b\t%1")
"lock{%;} cmpxchg<doublemodesuffix>b\t%2")
;; Theoretically we'd like to use constraint "r" (any reg) for operand
;; 3, but that includes ecx. If operand 3 and 4 are the same (like when
;; the input is -1LL) GCC might choose to allocate operand 3 to ecx, like
;; operand 4. This breaks, as the xchg will move the PIC register contents
;; to %ecx then --> boom. Operands 3 and 4 really need to be different
;; registers, which in this case means operand 3 must not be ecx.
;; Instead of playing tricks with fake early clobbers or the like we
;; just enumerate all regs possible here, which (as this is !TARGET_64BIT)
;; Theoretically we'd like to use constraint "r" (any reg) for op5,
;; but that includes ecx. If op5 and op6 are the same (like when
;; the input is -1LL) GCC might choose to allocate op5 to ecx, like
;; op6. This breaks, as the xchg will move the PIC register contents
;; to %ecx then --> boom. Operands 5 and 6 really need to be different
;; registers, which in this case means op5 must not be ecx. Instead
;; of playing tricks with fake early clobbers or the like we just
;; enumerate all regs possible here, which (as this is !TARGET_64BIT)
;; are just esi and edi.
(define_insn "*sync_double_compare_and_swapdi_pic"
[(set (match_operand:DI 0 "register_operand" "=A")
(match_operand:DI 1 "cmpxchg8b_pic_memory_operand" "+m"))
(set (match_dup 1)
(unspec_volatile:DI
[(match_dup 1)
(match_operand:DI 2 "register_operand" "A")
(match_operand:SI 3 "register_operand" "SD")
(match_operand:SI 4 "register_operand" "c")]
UNSPECV_CMPXCHG))
(define_insn "*atomic_compare_and_swap_doubledi_pic"
[(set (match_operand:SI 0 "register_operand" "=a")
(unspec_volatile:SI
[(match_operand:DI 2 "cmpxchg8b_pic_memory_operand" "+m")
(match_operand:SI 3 "register_operand" "0")
(match_operand:SI 4 "register_operand" "1")
(match_operand:SI 5 "register_operand" "SD")
(match_operand:SI 6 "register_operand" "c")]
UNSPECV_CMPXCHG_1))
(set (match_operand:SI 1 "register_operand" "=d")
(unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2))
(set (match_dup 2)
(unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG_3))
(set (reg:CCZ FLAGS_REG)
(compare:CCZ
(unspec_volatile:DI
[(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
UNSPECV_CMPXCHG)
(match_dup 2)))]
(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
"!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
"xchg{l}\t%%ebx, %3\;lock{%;} cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
"xchg{l}\t%%ebx, %5\;lock{%;} cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5")
;; For operand 2 nonmemory_operand predicate is used instead of
;; register_operand to allow combiner to better optimize atomic
;; additions of constants.
(define_insn "sync_old_add<mode>"
(define_insn "atomic_fetch_add<mode>"
[(set (match_operand:SWI 0 "register_operand" "=<r>")
(unspec_volatile:SWI
[(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
[(match_operand:SWI 1 "memory_operand" "+m")
(match_operand:SI 3 "const_int_operand" "")] ;; model
UNSPECV_XCHG))
(set (match_dup 1)
(plus:SWI (match_dup 1)
(match_operand:SWI 2 "nonmemory_operand" "0")))
@ -186,7 +370,9 @@
(match_operand:SWI 2 "const_int_operand" ""))
(parallel [(set (match_dup 0)
(unspec_volatile:SWI
[(match_operand:SWI 1 "memory_operand" "")] UNSPECV_XCHG))
[(match_operand:SWI 1 "memory_operand" "")
(match_operand:SI 4 "const_int_operand" "")]
UNSPECV_XCHG))
(set (match_dup 1)
(plus:SWI (match_dup 1)
(match_dup 0)))
@ -199,17 +385,19 @@
== -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
&& !reg_overlap_mentioned_p (operands[0], operands[1])"
[(parallel [(set (reg:CCZ FLAGS_REG)
(compare:CCZ (unspec_volatile:SWI [(match_dup 1)]
UNSPECV_XCHG)
(match_dup 3)))
(compare:CCZ
(unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
UNSPECV_XCHG)
(match_dup 3)))
(set (match_dup 1)
(plus:SWI (match_dup 1)
(match_dup 2)))])])
(define_insn "*sync_old_add_cmp<mode>"
(define_insn "*atomic_fetch_add_cmp<mode>"
[(set (reg:CCZ FLAGS_REG)
(compare:CCZ (unspec_volatile:SWI
[(match_operand:SWI 0 "memory_operand" "+m")]
[(match_operand:SWI 0 "memory_operand" "+m")
(match_operand:SI 3 "const_int_operand" "")]
UNSPECV_XCHG)
(match_operand:SWI 2 "const_int_operand" "i")))
(set (match_dup 0)
@ -233,20 +421,24 @@
})
;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
(define_insn "sync_lock_test_and_set<mode>"
[(set (match_operand:SWI 0 "register_operand" "=<r>")
;; In addition, it is always a full barrier, so we can ignore the memory model.
(define_insn "atomic_exchange<mode>"
[(set (match_operand:SWI 0 "register_operand" "=<r>") ;; output
(unspec_volatile:SWI
[(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
[(match_operand:SWI 1 "memory_operand" "+m") ;; memory
(match_operand:SI 3 "const_int_operand" "")] ;; model
UNSPECV_XCHG))
(set (match_dup 1)
(match_operand:SWI 2 "register_operand" "0"))]
(match_operand:SWI 2 "register_operand" "0"))] ;; input
""
"xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
(define_insn "sync_add<mode>"
(define_insn "atomic_add<mode>"
[(set (match_operand:SWI 0 "memory_operand" "+m")
(unspec_volatile:SWI
[(plus:SWI (match_dup 0)
(match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
(match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
(match_operand:SI 2 "const_int_operand" "")] ;; model
UNSPECV_LOCK))
(clobber (reg:CC FLAGS_REG))]
""
@ -265,11 +457,12 @@
return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
(define_insn "sync_sub<mode>"
(define_insn "atomic_sub<mode>"
[(set (match_operand:SWI 0 "memory_operand" "+m")
(unspec_volatile:SWI
[(minus:SWI (match_dup 0)
(match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
(match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
(match_operand:SI 2 "const_int_operand" "")] ;; model
UNSPECV_LOCK))
(clobber (reg:CC FLAGS_REG))]
""
@ -282,14 +475,18 @@
return "lock{%;} inc{<imodesuffix>}\t%0";
}
if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})
(define_insn "sync_<code><mode>"
(define_insn "atomic_<code><mode>"
[(set (match_operand:SWI 0 "memory_operand" "+m")
(unspec_volatile:SWI
[(any_logic:SWI (match_dup 0)
(match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
(match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
(match_operand:SI 2 "const_int_operand" "")] ;; model
UNSPECV_LOCK))
(clobber (reg:CC FLAGS_REG))]
""

View File

@ -181,5 +181,18 @@ union _dont_use_tree_here_;
#endif
/* Memory model types for the __atomic* builtins.
This must match the order in libstdc++-v3/include/bits/atomic_base.h. */
enum memmodel
{
MEMMODEL_RELAXED = 0,
MEMMODEL_CONSUME = 1,
MEMMODEL_ACQUIRE = 2,
MEMMODEL_RELEASE = 3,
MEMMODEL_ACQ_REL = 4,
MEMMODEL_SEQ_CST = 5,
MEMMODEL_LAST = 6
};
#endif /* coretypes.h */

View File

@ -66,6 +66,12 @@ define__GNUC__ (cpp_reader *pfile)
cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor);
cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel);
cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string);
cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED);
cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST);
cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE);
cpp_define_formatted (pfile, "__ATOMIC_RELEASE=%d", MEMMODEL_RELEASE);
cpp_define_formatted (pfile, "__ATOMIC_ACQ_REL=%d", MEMMODEL_ACQ_REL);
cpp_define_formatted (pfile, "__ATOMIC_CONSUME=%d", MEMMODEL_CONSUME);
}

View File

@ -79,7 +79,8 @@ extensions, accepted by GCC in C90 mode and in C++.
* Return Address:: Getting the return or frame address of a function.
* Vector Extensions:: Using vector instructions through built-in functions.
* Offsetof:: Special syntax for implementing @code{offsetof}.
* Atomic Builtins:: Built-in functions for atomic memory access.
* __sync Builtins:: Legacy built-in functions for atomic memory access.
* __atomic Builtins:: Atomic built-in functions with memory model.
* Object Size Checking:: Built-in functions for limited buffer overflow
checking.
* Other Builtins:: Other built-in functions.
@ -6683,8 +6684,8 @@ is a suitable definition of the @code{offsetof} macro. In C++, @var{type}
may be dependent. In either case, @var{member} may consist of a single
identifier, or a sequence of member accesses and array references.
@node Atomic Builtins
@section Built-in functions for atomic memory access
@node __sync Builtins
@section Legacy __sync built-in functions for atomic memory access
The following builtins are intended to be compatible with those described
in the @cite{Intel Itanium Processor-specific Application Binary Interface},
@ -6816,6 +6817,238 @@ previous memory loads have been satisfied, but following memory reads
are not prevented from being speculated to before the barrier.
@end table
@node __atomic Builtins
@section Built-in functions for memory model aware atomic operations
The following built-in functions approximately match the requirements for
the C++11 memory model. Many are similar to the @samp{__sync} prefixed built-in
functions, but all also have a memory model parameter. These are all
identified by being prefixed with @samp{__atomic}, and most are overloaded
such that they work with multiple types.
GCC will allow any integral scalar or pointer type that is 1, 2, 4, or 8
bytes in length. 16-byte integral types are also allowed if
@samp{__int128} (@pxref{__int128}) is supported by the architecture.
Target architectures are encouraged to provide their own patterns for
each of these built-in functions. If no target pattern is provided, the original
non-memory model set of @samp{__sync} atomic built-in functions will be
utilized, along with any required synchronization fences surrounding it in
order to achieve the proper behaviour. Execution in this case is subject
to the same restrictions as those built-in functions.
If there is no pattern or mechanism to provide a lock free instruction
sequence, a call is made to an external routine with the same parameters
to be resolved at runtime.
The four non-arithmetic functions (load, store, exchange, and
compare_exchange) all have a generic version as well. This generic
version will work on any data type. If the data type size maps to one
of the integral sizes which may have lock free support, the generic
version will utilize the lock free built-in function. Otherwise an
external call is left to be resolved at runtime. This external call will
be the same format with the addition of a @samp{size_t} parameter inserted
as the first parameter indicating the size of the object being pointed to.
All objects must be the same size.
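As an illustrative sketch only (assuming an @code{int} object, a size which
normally has lock free support), the two forms of an atomic load might be
written as:

@smallexample
int obj, v1, v2;

v1 = __atomic_load_n (&obj, __ATOMIC_ACQUIRE);   /* _n variant */
__atomic_load (&obj, &v2, __ATOMIC_ACQUIRE);     /* generic variant */
@end smallexample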
There are 6 different memory models which can be specified. These map
to the same names in the C++11 standard. Refer there or to the
@uref{http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync,GCC wiki on
atomic synchronization} for more detailed definitions. These memory
models integrate both barriers to code motion as well as synchronization
requirements with other threads. These are listed in approximately
ascending order of strength.
@table @code
@item __ATOMIC_RELAXED
No barriers or synchronization.
@item __ATOMIC_CONSUME
Data dependency only for both barrier and synchronization with another
thread.
@item __ATOMIC_ACQUIRE
Barrier to hoisting of code and synchronizes with release (or stronger)
semantic stores from another thread.
@item __ATOMIC_RELEASE
Barrier to sinking of code and synchronizes with acquire (or stronger)
semantic loads from another thread.
@item __ATOMIC_ACQ_REL
Full barrier in both directions and synchronizes with acquire loads and
release stores in another thread.
@item __ATOMIC_SEQ_CST
Full barrier in both directions and synchronizes with acquire loads and
release stores in all threads.
@end table
When implementing patterns for these built-in functions, the memory model
parameter can be ignored as long as the pattern implements the most
restrictive @code{__ATOMIC_SEQ_CST} model. Any of the other memory models
will execute correctly with this memory model but they may not execute as
efficiently as they could with a more appropriate implementation of the
relaxed requirements.
Note that the C++11 standard allows for the memory model parameter to be
determined at runtime rather than at compile time. These built-in
functions will map any runtime value to @code{__ATOMIC_SEQ_CST} rather
than invoke a runtime library call or inline a switch statement. This is
standard compliant, safe, and the simplest approach for now.
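For example, the memory model argument in the following hypothetical function
is not a compile time constant, so the load is simply treated as
@code{__ATOMIC_SEQ_CST}:

@smallexample
int
load_with_model (int *p, int model)
@{
  return __atomic_load_n (p, model);   /* any runtime value: SEQ_CST */
@}
@end smallexample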
@deftypefn {Built-in Function} @var{type} __atomic_load_n (@var{type} *ptr, int memmodel)
This built-in function implements an atomic load operation. It returns the
contents of @code{*@var{ptr}}.
The valid memory model variants are
@code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, @code{__ATOMIC_ACQUIRE},
and @code{__ATOMIC_CONSUME}.
@end deftypefn
@deftypefn {Built-in Function} void __atomic_load (@var{type} *ptr, @var{type} *ret, int memmodel)
This is the generic version of an atomic load. It will return the
contents of @code{*@var{ptr}} in @code{*@var{ret}}.
@end deftypefn
@deftypefn {Built-in Function} void __atomic_store_n (@var{type} *ptr, @var{type} val, int memmodel)
This built-in function implements an atomic store operation. It writes
@code{@var{val}} into @code{*@var{ptr}}. On targets which are limited,
0 may be the only valid value. This mimics the behaviour of
@code{__sync_lock_release} on such hardware.
The valid memory model variants are
@code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, and @code{__ATOMIC_RELEASE}.
@end deftypefn
@deftypefn {Built-in Function} void __atomic_store (@var{type} *ptr, @var{type} *val, int memmodel)
This is the generic version of an atomic store. It will store the value
of @code{*@var{val}} into @code{*@var{ptr}}.
@end deftypefn
@deftypefn {Built-in Function} @var{type} __atomic_exchange_n (@var{type} *ptr, @var{type} val, int memmodel)
This built-in function implements an atomic exchange operation. It writes
@var{val} into @code{*@var{ptr}}, and returns the previous contents of
@code{*@var{ptr}}.
On targets which are limited, a value of 1 may be the only valid value
written. This mimics the behaviour of @code{__sync_lock_test_and_set} on
such hardware.
The valid memory model variants are
@code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, @code{__ATOMIC_ACQUIRE},
@code{__ATOMIC_RELEASE}, and @code{__ATOMIC_ACQ_REL}.
@end deftypefn
@deftypefn {Built-in Function} void __atomic_exchange (@var{type} *ptr, @var{type} *val, @var{type} *ret, int memmodel)
This is the generic version of an atomic exchange. It will store the
contents of @code{*@var{val}} into @code{*@var{ptr}}. The original value
of @code{*@var{ptr}} will be copied into @code{*@var{ret}}.
@end deftypefn
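As a sketch of how the exchange operation might be used (the variable
@code{lock} and the two functions below are hypothetical, not part of the
interface), a simple spin lock can be built from an acquire exchange and a
release store:

@smallexample
static int lock;                        /* 0 = free, 1 = held */

void
take_lock (void)
@{
  while (__atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE))
    ;                                   /* spin while the old value was 1 */
@}

void
drop_lock (void)
@{
  __atomic_store_n (&lock, 0, __ATOMIC_RELEASE);
@}
@end smallexample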
@deftypefn {Built-in Function} bool __atomic_compare_exchange_n (@var{type} *ptr, @var{type} *expected, @var{type} desired, bool weak, int success_memmodel, int failure_memmodel)
This built-in function implements an atomic compare and exchange operation.
This compares the contents of @code{*@var{ptr}} with the contents of
@code{*@var{expected}} and if equal, writes @var{desired} into
@code{*@var{ptr}}. If they are not equal, the current contents of
@code{*@var{ptr}} is written into @code{*@var{expected}}.
True is returned if @var{desired} is written into
@code{*@var{ptr}} and the execution is considered to conform to the
memory model specified by @var{success_memmodel}. There are no
restrictions on what memory model can be used here.
False is returned otherwise, and the execution is considered to conform
to @var{failure_memmodel}. This memory model cannot be
@code{__ATOMIC_RELEASE} nor @code{__ATOMIC_ACQ_REL}. It also cannot be a
stronger model than that specified by @var{success_memmodel}.
@end deftypefn
@deftypefn {Built-in Function} bool __atomic_compare_exchange (@var{type} *ptr, @var{type} *expected, @var{type} *desired, bool weak, int success_memmodel, int failure_memmodel)
This built-in function implements the generic version of
@code{__atomic_compare_exchange}. The function is virtually identical to
@code{__atomic_compare_exchange_n}, except the desired value is also a
pointer.
@end deftypefn
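A minimal sketch of a typical compare-and-exchange loop, using the @code{_n}
form described above (the names @code{add_via_cas}, @code{counter} and
@code{delta} are illustrative only):

@smallexample
int
add_via_cas (int *counter, int delta)
@{
  int old = __atomic_load_n (counter, __ATOMIC_RELAXED);
  while (!__atomic_compare_exchange_n (counter, &old, old + delta,
                                       0 /* weak = false */, __ATOMIC_SEQ_CST,
                                       __ATOMIC_RELAXED))
    ;   /* on failure, old has been updated with the current contents */
  return old + delta;
@}
@end smallexample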
@deftypefn {Built-in Function} @var{type} __atomic_add_fetch (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_sub_fetch (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_and_fetch (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_xor_fetch (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_or_fetch (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_nand_fetch (@var{type} *ptr, @var{type} val, int memmodel)
These built-in functions perform the operation suggested by the name, and
return the result of the operation. That is,
@smallexample
@{ *ptr @var{op}= val; return *ptr; @}
@end smallexample
All memory models are valid.
@end deftypefn
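For instance, a reference count drop typically wants the returned
(post-operation) value; @code{refcount} and @code{destroy_object} below are
hypothetical:

@smallexample
if (__atomic_sub_fetch (&refcount, 1, __ATOMIC_ACQ_REL) == 0)
  destroy_object ();   /* last reference is gone */
@end smallexample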
@deftypefn {Built-in Function} @var{type} __atomic_fetch_add (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_fetch_sub (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_fetch_and (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_fetch_xor (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_fetch_or (@var{type} *ptr, @var{type} val, int memmodel)
@deftypefnx {Built-in Function} @var{type} __atomic_fetch_nand (@var{type} *ptr, @var{type} val, int memmodel)
These built-in functions perform the operation suggested by the name, and
return the value that had previously been in @code{*@var{ptr}}. That is,
@smallexample
@{ tmp = *ptr; *ptr @var{op}= val; return tmp; @}
@end smallexample
All memory models are valid.
@end deftypefn
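For instance, a ticket counter typically wants the pre-operation value;
@code{next_ticket} below is hypothetical:

@smallexample
unsigned int my_ticket = __atomic_fetch_add (&next_ticket, 1, __ATOMIC_RELAXED);
@end smallexample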
@deftypefn {Built-in Function} void __atomic_thread_fence (int memmodel)
This built-in function acts as a synchronization fence between threads
based on the specified memory model.
All memory orders are valid.
@end deftypefn
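A sketch of a release fence used to publish ordinary stores through a relaxed
atomic flag (the names @code{payload}, @code{compute} and @code{ready} are
hypothetical); a reader would pair this with an acquire load of @code{ready}
or an acquire fence:

@smallexample
payload = compute ();                          /* ordinary stores */
__atomic_thread_fence (__ATOMIC_RELEASE);
__atomic_store_n (&ready, 1, __ATOMIC_RELAXED);
@end smallexample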
@deftypefn {Built-in Function} void __atomic_signal_fence (int memmodel)
This built-in function acts as a synchronization fence between a thread
and signal handlers within the same thread.
All memory orders are valid.
@end deftypefn
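A sketch of ordering stores against a signal handler registered in the same
thread; unlike a thread fence, no barrier instruction needs to be emitted
(@code{result}, @code{compute} and @code{done} are hypothetical names):

@smallexample
result = compute ();                      /* work the handler will read */
__atomic_signal_fence (__ATOMIC_RELEASE); /* restrains the compiler only */
done = 1;                                 /* typically a volatile sig_atomic_t
                                             examined by the handler */
@end smallexample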
@deftypefn {Built-in Function} bool __atomic_always_lock_free (size_t size)
This built-in function returns true if objects of @var{size} bytes will always
generate lock free atomic instructions for the target architecture.
Otherwise false is returned.
@var{size} must resolve to a compile-time constant.
@smallexample
if (__atomic_always_lock_free (sizeof (long long)))
@end smallexample
@end deftypefn
@deftypefn {Built-in Function} bool __atomic_is_lock_free (size_t size)
This built-in function returns true if objects of @var{size} bytes will always
generate lock free atomic instructions for the target architecture. If
it is not known to be lock free a call is made to a runtime routine named
@code{__atomic_is_lock_free}.
@end deftypefn
@node Object Size Checking
@section Object Size Checking Builtins
@findex __builtin_object_size

View File

@ -9163,11 +9163,26 @@ The maximum number of conditional store pairs that can be sunk. Set to 0
if either vectorization (@option{-ftree-vectorize}) or if-conversion
(@option{-ftree-loop-if-convert}) is disabled. The default is 2.
@item allow-load-data-races
Allow optimizers to introduce new data races on loads.
Set to 1 to allow, otherwise to 0. This option is enabled by default
unless implicitly set by the @option{-fmemory-model=} option.
@item allow-store-data-races
Allow optimizers to introduce new data races on stores.
Set to 1 to allow, otherwise to 0. This option is enabled by default
unless implicitly set by the @option{-fmemory-model=} option.
@item allow-packed-load-data-races
Allow optimizers to introduce new data races on packed data loads.
Set to 1 to allow, otherwise to 0. This option is enabled by default
unless implicitly set by the @option{-fmemory-model=} option.
@item allow-packed-store-data-races
Allow optimizers to introduce new data races on packed data stores.
Set to 1 to allow, otherwise to 0. This option is enabled by default
unless implicitly set by the @option{-fmemory-model=} option.
@item case-values-threshold
The smallest number of different values for which it is best to use a
jump-table instead of a tree of conditional branches. If the value is
@ -13180,7 +13195,8 @@ This option will enable GCC to use CMPXCHG16B instruction in generated code.
CMPXCHG16B allows for atomic operations on 128-bit double quadword (or oword)
data types. This is useful for high resolution counters that could be updated
by multiple processors (or cores). This instruction is generated as part of
atomic built-in functions: see @ref{Atomic Builtins} for details.
atomic built-in functions: see @ref{__sync Builtins} or
@ref{__atomic Builtins} for details.
@item -msahf
@opindex msahf

View File

@ -5699,6 +5699,155 @@ released only after all previous memory operations have completed.
If this pattern is not defined, then a @code{memory_barrier} pattern
will be emitted, followed by a store of the value to the memory operand.
@cindex @code{atomic_compare_and_swap@var{mode}} instruction pattern
@item @samp{atomic_compare_and_swap@var{mode}}
This pattern, if defined, emits code for an atomic compare-and-swap
operation with memory model semantics. Operand 2 is the memory on which
the atomic operation is performed. Operand 0 is an output operand which
is set to true or false based on whether the operation succeeded. Operand
1 is an output operand which is set to the contents of the memory before
the operation was attempted. Operand 3 is the value that is expected to
be in memory. Operand 4 is the value to put in memory if the expected
value is found there. Operand 5 is set to 1 if this compare and swap is to
be treated as a weak operation. Operand 6 is the memory model to be used
if the operation is a success. Operand 7 is the memory model to be used
if the operation fails.
If memory referred to in operand 2 contains the value in operand 3, then
operand 4 is stored in memory pointed to by operand 2 and fencing based on
the memory model in operand 6 is issued.
If memory referred to in operand 2 does not contain the value in operand 3,
then fencing based on the memory model in operand 7 is issued.
If a target does not support weak compare-and-swap operations, or the port
elects not to implement weak operations, the argument in operand 5 can be
ignored. Note a strong implementation must be provided.
If this pattern is not provided, the @code{__atomic_compare_exchange}
built-in functions will utilize the legacy @code{sync_compare_and_swap}
pattern with an @code{__ATOMIC_SEQ_CST} memory model.
@cindex @code{atomic_load@var{mode}} instruction pattern
@item @samp{atomic_load@var{mode}}
This pattern implements an atomic load operation with memory model
semantics. Operand 1 is the memory address being loaded from. Operand 0
is the result of the load. Operand 2 is the memory model to be used for
the load operation.
If not present, the @code{__atomic_load} built-in function will either
resort to a normal load with memory barriers, or a compare-and-swap
operation if a normal load would not be atomic.
@cindex @code{atomic_store@var{mode}} instruction pattern
@item @samp{atomic_store@var{mode}}
This pattern implements an atomic store operation with memory model
semantics. Operand 0 is the memory address being stored to. Operand 1
is the value to be written. Operand 2 is the memory model to be used for
the operation.
If not present, the @code{__atomic_store} built-in function will attempt to
perform a normal store and surround it with any required memory fences. If
the store would not be atomic, then an @code{__atomic_exchange} is
attempted with the result being ignored.
@cindex @code{atomic_exchange@var{mode}} instruction pattern
@item @samp{atomic_exchange@var{mode}}
This pattern implements an atomic exchange operation with memory model
semantics. Operand 1 is the memory location the operation is performed on.
Operand 0 is an output operand which is set to the original value contained
in the memory pointed to by operand 1. Operand 2 is the value to be
stored. Operand 3 is the memory model to be used.
If this pattern is not present, the built-in function
@code{__atomic_exchange} will attempt to perform the operation with a
compare and swap loop.
@cindex @code{atomic_add@var{mode}} instruction pattern
@cindex @code{atomic_sub@var{mode}} instruction pattern
@cindex @code{atomic_or@var{mode}} instruction pattern
@cindex @code{atomic_and@var{mode}} instruction pattern
@cindex @code{atomic_xor@var{mode}} instruction pattern
@cindex @code{atomic_nand@var{mode}} instruction pattern
@item @samp{atomic_add@var{mode}}, @samp{atomic_sub@var{mode}}
@itemx @samp{atomic_or@var{mode}}, @samp{atomic_and@var{mode}}
@itemx @samp{atomic_xor@var{mode}}, @samp{atomic_nand@var{mode}}
These patterns emit code for an atomic operation on memory with memory
model semantics. Operand 0 is the memory on which the atomic operation is
performed. Operand 1 is the second operand to the binary operator.
Operand 2 is the memory model to be used by the operation.
If these patterns are not defined, attempts will be made to use legacy
@code{sync} patterns, or equivalent patterns which return a result. If
none of these are available a compare-and-swap loop will be used.
@cindex @code{atomic_fetch_add@var{mode}} instruction pattern
@cindex @code{atomic_fetch_sub@var{mode}} instruction pattern
@cindex @code{atomic_fetch_or@var{mode}} instruction pattern
@cindex @code{atomic_fetch_and@var{mode}} instruction pattern
@cindex @code{atomic_fetch_xor@var{mode}} instruction pattern
@cindex @code{atomic_fetch_nand@var{mode}} instruction pattern
@item @samp{atomic_fetch_add@var{mode}}, @samp{atomic_fetch_sub@var{mode}}
@itemx @samp{atomic_fetch_or@var{mode}}, @samp{atomic_fetch_and@var{mode}}
@itemx @samp{atomic_fetch_xor@var{mode}}, @samp{atomic_fetch_nand@var{mode}}
These patterns emit code for an atomic operation on memory with memory
model semantics, and return the original value. Operand 0 is an output
operand which contains the value of the memory location before the
operation was performed. Operand 1 is the memory on which the atomic
operation is performed. Operand 2 is the second operand to the binary
operator. Operand 3 is the memory model to be used by the operation.
If these patterns are not defined, attempts will be made to use legacy
@code{sync} patterns. If none of these are available a compare-and-swap
loop will be used.
@cindex @code{atomic_add_fetch@var{mode}} instruction pattern
@cindex @code{atomic_sub_fetch@var{mode}} instruction pattern
@cindex @code{atomic_or_fetch@var{mode}} instruction pattern
@cindex @code{atomic_and_fetch@var{mode}} instruction pattern
@cindex @code{atomic_xor_fetch@var{mode}} instruction pattern
@cindex @code{atomic_nand_fetch@var{mode}} instruction pattern
@item @samp{atomic_add_fetch@var{mode}}, @samp{atomic_sub_fetch@var{mode}}
@itemx @samp{atomic_or_fetch@var{mode}}, @samp{atomic_and_fetch@var{mode}}
@itemx @samp{atomic_xor_fetch@var{mode}}, @samp{atomic_nand_fetch@var{mode}}
These patterns emit code for an atomic operation on memory with memory
model semantics and return the result after the operation is performed.
Operand 0 is an output operand which contains the value after the
operation. Operand 1 is the memory on which the atomic operation is
performed. Operand 2 is the second operand to the binary operator.
Operand 3 is the memory model to be used by the operation.
If these patterns are not defined, attempts will be made to use legacy
@code{sync} patterns, or equivalent patterns which return the result before
the operation followed by the arithmetic operation required to produce the
result. If none of these are available a compare-and-swap loop will be
used.
@cindex @code{mem_thread_fence@var{mode}} instruction pattern
@item @samp{mem_thread_fence@var{mode}}
This pattern emits code required to implement a thread fence with
memory model semantics. Operand 0 is the memory model to be used.
If this pattern is not specified, all memory models except
@code{__ATOMIC_RELAXED} will result in issuing a @code{sync_synchronize}
barrier pattern.
@cindex @code{mem_signal_fence@var{mode}} instruction pattern
@item @samp{mem_signal_fence@var{mode}}
This pattern emits code required to implement a signal fence with
memory model semantics. Operand 0 is the memory model to be used.
This pattern should impact the compiler optimizers the same way that
mem_thread_fence does, but it does not need to issue any barrier
instructions.
If this pattern is not specified, all memory models except
@code{__ATOMIC_RELAXED} will result in issuing a @code{sync_synchronize}
barrier pattern.
@cindex @code{stack_protect_set} instruction pattern
@item @samp{stack_protect_set}

View File

@ -212,11 +212,17 @@ int can_conditionally_move_p (enum machine_mode mode);
rtx emit_conditional_add (rtx, enum rtx_code, rtx, rtx, enum machine_mode,
rtx, rtx, enum machine_mode, int);
rtx expand_val_compare_and_swap (rtx, rtx, rtx, rtx);
rtx expand_bool_compare_and_swap (rtx, rtx, rtx, rtx);
rtx expand_sync_operation (rtx, rtx, enum rtx_code);
rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
rtx expand_sync_lock_test_and_set (rtx, rtx, rtx);
rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel);
rtx expand_atomic_load (rtx, rtx, enum memmodel);
rtx expand_atomic_store (rtx, rtx, enum memmodel);
rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
bool);
void expand_atomic_thread_fence (enum memmodel);
void expand_atomic_signal_fence (enum memmodel);
/* Functions from expmed.c: */
@ -248,6 +254,7 @@ extern void expand_builtin_setjmp_receiver (rtx);
extern rtx expand_builtin_saveregs (void);
extern void expand_builtin_trap (void);
extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
extern void expand_builtin_mem_thread_fence (enum memmodel);
/* Functions from expr.c: */

View File

@ -1,3 +1,16 @@
2011-11-06 Andrew MacLeod <amacleod@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
Merged from cxx-mem-model.
* types.def: (BT_SIZE, BT_CONST_VOLATILE_PTR, BT_FN_VOID_INT,
BT_FN_I{1,2,4,8,16}_CONST_VPTR_INT, BT_FN_VOID_VPTR_INT,
BT_FN_BOOL_VPTR_INT, BT_FN_BOOL_SIZE_CONST_VPTR,
BT_FN_VOID_VPTR_I{1,2,4,8,16}_INT, BT_FN_VOID_SIZE_VPTR_PTR_INT,
BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT,
BT_FN_BOOL_VPTR_PTR_I{1,2,4,8,16}_BOOL_INT_INT,
BT_FN_I{1,2,4,8,16}_VPTR_I{1,2,4,8,16}_INT): New types.
2011-11-04 Mikael Morin <mikael@gcc.gnu.org>
PR fortran/43829

View File

@ -57,6 +57,7 @@ DEF_PRIMITIVE_TYPE (BT_UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (BT_LONG, long_integer_type_node)
DEF_PRIMITIVE_TYPE (BT_ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1))
DEF_PRIMITIVE_TYPE (BT_SIZE, size_type_node)
DEF_PRIMITIVE_TYPE (BT_I1, builtin_type_for_size (BITS_PER_UNIT*1, 1))
DEF_PRIMITIVE_TYPE (BT_I2, builtin_type_for_size (BITS_PER_UNIT*2, 1))
@ -70,7 +71,10 @@ DEF_PRIMITIVE_TYPE (BT_VOLATILE_PTR,
build_pointer_type
(build_qualified_type (void_type_node,
TYPE_QUAL_VOLATILE)))
DEF_PRIMITIVE_TYPE (BT_CONST_VOLATILE_PTR,
build_pointer_type
(build_qualified_type (void_type_node,
TYPE_QUAL_VOLATILE|TYPE_QUAL_CONST)))
DEF_POINTER_TYPE (BT_PTR_LONG, BT_LONG)
DEF_POINTER_TYPE (BT_PTR_ULONGLONG, BT_ULONGLONG)
DEF_POINTER_TYPE (BT_PTR_PTR, BT_PTR)
@ -85,6 +89,8 @@ DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTRPTR, BT_VOID, BT_PTR_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VPTR, BT_VOID, BT_VOLATILE_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT_FN_VOID_PTR)
@ -98,6 +104,21 @@ DEF_FUNCTION_TYPE_2 (BT_FN_I4_VPTR_I4, BT_I4, BT_VOLATILE_PTR, BT_I4)
DEF_FUNCTION_TYPE_2 (BT_FN_I8_VPTR_I8, BT_I8, BT_VOLATILE_PTR, BT_I8)
DEF_FUNCTION_TYPE_2 (BT_FN_I16_VPTR_I16, BT_I16, BT_VOLATILE_PTR, BT_I16)
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTR, BT_VOID, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_2 (BT_FN_I1_CONST_VPTR_INT, BT_I1, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I2_CONST_VPTR_INT, BT_I2, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I4_CONST_VPTR_INT, BT_I4, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I8_CONST_VPTR_INT, BT_I8, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I16_CONST_VPTR_INT, BT_I16, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_VPTR_INT, BT_VOID, BT_VOLATILE_PTR, BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
BT_CONST_VOLATILE_PTR)
DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR)
@ -119,15 +140,31 @@ DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_I16, BT_I16, BT_VOLATILE_PTR,
BT_I16, BT_I16)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_OMPFN_PTR_UINT, BT_VOID, BT_PTR_FN_VOID_PTR,
BT_PTR, BT_UINT)
DEF_FUNCTION_TYPE_3 (BT_FN_I1_VPTR_I1_INT, BT_I1, BT_VOLATILE_PTR, BT_I1, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I2_VPTR_I2_INT, BT_I2, BT_VOLATILE_PTR, BT_I2, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I4_VPTR_I4_INT, BT_I4, BT_VOLATILE_PTR, BT_I4, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I8_VPTR_I8_INT, BT_I8, BT_VOLATILE_PTR, BT_I8, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_INT, BT_I16, BT_VOLATILE_PTR, BT_I16, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I1_INT, BT_VOID, BT_VOLATILE_PTR, BT_I1, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR,
BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_VOID, BT_SIZE,
BT_CONST_VOLATILE_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_5 (BT_FN_BOOL_LONG_LONG_LONG_LONGPTR_LONGPTR,
BT_BOOL, BT_LONG, BT_LONG, BT_LONG,
BT_PTR_LONG, BT_PTR_LONG)
DEF_FUNCTION_TYPE_5 (BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, BT_VOID, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR,
BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_LONG,
@ -138,6 +175,23 @@ DEF_FUNCTION_TYPE_6 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG,
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
BT_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I1_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I1, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I2_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I2, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I4_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I4, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I8_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I8, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I16_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I16, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_SIZE_VPTR_PTR_PTR_INT_INT, BT_BOOL, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT)
DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,

View File

@ -243,6 +243,28 @@ static const char * const optabs[] =
"set_direct_optab_handler (sync_compare_and_swap_optab, $A, CODE_FOR_$(sync_compare_and_swap$I$a$))",
"set_direct_optab_handler (sync_lock_test_and_set_optab, $A, CODE_FOR_$(sync_lock_test_and_set$I$a$))",
"set_direct_optab_handler (sync_lock_release_optab, $A, CODE_FOR_$(sync_lock_release$I$a$))",
"set_direct_optab_handler (atomic_exchange_optab, $A, CODE_FOR_$(atomic_exchange$I$a$))",
"set_direct_optab_handler (atomic_compare_and_swap_optab, $A, CODE_FOR_$(atomic_compare_and_swap$I$a$))",
"set_direct_optab_handler (atomic_load_optab, $A, CODE_FOR_$(atomic_load$I$a$))",
"set_direct_optab_handler (atomic_store_optab, $A, CODE_FOR_$(atomic_store$I$a$))",
"set_direct_optab_handler (atomic_add_fetch_optab, $A, CODE_FOR_$(atomic_add_fetch$I$a$))",
"set_direct_optab_handler (atomic_sub_fetch_optab, $A, CODE_FOR_$(atomic_sub_fetch$I$a$))",
"set_direct_optab_handler (atomic_and_fetch_optab, $A, CODE_FOR_$(atomic_and_fetch$I$a$))",
"set_direct_optab_handler (atomic_nand_fetch_optab, $A, CODE_FOR_$(atomic_nand_fetch$I$a$))",
"set_direct_optab_handler (atomic_xor_fetch_optab, $A, CODE_FOR_$(atomic_xor_fetch$I$a$))",
"set_direct_optab_handler (atomic_or_fetch_optab, $A, CODE_FOR_$(atomic_or_fetch$I$a$))",
"set_direct_optab_handler (atomic_fetch_add_optab, $A, CODE_FOR_$(atomic_fetch_add$I$a$))",
"set_direct_optab_handler (atomic_fetch_sub_optab, $A, CODE_FOR_$(atomic_fetch_sub$I$a$))",
"set_direct_optab_handler (atomic_fetch_and_optab, $A, CODE_FOR_$(atomic_fetch_and$I$a$))",
"set_direct_optab_handler (atomic_fetch_nand_optab, $A, CODE_FOR_$(atomic_fetch_nand$I$a$))",
"set_direct_optab_handler (atomic_fetch_xor_optab, $A, CODE_FOR_$(atomic_fetch_xor$I$a$))",
"set_direct_optab_handler (atomic_fetch_or_optab, $A, CODE_FOR_$(atomic_fetch_or$I$a$))",
"set_direct_optab_handler (atomic_add_optab, $A, CODE_FOR_$(atomic_add$I$a$))",
"set_direct_optab_handler (atomic_sub_optab, $A, CODE_FOR_$(atomic_sub$I$a$))",
"set_direct_optab_handler (atomic_and_optab, $A, CODE_FOR_$(atomic_and$I$a$))",
"set_direct_optab_handler (atomic_nand_optab, $A, CODE_FOR_$(atomic_nand$I$a$))",
"set_direct_optab_handler (atomic_xor_optab, $A, CODE_FOR_$(atomic_xor$I$a$))",
"set_direct_optab_handler (atomic_or_optab, $A, CODE_FOR_$(atomic_or$I$a$))",
"set_optab_handler (vec_set_optab, $A, CODE_FOR_$(vec_set$a$))",
"set_optab_handler (vec_extract_optab, $A, CODE_FOR_$(vec_extract$a$))",
"set_optab_handler (vec_extract_even_optab, $A, CODE_FOR_$(vec_extract_even$a$))",

View File

@ -4998,7 +4998,7 @@ expand_omp_atomic_store (basic_block load_bb, tree addr)
}
/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
operation as a __sync_fetch_and_op builtin. INDEX is log2 of the
operation as a __atomic_fetch_op builtin. INDEX is log2 of the
size of the data type, and thus usable to find the index of the builtin
decl. Returns false if the expression is not of the proper form. */
@ -5009,13 +5009,14 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
{
enum built_in_function oldbase, newbase, tmpbase;
tree decl, itype, call;
direct_optab optab, oldoptab, newoptab;
tree lhs, rhs;
basic_block store_bb = single_succ (load_bb);
gimple_stmt_iterator gsi;
gimple stmt;
location_t loc;
enum tree_code code;
bool need_old, need_new;
enum machine_mode imode;
/* We expect to find the following sequences:
@ -5047,47 +5048,34 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
return false;
/* Check for one of the supported fetch-op operations. */
switch (gimple_assign_rhs_code (stmt))
code = gimple_assign_rhs_code (stmt);
switch (code)
{
case PLUS_EXPR:
case POINTER_PLUS_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_ADD_N;
newbase = BUILT_IN_SYNC_ADD_AND_FETCH_N;
optab = sync_add_optab;
oldoptab = sync_old_add_optab;
newoptab = sync_new_add_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
break;
case MINUS_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_SUB_N;
newbase = BUILT_IN_SYNC_SUB_AND_FETCH_N;
optab = sync_add_optab;
oldoptab = sync_old_add_optab;
newoptab = sync_new_add_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
break;
case BIT_AND_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_AND_N;
newbase = BUILT_IN_SYNC_AND_AND_FETCH_N;
optab = sync_and_optab;
oldoptab = sync_old_and_optab;
newoptab = sync_new_and_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
break;
case BIT_IOR_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_OR_N;
newbase = BUILT_IN_SYNC_OR_AND_FETCH_N;
optab = sync_ior_optab;
oldoptab = sync_old_ior_optab;
newoptab = sync_new_ior_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
break;
case BIT_XOR_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_XOR_N;
newbase = BUILT_IN_SYNC_XOR_AND_FETCH_N;
optab = sync_xor_optab;
oldoptab = sync_old_xor_optab;
newoptab = sync_new_xor_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
break;
default:
return false;
}
/* Make sure the expression is of the proper form. */
if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
rhs = gimple_assign_rhs2 (stmt);
@ -5103,37 +5091,25 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
if (decl == NULL_TREE)
return false;
itype = TREE_TYPE (TREE_TYPE (decl));
imode = TYPE_MODE (itype);
if (need_new)
{
/* expand_sync_fetch_operation can always compensate when interested
in the new value. */
if (direct_optab_handler (newoptab, TYPE_MODE (itype))
== CODE_FOR_nothing
&& direct_optab_handler (oldoptab, TYPE_MODE (itype))
== CODE_FOR_nothing)
return false;
}
else if (need_old)
{
/* When interested in the old value, expand_sync_fetch_operation
can compensate only if the operation is reversible. AND and OR
are not reversible. */
if (direct_optab_handler (oldoptab, TYPE_MODE (itype))
== CODE_FOR_nothing
&& (oldbase == BUILT_IN_SYNC_FETCH_AND_AND_N
|| oldbase == BUILT_IN_SYNC_FETCH_AND_OR_N
|| direct_optab_handler (newoptab, TYPE_MODE (itype))
== CODE_FOR_nothing))
return false;
}
else if (direct_optab_handler (optab, TYPE_MODE (itype)) == CODE_FOR_nothing)
/* We could test all of the various optabs involved, but the fact of the
matter is that (with the exception of i486 vs i586 and xadd) all targets
that support any atomic operation optab also implement compare-and-swap.
Let optabs.c take care of expanding any compare-and-swap loop. */
if (!can_compare_and_swap_p (imode))
return false;
gsi = gsi_last_bb (load_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
call = build_call_expr_loc (loc, decl, 2, addr,
fold_convert_loc (loc, itype, rhs));
/* OpenMP does not imply any barrier-like semantics on its atomic ops.
It only requires that the operation happen atomically. Thus we can
use the RELAXED memory model. */
call = build_call_expr_loc (loc, decl, 3, addr,
fold_convert_loc (loc, itype, rhs),
build_int_cst (NULL, MEMMODEL_RELAXED));
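To illustrate the effect of switching to the RELAXED model here, a plain OpenMP atomic increment now lowers to an __atomic fetch builtin instead of a __sync one. A minimal sketch (function and variable names are illustrative; a 4-byte int is assumed):
int counter;

void
bump (void)
{
#pragma omp atomic
  counter += 1;
  /* ompexp now rewrites the atomic load/store pair above into, conceptually,
       __atomic_fetch_add_4 (&counter, 1, MEMMODEL_RELAXED);
     i.e. __ATOMIC_RELAXED at the source level, because OpenMP only requires
     atomicity, not any particular ordering.  */
}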
if (need_old || need_new)
{
lhs = need_old ? loaded_val : stored_val;
@ -5182,6 +5158,8 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
edge e;
enum built_in_function fncode;
/* ??? We need a non-pointer interface to __atomic_compare_exchange in
order to use the RELAXED memory model effectively. */
fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
+ index + 1);
cmpxchg = builtin_decl_explicit (fncode);
@ -5190,8 +5168,7 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
itype = TREE_TYPE (TREE_TYPE (cmpxchg));
if (direct_optab_handler (sync_compare_and_swap_optab, TYPE_MODE (itype))
== CODE_FOR_nothing)
if (!can_compare_and_swap_p (TYPE_MODE (itype)))
return false;
/* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */

View File

@ -7162,43 +7162,25 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
}
/* This is an internal subroutine of the other compare_and_swap expanders.
MEM, OLD_VAL and NEW_VAL are as you'd expect for a compare-and-swap
operation. TARGET is an optional place to store the value result of
the operation. ICODE is the particular instruction to expand. Return
the result of the operation. */
/* Return true if there is a compare_and_swap pattern. */
static rtx
expand_val_compare_and_swap_1 (rtx mem, rtx old_val, rtx new_val,
rtx target, enum insn_code icode)
bool
can_compare_and_swap_p (enum machine_mode mode)
{
struct expand_operand ops[4];
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
create_output_operand (&ops[0], target, mode);
create_fixed_operand (&ops[1], mem);
/* OLD_VAL and NEW_VAL may have been promoted to a wider mode.
Shrink them if so. */
create_convert_operand_to (&ops[2], old_val, mode, true);
create_convert_operand_to (&ops[3], new_val, mode, true);
if (maybe_expand_insn (icode, 4, ops))
return ops[0].value;
return NULL_RTX;
}
/* Check for __sync_compare_and_swap. */
icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
if (icode != CODE_FOR_nothing)
return true;
/* Expand a compare-and-swap operation and return its value. */
/* Check for __atomic_compare_and_swap. */
icode = direct_optab_handler (atomic_compare_and_swap_optab, mode);
if (icode != CODE_FOR_nothing)
return true;
rtx
expand_val_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode
= direct_optab_handler (sync_compare_and_swap_optab, mode);
if (icode == CODE_FOR_nothing)
return NULL_RTX;
return expand_val_compare_and_swap_1 (mem, old_val, new_val, target, icode);
/* No inline compare and swap. */
return false;
}
/* Helper function to find the MODE_CC set in a sync_compare_and_swap
@ -7216,58 +7198,6 @@ find_cc_set (rtx x, const_rtx pat, void *data)
}
}
/* Expand a compare-and-swap operation and store true into the result if
the operation was successful and false otherwise. Return the result.
Unlike other routines, TARGET is not optional. */
rtx
expand_bool_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
rtx subtarget, seq, cc_reg;
/* If the target supports a compare-and-swap pattern that simultaneously
sets some flag for success, then use it. Otherwise use the regular
compare-and-swap and follow that immediately with a compare insn. */
icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
if (icode == CODE_FOR_nothing)
return NULL_RTX;
do_pending_stack_adjust ();
do
{
start_sequence ();
subtarget = expand_val_compare_and_swap_1 (mem, old_val, new_val,
NULL_RTX, icode);
cc_reg = NULL_RTX;
if (subtarget == NULL_RTX)
{
end_sequence ();
return NULL_RTX;
}
if (have_insn_for (COMPARE, CCmode))
note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
seq = get_insns ();
end_sequence ();
/* We might be comparing against an old value. Try again. :-( */
if (!cc_reg && MEM_P (old_val))
{
seq = NULL_RTX;
old_val = force_reg (mode, old_val);
}
}
while (!seq);
emit_insn (seq);
if (cc_reg)
return emit_store_flag_force (target, EQ, cc_reg, const0_rtx, VOIDmode, 0, 1);
else
return emit_store_flag_force (target, EQ, subtarget, old_val, VOIDmode, 1, 1);
}
/* This is a helper function for the other atomic operations. This function
emits a loop that contains SEQ that iterates until a compare-and-swap
operation at the end succeeds. MEM is the memory to be modified. SEQ is
@ -7281,8 +7211,7 @@ static bool
expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
rtx label, cmp_reg, subtarget, cc_reg;
rtx label, cmp_reg, success, oldval;
/* The loop we want to generate looks like
@ -7290,8 +7219,8 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
label:
old_reg = cmp_reg;
seq;
cmp_reg = compare-and-swap(mem, old_reg, new_reg)
if (cmp_reg != old_reg)
(success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
if (success)
goto label;
Note that we only do the plain load from memory once. Subsequent
@ -7306,309 +7235,64 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
if (seq)
emit_insn (seq);
/* If the target supports a compare-and-swap pattern that simultaneously
sets some flag for success, then use it. Otherwise use the regular
compare-and-swap and follow that immediately with a compare insn. */
icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
if (icode == CODE_FOR_nothing)
success = NULL_RTX;
oldval = cmp_reg;
if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
new_reg, false, MEMMODEL_SEQ_CST,
MEMMODEL_RELAXED))
return false;
subtarget = expand_val_compare_and_swap_1 (mem, old_reg, new_reg,
cmp_reg, icode);
if (subtarget == NULL_RTX)
return false;
cc_reg = NULL_RTX;
if (have_insn_for (COMPARE, CCmode))
note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
if (cc_reg)
{
cmp_reg = cc_reg;
old_reg = const0_rtx;
}
else
{
if (subtarget != cmp_reg)
emit_move_insn (cmp_reg, subtarget);
}
if (oldval != cmp_reg)
emit_move_insn (cmp_reg, oldval);
/* ??? Mark this jump predicted not taken? */
emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, const0_rtx, GET_MODE (cmp_reg), 1,
label);
emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
GET_MODE (success), 1, label);
return true;
}
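At the source level, the loop this helper builds is the usual CAS retry idiom. A minimal sketch for a fetch-and-and (function name hypothetical) that relies only on the documented __atomic_compare_exchange_n builtin:
unsigned int
fetch_and_and_loop (volatile unsigned int *mem, unsigned int val)
{
  unsigned int old = *mem;   /* the single plain load mentioned above */
  unsigned int desired;
  do
    desired = old & val;     /* SEQ: the operation applied to the old value */
  while (!__atomic_compare_exchange_n (mem, &old, desired, false,
                                       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED));
  return old;
}
On failure, __atomic_compare_exchange_n writes the observed value back into old, which is the "old_reg = cmp_reg" step of the loop sketched in the comment.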
/* This function generates the atomic operation MEM CODE= VAL. In this
case, we do not care about any resulting value. Returns NULL if we
cannot generate the operation. */
/* This function expands the atomic exchange operation:
atomically store VAL in MEM and return the previous value in MEM.
MEMMODEL is the memory model variant to use.
TARGET is an optional place to stick the return value. */
rtx
expand_sync_operation (rtx mem, rtx val, enum rtx_code code)
expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
rtx insn;
rtx last_insn;
/* Look to see if the target supports the operation directly. */
switch (code)
{
case PLUS:
icode = direct_optab_handler (sync_add_optab, mode);
break;
case IOR:
icode = direct_optab_handler (sync_ior_optab, mode);
break;
case XOR:
icode = direct_optab_handler (sync_xor_optab, mode);
break;
case AND:
icode = direct_optab_handler (sync_and_optab, mode);
break;
case NOT:
icode = direct_optab_handler (sync_nand_optab, mode);
break;
case MINUS:
icode = direct_optab_handler (sync_sub_optab, mode);
if (icode == CODE_FOR_nothing || CONST_INT_P (val))
{
icode = direct_optab_handler (sync_add_optab, mode);
if (icode != CODE_FOR_nothing)
{
val = expand_simple_unop (mode, NEG, val, NULL_RTX, 1);
code = PLUS;
}
}
break;
default:
gcc_unreachable ();
}
/* Generate the direct operation, if present. */
/* If the target supports the exchange directly, great. */
icode = direct_optab_handler (atomic_exchange_optab, mode);
if (icode != CODE_FOR_nothing)
{
struct expand_operand ops[2];
create_fixed_operand (&ops[0], mem);
/* VAL may have been promoted to a wider mode. Shrink it if so. */
create_convert_operand_to (&ops[1], val, mode, true);
if (maybe_expand_insn (icode, 2, ops))
return const0_rtx;
}
/* Failing that, generate a compare-and-swap loop in which we perform the
operation with normal arithmetic instructions. */
if (direct_optab_handler (sync_compare_and_swap_optab, mode)
!= CODE_FOR_nothing)
{
rtx t0 = gen_reg_rtx (mode), t1;
start_sequence ();
t1 = t0;
if (code == NOT)
{
t1 = expand_simple_binop (mode, AND, t1, val, NULL_RTX,
true, OPTAB_LIB_WIDEN);
t1 = expand_simple_unop (mode, code, t1, NULL_RTX, true);
}
else
t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX,
true, OPTAB_LIB_WIDEN);
insn = get_insns ();
end_sequence ();
if (t1 != NULL && expand_compare_and_swap_loop (mem, t0, t1, insn))
return const0_rtx;
}
return NULL_RTX;
}
/* This function generates the atomic operation MEM CODE= VAL. In this
case, we do care about the resulting value: if AFTER is true then
return the value MEM holds after the operation, if AFTER is false
then return the value MEM holds before the operation. TARGET is an
optional place for the result value to be stored. */
rtx
expand_sync_fetch_operation (rtx mem, rtx val, enum rtx_code code,
bool after, rtx target)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code old_code, new_code, icode;
bool compensate;
rtx insn;
/* Look to see if the target supports the operation directly. */
switch (code)
{
case PLUS:
old_code = direct_optab_handler (sync_old_add_optab, mode);
new_code = direct_optab_handler (sync_new_add_optab, mode);
break;
case IOR:
old_code = direct_optab_handler (sync_old_ior_optab, mode);
new_code = direct_optab_handler (sync_new_ior_optab, mode);
break;
case XOR:
old_code = direct_optab_handler (sync_old_xor_optab, mode);
new_code = direct_optab_handler (sync_new_xor_optab, mode);
break;
case AND:
old_code = direct_optab_handler (sync_old_and_optab, mode);
new_code = direct_optab_handler (sync_new_and_optab, mode);
break;
case NOT:
old_code = direct_optab_handler (sync_old_nand_optab, mode);
new_code = direct_optab_handler (sync_new_nand_optab, mode);
break;
case MINUS:
old_code = direct_optab_handler (sync_old_sub_optab, mode);
new_code = direct_optab_handler (sync_new_sub_optab, mode);
if ((old_code == CODE_FOR_nothing && new_code == CODE_FOR_nothing)
|| CONST_INT_P (val))
{
old_code = direct_optab_handler (sync_old_add_optab, mode);
new_code = direct_optab_handler (sync_new_add_optab, mode);
if (old_code != CODE_FOR_nothing || new_code != CODE_FOR_nothing)
{
val = expand_simple_unop (mode, NEG, val, NULL_RTX, 1);
code = PLUS;
}
}
break;
default:
gcc_unreachable ();
}
/* If the target does supports the proper new/old operation, great. But
if we only support the opposite old/new operation, check to see if we
can compensate. In the case in which the old value is supported, then
we can always perform the operation again with normal arithmetic. In
the case in which the new value is supported, then we can only handle
this in the case the operation is reversible. */
compensate = false;
if (after)
{
icode = new_code;
if (icode == CODE_FOR_nothing)
{
icode = old_code;
if (icode != CODE_FOR_nothing)
compensate = true;
}
}
else
{
icode = old_code;
if (icode == CODE_FOR_nothing
&& (code == PLUS || code == MINUS || code == XOR))
{
icode = new_code;
if (icode != CODE_FOR_nothing)
compensate = true;
}
}
/* If we found something supported, great. */
if (icode != CODE_FOR_nothing)
{
struct expand_operand ops[3];
struct expand_operand ops[4];
create_output_operand (&ops[0], target, mode);
create_fixed_operand (&ops[1], mem);
/* VAL may have been promoted to a wider mode. Shrink it if so. */
create_convert_operand_to (&ops[2], val, mode, true);
if (maybe_expand_insn (icode, 3, ops))
{
target = ops[0].value;
val = ops[2].value;
/* If we need to compensate for using an operation with the
wrong return value, do so now. */
if (compensate)
{
if (!after)
{
if (code == PLUS)
code = MINUS;
else if (code == MINUS)
code = PLUS;
}
if (code == NOT)
{
target = expand_simple_binop (mode, AND, target, val,
NULL_RTX, true,
OPTAB_LIB_WIDEN);
target = expand_simple_unop (mode, code, target,
NULL_RTX, true);
}
else
target = expand_simple_binop (mode, code, target, val,
NULL_RTX, true,
OPTAB_LIB_WIDEN);
}
return target;
}
create_integer_operand (&ops[3], model);
if (maybe_expand_insn (icode, 4, ops))
return ops[0].value;
}
/* Failing that, generate a compare-and-swap loop in which we perform the
operation with normal arithmetic instructions. */
if (direct_optab_handler (sync_compare_and_swap_optab, mode)
!= CODE_FOR_nothing)
{
rtx t0 = gen_reg_rtx (mode), t1;
/* Legacy sync_lock_test_and_set works the same, but is only defined as an
acquire barrier. If the pattern exists, and the memory model is stronger
than acquire, add a release barrier before the instruction.
The barrier is not needed if sync_lock_test_and_set doesn't exist since
it will expand into a compare-and-swap loop. */
if (!target || !register_operand (target, mode))
target = gen_reg_rtx (mode);
start_sequence ();
if (!after)
emit_move_insn (target, t0);
t1 = t0;
if (code == NOT)
{
t1 = expand_simple_binop (mode, AND, t1, val, NULL_RTX,
true, OPTAB_LIB_WIDEN);
t1 = expand_simple_unop (mode, code, t1, NULL_RTX, true);
}
else
t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX,
true, OPTAB_LIB_WIDEN);
if (after)
emit_move_insn (target, t1);
insn = get_insns ();
end_sequence ();
if (t1 != NULL && expand_compare_and_swap_loop (mem, t0, t1, insn))
return target;
}
return NULL_RTX;
}
/* This function expands a test-and-set operation. Ideally we atomically
store VAL in MEM and return the previous value in MEM. Some targets
may not support this operation and only support VAL with the constant 1;
in this case while the return value will be 0/1, but the exact value
stored in MEM is target defined. TARGET is an option place to stick
the return value. */
rtx
expand_sync_lock_test_and_set (rtx mem, rtx val, rtx target)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
/* If the target supports the test-and-set directly, great. */
icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
last_insn = get_last_insn ();
if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST ||
model == MEMMODEL_RELEASE ||
model == MEMMODEL_ACQ_REL))
expand_builtin_mem_thread_fence (model);
if (icode != CODE_FOR_nothing)
{
struct expand_operand ops[3];
@ -7621,9 +7305,13 @@ expand_sync_lock_test_and_set (rtx mem, rtx val, rtx target)
return ops[0].value;
}
/* Remove any fence we may have inserted since a compare and swap loop is a
full memory barrier. */
if (last_insn != get_last_insn ())
delete_insns_since (last_insn);
/* Otherwise, use a compare-and-swap loop for the exchange. */
if (direct_optab_handler (sync_compare_and_swap_optab, mode)
!= CODE_FOR_nothing)
if (can_compare_and_swap_p (mode))
{
if (!target || !register_operand (target, mode))
target = gen_reg_rtx (mode);
@ -7635,6 +7323,455 @@ expand_sync_lock_test_and_set (rtx mem, rtx val, rtx target)
return NULL_RTX;
}
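A user-level view of the fence handling above (a sketch; function names are illustrative, and the extra barrier only appears on targets whose sole pattern is the acquire-only sync_lock_test_and_set):
void
publish (volatile int *slot, int value)
{
  /* SEQ_CST is stronger than acquire, so on a test-and-set-only target a
     release fence is emitted before the exchange instruction.  */
  (void) __atomic_exchange_n (slot, value, __ATOMIC_SEQ_CST);
}

int
try_lock (volatile int *lock)
{
  /* ACQUIRE matches the legacy pattern's semantics, so no extra fence.  */
  return __atomic_exchange_n (lock, 1, __ATOMIC_ACQUIRE);
}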
/* This function expands the atomic compare exchange operation:
*PTARGET_BOOL is an optional place to store the boolean success/failure.
*PTARGET_OVAL is an optional place to store the old value from memory.
Both target parameters may be NULL to indicate that we do not care about
that return value. Both target parameters are updated on success to
the actual location of the corresponding result.
MEMMODEL is the memory model variant to use.
The return value of the function is true for success. */
bool
expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
rtx mem, rtx expected, rtx desired,
bool is_weak, enum memmodel succ_model,
enum memmodel fail_model)
{
enum machine_mode mode = GET_MODE (mem);
struct expand_operand ops[8];
enum insn_code icode;
rtx target_bool, target_oval;
/* Load expected into a register for the compare and swap. */
if (MEM_P (expected))
expected = copy_to_reg (expected);
/* Make sure we always have some place to put the return oldval.
Further, make sure that place is distinct from the input expected,
just in case we need that path down below. */
if (ptarget_oval == NULL
|| (target_oval = *ptarget_oval) == NULL
|| reg_overlap_mentioned_p (expected, target_oval))
target_oval = gen_reg_rtx (mode);
icode = direct_optab_handler (atomic_compare_and_swap_optab, mode);
if (icode != CODE_FOR_nothing)
{
enum machine_mode bool_mode = insn_data[icode].operand[0].mode;
/* Make sure we always have a place for the bool operand. */
if (ptarget_bool == NULL
|| (target_bool = *ptarget_bool) == NULL
|| GET_MODE (target_bool) != bool_mode)
target_bool = gen_reg_rtx (bool_mode);
/* Emit the compare_and_swap. */
create_output_operand (&ops[0], target_bool, bool_mode);
create_output_operand (&ops[1], target_oval, mode);
create_fixed_operand (&ops[2], mem);
create_convert_operand_to (&ops[3], expected, mode, true);
create_convert_operand_to (&ops[4], desired, mode, true);
create_integer_operand (&ops[5], is_weak);
create_integer_operand (&ops[6], succ_model);
create_integer_operand (&ops[7], fail_model);
expand_insn (icode, 8, ops);
/* Return success/failure. */
target_bool = ops[0].value;
target_oval = ops[1].value;
goto success;
}
/* Otherwise fall back to the original __sync_val_compare_and_swap
which is always seq-cst. */
icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
if (icode != CODE_FOR_nothing)
{
rtx cc_reg;
create_output_operand (&ops[0], target_oval, mode);
create_fixed_operand (&ops[1], mem);
create_convert_operand_to (&ops[2], expected, mode, true);
create_convert_operand_to (&ops[3], desired, mode, true);
if (!maybe_expand_insn (icode, 4, ops))
return false;
target_oval = ops[0].value;
target_bool = NULL_RTX;
/* If the caller isn't interested in the boolean return value,
skip the computation of it. */
if (ptarget_bool == NULL)
goto success;
/* Otherwise, work out if the compare-and-swap succeeded. */
cc_reg = NULL_RTX;
if (have_insn_for (COMPARE, CCmode))
note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
target_bool
= (cc_reg
? emit_store_flag_force (target_bool, EQ, cc_reg,
const0_rtx, VOIDmode, 0, 1)
: emit_store_flag_force (target_bool, EQ, target_oval,
expected, VOIDmode, 1, 1));
goto success;
}
return false;
success:
/* Make sure that the oval output winds up where the caller asked. */
if (ptarget_oval)
*ptarget_oval = target_oval;
if (ptarget_bool)
*ptarget_bool = target_bool;
return true;
}
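For reference, the builtin this expander ultimately serves can be driven from C as follows (a minimal sketch; the function name is illustrative). The weak form may fail spuriously, and *expected is updated with the observed value on failure:
_Bool
try_update (int *mem, int *expected, int desired)
{
  return __atomic_compare_exchange_n (mem, expected, desired, /*weak=*/1,
                                      __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
}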
/* This function expands the atomic load operation:
return the atomically loaded value in MEM.
MEMMODEL is the memory model variant to use.
TARGET is an optional place to stick the return value. */
rtx
expand_atomic_load (rtx target, rtx mem, enum memmodel model)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
/* If the target supports the load directly, great. */
icode = direct_optab_handler (atomic_load_optab, mode);
if (icode != CODE_FOR_nothing)
{
struct expand_operand ops[3];
create_output_operand (&ops[0], target, mode);
create_fixed_operand (&ops[1], mem);
create_integer_operand (&ops[2], model);
if (maybe_expand_insn (icode, 3, ops))
return ops[0].value;
}
/* If the size of the object is greater than word size on this target,
then we assume that a load will not be atomic. */
if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
{
/* Issue val = compare_and_swap (mem, 0, 0).
This may cause the occasional harmless store of 0 when the value is
already 0, but it seems to be OK according to the standards guys. */
expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx,
const0_rtx, false, model, model);
return target;
}
/* Otherwise assume loads are atomic, and emit the proper barriers. */
if (!target || target == const0_rtx)
target = gen_reg_rtx (mode);
/* Emit the appropriate barrier before the load. */
expand_builtin_mem_thread_fence (model);
emit_move_insn (target, mem);
/* For SEQ_CST, also emit a barrier after the load. */
if (model == MEMMODEL_SEQ_CST)
expand_builtin_mem_thread_fence (model);
return target;
}
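A source-level sketch of the fallback described above (function name illustrative): on a target without a wide enough atomic_load pattern, an 8- or 16-byte load is performed via a compare-and-swap of the value with itself.
long long
read_counter (const volatile long long *p)
{
  /* On a 32-bit target this may expand through
       expand_atomic_compare_and_swap (..., p, 0, 0, ...),
     i.e. a CAS that stores back whatever value was already present.  */
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);
}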
/* This function expands the atomic store operation:
Atomically store VAL in MEM.
MEMMODEL is the memory model variant to use.
The function returns const0_rtx if a pattern was emitted. */
rtx
expand_atomic_store (rtx mem, rtx val, enum memmodel model)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
struct expand_operand ops[3];
/* If the target supports the store directly, great. */
icode = direct_optab_handler (atomic_store_optab, mode);
if (icode != CODE_FOR_nothing)
{
create_fixed_operand (&ops[0], mem);
create_input_operand (&ops[1], val, mode);
create_integer_operand (&ops[2], model);
if (maybe_expand_insn (icode, 3, ops))
return const0_rtx;
}
/* If the size of the object is greater than word size on this target,
a default store will not be atomic. Try an atomic exchange and throw away
the result. If that doesn't work, don't do anything. */
if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
{
rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model);
if (target)
return const0_rtx;
else
return NULL_RTX;
}
/* If there is no atomic store pattern, default to a move with barriers. */
if (model == MEMMODEL_SEQ_CST || model == MEMMODEL_RELEASE)
expand_builtin_mem_thread_fence (model);
emit_move_insn (mem, val);
/* For SEQ_CST, also emit a barrier after the store. */
if (model == MEMMODEL_SEQ_CST)
expand_builtin_mem_thread_fence (model);
return const0_rtx;
}
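The store-side counterpart, as seen from user code (sketch; function name illustrative): only RELAXED, RELEASE and SEQ_CST are valid for a store, and the barrier placement mirrors the fallback path above.
void
set_flag (volatile int *flag)
{
  /* RELEASE emits a fence before the move on the fallback path; SEQ_CST
     would additionally emit one after it.  */
  __atomic_store_n (flag, 1, __ATOMIC_RELEASE);
}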
/* Structure containing the pointers and values required to process the
various forms of the atomic_fetch_op and atomic_op_fetch builtins. */
struct atomic_op_functions
{
struct direct_optab_d *mem_fetch_before;
struct direct_optab_d *mem_fetch_after;
struct direct_optab_d *mem_no_result;
struct direct_optab_d *fetch_before;
struct direct_optab_d *fetch_after;
struct direct_optab_d *no_result;
enum rtx_code reverse_code;
};
static const struct atomic_op_functions *
get_atomic_op_for_code (enum rtx_code code)
{
static const struct atomic_op_functions add_op = {
atomic_fetch_add_optab, atomic_add_fetch_optab, atomic_add_optab,
sync_old_add_optab, sync_new_add_optab, sync_add_optab, MINUS
}, sub_op = {
atomic_fetch_sub_optab, atomic_sub_fetch_optab, atomic_sub_optab,
sync_old_sub_optab, sync_new_sub_optab, sync_sub_optab, PLUS
}, xor_op = {
atomic_fetch_xor_optab, atomic_xor_fetch_optab, atomic_xor_optab,
sync_old_xor_optab, sync_new_xor_optab, sync_xor_optab, XOR
}, and_op = {
atomic_fetch_and_optab, atomic_and_fetch_optab, atomic_and_optab,
sync_old_and_optab, sync_new_and_optab, sync_and_optab, UNKNOWN
}, nand_op = {
atomic_fetch_nand_optab, atomic_nand_fetch_optab, atomic_nand_optab,
sync_old_nand_optab, sync_new_nand_optab, sync_nand_optab, UNKNOWN
}, ior_op = {
atomic_fetch_or_optab, atomic_or_fetch_optab, atomic_or_optab,
sync_old_ior_optab, sync_new_ior_optab, sync_ior_optab, UNKNOWN
};
switch (code)
{
case PLUS:
return &add_op;
case MINUS:
return &sub_op;
case XOR:
return &xor_op;
case AND:
return &and_op;
case IOR:
return &ior_op;
case NOT:
return &nand_op;
default:
gcc_unreachable ();
}
}
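The reverse_code field records the identity used later by the compensation code; for the additive entries it amounts to the following (a sketch; the function name is illustrative):
int
add_fetch_via_fetch_add (int *mem, int val)
{
  /* add_fetch (mem, val) == fetch_add (mem, val) + val, and conversely
     fetch_add == add_fetch - val (reverse_code == MINUS).  AND, OR and
     NAND are not reversible, hence reverse_code == UNKNOWN for them.  */
  return __atomic_fetch_add (mem, val, __ATOMIC_SEQ_CST) + val;
}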
/* Try to emit an instruction for a specific operation variation.
OPTAB contains the OP functions.
TARGET is an optional place to return the result. const0_rtx means unused.
MEM is the memory location to operate on.
VAL is the value to use in the operation.
USE_MEMMODEL is TRUE if the variation with a memory model should be tried.
MODEL is the memory model, if used.
AFTER is true if the returned result is the value after the operation. */
static rtx
maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
rtx val, bool use_memmodel, enum memmodel model, bool after)
{
enum machine_mode mode = GET_MODE (mem);
struct direct_optab_d *this_optab;
struct expand_operand ops[4];
enum insn_code icode;
int op_counter = 0;
int num_ops;
/* Check to see if there is a result returned. */
if (target == const0_rtx)
{
if (use_memmodel)
{
this_optab = optab->mem_no_result;
create_integer_operand (&ops[2], model);
num_ops = 3;
}
else
{
this_optab = optab->no_result;
num_ops = 2;
}
}
/* Otherwise, we need to generate a result. */
else
{
if (use_memmodel)
{
this_optab = after ? optab->mem_fetch_after : optab->mem_fetch_before;
create_integer_operand (&ops[3], model);
num_ops = 4;
}
else
{
this_optab = after ? optab->fetch_after : optab->fetch_before;
num_ops = 3;
}
create_output_operand (&ops[op_counter++], target, mode);
}
icode = direct_optab_handler (this_optab, mode);
if (icode == CODE_FOR_nothing)
return NULL_RTX;
create_fixed_operand (&ops[op_counter++], mem);
/* VAL may have been promoted to a wider mode. Shrink it if so. */
create_convert_operand_to (&ops[op_counter++], val, mode, true);
if (maybe_expand_insn (icode, num_ops, ops))
return ((target == const0_rtx) ? const0_rtx : ops[0].value);
return NULL_RTX;
}
/* This function expands an atomic fetch_OP or OP_fetch operation:
TARGET is an optional place to stick the return value. const0_rtx indicates
the result is unused.
atomically fetch MEM, perform the operation with VAL and return it to MEM.
CODE is the operation being performed (OP)
MEMMODEL is the memory model variant to use.
AFTER is true to return the result of the operation (OP_fetch).
AFTER is false to return the value before the operation (fetch_OP). */
rtx
expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
enum memmodel model, bool after)
{
enum machine_mode mode = GET_MODE (mem);
const struct atomic_op_functions *optab;
rtx result;
bool unused_result = (target == const0_rtx);
optab = get_atomic_op_for_code (code);
/* Check for the case where the result isn't used and try those patterns. */
if (unused_result)
{
/* Try the memory model variant first. */
result = maybe_emit_op (optab, target, mem, val, true, model, true);
if (result)
return result;
/* Next try the old style without a memory model. */
result = maybe_emit_op (optab, target, mem, val, false, model, true);
if (result)
return result;
/* There is no no-result pattern, so try patterns with a result. */
target = NULL_RTX;
}
/* Try the __atomic version. */
result = maybe_emit_op (optab, target, mem, val, true, model, after);
if (result)
return result;
/* Try the older __sync version. */
result = maybe_emit_op (optab, target, mem, val, false, model, after);
if (result)
return result;
/* If the fetch value can be calculated from the other variation of fetch,
try that operation. */
if (after || optab->reverse_code != UNKNOWN || target == const0_rtx)
{
/* Try the __atomic version, then the older __sync version. */
result = maybe_emit_op (optab, target, mem, val, true, model, !after);
if (!result)
result = maybe_emit_op (optab, target, mem, val, false, model, !after);
if (result)
{
/* If the result isn't used, no need to do compensation code. */
if (unused_result)
return target;
/* Issue compensation code. Fetch_after == fetch_before OP val.
Fetch_before == fetch_after REVERSE_OP val. */
if (!after)
code = optab->reverse_code;
result = expand_simple_binop (mode, code, result, val, NULL_RTX, true,
OPTAB_LIB_WIDEN);
return result;
}
}
/* If nothing else has succeeded, default to a compare and swap loop. */
if (can_compare_and_swap_p (mode))
{
rtx insn;
rtx t0 = gen_reg_rtx (mode), t1;
start_sequence ();
/* If the result is used, get a register for it. */
if (!unused_result)
{
if (!target || !register_operand (target, mode))
target = gen_reg_rtx (mode);
/* If fetch_before, copy the value now. */
if (!after)
emit_move_insn (target, t0);
}
else
target = const0_rtx;
t1 = t0;
if (code == NOT)
{
t1 = expand_simple_binop (mode, AND, t1, val, NULL_RTX,
true, OPTAB_LIB_WIDEN);
t1 = expand_simple_unop (mode, code, t1, NULL_RTX, true);
}
else
t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX, true,
OPTAB_LIB_WIDEN);
/* For after, copy the value now. */
if (!unused_result && after)
emit_move_insn (target, t1);
insn = get_insns ();
end_sequence ();
if (t1 != NULL && expand_compare_and_swap_loop (mem, t0, t1, insn))
return target;
}
return NULL_RTX;
}
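As a worked example of the NOT (nand) branch in the final compare-and-swap loop above, the value written back is the complement of the AND; at the source level this behaves like the following sketch (function name hypothetical):
unsigned int
fetch_nand_fallback (volatile unsigned int *mem, unsigned int val)
{
  unsigned int old = *mem;
  unsigned int t;
  do
    t = ~(old & val);   /* expand AND, then NOT, as in the code above */
  while (!__atomic_compare_exchange_n (mem, &old, t, false,
                                       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED));
  return old;           /* the fetch_nand (value-before) result */
}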
/* Return true if OPERAND is suitable for operand number OPNO of
instruction ICODE. */
@ -7838,6 +7975,14 @@ maybe_gen_insn (enum insn_code icode, unsigned int nops,
case 6:
return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
ops[3].value, ops[4].value, ops[5].value);
case 7:
return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
ops[3].value, ops[4].value, ops[5].value,
ops[6].value);
case 8:
return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
ops[3].value, ops[4].value, ops[5].value,
ops[6].value, ops[7].value);
}
gcc_unreachable ();
}

View File

@ -695,6 +695,34 @@ enum direct_optab_index
/* Atomic clear with release semantics. */
DOI_sync_lock_release,
/* Atomic operations with memory model parameters. */
DOI_atomic_exchange,
DOI_atomic_compare_and_swap,
DOI_atomic_load,
DOI_atomic_store,
DOI_atomic_add_fetch,
DOI_atomic_sub_fetch,
DOI_atomic_and_fetch,
DOI_atomic_nand_fetch,
DOI_atomic_xor_fetch,
DOI_atomic_or_fetch,
DOI_atomic_fetch_add,
DOI_atomic_fetch_sub,
DOI_atomic_fetch_and,
DOI_atomic_fetch_nand,
DOI_atomic_fetch_xor,
DOI_atomic_fetch_or,
DOI_atomic_add,
DOI_atomic_sub,
DOI_atomic_and,
DOI_atomic_nand,
DOI_atomic_xor,
DOI_atomic_or,
DOI_atomic_always_lock_free,
DOI_atomic_is_lock_free,
DOI_atomic_thread_fence,
DOI_atomic_signal_fence,
/* Vector permutation. */
DOI_vec_perm,
DOI_vec_perm_const,
@ -744,6 +772,60 @@ typedef struct direct_optab_d *direct_optab;
(&direct_optab_table[(int) DOI_sync_lock_test_and_set])
#define sync_lock_release_optab \
(&direct_optab_table[(int) DOI_sync_lock_release])
#define atomic_exchange_optab \
(&direct_optab_table[(int) DOI_atomic_exchange])
#define atomic_compare_and_swap_optab \
(&direct_optab_table[(int) DOI_atomic_compare_and_swap])
#define atomic_load_optab \
(&direct_optab_table[(int) DOI_atomic_load])
#define atomic_store_optab \
(&direct_optab_table[(int) DOI_atomic_store])
#define atomic_add_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_add_fetch])
#define atomic_sub_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_sub_fetch])
#define atomic_and_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_and_fetch])
#define atomic_nand_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_nand_fetch])
#define atomic_xor_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_xor_fetch])
#define atomic_or_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_or_fetch])
#define atomic_fetch_add_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_add])
#define atomic_fetch_sub_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_sub])
#define atomic_fetch_and_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_and])
#define atomic_fetch_nand_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_nand])
#define atomic_fetch_xor_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_xor])
#define atomic_fetch_or_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_or])
#define atomic_add_optab \
(&direct_optab_table[(int) DOI_atomic_add])
#define atomic_sub_optab \
(&direct_optab_table[(int) DOI_atomic_sub])
#define atomic_and_optab \
(&direct_optab_table[(int) DOI_atomic_and])
#define atomic_nand_optab \
(&direct_optab_table[(int) DOI_atomic_nand])
#define atomic_xor_optab \
(&direct_optab_table[(int) DOI_atomic_xor])
#define atomic_or_optab \
(&direct_optab_table[(int) DOI_atomic_or])
#define atomic_always_lock_free_optab \
(&direct_optab_table[(int) DOI_atomic_always_lock_free])
#define atomic_is_lock_free_optab \
(&direct_optab_table[(int) DOI_atomic_is_lock_free])
#define atomic_thread_fence_optab \
(&direct_optab_table[(int) DOI_atomic_thread_fence])
#define atomic_signal_fence_optab \
(&direct_optab_table[(int) DOI_atomic_signal_fence])
#define vec_perm_optab (&direct_optab_table[DOI_vec_perm])
#define vec_perm_const_optab (&direct_optab_table[(int) DOI_vec_perm_const])
@ -883,6 +965,13 @@ extern void expand_float (rtx, rtx, int);
/* Return the insn_code for a FLOAT_EXPR. */
enum insn_code can_float_p (enum machine_mode, enum machine_mode, int);
/* Return true if there is an inline compare and swap pattern. */
extern bool can_compare_and_swap_p (enum machine_mode);
/* Generate code for a compare and swap. */
extern bool expand_atomic_compare_and_swap (rtx *, rtx *, rtx, rtx, rtx, bool,
enum memmodel, enum memmodel);
/* Check whether an operation represented by the code CODE is a
convert operation that is supported by the target platform in
vector form */

View File

@ -921,11 +921,26 @@ DEFPARAM (PARAM_CASE_VALUES_THRESHOLD,
0, 0, 0)
/* Data race flags for C++0x memory model compliance. */
DEFPARAM (PARAM_ALLOW_LOAD_DATA_RACES,
"allow-load-data-races",
"Allow new data races on loads to be introduced",
1, 0, 1)
DEFPARAM (PARAM_ALLOW_STORE_DATA_RACES,
"allow-store-data-races",
"Allow new data races on stores to be introduced",
1, 0, 1)
DEFPARAM (PARAM_ALLOW_PACKED_LOAD_DATA_RACES,
"allow-packed-load-data-races",
"Allow new data races on packed data loads to be introduced",
1, 0, 1)
DEFPARAM (PARAM_ALLOW_PACKED_STORE_DATA_RACES,
"allow-packed-store-data-races",
"Allow new data races on packed data stores to be introduced",
1, 0, 1)
/* Reassociation width to be used by tree reassoc optimization. */
DEFPARAM (PARAM_TREE_REASSOC_WIDTH,
"tree-reassoc-width",

View File

@ -211,6 +211,13 @@ extern void init_param_values (int *params);
PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID)
#define MAX_STORES_TO_SINK \
PARAM_VALUE (PARAM_MAX_STORES_TO_SINK)
#define ALLOW_LOAD_DATA_RACES \
PARAM_VALUE (PARAM_ALLOW_LOAD_DATA_RACES)
#define ALLOW_STORE_DATA_RACES \
PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES)
#define ALLOW_PACKED_LOAD_DATA_RACES \
PARAM_VALUE (PARAM_ALLOW_PACKED_LOAD_DATA_RACES)
#define ALLOW_PACKED_STORE_DATA_RACES \
PARAM_VALUE (PARAM_ALLOW_PACKED_STORE_DATA_RACES)
#endif /* ! GCC_PARAMS_H */

View File

@ -256,3 +256,341 @@ DEF_SYNC_BUILTIN (BUILT_IN_SYNC_LOCK_RELEASE_16, "__sync_lock_release_16",
DEF_SYNC_BUILTIN (BUILT_IN_SYNC_SYNCHRONIZE, "__sync_synchronize",
BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST)
/* __sync* builtins for the C++ memory model. */
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE,
"__atomic_exchange",
BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_N,
"__atomic_exchange_n",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_1,
"__atomic_exchange_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_2,
"__atomic_exchange_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_4,
"__atomic_exchange_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_8,
"__atomic_exchange_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_16,
"__atomic_exchange_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
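The unsuffixed form above handles objects of arbitrary size through pointers (the BT_SIZE argument is supplied when it lowers to a library call), while the _n and _1.._16 forms operate on integral values directly. A usage sketch of the generic form (type and function names illustrative):
struct pair { int a, b; };

void
swap_pair (struct pair *loc, struct pair *val, struct pair *old)
{
  /* Generic exchange: works for any object size, falling back to a
     library call when no lock-free instruction sequence exists.  */
  __atomic_exchange (loc, val, old, __ATOMIC_SEQ_CST);
}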
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD,
"__atomic_load",
BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_N,
"__atomic_load_n",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_1,
"__atomic_load_1",
BT_FN_I1_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_2,
"__atomic_load_2",
BT_FN_I2_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_4,
"__atomic_load_4",
BT_FN_I4_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_8,
"__atomic_load_8",
BT_FN_I8_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_16,
"__atomic_load_16",
BT_FN_I16_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE,
"__atomic_compare_exchange",
BT_FN_BOOL_SIZE_VPTR_PTR_PTR_INT_INT,
ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N,
"__atomic_compare_exchange_n",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1,
"__atomic_compare_exchange_1",
BT_FN_BOOL_VPTR_PTR_I1_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2,
"__atomic_compare_exchange_2",
BT_FN_BOOL_VPTR_PTR_I2_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4,
"__atomic_compare_exchange_4",
BT_FN_BOOL_VPTR_PTR_I4_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8,
"__atomic_compare_exchange_8",
BT_FN_BOOL_VPTR_PTR_I8_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16,
"__atomic_compare_exchange_16",
BT_FN_BOOL_VPTR_PTR_I16_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE,
"__atomic_store",
BT_FN_VOID_SIZE_VPTR_PTR_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_N,
"__atomic_store_n",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_1,
"__atomic_store_1",
BT_FN_VOID_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_2,
"__atomic_store_2",
BT_FN_VOID_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_4,
"__atomic_store_4",
BT_FN_VOID_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_8,
"__atomic_store_8",
BT_FN_VOID_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_16,
"__atomic_store_16",
BT_FN_VOID_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_N,
"__atomic_add_fetch",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_1,
"__atomic_add_fetch_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_2,
"__atomic_add_fetch_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_4,
"__atomic_add_fetch_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_8,
"__atomic_add_fetch_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_16,
"__atomic_add_fetch_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_N,
"__atomic_sub_fetch",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_1,
"__atomic_sub_fetch_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_2,
"__atomic_sub_fetch_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_4,
"__atomic_sub_fetch_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_8,
"__atomic_sub_fetch_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_16,
"__atomic_sub_fetch_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_N,
"__atomic_and_fetch",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_1,
"__atomic_and_fetch_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_2,
"__atomic_and_fetch_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_4,
"__atomic_and_fetch_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_8,
"__atomic_and_fetch_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_16,
"__atomic_and_fetch_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_N,
"__atomic_nand_fetch",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_1,
"__atomic_nand_fetch_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_2,
"__atomic_nand_fetch_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_4,
"__atomic_nand_fetch_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_8,
"__atomic_nand_fetch_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_16,
"__atomic_nand_fetch_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_N,
"__atomic_xor_fetch",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_1,
"__atomic_xor_fetch_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_2,
"__atomic_xor_fetch_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_4,
"__atomic_xor_fetch_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_8,
"__atomic_xor_fetch_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_16,
"__atomic_xor_fetch_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_N,
"__atomic_or_fetch",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_1,
"__atomic_or_fetch_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_2,
"__atomic_or_fetch_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_4,
"__atomic_or_fetch_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_8,
"__atomic_or_fetch_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_16,
"__atomic_or_fetch_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_N,
"__atomic_fetch_add",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_1,
"__atomic_fetch_add_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_2,
"__atomic_fetch_add_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_4,
"__atomic_fetch_add_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_8,
"__atomic_fetch_add_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_16,
"__atomic_fetch_add_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_N,
"__atomic_fetch_sub",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_1,
"__atomic_fetch_sub_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_2,
"__atomic_fetch_sub_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_4,
"__atomic_fetch_sub_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_8,
"__atomic_fetch_sub_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_16,
"__atomic_fetch_sub_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_N,
"__atomic_fetch_and",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_1,
"__atomic_fetch_and_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_2,
"__atomic_fetch_and_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_4,
"__atomic_fetch_and_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_8,
"__atomic_fetch_and_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_16,
"__atomic_fetch_and_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_N,
"__atomic_fetch_nand",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_1,
"__atomic_fetch_nand_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_2,
"__atomic_fetch_nand_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_4,
"__atomic_fetch_nand_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_8,
"__atomic_fetch_nand_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_16,
"__atomic_fetch_nand_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_N,
"__atomic_fetch_xor",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_1,
"__atomic_fetch_xor_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_2,
"__atomic_fetch_xor_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_4,
"__atomic_fetch_xor_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_8,
"__atomic_fetch_xor_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_16,
"__atomic_fetch_xor_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_N,
"__atomic_fetch_or",
BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_1,
"__atomic_fetch_or_1",
BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_2,
"__atomic_fetch_or_2",
BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_4,
"__atomic_fetch_or_4",
BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_8,
"__atomic_fetch_or_8",
BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_16,
"__atomic_fetch_or_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ALWAYS_LOCK_FREE,
"__atomic_always_lock_free",
BT_FN_BOOL_SIZE_CONST_VPTR, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_IS_LOCK_FREE,
"__atomic_is_lock_free",
BT_FN_BOOL_SIZE_CONST_VPTR, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_THREAD_FENCE,
"__atomic_thread_fence",
BT_FN_VOID_INT, ATTR_NOTHROW_LEAF_LIST)
DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SIGNAL_FENCE,
"__atomic_signal_fence",
BT_FN_VOID_INT, ATTR_NOTHROW_LEAF_LIST)
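Finally, a short sketch of the query and fence builtins declared above (function name illustrative): __atomic_always_lock_free folds at compile time and requires a constant size, while __atomic_is_lock_free may become a runtime library call.
void
fences (int *p)
{
  if (__atomic_always_lock_free (sizeof (int), 0))
    __atomic_thread_fence (__ATOMIC_ACQ_REL);   /* inter-thread fence */
  __atomic_signal_fence (__ATOMIC_SEQ_CST);     /* compiler-only barrier,
                                                   ordering with respect to
                                                   signal handlers in the
                                                   same thread */
  (void) __atomic_is_lock_free (sizeof *p, p);
}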

View File

@ -1,3 +1,67 @@
2011-11-06 Andrew MacLeod <amacleod@redhat.com>
Richard Henderson <rth@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
Merged from cxx-mem-model.
* lib/target-supports.exp (check_effective_target_sync_int_128,
check_effective_target_sync_long_long): Check whether the target
supports 64 and 128 bit __sync builtins.
(check_effective_target_cas_char): New.
(check_effective_target_cas_int): New.
* gcc.dg/dg.exp: Exclude simulate-thread tests.
* gcc.dg/atomic-noinline[-aux].c: New. Make a variety of atomic calls.
* gcc.dg/atomic-generic[-aux].c: New. Test that generic functions
produce the expected library calls.
* gcc.dg/atomic-fence.c: New functional tests.
* gcc.dg/atomic-param.c: New. Check for illegal number of parameters.
* gcc.dg/atomic-invalid.c: New. Test invalid parameters.
* gcc.dg/atomic-lockfree[-aux].c: New tests.
* gcc.dg/atomic-compare-exchange-{1-5}.c: New functional tests.
* gcc.dg/atomic-op-[1-5].c: New. Test atomic fetch functionality.
* gcc.dg/atomic-exchange-{1-5}.c: New functional tests.
* gcc.dg/atomic-load-{1-5}.c: New functional tests.
* gcc.dg/atomic-store-{1-5}.c: New functional tests.
* gcc.dg/simulate-thread/atomic-load-int128.c: New. Verify int128 loads
are atomic.
* gcc.dg/simulate-thread/atomic-load-longlong.c: New. Verify 8 byte
loads are atomic.
* gcc.dg/simulate-thread/atomic-load-int.c: New. Verify 4 byte loads
are atomic.
* gcc.dg/simulate-thread/atomic-load-short.c: New. Verify 2 byte loads
are atomic.
* gcc.dg/simulate-thread/atomic-other-int128.c: New. Verify other
int128 operations are atomic.
* gcc.dg/simulate-thread/atomic-other-int.c: New. Verify other 4 byte
operations are atomic.
* gcc.dg/simulate-thread/atomic-other-longlong.c: New. Verify 8 byte
operations are atomic.
* gcc.dg/simulate-thread/atomic-other-short.c: New. Verify other 2 byte
operations are atomic.
* gcc.dg/simulate-thread/speculative-store.c: New. Verify speculative
stores aren't moved out of a loop.
* gcc.dg/simulate-thread/strict-align-global.c: New. Verify small
globals don't overwrite neighbouring globals.
* gcc.dg/simulate-thread/subfields.c: New. Verify struct component
writes don't overwrite neighbouring components.
* c-c++-common/gomp/atomic-10.c: Use cas_int; match __atomic builtin.
* c-c++-common/gomp/atomic-3.c: Likewise.
* c-c++-common/gomp/atomic-9.c: Likewise.
* gcc.dg/gomp/atomic-1.c, gcc.dg/gomp/atomic-2.c,
gcc.dg/gomp/atomic-3.c, gcc.dg/gomp/atomic-4.c, gcc.dg/gomp/atomic-7.c,
gcc.dg/gomp/atomic-8.c, gcc.dg/gomp/atomic-9.c,
gcc.dg/gomp/atomic-10.c, gcc.dg/gomp/atomic-12.c,
gcc.dg/gomp/atomic-13.c, gcc.dg/gomp/atomic-14.c,
gcc.dg/gomp/atomic-15.c: Move to c-c++-common/gomp/.
* g++.dg/gomp/atomic-1.C, g++.dg/gomp/atomic-2.C,
g++.dg/gomp/atomic-3.C, g++.dg/gomp/atomic-4.C, g++.dg/gomp/atomic-7.C,
g++.dg/gomp/atomic-8.C, g++.dg/gomp/atomic-9.C,
g++.dg/gomp/atomic-10.C, g++.dg/gomp/atomic-11.C,
g++.dg/gomp/atomic-12.C, g++.dg/gomp/atomic-13.C,
g++.dg/gomp/atomic-15.C: Remove.
* gcc.dg/gomp/gomp.exp, g++.dg/gomp/gomp.exp: Run c-c++-common tests.
* gcc.dg/gomp/atomic-11.c: Remove test.
2011-11-06 Ira Rosen <ira.rosen@linaro.org>
* gcc.dg/vect/bb-slp-cond-1.c: New test.

View File

@ -1,6 +1,7 @@
/* PR middle-end/28046 */
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
/* { dg-require-effective-target cas_int } */
int a[3], b;
struct C { int x; int y; } c;
@ -20,5 +21,5 @@ foo (void)
*baz () += bar ();
}
/* { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 4 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { scan-tree-dump-times "__atomic_fetch_add" 4 "ompexp" } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */

View File

@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
/* { dg-require-effective-target cas_int } */
int *xyzzy;
@ -9,5 +10,5 @@ void f1(void)
xyzzy++;
}
/* { dg-final { scan-tree-dump-times "xyzzy, 4" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { scan-tree-dump-times "xyzzy, 4" 1 "ompexp" } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */

View File

@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
/* { dg-require-effective-target cas_int } */
volatile int *bar(void);
@ -9,5 +10,5 @@ void f1(void)
*bar() += 1;
}
/* { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { scan-tree-dump-times "__atomic_fetch_add" 1 "ompexp" } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */

View File

@ -48,6 +48,7 @@ set tests [prune $tests $srcdir/$subdir/tree-prof/*]
set tests [prune $tests $srcdir/$subdir/torture/*]
set tests [prune $tests $srcdir/$subdir/graphite/*]
set tests [prune $tests $srcdir/$subdir/guality/*]
set tests [prune $tests $srcdir/$subdir/simulate-thread/*]
# Main loop.
dg-runtest $tests "" $DEFAULT_CXXFLAGS

View File

@ -1,99 +0,0 @@
/* { dg-do compile } */
int x;
volatile int y;
volatile unsigned char z;
void f1(void)
{
#pragma omp atomic
x++;
#pragma omp atomic
x--;
#pragma omp atomic
++x;
#pragma omp atomic
--x;
#pragma omp atomic
x += 1;
#pragma omp atomic
x -= y;
#pragma omp atomic
x |= 1;
#pragma omp atomic
x &= 1;
#pragma omp atomic
x ^= 1;
#pragma omp atomic
x *= 3;
#pragma omp atomic
x /= 3;
#pragma omp atomic
x /= 3;
#pragma omp atomic
x <<= 3;
#pragma omp atomic
x >>= 3;
}
void f2(void)
{
#pragma omp atomic
y++;
#pragma omp atomic
y--;
#pragma omp atomic
++y;
#pragma omp atomic
--y;
#pragma omp atomic
y += 1;
#pragma omp atomic
y -= x;
#pragma omp atomic
y |= 1;
#pragma omp atomic
y &= 1;
#pragma omp atomic
y ^= 1;
#pragma omp atomic
y *= 3;
#pragma omp atomic
y /= 3;
#pragma omp atomic
y /= 3;
#pragma omp atomic
y <<= 3;
#pragma omp atomic
y >>= 3;
}
void f3(void)
{
#pragma omp atomic
z++;
#pragma omp atomic
z--;
#pragma omp atomic
++z;
#pragma omp atomic
--z;
#pragma omp atomic
z += 1;
#pragma omp atomic
z |= 1;
#pragma omp atomic
z &= 1;
#pragma omp atomic
z ^= 1;
#pragma omp atomic
z *= 3;
#pragma omp atomic
z /= 3;
#pragma omp atomic
z /= 3;
#pragma omp atomic
z <<= 3;
#pragma omp atomic
z >>= 3;
}

View File

@ -1,24 +0,0 @@
// PR middle-end/28046
// { dg-do compile }
// { dg-options "-fopenmp -fdump-tree-ompexp" }
int a[3], b;
struct C { int x; int y; } c;
int bar (void), *baz (void);
void
foo (void)
{
#pragma omp atomic
a[2] += bar ();
#pragma omp atomic
b += bar ();
#pragma omp atomic
c.y += bar ();
#pragma omp atomic
*baz () += bar ();
}
// { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 4 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } }
// { dg-final { cleanup-tree-dump "ompexp" } }

View File

@ -1,306 +0,0 @@
/* PR middle-end/45423 */
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-gimple -g0" } */
/* atomicvar should never be referenced in between the barrier and
following #pragma omp atomic_load. */
/* { dg-final { scan-tree-dump-not "barrier\[^#\]*atomicvar" "gimple" } } */
/* { dg-final { cleanup-tree-dump "gimple" } } */
#ifdef __cplusplus
bool atomicvar, c;
#else
_Bool atomicvar, c;
#endif
int i, atomicvar2, c2;
int
foo (void)
{
#pragma omp barrier
#pragma omp atomic
atomicvar |= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar |= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar |= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar |= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar |= c;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= c;
#pragma omp barrier
#pragma omp atomic
atomicvar &= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar &= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar &= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar &= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar &= c;
#pragma omp barrier
#pragma omp atomic
atomicvar += -1;
#pragma omp barrier
#pragma omp atomic
atomicvar += 0;
#pragma omp barrier
#pragma omp atomic
atomicvar += 1;
#pragma omp barrier
#pragma omp atomic
atomicvar += 2;
#pragma omp barrier
#pragma omp atomic
atomicvar += c;
#pragma omp barrier
#pragma omp atomic
atomicvar -= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar -= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar -= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar -= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar -= c;
#pragma omp barrier
#pragma omp atomic
atomicvar *= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar *= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar *= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar *= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar *= c;
#pragma omp barrier
#pragma omp atomic
atomicvar /= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar /= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar /= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar /= c;
#pragma omp barrier
#pragma omp atomic
atomicvar <<= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar <<= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar <<= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar <<= i;
#pragma omp barrier
#pragma omp atomic
atomicvar >>= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar >>= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar >>= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar >>= i;
#pragma omp barrier
#pragma omp atomic
atomicvar++;
#pragma omp barrier
#pragma omp atomic
++atomicvar;
#pragma omp barrier
#ifndef __cplusplus
#pragma omp atomic
atomicvar--;
#pragma omp barrier
#pragma omp atomic
--atomicvar;
#pragma omp barrier
#endif
return 0;
}
int
bar (void)
{
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 /= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 /= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 /= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 /= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 <<= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 <<= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 <<= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 <<= i;
#pragma omp barrier
#pragma omp atomic
atomicvar2 >>= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 >>= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 >>= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 >>= i;
#pragma omp barrier
#pragma omp atomic
atomicvar2++;
#pragma omp barrier
#pragma omp atomic
++atomicvar2;
#pragma omp barrier
#pragma omp atomic
atomicvar2--;
#pragma omp barrier
#pragma omp atomic
--atomicvar2;
#pragma omp barrier
return 0;
}

View File

@ -1,9 +0,0 @@
/* PR middle-end/45423 */
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-gimple -g0 -O2" } */
/* atomicvar should never be referenced in between the barrier and
following #pragma omp atomic_load. */
/* { dg-final { scan-tree-dump-not "barrier\[^#\]*atomicvar" "gimple" } } */
/* { dg-final { cleanup-tree-dump "gimple" } } */
#include "atomic-11.C"

View File

@ -1,43 +0,0 @@
/* PR middle-end/45423 */
/* { dg-do compile } */
/* { dg-options "-fopenmp" } */
#ifdef __cplusplus
bool *baz ();
#else
_Bool *baz ();
#endif
int *bar ();
int
foo (void)
{
#pragma omp barrier
#pragma omp atomic
(*bar ())++;
#pragma omp barrier
#pragma omp atomic
++(*bar ());
#pragma omp barrier
#pragma omp atomic
(*bar ())--;
#pragma omp barrier
#pragma omp atomic
--(*bar ());
#pragma omp barrier
#pragma omp atomic
(*baz ())++;
#pragma omp barrier
#pragma omp atomic
++(*baz ());
#ifndef __cplusplus
#pragma omp barrier
#pragma omp atomic
(*baz ())--;
#pragma omp barrier
#pragma omp atomic
--(*baz ());
#pragma omp barrier
#endif
return 0;
}

View File

@ -1,46 +0,0 @@
// { dg-do compile }
// { dg-options "-fopenmp" }
int x = 6;
int
main ()
{
int v;
#pragma omp atomic
x = x * 7 + 6; // { dg-error "expected" }
#pragma omp atomic
x = x * 7 ^ 6; // { dg-error "expected" }
#pragma omp atomic update
x = x - 8 + 6; // { dg-error "expected" }
#pragma omp atomic
x = x ^ 7 | 2; // { dg-error "expected" }
#pragma omp atomic
x = x / 7 * 2; // { dg-error "expected" }
#pragma omp atomic
x = x / 7 / 2; // { dg-error "expected" }
#pragma omp atomic capture
v = x = x | 6; // { dg-error "invalid operator" }
#pragma omp atomic capture
{ v = x; x = x * 7 + 6; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x * 7 ^ 6; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x - 8 + 6; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x ^ 7 | 2; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x / 7 * 2; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x / 7 / 2; } // { dg-error "expected" }
#pragma omp atomic capture
{ x = x * 7 + 6; v = x; } // { dg-error "expected" }
#pragma omp atomic capture
{ x = x * 7 ^ 6; v = x; } // { dg-error "expected" }
#pragma omp atomic capture
{ x = x - 8 + 6; v = x; } // { dg-error "expected" }
#pragma omp atomic capture
{ x = x ^ 7 | 2; v = x; } // { dg-error "expected" }
(void) v;
return 0;
}

View File

@ -1,23 +0,0 @@
/* { dg-do compile } */
float x, y;
void f1(void)
{
#pragma omp atomic
x++;
#pragma omp atomic
x--;
#pragma omp atomic
++x;
#pragma omp atomic
--x;
#pragma omp atomic
x += 1;
#pragma omp atomic
x -= y;
#pragma omp atomic
x *= 3;
#pragma omp atomic
x /= 3;
}

View File

@ -1,13 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
int *xyzzy;
void f1(void)
{
#pragma omp atomic
xyzzy++;
}
/* { dg-final { scan-tree-dump-times "xyzzy, 4" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */

View File

@ -1,24 +0,0 @@
/* { dg-do compile } */
int a[4];
int *p;
struct S { int x; int y[4]; } s;
int *bar(void);
void f1(void)
{
#pragma omp atomic
a[4] += 1;
#pragma omp atomic
*p += 1;
#pragma omp atomic
s.x += 1;
#pragma omp atomic
s.y[*p] += 1;
#pragma omp atomic
s.y[*p] *= 42;
#pragma omp atomic
*bar() += 1;
#pragma omp atomic
*bar() *= 42;
}

View File

@ -1,23 +0,0 @@
/* { dg-do compile } */
double x, y;
void f2(void)
{
#pragma omp atomic
y++;
#pragma omp atomic
y--;
#pragma omp atomic
++y;
#pragma omp atomic
--y;
#pragma omp atomic
y += 1;
#pragma omp atomic
y -= x;
#pragma omp atomic
y *= 3;
#pragma omp atomic
y /= 3;
}

View File

@ -1,21 +0,0 @@
/* { dg-do compile } */
long double z;
void f3(void)
{
#pragma omp atomic
z++;
#pragma omp atomic
z--;
#pragma omp atomic
++z;
#pragma omp atomic
--z;
#pragma omp atomic
z += 1;
#pragma omp atomic
z *= 3;
#pragma omp atomic
z /= 3;
}

View File

@ -1,13 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
volatile int *bar(void);
void f1(void)
{
#pragma omp atomic
*bar() += 1;
}
/* { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */

View File

@ -27,7 +27,7 @@ if ![check_effective_target_fopenmp] {
dg-init
# Main loop.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C]] "" "-fopenmp"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C $srcdir/c-c++-common/gomp/*.c]] "" "-fopenmp"
# All done.
dg-finish

View File

@ -0,0 +1,73 @@
/* { dg-do link } */
/* { dg-options "-std=c++0x" } */
/* { dg-final { simulate-thread } } */
/* Test that atomic int and atomic char work properly. */
using namespace std;
#include <atomic>
#include <limits.h>
#include <stdio.h>
#include "simulate-thread.h"
atomic<int> atomi;
atomic<char> atomc;
/* No need for parallel threads to do anything */
void simulate_thread_other_threads()
{
}
/* Verify, after every instruction is executed, that the atomic int and
char each have one of the 2 legitimate values. */
int simulate_thread_step_verify()
{
if (atomi != 0 && atomi != INT_MAX)
{
printf ("FAIL: invalid intermediate result for atomi (%d).\n",
(int)atomi);
return 1;
}
if (atomc != 0 && atomc != CHAR_MAX)
{
printf ("FAIL: invalid intermediate result for atomc (%d).\n",
(int)atomc);
return 1;
}
return 0;
}
/* Verify that both atomics have the correct value. */
int simulate_thread_final_verify()
{
if (atomi != INT_MAX)
{
printf ("FAIL: invalid final result for atomi (%d).\n",
(int)atomi);
return 1;
}
if (atomc != CHAR_MAX)
{
printf ("FAIL: invalid final result for atomc (%d).\n",
(int)atomc);
return 1;
}
return 0;
}
/* Test a store to an atomic int and an atomic char. */
__attribute__((noinline))
void simulate_thread_main()
{
atomi = INT_MAX;
atomc = CHAR_MAX;
}
int main ()
{
simulate_thread_main();
simulate_thread_done();
return 0;
}

View File

@ -0,0 +1,58 @@
/* { dg-do link } */
/* { dg-options "-std=c++0x" } */
/* { dg-final { simulate-thread } } */
using namespace std;
#include <atomic>
#include <limits.h>
#include <stdio.h>
#include "simulate-thread.h"
atomic_int atomi;
/* Non-atomic. Use a type wide enough to possibly coerce GCC into
moving things around. */
long double j;
/* Test that an atomic store synchronizes with an atomic load.
In this case, test that the store to <j> happens-before the atomic
store to <atomi>. Make sure the compiler does not reorder the
stores. */
__attribute__((noinline))
void simulate_thread_main()
{
j = 13.0;
atomi.store(1);
}
int main ()
{
simulate_thread_main();
simulate_thread_done();
return 0;
}
void simulate_thread_other_threads()
{
}
/* Verify that side-effects before an atomic store are correctly
synchronized with an atomic load of the same location. */
int simulate_thread_step_verify()
{
if (atomi.load() == 1 && j != 13.0)
{
printf ("FAIL: invalid synchronization for atomic load/store.\n");
return 1;
}
return 0;
}
int simulate_thread_final_verify()
{
return simulate_thread_step_verify();
}
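A note on what the verification above relies on: once the (sequentially consistent) store to <atomi> is observed, the earlier plain store to <j> must also be visible. Below is a minimal sketch of the same writer/reader pattern written with the new C builtins instead of std::atomic; <flag> is a hypothetical stand-in for <atomi>, and the sketch is illustrative only, not part of the commit.
extern void abort (void);
double j;
int flag;
/* Writer: plain store first, then the atomic store that publishes it. */
void writer (void)
{
  j = 13.0;
  __atomic_store_n (&flag, 1, __ATOMIC_SEQ_CST);
}
/* Reader: if the atomic load observes the store to flag, the store to j
   must already be visible; anything else indicates illegal reordering. */
void reader (void)
{
  if (__atomic_load_n (&flag, __ATOMIC_SEQ_CST) == 1 && j != 13.0)
    abort ();
}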

View File

@ -0,0 +1,77 @@
/* { dg-do link } */
/* { dg-options "--param allow-load-data-races=0 --param allow-store-data-races=0" } */
/* { dg-final { simulate-thread } } */
/* Test that setting <var.a> does not touch either <var.b> or <var.c>.
In the C++ memory model, non-contiguous bitfields ("a" and "c"
here) should be considered as distinct memory locations, so we
can't use bit twiddling to set either one. */
#include <stdio.h>
#include "simulate-thread.h"
#define CONSTA 12
static int global;
struct S
{
unsigned int a : 4;
unsigned char b;
unsigned int c : 6;
} var;
__attribute__((noinline))
void set_a()
{
var.a = CONSTA;
}
void simulate_thread_other_threads()
{
++global;
var.b = global;
var.c = global;
}
int simulate_thread_step_verify()
{
int ret = 0;
if (var.b != global)
{
printf ("FAIL: Unexpected value: var.b is %d, should be %d\n",
var.b, global);
ret = 1;
}
if (var.c != global)
{
printf ("FAIL: Unexpected value: var.c is %d, should be %d\n",
var.c, global);
ret = 1;
}
return ret;
}
int simulate_thread_final_verify()
{
int ret = simulate_thread_step_verify();
if (var.a != CONSTA)
{
printf ("FAIL: Unexpected value: var.a is %d, should be %d\n",
var.a, CONSTA);
ret = 1;
}
return ret;
}
__attribute__((noinline))
void simulate_thread_main()
{
set_a();
}
int main()
{
simulate_thread_main();
simulate_thread_done();
return 0;
}
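To make the constraint concrete: with --param allow-store-data-races=0 the compiler may not implement set_a () as a read-modify-write of a containing word, since that store would also write var.b and var.c. A hedged sketch of the forbidden lowering follows; MASK_A and SHIFT_A are hypothetical layout constants, not taken from the commit.
/* Forbidden: widening the bitfield store to the whole word. */
unsigned int tmp = *(unsigned int *) &var;      /* reads a, b and c together */
tmp = (tmp & ~MASK_A) | (CONSTA << SHIFT_A);    /* merge in the new a        */
*(unsigned int *) &var = tmp;                   /* store clobbers any
                                                   concurrent update to
                                                   var.b or var.c            */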

View File

@ -0,0 +1,80 @@
/* { dg-do link } */
/* { dg-options "--param allow-load-data-races=0 --param allow-store-data-races=0" } */
/* { dg-final { simulate-thread } } */
/* Test that setting <var.a> does not touch either <var.b> or <var.c>.
In the C++ memory model, non-contiguous bitfields ("a" and "c"
here) should be considered as distinct memory locations, so we
can't use bit twiddling to set either one. */
#include <stdio.h>
#include "simulate-thread.h"
#define CONSTA 12
static int global;
struct S
{
/* On x86-64, the volatile causes us to access <a> with a 32-bit
access, and thus trigger this test. */
volatile unsigned int a : 4;
unsigned char b;
unsigned int c : 6;
} var;
__attribute__((noinline))
void set_a()
{
var.a = CONSTA;
}
void simulate_thread_other_threads()
{
++global;
var.b = global;
var.c = global;
}
int simulate_thread_step_verify()
{
int ret = 0;
if (var.b != global)
{
printf ("FAIL: Unexpected value: var.b is %d, should be %d\n",
var.b, global);
ret = 1;
}
if (var.c != global)
{
printf ("FAIL: Unexpected value: var.c is %d, should be %d\n",
var.c, global);
ret = 1;
}
return ret;
}
int simulate_thread_final_verify()
{
int ret = simulate_thread_step_verify();
if (var.a != CONSTA)
{
printf ("FAIL: Unexpected value: var.a is %d, should be %d\n",
var.a, CONSTA);
ret = 1;
}
return ret;
}
__attribute__((noinline))
void simulate_thread_main()
{
set_a();
}
int main ()
{
simulate_thread_main();
simulate_thread_done();
return 0;
}

View File

@ -0,0 +1,85 @@
/* Test __atomic routines for existence and proper execution on 1 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_compare_exchange_n builtin for a char. */
extern void abort(void);
char v = 0;
char expected = 0;
char max = ~0;
char desired = ~0;
char zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}
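The expected/v checks above follow directly from the compare-and-swap contract: on success the desired value is installed, on failure the value actually observed is written back into *expected. A non-atomic sketch of that contract is shown below; the builtin performs the whole step atomically, so this model is illustrative only.
_Bool
cas_model (char *ptr, char *expected, char desired)
{
  if (*ptr == *expected)
    {
      *ptr = desired;        /* success: install the new value          */
      return 1;
    }
  *expected = *ptr;          /* failure: report the value actually seen */
  return 0;
}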

View File

@ -0,0 +1,85 @@
/* Test __atomic routines for existence and proper execution on 2 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_compare_exchange_n builtin for a short. */
extern void abort(void);
short v = 0;
short expected = 0;
short max = ~0;
short desired = ~0;
short zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}

View File

@ -0,0 +1,85 @@
/* Test __atomic routines for existence and proper execution on 4 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_long } */
/* Test the execution of the __atomic_compare_exchange_n builtin for an int. */
extern void abort(void);
int v = 0;
int expected = 0;
int max = ~0;
int desired = ~0;
int zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}

View File

@ -0,0 +1,86 @@
/* Test __atomic routines for existence and proper execution on 8 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* Test the execution of __atomic_compare_exchange_n builtin for a long_long. */
extern void abort(void);
long long v = 0;
long long expected = 0;
long long max = ~0;
long long desired = ~0;
long long zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}

View File

@ -0,0 +1,86 @@
/* Test __atomic routines for existence and proper execution on 16 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
/* Test the execution of __atomic_compare_exchange_n builtin for an int_128. */
extern void abort(void);
__int128_t v = 0;
__int128_t expected = 0;
__int128_t max = ~0;
__int128_t desired = ~0;
__int128_t zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}

View File

@ -0,0 +1,62 @@
/* Test __atomic routines for existence and proper execution on 1 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_exchange_n builtin for a char. */
extern void abort(void);
char v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}

View File

@ -0,0 +1,62 @@
/* Test __atomic routines for existence and proper execution on 2 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_exchange_n builtin for a short. */
extern void abort(void);
short v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}

View File

@ -0,0 +1,62 @@
/* Test __atomic routines for existence and proper execution on 4 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_long } */
/* Test the execution of the __atomic_exchange_n builtin for an int. */
extern void abort(void);
int v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}

View File

@ -0,0 +1,63 @@
/* Test __atomic routines for existence and proper execution on 8 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* Test the execution of the __atomic_exchange_n builtin for a long_long. */
extern void abort(void);
long long v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}

View File

@ -0,0 +1,63 @@
/* Test __atomic routines for existence and proper execution on 16 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
/* Test the execution of the __atomic_exchange_n builtin for a 16 byte value. */
extern void abort(void);
__int128_t v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}

View File

@ -0,0 +1,27 @@
/* Test __atomic routines for existence and execution with each valid
memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test that __atomic_{thread,signal}_fence builtins execute. */
main ()
{
__atomic_thread_fence (__ATOMIC_RELAXED);
__atomic_thread_fence (__ATOMIC_CONSUME);
__atomic_thread_fence (__ATOMIC_ACQUIRE);
__atomic_thread_fence (__ATOMIC_RELEASE);
__atomic_thread_fence (__ATOMIC_ACQ_REL);
__atomic_thread_fence (__ATOMIC_SEQ_CST);
__atomic_signal_fence (__ATOMIC_RELAXED);
__atomic_signal_fence (__ATOMIC_CONSUME);
__atomic_signal_fence (__ATOMIC_ACQUIRE);
__atomic_signal_fence (__ATOMIC_RELEASE);
__atomic_signal_fence (__ATOMIC_ACQ_REL);
__atomic_signal_fence (__ATOMIC_SEQ_CST);
return 0;
}

View File

@ -0,0 +1,45 @@
/* Supply a set of generic atomic functions to test that the compiler makes
the calls properly. */
/* { dg-do compile } */
/* { dg-options "-w" } */
/* Test that the generic builtins make calls as expected. */
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
void
__atomic_exchange (size_t size, void *obj, void *val, void *ret, int model)
{
/* Copy old value into *ret. */
memcpy (ret, obj, size);
/* Copy val into object. */
memcpy (obj, val, size);
}
bool
__atomic_compare_exchange (size_t size, void *obj, void *expected,
void *desired, int model1, int model2)
{
if (!memcmp (obj, expected, size))
{
memcpy (obj, desired, size);
return true;
}
memcpy (expected, obj, size);
return false;
}
void __atomic_load (size_t size, void *obj, void *ret, int model)
{
memcpy (ret, obj, size);
}
void __atomic_store (size_t size, void *obj, void *val, int model)
{
memcpy (obj, val, size);
}
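These stubs use the library form of the generic builtins: the object size is passed first and values are passed by pointer. A hedged sketch of the mapping that the test which follows depends on is shown below, using the test_struct type from that test; the exact lowering shown is an assumption, chosen to be consistent with the signatures above.
test_struct a, ones, c;        /* 40 bytes: no lock-free support expected */

__atomic_exchange (&a, &ones, &c, __ATOMIC_SEQ_CST);
/* expected to become:
   __atomic_exchange (sizeof (a), &a, &ones, &c, __ATOMIC_SEQ_CST);  */

__atomic_store (&a, &ones, __ATOMIC_RELAXED);
/* expected to become:
   __atomic_store (sizeof (a), &a, &ones, __ATOMIC_RELAXED);  */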

View File

@ -0,0 +1,56 @@
/* Test generic __atomic routines for proper function calling. */
/* { dg-options "-w" } */
/* { dg-do run } */
/* { dg-additional-sources "atomic-generic-aux.c" } */
/* Test that the generic atomic builtins execute as expected.
atomic-generic-aux.c supplies a functional external entry point for
the 4 generic functions. */
#include <stdlib.h>
#include <stdbool.h>
extern void abort();
typedef struct test {
int array[10];
} test_struct;
test_struct zero = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
test_struct ones = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
test_struct a,b;
int size = sizeof (test_struct);
/* Test the generic routines with a structure larger than any lock-free size. */
main ()
{
test_struct c;
__atomic_store (&a, &zero, __ATOMIC_RELAXED);
if (memcmp (&a, &zero, size))
abort ();
__atomic_exchange (&a, &ones, &c, __ATOMIC_SEQ_CST);
if (memcmp (&c, &zero, size))
abort ();
if (memcmp (&a, &ones, size))
abort ();
__atomic_load (&a, &b, __ATOMIC_RELAXED);
if (memcmp (&b, &ones, size))
abort ();
if (!__atomic_compare_exchange (&a, &b, &zero, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort();
if (memcmp (&a, &zero, size))
abort ();
if (__atomic_compare_exchange (&a, &b, &ones, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort();
if (memcmp (&b, &zero, size))
abort ();
return 0;
}

View File

@ -0,0 +1,29 @@
/* Test __atomic routines for invalid memory model errors. This only needs
to be tested on a single size. */
/* { dg-do compile } */
/* { dg-require-effective-target sync_int_long } */
#include <stddef.h>
int i, e, b;
size_t s;
main ()
{
__atomic_compare_exchange_n (&i, &e, 1, 0, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST); /* { dg-error "failure memory model cannot be stronger" } */
__atomic_compare_exchange_n (&i, &e, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELEASE); /* { dg-error "invalid failure memory" } */
__atomic_compare_exchange_n (&i, &e, 1, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL); /* { dg-error "invalid failure memory" } */
__atomic_exchange_n (&i, 1, __ATOMIC_CONSUME); /* { dg-error "invalid memory model" } */
__atomic_load_n (&i, __ATOMIC_RELEASE); /* { dg-error "invalid memory model" } */
__atomic_load_n (&i, __ATOMIC_ACQ_REL); /* { dg-error "invalid memory model" } */
__atomic_store_n (&i, 1, __ATOMIC_ACQUIRE); /* { dg-error "invalid memory model" } */
__atomic_store_n (&i, 1, __ATOMIC_CONSUME); /* { dg-error "invalid memory model" } */
__atomic_store_n (&i, 1, __ATOMIC_ACQ_REL); /* { dg-error "invalid memory model" } */
i = __atomic_always_lock_free (s, NULL); /* { dg-error "non-constant argument" } */
__atomic_load_n (&i, 44); /* { dg-warning "invalid memory model" } */
}

View File

@ -0,0 +1,66 @@
/* Test __atomic routines for existence and proper execution on 1 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_load_n builtin for a char. */
extern void abort(void);
char v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}

View File

@ -0,0 +1,68 @@
/* Test __atomic routines for existence and proper execution on 2 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_load_n builtin for a short. */
extern void abort(void);
short v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}

View File

@ -0,0 +1,65 @@
/* Test __atomic routines for existence and proper execution on 4 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_long } */
extern void abort(void);
int v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}

View File

@ -0,0 +1,65 @@
/* Test __atomic routines for existence and proper execution on 8 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
extern void abort(void);
long long v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}

View File

@ -0,0 +1,65 @@
/* Test __atomic routines for existence and proper execution on 16 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
extern void abort(void);
__int128_t v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}

View File

@ -0,0 +1,17 @@
/* Supply a __atomic_is_lock_free routine for the lock-free tests. */
/* Just compile it on its own. */
/* { dg-do compile } */
/* { dg-options "-w" } */
/* Test that __atomic_{is,always}_lock_free builtins execute. */
#include <stdlib.h>
/* Supply a builtin external function which returns a non-standard value so
it can be detected that it was called. */
int
__atomic_is_lock_free (size_t s, void *p)
{
return 2;
}

View File

@ -0,0 +1,120 @@
/* Test __atomic routines for existence and execution with each valid
memory model. */
/* { dg-options "-w" } */
/* { dg-do run } */
/* { dg-additional-sources "atomic-lockfree-aux.c" } */
/* Test that __atomic_{is,always}_lock_free builtins execute.
atomic-lockfree-aux.c supplies an external entry point for
__atomic_is_lock_free which always returns a 2. We can detect the
external routine was called if 2 is returned since that is not a valid
result normally. */
#include <stdlib.h>
extern void abort();
int r1, r2;
/* Test for consistency on sizes 1, 2, 4, 8, 16 and 32. */
main ()
{
r1 = __atomic_always_lock_free (sizeof(char), 0);
r2 = __atomic_is_lock_free (sizeof(char), 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (2, 0);
r2 = __atomic_is_lock_free (2, 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (4, 0);
r2 = __atomic_is_lock_free (4, 0); /* Try passing in a variable. */
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (8, 0);
r2 = __atomic_is_lock_free (8, 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (16, 0);
r2 = __atomic_is_lock_free (16, 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (32, 0);
r2 = __atomic_is_lock_free (32, 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
return 0;
}
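The pattern repeated above encodes one invariant plus one detection trick: __atomic_always_lock_free must imply __atomic_is_lock_free, and whenever the answer cannot be resolved at compile time the call lands in the external __atomic_is_lock_free, which the aux file makes return 2. A compact, illustrative sketch of the same check:
r1 = __atomic_always_lock_free (sizeof (long), 0);
r2 = __atomic_is_lock_free (sizeof (long), 0);
if (r1 && r2 != 1)
  abort ();                  /* always lock free must imply lock free  */
if (!r1 && r2 != 2)
  abort ();                  /* otherwise the external stub was called */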

View File

@ -0,0 +1,51 @@
/* Supply a set of external atomic functions to test that the compiler makes
the calls properly. */
/* { dg-do compile } */
/* { dg-options "-w" } */
/* Test that the generic builtins make calls as expected. This file provides
the exact entry points the test file will require. All these routines
simply set the first parameter to 1, and the caller will test for that. */
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
char
__atomic_exchange_1 (char *p, char t, int i)
{
*p = 1;
}
short
__atomic_load_2 (short *p, int i)
{
*p = 1;
}
void
__atomic_store_1 (char *p, char v, int i)
{
*p = 1;
}
int __atomic_compare_exchange_2 (short *p, short *a, short b, int x, int y, int z)
{
*p = 1;
}
char __atomic_fetch_add_1 (char *p, char v, int i)
{
*p = 1;
}
short __atomic_fetch_add_2 (short *p, short v, short i)
{
*p = 1;
}
int __atomic_is_lock_free (int i, void *p)
{
return 10;
}
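Each stub above carries the size-suffixed name used for the out-of-line form of a builtin (_1 for 1-byte operands, _2 for 2-byte operands, and so on). With -fno-inline-atomics the test that follows expects every builtin to be emitted as such a call; a hedged sketch of the assumed mapping:
char  c;
short s;

__atomic_fetch_add (&c, 15, __ATOMIC_SEQ_CST);
/* expected call: __atomic_fetch_add_1 (&c, 15, __ATOMIC_SEQ_CST)  */

__atomic_load_n (&s, __ATOMIC_SEQ_CST);
/* expected call: __atomic_load_2 (&s, __ATOMIC_SEQ_CST)  */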

View File

@ -0,0 +1,56 @@
/* Test __atomic routines for proper function calling with -fno-inline-atomics. */
/* { dg-options "-w -fno-inline-atomics" } */
/* { dg-do run } */
/* { dg-additional-sources "atomic-noinline-aux.c" } */
/* Test that -fno-inline-atomics works as expected.
atomic-noinline-aux.c provides the expected routines, which simply set the
value of the first parameter to 1. */
#include <stdlib.h>
#include <stdbool.h>
extern void abort();
short as,bs,cs;
char ac,bc,cc;
main ()
{
ac = __atomic_exchange_n (&bc, cc, __ATOMIC_RELAXED);
if (bc != 1)
abort ();
as = __atomic_load_n (&bs, __ATOMIC_SEQ_CST);
if (bs != 1)
abort ();
__atomic_store_n (&ac, bc, __ATOMIC_RELAXED);
if (ac != 1)
abort ();
__atomic_compare_exchange_n (&as, &bs, cs, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
if (as != 1)
abort ();
ac = __atomic_fetch_add (&cc, 15, __ATOMIC_SEQ_CST);
if (cc != 1)
abort ();
/* This should be translated to __atomic_fetch_add for the library */
as = __atomic_add_fetch (&cs, 10, __ATOMIC_RELAXED);
if (cs != 1)
abort ();
/* The fake external function should return 10. */
if (__atomic_is_lock_free (4, 0) != 10)
abort ();
return 0;
}

View File

@ -0,0 +1,554 @@
/* Test __atomic routines for existence and proper execution on 1 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_*OP builtin routines for a char. */
extern void abort(void);
char v, count, res;
const char init = ~0;
/* The fetch_op routines return the original value before the operation. */
void
test_fetch_add ()
{
v = 0;
count = 1;
if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5)
abort ();
}
void
test_fetch_sub()
{
v = res = 20;
count = 0;
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--)
abort ();
}
void
test_fetch_and ()
{
v = init;
if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_fetch_nand ()
{
v = init;
if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 )
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_xor ()
{
v = init;
count = 0;
if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_or ()
{
v = 0;
count = 1;
if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31)
abort ();
}
/* The OP_fetch routines return the new value after the operation. */
void
test_add_fetch ()
{
v = 0;
count = 1;
if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6)
abort ();
}
void
test_sub_fetch ()
{
v = res = 20;
count = 0;
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res)
abort ();
}
void
test_and_fetch ()
{
v = init;
if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0)
abort ();
v = init;
if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_nand_fetch ()
{
v = init;
if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_xor_fetch ()
{
v = init;
count = 0;
if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_or_fetch ()
{
v = 0;
count = 1;
if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63)
abort ();
}
/* Test the OP routines with a result which isn't used. Use both variations
within each function. */
void
test_add ()
{
v = 0;
count = 1;
__atomic_add_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_CONSUME);
if (v != 2)
abort ();
__atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE);
if (v != 3)
abort ();
__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE);
if (v != 4)
abort ();
__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 5)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST);
if (v != 6)
abort ();
}
void
test_sub()
{
v = res = 20;
count = 0;
__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != --res)
abort ();
}
void
test_and ()
{
v = init;
__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED);
if (v != 0)
abort ();
v = init;
__atomic_fetch_and (&v, init, __ATOMIC_CONSUME);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, init, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_nand ()
{
v = init;
__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != init)
abort ();
__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE);
if (v != 0)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST);
if (v != init)
abort ();
}
void
test_xor ()
{
v = init;
count = 0;
__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_or ()
{
v = 0;
count = 1;
__atomic_or_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_CONSUME);
if (v != 3)
abort ();
count *= 2;
__atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE);
if (v != 7)
abort ();
count *= 2;
__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE);
if (v != 15)
abort ();
count *= 2;
__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 31)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST);
if (v != 63)
abort ();
}
main ()
{
test_fetch_add ();
test_fetch_sub ();
test_fetch_and ();
test_fetch_nand ();
test_fetch_xor ();
test_fetch_or ();
test_add_fetch ();
test_sub_fetch ();
test_and_fetch ();
test_nand_fetch ();
test_xor_fetch ();
test_or_fetch ();
test_add ();
test_sub ();
test_and ();
test_nand ();
test_xor ();
test_or ();
return 0;
}
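The pattern used throughout these files rests on the one difference between the two builtin families: __atomic_fetch_OP returns the value the object held before the operation, while __atomic_OP_fetch returns the value after it. A minimal stand-alone illustration of that difference (not part of the committed testsuite; the variable names are made up):

/* fetch-op returns the old value, op-fetch returns the new one.  */
#include <stdio.h>
int
main ()
{
  int x = 5;
  int before = __atomic_fetch_add (&x, 3, __ATOMIC_SEQ_CST);  /* before == 5, x == 8 */
  int after = __atomic_add_fetch (&x, 3, __ATOMIC_SEQ_CST);   /* after == 11, x == 11 */
  printf ("%d %d %d\n", before, after, x);                     /* prints 5 11 11 */
  return 0;
}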


@@ -0,0 +1,555 @@
/* Test __atomic routines for existence and proper execution on 2 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_*OP builtin routines for a short. */
extern void abort(void);
short v, count, res;
const short init = ~0;
/* The fetch_op routines return the original value before the operation. */
void
test_fetch_add ()
{
v = 0;
count = 1;
if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5)
abort ();
}
void
test_fetch_sub()
{
v = res = 20;
count = 0;
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--)
abort ();
}
void
test_fetch_and ()
{
v = init;
if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_fetch_nand ()
{
v = init;
if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 )
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_xor ()
{
v = init;
count = 0;
if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_or ()
{
v = 0;
count = 1;
if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31)
abort ();
}
/* The OP_fetch routines return the new value after the operation. */
void
test_add_fetch ()
{
v = 0;
count = 1;
if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6)
abort ();
}
void
test_sub_fetch ()
{
v = res = 20;
count = 0;
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res)
abort ();
}
void
test_and_fetch ()
{
v = init;
if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0)
abort ();
v = init;
if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_nand_fetch ()
{
v = init;
if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_xor_fetch ()
{
v = init;
count = 0;
if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_or_fetch ()
{
v = 0;
count = 1;
if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63)
abort ();
}
/* Test the OP routines with a result which isn't used. Use both variations
within each function. */
void
test_add ()
{
v = 0;
count = 1;
__atomic_add_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_CONSUME);
if (v != 2)
abort ();
__atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE);
if (v != 3)
abort ();
__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE);
if (v != 4)
abort ();
__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 5)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST);
if (v != 6)
abort ();
}
void
test_sub()
{
v = res = 20;
count = 0;
__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != --res)
abort ();
}
void
test_and ()
{
v = init;
__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED);
if (v != 0)
abort ();
v = init;
__atomic_fetch_and (&v, init, __ATOMIC_CONSUME);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, init, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_nand ()
{
v = init;
__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != init)
abort ();
__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE);
if (v != 0)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST);
if (v != init)
abort ();
}
void
test_xor ()
{
v = init;
count = 0;
__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_or ()
{
v = 0;
count = 1;
__atomic_or_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_CONSUME);
if (v != 3)
abort ();
count *= 2;
__atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE);
if (v != 7)
abort ();
count *= 2;
__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE);
if (v != 15)
abort ();
count *= 2;
__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 31)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST);
if (v != 63)
abort ();
}
main ()
{
test_fetch_add ();
test_fetch_sub ();
test_fetch_and ();
test_fetch_nand ();
test_fetch_xor ();
test_fetch_or ();
test_add_fetch ();
test_sub_fetch ();
test_and_fetch ();
test_nand_fetch ();
test_xor_fetch ();
test_or_fetch ();
test_add ();
test_sub ();
test_and ();
test_nand ();
test_xor ();
test_or ();
return 0;
}
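The nand expectations above follow from GCC's definition of the operation: the new value is the bitwise complement of (old value & operand). A small sketch of just that identity, using a plain int (illustrative, not a committed test):

/* __atomic_nand_fetch (&v, m, order) is the atomic form of v = ~(v & m) and
   returns the new value; __atomic_fetch_nand returns the old value instead.  */
extern void abort(void);
int
main ()
{
  int v = ~0;                                                /* all bits set */
  if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != ~0)   /* ~(~0 & 0) == ~0 */
    abort ();
  if (__atomic_nand_fetch (&v, ~0, __ATOMIC_SEQ_CST) != 0)   /* ~(~0 & ~0) == 0 */
    abort ();
  return 0;
}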


@@ -0,0 +1,554 @@
/* Test __atomic routines for existence and proper execution on 4 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_long } */
/* Test the execution of the __atomic_*OP builtin routines for an int. */
extern void abort(void);
int v, count, res;
const int init = ~0;
/* The fetch_op routines return the original value before the operation. */
void
test_fetch_add ()
{
v = 0;
count = 1;
if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5)
abort ();
}
void
test_fetch_sub()
{
v = res = 20;
count = 0;
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--)
abort ();
}
void
test_fetch_and ()
{
v = init;
if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_fetch_nand ()
{
v = init;
if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 )
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_xor ()
{
v = init;
count = 0;
if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_or ()
{
v = 0;
count = 1;
if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31)
abort ();
}
/* The OP_fetch routines return the new value after the operation. */
void
test_add_fetch ()
{
v = 0;
count = 1;
if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6)
abort ();
}
void
test_sub_fetch ()
{
v = res = 20;
count = 0;
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res)
abort ();
}
void
test_and_fetch ()
{
v = init;
if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0)
abort ();
v = init;
if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_nand_fetch ()
{
v = init;
if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_xor_fetch ()
{
v = init;
count = 0;
if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_or_fetch ()
{
v = 0;
count = 1;
if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63)
abort ();
}
/* Test the OP routines with a result which isn't used. Use both variations
within each function. */
void
test_add ()
{
v = 0;
count = 1;
__atomic_add_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_CONSUME);
if (v != 2)
abort ();
__atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE);
if (v != 3)
abort ();
__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE);
if (v != 4)
abort ();
__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 5)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST);
if (v != 6)
abort ();
}
void
test_sub()
{
v = res = 20;
count = 0;
__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != --res)
abort ();
}
void
test_and ()
{
v = init;
__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED);
if (v != 0)
abort ();
v = init;
__atomic_fetch_and (&v, init, __ATOMIC_CONSUME);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, init, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_nand ()
{
v = init;
__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != init)
abort ();
__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE);
if (v != 0)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST);
if (v != init)
abort ();
}
void
test_xor ()
{
v = init;
count = 0;
__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_or ()
{
v = 0;
count = 1;
__atomic_or_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_CONSUME);
if (v != 3)
abort ();
count *= 2;
__atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE);
if (v != 7)
abort ();
count *= 2;
__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE);
if (v != 15)
abort ();
count *= 2;
__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 31)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST);
if (v != 63)
abort ();
}
main ()
{
test_fetch_add ();
test_fetch_sub ();
test_fetch_and ();
test_fetch_nand ();
test_fetch_xor ();
test_fetch_or ();
test_add_fetch ();
test_sub_fetch ();
test_and_fetch ();
test_nand_fetch ();
test_xor_fetch ();
test_or_fetch ();
test_add ();
test_sub ();
test_and ();
test_nand ();
test_xor ();
test_or ();
return 0;
}
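Each function above walks through all six memory models. With the support added by this patch the __ATOMIC_* names are ordinary integer macros, so they can also be stored and passed around at run time; a quick sanity check (the 0..5 encoding shown in the comment is what the new cppbuiltin definitions are expected to produce):

#include <stdio.h>
int
main ()
{
  printf ("%d %d %d %d %d %d\n",
          __ATOMIC_RELAXED, __ATOMIC_CONSUME, __ATOMIC_ACQUIRE,
          __ATOMIC_RELEASE, __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST);
  /* Expected output: 0 1 2 3 4 5 */
  return 0;
}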


@@ -0,0 +1,555 @@
/* Test __atomic routines for existence and proper execution on 8 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* Test the execution of the __atomic_*OP builtin routines for long long. */
extern void abort(void);
long long v, count, res;
const long long init = ~0;
/* The fetch_op routines return the original value before the operation. */
void
test_fetch_add ()
{
v = 0;
count = 1;
if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5)
abort ();
}
void
test_fetch_sub()
{
v = res = 20;
count = 0;
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--)
abort ();
}
void
test_fetch_and ()
{
v = init;
if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_fetch_nand ()
{
v = init;
if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 )
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_xor ()
{
v = init;
count = 0;
if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_or ()
{
v = 0;
count = 1;
if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31)
abort ();
}
/* The OP_fetch routines return the new value after the operation. */
void
test_add_fetch ()
{
v = 0;
count = 1;
if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6)
abort ();
}
void
test_sub_fetch ()
{
v = res = 20;
count = 0;
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res)
abort ();
}
void
test_and_fetch ()
{
v = init;
if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0)
abort ();
v = init;
if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_nand_fetch ()
{
v = init;
if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_xor_fetch ()
{
v = init;
count = 0;
if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_or_fetch ()
{
v = 0;
count = 1;
if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63)
abort ();
}
/* Test the OP routines with a result which isn't used. Use both variations
within each function. */
void
test_add ()
{
v = 0;
count = 1;
__atomic_add_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_CONSUME);
if (v != 2)
abort ();
__atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE);
if (v != 3)
abort ();
__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE);
if (v != 4)
abort ();
__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 5)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST);
if (v != 6)
abort ();
}
void
test_sub()
{
v = res = 20;
count = 0;
__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != --res)
abort ();
}
void
test_and ()
{
v = init;
__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED);
if (v != 0)
abort ();
v = init;
__atomic_fetch_and (&v, init, __ATOMIC_CONSUME);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, init, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_nand ()
{
v = init;
__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != init)
abort ();
__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE);
if (v != 0)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST);
if (v != init)
abort ();
}
void
test_xor ()
{
v = init;
count = 0;
__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_or ()
{
v = 0;
count = 1;
__atomic_or_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_CONSUME);
if (v != 3)
abort ();
count *= 2;
__atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE);
if (v != 7)
abort ();
count *= 2;
__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE);
if (v != 15)
abort ();
count *= 2;
__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 31)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST);
if (v != 63)
abort ();
}
main ()
{
test_fetch_add ();
test_fetch_sub ();
test_fetch_and ();
test_fetch_nand ();
test_fetch_xor ();
test_fetch_or ();
test_add_fetch ();
test_sub_fetch ();
test_and_fetch ();
test_nand_fetch ();
test_xor_fetch ();
test_or_fetch ();
test_add ();
test_sub ();
test_and ();
test_nand ();
test_xor ();
test_or ();
return 0;
}
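The files above exercise the builtins from a single thread only; the intended concurrent use of the same primitive is the usual lock-free counter. A minimal sketch, not a testsuite file (build with -pthread; the function and variable names are made up):

#include <pthread.h>
#include <stdio.h>
static long long counter;
static void *
worker (void *arg)
{
  int i;
  for (i = 0; i < 100000; i++)
    __atomic_fetch_add (&counter, 1, __ATOMIC_RELAXED);   /* atomic increment */
  return arg;
}
int
main ()
{
  pthread_t t[4];
  int i;
  for (i = 0; i < 4; i++)
    pthread_create (&t[i], 0, worker, 0);
  for (i = 0; i < 4; i++)
    pthread_join (t[i], 0);
  printf ("%lld\n", counter);   /* always 400000; no lost or torn updates */
  return 0;
}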


@@ -0,0 +1,555 @@
/* Test __atomic routines for existence and proper execution on 16 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
/* Test the execution of the __atomic_*OP builtin routines for an int_128. */
extern void abort(void);
__int128_t v, count, res;
const __int128_t init = ~0;
/* The fetch_op routines return the original value before the operation. */
void
test_fetch_add ()
{
v = 0;
count = 1;
if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3)
abort ();
if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4)
abort ();
if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5)
abort ();
}
void
test_fetch_sub()
{
v = res = 20;
count = 0;
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--)
abort ();
if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--)
abort ();
if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--)
abort ();
}
void
test_fetch_and ()
{
v = init;
if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_fetch_nand ()
{
v = init;
if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 )
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0)
abort ();
if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_xor ()
{
v = init;
count = 0;
if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_fetch_or ()
{
v = 0;
count = 1;
if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15)
abort ();
count *= 2;
if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31)
abort ();
}
/* The OP_fetch routines return the new value after the operation. */
void
test_add_fetch ()
{
v = 0;
count = 1;
if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3)
abort ();
if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5)
abort ();
if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6)
abort ();
}
void
test_sub_fetch ()
{
v = res = 20;
count = 0;
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res)
abort ();
if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res)
abort ();
if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res)
abort ();
}
void
test_and_fetch ()
{
v = init;
if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0)
abort ();
v = init;
if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0)
abort ();
v = ~v;
if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_nand_fetch ()
{
v = init;
if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0)
abort ();
if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init)
abort ();
}
void
test_xor_fetch ()
{
v = init;
count = 0;
if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init)
abort ();
if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init)
abort ();
if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0)
abort ();
}
void
test_or_fetch ()
{
v = 0;
count = 1;
if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31)
abort ();
count *= 2;
if (__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63)
abort ();
}
/* Test the OP routines with a result which isn't used. Use both variations
within each function. */
void
test_add ()
{
v = 0;
count = 1;
__atomic_add_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_CONSUME);
if (v != 2)
abort ();
__atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE);
if (v != 3)
abort ();
__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE);
if (v != 4)
abort ();
__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 5)
abort ();
__atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST);
if (v != 6)
abort ();
}
void
test_sub()
{
v = res = 20;
count = 0;
__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE);
if (v != --res)
abort ();
__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL);
if (v != --res)
abort ();
__atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != --res)
abort ();
}
void
test_and ()
{
v = init;
__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED);
if (v != 0)
abort ();
v = init;
__atomic_fetch_and (&v, init, __ATOMIC_CONSUME);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, init, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL);
if (v != 0)
abort ();
v = ~v;
__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_nand ()
{
v = init;
__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != init)
abort ();
__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE);
if (v != 0)
abort ();
__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST);
if (v != init)
abort ();
}
void
test_xor ()
{
v = init;
count = 0;
__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED);
if (v != init)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME);
if (v != 0)
abort ();
__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE);
if (v != 0)
abort ();
__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE);
if (v != init)
abort ();
__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL);
if (v != init)
abort ();
__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST);
if (v != 0)
abort ();
}
void
test_or ()
{
v = 0;
count = 1;
__atomic_or_fetch (&v, count, __ATOMIC_RELAXED);
if (v != 1)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_CONSUME);
if (v != 3)
abort ();
count *= 2;
__atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE);
if (v != 7)
abort ();
count *= 2;
__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE);
if (v != 15)
abort ();
count *= 2;
__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL);
if (v != 31)
abort ();
count *= 2;
__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST);
if (v != 63)
abort ();
}
main ()
{
test_fetch_add ();
test_fetch_sub ();
test_fetch_and ();
test_fetch_nand ();
test_fetch_xor ();
test_fetch_or ();
test_add_fetch ();
test_sub_fetch ();
test_and_fetch ();
test_nand_fetch ();
test_xor_fetch ();
test_or_fetch ();
test_add ();
test_sub ();
test_and ();
test_nand ();
test_xor ();
test_or ();
return 0;
}
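The 16-byte variants only run where the target has a 16-byte compare-and-swap (hence -mcx16 on x86_64). The patch also adds query builtins with which a caller can ask whether operations of a given size will be handled lock-free; a short sketch of their use (illustrative only; the function name is made up):

int
lock_free_16 (void)
{
  __int128_t v;
  /* Compile-time answer; folds to a constant when the target makes it known.  */
  int always = __atomic_always_lock_free (sizeof (v), &v);
  /* Run-time answer; may be resolved outside the compiler instead.  */
  int now = __atomic_is_lock_free (sizeof (v), &v);
  return always && now;
}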


@@ -0,0 +1,13 @@
/* Test __atomic routines for invalid memory model errors. This only needs
to be tested on a single size. */
/* { dg-do compile } */
/* { dg-require-effective-target sync_int_long } */
int i;
main ()
{
__atomic_exchange_n (&i, 1); /* { dg-error "too few arguments" } */
__atomic_exchange_n (&i, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); /* { dg-error "too many arguments" } */
}
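For contrast with the two rejected calls above, the accepted form of this builtin takes the pointer, the new value, and exactly one memory-model argument; a minimal correct use (illustrative):

int j;
int
correct_use (void)
{
  return __atomic_exchange_n (&j, 1, __ATOMIC_SEQ_CST);
}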


@@ -0,0 +1,47 @@
/* Test __atomic routines for existence and proper execution on 1 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_store_n builtin for a char. */
extern void abort(void);
char v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}
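Two details the store tests rely on: __atomic_store_n takes the value directly while the generic __atomic_store takes a pointer to it, and a store only accepts the relaxed, release and seq_cst models, which is why the other three models never appear in these files. A minimal sketch of the two forms (names made up):

char dst, src = 42;
void
both_store_forms (void)
{
  __atomic_store_n (&dst, src, __ATOMIC_RELEASE);   /* value form */
  __atomic_store (&dst, &src, __ATOMIC_RELEASE);    /* generic, pointer form */
}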


@@ -0,0 +1,46 @@
/* Test __atomic routines for existence and proper execution on 2 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_store_n builtin for a short. */
extern void abort(void);
short v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}


@@ -0,0 +1,47 @@
/* Test __atomic routines for existence and proper execution on 4 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_long } */
/* Test the execution of the __atomic_store_n builtin for an int. */
extern void abort(void);
int v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}


@@ -0,0 +1,48 @@
/* Test __atomic routines for existence and proper execution on 8 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* Test the execution of the __atomic_store_n builtin for a long long. */
extern void abort(void);
long long v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}


@@ -0,0 +1,48 @@
/* Test __atomic routines for existence and proper execution on 16 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
/* Test the execution of the __atomic_store_n builtin for a 16 byte value. */
extern void abort(void);
__int128_t v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}
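On a target that provides a 16-byte compare-and-swap but no plain 16-byte store, an atomic store of this width comes down to a compare-and-swap loop. A rough sketch of that idea (this illustrates the technique only; it is not the code the compiler emits):

void
cas_store (__int128_t *p, __int128_t val)
{
  __int128_t expected = __atomic_load_n (p, __ATOMIC_RELAXED);
  /* On failure, expected is refreshed with the value currently in *p.  */
  while (!__atomic_compare_exchange_n (p, &expected, val, 0 /* strong */,
                                       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED))
    ;
}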


@@ -1,17 +0,0 @@
/* PR middle-end/36877 */
/* { dg-do compile } */
/* { dg-options "-fopenmp" } */
/* { dg-options "-fopenmp -march=i386" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
int i;
float f;
void foo (void)
{
#pragma omp atomic
i++;
#pragma omp atomic
f += 1.0;
}
/* { dg-final { scan-assembler-not "__sync_(fetch|add|bool|val)" { target i?86-*-* x86_64-*-* powerpc*-*-* ia64-*-* s390*-*-* sparc*-*-* } } } */


@@ -29,8 +29,7 @@ if ![check_effective_target_fopenmp] {
dg-init
# Main loop.
dg-runtest [lsort [find $srcdir/$subdir *.c]] \
"" "-fopenmp"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c $srcdir/c-c++-common/gomp/*.c]] "" "-fopenmp"
# All done.
dg-finish


@@ -0,0 +1,116 @@
/* { dg-do link } */
/* { dg-require-effective-target sync_int_long } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Testing load for atomicity is a little trickier.
Set up the atomic value so that it changes value after every instruction
is executed.
Simply alternating between 2 values wouldn't be sufficient since a load of
one part, followed by the load of the second part 2 instructions later would
appear to be valid.
Set up a table of 16 values which change a bit in every byte of the value
each time; this will give us a 16-instruction cycle before repetition
kicks in, which should be sufficient to detect any issues. Just to be sure,
we also change the table cycle size during execution.
The end result is that all loads should always get one of the values from
the table. Any other pattern means the load failed. */
unsigned int ret;
unsigned int value = 0;
unsigned int result = 0;
unsigned int table[16] = {
0x00000000,
0x11111111,
0x22222222,
0x33333333,
0x44444444,
0x55555555,
0x66666666,
0x77777777,
0x88888888,
0x99999999,
0xAAAAAAAA,
0xBBBBBBBB,
0xCCCCCCCC,
0xDDDDDDDD,
0xEEEEEEEE,
0xFFFFFFFF
};
int table_cycle_size = 16;
/* Return 0 if 'result' is a valid value to have loaded. */
int verify_result ()
{
int x;
int found = 0;
/* Check entire table for valid values. */
for (x = 0; x < 16 ; x++)
if (result == table[x])
{
found = 1;
break;
}
if (!found)
printf("FAIL: Invalid result returned from fetch\n");
return !found;
}
/* Iterate VALUE through the different valid values. */
void simulate_thread_other_threads ()
{
static int current = 0;
if (++current >= table_cycle_size)
current = 0;
value = table[current];
}
int simulate_thread_step_verify ()
{
return verify_result ();
}
int simulate_thread_final_verify ()
{
return verify_result ();
}
__attribute__((noinline))
void simulate_thread_main()
{
int x;
/* Execute loads with value changing at various cyclic values. */
for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--)
{
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
/* In order to verify the returned value (which is not atomic), it needs
to be atomically stored into another variable and checked there. */
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
/* Execute the fetch/store a couple of times just to ensure the cycles
have a chance to be interesting. */
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
}
}
main()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}
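As with stores, the load builtin comes in two forms, and only the relaxed, consume, acquire and seq_cst models are valid for it. A small sketch of both forms (names made up):

unsigned int src;
unsigned int
both_load_forms (void)
{
  unsigned int tmp;
  __atomic_load (&src, &tmp, __ATOMIC_ACQUIRE);            /* generic: writes through an out pointer */
  return tmp + __atomic_load_n (&src, __ATOMIC_ACQUIRE);   /* _n form: returns the value */
}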


@@ -0,0 +1,132 @@
/* { dg-do link } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* i?86-*-* } } } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Testing load for atomicity is a little trickier.
Set up the atomic value so that it changes value after every instruction
is executed.
Simply alternating between 2 values wouldn't be sufficient since a load of
one part, followed by the load of the second part 2 instructions later would
appear to be valid.
Set up a table of 16 values which change a bit in every byte of the value
each time; this will give us a 16-instruction cycle before repetition
kicks in, which should be sufficient to detect any issues. Just to be sure,
we also change the table cycle size during execution.
The end result is that all loads should always get one of the values from
the table. Any other pattern means the load failed. */
__int128_t ret;
__int128_t value = 0;
__int128_t result = 0;
__int128_t table[16] = {
0x0000000000000000,
0x1111111111111111,
0x2222222222222222,
0x3333333333333333,
0x4444444444444444,
0x5555555555555555,
0x6666666666666666,
0x7777777777777777,
0x8888888888888888,
0x9999999999999999,
0xAAAAAAAAAAAAAAAA,
0xBBBBBBBBBBBBBBBB,
0xCCCCCCCCCCCCCCCC,
0xDDDDDDDDDDDDDDDD,
0xEEEEEEEEEEEEEEEE,
0xFFFFFFFFFFFFFFFF
};
int table_cycle_size = 16;
/* Since we don't have 128 bit constants, we have to properly pad the table. */
void fill_table()
{
int x;
for (x = 0; x < 16; x++)
{
ret = table[x];
ret = (ret << 64) | ret;
table[x] = ret;
}
}
/* Return 0 if 'result' is a valid value to have loaded. */
int verify_result ()
{
int x;
int found = 0;
/* Check entire table for valid values. */
for (x = 0; x < 16; x++)
if (result == table[x])
{
found = 1;
break;
}
if (!found)
printf("FAIL: Invalid result returned from fetch\n");
return !found;
}
/* Iterate VALUE through the different valid values. */
void simulate_thread_other_threads ()
{
static int current = 0;
if (++current >= table_cycle_size)
current = 0;
value = table[current];
}
int simulate_thread_step_verify ()
{
return verify_result ();
}
int simulate_thread_final_verify ()
{
return verify_result ();
}
__attribute__((noinline))
void simulate_thread_main()
{
int x;
/* Make sure value starts with an atomic value now. */
__atomic_store_n (&value, ret, __ATOMIC_SEQ_CST);
/* Execute loads with value changing at various cyclic values. */
for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--)
{
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
/* In order to verify the returned value (which is not atomic), it needs
to be atomically stored into another variable and checked there. */
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
/* Execute the fetch/store a couple of times just to ensure the cycles
have a chance to be interesting. */
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
}
}
main()
{
fill_table ();
simulate_thread_main ();
simulate_thread_done ();
return 0;
}
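The fill_table loop above exists only because C has no 128-bit integer literals; the same trick in isolation, for a single value (illustrative helper, not part of the test):

__int128_t
widen (unsigned long long pattern)
{
  /* Duplicate a 64-bit pattern into both halves of a 128-bit value.  */
  return ((__int128_t) pattern << 64) | pattern;
}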


@@ -0,0 +1,117 @@
/* { dg-do link } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Testing load for atomicity is a little trickier.
Set up the atomic value so that it changes value after every instruction
is executed.
Simply alternating between 2 values wouldn't be sufficient since a load of
one part, followed by the load of the second part 2 instructions later would
appear to be valid.
Set up a table of 16 values which change a bit in every byte of the value
each time; this will give us a 16-instruction cycle before repetition
kicks in, which should be sufficient to detect any issues. Just to be sure,
we also change the table cycle size during execution.
The end result is that all loads should always get one of the values from
the table. Any other pattern means the load failed. */
unsigned long long ret;
unsigned long long value = 0;
unsigned long long result = 0;
unsigned long long table[16] = {
0x0000000000000000,
0x1111111111111111,
0x2222222222222222,
0x3333333333333333,
0x4444444444444444,
0x5555555555555555,
0x6666666666666666,
0x7777777777777777,
0x8888888888888888,
0x9999999999999999,
0xAAAAAAAAAAAAAAAA,
0xBBBBBBBBBBBBBBBB,
0xCCCCCCCCCCCCCCCC,
0xDDDDDDDDDDDDDDDD,
0xEEEEEEEEEEEEEEEE,
0xFFFFFFFFFFFFFFFF
};
int table_cycle_size = 16;
/* Return 0 if 'result' is a valid value to have loaded. */
int verify_result ()
{
int x;
int found = 0;
/* Check entire table for valid values. */
for (x = 0; x < 16 ; x++)
if (result == table[x])
{
found = 1;
break;
}
if (!found)
printf("FAIL: Invalid result returned from fetch\n");
return !found;
}
/* Iterate VALUE through the different valid values. */
void simulate_thread_other_threads ()
{
static int current = 0;
if (++current >= table_cycle_size)
current = 0;
value = table[current];
}
int simulate_thread_step_verify ()
{
return verify_result ();
}
int simulate_thread_final_verify ()
{
return verify_result ();
}
__attribute__((noinline))
void simulate_thread_main()
{
int x;
/* Execute loads with value changing at various cyclic values. */
for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--)
{
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
/* In order to verify the returned value (which is not atomic), it needs
to be atomically stored into another variable and checked there. */
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
/* Execute the fetch/store a couple of times just to ensure the cycles
have a chance to be interesting. */
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
}
}
main()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}


@@ -0,0 +1,116 @@
/* { dg-do link } */
/* { dg-require-effective-target sync_char_short } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Testing load for atomicity is a little trickier.
Set up the atomic value so that it changes value after every instruction
is executed.
Simply alternating between 2 values wouldn't be sufficient since a load of
one part, followed by the load of the second part 2 instructions later would
appear to be valid.
Set up a table of 16 values which change a bit in every byte of the value
each time; this will give us a 16-instruction cycle before repetition
kicks in, which should be sufficient to detect any issues. Just to be sure,
we also change the table cycle size during execution.
The end result is that all loads should always get one of the values from
the table. Any other pattern means the load failed. */
unsigned short ret;
unsigned short value = 0;
unsigned short result = 0;
unsigned short table[16] = {
0x0000,
0x1111,
0x2222,
0x3333,
0x4444,
0x5555,
0x6666,
0x7777,
0x8888,
0x9999,
0xAAAA,
0xBBBB,
0xCCCC,
0xDDDD,
0xEEEE,
0xFFFF
};
int table_cycle_size = 16;
/* Return 0 if 'result' is a valid value to have loaded. */
int verify_result ()
{
int x;
int found = 0;
/* Check entire table for valid values. */
for (x = 0; x < 16 ; x++)
if (result == table[x])
{
found = 1;
break;
}
if (!found)
printf("FAIL: Invalid result returned from fetch\n");
return !found;
}
/* Iterate VALUE through the different valid values. */
void simulate_thread_other_threads ()
{
static int current = 0;
if (++current >= table_cycle_size)
current = 0;
value = table[current];
}
int simulate_thread_step_verify ()
{
return verify_result ();
}
int simulate_thread_final_verify ()
{
return verify_result ();
}
__attribute__((noinline))
void simulate_thread_main()
{
int x;
/* Execute loads with value changing at various cyclic values. */
for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--)
{
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
/* In order to verify the returned value (which is not atomic), it needs
to be atomically stored into another variable and checked there. */
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
/* Execute the fetch/store a couple of times just to ensure the cycles
have a chance to be interesting. */
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
}
}
main()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}


@@ -0,0 +1,118 @@
/* { dg-do link } */
/* { dg-require-effective-target sync_int_long } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Test all the __atomic routines for proper atomicity on 4 byte values. */
unsigned int zero = 0;
unsigned int max = ~0;
unsigned int changing_value = 0;
unsigned int value = 0;
unsigned int ret;
void test_abort()
{
static int reported = 0;
if (!reported)
{
printf ("FAIL: improper execution of __sync builtin.\n");
reported = 1;
}
}
void simulate_thread_other_threads ()
{
}
int simulate_thread_step_verify ()
{
if (value != zero && value != max)
{
printf ("FAIL: invalid intermediate result for value.\n");
return 1;
}
return 0;
}
int simulate_thread_final_verify ()
{
if (value != 0)
{
printf ("FAIL: invalid final result for value.\n");
return 1;
}
return 0;
}
/* All values written to 'value' alternate between 'zero' and
'max'. Any other value detected by simulate_thread_step_verify()
between instructions would indicate that the value was only
partially written, and would thus fail this atomicity test.
This function tests each different __atomic routine once, with
the exception of the load instruction which requires special
testing. */
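/* Because the operand is always 'max' (every byte 0xff) and the calls are
   ordered so that 'value' is 'zero' or 'max' both before and after each one,
   any other value observed between instructions can only come from a torn,
   non-atomic update.  */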
__attribute__((noinline))
void simulate_thread_main()
{
ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST);
if (ret != zero || value != max)
test_abort();
__atomic_store_n (&value, zero, __ATOMIC_SEQ_CST);
if (value != zero)
test_abort();
ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
}
int main ()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}

View File

@ -0,0 +1,116 @@
/* { dg-do link } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* i?86-*-*] } } } */
/* { dg-final { simulate-thread } } */
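/* -mcx16 enables cmpxchg16b, which x86-64 needs for lock-free 16-byte
   __atomic operations.  */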
#include <stdio.h>
#include "simulate-thread.h"
/* Test all the __atomic routines for proper atomicity on 16 byte values. */
__int128_t zero = 0;
__int128_t max = ~0;
__int128_t changing_value = 0;
__int128_t value = 0;
__int128_t ret;
void test_abort()
{
static int reported = 0;
if (!reported)
{
printf ("FAIL: improper execution of __sync builtin.\n");
reported = 1;
}
}
void simulate_thread_other_threads ()
{
}
int simulate_thread_step_verify ()
{
if (value != zero && value != max)
{
printf ("FAIL: invalid intermediate result for value.\n");
return 1;
}
return 0;
}
int simulate_thread_final_verify ()
{
if (value != 0)
{
printf ("FAIL: invalid final result for value.\n");
return 1;
}
return 0;
}
/* All values written to 'value' alternate between 'zero' and 'max'. Any other
value detected by simulate_thread_step_verify() between instructions would indicate
that the value was only partially written, and would thus fail this
atomicity test.
This function tests each different __atomic routine once, with the
exception of the load instruction which requires special testing. */
__attribute__((noinline))
void simulate_thread_main()
{
ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST);
if (ret != zero || value != max)
test_abort();
__atomic_store_n (&value, zero, __ATOMIC_SEQ_CST);
if (value != zero)
test_abort();
ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
}
int main()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}

View File

@ -0,0 +1,117 @@
/* { dg-do link } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Test all the __atomic routines for proper atomicity on 8 byte values. */
unsigned long long zero = 0;
unsigned long long max = ~0;
unsigned long long changing_value = 0;
unsigned long long value = 0;
unsigned long long ret;
void test_abort()
{
static int reported = 0;
if (!reported)
{
printf ("FAIL: improper execution of __sync builtin.\n");
reported = 1;
}
}
void simulate_thread_other_threads ()
{
}
int simulate_thread_step_verify ()
{
if (value != zero && value != max)
{
printf ("FAIL: invalid intermediate result for value.\n");
return 1;
}
return 0;
}
int simulate_thread_final_verify ()
{
if (value != 0)
{
printf ("FAIL: invalid final result for value.\n");
return 1;
}
return 0;
}
/* All values written to 'value' alternate between 'zero' and 'max'. Any other
value detected by simulate_thread_step_verify() between instructions would indicate
that the value was only partially written, and would thus fail this
atomicity test.
This function tests each different __atomic routine once, with the
exception of the load instruction which requires special testing. */
__attribute__((noinline))
void simulate_thread_main()
{
ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST);
if (ret != zero || value != max)
test_abort();
__atomic_store_n (&value, zero, __ATOMIC_SEQ_CST);
if (value != zero)
test_abort();
ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
}
int main ()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}

Some files were not shown because too many files have changed in this diff.