From e93ca5cadeb71a04b2f8ef2ebcbadb2f0213d878 Mon Sep 17 00:00:00 2001 From: Ramana Radhakrishnan Date: Thu, 4 Jun 2015 09:19:51 +0000 Subject: [PATCH] Remove TARGET_RELAXED_ORDERING and optimize for weak memory models. This patch removes the special casing for targets with relaxed memory ordering and handles guard accesses with equivalent atomic load acquire operations. In the process we change the algorithm to load the guard variable with an atomic load that has ACQUIRE semantics. This means that on targets with weak memory models, the fast path is inlined and can directly use a load-acquire instruction where available (and yay! one more hook gone). 2015-06-04 Ramana Radhakrishnan PR c++/66192 PR target/66200 * doc/tm.texi: Regenerate. * doc/tm.texi.in (TARGET_RELAXED_ORDERING): Delete. * target.def (TARGET_RELAXED_ORDERING): Likewise. * config/alpha/alpha.c (TARGET_RELAXED_ORDERING): Likewise. * config/ia64/ia64.c (TARGET_RELAXED_ORDERING): Likewise. * config/rs6000/rs6000.c (TARGET_RELAXED_ORDERING): Likewise. * config/sparc/linux.h (SPARC_RELAXED_ORDERING): Likewise. * config/sparc/linux64.h (SPARC_RELAXED_ORDERING): Likewise. * config/sparc/sparc.c (TARGET_RELAXED_ORDERING): Likewise. * config/sparc/sparc.h (SPARC_RELAXED_ORDERING): Likewise. * system.h (TARGET_RELAXED_ORDERING): Poison. 2015-06-04 Ramana Radhakrishnan PR c++/66192 PR target/66200 * cp-tree.h (get_guard_cond): Adjust declaration. * decl.c (expand_static_init): Use atomic load acquire and adjust call to get_guard_cond. * decl2.c (build_atomic_load_byte): New function. (get_guard_cond): Handle thread_safety. (one_static_initialization_or_destruction): Adjust call to get_guard_cond. 
From-SVN: r224118 --- gcc/ChangeLog | 15 +++++++++++++++ gcc/config/alpha/alpha.c | 6 ------ gcc/config/ia64/ia64.c | 5 ----- gcc/config/rs6000/rs6000.c | 11 ----------- gcc/config/sparc/linux.h | 6 ------ gcc/config/sparc/linux64.h | 6 ------ gcc/config/sparc/sparc.c | 3 --- gcc/config/sparc/sparc.h | 11 ----------- gcc/cp/ChangeLog | 12 ++++++++++++ gcc/cp/cp-tree.h | 2 +- gcc/cp/decl.c | 26 +++++++++----------------- gcc/cp/decl2.c | 31 +++++++++++++++++++++++++++---- gcc/doc/tm.texi | 10 ---------- gcc/doc/tm.texi.in | 2 -- gcc/system.h | 2 +- gcc/target.def | 13 ------------- 16 files changed, 65 insertions(+), 96 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3cf20643acb..53d75b1c69f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2015-06-04 Ramana Radhakrishnan + + PR c++/66192 + PR target/66200 + * doc/tm.texi: Regenerate. + * doc/tm.texi.in (TARGET_RELAXED_ORDERING): Delete. + * target.def (TARGET_RELAXED_ORDERING): Likewise. + * config/alpha/alpha.c (TARGET_RELAXED_ORDERING): Likewise. + * config/ia64/ia64.c (TARGET_RELAXED_ORDERING): Likewise. + * config/rs6000/rs6000.c (TARGET_RELAXED_ORDERING): Likewise. + * config/sparc/linux.h (SPARC_RELAXED_ORDERING): Likewise. + * config/sparc/linux64.h (SPARC_RELAXED_ORDERING): Likewise. + * config/sparc/sparc.c (TARGET_RELAXED_ORDERING): Likewise. + * config/sparc/sparc.h (SPARC_RELAXED_ORDERING): Likewise. + 2015-06-04 Kyrylo Tkachov * config/aarch64/aarch64.c (aarch64_override_options): Unconditionally diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 1ba99d04248..857c9ac77eb 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -9987,12 +9987,6 @@ alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #undef TARGET_EXPAND_BUILTIN_VA_START #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start -/* The Alpha architecture does not require sequential consistency. 
See - http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html - for an example of how it can be violated in practice. */ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE alpha_option_override diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index c1e2ecdf0d8..45ad97a10a4 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -630,11 +630,6 @@ static const struct attribute_spec ia64_attribute_table[] = #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ ia64_libgcc_floating_mode_supported_p -/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur - in an order different from the specified program order. */ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p #undef TARGET_LEGITIMATE_ADDRESS_P diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2927e5ae740..bed53489a61 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1620,17 +1620,6 @@ static const struct attribute_spec rs6000_attribute_table[] = #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail #endif -/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors - The PowerPC architecture requires only weak consistency among - processors--that is, memory accesses between processors need not be - sequentially consistent and memory accesses among processors can occur - in any order. The ability to order memory accesses weakly provides - opportunities for more efficient use of the system bus. Unless a - dependency exists, the 604e allows read operations to precede store - operations. 
*/ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #ifdef HAVE_AS_TLS #undef TARGET_ASM_OUTPUT_DWARF_DTPREL #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel diff --git a/gcc/config/sparc/linux.h b/gcc/config/sparc/linux.h index 17e1e862b68..29763c4bae7 100644 --- a/gcc/config/sparc/linux.h +++ b/gcc/config/sparc/linux.h @@ -139,12 +139,6 @@ do { \ /* Static stack checking is supported by means of probes. */ #define STACK_CHECK_STATIC_BUILTIN 1 -/* Linux currently uses RMO in uniprocessor mode, which is equivalent to - TMO, and TMO in multiprocessor mode. But they reserve the right to - change their minds. */ -#undef SPARC_RELAXED_ORDERING -#define SPARC_RELAXED_ORDERING true - #undef NEED_INDICATE_EXEC_STACK #define NEED_INDICATE_EXEC_STACK 1 diff --git a/gcc/config/sparc/linux64.h b/gcc/config/sparc/linux64.h index 43da8480f24..efa33fbf77b 100644 --- a/gcc/config/sparc/linux64.h +++ b/gcc/config/sparc/linux64.h @@ -253,12 +253,6 @@ do { \ /* Static stack checking is supported by means of probes. */ #define STACK_CHECK_STATIC_BUILTIN 1 -/* Linux currently uses RMO in uniprocessor mode, which is equivalent to - TMO, and TMO in multiprocessor mode. But they reserve the right to - change their minds. 
*/ -#undef SPARC_RELAXED_ORDERING -#define SPARC_RELAXED_ORDERING true - #undef NEED_INDICATE_EXEC_STACK #define NEED_INDICATE_EXEC_STACK 1 diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index a1562ad8630..094287f5c11 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -808,9 +808,6 @@ char sparc_hard_reg_printed[8]; #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table #endif -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING - #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE sparc_option_override diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 106d993ee35..72dd18bfdfc 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -106,17 +106,6 @@ extern enum cmodel sparc_cmodel; #define SPARC_DEFAULT_CMODEL CM_32 -/* The SPARC-V9 architecture defines a relaxed memory ordering model (RMO) - which requires the following macro to be true if enabled. Prior to V9, - there are no instructions to even talk about memory synchronization. - Note that the UltraSPARC III processors don't implement RMO, unlike the - UltraSPARC II processors. Niagara, Niagara-2, and Niagara-3 do not - implement RMO either. - - Default to false; for example, Solaris never enables RMO, only ever uses - total memory ordering (TMO). */ -#define SPARC_RELAXED_ORDERING false - /* Do not use the .note.GNU-stack convention by default. */ #define NEED_INDICATE_EXEC_STACK 0 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index a9be43a8ffc..83071db733e 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,15 @@ +2015-06-04 Ramana Radhakrishnan + + PR c++/66192 + PR target/66200 + * cp-tree.h (get_guard_cond): Adjust declaration + * decl.c (expand_static_init): Use atomic load acquire + and adjust call to get_guard_cond. + * decl2.c (build_atomic_load_byte): New function. + (get_guard_cond): Handle thread_safety. 
+ (one_static_initialization_or_destruction): Adjust call to + get_guard_cond. + 2015-06-03 Jason Merrill PR c++/44282 diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 7690af77d8d..efaf1bd879b 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -5491,7 +5491,7 @@ extern bool mark_used (tree, tsubst_flags_t); extern void finish_static_data_member_decl (tree, tree, bool, tree, int); extern tree cp_build_parm_decl (tree, tree); extern tree get_guard (tree); -extern tree get_guard_cond (tree); +extern tree get_guard_cond (tree, bool); extern tree set_guard (tree); extern tree get_tls_wrapper_fn (tree); extern void mark_needed (tree); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 9d20b9487d2..2b57c47904b 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -7227,7 +7227,7 @@ expand_static_init (tree decl, tree init) looks like: static guard; - if (!guard.first_byte) { + if (!__atomic_load (guard.first_byte)) { if (__cxa_guard_acquire (&guard)) { bool flag = false; try { @@ -7257,16 +7257,11 @@ expand_static_init (tree decl, tree init) /* Create the guard variable. */ guard = get_guard (decl); - /* This optimization isn't safe on targets with relaxed memory - consistency. On such targets we force synchronization in - __cxa_guard_acquire. */ - if (!targetm.relaxed_ordering || !thread_guard) - { - /* Begin the conditional initialization. */ - if_stmt = begin_if_stmt (); - finish_if_stmt_cond (get_guard_cond (guard), if_stmt); - then_clause = begin_compound_stmt (BCS_NO_SCOPE); - } + /* Begin the conditional initialization. 
*/ + if_stmt = begin_if_stmt (); + + finish_if_stmt_cond (get_guard_cond (guard, thread_guard), if_stmt); + then_clause = begin_compound_stmt (BCS_NO_SCOPE); if (thread_guard) { @@ -7335,12 +7330,9 @@ expand_static_init (tree decl, tree init) finish_if_stmt (inner_if_stmt); } - if (!targetm.relaxed_ordering || !thread_guard) - { - finish_compound_stmt (then_clause); - finish_then_clause (if_stmt); - finish_if_stmt (if_stmt); - } + finish_compound_stmt (then_clause); + finish_then_clause (if_stmt); + finish_if_stmt (if_stmt); } else if (DECL_THREAD_LOCAL_P (decl)) tls_aggregates = tree_cons (init, decl, tls_aggregates); diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c index 8ba19cfda19..e733e3498d5 100644 --- a/gcc/cp/decl2.c +++ b/gcc/cp/decl2.c @@ -3034,6 +3034,27 @@ get_guard (tree decl) return guard; } +/* Return an atomic load of src with the appropriate memory model. */ + +static tree +build_atomic_load_byte (tree src, HOST_WIDE_INT model) +{ + tree ptr_type = build_pointer_type (char_type_node); + tree mem_model = build_int_cst (integer_type_node, model); + tree t, addr, val; + unsigned int size; + int fncode; + + size = tree_to_uhwi (TYPE_SIZE_UNIT (char_type_node)); + + fncode = BUILT_IN_ATOMIC_LOAD_N + exact_log2 (size) + 1; + t = builtin_decl_implicit ((enum built_in_function) fncode); + + addr = build1 (ADDR_EXPR, ptr_type, src); + val = build_call_expr (t, 2, addr, mem_model); + return val; +} + /* Return those bits of the GUARD variable that should be set when the guarded entity is actually initialized. */ @@ -3060,12 +3081,14 @@ get_guard_bits (tree guard) variable has already been initialized. */ tree -get_guard_cond (tree guard) +get_guard_cond (tree guard, bool thread_safe) { tree guard_value; - /* Check to see if the GUARD is zero. */ - guard = get_guard_bits (guard); + if (!thread_safe) + guard = get_guard_bits (guard); + else + guard = build_atomic_load_byte (guard, MEMMODEL_ACQUIRE); /* Mask off all but the low bit. 
*/ if (targetm.cxx.guard_mask_bit ()) @@ -3681,7 +3704,7 @@ one_static_initialization_or_destruction (tree decl, tree init, bool initp) /* When using __cxa_atexit, we never try to destroy anything from a static destructor. */ gcc_assert (initp); - guard_cond = get_guard_cond (guard); + guard_cond = get_guard_cond (guard, false); } /* If we don't have __cxa_atexit, then we will be running destructors from .fini sections, or their equivalents. So, diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index f2f34978db4..a16cd92260b 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -11395,16 +11395,6 @@ routine for target specific customizations of the system printf and scanf formatter settings. @end defmac -@deftypevr {Target Hook} bool TARGET_RELAXED_ORDERING -If set to @code{true}, means that the target's memory model does not -guarantee that loads which do not depend on one another will access -main memory in the order of the instruction stream; if ordering is -important, an explicit memory barrier must be used. This is true of -many recent processors which implement a policy of ``relaxed,'' -``weak,'' or ``release'' memory consistency, such as Alpha, PowerPC, -and ia64. The default is @code{false}. -@end deftypevr - @deftypefn {Target Hook} {const char *} TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN (const_tree @var{typelist}, const_tree @var{funcdecl}, const_tree @var{val}) If defined, this macro returns the diagnostic message when it is illegal to pass argument @var{val} to function @var{funcdecl} diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 35b02b7632c..93fb41c6879 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -8143,8 +8143,6 @@ routine for target specific customizations of the system printf and scanf formatter settings. 
@end defmac -@hook TARGET_RELAXED_ORDERING - @hook TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN @hook TARGET_INVALID_CONVERSION diff --git a/gcc/system.h b/gcc/system.h index 448a7fed78b..9ca5b5fadd3 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -964,7 +964,7 @@ extern void fancy_abort (const char *, int, const char *) ATTRIBUTE_NORETURN; TARGET_HANDLE_PRAGMA_EXTERN_PREFIX \ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN \ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \ - TARGET_MD_ASM_CLOBBERS + TARGET_MD_ASM_CLOBBERS TARGET_RELAXED_ORDERING /* Arrays that were deleted in favor of a functional interface. */ #pragma GCC poison built_in_decls implicit_built_in_decls diff --git a/gcc/target.def b/gcc/target.def index f2cb81dff05..b606b813360 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -5785,19 +5785,6 @@ for the primary source file, immediately after printing\n\ this to be done. The default is false.", bool, false) -/* True if the target is allowed to reorder memory accesses unless - synchronization is explicitly requested. */ -DEFHOOKPOD -(relaxed_ordering, - "If set to @code{true}, means that the target's memory model does not\n\ -guarantee that loads which do not depend on one another will access\n\ -main memory in the order of the instruction stream; if ordering is\n\ -important, an explicit memory barrier must be used. This is true of\n\ -many recent processors which implement a policy of ``relaxed,''\n\ -``weak,'' or ``release'' memory consistency, such as Alpha, PowerPC,\n\ -and ia64. The default is @code{false}.", - bool, false) - /* Returns true if we should generate exception tables for use with the ARM EABI. The effects the encoding of function exception specifications. */ DEFHOOKPOD