sw-1.c: Force rep;movsb.

* gcc.target/i386/sw-1.c: Force rep;movsb.

        * config/i386/i386.h (processor_costs): Add second dimension to
        stringop_algs array.
        * config/i386/i386.c (cost models): Initialize second dimension of
        stringop_algs arrays.
	(core_cost): New costs based on generic64 costs with updated stringop
	values.
        (promote_duplicated_reg): Add support for vector modes, add
        declaration.
        (promote_duplicated_reg_to_size): Likewise.
	(processor_target): Set core costs for core variants.
        (expand_set_or_movmem_via_loop_with_iter): New function.
        (expand_set_or_movmem_via_loop): Enable reuse of the same iters in
        different loops, produced by this function.
        (emit_strset): New function.
        (expand_movmem_epilogue): Add epilogue generation for bigger sizes,
        use SSE-moves where possible.
        (expand_setmem_epilogue): Likewise.
        (expand_movmem_prologue): Likewise for prologue.
        (expand_setmem_prologue): Likewise.
        (expand_constant_movmem_prologue): Likewise.
        (expand_constant_setmem_prologue): Likewise.
        (decide_alg): Add new argument align_unknown.  Fix algorithm of
        strategy selection if TARGET_INLINE_ALL_STRINGOPS is set.  Skip sse_loop.
        (decide_alignment): Update desired alignment according to chosen move
        mode.
        (ix86_expand_movmem): Change unrolled_loop strategy to use SSE-moves.
        (ix86_expand_setmem): Likewise.
        (ix86_slow_unaligned_access): Implementation of new hook
        slow_unaligned_access.
        * config/i386/i386.md (strset): Enable half-SSE moves.
        * config/i386/sse.md (vec_dupv4si): Add expand for vec_dupv4si.
        (vec_dupv2di): Add expand for vec_dupv2di.

Co-Authored-By: Jan Hubicka <jh@suse.cz>

From-SVN: r181357
This commit is contained in:
Zolotukhin Michael 2011-11-14 17:28:11 +00:00 committed by Jan Hubicka
parent f8acdd3c52
commit 38877e9851
12 changed files with 898 additions and 247 deletions

View File

@ -1,3 +1,39 @@
2011-11-14 Zolotukhin Michael <michael.v.zolotukhin@gmail.com>
Jan Hubicka <jh@suse.cz>
* config/i386/i386.h (processor_costs): Add second dimension to
stringop_algs array.
* config/i386/i386.c (cost models): Initialize second dimension of
stringop_algs arrays.
(core_cost): New costs based on generic64 costs with updated stringop
values.
(promote_duplicated_reg): Add support for vector modes, add
declaration.
(promote_duplicated_reg_to_size): Likewise.
(processor_target): Set core costs for core variants.
(expand_set_or_movmem_via_loop_with_iter): New function.
(expand_set_or_movmem_via_loop): Enable reuse of the same iters in
different loops, produced by this function.
(emit_strset): New function.
(expand_movmem_epilogue): Add epilogue generation for bigger sizes,
use SSE-moves where possible.
(expand_setmem_epilogue): Likewise.
(expand_movmem_prologue): Likewise for prologue.
(expand_setmem_prologue): Likewise.
(expand_constant_movmem_prologue): Likewise.
(expand_constant_setmem_prologue): Likewise.
(decide_alg): Add new argument align_unknown. Fix algorithm of
strategy selection if TARGET_INLINE_ALL_STRINGOPS is set. Skip sse_loop.
(decide_alignment): Update desired alignment according to chosen move
mode.
(ix86_expand_movmem): Change unrolled_loop strategy to use SSE-moves.
(ix86_expand_setmem): Likewise.
(ix86_slow_unaligned_access): Implementation of new hook
slow_unaligned_access.
* config/i386/i386.md (strset): Enable half-SSE moves.
* config/i386/sse.md (vec_dupv4si): Add expand for vec_dupv4si.
(vec_dupv2di): Add expand for vec_dupv2di.
2011-11-14 Dimitrios Apostolou <jimis@gmx.net>
PR bootstrap/51094

View File

@ -641,6 +641,7 @@ void debug_varpool_node_set (varpool_node_set);
void free_varpool_node_set (varpool_node_set);
void ipa_discover_readonly_nonaddressable_vars (void);
bool cgraph_comdat_can_be_unshared_p (struct cgraph_node *);
bool varpool_externally_visible_p (struct varpool_node *, bool);
/* In predict.c */
bool cgraph_maybe_hot_edge_p (struct cgraph_edge *e);
@ -681,6 +682,7 @@ bool const_value_known_p (tree);
bool varpool_for_node_and_aliases (struct varpool_node *,
bool (*) (struct varpool_node *, void *),
void *, bool);
void varpool_add_new_variable (tree);
/* Walk all reachable static variables. */
#define FOR_EACH_STATIC_VARIABLE(node) \

View File

@ -37,7 +37,8 @@ enum stringop_alg
rep_prefix_8_byte,
loop_1_byte,
loop,
unrolled_loop
unrolled_loop,
sse_loop
};
/* Available call abi. */

File diff suppressed because it is too large Load Diff

View File

@ -159,8 +159,12 @@ struct processor_costs {
const int fchs; /* cost of FCHS instruction. */
const int fsqrt; /* cost of FSQRT instruction. */
/* Specify what algorithm
to use for stringops on unknown size. */
struct stringop_algs memcpy[2], memset[2];
to use for stringops on unknown size.
First index is used to specify whether
alignment is known or not.
Second - to specify whether 32 or 64 bits
are used. */
struct stringop_algs memcpy[2][2], memset[2][2];
const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
load and store. */
const int scalar_load_cost; /* Cost of scalar load. */

View File

@ -324,6 +324,9 @@ Enum(stringop_alg) String(loop) Value(loop)
EnumValue
Enum(stringop_alg) String(unrolled_loop) Value(unrolled_loop)
EnumValue
Enum(stringop_alg) String(sse_loop) Value(sse_loop)
mtls-dialect=
Target RejectNegative Joined Var(ix86_tls_dialect) Enum(tls_dialect) Init(TLS_DIALECT_GNU)
Use given thread-local storage dialect

View File

@ -7501,6 +7501,16 @@
(set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
(set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; Broadcast the SImode scalar in operand 1 into every element of the
;; V4SI register in operand 0.
(define_expand "vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "")
	(vec_duplicate:V4SI
	  (match_operand:SI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  /* Without AVX the source is forced into a register.  Operand 1 is the
     scalar being duplicated, so the register must be SImode rather than
     the V4SImode of the destination.  */
  if (!TARGET_AVX)
    operands[1] = force_reg (SImode, operands[1]);
})
(define_insn "*vec_dupv4si"
[(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
(vec_duplicate:V4SI
@ -7517,6 +7527,16 @@
(set_attr "prefix" "maybe_vex,vex,orig")
(set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcast the DImode scalar in operand 1 into both elements of the
;; V2DI register in operand 0.
(define_expand "vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "")
	(vec_duplicate:V2DI
	  (match_operand:DI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  /* Without AVX the source is forced into a register.  Operand 1 is the
     scalar being duplicated, so the register must be DImode rather than
     the V2DImode of the destination.  */
  if (!TARGET_AVX)
    operands[1] = force_reg (DImode, operands[1]);
})
(define_insn "*vec_dupv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
(vec_duplicate:V2DI

View File

@ -647,7 +647,7 @@ cgraph_externally_visible_p (struct cgraph_node *node,
/* Return true when variable VNODE should be considered externally visible. */
static bool
bool
varpool_externally_visible_p (struct varpool_node *vnode, bool aliased)
{
if (!DECL_COMDAT (vnode->decl) && !TREE_PUBLIC (vnode->decl))

View File

@ -1,3 +1,8 @@
2011-11-14 Zolotukhin Michael <michael.v.zolotukhin@gmail.com>
Jan Hubicka <jh@suse.cz>
* gcc.target/i386/sw-1.c: Force rep;movsb.
2011-11-14 Iain Sandoe <iains@gcc.gnu.org>
PR testsuite/51059

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fshrink-wrap -fdump-rtl-pro_and_epilogue" } */
/* { dg-options "-O2 -fshrink-wrap -fdump-rtl-pro_and_epilogue -mstringop-strategy=rep_byte" } */
#include <string.h>

View File

@ -262,7 +262,7 @@ get_emutls_init_templ_addr (tree decl)
if (DECL_EXTERNAL (to))
varpool_node (to);
else
varpool_finalize_decl (to);
varpool_add_new_variable (to);
return build_fold_addr_expr (to);
}
@ -334,7 +334,7 @@ new_emutls_decl (tree decl, tree alias_of)
if (DECL_EXTERNAL (to))
varpool_node (to);
else if (!alias_of)
varpool_finalize_decl (to);
varpool_add_new_variable (to);
else
varpool_create_variable_alias (to,
varpool_node_for_asm

View File

@ -414,6 +414,20 @@ varpool_finalize_decl (tree decl)
varpool_assemble_pending_decls ();
}
/* Register the variable DECL in the varpool.
   In contrast to varpool_finalize_decl, this entry point is meant for
   the middle end and permits introducing a new variable at an arbitrary
   point of compilation.  */
void
varpool_add_new_variable (tree decl)
{
  struct varpool_node *vnode;

  varpool_finalize_decl (decl);
  vnode = varpool_node (decl);

  /* Nothing more to record unless the variable is externally visible.  */
  if (!varpool_externally_visible_p (vnode, false))
    return;
  vnode->externally_visible = true;
}
/* Return variable availability. See cgraph.h for description of individual
return values. */
enum availability