md.texi (vec_widen_ushiftl_hi, [...]): Document.
* doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo, vec_widen_sshiftl_hi, vec_widen_sshiftl_lo): Document. * tree-pretty-print.c (dump_generic_node): Handle WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. (op_code_prio): Likewise. (op_symbol_code): Handle WIDEN_LSHIFT_EXPR. * optabs.c (optab_for_tree_code): Handle VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. (init-optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo. * optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo. * genopinit.c (optabs): Initialize the new optabs. * expr.c (expand_expr_real_2): Handle VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. * gimple-pretty-print.c (dump_binary_rhs): Likewise. * tree-vectorizer.h (NUM_PATTERNS): Increase to 8. * tree.def (WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR, VEC_WIDEN_LSHIFT_LO_EXPR): New. * cfgexpand.c (expand_debug_expr): Handle new tree codes. * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add vect_recog_widen_shift_pattern. (vect_handle_widen_mult_by_const): Rename... (vect_handle_widen_op_by_const): ...to this. Handle shifts. Add a new argument, update documentation. (vect_recog_widen_mult_pattern): Assume that only second operand can be constant. Update call to vect_handle_widen_op_by_const. (vect_recog_over_widening_pattern): Fix typo. (vect_recog_widen_shift_pattern): New. * tree-vect-stmts.c (vectorizable_type_promotion): Handle widening shifts. (supportable_widening_operation): Likewise. * tree-inline.c (estimate_operator_cost): Handle new tree codes. * tree-vect-generic.c (expand_vector_operations_1): Likewise. * tree-cfg.c (verify_gimple_assign_binary): Likewise. * config/arm/neon.md (neon_vec_<US>shiftl_<mode>): New. (vec_widen_<US>shiftl_lo_<mode>, neon_vec_<US>shiftl_hi_<mode>, vec_widen_<US>shiftl_hi_<mode>, neon_vec_<US>shift_left_<mode>): Likewise. * config/arm/predicates.md (const_neon_scalar_shift_amount_operand): New. * config/arm/iterators.md (V_innermode): New. 
* tree-vect-slp.c (vect_build_slp_tree): Require same shift operand for widening shift. From-SVN: r180128
This commit is contained in:
parent
d355361573
commit
36ba4aaedc
@ -1,3 +1,49 @@
|
||||
2011-10-18 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
* doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo,
|
||||
vec_widen_sshiftl_hi, vec_widen_sshiftl_lo): Document.
|
||||
* tree-pretty-print.c (dump_generic_node): Handle WIDEN_LSHIFT_EXPR,
|
||||
VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
|
||||
(op_code_prio): Likewise.
|
||||
(op_symbol_code): Handle WIDEN_LSHIFT_EXPR.
|
||||
* optabs.c (optab_for_tree_code): Handle
|
||||
VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
|
||||
(init_optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo.
|
||||
* optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo.
|
||||
* genopinit.c (optabs): Initialize the new optabs.
|
||||
* expr.c (expand_expr_real_2): Handle
|
||||
VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
|
||||
* gimple-pretty-print.c (dump_binary_rhs): Likewise.
|
||||
* tree-vectorizer.h (NUM_PATTERNS): Increase to 8.
|
||||
* tree.def (WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR,
|
||||
VEC_WIDEN_LSHIFT_LO_EXPR): New.
|
||||
* cfgexpand.c (expand_debug_expr): Handle new tree codes.
|
||||
* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add
|
||||
vect_recog_widen_shift_pattern.
|
||||
(vect_handle_widen_mult_by_const): Rename...
|
||||
(vect_handle_widen_op_by_const): ...to this. Handle shifts.
|
||||
Add a new argument, update documentation.
|
||||
(vect_recog_widen_mult_pattern): Assume that only second
|
||||
operand can be constant. Update call to
|
||||
vect_handle_widen_op_by_const.
|
||||
(vect_recog_over_widening_pattern): Fix typo.
|
||||
(vect_recog_widen_shift_pattern): New.
|
||||
* tree-vect-stmts.c (vectorizable_type_promotion): Handle
|
||||
widening shifts.
|
||||
(supportable_widening_operation): Likewise.
|
||||
* tree-inline.c (estimate_operator_cost): Handle new tree codes.
|
||||
* tree-vect-generic.c (expand_vector_operations_1): Likewise.
|
||||
* tree-cfg.c (verify_gimple_assign_binary): Likewise.
|
||||
* config/arm/neon.md (neon_vec_<US>shiftl_<mode>): New.
|
||||
(vec_widen_<US>shiftl_lo_<mode>, neon_vec_<US>shiftl_hi_<mode>,
|
||||
vec_widen_<US>shiftl_hi_<mode>, neon_vec_<US>shift_left_<mode>):
|
||||
Likewise.
|
||||
* config/arm/predicates.md (const_neon_scalar_shift_amount_operand):
|
||||
New.
|
||||
* config/arm/iterators.md (V_innermode): New.
|
||||
* tree-vect-slp.c (vect_build_slp_tree): Require same shift operand
|
||||
for widening shift.
|
||||
|
||||
2011-10-18 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* tree-ssa-alias.h (struct pt_solution): Remove
|
||||
|
@ -3265,6 +3265,8 @@ expand_debug_expr (tree exp)
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
case VEC_WIDEN_MULT_HI_EXPR:
|
||||
case VEC_WIDEN_MULT_LO_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_HI_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_LO_EXPR:
|
||||
return NULL;
|
||||
|
||||
/* Misc codes. */
|
||||
|
@ -414,6 +414,9 @@
|
||||
(V4QQ "8") (V2HQ "16") (QQ "8") (HQ "16")
|
||||
(V2HA "16") (HA "16") (SQ "") (SA "")])
|
||||
|
||||
;; Mode attribute for vshll.
|
||||
(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
|
||||
|
||||
;;----------------------------------------------------------------------------
|
||||
;; Code attributes
|
||||
;;----------------------------------------------------------------------------
|
||||
|
@ -5335,6 +5335,44 @@
|
||||
}
|
||||
)
|
||||
|
||||
(define_insn "neon_vec_<US>shiftl_<mode>"
|
||||
[(set (match_operand:<V_widen> 0 "register_operand" "=w")
|
||||
(SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
|
||||
(match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
return "vshll.<US><V_sz_elem> %q0, %P1, %2";
|
||||
}
|
||||
[(set_attr "neon_type" "neon_shift_1")]
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
|
||||
[(match_operand:<V_unpack> 0 "register_operand" "")
|
||||
(SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
"TARGET_NEON && !BYTES_BIG_ENDIAN"
|
||||
{
|
||||
emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
|
||||
simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
|
||||
operands[2]));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
|
||||
[(match_operand:<V_unpack> 0 "register_operand" "")
|
||||
(SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
"TARGET_NEON && !BYTES_BIG_ENDIAN"
|
||||
{
|
||||
emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
|
||||
simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
|
||||
GET_MODE_SIZE (<V_HALF>mode)),
|
||||
operands[2]));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
;; Vectorize for non-neon-quad case
|
||||
(define_insn "neon_unpack<US>_<mode>"
|
||||
[(set (match_operand:<V_widen> 0 "register_operand" "=w")
|
||||
@ -5411,6 +5449,34 @@
|
||||
}
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
|
||||
[(match_operand:<V_double_width> 0 "register_operand" "")
|
||||
(SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx tmpreg = gen_reg_rtx (<V_widen>mode);
|
||||
emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
|
||||
emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
|
||||
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
|
||||
[(match_operand:<V_double_width> 0 "register_operand" "")
|
||||
(SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx tmpreg = gen_reg_rtx (<V_widen>mode);
|
||||
emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
|
||||
emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
|
||||
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
; FIXME: These instruction patterns can't be used safely in big-endian mode
|
||||
; because the ordering of vector elements in Q registers is different from what
|
||||
; the semantics of the instructions require.
|
||||
|
@ -136,6 +136,11 @@
|
||||
(match_operand 0 "s_register_operand"))
|
||||
(match_operand 0 "const_int_operand")))
|
||||
|
||||
(define_predicate "const_neon_scalar_shift_amount_operand"
|
||||
(and (match_code "const_int")
|
||||
(match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode)
|
||||
&& ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0")))
|
||||
|
||||
(define_predicate "arm_add_operand"
|
||||
(ior (match_operand 0 "arm_rhs_operand")
|
||||
(match_operand 0 "arm_neg_immediate_operand")))
|
||||
|
@ -4272,6 +4272,17 @@ are vectors with N signed/unsigned elements of size S@. Multiply the high/low
|
||||
elements of the two vectors, and put the N/2 products of size 2*S in the
|
||||
output vector (operand 0).
|
||||
|
||||
@cindex @code{vec_widen_ushiftl_hi_@var{m}} instruction pattern
|
||||
@cindex @code{vec_widen_ushiftl_lo_@var{m}} instruction pattern
|
||||
@cindex @code{vec_widen_sshiftl_hi_@var{m}} instruction pattern
|
||||
@cindex @code{vec_widen_sshiftl_lo_@var{m}} instruction pattern
|
||||
@item @samp{vec_widen_ushiftl_hi_@var{m}}, @samp{vec_widen_ushiftl_lo_@var{m}}
|
||||
@itemx @samp{vec_widen_sshiftl_hi_@var{m}}, @samp{vec_widen_sshiftl_lo_@var{m}}
|
||||
Signed/Unsigned widening shift left. The first input (operand 1) is a vector
|
||||
with N signed/unsigned elements of size S@. Operand 2 is a constant. Shift
|
||||
the high/low elements of operand 1, and put the N/2 results of size 2*S in the
|
||||
output vector (operand 0).
|
||||
|
||||
@cindex @code{mulhisi3} instruction pattern
|
||||
@item @samp{mulhisi3}
|
||||
Multiply operands 1 and 2, which have mode @code{HImode}, and store
|
||||
|
13
gcc/expr.c
13
gcc/expr.c
@ -8732,6 +8732,19 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
|
||||
return target;
|
||||
}
|
||||
|
||||
case VEC_WIDEN_LSHIFT_HI_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_LO_EXPR:
|
||||
{
|
||||
tree oprnd0 = treeop0;
|
||||
tree oprnd1 = treeop1;
|
||||
|
||||
expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
|
||||
target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX,
|
||||
target, unsignedp);
|
||||
gcc_assert (target);
|
||||
return target;
|
||||
}
|
||||
|
||||
case VEC_PACK_TRUNC_EXPR:
|
||||
case VEC_PACK_SAT_EXPR:
|
||||
case VEC_PACK_FIX_TRUNC_EXPR:
|
||||
|
@ -271,6 +271,10 @@ static const char * const optabs[] =
|
||||
"set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))",
|
||||
"set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))",
|
||||
"set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))",
|
||||
"set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))",
|
||||
"set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))",
|
||||
"set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))",
|
||||
"set_optab_handler (vec_widen_sshiftl_lo_optab, $A, CODE_FOR_$(vec_widen_sshiftl_lo_$a$))",
|
||||
"set_optab_handler (vec_unpacks_hi_optab, $A, CODE_FOR_$(vec_unpacks_hi_$a$))",
|
||||
"set_optab_handler (vec_unpacks_lo_optab, $A, CODE_FOR_$(vec_unpacks_lo_$a$))",
|
||||
"set_optab_handler (vec_unpacku_hi_optab, $A, CODE_FOR_$(vec_unpacku_hi_$a$))",
|
||||
|
@ -343,6 +343,8 @@ dump_binary_rhs (pretty_printer *buffer, gimple gs, int spc, int flags)
|
||||
case VEC_EXTRACT_ODD_EXPR:
|
||||
case VEC_INTERLEAVE_HIGH_EXPR:
|
||||
case VEC_INTERLEAVE_LOW_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_HI_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_LO_EXPR:
|
||||
for (p = tree_code_name [(int) code]; *p; p++)
|
||||
pp_character (buffer, TOUPPER (*p));
|
||||
pp_string (buffer, " <");
|
||||
|
12
gcc/optabs.c
12
gcc/optabs.c
@ -479,6 +479,14 @@ optab_for_tree_code (enum tree_code code, const_tree type,
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
|
||||
|
||||
case VEC_WIDEN_LSHIFT_HI_EXPR:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;
|
||||
|
||||
case VEC_WIDEN_LSHIFT_LO_EXPR:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab;
|
||||
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
vec_unpacku_hi_optab : vec_unpacks_hi_optab;
|
||||
@ -6197,6 +6205,10 @@ init_optabs (void)
|
||||
init_optab (vec_widen_umult_lo_optab, UNKNOWN);
|
||||
init_optab (vec_widen_smult_hi_optab, UNKNOWN);
|
||||
init_optab (vec_widen_smult_lo_optab, UNKNOWN);
|
||||
init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN);
|
||||
init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN);
|
||||
init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN);
|
||||
init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN);
|
||||
init_optab (vec_unpacks_hi_optab, UNKNOWN);
|
||||
init_optab (vec_unpacks_lo_optab, UNKNOWN);
|
||||
init_optab (vec_unpacku_hi_optab, UNKNOWN);
|
||||
|
10
gcc/optabs.h
10
gcc/optabs.h
@ -351,6 +351,12 @@ enum optab_index
|
||||
OTI_vec_widen_umult_lo,
|
||||
OTI_vec_widen_smult_hi,
|
||||
OTI_vec_widen_smult_lo,
|
||||
/* Widening shift left.
|
||||
The high/low part of the resulting vector is returned. */
|
||||
OTI_vec_widen_ushiftl_hi,
|
||||
OTI_vec_widen_ushiftl_lo,
|
||||
OTI_vec_widen_sshiftl_hi,
|
||||
OTI_vec_widen_sshiftl_lo,
|
||||
/* Extract and widen the high/low part of a vector of signed or
|
||||
floating point elements. */
|
||||
OTI_vec_unpacks_hi,
|
||||
@ -544,6 +550,10 @@ enum optab_index
|
||||
#define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo])
|
||||
#define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi])
|
||||
#define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo])
|
||||
#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi])
|
||||
#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo])
|
||||
#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi])
|
||||
#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo])
|
||||
#define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi])
|
||||
#define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo])
|
||||
#define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi])
|
||||
|
@ -1,3 +1,12 @@
|
||||
2011-10-18 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
* testsuite/lib/target-supports.exp
|
||||
(check_effective_target_vect_widen_shift): New.
|
||||
* gcc.dg/vect/vect-widen-shift-s16.c: New.
|
||||
* gcc.dg/vect/vect-widen-shift-s8.c: New.
|
||||
* gcc.dg/vect/vect-widen-shift-u16.c: New.
|
||||
* gcc.dg/vect/vect-widen-shift-u8.c: New.
|
||||
|
||||
2011-10-18 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* gcc.dg/torture/restrict-1.c: New testcase.
|
||||
|
107
gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c
Normal file
107
gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c
Normal file
@ -0,0 +1,107 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-require-effective-target vect_shift } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
#define C 16
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
foo (short *src, int *dst)
|
||||
{
|
||||
int i;
|
||||
short b, b0, b1, b2, b3, *s = src;
|
||||
int *d = dst;
|
||||
|
||||
for (i = 0; i < N/4; i++)
|
||||
{
|
||||
b0 = *s++;
|
||||
b1 = *s++;
|
||||
b2 = *s++;
|
||||
b3 = *s++;
|
||||
*d = b0 << C;
|
||||
d++;
|
||||
*d = b1 << C;
|
||||
d++;
|
||||
*d = b2 << C;
|
||||
d++;
|
||||
*d = b3 << C;
|
||||
d++;
|
||||
}
|
||||
|
||||
s = src;
|
||||
d = dst;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
b = *s++;
|
||||
if (*d != b << C)
|
||||
abort ();
|
||||
d++;
|
||||
}
|
||||
|
||||
s = src;
|
||||
d = dst;
|
||||
for (i = 0; i < N/4; i++)
|
||||
{
|
||||
b0 = *s++;
|
||||
b1 = *s++;
|
||||
b2 = *s++;
|
||||
b3 = *s++;
|
||||
*d = b0 << C;
|
||||
d++;
|
||||
*d = b1 << C;
|
||||
d++;
|
||||
*d = b2 << C;
|
||||
d++;
|
||||
*d = b3 << 6;
|
||||
d++;
|
||||
}
|
||||
|
||||
s = src;
|
||||
d = dst;
|
||||
for (i = 0; i < N/4; i++)
|
||||
{
|
||||
b = *s++;
|
||||
if (*d != b << C)
|
||||
abort ();
|
||||
d++;
|
||||
b = *s++;
|
||||
if (*d != b << C)
|
||||
abort ();
|
||||
d++;
|
||||
b = *s++;
|
||||
if (*d != b << C)
|
||||
abort ();
|
||||
d++;
|
||||
b = *s++;
|
||||
if (*d != b << 6)
|
||||
abort ();
|
||||
d++;
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
short in[N];
|
||||
int out[N];
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
in[i] = i;
|
||||
out[i] = 255;
|
||||
__asm__ volatile ("");
|
||||
}
|
||||
|
||||
foo (in, out);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 8 "vect" { target vect_widen_shift } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
58
gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c
Normal file
58
gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c
Normal file
@ -0,0 +1,58 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-require-effective-target vect_shift } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
#define C 12
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
foo (char *src, int *dst)
|
||||
{
|
||||
int i;
|
||||
char b, *s = src;
|
||||
int *d = dst;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
b = *s++;
|
||||
*d = b << C;
|
||||
d++;
|
||||
}
|
||||
|
||||
s = src;
|
||||
d = dst;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
b = *s++;
|
||||
if (*d != b << C)
|
||||
abort ();
|
||||
d++;
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
char in[N];
|
||||
int out[N];
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
in[i] = i;
|
||||
out[i] = 255;
|
||||
__asm__ volatile ("");
|
||||
}
|
||||
|
||||
foo (in, out);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
58
gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c
Normal file
58
gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c
Normal file
@ -0,0 +1,58 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-require-effective-target vect_shift } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
#define C 7
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
foo (unsigned short *src, unsigned int *dst)
|
||||
{
|
||||
int i;
|
||||
unsigned short b, *s = src;
|
||||
unsigned int *d = dst;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
b = *s++;
|
||||
*d = b << C;
|
||||
d++;
|
||||
}
|
||||
|
||||
s = src;
|
||||
d = dst;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
b = *s++;
|
||||
if (*d != b << C)
|
||||
abort ();
|
||||
d++;
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
unsigned short in[N];
|
||||
unsigned int out[N];
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
in[i] = i;
|
||||
out[i] = 255;
|
||||
__asm__ volatile ("");
|
||||
}
|
||||
|
||||
foo (in, out);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
65
gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c
Normal file
65
gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c
Normal file
@ -0,0 +1,65 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-require-effective-target vect_shift } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
#define C1 10
|
||||
#define C2 5
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
foo (unsigned char *src, unsigned int *dst1, unsigned int *dst2)
|
||||
{
|
||||
int i;
|
||||
unsigned char b, *s = src;
|
||||
unsigned int *d1 = dst1, *d2 = dst2;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
b = *s++;
|
||||
*d1 = b << C1;
|
||||
d1++;
|
||||
*d2 = b << C2;
|
||||
d2++;
|
||||
}
|
||||
|
||||
s = src;
|
||||
d1 = dst1;
|
||||
d2 = dst2;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
b = *s++;
|
||||
if (*d1 != b << C1 || *d2 != b << C2)
|
||||
abort ();
|
||||
d1++;
|
||||
d2++;
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
unsigned char in[N];
|
||||
unsigned int out1[N];
|
||||
unsigned int out2[N];
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
in[i] = i;
|
||||
out1[i] = 255;
|
||||
out2[i] = 255;
|
||||
__asm__ volatile ("");
|
||||
}
|
||||
|
||||
foo (in, out1, out2);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -2906,6 +2906,26 @@ proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } {
|
||||
return $et_vect_widen_mult_hi_to_si_pattern_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# widening shift, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_widen_shift { } {
    global et_vect_widen_shift_saved

    # Reuse the cached answer when this proc has already run.  The variable
    # tested here has to be the same global that is set and returned below;
    # testing any other name would make the cache branch unreachable.
    if [info exists et_vect_widen_shift_saved] {
        verbose "check_effective_target_vect_widen_shift: using cached result" 2
    } else {
        # Default to "unsupported"; only ARM targets with usable NEON
        # are reported as providing a vector widening shift.
        set et_vect_widen_shift_saved 0
        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
            set et_vect_widen_shift_saved 1
        }
    }
    verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2
    return $et_vect_widen_shift_saved
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# dot-product of signed chars, 0 otherwise.
|
||||
#
|
||||
|
@ -3510,6 +3510,44 @@ verify_gimple_assign_binary (gimple stmt)
|
||||
return false;
|
||||
}
|
||||
|
||||
case WIDEN_LSHIFT_EXPR:
|
||||
{
|
||||
if (!INTEGRAL_TYPE_P (lhs_type)
|
||||
|| !INTEGRAL_TYPE_P (rhs1_type)
|
||||
|| TREE_CODE (rhs2) != INTEGER_CST
|
||||
|| (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)))
|
||||
{
|
||||
error ("type mismatch in widening vector shift expression");
|
||||
debug_generic_expr (lhs_type);
|
||||
debug_generic_expr (rhs1_type);
|
||||
debug_generic_expr (rhs2_type);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
case VEC_WIDEN_LSHIFT_HI_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_LO_EXPR:
|
||||
{
|
||||
if (TREE_CODE (rhs1_type) != VECTOR_TYPE
|
||||
|| TREE_CODE (lhs_type) != VECTOR_TYPE
|
||||
|| !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type))
|
||||
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type))
|
||||
|| TREE_CODE (rhs2) != INTEGER_CST
|
||||
|| (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type))
|
||||
> TYPE_PRECISION (TREE_TYPE (lhs_type))))
|
||||
{
|
||||
error ("type mismatch in widening vector shift expression");
|
||||
debug_generic_expr (lhs_type);
|
||||
debug_generic_expr (rhs1_type);
|
||||
debug_generic_expr (rhs2_type);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
case PLUS_EXPR:
|
||||
case MINUS_EXPR:
|
||||
{
|
||||
|
@ -3355,6 +3355,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
|
||||
case DOT_PROD_EXPR:
|
||||
case WIDEN_MULT_PLUS_EXPR:
|
||||
case WIDEN_MULT_MINUS_EXPR:
|
||||
case WIDEN_LSHIFT_EXPR:
|
||||
|
||||
case VEC_WIDEN_MULT_HI_EXPR:
|
||||
case VEC_WIDEN_MULT_LO_EXPR:
|
||||
@ -3369,6 +3370,8 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
|
||||
case VEC_EXTRACT_ODD_EXPR:
|
||||
case VEC_INTERLEAVE_HIGH_EXPR:
|
||||
case VEC_INTERLEAVE_LOW_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_HI_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_LO_EXPR:
|
||||
|
||||
return 1;
|
||||
|
||||
|
@ -1599,6 +1599,7 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
|
||||
case RROTATE_EXPR:
|
||||
case VEC_LSHIFT_EXPR:
|
||||
case VEC_RSHIFT_EXPR:
|
||||
case WIDEN_LSHIFT_EXPR:
|
||||
case BIT_IOR_EXPR:
|
||||
case BIT_XOR_EXPR:
|
||||
case BIT_AND_EXPR:
|
||||
@ -2297,6 +2298,22 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case VEC_WIDEN_LSHIFT_HI_EXPR:
|
||||
pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case VEC_WIDEN_LSHIFT_LO_EXPR:
|
||||
pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
pp_string (buffer, " VEC_UNPACK_HI_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
@ -2619,6 +2636,9 @@ op_code_prio (enum tree_code code)
|
||||
case RSHIFT_EXPR:
|
||||
case LROTATE_EXPR:
|
||||
case RROTATE_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_HI_EXPR:
|
||||
case VEC_WIDEN_LSHIFT_LO_EXPR:
|
||||
case WIDEN_LSHIFT_EXPR:
|
||||
return 11;
|
||||
|
||||
case WIDEN_SUM_EXPR:
|
||||
@ -2794,6 +2814,9 @@ op_symbol_code (enum tree_code code)
|
||||
case VEC_RSHIFT_EXPR:
|
||||
return "v>>";
|
||||
|
||||
case WIDEN_LSHIFT_EXPR:
|
||||
return "w<<";
|
||||
|
||||
case POINTER_PLUS_EXPR:
|
||||
return "+";
|
||||
|
||||
|
@ -823,7 +823,9 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
|
||||
|| code == VEC_UNPACK_LO_EXPR
|
||||
|| code == VEC_PACK_TRUNC_EXPR
|
||||
|| code == VEC_PACK_SAT_EXPR
|
||||
|| code == VEC_PACK_FIX_TRUNC_EXPR)
|
||||
|| code == VEC_PACK_FIX_TRUNC_EXPR
|
||||
|| code == VEC_WIDEN_LSHIFT_HI_EXPR
|
||||
|| code == VEC_WIDEN_LSHIFT_LO_EXPR)
|
||||
type = TREE_TYPE (rhs1);
|
||||
|
||||
/* Optabs will try converting a negation into a subtraction, so
|
||||
|
@ -49,6 +49,8 @@ static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
|
||||
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
|
||||
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
|
||||
tree *);
|
||||
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
|
||||
tree *, tree *);
|
||||
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
|
||||
tree *, tree *);
|
||||
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
|
||||
@ -58,10 +60,10 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
|
||||
vect_recog_dot_prod_pattern,
|
||||
vect_recog_pow_pattern,
|
||||
vect_recog_over_widening_pattern,
|
||||
vect_recog_widen_shift_pattern,
|
||||
vect_recog_mixed_size_cond_pattern,
|
||||
vect_recog_bool_pattern};
|
||||
|
||||
|
||||
/* Function widened_name_p
|
||||
|
||||
Check whether NAME, an ssa-name used in USE_STMT,
|
||||
@ -340,27 +342,37 @@ vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
|
||||
}
|
||||
|
||||
|
||||
/* Handle two cases of multiplication by a constant. The first one is when
|
||||
the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
|
||||
operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
|
||||
TYPE.
|
||||
/* Handle widening operation by a constant. At the moment we support MULT_EXPR
|
||||
and LSHIFT_EXPR.
|
||||
|
||||
For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
|
||||
we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
|
||||
|
||||
Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
|
||||
HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
|
||||
TYPE), we can perform widen-mult from the intermediate type to TYPE and
|
||||
replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
|
||||
HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
|
||||
that satisfies the above restrictions, we can perform a widening operation
|
||||
from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
|
||||
with a_it = (interm_type) a_t; */
|
||||
|
||||
static bool
|
||||
vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
|
||||
VEC (gimple, heap) **stmts, tree type,
|
||||
tree *half_type, gimple def_stmt)
|
||||
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
|
||||
tree const_oprnd, tree *oprnd,
|
||||
VEC (gimple, heap) **stmts, tree type,
|
||||
tree *half_type, gimple def_stmt)
|
||||
{
|
||||
tree new_type, new_oprnd, tmp;
|
||||
gimple new_stmt;
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
|
||||
|
||||
if (int_fits_type_p (const_oprnd, *half_type))
|
||||
if (code != MULT_EXPR && code != LSHIFT_EXPR)
|
||||
return false;
|
||||
|
||||
if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
|
||||
|| (code == LSHIFT_EXPR
|
||||
&& compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
|
||||
!= 1))
|
||||
&& TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
|
||||
{
|
||||
/* CONST_OPRND is a constant of HALF_TYPE. */
|
||||
*oprnd = gimple_assign_rhs1 (def_stmt);
|
||||
@ -373,14 +385,16 @@ vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
|
||||
|| !vinfo_for_stmt (def_stmt))
|
||||
return false;
|
||||
|
||||
/* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
|
||||
/* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
|
||||
a type 2 times bigger than HALF_TYPE. */
|
||||
new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
|
||||
TYPE_UNSIGNED (type));
|
||||
if (!int_fits_type_p (const_oprnd, new_type))
|
||||
if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
|
||||
|| (code == LSHIFT_EXPR
|
||||
&& compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
|
||||
return false;
|
||||
|
||||
/* Use NEW_TYPE for widen_mult. */
|
||||
/* Use NEW_TYPE for widening operation. */
|
||||
if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
|
||||
{
|
||||
new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
|
||||
@ -500,7 +514,7 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
|
||||
enum tree_code dummy_code;
|
||||
int dummy_int;
|
||||
VEC (tree, heap) *dummy_vec;
|
||||
bool op0_ok, op1_ok;
|
||||
bool op1_ok;
|
||||
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
return NULL;
|
||||
@ -520,38 +534,23 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
|
||||
return NULL;
|
||||
|
||||
/* Check argument 0. */
|
||||
op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
|
||||
if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
|
||||
return NULL;
|
||||
/* Check argument 1. */
|
||||
op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
|
||||
|
||||
/* In case of multiplication by a constant one of the operands may not match
|
||||
the pattern, but not both. */
|
||||
if (!op0_ok && !op1_ok)
|
||||
return NULL;
|
||||
|
||||
if (op0_ok && op1_ok)
|
||||
if (op1_ok)
|
||||
{
|
||||
oprnd0 = gimple_assign_rhs1 (def_stmt0);
|
||||
oprnd1 = gimple_assign_rhs1 (def_stmt1);
|
||||
}
|
||||
else if (!op0_ok)
|
||||
{
|
||||
if (TREE_CODE (oprnd0) == INTEGER_CST
|
||||
&& TREE_CODE (half_type1) == INTEGER_TYPE
|
||||
&& vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
|
||||
stmts, type,
|
||||
&half_type1, def_stmt1))
|
||||
half_type0 = half_type1;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
else if (!op1_ok)
|
||||
else
|
||||
{
|
||||
if (TREE_CODE (oprnd1) == INTEGER_CST
|
||||
&& TREE_CODE (half_type0) == INTEGER_TYPE
|
||||
&& vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
|
||||
stmts, type,
|
||||
&half_type0, def_stmt0))
|
||||
&& vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
|
||||
&oprnd0, stmts, type,
|
||||
&half_type0, def_stmt0))
|
||||
half_type1 = half_type0;
|
||||
else
|
||||
return NULL;
|
||||
@ -1130,7 +1129,7 @@ vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
|
||||
statements, except for the case when the last statement in the
|
||||
sequence doesn't have a corresponding pattern statement. In such
|
||||
case we associate the last pattern statement with the last statement
|
||||
in the sequence. Therefore, we only add an original statetement to
|
||||
in the sequence. Therefore, we only add the original statement to
|
||||
the list if we know that it is not the last. */
|
||||
if (prev_stmt)
|
||||
VEC_safe_push (gimple, heap, *stmts, prev_stmt);
|
||||
@ -1215,6 +1214,230 @@ vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
|
||||
return pattern_stmt;
|
||||
}
|
||||
|
||||
/* Detect widening shift pattern:
|
||||
|
||||
type a_t;
|
||||
TYPE a_T, res_T;
|
||||
|
||||
S1 a_t = ;
|
||||
S2 a_T = (TYPE) a_t;
|
||||
S3 res_T = a_T << CONST;
|
||||
|
||||
where type 'TYPE' is at least double the size of type 'type'.
|
||||
|
||||
Also detect unsigned cases:
|
||||
|
||||
unsigned type a_t;
|
||||
unsigned TYPE u_res_T;
|
||||
TYPE a_T, res_T;
|
||||
|
||||
S1 a_t = ;
|
||||
S2 a_T = (TYPE) a_t;
|
||||
S3 res_T = a_T << CONST;
|
||||
S4 u_res_T = (unsigned TYPE) res_T;
|
||||
|
||||
And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
|
||||
create an additional pattern stmt for S2 to create a variable of an
|
||||
intermediate type, and perform widen-shift on the intermediate type:
|
||||
|
||||
type a_t;
|
||||
interm_type a_it;
|
||||
TYPE a_T, res_T, res_T';
|
||||
|
||||
S1 a_t = ;
|
||||
S2 a_T = (TYPE) a_t;
|
||||
'--> a_it = (interm_type) a_t;
|
||||
S3 res_T = a_T << CONST;
|
||||
'--> res_T' = a_it <<* CONST;
|
||||
|
||||
Input/Output:
|
||||
|
||||
* STMTS: Contains a stmt from which the pattern search begins.
|
||||
In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
|
||||
in STMTS. When an intermediate type is used and a pattern statement is
|
||||
created for S2, we also put S2 here (before S3).
|
||||
|
||||
Output:
|
||||
|
||||
* TYPE_IN: The type of the input arguments to the pattern.
|
||||
|
||||
* TYPE_OUT: The type of the output of this pattern.
|
||||
|
||||
* Return value: A new stmt that will be used to replace the sequence of
|
||||
stmts that constitute the pattern. In this case it will be:
|
||||
WIDEN_LSHIFT_EXPR <a_t, CONST>. */
|
||||
|
||||
static gimple
|
||||
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
|
||||
tree *type_in, tree *type_out)
|
||||
{
|
||||
gimple last_stmt = VEC_pop (gimple, *stmts);
|
||||
gimple def_stmt0;
|
||||
tree oprnd0, oprnd1;
|
||||
tree type, half_type0;
|
||||
gimple pattern_stmt, orig_stmt = NULL;
|
||||
tree vectype, vectype_out = NULL_TREE;
|
||||
tree dummy;
|
||||
tree var;
|
||||
enum tree_code dummy_code;
|
||||
int dummy_int;
|
||||
VEC (tree, heap) * dummy_vec;
|
||||
gimple use_stmt = NULL;
|
||||
bool over_widen = false;
|
||||
|
||||
if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
|
||||
return NULL;
|
||||
|
||||
orig_stmt = last_stmt;
|
||||
if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
|
||||
{
|
||||
/* This statement was also detected as over-widening operation (it can't
|
||||
be any other pattern, because only over-widening detects shifts).
|
||||
LAST_STMT is the final type demotion statement, but its related
|
||||
statement is shift. We analyze the related statement to catch cases:
|
||||
|
||||
orig code:
|
||||
type a_t;
|
||||
itype res;
|
||||
TYPE a_T, res_T;
|
||||
|
||||
S1 a_T = (TYPE) a_t;
|
||||
S2 res_T = a_T << CONST;
|
||||
S3 res = (itype)res_T;
|
||||
|
||||
(size of type * 2 <= size of itype
|
||||
and size of itype * 2 <= size of TYPE)
|
||||
|
||||
code after over-widening pattern detection:
|
||||
|
||||
S1 a_T = (TYPE) a_t;
|
||||
--> a_it = (itype) a_t;
|
||||
S2 res_T = a_T << CONST;
|
||||
S3 res = (itype)res_T; <--- LAST_STMT
|
||||
--> res = a_it << CONST;
|
||||
|
||||
after widen_shift:
|
||||
|
||||
S1 a_T = (TYPE) a_t;
|
||||
--> a_it = (itype) a_t; - redundant
|
||||
S2 res_T = a_T << CONST;
|
||||
S3 res = (itype)res_T;
|
||||
--> res = a_t w<< CONST;
|
||||
|
||||
i.e., we replace the three statements with res = a_t w<< CONST. */
|
||||
last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
|
||||
over_widen = true;
|
||||
}
|
||||
|
||||
if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
|
||||
return NULL;
|
||||
|
||||
oprnd0 = gimple_assign_rhs1 (last_stmt);
|
||||
oprnd1 = gimple_assign_rhs2 (last_stmt);
|
||||
if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
|
||||
return NULL;
|
||||
|
||||
/* Check operand 0: it has to be defined by a type promotion. */
|
||||
if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
|
||||
return NULL;
|
||||
|
||||
/* Check operand 1: has to be positive. We check that it fits the type
|
||||
in vect_handle_widen_op_by_const (). */
|
||||
if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
|
||||
return NULL;
|
||||
|
||||
oprnd0 = gimple_assign_rhs1 (def_stmt0);
|
||||
type = gimple_expr_type (last_stmt);
|
||||
|
||||
/* Check if this a widening operation. */
|
||||
if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
|
||||
&oprnd0, stmts,
|
||||
type, &half_type0, def_stmt0))
|
||||
return NULL;
|
||||
|
||||
/* Handle unsigned case. Look for
|
||||
S4 u_res_T = (unsigned TYPE) res_T;
|
||||
Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */
|
||||
if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
|
||||
{
|
||||
tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
|
||||
imm_use_iterator imm_iter;
|
||||
use_operand_p use_p;
|
||||
int nuses = 0;
|
||||
tree use_type;
|
||||
|
||||
if (over_widen)
|
||||
{
|
||||
/* In case of over-widening pattern, S4 should be ORIG_STMT itself.
|
||||
We check here that TYPE is the correct type for the operation,
|
||||
i.e., it's the type of the original result. */
|
||||
tree orig_type = gimple_expr_type (orig_stmt);
|
||||
if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
|
||||
|| (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
|
||||
{
|
||||
if (is_gimple_debug (USE_STMT (use_p)))
|
||||
continue;
|
||||
use_stmt = USE_STMT (use_p);
|
||||
nuses++;
|
||||
}
|
||||
|
||||
if (nuses != 1 || !is_gimple_assign (use_stmt)
|
||||
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
|
||||
return NULL;
|
||||
|
||||
use_lhs = gimple_assign_lhs (use_stmt);
|
||||
use_type = TREE_TYPE (use_lhs);
|
||||
|
||||
if (!INTEGRAL_TYPE_P (use_type)
|
||||
|| (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
|
||||
|| (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
|
||||
return NULL;
|
||||
|
||||
type = use_type;
|
||||
}
|
||||
}
|
||||
|
||||
/* Pattern detected. */
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
|
||||
|
||||
/* Check target support. */
|
||||
vectype = get_vectype_for_scalar_type (half_type0);
|
||||
vectype_out = get_vectype_for_scalar_type (type);
|
||||
|
||||
if (!vectype
|
||||
|| !vectype_out
|
||||
|| !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
|
||||
vectype_out, vectype,
|
||||
&dummy, &dummy, &dummy_code,
|
||||
&dummy_code, &dummy_int,
|
||||
&dummy_vec))
|
||||
return NULL;
|
||||
|
||||
*type_in = vectype;
|
||||
*type_out = vectype_out;
|
||||
|
||||
/* Pattern supported. Create a stmt to be used to replace the pattern. */
|
||||
var = vect_recog_temp_ssa_var (type, NULL);
|
||||
pattern_stmt =
|
||||
gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
|
||||
|
||||
if (use_stmt)
|
||||
last_stmt = use_stmt;
|
||||
else
|
||||
last_stmt = orig_stmt;
|
||||
|
||||
VEC_safe_push (gimple, heap, *stmts, last_stmt);
|
||||
return pattern_stmt;
|
||||
}
|
||||
|
||||
/* Function vect_recog_mixed_size_cond_pattern
|
||||
|
||||
|
@ -489,6 +489,11 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (rhs_code == WIDEN_LSHIFT_EXPR)
|
||||
{
|
||||
need_same_oprnds = true;
|
||||
first_op1 = gimple_assign_rhs2 (stmt);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3333,6 +3333,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
|
||||
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
|
||||
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
unsigned int k;
|
||||
|
||||
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
|
||||
return false;
|
||||
@ -3349,7 +3350,8 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
code = gimple_assign_rhs_code (stmt);
|
||||
if (!CONVERT_EXPR_CODE_P (code)
|
||||
&& code != WIDEN_MULT_EXPR)
|
||||
&& code != WIDEN_MULT_EXPR
|
||||
&& code != WIDEN_LSHIFT_EXPR)
|
||||
return false;
|
||||
|
||||
scalar_dest = gimple_assign_lhs (stmt);
|
||||
@ -3377,7 +3379,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
bool ok;
|
||||
|
||||
op1 = gimple_assign_rhs2 (stmt);
|
||||
if (code == WIDEN_MULT_EXPR)
|
||||
if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
|
||||
{
|
||||
/* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
|
||||
OP1. */
|
||||
@ -3454,7 +3456,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
|
||||
ncopies);
|
||||
|
||||
if (code == WIDEN_MULT_EXPR)
|
||||
if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
|
||||
{
|
||||
if (CONSTANT_CLASS_P (op0))
|
||||
op0 = fold_convert (TREE_TYPE (op1), op0);
|
||||
@ -3495,6 +3497,8 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
if (op_type == binary_op)
|
||||
vec_oprnds1 = VEC_alloc (tree, heap, 1);
|
||||
}
|
||||
else if (code == WIDEN_LSHIFT_EXPR)
|
||||
vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
|
||||
|
||||
/* In case the vectorization factor (VF) is bigger than the number
|
||||
of elements that we can fit in a vectype (nunits), we have to generate
|
||||
@ -3508,15 +3512,33 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
if (j == 0)
|
||||
{
|
||||
if (slp_node)
|
||||
vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
|
||||
&vec_oprnds1, -1);
|
||||
else
|
||||
{
|
||||
if (code == WIDEN_LSHIFT_EXPR)
|
||||
{
|
||||
vec_oprnd1 = op1;
|
||||
/* Store vec_oprnd1 for every vector stmt to be created
|
||||
for SLP_NODE. We check during the analysis that all
|
||||
the shift arguments are the same. */
|
||||
for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
|
||||
VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
|
||||
|
||||
vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
|
||||
-1);
|
||||
}
|
||||
else
|
||||
vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
|
||||
&vec_oprnds1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
|
||||
VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
|
||||
if (op_type == binary_op)
|
||||
{
|
||||
vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
|
||||
if (code == WIDEN_LSHIFT_EXPR)
|
||||
vec_oprnd1 = op1;
|
||||
else
|
||||
vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
|
||||
VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
|
||||
}
|
||||
}
|
||||
@ -3527,7 +3549,10 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
|
||||
if (op_type == binary_op)
|
||||
{
|
||||
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
|
||||
if (code == WIDEN_LSHIFT_EXPR)
|
||||
vec_oprnd1 = op1;
|
||||
else
|
||||
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
|
||||
VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
|
||||
}
|
||||
}
|
||||
@ -5789,6 +5814,19 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
|
||||
}
|
||||
break;
|
||||
|
||||
case WIDEN_LSHIFT_EXPR:
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
|
||||
c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
|
||||
}
|
||||
else
|
||||
{
|
||||
c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
|
||||
c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
|
||||
}
|
||||
break;
|
||||
|
||||
CASE_CONVERT:
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
|
@ -902,7 +902,7 @@ extern void vect_slp_transform_bb (basic_block);
|
||||
Additional pattern recognition functions can (and will) be added
|
||||
in the future. */
|
||||
typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
|
||||
#define NUM_PATTERNS 7
|
||||
#define NUM_PATTERNS 8
|
||||
void vect_pattern_recog (loop_vec_info);
|
||||
|
||||
/* In tree-vectorizer.c. */
|
||||
|
23
gcc/tree.def
23
gcc/tree.def
@ -1125,6 +1125,19 @@ DEFTREECODE (WIDEN_MULT_PLUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3)
|
||||
is subtracted from t3. */
|
||||
DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3)
|
||||
|
||||
/* Widening shift left.
|
||||
The first operand is of type t1.
|
||||
The second operand is the number of bits to shift by; it need not be the
|
||||
same type as the first operand and result.
|
||||
Note that the result is undefined if the second operand is larger
|
||||
than or equal to the first operand's type size.
|
||||
The type of the entire expression is t2, such that t2 is at least twice
|
||||
the size of t1.
|
||||
WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting)
|
||||
the first argument from type t1 to type t2, and then shifting it
|
||||
by the second argument. */
|
||||
DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2)
|
||||
|
||||
/* Fused multiply-add.
|
||||
All operands and the result are of the same type. No intermediate
|
||||
rounding is performed after multiplying operand one with operand two
|
||||
@ -1180,6 +1193,16 @@ DEFTREECODE (VEC_EXTRACT_ODD_EXPR, "vec_extractodd_expr", tcc_binary, 2)
|
||||
DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2)
|
||||
DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2)
|
||||
|
||||
/* Widening vector shift left in bits.
|
||||
Operand 0 is a vector to be shifted with N elements of size S.
|
||||
Operand 1 is an integer shift amount in bits.
|
||||
The result of the operation is N elements of size 2*S.
|
||||
VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results.
|
||||
VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results.
|
||||
*/
|
||||
DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2)
|
||||
DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2)
|
||||
|
||||
/* PREDICT_EXPR. Specify hint for branch prediction. The
|
||||
PREDICT_EXPR_PREDICTOR specifies the predictor and PREDICT_EXPR_OUTCOME the
|
||||
outcome (0 for not taken and 1 for taken). Once the profile is guessed
|
||||
|
Loading…
Reference in New Issue
Block a user