i386.c (MAX_CLASSES): Increase number of classes.
gcc/ * config/i386/i386.c (MAX_CLASSES): Increase number of classes. (classify_argument): Extend for 512 bit vectors. (construct_container): Ditto. (function_arg_advance_32): Ditto. (function_arg_advance_64): Ditto. (function_arg_32): Ditto. (function_arg_64): Ditto. (function_value_32): Ditto. (return_in_memory_32): Ditto. (ix86_gimplify_va_arg): Ditto. (standard_sse_constant_p): Ditto. (standard_sse_constant_opcode): Ditto. (ix86_expand_vector_convert_uns_vsivsf): Ditto. (ix86_build_const_vector): Ditto. (ix86_build_signbit_mask): Ditto. (ix86_expand_sse_cmp): Extend for AVX512. (ix86_expand_sse_movcc): Ditto. (ix86_expand_int_vcond): Ditto. (ix86_expand_vec_perm): Ditto. (ix86_expand_sse_unpack): Ditto. (ix86_constant_alignment): Ditto. (ix86_builtin_vectorized_function): Ditto. (ix86_vectorize_builtin_gather): Ditto. (avx_vpermilp_parallel): Ditto. (ix86_rtx_costs): Ditto. (ix86_expand_vector_init_duplicate): Ditto. (ix86_expand_vector_init_concat): Ditto. (ix86_expand_vector_init_general): Ditto. (ix86_expand_vector_extract): Ditto. (emit_reduc_half): Ditto. (ix86_vector_mode_supported_p): Ditto. (ix86_emit_swdivsf): Ditto. (ix86_emit_swsqrtsf): Ditto. (expand_vec_perm_1): Ditto. (ix86_vectorize_vec_perm_const_ok): Ditto. (ix86_expand_mul_widen_evenodd): Ditto. (ix86_expand_sse2_mulvxdi3): Ditto. (ix86_preferred_simd_mode): Ditto. (ix86_autovectorize_vector_sizes): Ditto. (ix86_expand_vec_perm_vpermi2): New. (ix86_vector_duplicate_value): Ditto. (IX86_BUILTIN_SQRTPD512, IX86_BUILTIN_EXP2PS, IX86_BUILTIN_SQRTPS_NR512, IX86_BUILTIN_GATHER3ALTDIV16SF, IX86_BUILTIN_GATHER3ALTDIV16SI, IX86_BUILTIN_GATHER3ALTSIV8DF, IX86_BUILTIN_GATHER3ALTSIV8DI, IX86_BUILTIN_GATHER3DIV16SF, IX86_BUILTIN_GATHER3DIV16SI, IX86_BUILTIN_GATHER3DIV8DF, IX86_BUILTIN_GATHER3DIV8DI, IX86_BUILTIN_GATHER3SIV16SF, IX86_BUILTIN_GATHER3SIV16SI, IX86_BUILTIN_GATHER3SIV8DF, IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD512, IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512): Ditto. * config/i386/sse.md (*mov<mode>_internal): Disable SSE typeless stores vectors > 128bit (AVX*). (<sse>_storeu<ssemodesuffix><avxsizesuffix>): Ditto. (<sse2_avx_avx512f>_storedqu<mode>): Extend for AVX-512, disable SSE typeless stores vectors > 128bit (AVX*). (fixuns_trunc<mode><sseintvecmodelower>2): Extend for AVX-512. (vec_pack_ufix_trunc_<mode>): Ditto. (vec_unpacku_float_hi_v16si): New. * tree-vect-stmts.c (vectorizable_load): Support AVX512's gathers. * tree-vectorizer.h (MAX_VECTORIZATION_FACTOR): Extend for 512 bit vectors. testsuite/ * gcc.target/i386/pr49002-2.c: allow vmovapd generation. Co-Authored-By: Andrey Turetskiy <andrey.turetskiy@intel.com> Co-Authored-By: Anna Tikhonova <anna.tikhonova@intel.com> Co-Authored-By: Ilya Tocar <ilya.tocar@intel.com> Co-Authored-By: Ilya Verbin <ilya.verbin@intel.com> Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com> Co-Authored-By: Maxim Kuznetsov <maxim.kuznetsov@intel.com> Co-Authored-By: Michael Zolotukhin <michael.v.zolotukhin@intel.com> Co-Authored-By: Sergey Lega <sergey.s.lega@intel.com> From-SVN: r206260
This commit is contained in:
parent
41a828454e
commit
d3c2fee096
|
@ -1,3 +1,76 @@
|
|||
2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com>
|
||||
Maxim Kuznetsov <maxim.kuznetsov@intel.com>
|
||||
Sergey Lega <sergey.s.lega@intel.com>
|
||||
Anna Tikhonova <anna.tikhonova@intel.com>
|
||||
Ilya Tocar <ilya.tocar@intel.com>
|
||||
Andrey Turetskiy <andrey.turetskiy@intel.com>
|
||||
Ilya Verbin <ilya.verbin@intel.com>
|
||||
Kirill Yukhin <kirill.yukhin@intel.com>
|
||||
Michael Zolotukhin <michael.v.zolotukhin@intel.com>
|
||||
|
||||
* config/i386/i386.c (MAX_CLASSES): Increase number of classes.
|
||||
(classify_argument): Extend for 512 bit vectors.
|
||||
(construct_container): Ditto.
|
||||
(function_arg_advance_32): Ditto.
|
||||
(function_arg_advance_64): Ditto.
|
||||
(function_arg_32): Ditto.
|
||||
(function_arg_64): Ditto.
|
||||
(function_value_32): Ditto.
|
||||
(return_in_memory_32): Ditto.
|
||||
(ix86_gimplify_va_arg): Ditto.
|
||||
(standard_sse_constant_p): Ditto.
|
||||
(standard_sse_constant_opcode): Ditto.
|
||||
(ix86_expand_vector_convert_uns_vsivsf): Ditto.
|
||||
(ix86_build_const_vector): Ditto.
|
||||
(ix86_build_signbit_mask): Ditto.
|
||||
(ix86_expand_sse_cmp): Extend for AVX512.
|
||||
(ix86_expand_sse_movcc): Ditto.
|
||||
(ix86_expand_int_vcond): Ditto.
|
||||
(ix86_expand_vec_perm): Ditto.
|
||||
(ix86_expand_sse_unpack): Ditto.
|
||||
(ix86_constant_alignment): Ditto.
|
||||
(ix86_builtin_vectorized_function): Ditto.
|
||||
(ix86_vectorize_builtin_gather): Ditto.
|
||||
(avx_vpermilp_parallel): Ditto.
|
||||
(ix86_rtx_costs): Ditto.
|
||||
(ix86_expand_vector_init_duplicate): Ditto.
|
||||
(ix86_expand_vector_init_concat): Ditto.
|
||||
(ix86_expand_vector_init_general): Ditto.
|
||||
(ix86_expand_vector_extract): Ditto.
|
||||
(emit_reduc_half): Ditto.
|
||||
(ix86_vector_mode_supported_p): Ditto.
|
||||
(ix86_emit_swdivsf): Ditto.
|
||||
(ix86_emit_swsqrtsf): Ditto.
|
||||
(expand_vec_perm_1): Ditto.
|
||||
(ix86_vectorize_vec_perm_const_ok): Ditto.
|
||||
(ix86_expand_mul_widen_evenodd): Ditto.
|
||||
(ix86_expand_sse2_mulvxdi3): Ditto.
|
||||
(ix86_preferred_simd_mode): Ditto.
|
||||
(ix86_autovectorize_vector_sizes): Ditto.
|
||||
(ix86_expand_vec_perm_vpermi2): New.
|
||||
(ix86_vector_duplicate_value): Ditto.
|
||||
(IX86_BUILTIN_SQRTPD512, IX86_BUILTIN_EXP2PS, IX86_BUILTIN_SQRTPS_NR512,
|
||||
IX86_BUILTIN_GATHER3ALTDIV16SF, IX86_BUILTIN_GATHER3ALTDIV16SI,
|
||||
IX86_BUILTIN_GATHER3ALTSIV8DF, IX86_BUILTIN_GATHER3ALTSIV8DI,
|
||||
IX86_BUILTIN_GATHER3DIV16SF, IX86_BUILTIN_GATHER3DIV16SI,
|
||||
IX86_BUILTIN_GATHER3DIV8DF, IX86_BUILTIN_GATHER3DIV8DI,
|
||||
IX86_BUILTIN_GATHER3SIV16SF, IX86_BUILTIN_GATHER3SIV16SI,
|
||||
IX86_BUILTIN_GATHER3SIV8DF, IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
|
||||
IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD512,
|
||||
IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
|
||||
IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512): Ditto.
|
||||
* config/i386/sse.md (*mov<mode>_internal): Disable SSE typeless
|
||||
stores vectors > 128bit (AVX*).
|
||||
(<sse>_storeu<ssemodesuffix><avxsizesuffix>): Ditto.
|
||||
(<sse2_avx_avx512f>_storedqu<mode>): Extend for AVX-512, disable
|
||||
SSE typeless stores vectors > 128bit (AVX*).
|
||||
(fixuns_trunc<mode><sseintvecmodelower>2): Extend for AVX-512.
|
||||
(vec_pack_ufix_trunc_<mode>): Ditto.
|
||||
(vec_unpacku_float_hi_v16si): New.
|
||||
* tree-vect-stmts.c (vectorizable_load): Support AVX512's gathers.
|
||||
* tree-vectorizer.h (MAX_VECTORIZATION_FACTOR): Extend for 512 bit
|
||||
vectors.
|
||||
|
||||
2013-12-31 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
Sandra Loosemore <sandra@codesourcery.com>
|
||||
Based on patches from Altera Corporation
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -748,8 +748,9 @@
|
|||
(set (attr "mode")
|
||||
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
|
||||
(const_string "<ssePSmode>")
|
||||
(and (eq_attr "alternative" "2")
|
||||
(match_test "TARGET_SSE_TYPELESS_STORES"))
|
||||
(and (match_test "GET_MODE_SIZE (<MODE>mode) == 16")
|
||||
(and (eq_attr "alternative" "2")
|
||||
(match_test "TARGET_SSE_TYPELESS_STORES")))
|
||||
(const_string "<ssePSmode>")
|
||||
(match_test "TARGET_AVX")
|
||||
(const_string "<sseinsnmode>")
|
||||
|
@ -986,8 +987,9 @@
|
|||
(set_attr "ssememalign" "8")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set (attr "mode")
|
||||
(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
|
||||
(match_test "TARGET_SSE_TYPELESS_STORES"))
|
||||
(cond [(and (match_test "GET_MODE_SIZE (<MODE>mode) == 16")
|
||||
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
|
||||
(match_test "TARGET_SSE_TYPELESS_STORES")))
|
||||
(const_string "<ssePSmode>")
|
||||
(match_test "TARGET_AVX")
|
||||
(const_string "<MODE>")
|
||||
|
@ -1091,6 +1093,7 @@
|
|||
{
|
||||
switch (get_attr_mode (insn))
|
||||
{
|
||||
case MODE_V16SF:
|
||||
case MODE_V8SF:
|
||||
case MODE_V4SF:
|
||||
return "%vmovups\t{%1, %0|%0, %1}";
|
||||
|
@ -1113,8 +1116,9 @@
|
|||
(const_string "1")))
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set (attr "mode")
|
||||
(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
|
||||
(match_test "TARGET_SSE_TYPELESS_STORES"))
|
||||
(cond [(and (match_test "GET_MODE_SIZE (<MODE>mode) == 16")
|
||||
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
|
||||
(match_test "TARGET_SSE_TYPELESS_STORES")))
|
||||
(const_string "<ssePSmode>")
|
||||
(match_test "TARGET_AVX")
|
||||
(const_string "<sseinsnmode>")
|
||||
|
@ -3492,7 +3496,11 @@
|
|||
(match_operand:<sseintvecmode> 1 "register_operand")]
|
||||
"TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
|
||||
{
|
||||
ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
|
||||
if (<MODE>mode == V16SFmode)
|
||||
emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
|
||||
else
|
||||
ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
|
||||
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -3583,11 +3591,17 @@
|
|||
(match_operand:VF1 1 "register_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx tmp[3];
|
||||
tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
|
||||
tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
|
||||
emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
|
||||
emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
|
||||
if (<MODE>mode == V16SFmode)
|
||||
emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
|
||||
operands[1]));
|
||||
else
|
||||
{
|
||||
rtx tmp[3];
|
||||
tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
|
||||
tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
|
||||
emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
|
||||
emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
|
||||
}
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -4514,6 +4528,32 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_float_hi_v16si"
|
||||
[(match_operand:V8DF 0 "register_operand")
|
||||
(match_operand:V16SI 1 "register_operand")]
|
||||
"TARGET_AVX512F"
|
||||
{
|
||||
REAL_VALUE_TYPE TWO32r;
|
||||
rtx k, x, tmp[4];
|
||||
|
||||
real_ldexp (&TWO32r, &dconst1, 32);
|
||||
x = const_double_from_real_value (TWO32r, DFmode);
|
||||
|
||||
tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
|
||||
tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
|
||||
tmp[2] = gen_reg_rtx (V8DFmode);
|
||||
tmp[3] = gen_reg_rtx (V8SImode);
|
||||
k = gen_reg_rtx (QImode);
|
||||
|
||||
emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
|
||||
emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
|
||||
emit_insn (gen_rtx_SET (VOIDmode, k,
|
||||
gen_rtx_LT (QImode, tmp[2], tmp[0])));
|
||||
emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
|
||||
emit_move_insn (operands[0], tmp[2]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_float_lo_v8si"
|
||||
[(match_operand:V4DF 0 "register_operand")
|
||||
(match_operand:V8SI 1 "nonimmediate_operand")]
|
||||
|
@ -4679,31 +4719,46 @@
|
|||
|
||||
(define_expand "vec_pack_ufix_trunc_<mode>"
|
||||
[(match_operand:<ssepackfltmode> 0 "register_operand")
|
||||
(match_operand:VF2_128_256 1 "register_operand")
|
||||
(match_operand:VF2_128_256 2 "register_operand")]
|
||||
(match_operand:VF2 1 "register_operand")
|
||||
(match_operand:VF2 2 "register_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx tmp[7];
|
||||
tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
|
||||
tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
|
||||
tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
|
||||
emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
|
||||
if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
|
||||
if (<MODE>mode == V8DFmode)
|
||||
{
|
||||
tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
|
||||
ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
|
||||
rtx r1, r2;
|
||||
|
||||
r1 = gen_reg_rtx (V8SImode);
|
||||
r2 = gen_reg_rtx (V8SImode);
|
||||
|
||||
emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
|
||||
emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
|
||||
emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp[5] = gen_reg_rtx (V8SFmode);
|
||||
ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
|
||||
gen_lowpart (V8SFmode, tmp[3]), 0);
|
||||
tmp[5] = gen_lowpart (V8SImode, tmp[5]);
|
||||
rtx tmp[7];
|
||||
tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
|
||||
tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
|
||||
tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
|
||||
emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
|
||||
if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
|
||||
{
|
||||
tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
|
||||
ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp[5] = gen_reg_rtx (V8SFmode);
|
||||
ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
|
||||
gen_lowpart (V8SFmode, tmp[3]), 0);
|
||||
tmp[5] = gen_lowpart (V8SImode, tmp[5]);
|
||||
}
|
||||
tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
|
||||
operands[0], 0, OPTAB_DIRECT);
|
||||
if (tmp[6] != operands[0])
|
||||
emit_move_insn (operands[0], tmp[6]);
|
||||
}
|
||||
tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
|
||||
operands[0], 0, OPTAB_DIRECT);
|
||||
if (tmp[6] != operands[0])
|
||||
emit_move_insn (operands[0], tmp[6]);
|
||||
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
|
|
@ -1,3 +1,15 @@
|
|||
2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com>
|
||||
Maxim Kuznetsov <maxim.kuznetsov@intel.com>
|
||||
Sergey Lega <sergey.s.lega@intel.com>
|
||||
Anna Tikhonova <anna.tikhonova@intel.com>
|
||||
Ilya Tocar <ilya.tocar@intel.com>
|
||||
Andrey Turetskiy <andrey.turetskiy@intel.com>
|
||||
Ilya Verbin <ilya.verbin@intel.com>
|
||||
Kirill Yukhin <kirill.yukhin@intel.com>
|
||||
Michael Zolotukhin <michael.v.zolotukhin@intel.com>
|
||||
|
||||
* gcc.target/i386/pr49002-2.c: allow vmovapd generation.
|
||||
|
||||
2013-12-31 Sandra Loosemore <sandra@codesourcery.com>
|
||||
Chung-Lin Tang <cltang@codesourcery.com>
|
||||
Based on patches from Altera Corporation
|
||||
|
|
|
@ -12,4 +12,4 @@ void foo(const __m128d from, __m256d *to)
|
|||
/* Ensure we store ymm, not xmm. */
|
||||
/* { dg-final { scan-assembler-not "vmovapd\[\t \]*%xmm\[0-9\]\+,\[^,\]*" } } */
|
||||
/* { dg-final { scan-assembler-not "vmovaps\[\t \]*%xmm\[0-9\]\+,\[^,\]*" } } */
|
||||
/* { dg-final { scan-assembler "vmovaps\[\t \]*%ymm\[0-9\]\+,\[^,\]*" } } */
|
||||
/* { dg-final { scan-assembler "vmovap\[sd\]\[\t \]*%ymm\[0-9\]\+,\[^,\]*" } } */
|
||||
|
|
|
@ -5699,7 +5699,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
tree vec_oprnd0 = NULL_TREE, op;
|
||||
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
|
||||
tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
|
||||
tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
|
||||
tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
|
||||
edge pe = loop_preheader_edge (loop);
|
||||
gimple_seq seq;
|
||||
basic_block new_bb;
|
||||
|
@ -5741,8 +5741,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
|
||||
masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
|
||||
scaletype = TREE_VALUE (arglist);
|
||||
gcc_checking_assert (types_compatible_p (srctype, rettype)
|
||||
&& types_compatible_p (srctype, masktype));
|
||||
gcc_checking_assert (types_compatible_p (srctype, rettype));
|
||||
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
||||
|
||||
|
@ -5756,8 +5755,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
|
||||
/* Currently we support only unconditional gather loads,
|
||||
so mask should be all ones. */
|
||||
if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
|
||||
mask = build_int_cst (TREE_TYPE (masktype), -1);
|
||||
if (TREE_CODE (masktype) == INTEGER_TYPE)
|
||||
mask = build_int_cst (masktype, -1);
|
||||
else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
|
||||
{
|
||||
mask = build_int_cst (TREE_TYPE (masktype), -1);
|
||||
mask = build_vector_from_val (masktype, mask);
|
||||
}
|
||||
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
|
@ -5766,14 +5770,30 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
tmp[j] = -1;
|
||||
real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
|
||||
mask = build_real (TREE_TYPE (masktype), r);
|
||||
mask = build_vector_from_val (masktype, mask);
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
mask = build_vector_from_val (masktype, mask);
|
||||
mask = vect_init_vector (stmt, mask, masktype, NULL);
|
||||
|
||||
scale = build_int_cst (scaletype, gather_scale);
|
||||
|
||||
if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
|
||||
merge = build_int_cst (TREE_TYPE (rettype), 0);
|
||||
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
long tmp[6];
|
||||
for (j = 0; j < 6; ++j)
|
||||
tmp[j] = 0;
|
||||
real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
|
||||
merge = build_real (TREE_TYPE (rettype), r);
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
merge = build_vector_from_val (rettype, merge);
|
||||
merge = vect_init_vector (stmt, merge, rettype, NULL);
|
||||
|
||||
prev_stmt_info = NULL;
|
||||
for (j = 0; j < ncopies; ++j)
|
||||
{
|
||||
|
@ -5802,7 +5822,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
}
|
||||
|
||||
new_stmt
|
||||
= gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
|
||||
= gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
|
||||
|
||||
if (!useless_type_conversion_p (vectype, rettype))
|
||||
{
|
||||
|
|
|
@ -683,8 +683,8 @@ struct dataref_aux {
|
|||
conversion. */
|
||||
#define MAX_INTERM_CVT_STEPS 3
|
||||
|
||||
/* The maximum vectorization factor supported by any target (V32QI). */
|
||||
#define MAX_VECTORIZATION_FACTOR 32
|
||||
/* The maximum vectorization factor supported by any target (V64QI). */
|
||||
#define MAX_VECTORIZATION_FACTOR 64
|
||||
|
||||
/* Avoid GTY(()) on stmt_vec_info. */
|
||||
typedef void *vec_void_p;
|
||||
|
|
Loading…
Reference in New Issue