i386.c (MAX_CLASSES): Increase number of classes.

gcc/
	* config/i386/i386.c (MAX_CLASSES): Increase number of classes.
	(classify_argument): Extend for 512 bit vectors.
	(construct_container): Ditto.
	(function_arg_advance_32): Ditto.
	(function_arg_advance_64): Ditto.
	(function_arg_32): Ditto.
	(function_arg_64): Ditto.
	(function_value_32): Ditto.
	(return_in_memory_32): Ditto.
	(ix86_gimplify_va_arg): Ditto.
	(standard_sse_constant_p): Ditto.
	(standard_sse_constant_opcode): Ditto.
	(ix86_expand_vector_convert_uns_vsivsf): Ditto.
	(ix86_build_const_vector): Ditto.
	(ix86_build_signbit_mask): Ditto.
	(ix86_expand_sse_cmp): Extend for AVX512.
	(ix86_expand_sse_movcc): Ditto.
	(ix86_expand_int_vcond): Ditto.
	(ix86_expand_vec_perm): Ditto.
	(ix86_expand_sse_unpack): Ditto.
	(ix86_constant_alignment): Ditto.
	(ix86_builtin_vectorized_function): Ditto.
	(ix86_vectorize_builtin_gather): Ditto.
	(avx_vpermilp_parallel): Ditto.
	(ix86_rtx_costs): Ditto.
	(ix86_expand_vector_init_duplicate): Ditto.
	(ix86_expand_vector_init_concat): Ditto.
	(ix86_expand_vector_init_general): Ditto.
	(ix86_expand_vector_extract): Ditto.
	(emit_reduc_half): Ditto.
	(ix86_vector_mode_supported_p): Ditto.
	(ix86_emit_swdivsf): Ditto.
	(ix86_emit_swsqrtsf): Ditto.
	(expand_vec_perm_1): Ditto.
	(ix86_vectorize_vec_perm_const_ok): Ditto.
	(ix86_expand_mul_widen_evenodd): Ditto.
	(ix86_expand_sse2_mulvxdi3): Ditto.
	(ix86_preferred_simd_mode): Ditto.
	(ix86_autovectorize_vector_sizes): Ditto.
	(ix86_expand_vec_perm_vpermi2): New.
	(ix86_vector_duplicate_value): Ditto.
	(IX86_BUILTIN_SQRTPD512, IX86_BUILTIN_EXP2PS, IX86_BUILTIN_SQRTPS_NR512,
	IX86_BUILTIN_GATHER3ALTDIV16SF, IX86_BUILTIN_GATHER3ALTDIV16SI,
	IX86_BUILTIN_GATHER3ALTSIV8DF, IX86_BUILTIN_GATHER3ALTSIV8DI,
	IX86_BUILTIN_GATHER3DIV16SF, IX86_BUILTIN_GATHER3DIV16SI,
	IX86_BUILTIN_GATHER3DIV8DF, IX86_BUILTIN_GATHER3DIV8DI,
	IX86_BUILTIN_GATHER3SIV16SF, IX86_BUILTIN_GATHER3SIV16SI,
	IX86_BUILTIN_GATHER3SIV8DF, IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
	IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD512,
	IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
	IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512): Ditto.
	* config/i386/sse.md (*mov<mode>_internal): Disable SSE typeless
	stores vectors > 128bit (AVX*).
	(<sse>_storeu<ssemodesuffix><avxsizesuffix>): Ditto.
	(<sse2_avx_avx512f>_storedqu<mode>): Extend for AVX-512, disable
	SSE typeless stores vectors > 128bit (AVX*).
	(fixuns_trunc<mode><sseintvecmodelower>2): Extend for AVX-512.
	(vec_pack_ufix_trunc_<mode>): Ditto.
	(vec_unpacku_float_hi_v16si): New.
	* tree-vect-stmts.c (vectorizable_load): Support AVX512's gathers.
	* tree-vectorizer.h (MAX_VECTORIZATION_FACTOR): Extend for 512 bit
	vectors.

testsuite/
	* gcc.target/i386/pr49002-2.c: allow vmovapd generation.


Co-Authored-By: Andrey Turetskiy <andrey.turetskiy@intel.com>
Co-Authored-By: Anna Tikhonova <anna.tikhonova@intel.com>
Co-Authored-By: Ilya Tocar <ilya.tocar@intel.com>
Co-Authored-By: Ilya Verbin <ilya.verbin@intel.com>
Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com>
Co-Authored-By: Maxim Kuznetsov <maxim.kuznetsov@intel.com>
Co-Authored-By: Michael Zolotukhin <michael.v.zolotukhin@intel.com>
Co-Authored-By: Sergey Lega <sergey.s.lega@intel.com>

From-SVN: r206260
This commit is contained in:
Alexander Ivchenko 2013-12-31 11:09:42 +00:00 committed by Kirill Yukhin
parent 41a828454e
commit d3c2fee096
7 changed files with 803 additions and 112 deletions

View File

@ -1,3 +1,76 @@
2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com>
Maxim Kuznetsov <maxim.kuznetsov@intel.com>
Sergey Lega <sergey.s.lega@intel.com>
Anna Tikhonova <anna.tikhonova@intel.com>
Ilya Tocar <ilya.tocar@intel.com>
Andrey Turetskiy <andrey.turetskiy@intel.com>
Ilya Verbin <ilya.verbin@intel.com>
Kirill Yukhin <kirill.yukhin@intel.com>
Michael Zolotukhin <michael.v.zolotukhin@intel.com>
* config/i386/i386.c (MAX_CLASSES): Increase number of classes.
(classify_argument): Extend for 512 bit vectors.
(construct_container): Ditto.
(function_arg_advance_32): Ditto.
(function_arg_advance_64): Ditto.
(function_arg_32): Ditto.
(function_arg_64): Ditto.
(function_value_32): Ditto.
(return_in_memory_32): Ditto.
(ix86_gimplify_va_arg): Ditto.
(standard_sse_constant_p): Ditto.
(standard_sse_constant_opcode): Ditto.
(ix86_expand_vector_convert_uns_vsivsf): Ditto.
(ix86_build_const_vector): Ditto.
(ix86_build_signbit_mask): Ditto.
(ix86_expand_sse_cmp): Extend for AVX512.
(ix86_expand_sse_movcc): Ditto.
(ix86_expand_int_vcond): Ditto.
(ix86_expand_vec_perm): Ditto.
(ix86_expand_sse_unpack): Ditto.
(ix86_constant_alignment): Ditto.
(ix86_builtin_vectorized_function): Ditto.
(ix86_vectorize_builtin_gather): Ditto.
(avx_vpermilp_parallel): Ditto.
(ix86_rtx_costs): Ditto.
(ix86_expand_vector_init_duplicate): Ditto.
(ix86_expand_vector_init_concat): Ditto.
(ix86_expand_vector_init_general): Ditto.
(ix86_expand_vector_extract): Ditto.
(emit_reduc_half): Ditto.
(ix86_vector_mode_supported_p): Ditto.
(ix86_emit_swdivsf): Ditto.
(ix86_emit_swsqrtsf): Ditto.
(expand_vec_perm_1): Ditto.
(ix86_vectorize_vec_perm_const_ok): Ditto.
(ix86_expand_mul_widen_evenodd): Ditto.
(ix86_expand_sse2_mulvxdi3): Ditto.
(ix86_preferred_simd_mode): Ditto.
(ix86_autovectorize_vector_sizes): Ditto.
(ix86_expand_vec_perm_vpermi2): New.
(ix86_vector_duplicate_value): Ditto.
(IX86_BUILTIN_SQRTPD512, IX86_BUILTIN_EXP2PS, IX86_BUILTIN_SQRTPS_NR512,
IX86_BUILTIN_GATHER3ALTDIV16SF, IX86_BUILTIN_GATHER3ALTDIV16SI,
IX86_BUILTIN_GATHER3ALTSIV8DF, IX86_BUILTIN_GATHER3ALTSIV8DI,
IX86_BUILTIN_GATHER3DIV16SF, IX86_BUILTIN_GATHER3DIV16SI,
IX86_BUILTIN_GATHER3DIV8DF, IX86_BUILTIN_GATHER3DIV8DI,
IX86_BUILTIN_GATHER3SIV16SF, IX86_BUILTIN_GATHER3SIV16SI,
IX86_BUILTIN_GATHER3SIV8DF, IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD512,
IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512): Ditto.
* config/i386/sse.md (*mov<mode>_internal): Disable SSE typeless
stores vectors > 128bit (AVX*).
(<sse>_storeu<ssemodesuffix><avxsizesuffix>): Ditto.
(<sse2_avx_avx512f>_storedqu<mode>): Extend for AVX-512, disable
SSE typeless stores vectors > 128bit (AVX*).
(fixuns_trunc<mode><sseintvecmodelower>2): Extend for AVX-512.
(vec_pack_ufix_trunc_<mode>): Ditto.
(vec_unpacku_float_hi_v16si): New.
* tree-vect-stmts.c (vectorizable_load): Support AVX512's gathers.
* tree-vectorizer.h (MAX_VECTORIZATION_FACTOR): Extend for 512 bit
vectors.
2013-12-31 Chung-Lin Tang <cltang@codesourcery.com>
Sandra Loosemore <sandra@codesourcery.com>
Based on patches from Altera Corporation

File diff suppressed because it is too large Load Diff

View File

@ -748,8 +748,9 @@
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "<ssePSmode>")
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES"))
(and (match_test "GET_MODE_SIZE (<MODE>mode) == 16")
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES")))
(const_string "<ssePSmode>")
(match_test "TARGET_AVX")
(const_string "<sseinsnmode>")
@ -986,8 +987,9 @@
(set_attr "ssememalign" "8")
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(match_test "TARGET_SSE_TYPELESS_STORES"))
(cond [(and (match_test "GET_MODE_SIZE (<MODE>mode) == 16")
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(match_test "TARGET_SSE_TYPELESS_STORES")))
(const_string "<ssePSmode>")
(match_test "TARGET_AVX")
(const_string "<MODE>")
@ -1091,6 +1093,7 @@
{
switch (get_attr_mode (insn))
{
case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
return "%vmovups\t{%1, %0|%0, %1}";
@ -1113,8 +1116,9 @@
(const_string "1")))
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(match_test "TARGET_SSE_TYPELESS_STORES"))
(cond [(and (match_test "GET_MODE_SIZE (<MODE>mode) == 16")
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(match_test "TARGET_SSE_TYPELESS_STORES")))
(const_string "<ssePSmode>")
(match_test "TARGET_AVX")
(const_string "<sseinsnmode>")
@ -3492,7 +3496,11 @@
(match_operand:<sseintvecmode> 1 "register_operand")]
"TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
{
ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
if (<MODE>mode == V16SFmode)
emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
else
ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
DONE;
})
@ -3583,11 +3591,17 @@
(match_operand:VF1 1 "register_operand")]
"TARGET_SSE2"
{
rtx tmp[3];
tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
if (<MODE>mode == V16SFmode)
emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
operands[1]));
else
{
rtx tmp[3];
tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
}
DONE;
})
@ -4514,6 +4528,32 @@
DONE;
})
(define_expand "vec_unpacku_float_hi_v16si"
[(match_operand:V8DF 0 "register_operand")
(match_operand:V16SI 1 "register_operand")]
"TARGET_AVX512F"
{
REAL_VALUE_TYPE TWO32r;
rtx k, x, tmp[4];
real_ldexp (&TWO32r, &dconst1, 32);
x = const_double_from_real_value (TWO32r, DFmode);
tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
tmp[2] = gen_reg_rtx (V8DFmode);
tmp[3] = gen_reg_rtx (V8SImode);
k = gen_reg_rtx (QImode);
emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
emit_insn (gen_rtx_SET (VOIDmode, k,
gen_rtx_LT (QImode, tmp[2], tmp[0])));
emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
emit_move_insn (operands[0], tmp[2]);
DONE;
})
(define_expand "vec_unpacku_float_lo_v8si"
[(match_operand:V4DF 0 "register_operand")
(match_operand:V8SI 1 "nonimmediate_operand")]
@ -4679,31 +4719,46 @@
(define_expand "vec_pack_ufix_trunc_<mode>"
[(match_operand:<ssepackfltmode> 0 "register_operand")
(match_operand:VF2_128_256 1 "register_operand")
(match_operand:VF2_128_256 2 "register_operand")]
(match_operand:VF2 1 "register_operand")
(match_operand:VF2 2 "register_operand")]
"TARGET_SSE2"
{
rtx tmp[7];
tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
if (<MODE>mode == V8DFmode)
{
tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
rtx r1, r2;
r1 = gen_reg_rtx (V8SImode);
r2 = gen_reg_rtx (V8SImode);
emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
}
else
{
tmp[5] = gen_reg_rtx (V8SFmode);
ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
gen_lowpart (V8SFmode, tmp[3]), 0);
tmp[5] = gen_lowpart (V8SImode, tmp[5]);
rtx tmp[7];
tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
{
tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
}
else
{
tmp[5] = gen_reg_rtx (V8SFmode);
ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
gen_lowpart (V8SFmode, tmp[3]), 0);
tmp[5] = gen_lowpart (V8SImode, tmp[5]);
}
tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
operands[0], 0, OPTAB_DIRECT);
if (tmp[6] != operands[0])
emit_move_insn (operands[0], tmp[6]);
}
tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
operands[0], 0, OPTAB_DIRECT);
if (tmp[6] != operands[0])
emit_move_insn (operands[0], tmp[6]);
DONE;
})

View File

@ -1,3 +1,15 @@
2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com>
Maxim Kuznetsov <maxim.kuznetsov@intel.com>
Sergey Lega <sergey.s.lega@intel.com>
Anna Tikhonova <anna.tikhonova@intel.com>
Ilya Tocar <ilya.tocar@intel.com>
Andrey Turetskiy <andrey.turetskiy@intel.com>
Ilya Verbin <ilya.verbin@intel.com>
Kirill Yukhin <kirill.yukhin@intel.com>
Michael Zolotukhin <michael.v.zolotukhin@intel.com>
* gcc.target/i386/pr49002-2.c: allow vmovapd generation.
2013-12-31 Sandra Loosemore <sandra@codesourcery.com>
Chung-Lin Tang <cltang@codesourcery.com>
Based on patches from Altera Corporation

View File

@ -12,4 +12,4 @@ void foo(const __m128d from, __m256d *to)
/* Ensure we store ymm, not xmm. */
/* { dg-final { scan-assembler-not "vmovapd\[\t \]*%xmm\[0-9\]\+,\[^,\]*" } } */
/* { dg-final { scan-assembler-not "vmovaps\[\t \]*%xmm\[0-9\]\+,\[^,\]*" } } */
/* { dg-final { scan-assembler "vmovaps\[\t \]*%ymm\[0-9\]\+,\[^,\]*" } } */
/* { dg-final { scan-assembler "vmovap\[sd\]\[\t \]*%ymm\[0-9\]\+,\[^,\]*" } } */

View File

@ -5699,7 +5699,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
tree vec_oprnd0 = NULL_TREE, op;
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
edge pe = loop_preheader_edge (loop);
gimple_seq seq;
basic_block new_bb;
@ -5741,8 +5741,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
scaletype = TREE_VALUE (arglist);
gcc_checking_assert (types_compatible_p (srctype, rettype)
&& types_compatible_p (srctype, masktype));
gcc_checking_assert (types_compatible_p (srctype, rettype));
vec_dest = vect_create_destination_var (scalar_dest, vectype);
@ -5756,8 +5755,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
/* Currently we support only unconditional gather loads,
so mask should be all ones. */
if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
mask = build_int_cst (TREE_TYPE (masktype), -1);
if (TREE_CODE (masktype) == INTEGER_TYPE)
mask = build_int_cst (masktype, -1);
else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
{
mask = build_int_cst (TREE_TYPE (masktype), -1);
mask = build_vector_from_val (masktype, mask);
}
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
{
REAL_VALUE_TYPE r;
@ -5766,14 +5770,30 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
tmp[j] = -1;
real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
mask = build_real (TREE_TYPE (masktype), r);
mask = build_vector_from_val (masktype, mask);
}
else
gcc_unreachable ();
mask = build_vector_from_val (masktype, mask);
mask = vect_init_vector (stmt, mask, masktype, NULL);
scale = build_int_cst (scaletype, gather_scale);
if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
merge = build_int_cst (TREE_TYPE (rettype), 0);
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
{
REAL_VALUE_TYPE r;
long tmp[6];
for (j = 0; j < 6; ++j)
tmp[j] = 0;
real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
merge = build_real (TREE_TYPE (rettype), r);
}
else
gcc_unreachable ();
merge = build_vector_from_val (rettype, merge);
merge = vect_init_vector (stmt, merge, rettype, NULL);
prev_stmt_info = NULL;
for (j = 0; j < ncopies; ++j)
{
@ -5802,7 +5822,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
new_stmt
= gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
= gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
if (!useless_type_conversion_p (vectype, rettype))
{

View File

@ -683,8 +683,8 @@ struct dataref_aux {
conversion. */
#define MAX_INTERM_CVT_STEPS 3
/* The maximum vectorization factor supported by any target (V32QI). */
#define MAX_VECTORIZATION_FACTOR 32
/* The maximum vectorization factor supported by any target (V64QI). */
#define MAX_VECTORIZATION_FACTOR 64
/* Avoid GTY(()) on stmt_vec_info. */
typedef void *vec_void_p;