sse.md (round<mode>2_vec_pack_sfix): Optimize V2DFmode sequence for AVX.

* config/i386/sse.md (round<mode>2_vec_pack_sfix): Optimize V2DFmode
	sequence for AVX.
	(<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>): Ditto.

From-SVN: r181421
This commit is contained in:
Uros Bizjak 2011-11-16 19:28:08 +01:00 committed by Uros Bizjak
parent a6bbb56fef
commit c05e32f58d
2 changed files with 58 additions and 20 deletions

View File

@@ -1,3 +1,9 @@
2011-11-16 Uros Bizjak <ubizjak@gmail.com>
* config/i386/sse.md (round<mode>2_vec_pack_sfix): Optimize V2DFmode
sequence for AVX.
(<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>): Ditto.
2011-11-16 Venkataramanan Kumar <venkataramanan.kumar@amd.com>
* doc/invoke.texi: Document AMD bdver1 and btver1.
@@ -11,13 +17,15 @@
the base reg is stored iff compiling for Thumb1.
2011-11-16 Razya Ladelsky <razya@il.ibm.com>
PR tree-optimization/49960
* tree-data-ref.c (initialize_data_dependence_relation): Add initializations.
* tree-data-ref.c (initialize_data_dependence_relation): Add
initializations.
Remove call to compute_self_dependence.
(compute_affine_dependence): Remove the !DDR_SELF_REFERENCE condition.
(compute_self_dependence): Remove old code. Add call to compute_affine_dependence.
(compute_all_dependences): Remove call to compute_self_dependence.
(compute_self_dependence): Remove old code. Add call to
compute_affine_dependence.
(compute_all_dependences): Remove call to compute_self_dependence.
Add call to compute_affine_dependence.
2011-11-16 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>

View File

@@ -9962,17 +9962,32 @@
{
rtx tmp0, tmp1;
tmp0 = gen_reg_rtx (<MODE>mode);
tmp1 = gen_reg_rtx (<MODE>mode);
if (<MODE>mode == V2DFmode
&& TARGET_AVX && !TARGET_PREFER_AVX128)
{
rtx tmp2 = gen_reg_rtx (V4DFmode);
emit_insn
(gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
operands[3]));
emit_insn
(gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
operands[3]));
emit_insn
(gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
tmp0 = gen_reg_rtx (V4DFmode);
tmp1 = force_reg (V2DFmode, operands[1]);
emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
}
else
{
tmp0 = gen_reg_rtx (<MODE>mode);
tmp1 = gen_reg_rtx (<MODE>mode);
emit_insn
(gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
operands[3]));
emit_insn
(gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
operands[3]));
emit_insn
(gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
}
DONE;
})
@@ -10053,14 +10068,29 @@
{
rtx tmp0, tmp1;
tmp0 = gen_reg_rtx (<MODE>mode);
tmp1 = gen_reg_rtx (<MODE>mode);
if (<MODE>mode == V2DFmode
&& TARGET_AVX && !TARGET_PREFER_AVX128)
{
rtx tmp2 = gen_reg_rtx (V4DFmode);
emit_insn (gen_round<mode>2 (tmp0, operands[1]));
emit_insn (gen_round<mode>2 (tmp1, operands[2]));
tmp0 = gen_reg_rtx (V4DFmode);
tmp1 = force_reg (V2DFmode, operands[1]);
emit_insn
(gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
emit_insn (gen_roundv4df2 (tmp2, tmp0));
emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
}
else
{
tmp0 = gen_reg_rtx (<MODE>mode);
tmp1 = gen_reg_rtx (<MODE>mode);
emit_insn (gen_round<mode>2 (tmp0, operands[1]));
emit_insn (gen_round<mode>2 (tmp1, operands[2]));
emit_insn
(gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
}
DONE;
})