i386.c (ix86_modes_tieable_p): Handle 32bit AVX modes.

	* config/i386/i386.c (ix86_modes_tieable_p): Handle 32bit AVX modes.
	(ix86_expand_vector_move_misalign): Remove un-needed gen_lowpart calls.

From-SVN: r185927
Author: Uros Bizjak
Date:   2012-03-28 23:28:15 +02:00
Commit: 2310e4504d (parent 5caf63ca3f)

2 changed files with 26 additions and 30 deletions
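
The i386.c hunks below make two kinds of change: ix86_expand_vector_move_misalign drops gen_lowpart calls whose operand is already in the requested mode (taking the lowpart of a value in its own mode is an identity, so the call does nothing), and ix86_modes_tieable_p gains a 32-byte case. As a stand-alone C sketch of the identity argument, using stub types rather than GCC's rtl API:

    #include <assert.h>

    /* Stub stand-ins for GCC's machine modes and rtxes; illustrative only.  */
    enum mode_stub { V2DF_STUB, V4SF_STUB };
    struct rtx_stub { enum mode_stub mode; };

    /* Sketch of the property the patch relies on: asking for the lowpart
       of X in X's own mode changes nothing, so such calls can be deleted.  */
    static struct rtx_stub *
    lowpart_stub (enum mode_stub mode, struct rtx_stub *x)
    {
      if (x->mode == mode)
        return x;                  /* identity: nothing to convert */
      return x;                    /* a real gen_lowpart would wrap a subreg */
    }

    int
    main (void)
    {
      struct rtx_stub op1 = { V2DF_STUB };
      /* Shape of the deleted calls: op1 was already V2DFmode.  */
      assert (lowpart_stub (V2DF_STUB, &op1) == &op1);
      return 0;
    }

In the removed hunks the operands reach those calls already in V2DFmode (or in `mode` itself), which is why the conversions could simply go away.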

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog

@@ -1,3 +1,8 @@
+2012-03-28  Uros Bizjak  <ubizjak@gmail.com>
+
+	* config/i386/i386.c (ix86_modes_tieable_p): Handle 32bit AVX modes.
+	(ix86_expand_vector_move_misalign): Remove un-needed gen_lowpart calls.
+
 2012-03-28  Jakub Jelinek  <jakub@redhat.com>
 
 	PR middle-end/52691
@@ -16,22 +21,20 @@
 	(loop_optimizer_finalize): If loops are to be preserved only
 	clean up optional loop features.
 	(rtl_loop_done): Forcefully free loops here.
-	* cgraph.c (cgraph_release_function_body): Forcefully free
-	loops.
+	* cgraph.c (cgraph_release_function_body): Forcefully free loops.
 	* cfgexpand.c (expand_gimple_cond): Properly add new basic-blocks
 	to existing loops.
 	(construct_init_block): Likewise.
 	(construct_exit_block): Likewise.
 	(gimple_expand_cfg): Clear LOOP_CLOSED_SSA loop state.  Cleanup
 	the CFG after expanding.
-	* cfgloop.c (verify_loop_structure): Calculate or verify
-	dominators.  If we needed to calculate them, free them afterwards.
+	* cfgloop.c (verify_loop_structure): Calculate or verify dominators.
+	If we needed to calculate them, free them afterwards.
 	* tree-pass.h (PROP_loops): New define.
 	* tree-ssa-loop.c (pass_tree_loop_init): Provide PROP_loops.
 	* basic-block.h (CLEANUP_CFG_CHANGED): New.
 	* cfgcleanup.c (merge_blocks_move): Protect loop latches.
-	(cleanup_cfg): If we did something and have loops around, fix
-	them up.
+	(cleanup_cfg): If we did something and have loops around, fix them up.
 	* cse.c (rest_of_handle_cse_after_global_opts): Call cleanup_cfg
 	with CLEANUP_CFG_CHANGED.
 	* cfghooks.c (merge_blocks): If we merge a loop header into
@@ -84,8 +87,7 @@
 	PR target/52737
 	* config.gcc (tm_file): Remove avr/multilib.h.
-	* doc/invoke.texi (AVR Options): Adjust
-	documentation of -mtiny-stack.
+	* doc/invoke.texi (AVR Options): Adjust documentation of -mtiny-stack.
 	* config/avr/genmultilib.awk: Remove code to generate multilib.h.
 	(BEGIN): Use -msp8 as multilib option instead of -mtiny-stack.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c

@@ -15831,17 +15831,18 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
       switch (GET_MODE_SIZE (mode))
 	{
 	case 16:
 	  /* If we're optimizing for size, movups is the smallest.  */
 	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
 	    {
 	      op0 = gen_lowpart (V4SFmode, op0);
 	      op1 = gen_lowpart (V4SFmode, op1);
 	      emit_insn (gen_sse_movups (op0, op1));
-	      return;
 	    }
-	  op0 = gen_lowpart (V16QImode, op0);
-	  op1 = gen_lowpart (V16QImode, op1);
-	  emit_insn (gen_sse2_movdqu (op0, op1));
+	  else
+	    {
+	      op0 = gen_lowpart (V16QImode, op0);
+	      op1 = gen_lowpart (V16QImode, op1);
+	      emit_insn (gen_sse2_movdqu (op0, op1));
+	    }
 	  break;
 	case 32:
 	  op0 = gen_lowpart (V32QImode, op0);
@@ -15853,27 +15854,22 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	}
       break;
     case MODE_VECTOR_FLOAT:
-      op0 = gen_lowpart (mode, op0);
-      op1 = gen_lowpart (mode, op1);
-
       switch (mode)
 	{
 	case V4SFmode:
 	  emit_insn (gen_sse_movups (op0, op1));
 	  break;
-	case V8SFmode:
-	  ix86_avx256_split_vector_move_misalign (op0, op1);
-	  break;
 	case V2DFmode:
 	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
 	    {
 	      op0 = gen_lowpart (V4SFmode, op0);
 	      op1 = gen_lowpart (V4SFmode, op1);
 	      emit_insn (gen_sse_movups (op0, op1));
-	      return;
 	    }
-	  emit_insn (gen_sse2_movupd (op0, op1));
+	  else
+	    emit_insn (gen_sse2_movupd (op0, op1));
 	  break;
+	case V8SFmode:
 	case V4DFmode:
 	  ix86_avx256_split_vector_move_misalign (op0, op1);
 	  break;
@@ -15918,8 +15914,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
       if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
 	{
-	  op0 = gen_lowpart (V2DFmode, op0);
-	  op1 = gen_lowpart (V2DFmode, op1);
 	  emit_insn (gen_sse2_movupd (op0, op1));
 	  return;
 	}
@@ -15984,8 +15978,8 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	  return;
 	}
 
-      /* ??? Similar to above, only less clear because of quote
-	 typeless stores unquote.  */
+      /* ??? Similar to above, only less clear
+	 because of typeless stores.  */
       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
 	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
 	{
@@ -15998,11 +15992,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
       if (TARGET_SSE2 && mode == V2DFmode)
 	{
 	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
-	    {
-	      op0 = gen_lowpart (V2DFmode, op0);
-	      op1 = gen_lowpart (V2DFmode, op1);
-	      emit_insn (gen_sse2_movupd (op0, op1));
-	    }
+	    emit_insn (gen_sse2_movupd (op0, op1));
 	  else
 	    {
 	      m = adjust_address (op0, DFmode, 0);
@@ -31399,6 +31389,10 @@ ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
   /* If MODE2 is only appropriate for an SSE register, then tie with
      any other mode acceptable to SSE registers.  */
+  if (GET_MODE_SIZE (mode2) == 32
+      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
+    return (GET_MODE_SIZE (mode1) == 32
+	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
   if (GET_MODE_SIZE (mode2) == 16
       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
     return (GET_MODE_SIZE (mode1) == 16
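
The hunk above mirrors the existing 16-byte rule at 32 bytes: a 32-byte AVX mode may share a hard register only with another 32-byte SSE-register mode. A stand-alone C sketch of that size-class rule, with stub names rather than GCC's real predicates:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Stub mode sizes: 16-byte SSE modes and 32-byte AVX modes.  */
    enum mode_stub { V4SF_16, V16QI_16, V8SF_32, V32QI_32 };

    static size_t mode_size (enum mode_stub m)
    {
      return (m == V8SF_32 || m == V32QI_32) ? 32 : 16;
    }

    /* Sketch of the patched rule: 32-byte modes tie only with each
       other, just as 16-byte modes already did.  (The real predicate
       additionally checks ix86_hard_regno_mode_ok on FIRST_SSE_REG.)  */
    static bool modes_tieable_stub (enum mode_stub m1, enum mode_stub m2)
    {
      if (mode_size (m2) == 32)
        return mode_size (m1) == 32;   /* new 32-byte (AVX) case */
      if (mode_size (m2) == 16)
        return mode_size (m1) == 16;   /* pre-existing 16-byte case */
      return false;
    }

    int main (void)
    {
      printf ("%d %d\n",
              modes_tieable_stub (V8SF_32, V32QI_32),   /* 1: both 32-byte */
              modes_tieable_stub (V4SF_16, V32QI_32));  /* 0: sizes differ */
      return 0;
    }

Before this change only the 16-byte case was handled, so 32-byte AVX modes were never reported as tieable with one another.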