i386.c (ix86_modes_tieable_p): Handle 32-byte AVX modes.
* config/i386/i386.c (ix86_modes_tieable_p): Handle 32-byte AVX modes. (ix86_expand_vector_move_misalign): Remove unneeded gen_lowpart calls. From-SVN: r185927
This commit is contained in:
parent
5caf63ca3f
commit
2310e4504d
@ -1,3 +1,8 @@
|
||||
2012-03-28 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.c (ix86_modes_tieable_p): Handle 32-byte AVX modes.
|
||||
(ix86_expand_vector_move_misalign): Remove unneeded gen_lowpart calls.
|
||||
|
||||
2012-03-28 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR middle-end/52691
|
||||
@ -16,22 +21,20 @@
|
||||
(loop_optimizer_finalize): If loops are to be preserved only
|
||||
clean up optional loop features.
|
||||
(rtl_loop_done): Forcefully free loops here.
|
||||
* cgraph.c (cgraph_release_function_body): Forcefully free
|
||||
loops.
|
||||
* cgraph.c (cgraph_release_function_body): Forcefully free loops.
|
||||
* cfgexpand.c (expand_gimple_cond): Properly add new basic-blocks
|
||||
to existing loops.
|
||||
(construct_init_block): Likewise.
|
||||
(construct_exit_block): Likewise.
|
||||
(gimple_expand_cfg): Clear LOOP_CLOSED_SSA loop state. Cleanup
|
||||
the CFG after expanding.
|
||||
* cfgloop.c (verify_loop_structure): Calculate or verify
|
||||
dominators. If we needed to calculate them, free them afterwards.
|
||||
* cfgloop.c (verify_loop_structure): Calculate or verify dominators.
|
||||
If we needed to calculate them, free them afterwards.
|
||||
* tree-pass.h (PROP_loops): New define.
|
||||
* tree-ssa-loop.c (pass_tree_loop_init): Provide PROP_loops.
|
||||
* basic-block.h (CLEANUP_CFG_CHANGED): New.
|
||||
* cfgcleanup.c (merge_blocks_move): Protect loop latches.
|
||||
(cleanup_cfg): If we did something and have loops around, fix
|
||||
them up.
|
||||
(cleanup_cfg): If we did something and have loops around, fix them up.
|
||||
* cse.c (rest_of_handle_cse_after_global_opts): Call cleanup_cfg
|
||||
with CLEANUP_CFG_CHANGED.
|
||||
* cfghooks.c (merge_blocks): If we merge a loop header into
|
||||
@ -84,8 +87,7 @@
|
||||
PR target/52737
|
||||
* config.gcc (tm_file): Remove avr/multilib.h.
|
||||
|
||||
* doc/invoke.texi (AVR Options): Adjust
|
||||
documentation of -mtiny-stack.
|
||||
* doc/invoke.texi (AVR Options): Adjust documentation of -mtiny-stack.
|
||||
|
||||
* config/avr/genmultilib.awk: Remove code to generate multilib.h.
|
||||
(BEGIN): Use -msp8 as multilib option instead of -mtiny-stack.
|
||||
|
@ -15831,17 +15831,18 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||
switch (GET_MODE_SIZE (mode))
|
||||
{
|
||||
case 16:
|
||||
/* If we're optimizing for size, movups is the smallest. */
|
||||
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
return;
|
||||
}
|
||||
op0 = gen_lowpart (V16QImode, op0);
|
||||
op1 = gen_lowpart (V16QImode, op1);
|
||||
emit_insn (gen_sse2_movdqu (op0, op1));
|
||||
else
|
||||
{
|
||||
op0 = gen_lowpart (V16QImode, op0);
|
||||
op1 = gen_lowpart (V16QImode, op1);
|
||||
emit_insn (gen_sse2_movdqu (op0, op1));
|
||||
}
|
||||
break;
|
||||
case 32:
|
||||
op0 = gen_lowpart (V32QImode, op0);
|
||||
@ -15853,27 +15854,22 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||
}
|
||||
break;
|
||||
case MODE_VECTOR_FLOAT:
|
||||
op0 = gen_lowpart (mode, op0);
|
||||
op1 = gen_lowpart (mode, op1);
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case V4SFmode:
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
break;
|
||||
case V8SFmode:
|
||||
ix86_avx256_split_vector_move_misalign (op0, op1);
|
||||
break;
|
||||
case V2DFmode:
|
||||
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
return;
|
||||
}
|
||||
emit_insn (gen_sse2_movupd (op0, op1));
|
||||
else
|
||||
emit_insn (gen_sse2_movupd (op0, op1));
|
||||
break;
|
||||
case V8SFmode:
|
||||
case V4DFmode:
|
||||
ix86_avx256_split_vector_move_misalign (op0, op1);
|
||||
break;
|
||||
@ -15918,8 +15914,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||
|
||||
if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V2DFmode, op0);
|
||||
op1 = gen_lowpart (V2DFmode, op1);
|
||||
emit_insn (gen_sse2_movupd (op0, op1));
|
||||
return;
|
||||
}
|
||||
@ -15984,8 +15978,8 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||
return;
|
||||
}
|
||||
|
||||
/* ??? Similar to above, only less clear because of quote
|
||||
typeless stores unquote. */
|
||||
/* ??? Similar to above, only less clear
|
||||
because of typeless stores. */
|
||||
if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
|
||||
&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
||||
{
|
||||
@ -15998,11 +15992,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||
if (TARGET_SSE2 && mode == V2DFmode)
|
||||
{
|
||||
if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V2DFmode, op0);
|
||||
op1 = gen_lowpart (V2DFmode, op1);
|
||||
emit_insn (gen_sse2_movupd (op0, op1));
|
||||
}
|
||||
emit_insn (gen_sse2_movupd (op0, op1));
|
||||
else
|
||||
{
|
||||
m = adjust_address (op0, DFmode, 0);
|
||||
@ -31399,6 +31389,10 @@ ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
|
||||
|
||||
/* If MODE2 is only appropriate for an SSE register, then tie with
|
||||
any other mode acceptable to SSE registers. */
|
||||
if (GET_MODE_SIZE (mode2) == 32
|
||||
&& ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
|
||||
return (GET_MODE_SIZE (mode1) == 32
|
||||
&& ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
|
||||
if (GET_MODE_SIZE (mode2) == 16
|
||||
&& ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
|
||||
return (GET_MODE_SIZE (mode1) == 16
|
||||
|
Loading…
Reference in New Issue
Block a user