re PR target/13958 (Conversion from unsigned to double is painfully slow on P4)

PR target/13958
        * config/i386/i386.md ("*floatunssi<mode>2_1"): New pattern with
        corresponding post-reload splitters.
        ("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern
        when x87 FP math is selected.
        * config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse):
        New function prototype.
        * config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New
        unreachable function to ease macroization of insn patterns.

From-SVN: r133435
This commit is contained in:
Uros Bizjak 2008-03-21 21:43:12 +01:00 committed by Uros Bizjak
parent 9e1e64ec2b
commit 7fb1431bfa
4 changed files with 90 additions and 6 deletions

View File

@ -1,3 +1,15 @@
2008-03-21 Uros Bizjak <ubizjak@gmail.com>
PR target/13958
* config/i386/i386.md ("*floatunssi<mode>2_1"): New pattern with
corresponding post-reload splitters.
("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern
when x87 FP math is selected.
* config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse):
New function prototype.
* config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New
unreachable function to ease macroization of insn patterns.
2008-03-21 Martin Jambor <mjambor@suse.cz>
* tree-data-ref.c (dump_data_dependence_relation): Avoid data

View File

@ -91,6 +91,7 @@ extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
extern void ix86_split_convert_uns_si_sse (rtx[]);
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
/* Declared so that mode-macroized insn patterns can name the XFmode
   variant; its definition is documented as not used.  */
extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);

View File

@ -10903,6 +10903,14 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
/* Placeholder for the SImode -> XFmode member of the
   ix86_expand_convert_uns_si<mode>_sse family.  Not used: it exists only
   so that mode-macroized insn patterns can refer to a function of this
   name.  Both arguments are ignored and the body is gcc_unreachable, so
   control must never actually arrive here.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
rtx input ATTRIBUTE_UNUSED)
{
gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode. Only currently used
for SSE, but applicable anywhere. */

View File

@ -5313,13 +5313,76 @@
DONE;
})
;; Avoid store forwarding (partial memory) stall penalty by extending
;; SImode value to DImode through XMM register instead of pushing two
;; SImode values to stack.  Note that even !TARGET_INTER_UNIT_MOVES
;; targets benefit from this optimization.  Also note that fild
;; loads from memory only.
;;
;; Operand 0 is the x87 result register.  Operand 1 is the unsigned
;; SImode input, either in an SSE register (alternative 0, "x") or in
;; memory (alternative 1, "m").  Operand 2 is a DImode memory slot that
;; is clobbered, and operand 3 is an SImode SSE scratch register that is
;; only required by the memory alternative.  The output template is "#",
;; so this insn is never emitted directly; the define_split patterns
;; that follow replace it once reload has completed.

(define_insn "*floatunssi<mode>2_1"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
        (unsigned_float:X87MODEF
          (match_operand:SI 1 "nonimmediate_operand" "x,m")))
   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
   (clobber (match_scratch:SI 3 "=X,x"))]
  "!TARGET_64BIT
   && TARGET_80387 && TARGET_SSE"
  "#"
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])
;; Post-reload split of "*floatunssi<mode>2_1" for a register input
;; (operand 1 matches register_operand).  Operand 1 is re-expressed as
;; a DImode subreg of itself and stored to the DImode memory slot in
;; operand 2; the x87 result is then produced by a signed DImode
;; conversion (float) from that slot.
;; NOTE(review): the signed conversion yields the unsigned SImode value
;; only if the upper 32 bits written to the slot are zero -- confirm
;; that the DImode store from the source register guarantees this.
(define_split
[(set (match_operand:X87MODEF 0 "register_operand" "")
(unsigned_float:X87MODEF
(match_operand:SI 1 "register_operand" "")))
(clobber (match_operand:DI 2 "memory_operand" ""))
(clobber (match_scratch:SI 3 ""))]
"!TARGET_64BIT
&& TARGET_80387 && TARGET_SSE
&& reload_completed"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0)
(float:X87MODEF (match_dup 2)))]
"operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
;; Post-reload split of "*floatunssi<mode>2_1" for a memory input
;; (operand 1 matches memory_operand).  The SImode value is first loaded
;; into the scratch register (operand 3); the scratch is then viewed as
;; DImode and stored to the DImode memory slot in operand 2, from which
;; the x87 result is produced by a signed DImode conversion (float).
;;
;; The preparation statements must fall through to the replacement
;; template: they may not call ix86_expand_convert_uns_si<mode>_sse or
;; finish with DONE, because that would discard the template and, for
;; XFmode, land in the gcc_unreachable placeholder.
;; NOTE(review): the signed conversion yields the unsigned SImode value
;; only if the upper 32 bits written to the slot are zero -- confirm
;; that the SImode load into the scratch register guarantees this.
(define_split
  [(set (match_operand:X87MODEF 0 "register_operand" "")
        (unsigned_float:X87MODEF
          (match_operand:SI 1 "memory_operand" "")))
   (clobber (match_operand:DI 2 "memory_operand" ""))
   (clobber (match_scratch:SI 3 ""))]
  "!TARGET_64BIT
   && TARGET_80387 && TARGET_SSE
   && reload_completed"
  [(set (match_dup 2) (match_dup 3))
   (set (match_dup 0)
        (float:X87MODEF (match_dup 2)))]
{
  /* Load the SImode input into the scratch register ...  */
  emit_move_insn (operands[3], operands[1]);
  /* ... and refer to that register in DImode for the store to the
     DImode slot in the template above.  */
  operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
})
;; Expander for unsigned SImode -> floating-point conversion on
;; !TARGET_64BIT.  It is enabled when either x87 and SSE are both
;; available, or the mode is an SSE float mode and SSE math is selected.
;; In the SSE-math case the conversion is emitted entirely by
;; ix86_expand_convert_uns_si<mode>_sse and the RTL template below is
;; not used.  Otherwise a DImode stack temporary is allocated for
;; operand 2 and the parallel (the set plus two clobbers) is emitted.
(define_expand "floatunssi<mode>2"
[(parallel
[(set (match_operand:X87MODEF 0 "register_operand" "")
(unsigned_float:X87MODEF
(match_operand:SI 1 "nonimmediate_operand" "")))
(clobber (match_dup 2))
(clobber (match_scratch:SI 3 ""))])]
"!TARGET_64BIT
&& ((TARGET_80387 && TARGET_SSE)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
{
/* SSE math: hand the whole conversion to the SSE helper and skip the
   template.  */
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
{
ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
DONE;
}
else
{
/* Pick the stack slot kind according to whether virtual registers
   have already been instantiated, then allocate the DImode temporary
   referenced by (match_dup 2).  */
int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
operands[2] = assign_386_stack_local (DImode, slot);
}
})
(define_expand "floatunsdisf2"