i386.c (ix86_expand_round_sse4): New function.
* config/i386/i386.c (ix86_expand_round_sse4): New function. * config/i386/i386-protos.h (ix86_expand_round_sse4): New prototype. * config/i386/i386.md (round<mode>2): Use ix86_expand_round_sse4 for TARGET_ROUND. (rint<mode>2): Simplify TARGET_ROUND check. (floor<mode>2): Ditto. (ceil<mode>2): Ditto. (btrunc<mode>2): Ditto. From-SVN: r177751
This commit is contained in:
parent
8533a1cae9
commit
f4ab7b998e
@ -1,3 +1,15 @@
|
||||
2011-08-14 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.c (ix86_expand_round_sse4): New function.
|
||||
* config/i386/i386-protos.h (ix86_expand_round_sse4): New prototype.
|
||||
* config/i386/i386.md (round<mode>2): Use ix86_expand_round_sse4
|
||||
for TARGET_ROUND.
|
||||
|
||||
(rint<mode>2): Simplify TARGET_ROUND check.
|
||||
(floor<mode>2): Ditto.
|
||||
(ceil<mode>2): Ditto.
|
||||
(btrunc<mode>2): Ditto.
|
||||
|
||||
2011-08-14 Anatoly Sokolov <aesok@post.ru>
|
||||
|
||||
* config/mmix/mmix.c (TARGET_PREFERRED_OUTPUT_RELOAD_CLASS): Redefine
|
||||
@ -122,8 +134,7 @@
|
||||
lto_output_ts_binfo_tree_pointers.
|
||||
(write_ts_constructor_tree_pointers): Rename from
|
||||
lto_output_ts_constructor_tree_pointers.
|
||||
(write_ts_target_option): Rename from
|
||||
lto_output_ts_target_option.
|
||||
(write_ts_target_option): Rename from lto_output_ts_target_option.
|
||||
(write_ts_translation_unit_decl_tree_pointers): Rename from
|
||||
lto_output_ts_translation_unit_decl_tree_pointers.
|
||||
* tree-streamer.c (streamer_tree_cache_add_to_node_array):
|
||||
@ -144,8 +155,7 @@
|
||||
|
||||
2011-08-12 Nick Clifton <nickc@redhat.com>
|
||||
|
||||
* builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
|
||||
pattern.
|
||||
* builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
|
||||
* doc/md.texi (cmpstrn): Note that the comparison stops if both
|
||||
fetched bytes are zero.
|
||||
(cmpstr): Likewise.
|
||||
|
@ -174,6 +174,7 @@ extern void ix86_expand_lfloorceil (rtx, rtx, bool);
|
||||
extern void ix86_expand_rint (rtx, rtx);
|
||||
extern void ix86_expand_floorceil (rtx, rtx, bool);
|
||||
extern void ix86_expand_floorceildf_32 (rtx, rtx, bool);
|
||||
extern void ix86_expand_round_sse4 (rtx, rtx);
|
||||
extern void ix86_expand_round (rtx, rtx);
|
||||
extern void ix86_expand_rounddf_32 (rtx, rtx);
|
||||
extern void ix86_expand_trunc (rtx, rtx);
|
||||
|
@ -32676,6 +32676,52 @@ ix86_expand_round (rtx operand0, rtx operand1)
|
||||
|
||||
emit_move_insn (operand0, res);
|
||||
}
|
||||
|
||||
/* Expand an SSE sequence that computes round (OP1) and stores the result
|
||||
in OP0, using the sse4_1 round insn. The insn generator is chosen from
|
||||
the mode of OP0: only SFmode and DFmode are handled, any other mode
|
||||
|
||||
reaches gcc_unreachable. Steps: take fabs (OP1), add nextafter (0.5, 0.0),
|
||||
truncate with ROUND_TRUNC, then copy the sign of OP1 onto the result. */
|
||||
void
|
||||
ix86_expand_round_sse4 (rtx op0, rtx op1)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (op0);
|
||||
rtx e1, e2, e3, res, half, mask;
|
||||
const struct real_format *fmt;
|
||||
REAL_VALUE_TYPE pred_half, half_minus_pred_half;
|
||||
rtx (*gen_round) (rtx, rtx, rtx);
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case SFmode:
|
||||
gen_round = gen_sse4_1_roundsf2;
|
||||
break;
|
||||
case DFmode:
|
||||
gen_round = gen_sse4_1_rounddf2;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* e1 = fabs (op1); MASK is passed by address here and reused by the copysign step below */
|
||||
e1 = ix86_expand_sse_fabs (op1, &mask);
|
||||
|
||||
/* load nextafter (0.5, 0.0): pred_half = dconsthalf - half_minus_pred_half, the latter built by real_2expN from exponent -(fmt->p) - 1 */
|
||||
fmt = REAL_MODE_FORMAT (mode);
|
||||
real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
|
||||
REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
|
||||
|
||||
/* e2 = e1 + pred_half (not exactly 0.5; presumably so that inputs just below 0.5 are not rounded up by the addition -- TODO confirm) */
|
||||
half = force_reg (mode, const_double_from_real_value (pred_half, mode));
|
||||
e2 = expand_simple_binop (mode, PLUS, e1, half, NULL_RTX, 0, OPTAB_DIRECT);
|
||||
|
||||
/* e3 = trunc (e2), emitted through the round insn selected above with ROUND_TRUNC */
|
||||
e3 = gen_reg_rtx (mode);
|
||||
emit_insn (gen_round (e3, e2, GEN_INT (ROUND_TRUNC)));
|
||||
|
||||
/* res = copysign (e3, op1), using the MASK obtained from the fabs step */
|
||||
res = gen_reg_rtx (mode);
|
||||
ix86_sse_copysign_to_positive (res, e3, op1, mask);
|
||||
|
||||
emit_move_insn (op0, res);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Table of valid machine attributes. */
|
||||
|
@ -14394,11 +14394,11 @@
|
||||
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
|
||||
&& !flag_trapping_math)
|
||||
{
|
||||
if (!TARGET_ROUND && optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
if (TARGET_ROUND)
|
||||
emit_insn (gen_sse4_1_round<mode>2
|
||||
(operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
|
||||
else if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
else
|
||||
ix86_expand_rint (operand0, operand1);
|
||||
}
|
||||
@ -14431,7 +14431,12 @@
|
||||
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
|
||||
&& !flag_trapping_math && !flag_rounding_math)
|
||||
{
|
||||
if (TARGET_64BIT || (<MODE>mode != DFmode))
|
||||
if (TARGET_ROUND)
|
||||
{
|
||||
operands[1] = force_reg (<MODE>mode, operands[1]);
|
||||
ix86_expand_round_sse4 (operands[0], operands[1]);
|
||||
}
|
||||
else if (TARGET_64BIT || (<MODE>mode != DFmode))
|
||||
ix86_expand_round (operands[0], operands[1]);
|
||||
else
|
||||
ix86_expand_rounddf_32 (operands[0], operands[1]);
|
||||
@ -14663,14 +14668,13 @@
|
||||
&& !flag_trapping_math)"
|
||||
{
|
||||
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
|
||||
&& !flag_trapping_math
|
||||
&& (TARGET_ROUND || optimize_insn_for_speed_p ()))
|
||||
&& !flag_trapping_math)
|
||||
{
|
||||
if (!TARGET_ROUND && optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
if (TARGET_ROUND)
|
||||
emit_insn (gen_sse4_1_round<mode>2
|
||||
(operands[0], operands[1], GEN_INT (ROUND_FLOOR)));
|
||||
else if (optimize_insn_for_size_p ())
|
||||
FAIL;
|
||||
else if (TARGET_64BIT || (<MODE>mode != DFmode))
|
||||
ix86_expand_floorceil (operand0, operand1, true);
|
||||
else
|
||||
@ -14922,8 +14926,7 @@
|
||||
&& !flag_trapping_math)"
|
||||
{
|
||||
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
|
||||
&& !flag_trapping_math
|
||||
&& (TARGET_ROUND || optimize_insn_for_speed_p ()))
|
||||
&& !flag_trapping_math)
|
||||
{
|
||||
if (TARGET_ROUND)
|
||||
emit_insn (gen_sse4_1_round<mode>2
|
||||
@ -15179,8 +15182,7 @@
|
||||
&& !flag_trapping_math)"
|
||||
{
|
||||
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
|
||||
&& !flag_trapping_math
|
||||
&& (TARGET_ROUND || optimize_insn_for_speed_p ()))
|
||||
&& !flag_trapping_math)
|
||||
{
|
||||
if (TARGET_ROUND)
|
||||
emit_insn (gen_sse4_1_round<mode>2
|
||||
|
Loading…
Reference in New Issue
Block a user