i386.c (ix86_expand_round_sse4): New function.

* config/i386/i386.c (ix86_expand_round_sse4): New function.
	* config/i386/i386-protos.h (ix86_expand_round_sse4): New prototype.
	* config/i386/i386.md (round<mode>2): Use ix86_expand_round_sse4
	for TARGET_ROUND.

	(rint<mode>2): Simplify TARGET_ROUND check.
	(floor<mode>2): Ditto.
	(ceil<mode>2): Ditto.
	(btrunc<mode>2): Ditto.

From-SVN: r177751
This commit is contained in:
Uros Bizjak 2011-08-14 22:02:32 +02:00
parent 8533a1cae9
commit f4ab7b998e
4 changed files with 165 additions and 106 deletions

View File

@ -1,3 +1,15 @@
2011-08-14 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (ix86_expand_round_sse4): New function.
* config/i386/i386-protos.h (ix86_expand_round_sse4): New prototype.
* config/i386/i386.md (round<mode>2): Use ix86_expand_round_sse4
for TARGET_ROUND.
(rint<mode>2): Simplify TARGET_ROUND check.
(floor<mode>2): Ditto.
(ceil<mode>2): Ditto.
(btrunc<mode>2): Ditto.
2011-08-14 Anatoly Sokolov <aesok@post.ru>
* config/mmix/mmix.c (TARGET_PREFERRED_OUTPUT_RELOAD_CLASS): Redefine
@ -122,8 +134,7 @@
lto_output_ts_binfo_tree_pointers.
(write_ts_constructor_tree_pointers): Rename from
lto_output_ts_constructor_tree_pointers.
(write_ts_target_option): Rename from
lto_output_ts_target_option.
(write_ts_target_option): Rename from lto_output_ts_target_option.
(write_ts_translation_unit_decl_tree_pointers): Rename from
lto_output_ts_translation_unit_decl_tree_pointers.
* tree-streamer.c (streamer_tree_cache_add_to_node_array):
@ -144,8 +155,7 @@
2011-08-12 Nick Clifton <nickc@redhat.com>
* builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
pattern.
* builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
* doc/md.texi (cmpstrn): Note that the comparison stops if both
fetched bytes are zero.
(cmpstr): Likewise.

View File

@ -174,6 +174,7 @@ extern void ix86_expand_lfloorceil (rtx, rtx, bool);
extern void ix86_expand_rint (rtx, rtx);
extern void ix86_expand_floorceil (rtx, rtx, bool);
extern void ix86_expand_floorceildf_32 (rtx, rtx, bool);
/* Expand round from the second operand into the first using the SSE4
   round insn; called by the round<mode>2 expander when TARGET_ROUND.  */
extern void ix86_expand_round_sse4 (rtx, rtx);
extern void ix86_expand_round (rtx, rtx);
extern void ix86_expand_rounddf_32 (rtx, rtx);
extern void ix86_expand_trunc (rtx, rtx);

View File

@ -32676,6 +32676,52 @@ ix86_expand_round (rtx operand0, rtx operand1)
emit_move_insn (operand0, res);
}
/* Emit an SSE instruction sequence that computes round (OP1) and stores
   the result in OP0, using the SSE4 round insn for the truncation step.

   The value is computed as
     copysign (trunc (fabs (OP1) + nextafter (0.5, 0.0)), OP1).

   OP0 determines the machine mode; only SFmode and DFmode are handled,
   any other mode hits gcc_unreachable.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  const struct real_format *rfmt;
  REAL_VALUE_TYPE below_half, delta;
  rtx (*round_gen) (rtx, rtx, rtx);
  rtx magnitude, addend, biased, truncated, signed_res, fabs_mask;

  /* Select the round insn generator that matches the scalar mode.  */
  if (mode == SFmode)
    round_gen = gen_sse4_1_roundsf2;
  else if (mode == DFmode)
    round_gen = gen_sse4_1_rounddf2;
  else
    gcc_unreachable ();

  /* magnitude = fabs (op1); the mask produced here is reused below
     when the sign is copied back.  */
  magnitude = ix86_expand_sse_fabs (op1, &fabs_mask);

  /* Build nextafter (0.5, 0.0) as 0.5 - 2**(-p - 1), where p is the
     precision of the mode's floating-point format.  */
  rfmt = REAL_MODE_FORMAT (mode);
  real_2expN (&delta, -(rfmt->p) - 1, mode);
  REAL_ARITHMETIC (below_half, MINUS_EXPR, dconsthalf, delta);

  /* biased = magnitude + nextafter (0.5, 0.0).  Note the addend is the
     predecessor of 0.5, not 0.5 itself -- presumably so that the sum of
     an input just below a half-way point is not rounded up to the next
     integer by the addition.  */
  addend = const_double_from_real_value (below_half, mode);
  addend = force_reg (mode, addend);
  biased = expand_simple_binop (mode, PLUS, magnitude, addend,
				NULL_RTX, 0, OPTAB_DIRECT);

  /* truncated = trunc (biased), via the round insn in ROUND_TRUNC mode.  */
  truncated = gen_reg_rtx (mode);
  emit_insn (round_gen (truncated, biased, GEN_INT (ROUND_TRUNC)));

  /* signed_res = copysign (truncated, op1).  */
  signed_res = gen_reg_rtx (mode);
  ix86_sse_copysign_to_positive (signed_res, truncated, op1, fabs_mask);

  emit_move_insn (op0, signed_res);
}
/* Table of valid machine attributes. */

View File

@ -14394,11 +14394,11 @@
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math)
{
if (!TARGET_ROUND && optimize_insn_for_size_p ())
FAIL;
if (TARGET_ROUND)
emit_insn (gen_sse4_1_round<mode>2
(operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
else if (optimize_insn_for_size_p ())
FAIL;
else
ix86_expand_rint (operand0, operand1);
}
@ -14431,7 +14431,12 @@
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math && !flag_rounding_math)
{
if (TARGET_64BIT || (<MODE>mode != DFmode))
if (TARGET_ROUND)
{
operands[1] = force_reg (<MODE>mode, operands[1]);
ix86_expand_round_sse4 (operands[0], operands[1]);
}
else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_round (operands[0], operands[1]);
else
ix86_expand_rounddf_32 (operands[0], operands[1]);
@ -14663,14 +14668,13 @@
&& !flag_trapping_math)"
{
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& (TARGET_ROUND || optimize_insn_for_speed_p ()))
&& !flag_trapping_math)
{
if (!TARGET_ROUND && optimize_insn_for_size_p ())
FAIL;
if (TARGET_ROUND)
emit_insn (gen_sse4_1_round<mode>2
(operands[0], operands[1], GEN_INT (ROUND_FLOOR)));
else if (optimize_insn_for_size_p ())
FAIL;
else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_floorceil (operand0, operand1, true);
else
@ -14922,8 +14926,7 @@
&& !flag_trapping_math)"
{
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& (TARGET_ROUND || optimize_insn_for_speed_p ()))
&& !flag_trapping_math)
{
if (TARGET_ROUND)
emit_insn (gen_sse4_1_round<mode>2
@ -15179,8 +15182,7 @@
&& !flag_trapping_math)"
{
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& (TARGET_ROUND || optimize_insn_for_speed_p ()))
&& !flag_trapping_math)
{
if (TARGET_ROUND)
emit_insn (gen_sse4_1_round<mode>2