i386-protos.h (ix86_expand_floorceil): Declare.

2006-10-29  Richard Guenther  <rguenther@suse.de>

	* config/i386/i386-protos.h (ix86_expand_floorceil): Declare.
	(ix86_expand_floorceildf_32): Likewise.
	* config/i386/i386.c (ix86_expand_sse_compare_mask): New
	static helper function.
	(ix86_expand_floorceil): Expander for floor and ceil to SSE
	math.
	(ix86_expand_floorceildf_32): Same for DFmode on 32bit archs.
	* config/i386/i386.md (floordf2): Adjust to enable floor
	expansion via ix86_expand_floorceil if TARGET_SSE_MATH and
	-fno-trapping-math is enabled and if not optimizing for size.
	(floorsf2, ceildf2, ceilsf2): Likewise.
	* config/i386/sse.md (sse_maskcmpsf3): New insn.
	(sse2_maskcmpdf3): Likewise.

	* gcc.target/i386/math-torture/ceil.c: New testcase.
	* gcc.target/i386/math-torture/floor.c: Likewise.

From-SVN: r118145
This commit is contained in:
Richard Guenther 2006-10-29 16:28:40 +00:00 committed by Richard Biener
parent c7d32ff619
commit d096ecdd96
8 changed files with 295 additions and 32 deletions

View File

@ -1,3 +1,19 @@
2006-10-29 Richard Guenther <rguenther@suse.de>
* config/i386/i386-protos.h (ix86_expand_floorceil): Declare.
(ix86_expand_floorceildf_32): Likewise.
* config/i386/i386.c (ix86_expand_sse_compare_mask): New
static helper function.
(ix86_expand_floorceil): Expander for floor and ceil to SSE
math.
(ix86_expand_floorceildf_32): Same for DFmode on 32bit archs.
* config/i386/i386.md (floordf2): Adjust to enable floor
expansion via ix86_expand_floorceil if TARGET_SSE_MATH and
-fno-trapping-math is enabled and if not optimizing for size.
(floorsf2, ceildf2, ceilsf2): Likewise.
* config/i386/sse.md (sse_maskcmpsf3): New insn.
(sse2_maskcmpdf3): Likewise.
2006-10-29 Richard Guenther <rguenther@suse.de>
* builtins.c (expand_builtin_mathfn): Expand nearbyint as

View File

@ -160,6 +160,8 @@ extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
extern void ix86_expand_lround (rtx, rtx);
extern void ix86_expand_lfloorceil (rtx, rtx, bool);
extern void ix86_expand_rint (rtx, rtx);
extern void ix86_expand_floorceil (rtx, rtx, bool);
extern void ix86_expand_floorceildf_32 (rtx, rtx, bool);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);

View File

@ -19310,6 +19310,33 @@ ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
return label;
}
/* Emit a mask-producing SSE compare of OP0 against OP1 under comparison
   code CODE and return the freshly allocated pseudo that receives the mask.
   The mask has the mode of OP0 as passed in.  If SWAP_OPERANDS is true the
   two operands trade places before the comparison is built.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx lhs = swap_operands ? op1 : op0;
  rtx rhs = swap_operands ? op0 : op1;
  rtx mask = gen_reg_rtx (mode);
  /* The comparison rtx doubles as the operator operand of the insn.  */
  rtx cmp = gen_rtx_fmt_ee (code, mode, lhs, rhs);
  rtx pat;

  /* Only DFmode gets the SSE2 pattern; every other mode falls through to
     the SFmode one.  */
  if (mode == DFmode)
    pat = gen_sse2_maskcmpdf3 (mask, lhs, rhs, cmp);
  else
    pat = gen_sse_maskcmpsf3 (mask, lhs, rhs, cmp);

  emit_insn (pat);
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
static rtx
@ -19423,4 +19450,117 @@ ix86_expand_rint (rtx operand0, rtx operand1)
emit_move_insn (operand0, res);
}
/* Expand an SSE2 sequence computing floor (DO_FLOOR true) or ceil
   (DO_FLOOR false) of OPERAND1, storing the result into OPERAND0.
   The rounding is done entirely in floating point with the
   "add TWO52, subtract TWO52" trick, so no integer conversion is needed;
   i386.md picks this variant for DFmode when !TARGET_64BIT.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
     NOTE(review): for ceil, a negative input that rounds to zero appears
     to come out as +0.0 after the final addition -- confirm whether the
     sign of zero matters to callers.
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  /* 2**n, with n the number of mantissa bits of MODE.  */
  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1); MASK is filled in by the helper and is handed
     on to the copysign helper below.  */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label;
     The input is then passed through unchanged, because RES still holds
     the copy of OPERAND1.  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52;
     expand_simple_binop only treats its target argument as a suggestion;
     the value lives in whatever rtx it returns, so always pick that up
     instead of assuming it landed in XA.  */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate.
       floor: res = xa - (xa > operand1 ? 1 : 0)
       ceil:  res = xa + (operand1 > xa ? 1 : 0)
     The compare operands are swapped for ceil, and ANDing the compare
     mask with 1.0 yields the 1-or-0 adjustment.  */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, res, 0, OPTAB_DIRECT);
  /* Copy only if the expander did not honour the RES target hint.  */
  if (tmp != res)
    emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand an SSE sequence computing floor (DO_FLOOR true) or ceil
   (DO_FLOOR false) of OPERAND1, storing the result into OPERAND0.
   The value is truncated through an integer register (DImode for DFmode,
   SImode otherwise) and then corrected by one where truncation went the
   wrong way.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
     NOTE(review): the integer round trip drops the sign of zero, so
     e.g. ceil of a value in (-1, 0) yields +0.0 -- confirm whether the
     sign of zero matters to callers.
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res;

  /* 2**n, with n the number of mantissa bits of MODE.  */
  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, NULL);

  /* if (!isless (xa, TWO52)) goto label;
     The input is then passed through unchanged, because RES still holds
     the copy of OPERAND1.  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate.
       floor: res = xa - (xa > operand1 ? 1 : 0)
       ceil:  res = xa + (operand1 > xa ? 1 : 0)
     The compare operands are swapped for ceil, and ANDing the compare
     mask with 1.0 yields the 1-or-0 adjustment.  */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* expand_simple_binop only treats its target argument as a suggestion;
     the value lives in whatever rtx it returns, so copy it into RES if
     the hint was not honoured.  */
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, res, 0, OPTAB_DIRECT);
  if (tmp != res)
    emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
#include "gt-i386.h"

View File

@ -17435,34 +17435,59 @@
(define_expand "floordf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DF 1 "register_operand" ""))]
"TARGET_USE_FANCY_MATH_387
&& (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations"
"(TARGET_USE_FANCY_MATH_387
&& (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)"
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)
{
if (TARGET_64BIT)
ix86_expand_floorceil (operand0, operand1, true);
else
ix86_expand_floorceildf_32 (operand0, operand1, true);
}
else
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extenddfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_floor (op0, op1));
emit_insn (gen_extenddfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_floor (op0, op1));
emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
}
DONE;
})
(define_expand "floorsf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:SF 1 "register_operand" ""))]
"TARGET_USE_FANCY_MATH_387
&& (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations"
"(TARGET_USE_FANCY_MATH_387
&& (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)"
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)
ix86_expand_floorceil (operand0, operand1, true);
else
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extendsfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_floor (op0, op1));
emit_insn (gen_extendsfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_floor (op0, op1));
emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
}
DONE;
})
@ -17696,34 +17721,59 @@
(define_expand "ceildf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DF 1 "register_operand" ""))]
"TARGET_USE_FANCY_MATH_387
&& (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations"
"(TARGET_USE_FANCY_MATH_387
&& (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)"
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)
{
if (TARGET_64BIT)
ix86_expand_floorceil (operand0, operand1, false);
else
ix86_expand_floorceildf_32 (operand0, operand1, false);
}
else
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extenddfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_ceil (op0, op1));
emit_insn (gen_extenddfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_ceil (op0, op1));
emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
}
DONE;
})
(define_expand "ceilsf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:SF 1 "register_operand" ""))]
"TARGET_USE_FANCY_MATH_387
&& (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations"
"(TARGET_USE_FANCY_MATH_387
&& (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)"
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)
ix86_expand_floorceil (operand0, operand1, false);
else
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extendsfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_ceil (op0, op1));
emit_insn (gen_extendsfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_ceil (op0, op1));
emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
}
DONE;
})

View File

@ -733,6 +733,16 @@
[(set_attr "type" "ssecmp")
(set_attr "mode" "V4SF")])
;; Scalar SFmode compare that produces its result in an SSE register.
;; Operand 0 is set to comparison operator 3 applied to operands 1 and 2.
;; Operand 1 is tied to the destination register (constraint "0"), while
;; operand 2 may be an SSE register or memory.  The condition part of the
;; cmp..ss mnemonic is selected from operator 3 through the %D3 modifier.
;; Only available under TARGET_SSE.
(define_insn "sse_maskcmpsf3"
[(set (match_operand:SF 0 "register_operand" "=x")
(match_operator:SF 3 "sse_comparison_operator"
[(match_operand:SF 1 "register_operand" "0")
(match_operand:SF 2 "nonimmediate_operand" "xm")]))]
"TARGET_SSE"
"cmp%D3ss\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
(set_attr "mode" "SF")])
(define_insn "sse_vmmaskcmpv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
@ -1718,6 +1728,16 @@
[(set_attr "type" "ssecmp")
(set_attr "mode" "V2DF")])
;; Scalar DFmode compare that produces its result in an SSE register.
;; Operand 0 is set to comparison operator 3 applied to operands 1 and 2.
;; Operand 1 is tied to the destination register (constraint "0"), while
;; operand 2 may be an SSE register or memory.  The condition part of the
;; cmp..sd mnemonic is selected from operator 3 through the %D3 modifier.
;; Only available under TARGET_SSE2.
(define_insn "sse2_maskcmpdf3"
[(set (match_operand:DF 0 "register_operand" "=x")
(match_operator:DF 3 "sse_comparison_operator"
[(match_operand:DF 1 "register_operand" "0")
(match_operand:DF 2 "nonimmediate_operand" "xm")]))]
"TARGET_SSE2"
"cmp%D3sd\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
(set_attr "mode" "DF")])
(define_insn "sse2_vmmaskcmpv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF

View File

@ -1,3 +1,8 @@
2006-10-29 Richard Guenther <rguenther@suse.de>
* gcc.target/i386/math-torture/ceil.c: New testcase.
* gcc.target/i386/math-torture/floor.c: Likewise.
2006-10-29 Richard Guenther <rguenther@suse.de>
* gcc.target/i386/math-torture/rint.c: New testcase.

View File

@ -0,0 +1,15 @@
/* { dg-do assemble } */
/* Single-precision case: feed X to the ceilf builtin and hand back
   whatever it produces.  */
float testlf (float x)
{
  float rounded_up = __builtin_ceilf (x);
  return rounded_up;
}
/* Double-precision case: feed X to the ceil builtin and hand back
   whatever it produces.  */
double testl (double x)
{
  double rounded_up = __builtin_ceil (x);
  return rounded_up;
}
/* Extended-precision case: feed X to the ceill builtin and hand back
   whatever it produces.  */
long double testll (long double x)
{
  long double rounded_up = __builtin_ceill (x);
  return rounded_up;
}

View File

@ -0,0 +1,15 @@
/* { dg-do assemble } */
/* Single-precision case: feed X to the floorf builtin and hand back
   whatever it produces.  */
float testlf (float x)
{
  float rounded_down = __builtin_floorf (x);
  return rounded_down;
}
/* Double-precision case: feed X to the floor builtin and hand back
   whatever it produces.  */
double testl (double x)
{
  double rounded_down = __builtin_floor (x);
  return rounded_down;
}
/* Extended-precision case: feed X to the floorl builtin and hand back
   whatever it produces.  */
long double testll (long double x)
{
  long double rounded_down = __builtin_floorl (x);
  return rounded_down;
}