sse.md (copysign<mode>3): New expander.

* config/i386/sse.md (copysign<mode>3): New expander.
	* config/i386/i386-protos.h (ix86_build_signbit_mask): New prototype.
	* config/i386/i386.c (ix86_build_signbit_mask): Make public.
	Use ix86_build_const_vector.
	(enum ix86_builtins): Add IX86_BUILTIN_CPYSGNPS and
	IX86_BUILTIN_CPYSGNPD.
	(builtin_description): Add __builtin_ia32_copysignps and
	__builtin_ia32_copysignpd.
	(ix86_builtin_vectorized_function): Handle BUILT_IN_COPYSIGN
	and BUILT_IN_COPYSIGNF.

testsuite/ChangeLog:

	* gcc.target/i386/sse-copysignf-vec.c: New test.
	* gcc.target/i386/sse2-copysign-vec.c: Ditto.

From-SVN: r149639
This commit is contained in:
Uros Bizjak 2009-07-14 20:33:12 +02:00
parent 8353dd9a26
commit af766f2db7
7 changed files with 123 additions and 16 deletions

View File

@ -1,3 +1,16 @@
2009-07-14 Uros Bizjak <ubizjak@gmail.com>
* config/i386/sse.md (copysign<mode>3): New expander.
* config/i386/i386-protos.h (ix86_build_signbit_mask): New prototype.
* config/i386/i386.c (ix86_build_signbit_mask): Make public.
Use ix86_build_const_vector.
(enum ix86_builtins): Add IX86_BUILTIN_CPYSGNPS and
IX86_BUILTIN_CPYSGNPD.
(builtin_description): Add __builtin_ia32_copysignps and
__builtin_ia32_copysignpd.
(ix86_builtin_vectorized_function): Handle BUILT_IN_COPYSIGN
and BUILT_IN_COPYSIGNF.
2009-07-13 Jason Merrill <jason@redhat.com>
* builtins.c (can_trust_pointer_alignment): New fn.
@ -16,6 +29,7 @@
* dwarf2out.c (gen_type_die_with_usage): Added comment.
2009-07-14 Richard Guenther <rguenther@suse.de>
Andrey Belevantsev <abel@ispras.ru>
PR middle-end/40745
* cfgexpand.c (partition_stack_vars): Do not bother to update
@ -66,13 +80,12 @@
2009-07-13 Ghassan Shobaki <ghassan.shobaki@amd.com>
* haifa-sched.c
(rank_for_schedule): Introduced flags to enable/disable
individual scheduling heuristics.
* common.opt: Introduced flags to enable/disable
individual heuristics in the scheduler.
* doc/invoke.texi: Introduced flags to enable/disable
individual heuristics in the scheduler.
* haifa-sched.c (rank_for_schedule): Introduced flags to
enable/disable individual scheduling heuristics.
* common.opt: Introduced flags to enable/disable individual
heuristics in the scheduler.
* doc/invoke.texi: Introduced flags to enable/disable individual
heuristics in the scheduler.
2009-07-13 Kai Tietz <kai.tietz@onevision.com>

View File

@ -89,6 +89,7 @@ extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
rtx[]);
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
extern void ix86_split_convert_uns_si_sse (rtx[]);
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);

View File

@ -14017,7 +14017,7 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
all elements of the vector register. If INVERT is true, then create
a mask excluding the sign bit. */
static rtx
rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
enum machine_mode vec_mode, imode;
@ -14181,15 +14181,9 @@ ix86_expand_copysign (rtx operands[])
op0 = CONST0_RTX (vmode);
else
{
rtvec v;
rtx v = ix86_build_const_vector (mode, false, op0);
if (mode == SFmode)
v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
CONST0_RTX (SFmode), CONST0_RTX (SFmode));
else
v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
op0 = force_reg (vmode, v);
}
}
else if (op0 != CONST0_RTX (mode))
@ -20910,6 +20904,10 @@ enum ix86_builtins
IX86_BUILTIN_FABSQ,
IX86_BUILTIN_COPYSIGNQ,
/* Vectorizer support builtins. */
IX86_BUILTIN_CPYSGNPS,
IX86_BUILTIN_CPYSGNPD,
/* SSE5 instructions */
IX86_BUILTIN_FMADDSS,
IX86_BUILTIN_FMADDSD,
@ -21746,6 +21744,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
@ -21843,6 +21843,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
@ -25729,6 +25731,18 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
break;
case BUILT_IN_COPYSIGN:
if (out_mode == DFmode && out_n == 2
&& in_mode == DFmode && in_n == 2)
return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
break;
case BUILT_IN_COPYSIGNF:
if (out_mode == SFmode && out_n == 4
&& in_mode == SFmode && in_n == 4)
return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
break;
default:
;
}

View File

@ -1594,6 +1594,26 @@
[(set_attr "type" "sselog")
(set_attr "mode" "<MODE>")])
;; Vector copysign expander.  The result (operand 0) is built as
;;   (operand 1 AND operand 3) IOR (operand 2 AND operand 4)
;; where operands 3 and 4 are masks obtained from ix86_build_signbit_mask
;; and operands 5 and 6 are temporaries holding the two masked values.
(define_expand "copysign<mode>3"
  [(set (match_dup 5)
	(and:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")
			(match_dup 3)))
   (set (match_dup 6)
	(and:SSEMODEF2P (match_operand:SSEMODEF2P 2 "register_operand" "")
			(match_dup 4)))
   (set (match_operand:SSEMODEF2P 0 "register_operand" "")
	(ior:SSEMODEF2P (match_dup 5) (match_dup 6)))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
{
  /* INVERT is true for operand 3: a mask excluding the sign bit, applied
     to operand 1.  INVERT is false for operand 4, the mask applied to
     operand 2.  Both are requested for all vector elements (VECT true).  */
  operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 1);
  operands[4] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);

  /* Only the two intermediate results need fresh pseudo registers;
     operands 3 and 4 are supplied by the mask builder above, so no
     register is allocated for them.  */
  operands[5] = gen_reg_rtx (<MODE>mode);
  operands[6] = gen_reg_rtx (<MODE>mode);
})
;; Also define scalar versions. These are used for abs, neg, and
;; conditional move. Using subregs into vector modes causes register
;; allocation lossage. These patterns do not allow memory operands

View File

@ -1,3 +1,8 @@
2009-07-14 Uros Bizjak <ubizjak@gmail.com>
* gcc.target/i386/sse-copysignf-vec.c: New test.
* gcc.target/i386/sse2-copysign-vec.c: Ditto.
2009-07-14 Jason Merrill <jason@redhat.com>
PR c++/37276

View File

@ -0,0 +1,27 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -msse" } */
#include "sse-check.h"
/* Prototype of the function exercised by this test.  */
extern float copysignf (float, float);

/* Number of elements in each of the arrays below.  */
#define N 16

/* First arguments to copysignf: eight negative values, then eight
   positive ones.  */
float a[N] = {-0.1f,-3.2f,-6.3f,-9.4f,-12.5f,-15.6f,-18.7f,-21.8f,24.9f,27.1f,30.2f,33.3f,36.4f,39.5f,42.6f,45.7f};
/* Second arguments to copysignf, with both signs paired against each
   half of A.  */
float b[N] = {-1.2f,3.4f,-5.6f,7.8f,-9.0f,1.0f,-2.0f,3.0f,-4.0f,-5.0f,6.0f,7.0f,-8.0f,-9.0f,10.0f,11.0f};
/* Results written by the first loop of sse_test.  */
float r[N];

/* Store copysignf (a[k], b[k]) into r[k] for every element, then
   recompute each value and abort on the first element that differs.  */
static void
sse_test (void)
{
  int idx;

  /* Loop under test: element-wise copysignf over the whole array.  */
  for (idx = 0; idx < N; ++idx)
    {
      r[idx] = copysignf (a[idx], b[idx]);
    }

  /* Verification pass over the stored results.  */
  for (idx = 0; idx < N; ++idx)
    {
      const float expected = copysignf (a[idx], b[idx]);

      if (r[idx] != expected)
	abort ();
    }
}

View File

@ -0,0 +1,27 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -msse2" } */
#include "sse2-check.h"
/* Prototype of the function exercised by this test.  */
extern double copysign (double, double);

/* Number of elements in each of the arrays below.  */
#define N 16

/* First arguments to copysign: eight negative values, then eight
   positive ones.  */
double a[N] = {-0.1,-3.2,-6.3,-9.4,-12.5,-15.6,-18.7,-21.8,24.9,27.1,30.2,33.3,36.4,39.5,42.6,45.7};
/* Second arguments to copysign, with both signs paired against each
   half of A.  */
double b[N] = {-1.2,3.4,-5.6,7.8,-9.0,1.0,-2.0,3.0,-4.0,-5.0,6.0,7.0,-8.0,-9.0,10.0,11.0};
/* Results written by the first loop of sse2_test.  */
double r[N];

/* Store copysign (a[k], b[k]) into r[k] for every element, then
   recompute each value and abort on the first element that differs.  */
static void
sse2_test (void)
{
  int idx;

  /* Loop under test: element-wise copysign over the whole array.  */
  for (idx = 0; idx < N; ++idx)
    {
      r[idx] = copysign (a[idx], b[idx]);
    }

  /* Verification pass over the stored results.  */
  for (idx = 0; idx < N; ++idx)
    {
      const double expected = copysign (a[idx], b[idx]);

      if (r[idx] != expected)
	abort ();
    }
}