sse.md (copysign<mode>3): New expander.
* config/i386/sse.md (copysign<mode>3): New expander.
* config/i386/i386-protos.h (ix86_build_signbit_mask): New prototype.
* config/i386/i386.c (ix86_build_signbit_mask): Make public.
Use ix86_build_const_vector.
(enum ix86_builtins): Add IX86_BUILTIN_CPYSGNPS and IX86_BUILTIN_CPYSGNPD.
(builtin_description): Add __builtin_ia32_copysignps and __builtin_ia32_copysignpd.
(ix86_builtin_vectorized_function): Handle BUILT_IN_COPYSIGN and BUILT_IN_COPYSIGNF.

testsuite/ChangeLog:

* gcc.target/i386/sse-copysignf-vec.c: New test.
* gcc.target/i386/sse2-copysign-vec.c: Ditto.

From-SVN: r149639
This commit is contained in:
parent
8353dd9a26
commit
af766f2db7
@ -1,3 +1,16 @@
|
||||
2009-07-14 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/sse.md (copysign<mode>3): New expander.
|
||||
* config/i386/i386-protos.h (ix86_build_signbit_mask): New prototype.
|
||||
* config/i386/i386.c (ix86_build_signbit_mask): Make public.
|
||||
Use ix86_build_const_vector.
|
||||
(enum ix86_builtins): Add IX86_BUILTIN_CPYSGNPS and
|
||||
IX86_BUILTIN_CPYSGNPD.
|
||||
(builtin_description): Add __builtin_ia32_copysignps and
|
||||
__builtin_ia32_copysignpd.
|
||||
(ix86_builtin_vectorized_function): Handle BUILT_IN_COPYSIGN
|
||||
and BUILT_IN_COPYSIGNF.
|
||||
|
||||
2009-07-13 Jason Merrill <jason@redhat.com>
|
||||
|
||||
* builtins.c (can_trust_pointer_alignment): New fn.
|
||||
@ -16,6 +29,7 @@
|
||||
* dwarf2out.c (gen_type_die_with_usage): Added comment.
|
||||
|
||||
2009-07-14 Richard Guenther <rguenther@suse.de>
|
||||
Andrey Belevantsev <abel@ispras.ru>
|
||||
|
||||
PR middle-end/40745
|
||||
* cfgexpand.c (partition_stack_vars): Do not bother to update
|
||||
@ -66,13 +80,12 @@
|
||||
|
||||
2009-07-13 Ghassan Shobaki <ghassan.shobaki@amd.com>
|
||||
|
||||
* haifa-sched.c
|
||||
(rank_for_schedule): Introduced flags to enable/disable
|
||||
individual scheduling heuristics.
|
||||
* common.opt: Introduced flags to enable/disable
|
||||
individual heuristics in the scheduler.
|
||||
* doc/invoke.texi: Introduced flags to enable/disable
|
||||
individual heuristics in the scheduler.
|
||||
* haifa-sched.c (rank_for_schedule): Introduced flags to
|
||||
enable/disable individual scheduling heuristics.
|
||||
* common.opt: Introduced flags to enable/disable individual
|
||||
heuristics in the scheduler.
|
||||
* doc/invoke.texi: Introduced flags to enable/disable individual
|
||||
heuristics in the scheduler.
|
||||
|
||||
2009-07-13 Kai Tietz <kai.tietz@onevision.com>
|
||||
|
||||
|
@ -89,6 +89,7 @@ extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
|
||||
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
|
||||
rtx[]);
|
||||
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
|
||||
extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
|
||||
extern void ix86_split_convert_uns_si_sse (rtx[]);
|
||||
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
|
||||
extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
|
||||
|
@ -14017,7 +14017,7 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
|
||||
all elements of the vector register. If INVERT is true, then create
|
||||
a mask excluding the sign bit. */
|
||||
|
||||
static rtx
|
||||
rtx
|
||||
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
|
||||
{
|
||||
enum machine_mode vec_mode, imode;
|
||||
@ -14181,15 +14181,9 @@ ix86_expand_copysign (rtx operands[])
|
||||
op0 = CONST0_RTX (vmode);
|
||||
else
|
||||
{
|
||||
rtvec v;
|
||||
rtx v = ix86_build_const_vector (mode, false, op0);
|
||||
|
||||
if (mode == SFmode)
|
||||
v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
|
||||
CONST0_RTX (SFmode), CONST0_RTX (SFmode));
|
||||
else
|
||||
v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
|
||||
|
||||
op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
|
||||
op0 = force_reg (vmode, v);
|
||||
}
|
||||
}
|
||||
else if (op0 != CONST0_RTX (mode))
|
||||
@ -20910,6 +20904,10 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_FABSQ,
|
||||
IX86_BUILTIN_COPYSIGNQ,
|
||||
|
||||
/* Vectorizer support builtins. */
|
||||
IX86_BUILTIN_CPYSGNPS,
|
||||
IX86_BUILTIN_CPYSGNPD,
|
||||
|
||||
/* SSE5 instructions */
|
||||
IX86_BUILTIN_FMADDSS,
|
||||
IX86_BUILTIN_FMADDSD,
|
||||
@ -21746,6 +21744,8 @@ static const struct builtin_description bdesc_args[] =
|
||||
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
||||
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
||||
|
||||
{ OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
||||
|
||||
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
||||
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
||||
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
||||
@ -21843,6 +21843,8 @@ static const struct builtin_description bdesc_args[] =
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
||||
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
||||
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
||||
@ -25729,6 +25731,18 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
|
||||
return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
|
||||
break;
|
||||
|
||||
case BUILT_IN_COPYSIGN:
|
||||
if (out_mode == DFmode && out_n == 2
|
||||
&& in_mode == DFmode && in_n == 2)
|
||||
return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
|
||||
break;
|
||||
|
||||
case BUILT_IN_COPYSIGNF:
|
||||
if (out_mode == SFmode && out_n == 4
|
||||
&& in_mode == SFmode && in_n == 4)
|
||||
return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
|
||||
break;
|
||||
|
||||
default:
|
||||
;
|
||||
}
|
||||
|
@ -1594,6 +1594,26 @@
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_expand "copysign<mode>3"
|
||||
[(set (match_dup 5)
|
||||
(and:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")
|
||||
(match_dup 3)))
|
||||
(set (match_dup 6)
|
||||
(and:SSEMODEF2P (match_operand:SSEMODEF2P 2 "register_operand" "")
|
||||
(match_dup 4)))
|
||||
(set (match_operand:SSEMODEF2P 0 "register_operand" "")
|
||||
(ior:SSEMODEF2P (match_dup 5) (match_dup 6)))]
|
||||
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 3; i < 7; i++)
|
||||
operands[i] = gen_reg_rtx (<MODE>mode);
|
||||
|
||||
operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 1);
|
||||
operands[4] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
|
||||
})
|
||||
|
||||
;; Also define scalar versions. These are used for abs, neg, and
|
||||
;; conditional move. Using subregs into vector modes causes register
|
||||
;; allocation lossage. These patterns do not allow memory operands
|
||||
|
@ -1,3 +1,8 @@
|
||||
2009-07-14 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* gcc.target/i386/sse-copysignf-vec.c: New test.
|
||||
* gcc.target/i386/sse2-copysign-vec.c: Ditto.
|
||||
|
||||
2009-07-14 Jason Merrill <jason@redhat.com>
|
||||
|
||||
PR c++/37276
|
||||
|
27
gcc/testsuite/gcc.target/i386/sse-copysignf-vec.c
Normal file
27
gcc/testsuite/gcc.target/i386/sse-copysignf-vec.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -msse" } */
|
||||
|
||||
#include "sse-check.h"
|
||||
|
||||
extern float copysignf (float, float);
|
||||
|
||||
#define N 16
|
||||
|
||||
float a[N] = {-0.1f,-3.2f,-6.3f,-9.4f,-12.5f,-15.6f,-18.7f,-21.8f,24.9f,27.1f,30.2f,33.3f,36.4f,39.5f,42.6f,45.7f};
|
||||
float b[N] = {-1.2f,3.4f,-5.6f,7.8f,-9.0f,1.0f,-2.0f,3.0f,-4.0f,-5.0f,6.0f,7.0f,-8.0f,-9.0f,10.0f,11.0f};
|
||||
float r[N];
|
||||
|
||||
static void
|
||||
sse_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
r[i] = copysignf (a[i], b[i]);
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
if (r[i] != copysignf (a[i], b[i]))
|
||||
abort ();
|
||||
}
|
||||
|
27
gcc/testsuite/gcc.target/i386/sse2-copysign-vec.c
Normal file
27
gcc/testsuite/gcc.target/i386/sse2-copysign-vec.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -msse2" } */
|
||||
|
||||
#include "sse2-check.h"
|
||||
|
||||
extern double copysign (double, double);
|
||||
|
||||
#define N 16
|
||||
|
||||
double a[N] = {-0.1,-3.2,-6.3,-9.4,-12.5,-15.6,-18.7,-21.8,24.9,27.1,30.2,33.3,36.4,39.5,42.6,45.7};
|
||||
double b[N] = {-1.2,3.4,-5.6,7.8,-9.0,1.0,-2.0,3.0,-4.0,-5.0,6.0,7.0,-8.0,-9.0,10.0,11.0};
|
||||
double r[N];
|
||||
|
||||
static void
|
||||
sse2_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
r[i] = copysign (a[i], b[i]);
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
if (r[i] != copysign (a[i], b[i]))
|
||||
abort ();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user