257 lines
6.1 KiB
C
257 lines
6.1 KiB
C
/* PR tree-optimization/71488 */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-require-effective-target vect_pack_trunc } */
|
|
/* { dg-additional-options "-msse4" { target sse4_runtime } } */
|
|
|
|
#include "tree-vect.h"
|
|
|
|
int i1, i2;
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn1 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn2 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn3 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn4 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn5 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn6 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn7 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn8 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn9 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn10 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn11 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn12 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn13 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn14 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn15 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn16 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn17 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
void __attribute__((noclone,noinline))
|
|
fn18 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < size; i++)
|
|
p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
|
|
}
|
|
|
|
int eq (int i1, int i2) { return i1 == i2; }
|
|
int ne (int i1, int i2) { return i1 != i2; }
|
|
int lt (int i1, int i2) { return i1 < i2; }
|
|
int le (int i1, int i2) { return i1 <= i2; }
|
|
int gt (int i1, int i2) { return i1 > i2; }
|
|
int ge (int i1, int i2) { return i1 >= i2; }
|
|
|
|
typedef int (*cmp_fn)(int, int);
|
|
|
|
void
|
|
check (int *p, cmp_fn fn)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < 32; i++)
|
|
{
|
|
int t1 = ((i % 4) > 1) == 0;
|
|
int t2 = (i % 2) == 0;
|
|
int res = fn (t1, t2) + t1;
|
|
if (p[i] != res)
|
|
__builtin_abort ();
|
|
}
|
|
}
|
|
|
|
int
|
|
main (int argc, char **argv)
|
|
{
|
|
int i1[32], i2[32], res[32];
|
|
short s2[32];
|
|
long long l2[32];
|
|
int i;
|
|
|
|
check_vect ();
|
|
|
|
for (i = 0; i < 32; i++)
|
|
{
|
|
l2[i] = i2[i] = s2[i] = i % 2;
|
|
i1[i] = (i % 4) > 1;
|
|
asm ("":::"memory");
|
|
}
|
|
|
|
fn1 (res, i1, i2, 32);
|
|
check (res, gt);
|
|
fn2 (res, i1, s2, 32);
|
|
check (res, gt);
|
|
fn3 (res, i1, l2, 32);
|
|
check (res, gt);
|
|
|
|
fn4 (res, i1, i2, 32);
|
|
check (res, ge);
|
|
fn5 (res, i1, s2, 32);
|
|
check (res, ge);
|
|
fn6 (res, i1, l2, 32);
|
|
check (res, ge);
|
|
|
|
fn7 (res, i1, i2, 32);
|
|
check (res, lt);
|
|
fn8 (res, i1, s2, 32);
|
|
check (res, lt);
|
|
fn9 (res, i1, l2, 32);
|
|
check (res, lt);
|
|
|
|
fn10 (res, i1, i2, 32);
|
|
check (res, le);
|
|
fn11 (res, i1, s2, 32);
|
|
check (res, le);
|
|
fn12 (res, i1, l2, 32);
|
|
check (res, le);
|
|
|
|
fn13 (res, i1, i2, 32);
|
|
check (res, eq);
|
|
fn14 (res, i1, s2, 32);
|
|
check (res, eq);
|
|
fn15 (res, i1, l2, 32);
|
|
check (res, eq);
|
|
|
|
fn16 (res, i1, i2, 32);
|
|
check (res, ne);
|
|
fn17 (res, i1, s2, 32);
|
|
check (res, ne);
|
|
fn18 (res, i1, l2, 32);
|
|
check (res, ne);
|
|
}
|
|
|
|
/* { dg-final { scan-tree-dump-times "VECTORIZED" 18 "vect" { target sse4_runtime } } } */
|