From 904e4b8d32159b43f5edba2f4723fa2782925d0a Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Sat, 8 Mar 2008 13:43:13 +0100
Subject: [PATCH] re PR target/22152 (Poor loop optimization when using mmx builtins)

	PR target/22152
	* gcc.target/i386/pr22152.c: New test.
	* gcc.target/i386/sse2-mmx.c: Ditto.

From-SVN: r133034
---
 gcc/testsuite/ChangeLog                  |  6 ++
 gcc/testsuite/gcc.target/i386/pr22152.c  | 18 ++++++
 gcc/testsuite/gcc.target/i386/sse2-mmx.c | 75 ++++++++++++++++++++++++
 3 files changed, 99 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr22152.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx.c

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8a8c5edadf5..06e36a74ab6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2008-03-08  Uros Bizjak
+
+	PR target/22152
+	* gcc.target/i386/pr22152.c: New test.
+	* gcc.target/i386/sse2-mmx.c: Ditto.
+
 2008-03-08  Eric Botcazou
 
 	* gnat.dg/uninit_func.adb: New test.
diff --git a/gcc/testsuite/gcc.target/i386/pr22152.c b/gcc/testsuite/gcc.target/i386/pr22152.c
new file mode 100644
index 00000000000..d12597703ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr22152.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+#include <mmintrin.h>
+
+__m64
+unsigned_add3 (const __m64 * a, const __m64 * b, unsigned long count)
+{
+  __m64 sum;
+  unsigned int i;
+
+  for (i = 1; i < count; i++)
+    sum = _mm_add_si64 (a[i], b[i]);
+
+  return sum;
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+.*%mm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx.c b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
new file mode 100644
index 00000000000..cc2381e04ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+#include <mmintrin.h>
+
+#define N 4
+
+unsigned long long a[N], b[N], result[N];
+
+unsigned long long check[N] =
+  { 0x101010101010100full,
+    0x1010101010101010ull,
+    0x1010101010101010ull,
+    0x1010101010101010ull };
+
+__m64
+unsigned_add3 (const __m64 * a, const __m64 * b,
+               __m64 * result, unsigned int count)
+{
+  __m64 _a, _b, one, sum, carry, onesCarry;
+
+  unsigned int i;
+
+  one = _mm_cmpeq_pi8 (_a, _a);
+  one = _mm_sub_si64 (_mm_xor_si64 (one, one), one);
+
+  carry = _mm_xor_si64 (one, one);
+
+  for (i = 0; i < count; i++)
+    {
+      _a = a[i];
+      _b = b[i];
+
+      sum = _mm_add_si64 (_a, _b);
+      sum = _mm_add_si64 (sum, carry);
+
+      result[i] = sum;
+
+      onesCarry = _mm_and_si64 (_mm_xor_si64 (_a, _b), carry);
+      onesCarry = _mm_or_si64 (_mm_and_si64 (_a, _b), onesCarry);
+      onesCarry = _mm_and_si64 (onesCarry, one);
+
+      _a = _mm_srli_si64 (_a, 1);
+      _b = _mm_srli_si64 (_b, 1);
+
+      carry = _mm_add_si64 (_mm_add_si64 (_a, _b), onesCarry);
+      carry = _mm_srli_si64 (carry, 63);
+    }
+
+  _mm_empty ();
+  return carry;
+}
+
+void __attribute__((noinline))
+sse2_test (void)
+{
+  unsigned long long carry;
+  int i;
+
+  /* Really long numbers. */
+  a[3] = a[2] = a[1] = a[0] = 0xd3d3d3d3d3d3d3d3ull;
+  b[3] = b[2] = b[1] = b[0] = 0x3c3c3c3c3c3c3c3cull;
+
+  carry = (unsigned long long) unsigned_add3
+    ((__m64 *)a, (__m64 *)b, (__m64 *)result, N);
+
+  if (carry != 1)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    if (result [i] != check[i])
+      abort ();
+}