From b883ab57fad6a9e6b0a2b40f3c512612bf3192ee Mon Sep 17 00:00:00 2001 From: Hui-May Chang Date: Tue, 24 Apr 2007 19:17:22 +0000 Subject: [PATCH] reload1.c (merge_assigned_reloads): Do not merge a RELOAD_OTHER instruction with a RELOAD_FOR_OPERAND_ADDRESS... 2007-04-24 Hui-May Chang * reload1.c (merge_assigned_reloads) : Do not merge a RELOAD_OTHER instruction with a RELOAD_FOR_OPERAND_ADDRESS instruction. * gcc.target/i386/reload-1.c. New. From-SVN: r124115 --- gcc/ChangeLog | 5 ++ gcc/reload1.c | 27 ++++-- gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.target/i386/reload-1.c | 109 +++++++++++++++++++++++ 4 files changed, 136 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/reload-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9ab42d9a6af..451472e8da8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2007-04-24 Hui-May Chang + + * reload1.c (merge_assigned_reloads) : Do not merge a RELOAD_OTHER + instruction with a RELOAD_FOR_OPERAND_ADDRESS instruction. + 2007-04-24 Richard Guenther Olga Golovanevsky diff --git a/gcc/reload1.c b/gcc/reload1.c index f29f0799140..9ee046ae1ec 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -6323,15 +6323,23 @@ merge_assigned_reloads (rtx insn) transfer_replacements (i, j); } - /* If this is now RELOAD_OTHER, look for any reloads that load - parts of this operand and set them to RELOAD_FOR_OTHER_ADDRESS - if they were for inputs, RELOAD_OTHER for outputs. Note that - this test is equivalent to looking for reloads for this operand - number. */ - /* We must take special care with RELOAD_FOR_OUTPUT_ADDRESS; it may - share registers with a RELOAD_FOR_INPUT, so we can not change it - to RELOAD_FOR_OTHER_ADDRESS. We should never need to, since we - do not modify RELOAD_FOR_OUTPUT. */ + /* If this is now RELOAD_OTHER, look for any reloads that + load parts of this operand and set them to + RELOAD_FOR_OTHER_ADDRESS if they were for inputs, + RELOAD_OTHER for outputs. 
Note that this test is + equivalent to looking for reloads for this operand + number. + + We must take special care with RELOAD_FOR_OUTPUT_ADDRESS; + it may share registers with a RELOAD_FOR_INPUT, so we can + not change it to RELOAD_FOR_OTHER_ADDRESS. We should + never need to, since we do not modify RELOAD_FOR_OUTPUT. + + It is possible that the RELOAD_FOR_OPERAND_ADDRESS + instruction is assigned the same register as the earlier + RELOAD_FOR_OTHER_ADDRESS instruction. Merging these two + instructions will cause the RELOAD_FOR_OTHER_ADDRESS + instruction to be deleted later on. */ if (rld[i].when_needed == RELOAD_OTHER) for (j = 0; j < n_reloads; j++) @@ -6339,6 +6347,7 @@ merge_assigned_reloads (rtx insn) && rld[j].when_needed != RELOAD_OTHER && rld[j].when_needed != RELOAD_FOR_OTHER_ADDRESS && rld[j].when_needed != RELOAD_FOR_OUTPUT_ADDRESS + && rld[j].when_needed != RELOAD_FOR_OPERAND_ADDRESS && (! conflicting_input || rld[j].when_needed == RELOAD_FOR_INPUT_ADDRESS || rld[j].when_needed == RELOAD_FOR_INPADDR_ADDRESS) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7980e53297c..59c10fe0cc5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2007-04-24 Hui-May Chang + + * gcc.target/i386/reload-1.c. New. 
+ 2007-04-24 Andrew Pinski Olga Golovanevsky diff --git a/gcc/testsuite/gcc.target/i386/reload-1.c b/gcc/testsuite/gcc.target/i386/reload-1.c new file mode 100644 index 00000000000..7fcb5b421af --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/reload-1.c @@ -0,0 +1,109 @@ +/* { dg-do compile { target i?86-*-* } } */ +/* { dg-options "-O3 -msse2 -fdump-rtl-csa" } */ +/* { dg-skip-if "" { i?86-*-* } { "-m64" } { "" } } */ +/* { dg-final { scan-file-not reload-1.c.167r.csa "deleted 1 dead insns" } }*/ +#include <emmintrin.h> +typedef __SIZE_TYPE__ size_t; +typedef float vFloat __attribute__ ((__vector_size__ (16))); +typedef double vDouble __attribute__ ((__vector_size__ (16))); +typedef struct buf +{ + void *data; + unsigned long h; + unsigned long w; + size_t bytes; +} buf; + +typedef struct job +{ + struct Job *next; + void * info; + long (*func)(struct Job *job); + long error; +} job; + +typedef struct fj +{ + job hd; + buf src; + buf dest; + float g; + unsigned int flags; +} fj; + +static const double r[256], t[256]; + +long bar (const buf *src, const buf *dest, float g, unsigned int flags) +{ + float *d0 = (float*) src->data; + float *d1 = (float*) dest->data; + uintptr_t w = dest->w; + uintptr_t idx; + vFloat p0; + static const vFloat m0; + static const vDouble p[3], m, b; + float *sr = d0; + float *dr = d1; + for( idx = 0; idx + 8 <= w; idx += 8 ) + { + vFloat f0 = _mm_loadu_ps (sr); + vFloat f1 = _mm_loadu_ps (sr + 4); + sr += 8; + vFloat fa0 = _mm_andnot_ps (m0, f0); + vFloat fa1 = _mm_andnot_ps (m0, f1); + vDouble v0 = _mm_cvtps_pd (fa0); + vDouble v1 = _mm_cvtps_pd (_mm_movehl_ps (fa0, fa0)); + vDouble v2 = _mm_cvtps_pd (fa1); + vDouble v3 = _mm_cvtps_pd (_mm_movehl_ps (fa1, fa1)); + vDouble vi0, vi1, vi2, vi3; + __m128i b0, b1, b2, b3; + b0 = _mm_packs_epi32 (_mm_packs_epi32 (b0, b1), _mm_packs_epi32 (b2, b3)); + b1 = _mm_srli_epi64 (b0, 32); + unsigned int i0 = _mm_cvtsi128_si32 (b0); + unsigned int i2 = _mm_cvtsi128_si32 (b1); + v0 -= _mm_loadh_pd (_mm_load_sd (r +
(i0 & 0xff)), r + (i0 >> 16)); + v1 -= _mm_loadh_pd (_mm_load_sd (r + (i2 & 0xff)), r + (i2 >> 16)); + b0 = _mm_unpackhi_epi64 (b0, b0); + b1 = _mm_unpackhi_epi64 (b1, b1); + unsigned int i4 = _mm_cvtsi128_si32 (b0); + unsigned int i6 = _mm_cvtsi128_si32 (b1); + v2 -= _mm_loadh_pd (_mm_load_sd (r + (i4 & 0xff)), r + (i4 >> 16)); + v3 -= _mm_loadh_pd (_mm_load_sd (r + (i6 & 0xff)), r + (i6 >> 16)); + v0 = p[0] + (p[1] + p[2] * v0) * v0; + v1 = p[0] + (p[1] + p[2] * v1) * v1; + v2 = p[0] + (p[1] + p[2] * v2) * v2; + v3 = p[0] + (p[1] + p[2] * v3) * v3; + vi0 = (vDouble) _mm_slli_epi64 ((__m128i)((vi0 + b) + m), 52); + vi1 = (vDouble) _mm_slli_epi64 ((__m128i)((vi1 + b) + m), 52); + vi2 = (vDouble) _mm_slli_epi64 ((__m128i)((vi2 + b) + m), 52); + vi3 = (vDouble) _mm_slli_epi64 ((__m128i)((vi3 + b) + m), 52); + vi0 *= _mm_loadh_pd (_mm_load_sd (t + (i0 & 0xff)), t + (i0 >> 16)); + vi1 *= _mm_loadh_pd (_mm_load_sd (t + (i2 & 0xff)), t + (i2 >> 16)); + vi2 *= _mm_loadh_pd (_mm_load_sd (t + (i4 & 0xff)), t + (i4 >> 16)); + vi3 *= _mm_loadh_pd (_mm_load_sd (t + (i6 & 0xff)), t + (i6 >> 16)); + v0 *= vi0; + v1 *= vi1; + v2 *= vi2; + v3 *= vi3; + vFloat r0 = _mm_movelh_ps (_mm_cvtpd_ps( v0 ), _mm_cvtpd_ps (v1)); + vFloat r1 = _mm_movelh_ps (_mm_cvtpd_ps( v2 ), _mm_cvtpd_ps (v3)); + vFloat z0 = _mm_cmpeq_ps (f0, _mm_setzero_ps()); + vFloat z1 = _mm_cmpeq_ps (f1, _mm_setzero_ps()); + r0 = _mm_andnot_ps (z0, r0); + r1 = _mm_andnot_ps (z1, r1); + z0 = _mm_and_ps (z0, p0); + z1 = _mm_and_ps (z1, p0); + r0 = _mm_or_ps (r0, z0); + r1 = _mm_or_ps (r1, z1); + _mm_storeu_ps (dr, r0); + _mm_storeu_ps (dr + 4, r1); + dr += 8; + } + return 0; +} + +long foo (job *j ) +{ + fj *jd = (fj*) j; + return bar (&jd->src, &jd->dest, jd->g, jd->flags); +}