reload1.c (merge_assigned_reloads): Do not merge a RELOAD_OTHER instruction with a RELOAD_FOR_OPERAND_ADDRESS...
2007-04-24 Hui-May Chang <hm.chang@apple.com> * reload1.c (merge_assigned_reloads): Do not merge a RELOAD_OTHER instruction with a RELOAD_FOR_OPERAND_ADDRESS instruction. * gcc.target/i386/reload-1.c: New. From-SVN: r124115
This commit is contained in:
parent
84b31797f3
commit
b883ab57fa
|
@ -1,3 +1,8 @@
|
|||
2007-04-24 Hui-May Chang <hm.chang@apple.com>
|
||||
|
||||
* reload1.c (merge_assigned_reloads): Do not merge a RELOAD_OTHER
|
||||
instruction with a RELOAD_FOR_OPERAND_ADDRESS instruction.
|
||||
|
||||
2007-04-24 Richard Guenther <rguenther@suse.de>
|
||||
Olga Golovanevsky <olga@il.ibm.com>
|
||||
|
||||
|
|
|
@ -6323,15 +6323,23 @@ merge_assigned_reloads (rtx insn)
|
|||
transfer_replacements (i, j);
|
||||
}
|
||||
|
||||
/* If this is now RELOAD_OTHER, look for any reloads that load
|
||||
parts of this operand and set them to RELOAD_FOR_OTHER_ADDRESS
|
||||
if they were for inputs, RELOAD_OTHER for outputs. Note that
|
||||
this test is equivalent to looking for reloads for this operand
|
||||
number. */
|
||||
/* We must take special care with RELOAD_FOR_OUTPUT_ADDRESS; it may
|
||||
share registers with a RELOAD_FOR_INPUT, so we can not change it
|
||||
to RELOAD_FOR_OTHER_ADDRESS. We should never need to, since we
|
||||
do not modify RELOAD_FOR_OUTPUT. */
|
||||
/* If this is now RELOAD_OTHER, look for any reloads that
|
||||
load parts of this operand and set them to
|
||||
RELOAD_FOR_OTHER_ADDRESS if they were for inputs,
|
||||
RELOAD_OTHER for outputs. Note that this test is
|
||||
equivalent to looking for reloads for this operand
|
||||
number.
|
||||
|
||||
We must take special care with RELOAD_FOR_OUTPUT_ADDRESS;
|
||||
it may share registers with a RELOAD_FOR_INPUT, so we can
|
||||
not change it to RELOAD_FOR_OTHER_ADDRESS. We should
|
||||
never need to, since we do not modify RELOAD_FOR_OUTPUT.
|
||||
|
||||
It is possible that the RELOAD_FOR_OPERAND_ADDRESS
|
||||
instruction is assigned the same register as the earlier
|
||||
RELOAD_FOR_OTHER_ADDRESS instruction. Merging these two
|
||||
instructions will cause the RELOAD_FOR_OTHER_ADDRESS
|
||||
instruction to be deleted later on. */
|
||||
|
||||
if (rld[i].when_needed == RELOAD_OTHER)
|
||||
for (j = 0; j < n_reloads; j++)
|
||||
|
@ -6339,6 +6347,7 @@ merge_assigned_reloads (rtx insn)
|
|||
&& rld[j].when_needed != RELOAD_OTHER
|
||||
&& rld[j].when_needed != RELOAD_FOR_OTHER_ADDRESS
|
||||
&& rld[j].when_needed != RELOAD_FOR_OUTPUT_ADDRESS
|
||||
&& rld[j].when_needed != RELOAD_FOR_OPERAND_ADDRESS
|
||||
&& (! conflicting_input
|
||||
|| rld[j].when_needed == RELOAD_FOR_INPUT_ADDRESS
|
||||
|| rld[j].when_needed == RELOAD_FOR_INPADDR_ADDRESS)
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2007-04-24 Hui-May Chang <hm.chang@apple.com>
|
||||
|
||||
* gcc.target/i386/reload-1.c: New.
|
||||
|
||||
2007-04-24 Andrew Pinski <andrew_pinski@playstation.sony.com>
|
||||
Olga Golovanevsky <olga@il.ibm.com>
|
||||
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
/* { dg-do compile { target i?86-*-* } } */
|
||||
/* { dg-options "-O3 -msse2 -fdump-rtl-csa" } */
|
||||
/* { dg-skip-if "" { i?86-*-* } { "-m64" } { "" } } */
|
||||
/* { dg-final { scan-file-not reload-1.c.167r.csa "deleted 1 dead insns" } }*/
|
||||
#include <emmintrin.h>
|
||||
/* Take size_t straight from the compiler's predefined macro so that no
   libc header is needed for it.  */
typedef __SIZE_TYPE__ size_t;

/* 16-byte vector types: four float lanes and two double lanes.  */
typedef float vFloat
  __attribute__ ((__vector_size__ (4 * sizeof (float))));
typedef double vDouble
  __attribute__ ((__vector_size__ (2 * sizeof (double))));
|
||||
/* Describes one block of data handed to bar().  bar() reads `data' as an
   array of float and uses `w' as its element count; `h' and `bytes' are
   not read anywhere in this file (presumably height and byte size --
   TODO confirm).  */
struct buf
{
  void *data;
  unsigned long h, w;
  size_t bytes;
};
typedef struct buf buf;
|
||||
|
||||
/* Header shared by all job records: a link to the next job, an opaque
   payload pointer, a callback that receives the job itself, and an error
   slot.

   The link and the callback parameter use `struct job' (this very type).
   They previously named `struct Job', a distinct tag that is never
   completed, so `next' could neither be dereferenced nor be assigned a
   `job *' without a cast.  Only pointer members are affected, so the
   layout of the structure is unchanged.  */
typedef struct job
{
  struct job *next;
  void *info;
  long (*func) (struct job *job);
  long error;
} job;
|
||||
|
||||
typedef struct fj
|
||||
{
|
||||
job hd;
|
||||
buf src;
|
||||
buf dest;
|
||||
float g;
|
||||
unsigned int flags;
|
||||
} fj;
|
||||
|
||||
/* Two 256-entry tables read by bar().  Neither has an initializer, so both
   are zero-filled.  */
static const double r[256];
static const double t[256];
|
||||
|
||||
long bar (const buf *src, const buf *dest, float g, unsigned int flags)
|
||||
{
|
||||
float *d0 = (float*) src->data;
|
||||
float *d1 = (float*) dest->data;
|
||||
uintptr_t w = dest->w;
|
||||
uintptr_t idx;
|
||||
vFloat p0;
|
||||
static const vFloat m0;
|
||||
static const vDouble p[3], m, b;
|
||||
float *sr = d0;
|
||||
float *dr = d1;
|
||||
for( idx = 0; idx + 8 <= w; idx += 8 )
|
||||
{
|
||||
vFloat f0 = _mm_loadu_ps (sr);
|
||||
vFloat f1 = _mm_loadu_ps (sr + 4);
|
||||
sr += 8;
|
||||
vFloat fa0 = _mm_andnot_ps (m0, f0);
|
||||
vFloat fa1 = _mm_andnot_ps (m0, f1);
|
||||
vDouble v0 = _mm_cvtps_pd (fa0);
|
||||
vDouble v1 = _mm_cvtps_pd (_mm_movehl_ps (fa0, fa0));
|
||||
vDouble v2 = _mm_cvtps_pd (fa1);
|
||||
vDouble v3 = _mm_cvtps_pd (_mm_movehl_ps (fa1, fa1));
|
||||
vDouble vi0, vi1, vi2, vi3;
|
||||
__m128i b0, b1, b2, b3;
|
||||
b0 = _mm_packs_epi32 (_mm_packs_epi32 (b0, b1), _mm_packs_epi32 (b2, b3));
|
||||
b1 = _mm_srli_epi64 (b0, 32);
|
||||
unsigned int i0 = _mm_cvtsi128_si32 (b0);
|
||||
unsigned int i2 = _mm_cvtsi128_si32 (b1);
|
||||
v0 -= _mm_loadh_pd (_mm_load_sd (r + (i0 & 0xff)), r + (i0 >> 16));
|
||||
v1 -= _mm_loadh_pd (_mm_load_sd (r + (i2 & 0xff)), r + (i2 >> 16));
|
||||
b0 = _mm_unpackhi_epi64 (b0, b0);
|
||||
b1 = _mm_unpackhi_epi64 (b1, b1);
|
||||
unsigned int i4 = _mm_cvtsi128_si32 (b0);
|
||||
unsigned int i6 = _mm_cvtsi128_si32 (b1);
|
||||
v2 -= _mm_loadh_pd (_mm_load_sd (r + (i4 & 0xff)), r + (i4 >> 16));
|
||||
v3 -= _mm_loadh_pd (_mm_load_sd (r + (i6 & 0xff)), r + (i6 >> 16));
|
||||
v0 = p[0] + (p[1] + p[2] * v0) * v0;
|
||||
v1 = p[0] + (p[1] + p[2] * v1) * v1;
|
||||
v2 = p[0] + (p[1] + p[2] * v2) * v2;
|
||||
v3 = p[0] + (p[1] + p[2] * v3) * v3;
|
||||
vi0 = (vDouble) _mm_slli_epi64 ((__m128i)((vi0 + b) + m), 52);
|
||||
vi1 = (vDouble) _mm_slli_epi64 ((__m128i)((vi1 + b) + m), 52);
|
||||
vi2 = (vDouble) _mm_slli_epi64 ((__m128i)((vi2 + b) + m), 52);
|
||||
vi3 = (vDouble) _mm_slli_epi64 ((__m128i)((vi3 + b) + m), 52);
|
||||
vi0 *= _mm_loadh_pd (_mm_load_sd (t + (i0 & 0xff)), t + (i0 >> 16));
|
||||
vi1 *= _mm_loadh_pd (_mm_load_sd (t + (i2 & 0xff)), t + (i2 >> 16));
|
||||
vi2 *= _mm_loadh_pd (_mm_load_sd (t + (i4 & 0xff)), t + (i4 >> 16));
|
||||
vi3 *= _mm_loadh_pd (_mm_load_sd (t + (i6 & 0xff)), t + (i6 >> 16));
|
||||
v0 *= vi0;
|
||||
v1 *= vi1;
|
||||
v2 *= vi2;
|
||||
v3 *= vi3;
|
||||
vFloat r0 = _mm_movelh_ps (_mm_cvtpd_ps( v0 ), _mm_cvtpd_ps (v1));
|
||||
vFloat r1 = _mm_movelh_ps (_mm_cvtpd_ps( v2 ), _mm_cvtpd_ps (v3));
|
||||
vFloat z0 = _mm_cmpeq_ps (f0, _mm_setzero_ps());
|
||||
vFloat z1 = _mm_cmpeq_ps (f1, _mm_setzero_ps());
|
||||
r0 = _mm_andnot_ps (z0, r0);
|
||||
r1 = _mm_andnot_ps (z1, r1);
|
||||
z0 = _mm_and_ps (z0, p0);
|
||||
z1 = _mm_and_ps (z1, p0);
|
||||
r0 = _mm_or_ps (r0, z0);
|
||||
r1 = _mm_or_ps (r1, z1);
|
||||
_mm_storeu_ps (dr, r0);
|
||||
_mm_storeu_ps (dr + 4, r1);
|
||||
dr += 8;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Entry point of the test.  J is cast back to the `fj' record it is assumed
   to head (the `job' header is fj's first member), and the record's
   buffers, gain and flags are forwarded to bar().  Returns whatever bar()
   returns.  */
long
foo (job *j)
{
  fj *const rec = (fj *) j;
  const buf *in = &rec->src;
  const buf *out = &rec->dest;

  return bar (in, out, rec->g, rec->flags);
}
|
Loading…
Reference in New Issue