backport: re PR target/48605 (gcc.target/i386/sse4_1-insertps-2.c FAILs with -mtune=geode - instruction insertps with memory operands behaves differently)
Backported from 4.6 branch.
2011-04-15 Jakub Jelinek <jakub@redhat.com>
PR target/48605
* config/i386/sse.md (avx_insertps, sse4_1_insertps): If operands[2] is a MEM, offset it as needed based on the top 2 bits in operands[3], change the MEM mode to SFmode, and mask those 2 bits away from operands[3].
* gcc.target/i386/sse4_1-insertps-3.c: New test.
* gcc.target/i386/sse4_1-insertps-4.c: New test.
* gcc.target/i386/avx-insertps-3.c: New test.
* gcc.target/i386/avx-insertps-4.c: New test.
From-SVN: r172583
This commit is contained in:
parent
da1ba202bd
commit
14eb2ba6cd
|
@ -1,3 +1,13 @@
|
|||
2011-04-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
Backported from 4.6 branch
|
||||
2011-04-15 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/48605
|
||||
* config/i386/sse.md (avx_insertps, sse4_1_insertps): If operands[2]
|
||||
is a MEM, offset it as needed based on top 2 bits in operands[3],
|
||||
change MEM mode to SFmode and mask those 2 bits away from operands[3].
|
||||
|
||||
2011-04-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* BASE-VER: Set to 4.4.7.
|
||||
|
|
|
@ -3655,7 +3655,16 @@
|
|||
(match_operand:SI 3 "const_0_to_255_operand" "n")]
|
||||
UNSPEC_INSERTPS))]
|
||||
"TARGET_AVX"
|
||||
"vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
|
||||
{
|
||||
if (MEM_P (operands[2]))
|
||||
{
|
||||
unsigned count_s = INTVAL (operands[3]) >> 6;
|
||||
if (count_s)
|
||||
operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
|
||||
operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
|
||||
}
|
||||
return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
|
||||
}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
@ -3667,7 +3676,16 @@
|
|||
(match_operand:SI 3 "const_0_to_255_operand" "n")]
|
||||
UNSPEC_INSERTPS))]
|
||||
"TARGET_SSE4_1"
|
||||
"insertps\t{%3, %2, %0|%0, %2, %3}";
|
||||
{
|
||||
if (MEM_P (operands[2]))
|
||||
{
|
||||
unsigned count_s = INTVAL (operands[3]) >> 6;
|
||||
if (count_s)
|
||||
operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
|
||||
operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
|
||||
}
|
||||
return "insertps\t{%3, %2, %0|%0, %2, %3}";
|
||||
}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
|
|
@ -1,3 +1,14 @@
|
|||
2011-04-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
Backported from 4.6 branch
|
||||
2011-04-15 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/48605
|
||||
* gcc.target/i386/sse4_1-insertps-3.c: New test.
|
||||
* gcc.target/i386/sse4_1-insertps-4.c: New test.
|
||||
* gcc.target/i386/avx-insertps-3.c: New test.
|
||||
* gcc.target/i386/avx-insertps-4.c: New test.
|
||||
|
||||
2011-04-16 Release Manager
|
||||
|
||||
* GCC 4.4.6 released.
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
/* { dg-do run { target ilp32 } } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mfpmath=sse -mavx -mtune=geode" } */
|
||||
|
||||
#define CHECK_H "avx-check.h"
|
||||
#define TEST avx_test
|
||||
|
||||
#include "sse4_1-insertps-3.c"
|
|
@ -0,0 +1,8 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mfpmath=sse -mavx" } */
|
||||
|
||||
#define CHECK_H "avx-check.h"
|
||||
#define TEST avx_test
|
||||
|
||||
#include "sse4_1-insertps-4.c"
|
|
@ -0,0 +1,5 @@
|
|||
/* { dg-do run { target ilp32 } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1 -mtune=geode" } */
|
||||
|
||||
#include "sse4_1-insertps-2.c"
|
|
@ -0,0 +1,92 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse4_1_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define msk0 0x41
|
||||
#define msk1 0x90
|
||||
#define msk2 0xe9
|
||||
#define msk3 0x70
|
||||
|
||||
#define msk4 0xFC
|
||||
#define msk5 0x05
|
||||
#define msk6 0x0A
|
||||
#define msk7 0x0F
|
||||
|
||||
union
|
||||
{
|
||||
__m128 x;
|
||||
float f[4];
|
||||
} val1;
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128 x;
|
||||
float f[4];
|
||||
} res[8], val2, tmp;
|
||||
int masks[8];
|
||||
int i, j;
|
||||
|
||||
val2.f[0] = 55.0;
|
||||
val2.f[1] = 55.0;
|
||||
val2.f[2] = 55.0;
|
||||
val2.f[3] = 55.0;
|
||||
|
||||
val1.f[0] = 1.;
|
||||
val1.f[1] = 2.;
|
||||
val1.f[2] = 3.;
|
||||
val1.f[3] = 4.;
|
||||
|
||||
asm volatile ("" : "+m" (val1));
|
||||
res[0].x = _mm_insert_ps (val2.x, val1.x, msk0);
|
||||
asm volatile ("" : "+m" (val1));
|
||||
res[1].x = _mm_insert_ps (val2.x, val1.x, msk1);
|
||||
asm volatile ("" : "+m" (val1));
|
||||
res[2].x = _mm_insert_ps (val2.x, val1.x, msk2);
|
||||
asm volatile ("" : "+m" (val1));
|
||||
res[3].x = _mm_insert_ps (val2.x, val1.x, msk3);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
masks[2] = msk2;
|
||||
masks[3] = msk3;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
asm volatile ("" : "+m" (val1));
|
||||
res[i + 4].x = _mm_insert_ps (val2.x, val1.x, msk4);
|
||||
}
|
||||
|
||||
masks[4] = msk4;
|
||||
masks[5] = msk4;
|
||||
masks[6] = msk4;
|
||||
masks[7] = msk4;
|
||||
|
||||
for (i=0; i < 8; i++)
|
||||
{
|
||||
tmp = val2;
|
||||
tmp.f[(masks[i] & 0x30) >> 4] = val1.f[(masks[i] & 0xC0) >> 6];
|
||||
|
||||
for (j = 0; j < 4; j++)
|
||||
if (masks[i] & (0x1 << j))
|
||||
tmp.f[j] = 0.f;
|
||||
|
||||
if (memcmp (&res[i], &tmp, sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue