rs6000.c (special_handling_values): Add SH_EXTRACT.
[gcc] 2014-09-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * config/rs6000/rs6000.c (special_handling_values): Add SH_EXTRACT. (rtx_is_swappable_p): Look for patterns with a VEC_SELECT, perhaps wrapped in a VEC_DUPLICATE, representing an extract. Mark these as swappable with special handling SH_EXTRACT. Remove UNSPEC_VSX_XXSPLTW from the list of disallowed unspecs for the optimization. (adjust_extract): New function. (handle_special_swappables): Add default to case statement; add case for SH_EXTRACT that calls adjust_extract. (dump_swap_insn_table): Handle SH_EXTRACT. [gcc/testsuite] 2014-09-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * gcc.target/powerpc/swaps-p8-13.c: New test. * gcc.target/powerpc/swaps-p8-14.c: New test. * gcc.target/powerpc/swaps-p8-15.c: New test. From-SVN: r214903
This commit is contained in:
parent
2b3106ecef
commit
6bf5ce8f85
|
@ -1,3 +1,17 @@
|
||||||
|
2014-09-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||||
|
|
||||||
|
* config/rs6000/rs6000.c (special_handling_values): Add
|
||||||
|
SH_EXTRACT.
|
||||||
|
(rtx_is_swappable_p): Look for patterns with a VEC_SELECT, perhaps
|
||||||
|
wrapped in a VEC_DUPLICATE, representing an extract. Mark these
|
||||||
|
as swappable with special handling SH_EXTRACT. Remove
|
||||||
|
UNSPEC_VSX_XXSPLTW from the list of disallowed unspecs for the
|
||||||
|
optimization.
|
||||||
|
(adjust_extract): New function.
|
||||||
|
(handle_special_swappables): Add default to case statement; add
|
||||||
|
case for SH_EXTRACT that calls adjust_extract.
|
||||||
|
(dump_swap_insn_table): Handle SH_EXTRACT.
|
||||||
|
|
||||||
2014-09-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
2014-09-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||||
|
|
||||||
* config/rs6000/vsx.md (*vsx_extract_<mode>_load): Always match
|
* config/rs6000/vsx.md (*vsx_extract_<mode>_load): Always match
|
||||||
|
|
|
@ -33523,7 +33523,8 @@ enum special_handling_values {
|
||||||
SH_CONST_VECTOR,
|
SH_CONST_VECTOR,
|
||||||
SH_SUBREG,
|
SH_SUBREG,
|
||||||
SH_NOSWAP_LD,
|
SH_NOSWAP_LD,
|
||||||
SH_NOSWAP_ST
|
SH_NOSWAP_ST,
|
||||||
|
SH_EXTRACT
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Union INSN with all insns containing definitions that reach USE.
|
/* Union INSN with all insns containing definitions that reach USE.
|
||||||
|
@ -33665,6 +33666,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
|
||||||
{
|
{
|
||||||
enum rtx_code code = GET_CODE (op);
|
enum rtx_code code = GET_CODE (op);
|
||||||
int i, j;
|
int i, j;
|
||||||
|
rtx parallel;
|
||||||
|
|
||||||
switch (code)
|
switch (code)
|
||||||
{
|
{
|
||||||
|
@ -33675,7 +33677,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
case VEC_CONCAT:
|
case VEC_CONCAT:
|
||||||
case VEC_SELECT:
|
|
||||||
case ASM_INPUT:
|
case ASM_INPUT:
|
||||||
case ASM_OPERANDS:
|
case ASM_OPERANDS:
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -33693,6 +33694,28 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
|
||||||
handling. */
|
handling. */
|
||||||
if (GET_CODE (XEXP (op, 0)) == CONST_INT)
|
if (GET_CODE (XEXP (op, 0)) == CONST_INT)
|
||||||
return 1;
|
return 1;
|
||||||
|
else if (GET_CODE (XEXP (op, 0)) == REG
|
||||||
|
&& GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
|
||||||
|
/* This catches V2DF and V2DI splat, at a minimum. */
|
||||||
|
return 1;
|
||||||
|
else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
|
||||||
|
/* If the duplicated item is from a select, defer to the select
|
||||||
|
processing to see if we can change the lane for the splat. */
|
||||||
|
return rtx_is_swappable_p (XEXP (op, 0), special);
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
case VEC_SELECT:
|
||||||
|
/* A vec_extract operation is ok if we change the lane. */
|
||||||
|
if (GET_CODE (XEXP (op, 0)) == REG
|
||||||
|
&& GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
|
||||||
|
&& GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
|
||||||
|
&& XVECLEN (parallel, 0) == 1
|
||||||
|
&& GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
|
||||||
|
{
|
||||||
|
*special = SH_EXTRACT;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@ -33738,7 +33761,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
|
||||||
|| val == UNSPEC_VSX_CVSPDPN
|
|| val == UNSPEC_VSX_CVSPDPN
|
||||||
|| val == UNSPEC_VSX_SET
|
|| val == UNSPEC_VSX_SET
|
||||||
|| val == UNSPEC_VSX_SLDWI
|
|| val == UNSPEC_VSX_SLDWI
|
||||||
|| val == UNSPEC_VSX_XXSPLTW
|
|
||||||
|| val == UNSPEC_VUNPACK_HI_SIGN
|
|| val == UNSPEC_VUNPACK_HI_SIGN
|
||||||
|| val == UNSPEC_VUNPACK_HI_SIGN_DIRECT
|
|| val == UNSPEC_VUNPACK_HI_SIGN_DIRECT
|
||||||
|| val == UNSPEC_VUNPACK_LO_SIGN
|
|| val == UNSPEC_VUNPACK_LO_SIGN
|
||||||
|
@ -34076,6 +34098,27 @@ permute_store (rtx_insn *insn)
|
||||||
INSN_UID (insn));
|
INSN_UID (insn));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Given OP that contains a vector extract operation, change the index
|
||||||
|
of the extracted lane to count from the other side of the vector. */
|
||||||
|
static void
|
||||||
|
adjust_extract (rtx_insn *insn)
|
||||||
|
{
|
||||||
|
rtx body = PATTERN (insn);
|
||||||
|
/* The vec_select may be wrapped in a vec_duplicate for a splat, so
|
||||||
|
account for that. */
|
||||||
|
rtx sel = (GET_CODE (body) == VEC_DUPLICATE
|
||||||
|
? XEXP (XEXP (body, 0), 1)
|
||||||
|
: XEXP (body, 1));
|
||||||
|
rtx par = XEXP (sel, 1);
|
||||||
|
int nunits = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0)));
|
||||||
|
XVECEXP (par, 0, 0) = GEN_INT (nunits - 1 - INTVAL (XVECEXP (par, 0, 0)));
|
||||||
|
INSN_CODE (insn) = -1; /* Force re-recognition. */
|
||||||
|
df_insn_rescan (insn);
|
||||||
|
|
||||||
|
if (dump_file)
|
||||||
|
fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
|
||||||
|
}
|
||||||
|
|
||||||
/* The insn described by INSN_ENTRY[I] can be swapped, but only
|
/* The insn described by INSN_ENTRY[I] can be swapped, but only
|
||||||
with special handling. Take care of that here. */
|
with special handling. Take care of that here. */
|
||||||
static void
|
static void
|
||||||
|
@ -34086,6 +34129,8 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
|
||||||
|
|
||||||
switch (insn_entry[i].special_handling)
|
switch (insn_entry[i].special_handling)
|
||||||
{
|
{
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
case SH_CONST_VECTOR:
|
case SH_CONST_VECTOR:
|
||||||
{
|
{
|
||||||
/* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
|
/* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
|
||||||
|
@ -34112,6 +34157,9 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
|
||||||
/* Convert a non-permuting store to a permuting one. */
|
/* Convert a non-permuting store to a permuting one. */
|
||||||
permute_store (insn);
|
permute_store (insn);
|
||||||
break;
|
break;
|
||||||
|
case SH_EXTRACT:
|
||||||
|
/* Change the lane on an extract operation. */
|
||||||
|
adjust_extract (insn);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34180,6 +34228,8 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
|
||||||
fputs ("special:load ", dump_file);
|
fputs ("special:load ", dump_file);
|
||||||
else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
|
else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
|
||||||
fputs ("special:store ", dump_file);
|
fputs ("special:store ", dump_file);
|
||||||
|
else if (insn_entry[i].special_handling == SH_EXTRACT)
|
||||||
|
fputs ("special:extract ", dump_file);
|
||||||
}
|
}
|
||||||
if (insn_entry[i].web_not_optimizable)
|
if (insn_entry[i].web_not_optimizable)
|
||||||
fputs ("unoptimizable ", dump_file);
|
fputs ("unoptimizable ", dump_file);
|
||||||
|
|
|
@ -1,3 +1,9 @@
|
||||||
|
2014-09-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||||
|
|
||||||
|
* gcc.target/powerpc/swaps-p8-13.c: New test.
|
||||||
|
* gcc.target/powerpc/swaps-p8-14.c: New test.
|
||||||
|
* gcc.target/powerpc/swaps-p8-15.c: New test.
|
||||||
|
|
||||||
2014-09-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
2014-09-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||||
|
|
||||||
* gcc.target/powerpc/vsx-extract-1.c: Test 0th doubleword
|
* gcc.target/powerpc/vsx-extract-1.c: Test 0th doubleword
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
/* { dg-do run { target { powerpc64le-*-* } } } */
|
||||||
|
/* { dg-options "-mcpu=power8 -O3" } */
|
||||||
|
|
||||||
|
#include <altivec.h>
|
||||||
|
void abort ();
|
||||||
|
|
||||||
|
#define N 4096
|
||||||
|
long long ca[N] __attribute__((aligned(16)));
|
||||||
|
long long cb[N] __attribute__((aligned(16)));
|
||||||
|
long long cc[N] __attribute__((aligned(16)));
|
||||||
|
long long cd[N] __attribute__((aligned(16)));
|
||||||
|
long long x;
|
||||||
|
|
||||||
|
__attribute__((noinline)) void foo ()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
vector long long va, vb, vc, vd, tmp;
|
||||||
|
volatile unsigned long long three = 3;
|
||||||
|
vector unsigned long long threes = vec_splats (three);
|
||||||
|
for (i = 0; i < N; i+=2) {
|
||||||
|
vb = vec_vsx_ld (0, (vector long long *)&cb[i]);
|
||||||
|
vc = vec_vsx_ld (0, (vector long long *)&cc[i]);
|
||||||
|
vd = vec_vsx_ld (0, (vector long long *)&cd[i]);
|
||||||
|
tmp = vec_add (vb, vc);
|
||||||
|
tmp = vec_sub (tmp, vd);
|
||||||
|
tmp = vec_sra (tmp, threes);
|
||||||
|
x = vec_extract (tmp, 0);
|
||||||
|
vec_vsx_st (tmp, 0, (vector long long *)&ca[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((noinline)) void init ()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < N; ++i) {
|
||||||
|
cb[i] = 3 * i - 2048;
|
||||||
|
cc[i] = -5 * i + 93;
|
||||||
|
cd[i] = i + 14;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main ()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
init ();
|
||||||
|
foo ();
|
||||||
|
for (i = 0; i < N; ++i)
|
||||||
|
if (ca[i] != (-3 * i - 1969) >> 3)
|
||||||
|
abort ();
|
||||||
|
if (x != ca[N-1])
|
||||||
|
abort ();
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
/* { dg-do compile { target { powerpc64le-*-* } } } */
|
||||||
|
/* { dg-options "-mcpu=power8 -O3" } */
|
||||||
|
/* { dg-final { scan-assembler "lxvd2x" } } */
|
||||||
|
/* { dg-final { scan-assembler "stxvd2x" } } */
|
||||||
|
/* { dg-final { scan-assembler "stxsdx" } } */
|
||||||
|
/* { dg-final { scan-assembler-times "xxpermdi" 1 } } */
|
||||||
|
|
||||||
|
/* The only xxpermdi expected is for the vec_splats. */
|
||||||
|
|
||||||
|
#include <altivec.h>
|
||||||
|
void abort ();
|
||||||
|
|
||||||
|
#define N 4096
|
||||||
|
long long ca[N] __attribute__((aligned(16)));
|
||||||
|
long long cb[N] __attribute__((aligned(16)));
|
||||||
|
long long cc[N] __attribute__((aligned(16)));
|
||||||
|
long long cd[N] __attribute__((aligned(16)));
|
||||||
|
long long x;
|
||||||
|
|
||||||
|
__attribute__((noinline)) void foo ()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
vector long long va, vb, vc, vd, tmp;
|
||||||
|
volatile unsigned long long three = 3;
|
||||||
|
vector unsigned long long threes = vec_splats (three);
|
||||||
|
for (i = 0; i < N; i+=2) {
|
||||||
|
vb = vec_vsx_ld (0, (vector long long *)&cb[i]);
|
||||||
|
vc = vec_vsx_ld (0, (vector long long *)&cc[i]);
|
||||||
|
vd = vec_vsx_ld (0, (vector long long *)&cd[i]);
|
||||||
|
tmp = vec_add (vb, vc);
|
||||||
|
tmp = vec_sub (tmp, vd);
|
||||||
|
tmp = vec_sra (tmp, threes);
|
||||||
|
x = vec_extract (tmp, 0);
|
||||||
|
vec_vsx_st (tmp, 0, (vector long long *)&ca[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Entry point: just invoke the kernel so that foo is referenced.  */
int main ()
{
  foo ();

  return 0;
}
|
|
@ -0,0 +1,49 @@
|
||||||
|
/* { dg-do compile { target { powerpc64le-*-* } } } */
|
||||||
|
/* { dg-options "-mcpu=power8 -O3" } */
|
||||||
|
/* { dg-final { scan-assembler "lxvd2x" } } */
|
||||||
|
/* { dg-final { scan-assembler "stxvd2x" } } */
|
||||||
|
/* { dg-final { scan-assembler "xxspltw" } } */
|
||||||
|
/* { dg-final { scan-assembler-not "xxpermdi" } } */
|
||||||
|
|
||||||
|
#include <altivec.h>
|
||||||
|
void abort();
|
||||||
|
|
||||||
|
typedef struct xx {vector double l; vector double h;} xx;
|
||||||
|
|
||||||
|
#define N 4096
|
||||||
|
#define M 10000000
|
||||||
|
vector float ca[N][4] = {0};
|
||||||
|
vector float cb[N][4] = {0};
|
||||||
|
vector float cc[N][4] = {0};
|
||||||
|
|
||||||
|
__attribute__((noinline)) void foo ()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < N; i++) {
|
||||||
|
cc[i][0] = vec_mul(vec_splats(cb[i][0][0]), ca[i][0]);
|
||||||
|
cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][1]), ca[i][1]);
|
||||||
|
cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][2]), ca[i][2]);
|
||||||
|
cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][3]), ca[i][3]);
|
||||||
|
|
||||||
|
cc[i][1] = vec_mul(vec_splats(cb[i][1][0]), ca[i][0]);
|
||||||
|
cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][1]), ca[i][1]);
|
||||||
|
cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][2]), ca[i][2]);
|
||||||
|
cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][3]), ca[i][3]);
|
||||||
|
|
||||||
|
cc[i][2] = vec_mul(vec_splats(cb[i][2][0]), ca[i][0]);
|
||||||
|
cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][1]), ca[i][1]);
|
||||||
|
cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][2]), ca[i][2]);
|
||||||
|
cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][3]), ca[i][3]);
|
||||||
|
|
||||||
|
cc[i][3] = vec_mul(vec_splats(cb[i][3][0]), ca[i][0]);
|
||||||
|
cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][1]), ca[i][1]);
|
||||||
|
cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][2]), ca[i][2]);
|
||||||
|
cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][3]), ca[i][3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Entry point: just invoke the kernel so that foo is referenced.  */
int main ()
{
  foo ();

  return 0;
}
|
Loading…
Reference in New Issue