rs6000.c (special_handling_values): Add SH_SPLAT.
[gcc] 2014-09-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * config/rs6000/rs6000.c (special_handling_values): Add SH_SPLAT. (rtx_is_swappable_p): Convert UNSPEC cascading ||s to a switch statement; allow optimization of UNSPEC_VSPLT_DIRECT with special handling SH_SPLAT. (adjust_extract): Fix test for VEC_DUPLICATE case; fix adjustment of extracted lane. (adjust_splat): New function. (handle_special_swappables): Call adjust_splat for SH_SPLAT. (dump_swap_insn_table): Add case for SH_SPLAT. [gcc/testsuite] 2014-09-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * gcc.target/powerpc/swaps-p8-16.c: New test. From-SVN: r215019
This commit is contained in:
parent
33d9078ac5
commit
db09274d91
@ -1,3 +1,15 @@
|
||||
2014-09-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
|
||||
* config/rs6000/rs6000.c (special_handling_values): Add SH_SPLAT.
|
||||
(rtx_is_swappable_p): Convert UNSPEC cascading ||s to a switch
|
||||
statement; allow optimization of UNSPEC_VSPLT_DIRECT with special
|
||||
handling SH_SPLAT.
|
||||
(adjust_extract): Fix test for VEC_DUPLICATE case; fix adjustment
|
||||
of extracted lane.
|
||||
(adjust_splat): New function.
|
||||
(handle_special_swappables): Call adjust_splat for SH_SPLAT.
|
||||
(dump_swap_insn_table): Add case for SH_SPLAT.
|
||||
|
||||
2014-09-08 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR ipa/63196
|
||||
|
@ -33524,7 +33524,8 @@ enum special_handling_values {
|
||||
SH_SUBREG,
|
||||
SH_NOSWAP_LD,
|
||||
SH_NOSWAP_ST,
|
||||
SH_EXTRACT
|
||||
SH_EXTRACT,
|
||||
SH_SPLAT
|
||||
};
|
||||
|
||||
/* Union INSN with all insns containing definitions that reach USE.
|
||||
@ -33735,43 +33736,50 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
|
||||
vector splat are element-order sensitive. A few of these
|
||||
cases might be workable with special handling if required. */
|
||||
int val = XINT (op, 1);
|
||||
if (val == UNSPEC_VMRGH_DIRECT
|
||||
|| val == UNSPEC_VMRGL_DIRECT
|
||||
|| val == UNSPEC_VPACK_SIGN_SIGN_SAT
|
||||
|| val == UNSPEC_VPACK_SIGN_UNS_SAT
|
||||
|| val == UNSPEC_VPACK_UNS_UNS_MOD
|
||||
|| val == UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
|
||||
|| val == UNSPEC_VPACK_UNS_UNS_SAT
|
||||
|| val == UNSPEC_VPERM
|
||||
|| val == UNSPEC_VPERM_UNS
|
||||
|| val == UNSPEC_VPERMHI
|
||||
|| val == UNSPEC_VPERMSI
|
||||
|| val == UNSPEC_VPKPX
|
||||
|| val == UNSPEC_VSLDOI
|
||||
|| val == UNSPEC_VSLO
|
||||
|| val == UNSPEC_VSPLT_DIRECT
|
||||
|| val == UNSPEC_VSRO
|
||||
|| val == UNSPEC_VSUM2SWS
|
||||
|| val == UNSPEC_VSUM4S
|
||||
|| val == UNSPEC_VSUM4UBS
|
||||
|| val == UNSPEC_VSUMSWS
|
||||
|| val == UNSPEC_VSUMSWS_DIRECT
|
||||
|| val == UNSPEC_VSX_CONCAT
|
||||
|| val == UNSPEC_VSX_CVSPDP
|
||||
|| val == UNSPEC_VSX_CVSPDPN
|
||||
|| val == UNSPEC_VSX_SET
|
||||
|| val == UNSPEC_VSX_SLDWI
|
||||
|| val == UNSPEC_VUNPACK_HI_SIGN
|
||||
|| val == UNSPEC_VUNPACK_HI_SIGN_DIRECT
|
||||
|| val == UNSPEC_VUNPACK_LO_SIGN
|
||||
|| val == UNSPEC_VUNPACK_LO_SIGN_DIRECT
|
||||
|| val == UNSPEC_VUPKHPX
|
||||
|| val == UNSPEC_VUPKHS_V4SF
|
||||
|| val == UNSPEC_VUPKHU_V4SF
|
||||
|| val == UNSPEC_VUPKLPX
|
||||
|| val == UNSPEC_VUPKLS_V4SF
|
||||
|| val == UNSPEC_VUPKHU_V4SF)
|
||||
return 0;
|
||||
switch (val)
|
||||
{
|
||||
default:
|
||||
break;
|
||||
case UNSPEC_VMRGH_DIRECT:
|
||||
case UNSPEC_VMRGL_DIRECT:
|
||||
case UNSPEC_VPACK_SIGN_SIGN_SAT:
|
||||
case UNSPEC_VPACK_SIGN_UNS_SAT:
|
||||
case UNSPEC_VPACK_UNS_UNS_MOD:
|
||||
case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
|
||||
case UNSPEC_VPACK_UNS_UNS_SAT:
|
||||
case UNSPEC_VPERM:
|
||||
case UNSPEC_VPERM_UNS:
|
||||
case UNSPEC_VPERMHI:
|
||||
case UNSPEC_VPERMSI:
|
||||
case UNSPEC_VPKPX:
|
||||
case UNSPEC_VSLDOI:
|
||||
case UNSPEC_VSLO:
|
||||
case UNSPEC_VSRO:
|
||||
case UNSPEC_VSUM2SWS:
|
||||
case UNSPEC_VSUM4S:
|
||||
case UNSPEC_VSUM4UBS:
|
||||
case UNSPEC_VSUMSWS:
|
||||
case UNSPEC_VSUMSWS_DIRECT:
|
||||
case UNSPEC_VSX_CONCAT:
|
||||
case UNSPEC_VSX_CVSPDP:
|
||||
case UNSPEC_VSX_CVSPDPN:
|
||||
case UNSPEC_VSX_SET:
|
||||
case UNSPEC_VSX_SLDWI:
|
||||
case UNSPEC_VUNPACK_HI_SIGN:
|
||||
case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
|
||||
case UNSPEC_VUNPACK_LO_SIGN:
|
||||
case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
|
||||
case UNSPEC_VUPKHPX:
|
||||
case UNSPEC_VUPKHS_V4SF:
|
||||
case UNSPEC_VUPKHU_V4SF:
|
||||
case UNSPEC_VUPKLPX:
|
||||
case UNSPEC_VUPKLS_V4SF:
|
||||
case UNSPEC_VUPKLU_V4SF:
|
||||
return 0;
|
||||
case UNSPEC_VSPLT_DIRECT:
|
||||
*special = SH_SPLAT;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
@ -34098,20 +34106,20 @@ permute_store (rtx_insn *insn)
|
||||
INSN_UID (insn));
|
||||
}
|
||||
|
||||
/* Given OP that contains a vector extract operation, change the index
|
||||
of the extracted lane to count from the other side of the vector. */
|
||||
/* Given INSN that contains a vector extract operation, adjust the index
|
||||
of the extracted lane to account for the doubleword swap. */
|
||||
static void
|
||||
adjust_extract (rtx_insn *insn)
|
||||
{
|
||||
rtx body = PATTERN (insn);
|
||||
rtx src = SET_SRC (PATTERN (insn));
|
||||
/* The vec_select may be wrapped in a vec_duplicate for a splat, so
|
||||
account for that. */
|
||||
rtx sel = (GET_CODE (body) == VEC_DUPLICATE
|
||||
? XEXP (XEXP (body, 0), 1)
|
||||
: XEXP (body, 1));
|
||||
rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
|
||||
rtx par = XEXP (sel, 1);
|
||||
int nunits = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0)));
|
||||
XVECEXP (par, 0, 0) = GEN_INT (nunits - 1 - INTVAL (XVECEXP (par, 0, 0)));
|
||||
int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
|
||||
int lane = INTVAL (XVECEXP (par, 0, 0));
|
||||
lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
|
||||
XVECEXP (par, 0, 0) = GEN_INT (lane);
|
||||
INSN_CODE (insn) = -1; /* Force re-recognition. */
|
||||
df_insn_rescan (insn);
|
||||
|
||||
@ -34119,6 +34127,24 @@ adjust_extract (rtx_insn *insn)
|
||||
fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
|
||||
}
|
||||
|
||||
/* Given INSN that contains a vector direct-splat operation, adjust the index
|
||||
of the source lane to account for the doubleword swap. */
|
||||
static void
|
||||
adjust_splat (rtx_insn *insn)
|
||||
{
|
||||
rtx body = PATTERN (insn);
|
||||
rtx unspec = XEXP (body, 1); /* Operand 1 of the pattern; presumably the SET_SRC holding the UNSPEC_VSPLT_DIRECT -- confirm. */
|
||||
int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1; /* Half the number of elements in the unspec's mode. */
|
||||
int lane = INTVAL (XVECEXP (unspec, 0, 1)); /* Current lane: element 1 of the unspec's operand vector (read with INTVAL, so assumed to be a CONST_INT). */
|
||||
lane = lane >= half_elts ? lane - half_elts : lane + half_elts; /* Move the lane to the same offset in the other half of the vector. */
|
||||
XVECEXP (unspec, 0, 1) = GEN_INT (lane); /* Write the adjusted lane back into the unspec. */
|
||||
INSN_CODE (insn) = -1; /* Force re-recognition. */
|
||||
df_insn_rescan (insn); /* Rescan the modified insn. */
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
|
||||
}
|
||||
|
||||
/* The insn described by INSN_ENTRY[I] can be swapped, but only
|
||||
with special handling. Take care of that here. */
|
||||
static void
|
||||
@ -34160,6 +34186,11 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
|
||||
case SH_EXTRACT:
|
||||
/* Change the lane on an extract operation. */
|
||||
adjust_extract (insn);
|
||||
break;
|
||||
case SH_SPLAT:
|
||||
/* Change the lane on a direct-splat operation. */
|
||||
adjust_splat (insn);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -34230,6 +34261,8 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
|
||||
fputs ("special:store ", dump_file);
|
||||
else if (insn_entry[i].special_handling == SH_EXTRACT)
|
||||
fputs ("special:extract ", dump_file);
|
||||
else if (insn_entry[i].special_handling == SH_SPLAT)
|
||||
fputs ("special:splat ", dump_file);
|
||||
}
|
||||
if (insn_entry[i].web_not_optimizable)
|
||||
fputs ("unoptimizable ", dump_file);
|
||||
|
@ -1,3 +1,7 @@
|
||||
2014-09-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/swaps-p8-16.c: New test.
|
||||
|
||||
2014-09-07 Richard Sandiford <rdsandiford@googlemail.com>
|
||||
|
||||
* gcc.target/i386/pr62208.c: New test.
|
||||
|
56
gcc/testsuite/gcc.target/powerpc/swaps-p8-16.c
Normal file
56
gcc/testsuite/gcc.target/powerpc/swaps-p8-16.c
Normal file
@ -0,0 +1,56 @@
|
||||
/* { dg-do compile { target { powerpc64le-*-* } } } */
|
||||
/* { dg-options "-mcpu=power8 -O3" } */
|
||||
/* { dg-final { scan-assembler "lxvd2x" } } */
|
||||
/* { dg-final { scan-assembler "stxvd2x" } } */
|
||||
/* { dg-final { scan-assembler "vspltw" } } */
|
||||
/* { dg-final { scan-assembler-not "xxpermdi" } } */
|
||||
|
||||
#include <altivec.h>
|
||||
void abort();
|
||||
|
||||
typedef struct xx {vector double l; vector double h;} xx;
|
||||
|
||||
#define N 4096
|
||||
#define M 10000000
|
||||
vector float ca[N][4] = {0};
|
||||
vector float cb[N][4] = {0};
|
||||
vector float cc[N][4] = {0};
|
||||
|
||||
/* Kernel under test.  For each of the N rows, load one vector of cb into
   BROW, splat each of its four elements with vec_splats, and combine the
   splats with the four vectors of ca[i] using vec_mul/vec_madd, storing
   into cc[i].  The dg-final directives at the top of this file expect the
   generated code to contain lxvd2x, stxvd2x and vspltw and no xxpermdi.

   NOTE(review): the statements for cc[i][1], cc[i][2] and cc[i][3] pass
   cc[i][0] as the first vec_madd operand and each assignment overwrites
   the previous one.  This looks like copy-paste, but the test is
   compile-only (dg-do compile), so only the emitted instructions matter --
   confirm before changing.  */
__attribute__((noinline)) void foo ()
|
||||
{
|
||||
int i;
|
||||
vector float brow;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
|
||||
brow = cb[i][0];
|
||||
cc[i][0] = vec_mul(vec_splats(brow[0]), ca[i][0]);
|
||||
cc[i][0] = vec_madd(cc[i][0],vec_splats(brow[1]), ca[i][1]);
|
||||
cc[i][0] = vec_madd(cc[i][0],vec_splats(brow[2]), ca[i][2]);
|
||||
cc[i][0] = vec_madd(cc[i][0],vec_splats(brow[3]), ca[i][3]);
|
||||
|
||||
brow = cb[i][1];
|
||||
cc[i][1] = vec_mul(vec_splats(brow[0]), ca[i][0]);
|
||||
cc[i][1] = vec_madd(cc[i][0],vec_splats(brow[1]), ca[i][1]);
|
||||
cc[i][1] = vec_madd(cc[i][0],vec_splats(brow[2]), ca[i][2]);
|
||||
cc[i][1] = vec_madd(cc[i][0],vec_splats(brow[3]), ca[i][3]);
|
||||
|
||||
brow = cb[i][2];
|
||||
cc[i][2] = vec_mul(vec_splats(brow[0]), ca[i][0]);
|
||||
cc[i][2] = vec_madd(cc[i][0],vec_splats(brow[1]), ca[i][1]);
|
||||
cc[i][2] = vec_madd(cc[i][0],vec_splats(brow[2]), ca[i][2]);
|
||||
cc[i][2] = vec_madd(cc[i][0],vec_splats(brow[3]), ca[i][3]);
|
||||
|
||||
brow = cb[i][3];
|
||||
cc[i][3] = vec_mul(vec_splats(brow[0]), ca[i][0]);
|
||||
cc[i][3] = vec_madd(cc[i][0],vec_splats(brow[1]), ca[i][1]);
|
||||
cc[i][3] = vec_madd(cc[i][0],vec_splats(brow[2]), ca[i][2]);
|
||||
cc[i][3] = vec_madd(cc[i][0],vec_splats(brow[3]), ca[i][3]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Call foo once and return 0; the contents of cc are not checked here.  */
int main ()
|
||||
{
|
||||
foo ();
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user