diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c6042b53fcc..a10162d5bd0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2014-09-04 Bill Schmidt + + * config/rs6000/rs6000.c (special_handling_values): Add + SH_EXTRACT. + (rtx_is_swappable_p): Look for patterns with a VEC_SELECT, perhaps + wrapped in a VEC_DUPLICATE, representing an extract. Mark these + as swappable with special handling SH_EXTRACT. Remove + UNSPEC_VSX_XXSPLTW from the list of disallowed unspecs for the + optimization. + (adjust_extract): New function. + (handle_special_swappables): Add default to case statement; add + case for SH_EXTRACT that calls adjust_extract. + (dump_swap_insn_table): Handle SH_EXTRACT. + 2014-09-04 Bill Schmidt * config/rs6000/vsx.md (*vsx_extract__load): Always match diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 58f23ea57a1..6370304b287 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -33523,7 +33523,8 @@ enum special_handling_values { SH_CONST_VECTOR, SH_SUBREG, SH_NOSWAP_LD, - SH_NOSWAP_ST + SH_NOSWAP_ST, + SH_EXTRACT }; /* Union INSN with all insns containing definitions that reach USE. @@ -33665,6 +33666,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special) { enum rtx_code code = GET_CODE (op); int i, j; + rtx parallel; switch (code) { @@ -33675,7 +33677,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special) return 1; case VEC_CONCAT: - case VEC_SELECT: case ASM_INPUT: case ASM_OPERANDS: return 0; @@ -33693,6 +33694,28 @@ rtx_is_swappable_p (rtx op, unsigned int *special) handling. */ if (GET_CODE (XEXP (op, 0)) == CONST_INT) return 1; + else if (GET_CODE (XEXP (op, 0)) == REG + && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0))) + /* This catches V2DF and V2DI splat, at a minimum. */ + return 1; + else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT) + /* If the duplicated item is from a select, defer to the select + processing to see if we can change the lane for the splat. 
*/ + return rtx_is_swappable_p (XEXP (op, 0), special); + else + return 0; + + case VEC_SELECT: + /* A vec_extract operation is ok if we change the lane. */ + if (GET_CODE (XEXP (op, 0)) == REG + && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op) + && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL + && XVECLEN (parallel, 0) == 1 + && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT) + { + *special = SH_EXTRACT; + return 1; + } else return 0; @@ -33738,7 +33761,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special) || val == UNSPEC_VSX_CVSPDPN || val == UNSPEC_VSX_SET || val == UNSPEC_VSX_SLDWI - || val == UNSPEC_VSX_XXSPLTW || val == UNSPEC_VUNPACK_HI_SIGN || val == UNSPEC_VUNPACK_HI_SIGN_DIRECT || val == UNSPEC_VUNPACK_LO_SIGN @@ -34076,6 +34098,27 @@ permute_store (rtx_insn *insn) INSN_UID (insn)); } +/* Given INSN that contains a vector extract operation, adjust the index + of the extracted lane to account for the doubleword swap. */ +static void +adjust_extract (rtx_insn *insn) +{ + rtx src = SET_SRC (PATTERN (insn)); + /* The vec_select may be wrapped in a vec_duplicate for a splat, so + account for that. */ + rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src; + rtx par = XEXP (sel, 1); + int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1; + int lane = INTVAL (XVECEXP (par, 0, 0)); + lane = lane >= half_elts ? lane - half_elts : lane + half_elts; + XVECEXP (par, 0, 0) = GEN_INT (lane); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn)); +} + /* The insn described by INSN_ENTRY[I] can be swapped, but only with special handling. Take care of that here. */ static void @@ -34086,6 +34129,8 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i) switch (insn_entry[i].special_handling) { + default: + gcc_unreachable (); case SH_CONST_VECTOR: { /* A CONST_VECTOR will only show up somewhere in the RHS of a SET.
*/ @@ -34112,6 +34157,9 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i) /* Convert a non-permuting store to a permuting one. */ permute_store (insn); break; + case SH_EXTRACT: + /* Change the lane on an extract operation. */ + adjust_extract (insn); } } @@ -34180,6 +34228,8 @@ dump_swap_insn_table (swap_web_entry *insn_entry) fputs ("special:load ", dump_file); else if (insn_entry[i].special_handling == SH_NOSWAP_ST) fputs ("special:store ", dump_file); + else if (insn_entry[i].special_handling == SH_EXTRACT) + fputs ("special:extract ", dump_file); } if (insn_entry[i].web_not_optimizable) fputs ("unoptimizable ", dump_file); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3143e320889..462d1d3c318 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2014-09-04 Bill Schmidt + + * gcc.target/powerpc/swaps-p8-13.c: New test. + * gcc.target/powerpc/swaps-p8-14.c: New test. + * gcc.target/powerpc/swaps-p8-15.c: New test. 
+ 2014-09-04 Bill Schmidt * gcc.target/powerpc/vsx-extract-1.c: Test 0th doubleword diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-13.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-13.c new file mode 100644 index 00000000000..522639b8811 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-13.c @@ -0,0 +1,53 @@ +/* { dg-do run { target { powerpc64le-*-* } } } */ +/* { dg-options "-mcpu=power8 -O3" } */ + +#include <altivec.h> +void abort (); + +#define N 4096 +long long ca[N] __attribute__((aligned(16))); +long long cb[N] __attribute__((aligned(16))); +long long cc[N] __attribute__((aligned(16))); +long long cd[N] __attribute__((aligned(16))); +long long x; + +__attribute__((noinline)) void foo () +{ + int i; + vector long long va, vb, vc, vd, tmp; + volatile unsigned long long three = 3; + vector unsigned long long threes = vec_splats (three); + for (i = 0; i < N; i+=2) { + vb = vec_vsx_ld (0, (vector long long *)&cb[i]); + vc = vec_vsx_ld (0, (vector long long *)&cc[i]); + vd = vec_vsx_ld (0, (vector long long *)&cd[i]); + tmp = vec_add (vb, vc); + tmp = vec_sub (tmp, vd); + tmp = vec_sra (tmp, threes); + x = vec_extract (tmp, 0); + vec_vsx_st (tmp, 0, (vector long long *)&ca[i]); + } +} + +__attribute__((noinline)) void init () +{ + int i; + for (i = 0; i < N; ++i) { + cb[i] = 3 * i - 2048; + cc[i] = -5 * i + 93; + cd[i] = i + 14; + } +} + +int main () +{ + int i; + init (); + foo (); + for (i = 0; i < N; ++i) + if (ca[i] != (-3 * i - 1969) >> 3) + abort (); + if (x != ca[N-1]) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-14.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-14.c new file mode 100644 index 00000000000..50d1ec4f5b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-14.c @@ -0,0 +1,42 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-options "-mcpu=power8 -O3" } */ +/* { dg-final { scan-assembler "lxvd2x" } } */ +/* { dg-final { scan-assembler "stxvd2x" } } */ +/* { dg-final {
scan-assembler "stxsdx" } } */ +/* { dg-final { scan-assembler-times "xxpermdi" 1 } } */ + +/* The only xxpermdi expected is for the vec_splats. */ + +#include <altivec.h> +void abort (); + +#define N 4096 +long long ca[N] __attribute__((aligned(16))); +long long cb[N] __attribute__((aligned(16))); +long long cc[N] __attribute__((aligned(16))); +long long cd[N] __attribute__((aligned(16))); +long long x; + +__attribute__((noinline)) void foo () +{ + int i; + vector long long va, vb, vc, vd, tmp; + volatile unsigned long long three = 3; + vector unsigned long long threes = vec_splats (three); + for (i = 0; i < N; i+=2) { + vb = vec_vsx_ld (0, (vector long long *)&cb[i]); + vc = vec_vsx_ld (0, (vector long long *)&cc[i]); + vd = vec_vsx_ld (0, (vector long long *)&cd[i]); + tmp = vec_add (vb, vc); + tmp = vec_sub (tmp, vd); + tmp = vec_sra (tmp, threes); + x = vec_extract (tmp, 0); + vec_vsx_st (tmp, 0, (vector long long *)&ca[i]); + } +} + +int main () +{ + foo (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-15.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-15.c new file mode 100644 index 00000000000..3c9296c3783 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-15.c @@ -0,0 +1,49 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-options "-mcpu=power8 -O3" } */ +/* { dg-final { scan-assembler "lxvd2x" } } */ +/* { dg-final { scan-assembler "stxvd2x" } } */ +/* { dg-final { scan-assembler "xxspltw" } } */ +/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +#include <altivec.h> +void abort(); + +typedef struct xx {vector double l; vector double h;} xx; + +#define N 4096 +#define M 10000000 +vector float ca[N][4] = {0}; +vector float cb[N][4] = {0}; +vector float cc[N][4] = {0}; + +__attribute__((noinline)) void foo () +{ + int i; + for (i = 0; i < N; i++) { + cc[i][0] = vec_mul(vec_splats(cb[i][0][0]), ca[i][0]); + cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][1]), ca[i][1]); + cc[i][0] =
vec_madd(cc[i][0],vec_splats(cb[i][0][2]), ca[i][2]); + cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][3]), ca[i][3]); + + cc[i][1] = vec_mul(vec_splats(cb[i][1][0]), ca[i][0]); + cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][1]), ca[i][1]); + cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][2]), ca[i][2]); + cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][3]), ca[i][3]); + + cc[i][2] = vec_mul(vec_splats(cb[i][2][0]), ca[i][0]); + cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][1]), ca[i][1]); + cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][2]), ca[i][2]); + cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][3]), ca[i][3]); + + cc[i][3] = vec_mul(vec_splats(cb[i][3][0]), ca[i][0]); + cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][1]), ca[i][1]); + cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][2]), ca[i][2]); + cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][3]), ca[i][3]); + } +} + +int main () +{ + foo (); + return 0; +}