diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d67a20fb756..7dd239d012d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2016-10-31 Michael Meissner + + * config/rs6000/vsx.md (VSX_EXTRACT_FL): New iterator for all + binary floating point types supported by the hardware except for + double. + (vsx_xvcvsxwdp_df): Provide scalar result alternative to the + vector instruction for optimizing extracting a SImode from a + V4SImode vector and converting it to floating point. + (vsx_xvcvuxwdp_df): Likewise. + (vsx_extract_si): On ISA 3.0, allow extract target and temporary + registers to be any VSX register. Move stores to the end of the + constraints. + (vsx_extract_si_float_df): New combiner pattern and splitter + to optimize extracting a SImode from a V4SImode vector and + converting it to a binary floating point type supported by the + hardware. Use the vector converts instead of extracting the + element, sign extending it, and then converting it to double. + Other floating point types than double first convert to double, + then the double is converted to that type. + (vsx_extract_si_float_<mode>): Likewise. 
+ 2016-10-31 Andrew Pinski * config/aarch64/driver-aarch64.c (host_detect_local_cpu): diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 18f3e86e29f..505c270edfd 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -288,6 +288,16 @@ (V8HI "v") (V4SI "wa")]) +;; Mode iterator for binary floating point types other than double, used to +;; optimize converting an integer element extracted from a vector to that +;; floating point type +(define_mode_iterator VSX_EXTRACT_FL [SF + (IF "FLOAT128_2REG_P (IFmode)") + (KF "TARGET_FLOAT128_HW") + (TF "FLOAT128_2REG_P (TFmode) + || (FLOAT128_IEEE_P (TFmode) + && TARGET_FLOAT128_HW)")]) + ;; Iterator for the 2 short vector types to do a splat from an integer (define_mode_iterator VSX_SPLAT_I [V16QI V8HI]) @@ -1907,6 +1917,7 @@ [(set_attr "type" "vecdouble")]) ;; Convert from 32-bit to 64-bit types +;; Provide both vector and scalar targets (define_insn "vsx_xvcvsxwdp" [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] @@ -1915,6 +1926,14 @@ "xvcvsxwdp %x0,%x1" [(set_attr "type" "vecdouble")]) +(define_insn "vsx_xvcvsxwdp_df" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws") + (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSXWDP))] + "TARGET_VSX" + "xvcvsxwdp %x0,%x1" + [(set_attr "type" "vecdouble")]) + (define_insn "vsx_xvcvuxwdp" [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] @@ -1923,6 +1942,14 @@ "xvcvuxwdp %x0,%x1" [(set_attr "type" "vecdouble")]) +(define_insn "vsx_xvcvuxwdp_df" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws") + (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVUXWDP))] + "TARGET_VSX" + "xvcvuxwdp %x0,%x1" + [(set_attr "type" "vecdouble")]) + (define_insn "vsx_xvcvspsxds" [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") (unspec:V2DI 
[(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] @@ -2574,11 +2601,11 @@ [(set_attr "type" "vecsimple")]) (define_insn_and_split "*vsx_extract_si" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,Z,Z,wJwK") + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z") (vec_select:SI - (match_operand:V4SI 1 "gpc_reg_operand" "v,wJwK,v,v") - (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) - (clobber (match_scratch:V4SI 3 "=v,wJwK,v,v"))] + (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv") + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))) + (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))] "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" @@ -2628,7 +2655,7 @@ DONE; } - [(set_attr "type" "mftgpr,fpstore,fpstore,vecsimple") + [(set_attr "type" "mftgpr,vecperm,fpstore") (set_attr "length" "8")]) (define_insn_and_split "*vsx_extract_<mode>_p8" @@ -2714,6 +2741,107 @@ DONE; }) +;; VSX_EXTRACT optimizations +;; Optimize double d = (double) vec_extract (vi, <n>) +;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP +(define_insn_and_split "*vsx_extract_si_float_df" + [(set (match_operand:DF 0 "gpc_reg_operand" "=ws") + (any_float:DF + (vec_select:SI + (match_operand:V4SI 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) + (clobber (match_scratch:V4SI 3 "=v"))] + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& 1" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx element = operands[2]; + rtx v4si_tmp = operands[3]; + int value; + + if (!VECTOR_ELT_ORDER_BIG) + element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); + + /* If the value is in the correct position, we can avoid doing the VSPLT + instruction. 
*/ + value = INTVAL (element); + if (value != 0) + { + if (GET_CODE (v4si_tmp) == SCRATCH) + v4si_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); + } + else + v4si_tmp = src; + + emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp)); + DONE; +}) + +;; Optimize <type> f = (<type>) vec_extract (vi, <n>) +;; where <type> is a floating point type supported by the hardware that is +;; not double. First convert the value to double, and then to the desired +;; type. +(define_insn_and_split "*vsx_extract_si_float_<mode>" + [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww") + (any_float:VSX_EXTRACT_FL + (vec_select:SI + (match_operand:V4SI 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) + (clobber (match_scratch:V4SI 3 "=v")) + (clobber (match_scratch:DF 4 "=ws"))] + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& 1" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx element = operands[2]; + rtx v4si_tmp = operands[3]; + rtx df_tmp = operands[4]; + int value; + + if (!VECTOR_ELT_ORDER_BIG) + element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); + + /* If the value is in the correct position, we can avoid doing the VSPLT + instruction. 
*/ + value = INTVAL (element); + if (value != 0) + { + if (GET_CODE (v4si_tmp) == SCRATCH) + v4si_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); + } + else + v4si_tmp = src; + + if (GET_CODE (df_tmp) == SCRATCH) + df_tmp = gen_reg_rtx (DFmode); + + emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp)); + + if (<MODE>mode == SFmode) + emit_insn (gen_truncdfsf2 (dest, df_tmp)); + else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode)) + emit_insn (gen_extenddftf2_vsx (dest, df_tmp)); + else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode) + && TARGET_FLOAT128_HW) + emit_insn (gen_extenddftf2_hw (dest, df_tmp)); + else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode)) + emit_insn (gen_extenddfif2 (dest, df_tmp)); + else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW) + emit_insn (gen_extenddfkf2_hw (dest, df_tmp)); + else + gcc_unreachable (); + + DONE; +}) + ;; Expanders for builtins (define_expand "vsx_mergel_<mode>" [(use (match_operand:VSX_D 0 "vsx_register_operand" "")) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 36b4d8ecb32..aed8a662d5c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-10-31 Michael Meissner + + * gcc.target/powerpc/vsx-extract-4.c: New test. + * gcc.target/powerpc/vsx-extract-5.c: Likewise. 
+ 2016-10-31 Jerry DeLisle PR fortran/54679 diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c b/gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c new file mode 100644 index 00000000000..3b498f4bc2a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c @@ -0,0 +1,76 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-O2 -mcpu=power8" } */ + +/* { dg-final { scan-assembler-times "vspltw" 6 } } */ +/* { dg-final { scan-assembler-times "xvcvsxwdp" 4 } } */ +/* { dg-final { scan-assembler-times "xvcvuxwdp" 4 } } */ +/* { dg-final { scan-assembler-not "mtvsrd" } } */ +/* { dg-final { scan-assembler-not "mtvsrwa" } } */ +/* { dg-final { scan-assembler-not "mtvsrwz" } } */ +/* { dg-final { scan-assembler-not "mfvsrd" } } */ +/* { dg-final { scan-assembler-not "mfvsrwz" } } */ + +#include <altivec.h> + +#ifndef TYPE +#define TYPE double +#endif + +TYPE +foo_0s (vector int v) +{ + int i = vec_extract (v, 0); + return (TYPE) i; +} + +TYPE +foo_1s (vector int v) +{ + int i = vec_extract (v, 1); + return (TYPE) i; +} + +TYPE +foo_2s (vector int v) +{ + int i = vec_extract (v, 2); + return (TYPE) i; +} + +TYPE +foo_3s (vector int v) +{ + int i = vec_extract (v, 3); + return (TYPE) i; +} + +TYPE +foo_0u (vector unsigned int v) +{ + unsigned int u = vec_extract (v, 0); + return (TYPE) u; +} + +TYPE +foo_1u (vector unsigned int v) +{ + unsigned int u = vec_extract (v, 1); + return (TYPE) u; +} + +TYPE +foo_2u (vector unsigned int v) +{ + unsigned int u = vec_extract (v, 2); + return (TYPE) u; +} + +TYPE +foo_3u (vector unsigned int v) +{ + unsigned int u = vec_extract (v, 3); + return (TYPE) u; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c b/gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c new file mode 
100644 index 00000000000..1338c6b1de5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c @@ -0,0 +1,77 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-O2 -mcpu=power8" } */ + +/* { dg-final { scan-assembler-times "vspltw" 6 } } */ +/* { dg-final { scan-assembler-times "xvcvsxwdp" 4 } } */ +/* { dg-final { scan-assembler-times "xvcvuxwdp" 4 } } */ +/* { dg-final { scan-assembler-times "frsp\|xsrsp" 8 } } */ +/* { dg-final { scan-assembler-not "mtvsrd" } } */ +/* { dg-final { scan-assembler-not "mtvsrwa" } } */ +/* { dg-final { scan-assembler-not "mtvsrwz" } } */ +/* { dg-final { scan-assembler-not "mfvsrd" } } */ +/* { dg-final { scan-assembler-not "mfvsrwz" } } */ + +#include <altivec.h> + +#ifndef TYPE +#define TYPE float +#endif + +TYPE +foo_0s (vector int v) +{ + int i = vec_extract (v, 0); + return (TYPE) i; +} + +TYPE +foo_1s (vector int v) +{ + int i = vec_extract (v, 1); + return (TYPE) i; +} + +TYPE +foo_2s (vector int v) +{ + int i = vec_extract (v, 2); + return (TYPE) i; +} + +TYPE +foo_3s (vector int v) +{ + int i = vec_extract (v, 3); + return (TYPE) i; +} + +TYPE +foo_0u (vector unsigned int v) +{ + unsigned int u = vec_extract (v, 0); + return (TYPE) u; +} + +TYPE +foo_1u (vector unsigned int v) +{ + unsigned int u = vec_extract (v, 1); + return (TYPE) u; +} + +TYPE +foo_2u (vector unsigned int v) +{ + unsigned int u = vec_extract (v, 2); + return (TYPE) u; +} + +TYPE +foo_3u (vector unsigned int v) +{ + unsigned int u = vec_extract (v, 3); + return (TYPE) u; +}