vsx.md (VSX_EXTRACT_FL): New iterator for all binary floating point types supported by the hardware...

[gcc]
2016-10-31  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/vsx.md (VSX_EXTRACT_FL): New iterator for all
	binary floating point types supported by the hardware except for
	double.
	(vsx_xvcvsxwdp_df): Provide scalar result alternative to the
	vector instruction for optimizing extracting a SImode from a
	V4SImode vector and converting it to floating point.
	(vsx_xvcvuxwdp_df): Likewise.
	(vsx_extract_si): On ISA 3.0, allow extract target and temporary
	registers to be any VSX register.  Move stores to the end of the
	constraints.
	(vsx_extract_si_<uns>float_df): New combiner pattern and splitter
	to optimize extracting a SImode from a V4SImode vector and
	converting it to a binary floating point type supported by the
	hardware.  Use the vector converts instead of extracting the
	element, sign extending it, and then converting it to double.
	Other floating point types than double first convert to double,
	then the double is converted to that type.
	(vsx_extract_si_<uns>float_<mode>): Likewise.

[gcc/testsuite]
2016-10-31  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* gcc.target/powerpc/vsx-extract-4.c: New test.
	* gcc.target/powerpc/vsx-extract-5.c: Likewise.

From-SVN: r241731
This commit is contained in:
Michael Meissner 2016-11-01 00:41:30 +00:00 committed by Michael Meissner
parent fb4c92aabc
commit 156b5ccae7
5 changed files with 312 additions and 5 deletions

View File

@ -1,3 +1,24 @@
2016-10-31 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/vsx.md (VSX_EXTRACT_FL): New iterator for all
binary floating point types supported by the hardware except for
double.
(vsx_xvcvsxwdp_df): Provide scalar result alternative to the
vector instruction for optimizing extracting a SImode from a
V4SImode vector and converting it to floating point.
(vsx_xvcvuxwdp_df): Likewise.
(vsx_extract_si): On ISA 3.0, allow extract target and temporary
registers to be any VSX register. Move stores to the end of the
constraints.
(vsx_extract_si_<uns>float_df): New combiner pattern and splitter
to optimize extracting a SImode from a V4SImode vector and
converting it to a binary floating point type supported by the
hardware. Use the vector converts instead of extracting the
element, sign extending it, and then converting it to double.
Other floating point types than double first convert to double,
then the double is converted to that type.
(vsx_extract_si_<uns>float_<mode>): Likewise.
2016-10-31 Andrew Pinski <apinski@cavium.com>
* config/aarch64/driver-aarch64.c (host_detect_local_cpu):

View File

@ -288,6 +288,16 @@
(V8HI "v")
(V4SI "wa")])
;; Mode iterator over the binary floating point modes other than double
;; (DFmode) that an integer element extracted from a vector can be
;; converted to.  SFmode is always part of the iterator; IFmode, KFmode
;; and TFmode are only included when the condition string attached to the
;; respective entry is true.
(define_mode_iterator VSX_EXTRACT_FL [SF
(IF "FLOAT128_2REG_P (IFmode)")
(KF "TARGET_FLOAT128_HW")
(TF "FLOAT128_2REG_P (TFmode)
|| (FLOAT128_IEEE_P (TFmode)
&& TARGET_FLOAT128_HW)")])
;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
@ -1907,6 +1917,7 @@
[(set_attr "type" "vecdouble")])
;; Convert from 32-bit to 64-bit types
;; Provide both vector and scalar targets
(define_insn "vsx_xvcvsxwdp"
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@ -1915,6 +1926,14 @@
"xvcvsxwdp %x0,%x1"
[(set_attr "type" "vecdouble")])
;; Variant of the xvcvsxwdp conversion whose result is described as a
;; single DFmode value instead of a V2DF vector.  The input is still a
;; V4SI vector register (operand 1); the result goes to operand 0.
;; The pattern is available whenever TARGET_VSX is true.
(define_insn "vsx_xvcvsxwdp_df"
[(set (match_operand:DF 0 "vsx_register_operand" "=ws")
(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
UNSPEC_VSX_CVSXWDP))]
"TARGET_VSX"
"xvcvsxwdp %x0,%x1"
[(set_attr "type" "vecdouble")])
(define_insn "vsx_xvcvuxwdp"
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@ -1923,6 +1942,14 @@
"xvcvuxwdp %x0,%x1"
[(set_attr "type" "vecdouble")])
;; Variant of the xvcvuxwdp conversion whose result is described as a
;; single DFmode value instead of a V2DF vector.  The input is still a
;; V4SI vector register (operand 1); the result goes to operand 0.
;; The pattern is available whenever TARGET_VSX is true.
(define_insn "vsx_xvcvuxwdp_df"
[(set (match_operand:DF 0 "vsx_register_operand" "=ws")
(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
UNSPEC_VSX_CVUXWDP))]
"TARGET_VSX"
"xvcvuxwdp %x0,%x1"
[(set_attr "type" "vecdouble")])
(define_insn "vsx_xvcvspsxds"
[(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
@ -2574,11 +2601,11 @@
[(set_attr "type" "vecsimple")])
(define_insn_and_split "*vsx_extract_si"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,Z,Z,wJwK")
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
(vec_select:SI
(match_operand:V4SI 1 "gpc_reg_operand" "v,wJwK,v,v")
(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
(clobber (match_scratch:V4SI 3 "=v,wJwK,v,v"))]
(match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
(clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& reload_completed"
@ -2628,7 +2655,7 @@
DONE;
}
[(set_attr "type" "mftgpr,fpstore,fpstore,vecsimple")
[(set_attr "type" "mftgpr,vecperm,fpstore")
(set_attr "length" "8")])
(define_insn_and_split "*vsx_extract_<mode>_p8"
@ -2714,6 +2741,107 @@
DONE;
})
;; VSX_EXTRACT optimizations
;; Optimize double d = (double) vec_extract (vi, <n>)
;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP
;;
;; Operands:
;;   0  DFmode result.
;;   1  V4SI source vector.
;;   2  constant element number (0..3).
;;   3  V4SI scratch used when the element has to be moved.
;;
;; The "#" output template means this insn is never printed directly; it
;; is always split ("&& 1") into the sequence built by the code below:
;;   1. When VECTOR_ELT_ORDER_BIG is false, the element number is mirrored
;;      (number of V4SI elements - 1 - n).
;;   2. If the resulting number is non-zero, the element is splatted into
;;      the temporary via altivec_vspltw_direct; a new pseudo is allocated
;;      if operand 3 is still a SCRATCH.  If it is zero, the source vector
;;      is used unchanged and no splat is emitted.
;;   3. vsx_xvcv<su>xwdp_df converts the word to DFmode in operand 0.
(define_insn_and_split "*vsx_extract_si_<uns>float_df"
[(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
(any_float:DF
(vec_select:SI
(match_operand:V4SI 1 "gpc_reg_operand" "v")
(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
(clobber (match_scratch:V4SI 3 "=v"))]
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& 1"
[(const_int 0)]
{
rtx dest = operands[0];
rtx src = operands[1];
rtx element = operands[2];
rtx v4si_tmp = operands[3];
int value;
if (!VECTOR_ELT_ORDER_BIG)
element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
/* If the value is in the correct position, we can avoid doing the VSPLT<x>
instruction. */
value = INTVAL (element);
if (value != 0)
{
if (GET_CODE (v4si_tmp) == SCRATCH)
v4si_tmp = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
}
else
v4si_tmp = src;
emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
DONE;
})
;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type that is supported by the hardware
;; and is not double.  First convert the value to double, and then to the
;; desired type.
;;
;; Operands:
;;   0  result in one of the VSX_EXTRACT_FL modes.
;;   1  V4SI source vector.
;;   2  constant element number (0..3).
;;   3  V4SI scratch used when the element has to be moved.
;;   4  DFmode scratch holding the intermediate double value.
;;
;; The "#" output template means this insn is never printed directly; it
;; is always split ("&& 1") into the sequence built by the code below:
;;   1. When VECTOR_ELT_ORDER_BIG is false, the element number is mirrored
;;      (number of V4SI elements - 1 - n).
;;   2. If the resulting number is non-zero, the element is splatted into
;;      the V4SI temporary via altivec_vspltw_direct; otherwise the source
;;      vector is used unchanged.  Either scratch is replaced by a new
;;      pseudo if it is still a SCRATCH.
;;   3. vsx_xvcv<su>xwdp_df converts the word to DFmode in the DF temporary.
;;   4. The DFmode value is converted to the destination mode:
;;        SFmode             -> truncdfsf2
;;        TFmode (IBM)       -> extenddftf2_vsx
;;        TFmode (IEEE, HW)  -> extenddftf2_hw
;;        IFmode (IBM)       -> extenddfif2
;;        KFmode (HW)        -> extenddfkf2_hw
;;      Any other combination reaches gcc_unreachable ().
(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
[(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
(any_float:VSX_EXTRACT_FL
(vec_select:SI
(match_operand:V4SI 1 "gpc_reg_operand" "v")
(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
(clobber (match_scratch:V4SI 3 "=v"))
(clobber (match_scratch:DF 4 "=ws"))]
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& 1"
[(const_int 0)]
{
rtx dest = operands[0];
rtx src = operands[1];
rtx element = operands[2];
rtx v4si_tmp = operands[3];
rtx df_tmp = operands[4];
int value;
if (!VECTOR_ELT_ORDER_BIG)
element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
/* If the value is in the correct position, we can avoid doing the VSPLT<x>
instruction. */
value = INTVAL (element);
if (value != 0)
{
if (GET_CODE (v4si_tmp) == SCRATCH)
v4si_tmp = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
}
else
v4si_tmp = src;
if (GET_CODE (df_tmp) == SCRATCH)
df_tmp = gen_reg_rtx (DFmode);
emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
if (<MODE>mode == SFmode)
emit_insn (gen_truncdfsf2 (dest, df_tmp));
else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
&& TARGET_FLOAT128_HW)
emit_insn (gen_extenddftf2_hw (dest, df_tmp));
else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
emit_insn (gen_extenddfif2 (dest, df_tmp));
else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
else
gcc_unreachable ();
DONE;
})
;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
[(use (match_operand:VSX_D 0 "vsx_register_operand" ""))

View File

@ -1,3 +1,8 @@
2016-10-31 Michael Meissner <meissner@linux.vnet.ibm.com>
* gcc.target/powerpc/vsx-extract-4.c: New test.
* gcc.target/powerpc/vsx-extract-5.c: Likewise.
2016-10-31 Jerry DeLisle <jvdelisle@gcc.gnu.org>
PR fortran/54679

View File

@ -0,0 +1,76 @@
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-O2 -mcpu=power8" } */
/* { dg-final { scan-assembler-times "vspltw" 6 } } */
/* { dg-final { scan-assembler-times "xvcvsxwdp" 4 } } */
/* { dg-final { scan-assembler-times "xvcvuxwdp" 4 } } */
/* { dg-final { scan-assembler-not "mtvsrd" } } */
/* { dg-final { scan-assembler-not "mtvsrwa" } } */
/* { dg-final { scan-assembler-not "mtvsrwz" } } */
/* { dg-final { scan-assembler-not "mfvsrd" } } */
/* { dg-final { scan-assembler-not "mfvsrwz" } } */
#include <altivec.h>
#ifndef TYPE
#define TYPE double
#endif
/* Extract element 0 of a signed int vector and convert it to TYPE.  */
TYPE
foo_0s (vector int v)
{
int i = vec_extract (v, 0);
return (TYPE) i;
}
/* Extract element 1 of a signed int vector and convert it to TYPE.  */
TYPE
foo_1s (vector int v)
{
int i = vec_extract (v, 1);
return (TYPE) i;
}
/* Extract element 2 of a signed int vector and convert it to TYPE.  */
TYPE
foo_2s (vector int v)
{
int i = vec_extract (v, 2);
return (TYPE) i;
}
/* Extract element 3 of a signed int vector and convert it to TYPE.  */
TYPE
foo_3s (vector int v)
{
int i = vec_extract (v, 3);
return (TYPE) i;
}
/* Extract element 0 of an unsigned int vector and convert it to TYPE.  */
TYPE
foo_0u (vector unsigned int v)
{
unsigned int u = vec_extract (v, 0);
return (TYPE) u;
}
/* Extract element 1 of an unsigned int vector and convert it to TYPE.  */
TYPE
foo_1u (vector unsigned int v)
{
unsigned int u = vec_extract (v, 1);
return (TYPE) u;
}
/* Extract element 2 of an unsigned int vector and convert it to TYPE.  */
TYPE
foo_2u (vector unsigned int v)
{
unsigned int u = vec_extract (v, 2);
return (TYPE) u;
}
/* Extract element 3 of an unsigned int vector and convert it to TYPE.  */
TYPE
foo_3u (vector unsigned int v)
{
unsigned int u = vec_extract (v, 3);
return (TYPE) u;
}

View File

@ -0,0 +1,77 @@
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-O2 -mcpu=power8" } */
/* { dg-final { scan-assembler-times "vspltw" 6 } } */
/* { dg-final { scan-assembler-times "xvcvsxwdp" 4 } } */
/* { dg-final { scan-assembler-times "xvcvuxwdp" 4 } } */
/* { dg-final { scan-assembler-times "frsp\|xsrsp" 8 } } */
/* { dg-final { scan-assembler-not "mtvsrd" } } */
/* { dg-final { scan-assembler-not "mtvsrwa" } } */
/* { dg-final { scan-assembler-not "mtvsrwz" } } */
/* { dg-final { scan-assembler-not "mfvsrd" } } */
/* { dg-final { scan-assembler-not "mfvsrwz" } } */
#include <altivec.h>
#ifndef TYPE
#define TYPE float
#endif
/* Extract element 0 of a signed int vector and convert it to TYPE.  */
TYPE
foo_0s (vector int v)
{
int i = vec_extract (v, 0);
return (TYPE) i;
}
/* Extract element 1 of a signed int vector and convert it to TYPE.  */
TYPE
foo_1s (vector int v)
{
int i = vec_extract (v, 1);
return (TYPE) i;
}
/* Extract element 2 of a signed int vector and convert it to TYPE.  */
TYPE
foo_2s (vector int v)
{
int i = vec_extract (v, 2);
return (TYPE) i;
}
/* Extract element 3 of a signed int vector and convert it to TYPE.  */
TYPE
foo_3s (vector int v)
{
int i = vec_extract (v, 3);
return (TYPE) i;
}
/* Extract element 0 of an unsigned int vector and convert it to TYPE.  */
TYPE
foo_0u (vector unsigned int v)
{
unsigned int u = vec_extract (v, 0);
return (TYPE) u;
}
/* Extract element 1 of an unsigned int vector and convert it to TYPE.  */
TYPE
foo_1u (vector unsigned int v)
{
unsigned int u = vec_extract (v, 1);
return (TYPE) u;
}
/* Extract element 2 of an unsigned int vector and convert it to TYPE.  */
TYPE
foo_2u (vector unsigned int v)
{
unsigned int u = vec_extract (v, 2);
return (TYPE) u;
}
/* Extract element 3 of an unsigned int vector and convert it to TYPE.  */
TYPE
foo_3u (vector unsigned int v)
{
unsigned int u = vec_extract (v, 3);
return (TYPE) u;
}