re PR target/72804 (Poor code gen with -mvsx-timode)

gcc/
	PR target/72804
	* config/rs6000/vsx.md (*vsx_le_permute_<mode>): Add support for
	operands residing in integer registers.
	(*vsx_le_perm_load_<mode>): Likewise.
	(*vsx_le_perm_store_<mode>): Likewise.
	(define_peephole2): Add peepholes to optimize the above.

gcc/testsuite/
	PR target/72804
	* gcc.target/powerpc/pr72804.c: New test.

From-SVN: r251153
This commit is contained in:
Peter Bergner 2017-08-17 10:56:48 -05:00 committed by Peter Bergner
parent e67bbd5da4
commit d00fdf8579
4 changed files with 86 additions and 15 deletions

View File

@ -1,3 +1,12 @@
2017-08-17 Peter Bergner <bergner@vnet.ibm.com>
PR target/72804
* config/rs6000/vsx.md (*vsx_le_permute_<mode>): Add support for
operands residing in integer registers.
(*vsx_le_perm_load_<mode>): Likewise.
(*vsx_le_perm_store_<mode>): Likewise.
(define_peephole2): Add peepholes to optimize the above.
2017-08-17 Marek Polacek <polacek@redhat.com> 2017-08-17 Marek Polacek <polacek@redhat.com>
PR middle-end/81814 PR middle-end/81814

View File

@ -759,17 +759,20 @@
;; special V1TI container class, which it is not appropriate to use vec_select ;; special V1TI container class, which it is not appropriate to use vec_select
;; for the type. ;; for the type.
(define_insn "*vsx_le_permute_<mode>" (define_insn "*vsx_le_permute_<mode>"
[(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z") [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
(rotate:VSX_TI (rotate:VSX_TI
(match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>") (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
(const_int 64)))] (const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"@ "@
xxpermdi %x0,%x1,%x1,2 xxpermdi %x0,%x1,%x1,2
lxvd2x %x0,%y1 lxvd2x %x0,%y1
stxvd2x %x1,%y0" stxvd2x %x1,%y0
[(set_attr "length" "4") mr %0,%L1\;mr %L0,%1
(set_attr "type" "vecperm,vecload,vecstore")]) ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
[(set_attr "length" "4,4,4,8,8,8")
(set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
(define_insn_and_split "*vsx_le_undo_permute_<mode>" (define_insn_and_split "*vsx_le_undo_permute_<mode>"
[(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>") [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
@ -795,10 +798,12 @@
(set_attr "type" "veclogical")]) (set_attr "type" "veclogical")])
(define_insn_and_split "*vsx_le_perm_load_<mode>" (define_insn_and_split "*vsx_le_perm_load_<mode>"
[(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>") [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
(match_operand:VSX_LE_128 1 "memory_operand" "Z"))] (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#" "@
#
#"
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
[(const_int 0)] [(const_int 0)]
" "
@ -811,16 +816,18 @@
DONE; DONE;
} }
" "
[(set_attr "type" "vecload") [(set_attr "type" "vecload,load")
(set_attr "length" "8")]) (set_attr "length" "8,8")])
(define_insn "*vsx_le_perm_store_<mode>" (define_insn "*vsx_le_perm_store_<mode>"
[(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z") [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
(match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))] (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#" "@
[(set_attr "type" "vecstore") #
(set_attr "length" "12")]) #"
[(set_attr "type" "vecstore,store")
(set_attr "length" "12,8")])
(define_split (define_split
[(set (match_operand:VSX_LE_128 0 "memory_operand" "") [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
@ -836,6 +843,31 @@
DONE; DONE;
}) })
;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
;;
;; Load case: the first insn loads memory operand 1 into integer register
;; operand 0 with a 64-bit rotate, and the second insn rotates operand 0
;; by 64 bits again into integer register operand 2.  The pair is replaced
;; by a single plain move of operand 1 into operand 2.  The replacement is
;; only done when operand 0 is operand 2 itself, or when
;; peep2_reg_dead_p (2, operands[0]) holds, because the replacement no
;; longer sets operand 0.
(define_peephole2
[(set (match_operand:VSX_TI 0 "int_reg_operand")
(rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
(const_int 64)))
(set (match_operand:VSX_TI 2 "int_reg_operand")
(rotate:VSX_TI (match_dup 0)
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR
&& (rtx_equal_p (operands[0], operands[2])
|| peep2_reg_dead_p (2, operands[0]))"
[(set (match_dup 2) (match_dup 1))])
;; Store case for VSX_TI values held in integer registers on little endian:
;; the first insn rotates integer register operand 1 by 64 bits into integer
;; register operand 0, and the second insn stores operand 0 to memory
;; operand 2 with another 64-bit rotate.  The pair is replaced by a single
;; plain move of operand 1 into operand 2, but only when
;; peep2_reg_dead_p (2, operands[0]) holds, because the replacement no
;; longer sets operand 0.
(define_peephole2
[(set (match_operand:VSX_TI 0 "int_reg_operand")
(rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
(const_int 64)))
(set (match_operand:VSX_TI 2 "memory_operand")
(rotate:VSX_TI (match_dup 0)
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR
&& peep2_reg_dead_p (2, operands[0])"
[(set (match_dup 2) (match_dup 1))])
;; Peephole to catch memory to memory transfers for TImode if TImode landed in ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system. The vector types and IEEE 128-bit ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass. ;; floating point are handled by the more generic swap elimination pass.

View File

@ -1,3 +1,8 @@
2017-08-17 Peter Bergner <bergner@vnet.ibm.com>
PR target/72804
* gcc.target/powerpc/pr72804.c: New test.
2017-08-17 Marek Polacek <polacek@redhat.com> 2017-08-17 Marek Polacek <polacek@redhat.com>
PR middle-end/81814 PR middle-end/81814

View File

@ -0,0 +1,25 @@
/* { dg-do compile { target { lp64 } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } } */
/* { dg-require-effective-target powerpc_vsx_ok } */
/* { dg-options "-O2 -mvsx" } */
/* Return the bitwise complement of the 128-bit integer that SRC points to.
   The dg-final directives in this file scan the assembly generated from
   this source, so the body is intentionally kept minimal.  */
__int128_t
foo (__int128_t *src)
{
return ~*src;
}
/* Store the bitwise complement of the 128-bit integer SRC into the object
   that DST points to.  The dg-final directives in this file scan the
   assembly generated from this source, so the body is intentionally kept
   minimal.  */
void
bar (__int128_t *dst, __int128_t src)
{
*dst = ~src;
}
/* { dg-final { scan-assembler-times "not " 4 } } */
/* { dg-final { scan-assembler-times "std " 2 } } */
/* { dg-final { scan-assembler-times "ld " 2 } } */
/* { dg-final { scan-assembler-not "lxvd2x" } } */
/* { dg-final { scan-assembler-not "stxvd2x" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
/* { dg-final { scan-assembler-not "mfvsrd" } } */
/* { dg-final { scan-assembler-not "mfvsrd" } } */