S/390: Fix a problem with the bswap vector pattern
arch13 introduced instructions to perform vector element-wise byte swaps on the way from or to memory. For a byte swap between vector registers, the vector permute instruction is required, which needs a permute pattern to be loaded into a vector register first. With the current implementation there is a potential problem when the decision for the reg-reg variant is made very late. This patch is supposed to fix that. With the patch, the required permute pattern is already generated in the expander and attached to the bswap pattern as a USE operand. The predicate in the insn_and_split pattern accepts it although the permute constant as such is not a valid constant. For the reg-reg variant, only the vector register constraint is used for the permute constant, forcing LRA to a) push the constant into the literal pool and b) load the literal pool constant into a vector register. gcc/ChangeLog: 2019-04-12 Andreas Krebbel <krebbel@linux.ibm.com> * config/s390/predicates.md (permute_pattern_operand): New predicate. * config/s390/vector.md ("*vec_splats_bswap_vec<mode>"): Add USE operand for the permute pattern. ("*vec_perm<mode>"): New insn definition. ("bswap<mode>"): Generate the permute pattern operand in the expander and perform the operand reloads for pre arch13 level already. ("*bswap<mode>_emu"): Rename to ... ("*bswap<mode>"): ... this. And make the splitter vxe2 only. * config/s390/vx-builtins.md ("*vec_insert_and_zero_bswap<mode>"): Add the USE operand for the permute pattern. ("*vec_set_bswap_vec<mode>"): Likewise. From-SVN: r270306
This commit is contained in:
parent
8e8225e69d
commit
2541c40f06
@ -1,3 +1,19 @@
|
||||
2019-04-12 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||
|
||||
* config/s390/predicates.md (permute_pattern_operand): New
|
||||
predicate.
|
||||
* config/s390/vector.md ("*vec_splats_bswap_vec<mode>"): Add USE
|
||||
operand for the permute pattern.
|
||||
("*vec_perm<mode>"): New insn definition.
|
||||
("bswap<mode>"): Generate the permute pattern operand in the
|
||||
expander and perform the operand reloads for pre arch13 level
|
||||
already.
|
||||
("*bswap<mode>_emu"): Rename to ...
|
||||
("*bswap<mode>"): ... this. And make the splitter vxe2 only.
|
||||
* config/s390/vx-builtins.md ("*vec_insert_and_zero_bswap<mode>"):
|
||||
Add the USE operand for the permute pattern.
|
||||
("*vec_set_bswap_vec<mode>"): Likewise.
|
||||
|
||||
2019-04-12 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR c/89946
|
||||
|
@ -46,6 +46,16 @@
|
||||
(and (match_code "symbol_ref, label_ref, const, const_int, const_wide_int, const_double, const_vector")
|
||||
(match_test "CONSTANT_P (op)")))
|
||||
|
||||
; An operand used as vector permutation pattern
|
||||
|
||||
; This in particular accepts constants which would otherwise be
|
||||
; rejected. These constants require special post reload handling
|
||||
|
||||
(define_special_predicate "permute_pattern_operand"
|
||||
(and (match_code "const_vector,mem,reg,subreg")
|
||||
(match_test "GET_MODE (op) == V16QImode")
|
||||
(match_test "!MEM_P (op) || s390_mem_constraint (\"R\", op)")))
|
||||
|
||||
;; Return true if OP is a valid S-type operand.
|
||||
|
||||
(define_predicate "s_operand"
|
||||
|
@ -551,9 +551,10 @@
|
||||
|
||||
; vlbrreph, vlbrrepf, vlbrrepg
|
||||
(define_insn "*vec_splats_bswap_vec<mode>"
|
||||
[(set (match_operand:V_HW_HSD 0 "register_operand" "=v")
|
||||
[(set (match_operand:V_HW_HSD 0 "register_operand" "=v")
|
||||
(bswap:V_HW_HSD
|
||||
(vec_duplicate:V_HW_HSD (match_operand:<non_vec> 1 "memory_operand" "R"))))]
|
||||
(vec_duplicate:V_HW_HSD (match_operand:<non_vec> 1 "memory_operand" "R"))))
|
||||
(use (match_operand:V16QI 2 "permute_pattern_operand" "X"))]
|
||||
"TARGET_VXE2"
|
||||
"vlbrrep<bhfgq>\t%v0,%1"
|
||||
[(set_attr "op_type" "VRX")])
|
||||
@ -655,6 +656,17 @@
|
||||
"vperm\t%v0,%v1,%v2,%v3"
|
||||
[(set_attr "op_type" "VRR")])
|
||||
|
||||
(define_insn "*vec_perm<mode>"
|
||||
[(set (match_operand:VT_HW 0 "register_operand" "=v")
|
||||
(subreg:VT_HW (unspec:V16QI [(subreg:V16QI (match_operand:VT_HW 1 "register_operand" "v") 0)
|
||||
(subreg:V16QI (match_operand:VT_HW 2 "register_operand" "v") 0)
|
||||
(match_operand:V16QI 3 "register_operand" "v")]
|
||||
UNSPEC_VEC_PERM) 0))]
|
||||
"TARGET_VX"
|
||||
"vperm\t%v0,%v1,%v2,%v3"
|
||||
[(set_attr "op_type" "VRR")])
|
||||
|
||||
|
||||
; vec_perm_const for V2DI using vpdi?
|
||||
|
||||
;;
|
||||
@ -2073,35 +2085,11 @@
|
||||
; FIXME: The bswap rtl standard name currently does not appear to be
|
||||
; used for vector modes.
|
||||
(define_expand "bswap<mode>"
|
||||
[(set (match_operand:VT_HW_HSDT 0 "nonimmediate_operand" "")
|
||||
(bswap:VT_HW_HSDT (match_operand:VT_HW_HSDT 1 "nonimmediate_operand" "")))]
|
||||
"TARGET_VX")
|
||||
|
||||
; vlbrh, vlbrf, vlbrg, vlbrq, vstbrh, vstbrf, vstbrg, vstbrq
|
||||
(define_insn "*bswap<mode>"
|
||||
[(set (match_operand:VT_HW_HSDT 0 "nonimmediate_operand" "=v,v,R")
|
||||
(bswap:VT_HW_HSDT (match_operand:VT_HW_HSDT 1 "nonimmediate_operand" "v,R,v")))]
|
||||
"TARGET_VXE2"
|
||||
"@
|
||||
#
|
||||
vlbr<bhfgq>\t%v0,%v1
|
||||
vstbr<bhfgq>\t%v1,%v0"
|
||||
[(set_attr "op_type" "*,VRX,VRX")])
|
||||
|
||||
(define_insn_and_split "*bswap<mode>_emu"
|
||||
[(set (match_operand:VT_HW_HSDT 0 "nonimmediate_operand" "=vR")
|
||||
(bswap:VT_HW_HSDT (match_operand:VT_HW_HSDT 1 "nonimmediate_operand" "vR")))]
|
||||
"TARGET_VX && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& ((!memory_operand (operands[1], <MODE>mode)
|
||||
&& !memory_operand (operands[0], <MODE>mode))
|
||||
|| !TARGET_VXE2)"
|
||||
[(set (match_dup 3)
|
||||
(unspec:V16QI [(match_dup 4)
|
||||
(match_dup 4)
|
||||
(match_dup 2)]
|
||||
UNSPEC_VEC_PERM))
|
||||
(set (match_dup 0) (subreg:VT_HW_HSDT (match_dup 3) 0))]
|
||||
[(parallel
|
||||
[(set (match_operand:VT_HW_HSDT 0 "nonimmediate_operand" "")
|
||||
(bswap:VT_HW_HSDT (match_operand:VT_HW_HSDT 1 "nonimmediate_operand" "")))
|
||||
(use (match_dup 2))])]
|
||||
"TARGET_VX"
|
||||
{
|
||||
static char p[4][16] =
|
||||
{ { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }, /* H */
|
||||
@ -2109,7 +2097,7 @@
|
||||
{ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }, /* D */
|
||||
{ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } }; /* T */
|
||||
char *perm;
|
||||
rtx perm_rtx[16], constv;
|
||||
rtx perm_rtx[16];
|
||||
|
||||
switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)))
|
||||
{
|
||||
@ -2122,14 +2110,57 @@
|
||||
for (int i = 0; i < 16; i++)
|
||||
perm_rtx[i] = GEN_INT (perm[i]);
|
||||
|
||||
operands[1] = force_reg (<MODE>mode, operands[1]);
|
||||
operands[2] = gen_reg_rtx (V16QImode);
|
||||
operands[3] = gen_reg_rtx (V16QImode);
|
||||
operands[4] = simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0);
|
||||
constv = force_const_mem (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx)));
|
||||
emit_move_insn (operands[2], constv);
|
||||
operands[2] = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx));
|
||||
|
||||
/* Without vxe2 we do not have byte swap instructions dealing
|
||||
directly with memory operands. So instead of waiting until
|
||||
reload to fix that up switch over to vector permute right
|
||||
now. */
|
||||
if (!TARGET_VXE2)
|
||||
{
|
||||
rtx in = force_reg (V16QImode, simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0));
|
||||
rtx permute = force_reg (V16QImode, force_const_mem (V16QImode, operands[2]));
|
||||
rtx out = gen_reg_rtx (V16QImode);
|
||||
|
||||
emit_insn (gen_vec_permv16qi (out, in, in, permute));
|
||||
emit_move_insn (operands[0], simplify_gen_subreg (<MODE>mode, out, V16QImode, 0));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
; Switching late to the reg-reg variant requires the vector permute
|
||||
; pattern to be pushed into the literal pool and a vector register to
|
||||
; be allocated to load it into.  We rely on both being provided by LRA
|
||||
; when fixing up the v constraint for operand 2.
|
||||
|
||||
; permute_pattern_operand: general_operand would reject the permute
|
||||
; pattern constants since these are not accepted by
|
||||
; s390_legitimate_constant_p
|
||||
|
||||
; ^R: Prevent these alternatives from being chosen if it would require
|
||||
; pushing the operand into memory first
|
||||
|
||||
; vlbrh, vlbrf, vlbrg, vlbrq, vstbrh, vstbrf, vstbrg, vstbrq
|
||||
(define_insn_and_split "*bswap<mode>"
|
||||
[(set (match_operand:VT_HW_HSDT 0 "nonimmediate_operand" "=v, v,^R")
|
||||
(bswap:VT_HW_HSDT (match_operand:VT_HW_HSDT 1 "nonimmediate_operand" "v,^R, v")))
|
||||
(use (match_operand:V16QI 2 "permute_pattern_operand" "v, X, X"))]
|
||||
"TARGET_VXE2"
|
||||
"@
|
||||
#
|
||||
vlbr<bhfgq>\t%v0,%v1
|
||||
vstbr<bhfgq>\t%v1,%v0"
|
||||
"&& reload_completed
|
||||
&& !memory_operand (operands[0], <MODE>mode)
|
||||
&& !memory_operand (operands[1], <MODE>mode)"
|
||||
[(set (match_dup 0)
|
||||
(subreg:VT_HW_HSDT
|
||||
(unspec:V16QI [(subreg:V16QI (match_dup 1) 0)
|
||||
(subreg:V16QI (match_dup 1) 0)
|
||||
(match_dup 2)]
|
||||
UNSPEC_VEC_PERM) 0))]
|
||||
""
|
||||
[(set_attr "op_type" "*,VRX,VRX")])
|
||||
|
||||
; reduc_smin
|
||||
; reduc_smax
|
||||
|
@ -183,10 +183,11 @@
|
||||
; vec_revb (vec_insert_and_zero(x)) bswap-and-replicate-1.c
|
||||
; vllebrzh, vllebrzf, vllebrzg
|
||||
(define_insn "*vec_insert_and_zero_bswap<mode>"
|
||||
[(set (match_operand:V_HW_HSD 0 "register_operand" "=v")
|
||||
[(set (match_operand:V_HW_HSD 0 "register_operand" "=v")
|
||||
(bswap:V_HW_HSD (unspec:V_HW_HSD
|
||||
[(match_operand:<non_vec> 1 "memory_operand" "R")]
|
||||
UNSPEC_VEC_INSERT_AND_ZERO)))]
|
||||
[(match_operand:<non_vec> 1 "memory_operand" "R")]
|
||||
UNSPEC_VEC_INSERT_AND_ZERO)))
|
||||
(use (match_operand:V16QI 2 "permute_pattern_operand" "X"))]
|
||||
"TARGET_VXE2"
|
||||
"vllebrz<bhfgq>\t%v0,%1"
|
||||
[(set_attr "op_type" "VRX")])
|
||||
@ -2243,12 +2244,13 @@
|
||||
; vec_revb (vec_insert (*a, vec_revb (b), 1)) set-element-bswap-1.c
|
||||
; vlebrh, vlebrf, vlebrg
|
||||
(define_insn "*vec_set_bswap_vec<mode>"
|
||||
[(set (match_operand:V_HW_HSD 0 "register_operand" "=v")
|
||||
[(set (match_operand:V_HW_HSD 0 "register_operand" "=v")
|
||||
(bswap:V_HW_HSD
|
||||
(unspec:V_HW_HSD [(match_operand:<non_vec> 1 "memory_operand" "R")
|
||||
(match_operand:SI 2 "const_int_operand" "C")
|
||||
(bswap:V_HW_HSD (match_operand:V_HW_HSD 3 "register_operand" "0"))]
|
||||
UNSPEC_VEC_SET)))]
|
||||
(unspec:V_HW_HSD [(match_operand:<non_vec> 1 "memory_operand" "R")
|
||||
(match_operand:SI 2 "const_int_operand" "C")
|
||||
(bswap:V_HW_HSD (match_operand:V_HW_HSD 3 "register_operand" "0"))]
|
||||
UNSPEC_VEC_SET)))
|
||||
(use (match_operand:V16QI 4 "permute_pattern_operand" "X"))]
|
||||
"TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS (<V_HW_HSD:MODE>mode)"
|
||||
"vlebr<bhfgq>\t%v0,%1,%2"
|
||||
[(set_attr "op_type" "VRX")])
|
||||
|
Loading…
Reference in New Issue
Block a user