rs6000: Prefer assigning the MMA vector operands to altivec registers [PR105556]

When optimizing the DGEMM kernel in OpenBLAS to use MMA, the MMA code
uses all 8 accumulators, which overlap all vs0-vs31 vector registers.
Current trunk assigns one of the normal vector inputs of an MMA
instruction to a vs0-vs31 register that overlaps an accumulator, which
forces us to spill one of the accumulators to memory,
leading to poor performance.  The solution here is to replace the "wa"
constraints for the vector input operands in the MMA instruction patterns
with "v,?wa" so that we prefer using the altivec registers vs32-vs63
over the vs0-vs31 registers.

2022-05-17  Peter Bergner  <bergner@linux.ibm.com>
	    Segher Boessenkool  <segher@kernel.crashing.org>

gcc/
	PR target/105556
	* config/rs6000/mma.md (mma_<vv>, mma_<avv>, mma_<pv>, mma_<apv>,
	mma_<vvi4i4i8>, mma_<avvi4i4i8>, mma_<vvi4i4i2>, mma_<avvi4i4i2>,
	mma_<vvi4i4>, mma_<avvi4i4>, mma_<pvi4i2>, mma_<apvi4i2>,
	mma_<vvi4i4i4>, mma_<avvi4i4i4>): Replace "wa" constraints with "v,?wa".
	Update other operands accordingly.
This commit is contained in:
Peter Bergner 2022-05-17 21:09:29 -05:00
parent 3d9439b1bb
commit c6e36f05fb
1 changed file with 75 additions and 75 deletions

View File

@ -490,50 +490,50 @@
[(set_attr "type" "mma")])
(define_insn "mma_<vv>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
"TARGET_MMA"
"<vv> %A0,%x1,%x2"
[(set_attr "type" "mma")])
(define_insn "mma_<avv>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
"TARGET_MMA"
"<avv> %A0,%x2,%x3"
[(set_attr "type" "mma")])
(define_insn "mma_<pv>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
"TARGET_MMA"
"<pv> %A0,%x1,%x2"
[(set_attr "type" "mma")])
(define_insn "mma_<apv>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:OO 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
"TARGET_MMA"
"<apv> %A0,%x2,%x3"
[(set_attr "type" "mma")])
(define_insn "mma_<vvi4i4i8>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "u8bit_cint_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
(match_operand:SI 5 "u8bit_cint_operand" "n,n")]
MMA_VVI4I4I8))]
"TARGET_MMA"
"<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
@ -541,13 +541,13 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<avvi4i4i8>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")
(match_operand:SI 6 "u8bit_cint_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
(match_operand:SI 5 "const_0_to_15_operand" "n,n")
(match_operand:SI 6 "u8bit_cint_operand" "n,n")]
MMA_AVVI4I4I8))]
"TARGET_MMA"
"<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
@ -555,12 +555,12 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<vvi4i4i2>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_3_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
(match_operand:SI 5 "const_0_to_3_operand" "n,n")]
MMA_VVI4I4I2))]
"TARGET_MMA"
"<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
@ -568,13 +568,13 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<avvi4i4i2>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")
(match_operand:SI 6 "const_0_to_3_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
(match_operand:SI 5 "const_0_to_15_operand" "n,n")
(match_operand:SI 6 "const_0_to_3_operand" "n,n")]
MMA_AVVI4I4I2))]
"TARGET_MMA"
"<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
@ -582,11 +582,11 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<vvi4i4>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")]
MMA_VVI4I4))]
"TARGET_MMA"
"<vvi4i4> %A0,%x1,%x2,%3,%4"
@ -594,12 +594,12 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<avvi4i4>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
(match_operand:SI 5 "const_0_to_15_operand" "n,n")]
MMA_AVVI4I4))]
"TARGET_MMA"
"<avvi4i4> %A0,%x2,%x3,%4,%5"
@ -607,11 +607,11 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<pvi4i2>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_3_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
(match_operand:SI 4 "const_0_to_3_operand" "n,n")]
MMA_PVI4I2))]
"TARGET_MMA"
"<pvi4i2> %A0,%x1,%x2,%3,%4"
@ -619,12 +619,12 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<apvi4i2>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:OO 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_3_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
(match_operand:SI 5 "const_0_to_3_operand" "n,n")]
MMA_APVI4I2))]
"TARGET_MMA"
"<apvi4i2> %A0,%x2,%x3,%4,%5"
@ -632,12 +632,12 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<vvi4i4i4>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
(match_operand:SI 5 "const_0_to_15_operand" "n,n")]
MMA_VVI4I4I4))]
"TARGET_MMA"
"<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
@ -645,13 +645,13 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<avvi4i4i4>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")
(match_operand:SI 6 "const_0_to_15_operand" "n")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
(match_operand:SI 5 "const_0_to_15_operand" "n,n")
(match_operand:SI 6 "const_0_to_15_operand" "n,n")]
MMA_AVVI4I4I4))]
"TARGET_MMA"
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"