rs6000: Prefer assigning the MMA vector operands to altivec registers [PR105556]

When optimizing the DGEMM kernel in OpenBLAS to use MMA, the MMA code uses all 8 accumulators, which together overlap all of the vs0-vs31 vector registers. Current trunk assigns one of the normal vector inputs of an MMA instruction to a register in that range, which forces us to spill one of the accumulators to memory, leading to poor performance. The solution here is to replace the "wa" constraints for the vector input operands in the MMA instruction patterns with "v,?wa", so that we prefer using the altivec registers vs32-vs63 over the vs0-vs31 registers.

2022-05-17  Peter Bergner  <bergner@linux.ibm.com>
            Segher Boessenkool  <segher@kernel.crashing.org>

gcc/
    PR target/105556
    * config/rs6000/mma.md (mma_<vv>, mma_<avv>, mma_<pv>, mma_<apv>, mma_<vvi4i4i8>, mma_<avvi4i4i8>, mma_<vvi4i4i2>, mma_<avvi4i4i2>, mma_<vvi4i4>, mma_<avvi4i4>, mma_<pvi4i2>, mma_<apvi4i2>, mma_<vvi4i4i4>, mma_<avvi4i4i4>): Replace "wa" constraints with "v,?wa". Update other operands accordingly.
This commit is contained in:
parent
3d9439b1bb
commit
c6e36f05fb
|
@ -490,50 +490,50 @@
|
|||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Insn pattern for the MMA_VV unspecs: operand 0 is the XO-mode result
;; (earlyclobber, "=&d"); operands 1 and 2 are V16QI vector inputs.
;; NOTE: this span is a diff rendering whose +/- markers were lost.  The
;; lines with a single "wa" constraint are the pre-change text; the lines
;; with two alternatives ("v,?wa", and "=&d,&d" for operand 0) replace them.
;; Listing "v" first and disparaging "wa" with "?" makes the register
;; allocator prefer the altivec registers (vs32-vs63) for the vector inputs.
(define_insn "mma_<vv>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
|
||||
MMA_VV))]
|
||||
"TARGET_MMA"
|
||||
"<vv> %A0,%x1,%x2"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Insn pattern for the MMA_AVV unspecs: operand 0 is the XO-mode result
;; (earlyclobber, "=&d"); operand 1 is an XO-mode input tied to operand 0 by
;; the matching constraint "0"; operands 2 and 3 are V16QI vector inputs.
;; NOTE: this span is a diff rendering whose +/- markers were lost.  The
;; lines with single-alternative constraints ("=&d", "0", "wa") are the
;; pre-change text; the two-alternative lines ("=&d,&d", "0,0", "v,?wa")
;; replace them.  Listing "v" first and disparaging "wa" with "?" makes the
;; allocator prefer the altivec registers (vs32-vs63) for the vector inputs.
(define_insn "mma_<avv>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
|
||||
MMA_AVV))]
|
||||
"TARGET_MMA"
|
||||
"<avv> %A0,%x2,%x3"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Insn pattern for the MMA_PV unspecs: operand 0 is the XO-mode result
;; (earlyclobber, "=&d"); operand 1 is an OO-mode input and operand 2 a
;; V16QI input, both accepted by vsx_register_operand.
;; NOTE: this span is a diff rendering whose +/- markers were lost.  The
;; lines with a single "wa" constraint are the pre-change text; the lines
;; with two alternatives ("v,?wa", and "=&d,&d" for operand 0) replace them.
;; Listing "v" first and disparaging "wa" with "?" makes the register
;; allocator prefer the altivec registers (vs32-vs63) for the vector inputs.
(define_insn "mma_<pv>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
|
||||
MMA_PV))]
|
||||
"TARGET_MMA"
|
||||
"<pv> %A0,%x1,%x2"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Insn pattern for the MMA_APV unspecs: operand 0 is the XO-mode result
;; (earlyclobber, "=&d"); operand 1 is an XO-mode input tied to operand 0 by
;; the matching constraint "0"; operand 2 is an OO-mode input and operand 3
;; a V16QI input, both accepted by vsx_register_operand.
;; NOTE: this span is a diff rendering whose +/- markers were lost.  The
;; lines with single-alternative constraints ("=&d", "0", "wa") are the
;; pre-change text; the two-alternative lines ("=&d,&d", "0,0", "v,?wa")
;; replace them.  Listing "v" first and disparaging "wa" with "?" makes the
;; allocator prefer the altivec registers (vs32-vs63) for the vector inputs.
(define_insn "mma_<apv>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
|
||||
(match_operand:OO 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
|
||||
MMA_APV))]
|
||||
"TARGET_MMA"
|
||||
"<apv> %A0,%x2,%x3"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
(define_insn "mma_<vvi4i4i8>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "u8bit_cint_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 5 "u8bit_cint_operand" "n,n")]
|
||||
MMA_VVI4I4I8))]
|
||||
"TARGET_MMA"
|
||||
"<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
|
||||
|
@ -541,13 +541,13 @@
|
|||
(set_attr "prefixed" "yes")])
|
||||
|
||||
(define_insn "mma_<avvi4i4i8>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 6 "u8bit_cint_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 6 "u8bit_cint_operand" "n,n")]
|
||||
MMA_AVVI4I4I8))]
|
||||
"TARGET_MMA"
|
||||
"<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
|
||||
|
@ -555,12 +555,12 @@
|
|||
(set_attr "prefixed" "yes")])
|
||||
|
||||
(define_insn "mma_<vvi4i4i2>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_3_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 5 "const_0_to_3_operand" "n,n")]
|
||||
MMA_VVI4I4I2))]
|
||||
"TARGET_MMA"
|
||||
"<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
|
||||
|
@ -568,13 +568,13 @@
|
|||
(set_attr "prefixed" "yes")])
|
||||
|
||||
(define_insn "mma_<avvi4i4i2>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 6 "const_0_to_3_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 6 "const_0_to_3_operand" "n,n")]
|
||||
MMA_AVVI4I4I2))]
|
||||
"TARGET_MMA"
|
||||
"<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
|
||||
|
@ -582,11 +582,11 @@
|
|||
(set_attr "prefixed" "yes")])
|
||||
|
||||
(define_insn "mma_<vvi4i4>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")]
|
||||
MMA_VVI4I4))]
|
||||
"TARGET_MMA"
|
||||
"<vvi4i4> %A0,%x1,%x2,%3,%4"
|
||||
|
@ -594,12 +594,12 @@
|
|||
(set_attr "prefixed" "yes")])
|
||||
|
||||
(define_insn "mma_<avvi4i4>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n,n")]
|
||||
MMA_AVVI4I4))]
|
||||
"TARGET_MMA"
|
||||
"<avvi4i4> %A0,%x2,%x3,%4,%5"
|
||||
|
@ -607,11 +607,11 @@
|
|||
(set_attr "prefixed" "yes")])
|
||||
|
||||
(define_insn "mma_<pvi4i2>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_3_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 4 "const_0_to_3_operand" "n,n")]
|
||||
MMA_PVI4I2))]
|
||||
"TARGET_MMA"
|
||||
"<pvi4i2> %A0,%x1,%x2,%3,%4"
|
||||
|
@ -619,12 +619,12 @@
|
|||
(set_attr "prefixed" "yes")])
|
||||
|
||||
(define_insn "mma_<apvi4i2>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
|
||||
(match_operand:OO 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_3_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 5 "const_0_to_3_operand" "n,n")]
|
||||
MMA_APVI4I2))]
|
||||
"TARGET_MMA"
|
||||
"<apvi4i2> %A0,%x2,%x3,%4,%5"
|
||||
|
@ -632,12 +632,12 @@
|
|||
(set_attr "prefixed" "yes")])
|
||||
|
||||
(define_insn "mma_<vvi4i4i4>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n,n")]
|
||||
MMA_VVI4I4I4))]
|
||||
"TARGET_MMA"
|
||||
"<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
|
||||
|
@ -645,13 +645,13 @@
|
|||
(set_attr "prefixed" "yes")])
|
||||
|
||||
(define_insn "mma_<avvi4i4i4>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 6 "const_0_to_15_operand" "n")]
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n,n")
|
||||
(match_operand:SI 6 "const_0_to_15_operand" "n,n")]
|
||||
MMA_AVVI4I4I4))]
|
||||
"TARGET_MMA"
|
||||
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
|
||||
|
|
Loading…
Reference in New Issue