[AArch64] Improve register allocation of fma

This patch improves register allocation of fma by preferring to update the
accumulator register.  This is done by adding fma insns with operand 1 as the
accumulator.  The register allocator considers copy preferences only in operand
order, so if the first operand is dead, it has the highest chance of being
reused as the destination.  As a result, code using fma often gets a better
register allocation.  Performance of SPECFP2017 improves by over 0.5% on some
implementations, while it has no effect on others.  Code using fma is more
readable too; in a simple example we now generate:

	fmadd	s16, s2, s1, s16
	fmadd	s7, s17, s16, s7
	fmadd	s6, s16, s7, s6
	fmadd	s5, s7, s6, s5

instead of:

	fmadd	s16, s16, s2, s1
	fmadd	s7, s7, s16, s6
	fmadd	s6, s6, s7, s5
	fmadd	s5, s5, s6, s4

    gcc/
	* config/aarch64/aarch64.md (fma<mode>4): Change into expand pattern.
	(fnma<mode>4): Likewise.
	(fms<mode>4): Likewise.
	(fnms<mode>4): Likewise.
	(aarch64_fma<mode>4): Rename insn, reorder accumulator operand.
	(aarch64_fnma<mode>4): Likewise.
	(aarch64_fms<mode>4): Likewise.
	(aarch64_fnms<mode>4): Likewise.
	(aarch64_fnmadd<mode>4): Likewise.

From-SVN: r260292
This commit is contained in:
Wilco Dijkstra 2018-05-16 14:33:16 +00:00 committed by Wilco Dijkstra
parent df0fc585b7
commit d6e6e8b677
2 changed files with 78 additions and 29 deletions

View File

@ -1,3 +1,15 @@
2018-05-16 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.md (fma<mode>4): Change into expand pattern.
(fnma<mode>4): Likewise.
(fms<mode>4): Likewise.
(fnms<mode>4): Likewise.
(aarch64_fma<mode>4): Rename insn, reorder accumulator operand.
(aarch64_fnma<mode>4): Likewise.
(aarch64_fms<mode>4): Likewise.
(aarch64_fnms<mode>4): Likewise.
(aarch64_fnmadd<mode>4): Likewise.
2018-05-16 Jason Merrill <jason@redhat.com>
* tree.c (warn_deprecated_use): Return bool. Simplify logic.

View File

@ -4973,57 +4973,94 @@
[(set_attr "type" "f_cvtf2i")]
)
;; fma - no throw
;; fma - expand fma into patterns with the accumulator operand first since
;; reusing the accumulator results in better register allocation.
;; The register allocator considers copy preferences in operand order,
;; so this prefers fmadd s0, s1, s2, s0 over fmadd s1, s1, s2, s0.
(define_insn "fma<mode>4"
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
(fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")
(match_operand:GPF_F16 2 "register_operand" "w")
(match_operand:GPF_F16 3 "register_operand" "w")))]
(define_expand "fma<mode>4"
[(set (match_operand:GPF_F16 0 "register_operand")
(fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand")
(match_operand:GPF_F16 2 "register_operand")
(match_operand:GPF_F16 3 "register_operand")))]
"TARGET_FLOAT"
"fmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
)
;; Fused multiply-add insn: operand 0 = operand 2 * operand 3 + operand 1.
;; The addend (accumulator) is deliberately numbered operand 1 so that it
;; comes first in operand order, which is the order in which the register
;; allocator considers copy preferences; the multiplicands are operands 2
;; and 3.  The output template lists the accumulator last, as the fourth
;; fmadd operand.
(define_insn "*aarch64_fma<mode>4"
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
(fma:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w")
(match_operand:GPF_F16 3 "register_operand" "w")
(match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
"fmadd\\t%<s>0, %<s>2, %<s>3, %<s>1"
[(set_attr "type" "fmac<stype>")]
)
(define_insn "fnma<mode>4"
;; Expander for the named pattern fnma<mode>4:
;; operand 0 = (-operand 1) * operand 2 + operand 3.
;; It has no constraints and no output template; it only generates the RTL
;; shown, keeping the named pattern's operand order (addend as operand 3).
;; NOTE(review): the generated RTL is presumably matched by the
;; *aarch64_fnma<mode>4 insn, which renumbers the addend as operand 1.
(define_expand "fnma<mode>4"
[(set (match_operand:GPF_F16 0 "register_operand")
(fma:GPF_F16
(neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand"))
(match_operand:GPF_F16 2 "register_operand")
(match_operand:GPF_F16 3 "register_operand")))]
"TARGET_FLOAT"
)
(define_insn "*aarch64_fnma<mode>4"
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
(fma:GPF_F16
(neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w"))
(match_operand:GPF_F16 2 "register_operand" "w")
(match_operand:GPF_F16 3 "register_operand" "w")))]
(neg:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w"))
(match_operand:GPF_F16 3 "register_operand" "w")
(match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
"fmsub\\t%<s>0, %<s>1, %<s>2, %<s>3"
"fmsub\\t%<s>0, %<s>2, %<s>3, %<s>1"
[(set_attr "type" "fmac<stype>")]
)
(define_insn "fms<mode>4"
[(set (match_operand:GPF 0 "register_operand" "=w")
(fma:GPF (match_operand:GPF 1 "register_operand" "w")
(match_operand:GPF 2 "register_operand" "w")
(neg:GPF (match_operand:GPF 3 "register_operand" "w"))))]
(define_expand "fms<mode>4"
[(set (match_operand:GPF 0 "register_operand")
(fma:GPF (match_operand:GPF 1 "register_operand")
(match_operand:GPF 2 "register_operand")
(neg:GPF (match_operand:GPF 3 "register_operand"))))]
"TARGET_FLOAT"
"fnmsub\\t%<s>0, %<s>1, %<s>2, %<s>3"
)
;; Fused multiply-subtract insn: operand 0 = operand 2 * operand 3 - operand 1.
;; The negated addend (accumulator) is numbered operand 1 so that it comes
;; first in operand order; the multiplicands are operands 2 and 3.  Emitted
;; as fnmsub with the accumulator as the last operand.
;; Uses the GPF mode iterator and the "fmac<s>" type attribute, unlike the
;; fma/fnma patterns, which use GPF_F16 and "fmac<stype>".
(define_insn "*aarch64_fms<mode>4"
[(set (match_operand:GPF 0 "register_operand" "=w")
(fma:GPF (match_operand:GPF 2 "register_operand" "w")
(match_operand:GPF 3 "register_operand" "w")
(neg:GPF (match_operand:GPF 1 "register_operand" "w"))))]
"TARGET_FLOAT"
"fnmsub\\t%<s>0, %<s>2, %<s>3, %<s>1"
[(set_attr "type" "fmac<s>")]
)
(define_insn "fnms<mode>4"
[(set (match_operand:GPF 0 "register_operand" "=w")
(fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w"))
(match_operand:GPF 2 "register_operand" "w")
(neg:GPF (match_operand:GPF 3 "register_operand" "w"))))]
(define_expand "fnms<mode>4"
[(set (match_operand:GPF 0 "register_operand")
(fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand"))
(match_operand:GPF 2 "register_operand")
(neg:GPF (match_operand:GPF 3 "register_operand"))))]
"TARGET_FLOAT"
"fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
)
;; Negated fused multiply-subtract insn:
;; operand 0 = (-operand 2) * operand 3 - operand 1.
;; The negated addend (accumulator) is numbered operand 1 so that it comes
;; first in operand order; operand 2 (negated) and operand 3 are the
;; multiplicands.  Emitted as fnmadd with the accumulator as the last operand.
(define_insn "*aarch64_fnms<mode>4"
[(set (match_operand:GPF 0 "register_operand" "=w")
(fma:GPF (neg:GPF (match_operand:GPF 2 "register_operand" "w"))
(match_operand:GPF 3 "register_operand" "w")
(neg:GPF (match_operand:GPF 1 "register_operand" "w"))))]
"TARGET_FLOAT"
"fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1"
[(set_attr "type" "fmac<s>")]
)
;; If signed zeros are ignored, -(a * b + c) = -a * b - c.
(define_insn "*fnmadd<mode>4"
(define_insn "*aarch64_fnmadd<mode>4"
[(set (match_operand:GPF 0 "register_operand" "=w")
(neg:GPF (fma:GPF (match_operand:GPF 1 "register_operand" "w")
(match_operand:GPF 2 "register_operand" "w")
(match_operand:GPF 3 "register_operand" "w"))))]
(neg:GPF (fma:GPF (match_operand:GPF 2 "register_operand" "w")
(match_operand:GPF 3 "register_operand" "w")
(match_operand:GPF 1 "register_operand" "w"))))]
"!HONOR_SIGNED_ZEROS (<MODE>mode) && TARGET_FLOAT"
"fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
"fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1"
[(set_attr "type" "fmac<s>")]
)