PR target/104345: Use nvptx "set" instruction for cond ? -1 : 0
This patch addresses the "increased register pressure" regression on nvptx-none caused by my change to transition the backend to a STORE_FLAG_VALUE = 1 target. This improved code generation for the more common case of producing 0/1 Boolean values, but unfortunately made things marginally worse when a 0/-1 mask value is desired. Unfortunately, nvptx kernels are extremely sensitive to changes in register usage, which was observable in the reported PR. This patch provides optimizations for -(cond ? 1 : 0), effectively simplifying this into cond ? -1 : 0, where these ternary operators are provided by nvptx's selp instruction, and for the specific case of SImode, using (restoring) nvptx's "set" instruction (which avoids the need for a predicate register). This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu with a "make" and "make -k check" with no new failures. Unfortunately, the exact register usage of an nvptx kernel depends upon the version of the CUDA drivers being used (and the hardware), but I believe this change should resolve the PR (for Thomas) by improving code generation for the cases that regressed. gcc/ChangeLog: PR target/104345 * config/nvptx/nvptx.md (sel_true<mode>): Fix indentation. (sel_false<mode>): Likewise. (define_code_iterator eqne): New code iterator for EQ and NE. (*selp<mode>_neg_<code>): New define_insn_and_split to optimize the negation of a selp instruction. (*selp<mode>_not_<code>): New define_insn_and_split to optimize the bitwise not of a selp instruction. (*setcc_int<mode>): Use set instruction for neg:SI of a selp. gcc/testsuite/ChangeLog: PR target/104345 * gcc.target/nvptx/neg-selp.c: New test case.
This commit is contained in:
parent
f68c3de7fc
commit
9bacd7af2e
|
@ -977,7 +977,7 @@
|
|||
|
||||
(define_insn "sel_true<mode>"
|
||||
[(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
||||
(if_then_else:HSDIM
|
||||
(if_then_else:HSDIM
|
||||
(ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
|
||||
(match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
|
||||
(match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
|
||||
|
@ -986,7 +986,7 @@
|
|||
|
||||
(define_insn "sel_true<mode>"
|
||||
[(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
||||
(if_then_else:SDFM
|
||||
(if_then_else:SDFM
|
||||
(ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
|
||||
(match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
|
||||
(match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
|
||||
|
@ -995,7 +995,7 @@
|
|||
|
||||
(define_insn "sel_false<mode>"
|
||||
[(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
||||
(if_then_else:HSDIM
|
||||
(if_then_else:HSDIM
|
||||
(eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
|
||||
(match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
|
||||
(match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
|
||||
|
@ -1004,13 +1004,63 @@
|
|||
|
||||
;; Conditional select on an inverted predicate, for each mode of the SDFM
;; iterator.  The RTL picks operand 2 when BImode operand 1 equals zero and
;; operand 3 otherwise; the template therefore passes the two value
;; operands to selp in swapped order (%3 before %2).
(define_insn "sel_false<mode>"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
	(if_then_else:SDFM
	  (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
	  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
	  (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\\tselp%t0\\t%0, %3, %2, %1;")
|
||||
|
||||
;; Code iterator covering the eq and ne comparison codes; used by the
;; *selp<mode>_neg_<code> and *selp<mode>_not_<code> splitters.
(define_code_iterator eqne [eq ne])
|
||||
|
||||
;; Split negation of a predicate into a conditional move.
;; Matches the negation of an eq/ne test of BImode operand 1 against zero.
;; The "#" template means the insn is never output as-is; the "&& 1" split
;; condition makes the split unconditional.  The replacement selects -1
;; when the comparison holds and 0 otherwise, i.e. -(cond ? 1 : 0) becomes
;; cond ? -1 : 0.
(define_insn_and_split "*selp<mode>_neg_<code>"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(neg:HSDIM (eqne:HSDIM
		     (match_operand:BI 1 "nvptx_register_operand" "R")
		     (const_int 0))))]
  ""
  "#"
  "&& 1"
  [(set (match_dup 0)
	(if_then_else:HSDIM
	  (eqne (match_dup 1) (const_int 0))
	  (const_int -1)
	  (const_int 0)))])
|
||||
|
||||
;; Split bitwise not of a predicate into a conditional move.
;; Matches the bitwise complement of an eq/ne test of BImode operand 1
;; against zero.  The "#" template means the insn is never output as-is;
;; the "&& 1" split condition makes the split unconditional.  With a 0/1
;; comparison result, ~1 is -2 and ~0 is -1, so the replacement selects -2
;; when the comparison holds and -1 otherwise.
(define_insn_and_split "*selp<mode>_not_<code>"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(not:HSDIM (eqne:HSDIM
		     (match_operand:BI 1 "nvptx_register_operand" "R")
		     (const_int 0))))]
  ""
  "#"
  "&& 1"
  [(set (match_dup 0)
	(if_then_else:HSDIM
	  (eqne (match_dup 1) (const_int 0))
	  (const_int -2)
	  (const_int -1)))])
|
||||
|
||||
;; SImode 0/-1 mask from an integer comparison.  The negated comparison of
;; operands 2 and 3 (modes of the HSDIM iterator) is emitted as a single
;; "set" instruction writing operand 0, so no separate predicate register
;; is needed for the result.
(define_insn "*setcc_int<mode>"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(neg:SI
	  (match_operator:SI 1 "nvptx_comparison_operator"
	    [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
	     (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))]
  ""
  "%.\\tset%t0%c1\\t%0, %2, %3;")
|
||||
|
||||
;; SImode 0/-1 mask from a floating-point comparison.  The negated
;; comparison of operands 2 and 3 (modes of the SDFM iterator) is emitted
;; as a single "set" instruction writing operand 0, so no separate
;; predicate register is needed for the result.
(define_insn "*setcc_int<mode>"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(neg:SI
	  (match_operator:SI 1 "nvptx_float_comparison_operator"
	    [(match_operand:SDFM 2 "nvptx_register_operand" "R")
	     (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))]
  ""
  "%.\\tset%t0%c1\\t%0, %2, %3;")
|
||||
|
||||
(define_insn "setcc_float<mode>"
|
||||
[(set (match_operand:SF 0 "nvptx_register_operand" "=R")
|
||||
(match_operator:SF 1 "nvptx_comparison_operator"
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
/* Negate a 0/1 equality result: returns -1 when x == y, otherwise 0.
   The two-statement form is deliberate; it is the source shape the
   scan-assembler-not "neg.s32" directive in this file checks.  */
int neg(int x, int y)
{
  int t = (x == y) ? 1 : 0;
  return -t;
}
|
||||
|
||||
/* Bitwise-complement a 0/1 equality result: returns -2 (~1) when x == y,
   otherwise -1 (~0).  The two-statement form is deliberate; it is the
   source shape the scan-assembler-not "not.b32" directive in this file
   checks.  */
int not(int x, int y)
{
  int t = (x == y) ? 1 : 0;
  return ~t;
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "neg.s32" } } */
|
||||
/* { dg-final { scan-assembler-not "not.b32" } } */
|
Loading…
Reference in New Issue