re PR target/83831 ([RX] Unused bclr,bnot,bset insns)

gcc/
	PR target/83831
	* config/rx/rx-protos.h (rx_reg_dead_or_unused_after_insn,
	rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New
	declarations.
	(set_of_reg): New struct.
	(rx_find_set_of_reg, rx_find_use_of_reg): New functions.
	* config/rx/rx.c (rx_reg_dead_or_unused_after_insn,
	rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New
	functions.
	* config/rx/rx.md (andsi3, iorsi3, xorsi3): Convert to insn_and_split.
	Split into bitclr, bitset, bitinvert patterns if appropriate.
	(*bitset, *bitinvert, *bitclr): Convert to named insn_and_split and
	use rx_fuse_in_memory_bitop.
	(*bitset_in_memory, *bitinvert_in_memory, *bitclr_in_memory): Convert
	to named insn, correct maximum insn length.

gcc/testsuite/
	PR target/83831
	* gcc.target/rx/pr83831.c: New tests.

From-SVN: r257655
This commit is contained in:
Oleg Endo 2018-02-14 12:33:37 +00:00
parent d1fe612461
commit 06e972705d
6 changed files with 376 additions and 22 deletions

View File

@ -1,3 +1,21 @@
2018-02-14 Oleg Endo <olegendo@gcc.gnu.org>
PR target/83831
* config/rx/rx-protos.h (rx_reg_dead_or_unused_after_insn,
rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New
declarations.
(set_of_reg): New struct.
(rx_find_set_of_reg, rx_find_use_of_reg): New functions.
* config/rx/rx.c (rx_reg_dead_or_unused_after_insn,
rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New
functions.
* config/rx/rx.md (andsi3, iorsi3, xorsi3): Convert to insn_and_split.
Split into bitclr, bitset, bitinvert patterns if appropriate.
(*bitset, *bitinvert, *bitclr): Convert to named insn_and_split and
use rx_fuse_in_memory_bitop.
(*bitset_in_memory, *bitinvert_in_memory, *bitclr_in_memory): Convert
to named insn, correct maximum insn length.
2018-02-14 Jozef Lawrynowicz <jozefl.gcc@gmail.com> 2018-02-14 Jozef Lawrynowicz <jozefl.gcc@gmail.com>
PR target/79242 PR target/79242

View File

@ -63,6 +63,112 @@ extern void rx_notice_update_cc (rtx, rtx);
extern void rx_split_cbranch (machine_mode, enum rtx_code, extern void rx_split_cbranch (machine_mode, enum rtx_code,
rtx, rtx, rtx); rtx, rtx, rtx);
extern machine_mode rx_select_cc_mode (enum rtx_code, rtx, rtx); extern machine_mode rx_select_cc_mode (enum rtx_code, rtx, rtx);
extern bool rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno);
extern void rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src,
rtx_insn* dst);
extern bool rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn,
rtx (*gen_insn)(rtx, rtx));
/* Result value of rx_find_set_of_reg. */
struct set_of_reg
{
/* The insn where sh_find_set_of_reg stopped looking.
Can be NULL_RTX if the end of the insn list was reached. */
rtx_insn* insn;
/* The set rtx of the specified reg if found, NULL_RTX otherwise. */
const_rtx set_rtx;
/* The set source rtx of the specified reg if found, NULL_RTX otherwise.
Usually, this is the most interesting return value. */
rtx set_src;
};
/* FIXME: Copy-pasta from SH. Move to rtl.h.
Given a reg rtx and a start insn, try to find the insn that sets
the specified reg by using the specified insn stepping function,
such as 'prev_nonnote_nondebug_insn_bb'. When the insn is found,
try to extract the rtx of the reg set. */
template <typename F> inline set_of_reg
rx_find_set_of_reg (rtx reg, rtx_insn* insn, F stepfunc,
bool ignore_reg_reg_copies = false)
{
set_of_reg result;
result.insn = insn;
result.set_rtx = NULL_RTX;
result.set_src = NULL_RTX;
if (!REG_P (reg) || insn == NULL_RTX)
return result;
for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i))
{
if (BARRIER_P (i))
break;
if (!INSN_P (i) || DEBUG_INSN_P (i))
continue;
if (reg_set_p (reg, i))
{
if (CALL_P (i))
break;
result.insn = i;
result.set_rtx = set_of (reg, i);
if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
break;
result.set_src = XEXP (result.set_rtx, 1);
if (ignore_reg_reg_copies && REG_P (result.set_src))
{
reg = result.set_src;
continue;
}
if (ignore_reg_reg_copies && SUBREG_P (result.set_src)
&& REG_P (SUBREG_REG (result.set_src)))
{
reg = SUBREG_REG (result.set_src);
continue;
}
break;
}
}
/* If the searched reg is found inside a (mem (post_inc:SI (reg))), set_of
will return NULL and set_rtx will be NULL.
In this case report a 'not found'. result.insn will always be non-null
at this point, so no need to check it. */
if (result.set_src != NULL && result.set_rtx == NULL)
result.set_src = NULL;
return result;
}
/* FIXME: Move to rtlh.h. */
template <typename F> inline rtx_insn*
rx_find_use_of_reg (rtx reg, rtx_insn* insn, F stepfunc)
{
if (!REG_P (reg) || insn == NULL_RTX)
return NULL;
for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i))
{
if (BARRIER_P (i))
break;
if (!INSN_P (i) || DEBUG_INSN_P (i))
continue;
if (reg_overlap_mentioned_p (reg, PATTERN (i))
|| (CALL_P (i) && find_reg_fusage (i, USE, reg)))
return i;
}
return NULL;
}
#endif #endif
#endif /* GCC_RX_PROTOS_H */ #endif /* GCC_RX_PROTOS_H */

View File

@ -3439,6 +3439,88 @@ rx_atomic_sequence::~rx_atomic_sequence (void)
emit_insn (gen_mvtc (GEN_INT (CTRLREG_PSW), m_prev_psw_reg)); emit_insn (gen_mvtc (GEN_INT (CTRLREG_PSW), m_prev_psw_reg));
} }
/* Given an insn and a reg number, tell whether the reg dies or is unused
after the insn. */
bool
rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
{
return find_regno_note (i, REG_DEAD, regno) != NULL
|| find_regno_note (i, REG_UNUSED, regno) != NULL;
}
/* Copy dead and unused notes from SRC to DST for the specified REGNO. */
void
rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src, rtx_insn* dst)
{
int regno = REGNO (SUBREG_P (reg) ? SUBREG_REG (reg) : reg);
if (rtx note = find_regno_note (src, REG_DEAD, regno))
add_shallow_copy_of_reg_note (dst, note);
if (rtx note = find_regno_note (src, REG_UNUSED, regno))
add_shallow_copy_of_reg_note (dst, note);
}
/* Try to fuse the current bit-operation insn with the surrounding memory load
and store. */
bool
rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn,
rtx (*gen_insn)(rtx, rtx))
{
rtx op2_reg = SUBREG_P (operands[2]) ? SUBREG_REG (operands[2]) : operands[2];
set_of_reg op2_def = rx_find_set_of_reg (op2_reg, curr_insn,
prev_nonnote_nondebug_insn_bb);
if (op2_def.set_src == NULL_RTX
|| !MEM_P (op2_def.set_src)
|| GET_MODE (op2_def.set_src) != QImode
|| !rx_is_restricted_memory_address (XEXP (op2_def.set_src, 0),
GET_MODE (op2_def.set_src))
|| reg_used_between_p (operands[2], op2_def.insn, curr_insn)
|| !rx_reg_dead_or_unused_after_insn (curr_insn, REGNO (op2_reg))
)
return false;
/* The register operand originates from a memory load and the memory load
could be fused with the bitop insn.
Look for the following memory store with the same memory operand. */
rtx mem = op2_def.set_src;
/* If the memory is an auto-mod address, it can't be fused. */
if (GET_CODE (XEXP (mem, 0)) == POST_INC
|| GET_CODE (XEXP (mem, 0)) == PRE_INC
|| GET_CODE (XEXP (mem, 0)) == POST_DEC
|| GET_CODE (XEXP (mem, 0)) == PRE_DEC)
return false;
rtx_insn* op0_use = rx_find_use_of_reg (operands[0], curr_insn,
next_nonnote_nondebug_insn_bb);
if (op0_use == NULL
|| !(GET_CODE (PATTERN (op0_use)) == SET
&& RX_REG_P (XEXP (PATTERN (op0_use), 1))
&& reg_overlap_mentioned_p (operands[0], XEXP (PATTERN (op0_use), 1))
&& rtx_equal_p (mem, XEXP (PATTERN (op0_use), 0)))
|| !rx_reg_dead_or_unused_after_insn (op0_use, REGNO (operands[0]))
|| reg_set_between_p (operands[2], curr_insn, op0_use))
return false;
/* If the load-modify-store operation is fused it could potentially modify
load/store ordering if there are other memory accesses between the load
and the store for this insn. If there are volatile mems between the load
and store it's better not to change the ordering. If there is a call
between the load and store, it's also not safe to fuse it. */
for (rtx_insn* i = next_nonnote_nondebug_insn_bb (op2_def.insn);
i != NULL && i != op0_use;
i = next_nonnote_nondebug_insn_bb (i))
if (volatile_insn_p (PATTERN (i)) || CALL_P (i))
return false;
emit_insn (gen_insn (mem, operands[1]));
set_insn_deleted (op2_def.insn);
set_insn_deleted (op0_use);
return true;
}
/* Implement TARGET_HARD_REGNO_NREGS. */ /* Implement TARGET_HARD_REGNO_NREGS. */
static unsigned int static unsigned int

View File

@ -1094,7 +1094,7 @@
DONE; DONE;
}) })
(define_insn "andsi3" (define_insn_and_split "andsi3"
[(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r") [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
(and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0") (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
(match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q"))) (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
@ -1110,6 +1110,21 @@
and\t%1, %0 and\t%1, %0
and\t%2, %1, %0 and\t%2, %1, %0
and\t%Q2, %0" and\t%Q2, %0"
"&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2])
&& pow2p_hwi (~UINTVAL (operands[2]))"
[(const_int 0)]
{
/* For negated single bit constants use the bclr insn for smaller code. */
if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG))
FAIL;
rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn,
emit_insn (gen_bitclr (operands[0],
GEN_INT (exact_log2 (~UINTVAL (operands[2]))),
operands[1])));
DONE;
}
[(set_attr "timings" "11,11,11,11,11,11,11,11,33") [(set_attr "timings" "11,11,11,11,11,11,11,11,33")
(set_attr "length" "2,2,3,4,5,6,2,5,5")] (set_attr "length" "2,2,3,4,5,6,2,5,5")]
) )
@ -1383,7 +1398,7 @@
[(set_attr "length" "2,3")] [(set_attr "length" "2,3")]
) )
(define_insn "iorsi3" (define_insn_and_split "iorsi3"
[(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r") [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
(ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0") (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
(match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q"))) (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
@ -1399,6 +1414,21 @@
or\t%1, %0 or\t%1, %0
or\t%2, %1, %0 or\t%2, %1, %0
or\t%Q2, %0" or\t%Q2, %0"
"&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2])
&& pow2p_hwi (UINTVAL (operands[2]))"
[(const_int 0)]
{
/* For single bit constants use the bset insn for smaller code. */
if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG))
FAIL;
rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn,
emit_insn (gen_bitset (operands[0],
GEN_INT (exact_log2 (UINTVAL (operands[2]))),
operands[1])));
DONE;
}
[(set_attr "timings" "11,11,11,11,11,11,11,11,33") [(set_attr "timings" "11,11,11,11,11,11,11,11,33")
(set_attr "length" "2,2,3,4,5,6,2,3,5")] (set_attr "length" "2,2,3,4,5,6,2,3,5")]
) )
@ -1704,7 +1734,7 @@
DONE; DONE;
}) })
(define_insn "xorsi3" (define_insn_and_split "xorsi3"
[(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r") [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
(xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0") (xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
(match_operand:SI 2 "rx_source_operand" (match_operand:SI 2 "rx_source_operand"
@ -1712,6 +1742,21 @@
(clobber (reg:CC CC_REG))] (clobber (reg:CC CC_REG))]
"" ""
"xor\t%Q2, %0" "xor\t%Q2, %0"
"&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2])
&& pow2p_hwi (UINTVAL (operands[2]))"
[(const_int 0)]
{
/* For single bit constants use the bnot insn for smaller code. */
if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG))
FAIL;
rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn,
emit_insn (gen_bitinvert (operands[0],
GEN_INT (exact_log2 (UINTVAL (operands[2]))),
operands[1])));
DONE;
}
[(set_attr "timings" "11,11,11,11,11,33") [(set_attr "timings" "11,11,11,11,11,33")
(set_attr "length" "3,4,5,6,7,6")] (set_attr "length" "3,4,5,6,7,6")]
) )
@ -1960,50 +2005,63 @@
;; Bit manipulation instructions. ;; Bit manipulation instructions.
;; ??? The *_in_memory patterns will not be matched without further help. ;; The *_in_memory patterns will not be matched automatically, not even with
;; At one time we had the insv expander generate them, but I suspect that ;; combiner bridge patterns. Especially when the memory operands have a
;; in general we get better performance by exposing the register load to ;; displacement, the resulting patterns look too complex.
;; the optimizers. ;; Instead we manually look around the matched insn to see if there is a
;; ;; preceeding memory load and a following memory store of the modified register
;; An alternate solution would be to re-organize these patterns such ;; which can be fused into the single *_in_memory insn.
;; that allow both register and memory operands. This would allow the ;; Do that before register allocation, as it can eliminate one temporary
;; register allocator to spill and not load the register operand. This ;; register that needs to be allocated.
;; would be possible only for operations for which we have a constant
;; bit offset, so that we can adjust the address by ofs/8 and replace
;; the offset in the insn by ofs%8.
(define_insn "*bitset" (define_insn_and_split "bitset"
[(set (match_operand:SI 0 "register_operand" "=r") [(set (match_operand:SI 0 "register_operand" "=r")
(ior:SI (ashift:SI (const_int 1) (ior:SI (ashift:SI (const_int 1)
(match_operand:SI 1 "rx_shift_operand" "ri")) (match_operand:SI 1 "rx_shift_operand" "ri"))
(match_operand:SI 2 "register_operand" "0")))] (match_operand:SI 2 "register_operand" "0")))]
"" ""
"bset\t%1, %0" "bset\t%1, %0"
"&& can_create_pseudo_p ()"
[(const_int 0)]
{
if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitset_in_memory))
DONE;
else
FAIL;
}
[(set_attr "length" "3")] [(set_attr "length" "3")]
) )
(define_insn "*bitset_in_memory" (define_insn "bitset_in_memory"
[(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q") [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
(ior:QI (ashift:QI (const_int 1) (ior:QI (ashift:QI (const_int 1)
(match_operand:QI 1 "nonmemory_operand" "ri")) (match_operand:QI 1 "nonmemory_operand" "ri"))
(match_dup 0)))] (match_dup 0)))]
"" ""
"bset\t%1, %0.B" "bset\t%1, %0.B"
[(set_attr "length" "3") [(set_attr "length" "5")
(set_attr "timings" "33")] (set_attr "timings" "33")]
) )
(define_insn "*bitinvert" (define_insn_and_split "bitinvert"
[(set (match_operand:SI 0 "register_operand" "=r") [(set (match_operand:SI 0 "register_operand" "=r")
(xor:SI (ashift:SI (const_int 1) (xor:SI (ashift:SI (const_int 1)
(match_operand:SI 1 "rx_shift_operand" "ri")) (match_operand:SI 1 "rx_shift_operand" "ri"))
(match_operand:SI 2 "register_operand" "0")))] (match_operand:SI 2 "register_operand" "0")))]
"" ""
"bnot\t%1, %0" "bnot\t%1, %0"
"&& can_create_pseudo_p ()"
[(const_int 0)]
{
if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitinvert_in_memory))
DONE;
else
FAIL;
}
[(set_attr "length" "3")] [(set_attr "length" "3")]
) )
(define_insn "*bitinvert_in_memory" (define_insn "bitinvert_in_memory"
[(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q") [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
(xor:QI (ashift:QI (const_int 1) (xor:QI (ashift:QI (const_int 1)
(match_operand:QI 1 "nonmemory_operand" "ri")) (match_operand:QI 1 "nonmemory_operand" "ri"))
@ -2014,7 +2072,7 @@
(set_attr "timings" "33")] (set_attr "timings" "33")]
) )
(define_insn "*bitclr" (define_insn_and_split "bitclr"
[(set (match_operand:SI 0 "register_operand" "=r") [(set (match_operand:SI 0 "register_operand" "=r")
(and:SI (not:SI (and:SI (not:SI
(ashift:SI (ashift:SI
@ -2023,10 +2081,18 @@
(match_operand:SI 2 "register_operand" "0")))] (match_operand:SI 2 "register_operand" "0")))]
"" ""
"bclr\t%1, %0" "bclr\t%1, %0"
"&& can_create_pseudo_p ()"
[(const_int 0)]
{
if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitclr_in_memory))
DONE;
else
FAIL;
}
[(set_attr "length" "3")] [(set_attr "length" "3")]
) )
(define_insn "*bitclr_in_memory" (define_insn "bitclr_in_memory"
[(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q") [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
(and:QI (not:QI (and:QI (not:QI
(ashift:QI (ashift:QI
@ -2035,7 +2101,7 @@
(match_dup 0)))] (match_dup 0)))]
"" ""
"bclr\t%1, %0.B" "bclr\t%1, %0.B"
[(set_attr "length" "3") [(set_attr "length" "5")
(set_attr "timings" "33")] (set_attr "timings" "33")]
) )

View File

@ -1,3 +1,8 @@
2018-02-14 Oleg Endo <olegendo@gcc.gnu.org>
PR target/83831
* gcc.target/rx/pr83831.c: New tests.
2018-02-14 Jozef Lawrynowicz <jozefl.gcc@gmail.com> 2018-02-14 Jozef Lawrynowicz <jozefl.gcc@gmail.com>
PR target/79242 PR target/79242

View File

@ -0,0 +1,77 @@
/* { dg-do compile } */
/* { dg-options "-O1" } */
/* { dg-final { scan-assembler-times "bclr" 6 } } */
/* { dg-final { scan-assembler-times "bset" 6 } } */
/* { dg-final { scan-assembler-times "bnot" 6 } } */
void
test_0 (char* x, unsigned int y)
{
/* Expect 4x bclr here. */
x[0] &= 0xFE;
x[1] = y & ~(1 << 1);
x[2] &= 0xFE;
x[65000] &= 0xFE;
}
unsigned int
test_1 (unsigned int x, unsigned int y)
{
/* Expect 1x bclr here. */
return x & ~(1 << y);
}
unsigned int
test_2 (unsigned int x)
{
/* Expect 1x bclr here. */
return x & ~(1 << 1);
}
void
test_3 (char* x, unsigned int y)
{
/* Expect 4x bset here. */
x[0] |= 0x10;
x[1] = y | (1 << 1);
x[2] |= 0x10;
x[65000] |= 0x10;
}
unsigned int
test_4 (unsigned int x, unsigned int y)
{
/* Expect 1x bset here. */
return x | (1 << y);
}
unsigned int
test_5 (unsigned int x)
{
/* Expect 1x bset here. */
return x | (1 << 8);
}
void
test_6 (char* x, unsigned int y)
{
/* Expect 4x bnot here. */
x[0] ^= 0x10;
x[1] = y ^ (1 << 1);
x[2] ^= 0x10;
x[65000] ^= 0x10;
}
unsigned int
test_7 (unsigned int x, unsigned int y)
{
/* Expect 1x bnot here. */
return x ^ (1 << y);
}
unsigned int
test_8 (unsigned int x)
{
/* Expect 1x bnot here. */
return x ^ (1 << 8);
}