cris: try to generate zero-based comparisons
* config/cris/cris.c (cris_reduce_compare): New function. * config/cris/cris-protos.h (cris_reduce_compare): Add prototype. * config/cris/cris.md ("cbranch<mode>4", "cbranchdi4", "cstoredi4") (cstore<mode>4"): Apply cris_reduce_compare in expanders. The decc0ration work of the CRIS port made me look closer at the code for trivial comparisons, as in the condition for branches and conditional-stores, like in: void g(short int a, short int b) { short int c = a + b; if (c >= 0) foo (); } At -O2, the cc0 version of the CRIS port has an explicit *uneliminated* compare instruction ("cmp.w -1,$r10") instead of an (eliminated) compare against 0 (which below I'll call a zero-compare). This for the CRIS-cc0 version, but I see this also for a much older gcc, at 4.7. For the decc0rated port, the compare *is* a test against 0, eventually eliminated. To wit, for cc0 (mind the delay-slot): _g: subq 4,$sp add.w $r11,$r10 cmp.w -1,$r10 ble .L9 move $srp,[$sp] jsr _foo .L9: jump [$sp+] The compare instruction is expected to be eliminated, i.e. the following diff to the above is desired, modulo the missing sibling call, which corresponds to what I get from 4.7 and for the decc0rated port: !--- a Wed Feb 5 15:22:27 2020 !+++ b Wed Feb 5 15:22:51 2020 !@@ -1,8 +1,7 @@ ! _g: ! subq 4,$sp ! add.w $r11,$r10 !- cmp.w -1,$r10 !- ble .L9 !+ bmi .L9 ! move $srp,[$sp] ! ! jsr _foo Tracking this difference, I see that for both cc0-CRIS and the decc0rated CRIS, the comparison actually starts out as a compare against -1 at "expand" time, but is transformed for decc0rated CRIS to a zero-compare in "cse1". For CRIS-cc0 "cse1" does try to replace the compare with a zero-compare, but fails because at the same time it tries to replace the c operand with (a + b). Or some such; it fails and no other pass succeeds. I was not into fixing cc0-handling in core gcc, so I didn't look closer. BTW, at first, I was a bit surprised to see that for compares against a constant, a zero-compare is not canonical RTX for *all* conditions, and that instead only a subset of all RTX conditions against a constant are canonical, transforming one condition to the canonical one by adding 1 or -1 to the constant. It does makes sense at a closer look, but still not so much when emitting RTL. There are several places that mention in comments that emitting RTX as zero-compare is preferable, but nothing is done about it. Some generic code instead seems confused that the *target* is helped by seeing canonical RTX, or perhaps it (its authors) like me, confused about what a canonical comparison is. For example, prepare_cmp_insn calls canonicalize_comparison last before emitting the actual instructions. I see most ports for various port-specific reasons does their own massaging in their cbranch and cstore expanders. Still, the suboptimal compares *should* be fixed at expand time; better start out right than just relying on later optimizations. This kind of change is not acceptable in the current gcc development stage, at least as a change in generic code. However, it's problematic enough that I chose to fix this right now in the CRIS port. For that, I claim a possibly long-standing regression. After this, code before and after decc0ration is similar enough that I can spot compare-elimination-efforts and apply regression test-cases without them drowning in cc0-specific xfailing. I hope to eventually lift out cris_reduce_compare (renamed) into say expmed.c, called in e.g. emit_store_flag_1 (replacing the in-line code) and prepare_cmp_insn. Later.
This commit is contained in:
parent
cf0f07ef0e
commit
cfaf520486
@ -1,3 +1,11 @@
|
||||
2020-02-10 Hans-Peter Nilsson <hp@axis.com>
|
||||
|
||||
Try to generate zero-based comparisons.
|
||||
* config/cris/cris.c (cris_reduce_compare): New function.
|
||||
* config/cris/cris-protos.h (cris_reduce_compare): Add prototype.
|
||||
* config/cris/cris.md ("cbranch<mode>4", "cbranchdi4", "cstoredi4")
|
||||
(cstore<mode>4"): Apply cris_reduce_compare in expanders.
|
||||
|
||||
2020-02-10 Richard Earnshaw <rearnsha@arm.com>
|
||||
|
||||
PR target/91913
|
||||
|
@ -38,6 +38,7 @@ extern bool cris_constant_index_p (const_rtx);
|
||||
extern bool cris_base_p (const_rtx, bool);
|
||||
extern bool cris_base_or_autoincr_p (const_rtx, bool);
|
||||
extern bool cris_bdap_index_p (const_rtx, bool);
|
||||
extern void cris_reduce_compare (rtx *, rtx *, rtx *);
|
||||
extern bool cris_biap_index_p (const_rtx, bool);
|
||||
extern bool cris_legitimate_address_p (machine_mode, rtx, bool);
|
||||
extern bool cris_store_multiple_op_p (rtx);
|
||||
|
@ -3053,6 +3053,63 @@ cris_split_movdx (rtx *operands)
|
||||
return val;
|
||||
}
|
||||
|
||||
/* Try to change a comparison against a constant to be against zero, and
|
||||
an unsigned compare against zero to be an equality test. Beware:
|
||||
only valid for compares of integer-type operands. Also, note that we
|
||||
don't use operand 0 at the moment. */
|
||||
|
||||
void
|
||||
cris_reduce_compare (rtx *relp, rtx *, rtx *op1p)
|
||||
{
|
||||
rtx op1 = *op1p;
|
||||
rtx_code code = GET_CODE (*relp);
|
||||
|
||||
/* Code lifted mostly from emit_store_flag_1. */
|
||||
switch (code)
|
||||
{
|
||||
case LT:
|
||||
if (op1 == const1_rtx)
|
||||
code = LE;
|
||||
break;
|
||||
case LE:
|
||||
if (op1 == constm1_rtx)
|
||||
code = LT;
|
||||
break;
|
||||
case GE:
|
||||
if (op1 == const1_rtx)
|
||||
code = GT;
|
||||
break;
|
||||
case GT:
|
||||
if (op1 == constm1_rtx)
|
||||
code = GE;
|
||||
break;
|
||||
case GEU:
|
||||
if (op1 == const1_rtx)
|
||||
code = NE;
|
||||
break;
|
||||
case LTU:
|
||||
if (op1 == const1_rtx)
|
||||
code = EQ;
|
||||
break;
|
||||
case GTU:
|
||||
if (op1 == const0_rtx)
|
||||
code = NE;
|
||||
break;
|
||||
case LEU:
|
||||
if (op1 == const0_rtx)
|
||||
code = EQ;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (code != GET_CODE (*relp))
|
||||
{
|
||||
*op1p = const0_rtx;
|
||||
PUT_CODE (*relp, code);
|
||||
}
|
||||
}
|
||||
|
||||
/* The expander for the prologue pattern name. */
|
||||
|
||||
void
|
||||
|
@ -3539,7 +3539,7 @@
|
||||
(label_ref (match_operand 3 "" ""))
|
||||
(pc)))]
|
||||
""
|
||||
"")
|
||||
"cris_reduce_compare (&operands[0], &operands[1], &operands[2]);")
|
||||
|
||||
(define_expand "cbranchdi4"
|
||||
[(set (cc0)
|
||||
@ -3552,6 +3552,7 @@
|
||||
(pc)))]
|
||||
""
|
||||
{
|
||||
cris_reduce_compare (&operands[0], &operands[1], &operands[2]);
|
||||
if (TARGET_V32 && !REG_P (operands[1]))
|
||||
operands[1] = force_reg (DImode, operands[1]);
|
||||
if (TARGET_V32 && MEM_P (operands[2]))
|
||||
@ -3652,6 +3653,7 @@
|
||||
[(cc0) (const_int 0)]))]
|
||||
""
|
||||
{
|
||||
cris_reduce_compare (&operands[1], &operands[2], &operands[3]);
|
||||
if (TARGET_V32 && !REG_P (operands[2]))
|
||||
operands[2] = force_reg (DImode, operands[2]);
|
||||
if (TARGET_V32 && MEM_P (operands[3]))
|
||||
@ -3666,7 +3668,7 @@
|
||||
(match_operator:SI 1 "ordered_comparison_operator"
|
||||
[(cc0) (const_int 0)]))]
|
||||
""
|
||||
"")
|
||||
"cris_reduce_compare (&operands[1], &operands[2], &operands[3]);")
|
||||
|
||||
;; Like bCC, we have to check the overflow bit for
|
||||
;; signed conditions.
|
||||
|
Loading…
Reference in New Issue
Block a user