sh-mem.cc (sh_expand_cmpnstr, [...]): Factorize probabilities...

2013-11-06  Christian Bruel  <christian.bruel@st.com>

	* gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr, sh_expand_cmpstr):
	Factorize probabilities.  Use adjust_address instead of
	adjust_automodify_address when possible. Enable for optimize.
	(sh_expand_strlen): New function.
	* gcc/config/sh/sh-protos.h (sh_expand_strlen): Declare.
	* gcc/config/sh/sh.md (strlensi): New pattern.
	(UNSPEC_BUILTIN_STRLEN): Define.

From-SVN: r204445
This commit is contained in:
Christian Bruel 2013-11-06 09:49:15 +01:00 committed by Christian Bruel
parent fda1ba0aea
commit 3a1a7897b7
6 changed files with 187 additions and 61 deletions

View File

@ -1,3 +1,13 @@
2013-11-06 Christian Bruel <christian.bruel@st.com>
* gcc/config/sh/sh-mem.cc (sh_expand_cmpnstr, sh_expand_cmpstr):
Factorize probabilities.  Use adjust_address instead of
adjust_automodify_address when possible. Enable for optimize.
(sh_expand_strlen): New function.
* gcc/config/sh/sh-protos.h (sh_expand_strlen): Declare.
* gcc/config/sh/sh.md (strlensi): New pattern.
(UNSPEC_BUILTIN_STRLEN): Define.
2013-11-06 Jakub Jelinek <jakub@redhat.com>
PR middle-end/58970

View File

@ -179,32 +179,31 @@ expand_block_move (rtx *operands)
return false;
}
/* Branch probabilities, expressed as fractions of REG_BR_PROB_BASE, that
   the string expanders in this file attach to the jumps they emit via
   add_int_reg_note (jump, REG_BR_PROB, ...).  Shared at file scope so
   that every expander uses the same values.  */
static int prob_unlikely = REG_BR_PROB_BASE / 10;
static int prob_likely = REG_BR_PROB_BASE / 4;
/* Emit code to perform a strcmp.
OPERANDS[0] is the destination.
OPERANDS[1] is the first string.
OPERANDS[2] is the second string.
OPERANDS[3] is the align. */
OPERANDS[3] is the known alignment. */
bool
sh_expand_cmpstr (rtx *operands)
{
rtx s1 = copy_rtx (operands[1]);
rtx s2 = copy_rtx (operands[2]);
rtx s1_addr = copy_addr_to_reg (XEXP (s1, 0));
rtx s2_addr = copy_addr_to_reg (XEXP (s2, 0));
rtx addr1 = operands[1];
rtx addr2 = operands[2];
rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
rtx tmp0 = gen_reg_rtx (SImode);
rtx tmp1 = gen_reg_rtx (SImode);
rtx tmp2 = gen_reg_rtx (SImode);
rtx tmp3 = gen_reg_rtx (SImode);
rtx jump;
rtx L_return = gen_label_rtx ();
rtx L_loop_byte = gen_label_rtx ();
rtx L_end_loop_byte = gen_label_rtx ();
rtx jump, addr1, addr2;
int prob_unlikely = REG_BR_PROB_BASE / 10;
int prob_likely = REG_BR_PROB_BASE / 4;
rtx L_loop_long = gen_label_rtx ();
rtx L_end_loop_long = gen_label_rtx ();
@ -220,8 +219,8 @@ sh_expand_cmpstr (rtx *operands)
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
}
addr1 = adjust_automodify_address (s1, SImode, s1_addr, 0);
addr2 = adjust_automodify_address (s2, SImode, s2_addr, 0);
addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
/* tmp2 is aligned, OK to load. */
emit_move_insn (tmp3, addr2);
@ -276,8 +275,8 @@ sh_expand_cmpstr (rtx *operands)
emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
/* start byte loop. */
addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0);
addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0);
addr1 = adjust_address (addr1, QImode, 0);
addr2 = adjust_address (addr2, QImode, 0);
emit_label (L_loop_byte);
@ -317,27 +316,23 @@ sh_expand_cmpstr (rtx *operands)
OPERANDS[1] is the first string.
OPERANDS[2] is the second string.
OPERANDS[3] is the length.
OPERANDS[4] is the align. */
OPERANDS[4] is the known alignment. */
bool
sh_expand_cmpnstr (rtx *operands)
{
rtx s1 = copy_rtx (operands[1]);
rtx s2 = copy_rtx (operands[2]);
rtx s1_addr = copy_addr_to_reg (XEXP (s1, 0));
rtx s2_addr = copy_addr_to_reg (XEXP (s2, 0));
rtx addr1 = operands[1];
rtx addr2 = operands[2];
rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
rtx tmp0 = gen_reg_rtx (SImode);
rtx tmp1 = gen_reg_rtx (SImode);
rtx tmp2 = gen_reg_rtx (SImode);
rtx jump;
rtx L_return = gen_label_rtx ();
rtx L_loop_byte = gen_label_rtx ();
rtx L_end_loop_byte = gen_label_rtx ();
rtx jump, addr1, addr2;
int prob_unlikely = REG_BR_PROB_BASE / 10;
int prob_likely = REG_BR_PROB_BASE / 4;
rtx len = force_reg (SImode, operands[3]);
int constp = (CONST_INT_P (operands[3]));
int bytes = (constp ? INTVAL (operands[3]) : 0);
@ -366,10 +361,10 @@ sh_expand_cmpnstr (rtx *operands)
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
}
addr1 = adjust_automodify_address (s1, SImode, s1_addr, 0);
addr2 = adjust_automodify_address (s2, SImode, s2_addr, 0);
addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
/* words count. */
/* word count. Do we have iterations ? */
emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
/*start long loop. */
@ -429,48 +424,128 @@ sh_expand_cmpnstr (rtx *operands)
emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
}
addr1 = adjust_automodify_address (s1, QImode, s1_addr, 0);
addr2 = adjust_automodify_address (s2, QImode, s2_addr, 0);
addr1 = adjust_address (addr1, QImode, 0);
addr2 = adjust_address (addr2, QImode, 0);
emit_label (L_loop_byte);
emit_label (L_loop_byte);
emit_insn (gen_extendqisi2 (tmp2, addr2));
emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
emit_insn (gen_extendqisi2 (tmp2, addr2));
emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
emit_insn (gen_extendqisi2 (tmp1, addr1));
emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
emit_insn (gen_extendqisi2 (tmp1, addr1));
emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
if (flag_delayed_branch)
emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
if (flag_delayed_branch)
emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
if (TARGET_SH2)
emit_insn (gen_dect (len, len));
else
{
emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
emit_insn (gen_tstsi_t (len, len));
}
if (TARGET_SH2)
emit_insn (gen_dect (len, len));
else
{
emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
emit_insn (gen_tstsi_t (len, len));
}
jump = emit_jump_insn (gen_branch_false (L_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
/* end byte loop. */
jump = emit_jump_insn (gen_branch_false (L_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
/* end byte loop. */
emit_label (L_end_loop_byte);
emit_label (L_end_loop_byte);
if (! flag_delayed_branch)
emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
if (! flag_delayed_branch)
emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
emit_label (L_return);
emit_label (L_return);
emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
return true;
return true;
}
/* Emit code to perform a strlen.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   The emitted sequence has two loops: a word loop that loads four bytes
   at a time and tests them with the cmpstr_t pattern, followed by a byte
   loop that locates the exact terminator.  When the known alignment is
   below 4, a run-time test of the low two address bits is emitted first
   and may branch straight to the byte loop.

   Always returns true.  */
bool
sh_expand_strlen (rtx *operands)
{
rtx addr1 = operands[1];
/* Running pointer into the string; advanced by both loops.  */
rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
rtx start_addr = gen_reg_rtx (Pmode);
rtx tmp0 = gen_reg_rtx (SImode);
rtx tmp1 = gen_reg_rtx (SImode);
/* NOTE(review): L_return and L_end_loop_long are emitted below but no
   jump in this function targets them; they only mark positions.  */
rtx L_return = gen_label_rtx ();
rtx L_loop_byte = gen_label_rtx ();
rtx jump;
rtx L_loop_long = gen_label_rtx ();
rtx L_end_loop_long = gen_label_rtx ();
int align = INTVAL (operands[3]);
/* NOTE(review): every path reaches the final subsi3, which overwrites
   OPERANDS[0], so this initial store looks dead -- confirm.  */
emit_move_insn (operands[0], GEN_INT (-1));
/* Remember the start of the string for the final length computation.  */
emit_move_insn (start_addr, current_addr);
if (align < 4)
{
/* Run-time alignment check on the low two address bits.  Presumably
   the T bit is set when they are clear, so a misaligned pointer takes
   the branch and skips the word loop -- confirm against tstsi_t.  */
emit_insn (gen_tstsi_t (GEN_INT (3), current_addr));
jump = emit_jump_insn (gen_branch_false (L_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
}
/* tmp0 holds the byte pattern the word loop compares against.  */
emit_move_insn (tmp0, operands[2]);
/* View the string as an SImode memory reference at current_addr.  */
addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);
/* Start of the word loop.  */
emit_label (L_loop_long);
/* current_addr is word aligned on this path, OK to load into tmp1.  */
emit_move_insn (tmp1, addr1);
emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));
/* Is there a 0 byte ?  Keep looping while the comparison is false.  */
emit_insn (gen_cmpstr_t (tmp0, tmp1));
jump = emit_jump_insn (gen_branch_false (L_loop_long));
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
/* End of the word loop.  */
emit_label (L_end_loop_long);
/* Step back to the start of the word that matched, so the byte loop
   can rescan it.  */
emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));
/* Start of the byte loop: access the same memory reference in QImode.  */
addr1 = adjust_address (addr1, QImode, 0);
emit_label (L_loop_byte);
emit_insn (gen_extendqisi2 (tmp1, addr1));
emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
/* NOTE(review): the byte loop compares against constant 0 while the
   word loop compares against OPERANDS[2]; the two agree only when
   OPERANDS[2] is 0 -- confirm callers guarantee that.  */
emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
jump = emit_jump_insn (gen_branch_false (L_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
/* End of the byte loop.  */
emit_label (L_return);
/* current_addr was incremented after loading the terminator, so it is
   one past it; bias start_addr by 1 to compensate before subtracting.  */
emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));
return true;
}

View File

@ -118,6 +118,7 @@ extern bool expand_block_move (rtx *);
extern void prepare_move_operands (rtx[], enum machine_mode mode);
extern bool sh_expand_cmpstr (rtx *);
extern bool sh_expand_cmpnstr (rtx *);
extern bool sh_expand_strlen (rtx *);
extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode,
enum rtx_code comparison);
extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int);

View File

@ -161,6 +161,9 @@
;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
(UNSPEC_PCREL_SYMOFF 46)
;; Misc builtins
(UNSPEC_BUILTIN_STRLEN 47)
;; These are used with unspec_volatile.
(UNSPECV_BLOCKAGE 0)
(UNSPECV_ALIGN 1)
@ -12059,7 +12062,7 @@ label:
(compare:SI (match_operand:BLK 1 "memory_operand")
(match_operand:BLK 2 "memory_operand")))
(use (match_operand 3 "immediate_operand"))]
"TARGET_SH1"
"TARGET_SH1 && optimize"
{
if (! optimize_insn_for_size_p () && sh_expand_cmpstr (operands))
DONE;
@ -12073,7 +12076,7 @@ label:
(match_operand:BLK 2 "memory_operand")))
(use (match_operand:SI 3 "immediate_operand"))
(use (match_operand:SI 4 "immediate_operand"))]
"TARGET_SH1"
"TARGET_SH1 && optimize"
{
if (! optimize_insn_for_size_p () && sh_expand_cmpnstr (operands))
DONE;
@ -12081,6 +12084,20 @@ label:
FAIL;
})
;; Expander for the "strlensi" pattern.  Operand 0 is the SImode result
;; register, operand 1 the BLK memory holding the string, operands 2 and
;; 3 are immediates.  The whole operand array is handed to
;; sh_expand_strlen.  Only enabled for TARGET_SH1 when optimizing.
(define_expand "strlensi"
[(set (match_operand:SI 0 "register_operand")
(unspec:SI [(match_operand:BLK 1 "memory_operand")
(match_operand:SI 2 "immediate_operand")
(match_operand:SI 3 "immediate_operand")]
UNSPEC_BUILTIN_STRLEN))]
"TARGET_SH1 && optimize"
{
/* Use the inline expansion only when this insn is not being optimized
   for size; otherwise FAIL (presumably so that the caller falls back
   to its default handling of strlen -- confirm).  */
if (! optimize_insn_for_size_p () && sh_expand_strlen (operands))
DONE;
else
FAIL;
})
;; -------------------------------------------------------------------------
;; Floating point instructions.

View File

@ -1,3 +1,7 @@
2013-11-06 Christian Bruel <christian.bruel@st.com>
* gcc.target/sh/strlen.c: New test.
2013-11-06 Jakub Jelinek <jakub@redhat.com>
PR middle-end/58970

View File

@ -0,0 +1,19 @@
/* Check that the __builtin_strlen function is inlined with cmp/str
when optimizing for speed. */
/* { dg-do compile { target "sh*-*-*" } } */
/* { dg-options "-O2" } */
/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
/* { dg-final { scan-assembler-not "jmp" } } */
/* { dg-final { scan-assembler-times "cmp/str" 2 } } */
/* { dg-final { scan-assembler-times "tst\t#3" 1 } } */
/* Return the length of S1 as computed by __builtin_strlen.  Nothing is
   known about the alignment of S1 here.  The return type is spelled out
   because implicit int is not valid C since C99 and is rejected by
   current compilers.  */
int
test00 (const char *s1)
{
  return __builtin_strlen (s1);
}
/* Check that no test for alignment is needed: S1 is declared 4-byte
   aligned through __builtin_assume_aligned before its length is taken.
   The return type is spelled out because implicit int is not valid C
   since C99 and is rejected by current compilers.  */
int
test03 (const char *s1)
{
  return __builtin_strlen (__builtin_assume_aligned (s1, 4));
}