rs6000-protos.h (expand_strn_compare): Declare.
* config/rs6000/rs6000-protos.h (expand_strn_compare): Declare. * config/rs6000/rs6000.md (UNSPEC_CMPB): New unspec. (cmpb<mode>3): pattern for generating cmpb. (cmpstrnsi): pattern to expand strncmp (). * config/rs6000/rs6000.opt (mstring-compare-inline-limit): Add a new target option for controlling how much code inline expansion of strncmp() will be allowed to generate. * config/rs6000/rs6000.c (expand_strncmp_align_check): generate code for runtime page crossing check of strncmp () args. (expand_strn_compare): Function to do builtin expansion of strncmp (). From-SVN: r243813
This commit is contained in:
parent
f407722285
commit
e9c2033eff
|
@ -1,3 +1,16 @@
|
||||||
|
2016-12-19 Aaron Sawdey <acsawdey@linux.vnet.ibm.com>
|
||||||
|
|
||||||
|
* config/rs6000/rs6000-protos.h (expand_strn_compare): Declare.
|
||||||
|
* config/rs6000/rs6000.md (UNSPEC_CMPB): New unspec.
|
||||||
|
(cmpb<mode>3): pattern for generating cmpb.
|
||||||
|
(cmpstrnsi): pattern to expand strncmp ().
|
||||||
|
* config/rs6000/rs6000.opt (mstring-compare-inline-limit): Add a new
|
||||||
|
target option for controlling how much code inline expansion of
|
||||||
|
strncmp() will be allowed to generate.
|
||||||
|
* config/rs6000/rs6000.c (expand_strncmp_align_check): generate code
|
||||||
|
for runtime page crossing check of strncmp () args.
|
||||||
|
(expand_strn_compare): Function to do builtin expansion of strncmp ().
|
||||||
|
|
||||||
2016-12-19 David Malcolm <dmalcolm@redhat.com>
|
2016-12-19 David Malcolm <dmalcolm@redhat.com>
|
||||||
|
|
||||||
* print-rtl-function.c (print_rtx_function): Update
|
* print-rtl-function.c (print_rtx_function): Update
|
||||||
|
|
|
@ -78,6 +78,7 @@ extern void rs6000_scale_v2df (rtx, rtx, int);
|
||||||
extern int expand_block_clear (rtx[]);
|
extern int expand_block_clear (rtx[]);
|
||||||
extern int expand_block_move (rtx[]);
|
extern int expand_block_move (rtx[]);
|
||||||
extern bool expand_block_compare (rtx[]);
|
extern bool expand_block_compare (rtx[]);
|
||||||
|
extern bool expand_strn_compare (rtx[]);
|
||||||
extern const char * rs6000_output_load_multiple (rtx[]);
|
extern const char * rs6000_output_load_multiple (rtx[]);
|
||||||
extern bool rs6000_is_valid_mask (rtx, int *, int *, machine_mode);
|
extern bool rs6000_is_valid_mask (rtx, int *, int *, machine_mode);
|
||||||
extern bool rs6000_is_valid_and_mask (rtx, machine_mode);
|
extern bool rs6000_is_valid_and_mask (rtx, machine_mode);
|
||||||
|
|
|
@ -18993,7 +18993,6 @@ rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Figure out the correct instructions to generate to load data for
|
/* Figure out the correct instructions to generate to load data for
|
||||||
block compare. MODE is used for the read from memory, and
|
block compare. MODE is used for the read from memory, and
|
||||||
data is zero extended if REG is wider than MODE. If LE code
|
data is zero extended if REG is wider than MODE. If LE code
|
||||||
|
@ -19430,7 +19429,387 @@ expand_block_compare (rtx operands[])
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Generate alignment check and branch code to set up for
|
||||||
|
strncmp when we don't have DI alignment.
|
||||||
|
STRNCMP_LABEL is the label to branch if there is a page crossing.
|
||||||
|
SRC is the string pointer to be examined.
|
||||||
|
BYTES is the max number of bytes to compare. */
|
||||||
|
static void
|
||||||
|
expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
|
||||||
|
{
|
||||||
|
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
|
||||||
|
rtx src_check = copy_addr_to_reg (XEXP (src, 0));
|
||||||
|
if (GET_MODE (src_check) == SImode)
|
||||||
|
emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
|
||||||
|
else
|
||||||
|
emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
|
||||||
|
rtx cond = gen_reg_rtx (CCmode);
|
||||||
|
emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
|
||||||
|
GEN_INT (4096 - bytes)));
|
||||||
|
|
||||||
|
rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);
|
||||||
|
|
||||||
|
rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||||
|
pc_rtx, lab_ref);
|
||||||
|
rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||||
|
JUMP_LABEL (j) = strncmp_label;
|
||||||
|
LABEL_NUSES (strncmp_label) += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Expand a string compare operation with length, and return
|
||||||
|
true if successful. Return false if we should let the
|
||||||
|
compiler generate normal code, probably a strncmp call.
|
||||||
|
|
||||||
|
OPERANDS[0] is the target (result).
|
||||||
|
OPERANDS[1] is the first source.
|
||||||
|
OPERANDS[2] is the second source.
|
||||||
|
OPERANDS[3] is the length.
|
||||||
|
OPERANDS[4] is the alignment in bytes. */
|
||||||
|
bool
|
||||||
|
expand_strn_compare (rtx operands[])
|
||||||
|
{
|
||||||
|
rtx target = operands[0];
|
||||||
|
rtx orig_src1 = operands[1];
|
||||||
|
rtx orig_src2 = operands[2];
|
||||||
|
rtx bytes_rtx = operands[3];
|
||||||
|
rtx align_rtx = operands[4];
|
||||||
|
HOST_WIDE_INT cmp_bytes = 0;
|
||||||
|
rtx src1 = orig_src1;
|
||||||
|
rtx src2 = orig_src2;
|
||||||
|
|
||||||
|
/* If this is not a fixed size compare, just call strncmp. */
|
||||||
|
if (!CONST_INT_P (bytes_rtx))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* This must be a fixed size alignment. */
|
||||||
|
if (!CONST_INT_P (align_rtx))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int base_align = INTVAL (align_rtx);
|
||||||
|
int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
|
||||||
|
int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
|
||||||
|
|
||||||
|
/* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */
|
||||||
|
if (SLOW_UNALIGNED_ACCESS (word_mode, align1)
|
||||||
|
|| SLOW_UNALIGNED_ACCESS (word_mode, align2))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
gcc_assert (GET_MODE (target) == SImode);
|
||||||
|
|
||||||
|
HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
|
||||||
|
|
||||||
|
/* If we have an LE target without ldbrx and word_mode is DImode,
|
||||||
|
then we must avoid using word_mode. */
|
||||||
|
int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
|
||||||
|
&& word_mode == DImode);
|
||||||
|
|
||||||
|
int word_mode_size = GET_MODE_SIZE (word_mode);
|
||||||
|
|
||||||
|
int offset = 0;
|
||||||
|
machine_mode load_mode =
|
||||||
|
select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
|
||||||
|
int load_mode_size = GET_MODE_SIZE (load_mode);
|
||||||
|
|
||||||
|
/* We don't want to generate too much code. Also if bytes is
|
||||||
|
4096 or larger we always want the library strncmp anyway. */
|
||||||
|
int groups = ROUND_UP (bytes, load_mode_size) / load_mode_size;
|
||||||
|
if (bytes >= 4096 || groups > rs6000_string_compare_inline_limit)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
rtx result_reg = gen_reg_rtx (word_mode);
|
||||||
|
rtx final_move_label = gen_label_rtx ();
|
||||||
|
rtx final_label = gen_label_rtx ();
|
||||||
|
rtx begin_compare_label = NULL;
|
||||||
|
|
||||||
|
if (base_align < 8)
|
||||||
|
{
|
||||||
|
/* Generate code that checks distance to 4k boundary for this case. */
|
||||||
|
begin_compare_label = gen_label_rtx ();
|
||||||
|
rtx strncmp_label = gen_label_rtx ();
|
||||||
|
rtx jmp;
|
||||||
|
|
||||||
|
/* Strncmp for power8 in glibc does this:
|
||||||
|
rldicl r8,r3,0,52
|
||||||
|
cmpldi cr7,r8,4096-16
|
||||||
|
bgt cr7,L(pagecross) */
|
||||||
|
|
||||||
|
if (align1 < 8)
|
||||||
|
expand_strncmp_align_check (strncmp_label, src1, bytes);
|
||||||
|
if (align2 < 8)
|
||||||
|
expand_strncmp_align_check (strncmp_label, src2, bytes);
|
||||||
|
|
||||||
|
/* Now generate the following sequence:
|
||||||
|
- branch to begin_compare
|
||||||
|
- strncmp_label
|
||||||
|
- call to strncmp
|
||||||
|
- branch to final_label
|
||||||
|
- begin_compare_label */
|
||||||
|
|
||||||
|
rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
|
||||||
|
jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
|
||||||
|
JUMP_LABEL(jmp) = begin_compare_label;
|
||||||
|
LABEL_NUSES (begin_compare_label) += 1;
|
||||||
|
emit_barrier ();
|
||||||
|
|
||||||
|
emit_label (strncmp_label);
|
||||||
|
|
||||||
|
if (!REG_P (XEXP (src1, 0)))
|
||||||
|
{
|
||||||
|
rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
||||||
|
src1 = replace_equiv_address (src1, src1_reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!REG_P (XEXP (src2, 0)))
|
||||||
|
{
|
||||||
|
rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
||||||
|
src2 = replace_equiv_address (src2, src2_reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -m32 -mpowerpc64 results in word_mode being DImode even
|
||||||
|
though otherwise it is 32-bit. The length arg to strncmp
|
||||||
|
is a size_t which will be the same size as pointers. */
|
||||||
|
rtx len_rtx;
|
||||||
|
if (TARGET_64BIT)
|
||||||
|
len_rtx = gen_reg_rtx(DImode);
|
||||||
|
else
|
||||||
|
len_rtx = gen_reg_rtx(SImode);
|
||||||
|
|
||||||
|
emit_move_insn (len_rtx, bytes_rtx);
|
||||||
|
|
||||||
|
emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "strncmp"),
|
||||||
|
target, LCT_NORMAL, GET_MODE (target), 3,
|
||||||
|
force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
||||||
|
force_reg (Pmode, XEXP (src2, 0)), Pmode,
|
||||||
|
len_rtx, GET_MODE (len_rtx));
|
||||||
|
|
||||||
|
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
|
||||||
|
jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
|
||||||
|
JUMP_LABEL (jmp) = final_label;
|
||||||
|
LABEL_NUSES (final_label) += 1;
|
||||||
|
emit_barrier ();
|
||||||
|
emit_label (begin_compare_label);
|
||||||
|
}
|
||||||
|
|
||||||
|
rtx cleanup_label = NULL;
|
||||||
|
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||||
|
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||||
|
|
||||||
|
/* Generate sequence of ld/ldbrx, cmpb to compare out
|
||||||
|
to the length specified. */
|
||||||
|
while (bytes > 0)
|
||||||
|
{
|
||||||
|
/* Compare sequence:
|
||||||
|
check each 8B with: ld/ld cmpd bne
|
||||||
|
cleanup code at end:
|
||||||
|
cmpb get byte that differs
|
||||||
|
cmpb look for zero byte
|
||||||
|
orc combine
|
||||||
|
cntlzd get bit of first zero/diff byte
|
||||||
|
subfic convert for rldcl use
|
||||||
|
rldcl rldcl extract diff/zero byte
|
||||||
|
subf subtract for final result
|
||||||
|
|
||||||
|
The last compare can branch around the cleanup code if the
|
||||||
|
result is zero because the strings are exactly equal. */
|
||||||
|
int align = compute_current_alignment (base_align, offset);
|
||||||
|
if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||||
|
load_mode = select_block_compare_mode (offset, bytes, align,
|
||||||
|
word_mode_ok);
|
||||||
|
else
|
||||||
|
load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
|
||||||
|
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||||
|
if (bytes >= load_mode_size)
|
||||||
|
cmp_bytes = load_mode_size;
|
||||||
|
else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||||
|
{
|
||||||
|
/* Move this load back so it doesn't go past the end.
|
||||||
|
P8/P9 can do this efficiently. */
|
||||||
|
int extra_bytes = load_mode_size - bytes;
|
||||||
|
cmp_bytes = bytes;
|
||||||
|
if (extra_bytes < offset)
|
||||||
|
{
|
||||||
|
offset -= extra_bytes;
|
||||||
|
cmp_bytes = load_mode_size;
|
||||||
|
bytes = cmp_bytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
/* P7 and earlier can't do the overlapping load trick fast,
|
||||||
|
so this forces a non-overlapping load and a shift to get
|
||||||
|
rid of the extra bytes. */
|
||||||
|
cmp_bytes = bytes;
|
||||||
|
|
||||||
|
src1 = adjust_address (orig_src1, load_mode, offset);
|
||||||
|
src2 = adjust_address (orig_src2, load_mode, offset);
|
||||||
|
|
||||||
|
if (!REG_P (XEXP (src1, 0)))
|
||||||
|
{
|
||||||
|
rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
||||||
|
src1 = replace_equiv_address (src1, src1_reg);
|
||||||
|
}
|
||||||
|
set_mem_size (src1, cmp_bytes);
|
||||||
|
|
||||||
|
if (!REG_P (XEXP (src2, 0)))
|
||||||
|
{
|
||||||
|
rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
||||||
|
src2 = replace_equiv_address (src2, src2_reg);
|
||||||
|
}
|
||||||
|
set_mem_size (src2, cmp_bytes);
|
||||||
|
|
||||||
|
do_load_for_compare (tmp_reg_src1, src1, load_mode);
|
||||||
|
do_load_for_compare (tmp_reg_src2, src2, load_mode);
|
||||||
|
|
||||||
|
/* We must always left-align the data we read, and
|
||||||
|
clear any bytes to the right that are beyond the string.
|
||||||
|
Otherwise the cmpb sequence won't produce the correct
|
||||||
|
results. The beginning of the compare will be done
|
||||||
|
with word_mode so will not have any extra shifts or
|
||||||
|
clear rights. */
|
||||||
|
|
||||||
|
if (load_mode_size < word_mode_size)
|
||||||
|
{
|
||||||
|
/* Rotate left first. */
|
||||||
|
rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
||||||
|
if (word_mode == DImode)
|
||||||
|
{
|
||||||
|
emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
|
||||||
|
emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
|
||||||
|
emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmp_bytes < word_mode_size)
|
||||||
|
{
|
||||||
|
/* Now clear right. This plus the rotate can be
|
||||||
|
turned into a rldicr instruction. */
|
||||||
|
HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||||
|
rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||||
|
if (word_mode == DImode)
|
||||||
|
{
|
||||||
|
emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
|
||||||
|
emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
|
||||||
|
emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int remain = bytes - cmp_bytes;
|
||||||
|
|
||||||
|
rtx dst_label;
|
||||||
|
if (remain > 0)
|
||||||
|
{
|
||||||
|
if (!cleanup_label)
|
||||||
|
cleanup_label = gen_label_rtx ();
|
||||||
|
dst_label = cleanup_label;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
dst_label = final_move_label;
|
||||||
|
|
||||||
|
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||||
|
rtx cond = gen_reg_rtx (CCmode);
|
||||||
|
|
||||||
|
if (remain == 0)
|
||||||
|
{
|
||||||
|
/* For the last chunk, subf. also
|
||||||
|
generates the zero result we need. */
|
||||||
|
rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
|
||||||
|
rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
emit_move_insn (cond, gen_rtx_COMPARE (CCmode,
|
||||||
|
tmp_reg_src1, tmp_reg_src2));
|
||||||
|
|
||||||
|
rtx cmp_rtx;
|
||||||
|
if (remain > 0)
|
||||||
|
cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||||
|
else
|
||||||
|
cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
|
||||||
|
|
||||||
|
rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||||
|
lab_ref, pc_rtx);
|
||||||
|
rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||||
|
JUMP_LABEL (j) = dst_label;
|
||||||
|
LABEL_NUSES (dst_label) += 1;
|
||||||
|
|
||||||
|
offset += cmp_bytes;
|
||||||
|
bytes -= cmp_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cleanup_label)
|
||||||
|
emit_label (cleanup_label);
|
||||||
|
|
||||||
|
/* Generate the final sequence that identifies the differing
|
||||||
|
byte and generates the final result, taking into account
|
||||||
|
zero bytes:
|
||||||
|
|
||||||
|
cmpb cmpb_result1, src1, src2
|
||||||
|
cmpb cmpb_result2, src1, zero
|
||||||
|
orc cmpb_result1, cmp_result1, cmpb_result2
|
||||||
|
cntlzd get bit of first zero/diff byte
|
||||||
|
addi convert for rldcl use
|
||||||
|
rldcl rldcl extract diff/zero byte
|
||||||
|
subf subtract for final result
|
||||||
|
*/
|
||||||
|
|
||||||
|
rtx cmpb_diff = gen_reg_rtx (word_mode);
|
||||||
|
rtx cmpb_zero = gen_reg_rtx (word_mode);
|
||||||
|
rtx rot_amt = gen_reg_rtx (word_mode);
|
||||||
|
rtx zero_reg = gen_reg_rtx (word_mode);
|
||||||
|
|
||||||
|
rtx rot1_1 = gen_reg_rtx(word_mode);
|
||||||
|
rtx rot1_2 = gen_reg_rtx(word_mode);
|
||||||
|
rtx rot2_1 = gen_reg_rtx(word_mode);
|
||||||
|
rtx rot2_2 = gen_reg_rtx(word_mode);
|
||||||
|
|
||||||
|
if (word_mode == SImode)
|
||||||
|
{
|
||||||
|
emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
|
||||||
|
emit_insn (gen_movsi (zero_reg, GEN_INT(0)));
|
||||||
|
emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||||
|
emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
|
||||||
|
emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||||
|
emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
|
||||||
|
emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||||
|
emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
|
||||||
|
gen_lowpart (SImode, rot_amt)));
|
||||||
|
emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT(0xff)));
|
||||||
|
emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
|
||||||
|
gen_lowpart (SImode, rot_amt)));
|
||||||
|
emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT(0xff)));
|
||||||
|
emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
|
||||||
|
emit_insn (gen_movdi (zero_reg, GEN_INT(0)));
|
||||||
|
emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||||
|
emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
|
||||||
|
emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||||
|
emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
|
||||||
|
emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||||
|
emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
|
||||||
|
gen_lowpart (SImode, rot_amt)));
|
||||||
|
emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT(0xff)));
|
||||||
|
emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
|
||||||
|
gen_lowpart (SImode, rot_amt)));
|
||||||
|
emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT(0xff)));
|
||||||
|
emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
|
||||||
|
}
|
||||||
|
|
||||||
|
emit_label (final_move_label);
|
||||||
|
emit_insn (gen_movsi (target,
|
||||||
|
gen_lowpart (SImode, result_reg)));
|
||||||
|
emit_label (final_label);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/* Expand a block move operation, and return 1 if successful. Return 0
|
/* Expand a block move operation, and return 1 if successful. Return 0
|
||||||
if we should let the compiler generate normal code.
|
if we should let the compiler generate normal code.
|
||||||
|
|
||||||
|
@ -32852,7 +33231,7 @@ power9_sched_reorder2 (rtx_insn **ready, int lastpos)
|
||||||
|
|
||||||
3 : 2 vector insns and a vector load have been issued and another
|
3 : 2 vector insns and a vector load have been issued and another
|
||||||
vector load has been found and moved to the end of the ready
|
vector load has been found and moved to the end of the ready
|
||||||
list. */
|
list. */
|
||||||
if (type == TYPE_VECLOAD)
|
if (type == TYPE_VECLOAD)
|
||||||
{
|
{
|
||||||
/* Issued a vecload. */
|
/* Issued a vecload. */
|
||||||
|
|
|
@ -117,6 +117,7 @@
|
||||||
UNSPEC_BPERM
|
UNSPEC_BPERM
|
||||||
UNSPEC_COPYSIGN
|
UNSPEC_COPYSIGN
|
||||||
UNSPEC_PARITY
|
UNSPEC_PARITY
|
||||||
|
UNSPEC_CMPB
|
||||||
UNSPEC_FCTIW
|
UNSPEC_FCTIW
|
||||||
UNSPEC_FCTID
|
UNSPEC_FCTID
|
||||||
UNSPEC_LFIWAX
|
UNSPEC_LFIWAX
|
||||||
|
@ -2316,6 +2317,13 @@
|
||||||
"prty<wd> %0,%1"
|
"prty<wd> %0,%1"
|
||||||
[(set_attr "type" "popcnt")])
|
[(set_attr "type" "popcnt")])
|
||||||
|
|
||||||
|
(define_insn "cmpb<mode>3"
|
||||||
|
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
|
||||||
|
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")
|
||||||
|
(match_operand:GPR 2 "gpc_reg_operand" "r")] UNSPEC_CMPB))]
|
||||||
|
"TARGET_CMPB"
|
||||||
|
"cmpb %0,%1,%2"
|
||||||
|
[(set_attr "type" "cmp")])
|
||||||
|
|
||||||
;; Since the hardware zeros the upper part of the register, save generating the
|
;; Since the hardware zeros the upper part of the register, save generating the
|
||||||
;; AND immediate if we are converting to unsigned
|
;; AND immediate if we are converting to unsigned
|
||||||
|
@ -8844,7 +8852,28 @@
|
||||||
FAIL;
|
FAIL;
|
||||||
}")
|
}")
|
||||||
|
|
||||||
;; String/block compare insn.
|
;; String compare N insn.
|
||||||
|
;; Argument 0 is the target (result)
|
||||||
|
;; Argument 1 is the destination
|
||||||
|
;; Argument 2 is the source
|
||||||
|
;; Argument 3 is the length
|
||||||
|
;; Argument 4 is the alignment
|
||||||
|
|
||||||
|
(define_expand "cmpstrnsi"
|
||||||
|
[(parallel [(set (match_operand:SI 0)
|
||||||
|
(compare:SI (match_operand:BLK 1)
|
||||||
|
(match_operand:BLK 2)))
|
||||||
|
(use (match_operand:SI 3))
|
||||||
|
(use (match_operand:SI 4))])]
|
||||||
|
"TARGET_CMPB && (BYTES_BIG_ENDIAN || TARGET_LDBRX)"
|
||||||
|
{
|
||||||
|
if (expand_strn_compare (operands))
|
||||||
|
DONE;
|
||||||
|
else
|
||||||
|
FAIL;
|
||||||
|
})
|
||||||
|
|
||||||
|
;; Block compare insn.
|
||||||
;; Argument 0 is the target (result)
|
;; Argument 0 is the target (result)
|
||||||
;; Argument 1 is the destination
|
;; Argument 1 is the destination
|
||||||
;; Argument 2 is the source
|
;; Argument 2 is the source
|
||||||
|
|
|
@ -337,6 +337,10 @@ mblock-compare-inline-limit=
|
||||||
Target Report Var(rs6000_block_compare_inline_limit) Init(5) RejectNegative Joined UInteger Save
|
Target Report Var(rs6000_block_compare_inline_limit) Init(5) RejectNegative Joined UInteger Save
|
||||||
Specify the maximum number pairs of load instructions that should be generated inline for the compare. If the number needed exceeds the limit, a call to memcmp will be generated instead.
|
Specify the maximum number pairs of load instructions that should be generated inline for the compare. If the number needed exceeds the limit, a call to memcmp will be generated instead.
|
||||||
|
|
||||||
|
mstring-compare-inline-limit=
|
||||||
|
Target Report Var(rs6000_string_compare_inline_limit) Init(8) RejectNegative Joined UInteger Save
|
||||||
|
Specify the maximum number pairs of load instructions that should be generated inline for the compare. If the number needed exceeds the limit, a call to strncmp will be generated instead.
|
||||||
|
|
||||||
misel
|
misel
|
||||||
Target Report Mask(ISEL) Var(rs6000_isa_flags)
|
Target Report Mask(ISEL) Var(rs6000_isa_flags)
|
||||||
Generate isel instructions.
|
Generate isel instructions.
|
||||||
|
|
Loading…
Reference in New Issue