optabs.c: Remove unnecessary forward declarations.

* optabs.c: Remove unnecessary forward declarations.
	(expand_unop_direct): New, broken out of expand_unop.
	(expand_doubleword_clz): New.
	(expand_ctz): Move above expand_ffs.  Use
	start_sequence, end_sequence, add_equal_note, and
	expand_unop_direct.  Add more commentary.
	(expand_ffs): Try both ctz optab and expand_ctz.
	Generate a test and branch if the hardware doesn't give us
	a useful value for input zero.  Style improvements similar to
	expand_ctz.

	* config/arm/arm.md (ffssi2, ctzsi2): Delete.

From-SVN: r128054
This commit is contained in:
Zack Weinberg 2007-09-03 17:48:44 +00:00 committed by Zack Weinberg
parent d46c958bfa
commit 9cce5b20d6
3 changed files with 243 additions and 141 deletions

View File

@ -1,3 +1,18 @@
2007-09-03 Zack Weinberg <zack@codesourcery.com>
* optabs.c: Remove unnecessary forward declarations.
(expand_unop_direct): New, broken out of expand_unop.
(expand_doubleword_clz): New.
(expand_ctz): Move above expand_ffs. Use
start_sequence, end_sequence, add_equal_note, and
expand_unop_direct. Add more commentary.
(expand_ffs): Try both ctz optab and expand_ctz.
Generate a test and branch if the hardware doesn't give us
a useful value for input zero. Style improvements similar to
expand_ctz.
* config/arm/arm.md (ffssi2, ctzsi2): Delete.
2007-09-03 Zack Weinberg <zack@codesourcery.com>
* config.gcc: Delete stanza for arm-semi-aof and

View File

@ -10747,46 +10747,6 @@
[(set_attr "predicable" "yes")
(set_attr "insn" "clz")])
(define_expand "ffssi2"
[(set (match_operand:SI 0 "s_register_operand" "")
(ffs:SI (match_operand:SI 1 "s_register_operand" "")))]
"TARGET_32BIT && arm_arch5"
"
{
rtx t1, t2, t3;
t1 = gen_reg_rtx (SImode);
t2 = gen_reg_rtx (SImode);
t3 = gen_reg_rtx (SImode);
emit_insn (gen_negsi2 (t1, operands[1]));
emit_insn (gen_andsi3 (t2, operands[1], t1));
emit_insn (gen_clzsi2 (t3, t2));
emit_insn (gen_subsi3 (operands[0], GEN_INT (32), t3));
DONE;
}"
)
(define_expand "ctzsi2"
[(set (match_operand:SI 0 "s_register_operand" "")
(ctz:SI (match_operand:SI 1 "s_register_operand" "")))]
"TARGET_32BIT && arm_arch5"
"
{
rtx t1, t2, t3;
t1 = gen_reg_rtx (SImode);
t2 = gen_reg_rtx (SImode);
t3 = gen_reg_rtx (SImode);
emit_insn (gen_negsi2 (t1, operands[1]));
emit_insn (gen_andsi3 (t2, operands[1], t1));
emit_insn (gen_clzsi2 (t3, t2));
emit_insn (gen_subsi3 (operands[0], GEN_INT (31), t3));
DONE;
}"
)
;; V5E instructions.
(define_insn "prefetch"

View File

@ -95,37 +95,9 @@ enum insn_code vcondu_gen_code[NUM_MACHINE_MODES];
the code to be used in the trap insn and all other fields are ignored. */
static GTY(()) rtx trap_rtx;
static int add_equal_note (rtx, rtx, enum rtx_code, rtx, rtx);
static rtx widen_operand (rtx, enum machine_mode, enum machine_mode, int,
int);
static void prepare_cmp_insn (rtx *, rtx *, enum rtx_code *, rtx,
enum machine_mode *, int *,
enum can_compare_purpose);
static enum insn_code can_fix_p (enum machine_mode, enum machine_mode, int,
int *);
static enum insn_code can_float_p (enum machine_mode, enum machine_mode, int);
static optab new_optab (void);
static convert_optab new_convert_optab (void);
static inline optab init_optab (enum rtx_code);
static inline optab init_optabv (enum rtx_code);
static inline convert_optab init_convert_optab (enum rtx_code);
static void init_libfuncs (optab, int, int, const char *, int);
static void init_integral_libfuncs (optab, const char *, int);
static void init_floating_libfuncs (optab, const char *, int);
static void init_interclass_conv_libfuncs (convert_optab, const char *,
enum mode_class, enum mode_class);
static void init_intraclass_conv_libfuncs (convert_optab, const char *,
enum mode_class, bool);
static void emit_cmp_and_jump_insn_1 (rtx, rtx, enum machine_mode,
enum rtx_code, int, rtx);
static void prepare_float_lib_cmp (rtx *, rtx *, enum rtx_code *,
enum machine_mode *, int *);
static rtx widen_clz (enum machine_mode, rtx, rtx);
static rtx expand_parity (enum machine_mode, rtx, rtx);
static rtx expand_ffs (enum machine_mode, rtx, rtx);
static rtx expand_ctz (enum machine_mode, rtx, rtx);
static enum rtx_code get_rtx_code (enum tree_code, bool);
static rtx vector_compare_rtx (tree, bool, enum insn_code);
static rtx expand_unop_direct (enum machine_mode, optab, rtx, rtx, int);
/* Current libcall id. It doesn't matter what these are, as long
as they are unique to each libcall that is emitted. */
@ -2500,6 +2472,76 @@ widen_clz (enum machine_mode mode, rtx op0, rtx target)
return 0;
}
/* Try calculating clz of a double-word quantity as two clz's of word-sized
quantities, choosing which based on whether the high word is nonzero.
MODE is the mode of OP0, the value whose leading zeros are counted.
TARGET, if nonzero, is where the result is stored; otherwise a fresh
word_mode pseudo is allocated for it.
Returns TARGET on success. Returns 0, without emitting the pending
sequence, if either word_mode clz or the word_mode add cannot be
expanded directly. */
static rtx
expand_doubleword_clz (enum machine_mode mode, rtx op0, rtx target)
{
/* This runs before start_sequence, so anything it emits is not part of
the sequence built below. */
rtx xop0 = force_reg (mode, op0);
rtx subhi = gen_highpart (word_mode, xop0);
rtx sublo = gen_lowpart (word_mode, xop0);
rtx hi0_label = gen_label_rtx ();
rtx after_label = gen_label_rtx ();
rtx seq, temp, result;
/* If we were not given a target, use a word_mode register, not a
'mode' register. The result will fit, and nobody is expecting
anything bigger (the return type of __builtin_clz* is int). */
if (!target)
target = gen_reg_rtx (word_mode);
/* In any case, write to a word_mode scratch in both branches of the
conditional, so we can ensure there is a single move insn setting
'target' to tag a REG_EQUAL note on. */
result = gen_reg_rtx (word_mode);
start_sequence ();
/* If the high word is not equal to zero,
then clz of the full value is clz of the high word. */
emit_cmp_and_jump_insns (subhi, CONST0_RTX (word_mode), EQ, 0,
word_mode, true, hi0_label);
temp = expand_unop_direct (word_mode, clz_optab, subhi, result, true);
if (!temp)
goto fail;
/* The expander may have put the value somewhere other than 'result';
copy it over so both branches leave their answer in 'result'. */
if (temp != result)
convert_move (result, temp, true);
emit_jump_insn (gen_jump (after_label));
emit_barrier ();
/* Else clz of the full value is clz of the low word plus the number
of bits in the high word. */
emit_label (hi0_label);
/* No target is requested here; the add below is what writes 'result'. */
temp = expand_unop_direct (word_mode, clz_optab, sublo, 0, true);
if (!temp)
goto fail;
temp = expand_binop (word_mode, add_optab, temp,
GEN_INT (GET_MODE_BITSIZE (word_mode)),
result, true, OPTAB_DIRECT);
if (!temp)
goto fail;
if (temp != result)
convert_move (result, temp, true);
emit_label (after_label);
/* The one move into 'target' that the note is meant to be attached to. */
convert_move (target, result, true);
seq = get_insns ();
end_sequence ();
/* Annotate the sequence as computing CLZ of xop0 into 'target', then
emit it as a whole. */
add_equal_note (seq, target, CLZ, xop0, 0);
emit_insn (seq);
return target;
fail:
/* Close the sequence opened above; it is never passed to emit_insn on
this path. */
end_sequence ();
return 0;
}
/* Try calculating
(bswap:narrow x)
as
@ -2604,65 +2646,130 @@ expand_parity (enum machine_mode mode, rtx op0, rtx target)
return 0;
}
/* Try calculating ffs(x) using clz(x). Since the ffs builtin promises
to return zero for a zero value and clz may have an undefined value
in that case, only do this if we know clz returns the right thing so
that we don't have to generate a test and branch. */
/* Try calculating ctz(x) as K - clz(x & -x),
where K is GET_MODE_BITSIZE(mode) - 1.
Both __builtin_ctz and __builtin_clz are undefined at zero, so we
don't have to worry about what the hardware does in that case. (If
the clz instruction produces the usual value at 0, which is
GET_MODE_BITSIZE(mode), i.e. K + 1, the result of this code sequence
will be -1; expand_ffs, below, relies on this. It might be nice to
have it be K instead, for consistency with the (very few) processors
that provide a ctz with a defined value, but that would take one more
instruction, and it would be less convenient for expand_ffs anyway.)
MODE is the mode of OP0; TARGET, if nonzero, is the suggested place
for the result. Returns the rtx holding the result, or 0 if MODE has
no clz instruction or any step cannot be expanded directly. */
static rtx
expand_ctz (enum machine_mode mode, rtx op0, rtx target)
{
rtx seq, temp;
/* Without a clz instruction in this mode the recipe cannot work. */
if (optab_handler (clz_optab, mode)->insn_code == CODE_FOR_nothing)
return 0;
start_sequence ();
/* Each step runs only if the previous one succeeded; a zero 'temp'
falls through to the failure check below. */
/* temp = -x */
temp = expand_unop_direct (mode, neg_optab, op0, NULL_RTX, true);
/* temp = x & -x */
if (temp)
temp = expand_binop (mode, and_optab, op0, temp, NULL_RTX,
true, OPTAB_DIRECT);
/* temp = clz (x & -x) */
if (temp)
temp = expand_unop_direct (mode, clz_optab, temp, NULL_RTX, true);
/* temp = K - clz (x & -x), placed in TARGET if convenient. */
if (temp)
temp = expand_binop (mode, sub_optab, GEN_INT (GET_MODE_BITSIZE (mode) - 1),
temp, target,
true, OPTAB_DIRECT);
if (temp == 0)
{
/* Close the sequence; it is never passed to emit_insn on this path. */
end_sequence ();
return 0;
}
seq = get_insns ();
end_sequence ();
/* Annotate the sequence as computing CTZ of op0, then emit it as a
whole. */
add_equal_note (seq, temp, CTZ, op0, 0);
emit_insn (seq);
return temp;
}
/* Try calculating ffs(x) using ctz(x) if we have that instruction, or
else with the sequence used by expand_ctz.
The ffs builtin promises to return zero for a zero value and ctz/clz
may have an undefined value in that case. If they do not give us a
convenient value, we have to generate a test and branch. */
static rtx
expand_ffs (enum machine_mode mode, rtx op0, rtx target)
{
HOST_WIDE_INT val;
if (clz_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing
&& CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2
&& val == GET_MODE_BITSIZE (mode))
bool defined_at_zero;
rtx temp, seq;
if (optab_handler (ctz_optab, mode)->insn_code != CODE_FOR_nothing)
{
rtx last = get_last_insn ();
rtx temp;
start_sequence ();
temp = expand_unop (mode, neg_optab, op0, NULL_RTX, true);
if (temp)
temp = expand_binop (mode, and_optab, op0, temp, NULL_RTX,
true, OPTAB_DIRECT);
if (temp)
temp = expand_unop (mode, clz_optab, temp, NULL_RTX, true);
if (temp)
temp = expand_binop (mode, sub_optab,
GEN_INT (GET_MODE_BITSIZE (mode)),
temp,
target, true, OPTAB_DIRECT);
if (temp == 0)
delete_insns_since (last);
return temp;
temp = expand_unop_direct (mode, ctz_optab, op0, 0, true);
if (!temp)
goto fail;
defined_at_zero = (CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2);
}
return 0;
}
/* We can compute ctz(x) using clz(x) with a similar recipe. Here the ctz
builtin has an undefined result on zero, just like clz, so we don't have
to do that check. */
static rtx
expand_ctz (enum machine_mode mode, rtx op0, rtx target)
{
if (clz_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
else if (optab_handler (clz_optab, mode)->insn_code != CODE_FOR_nothing)
{
rtx last = get_last_insn ();
rtx temp;
start_sequence ();
temp = expand_ctz (mode, op0, 0);
if (!temp)
goto fail;
temp = expand_unop (mode, neg_optab, op0, NULL_RTX, true);
if (temp)
temp = expand_binop (mode, and_optab, op0, temp, NULL_RTX,
true, OPTAB_DIRECT);
if (temp)
temp = expand_unop (mode, clz_optab, temp, NULL_RTX, true);
if (temp)
temp = expand_binop (mode, xor_optab, temp,
GEN_INT (GET_MODE_BITSIZE (mode) - 1),
target,
true, OPTAB_DIRECT);
if (temp == 0)
delete_insns_since (last);
return temp;
if (CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
{
defined_at_zero = true;
val = (GET_MODE_BITSIZE (mode) - 1) - val;
}
}
else
return 0;
if (defined_at_zero && val == -1)
/* No correction needed at zero. */;
else
{
/* We don't try to do anything clever with the situation found
on some processors (eg Alpha) where ctz(0:mode) ==
bitsize(mode). If someone can think of a way to send N to -1
and leave alone all values in the range 0..N-1 (where N is a
power of two), cheaper than this test-and-branch, please add it.
The test-and-branch is done after the operation itself, in case
the operation sets condition codes that can be recycled for this.
(This is true on i386, for instance.) */
rtx nonzero_label = gen_label_rtx ();
emit_cmp_and_jump_insns (op0, CONST0_RTX (mode), NE, 0,
mode, true, nonzero_label);
convert_move (temp, GEN_INT (-1), false);
emit_label (nonzero_label);
}
/* temp now has a value in the range -1..bitsize-1. ffs is supposed
to produce a value in the range 0..bitsize. */
temp = expand_binop (mode, add_optab, temp, GEN_INT (1),
target, false, OPTAB_DIRECT);
if (!temp)
goto fail;
seq = get_insns ();
end_sequence ();
add_equal_note (seq, temp, FFS, op0, 0);
emit_insn (seq);
return temp;
fail:
end_sequence ();
return 0;
}
@ -2791,34 +2898,19 @@ expand_absneg_bit (enum rtx_code code, enum machine_mode mode,
return target;
}
/* Generate code to perform an operation specified by UNOPTAB
on operand OP0, with result having machine-mode MODE.
UNSIGNEDP is for the case where we have to widen the operands
to perform the operation. It says to use zero-extension.
If TARGET is nonzero, the value
is generated there, if it is convenient to do so.
In all cases an rtx is returned for the locus of the value;
this may or may not be TARGET. */
rtx
expand_unop (enum machine_mode mode, optab unoptab, rtx op0, rtx target,
/* As expand_unop, but will fail rather than attempt the operation in a
different mode or with a libcall. */
static rtx
expand_unop_direct (enum machine_mode mode, optab unoptab, rtx op0, rtx target,
int unsignedp)
{
enum mode_class class;
enum machine_mode wider_mode;
rtx temp;
rtx last = get_last_insn ();
rtx pat;
class = GET_MODE_CLASS (mode);
if (optab_handler (unoptab, mode)->insn_code != CODE_FOR_nothing)
{
int icode = (int) optab_handler (unoptab, mode)->insn_code;
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
rtx xop0 = op0;
rtx last = get_last_insn ();
rtx pat, temp;
if (target)
temp = target;
@ -2854,16 +2946,49 @@ expand_unop (enum machine_mode mode, optab unoptab, rtx op0, rtx target,
else
delete_insns_since (last);
}
return 0;
}
/* Generate code to perform an operation specified by UNOPTAB
on operand OP0, with result having machine-mode MODE.
UNSIGNEDP is for the case where we have to widen the operands
to perform the operation. It says to use zero-extension.
If TARGET is nonzero, the value
is generated there, if it is convenient to do so.
In all cases an rtx is returned for the locus of the value;
this may or may not be TARGET. */
rtx
expand_unop (enum machine_mode mode, optab unoptab, rtx op0, rtx target,
int unsignedp)
{
enum mode_class class = GET_MODE_CLASS (mode);
enum machine_mode wider_mode;
rtx temp;
temp = expand_unop_direct (mode, unoptab, op0, target, unsignedp);
if (temp)
return temp;
/* It can't be done in this mode. Can we open-code it in a wider mode? */
/* Widening clz needs special treatment. */
/* Widening (or narrowing) clz needs special treatment. */
if (unoptab == clz_optab)
{
temp = widen_clz (mode, op0, target);
if (temp)
return temp;
else
if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_WORD
&& optab_handler (unoptab, word_mode)->insn_code != CODE_FOR_nothing)
{
temp = expand_doubleword_clz (mode, op0, target);
if (temp)
return temp;
}
goto try_libcall;
}
@ -2893,6 +3018,7 @@ expand_unop (enum machine_mode mode, optab unoptab, rtx op0, rtx target,
if (optab_handler (unoptab, wider_mode)->insn_code != CODE_FOR_nothing)
{
rtx xop0 = op0;
rtx last = get_last_insn ();
/* For certain operations, we need not actually extend
the narrow operand, as long as we will truncate the
@ -3052,6 +3178,7 @@ expand_unop (enum machine_mode mode, optab unoptab, rtx op0, rtx target,
|| optab_handler (unoptab, wider_mode)->libfunc)
{
rtx xop0 = op0;
rtx last = get_last_insn ();
/* For certain operations, we need not actually extend
the narrow operand, as long as we will truncate the