longlong.h [__alpha] (count_leading_zeros): New.

* longlong.h [__alpha] (count_leading_zeros): New.
        (count_trailing_zeros): New.
        (COUNT_LEADING_ZEROS_0): New.

        * config/alpha/alpha.c (alpha_zero_comparison_operator): New.
        (alpha_split_conditional_move): New.
        * config/alpha/alpha-protos.h: Prototype them.
        * config/alpha/alpha.h (PREDICATE_CODES): Update.
        (CPP_CPU_DEFAULT_SPEC): Fix typo for EV67.
        * config/alpha/alpha.md: Update ffs cix commentary.
        (*ze_and_ne): New.
        (*nabssf2, *nabsdf2): New.
        (*mov[qhsd]icc_internal): Use add_operand.
        (if_then_else constant splitters): New.
        (*cmp_sadd_di, *cmp_sadd_si, *cmp_sadd_sidi): New.
        (*cmp_ssub_di, *cmp_ssub_si, *cmp_ssub_sidi): New.

From-SVN: r34250
Author: Richard Henderson <rth@cygnus.com>, 2000-05-29 00:52:26 -07:00
Committed by: Richard Henderson
Parent: e2c7a29e97
Commit: 8f4773eae7
6 changed files with 399 additions and 17 deletions

gcc/ChangeLog

@@ -1,3 +1,22 @@
2000-05-29 Richard Henderson <rth@cygnus.com>
* longlong.h [__alpha] (count_leading_zeros): New.
(count_trailing_zeros): New.
(COUNT_LEADING_ZEROS_0): New.
* config/alpha/alpha.c (alpha_zero_comparison_operator): New.
(alpha_split_conditional_move): New.
* config/alpha/alpha-protos.h: Prototype them.
* config/alpha/alpha.h (PREDICATE_CODES): Update.
(CPP_CPU_DEFAULT_SPEC): Fix typo for EV67.
* config/alpha/alpha.md: Update ffs cix commentary.
(*ze_and_ne): New.
(*nabssf2, *nabsdf2): New.
(*mov[qhsd]icc_internal): Use add_operand.
(if_then_else constant splitters): New.
(*cmp_sadd_di, *cmp_sadd_si, *cmp_sadd_sidi): New.
(*cmp_ssub_di, *cmp_ssub_si, *cmp_ssub_sidi): New.
2000-05-29 Richard Henderson <rth@cygnus.com>
* combine.c (force_to_mode) [MINUS]: Convert subtraction from

gcc/config/alpha/alpha-protos.h

@@ -57,6 +57,7 @@ extern int input_operand PARAMS ((rtx, enum machine_mode));
extern int current_file_function_operand PARAMS ((rtx, enum machine_mode));
extern int call_operand PARAMS ((rtx, enum machine_mode));
extern int alpha_comparison_operator PARAMS ((rtx, enum machine_mode));
extern int alpha_zero_comparison_operator PARAMS ((rtx, enum machine_mode));
extern int alpha_swapped_comparison_operator PARAMS ((rtx, enum machine_mode));
extern int signed_comparison_operator PARAMS ((rtx, enum machine_mode));
extern int alpha_fp_comparison_operator PARAMS ((rtx, enum machine_mode));
@@ -83,6 +84,8 @@ extern rtx alpha_emit_set_long_const PARAMS ((rtx, HOST_WIDE_INT,
extern void alpha_emit_floatuns PARAMS ((rtx[]));
extern rtx alpha_emit_conditional_branch PARAMS ((enum rtx_code));
extern rtx alpha_emit_conditional_move PARAMS ((rtx, enum machine_mode));
extern int alpha_split_conditional_move PARAMS ((enum rtx_code, rtx, rtx,
rtx, rtx));
extern void alpha_emit_xfloating_arith PARAMS ((enum rtx_code, rtx[]));
extern void alpha_emit_xfloating_cvt PARAMS ((enum rtx_code, rtx[]));
extern void alpha_split_tfmode_pair PARAMS ((rtx[]));

gcc/config/alpha/alpha.c

@@ -735,6 +735,23 @@ alpha_comparison_operator (op, mode)
|| code == LEU || code == LTU);
}
/* Return 1 if OP is a valid Alpha comparison operator against zero.
Here we know which comparisons are valid in which insn. */
int
alpha_zero_comparison_operator (op, mode)
register rtx op;
enum machine_mode mode;
{
enum rtx_code code = GET_CODE (op);
if (mode != GET_MODE (op) && mode != VOIDmode)
return 0;
return (code == EQ || code == NE || code == LE || code == LT
|| code == LEU || code == LTU);
}
/* Return 1 if OP is a valid Alpha swapped comparison operator. */
int
@@ -1834,6 +1851,90 @@ alpha_emit_conditional_move (cmp, mode)
emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_op_mode, op0, op1));
return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_op_mode));
}
/* Simplify a conditional move of two constants into a setcc with
arithmetic. This is done with a splitter since combine would
just undo the work if done during code generation. It also catches
cases we wouldn't have before cse. */
int
alpha_split_conditional_move (code, dest, cond, t_rtx, f_rtx)
enum rtx_code code;
rtx dest, cond, t_rtx, f_rtx;
{
HOST_WIDE_INT t, f, diff;
enum machine_mode mode;
rtx target, subtarget, tmp;
mode = GET_MODE (dest);
t = INTVAL (t_rtx);
f = INTVAL (f_rtx);
diff = t - f;
if (((code == NE || code == EQ) && diff < 0)
|| (code == GE || code == GT))
{
code = reverse_condition (code);
diff = t, t = f, f = diff;
diff = t - f;
}
subtarget = target = dest;
if (mode != DImode)
{
target = gen_lowpart (DImode, dest);
if (! no_new_pseudos)
subtarget = gen_reg_rtx (DImode);
else
subtarget = target;
}
if (f == 0 && exact_log2 (diff) > 0
/* On EV6, we've got enough shifters to make non-arithmetic shifts
viable over a longer latency cmove. On EV5, the E0 slot is a
scarce resource, and on EV4 shift has the same latency as a cmove. */
&& (diff <= 8 || alpha_cpu == PROCESSOR_EV6))
{
tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
tmp = gen_rtx_ASHIFT (DImode, subtarget, GEN_INT (exact_log2 (t)));
emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
}
else if (f == 0 && t == -1)
{
tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
emit_insn (gen_negdi2 (target, subtarget));
}
else if (diff == 1 || diff == 4 || diff == 8)
{
rtx add_op;
tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
if (diff == 1)
emit_insn (gen_adddi3 (target, subtarget, GEN_INT (f)));
else
{
add_op = GEN_INT (f);
if (sext_add_operand (add_op, mode))
{
tmp = gen_rtx_MULT (DImode, subtarget, GEN_INT (diff));
tmp = gen_rtx_PLUS (DImode, tmp, add_op);
emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
}
else
return 0;
}
}
else
return 0;
return 1;
}
/* Look up the function X_floating library function name for the
given operation. */
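
At the source level, alpha_split_conditional_move above replaces a conditional move between two constants with a setcc followed by cheap arithmetic. A minimal C-level sketch of the three shapes it handles (the function names here are illustrative, not from the compiler):

    /* Shape 1: false value 0, difference a power of two:
       x = cond ? 8 : 0   becomes   t = (cond != 0); x = t << 3.  */
    long cmov_shift (long cond) { long t = (cond != 0); return t << 3; }

    /* Shape 2: false value 0, true value -1:
       x = cond ? -1 : 0  becomes   t = (cond != 0); x = -t.  */
    long cmov_neg (long cond) { long t = (cond != 0); return -t; }

    /* Shape 3: difference of 1, 4, or 8, mapping onto s4addq/s8addq:
       x = cond ? f + 4 : f  becomes  t = (cond != 0); x = t*4 + f.  */
    long cmov_sadd (long cond, long f) { long t = (cond != 0); return t*4 + f; }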

gcc/config/alpha/alpha.h

@@ -290,7 +290,7 @@ extern const char *alpha_mlat_string; /* For -mmemory-latency= */
#ifndef CPP_CPU_DEFAULT_SPEC
# if TARGET_CPU_DEFAULT & MASK_CPU_EV6
-# if TARGET_CPU_DEFAULT & MAX_CIX
+# if TARGET_CPU_DEFAULT & MASK_CIX
# define CPP_CPU_DEFAULT_SPEC CPP_CPU_EV67_SPEC
# else
# define CPP_CPU_DEFAULT_SPEC CPP_CPU_EV6_SPEC
@@ -2333,6 +2333,7 @@ do { \
{"mode_width_operand", {CONST_INT}}, \
{"reg_or_fp0_operand", {SUBREG, REG, CONST_DOUBLE}}, \
{"alpha_comparison_operator", {EQ, LE, LT, LEU, LTU}}, \
{"alpha_zero_comparison_operator", {EQ, NE, LE, LT, LEU, LTU}}, \
{"alpha_swapped_comparison_operator", {EQ, GE, GT, GEU, GTU}}, \
{"signed_comparison_operator", {EQ, NE, LE, LT, GE, GT}}, \
{"alpha_fp_comparison_operator", {EQ, LE, LT, UNORDERED}}, \

gcc/config/alpha/alpha.md

@@ -1177,14 +1177,6 @@
[(set_attr "type" "ilog")])
;; Handle the FFS insn iff we support CIX.
;;
;; These didn't make it into EV6 pass 2 as planned. Instead they
;; cropped cttz/ctlz/ctpop from the old CIX and renamed it FIX for
;; "Square Root and Floating Point Convert Extension".
;;
;; I'm assured that these insns will make it into EV67 (first pass
;; due Summer 1999), presumably with a new AMASK bit, and presumably
;; will still be named CIX.
(define_expand "ffsdi2"
[(set (match_dup 2)
@@ -1756,6 +1748,22 @@
""
"msk%M2h %1,%3,%0"
[(set_attr "type" "shift")])
;; Prefer AND + NE over LSHIFTRT + AND.
(define_insn_and_split "*ze_and_ne"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
(const_int 1)
(match_operand 2 "const_int_operand" "I")))]
"(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8"
"#"
""
[(set (match_dup 0)
(and:DI (match_dup 1) (match_dup 3)))
(set (match_dup 0)
(ne:DI (match_dup 0) (const_int 0)))]
"operands[3] = GEN_INT (1 << INTVAL (operands[2]));")
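
In source terms, this split chooses the mask-and-compare form of a single-bit extract over the shift form whenever the bit index is below 8, so the mask fits an 8-bit immediate. An illustrative C equivalence, not taken from the compiler:

    /* The zero_extract / shift form: srl + and.  */
    unsigned long bit_via_shift (unsigned long x, int k)
    { return (x >> k) & 1; }

    /* The preferred form matched above: AND + CMPNE.  */
    unsigned long bit_via_mask (unsigned long x, int k)
    { return (x & (1UL << k)) != 0; }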
;; Floating-point operations. All the double-precision insns can extend
;; from single, so indicate that. The exceptions are the ones that simply
@@ -1768,6 +1776,13 @@
"cpys $f31,%R1,%0"
[(set_attr "type" "fcpys")])
(define_insn "*nabssf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(neg:SF (abs:SF (match_operand:SF 1 "reg_or_fp0_operand" "fG"))))]
"TARGET_FP"
"cpysn $f31,%R1,%0"
[(set_attr "type" "fadd")])
(define_insn "absdf2"
[(set (match_operand:DF 0 "register_operand" "=f")
(abs:DF (match_operand:DF 1 "reg_or_fp0_operand" "fG")))]
@@ -1775,6 +1790,13 @@
"cpys $f31,%R1,%0"
[(set_attr "type" "fcpys")])
(define_insn "*nabsdf2"
[(set (match_operand:DF 0 "register_operand" "=f")
(neg:DF (abs:DF (match_operand:DF 1 "reg_or_fp0_operand" "fG"))))]
"TARGET_FP"
"cpysn $f31,%R1,%0"
[(set_attr "type" "fadd")])
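
The two nabs patterns recognize negated absolute value directly, so one cpysn (copy sign, negated) suffices instead of a copy-sign followed by a separate negation. The C source shape producing the matching (neg (abs ...)) RTL is simply:

    #include <math.h>

    /* -fabs (x) matches *nabsdf2 and assembles to a single
       cpysn $f31,x,result on Alpha.  */
    double nabs (double x) { return -fabs (x); }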
(define_expand "abstf2"
[(parallel [(set (match_operand:TF 0 "register_operand" "")
(neg:TF (match_operand:TF 1 "reg_or_fp0_operand" "")))
@@ -2532,6 +2554,10 @@
;; The mode folding trick can't be used with const_int operands, since
;; reload needs to know the proper mode.
;;
;; Use add_operand instead of the more seemingly natural reg_or_8bit_operand
;; in order to create more pairs of constants. As long as we're allowing
;; two constants at the same time, and will have to reload one of them...
(define_insn "*movqicc_internal"
[(set (match_operand:QI 0 "register_operand" "=r,r,r,r")
@@ -2539,8 +2565,8 @@
(match_operator 2 "signed_comparison_operator"
[(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
(match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
(match_operand:QI 1 "reg_or_8bit_operand" "rI,0,rI,0")
(match_operand:QI 5 "reg_or_8bit_operand" "0,rI,0,rI")))]
(match_operand:QI 1 "add_operand" "rI,0,rI,0")
(match_operand:QI 5 "add_operand" "0,rI,0,rI")))]
"(operands[3] == const0_rtx || operands[4] == const0_rtx)"
"@
cmov%C2 %r3,%1,%0
@@ -2555,8 +2581,8 @@
(match_operator 2 "signed_comparison_operator"
[(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
(match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
(match_operand:HI 1 "reg_or_8bit_operand" "rI,0,rI,0")
(match_operand:HI 5 "reg_or_8bit_operand" "0,rI,0,rI")))]
(match_operand:HI 1 "add_operand" "rI,0,rI,0")
(match_operand:HI 5 "add_operand" "0,rI,0,rI")))]
"(operands[3] == const0_rtx || operands[4] == const0_rtx)"
"@
cmov%C2 %r3,%1,%0
@@ -2571,8 +2597,8 @@
(match_operator 2 "signed_comparison_operator"
[(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
(match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
(match_operand:SI 1 "reg_or_8bit_operand" "rI,0,rI,0")
(match_operand:SI 5 "reg_or_8bit_operand" "0,rI,0,rI")))]
(match_operand:SI 1 "add_operand" "rI,0,rI,0")
(match_operand:SI 5 "add_operand" "0,rI,0,rI")))]
"(operands[3] == const0_rtx || operands[4] == const0_rtx)"
"@
cmov%C2 %r3,%1,%0
@@ -2587,8 +2613,8 @@
(match_operator 2 "signed_comparison_operator"
[(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
(match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
(match_operand:DI 1 "reg_or_8bit_operand" "rI,0,rI,0")
(match_operand:DI 5 "reg_or_8bit_operand" "0,rI,0,rI")))]
(match_operand:DI 1 "add_operand" "rI,0,rI,0")
(match_operand:DI 5 "add_operand" "0,rI,0,rI")))]
"(operands[3] == const0_rtx || operands[4] == const0_rtx)"
"@
cmov%C2 %r3,%1,%0
@@ -3824,6 +3850,205 @@
? NE : EQ),
DImode, operands[4], const0_rtx);
}")
;; Prefer to use cmp and arithmetic when possible instead of a cmove.
(define_split
[(set (match_operand 0 "register_operand" "")
(if_then_else (match_operator 1 "signed_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "")
(const_int 0)])
(match_operand 3 "const_int_operand" "")
(match_operand 4 "const_int_operand" "")))]
""
[(const_int 0)]
"
{
if (alpha_split_conditional_move (GET_CODE (operands[1]), operands[0],
operands[2], operands[3], operands[4]))
DONE;
else
FAIL;
}")
;; ??? Why combine is allowed to create such non-canonical rtl, I don't know.
;; Oh well, we match it in movcc, so it must be partially our fault.
(define_split
[(set (match_operand 0 "register_operand" "")
(if_then_else (match_operator 1 "signed_comparison_operator"
[(const_int 0)
(match_operand:DI 2 "reg_or_0_operand" "")])
(match_operand 3 "const_int_operand" "")
(match_operand 4 "const_int_operand" "")))]
""
[(const_int 0)]
"
{
if (alpha_split_conditional_move (swap_condition (GET_CODE (operands[1])),
operands[0], operands[2], operands[3],
operands[4]))
DONE;
else
FAIL;
}")
(define_insn_and_split "*cmp_sadd_di"
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (if_then_else:DI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:DI 3 "const48_operand" "I")
(const_int 0))
(match_operand:DI 4 "sext_add_operand" "rIO")))
(clobber (match_scratch:DI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(plus:DI (mult:DI (match_dup 5) (match_dup 3))
(match_dup 4)))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_sadd_si"
[(set (match_operand:SI 0 "register_operand" "=r")
(plus:SI (if_then_else:SI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:SI 3 "const48_operand" "I")
(const_int 0))
(match_operand:SI 4 "sext_add_operand" "rIO")))
(clobber (match_scratch:SI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:SI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(plus:SI (mult:SI (match_dup 5) (match_dup 3))
(match_dup 4)))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_sadd_sidi"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
(plus:SI (if_then_else:SI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:SI 3 "const48_operand" "I")
(const_int 0))
(match_operand:SI 4 "sext_add_operand" "rIO"))))
(clobber (match_scratch:SI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:SI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(sign_extend:DI (plus:SI (mult:SI (match_dup 5) (match_dup 3))
(match_dup 4))))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_ssub_di"
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (if_then_else:DI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:DI 3 "const48_operand" "I")
(const_int 0))
(match_operand:DI 4 "reg_or_8bit_operand" "rI")))
(clobber (match_scratch:DI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(minus:DI (mult:DI (match_dup 5) (match_dup 3))
(match_dup 4)))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_ssub_si"
[(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (if_then_else:SI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:SI 3 "const48_operand" "I")
(const_int 0))
(match_operand:SI 4 "reg_or_8bit_operand" "rI")))
(clobber (match_scratch:SI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:SI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(minus:SI (mult:SI (match_dup 5) (match_dup 3))
(match_dup 4)))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_ssub_sidi"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
(minus:SI (if_then_else:SI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:SI 3 "const48_operand" "I")
(const_int 0))
(match_operand:SI 4 "reg_or_8bit_operand" "rI"))))
(clobber (match_scratch:SI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:SI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(sign_extend:DI (minus:SI (mult:SI (match_dup 5) (match_dup 3))
(match_dup 4))))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
;; Here are the CALL and unconditional branch insns. Calls on NT and OSF
;; work differently, so we have different patterns for each.

gcc/longlong.h

@@ -169,6 +169,39 @@
extern UDItype __udiv_qrnnd PARAMS ((UDItype *, UDItype, UDItype, UDItype));
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#ifdef __alpha_cix__
#define count_leading_zeros(COUNT,X) \
__asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X))
#define count_trailing_zeros(COUNT,X) \
__asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
#define COUNT_LEADING_ZEROS_0 64
#else
extern const UQItype __clz_tab[];
#define count_leading_zeros(COUNT,X) \
do { \
UDItype __xr = (X), __t, __a; \
__asm__("cmpbge %1,%2,%0" : "=r"(__t) : "r"(~__xr), "r"(-1)); \
__a = __clz_tab[__t ^ 0xff] - 1; \
__asm__("extbl %1,%2,%0" : "=r"(__t) : "r"(__xr), "r"(__a)); \
(COUNT) = 64 - (__clz_tab[__t] + __a*8); \
} while (0)
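/* Editorial worked example, not part of the original source: for
   X = 1 << 44, ~X is 0xff in every byte except byte 5 (0xef).
   cmpbge ~X,-1 sets one result bit per all-ones byte, so __t = 0xdf,
   and __t ^ 0xff = 0x20 flags the nonzero bytes of X.  __clz_tab[0x20]
   is 6, so __a = 5: the highest nonzero byte is byte 5.  extbl then
   extracts that byte (0x10), __clz_tab[0x10] is 5, and
   COUNT = 64 - (5 + 5*8) = 19, the leading-zero count of X.  */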
#define count_trailing_zeros(COUNT,X) \
do { \
UDItype __xr = (X), __t, __a; \
__asm__("cmpbge %1,%2,%0" : "=r"(__t) : "r"(~__xr), "r"(-1)); \
__t = ~__t & -~__t; \
__a = ((__t & 0xCC) != 0) * 2; \
__a += ((__t & 0xF0) != 0) * 4; \
__a += ((__t & 0xAA) != 0); \
__asm__("extbl %1,%2,%0" : "=r"(__t) : "r"(__xr), "r"(__a)); \
__a <<= 3; \
__t &= -__t; \
__a += ((__t & 0xCC) != 0) * 2; \
__a += ((__t & 0xF0) != 0) * 4; \
__a += ((__t & 0xAA) != 0); \
(COUNT) = __a; \
} while (0)
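/* Editorial worked example, continuing X = 1 << 44: __t = 0xdf from
   cmpbge as above, and ~__t & -~__t = 0x20 isolates the lowest nonzero
   byte, byte 5.  The 0xCC/0xF0/0xAA tests binary-encode the index of
   that single set bit, giving __a = 5.  extbl pulls out byte 5 (0x10),
   __a <<= 3 converts to a bit offset of 40, and the same encoding trick
   on 0x10 & -0x10 adds 4, so COUNT = 44, the trailing-zero count.  */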
#endif /* __alpha_cix__ */
#endif /* __alpha */
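
Usage of both macros is statement-like, with the count stored through the first argument. A short sketch, assuming longlong.h's type setup (UDItype etc.) is in scope; on a CIX-capable CPU (__alpha_cix__) each expands to a single ctlz or cttz instruction, otherwise the cmpbge/__clz_tab fallbacks above run:

    UDItype x = (UDItype) 1 << 44;   /* only bit 44 set */
    int lz, tz;
    count_leading_zeros (lz, x);     /* lz == 19 */
    count_trailing_zeros (tz, x);    /* tz == 44 */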
#if defined (__arc__) && W_TYPE_SIZE == 32