diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d49783c626c..220fa68d8bb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2000-05-29 Richard Henderson + + * longlong.h [__alpha] (count_leading_zeros): New. + (count_trailing_zeros): New. + (COUNT_LEADING_ZEROS_0): New. + + * config/alpha/alpha.c (alpha_zero_comparison_operator): New. + (alpha_split_conditional_move): New. + * config/alpha/alpha-protos.h: Prototype them. + * config/alpha/alpha.h (PREDICATE_CODES): Update. + (CPP_CPU_DEFAULT_SPEC): Fix typo for EV67. + * config/alpha/alpha.md: Update ffs cix commentary. + (*ze_and_ne): New. + (*nabssf2, *nabsdf2): New. + (*mov[qhsd]icc_internal): Use add_operand. + (if_then_else constant splitters): New. + (*cmp_sadd_di, *cmp_sadd_si, *cmp_sadd_sidi): New. + (*cmp_ssub_di, *cmp_ssub_si, *cmp_ssub_sidi): New. + 2000-05-29 Richard Henderson * combine.c (force_to_mode) [MINUS]: Convert subtraction from diff --git a/gcc/config/alpha/alpha-protos.h b/gcc/config/alpha/alpha-protos.h index 842b5ae1ef4..9bdf142ad55 100644 --- a/gcc/config/alpha/alpha-protos.h +++ b/gcc/config/alpha/alpha-protos.h @@ -57,6 +57,7 @@ extern int input_operand PARAMS ((rtx, enum machine_mode)); extern int current_file_function_operand PARAMS ((rtx, enum machine_mode)); extern int call_operand PARAMS ((rtx, enum machine_mode)); extern int alpha_comparison_operator PARAMS ((rtx, enum machine_mode)); +extern int alpha_zero_comparison_operator PARAMS ((rtx, enum machine_mode)); extern int alpha_swapped_comparison_operator PARAMS ((rtx, enum machine_mode)); extern int signed_comparison_operator PARAMS ((rtx, enum machine_mode)); extern int alpha_fp_comparison_operator PARAMS ((rtx, enum machine_mode)); @@ -83,6 +84,8 @@ extern rtx alpha_emit_set_long_const PARAMS ((rtx, HOST_WIDE_INT, extern void alpha_emit_floatuns PARAMS ((rtx[])); extern rtx alpha_emit_conditional_branch PARAMS ((enum rtx_code)); extern rtx alpha_emit_conditional_move PARAMS ((rtx, enum machine_mode)); +extern int alpha_split_conditional_move PARAMS ((enum rtx_code, rtx, rtx, + rtx, rtx)); extern void alpha_emit_xfloating_arith PARAMS ((enum rtx_code, rtx[])); extern void alpha_emit_xfloating_cvt PARAMS ((enum rtx_code, rtx[])); extern void alpha_split_tfmode_pair PARAMS ((rtx[])); diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index f70e889bac8..68eedb071a8 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -735,6 +735,23 @@ alpha_comparison_operator (op, mode) || code == LEU || code == LTU); } +/* Return 1 if OP is a valid Alpha comparison operator against zero. + Here we know which comparisons are valid in which insn. */ + +int +alpha_zero_comparison_operator (op, mode) + register rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + + if (mode != GET_MODE (op) && mode != VOIDmode) + return 0; + + return (code == EQ || code == NE || code == LE || code == LT + || code == LEU || code == LTU); +} + /* Return 1 if OP is a valid Alpha swapped comparison operator. */ int @@ -1834,6 +1851,90 @@ alpha_emit_conditional_move (cmp, mode) emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_op_mode, op0, op1)); return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_op_mode)); } + +/* Simplify a conditional move of two constants into a setcc with + arithmetic. This is done with a splitter since combine would + just undo the work if done during code generation. It also catches + cases we wouldn't have before cse. */ + +int +alpha_split_conditional_move (code, dest, cond, t_rtx, f_rtx) + enum rtx_code code; + rtx dest, cond, t_rtx, f_rtx; +{ + HOST_WIDE_INT t, f, diff; + enum machine_mode mode; + rtx target, subtarget, tmp; + + mode = GET_MODE (dest); + t = INTVAL (t_rtx); + f = INTVAL (f_rtx); + diff = t - f; + + if (((code == NE || code == EQ) && diff < 0) + || (code == GE || code == GT)) + { + code = reverse_condition (code); + diff = t, t = f, f = diff; + diff = t - f; + } + + subtarget = target = dest; + if (mode != DImode) + { + target = gen_lowpart (DImode, dest); + if (! no_new_pseudos) + subtarget = gen_reg_rtx (DImode); + else + subtarget = target; + } + + if (f == 0 && exact_log2 (diff) > 0 + /* On EV6, we've got enough shifters to make non-arithmatic shifts + viable over a longer latency cmove. On EV5, the E0 slot is a + scarce resource, and on EV4 shift has the same latency as a cmove. */ + && (diff <= 8 || alpha_cpu == PROCESSOR_EV6)) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp)); + + tmp = gen_rtx_ASHIFT (DImode, subtarget, GEN_INT (exact_log2 (t))); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else if (f == 0 && t == -1) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp)); + + emit_insn (gen_negdi2 (target, subtarget)); + } + else if (diff == 1 || diff == 4 || diff == 8) + { + rtx add_op; + + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp)); + + if (diff == 1) + emit_insn (gen_adddi3 (target, subtarget, GEN_INT (f))); + else + { + add_op = GEN_INT (f); + if (sext_add_operand (add_op, mode)) + { + tmp = gen_rtx_MULT (DImode, subtarget, GEN_INT (diff)); + tmp = gen_rtx_PLUS (DImode, tmp, add_op); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else + return 0; + } + } + else + return 0; + + return 1; +} /* Look up the function X_floating library function name for the given operation. */ diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h index ad59214ff18..45ebbd83f3f 100644 --- a/gcc/config/alpha/alpha.h +++ b/gcc/config/alpha/alpha.h @@ -290,7 +290,7 @@ extern const char *alpha_mlat_string; /* For -mmemory-latency= */ #ifndef CPP_CPU_DEFAULT_SPEC # if TARGET_CPU_DEFAULT & MASK_CPU_EV6 -# if TARGET_CPU_DEFAULT & MAX_CIX +# if TARGET_CPU_DEFAULT & MASK_CIX # define CPP_CPU_DEFAULT_SPEC CPP_CPU_EV67_SPEC # else # define CPP_CPU_DEFAULT_SPEC CPP_CPU_EV6_SPEC @@ -2333,6 +2333,7 @@ do { \ {"mode_width_operand", {CONST_INT}}, \ {"reg_or_fp0_operand", {SUBREG, REG, CONST_DOUBLE}}, \ {"alpha_comparison_operator", {EQ, LE, LT, LEU, LTU}}, \ + {"alpha_zero_comparison_operator", {EQ, NE, LE, LT, LEU, LTU}}, \ {"alpha_swapped_comparison_operator", {EQ, GE, GT, GEU, GTU}}, \ {"signed_comparison_operator", {EQ, NE, LE, LT, GE, GT}}, \ {"alpha_fp_comparison_operator", {EQ, LE, LT, UNORDERED}}, \ diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index 0866be524a1..cc3c06c8aa1 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -1177,14 +1177,6 @@ [(set_attr "type" "ilog")]) ;; Handle the FFS insn iff we support CIX. -;; -;; These didn't make it into EV6 pass 2 as planned. Instead they -;; cropped cttz/ctlz/ctpop from the old CIX and renamed it FIX for -;; "Square Root and Floating Point Convert Extension". -;; -;; I'm assured that these insns will make it into EV67 (first pass -;; due Summer 1999), presumably with a new AMASK bit, and presumably -;; will still be named CIX. (define_expand "ffsdi2" [(set (match_dup 2) @@ -1756,6 +1748,22 @@ "" "msk%M2h %1,%3,%0" [(set_attr "type" "shift")]) + +;; Prefer AND + NE over LSHIFTRT + AND. + +(define_insn_and_split "*ze_and_ne" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (match_operand 2 "const_int_operand" "I")))] + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + "#" + "" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) + (ne:DI (match_dup 0) (const_int 0)))] + "operands[3] = GEN_INT (1 << INTVAL (operands[2]));") ;; Floating-point operations. All the double-precision insns can extend ;; from single, so indicate that. The exception are the ones that simply @@ -1768,6 +1776,13 @@ "cpys $f31,%R1,%0" [(set_attr "type" "fcpys")]) +(define_insn "*nabssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (abs:SF (match_operand:SF 1 "reg_or_fp0_operand" "fG"))))] + "TARGET_FP" + "cpysn $f31,%R1,%0" + [(set_attr "type" "fadd")]) + (define_insn "absdf2" [(set (match_operand:DF 0 "register_operand" "=f") (abs:DF (match_operand:DF 1 "reg_or_fp0_operand" "fG")))] @@ -1775,6 +1790,13 @@ "cpys $f31,%R1,%0" [(set_attr "type" "fcpys")]) +(define_insn "*nabsdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (abs:DF (match_operand:DF 1 "reg_or_fp0_operand" "fG"))))] + "TARGET_FP" + "cpysn $f31,%R1,%0" + [(set_attr "type" "fadd")]) + (define_expand "abstf2" [(parallel [(set (match_operand:TF 0 "register_operand" "") (neg:TF (match_operand:TF 1 "reg_or_fp0_operand" ""))) @@ -2532,6 +2554,10 @@ ;; The mode folding trick can't be used with const_int operands, since ;; reload needs to know the proper mode. +;; +;; Use add_operand instead of the more seemingly natural reg_or_8bit_operand +;; in order to create more pairs of constants. As long as we're allowing +;; two constants at the same time, and will have to reload one of them... (define_insn "*movqicc_internal" [(set (match_operand:QI 0 "register_operand" "=r,r,r,r") @@ -2539,8 +2565,8 @@ (match_operator 2 "signed_comparison_operator" [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) - (match_operand:QI 1 "reg_or_8bit_operand" "rI,0,rI,0") - (match_operand:QI 5 "reg_or_8bit_operand" "0,rI,0,rI")))] + (match_operand:QI 1 "add_operand" "rI,0,rI,0") + (match_operand:QI 5 "add_operand" "0,rI,0,rI")))] "(operands[3] == const0_rtx || operands[4] == const0_rtx)" "@ cmov%C2 %r3,%1,%0 @@ -2555,8 +2581,8 @@ (match_operator 2 "signed_comparison_operator" [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) - (match_operand:HI 1 "reg_or_8bit_operand" "rI,0,rI,0") - (match_operand:HI 5 "reg_or_8bit_operand" "0,rI,0,rI")))] + (match_operand:HI 1 "add_operand" "rI,0,rI,0") + (match_operand:HI 5 "add_operand" "0,rI,0,rI")))] "(operands[3] == const0_rtx || operands[4] == const0_rtx)" "@ cmov%C2 %r3,%1,%0 @@ -2571,8 +2597,8 @@ (match_operator 2 "signed_comparison_operator" [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) - (match_operand:SI 1 "reg_or_8bit_operand" "rI,0,rI,0") - (match_operand:SI 5 "reg_or_8bit_operand" "0,rI,0,rI")))] + (match_operand:SI 1 "add_operand" "rI,0,rI,0") + (match_operand:SI 5 "add_operand" "0,rI,0,rI")))] "(operands[3] == const0_rtx || operands[4] == const0_rtx)" "@ cmov%C2 %r3,%1,%0 @@ -2587,8 +2613,8 @@ (match_operator 2 "signed_comparison_operator" [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) - (match_operand:DI 1 "reg_or_8bit_operand" "rI,0,rI,0") - (match_operand:DI 5 "reg_or_8bit_operand" "0,rI,0,rI")))] + (match_operand:DI 1 "add_operand" "rI,0,rI,0") + (match_operand:DI 5 "add_operand" "0,rI,0,rI")))] "(operands[3] == const0_rtx || operands[4] == const0_rtx)" "@ cmov%C2 %r3,%1,%0 @@ -3824,6 +3850,205 @@ ? NE : EQ), DImode, operands[4], const0_rtx); }") + +;; Prefer to use cmp and arithmetic when possible instead of a cmove. + +(define_split + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "") + (const_int 0)]) + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")))] + "" + [(const_int 0)] + " +{ + if (alpha_split_conditional_move (GET_CODE (operands[1]), operands[0], + operands[2], operands[3], operands[4])) + DONE; + else + FAIL; +}") + +;; ??? Why combine is allowed to create such non-canonical rtl, I don't know. +;; Oh well, we match it in movcc, so it must be partially our fault. +(define_split + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(const_int 0) + (match_operand:DI 2 "reg_or_0_operand" "")]) + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")))] + "" + [(const_int 0)] + " +{ + if (alpha_split_conditional_move (swap_condition (GET_CODE (operands[1])), + operands[0], operands[2], operands[3], + operands[4])) + DONE; + else + FAIL; +}") + +(define_insn_and_split "*cmp_sadd_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (if_then_else:DI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "! no_new_pseudos || reload_completed" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] + " +{ + if (! no_new_pseudos) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}") + +(define_insn_and_split "*cmp_sadd_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:SI 5 "=r"))] + "" + "#" + "! no_new_pseudos || reload_completed" + [(set (match_dup 5) + (match_op_dup:SI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:SI (mult:SI (match_dup 5) (match_dup 3)) + (match_dup 4)))] + " +{ + if (! no_new_pseudos) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}") + +(define_insn_and_split "*cmp_sadd_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (plus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO")))) + (clobber (match_scratch:SI 5 "=r"))] + "" + "#" + "! no_new_pseudos || reload_completed" + [(set (match_dup 5) + (match_op_dup:SI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 5) (match_dup 3)) + (match_dup 4))))] + " +{ + if (! no_new_pseudos) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}") + +(define_insn_and_split "*cmp_ssub_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (if_then_else:DI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "! no_new_pseudos || reload_completed" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] + " +{ + if (! no_new_pseudos) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}") + +(define_insn_and_split "*cmp_ssub_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:SI 5 "=r"))] + "" + "#" + "! no_new_pseudos || reload_completed" + [(set (match_dup 5) + (match_op_dup:SI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:SI (mult:SI (match_dup 5) (match_dup 3)) + (match_dup 4)))] + " +{ + if (! no_new_pseudos) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}") + +(define_insn_and_split "*cmp_ssub_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI")))) + (clobber (match_scratch:SI 5 "=r"))] + "" + "#" + "! no_new_pseudos || reload_completed" + [(set (match_dup 5) + (match_op_dup:SI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (minus:SI (mult:SI (match_dup 5) (match_dup 3)) + (match_dup 4))))] + " +{ + if (! no_new_pseudos) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}") ;; Here are the CALL and unconditional branch insns. Calls on NT and OSF ;; work differently, so we have different patterns for each. diff --git a/gcc/longlong.h b/gcc/longlong.h index aa9aea585e4..1c1456974cb 100644 --- a/gcc/longlong.h +++ b/gcc/longlong.h @@ -169,6 +169,39 @@ extern UDItype __udiv_qrnnd PARAMS ((UDItype *, UDItype, UDItype, UDItype)); #define UDIV_TIME 220 #endif /* LONGLONG_STANDALONE */ +#ifdef __alpha_cix__ +#define count_leading_zeros(COUNT,X) \ + __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X)) +#define count_trailing_zeros(COUNT,X) \ + __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X)) +#define COUNT_LEADING_ZEROS_0 64 +#else +extern const UQItype __clz_tab[]; +#define count_leading_zeros(COUNT,X) \ + do { \ + UDItype __xr = (X), __t, __a; \ + __asm__("cmpbge %1,%2,%0" : "=r"(__t) : "r"(~__xr), "r"(-1)); \ + __a = __clz_tab[__t ^ 0xff] - 1; \ + __asm__("extbl %1,%2,%0" : "=r"(__t) : "r"(__xr), "r"(__a)); \ + (COUNT) = 64 - (__clz_tab[__t] + __a*8); \ + } while (0) +#define count_trailing_zeros(COUNT,X) \ + do { \ + UDItype __xr = (X), __t, __a; \ + __asm__("cmpbge %1,%2,%0" : "=r"(__t) : "r"(~__xr), "r"(-1)); \ + __t = ~__t & -~__t; \ + __a = ((__t & 0xCC) != 0) * 2; \ + __a += ((__t & 0xF0) != 0) * 4; \ + __a += ((__t & 0xAA) != 0); \ + __asm__("extbl %1,%2,%0" : "=r"(__t) : "r"(__xr), "r"(__a)); \ + __a <<= 3; \ + __t &= -__t; \ + __a += ((__t & 0xCC) != 0) * 2; \ + __a += ((__t & 0xF0) != 0) * 4; \ + __a += ((__t & 0xAA) != 0); \ + (COUNT) = __a; \ + } while (0) +#endif /* __alpha_cix__ */ #endif /* __alpha */ #if defined (__arc__) && W_TYPE_SIZE == 32