longlong.h [__alpha] (count_leading_zeros): New.

* longlong.h [__alpha] (count_leading_zeros): New.
        (count_trailing_zeros): New.
        (COUNT_LEADING_ZEROS_0): New.

        * config/alpha/alpha.c (alpha_zero_comparison_operator): New.
        (alpha_split_conditional_move): New.
        * config/alpha/alpha-protos.h: Prototype them.
        * config/alpha/alpha.h (PREDICATE_CODES): Update.
        (CPP_CPU_DEFAULT_SPEC): Fix typo for EV67.
        * config/alpha/alpha.md: Update ffs cix commentary.
        (*ze_and_ne): New.
        (*nabssf2, *nabsdf2): New.
        (*mov[qhsd]icc_internal): Use add_operand.
        (if_then_else constant splitters): New.
        (*cmp_sadd_di, *cmp_sadd_si, *cmp_sadd_sidi): New.
        (*cmp_ssub_di, *cmp_ssub_si, *cmp_ssub_sidi): New.

From-SVN: r34250
Author: Richard Henderson <rth@cygnus.com>, 2000-05-29 00:52:26 -07:00
Committed by: Richard Henderson
Parent: e2c7a29e97
Commit: 8f4773eae7
6 changed files with 399 additions and 17 deletions

gcc/ChangeLog

@@ -1,3 +1,22 @@
2000-05-29 Richard Henderson <rth@cygnus.com>
* longlong.h [__alpha] (count_leading_zeros): New.
(count_trailing_zeros): New.
(COUNT_LEADING_ZEROS_0): New.
* config/alpha/alpha.c (alpha_zero_comparison_operator): New.
(alpha_split_conditional_move): New.
* config/alpha/alpha-protos.h: Prototype them.
* config/alpha/alpha.h (PREDICATE_CODES): Update.
(CPP_CPU_DEFAULT_SPEC): Fix typo for EV67.
* config/alpha/alpha.md: Update ffs cix commentary.
(*ze_and_ne): New.
(*nabssf2, *nabsdf2): New.
(*mov[qhsd]icc_internal): Use add_operand.
(if_then_else constant splitters): New.
(*cmp_sadd_di, *cmp_sadd_si, *cmp_sadd_sidi): New.
(*cmp_ssub_di, *cmp_ssub_si, *cmp_ssub_sidi): New.
2000-05-29 Richard Henderson <rth@cygnus.com>
* combine.c (force_to_mode) [MINUS]: Convert subtraction from

gcc/config/alpha/alpha-protos.h

@@ -57,6 +57,7 @@ extern int input_operand PARAMS ((rtx, enum machine_mode));
extern int current_file_function_operand PARAMS ((rtx, enum machine_mode));
extern int call_operand PARAMS ((rtx, enum machine_mode));
extern int alpha_comparison_operator PARAMS ((rtx, enum machine_mode));
extern int alpha_zero_comparison_operator PARAMS ((rtx, enum machine_mode));
extern int alpha_swapped_comparison_operator PARAMS ((rtx, enum machine_mode));
extern int signed_comparison_operator PARAMS ((rtx, enum machine_mode));
extern int alpha_fp_comparison_operator PARAMS ((rtx, enum machine_mode));
@@ -83,6 +84,8 @@ extern rtx alpha_emit_set_long_const PARAMS ((rtx, HOST_WIDE_INT,
extern void alpha_emit_floatuns PARAMS ((rtx[]));
extern rtx alpha_emit_conditional_branch PARAMS ((enum rtx_code));
extern rtx alpha_emit_conditional_move PARAMS ((rtx, enum machine_mode));
extern int alpha_split_conditional_move PARAMS ((enum rtx_code, rtx, rtx,
rtx, rtx));
extern void alpha_emit_xfloating_arith PARAMS ((enum rtx_code, rtx[]));
extern void alpha_emit_xfloating_cvt PARAMS ((enum rtx_code, rtx[]));
extern void alpha_split_tfmode_pair PARAMS ((rtx[]));

gcc/config/alpha/alpha.c

@@ -735,6 +735,23 @@ alpha_comparison_operator (op, mode)
|| code == LEU || code == LTU);
}
/* Return 1 if OP is a valid Alpha comparison operator against zero.
Here we know which comparisons are valid in which insn. */
int
alpha_zero_comparison_operator (op, mode)
register rtx op;
enum machine_mode mode;
{
enum rtx_code code = GET_CODE (op);
if (mode != GET_MODE (op) && mode != VOIDmode)
return 0;
return (code == EQ || code == NE || code == LE || code == LT
|| code == LEU || code == LTU);
}
/* Return 1 if OP is a valid Alpha swapped comparison operator. */
int
@@ -1834,6 +1851,90 @@ alpha_emit_conditional_move (cmp, mode)
emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_op_mode, op0, op1));
return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_op_mode));
}
/* Simplify a conditional move of two constants into a setcc with
arithmetic. This is done with a splitter since combine would
just undo the work if done during code generation. It also catches
cases we wouldn't have before cse. */
int
alpha_split_conditional_move (code, dest, cond, t_rtx, f_rtx)
enum rtx_code code;
rtx dest, cond, t_rtx, f_rtx;
{
HOST_WIDE_INT t, f, diff;
enum machine_mode mode;
rtx target, subtarget, tmp;
mode = GET_MODE (dest);
t = INTVAL (t_rtx);
f = INTVAL (f_rtx);
diff = t - f;
if (((code == NE || code == EQ) && diff < 0)
|| (code == GE || code == GT))
{
code = reverse_condition (code);
diff = t, t = f, f = diff;
diff = t - f;
}
subtarget = target = dest;
if (mode != DImode)
{
target = gen_lowpart (DImode, dest);
if (! no_new_pseudos)
subtarget = gen_reg_rtx (DImode);
else
subtarget = target;
}
if (f == 0 && exact_log2 (diff) > 0
/* On EV6, we've got enough shifters to make non-arithmetic shifts
viable over a longer latency cmove. On EV5, the E0 slot is a
scarce resource, and on EV4 shift has the same latency as a cmove. */
&& (diff <= 8 || alpha_cpu == PROCESSOR_EV6))
{
tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
tmp = gen_rtx_ASHIFT (DImode, subtarget, GEN_INT (exact_log2 (t)));
emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
}
else if (f == 0 && t == -1)
{
tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
emit_insn (gen_negdi2 (target, subtarget));
}
else if (diff == 1 || diff == 4 || diff == 8)
{
rtx add_op;
tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
if (diff == 1)
emit_insn (gen_adddi3 (target, subtarget, GEN_INT (f)));
else
{
add_op = GEN_INT (f);
if (sext_add_operand (add_op, mode))
{
tmp = gen_rtx_MULT (DImode, subtarget, GEN_INT (diff));
tmp = gen_rtx_PLUS (DImode, tmp, add_op);
emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
}
else
return 0;
}
}
else
return 0;
return 1;
}
/* Look up the function X_floating library function name for the
given operation. */
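
At the source level, alpha_split_conditional_move above replaces a conditional move between two constants with a setcc followed by cheap arithmetic. A minimal C-level sketch of the three shapes it handles (the function names here are illustrative, not from the compiler):

    /* Shape 1: false value 0, difference a power of two:
       x = cond ? 8 : 0   becomes   t = (cond != 0); x = t << 3.  */
    long cmov_shift (long cond) { long t = (cond != 0); return t << 3; }

    /* Shape 2: false value 0, true value -1:
       x = cond ? -1 : 0  becomes   t = (cond != 0); x = -t.  */
    long cmov_neg (long cond) { long t = (cond != 0); return -t; }

    /* Shape 3: difference of 1, 4, or 8, mapping onto s4addq/s8addq:
       x = cond ? f + 4 : f  becomes  t = (cond != 0); x = t*4 + f.  */
    long cmov_sadd (long cond, long f) { long t = (cond != 0); return t*4 + f; }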

gcc/config/alpha/alpha.h

@@ -290,7 +290,7 @@ extern const char *alpha_mlat_string; /* For -mmemory-latency= */
#ifndef CPP_CPU_DEFAULT_SPEC
# if TARGET_CPU_DEFAULT & MASK_CPU_EV6
-# if TARGET_CPU_DEFAULT & MAX_CIX
+# if TARGET_CPU_DEFAULT & MASK_CIX
# define CPP_CPU_DEFAULT_SPEC CPP_CPU_EV67_SPEC
# else
# define CPP_CPU_DEFAULT_SPEC CPP_CPU_EV6_SPEC
@@ -2333,6 +2333,7 @@ do { \
{"mode_width_operand", {CONST_INT}}, \
{"reg_or_fp0_operand", {SUBREG, REG, CONST_DOUBLE}}, \
{"alpha_comparison_operator", {EQ, LE, LT, LEU, LTU}}, \
{"alpha_zero_comparison_operator", {EQ, NE, LE, LT, LEU, LTU}}, \
{"alpha_swapped_comparison_operator", {EQ, GE, GT, GEU, GTU}}, \
{"signed_comparison_operator", {EQ, NE, LE, LT, GE, GT}}, \
{"alpha_fp_comparison_operator", {EQ, LE, LT, UNORDERED}}, \

gcc/config/alpha/alpha.md

@@ -1177,14 +1177,6 @@
[(set_attr "type" "ilog")])
;; Handle the FFS insn iff we support CIX.
;;
;; These didn't make it into EV6 pass 2 as planned. Instead they
;; cropped cttz/ctlz/ctpop from the old CIX and renamed it FIX for
;; "Square Root and Floating Point Convert Extension".
;;
;; I'm assured that these insns will make it into EV67 (first pass
;; due Summer 1999), presumably with a new AMASK bit, and presumably
;; will still be named CIX.
(define_expand "ffsdi2"
[(set (match_dup 2)
@@ -1756,6 +1748,22 @@
""
"msk%M2h %1,%3,%0"
[(set_attr "type" "shift")])
;; Prefer AND + NE over LSHIFTRT + AND.
(define_insn_and_split "*ze_and_ne"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
(const_int 1)
(match_operand 2 "const_int_operand" "I")))]
"(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8"
"#"
""
[(set (match_dup 0)
(and:DI (match_dup 1) (match_dup 3)))
(set (match_dup 0)
(ne:DI (match_dup 0) (const_int 0)))]
"operands[3] = GEN_INT (1 << INTVAL (operands[2]));")
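
In source terms, this split chooses the mask-and-compare form of a single-bit extract over the shift form whenever the bit index is below 8, so the mask fits an 8-bit immediate. An illustrative C equivalence, not taken from the compiler:

    /* The zero_extract / shift form: srl + and.  */
    unsigned long bit_via_shift (unsigned long x, int k)
    { return (x >> k) & 1; }

    /* The preferred form matched above: AND + CMPNE.  */
    unsigned long bit_via_mask (unsigned long x, int k)
    { return (x & (1UL << k)) != 0; }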
;; Floating-point operations. All the double-precision insns can extend
;; from single, so indicate that. The exceptions are the ones that simply
@@ -1768,6 +1776,13 @@
"cpys $f31,%R1,%0"
[(set_attr "type" "fcpys")])
(define_insn "*nabssf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(neg:SF (abs:SF (match_operand:SF 1 "reg_or_fp0_operand" "fG"))))]
"TARGET_FP"
"cpysn $f31,%R1,%0"
[(set_attr "type" "fadd")])
(define_insn "absdf2"
[(set (match_operand:DF 0 "register_operand" "=f")
(abs:DF (match_operand:DF 1 "reg_or_fp0_operand" "fG")))]
@@ -1775,6 +1790,13 @@
"cpys $f31,%R1,%0"
[(set_attr "type" "fcpys")])
(define_insn "*nabsdf2"
[(set (match_operand:DF 0 "register_operand" "=f")
(neg:DF (abs:DF (match_operand:DF 1 "reg_or_fp0_operand" "fG"))))]
"TARGET_FP"
"cpysn $f31,%R1,%0"
[(set_attr "type" "fadd")])
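
The two nabs patterns recognize negated absolute value directly, so one cpysn (copy sign, negated) suffices instead of a copy-sign followed by a separate negation. The C source shape producing the matching (neg (abs ...)) RTL is simply:

    #include <math.h>

    /* -fabs (x) matches *nabsdf2 and assembles to a single
       cpysn $f31,x,result on Alpha.  */
    double nabs (double x) { return -fabs (x); }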
(define_expand "abstf2"
[(parallel [(set (match_operand:TF 0 "register_operand" "")
(neg:TF (match_operand:TF 1 "reg_or_fp0_operand" "")))
@@ -2532,6 +2554,10 @@
;; The mode folding trick can't be used with const_int operands, since
;; reload needs to know the proper mode.
;;
;; Use add_operand instead of the more seemingly natural reg_or_8bit_operand
;; in order to create more pairs of constants. As long as we're allowing
;; two constants at the same time, and will have to reload one of them...
(define_insn "*movqicc_internal"
[(set (match_operand:QI 0 "register_operand" "=r,r,r,r")
@@ -2539,8 +2565,8 @@
(match_operator 2 "signed_comparison_operator"
[(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
(match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
(match_operand:QI 1 "reg_or_8bit_operand" "rI,0,rI,0")
(match_operand:QI 5 "reg_or_8bit_operand" "0,rI,0,rI")))]
(match_operand:QI 1 "add_operand" "rI,0,rI,0")
(match_operand:QI 5 "add_operand" "0,rI,0,rI")))]
"(operands[3] == const0_rtx || operands[4] == const0_rtx)"
"@
cmov%C2 %r3,%1,%0
@@ -2555,8 +2581,8 @@
(match_operator 2 "signed_comparison_operator"
[(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
(match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
(match_operand:HI 1 "reg_or_8bit_operand" "rI,0,rI,0")
(match_operand:HI 5 "reg_or_8bit_operand" "0,rI,0,rI")))]
(match_operand:HI 1 "add_operand" "rI,0,rI,0")
(match_operand:HI 5 "add_operand" "0,rI,0,rI")))]
"(operands[3] == const0_rtx || operands[4] == const0_rtx)"
"@
cmov%C2 %r3,%1,%0
@@ -2571,8 +2597,8 @@
(match_operator 2 "signed_comparison_operator"
[(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
(match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
(match_operand:SI 1 "reg_or_8bit_operand" "rI,0,rI,0")
(match_operand:SI 5 "reg_or_8bit_operand" "0,rI,0,rI")))]
(match_operand:SI 1 "add_operand" "rI,0,rI,0")
(match_operand:SI 5 "add_operand" "0,rI,0,rI")))]
"(operands[3] == const0_rtx || operands[4] == const0_rtx)"
"@
cmov%C2 %r3,%1,%0
@@ -2587,8 +2613,8 @@
(match_operator 2 "signed_comparison_operator"
[(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
(match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
(match_operand:DI 1 "reg_or_8bit_operand" "rI,0,rI,0")
(match_operand:DI 5 "reg_or_8bit_operand" "0,rI,0,rI")))]
(match_operand:DI 1 "add_operand" "rI,0,rI,0")
(match_operand:DI 5 "add_operand" "0,rI,0,rI")))]
"(operands[3] == const0_rtx || operands[4] == const0_rtx)"
"@
cmov%C2 %r3,%1,%0
@@ -3824,6 +3850,205 @@
? NE : EQ),
DImode, operands[4], const0_rtx);
}")
;; Prefer to use cmp and arithmetic when possible instead of a cmove.
(define_split
[(set (match_operand 0 "register_operand" "")
(if_then_else (match_operator 1 "signed_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "")
(const_int 0)])
(match_operand 3 "const_int_operand" "")
(match_operand 4 "const_int_operand" "")))]
""
[(const_int 0)]
"
{
if (alpha_split_conditional_move (GET_CODE (operands[1]), operands[0],
operands[2], operands[3], operands[4]))
DONE;
else
FAIL;
}")
;; ??? Why combine is allowed to create such non-canonical rtl, I don't know.
;; Oh well, we match it in movcc, so it must be partially our fault.
(define_split
[(set (match_operand 0 "register_operand" "")
(if_then_else (match_operator 1 "signed_comparison_operator"
[(const_int 0)
(match_operand:DI 2 "reg_or_0_operand" "")])
(match_operand 3 "const_int_operand" "")
(match_operand 4 "const_int_operand" "")))]
""
[(const_int 0)]
"
{
if (alpha_split_conditional_move (swap_condition (GET_CODE (operands[1])),
operands[0], operands[2], operands[3],
operands[4]))
DONE;
else
FAIL;
}")
(define_insn_and_split "*cmp_sadd_di"
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (if_then_else:DI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:DI 3 "const48_operand" "I")
(const_int 0))
(match_operand:DI 4 "sext_add_operand" "rIO")))
(clobber (match_scratch:DI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(plus:DI (mult:DI (match_dup 5) (match_dup 3))
(match_dup 4)))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_sadd_si"
[(set (match_operand:SI 0 "register_operand" "=r")
(plus:SI (if_then_else:SI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:SI 3 "const48_operand" "I")
(const_int 0))
(match_operand:SI 4 "sext_add_operand" "rIO")))
(clobber (match_scratch:SI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:SI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(plus:SI (mult:SI (match_dup 5) (match_dup 3))
(match_dup 4)))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_sadd_sidi"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
(plus:SI (if_then_else:SI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:SI 3 "const48_operand" "I")
(const_int 0))
(match_operand:SI 4 "sext_add_operand" "rIO"))))
(clobber (match_scratch:SI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:SI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(sign_extend:DI (plus:SI (mult:SI (match_dup 5) (match_dup 3))
(match_dup 4))))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_ssub_di"
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (if_then_else:DI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:DI 3 "const48_operand" "I")
(const_int 0))
(match_operand:DI 4 "reg_or_8bit_operand" "rI")))
(clobber (match_scratch:DI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(minus:DI (mult:DI (match_dup 5) (match_dup 3))
(match_dup 4)))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_ssub_si"
[(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (if_then_else:SI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:SI 3 "const48_operand" "I")
(const_int 0))
(match_operand:SI 4 "reg_or_8bit_operand" "rI")))
(clobber (match_scratch:SI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:SI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(minus:SI (mult:SI (match_dup 5) (match_dup 3))
(match_dup 4)))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
(define_insn_and_split "*cmp_ssub_sidi"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
(minus:SI (if_then_else:SI
(match_operator 1 "alpha_zero_comparison_operator"
[(match_operand:DI 2 "reg_or_0_operand" "rJ")
(const_int 0)])
(match_operand:SI 3 "const48_operand" "I")
(const_int 0))
(match_operand:SI 4 "reg_or_8bit_operand" "rI"))))
(clobber (match_scratch:SI 5 "=r"))]
""
"#"
"! no_new_pseudos || reload_completed"
[(set (match_dup 5)
(match_op_dup:SI 1 [(match_dup 2) (const_int 0)]))
(set (match_dup 0)
(sign_extend:DI (minus:SI (mult:SI (match_dup 5) (match_dup 3))
(match_dup 4))))]
"
{
if (! no_new_pseudos)
operands[5] = gen_reg_rtx (DImode);
else if (reg_overlap_mentioned_p (operands[5], operands[4]))
operands[5] = operands[0];
}")
;; Here are the CALL and unconditional branch insns. Calls on NT and OSF
;; work differently, so we have different patterns for each.

gcc/longlong.h

@@ -169,6 +169,39 @@
extern UDItype __udiv_qrnnd PARAMS ((UDItype *, UDItype, UDItype, UDItype));
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#ifdef __alpha_cix__
#define count_leading_zeros(COUNT,X) \
__asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X))
#define count_trailing_zeros(COUNT,X) \
__asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
#define COUNT_LEADING_ZEROS_0 64
#else
extern const UQItype __clz_tab[];
#define count_leading_zeros(COUNT,X) \
do { \
UDItype __xr = (X), __t, __a; \
__asm__("cmpbge %1,%2,%0" : "=r"(__t) : "r"(~__xr), "r"(-1)); \
__a = __clz_tab[__t ^ 0xff] - 1; \
__asm__("extbl %1,%2,%0" : "=r"(__t) : "r"(__xr), "r"(__a)); \
(COUNT) = 64 - (__clz_tab[__t] + __a*8); \
} while (0)
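/* Editorial worked example, not part of the original source: for
   X = 1 << 44, ~X is 0xff in every byte except byte 5 (0xef).
   cmpbge ~X,-1 sets one result bit per all-ones byte, so __t = 0xdf,
   and __t ^ 0xff = 0x20 flags the nonzero bytes of X.  __clz_tab[0x20]
   is 6, so __a = 5: the highest nonzero byte is byte 5.  extbl then
   extracts that byte (0x10), __clz_tab[0x10] is 5, and
   COUNT = 64 - (5 + 5*8) = 19, the leading-zero count of X.  */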
#define count_trailing_zeros(COUNT,X) \
do { \
UDItype __xr = (X), __t, __a; \
__asm__("cmpbge %1,%2,%0" : "=r"(__t) : "r"(~__xr), "r"(-1)); \
__t = ~__t & -~__t; \
__a = ((__t & 0xCC) != 0) * 2; \
__a += ((__t & 0xF0) != 0) * 4; \
__a += ((__t & 0xAA) != 0); \
__asm__("extbl %1,%2,%0" : "=r"(__t) : "r"(__xr), "r"(__a)); \
__a <<= 3; \
__t &= -__t; \
__a += ((__t & 0xCC) != 0) * 2; \
__a += ((__t & 0xF0) != 0) * 4; \
__a += ((__t & 0xAA) != 0); \
(COUNT) = __a; \
} while (0)
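/* Editorial worked example, continuing X = 1 << 44: __t = 0xdf from
   cmpbge as above, and ~__t & -~__t = 0x20 isolates the lowest nonzero
   byte, byte 5.  The 0xCC/0xF0/0xAA tests binary-encode the index of
   that single set bit, giving __a = 5.  extbl pulls out byte 5 (0x10),
   __a <<= 3 converts to a bit offset of 40, and the same encoding trick
   on 0x10 & -0x10 adds 4, so COUNT = 44, the trailing-zero count.  */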
#endif /* __alpha_cix__ */
#endif /* __alpha */
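
Usage of both macros is statement-like, with the count stored through the first argument. A short sketch, assuming longlong.h's type setup (UDItype etc.) is in scope; on a CIX-capable CPU (__alpha_cix__) each expands to a single ctlz or cttz instruction, otherwise the cmpbge/__clz_tab fallbacks above run:

    UDItype x = (UDItype) 1 << 44;   /* only bit 44 set */
    int lz, tz;
    count_leading_zeros (lz, x);     /* lz == 19 */
    count_trailing_zeros (tz, x);    /* tz == 44 */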
#if defined (__arc__) && W_TYPE_SIZE == 32