gcc/libgcc/libgcc2.c

/* More subroutines needed by GCC output code on some machines.  */
/* Compile this one with gcc.  */
/* Copyright (C) 1989-2022 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "tconfig.h"
#include "tsystem.h"
#include "coretypes.h"
#include "tm.h"
#include "libgcc_tm.h"

/* ATTRIBUTE_HIDDEN expands to a hidden-visibility attribute when
   HAVE_GAS_HIDDEN is defined, and to nothing otherwise.  */
#ifdef HAVE_GAS_HIDDEN
#define ATTRIBUTE_HIDDEN  __attribute__ ((__visibility__ ("hidden")))
#else
#define ATTRIBUTE_HIDDEN
#endif

/* Work out the largest "word" size that we can deal with on this target.  */
#if MIN_UNITS_PER_WORD > 4
# define LIBGCC2_MAX_UNITS_PER_WORD 8
#elif (MIN_UNITS_PER_WORD > 2 \
       || (MIN_UNITS_PER_WORD > 1 && __SIZEOF_LONG_LONG__ > 4))
# define LIBGCC2_MAX_UNITS_PER_WORD 4
#else
# define LIBGCC2_MAX_UNITS_PER_WORD MIN_UNITS_PER_WORD
#endif

/* Work out what word size we are using for this compilation.
   The value can be set on the command line.  */
#ifndef LIBGCC2_UNITS_PER_WORD
#define LIBGCC2_UNITS_PER_WORD LIBGCC2_MAX_UNITS_PER_WORD
#endif

#if LIBGCC2_UNITS_PER_WORD <= LIBGCC2_MAX_UNITS_PER_WORD

#include "libgcc2.h"

#ifdef DECLARE_LIBRARY_RENAMES
  DECLARE_LIBRARY_RENAMES
#endif

#if defined (L_negdi2)
/* Return the negation of the double-word value U, computed one word
   at a time.  */
DWtype
__negdi2 (DWtype u)
{
  const DWunion in = {.ll = u};
  DWunion out;

  /* Negating the low word borrows from the high word exactly when the
     negated low word is nonzero.  */
  out.s.low = -in.s.low;
  out.s.high = -in.s.high - ((UWtype) -in.s.low > 0);

  return out.ll;
}
#endif

#ifdef L_addvsi3
/* Trapping word addition: return A + B, or call abort if the sum
   overflows Wtype.  */
Wtype
__addvSI3 (Wtype a, Wtype b)
{
  Wtype sum;

  if (!__builtin_add_overflow (a, b, &sum))
    return sum;

  abort ();
}
#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
/* SItype flavour of the trapping addition, kept for compatibility:
   return A + B, or call abort if the sum overflows SItype.  */
SItype
__addvsi3 (SItype a, SItype b)
{
  SItype sum;

  if (!__builtin_add_overflow (a, b, &sum))
    return sum;

  abort ();
}
#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
#endif

#ifdef L_addvdi3
/* Trapping double-word addition: return A + B, or call abort if the
   sum overflows DWtype.  */
DWtype
__addvDI3 (DWtype a, DWtype b)
{
  DWtype sum;

  if (!__builtin_add_overflow (a, b, &sum))
    return sum;

  abort ();
}
#endif

#ifdef L_subvsi3
/* Trapping word subtraction: return A - B, or call abort if the
   difference overflows Wtype.  */
Wtype
__subvSI3 (Wtype a, Wtype b)
{
  Wtype diff;

  if (!__builtin_sub_overflow (a, b, &diff))
    return diff;

  abort ();
}
#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
/* SItype flavour of the trapping subtraction, kept for compatibility:
   return A - B, or call abort if the difference overflows SItype.  */
SItype
__subvsi3 (SItype a, SItype b)
{
  SItype diff;

  if (!__builtin_sub_overflow (a, b, &diff))
    return diff;

  abort ();
}
#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
#endif

#ifdef L_subvdi3
/* Trapping double-word subtraction: return A - B, or call abort if
   the difference overflows DWtype.  */
DWtype
__subvDI3 (DWtype a, DWtype b)
{
  DWtype diff;

  if (!__builtin_sub_overflow (a, b, &diff))
    return diff;

  abort ();
}
#endif

#ifdef L_mulvsi3
/* Trapping word multiplication: return A * B, or call abort if the
   product overflows Wtype.  */
Wtype
__mulvSI3 (Wtype a, Wtype b)
{
  Wtype product;

  if (!__builtin_mul_overflow (a, b, &product))
    return product;

  abort ();
}
#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
/* SItype flavour of the trapping multiplication, kept for
   compatibility: return A * B, or call abort if the product overflows
   SItype.  */
SItype
__mulvsi3 (SItype a, SItype b)
{
  SItype product;

  if (!__builtin_mul_overflow (a, b, &product))
    return product;

  abort ();
}
#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
#endif

#ifdef L_negvsi2
/* Trapping word negation: return 0 - A, or call abort if that
   overflows Wtype.  */
Wtype
__negvSI2 (Wtype a)
{
  Wtype negated;

  if (!__builtin_sub_overflow (0, a, &negated))
    return negated;

  abort ();
}
#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
/* SItype flavour of the trapping negation, kept for compatibility:
   return 0 - A, or call abort if that overflows SItype.  */
SItype
__negvsi2 (SItype a)
{
  SItype negated;

  if (!__builtin_sub_overflow (0, a, &negated))
    return negated;

  abort ();
}
#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
#endif

#ifdef L_negvdi2
/* Trapping double-word negation: return 0 - A, or call abort if that
   overflows DWtype.  */
DWtype
__negvDI2 (DWtype a)
{
  DWtype negated;

  if (!__builtin_sub_overflow (0, a, &negated))
    return negated;

  abort ();
}
#endif

#ifdef L_absvsi2
/* Trapping word absolute value: return |A|, or call abort when the
   intermediate A - 1 needed for a negative A overflows Wtype.  */
Wtype
__absvSI2 (Wtype a)
{
  /* -1 when A is negative, 0 otherwise.  */
  const Wtype mask = 0 - (a < 0);
  Wtype biased;

  /* Adding the mask gives A - 1 for negative A and A otherwise;
     XOR-ing with the mask then complements the former and leaves the
     latter untouched.  */
  if (!__builtin_add_overflow (a, mask, &biased))
    return mask ^ biased;

  abort ();
}
#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
/* SItype flavour of the trapping absolute value, kept for
   compatibility: return |A|, or call abort when the intermediate
   A - 1 needed for a negative A overflows SItype.  */
SItype
__absvsi2 (SItype a)
{
  /* -1 when A is negative, 0 otherwise.  */
  const SItype mask = 0 - (a < 0);
  SItype biased;

  /* Adding the mask gives A - 1 for negative A and A otherwise;
     XOR-ing with the mask then complements the former and leaves the
     latter untouched.  */
  if (!__builtin_add_overflow (a, mask, &biased))
    return mask ^ biased;

  abort ();
}
#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
#endif

#ifdef L_absvdi2
/* Trapping double-word absolute value: return |A|, or call abort when
   the intermediate A - 1 needed for a negative A overflows DWtype.  */
DWtype
__absvDI2 (DWtype a)
{
  /* -1 when A is negative, 0 otherwise.  */
  const DWtype mask = 0 - (a < 0);
  DWtype biased;

  /* Adding the mask gives A - 1 for negative A and A otherwise;
     XOR-ing with the mask then complements the former and leaves the
     latter untouched.  */
  if (!__builtin_add_overflow (a, mask, &biased))
    return mask ^ biased;

  abort ();
}
#endif

#ifdef L_mulvdi3
/* Trapping double-word multiplication: return U * V when one of the
   cases below accepts the product, otherwise fall through to the
   abort call at the end.  The operands are classified by whether each
   one fits in a single word, and each class uses as few word
   multiplications as it can.  */
DWtype
__mulvDI3 (DWtype u, DWtype v)
{
  /* The unchecked multiplication needs 3 Wtype x Wtype multiplications,
     but the checked multiplication needs only two.  */
  const DWunion uu = {.ll = u};
  const DWunion vv = {.ll = v};

  if (__builtin_expect (uu.s.high == uu.s.low >> (W_TYPE_SIZE - 1), 1))
    {
      /* u fits in a single Wtype.  */
      if (__builtin_expect (vv.s.high == vv.s.low >> (W_TYPE_SIZE - 1), 1))
	{
	  /* v fits in a single Wtype as well.  */
	  /* A single multiplication.  No overflow risk.  */
	  return (DWtype) uu.s.low * (DWtype) vv.s.low;
	}
      else
	{
	  /* Two multiplications.  */
	  /* Both products are formed on the words reinterpreted as
	     unsigned; the subtractions below adjust W1 for operands
	     that were actually negative.  */
	  DWunion w0 = {.ll = (UDWtype) (UWtype) uu.s.low
			* (UDWtype) (UWtype) vv.s.low};
	  DWunion w1 = {.ll = (UDWtype) (UWtype) uu.s.low
			* (UDWtype) (UWtype) vv.s.high};

	  if (vv.s.high < 0)
	    w1.s.high -= uu.s.low;
	  if (uu.s.low < 0)
	    w1.ll -= vv.ll;
	  w1.ll += (UWtype) w0.s.high;
	  /* Accept only if W1 itself fits in a single word; its low word
	     then becomes the high word of the result.  */
	  if (__builtin_expect (w1.s.high == w1.s.low >> (W_TYPE_SIZE - 1), 1))
	    {
	      w0.s.high = w1.s.low;
	      return w0.ll;
	    }
	}
    }
  else
    {
      if (__builtin_expect (vv.s.high == vv.s.low >> (W_TYPE_SIZE - 1), 1))
	{
	  /* v fits into a single Wtype.  */
	  /* Two multiplications.  */
	  /* Mirror image of the case above with the roles of U and V
	     exchanged.  */
	  DWunion w0 = {.ll = (UDWtype) (UWtype) uu.s.low
			* (UDWtype) (UWtype) vv.s.low};
	  DWunion w1 = {.ll = (UDWtype) (UWtype) uu.s.high
			* (UDWtype) (UWtype) vv.s.low};

	  if (uu.s.high < 0)
	    w1.s.high -= vv.s.low;
	  if (vv.s.low < 0)
	    w1.ll -= uu.ll;
	  w1.ll += (UWtype) w0.s.high;
	  if (__builtin_expect (w1.s.high == w1.s.low >> (W_TYPE_SIZE - 1), 1))
	    {
	      w0.s.high = w1.s.low;
	      return w0.ll;
	    }
	}
      else
	{
	  /* A few sign checks and a single multiplication.  */
	  /* Neither operand fits in one word.  A product is returned only
	     when each high word is 0 or -1 and the sign test on the
	     result passes; every other combination reaches abort.  */
	  if (uu.s.high >= 0)
	    {
	      if (vv.s.high >= 0)
		{
		  if (uu.s.high == 0 && vv.s.high == 0)
		    {
		      const DWtype w = (UDWtype) (UWtype) uu.s.low
			* (UDWtype) (UWtype) vv.s.low;
		      if (__builtin_expect (w >= 0, 1))
			return w;
		    }
		}
	      else
		{
		  if (uu.s.high == 0 && vv.s.high == (Wtype) -1)
		    {
		      DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
				    * (UDWtype) (UWtype) vv.s.low};

		      ww.s.high -= uu.s.low;
		      if (__builtin_expect (ww.s.high < 0, 1))
			return ww.ll;
		    }
		}
	    }
	  else
	    {
	      if (vv.s.high >= 0)
		{
		  if (uu.s.high == (Wtype) -1 && vv.s.high == 0)
		    {
		      DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
				    * (UDWtype) (UWtype) vv.s.low};

		      ww.s.high -= vv.s.low;
		      if (__builtin_expect (ww.s.high < 0, 1))
			return ww.ll;
		    }
		}
	      else
		{
		  if ((uu.s.high & vv.s.high) == (Wtype) -1
		      && (uu.s.low | vv.s.low) != 0)
		    {
		      DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
				    * (UDWtype) (UWtype) vv.s.low};

		      ww.s.high -= uu.s.low;
		      ww.s.high -= vv.s.low;
		      if (__builtin_expect (ww.s.high >= 0, 1))
			return ww.ll;
		    }
		}
	    }
	}
    }

  /* Overflow.  */
  abort ();
}
#endif


/* Unless shift functions are defined with full ANSI prototypes,
   parameter b will be promoted to int if shift_count_type is smaller than an int.  */
#ifdef L_lshrdi3
/* Logical right shift of the double-word value U by B bits, built
   from single-word shifts.  A count of zero returns U unchanged.  */
DWtype
__lshrdi3 (DWtype u, shift_count_type b)
{
  if (b == 0)
    return u;

  const DWunion src = {.ll = u};
  /* W_TYPE_SIZE - B: positive while the shift stays within one word.  */
  const shift_count_type rest = W_TYPE_SIZE - b;
  DWunion dst;

  if (rest > 0)
    {
      /* The bits leaving the high word enter the top of the low word.  */
      const UWtype spill = (UWtype) src.s.high << rest;

      dst.s.low = ((UWtype) src.s.low >> b) | spill;
      dst.s.high = (UWtype) src.s.high >> b;
    }
  else
    {
      /* Shift of a whole word or more: only the high word survives,
	 moved down into the low word.  */
      dst.s.low = (UWtype) src.s.high >> -rest;
      dst.s.high = 0;
    }

  return dst.ll;
}
#endif

#ifdef L_ashldi3
/* Left shift of the double-word value U by B bits, built from
   single-word shifts.  A count of zero returns U unchanged.  */
DWtype
__ashldi3 (DWtype u, shift_count_type b)
{
  if (b == 0)
    return u;

  const DWunion src = {.ll = u};
  /* W_TYPE_SIZE - B: positive while the shift stays within one word.  */
  const shift_count_type rest = W_TYPE_SIZE - b;
  DWunion dst;

  if (rest > 0)
    {
      /* The bits leaving the low word enter the bottom of the high
	 word.  */
      const UWtype spill = (UWtype) src.s.low >> rest;

      dst.s.high = ((UWtype) src.s.high << b) | spill;
      dst.s.low = (UWtype) src.s.low << b;
    }
  else
    {
      /* Shift of a whole word or more: only the low word survives,
	 moved up into the high word.  */
      dst.s.high = (UWtype) src.s.low << -rest;
      dst.s.low = 0;
    }

  return dst.ll;
}
#endif

#ifdef L_ashrdi3
/* Arithmetic right shift of the double-word value U by B bits, built
   from single-word shifts.  A count of zero returns U unchanged.  */
DWtype
__ashrdi3 (DWtype u, shift_count_type b)
{
  if (b == 0)
    return u;

  const DWunion src = {.ll = u};
  /* W_TYPE_SIZE - B: positive while the shift stays within one word.  */
  const shift_count_type rest = W_TYPE_SIZE - b;
  DWunion dst;

  if (rest > 0)
    {
      /* The bits leaving the high word enter the top of the low word;
	 the high word itself is shifted without the unsigned cast.  */
      const UWtype spill = (UWtype) src.s.high << rest;

      dst.s.low = ((UWtype) src.s.low >> b) | spill;
      dst.s.high = src.s.high >> b;
    }
  else
    {
      /* Shift of a whole word or more: the low word comes from the
	 high word, and the high word becomes 1..1 or 0..0.  */
      dst.s.low = src.s.high >> -rest;
      dst.s.high = src.s.high >> (W_TYPE_SIZE - 1);
    }

  return dst.ll;
}
#endif

#ifdef L_bswapsi2
/* Return U with its four 8-bit fields in reversed order.  */
SItype
__bswapsi2 (SItype u)
{
  /* Each field is masked out and moved to its mirrored position,
     lowest field first.  */
  return (((u & 0x000000ffu) << 24)
	  | ((u & 0x0000ff00u) <<  8)
	  | ((u & 0x00ff0000u) >>  8)
	  | ((u & 0xff000000u) >> 24));
}
#endif
#ifdef L_bswapdi2
/* Return U with its eight 8-bit fields in reversed order.  */
DItype
__bswapdi2 (DItype u)
{
  /* Each field is masked out and moved to its mirrored position,
     lowest field first.  */
  return (((u & 0x00000000000000ffull) << 56)
	  | ((u & 0x000000000000ff00ull) << 40)
	  | ((u & 0x0000000000ff0000ull) << 24)
	  | ((u & 0x00000000ff000000ull) <<  8)
	  | ((u & 0x000000ff00000000ull) >>  8)
	  | ((u & 0x0000ff0000000000ull) >> 24)
	  | ((u & 0x00ff000000000000ull) >> 40)
	  | ((u & 0xff00000000000000ull) >> 56));
}
#endif
#ifdef L_ffssi2
#undef int
/* Find first set: return one plus the number of trailing zero bits
   in U, or 0 when U is zero.  */
int
__ffsSI2 (UWtype u)
{
  UWtype zeros;

  if (u != 0)
    {
      count_trailing_zeros (zeros, u);
      return zeros + 1;
    }

  return 0;
}
#endif

#ifdef L_ffsdi2
#undef int
/* Double-word find first set: return the one-based position of the
   lowest set bit of U, or 0 when U is zero.  */
int
__ffsDI2 (DWtype u)
{
  const DWunion parts = {.ll = u};
  UWtype word, zeros, skipped;

  if (parts.s.low == 0 && parts.s.high == 0)
    return 0;

  /* Scan the low word when it has a set bit; otherwise scan the high
     word and account for the whole low word that was skipped.  */
  if (parts.s.low != 0)
    {
      word = parts.s.low;
      skipped = 0;
    }
  else
    {
      word = parts.s.high;
      skipped = W_TYPE_SIZE;
    }

  count_trailing_zeros (zeros, word);
  return zeros + skipped + 1;
}
#endif

#ifdef L_muldi3
/* Double-word multiplication: return the low double word of U * V.  */
DWtype
__muldi3 (DWtype u, DWtype v)
{
  const DWunion lhs = {.ll = u};
  const DWunion rhs = {.ll = v};
  DWunion product;
  UWtype cross;

  /* Full double-word product of the two low words.  */
  product.ll = __umulsidi3 (lhs.s.low, rhs.s.low);

  /* The cross terms only affect the high word of the result.  */
  cross = ((UWtype) lhs.s.low * (UWtype) rhs.s.high
	   + (UWtype) lhs.s.high * (UWtype) rhs.s.low);
  product.s.high += cross;

  return product.ll;
}
#endif

/* When any of the double-word division or modulus entry points is
   being compiled and the target provides sdiv_qrnnd, request the
   definition of __udiv_w_sdiv as well.  */
#if (defined (L_udivdi3) || defined (L_divdi3) || \
     defined (L_umoddi3) || defined (L_moddi3))
#if defined (sdiv_qrnnd)
#define L_udiv_w_sdiv
#endif
#endif

#ifdef L_udiv_w_sdiv
#if defined (sdiv_qrnnd)
/* Unsigned division of the two-word value A1:A0 by D, implemented
   with the signed divide primitive sdiv_qrnnd.  The quotient is
   returned and the remainder is stored through RP, which is
   dereferenced unconditionally.  When compiled as part of one of the
   division entry points the function is static and always inlined.  */
#if (defined (L_udivdi3) || defined (L_divdi3) || \
     defined (L_umoddi3) || defined (L_moddi3))
static inline __attribute__ ((__always_inline__))
#endif
UWtype
__udiv_w_sdiv (UWtype *rp, UWtype a1, UWtype a0, UWtype d)
{
  UWtype q, r;
  UWtype c0, c1, b1;

  /* The top-level split is on whether D, viewed as a signed word, is
     non-negative.  */
  if ((Wtype) d >= 0)
    {
      if (a1 < d - a1 - (a0 >> (W_TYPE_SIZE - 1)))
	{
	  /* Dividend, divisor, and quotient are nonnegative.  */
	  sdiv_qrnnd (q, r, a1, a0, d);
	}
      else
	{
	  /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d.  */
	  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (W_TYPE_SIZE - 1));
	  /* Divide (c1*2^32 + c0) by d.  */
	  sdiv_qrnnd (q, r, c1, c0, d);
	  /* Add 2^31 to quotient.  */
	  q += (UWtype) 1 << (W_TYPE_SIZE - 1);
	}
    }
  else
    {
      /* D has its top bit set: divide the halved dividend by the
	 halved divisor, then correct for the bits dropped.  */
      b1 = d >> 1;			/* d/2, between 2^30 and 2^31 - 1 */
      c1 = a1 >> 1;			/* A/2 */
      c0 = (a1 << (W_TYPE_SIZE - 1)) + (a0 >> 1);

      if (a1 < b1)			/* A < 2^32*b1, so A/2 < 2^31*b1 */
	{
	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */

	  r = 2*r + (a0 & 1);		/* Remainder from A/(2*b1) */
	  /* An odd D differs from 2*b1 by one, so the quotient and
	     remainder need adjusting.  */
	  if ((d & 1) != 0)
	    {
	      if (r >= q)
		r = r - q;
	      else if (q - r <= d)
		{
		  r = r - q + d;
		  q--;
		}
	      else
		{
		  r = r - q + 2*d;
		  q -= 2;
		}
	    }
	}
      else if (c1 < b1)			/* So 2^31 <= (A/2)/b1 < 2^32 */
	{
	  c1 = (b1 - 1) - c1;
	  c0 = ~c0;			/* logical NOT */

	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */

	  q = ~q;			/* (A/2)/b1 */
	  r = (b1 - 1) - r;

	  r = 2*r + (a0 & 1);		/* A/(2*b1) */

	  /* Same odd-divisor correction as in the branch above.  */
	  if ((d & 1) != 0)
	    {
	      if (r >= q)
		r = r - q;
	      else if (q - r <= d)
		{
		  r = r - q + d;
		  q--;
		}
	      else
		{
		  r = r - q + 2*d;
		  q -= 2;
		}
	    }
	}
      else				/* Implies c1 = b1 */
	{				/* Hence a1 = d - 1 = 2*b1 - 1 */
	  if (a0 >= -d)
	    {
	      q = -1;
	      r = a0 + d;
	    }
	  else
	    {
	      q = -2;
	      r = a0 + 2*d;
	    }
	}
    }

  *rp = r;
  return q;
}
#else
/* If sdiv_qrnnd doesn't exist, define dummy __udiv_w_sdiv.  The dummy
   ignores all of its arguments, never writes through RP, and always
   returns 0.  */
UWtype
__udiv_w_sdiv (UWtype *rp __attribute__ ((__unused__)),
	       UWtype a1 __attribute__ ((__unused__)),
	       UWtype a0 __attribute__ ((__unused__)),
	       UWtype d __attribute__ ((__unused__)))
{
  return 0;
}
#endif
#endif

/* Every double-word division, modulus and divmod entry point is built
   on __udivmoddi4, so request its definition whenever one of them is
   being compiled.  */
#if (defined (L_udivdi3) || defined (L_divdi3) || \
     defined (L_umoddi3) || defined (L_moddi3) || \
     defined (L_divmoddi4))
#define L_udivmoddi4
#endif

#ifdef L_clz
/* Lookup table indexed by an 8-bit value: entry I holds the number of
   significant bits in I, i.e. 0 for 0 and otherwise one plus the
   position of the highest set bit (1 -> 1, 2..3 -> 2, 4..7 -> 3, ...,
   128..255 -> 8).  */
const UQItype __clz_tab[256] =
{
  0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
};
#endif

#ifdef L_clzsi2
#undef int
/* Return the result of count_leading_zeros applied to the word X.  */
int
__clzSI2 (UWtype x)
{
  Wtype zeros;

  count_leading_zeros (zeros, x);
  return zeros;
}
#endif

#ifdef L_clzdi2
#undef int
/* Count leading zeros of the double-word value X by examining
   whichever word holds the highest set bit.  */
int
__clzDI2 (UDWtype x)
{
  const DWunion parts = {.ll = x};
  UWtype word;
  Wtype zeros, skipped;

  if (parts.s.high != 0)
    {
      word = parts.s.high;
      skipped = 0;
    }
  else
    {
      /* The high word is all zeros: count it in full and continue in
	 the low word.  */
      word = parts.s.low;
      skipped = W_TYPE_SIZE;
    }

  count_leading_zeros (zeros, word);
  return zeros + skipped;
}
#endif

#ifdef L_ctzsi2
#undef int
/* Return the result of count_trailing_zeros applied to the word X.  */
int
__ctzSI2 (UWtype x)
{
  Wtype zeros;

  count_trailing_zeros (zeros, x);
  return zeros;
}
#endif

#ifdef L_ctzdi2
#undef int
/* Count trailing zeros of the double-word value X by examining
   whichever word holds the lowest set bit.  */
int
__ctzDI2 (UDWtype x)
{
  const DWunion parts = {.ll = x};
  UWtype word;
  Wtype zeros, skipped;

  if (parts.s.low != 0)
    {
      word = parts.s.low;
      skipped = 0;
    }
  else
    {
      /* The low word is all zeros: count it in full and continue in
	 the high word.  */
      word = parts.s.high;
      skipped = W_TYPE_SIZE;
    }

  count_trailing_zeros (zeros, word);
  return zeros + skipped;
}
#endif

#ifdef L_clrsbsi2
#undef int
/* Count leading redundant sign bits of the word X: the number of
   bits below the sign bit that are equal to it.  */
int
__clrsbSI2 (Wtype x)
{
  Wtype zeros;

  /* Complement negative inputs so that only leading zeros need to be
     counted.  */
  if (x < 0)
    x = ~x;

  if (x != 0)
    {
      count_leading_zeros (zeros, x);
      /* The sign bit itself is not redundant.  */
      return zeros - 1;
    }

  /* Every bit equals the sign bit.  */
  return W_TYPE_SIZE - 1;
}
#endif

#ifdef L_clrsbdi2
#undef int
/* Count leading redundant sign bits of the double-word value X.  */
int
__clrsbDI2 (DWtype x)
{
  const DWunion parts = {.ll = x};
  UWtype word;
  Wtype zeros, skipped;

  /* Choose the word in which the run of sign bits ends, complementing
     it for negative values so that leading zeros can be counted.  */
  if (parts.s.high == 0)
    {
      word = parts.s.low;
      skipped = W_TYPE_SIZE;
    }
  else if (parts.s.high == -1)
    {
      word = ~parts.s.low;
      skipped = W_TYPE_SIZE;
    }
  else if (parts.s.high >= 0)
    {
      word = parts.s.high;
      skipped = 0;
    }
  else
    {
      word = ~parts.s.high;
      skipped = 0;
    }

  /* A zero word is handled without invoking count_leading_zeros.  */
  if (word != 0)
    {
      count_leading_zeros (zeros, word);
    }
  else
    {
      zeros = W_TYPE_SIZE;
    }

  /* The sign bit itself is not redundant.  */
  return zeros + skipped - 1;
}
#endif

#ifdef L_popcount_tab
/* Lookup table indexed by an 8-bit value: entry I holds the number of
   set bits in I (0 for 0x00 up to 8 for 0xff).  */
const UQItype __popcount_tab[256] =
{
    0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
    1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
    1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
    2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
    1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
    2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
    2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
    3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
};
#endif

/* POPCOUNTCST (x) replicates the one-character constant X into every
   character of a UWtype by repeated doubling: POPCOUNTCST2 duplicates
   across 2 characters, POPCOUNTCST4 across 4 and POPCOUNTCST8 across
   8.  POPCOUNTCST is left undefined when W_TYPE_SIZE is not 1, 2, 4
   or 8 times __CHAR_BIT__; the popcount routines test for that.  */
#if defined(L_popcountsi2) || defined(L_popcountdi2)
#define POPCOUNTCST2(x) (((UWtype) x << __CHAR_BIT__) | x)
#define POPCOUNTCST4(x) (((UWtype) x << (2 * __CHAR_BIT__)) | x)
#define POPCOUNTCST8(x) (((UWtype) x << (4 * __CHAR_BIT__)) | x)
#if W_TYPE_SIZE == __CHAR_BIT__
#define POPCOUNTCST(x) x
#elif W_TYPE_SIZE == 2 * __CHAR_BIT__
#define POPCOUNTCST(x) POPCOUNTCST2 (x)
#elif W_TYPE_SIZE == 4 * __CHAR_BIT__
#define POPCOUNTCST(x) POPCOUNTCST4 (POPCOUNTCST2 (x))
#elif W_TYPE_SIZE == 8 * __CHAR_BIT__
#define POPCOUNTCST(x) POPCOUNTCST8 (POPCOUNTCST4 (POPCOUNTCST2 (x)))
#endif
#endif

#ifdef L_popcountsi2
#undef int
int
__popcountSI2 (UWtype x)
{
  /* Force table lookup on targets like AVR and RL78 which only
     pretend they have LIBGCC2_UNITS_PER_WORD 4, but actually
     have 1, and other small word targets.  */
#if __SIZEOF_INT__ > 2 && defined (POPCOUNTCST) && __CHAR_BIT__ == 8
  x = x - ((x >> 1) & POPCOUNTCST (0x55));
  x = (x & POPCOUNTCST (0x33)) + ((x >> 2) & POPCOUNTCST (0x33));
  x = (x + (x >> 4)) & POPCOUNTCST (0x0F);
  return (x * POPCOUNTCST (0x01)) >> (W_TYPE_SIZE - __CHAR_BIT__);
#else
  int i, ret = 0;

  for (i = 0; i < W_TYPE_SIZE; i += 8)
    ret += __popcount_tab[(x >> i) & 0xff];

  return ret;
#endif
}
#endif

#ifdef L_popcountdi2
#undef int
int
__popcountDI2 (UDWtype x)
{
  /* Force table lookup on targets like AVR and RL78 which only
     pretend they have LIBGCC2_UNITS_PER_WORD 4, but actually
     have 1, and other small word targets.  */
#if __SIZEOF_INT__ > 2 && defined (POPCOUNTCST) && __CHAR_BIT__ == 8
  const DWunion uu = {.ll = x};
  UWtype x1 = uu.s.low, x2 = uu.s.high;
  x1 = x1 - ((x1 >> 1) & POPCOUNTCST (0x55));
  x2 = x2 - ((x2 >> 1) & POPCOUNTCST (0x55));
  x1 = (x1 & POPCOUNTCST (0x33)) + ((x1 >> 2) & POPCOUNTCST (0x33));
  x2 = (x2 & POPCOUNTCST (0x33)) + ((x2 >> 2) & POPCOUNTCST (0x33));
  x1 = (x1 + (x1 >> 4)) & POPCOUNTCST (0x0F);
  x2 = (x2 + (x2 >> 4)) & POPCOUNTCST (0x0F);
  x1 += x2;
  return (x1 * POPCOUNTCST (0x01)) >> (W_TYPE_SIZE - __CHAR_BIT__);
#else
  int i, ret = 0;

  for (i = 0; i < 2*W_TYPE_SIZE; i += 8)
    ret += __popcount_tab[(x >> i) & 0xff];

  return ret;
#endif
}
#endif

#ifdef L_paritysi2
#undef int
int
__paritySI2 (UWtype x)
{
#if W_TYPE_SIZE > 64
# error "fill out the table"
#endif
#if W_TYPE_SIZE > 32
  x ^= x >> 32;
#endif
#if W_TYPE_SIZE > 16
  x ^= x >> 16;
#endif
  x ^= x >> 8;
  x ^= x >> 4;
  x &= 0xf;
  return (0x6996 >> x) & 1;
}
#endif

#ifdef L_paritydi2
#undef int
int
__parityDI2 (UDWtype x)
{
  const DWunion uu = {.ll = x};
  UWtype nx = uu.s.low ^ uu.s.high;

#if W_TYPE_SIZE > 64
# error "fill out the table"
#endif
#if W_TYPE_SIZE > 32
  nx ^= nx >> 32;
#endif
#if W_TYPE_SIZE > 16
  nx ^= nx >> 16;
#endif
  nx ^= nx >> 8;
  nx ^= nx >> 4;
  nx &= 0xf;
  return (0x6996 >> nx) & 1;
}
#endif

#ifdef L_udivmoddi4
#ifdef TARGET_HAS_NO_HW_DIVIDE

/* Unsigned double-word division for targets without a hardware
   divide: returns the quotient of N by D and, when RP is non-null,
   stores the remainder through it.  When D is greater than N the
   quotient is 0 and the remainder is N.  When compiled as part of one
   of the division entry points the function is static and always
   inlined.  */
#if (defined (L_udivdi3) || defined (L_divdi3) || \
     defined (L_umoddi3) || defined (L_moddi3) || \
     defined (L_divmoddi4))
static inline __attribute__ ((__always_inline__))
#endif
UDWtype
__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
{
  UDWtype q = 0, r = n, y = d;
  UWtype lz1, lz2, i, k;

  /* Implements the align-divisor, shift-dividend method.  The
     algorithm aligns the divisor under the dividend and then performs
     a number of test-subtract iterations which shift the dividend
     left.  The number of iterations is k + 1, where k is the number of
     bit positions the divisor must be shifted left to align it under
     the dividend.  Quotient bits can be saved in the rightmost
     positions of the dividend as it shifts left on each test-subtract
     iteration.  */

  if (y <= r)
    {
      lz1 = __builtin_clzll (d);
      lz2 = __builtin_clzll (n);

      /* Distance between the highest set bits of dividend and
	 divisor.  */
      k = lz1 - lz2;
      y = (y << k);

      /* The dividend can exceed 2 ^ (width - 1) - 1 but still be less
	 than the aligned divisor.  A normal iteration can drop the
	 high-order bit of the dividend.  Therefore, the first
	 test-subtract iteration is a special case, saving its quotient
	 bit in a separate location and not shifting the dividend.  */
      if (r >= y)
	{
	  r = r - y;
	  q =  (1ULL << k);
	}

      if (k > 0)
	{
	  y = y >> 1;

	  /* k additional iterations, in which the regular
	     test-subtract, shift-dividend steps are done.  */
	  i = k;
	  do
	    {
	      if (r >= y)
		r = ((r - y) << 1) + 1;
	      else
		r =  (r << 1);
	      i = i - 1;
	    } while (i != 0);

	  /* First quotient bit is combined with the quotient bits resulting
	     from the k regular iterations.  */
	  /* At this point R holds the remainder in its upper bits and
	     the k quotient bits in its lower bits; the three statements
	     below separate them.  */
	  q = q + r;
	  r = r >> k;
	  q = q - (r << k);
	}
    }

  if (rp)
    *rp = r;
  return q;
}
#else

/* Unsigned double-word division built on the word primitives
   udiv_qrnnd, umul_ppmm and sub_ddmmss: returns the quotient of N by
   D and, when RP is non-null, stores the remainder through it.  A
   zero divisor deliberately performs a word division by zero.  When
   compiled as part of one of the division entry points the function
   is static and always inlined.

   In the case labels below a capital letter marks a word known to be
   significant, a lower-case letter a word of unknown value, and 0 a
   word known to be zero.  */
#if (defined (L_udivdi3) || defined (L_divdi3) || \
     defined (L_umoddi3) || defined (L_moddi3) || \
     defined (L_divmoddi4))
static inline __attribute__ ((__always_inline__))
#endif
UDWtype
__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
{
  const DWunion nn = {.ll = n};
  const DWunion dd = {.ll = d};
  DWunion rr;
  UWtype d0, d1, n0, n1, n2;
  UWtype q0, q1;
  UWtype b, bm;

  d0 = dd.s.low;
  d1 = dd.s.high;
  n0 = nn.s.low;
  n1 = nn.s.high;

  /* Exactly one of the two `if (d1 == 0)' blocks below is compiled;
     the `else' after the #endif pairs with whichever one was chosen.  */
#if !UDIV_NEEDS_NORMALIZATION
  if (d1 == 0)
    {
      if (d0 > n1)
	{
	  /* 0q = nn / 0D */

	  udiv_qrnnd (q0, n0, n1, n0, d0);
	  q1 = 0;

	  /* Remainder in n0.  */
	}
      else
	{
	  /* qq = NN / 0d */

	  if (d0 == 0)
	    d0 = 1 / d0;	/* Divide intentionally by zero.  */

	  udiv_qrnnd (q1, n1, 0, n1, d0);
	  udiv_qrnnd (q0, n0, n1, n0, d0);

	  /* Remainder in n0.  */
	}

      if (rp != 0)
	{
	  rr.s.low = n0;
	  rr.s.high = 0;
	  *rp = rr.ll;
	}
    }

#else /* UDIV_NEEDS_NORMALIZATION */

  if (d1 == 0)
    {
      if (d0 > n1)
	{
	  /* 0q = nn / 0D */

	  count_leading_zeros (bm, d0);

	  if (bm != 0)
	    {
	      /* Normalize, i.e. make the most significant bit of the
		 denominator set.  */

	      d0 = d0 << bm;
	      n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
	      n0 = n0 << bm;
	    }

	  udiv_qrnnd (q0, n0, n1, n0, d0);
	  q1 = 0;

	  /* Remainder in n0 >> bm.  */
	}
      else
	{
	  /* qq = NN / 0d */

	  if (d0 == 0)
	    d0 = 1 / d0;	/* Divide intentionally by zero.  */

	  count_leading_zeros (bm, d0);

	  if (bm == 0)
	    {
	      /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
		 conclude (the most significant bit of n1 is set) /\ (the
		 leading quotient digit q1 = 1).

		 This special case is necessary, not an optimization.
		 (Shifts counts of W_TYPE_SIZE are undefined.)  */

	      n1 -= d0;
	      q1 = 1;
	    }
	  else
	    {
	      /* Normalize.  */

	      b = W_TYPE_SIZE - bm;

	      d0 = d0 << bm;
	      n2 = n1 >> b;
	      n1 = (n1 << bm) | (n0 >> b);
	      n0 = n0 << bm;

	      udiv_qrnnd (q1, n1, n2, n1, d0);
	    }

	  /* n1 != d0...  */

	  udiv_qrnnd (q0, n0, n1, n0, d0);

	  /* Remainder in n0 >> bm.  */
	}

      if (rp != 0)
	{
	  rr.s.low = n0 >> bm;
	  rr.s.high = 0;
	  *rp = rr.ll;
	}
    }
#endif /* UDIV_NEEDS_NORMALIZATION */

  else
    {
      /* The divisor occupies both words.  */
      if (d1 > n1)
	{
	  /* 00 = nn / DD */

	  q0 = 0;
	  q1 = 0;

	  /* Remainder in n1n0.  */
	  if (rp != 0)
	    {
	      rr.s.low = n0;
	      rr.s.high = n1;
	      *rp = rr.ll;
	    }
	}
      else
	{
	  /* 0q = NN / dd */

	  count_leading_zeros (bm, d1);
	  if (bm == 0)
	    {
	      /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
		 conclude (the most significant bit of n1 is set) /\ (the
		 quotient digit q0 = 0 or 1).

		 This special case is necessary, not an optimization.  */

	      /* The condition on the next line takes advantage of that
		 n1 >= d1 (true due to program flow).  */
	      if (n1 > d1 || n0 >= d0)
		{
		  q0 = 1;
		  sub_ddmmss (n1, n0, n1, n0, d1, d0);
		}
	      else
		q0 = 0;

	      q1 = 0;

	      if (rp != 0)
		{
		  rr.s.low = n0;
		  rr.s.high = n1;
		  *rp = rr.ll;
		}
	    }
	  else
	    {
	      UWtype m1, m0;
	      /* Normalize.  */

	      b = W_TYPE_SIZE - bm;

	      d1 = (d1 << bm) | (d0 >> b);
	      d0 = d0 << bm;
	      n2 = n1 >> b;
	      n1 = (n1 << bm) | (n0 >> b);
	      n0 = n0 << bm;

	      /* Estimate the quotient digit from the high divisor word
		 alone, then form the estimate times the low divisor
		 word in m1m0.  */
	      udiv_qrnnd (q0, n1, n2, n1, d1);
	      umul_ppmm (m1, m0, q0, d0);

	      /* If that product exceeds what is left of the dividend,
		 the estimate was one too large.  */
	      if (m1 > n1 || (m1 == n1 && m0 > n0))
		{
		  q0--;
		  sub_ddmmss (m1, m0, m1, m0, d1, d0);
		}

	      q1 = 0;

	      /* Remainder in (n1n0 - m1m0) >> bm.  */
	      if (rp != 0)
		{
		  sub_ddmmss (n1, n0, n1, n0, m1, m0);
		  rr.s.low = (n1 << b) | (n0 >> bm);
		  rr.s.high = n1 >> bm;
		  *rp = rr.ll;
		}
	    }
	}
    }

  /* Assemble the quotient from its two word-sized digits.  */
  const DWunion ww = {{.low = q0, .high = q1}};
  return ww.ll;
}
#endif
#endif

#ifdef L_divdi3
/* Signed double-word division: return the quotient of U by V,
   computed on the magnitudes and negated when exactly one operand is
   negative.  */
DWtype
__divdi3 (DWtype u, DWtype v)
{
  DWunion num = {.ll = u};
  DWunion den = {.ll = v};
  /* Nonzero when the quotient has to be negated.  */
  Wtype negate = 0;
  DWtype quot;

  if (num.s.high < 0)
    {
      negate = ~negate;
      num.ll = -num.ll;
    }
  if (den.s.high < 0)
    {
      negate = ~negate;
      den.ll = -den.ll;
    }

  quot = __udivmoddi4 (num.ll, den.ll, (UDWtype *) 0);

  return negate ? -quot : quot;
}
#endif

#ifdef L_moddi3
/* Signed double-word remainder: return the remainder of U divided by
   V, computed on the magnitudes and negated when U is negative.  */
DWtype
__moddi3 (DWtype u, DWtype v)
{
  DWunion num = {.ll = u};
  DWunion den = {.ll = v};
  /* Nonzero when the remainder has to be negated.  */
  Wtype negate = 0;
  DWtype rem;

  if (num.s.high < 0)
    {
      negate = ~negate;
      num.ll = -num.ll;
    }
  /* The sign of the divisor does not influence the result.  */
  if (den.s.high < 0)
    den.ll = -den.ll;

  (void) __udivmoddi4 (num.ll, den.ll, (UDWtype*)&rem);

  return negate ? -rem : rem;
}
#endif

#ifdef L_divmoddi4
/* Signed double-word division with remainder: return the quotient of
   U by V and store the remainder through RP, which is dereferenced
   unconditionally.  The quotient is negated when exactly one operand
   is negative; the remainder is negated when U is negative.  */
DWtype
__divmoddi4 (DWtype u, DWtype v, DWtype *rp)
{
  DWunion num = {.ll = u};
  DWunion den = {.ll = v};
  Wtype negate_quot = 0;
  Wtype negate_rem = 0;
  DWtype quot;
  DWtype rem;

  if (num.s.high < 0)
    {
      negate_quot = ~negate_quot;
      negate_rem = ~negate_rem;
      num.ll = -num.ll;
    }
  if (den.s.high < 0)
    {
      negate_quot = ~negate_quot;
      den.ll = -den.ll;
    }

  quot = __udivmoddi4 (num.ll, den.ll, (UDWtype*)&rem);
  if (negate_quot)
    quot = -quot;
  if (negate_rem)
    rem = -rem;

  *rp = rem;
  return quot;
}
#endif

#ifdef L_umoddi3
/* Unsigned double-word remainder: return the remainder that
   __udivmoddi4 stores for U divided by V.  */
UDWtype
__umoddi3 (UDWtype u, UDWtype v)
{
  UDWtype rem;

  /* Only the remainder is of interest; the quotient is discarded.  */
  (void) __udivmoddi4 (u, v, &rem);
  return rem;
}
#endif

#ifdef L_udivdi3
/* Unsigned double-word division: return the quotient of N by D.  */
UDWtype
__udivdi3 (UDWtype n, UDWtype d)
{
  /* A null remainder pointer tells __udivmoddi4 not to store the
     remainder.  */
  const UDWtype quot = __udivmoddi4 (n, d, (UDWtype *) 0);

  return quot;
}
#endif

#ifdef L_cmpdi2
/* Three-way signed comparison: return 0 if A < B, 1 if A == B and
   2 if A > B.  */
cmp_return_type
__cmpdi2 (DWtype a, DWtype b)
{
  if (a < b)
    return 0;
  if (a > b)
    return 2;
  return 1;
}
#endif

#ifdef L_ucmpdi2
/* Three-way unsigned comparison: return 0 if A < B, 1 if A == B and
   2 if A > B.  */
cmp_return_type
__ucmpdi2 (UDWtype a, UDWtype b)
{
  if (a < b)
    return 0;
  if (a > b)
    return 2;
  return 1;
}
#endif

#if defined(L_fixunstfdi) && LIBGCC2_HAS_TF_MODE
/* Convert the TFtype value A to an unsigned double word, one word at
   a time.  Negative inputs yield 0.  */
UDWtype
__fixunstfDI (TFtype a)
{
  if (a < 0)
    return 0;

  /* The high word of the result, still as a floating-point number.  */
  const TFtype high_f = (a / Wtype_MAXp1_F);
  /* Truncate it to a single word (deliberately not to DWtype) and
     place that word in the high half of the result.  */
  UDWtype result = (UDWtype) (UWtype) high_f << W_TYPE_SIZE;

  /* Take the high part out of A, leaving the low part in floating
     point.  */
  a -= (TFtype)result;

  /* Convert the low part word-wise as well and merge it in.  The
     residue can come out negative, because A carries more bits than a
     word does, so its sign decides between adding and subtracting.  */
  if (a < 0)
    result -= (UWtype) (- a);
  else
    result += (UWtype) a;

  return result;
}
#endif

#if defined(L_fixtfdi) && LIBGCC2_HAS_TF_MODE
/* Convert the TFtype value A to a signed double word by converting
   its magnitude with __fixunstfDI and restoring the sign.  */
DWtype
__fixtfdi (TFtype a)
{
  if (a < 0)
    {
      const UDWtype magnitude = __fixunstfDI (-a);

      return - magnitude;
    }

  return __fixunstfDI (a);
}
#endif

#if defined(L_fixunsxfdi) && LIBGCC2_HAS_XF_MODE
/* Convert the XFtype value A to an unsigned double word, one word at
   a time.  Negative inputs yield 0.  */
UDWtype
__fixunsxfDI (XFtype a)
{
  if (a < 0)
    return 0;

  /* The high word of the result, still as a floating-point number.  */
  const XFtype high_f = (a / Wtype_MAXp1_F);
  /* Truncate it to a single word (deliberately not to DWtype) and
     place that word in the high half of the result.  */
  UDWtype result = (UDWtype) (UWtype) high_f << W_TYPE_SIZE;

  /* Take the high part out of A, leaving the low part in floating
     point.  */
  a -= (XFtype)result;

  /* Convert the low part word-wise as well and merge it in.  The
     residue can come out negative, because A carries more bits than a
     word does, so its sign decides between adding and subtracting.  */
  if (a < 0)
    result -= (UWtype) (- a);
  else
    result += (UWtype) a;

  return result;
}
#endif

#if defined(L_fixxfdi) && LIBGCC2_HAS_XF_MODE
/* Convert the XFtype value A to a signed double word by converting
   its magnitude with __fixunsxfDI and restoring the sign.  */
DWtype
__fixxfdi (XFtype a)
{
  if (a < 0)
    {
      const UDWtype magnitude = __fixunsxfDI (-a);

      return - magnitude;
    }

  return __fixunsxfDI (a);
}
#endif

#if defined(L_fixunsdfdi) && LIBGCC2_HAS_DF_MODE
/* Convert the DFtype value A to an unsigned double word, one word at
   a time.  */
UDWtype
__fixunsdfDI (DFtype a)
{
  /* High word: the division merely moves the radix point, so it
     introduces no rounding, and the conversion to an integral type
     then chops off the fraction.  */
  const UWtype high_word = a / Wtype_MAXp1_F;

  /* Low word: scale the high word back up as a floating-point value,
     remove it from A, and convert what is left.  */
  const UWtype low_word = a - (DFtype) high_word * Wtype_MAXp1_F;

  /* Put the two words together.  */
  return ((UDWtype) high_word << W_TYPE_SIZE) | low_word;
}
#endif

#if defined(L_fixdfdi) && LIBGCC2_HAS_DF_MODE
/* Convert the DFtype value A to a signed double word by converting
   its magnitude with __fixunsdfDI and restoring the sign.  */
DWtype
__fixdfdi (DFtype a)
{
  if (a < 0)
    {
      const UDWtype magnitude = __fixunsdfDI (-a);

      return - magnitude;
    }

  return __fixunsdfDI (a);
}
#endif

#if defined(L_fixunssfdi) && LIBGCC2_HAS_SF_MODE
/* Convert the SFtype value A to an unsigned double word.

   When DFmode is available the value is widened to DFtype and split
   into two words.  Otherwise, provided SFtype has fewer mantissa bits
   than a word, the exponent is peeled off by repeated division so
   that the whole mantissa can be converted in a single word-sized
   step: inputs below 1 yield 0, inputs that fit in a word are
   converted directly, and inputs of Wtype_MAXp1_F squared or more
   yield all ones.  */
UDWtype
__fixunssfDI (SFtype a)
{
#if LIBGCC2_HAS_DF_MODE
  /* Convert the SFtype to a DFtype, because that is surely not going
     to lose any bits.  Some day someone else can write a faster version
     that avoids converting to DFtype, and verify it really works right.  */
  const DFtype dfa = a;

  /* Get high part of result.  The division here just moves the radix
     point and does not cause any rounding.  Then the conversion to
     integral type chops the result as desired.  */
  const UWtype hi = dfa / Wtype_MAXp1_F;

  /* Get low part of result.  Convert `hi' to floating type and scale it back,
     then subtract this from the number being converted.  This leaves the low
     part.  Convert that to integral type.  */
  const UWtype lo = dfa - (DFtype) hi * Wtype_MAXp1_F;

  /* Assemble result from the two parts.  */
  return ((UDWtype) hi << W_TYPE_SIZE) | lo;
#elif FLT_MANT_DIG < W_TYPE_SIZE
  if (a < 1)
    return 0;
  if (a < Wtype_MAXp1_F)
    return (UWtype)a;
  if (a < Wtype_MAXp1_F * Wtype_MAXp1_F)
    {
      /* Since we know that there are fewer significant bits in the SFmode
	 quantity than in a word, we know that we can convert out all the
	 significant bits in one step, and thus avoid losing bits.  */

      /* ??? This following loop essentially performs frexpf.  If we could
	 use the real libm function, or poke at the actual bits of the fp
	 format, it would be significantly faster.  */

      UWtype shift = 0, counter;

      a /= Wtype_MAXp1_F;
      for (counter = W_TYPE_SIZE / 2; counter != 0; counter >>= 1)
	{
	  SFtype counterf = (UWtype)1 << counter;
	  if (a >= counterf)
	    {
	      shift |= counter;
	      a /= counterf;
	    }
	}

      /* The loop leaves A in [1, 2).  Scaling by a full Wtype_MAXp1_F
	 would therefore produce a value of at least Wtype_MAXp1_F,
	 which does not fit in a UWtype.  Scale by half of it instead,
	 so the value lands in the top half of the word range, and make
	 up the missing factor of two with one extra bit of shift.  */
      a *= Wtype_MAXp1_F / 2;
      counter = a;
      /* Shift as unsigned: the result may occupy the top bit of the
	 double word.  */
      return (UDWtype) counter << (shift + 1);
    }
  return -1;
#else
# error
#endif
}
#endif

#if defined(L_fixsfdi) && LIBGCC2_HAS_SF_MODE
/* Convert the SFtype value A to a signed double word by converting
   its magnitude with __fixunssfDI and restoring the sign.  */
DWtype
__fixsfdi (SFtype a)
{
  if (a < 0)
    {
      const UDWtype magnitude = __fixunssfDI (-a);

      return - magnitude;
    }

  return __fixunssfDI (a);
}
#endif

#if defined(L_floatdixf) && LIBGCC2_HAS_XF_MODE
/* Convert the signed double word U to XFtype by converting its two
   words separately and combining them.  */
XFtype
__floatdixf (DWtype u)
{
#if W_TYPE_SIZE > __LIBGCC_XF_MANT_DIG__
# error
#endif
  /* The high word carries the sign; the low word is taken as
     unsigned.  */
  const Wtype high_word = u >> W_TYPE_SIZE;
  const UWtype low_word = u;
  XFtype result = high_word;

  result *= Wtype_MAXp1_F;
  result += low_word;
  return result;
}
#endif

#if defined(L_floatundixf) && LIBGCC2_HAS_XF_MODE
/* Convert the unsigned double word U to XFtype by converting its two
   words separately and combining them.  */
XFtype
__floatundixf (UDWtype u)
{
#if W_TYPE_SIZE > __LIBGCC_XF_MANT_DIG__
# error
#endif
  /* Both words are taken as unsigned.  */
  const UWtype high_word = u >> W_TYPE_SIZE;
  const UWtype low_word = u;
  XFtype result = high_word;

  result *= Wtype_MAXp1_F;
  result += low_word;
  return result;
}
#endif

#if defined(L_floatditf) && LIBGCC2_HAS_TF_MODE
/* Convert the signed double word U to TFtype by converting its two
   words separately and combining them.  */
TFtype
__floatditf (DWtype u)
{
#if W_TYPE_SIZE > __LIBGCC_TF_MANT_DIG__
# error
#endif
  /* The high word carries the sign; the low word is taken as
     unsigned.  */
  const Wtype high_word = u >> W_TYPE_SIZE;
  const UWtype low_word = u;
  TFtype result = high_word;

  result *= Wtype_MAXp1_F;
  result += low_word;
  return result;
}
#endif

#if defined(L_floatunditf) && LIBGCC2_HAS_TF_MODE
/* Convert the unsigned double word U to TFtype by converting its two
   words separately and combining them.  */
TFtype
__floatunditf (UDWtype u)
{
#if W_TYPE_SIZE > __LIBGCC_TF_MANT_DIG__
# error
#endif
  /* Both words are taken as unsigned.  */
  const UWtype high_word = u >> W_TYPE_SIZE;
  const UWtype low_word = u;
  TFtype result = high_word;

  result *= Wtype_MAXp1_F;
  result += low_word;
  return result;
}
#endif

#if (defined(L_floatdisf) && LIBGCC2_HAS_SF_MODE)	\
     || (defined(L_floatdidf) && LIBGCC2_HAS_DF_MODE)
/* Number of bits in the double-word integer being converted.  */
#define DI_SIZE (W_TYPE_SIZE * 2)
/* Nonzero when a floating type with SIZE mantissa bits is narrower
   than DI_SIZE, satisfies SIZE > DI_SIZE - SIZE + FSSIZE, and is not
   ruled out by AVOID_FP_TYPE_CONVERSION.  */
#define F_MODE_OK(SIZE) \
  (SIZE < DI_SIZE							\
   && SIZE > (DI_SIZE - SIZE + FSSIZE)					\
   && !AVOID_FP_TYPE_CONVERSION(SIZE))
/* Select the name, result type and mantissa width of the conversion
   function being compiled: the SFtype flavour when L_floatdisf is
   defined, the DFtype flavour otherwise.  */
#if defined(L_floatdisf)
#define FUNC __floatdisf
#define FSTYPE SFtype
#define FSSIZE __LIBGCC_SF_MANT_DIG__
#else
#define FUNC __floatdidf
#define FSTYPE DFtype
#define FSSIZE __LIBGCC_DF_MANT_DIG__
#endif

/* Convert the signed double-word integer U to FSTYPE.  FUNC, FSTYPE and
   FSSIZE come from the macros just above, giving __floatdisf (SFtype) or
   __floatdidf (DFtype).  One of three strategies is selected at
   preprocessing time:
     - FSSIZE >= W_TYPE_SIZE: build the result directly from the two
       words;
     - a wider floating type accepted by F_MODE_OK exists: fold the
       low-order bits of U into one sticky bit, compute in that wider
       type and narrow to FSTYPE at the end;
     - otherwise: squeeze the significant bits of U into a single word
       (with a sticky lsb), convert that word and rescale by a power of
       two.  */
FSTYPE
FUNC (DWtype u)
{
#if FSSIZE >= W_TYPE_SIZE
  /* When the word size is small, we never get any rounding error.  */
  FSTYPE f = (Wtype) (u >> W_TYPE_SIZE);
  f *= Wtype_MAXp1_F;
  f += (UWtype)u;
  return f;
#elif (LIBGCC2_HAS_DF_MODE && F_MODE_OK (__LIBGCC_DF_MANT_DIG__))	\
     || (LIBGCC2_HAS_XF_MODE && F_MODE_OK (__LIBGCC_XF_MANT_DIG__))	\
     || (LIBGCC2_HAS_TF_MODE && F_MODE_OK (__LIBGCC_TF_MANT_DIG__))

  /* Use the first of DF, XF, TF (in that order) that passes F_MODE_OK
     as the intermediate type FTYPE; FSIZE is its mantissa width.  */
#if (LIBGCC2_HAS_DF_MODE && F_MODE_OK (__LIBGCC_DF_MANT_DIG__))
# define FSIZE __LIBGCC_DF_MANT_DIG__
# define FTYPE DFtype
#elif (LIBGCC2_HAS_XF_MODE && F_MODE_OK (__LIBGCC_XF_MANT_DIG__))
# define FSIZE __LIBGCC_XF_MANT_DIG__
# define FTYPE XFtype
#elif (LIBGCC2_HAS_TF_MODE && F_MODE_OK (__LIBGCC_TF_MANT_DIG__))
# define FSIZE __LIBGCC_TF_MANT_DIG__
# define FTYPE TFtype
#else
# error
#endif

/* Bit that stands in for all the bits below it when they are nonzero.  */
#define REP_BIT ((UDWtype) 1 << (DI_SIZE - FSIZE))

  /* Protect against double-rounding error.
     Represent any low-order bits, that might be truncated by a bit that
     won't be lost.  The bit can go in anywhere below the rounding position
     of the FSTYPE.  A fixed mask and bit position handles all usual
     configurations.  */
  /* Only values outside the open range (-2**FSIZE, 2**FSIZE) are
     adjusted.  */
  if (! (- ((DWtype) 1 << FSIZE) < u
	 && u < ((DWtype) 1 << FSIZE)))
    {
      if ((UDWtype) u & (REP_BIT - 1))
	{
	  u &= ~ (REP_BIT - 1);
	  u |= REP_BIT;
	}
    }

  /* Do the calculation in a wider type so that we don't lose any of
     the precision of the high word while multiplying it.  */
  FTYPE f = (Wtype) (u >> W_TYPE_SIZE);
  f *= Wtype_MAXp1_F;
  f += (UWtype)u;
  return (FSTYPE) f;
#else
  /* This path needs at least three spare bits in a word beyond the
     FSTYPE mantissa; refuse to build otherwise.  */
#if FSSIZE >= W_TYPE_SIZE - 2
# error
#endif
  /* Finally, the word size is larger than the number of bits in the
     required FSTYPE, and we've got no suitable wider type.  The only
     way to avoid double rounding is to special case the
     extraction.  */

  /* If there are no high bits set, fall back to one conversion.  */
  if ((Wtype)u == u)
    return (FSTYPE)(Wtype)u;

  /* Otherwise, find the power of two.  */
  Wtype hi = u >> W_TYPE_SIZE;
  /* Take the magnitude of the high word; the negation is done in
     UWtype so that it cannot overflow.  */
  if (hi < 0)
    hi = -(UWtype) hi;

  UWtype count, shift;
  /* hi may be zero here (U did not fit in one signed word because the
     top bit of its low word is set).  Handle that explicitly unless
     COUNT_LEADING_ZEROS_0 states that count_leading_zeros yields
     W_TYPE_SIZE for a zero argument.  */
#if !defined (COUNT_LEADING_ZEROS_0) || COUNT_LEADING_ZEROS_0 != W_TYPE_SIZE
  if (hi == 0)
    count = W_TYPE_SIZE;
  else
#endif
  count_leading_zeros (count, hi);

  /* No leading bits means u == minimum.  */
  if (count == 0)
    return Wtype_MAXp1_F * (FSTYPE) (hi | ((UWtype) u != 0));

  /* One position more than the number of significant bits in hi, so
     that the shifted value still fits in a signed word.  */
  shift = 1 + W_TYPE_SIZE - count;

  /* Shift down the most significant bits.  */
  hi = u >> shift;

  /* If we lost any nonzero bits, set the lsb to ensure correct rounding.  */
  if ((UWtype)u << (W_TYPE_SIZE - shift))
    hi |= 1;

  /* Convert the one word of data, and rescale.  */
  /* E is 2**shift expressed in FSTYPE.  */
  FSTYPE f = hi, e;
  if (shift == W_TYPE_SIZE)
    e = Wtype_MAXp1_F;
  /* The following two cases could be merged if we knew that the target
     supported a native unsigned->float conversion.  More often, we only
     have a signed conversion, and have to add extra fixup code.  */
  else if (shift == W_TYPE_SIZE - 1)
    e = Wtype_MAXp1_F / 2;
  else
    e = (Wtype)1 << shift;
  return f * e;
#endif
}
#endif

#if (defined(L_floatundisf) && LIBGCC2_HAS_SF_MODE)	\
     || (defined(L_floatundidf) && LIBGCC2_HAS_DF_MODE)
#define DI_SIZE (W_TYPE_SIZE * 2)
#define F_MODE_OK(SIZE) \
  (SIZE < DI_SIZE							\
   && SIZE > (DI_SIZE - SIZE + FSSIZE)					\
   && !AVOID_FP_TYPE_CONVERSION(SIZE))
#if defined(L_floatundisf)
#define FUNC __floatundisf
#define FSTYPE SFtype
#define FSSIZE __LIBGCC_SF_MANT_DIG__
#else
#define FUNC __floatundidf
#define FSTYPE DFtype
#define FSSIZE __LIBGCC_DF_MANT_DIG__
#endif

/* Convert the unsigned double-word integer U to FSTYPE.  FUNC, FSTYPE
   and FSSIZE come from the macros just above, giving __floatundisf
   (SFtype) or __floatundidf (DFtype).  One of three strategies is
   selected at preprocessing time:
     - FSSIZE >= W_TYPE_SIZE: build the result directly from the two
       words;
     - a wider floating type accepted by F_MODE_OK exists: fold the
       low-order bits of U into one sticky bit, compute in that wider
       type and narrow to FSTYPE at the end;
     - otherwise: squeeze the significant bits of U into a single word
       (with a sticky lsb), convert that word and rescale by a power of
       two.  */
FSTYPE
FUNC (UDWtype u)
{
#if FSSIZE >= W_TYPE_SIZE
  /* When the word size is small, we never get any rounding error.  */
  FSTYPE f = (UWtype) (u >> W_TYPE_SIZE);
  f *= Wtype_MAXp1_F;
  f += (UWtype)u;
  return f;
#elif (LIBGCC2_HAS_DF_MODE && F_MODE_OK (__LIBGCC_DF_MANT_DIG__))	\
     || (LIBGCC2_HAS_XF_MODE && F_MODE_OK (__LIBGCC_XF_MANT_DIG__))	\
     || (LIBGCC2_HAS_TF_MODE && F_MODE_OK (__LIBGCC_TF_MANT_DIG__))

  /* Use the first of DF, XF, TF (in that order) that passes F_MODE_OK
     as the intermediate type FTYPE; FSIZE is its mantissa width.  */
#if (LIBGCC2_HAS_DF_MODE && F_MODE_OK (__LIBGCC_DF_MANT_DIG__))
# define FSIZE __LIBGCC_DF_MANT_DIG__
# define FTYPE DFtype
#elif (LIBGCC2_HAS_XF_MODE && F_MODE_OK (__LIBGCC_XF_MANT_DIG__))
# define FSIZE __LIBGCC_XF_MANT_DIG__
# define FTYPE XFtype
#elif (LIBGCC2_HAS_TF_MODE && F_MODE_OK (__LIBGCC_TF_MANT_DIG__))
# define FSIZE __LIBGCC_TF_MANT_DIG__
# define FTYPE TFtype
#else
# error
#endif

/* Bit that stands in for all the bits below it when they are nonzero.  */
#define REP_BIT ((UDWtype) 1 << (DI_SIZE - FSIZE))

  /* Protect against double-rounding error.
     Represent any low-order bits, that might be truncated by a bit that
     won't be lost.  The bit can go in anywhere below the rounding position
     of the FSTYPE.  A fixed mask and bit position handles all usual
     configurations.  */
  /* Only values of at least 2**FSIZE are adjusted.  */
  if (u >= ((UDWtype) 1 << FSIZE))
    {
      if ((UDWtype) u & (REP_BIT - 1))
	{
	  u &= ~ (REP_BIT - 1);
	  u |= REP_BIT;
	}
    }

  /* Do the calculation in a wider type so that we don't lose any of
     the precision of the high word while multiplying it.  */
  FTYPE f = (UWtype) (u >> W_TYPE_SIZE);
  f *= Wtype_MAXp1_F;
  f += (UWtype)u;
  return (FSTYPE) f;
#else
  /* This path is not usable when the FSTYPE mantissa is exactly one bit
     narrower than a word; refuse to build in that case.  */
#if FSSIZE == W_TYPE_SIZE - 1
# error
#endif
  /* Finally, the word size is larger than the number of bits in the
     required FSTYPE, and we've got no suitable wider type.  The only
     way to avoid double rounding is to special case the
     extraction.  */

  /* If there are no high bits set, fall back to one conversion.  */
  if ((UWtype)u == u)
    return (FSTYPE)(UWtype)u;

  /* Otherwise, find the power of two.  */
  UWtype hi = u >> W_TYPE_SIZE;

  UWtype count, shift;
  /* hi is nonzero here: a zero high word was handled by the early
     return above.  */
  count_leading_zeros (count, hi);

  /* Number of significant bits in the high word.  */
  shift = W_TYPE_SIZE - count;

  /* Shift down the most significant bits.  */
  hi = u >> shift;

  /* If we lost any nonzero bits, set the lsb to ensure correct rounding.  */
  if ((UWtype)u << (W_TYPE_SIZE - shift))
    hi |= 1;

  /* Convert the one word of data, and rescale.  */
  /* E is 2**shift expressed in FSTYPE.  */
  FSTYPE f = hi, e;
  if (shift == W_TYPE_SIZE)
    e = Wtype_MAXp1_F;
  /* The following two cases could be merged if we knew that the target
     supported a native unsigned->float conversion.  More often, we only
     have a signed conversion, and have to add extra fixup code.  */
  else if (shift == W_TYPE_SIZE - 1)
    e = Wtype_MAXp1_F / 2;
  else
    e = (Wtype)1 << shift;
  return f * e;
#endif
}
#endif

#if defined(L_fixunsxfsi) && LIBGCC2_HAS_XF_MODE
UWtype
__fixunsxfSI (XFtype a)
{
  /* Convert A to an unsigned word using only signed conversions.
     Inputs at or above -(DFtype) Wtype_MIN are first biased down by
     adding Wtype_MIN, converted as a signed word, and the bias is then
     removed again.  Removing it in UWtype arithmetic gives the same
     value modulo the word size as a wrapping signed subtraction would,
     without overflowing a signed type.  */
  if (a >= - (DFtype) Wtype_MIN)
    return (UWtype) (Wtype) (a + Wtype_MIN) - (UWtype) Wtype_MIN;

  /* Everything else (including NaN, for which the comparison above is
     false) is converted directly as a signed word.  */
  return (Wtype) a;
}
#endif

#if defined(L_fixunsdfsi) && LIBGCC2_HAS_DF_MODE
UWtype
__fixunsdfSI (DFtype a)
{
  /* Convert A to an unsigned word using only signed conversions.
     Inputs at or above -(DFtype) Wtype_MIN are first biased down by
     adding Wtype_MIN, converted as a signed word, and the bias is then
     removed again.  Removing it in UWtype arithmetic gives the same
     value modulo the word size as a wrapping signed subtraction would,
     without overflowing a signed type.  */
  if (a >= - (DFtype) Wtype_MIN)
    return (UWtype) (Wtype) (a + Wtype_MIN) - (UWtype) Wtype_MIN;

  /* Everything else (including NaN, for which the comparison above is
     false) is converted directly as a signed word.  */
  return (Wtype) a;
}
#endif

#if defined(L_fixunssfsi) && LIBGCC2_HAS_SF_MODE
UWtype
__fixunssfSI (SFtype a)
{
  /* Convert A to an unsigned word using only signed conversions.
     Inputs at or above -(SFtype) Wtype_MIN are first biased down by
     adding Wtype_MIN, converted as a signed word, and the bias is then
     removed again.  Removing it in UWtype arithmetic gives the same
     value modulo the word size as a wrapping signed subtraction would,
     without overflowing a signed type.  */
  if (a >= - (SFtype) Wtype_MIN)
    return (UWtype) (Wtype) (a + Wtype_MIN) - (UWtype) Wtype_MIN;

  /* Everything else (including NaN, for which the comparison above is
     false) is converted directly as a signed word.  */
  return (Wtype) a;
}
#endif

/* Integer power helper used from __builtin_powi for non-constant
   exponents.  */

#if (defined(L_powisf2) && LIBGCC2_HAS_SF_MODE) \
    || (defined(L_powidf2) && LIBGCC2_HAS_DF_MODE) \
    || (defined(L_powixf2) && LIBGCC2_HAS_XF_MODE) \
    || (defined(L_powitf2) && LIBGCC2_HAS_TF_MODE)
# if defined(L_powisf2)
#  define TYPE SFtype
#  define NAME __powisf2
# elif defined(L_powidf2)
#  define TYPE DFtype
#  define NAME __powidf2
# elif defined(L_powixf2)
#  define TYPE XFtype
#  define NAME __powixf2
# elif defined(L_powitf2)
#  define TYPE TFtype
#  define NAME __powitf2
# endif

#undef int
#undef unsigned
TYPE
NAME (TYPE x, int m)
{
  /* Raise X to the integer power M by binary exponentiation: X is
     squared once per remaining exponent bit and multiplied into the
     accumulator whenever that bit is set.  A negative M yields the
     reciprocal of the result for |M|.  */
  const _Bool negative = m < 0;
  unsigned int bits = negative ? -(unsigned int) m : (unsigned int) m;
  TYPE acc = (bits & 1u) ? x : 1;

  for (bits >>= 1; bits != 0; bits >>= 1)
    {
      x = x * x;
      if (bits & 1u)
	acc = acc * x;
    }

  if (negative)
    return 1/acc;
  return acc;
}

#endif

#if((defined(L_mulhc3) || defined(L_divhc3)) && LIBGCC2_HAS_HF_MODE) \
    || ((defined(L_mulsc3) || defined(L_divsc3)) && LIBGCC2_HAS_SF_MODE) \
    || ((defined(L_muldc3) || defined(L_divdc3)) && LIBGCC2_HAS_DF_MODE) \
    || ((defined(L_mulxc3) || defined(L_divxc3)) && LIBGCC2_HAS_XF_MODE) \
    || ((defined(L_multc3) || defined(L_divtc3)) && LIBGCC2_HAS_TF_MODE)

#undef float
#undef double
#undef long

#if defined(L_mulhc3) || defined(L_divhc3)
# define MTYPE	HFtype
# define CTYPE	HCtype
# define AMTYPE SFtype
# define MODE	hc
# define CEXT	__LIBGCC_HF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_HF_EXCESS_PRECISION__)
#elif defined(L_mulsc3) || defined(L_divsc3)
# define MTYPE	SFtype
# define CTYPE	SCtype
# define AMTYPE DFtype
# define MODE	sc
# define CEXT	__LIBGCC_SF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_SF_EXCESS_PRECISION__)
# define RBIG	(__LIBGCC_SF_MAX__ / 2)
# define RMIN	(__LIBGCC_SF_MIN__)
# define RMIN2	(__LIBGCC_SF_EPSILON__)
# define RMINSCAL (1 / __LIBGCC_SF_EPSILON__)
# define RMAX2	(RBIG * RMIN2)
#elif defined(L_muldc3) || defined(L_divdc3)
# define MTYPE	DFtype
# define CTYPE	DCtype
# define MODE	dc
# define CEXT	__LIBGCC_DF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_DF_EXCESS_PRECISION__)
# define RBIG	(__LIBGCC_DF_MAX__ / 2)
# define RMIN	(__LIBGCC_DF_MIN__)
# define RMIN2	(__LIBGCC_DF_EPSILON__)
# define RMINSCAL (1 / __LIBGCC_DF_EPSILON__)
# define RMAX2  (RBIG * RMIN2)
#elif defined(L_mulxc3) || defined(L_divxc3)
# define MTYPE	XFtype
# define CTYPE	XCtype
# define MODE	xc
# define CEXT	__LIBGCC_XF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_XF_EXCESS_PRECISION__)
# define RBIG	(__LIBGCC_XF_MAX__ / 2)
# define RMIN	(__LIBGCC_XF_MIN__)
# define RMIN2	(__LIBGCC_XF_EPSILON__)
# define RMINSCAL (1 / __LIBGCC_XF_EPSILON__)
# define RMAX2	(RBIG * RMIN2)
#elif defined(L_multc3) || defined(L_divtc3)
# define MTYPE	TFtype
# define CTYPE	TCtype
# define MODE	tc
# define CEXT	__LIBGCC_TF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_TF_EXCESS_PRECISION__)
# if __LIBGCC_TF_MANT_DIG__ == 106
#  define RBIG	(__LIBGCC_DF_MAX__ / 2)
#  define RMIN	(__LIBGCC_DF_MIN__)
#  define RMIN2  (__LIBGCC_DF_EPSILON__)
#  define RMINSCAL (1 / __LIBGCC_DF_EPSILON__)
# else
#  define RBIG	(__LIBGCC_TF_MAX__ / 2)
#  define RMIN	(__LIBGCC_TF_MIN__)
#  define RMIN2	(__LIBGCC_TF_EPSILON__)
#  define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
# endif
# define RMAX2	(RBIG * RMIN2)
#else
# error
#endif

#define CONCAT3(A,B,C)	_CONCAT3(A,B,C)
#define _CONCAT3(A,B,C)	A##B##C

#define CONCAT2(A,B)	_CONCAT2(A,B)
#define _CONCAT2(A,B)	A##B

#define isnan(x)	__builtin_isnan (x)
#define isfinite(x)	__builtin_isfinite (x)
#define isinf(x)	__builtin_isinf (x)

#define INFINITY	CONCAT2(__builtin_huge_val, CEXT) ()
#define I		1i

/* Helpers to make the following code slightly less gross.  */
#define COPYSIGN	CONCAT2(__builtin_copysign, CEXT)
#define FABS		CONCAT2(__builtin_fabs, CEXT)

/* Verify that MTYPE matches up with CEXT.  */
extern void *compile_type_assert[sizeof(INFINITY) == sizeof(MTYPE) ? 1 : -1];

/* Ensure that we've lost any extra precision.  */
#if NOTRUNC
# define TRUNC(x)
#else
# define TRUNC(x)	__asm__ ("" : "=m"(x) : "m"(x))
#endif

#if defined(L_mulhc3) || defined(L_mulsc3) || defined(L_muldc3) \
    || defined(L_mulxc3) || defined(L_multc3)

/* Complex multiplication: return (a + i*b) * (c + i*d) as a CTYPE.
   When the straightforward result is NaN + iNaN, the operands are
   inspected and, if an infinity was involved, the result is recomputed
   so that it comes out infinite.  */
CTYPE
CONCAT3(__mul,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
{
  CTYPE res;
  MTYPE re, im;
  MTYPE prod_ac = a * c;
  MTYPE prod_bd = b * d;
  MTYPE prod_ad = a * d;
  MTYPE prod_bc = b * c;

  /* Drop any excess precision held in the partial products.  */
  TRUNC (prod_ac);
  TRUNC (prod_bd);
  TRUNC (prod_ad);
  TRUNC (prod_bc);

  re = prod_ac - prod_bd;
  im = prod_ad + prod_bc;

  if (isnan (re) && isnan (im))
    {
      /* Recover infinities that computed as NaN + iNaN.  */
      _Bool redo = 0;

      if (isinf (a) || isinf (b))
	{
	  /* The first operand is infinite.  Reduce it to a signed 0/1
	     pair and turn NaNs in the second operand into zeros.  */
	  a = COPYSIGN (isinf (a) ? 1 : 0, a);
	  b = COPYSIGN (isinf (b) ? 1 : 0, b);
	  if (isnan (c))
	    c = COPYSIGN (0, c);
	  if (isnan (d))
	    d = COPYSIGN (0, d);
	  redo = 1;
	}

      if (isinf (c) || isinf (d))
	{
	  /* The second operand is infinite.  Reduce it to a signed 0/1
	     pair and turn NaNs in the first operand into zeros.  */
	  c = COPYSIGN (isinf (c) ? 1 : 0, c);
	  d = COPYSIGN (isinf (d) ? 1 : 0, d);
	  if (isnan (a))
	    a = COPYSIGN (0, a);
	  if (isnan (b))
	    b = COPYSIGN (0, b);
	  redo = 1;
	}

      if (!redo
	  && (isinf (prod_ac) || isinf (prod_bd)
	      || isinf (prod_ad) || isinf (prod_bc)))
	{
	  /* No operand was infinite but a partial product overflowed:
	     recover the infinity by changing NaN operands to zero.  */
	  if (isnan (a))
	    a = COPYSIGN (0, a);
	  if (isnan (b))
	    b = COPYSIGN (0, b);
	  if (isnan (c))
	    c = COPYSIGN (0, c);
	  if (isnan (d))
	    d = COPYSIGN (0, d);
	  redo = 1;
	}

      if (redo)
	{
	  re = INFINITY * (a * c - b * d);
	  im = INFINITY * (a * d + b * c);
	}
    }

  __real__ res = re;
  __imag__ res = im;
  return res;
}
#endif /* complex multiply */

#if defined(L_divhc3) || defined(L_divsc3) || defined(L_divdc3) \
    || defined(L_divxc3) || defined(L_divtc3)

/* Complex division: return (a + i*b) / (c + i*d) as a CTYPE whose real
   part is x and whose imaginary part is y.

   Two implementations are selected at preprocessing time:
     - for L_divhc3, and for L_divsc3 when __LIBGCC_HAVE_HWDBL__ is
       defined, the operands are widened to AMTYPE and the textbook
       formula is evaluated directly;
     - otherwise the quotient is formed by first dividing through by
       whichever of c and d has the larger magnitude, with extra
       rescaling of all four operands (by 1/2 or by RMINSCAL) to limit
       overflow and underflow.

   In either case a NaN + iNaN result is examined afterwards and, where
   the operands warrant it, replaced by suitable infinities or zeros.  */
CTYPE
CONCAT3(__div,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
{
#if defined(L_divhc3)						\
  || (defined(L_divsc3) && defined(__LIBGCC_HAVE_HWDBL__) )

  /* Half precision is handled with float precision.
     float is handled with double precision when double precision
     hardware is available.
     Due to the additional precision, the simple complex divide
     method (without Smith's method) is sufficient to get accurate
     answers and runs slightly faster than Smith's method.  */

  AMTYPE aa, bb, cc, dd;
  AMTYPE denom;
  MTYPE x, y;
  CTYPE res;
  aa = a;
  bb = b;
  cc = c;
  dd = d;

  /* (a + ib) / (c + id) = ((ac + bd) + i(bc - ad)) / (c*c + d*d).  */
  denom = (cc * cc) + (dd * dd);
  x = ((aa * cc) + (bb * dd)) / denom;
  y = ((bb * cc) - (aa * dd)) / denom;

#else
  MTYPE denom, ratio, x, y;
  CTYPE res;

  /* double, extended, long double have significant potential
     underflow/overflow errors that can be greatly reduced with
     a limited number of tests and adjustments.  float is handled
     the same way when no HW double is available.
  */

  /* Scale by max(c,d) to reduce chances of denominator overflowing.  */
  if (FABS (c) < FABS (d))
    {
      /* |d| is the larger component, so divide through by d.  */

      /* Prevent underflow when denominator is near max representable.  */
      /* Here |c| < |d|, so denom = c*ratio + d below can be almost
	 2*|d|; RBIG is half the largest finite value.  Halving all four
	 operands leaves the quotient unchanged.  */
      if (FABS (d) >= RBIG)
	{
	  a = a / 2;
	  b = b / 2;
	  c = c / 2;
	  d = d / 2;
	}
      /* Avoid overflow/underflow issues when c and d are small.
	 Scaling up helps avoid some underflows.
	 No new overflow possible since c&d < RMIN2.  */
      if (FABS (d) < RMIN2)
	{
	  a = a * RMINSCAL;
	  b = b * RMINSCAL;
	  c = c * RMINSCAL;
	  d = d * RMINSCAL;
	}
      else
	{
	  /* Also scale up when one numerator component is below RMIN
	     while the other numerator component and d are both below
	     RMAX2.  */
	  if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2))
	      || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
		  && (FABS (d) < RMAX2)))
	    {
	      a = a * RMINSCAL;
	      b = b * RMINSCAL;
	      c = c * RMINSCAL;
	      d = d * RMINSCAL;
	    }
	}
      ratio = c / d;
      denom = (c * ratio) + d;
      /* Choose alternate order of computation if ratio is subnormal.  */
      if (FABS (ratio) > RMIN)
	{
	  x = ((a * ratio) + b) / denom;
	  y = ((b * ratio) - a) / denom;
	}
      else
	{
	  /* Same quantities with c * (a / d) in place of a * (c / d),
	     so the tiny ratio is not used as a factor.  */
	  x = ((c * (a / d)) + b) / denom;
	  y = ((c * (b / d)) - a) / denom;
	}
    }
  else
    {
      /* |c| is at least as large as |d| (or a comparison involved a
	 NaN), so divide through by c.  This mirrors the branch above
	 with the roles of c and d exchanged.  */

      /* Prevent underflow when denominator is near max representable.  */
      if (FABS (c) >= RBIG)
	{
	  a = a / 2;
	  b = b / 2;
	  c = c / 2;
	  d = d / 2;
	}
      /* Avoid overflow/underflow issues when both c and d are small.
	 Scaling up helps avoid some underflows.
	 No new overflow possible since both c&d are less than RMIN2.  */
      if (FABS (c) < RMIN2)
	{
	  a = a * RMINSCAL;
	  b = b * RMINSCAL;
	  c = c * RMINSCAL;
	  d = d * RMINSCAL;
	}
      else
	{
	  if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
	      || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
		  && (FABS (c) < RMAX2)))
	    {
	      a = a * RMINSCAL;
	      b = b * RMINSCAL;
	      c = c * RMINSCAL;
	      d = d * RMINSCAL;
	    }
	}
      ratio = d / c;
      denom = (d * ratio) + c;
      /* Choose alternate order of computation if ratio is subnormal.  */
      if (FABS (ratio) > RMIN)
	{
	  x = ((b * ratio) + a) / denom;
	  y = (b - (a * ratio)) / denom;
	}
      else
	{
	  x = (a + (d * (b / c))) / denom;
	  y = (b - (d * (a / c))) / denom;
	}
    }
#endif

  /* Recover infinities and zeros that computed as NaN+iNaN; the only
     cases are nonzero/zero, infinite/finite, and finite/infinite.  */
  if (isnan (x) && isnan (y))
    {
      if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b)))
	{
	  /* Division by zero: an infinity carrying the sign of c, times
	     each numerator component.  */
	  x = COPYSIGN (INFINITY, c) * a;
	  y = COPYSIGN (INFINITY, c) * b;
	}
      else if ((isinf (a) || isinf (b)) && isfinite (c) && isfinite (d))
	{
	  /* Infinite numerator, finite denominator: reduce the numerator
	     to a signed 0/1 pair and scale the result to infinity.  */
	  a = COPYSIGN (isinf (a) ? 1 : 0, a);
	  b = COPYSIGN (isinf (b) ? 1 : 0, b);
	  x = INFINITY * (a * c + b * d);
	  y = INFINITY * (b * c - a * d);
	}
      else if ((isinf (c) || isinf (d)) && isfinite (a) && isfinite (b))
	{
	  /* Finite numerator, infinite denominator: reduce the
	     denominator to a signed 0/1 pair; the result is a zero with
	     the appropriate sign.  */
	  c = COPYSIGN (isinf (c) ? 1 : 0, c);
	  d = COPYSIGN (isinf (d) ? 1 : 0, d);
	  x = 0.0 * (a * c + b * d);
	  y = 0.0 * (b * c - a * d);
	}
    }

  __real__ res = x;
  __imag__ res = y;
  return res;
}
#endif /* complex divide */

#endif /* all complex float routines */

/* From here on down, the routines use normal data types.  */

#define SItype bogus_type
#define USItype bogus_type
#define DItype bogus_type
#define UDItype bogus_type
#define SFtype bogus_type
#define DFtype bogus_type
#undef Wtype
#undef UWtype
#undef HWtype
#undef UHWtype
#undef DWtype
#undef UDWtype

#undef char
#undef short
#undef int
#undef long
#undef unsigned
#undef float
#undef double

#ifdef L__gcc_bcmp

/* Like bcmp except the sign is meaningful.
   Result is negative if S1 is less than S2,
   positive if S1 is greater, 0 if S1 and S2 are equal.  */

int
__gcc_bcmp (const unsigned char *s1, const unsigned char *s2, size_t size)
{
  /* Walk both buffers in step and report the difference of the first
     pair of bytes that disagree (S1's byte minus S2's byte).  */
  for (size_t i = 0; i < size; i++)
    {
      const int diff = s1[i] - s2[i];
      if (diff != 0)
	return diff;
    }

  /* All SIZE bytes matched (or SIZE was zero).  */
  return 0;
}

#endif

/* __eprintf used to be used by GCC's private version of <assert.h>.
   We no longer provide that header, but this routine remains in libgcc.a
   for binary backward compatibility.  Note that it is not included in
   the shared version of libgcc.  */
#ifdef L_eprintf
#ifndef inhibit_libc

#undef NULL /* Avoid errors if stdio.h and our stddef.h mismatch.  */
#include <stdio.h>

void
__eprintf (const char *string, const char *expression,
	   unsigned int line, const char *filename)
{
  /* STRING is used as the printf format and receives EXPRESSION, LINE
     and FILENAME, in that order, as its arguments.  The message goes to
     stderr, which is flushed before the process is aborted.  */
  FILE *const err = stderr;

  fprintf (err, string, expression, line, filename);
  fflush (err);
  abort ();
}

#endif
#endif


#ifdef L_clear_cache
/* Clear part of an instruction cache.  */

/* Clear the instruction cache for the address range given by BEG and
   END by expanding the target's CLEAR_INSN_CACHE macro.  When that
   macro is not defined the function body is empty, which is why both
   parameters carry the `unused' attribute.  How the macro interprets
   END (inclusive or exclusive) is not visible here.  */
void
__clear_cache (void *beg __attribute__((__unused__)),
	       void *end __attribute__((__unused__)))
{
#ifdef CLEAR_INSN_CACHE
  /* Cast the void* pointers to char* as some implementations
     of the macro assume the pointers can be subtracted from
     one another.  */
  CLEAR_INSN_CACHE ((char *) beg, (char *) end);
#endif /* CLEAR_INSN_CACHE */
}

#endif /* L_clear_cache */

#ifdef L_trampoline

/* Jump to a trampoline, loading the static chain address.  */

#if defined(WINNT) && ! defined(__CYGWIN__)
#include <windows.h>
int getpagesize (void);
int mprotect (char *,int, int);

int
getpagesize (void)
{
  /* Fixed page size: 4096 bytes, or 8192 when _ALPHA_ is defined.  */
  int page_bytes = 4096;
#ifdef _ALPHA_
  page_bytes = 8192;
#endif
  return page_bytes;
}

int
mprotect (char *addr, int len, int prot)
{
  /* Minimal mprotect replacement built on VirtualProtect.  PROT is
     translated to a Win32 protection value; the case labels presumably
     follow the usual read=1 / write=2 / exec=4 encoding (confirm
     against the callers).  Returns 0 on success, and -1 for an
     unsupported PROT or when VirtualProtect fails.  */
  DWORD np, op;

  switch (prot)
    {
    case 7:
      np = 0x40;	/* PAGE_EXECUTE_READWRITE */
      break;
    case 5:
      np = 0x20;	/* PAGE_EXECUTE_READ */
      break;
    case 4:
      np = 0x10;	/* PAGE_EXECUTE */
      break;
    case 3:
      np = 0x04;	/* PAGE_READWRITE */
      break;
    case 1:
      np = 0x02;	/* PAGE_READONLY */
      break;
    case 0:
      np = 0x01;	/* PAGE_NOACCESS */
      break;
    default:
      return -1;
    }

  /* OP receives the previous protection, which is not used.  */
  return VirtualProtect (addr, len, np, &op) ? 0 : -1;
}

#endif /* WINNT && ! __CYGWIN__ */

#ifdef TRANSFER_FROM_TRAMPOLINE
TRANSFER_FROM_TRAMPOLINE
#endif
#endif /* L_trampoline */

#ifndef __CYGWIN__
#ifdef L__main

#include "gbl-ctors.h"

/* Some systems use __main in a way incompatible with its use in gcc, in these
   cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
   give the same symbol without quotes for an alternative entry point.  You
   must define both, or neither.  */
#ifndef NAME__MAIN
#define NAME__MAIN "__main"
#define SYMBOL__MAIN __main
#endif

#if defined (__LIBGCC_INIT_SECTION_ASM_OP__) \
    || defined (__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__)
#undef HAS_INIT_SECTION
#define HAS_INIT_SECTION
#endif

#if !defined (HAS_INIT_SECTION) || !defined (OBJECT_FORMAT_ELF)

/* Some ELF crosses use crtstuff.c to provide __CTOR_LIST__, but use this
   code to run constructors.  In that case, we need to handle EH here, too.
   But MINGW32 is special because it handles CRTSTUFF and EH on its own.  */

#ifdef __MINGW32__
#undef __LIBGCC_EH_FRAME_SECTION_NAME__
#endif

#ifdef __LIBGCC_EH_FRAME_SECTION_NAME__
#include "unwind-dw2-fde.h"
extern unsigned char __EH_FRAME_BEGIN__[];
#endif

/* Run all the global destructors on exit from the program.  */

void
__do_global_dtors (void)
{
#ifdef DO_GLOBAL_DTORS_BODY
  DO_GLOBAL_DTORS_BODY;
#else
  static func_ptr *p = __DTOR_LIST__ + 1;
  while (*p)
    {
      p++;
      (*(p-1)) ();
    }
#endif
#if defined (__LIBGCC_EH_FRAME_SECTION_NAME__) && !defined (HAS_INIT_SECTION)
  {
    static int completed = 0;
    if (! completed)
      {
	completed = 1;
	__deregister_frame_info (__EH_FRAME_BEGIN__);
      }
  }
#endif
}
#endif

#ifndef HAS_INIT_SECTION
/* Run all the global constructors on entry to the program.  */

/* Run the global constructors.  When an EH frame section name is
   configured, __EH_FRAME_BEGIN__ is first registered through
   __register_frame_info; then DO_GLOBAL_CTORS_BODY is expanded; finally
   __do_global_dtors is registered with atexit so that the destructors
   run when the program exits.  */
void
__do_global_ctors (void)
{
#ifdef __LIBGCC_EH_FRAME_SECTION_NAME__
  {
    /* Static storage handed to __register_frame_info -- presumably it
       must outlive this call; confirm against unwind-dw2-fde.h.  */
    static struct object object;
    __register_frame_info (__EH_FRAME_BEGIN__, &object);
  }
#endif
  DO_GLOBAL_CTORS_BODY;
  atexit (__do_global_dtors);
}
#endif /* no HAS_INIT_SECTION */

#if !defined (HAS_INIT_SECTION) || defined (INVOKE__main)
/* Subroutine called automatically by `main'.
   Compiling a global function named `main'
   produces an automatic call to this function at the beginning.

   For many systems, this routine calls __do_global_ctors.
   For systems which support a .init section we use the .init section
   to run __do_global_ctors, so we need not do anything here.  */

extern void SYMBOL__MAIN (void);
void
SYMBOL__MAIN (void)
{
  /* `main' may be called recursively; the static flag makes sure the
     constructors are run on the first call only.  */
  static int initialized;

  if (initialized)
    return;

  /* Set the flag before running the constructors.  */
  initialized = 1;
  __do_global_ctors ();
}
#endif /* no HAS_INIT_SECTION or INVOKE__main */

#endif /* L__main */
#endif /* __CYGWIN__ */

#ifdef L_ctors

#include "gbl-ctors.h"

/* Provide default definitions for the lists of constructors and
   destructors, so that we don't get linker errors.  These symbols are
   intentionally bss symbols, so that gld and/or collect will provide
   the right values.  */

/* We declare the lists here with two elements each,
   so that they are valid empty lists if no other definition is loaded.

   If we are using the old "set" extensions to have the GNU linker
   collect ctors and dtors, then __CTOR_LIST__ and __DTOR_LIST__
   must be in the bss/common section.

   Long term no port should use those extensions.  But many still do.  */
/* Default two-element constructor and destructor lists, provided only
   when no init-section asm op is configured.  */
#if !defined(__LIBGCC_INIT_SECTION_ASM_OP__)
#if defined (TARGET_ASM_CONSTRUCTOR) || defined (USE_COLLECT2)
/* Explicitly zero-initialized definitions.  */
func_ptr __CTOR_LIST__[2] = {0, 0};
func_ptr __DTOR_LIST__[2] = {0, 0};
#else
/* No initializer on purpose: per the comment above, these must be able
   to live in the bss/common section for the linker "set" extensions.  */
func_ptr __CTOR_LIST__[2];
func_ptr __DTOR_LIST__[2];
#endif
#endif /* no __LIBGCC_INIT_SECTION_ASM_OP__ */
#endif /* L_ctors */
#endif /* LIBGCC2_UNITS_PER_WORD <= LIBGCC2_MAX_UNITS_PER_WORD */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								/* More subroutines needed by GCC output code on some machines.  */
 								/* Compile this one with gcc.  */
-												Update copyright years.

											
										
										
											2022-01-03 10:42:10 +01:00
+								/* Copyright (C) 1989-2022 Free Software Foundation, Inc.
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												Makefile.in, [...]: replace "GNU CC" with "GCC".

	* Makefile.in, alias.c, basic-block.h, bb-reorder.c, bitmap.c,
	bitmap.h, builtin-types.def, builtins.c, builtins.def,
	c-aux-info.c, c-common.c, c-common.def, c-common.h,
	c-convert.c, c-decl.c, c-dump.c, c-dump.h, c-errors.c,
	c-format.c, c-lang.c, c-lex.c, c-lex.h, c-parse.in,
	c-pragma.c, c-pragma.h, c-semantics.c, c-tree.h, c-typeck.c,
	caller-save.c, calls.c, collect2.c, collect2.h, combine.c,
	conditions.h, config.gcc, configure.frag, configure.in,
	conflict.c, convert.c, convert.h, cppspec.c, crtstuff.c,
	cse.c, cselib.c, cselib.h, dbxout.c, dbxout.h, defaults.h,
	dependence.c, df.c, df.h, diagnostic.c, diagnostic.h,
	doloop.c, dominance.c, dwarf.h, dwarf2.h, dwarf2asm.c,
	dwarf2asm.h, dwarf2out.c, dwarf2out.h, dwarfout.c,
	emit-rtl.c, errors.c, errors.h, except.c, except.h,
	exgettext, explow.c, expmed.c, expr.c, expr.h, final.c,
	fixproto, flags.h, flow.c, fold-const.c, fp-test.c,
	function.c, function.h, gbl-ctors.h, gcc.c, gcc.h, gcc.hlp,
	gccspec.c, gcov-io.h, gcse.c, genattr.c, genattrtab.c,
	gencheck.c, gencodes.c, genconfig.c, genemit.c,
	genextract.c, genflags.c, gengenrtl.c, genmultilib,
	genopinit.c, genoutput.c, genpeep.c, genrecog.c,
	gensupport.c, gensupport.h, ggc-callbacks.c, ggc-common.c,
	ggc-none.c, ggc-page.c, ggc-simple.c, ggc.h, global.c,
	graph.c, graph.h, gthr-aix.h, gthr-dce.h, gthr-posix.h,
	gthr-rtems.h, gthr-single.h, gthr-solaris.h, gthr-vxworks.h,
	gthr-win32.h, gthr.h, haifa-sched.c, halfpic.c, halfpic.h,
	hard-reg-set.h, hwint.h, ifcvt.c, input.h, insn-addr.h,
	integrate.c, integrate.h, jump.c, lcm.c, libgcc2.c,
	libgcc2.h, lists.c, local-alloc.c, loop.c, loop.h,
	machmode.def, machmode.h, main.c, mbchar.c, mbchar.h,
	mips-tdump.c, mips-tfile.c, mklibgcc.in, mkmap-flat.awk,
	mkmap-symver.awk, optabs.c, output.h, params.c, params.def,
	params.h, predict.c, predict.def, predict.h, prefix.c,
	prefix.h, print-rtl.c, print-tree.c, profile.c, protoize.c,
	read-rtl.c, real.c, real.h, recog.c, recog.h, reg-stack.c,
	regclass.c, regmove.c, regrename.c, regs.h, reload.c,
	reload.h, reload1.c, reorg.c, resource.c, resource.h, rtl.c,
	rtl.def, rtl.h, rtlanal.c, sbitmap.c, sbitmap.h,
	sched-deps.c, sched-ebb.c, sched-int.h, sched-rgn.c,
	sched-vis.c, sdbout.c, sdbout.h, sibcall.c, simplify-rtx.c,
	ssa-ccp.c, ssa-dce.c, ssa.c, ssa.h, stmt.c, stor-layout.c,
	stringpool.c, system.h, timevar.c, timevar.def, timevar.h,
	tlink.c, toplev.c, toplev.h, tree.c, tree.def, tree.h,
	tsystem.h, unroll.c, unwind-dw2-fde.c, unwind-dw2-fde.h,
	unwind-dw2.c, unwind-pe.h, unwind-sjlj.c, unwind.h,
	unwind.inc, varasm.c, varray.c, varray.h, xcoffout.c,
	xcoffout.h: replace "GNU CC" with "GCC".

From-SVN: r45105

											
										
										
											2001-08-22 16:35:51 +02:00
+								This file is part of GCC.
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												Makefile.in, [...]: replace "GNU CC" with "GCC".

	* Makefile.in, alias.c, basic-block.h, bb-reorder.c, bitmap.c,
	bitmap.h, builtin-types.def, builtins.c, builtins.def,
	c-aux-info.c, c-common.c, c-common.def, c-common.h,
	c-convert.c, c-decl.c, c-dump.c, c-dump.h, c-errors.c,
	c-format.c, c-lang.c, c-lex.c, c-lex.h, c-parse.in,
	c-pragma.c, c-pragma.h, c-semantics.c, c-tree.h, c-typeck.c,
	caller-save.c, calls.c, collect2.c, collect2.h, combine.c,
	conditions.h, config.gcc, configure.frag, configure.in,
	conflict.c, convert.c, convert.h, cppspec.c, crtstuff.c,
	cse.c, cselib.c, cselib.h, dbxout.c, dbxout.h, defaults.h,
	dependence.c, df.c, df.h, diagnostic.c, diagnostic.h,
	doloop.c, dominance.c, dwarf.h, dwarf2.h, dwarf2asm.c,
	dwarf2asm.h, dwarf2out.c, dwarf2out.h, dwarfout.c,
	emit-rtl.c, errors.c, errors.h, except.c, except.h,
	exgettext, explow.c, expmed.c, expr.c, expr.h, final.c,
	fixproto, flags.h, flow.c, fold-const.c, fp-test.c,
	function.c, function.h, gbl-ctors.h, gcc.c, gcc.h, gcc.hlp,
	gccspec.c, gcov-io.h, gcse.c, genattr.c, genattrtab.c,
	gencheck.c, gencodes.c, genconfig.c, genemit.c,
	genextract.c, genflags.c, gengenrtl.c, genmultilib,
	genopinit.c, genoutput.c, genpeep.c, genrecog.c,
	gensupport.c, gensupport.h, ggc-callbacks.c, ggc-common.c,
	ggc-none.c, ggc-page.c, ggc-simple.c, ggc.h, global.c,
	graph.c, graph.h, gthr-aix.h, gthr-dce.h, gthr-posix.h,
	gthr-rtems.h, gthr-single.h, gthr-solaris.h, gthr-vxworks.h,
	gthr-win32.h, gthr.h, haifa-sched.c, halfpic.c, halfpic.h,
	hard-reg-set.h, hwint.h, ifcvt.c, input.h, insn-addr.h,
	integrate.c, integrate.h, jump.c, lcm.c, libgcc2.c,
	libgcc2.h, lists.c, local-alloc.c, loop.c, loop.h,
	machmode.def, machmode.h, main.c, mbchar.c, mbchar.h,
	mips-tdump.c, mips-tfile.c, mklibgcc.in, mkmap-flat.awk,
	mkmap-symver.awk, optabs.c, output.h, params.c, params.def,
	params.h, predict.c, predict.def, predict.h, prefix.c,
	prefix.h, print-rtl.c, print-tree.c, profile.c, protoize.c,
	read-rtl.c, real.c, real.h, recog.c, recog.h, reg-stack.c,
	regclass.c, regmove.c, regrename.c, regs.h, reload.c,
	reload.h, reload1.c, reorg.c, resource.c, resource.h, rtl.c,
	rtl.def, rtl.h, rtlanal.c, sbitmap.c, sbitmap.h,
	sched-deps.c, sched-ebb.c, sched-int.h, sched-rgn.c,
	sched-vis.c, sdbout.c, sdbout.h, sibcall.c, simplify-rtx.c,
	ssa-ccp.c, ssa-dce.c, ssa.c, ssa.h, stmt.c, stor-layout.c,
	stringpool.c, system.h, timevar.c, timevar.def, timevar.h,
	tlink.c, toplev.c, toplev.h, tree.c, tree.def, tree.h,
	tsystem.h, unroll.c, unwind-dw2-fde.c, unwind-dw2-fde.h,
	unwind-dw2.c, unwind-pe.h, unwind-sjlj.c, unwind.h,
	unwind.inc, varasm.c, varray.c, varray.h, xcoffout.c,
	xcoffout.h: replace "GNU CC" with "GCC".

From-SVN: r45105

											
										
										
											2001-08-22 16:35:51 +02:00
+								GCC is free software; you can redistribute it and/or modify it under
 								the terms of the GNU General Public License as published by the Free
-												Licensing changes to GPLv3 resp. GPLv3 with GCC Runtime Exception.

From-SVN: r145841

											
										
										
											2009-04-09 17:00:19 +02:00
+								Software Foundation; either version 3, or (at your option) any later
-												Makefile.in, [...]: replace "GNU CC" with "GCC".

	* Makefile.in, alias.c, basic-block.h, bb-reorder.c, bitmap.c,
	bitmap.h, builtin-types.def, builtins.c, builtins.def,
	c-aux-info.c, c-common.c, c-common.def, c-common.h,
	c-convert.c, c-decl.c, c-dump.c, c-dump.h, c-errors.c,
	c-format.c, c-lang.c, c-lex.c, c-lex.h, c-parse.in,
	c-pragma.c, c-pragma.h, c-semantics.c, c-tree.h, c-typeck.c,
	caller-save.c, calls.c, collect2.c, collect2.h, combine.c,
	conditions.h, config.gcc, configure.frag, configure.in,
	conflict.c, convert.c, convert.h, cppspec.c, crtstuff.c,
	cse.c, cselib.c, cselib.h, dbxout.c, dbxout.h, defaults.h,
	dependence.c, df.c, df.h, diagnostic.c, diagnostic.h,
	doloop.c, dominance.c, dwarf.h, dwarf2.h, dwarf2asm.c,
	dwarf2asm.h, dwarf2out.c, dwarf2out.h, dwarfout.c,
	emit-rtl.c, errors.c, errors.h, except.c, except.h,
	exgettext, explow.c, expmed.c, expr.c, expr.h, final.c,
	fixproto, flags.h, flow.c, fold-const.c, fp-test.c,
	function.c, function.h, gbl-ctors.h, gcc.c, gcc.h, gcc.hlp,
	gccspec.c, gcov-io.h, gcse.c, genattr.c, genattrtab.c,
	gencheck.c, gencodes.c, genconfig.c, genemit.c,
	genextract.c, genflags.c, gengenrtl.c, genmultilib,
	genopinit.c, genoutput.c, genpeep.c, genrecog.c,
	gensupport.c, gensupport.h, ggc-callbacks.c, ggc-common.c,
	ggc-none.c, ggc-page.c, ggc-simple.c, ggc.h, global.c,
	graph.c, graph.h, gthr-aix.h, gthr-dce.h, gthr-posix.h,
	gthr-rtems.h, gthr-single.h, gthr-solaris.h, gthr-vxworks.h,
	gthr-win32.h, gthr.h, haifa-sched.c, halfpic.c, halfpic.h,
	hard-reg-set.h, hwint.h, ifcvt.c, input.h, insn-addr.h,
	integrate.c, integrate.h, jump.c, lcm.c, libgcc2.c,
	libgcc2.h, lists.c, local-alloc.c, loop.c, loop.h,
	machmode.def, machmode.h, main.c, mbchar.c, mbchar.h,
	mips-tdump.c, mips-tfile.c, mklibgcc.in, mkmap-flat.awk,
	mkmap-symver.awk, optabs.c, output.h, params.c, params.def,
	params.h, predict.c, predict.def, predict.h, prefix.c,
	prefix.h, print-rtl.c, print-tree.c, profile.c, protoize.c,
	read-rtl.c, real.c, real.h, recog.c, recog.h, reg-stack.c,
	regclass.c, regmove.c, regrename.c, regs.h, reload.c,
	reload.h, reload1.c, reorg.c, resource.c, resource.h, rtl.c,
	rtl.def, rtl.h, rtlanal.c, sbitmap.c, sbitmap.h,
	sched-deps.c, sched-ebb.c, sched-int.h, sched-rgn.c,
	sched-vis.c, sdbout.c, sdbout.h, sibcall.c, simplify-rtx.c,
	ssa-ccp.c, ssa-dce.c, ssa.c, ssa.h, stmt.c, stor-layout.c,
	stringpool.c, system.h, timevar.c, timevar.def, timevar.h,
	tlink.c, toplev.c, toplev.h, tree.c, tree.def, tree.h,
	tsystem.h, unroll.c, unwind-dw2-fde.c, unwind-dw2-fde.h,
	unwind-dw2.c, unwind-pe.h, unwind-sjlj.c, unwind.h,
	unwind.inc, varasm.c, varray.c, varray.h, xcoffout.c,
	xcoffout.h: replace "GNU CC" with "GCC".

From-SVN: r45105

											
										
										
											2001-08-22 16:35:51 +02:00
+								version.
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												Makefile.in, [...]: replace "GNU CC" with "GCC".

	* Makefile.in, alias.c, basic-block.h, bb-reorder.c, bitmap.c,
	bitmap.h, builtin-types.def, builtins.c, builtins.def,
	c-aux-info.c, c-common.c, c-common.def, c-common.h,
	c-convert.c, c-decl.c, c-dump.c, c-dump.h, c-errors.c,
	c-format.c, c-lang.c, c-lex.c, c-lex.h, c-parse.in,
	c-pragma.c, c-pragma.h, c-semantics.c, c-tree.h, c-typeck.c,
	caller-save.c, calls.c, collect2.c, collect2.h, combine.c,
	conditions.h, config.gcc, configure.frag, configure.in,
	conflict.c, convert.c, convert.h, cppspec.c, crtstuff.c,
	cse.c, cselib.c, cselib.h, dbxout.c, dbxout.h, defaults.h,
	dependence.c, df.c, df.h, diagnostic.c, diagnostic.h,
	doloop.c, dominance.c, dwarf.h, dwarf2.h, dwarf2asm.c,
	dwarf2asm.h, dwarf2out.c, dwarf2out.h, dwarfout.c,
	emit-rtl.c, errors.c, errors.h, except.c, except.h,
	exgettext, explow.c, expmed.c, expr.c, expr.h, final.c,
	fixproto, flags.h, flow.c, fold-const.c, fp-test.c,
	function.c, function.h, gbl-ctors.h, gcc.c, gcc.h, gcc.hlp,
	gccspec.c, gcov-io.h, gcse.c, genattr.c, genattrtab.c,
	gencheck.c, gencodes.c, genconfig.c, genemit.c,
	genextract.c, genflags.c, gengenrtl.c, genmultilib,
	genopinit.c, genoutput.c, genpeep.c, genrecog.c,
	gensupport.c, gensupport.h, ggc-callbacks.c, ggc-common.c,
	ggc-none.c, ggc-page.c, ggc-simple.c, ggc.h, global.c,
	graph.c, graph.h, gthr-aix.h, gthr-dce.h, gthr-posix.h,
	gthr-rtems.h, gthr-single.h, gthr-solaris.h, gthr-vxworks.h,
	gthr-win32.h, gthr.h, haifa-sched.c, halfpic.c, halfpic.h,
	hard-reg-set.h, hwint.h, ifcvt.c, input.h, insn-addr.h,
	integrate.c, integrate.h, jump.c, lcm.c, libgcc2.c,
	libgcc2.h, lists.c, local-alloc.c, loop.c, loop.h,
	machmode.def, machmode.h, main.c, mbchar.c, mbchar.h,
	mips-tdump.c, mips-tfile.c, mklibgcc.in, mkmap-flat.awk,
	mkmap-symver.awk, optabs.c, output.h, params.c, params.def,
	params.h, predict.c, predict.def, predict.h, prefix.c,
	prefix.h, print-rtl.c, print-tree.c, profile.c, protoize.c,
	read-rtl.c, real.c, real.h, recog.c, recog.h, reg-stack.c,
	regclass.c, regmove.c, regrename.c, regs.h, reload.c,
	reload.h, reload1.c, reorg.c, resource.c, resource.h, rtl.c,
	rtl.def, rtl.h, rtlanal.c, sbitmap.c, sbitmap.h,
	sched-deps.c, sched-ebb.c, sched-int.h, sched-rgn.c,
	sched-vis.c, sdbout.c, sdbout.h, sibcall.c, simplify-rtx.c,
	ssa-ccp.c, ssa-dce.c, ssa.c, ssa.h, stmt.c, stor-layout.c,
	stringpool.c, system.h, timevar.c, timevar.def, timevar.h,
	tlink.c, toplev.c, toplev.h, tree.c, tree.def, tree.h,
	tsystem.h, unroll.c, unwind-dw2-fde.c, unwind-dw2-fde.h,
	unwind-dw2.c, unwind-pe.h, unwind-sjlj.c, unwind.h,
	unwind.inc, varasm.c, varray.c, varray.h, xcoffout.c,
	xcoffout.h: replace "GNU CC" with "GCC".

From-SVN: r45105

											
										
										
											2001-08-22 16:35:51 +02:00
+								GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 								WARRANTY; without even the implied warranty of MERCHANTABILITY or
 								FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 								for more details.
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												Licensing changes to GPLv3 resp. GPLv3 with GCC Runtime Exception.

From-SVN: r145841

											
										
										
											2009-04-09 17:00:19 +02:00
+								Under Section 7 of GPL version 3, you are granted additional
 								permissions described in the GCC Runtime Library Exception, version
 								3.1, as published by the Free Software Foundation.
 								You should have received a copy of the GNU General Public License and
 								a copy of the GCC Runtime Library Exception along with this program;
 								see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 								<http://www.gnu.org/licenses/>.  */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												*** empty log message ***

From-SVN: r1614

											
										
										
											1992-07-17 11:57:24 +02:00
+								#include "tconfig.h"
-												tsystem.h: New file.

	* tsystem.h: New file.

	* Makefile.in (libgcc2.a, stmp-multilib, crtbegin.o, crtend.o,
	s-crtS): Depend on tsystem.h.

	* crtstuff.c: Include tsystem.h.
	* frame.c: Likewise.
	* libgcc2.c: Likewise.

From-SVN: r31741

											
										
										
											2000-02-01 22:30:52 +01:00
+								#include "tsystem.h"
-												Merge basic-improvements-branch to trunk

From-SVN: r60174

											
										
										
											2002-12-16 19:23:00 +01:00
+								#include "coretypes.h"
 								#include "tm.h"
-												Move libgcc_tm_file to toplevel libgcc

	gcc:
	* configure.ac (libgcc_tm_file_list, libgcc_tm_include_list):
	Remove.
	* configure: Regenerate.
	* Makefile.in (libgcc_tm_file_list, libgcc_tm_include_list): Remove.
	(TM_H): Remove libgcc_tm.h, $(libgcc_tm_file_list).
	(libgcc_tm.h, cs-libgcc_tm.h): Remove.
	(clean): Remove libgcc_tm.h
	* mkconfig.sh: Don't include libgcc_tm.h in tm.h.
	* config.gcc (libgcc_tm_file): Remove.
	(arm*-*-linux*): Remove libgcc_tm_file for arm*-*-linux-*eabi.
	(arm*-*-uclinux*): Remove libgcc_tm_file for arm*-*-uclinux*eabi.
	(arm*-*-eabi*, arm*-*-symbianelf*): Remove libgcc_tm_file.
	(avr-*-rtems*): Likewise.
	(avr-*-*): Likewise.
	(frv-*-elf): Likewise.
	(frv-*-*linux*): Likewise.
	(h8300-*-rtems*): Likewise.
	(h8300-*-elf*): Likewise.
	(i[34567]86-*-darwin*): Likewise.
	(x86_64-*-darwin*): Likewise.
	(rx-*-elf*): Likewise.
	(tic6x-*-elf): Likewise.
	(tic6x-*-uclinux): Likewise.
	(i[34567]86-*-linux*, x86_64-*-linux*): Likewise.

	libgcc:
	* configure.ac (tm_file_): New variable.
	Determine from tm_file.
	(tm_file, tm_defines): Substitute.
	* configure: Regenerate.
	* mkheader.sh: New file.
	* Makefile.in (clean): Remove libgcc_tm.h.
	($(objects)): Depend on libgcc_tm.h.
	(libgcc_tm_defines, libgcc_tm_file): New variables.
	(libgcc_tm.h, libgcc_tm.stamp): New targets.
	($(libgcc-objects), $(libgcc-s-objects), $(libgcc-eh-objects))
	($(libgcov-objects), $(libunwind-objects), $(libunwind-s-objects))
	($(extra-parts)): Depend on libgcc_tm.h.
	* config.host (tm_defines, tm_file): New variable.
	(arm*-*-linux*): Set tm_file for arm*-*-linux-*eabi.
	(arm*-*-uclinux*): Set tm_file for arm*-*-uclinux*eabi.
	(arm*-*-eabi*, arm*-*-symbianelf*): Set tm_file.
	(avr-*-rtems*): Likewise.
	(avr-*-*): Likewise.
	(frv-*-elf): Likewise.
	(frv-*-*linux*): Likewise.
	(h8300-*-rtems*): Likewise.
	(h8300-*-elf*): Likewise.
	(i[34567]86-*-darwin*): Likewise.
	(x86_64-*-darwin*): Likewise.
	(rx-*-elf): Likewise.
	(tic6x-*-uclinux): Likewise.
	(tic6x-*-elf): Likewise.
	(i[34567]86-*-linux*, x86_64-*-linux*): Likewise.
	* config/alpha/gthr-posix.c: Include libgcc_tm.h.
	* config/i386/cygming-crtbegin.c: Likewise.
	* config/i386/cygming-crtend.c: Likewise.
	* config/ia64/fde-vms.c: Likewise.
	* config/ia64/unwind-ia64.c: Likewise.
	* config/libbid/bid_gcc_intrinsics.h: Likewise.
	* config/rs6000/darwin-fallback.c: Likewise.
	* config/stormy16/lib2funcs.c: Likewise.
	* config/xtensa/unwind-dw2-xtensa.c: Likewise.
	* crtstuff.c: Likewise.
	* dfp-bit.h: Likewise.
	* emutls.c: Likewise.
	* fixed-bit.c: Likewise.
	* fp-bit.c: Likewise.
	* generic-morestack-thread.c: Likewise.
	* generic-morestack.c: Likewise.
	* libgcc2.c: Likewise.
	* libgcov.c: Likewise.
	* unwind-dw2-fde-dip.c: Likewise.
	* unwind-dw2-fde.c: Likewise.
	* unwind-dw2.c: Likewise.
	* unwind-sjlj.c: Likewise.

Co-Authored-By: Paolo Bonzini <bonzini@gnu.org>

From-SVN: r180775

											
										
										
											2011-11-02 16:26:35 +01:00
+								#include "libgcc_tm.h"
-												Zap some warnings in target files:

        * frame.c: Include stdlib.h and unistd.h to possibly get various
        function prototypes.  The fixproto script guarantees these header
        files exist on the target system.
        * libgcc2.c: Likewise.
        * gthr-single.h (__gthread_mutex_lock, __gthread_mutex_trylock,
        __gthread_mutex_unlock): Add __attribute__ ((__unused__)) to the
        function parameters.
        * libgcc2.c (__udiv_w_sdiv): Likewise.

From-SVN: r19261

											
										
										
											1998-04-17 10:26:33 +02:00
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								#ifdef HAVE_GAS_HIDDEN
 								#define ATTRIBUTE_HIDDEN  __attribute__ ((__visibility__ ("hidden")))
 								#else
 								#define ATTRIBUTE_HIDDEN
 								#endif
-												libgcc2.c (LIBGCC2_MAX_UNITS_PER_WORD): New macro.

	* libgcc2.c (LIBGCC2_MAX_UNITS_PER_WORD): New macro.
	(LIBGCC2_UNITS_PER_WORD): Use LIBGCC2_MAX_UNITS_PER_WORD rather than
	MIN_UNITS_PER_WORD to set the default.  Also use it in the guard.

From-SVN: r114022

											
										
										
											2006-05-23 21:29:36 +02:00
+								/* Work out the largest "word" size that we can deal with on this target.  */
 								#if MIN_UNITS_PER_WORD > 4
 								# define LIBGCC2_MAX_UNITS_PER_WORD 8
 								#elif (MIN_UNITS_PER_WORD > 2 \
-												libgcc2.h: Use __SIZEOF_LONG_LONG__ instead of LONG_LONG_TYPE_SIZE.

	* libgcc2.h: Use __SIZEOF_LONG_LONG__ instead of LONG_LONG_TYPE_SIZE.
	* libgcc2.c: Likewise.

From-SVN: r165238

											
										
										
											2010-10-09 23:12:56 +02:00
+								       || (MIN_UNITS_PER_WORD > 1 && __SIZEOF_LONG_LONG__ > 4))
-												libgcc2.c (LIBGCC2_MAX_UNITS_PER_WORD): New macro.

	* libgcc2.c (LIBGCC2_MAX_UNITS_PER_WORD): New macro.
	(LIBGCC2_UNITS_PER_WORD): Use LIBGCC2_MAX_UNITS_PER_WORD rather than
	MIN_UNITS_PER_WORD to set the default.  Also use it in the guard.

From-SVN: r114022

											
										
										
											2006-05-23 21:29:36 +02:00
+								# define LIBGCC2_MAX_UNITS_PER_WORD 4
 								#else
 								# define LIBGCC2_MAX_UNITS_PER_WORD MIN_UNITS_PER_WORD
 								#endif
 								/* Work out what word size we are using for this compilation.
 								   The value can be set on the command line.  */
-												re PR target/22209 (libgfortran unresolvable symbols on irix6.5)

	* libgcc2.c (MIN_UNITS_PER_WORD): Move default definition from
	libgcc2.h.
	(LIBGCC2_UNITS_PER_WORD): Provide default definition, using old
	MIN_UNITS_PER_WORD logic from libgcc2.h.  Do nothing if
	LIBGCC2_UNITS_PER_WORD > MIN_UNITS_PER_WORD.
	* libgcc2.h (MIN_UNITS_PER_WORD): Remove definition from here.
	Use LIBGCC2_UNITS_PER_WORD rather than MIN_UNITS_PER_WORD to
	determine the size of Wtype, etc.
	* mklibgcc.in (LIB2_SIDITI_CONV_FUNCS): New argument.
	(swfloatfuncs): New variable.
	(dwfloatfuncs): Likewise.
	(lib2funcs): Remove floating-point conversion functions from
	initial assignment.  Use LIB2_SIDITI_CONV_FUNCS to determine
	the set of conversion routines needed.  Allow entries to specify
	an object name, filename and word size.  Update users accordingly.
	* Makefile.in (libgcc.mk): Pass LIB2_SIDITI_CONV_FUNCS.
	* config/mips/t-mips (LIB2_SIDITI_CONV_FUNCS): Define.

	Revert:

	2006-02-08  Roger Sayle  <roger@eyesopen.com>

	PR target/22209
	* config/fixtfdi.c: New libgcc source file.
	* config/fixunstfdi.c: New source file.
	* config/floatditf.c: New source file.
	* config/floatunditf.c: New source file.
	* config/mips/t-iris6 (LIB2FUNCS_EXTRA): Include the new source
	files above instead of config/mips/_tilib.c.
	* config/mips/t-linux64 (LIB2FUNCS_EXTRA): Likewise.

From-SVN: r113903

											
										
										
											2006-05-19 10:05:39 +02:00
+								#ifndef LIBGCC2_UNITS_PER_WORD
-												libgcc2.c (LIBGCC2_MAX_UNITS_PER_WORD): New macro.

	* libgcc2.c (LIBGCC2_MAX_UNITS_PER_WORD): New macro.
	(LIBGCC2_UNITS_PER_WORD): Use LIBGCC2_MAX_UNITS_PER_WORD rather than
	MIN_UNITS_PER_WORD to set the default.  Also use it in the guard.

From-SVN: r114022

											
										
										
											2006-05-23 21:29:36 +02:00
+								#define LIBGCC2_UNITS_PER_WORD LIBGCC2_MAX_UNITS_PER_WORD
-												re PR target/22209 (libgfortran unresolvable symbols on irix6.5)

	* libgcc2.c (MIN_UNITS_PER_WORD): Move default definition from
	libgcc2.h.
	(LIBGCC2_UNITS_PER_WORD): Provide default definition, using old
	MIN_UNITS_PER_WORD logic from libgcc2.h.  Do nothing if
	LIBGCC2_UNITS_PER_WORD > MIN_UNITS_PER_WORD.
	* libgcc2.h (MIN_UNITS_PER_WORD): Remove definition from here.
	Use LIBGCC2_UNITS_PER_WORD rather than MIN_UNITS_PER_WORD to
	determine the size of Wtype, etc.
	* mklibgcc.in (LIB2_SIDITI_CONV_FUNCS): New argument.
	(swfloatfuncs): New variable.
	(dwfloatfuncs): Likewise.
	(lib2funcs): Remove floating-point conversion functions from
	initial assignment.  Use LIB2_SIDITI_CONV_FUNCS to determine
	the set of conversion routines needed.  Allow entries to specify
	an object name, filename and word size.  Update users accordingly.
	* Makefile.in (libgcc.mk): Pass LIB2_SIDITI_CONV_FUNCS.
	* config/mips/t-mips (LIB2_SIDITI_CONV_FUNCS): Define.

	Revert:

	2006-02-08  Roger Sayle  <roger@eyesopen.com>

	PR target/22209
	* config/fixtfdi.c: New libgcc source file.
	* config/fixunstfdi.c: New source file.
	* config/floatditf.c: New source file.
	* config/floatunditf.c: New source file.
	* config/mips/t-iris6 (LIB2FUNCS_EXTRA): Include the new source
	files above instead of config/mips/_tilib.c.
	* config/mips/t-linux64 (LIB2FUNCS_EXTRA): Likewise.

From-SVN: r113903

											
										
										
											2006-05-19 10:05:39 +02:00
+								#endif
-												libgcc2.c (LIBGCC2_MAX_UNITS_PER_WORD): New macro.

	* libgcc2.c (LIBGCC2_MAX_UNITS_PER_WORD): New macro.
	(LIBGCC2_UNITS_PER_WORD): Use LIBGCC2_MAX_UNITS_PER_WORD rather than
	MIN_UNITS_PER_WORD to set the default.  Also use it in the guard.

From-SVN: r114022

											
										
										
											2006-05-23 21:29:36 +02:00
+								#if LIBGCC2_UNITS_PER_WORD <= LIBGCC2_MAX_UNITS_PER_WORD
-												re PR target/22209 (libgfortran unresolvable symbols on irix6.5)

	* libgcc2.c (MIN_UNITS_PER_WORD): Move default definition from
	libgcc2.h.
	(LIBGCC2_UNITS_PER_WORD): Provide default definition, using old
	MIN_UNITS_PER_WORD logic from libgcc2.h.  Do nothing if
	LIBGCC2_UNITS_PER_WORD > MIN_UNITS_PER_WORD.
	* libgcc2.h (MIN_UNITS_PER_WORD): Remove definition from here.
	Use LIBGCC2_UNITS_PER_WORD rather than MIN_UNITS_PER_WORD to
	determine the size of Wtype, etc.
	* mklibgcc.in (LIB2_SIDITI_CONV_FUNCS): New argument.
	(swfloatfuncs): New variable.
	(dwfloatfuncs): Likewise.
	(lib2funcs): Remove floating-point conversion functions from
	initial assignment.  Use LIB2_SIDITI_CONV_FUNCS to determine
	the set of conversion routines needed.  Allow entries to specify
	an object name, filename and word size.  Update users accordingly.
	* Makefile.in (libgcc.mk): Pass LIB2_SIDITI_CONV_FUNCS.
	* config/mips/t-mips (LIB2_SIDITI_CONV_FUNCS): Define.

	Revert:

	2006-02-08  Roger Sayle  <roger@eyesopen.com>

	PR target/22209
	* config/fixtfdi.c: New libgcc source file.
	* config/fixunstfdi.c: New source file.
	* config/floatditf.c: New source file.
	* config/floatunditf.c: New source file.
	* config/mips/t-iris6 (LIB2FUNCS_EXTRA): Include the new source
	files above instead of config/mips/_tilib.c.
	* config/mips/t-linux64 (LIB2FUNCS_EXTRA): Likewise.

From-SVN: r113903

											
										
										
											2006-05-19 10:05:39 +02:00
-												libgcc2.h: New file.

	* libgcc2.h: New file.
	* libgcc2.c: Move macros, typedefs and prototypes to libgcc2.h.

From-SVN: r32440

											
										
										
											2000-03-09 04:39:09 +01:00
+								#include "libgcc2.h"
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												New target macro DECLARE_LIBRARY_RENAMES

From-SVN: r55889

											
										
										
											2002-07-31 01:55:09 +02:00
+								#ifdef DECLARE_LIBRARY_RENAMES
 								  DECLARE_LIBRARY_RENAMES
 								#endif
-												libgcc2.c (__divdi3, __moddi3): Use unary minus operator instead of __negdi2 directly.

        * libgcc2.c (__divdi3, __moddi3): Use unary minus operator
        instead of __negdi2 directly.

From-SVN: r55632

											
										
										
											2002-07-22 02:15:49 +02:00
+								#if defined (L_negdi2)
-												Added the missing #endif.

From-SVN: r36950

											
										
										
											2000-10-19 17:44:32 +02:00
+								DWtype
 								__negdi2 (DWtype u)
 								{
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = u};
 								  const DWunion w = { {.low = -uu.s.low,
 										       .high = -uu.s.high - ((UWtype) -uu.s.low > 0) } };
-												Added the missing #endif.

From-SVN: r36950

											
										
										
											2000-10-19 17:44:32 +02:00
 								  return w.ll;
 								}
 								#endif
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
 								#ifdef L_addvsi3
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								Wtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__addvSI3 (Wtype a, Wtype b)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3, __subvSI3, __subvsi3): Likewise.
	(__subvDI3, __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  Wtype w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_add_overflow (a, b, &w))
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								    abort ();
 								  return w;
-												gcov.c (output_data): Use HOST_WIDEST_INT_PRINT_DEC to output variables of type HOST_WIDEST_INT.

	* gcov.c (output_data): Use HOST_WIDEST_INT_PRINT_DEC to output
	variables of type HOST_WIDEST_INT.

	* libgcc2.c (__bb_exit_func): Handle gcov_type as long long.
	(__bb_exit_func): Correct type of count_max to avoid overflow.
	(num_digits): Handle long long argument.

	* combine.c (gen_lowpart_for_combine): Remove unused variable.

From-SVN: r44033

											
										
										
											2001-07-16 11:16:04 +02:00
+								}
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
 								SItype
 								__addvsi3 (SItype a, SItype b)
 								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  SItype w;
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_add_overflow (a, b, &w))
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								    abort ();
 								  return w;
 								}
 								#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
-												Added the missing #endif.

From-SVN: r36950

											
										
										
											2000-10-19 17:44:32 +02:00
+								#endif
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
 								#ifdef L_addvdi3
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								DWtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__addvDI3 (DWtype a, DWtype b)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  DWtype w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_add_overflow (a, b, &w))
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								    abort ();
 								  return w;
 								}
 								#endif
 								#ifdef L_subvsi3
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								Wtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__subvSI3 (Wtype a, Wtype b)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  Wtype w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_sub_overflow (a, b, &w))
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								    abort ();
 								  return w;
 								}
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
 								SItype
 								__subvsi3 (SItype a, SItype b)
 								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  SItype w;
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_sub_overflow (a, b, &w))
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								    abort ();
 								  return w;
 								}
 								#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								#endif
 								#ifdef L_subvdi3
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								DWtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__subvDI3 (DWtype a, DWtype b)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  DWtype w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_sub_overflow (a, b, &w))
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								    abort ();
 								  return w;
 								}
 								#endif
 								#ifdef L_mulvsi3
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								Wtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__mulvSI3 (Wtype a, Wtype b)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  Wtype w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_mul_overflow (a, b, &w))
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								    abort ();
 								  return w;
 								}
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
 								SItype
 								__mulvsi3 (SItype a, SItype b)
 								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  SItype w;
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_mul_overflow (a, b, &w))
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								    abort ();
 								  return w;
 								}
 								#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								#endif
 								#ifdef L_negvsi2
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								Wtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__negvSI2 (Wtype a)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  Wtype w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_sub_overflow (0, a, &w))
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								    abort ();
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  return w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								}
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
 								SItype
 								__negvsi2 (SItype a)
 								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  SItype w;
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_sub_overflow (0, a, &w))
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								    abort ();
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  return w;
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								}
 								#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								#endif
 								#ifdef L_negvdi2
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								DWtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__negvDI2 (DWtype a)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  DWtype w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Fix minor whitespace issues

libgcc/

	* libgcc2.c: Fix whitespace issues in most recent change.

											
										
										
											2020-11-10 17:07:24 +01:00
+								  if (__builtin_sub_overflow (0, a, &w))
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								    abort ();
-												langhooks.c: Fix formatting.

	* langhooks.c: Fix formatting.
	* langhooks.h: Likewise.
	* lcm.c: Likewise.
	* libgcc2.c: Likewise.
	* lists.c: Likewise.
	* local-alloc.c: Likewise.
	* loop.c: Likewise.
	* loop.h: Likewise.

From-SVN: r54070

											
										
										
											2002-05-30 22:55:11 +02:00
+								  return w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								}
 								#endif
 								#ifdef L_absvsi2
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								Wtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__absvSI2 (Wtype a)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												Improve abs with overflow implementations

libgcc/

	* libgcc2.c (absvSI2): Simplify/improve implementation by using
	builtin_add_overflow.
	(absvsi2, absvDI2): Likewise.

											
										
										
											2020-11-25 19:36:51 +01:00
+								  const Wtype v = 0 - (a < 0);
 								  Wtype w;
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
-												Improve abs with overflow implementations

libgcc/

	* libgcc2.c (absvSI2): Simplify/improve implementation by using
	builtin_add_overflow.
	(absvsi2, absvDI2): Likewise.

											
										
										
											2020-11-25 19:36:51 +01:00
+								  if (__builtin_add_overflow (a, v, &w))
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								    abort ();
-												Improve abs with overflow implementations

libgcc/

	* libgcc2.c (absvSI2): Simplify/improve implementation by using
	builtin_add_overflow.
	(absvsi2, absvDI2): Likewise.

											
										
										
											2020-11-25 19:36:51 +01:00
+								  return v ^ w;
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								}
 								#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
 								SItype
 								__absvsi2 (SItype a)
 								{
-												Improve abs with overflow implementations

libgcc/

	* libgcc2.c (absvSI2): Simplify/improve implementation by using
	builtin_add_overflow.
	(absvsi2, absvDI2): Likewise.

											
										
										
											2020-11-25 19:36:51 +01:00
+								  const SItype v = 0 - (a < 0);
 								  SItype w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Improve abs with overflow implementations

libgcc/

	* libgcc2.c (absvSI2): Simplify/improve implementation by using
	builtin_add_overflow.
	(absvsi2, absvDI2): Likewise.

											
										
										
											2020-11-25 19:36:51 +01:00
+								  if (__builtin_add_overflow (a, v, &w))
-												langhooks.c: Fix formatting.

	* langhooks.c: Fix formatting.
	* langhooks.h: Likewise.
	* lcm.c: Likewise.
	* libgcc2.c: Likewise.
	* lists.c: Likewise.
	* local-alloc.c: Likewise.
	* loop.c: Likewise.
	* loop.h: Likewise.

From-SVN: r54070

											
										
										
											2002-05-30 22:55:11 +02:00
+								    abort ();
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Improve abs with overflow implementations

libgcc/

	* libgcc2.c (absvSI2): Simplify/improve implementation by using
	builtin_add_overflow.
	(absvsi2, absvDI2): Likewise.

											
										
										
											2020-11-25 19:36:51 +01:00
+								  return v ^ w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								}
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								#endif
 								#ifdef L_absvdi2
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								DWtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__absvDI2 (DWtype a)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												Improve abs with overflow implementations

libgcc/

	* libgcc2.c (absvSI2): Simplify/improve implementation by using
	builtin_add_overflow.
	(absvsi2, absvDI2): Likewise.

											
										
										
											2020-11-25 19:36:51 +01:00
+								  const DWtype v = 0 - (a < 0);
 								  DWtype w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Improve abs with overflow implementations

libgcc/

	* libgcc2.c (absvSI2): Simplify/improve implementation by using
	builtin_add_overflow.
	(absvsi2, absvDI2): Likewise.

											
										
										
											2020-11-25 19:36:51 +01:00
+								  if (__builtin_add_overflow (a, v, &w))
-												langhooks.c: Fix formatting.

	* langhooks.c: Fix formatting.
	* langhooks.h: Likewise.
	* lcm.c: Likewise.
	* libgcc2.c: Likewise.
	* lists.c: Likewise.
	* local-alloc.c: Likewise.
	* loop.c: Likewise.
	* loop.h: Likewise.

From-SVN: r54070

											
										
										
											2002-05-30 22:55:11 +02:00
+								    abort ();
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												Improve abs with overflow implementations

libgcc/

	* libgcc2.c (absvSI2): Simplify/improve implementation by using
	builtin_add_overflow.
	(absvsi2, absvDI2): Likewise.

											
										
										
											2020-11-25 19:36:51 +01:00
+								  return v ^ w;
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								}
 								#endif
 								#ifdef L_mulvdi3
-												libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.

	* libgcc2.h: Use Wtype for SItype and DWtype for DItype in prototypes.
	* libgcc2.c (__absvsi2): Use Wtype and DWtype.
	(__absvdi2, __addvsi3, __addvdi3, __subvsi3): Likewise.
	(__subvdi3, __mulvsi3, __mulvdi3, __negvsi2, __negvdi2): Likewise.

From-SVN: r38314

											
										
										
											2000-12-16 23:43:58 +01:00
+								DWtype
-												re PR other/18665 (-ftrapv borks up simple integer arithmetic)

	PR other/18665
	* libgcc-std.ver (GCC_3.4.4): Inherit from GCC_3.4.2.
	Export __absvti2, __addvti3, __mulvti3, __negvti2 and __subvti3.
	* libgcc2.c (__addvsi3): Rename to __addvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__addvdi3): Rename to __addvDI3.
	(__subvsi3): Rename to __subvSI3.  Use word type for the result.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__subvdi3): Rename to __subvDI3.
	(_mulvsi3): Rename to _mulvSI3.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(_mulvdi3): Rename to _mulvDI3.
	(__negvsi2): Rename to __negvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__negvdi2): Rename to __negvDI2.
	(__absvsi2): Rename to __absvSI2.
	New version if COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvdi2): Rename to __absvDI2.
	* libgcc2.h (64-bit targets): Define COMPAT_SIMODE_TRAPPING_ARITHMETIC.
	(__absvSI2, __addvSI3, __subvSI3, __mulvSI3, __negvSI2, __absvDI2,
	__addvDI3, __subvDI3, __mulvDI3, __negvDI2): Define to the appropriate
	symbol and declare.
	(__absvsi2, __addvsi3, __subvsi3, __mulvsi3, __negvsi2): Declare if
	COMPAT_SIMODE_TRAPPING_ARITHMETIC.

From-SVN: r92187

											
										
										
											2004-12-15 13:30:46 +01:00
+								__mulvDI3 (DWtype u, DWtype v)
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								{
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
+								  /* The unchecked multiplication needs 3 Wtype x Wtype multiplications,
 								     but the checked multiplication needs only two.  */
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = u};
 								  const DWunion vv = {.ll = v};
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  if (__builtin_expect (uu.s.high == uu.s.low >> (W_TYPE_SIZE - 1), 1))
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
+								    {
 								      /* u fits in a single Wtype.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								      if (__builtin_expect (vv.s.high == vv.s.low >> (W_TYPE_SIZE - 1), 1))
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
+									{
 									  /* v fits in a single Wtype as well.  */
 									  /* A single multiplication.  No overflow risk.  */
 									  return (DWtype) uu.s.low * (DWtype) vv.s.low;
 									}
 								      else
 									{
 									  /* Two multiplications.  */
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+									  DWunion w0 = {.ll = (UDWtype) (UWtype) uu.s.low
 											* (UDWtype) (UWtype) vv.s.low};
 									  DWunion w1 = {.ll = (UDWtype) (UWtype) uu.s.low
 											* (UDWtype) (UWtype) vv.s.high};
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
 									  if (vv.s.high < 0)
 									    w1.s.high -= uu.s.low;
 									  if (uu.s.low < 0)
 									    w1.ll -= vv.ll;
 									  w1.ll += (UWtype) w0.s.high;
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+									  if (__builtin_expect (w1.s.high == w1.s.low >> (W_TYPE_SIZE - 1), 1))
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
+									    {
 									      w0.s.high = w1.s.low;
 									      return w0.ll;
 									    }
 									}
 								    }
 								  else
 								    {
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								      if (__builtin_expect (vv.s.high == vv.s.low >> (W_TYPE_SIZE - 1), 1))
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
+									{
 									  /* v fits into a single Wtype.  */
 									  /* Two multiplications.  */
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+									  DWunion w0 = {.ll = (UDWtype) (UWtype) uu.s.low
 											* (UDWtype) (UWtype) vv.s.low};
 									  DWunion w1 = {.ll = (UDWtype) (UWtype) uu.s.high
 											* (UDWtype) (UWtype) vv.s.low};
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
 									  if (uu.s.high < 0)
 									    w1.s.high -= vv.s.low;
 									  if (vv.s.low < 0)
 									    w1.ll -= uu.ll;
 									  w1.ll += (UWtype) w0.s.high;
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+									  if (__builtin_expect (w1.s.high == w1.s.low >> (W_TYPE_SIZE - 1), 1))
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
+									    {
 									      w0.s.high = w1.s.low;
 									      return w0.ll;
 									    }
 									}
 								      else
 									{
 									  /* A few sign checks and a single multiplication.  */
 									  if (uu.s.high >= 0)
 									    {
 									      if (vv.s.high >= 0)
 										{
 										  if (uu.s.high == 0 && vv.s.high == 0)
 										    {
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+										      const DWtype w = (UDWtype) (UWtype) uu.s.low
 											* (UDWtype) (UWtype) vv.s.low;
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
+										      if (__builtin_expect (w >= 0, 1))
 											return w;
 										    }
 										}
 									      else
 										{
 										  if (uu.s.high == 0 && vv.s.high == (Wtype) -1)
 										    {
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+										      DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
 												    * (UDWtype) (UWtype) vv.s.low};
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
 										      ww.s.high -= uu.s.low;
 										      if (__builtin_expect (ww.s.high < 0, 1))
 											return ww.ll;
 										    }
 										}
 									    }
 									  else
 									    {
 									      if (vv.s.high >= 0)
 										{
 										  if (uu.s.high == (Wtype) -1 && vv.s.high == 0)
 										    {
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+										      DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
 												    * (UDWtype) (UWtype) vv.s.low};
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
 										      ww.s.high -= vv.s.low;
 										      if (__builtin_expect (ww.s.high < 0, 1))
 											return ww.ll;
 										    }
 										}
 									      else
 										{
-												re PR target/82274 (__builtin_mul_overflow fails to detect overflow for int64_t when compiled with -m32)

	PR target/82274
	* internal-fn.c (expand_mul_overflow): If both operands have
	the same highpart of -1 or 0 and the topmost bit of lowpart
	is different, overflow is if res <= 0 rather than res < 0.

	* libgcc2.c (__mulvDI3): If both operands have
	the same highpart of -1 and the topmost bit of lowpart is 0,
	multiplication overflows even if both lowparts are 0.

	* gcc.dg/pr82274-1.c: New test.
	* gcc.dg/pr82274-2.c: New test.

From-SVN: r253734

											
										
										
											2017-10-13 19:19:12 +02:00
+										  if ((uu.s.high & vv.s.high) == (Wtype) -1
 										      && (uu.s.low | vv.s.low) != 0)
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
+										    {
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+										      DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
 												    * (UDWtype) (UWtype) vv.s.low};
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
 										      ww.s.high -= uu.s.low;
 										      ww.s.high -= vv.s.low;
 										      if (__builtin_expect (ww.s.high >= 0, 1))
 											return ww.ll;
 										    }
 										}
 									    }
 									}
 								    }
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
-												re PR middle-end/6578 (-ftrapv doesn't catch multiplication overflow)

2003-06-30  Bruno Haible  <bruno@clisp.org>

	PR middle-end/6578
	* libgcc2.c (__subvsi3): Remove simplification that would not work
	when subtracting -0x80000000.
	(__subvdi3): Remove simplification that would return a wrong result.
	(__mulvsi3): Fix overflow check.
	(__absvdi2): Fix simplification that would return a wrong result.
	(__mulvdi3): Fix overflow check.

From-SVN: r68758

											
										
										
											2003-07-01 06:04:13 +02:00
+								  /* Overflow.  */
 								  abort ();
-												Adding new option -ftrapv.

From-SVN: r36942

											
										
										
											2000-10-18 23:33:41 +02:00
+								}
 								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												ifcvt.c: Fix comment typos.

	* ifcvt.c: Fix comment typos.
	* lcm.c: Likewise.
	* libgcc2.c: Likewise.
	* local-alloc.c: Likewise.
	* loop.c: Likewise.
	* predict.c: Likewise.
	* ra-build.c: Likewise.
	* ra.c: Likewise.
	* ra-colorize.c: Likewise.
	* ra.h: Likewise.
	* ra-rewrite.c: Likewise.
	* regmove.c: Likewise.
	* reload.h: Likewise.
	* rtlanal.c: Likewise.
	* toplev.c: Likewise.
	* tree.h: Likewise.
	* unwind-dw2-fde-glibc.c: Likewise.
	* vmsdbgout.c: Likewise.

From-SVN: r61421

											
										
										
											2003-01-17 04:28:11 +01:00
+								/* Unless shift functions are defined with full ANSI prototypes,
-												libgcc2.h (word_type): Type definition removed.

2007-07-06  Andreas Krebbel  <krebbel1@de.ibm.com>

	* libgcc2.h (word_type): Type definition removed.
	(cmp_return_type, shift_count_type): Type definitions added.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* libgcc2.c (__udivmoddi4, __moddi3): Type of local variable c
	changed from word_type to Wtype.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* c-common.c (handle_mode_attribute): Handling for libgcc_cmp_return and
	libgcc_shift_count attribute added.
	* target-def.h (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): New target hooks defined.
	(TARGET_INITIALIZER): New target hooks added.
	* targhooks.c (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Default implementations for the new
	target hooks added.
	* targhooks.h (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Function prototypes added.
	* target.h (struct gcc_target): Fields for the new target hooks added.
	* optabs.c (expand_binop): Use shift_count_mode when expanding shift
	as library call.
	(prepare_cmp_insn): Use cmp_return_mode when expanding comparison as
	library call.

	* doc/tm.texi (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): Documentation added.

	* config/s390/s390.c (s390_libgcc_cmp_return_mode,
	s390_libgcc_shift_count_mode): Functions added.
	(TARGET_LIBGCC_CMP_RETURN_MODE,	TARGET_LIBGCC_SHIFT_COUNT_MODE): Target
	hooks defined.

From-SVN: r126410

											
										
										
											2007-07-06 12:47:31 +02:00
+								   parameter b will be promoted to int if shift_count_type is smaller than an int.  */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								#ifdef L_lshrdi3
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
-												libgcc2.h (word_type): Type definition removed.

2007-07-06  Andreas Krebbel  <krebbel1@de.ibm.com>

	* libgcc2.h (word_type): Type definition removed.
	(cmp_return_type, shift_count_type): Type definitions added.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* libgcc2.c (__udivmoddi4, __moddi3): Type of local variable c
	changed from word_type to Wtype.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* c-common.c (handle_mode_attribute): Handling for libgcc_cmp_return and
	libgcc_shift_count attribute added.
	* target-def.h (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): New target hooks defined.
	(TARGET_INITIALIZER): New target hooks added.
	* targhooks.c (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Default implementations for the new
	target hooks added.
	* targhooks.h (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Function prototypes added.
	* target.h (struct gcc_target): Fields for the new target hooks added.
	* optabs.c (expand_binop): Use shift_count_mode when expanding shift
	as library call.
	(prepare_cmp_insn): Use cmp_return_mode when expanding comparison as
	library call.

	* doc/tm.texi (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): Documentation added.

	* config/s390/s390.c (s390_libgcc_cmp_return_mode,
	s390_libgcc_shift_count_mode): Functions added.
	(TARGET_LIBGCC_CMP_RETURN_MODE,	TARGET_LIBGCC_SHIFT_COUNT_MODE): Target
	hooks defined.

From-SVN: r126410

											
										
										
											2007-07-06 12:47:31 +02:00
+								__lshrdi3 (DWtype u, shift_count_type b)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
 								  if (b == 0)
 								    return u;
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = u};
-												libgcc2.c (__lshrdi3, [...]): Use W_TYPE_SIZE.

	* libgcc2.c (__lshrdi3, __ashldi3, __ashrdi3): Use W_TYPE_SIZE.
	(__ffsDI2): Likewise.

From-SVN: r171338

											
										
										
											2011-03-23 02:34:55 +01:00
+								  const shift_count_type bm = W_TYPE_SIZE - b;
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  DWunion w;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  if (bm <= 0)
 								    {
 								      w.s.high = 0;
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								      w.s.low = (UWtype) uu.s.high >> -bm;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								    }
 								  else
 								    {
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								      const UWtype carries = (UWtype) uu.s.high << bm;
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
 								      w.s.high = (UWtype) uu.s.high >> b;
 								      w.s.low = ((UWtype) uu.s.low >> b) | carries;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								    }
 								  return w.ll;
 								}
 								#endif
 								#ifdef L_ashldi3
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
-												libgcc2.h (word_type): Type definition removed.

2007-07-06  Andreas Krebbel  <krebbel1@de.ibm.com>

	* libgcc2.h (word_type): Type definition removed.
	(cmp_return_type, shift_count_type): Type definitions added.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* libgcc2.c (__udivmoddi4, __moddi3): Type of local variable c
	changed from word_type to Wtype.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* c-common.c (handle_mode_attribute): Handling for libgcc_cmp_return and
	libgcc_shift_count attribute added.
	* target-def.h (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): New target hooks defined.
	(TARGET_INITIALIZER): New target hooks added.
	* targhooks.c (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Default implementations for the new
	target hooks added.
	* targhooks.h (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Function prototypes added.
	* target.h (struct gcc_target): Fields for the new target hooks added.
	* optabs.c (expand_binop): Use shift_count_mode when expanding shift
	as library call.
	(prepare_cmp_insn): Use cmp_return_mode when expanding comparison as
	library call.

	* doc/tm.texi (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): Documentation added.

	* config/s390/s390.c (s390_libgcc_cmp_return_mode,
	s390_libgcc_shift_count_mode): Functions added.
	(TARGET_LIBGCC_CMP_RETURN_MODE,	TARGET_LIBGCC_SHIFT_COUNT_MODE): Target
	hooks defined.

From-SVN: r126410

											
										
										
											2007-07-06 12:47:31 +02:00
+								__ashldi3 (DWtype u, shift_count_type b)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
 								  if (b == 0)
 								    return u;
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = u};
-												libgcc2.c (__lshrdi3, [...]): Use W_TYPE_SIZE.

	* libgcc2.c (__lshrdi3, __ashldi3, __ashrdi3): Use W_TYPE_SIZE.
	(__ffsDI2): Likewise.

From-SVN: r171338

											
										
										
											2011-03-23 02:34:55 +01:00
+								  const shift_count_type bm = W_TYPE_SIZE - b;
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  DWunion w;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  if (bm <= 0)
 								    {
 								      w.s.low = 0;
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								      w.s.high = (UWtype) uu.s.low << -bm;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								    }
 								  else
 								    {
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								      const UWtype carries = (UWtype) uu.s.low >> bm;
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
 								      w.s.low = (UWtype) uu.s.low << b;
 								      w.s.high = ((UWtype) uu.s.high << b) | carries;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								    }
 								  return w.ll;
 								}
 								#endif
 								#ifdef L_ashrdi3
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
-												libgcc2.h (word_type): Type definition removed.

2007-07-06  Andreas Krebbel  <krebbel1@de.ibm.com>

	* libgcc2.h (word_type): Type definition removed.
	(cmp_return_type, shift_count_type): Type definitions added.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* libgcc2.c (__udivmoddi4, __moddi3): Type of local variable c
	changed from word_type to Wtype.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* c-common.c (handle_mode_attribute): Handling for libgcc_cmp_return and
	libgcc_shift_count attribute added.
	* target-def.h (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): New target hooks defined.
	(TARGET_INITIALIZER): New target hooks added.
	* targhooks.c (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Default implementations for the new
	target hooks added.
	* targhooks.h (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Function prototypes added.
	* target.h (struct gcc_target): Fields for the new target hooks added.
	* optabs.c (expand_binop): Use shift_count_mode when expanding shift
	as library call.
	(prepare_cmp_insn): Use cmp_return_mode when expanding comparison as
	library call.

	* doc/tm.texi (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): Documentation added.

	* config/s390/s390.c (s390_libgcc_cmp_return_mode,
	s390_libgcc_shift_count_mode): Functions added.
	(TARGET_LIBGCC_CMP_RETURN_MODE,	TARGET_LIBGCC_SHIFT_COUNT_MODE): Target
	hooks defined.

From-SVN: r126410

											
										
										
											2007-07-06 12:47:31 +02:00
+								__ashrdi3 (DWtype u, shift_count_type b)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
 								  if (b == 0)
 								    return u;
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = u};
-												libgcc2.c (__lshrdi3, [...]): Use W_TYPE_SIZE.

	* libgcc2.c (__lshrdi3, __ashldi3, __ashrdi3): Use W_TYPE_SIZE.
	(__ffsDI2): Likewise.

From-SVN: r171338

											
										
										
											2011-03-23 02:34:55 +01:00
+								  const shift_count_type bm = W_TYPE_SIZE - b;
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  DWunion w;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  if (bm <= 0)
 								    {
 								      /* w.s.high = 1..1 or 0..0 */
-												libgcc2.c (__lshrdi3, [...]): Use W_TYPE_SIZE.

	* libgcc2.c (__lshrdi3, __ashldi3, __ashrdi3): Use W_TYPE_SIZE.
	(__ffsDI2): Likewise.

From-SVN: r171338

											
										
										
											2011-03-23 02:34:55 +01:00
+								      w.s.high = uu.s.high >> (W_TYPE_SIZE - 1);
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								      w.s.low = uu.s.high >> -bm;
 								    }
 								  else
 								    {
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								      const UWtype carries = (UWtype) uu.s.high << bm;
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								      w.s.high = uu.s.high >> b;
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								      w.s.low = ((UWtype) uu.s.low >> b) | carries;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								    }
 								  return w.ll;
 								}
 								#endif
-												extend.texi (__builtin_bswap32): Document.

2006-09-07  Eric Christopher  <echristo@apple.com>
	    Falk Hueffner  <falk@debian.org>

	* doc/extend.texi (__builtin_bswap32): Document.
	(__builtin_bswap64): Ditto.
	* doc/libgcc.texi (bswapsi2): Document.
	(bswapdi2): Ditto.
	* doc/rtl.texi (bswap): Document.
	* optabs.c (expand_unop): Don't widen a bswap.
	(init_optabs): Init bswap. Set libfuncs explicitly
	for bswapsi2 and bswapdi2.
	* optabs.h (OTI_bswap): New.
	(bswap_optab): Ditto.
	* genopinit.c (optabs): Handle bswap_optab.
	* tree.h (tree_index): Add TI_UINT32_TYPE and
	TI_UINT64_TYPE.
	(uint32_type_node): New.
	(uint64_type_node): Ditto.
	* tree.c (build_common_tree_nodes_2): Initialize
	uint32_type_node and uint64_type_node.
	* builtins.c (expand_builtin_bswap): New.
	(expand_builtin): Call.
	(fold_builtin_bswap): New.
	(fold_builtin_1): Call.
	* fold-const.c (tree_expr_nonnegative_p): Return true
	for bswap.
	* builtin-types.def (BT_UINT32): New.
	(BT_UINT64): Ditto.
	(BT_FN_UINT32_UINT32): Ditto.
	(BT_FN_UINT64_UINT64): Ditto.
	* builtins.def (BUILT_IN_BSWAP32): New.
	(BUILT_IN_BSWAP64): Ditto.
	* rtl.def (BSWAP): New.
	* genattrtab.c (check_attr_value): New.
	* libgcc2.c (__bswapSI2): New.
	(__bswapDI2): Ditto.
	* libgcc2.h (__bswapSI2): Declare.
	(__bswapDI2): Ditto.
	* mklibgcc.in (lib2funcs): Add _bswapsi2 and _bswapdi2.
	* simplify-rtx.c (simplify_const_unary_operation): Return
	0 for BSWAP.
	* libgcc-std.ver (__bswapsi2): Add.
	(__bswapdi2): Ditto.
	* reload1.c (eliminate_regs_1): Add bswap.
	(elimination_effects): Ditto.
	* config/i386/i386.h (x86_bswap): New.
	(TARGET_BSWAP): Use.
	* config/i386/i386.c (x86_bswap): Set.

Co-Authored-By: Falk Hueffner <falk@debian.org>

From-SVN: r118361

											
										
										
											2006-11-01 06:14:40 +01:00
+								#ifdef L_bswapsi2
-												libgcc2.c (__bswapsi2): Use SItype.

2007-01-04  Eric Christopher  <echristo@apple.com>

	    * libgcc2.c (__bswapsi2): Use SItype.
	      (__bswapdi2): Use DItype.
	    * libgcc2.h: Update for above.

From-SVN: r120454

											
										
										
											2007-01-05 00:16:34 +01:00
+								SItype
 								__bswapsi2 (SItype u)
-												extend.texi (__builtin_bswap32): Document.

2006-09-07  Eric Christopher  <echristo@apple.com>
	    Falk Hueffner  <falk@debian.org>

	* doc/extend.texi (__builtin_bswap32): Document.
	(__builtin_bswap64): Ditto.
	* doc/libgcc.texi (bswapsi2): Document.
	(bswapdi2): Ditto.
	* doc/rtl.texi (bswap): Document.
	* optabs.c (expand_unop): Don't widen a bswap.
	(init_optabs): Init bswap. Set libfuncs explicitly
	for bswapsi2 and bswapdi2.
	* optabs.h (OTI_bswap): New.
	(bswap_optab): Ditto.
	* genopinit.c (optabs): Handle bswap_optab.
	* tree.h (tree_index): Add TI_UINT32_TYPE and
	TI_UINT64_TYPE.
	(uint32_type_node): New.
	(uint64_type_node): Ditto.
	* tree.c (build_common_tree_nodes_2): Initialize
	uint32_type_node and uint64_type_node.
	* builtins.c (expand_builtin_bswap): New.
	(expand_builtin): Call.
	(fold_builtin_bswap): New.
	(fold_builtin_1): Call.
	* fold-const.c (tree_expr_nonnegative_p): Return true
	for bswap.
	* builtin-types.def (BT_UINT32): New.
	(BT_UINT64): Ditto.
	(BT_FN_UINT32_UINT32): Ditto.
	(BT_FN_UINT64_UINT64): Ditto.
	* builtins.def (BUILT_IN_BSWAP32): New.
	(BUILT_IN_BSWAP64): Ditto.
	* rtl.def (BSWAP): New.
	* genattrtab.c (check_attr_value): New.
	* libgcc2.c (__bswapSI2): New.
	(__bswapDI2): Ditto.
	* libgcc2.h (__bswapSI2): Declare.
	(__bswapDI2): Ditto.
	* mklibgcc.in (lib2funcs): Add _bswapsi2 and _bswapdi2.
	* simplify-rtx.c (simplify_const_unary_operation): Return
	0 for BSWAP.
	* libgcc-std.ver (__bswapsi2): Add.
	(__bswapdi2): Ditto.
	* reload1.c (eliminate_regs_1): Add bswap.
	(elimination_effects): Ditto.
	* config/i386/i386.h (x86_bswap): New.
	(TARGET_BSWAP): Use.
	* config/i386/i386.c (x86_bswap): Set.

Co-Authored-By: Falk Hueffner <falk@debian.org>

From-SVN: r118361

											
										
										
											2006-11-01 06:14:40 +01:00
+								{
-												fixed _bswapsi2 function

libgcc

	* libgcc2.c (bswapsi2): Make constants unsigned.

											
										
										
											2020-11-30 02:05:46 +01:00
+								  return ((((u) & 0xff000000u) >> 24)
 									  | (((u) & 0x00ff0000u) >>  8)
 									  | (((u) & 0x0000ff00u) <<  8)
 									  | (((u) & 0x000000ffu) << 24));
-												extend.texi (__builtin_bswap32): Document.

2006-09-07  Eric Christopher  <echristo@apple.com>
	    Falk Hueffner  <falk@debian.org>

	* doc/extend.texi (__builtin_bswap32): Document.
	(__builtin_bswap64): Ditto.
	* doc/libgcc.texi (bswapsi2): Document.
	(bswapdi2): Ditto.
	* doc/rtl.texi (bswap): Document.
	* optabs.c (expand_unop): Don't widen a bswap.
	(init_optabs): Init bswap. Set libfuncs explicitly
	for bswapsi2 and bswapdi2.
	* optabs.h (OTI_bswap): New.
	(bswap_optab): Ditto.
	* genopinit.c (optabs): Handle bswap_optab.
	* tree.h (tree_index): Add TI_UINT32_TYPE and
	TI_UINT64_TYPE.
	(uint32_type_node): New.
	(uint64_type_node): Ditto.
	* tree.c (build_common_tree_nodes_2): Initialize
	uint32_type_node and uint64_type_node.
	* builtins.c (expand_builtin_bswap): New.
	(expand_builtin): Call.
	(fold_builtin_bswap): New.
	(fold_builtin_1): Call.
	* fold-const.c (tree_expr_nonnegative_p): Return true
	for bswap.
	* builtin-types.def (BT_UINT32): New.
	(BT_UINT64): Ditto.
	(BT_FN_UINT32_UINT32): Ditto.
	(BT_FN_UINT64_UINT64): Ditto.
	* builtins.def (BUILT_IN_BSWAP32): New.
	(BUILT_IN_BSWAP64): Ditto.
	* rtl.def (BSWAP): New.
	* genattrtab.c (check_attr_value): New.
	* libgcc2.c (__bswapSI2): New.
	(__bswapDI2): Ditto.
	* libgcc2.h (__bswapSI2): Declare.
	(__bswapDI2): Ditto.
	* mklibgcc.in (lib2funcs): Add _bswapsi2 and _bswapdi2.
	* simplify-rtx.c (simplify_const_unary_operation): Return
	0 for BSWAP.
	* libgcc-std.ver (__bswapsi2): Add.
	(__bswapdi2): Ditto.
	* reload1.c (eliminate_regs_1): Add bswap.
	(elimination_effects): Ditto.
	* config/i386/i386.h (x86_bswap): New.
	(TARGET_BSWAP): Use.
	* config/i386/i386.c (x86_bswap): Set.

Co-Authored-By: Falk Hueffner <falk@debian.org>

From-SVN: r118361

											
										
										
											2006-11-01 06:14:40 +01:00
+								}
 								#endif
 								#ifdef L_bswapdi2
-												libgcc2.c (__bswapsi2): Use SItype.

2007-01-04  Eric Christopher  <echristo@apple.com>

	    * libgcc2.c (__bswapsi2): Use SItype.
	      (__bswapdi2): Use DItype.
	    * libgcc2.h: Update for above.

From-SVN: r120454

											
										
										
											2007-01-05 00:16:34 +01:00
+								DItype
 								__bswapdi2 (DItype u)
-												extend.texi (__builtin_bswap32): Document.

2006-09-07  Eric Christopher  <echristo@apple.com>
	    Falk Hueffner  <falk@debian.org>

	* doc/extend.texi (__builtin_bswap32): Document.
	(__builtin_bswap64): Ditto.
	* doc/libgcc.texi (bswapsi2): Document.
	(bswapdi2): Ditto.
	* doc/rtl.texi (bswap): Document.
	* optabs.c (expand_unop): Don't widen a bswap.
	(init_optabs): Init bswap. Set libfuncs explicitly
	for bswapsi2 and bswapdi2.
	* optabs.h (OTI_bswap): New.
	(bswap_optab): Ditto.
	* genopinit.c (optabs): Handle bswap_optab.
	* tree.h (tree_index): Add TI_UINT32_TYPE and
	TI_UINT64_TYPE.
	(uint32_type_node): New.
	(uint64_type_node): Ditto.
	* tree.c (build_common_tree_nodes_2): Initialize
	uint32_type_node and uint64_type_node.
	* builtins.c (expand_builtin_bswap): New.
	(expand_builtin): Call.
	(fold_builtin_bswap): New.
	(fold_builtin_1): Call.
	* fold-const.c (tree_expr_nonnegative_p): Return true
	for bswap.
	* builtin-types.def (BT_UINT32): New.
	(BT_UINT64): Ditto.
	(BT_FN_UINT32_UINT32): Ditto.
	(BT_FN_UINT64_UINT64): Ditto.
	* builtins.def (BUILT_IN_BSWAP32): New.
	(BUILT_IN_BSWAP64): Ditto.
	* rtl.def (BSWAP): New.
	* genattrtab.c (check_attr_value): New.
	* libgcc2.c (__bswapSI2): New.
	(__bswapDI2): Ditto.
	* libgcc2.h (__bswapSI2): Declare.
	(__bswapDI2): Ditto.
	* mklibgcc.in (lib2funcs): Add _bswapsi2 and _bswapdi2.
	* simplify-rtx.c (simplify_const_unary_operation): Return
	0 for BSWAP.
	* libgcc-std.ver (__bswapsi2): Add.
	(__bswapdi2): Ditto.
	* reload1.c (eliminate_regs_1): Add bswap.
	(elimination_effects): Ditto.
	* config/i386/i386.h (x86_bswap): New.
	(TARGET_BSWAP): Use.
	* config/i386/i386.c (x86_bswap): Set.

Co-Authored-By: Falk Hueffner <falk@debian.org>

From-SVN: r118361

											
										
										
											2006-11-01 06:14:40 +01:00
+								{
 								  return ((((u) & 0xff00000000000000ull) >> 56)
 									  | (((u) & 0x00ff000000000000ull) >> 40)
 									  | (((u) & 0x0000ff0000000000ull) >> 24)
 									  | (((u) & 0x000000ff00000000ull) >>  8)
 									  | (((u) & 0x00000000ff000000ull) <<  8)
 									  | (((u) & 0x0000000000ff0000ull) << 24)
 									  | (((u) & 0x000000000000ff00ull) << 40)
 									  | (((u) & 0x00000000000000ffull) << 56));
 								}
 								#endif
-												libgcc2.h, libgcc2.c (__ffsSI2): New.

        * libgcc2.h, libgcc2.c (__ffsSI2): New.
        (__ffsDI2): Rename from __ffsdi2.
        * mklibgcc.in (lib2funcs): Add _ffssi2.

From-SVN: r62422

											
										
										
											2003-02-05 01:52:24 +01:00
+								#ifdef L_ffssi2
 								#undef int
 								int
 								__ffsSI2 (UWtype u)
 								{
 								  UWtype count;
 								  if (u == 0)
 								    return 0;
 								  count_trailing_zeros (count, u);
 								  return count + 1;
 								}
 								#endif
-												(_ffsdi2): Use the correct names for structure elements of type DIunion.

From-SVN: r3217

											
										
										
											1993-01-13 05:30:11 +01:00
+								#ifdef L_ffsdi2
-												libgcc2.c (__ffsdi2, [...]): Change return type to "int".

        * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
        return type to "int".  Shuffle declarations and undef int trap.
        * libgcc2.h: Remove their declarations.
        * optabs.c (expand_unop): Force outmode to int for bitops.

From-SVN: r62353

											
										
										
											2003-02-03 23:42:20 +01:00
+								#undef int
 								int
-												libgcc2.h, libgcc2.c (__ffsSI2): New.

        * libgcc2.h, libgcc2.c (__ffsSI2): New.
        (__ffsDI2): Rename from __ffsdi2.
        * mklibgcc.in (lib2funcs): Add _ffssi2.

From-SVN: r62422

											
										
										
											2003-02-05 01:52:24 +01:00
+								__ffsDI2 (DWtype u)
-												(_ffsdi2): Use the correct names for structure elements of type DIunion.

From-SVN: r3217

											
										
										
											1993-01-13 05:30:11 +01:00
+								{
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = u};
-												Makefile.in (LIB2FUNCS): Add _clz.

        * Makefile.in (LIB2FUNCS): Add _clz.
        * libgcc2.c (__ffsdi2): Use count_trailing_zeros.
        (__clz_tab): Put in its own unit, non-static.
        * libgcc2.h: Always include longlong.h.

From-SVN: r36744

											
										
										
											2000-10-06 07:29:56 +02:00
+								  UWtype word, count, add;
 								  if (uu.s.low != 0)
 								    word = uu.s.low, add = 0;
 								  else if (uu.s.high != 0)
-												libgcc2.c (__lshrdi3, [...]): Use W_TYPE_SIZE.

	* libgcc2.c (__lshrdi3, __ashldi3, __ashrdi3): Use W_TYPE_SIZE.
	(__ffsDI2): Likewise.

From-SVN: r171338

											
										
										
											2011-03-23 02:34:55 +01:00
+								    word = uu.s.high, add = W_TYPE_SIZE;
-												Makefile.in (LIB2FUNCS): Add _clz.

        * Makefile.in (LIB2FUNCS): Add _clz.
        * libgcc2.c (__ffsdi2): Use count_trailing_zeros.
        (__clz_tab): Put in its own unit, non-static.
        * libgcc2.h: Always include longlong.h.

From-SVN: r36744

											
										
										
											2000-10-06 07:29:56 +02:00
+								  else
 								    return 0;
 								  count_trailing_zeros (count, word);
 								  return count + add + 1;
-												(_ffsdi2): Use the correct names for structure elements of type DIunion.

From-SVN: r3217

											
										
										
											1993-01-13 05:30:11 +01:00
+								}
 								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								#ifdef L_muldi3
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
 								__muldi3 (DWtype u, DWtype v)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = u};
 								  const DWunion vv = {.ll = v};
 								  DWunion w = {.ll = __umulsidi3 (uu.s.low, vv.s.low)};
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high
 									       + (UWtype) uu.s.high * (UWtype) vv.s.low);
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  return w.ll;
 								}
 								#endif
-												libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3, __divdi3, __umoddi3, or __moddi3.

	* libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3,
	__divdi3, __umoddi3, or __moddi3.

From-SVN: r58382

											
										
										
											2002-10-21 22:25:38 +02:00
+								#if (defined (L_udivdi3) || defined (L_divdi3) || \
 								     defined (L_umoddi3) || defined (L_moddi3))
-												libgcc2.c: Fix __udiv_w_sdiv breakage on platforms that don't define sdiv_qrnnd.

        * libgcc2.c: Fix __udiv_w_sdiv breakage on platforms that
        don't define sdiv_qrnnd.

From-SVN: r58393

											
										
										
											2002-10-22 01:10:38 +02:00
+								#if defined (sdiv_qrnnd)
-												libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3, __divdi3, __umoddi3, or __moddi3.

	* libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3,
	__divdi3, __umoddi3, or __moddi3.

From-SVN: r58382

											
										
										
											2002-10-21 22:25:38 +02:00
+								#define L_udiv_w_sdiv
 								#endif
-												libgcc2.c: Fix __udiv_w_sdiv breakage on platforms that don't define sdiv_qrnnd.

        * libgcc2.c: Fix __udiv_w_sdiv breakage on platforms that
        don't define sdiv_qrnnd.

From-SVN: r58393

											
										
										
											2002-10-22 01:10:38 +02:00
+								#endif
-												libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3, __divdi3, __umoddi3, or __moddi3.

	* libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3,
	__divdi3, __umoddi3, or __moddi3.

From-SVN: r58382

											
										
										
											2002-10-21 22:25:38 +02:00
-												*** empty log message ***

From-SVN: r1546

											
										
										
											1992-07-10 00:30:59 +02:00
+								#ifdef L_udiv_w_sdiv
-												(__udiv_w_sdiv): If we don't have sdiv_qrnnd, define dummy variant of
__udiv_w_sdiv.

From-SVN: r10476

											
										
										
											1995-10-19 23:48:45 +01:00
+								#if defined (sdiv_qrnnd)
-												libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3, __divdi3, __umoddi3, or __moddi3.

	* libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3,
	__divdi3, __umoddi3, or __moddi3.

From-SVN: r58382

											
										
										
											2002-10-21 22:25:38 +02:00
+								#if (defined (L_udivdi3) || defined (L_divdi3) || \
 								     defined (L_umoddi3) || defined (L_moddi3))
-												libgcc2.c (__udiv_w_sdiv): Use attribute ((always_inline)) when inlining it into other libgcc2 routines.

	* libgcc2.c (__udiv_w_sdiv): Use attribute ((always_inline)) when
	inlining it into other libgcc2 routines.
	(__udivmoddi4): Likewise.

From-SVN: r58450

											
										
										
											2002-10-23 12:47:24 +02:00
+								static inline __attribute__ ((__always_inline__))
-												libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3, __divdi3, __umoddi3, or __moddi3.

	* libgcc2.c: Inline __udiv_w_sdiv when compiling __udivdi3,
	__divdi3, __umoddi3, or __moddi3.

From-SVN: r58382

											
										
										
											2002-10-21 22:25:38 +02:00
+								#endif
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								UWtype
 								__udiv_w_sdiv (UWtype *rp, UWtype a1, UWtype a0, UWtype d)
-												*** empty log message ***

From-SVN: r1504

											
										
										
											1992-07-07 21:58:52 +02:00
+								{
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  UWtype q, r;
 								  UWtype c0, c1, b1;
-												*** empty log message ***

From-SVN: r1504

											
										
										
											1992-07-07 21:58:52 +02:00
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  if ((Wtype) d >= 0)
-												*** empty log message ***

From-SVN: r1504

											
										
										
											1992-07-07 21:58:52 +02:00
+								    {
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								      if (a1 < d - a1 - (a0 >> (W_TYPE_SIZE - 1)))
-												*** empty log message ***

From-SVN: r1504

											
										
										
											1992-07-07 21:58:52 +02:00
+									{
-												c-typeck.c, [...]: Fix comment formatting.

	* c-typeck.c, defaults.h, dwarf.h, dwarf2out.c, fold-const.c,
	gthr-dce.h, gthr-posix.h, gthr-solaris.h, gthr-win32.h,
	lambda-code.c, lambda-mat.c, libgcc2.c, stmt.c,
	tree-ssa-pre.c, tree-vn.c, tree.h: Fix comment formatting.

From-SVN: r88102

											
										
										
											2004-09-25 16:36:40 +02:00
+									  /* Dividend, divisor, and quotient are nonnegative.  */
-												*** empty log message ***

From-SVN: r1504

											
										
										
											1992-07-07 21:58:52 +02:00
+									  sdiv_qrnnd (q, r, a1, a0, d);
 									}
 								      else
 									{
-												c-typeck.c, [...]: Fix comment formatting.

	* c-typeck.c, defaults.h, dwarf.h, dwarf2out.c, fold-const.c,
	gthr-dce.h, gthr-posix.h, gthr-solaris.h, gthr-win32.h,
	lambda-code.c, lambda-mat.c, libgcc2.c, stmt.c,
	tree-ssa-pre.c, tree-vn.c, tree.h: Fix comment formatting.

From-SVN: r88102

											
										
										
											2004-09-25 16:36:40 +02:00
+									  /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d.  */
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+									  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (W_TYPE_SIZE - 1));
-												c-typeck.c, [...]: Fix comment formatting.

	* c-typeck.c, defaults.h, dwarf.h, dwarf2out.c, fold-const.c,
	gthr-dce.h, gthr-posix.h, gthr-solaris.h, gthr-win32.h,
	lambda-code.c, lambda-mat.c, libgcc2.c, stmt.c,
	tree-ssa-pre.c, tree-vn.c, tree.h: Fix comment formatting.

From-SVN: r88102

											
										
										
											2004-09-25 16:36:40 +02:00
+									  /* Divide (c1*2^32 + c0) by d.  */
-												*** empty log message ***

From-SVN: r1504

											
										
										
											1992-07-07 21:58:52 +02:00
+									  sdiv_qrnnd (q, r, c1, c0, d);
-												c-typeck.c, [...]: Fix comment formatting.

	* c-typeck.c, defaults.h, dwarf.h, dwarf2out.c, fold-const.c,
	gthr-dce.h, gthr-posix.h, gthr-solaris.h, gthr-win32.h,
	lambda-code.c, lambda-mat.c, libgcc2.c, stmt.c,
	tree-ssa-pre.c, tree-vn.c, tree.h: Fix comment formatting.

From-SVN: r88102

											
										
										
											2004-09-25 16:36:40 +02:00
+									  /* Add 2^31 to quotient.  */
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+									  q += (UWtype) 1 << (W_TYPE_SIZE - 1);
-												*** empty log message ***

From-SVN: r1504

											
										
										
											1992-07-07 21:58:52 +02:00
+									}
 								    }
 								  else
 								    {
 								      b1 = d >> 1;			/* d/2, between 2^30 and 2^31 - 1 */
 								      c1 = a1 >> 1;			/* A/2 */
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								      c0 = (a1 << (W_TYPE_SIZE - 1)) + (a0 >> 1);
-												*** empty log message ***

From-SVN: r1504

											
										
										
											1992-07-07 21:58:52 +02:00
 								      if (a1 < b1)			/* A < 2^32*b1, so A/2 < 2^31*b1 */
 									{
 									  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
 									  r = 2*r + (a0 & 1);		/* Remainder from A/(2*b1) */
 									  if ((d & 1) != 0)
 									    {
 									      if (r >= q)
 										r = r - q;
 									      else if (q - r <= d)
 										{
 										  r = r - q + d;
 										  q--;
 										}
 									      else
 										{
 										  r = r - q + 2*d;
 										  q -= 2;
 										}
 									    }
 									}
 								      else if (c1 < b1)			/* So 2^31 <= (A/2)/b1 < 2^32 */
 									{
 									  c1 = (b1 - 1) - c1;
 									  c0 = ~c0;			/* logical NOT */
 									  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
 									  q = ~q;			/* (A/2)/b1 */
 									  r = (b1 - 1) - r;
 									  r = 2*r + (a0 & 1);		/* A/(2*b1) */
 									  if ((d & 1) != 0)
 									    {
 									      if (r >= q)
 										r = r - q;
 									      else if (q - r <= d)
 										{
 										  r = r - q + d;
 										  q--;
 										}
 									      else
 										{
 										  r = r - q + 2*d;
 										  q -= 2;
 										}
 									    }
 									}
 								      else				/* Implies c1 = b1 */
 									{				/* Hence a1 = d - 1 = 2*b1 - 1 */
 									  if (a0 >= -d)
 									    {
 									      q = -1;
 									      r = a0 + d;
 									    }
 									  else
 									    {
 									      q = -2;
 									      r = a0 + 2*d;
 									    }
 									}
 								    }
 								  *rp = r;
 								  return q;
 								}
-												(__udiv_w_sdiv): If we don't have sdiv_qrnnd, define dummy variant of
__udiv_w_sdiv.

From-SVN: r10476

											
										
										
											1995-10-19 23:48:45 +01:00
+								#else
 								/* If sdiv_qrnnd doesn't exist, define dummy __udiv_w_sdiv.  */
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								UWtype
 								__udiv_w_sdiv (UWtype *rp __attribute__ ((__unused__)),
 									       UWtype a1 __attribute__ ((__unused__)),
 									       UWtype a0 __attribute__ ((__unused__)),
 									       UWtype d __attribute__ ((__unused__)))
-												Fix more warnings...

        * c-lang.c (finish_file): Wrap variable `void_list_node' with macro
        test !ASM_OUTPUT_CONSTRUCTOR || !ASM_OUTPUT_DESTRUCTOR.
        * calls.c (emit_call_1): Wrap variable `already_popped' with macro
        test !ACCUMULATE_OUTGOING_ARGS.
        * collect2.c (write_c_file_glob): Wrap function definition in
        macro test !LD_INIT_SWITCH.
        * combine.c (try_combine): Wrap variables `cc_use' and
        `compare_mode' in macro test EXTRA_CC_MODES.
        * cpplib.c (do_ident): Remove unused variable `len'.
        (skip_if_group): Remove unused variables `at_beg_of_line' and
        `after_ident'.
        (cpp_get_token): Remove unused variable `dummy'.
        * dbxout.c (scope_labelno): Move static variable definition inside
        the one function scope where it is used.
        (dbxout_function_end): Wrap prototype and definition in
        macro test !NO_DBX_FUNCTION_END.
        * dwarf2out.c (add_subscript_info): Wrap variable `dimension_number'
        in macro test !MIPS_DEBUGGING_INFO.
        * expr.c (expand_builtin_setjmp): Move declaration of variable `i'
        into the scope where it is used.  Wrap empty else-statement body
        in braces.
        * fix-header.c: Fix typo in comment.
        (inf_skip_spaces): Cast results of INF_UNGET to (void).
        (check_protection, main): Likewise.
        * flow.c (find_basic_blocks_1): Remove dangling comment text.
        * function.c (contains): Wrap prototype and definition in macro
        test HAVE_prologue || HAVE_epilogue.
        (fixup_var_refs_1): Remove unused variable `width'.
        * gen-protos.c (main): Remove unused variable `optr'.
        * haifa-sched.c (debug_control_flow): Remove unused variable `j'.
        * libgcc2.c (__udiv_w_sdiv): Provide dummy return value of 0.
        (__sjpopnthrow): Remove unused variable `jmpbuf'.
        (__throw): Remove unused variable `val'.
        * protoize.c: Check for a previously existing definition before
        defining *_OK macros.
        * scan-decls.c (scan_decls): Remove unused variable `old_written'.

From-SVN: r18654

											
										
										
											1998-03-18 08:18:06 +01:00
+								{
 								  return 0;
 								}
-												(__udiv_w_sdiv): If we don't have sdiv_qrnnd, define dummy variant of
__udiv_w_sdiv.

From-SVN: r10476

											
										
										
											1995-10-19 23:48:45 +01:00
+								#endif
-												*** empty log message ***

From-SVN: r1504

											
										
										
											1992-07-07 21:58:52 +02:00
+								#endif
-												Include longlong.h.

[L_udivdi3 || L_divdi3 || L_umoddi3 || L_moddi3] (__udivmoddi4):
Define this `static inline' when defining these, so they all remain
leaf functions.

From-SVN: r10305

											
										
										
											1995-09-01 01:26:53 +02:00
+								#if (defined (L_udivdi3) || defined (L_divdi3) || \
-												Makefile.in (LIB2_DIVMOD_FUNCS): Add _divmoddi4.

	* Makefile.in (LIB2_DIVMOD_FUNCS): Add _divmoddi4.
	* libgcc2.c (__divmoddi4): New function.
	* libgcc2.h (__divmoddi4): Declare.
	* libgcc-std.ver.in (GCC_7.0.0): New. Add __PFX_divmoddi4
	and __PFX_divmodti4.

From-SVN: r241804

											
										
										
											2016-11-02 23:23:13 +01:00
+								     defined (L_umoddi3) || defined (L_moddi3) || \
 								     defined (L_divmoddi4))
-												Include longlong.h.

[L_udivdi3 || L_divdi3 || L_umoddi3 || L_moddi3] (__udivmoddi4):
Define this `static inline' when defining these, so they all remain
leaf functions.

From-SVN: r10305

											
										
										
											1995-09-01 01:26:53 +02:00
+								#define L_udivmoddi4
 								#endif
-												Makefile.in (LIB2FUNCS): Add _clz.

        * Makefile.in (LIB2FUNCS): Add _clz.
        * libgcc2.c (__ffsdi2): Use count_trailing_zeros.
        (__clz_tab): Put in its own unit, non-static.
        * libgcc2.h: Always include longlong.h.

From-SVN: r36744

											
										
										
											2000-10-06 07:29:56 +02:00
+								#ifdef L_clz
-												Makefile.in (LIBGCC_DEPS): Add libgcc2.h.

* Makefile.in (LIBGCC_DEPS): Add libgcc2.h.
* libgcc2.c (__clz_tab[], __popcount_tab[]): Set the fixed dimension of
  these arrays.
* libgcc2.h (__clz_tab[], __popcount_tab[]): Add exports of these arrays.
* longlong.h: Only provide a prototype for the __clz_tab[] array if this
  header has not been included from libgcc2.h.
* config/stormy16/stormy16-lib2.c: Include libgcc2.h rather than defining
  own types.
  Provide prototypes for exported functions.
  Use the __clz_tab[] and __popcount_tab[] arrays provided by libgcc2.c.

From-SVN: r104081

											
										
										
											2005-09-09 10:39:18 +02:00
+								const UQItype __clz_tab[256] =
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
 ,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
 ,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 ,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
 ,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
 ,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
 ,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
 ,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-												Makefile.in (LIBGCC_DEPS): Add libgcc2.h.

* Makefile.in (LIBGCC_DEPS): Add libgcc2.h.
* libgcc2.c (__clz_tab[], __popcount_tab[]): Set the fixed dimension of
  these arrays.
* libgcc2.h (__clz_tab[], __popcount_tab[]): Add exports of these arrays.
* longlong.h: Only provide a prototype for the __clz_tab[] array if this
  header has not been included from libgcc2.h.
* config/stormy16/stormy16-lib2.c: Include libgcc2.h rather than defining
  own types.
  Provide prototypes for exported functions.
  Use the __clz_tab[] and __popcount_tab[] arrays provided by libgcc2.c.

From-SVN: r104081

											
										
										
											2005-09-09 10:39:18 +02:00
+,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								};
-												Makefile.in (LIB2FUNCS): Add _clz.

        * Makefile.in (LIB2FUNCS): Add _clz.
        * libgcc2.c (__ffsdi2): Use count_trailing_zeros.
        (__clz_tab): Put in its own unit, non-static.
        * libgcc2.h: Always include longlong.h.

From-SVN: r36744

											
										
										
											2000-10-06 07:29:56 +02:00
+								#endif
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
 								#ifdef L_clzsi2
-												libgcc2.c (__ffsdi2, [...]): Change return type to "int".

        * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
        return type to "int".  Shuffle declarations and undef int trap.
        * libgcc2.h: Remove their declarations.
        * optabs.c (expand_unop): Force outmode to int for bitops.

From-SVN: r62353

											
										
										
											2003-02-03 23:42:20 +01:00
+								#undef int
 								int
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								__clzSI2 (UWtype x)
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								{
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  Wtype ret;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								  count_leading_zeros (ret, x);
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
 								  return ret;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								}
 								#endif
 								#ifdef L_clzdi2
-												libgcc2.c (__ffsdi2, [...]): Change return type to "int".

        * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
        return type to "int".  Shuffle declarations and undef int trap.
        * libgcc2.h: Remove their declarations.
        * optabs.c (expand_unop): Force outmode to int for bitops.

From-SVN: r62353

											
										
										
											2003-02-03 23:42:20 +01:00
+								#undef int
 								int
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								__clzDI2 (UDWtype x)
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								{
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = x};
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  UWtype word;
 								  Wtype ret, add;
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								  if (uu.s.high)
 								    word = uu.s.high, add = 0;
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  else
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								    word = uu.s.low, add = W_TYPE_SIZE;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  count_leading_zeros (ret, word);
 								  return ret + add;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								}
 								#endif
 								#ifdef L_ctzsi2
-												libgcc2.c (__ffsdi2, [...]): Change return type to "int".

        * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
        return type to "int".  Shuffle declarations and undef int trap.
        * libgcc2.h: Remove their declarations.
        * optabs.c (expand_unop): Force outmode to int for bitops.

From-SVN: r62353

											
										
										
											2003-02-03 23:42:20 +01:00
+								#undef int
 								int
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								__ctzSI2 (UWtype x)
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								{
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  Wtype ret;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  count_trailing_zeros (ret, x);
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  return ret;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								}
 								#endif
 								#ifdef L_ctzdi2
-												libgcc2.c (__ffsdi2, [...]): Change return type to "int".

        * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
        return type to "int".  Shuffle declarations and undef int trap.
        * libgcc2.h: Remove their declarations.
        * optabs.c (expand_unop): Force outmode to int for bitops.

From-SVN: r62353

											
										
										
											2003-02-03 23:42:20 +01:00
+								#undef int
 								int
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								__ctzDI2 (UDWtype x)
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								{
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = x};
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  UWtype word;
 								  Wtype ret, add;
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								  if (uu.s.low)
 								    word = uu.s.low, add = 0;
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  else
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								    word = uu.s.high, add = W_TYPE_SIZE;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise.
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  count_trailing_zeros (ret, word);
 								  return ret + add;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								}
 								#endif
-												Makefile.in (lib2funcs): Add _clrsbsi2 and _clrsbdi2.

	libgcc/
	* Makefile.in (lib2funcs): Add _clrsbsi2 and _clrsbdi2.
	* libgcc-std.ver.in (GCC_4.7.0): New section.

	gcc/
	* doc/extend.texi (__builtin_clrsb, __builtin_clrsbl,
	__builtin_clrsbll): Document.
	* doc/rtl.texi (clrsb): New entry.
	* optabs.c (widen_leading): Renamed from widen_clz.  New argument
	UNOPTAB.  All callers changed.  Use UNOPTAB instead of clz_optab.
	(expand_unop): Handle clrsb_optab.
	(init_optabs): Initialize it.
	* optabs.h (enum optab_index): New entry OTI_clrsb.
	(clrsb_optab): Define.
	* genopinit.c (optabs): Add an entry for it.
	* builtins.c (expand_builtin): Handle clrsb builtin functions.
	* builtins.def (BUILT_IN_CLRSB, BUILT_IN_CLRSBIMAX, BUILT_IN_CLRSBL,
	BUILT_IN_CLRSBLL): New.
	* rtl.def (CLRSB): New code.
	* dwarf2out.c (mem_loc_descriptor): Handle it.
	* simplify-rtx.c (simplify_const_unary_operation): Likewise.
	Use op_mode rather than mode when optimizing ffs, clz, ctz, parity
	and popcount.
	* libgcc2.c (__clrsbSI2, __clrsbDI2): New functions.
	* libgcc2.h (__clrsbSI2, __clrsbDI2): Define and declare.
	(__ctzDI2): Move declaration.
	* config/bfin/bfin.md (clrsbsi2): New expander.
	(signbitssi2): Use the CLRSB rtx.
	(clrsbhi2): Renamed from signbitshi2.  Use the CLRSB rtx.
	* config/bfin/bfin.c (bdesc_1arg): Changed accordingly.

	gcc/testsuite/
	* gcc.c-torture/execute/builtin-bitops-1.c (MAKE_FUNS): Make
	my_clrsb test functions.
	(main): Test clrsb.
	* gcc.dg/builtin-protos-1.c (test_s, test_u, test_sl, test_ul,
	test_sll, test_ull): Add clrsb tests.
	* gcc.dg/torture/builtin-attr-1.c: Add tests for clrsb, clrsbl,
	clrsbll.

From-SVN: r175261

											
										
										
											2011-06-21 16:16:39 +02:00
 								#ifdef L_clrsbsi2
 								#undef int
 								int
 								__clrsbSI2 (Wtype x)
 								{
 								  Wtype ret;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
-												Makefile.in (lib2funcs): Add _clrsbsi2 and _clrsbdi2.

	libgcc/
	* Makefile.in (lib2funcs): Add _clrsbsi2 and _clrsbdi2.
	* libgcc-std.ver.in (GCC_4.7.0): New section.

	gcc/
	* doc/extend.texi (__builtin_clrsb, __builtin_clrsbl,
	__builtin_clrsbll): Document.
	* doc/rtl.texi (clrsb): New entry.
	* optabs.c (widen_leading): Renamed from widen_clz.  New argument
	UNOPTAB.  All callers changed.  Use UNOPTAB instead of clz_optab.
	(expand_unop): Handle clrsb_optab.
	(init_optabs): Initialize it.
	* optabs.h (enum optab_index): New entry OTI_clrsb.
	(clrsb_optab): Define.
	* genopinit.c (optabs): Add an entry for it.
	* builtins.c (expand_builtin): Handle clrsb builtin functions.
	* builtins.def (BUILT_IN_CLRSB, BUILT_IN_CLRSBIMAX, BUILT_IN_CLRSBL,
	BUILT_IN_CLRSBLL): New.
	* rtl.def (CLRSB): New code.
	* dwarf2out.c (mem_loc_descriptor): Handle it.
	* simplify-rtx.c (simplify_const_unary_operation): Likewise.
	Use op_mode rather than mode when optimizing ffs, clz, ctz, parity
	and popcount.
	* libgcc2.c (__clrsbSI2, __clrsbDI2): New functions.
	* libgcc2.h (__clrsbSI2, __clrsbDI2): Define and declare.
	(__ctzDI2): Move declaration.
	* config/bfin/bfin.md (clrsbsi2): New expander.
	(signbitssi2): Use the CLRSB rtx.
	(clrsbhi2): Renamed from signbitshi2.  Use the CLRSB rtx.
	* config/bfin/bfin.c (bdesc_1arg): Changed accordingly.

	gcc/testsuite/
	* gcc.c-torture/execute/builtin-bitops-1.c (MAKE_FUNS): Make
	my_clrsb test functions.
	(main): Test clrsb.
	* gcc.dg/builtin-protos-1.c (test_s, test_u, test_sl, test_ul,
	test_sll, test_ull): Add clrsb tests.
	* gcc.dg/torture/builtin-attr-1.c: Add tests for clrsb, clrsbl,
	clrsbll.

From-SVN: r175261

											
										
										
											2011-06-21 16:16:39 +02:00
+								  if (x < 0)
 								    x = ~x;
 								  if (x == 0)
 								    return W_TYPE_SIZE - 1;
 								  count_leading_zeros (ret, x);
 								  return ret - 1;
 								}
 								#endif
 								#ifdef L_clrsbdi2
 								#undef int
 								/* Count the redundant sign bits of the double-word value X: the
 								   number of bits directly below the most significant bit that are
 								   equal to it.  The count is taken from a single word of X, chosen
 								   according to the contents of the high word.  */
 								int
 								__clrsbDI2 (DWtype x)
 								{
 								  const DWunion parts = {.ll = x};
 								  UWtype bits;
 								  Wtype lead, offset;
 								  if (parts.s.high == 0)
 								    {
 								      /* High word is entirely zero: scan the low word and
 								         account for a full word of leading bits.  */
 								      bits = parts.s.low;
 								      offset = W_TYPE_SIZE;
 								    }
 								  else if (parts.s.high == -1)
 								    {
 								      /* High word is entirely ones: scan the complemented low
 								         word and account for a full word of leading bits.  */
 								      bits = ~parts.s.low;
 								      offset = W_TYPE_SIZE;
 								    }
 								  else if (parts.s.high >= 0)
 								    {
 								      /* Non-negative, non-zero high word: scan it directly.  */
 								      bits = parts.s.high;
 								      offset = 0;
 								    }
 								  else
 								    {
 								      /* Negative high word: scan its complement.  */
 								      bits = ~parts.s.high;
 								      offset = 0;
 								    }
 								  if (bits != 0)
 								    {
 								      count_leading_zeros (lead, bits);
 								    }
 								  else
 								    /* count_leading_zeros is not invoked on zero; the whole word
 								       counts as leading zeros.  */
 								    lead = W_TYPE_SIZE;
 								  /* Subtract one so the sign bit itself is not counted.  */
 								  return lead + offset - 1;
 								}
 								#endif
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								#ifdef L_popcount_tab
-												Makefile.in (LIBGCC_DEPS): Add libgcc2.h.

* Makefile.in (LIBGCC_DEPS): Add libgcc2.h.
* libgcc2.c (__clz_tab[], __popcount_tab[]): Set the fixed dimension of
  these arrays.
* libgcc2.h (__clz_tab[], __popcount_tab[]): Add exports of these arrays.
* longlong.h: Only provide a prototype for the __clz_tab[] array if this
  header has not been included from libgcc2.h.
* config/stormy16/stormy16-lib2.c: Include libgcc2.h rather than defining
  own types.
  Provide prototypes for exported functions.
  Use the __clz_tab[] and __popcount_tab[] arrays provided by libgcc2.c.

From-SVN: r104081

											
										
										
											2005-09-09 10:39:18 +02:00
+								const UQItype __popcount_tab[256] =
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								{
 ,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
 ,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
 ,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
 ,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
 ,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
 ,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
 ,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
-												Makefile.in (LIBGCC_DEPS): Add libgcc2.h.

* Makefile.in (LIBGCC_DEPS): Add libgcc2.h.
* libgcc2.c (__clz_tab[], __popcount_tab[]): Set the fixed dimension of
  these arrays.
* libgcc2.h (__clz_tab[], __popcount_tab[]): Add exports of these arrays.
* longlong.h: Only provide a prototype for the __clz_tab[] array if this
  header has not been included from libgcc2.h.
* config/stormy16/stormy16-lib2.c: Include libgcc2.h rather than defining
  own types.
  Provide prototypes for exported functions.
  Use the __clz_tab[] and __popcount_tab[] arrays provided by libgcc2.c.

From-SVN: r104081

											
										
										
											2005-09-09 10:39:18 +02:00
+,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								};
 								#endif
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								#if defined(L_popcountsi2) || defined(L_popcountdi2)
-												replace BITS_PER_UNIT with __CHAR_BIT__ in target libs

libgcc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* config/visium/lib2funcs.c (__set_trampoline_parity): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	* fixed-bit.h: Likewise.
	* fp-bit.h: Likewise.
	* libgcc2.c (__popcountSI2): Likewise.
	(__popcountDI2): Likewise.
	* libgcc2.h: Likewise.
	* libgcov.h: Likewise.

libobjc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	PR libobjc/24775
	* encoding.c (_darwin_rs6000_special_round_type_align): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	(objc_sizeof_type): Likewise.
	(objc_layout_structure): Likewise.
	(objc_layout_structure_next_member): Likewise.
	(objc_layout_finish_structure): Likewise.
	(objc_layout_structure_get_info): Likewise.

From-SVN: r229936

											
										
										
											2015-11-07 20:36:26 +01:00
+								/* OR X with a copy of itself shifted left by one byte.  X is parenthesized so expression arguments expand correctly.  */
 								#define POPCOUNTCST2(x) (((UWtype) (x) << __CHAR_BIT__) | (x))
 								/* OR X with a copy of itself shifted left by two (POPCOUNTCST4) or
 								   four (POPCOUNTCST8) bytes.  X is parenthesized so that expression
 								   arguments such as `a | b' are cast and shifted as a whole.  */
 								#define POPCOUNTCST4(x) (((UWtype) (x) << (2 * __CHAR_BIT__)) | (x))
 								#define POPCOUNTCST8(x) (((UWtype) (x) << (4 * __CHAR_BIT__)) | (x))
 								#if W_TYPE_SIZE == __CHAR_BIT__
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								#define POPCOUNTCST(x) x
-												replace BITS_PER_UNIT with __CHAR_BIT__ in target libs

libgcc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* config/visium/lib2funcs.c (__set_trampoline_parity): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	* fixed-bit.h: Likewise.
	* fp-bit.h: Likewise.
	* libgcc2.c (__popcountSI2): Likewise.
	(__popcountDI2): Likewise.
	* libgcc2.h: Likewise.
	* libgcov.h: Likewise.

libobjc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	PR libobjc/24775
	* encoding.c (_darwin_rs6000_special_round_type_align): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	(objc_sizeof_type): Likewise.
	(objc_layout_structure): Likewise.
	(objc_layout_structure_next_member): Likewise.
	(objc_layout_finish_structure): Likewise.
	(objc_layout_structure_get_info): Likewise.

From-SVN: r229936

											
										
										
											2015-11-07 20:36:26 +01:00
+								#elif W_TYPE_SIZE == 2 * __CHAR_BIT__
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								#define POPCOUNTCST(x) POPCOUNTCST2 (x)
-												replace BITS_PER_UNIT with __CHAR_BIT__ in target libs

libgcc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* config/visium/lib2funcs.c (__set_trampoline_parity): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	* fixed-bit.h: Likewise.
	* fp-bit.h: Likewise.
	* libgcc2.c (__popcountSI2): Likewise.
	(__popcountDI2): Likewise.
	* libgcc2.h: Likewise.
	* libgcov.h: Likewise.

libobjc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	PR libobjc/24775
	* encoding.c (_darwin_rs6000_special_round_type_align): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	(objc_sizeof_type): Likewise.
	(objc_layout_structure): Likewise.
	(objc_layout_structure_next_member): Likewise.
	(objc_layout_finish_structure): Likewise.
	(objc_layout_structure_get_info): Likewise.

From-SVN: r229936

											
										
										
											2015-11-07 20:36:26 +01:00
+								#elif W_TYPE_SIZE == 4 * __CHAR_BIT__
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								#define POPCOUNTCST(x) POPCOUNTCST4 (POPCOUNTCST2 (x))
-												replace BITS_PER_UNIT with __CHAR_BIT__ in target libs

libgcc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* config/visium/lib2funcs.c (__set_trampoline_parity): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	* fixed-bit.h: Likewise.
	* fp-bit.h: Likewise.
	* libgcc2.c (__popcountSI2): Likewise.
	(__popcountDI2): Likewise.
	* libgcc2.h: Likewise.
	* libgcov.h: Likewise.

libobjc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	PR libobjc/24775
	* encoding.c (_darwin_rs6000_special_round_type_align): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	(objc_sizeof_type): Likewise.
	(objc_layout_structure): Likewise.
	(objc_layout_structure_next_member): Likewise.
	(objc_layout_finish_structure): Likewise.
	(objc_layout_structure_get_info): Likewise.

From-SVN: r229936

											
										
										
											2015-11-07 20:36:26 +01:00
+								#elif W_TYPE_SIZE == 8 * __CHAR_BIT__
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								#define POPCOUNTCST(x) POPCOUNTCST8 (POPCOUNTCST4 (POPCOUNTCST2 (x)))
 								#endif
 								#endif
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								#ifdef L_popcountsi2
-												libgcc2.c (__ffsdi2, [...]): Change return type to "int".

        * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
        return type to "int".  Shuffle declarations and undef int trap.
        * libgcc2.h: Remove their declarations.
        * optabs.c (expand_unop): Force outmode to int for bitops.

From-SVN: r62353

											
										
										
											2003-02-03 23:42:20 +01:00
+								#undef int
 								int
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								__popcountSI2 (UWtype x)
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								{
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								  /* Force table lookup on targets like AVR and RL78 which only
 								     pretend they have LIBGCC2_UNITS_PER_WORD 4, but actually
 								     have 1, and other small word targets.  */
-												replace BITS_PER_UNIT with __CHAR_BIT__ in target libs

libgcc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* config/visium/lib2funcs.c (__set_trampoline_parity): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	* fixed-bit.h: Likewise.
	* fp-bit.h: Likewise.
	* libgcc2.c (__popcountSI2): Likewise.
	(__popcountDI2): Likewise.
	* libgcc2.h: Likewise.
	* libgcov.h: Likewise.

libobjc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	PR libobjc/24775
	* encoding.c (_darwin_rs6000_special_round_type_align): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	(objc_sizeof_type): Likewise.
	(objc_layout_structure): Likewise.
	(objc_layout_structure_next_member): Likewise.
	(objc_layout_finish_structure): Likewise.
	(objc_layout_structure_get_info): Likewise.

From-SVN: r229936

											
										
										
											2015-11-07 20:36:26 +01:00
+								#if __SIZEOF_INT__ > 2 && defined (POPCOUNTCST) && __CHAR_BIT__ == 8
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								  x = x - ((x >> 1) & POPCOUNTCST (0x55));
 								  x = (x & POPCOUNTCST (0x33)) + ((x >> 2) & POPCOUNTCST (0x33));
 								  x = (x + (x >> 4)) & POPCOUNTCST (0x0F);
-												replace BITS_PER_UNIT with __CHAR_BIT__ in target libs

libgcc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* config/visium/lib2funcs.c (__set_trampoline_parity): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	* fixed-bit.h: Likewise.
	* fp-bit.h: Likewise.
	* libgcc2.c (__popcountSI2): Likewise.
	(__popcountDI2): Likewise.
	* libgcc2.h: Likewise.
	* libgcov.h: Likewise.

libobjc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	PR libobjc/24775
	* encoding.c (_darwin_rs6000_special_round_type_align): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	(objc_sizeof_type): Likewise.
	(objc_layout_structure): Likewise.
	(objc_layout_structure_next_member): Likewise.
	(objc_layout_finish_structure): Likewise.
	(objc_layout_structure_get_info): Likewise.

From-SVN: r229936

											
										
										
											2015-11-07 20:36:26 +01:00
+								  return (x * POPCOUNTCST (0x01)) >> (W_TYPE_SIZE - __CHAR_BIT__);
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								#else
-												libgcc2.c (__popcountSI2): Don't use wide type for iterator and result.

2005-12-05  Jan Beulich  <jbeulich@novell.com>

	* libgcc2.c (__popcountSI2): Don't use wide type for iterator and
	result.
	(__popcountDI2): Likewise.

From-SVN: r108046

											
										
										
											2005-12-05 09:34:25 +01:00
+								  int i, ret = 0;
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
 								  for (i = 0; i < W_TYPE_SIZE; i += 8)
 								    ret += __popcount_tab[(x >> i) & 0xff];
 								  return ret;
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								#endif
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								}
 								#endif
 								#ifdef L_popcountdi2
-												libgcc2.c (__ffsdi2, [...]): Change return type to "int".

        * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
        return type to "int".  Shuffle declarations and undef int trap.
        * libgcc2.h: Remove their declarations.
        * optabs.c (expand_unop): Force outmode to int for bitops.

From-SVN: r62353

											
										
										
											2003-02-03 23:42:20 +01:00
+								#undef int
 								int
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								__popcountDI2 (UDWtype x)
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								{
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								  /* Force table lookup on targets like AVR and RL78 which only
 								     pretend they have LIBGCC2_UNITS_PER_WORD 4, but actually
 								     have 1, and other small word targets.  */
-												replace BITS_PER_UNIT with __CHAR_BIT__ in target libs

libgcc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* config/visium/lib2funcs.c (__set_trampoline_parity): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	* fixed-bit.h: Likewise.
	* fp-bit.h: Likewise.
	* libgcc2.c (__popcountSI2): Likewise.
	(__popcountDI2): Likewise.
	* libgcc2.h: Likewise.
	* libgcov.h: Likewise.

libobjc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	PR libobjc/24775
	* encoding.c (_darwin_rs6000_special_round_type_align): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	(objc_sizeof_type): Likewise.
	(objc_layout_structure): Likewise.
	(objc_layout_structure_next_member): Likewise.
	(objc_layout_finish_structure): Likewise.
	(objc_layout_structure_get_info): Likewise.

From-SVN: r229936

											
										
										
											2015-11-07 20:36:26 +01:00
+								#if __SIZEOF_INT__ > 2 && defined (POPCOUNTCST) && __CHAR_BIT__ == 8
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								  const DWunion uu = {.ll = x};
 								  UWtype x1 = uu.s.low, x2 = uu.s.high;
 								  x1 = x1 - ((x1 >> 1) & POPCOUNTCST (0x55));
 								  x2 = x2 - ((x2 >> 1) & POPCOUNTCST (0x55));
 								  x1 = (x1 & POPCOUNTCST (0x33)) + ((x1 >> 2) & POPCOUNTCST (0x33));
 								  x2 = (x2 & POPCOUNTCST (0x33)) + ((x2 >> 2) & POPCOUNTCST (0x33));
 								  x1 = (x1 + (x1 >> 4)) & POPCOUNTCST (0x0F);
 								  x2 = (x2 + (x2 >> 4)) & POPCOUNTCST (0x0F);
 								  x1 += x2;
-												replace BITS_PER_UNIT with __CHAR_BIT__ in target libs

libgcc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* config/visium/lib2funcs.c (__set_trampoline_parity): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	* fixed-bit.h: Likewise.
	* fp-bit.h: Likewise.
	* libgcc2.c (__popcountSI2): Likewise.
	(__popcountDI2): Likewise.
	* libgcc2.h: Likewise.
	* libgcov.h: Likewise.

libobjc/ChangeLog:

2015-11-07  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	PR libobjc/24775
	* encoding.c (_darwin_rs6000_special_round_type_align): Use
	__CHAR_BIT__ instead of BITS_PER_UNIT.
	(objc_sizeof_type): Likewise.
	(objc_layout_structure): Likewise.
	(objc_layout_structure_next_member): Likewise.
	(objc_layout_finish_structure): Likewise.
	(objc_layout_structure_get_info): Likewise.

From-SVN: r229936

											
										
										
											2015-11-07 20:36:26 +01:00
+								  return (x1 * POPCOUNTCST (0x01)) >> (W_TYPE_SIZE - __CHAR_BIT__);
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								#else
-												libgcc2.c (__popcountSI2): Don't use wide type for iterator and result.

2005-12-05  Jan Beulich  <jbeulich@novell.com>

	* libgcc2.c (__popcountSI2): Don't use wide type for iterator and
	result.
	(__popcountDI2): Likewise.

From-SVN: r108046

											
										
										
											2005-12-05 09:34:25 +01:00
+								  int i, ret = 0;
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
 								  for (i = 0; i < 2*W_TYPE_SIZE; i += 8)
 								    ret += __popcount_tab[(x >> i) & 0xff];
 								  return ret;
-												re PR middle-end/36041 (Speed up builtin_popcountll)

	PR middle-end/36041
	* libgcc2.c (POPCOUNTCST2, POPCOUNTCST4, POPCOUNTCST8, POPCOUNTCST):
	Define.
	(__popcountSI2): For __SIZEOF_INT__ > 2 targets use arithmetics
	instead of table lookups.
	(__popcountDI2): Likewise.

From-SVN: r200506

											
										
										
											2013-06-28 11:28:40 +02:00
+								#endif
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								}
 								#endif
 								#ifdef L_paritysi2
-												libgcc2.c (__ffsdi2, [...]): Change return type to "int".

        * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
        return type to "int".  Shuffle declarations and undef int trap.
        * libgcc2.h: Remove their declarations.
        * optabs.c (expand_unop): Force outmode to int for bitops.

From-SVN: r62353

											
										
										
											2003-02-03 23:42:20 +01:00
+								#undef int
 								int
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								__paritySI2 (UWtype x)
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								{
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								#if W_TYPE_SIZE > 64
 								# error "fill out the table"
 								#endif
 								#if W_TYPE_SIZE > 32
 								  x ^= x >> 32;
 								#endif
 								#if W_TYPE_SIZE > 16
 								  x ^= x >> 16;
 								#endif
 								  x ^= x >> 8;
 								  x ^= x >> 4;
 								  x &= 0xf;
 								  return (0x6996 >> x) & 1;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scanners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								}
 								#endif
 								#ifdef L_paritydi2
-												libgcc2.c (__ffsdi2, [...]): Change return type to "int".

        * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change
        return type to "int".  Shuffle declarations and undef int trap.
        * libgcc2.h: Remove their declarations.
        * optabs.c (expand_unop): Force outmode to int for bitops.

From-SVN: r62353

											
										
										
											2003-02-03 23:42:20 +01:00
+								#undef int
 								int
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								__parityDI2 (UDWtype x)
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scaners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2 __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2 __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								{
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion uu = {.ll = x};
 								  UWtype nx = uu.s.low ^ uu.s.high;
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
 								#if W_TYPE_SIZE > 64
 								# error "fill out the table"
 								#endif
 								#if W_TYPE_SIZE > 32
 								  nx ^= nx >> 32;
 								#endif
 								#if W_TYPE_SIZE > 16
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scaners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2 __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2 __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								  nx ^= nx >> 16;
-												libgcc-std.ver (__clztf2): New.

        * libgcc-std.ver (__clztf2): New.
        (__ctztf2, __popcounttf2, __paritytf2): New.
        * libgcc2.c (__clzSI2, __clzDI2, __ctzSI2, __ctzDI2, __popcountSI2,
        __popcountDI2, __paritySI2, __parityDI2): Use UWmode and UDWmode;
        adjust code to match the different type sizes.
        * libgcc2.h (__clzSI2, __ctzSI2, __popcountSI2, __paritySI2,
        __clzDI2, __ctzDI2, __popcountDI2, __parityDI2): New macros.

        * optabs.c (init_integral_libfuncs): Don't hard-code SImode and
        TImode; select word_mode and twice that.
        (init_floating_libfuncs): Don't hard-code SFmode and TFmode;
        select the modes from float, double, and long double.
        (init_optabs): Remove duplicate initializations.

From-SVN: r62606

											
										
										
											2003-02-09 19:35:22 +01:00
+								#endif
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scaners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2 __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2 __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								  nx ^= nx >> 8;
-												libgcc2.c: Include auto-host.h.

        * libgcc2.c: Include auto-host.h.
        (ATTRIBUTE_HIDDEN): New.
        (__clz_tab): Don't declare here for clz and ctz.
        (__clzsi2, __clzdi2): Use count_leading_zeros.
        (__ctzsi2, __ctzdi2): Use count_trailing_zeros.
        (__popcount_tab): Mark ATTRIBUTE_HIDDEN.
        (__paritysi2, __paritydi2): Use shifts instead of __popcount_tab.
        * longlong.h (__clz_tab): Mark ATTRIBUTE_HIDDEN.

From-SVN: r62256

											
										
										
											2003-02-01 21:58:35 +01:00
+								  nx ^= nx >> 4;
-												libgcc2.c (__paritysi2, [...]): Replace last two reduction rounds with a "bit table" lookup.

        * libgcc2.c (__paritysi2, __paritydi2): Replace last two reduction
        rounds with a "bit table" lookup.

From-SVN: r62421

											
										
										
											2003-02-05 01:43:22 +01:00
+								  nx &= 0xf;
 								  return (0x6996 >> nx) & 1;
-												[multiple changes]

2003-02-01  Richard Henderson  <rth@redhat.com>

	* optabs.c (expand_unop): Use word_mode for outmode of bit scaners.
	* libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
	__popcountsi2, __popcountdi2, __paritysi2 __paritydi2): Change
	return type to Wtype.

	* libgcc-std.ver (GCC_3.4): Fix inheritance.

	* config/i386/i386.md (ffssi2): Use nonimmediate_operand for
	expander input constraint.

2003-02-01  Falk Hueffner  <falk.hueffner@student.uni-tuebingen.de>

        * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and
        OTI_parity.
        (clz_optab, ctz_optab, popcount_optab, parity_optab): New.
        * optabs.c (widen_clz, expand_parity): New.
        (expand_unop): Handle clz and parity.  Hardcode SImode as outmode
        for libcalls to clz, ctz, popcount, and parity.
        (init_optabs): Init clz_optab, ctz_optab, popcount_optab and
        parity_optab, and set up libfunc handlers.
        * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2,
        __popcountsi2, __popcountdi2, __paritysi2 __paritydi2,
        __popcount_tab): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c.
        * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab
        and parity_optab.
        * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New.
        * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT,
        BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL,
        BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL,
        BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL,
        BUILT_IN_PARITYLL): New.
        * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs
        and add optab argument.
        (expand_builtin): Expand BUILT_IN_{FFS,CLZ,POPCOUNT,PARITY}*.
        * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New.
        * expr.c (expand_expr): Handle them.
        * fold-const.c (tree_expr_nonnegative_p): Likewise.
        * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New.
        * reload1.c (eliminate_regs): Handle them.
        (elimination_effects): Likewise.
        * function.c (instantiate_virtual_regs_1): Likewise
        * genattrtab.c (check_attr_value): Likewise.
        * simplify-rtx.c (simplify_unary_operation): Likewise.
        * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR.
        * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY.
        (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY.
        * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New.
        * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to
        __builtin_arm_clz.
        * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move...
        * mklibgcc.in (lib2funcs): ...here and merge.  Add new members.
        * doc/extend.texi (Other Builtins): Add new builtins.
        * doc/md.texi (Standard Names): Add new patterns.

From-SVN: r62252

											
										
										
											2003-02-01 20:00:02 +01:00
+								}
 								#endif
-												Makefile.in (LIB2FUNCS): Add _clz.

        * Makefile.in (LIB2FUNCS): Add _clz.
        * libgcc2.c (__ffsdi2): Use count_trailing_zeros.
        (__clz_tab): Put in its own unit, non-static.
        * libgcc2.h: Always include longlong.h.

From-SVN: r36744

											
										
										
											2000-10-06 07:29:56 +02:00
 								#ifdef L_udivmoddi4
-												tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.

2013-11-27  Kugan Vivekanandarajah  <kuganv@linaro.org>

	gcc/
	* doc/tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.
	* doc/tm.texi (TARGET_HAS_NO_HW_DIVIDE): Regenerate.

	libgcc/
	* libgcc2.c (__udivmoddi4): Define new implementation when
	TARGET_HAS_NO_HW_DIVIDE is defined, for processors without any
	divide instructions.

From-SVN: r205444

											
										
										
											2013-11-27 13:17:05 +01:00
+								#ifdef TARGET_HAS_NO_HW_DIVIDE
 								#if (defined (L_udivdi3) || defined (L_divdi3) || \
-												Makefile.in (LIB2_DIVMOD_FUNCS): Add _divmoddi4.

	* Makefile.in (LIB2_DIVMOD_FUNCS): Add _divmoddi4.
	* libgcc2.c (__divmoddi4): New function.
	* libgcc2.h (__divmoddi4): Declare.
	* libgcc-std.ver.in (GCC_7.0.0): New. Add __PFX_divmoddi4
	and __PFX_divmodti4.

From-SVN: r241804

											
										
										
											2016-11-02 23:23:13 +01:00
+								     defined (L_umoddi3) || defined (L_moddi3) || \
 								     defined (L_divmoddi4))
-												tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.

2013-11-27  Kugan Vivekanandarajah  <kuganv@linaro.org>

	gcc/
	* doc/tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.
	* doc/tm.texi (TARGET_HAS_NO_HW_DIVIDE): Regenerate.

	libgcc/
	* libgcc2.c (__udivmoddi4): Define new implementation when
	TARGET_HAS_NO_HW_DIVIDE is defined, for processors without any
	divide instructions.

From-SVN: r205444

											
										
										
											2013-11-27 13:17:05 +01:00
+								static inline __attribute__ ((__always_inline__))
 								#endif
 								UDWtype
 								__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
 								{
 								  UDWtype q = 0, r = n, y = d;
 								  UWtype lz1, lz2, i, k;
 								  /* Implements the align-divisor, shift-dividend method.  This algorithm
 								     aligns the divisor under the dividend and then performs a number of
 								     test-subtract iterations which shift the dividend left. Number of
 								     iterations is k + 1 where k is the number of bit positions the
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								     divisor must be shifted left to align it under the dividend.
-												tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.

2013-11-27  Kugan Vivekanandarajah  <kuganv@linaro.org>

	gcc/
	* doc/tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.
	* doc/tm.texi (TARGET_HAS_NO_HW_DIVIDE): Regenerate.

	libgcc/
	* libgcc2.c (__udivmoddi4): Define new implementation when
	TARGET_HAS_NO_HW_DIVIDE is defined, for processors without any
	divide instructions.

From-SVN: r205444

											
										
										
											2013-11-27 13:17:05 +01:00
+								     quotient bits can be saved in the rightmost positions of the dividend
 								     as it shifts left on each test-subtract iteration. */
 								  if (y <= r)
 								    {
 								      lz1 = __builtin_clzll (d);
 								      lz2 = __builtin_clzll (n);
 								      k = lz1 - lz2;
 								      y = (y << k);
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								      /* Dividend can exceed 2 ^ (width - 1) - 1 but still be less than the
-												tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.

2013-11-27  Kugan Vivekanandarajah  <kuganv@linaro.org>

	gcc/
	* doc/tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.
	* doc/tm.texi (TARGET_HAS_NO_HW_DIVIDE): Regenerate.

	libgcc/
	* libgcc2.c (__udivmoddi4): Define new implementation when
	TARGET_HAS_NO_HW_DIVIDE is defined, for processors without any
	divide instructions.

From-SVN: r205444

											
										
										
											2013-11-27 13:17:05 +01:00
+									 aligned divisor. Normal iteration can drop the high order bit
 									 of the dividend. Therefore, first test-subtract iteration is a
 									 special case, saving its quotient bit in a separate location and
 									 not shifting the dividend. */
 								      if (r >= y)
 									{
 									  r = r - y;
 									  q =  (1ULL << k);
 									}
 								      if (k > 0)
 									{
 									  y = y >> 1;
 									  /* Perform k additional iterations, each a regular test-subtract
 									    step that shifts the dividend.  */
 									  i = k;
 									  do
 									    {
 									      if (r >= y)
 										r = ((r - y) << 1) + 1;
 									      else
 										r =  (r << 1);
 									      i = i - 1;
 									    } while (i != 0);
 									  /* First quotient bit is combined with the quotient bits resulting
 									     from the k regular iterations.  */
 									  q = q + r;
 									  r = r >> k;
 									  q = q - (r << k);
 									}
 								    }
 								  if (rp)
 								    *rp = r;
 								  return q;
 								}
 								#else
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												Include longlong.h.

[L_udivdi3 || L_divdi3 || L_umoddi3 || L_moddi3] (__udivmoddi4):
Define this `static inline' when defining these, so they all remain
leaf functions.

From-SVN: r10305

											
										
										
											1995-09-01 01:26:53 +02:00
+								#if (defined (L_udivdi3) || defined (L_divdi3) || \
-												Makefile.in (LIB2_DIVMOD_FUNCS): Add _divmoddi4.

	* Makefile.in (LIB2_DIVMOD_FUNCS): Add _divmoddi4.
	* libgcc2.c (__divmoddi4): New function.
	* libgcc2.h (__divmoddi4): Declare.
	* libgcc-std.ver.in (GCC_7.0.0): New. Add __PFX_divmoddi4
	and __PFX_divmodti4.

From-SVN: r241804

											
										
										
											2016-11-02 23:23:13 +01:00
+								     defined (L_umoddi3) || defined (L_moddi3) || \
 								     defined (L_divmoddi4))
-												libgcc2.c (__udiv_w_sdiv): Use attribute ((always_inline)) when inlining it into other libgcc2 routines.

	* libgcc2.c (__udiv_w_sdiv): Use attribute ((always_inline)) when
	inlining it into other libgcc2 routines.
	(__udivmoddi4): Likewise.

From-SVN: r58450

											
										
										
											2002-10-23 12:47:24 +02:00
+								static inline __attribute__ ((__always_inline__))
-												Include longlong.h.

[L_udivdi3 || L_divdi3 || L_umoddi3 || L_moddi3] (__udivmoddi4):
Define this `static inline' when defining these, so they all remain
leaf functions.

From-SVN: r10305

											
										
										
											1995-09-01 01:26:53 +02:00
+								#endif
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								UDWtype
 								__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion nn = {.ll = n};
 								  const DWunion dd = {.ll = d};
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  DWunion rr;
 								  UWtype d0, d1, n0, n1, n2;
 								  UWtype q0, q1;
 								  UWtype b, bm;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  d0 = dd.s.low;
 								  d1 = dd.s.high;
 								  n0 = nn.s.low;
 								  n1 = nn.s.high;
 								#if !UDIV_NEEDS_NORMALIZATION
 								  if (d1 == 0)
 								    {
 								      if (d0 > n1)
 									{
 									  /* 0q = nn / 0D */
 									  udiv_qrnnd (q0, n0, n1, n0, d0);
 									  q1 = 0;
 									  /* Remainder in n0.  */
 									}
 								      else
 									{
 									  /* qq = NN / 0d */
 									  if (d0 == 0)
 									    d0 = 1 / d0;	/* Divide intentionally by zero.  */
 									  udiv_qrnnd (q1, n1, 0, n1, d0);
 									  udiv_qrnnd (q0, n0, n1, n0, d0);
 									  /* Remainder in n0.  */
 									}
 								      if (rp != 0)
 									{
 									  rr.s.low = n0;
 									  rr.s.high = 0;
 									  *rp = rr.ll;
 									}
 								    }
 								#else /* UDIV_NEEDS_NORMALIZATION */
 								  if (d1 == 0)
 								    {
 								      if (d0 > n1)
 									{
 									  /* 0q = nn / 0D */
 									  count_leading_zeros (bm, d0);
 									  if (bm != 0)
 									    {
 									      /* Normalize, i.e. make the most significant bit of the
 										 denominator set.  */
 									      d0 = d0 << bm;
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+									      n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+									      n0 = n0 << bm;
 									    }
 									  udiv_qrnnd (q0, n0, n1, n0, d0);
 									  q1 = 0;
 									  /* Remainder in n0 >> bm.  */
 									}
 								      else
 									{
 									  /* qq = NN / 0d */
 									  if (d0 == 0)
 									    d0 = 1 / d0;	/* Divide intentionally by zero.  */
 									  count_leading_zeros (bm, d0);
 									  if (bm == 0)
 									    {
 									      /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
 										 conclude (the most significant bit of n1 is set) /\ (the
 										 leading quotient digit q1 = 1).
 										 This special case is necessary, not an optimization.
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+										 (Shifts counts of W_TYPE_SIZE are undefined.)  */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 									      n1 -= d0;
 									      q1 = 1;
 									    }
 									  else
 									    {
 									      /* Normalize.  */
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+									      b = W_TYPE_SIZE - bm;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 									      d0 = d0 << bm;
 									      n2 = n1 >> b;
 									      n1 = (n1 << bm) | (n0 >> b);
 									      n0 = n0 << bm;
 									      udiv_qrnnd (q1, n1, n2, n1, d0);
 									    }
-												formatting tweaks

From-SVN: r12390

											
										
										
											1996-07-04 00:07:53 +02:00
+									  /* n1 != d0...  */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 									  udiv_qrnnd (q0, n0, n1, n0, d0);
 									  /* Remainder in n0 >> bm.  */
 									}
 								      if (rp != 0)
 									{
 									  rr.s.low = n0 >> bm;
 									  rr.s.high = 0;
 									  *rp = rr.ll;
 									}
 								    }
 								#endif /* UDIV_NEEDS_NORMALIZATION */
 								  else
 								    {
 								      if (d1 > n1)
 									{
 									  /* 00 = nn / DD */
 									  q0 = 0;
 									  q1 = 0;
 									  /* Remainder in n1n0.  */
 									  if (rp != 0)
 									    {
 									      rr.s.low = n0;
 									      rr.s.high = n1;
 									      *rp = rr.ll;
 									    }
 									}
 								      else
 									{
 									  /* 0q = NN / dd */
 									  count_leading_zeros (bm, d1);
 									  if (bm == 0)
 									    {
 									      /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
 										 conclude (the most significant bit of n1 is set) /\ (the
 										 quotient digit q0 = 0 or 1).
 										 This special case is necessary, not an optimization.  */
 									      /* The condition on the next line takes advantage of that
 										 n1 >= d1 (true due to program flow).  */
 									      if (n1 > d1 || n0 >= d0)
 										{
 										  q0 = 1;
 										  sub_ddmmss (n1, n0, n1, n0, d1, d0);
 										}
 									      else
 										q0 = 0;
 									      q1 = 0;
 									      if (rp != 0)
 										{
 										  rr.s.low = n0;
 										  rr.s.high = n1;
 										  *rp = rr.ll;
 										}
 									    }
 									  else
 									    {
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+									      UWtype m1, m0;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+									      /* Normalize.  */
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+									      b = W_TYPE_SIZE - bm;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 									      d1 = (d1 << bm) | (d0 >> b);
 									      d0 = d0 << bm;
 									      n2 = n1 >> b;
 									      n1 = (n1 << bm) | (n0 >> b);
 									      n0 = n0 << bm;
 									      udiv_qrnnd (q0, n1, n2, n1, d1);
 									      umul_ppmm (m1, m0, q0, d0);
 									      if (m1 > n1 || (m1 == n1 && m0 > n0))
 										{
 										  q0--;
 										  sub_ddmmss (m1, m0, m1, m0, d1, d0);
 										}
 									      q1 = 0;
 									      /* Remainder in (n1n0 - m1m0) >> bm.  */
 									      if (rp != 0)
 										{
 										  sub_ddmmss (n1, n0, n1, n0, m1, m0);
 										  rr.s.low = (n1 << b) | (n0 >> bm);
 										  rr.s.high = n1 >> bm;
 										  *rp = rr.ll;
 										}
 									    }
 									}
 								    }
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  const DWunion ww = {{.low = q0, .high = q1}};
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								  return ww.ll;
 								}
 								#endif
-												tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.

2013-11-27  Kugan Vivekanandarajah  <kuganv@linaro.org>

	gcc/
	* doc/tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define.
	* doc/tm.texi (TARGET_HAS_NO_HW_DIVIDE): Regenerate.

	libgcc/
	* libgcc2.c (__udivmoddi4): Define new implementation when
	TARGET_HAS_NO_HW_DIVIDE is defined, for processors without any
	divide instructions.

From-SVN: r205444

											
										
										
											2013-11-27 13:17:05 +01:00
+								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								#ifdef L_divdi3
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
 								__divdi3 (DWtype u, DWtype v)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												libgcc2.h (word_type): Type definition removed.

2007-07-06  Andreas Krebbel  <krebbel1@de.ibm.com>

	* libgcc2.h (word_type): Type definition removed.
	(cmp_return_type, shift_count_type): Type definitions added.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* libgcc2.c (__udivmoddi4, __moddi3): Type of local variable c
	changed from word_type to Wtype.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* c-common.c (handle_mode_attribute): Handling for libgcc_cmp_return and
	libgcc_shift_count attribute added.
	* target-def.h (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): New target hooks defined.
	(TARGET_INITIALIZER): New target hooks added.
	* targhooks.c (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Default implementations for the new
	target hooks added.
	* targhooks.h (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Function prototypes added.
	* target.h (struct gcc_target): Fields for the new target hooks added.
	* optabs.c (expand_binop): Use shift_count_mode when expanding shift
	as library call.
	(prepare_cmp_insn): Use cmp_return_mode when expanding comparison as
	library call.

	* doc/tm.texi (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): Documentation added.

	* config/s390/s390.c (s390_libgcc_cmp_return_mode,
	s390_libgcc_shift_count_mode): Functions added.
	(TARGET_LIBGCC_CMP_RETURN_MODE,	TARGET_LIBGCC_SHIFT_COUNT_MODE): Target
	hooks defined.

From-SVN: r126410

											
										
										
											2007-07-06 12:47:31 +02:00
+								  Wtype c = 0;
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  DWunion uu = {.ll = u};
 								  DWunion vv = {.ll = v};
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  DWtype w;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  if (uu.s.high < 0)
 								    c = ~c,
-												libgcc2.c (__divdi3, __moddi3): Use unary minus operator instead of __negdi2 directly.

        * libgcc2.c (__divdi3, __moddi3): Use unary minus operator
        instead of __negdi2 directly.

From-SVN: r55632

											
										
										
											2002-07-22 02:15:49 +02:00
+								    uu.ll = -uu.ll;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								  if (vv.s.high < 0)
 								    c = ~c,
-												libgcc2.c (__divdi3, __moddi3): Use unary minus operator instead of __negdi2 directly.

        * libgcc2.c (__divdi3, __moddi3): Use unary minus operator
        instead of __negdi2 directly.

From-SVN: r55632

											
										
										
											2002-07-22 02:15:49 +02:00
+								    vv.ll = -vv.ll;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  w = __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) 0);
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								  if (c)
-												libgcc2.c (__divdi3, __moddi3): Use unary minus operator instead of __negdi2 directly.

        * libgcc2.c (__divdi3, __moddi3): Use unary minus operator
        instead of __negdi2 directly.

From-SVN: r55632

											
										
										
											2002-07-22 02:15:49 +02:00
+								    w = -w;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  return w;
 								}
 								#endif
 								#ifdef L_moddi3
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
 								__moddi3 (DWtype u, DWtype v)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												libgcc2.h (word_type): Type definition removed.

2007-07-06  Andreas Krebbel  <krebbel1@de.ibm.com>

	* libgcc2.h (word_type): Type definition removed.
	(cmp_return_type, shift_count_type): Type definitions added.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* libgcc2.c (__udivmoddi4, __moddi3): Type of local variable c
	changed from word_type to Wtype.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* c-common.c (handle_mode_attribute): Handling for libgcc_cmp_return and
	libgcc_shift_count attribute added.
	* target-def.h (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): New target hooks defined.
	(TARGET_INITIALIZER): New target hooks added.
	* targhooks.c (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Default implementations for the new
	target hooks added.
	* targhooks.h (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Function prototypes added.
	* target.h (struct gcc_target): Fields for the new target hooks added.
	* optabs.c (expand_binop): Use shift_count_mode when expanding shift
	as library call.
	(prepare_cmp_insn): Use cmp_return_mode when expanding comparison as
	library call.

	* doc/tm.texi (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): Documentation added.

	* config/s390/s390.c (s390_libgcc_cmp_return_mode,
	s390_libgcc_shift_count_mode): Functions added.
	(TARGET_LIBGCC_CMP_RETURN_MODE,	TARGET_LIBGCC_SHIFT_COUNT_MODE): Target
	hooks defined.

From-SVN: r126410

											
										
										
											2007-07-06 12:47:31 +02:00
+								  Wtype c = 0;
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  DWunion uu = {.ll = u};
 								  DWunion vv = {.ll = v};
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  DWtype w;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  if (uu.s.high < 0)
 								    c = ~c,
-												libgcc2.c (__divdi3, __moddi3): Use unary minus operator instead of __negdi2 directly.

        * libgcc2.c (__divdi3, __moddi3): Use unary minus operator
        instead of __negdi2 directly.

From-SVN: r55632

											
										
										
											2002-07-22 02:15:49 +02:00
+								    uu.ll = -uu.ll;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								  if (vv.s.high < 0)
-												libgcc2.c (__divdi3, __moddi3): Use unary minus operator instead of __negdi2 directly.

        * libgcc2.c (__divdi3, __moddi3): Use unary minus operator
        instead of __negdi2 directly.

From-SVN: r55632

											
										
										
											2002-07-22 02:15:49 +02:00
+								    vv.ll = -vv.ll;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												libgcc2.c (__moddi3): Cast &w to UDWtype*.

2004-10-07  Andrew Pinski  <pinskia@physics.uc.edu>

        * libgcc2.c (__moddi3): Cast &w to UDWtype*.

From-SVN: r88734

											
										
										
											2004-10-08 04:55:15 +02:00
+								  (void) __udivmoddi4 (uu.ll, vv.ll, (UDWtype*)&w);
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								  if (c)
-												libgcc2.c (__divdi3, __moddi3): Use unary minus operator instead of __negdi2 directly.

        * libgcc2.c (__divdi3, __moddi3): Use unary minus operator
        instead of __negdi2 directly.

From-SVN: r55632

											
										
										
											2002-07-22 02:15:49 +02:00
+								    w = -w;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  return w;
 								}
 								#endif
-												Makefile.in (LIB2_DIVMOD_FUNCS): Add _divmoddi4.

	* Makefile.in (LIB2_DIVMOD_FUNCS): Add _divmoddi4.
	* libgcc2.c (__divmoddi4): New function.
	* libgcc2.h (__divmoddi4): Declare.
	* libgcc-std.ver.in (GCC_7.0.0): New. Add __PFX_divmoddi4
	and __PFX_divmodti4.

From-SVN: r241804

											
										
										
											2016-11-02 23:23:13 +01:00
+								#ifdef L_divmoddi4
 								/* Signed double-word division: return the quotient of U by V and
 								   store the remainder through RP.  The magnitudes are handed to
 								   __udivmoddi4 and the signs are patched up afterwards: the
 								   quotient is negated when exactly one operand was negative, the
 								   remainder when U was negative.  RP is written unconditionally,
 								   so it must point to valid storage.  */
 								DWtype
 								__divmoddi4 (DWtype u, DWtype v, DWtype *rp)
 								{
 								  /* All-ones when the corresponding result has to be negated.  */
 								  Wtype negate_quot = 0;
 								  Wtype negate_rem = 0;
 								  DWunion num = {.ll = u};
 								  DWunion den = {.ll = v};
 								  DWtype quot;
 								  DWtype rem;
 								  /* A negative dividend flips the sign of both results.  */
 								  if (num.s.high < 0)
 								    {
 								      negate_quot = ~negate_quot;
 								      negate_rem = ~negate_rem;
 								      num.ll = -num.ll;
 								    }
 								  /* A negative divisor flips the sign of the quotient only.  */
 								  if (den.s.high < 0)
 								    {
 								      negate_quot = ~negate_quot;
 								      den.ll = -den.ll;
 								    }
 								  quot = __udivmoddi4 (num.ll, den.ll, (UDWtype*)&rem);
 								  if (negate_quot)
 								    quot = -quot;
 								  if (negate_rem)
 								    rem = -rem;
 								  *rp = rem;
 								  return quot;
 								}
 								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								#ifdef L_umoddi3
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								UDWtype
 								__umoddi3 (UDWtype u, UDWtype v)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  UDWtype w;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								  (void) __udivmoddi4 (u, v, &w);
 								  return w;
 								}
 								#endif
 								#ifdef L_udivdi3
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								UDWtype
 								__udivdi3 (UDWtype n, UDWtype d)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  return __udivmoddi4 (n, d, (UDWtype *) 0);
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif
 								#ifdef L_cmpdi2
-												libgcc2.h (word_type): Type definition removed.

2007-07-06  Andreas Krebbel  <krebbel1@de.ibm.com>

	* libgcc2.h (word_type): Type definition removed.
	(cmp_return_type, shift_count_type): Type definitions added.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* libgcc2.c (__udivmoddi4, __moddi3): Type of local variable c
	changed from word_type to Wtype.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* c-common.c (handle_mode_attribute): Handling for libgcc_cmp_return and
	libgcc_shift_count attribute added.
	* target-def.h (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): New target hooks defined.
	(TARGET_INITIALIZER): New target hooks added.
	* targhooks.c (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Default implementations for the new
	target hooks added.
	* targhooks.h (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Function prototypes added.
	* target.h (struct gcc_target): Fields for the new target hooks added.
	* optabs.c (expand_binop): Use shift_count_mode when expanding shift
	as library call.
	(prepare_cmp_insn): Use cmp_return_mode when expanding comparison as
	library call.

	* doc/tm.texi (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): Documentation added.

	* config/s390/s390.c (s390_libgcc_cmp_return_mode,
	s390_libgcc_shift_count_mode): Functions added.
	(TARGET_LIBGCC_CMP_RETURN_MODE,	TARGET_LIBGCC_SHIFT_COUNT_MODE): Target
	hooks defined.

From-SVN: r126410

											
										
										
											2007-07-06 12:47:31 +02:00
+								cmp_return_type
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								__cmpdi2 (DWtype a, DWtype b)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  return (a > b) - (a < b) + 1;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif
 								#ifdef L_ucmpdi2
-												libgcc2.h (word_type): Type definition removed.

2007-07-06  Andreas Krebbel  <krebbel1@de.ibm.com>

	* libgcc2.h (word_type): Type definition removed.
	(cmp_return_type, shift_count_type): Type definitions added.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* libgcc2.c (__udivmoddi4, __moddi3): Type of local variable c
	changed from word_type to Wtype.
	(__lshrdi3, __ashldi3, __ashrdi3): word_type of second parameter
	replaced with shift_count_type.
	(__cmpdi2, __ucmpdi2): word_type of return type replaced with
	cmp_return_type.
	* c-common.c (handle_mode_attribute): Handling for libgcc_cmp_return and
	libgcc_shift_count attribute added.
	* target-def.h (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): New target hooks defined.
	(TARGET_INITIALIZER): New target hooks added.
	* targhooks.c (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Default implementations for the new
	target hooks added.
	* targhooks.h (default_libgcc_cmp_return_mode,
	default_libgcc_shift_count_mode): Function prototypes added.
	* target.h (struct gcc_target): Fields for the new target hooks added.
	* optabs.c (expand_binop): Use shift_count_mode when expanding shift
	as library call.
	(prepare_cmp_insn): Use cmp_return_mode when expanding comparison as
	library call.

	* doc/tm.texi (TARGET_LIBGCC_CMP_RETURN_MODE,
	TARGET_LIBGCC_SHIFT_COUNT_MODE): Documentation added.

	* config/s390/s390.c (s390_libgcc_cmp_return_mode,
	s390_libgcc_shift_count_mode): Functions added.
	(TARGET_LIBGCC_CMP_RETURN_MODE,	TARGET_LIBGCC_SHIFT_COUNT_MODE): Target
	hooks defined.

From-SVN: r126410

											
										
										
											2007-07-06 12:47:31 +02:00
+								cmp_return_type
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								__ucmpdi2 (UDWtype a, UDWtype b)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												Improve generated code for various libgcc2.c routines

libgcc/

	* libgcc2.c (__addvSI3): Use overflow builtins.
	(__addvsi3, __addvDI3 ,__subvSI3, __subvsi3): Likewise.
	(__subvDI3 __mulvSI3, __mulvsi3, __negvSI2): Likewise.
	(__negvsi2, __negvDI2): Likewise.
	(__cmpdi2, __ucmpdi2): Adjust implementation to improve
	generated code.
	* libgcc2.h (__ucmpdi2): Adjust prototype.

											
										
										
											2020-11-10 16:22:28 +01:00
+								  return (a > b) - (a < b) + 1;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_fixunstfdi) && LIBGCC2_HAS_TF_MODE
-												libgcc2.c (__fixunstfDI, [...]): Make return type unsigned.

	* libgcc2.c (__fixunstfDI, __fixunsxfDI, __fixunsdfDI,
	__fixunssfDI): Make return type unsigned.
	* libgcc2.h (__fixunstfDI, __fixunsxfDI, __fixunsdfDI,
	__fixunssfDI): Make return type unsigned.

From-SVN: r129899

											
										
										
											2007-11-05 12:41:40 +01:00
+								UDWtype
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								__fixunstfDI (TFtype a)
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								{
 								  if (a < 0)
 								    return 0;
 								  /* Compute high word of result, as a flonum.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  const TFtype b = (a / Wtype_MAXp1_F);
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  /* Convert that to fixed (but not to DWtype!),
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								     and shift it into the high word.  */
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  UDWtype v = (UWtype) b;
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  v <<= W_TYPE_SIZE;
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								  /* Remove high part from the TFtype, leaving the low part as flonum.  */
 								  a -= (TFtype)v;
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  /* Convert that to fixed (but not to DWtype!) and add it in.
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								     Sometimes A comes out negative.  This is significant, since
 								     A has more bits than a long int does.  */
 								  if (a < 0)
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								    v -= (UWtype) (- a);
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								  else
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								    v += (UWtype) a;
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								  return v;
 								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_fixtfdi) && LIBGCC2_HAS_TF_MODE
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
-												(__negdi2, __lshrdi3, __ashldi3, __ashrdi3, __ffsdi2):

Use ANSI style definition with full prototype.
(__muldi3, __udiv_w_sdiv, __udivmoddi4, __divdi3, __moddi3) : Likewise.
(__udivmoddi4, __udivdi3, __cmpdi2, __ucmpdi2) : Likewise.
(__fixunstfdi, __fixtfdi, __fixunsxfdi, __fixxfdi) : Likewise.
(__fixunsdfdi, __fixdfdi, __floatdixf, __floatditf) : Likewise.
(__floatdidf, __floatdisf, __fixunsxfsi, __fixunsdfsi) : Likewise.
(__gcc_bcmp, __eprintf, gopen, gclose, __bb_init_file) : Likewise.
(__bb_init_trace_func, __clear_cache, mprotect) : Likewise.
(__enable_execute_stack, cacheflush, exit) : Likewise.
(find_exception_table, __find_first_exception_table_match) : Likewise.

From-SVN: r13658

											
										
										
											1997-02-16 13:55:15 +01:00
+								__fixtfdi (TFtype a)
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								{
 								  if (a < 0)
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								    return - __fixunstfDI (-a);
 								  return __fixunstfDI (a);
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_fixunsxfdi) && LIBGCC2_HAS_XF_MODE
-												libgcc2.c (__fixunstfDI, [...]): Make return type unsigned.

	* libgcc2.c (__fixunstfDI, __fixunsxfDI, __fixunsdfDI,
	__fixunssfDI): Make return type unsigned.
	* libgcc2.h (__fixunstfDI, __fixunsxfDI, __fixunsdfDI,
	__fixunssfDI): Make return type unsigned.

From-SVN: r129899

											
										
										
											2007-11-05 12:41:40 +01:00
+								UDWtype
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								__fixunsxfDI (XFtype a)
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								{
 								  if (a < 0)
 								    return 0;
 								  /* Compute high word of result, as a flonum.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  const XFtype b = (a / Wtype_MAXp1_F);
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  /* Convert that to fixed (but not to DWtype!),
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								     and shift it into the high word.  */
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								  UDWtype v = (UWtype) b;
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  v <<= W_TYPE_SIZE;
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								  /* Remove high part from the XFtype, leaving the low part as flonum.  */
 								  a -= (XFtype)v;
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  /* Convert that to fixed (but not to DWtype!) and add it in.
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								     Sometimes A comes out negative.  This is significant, since
 								     A has more bits than a long int does.  */
 								  if (a < 0)
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								    v -= (UWtype) (- a);
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								  else
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								    v += (UWtype) a;
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								  return v;
 								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_fixxfdi) && LIBGCC2_HAS_XF_MODE
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
-												(__negdi2, __lshrdi3, __ashldi3, __ashrdi3, __ffsdi2):

Use ANSI style definition with full prototype.
(__muldi3, __udiv_w_sdiv, __udivmoddi4, __divdi3, __moddi3) : Likewise.
(__udivmoddi4, __udivdi3, __cmpdi2, __ucmpdi2) : Likewise.
(__fixunstfdi, __fixtfdi, __fixunsxfdi, __fixxfdi) : Likewise.
(__fixunsdfdi, __fixdfdi, __floatdixf, __floatditf) : Likewise.
(__floatdidf, __floatdisf, __fixunsxfsi, __fixunsdfsi) : Likewise.
(__gcc_bcmp, __eprintf, gopen, gclose, __bb_init_file) : Likewise.
(__bb_init_trace_func, __clear_cache, mprotect) : Likewise.
(__enable_execute_stack, cacheflush, exit) : Likewise.
(find_exception_table, __find_first_exception_table_match) : Likewise.

From-SVN: r13658

											
										
										
											1997-02-16 13:55:15 +01:00
+								__fixxfdi (XFtype a)
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								{
 								  if (a < 0)
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								    return - __fixunsxfDI (-a);
 								  return __fixunsxfDI (a);
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_fixunsdfdi) && LIBGCC2_HAS_DF_MODE
-												libgcc2.c (__fixunstfDI, [...]): Make return type unsigned.

	* libgcc2.c (__fixunstfDI, __fixunsxfDI, __fixunsdfDI,
	__fixunssfDI): Make return type unsigned.
	* libgcc2.h (__fixunstfDI, __fixunsxfDI, __fixunsdfDI,
	__fixunssfDI): Make return type unsigned.

From-SVN: r129899

											
										
										
											2007-11-05 12:41:40 +01:00
+								UDWtype
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								__fixunsdfDI (DFtype a)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												Merge basic-improvements-branch to trunk

From-SVN: r60174

											
										
										
											2002-12-16 19:23:00 +01:00
+								  /* Get high part of result.  The division here will just move the radix
 								     point and will not cause any rounding.  Then the conversion to integral
 								     type chops result as desired.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  const UWtype hi = a / Wtype_MAXp1_F;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												Merge basic-improvements-branch to trunk

From-SVN: r60174

											
										
										
											2002-12-16 19:23:00 +01:00
+								  /* Get low part of result.  Convert `hi' to floating type and scale it back,
 								     then subtract this from the number being converted.  This leaves the low
 								     part.  Convert that to integral type.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  const UWtype lo = a - (DFtype) hi * Wtype_MAXp1_F;
-												Merge basic-improvements-branch to trunk

From-SVN: r60174

											
										
										
											2002-12-16 19:23:00 +01:00
 								  /* Assemble result from the two parts.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  return ((UDWtype) hi << W_TYPE_SIZE) | lo;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_fixdfdi) && LIBGCC2_HAS_DF_MODE
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
-												(__negdi2, __lshrdi3, __ashldi3, __ashrdi3, __ffsdi2):

Use ANSI style definition with full prototype.
(__muldi3, __udiv_w_sdiv, __udivmoddi4, __divdi3, __moddi3) : Likewise.
(__udivmoddi4, __udivdi3, __cmpdi2, __ucmpdi2) : Likewise.
(__fixunstfdi, __fixtfdi, __fixunsxfdi, __fixxfdi) : Likewise.
(__fixunsdfdi, __fixdfdi, __floatdixf, __floatditf) : Likewise.
(__floatdidf, __floatdisf, __fixunsxfsi, __fixunsdfsi) : Likewise.
(__gcc_bcmp, __eprintf, gopen, gclose, __bb_init_file) : Likewise.
(__bb_init_trace_func, __clear_cache, mprotect) : Likewise.
(__enable_execute_stack, cacheflush, exit) : Likewise.
(find_exception_table, __find_first_exception_table_match) : Likewise.

From-SVN: r13658

											
										
										
											1997-02-16 13:55:15 +01:00
+								__fixdfdi (DFtype a)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
 								  if (a < 0)
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								    return - __fixunsdfDI (-a);
 								  return __fixunsdfDI (a);
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif
-												libgcc2.h (LIBGCC2_HAS_SF_MODE): New macro.

	* libgcc2.h (LIBGCC2_HAS_SF_MODE): New macro.
	(LIBGCC2_HAS_DF_MODE, LIBGCC2_HAS_TF_MODE, LIBGCC2_HAS_XF_MODE): Make
	the defaults false if BITS_PER_UNIT != 8.
	(SFtype, SCtype, __fixsfdi, __floatdisf, __fixunssfSI, __fixunssfDI)
	(__powisf2, __divsc3, __mulsc3): Guard with LIBGCC2_HAS_SF_MODE rather
	than BITS_PER_UNIT != 8.
	(L_fixdfdi, L_fixsfdi, L_fixtfdi, L_fixunsdfdi, L_fixunsdfsi)
	(L_fixunssfdi, L_fixunssfsi, L_fixunstfdi, L_fixunsxfdi, L_fixunsxfsi)
	(L_fixxfdi, L_floatdidf, L_floatdisf, L_floatditf, L_floatdixf): Remove
	#undefs.
	* libgcc2.c (__fixunssfDI, __fixsfdi, __floatdisf, __fixunssfSI)
	(__powisf2, __divsc3, __mulsc3): Guard with LIBGCC2_HAS_SF_MODE.

From-SVN: r96778

											
										
										
											2005-03-21 08:22:22 +01:00
+								#if defined(L_fixunssfdi) && LIBGCC2_HAS_SF_MODE
-												libgcc2.c (__fixunstfDI, [...]): Make return type unsigned.

	* libgcc2.c (__fixunstfDI, __fixunsxfDI, __fixunsdfDI,
	__fixunssfDI): Make return type unsigned.
	* libgcc2.h (__fixunstfDI, __fixunsxfDI, __fixunsdfDI,
	__fixunssfDI): Make return type unsigned.

From-SVN: r129899

											
										
										
											2007-11-05 12:41:40 +01:00
+								UDWtype
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								__fixunssfDI (SFtype a)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if LIBGCC2_HAS_DF_MODE
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								  /* Convert the SFtype to a DFtype, because that is surely not going
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								     to lose any bits.  Some day someone else can write a faster version
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								     that avoids converting to DFtype, and verify it really works right.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  const DFtype dfa = a;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												Merge basic-improvements-branch to trunk

From-SVN: r60174

											
										
										
											2002-12-16 19:23:00 +01:00
+								  /* Get high part of result.  The division here will just move the radix
 								     point and will not cause any rounding.  Then the conversion to integral
 								     type chops result as desired.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  const UWtype hi = dfa / Wtype_MAXp1_F;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												Merge basic-improvements-branch to trunk

From-SVN: r60174

											
										
										
											2002-12-16 19:23:00 +01:00
+								  /* Get low part of result.  Convert `hi' to floating type and scale it back,
 								     then subtract this from the number being converted.  This leaves the low
 								     part.  Convert that to integral type.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  const UWtype lo = dfa - (DFtype) hi * Wtype_MAXp1_F;
-												Merge basic-improvements-branch to trunk

From-SVN: r60174

											
										
										
											2002-12-16 19:23:00 +01:00
 								  /* Assemble result from the two parts.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  return ((UDWtype) hi << W_TYPE_SIZE) | lo;
 								#elif FLT_MANT_DIG < W_TYPE_SIZE
 								  if (a < 1)
 								    return 0;
 								  if (a < Wtype_MAXp1_F)
 								    return (UWtype)a;
 								  if (a < Wtype_MAXp1_F * Wtype_MAXp1_F)
 								    {
 								      /* Since we know that there are fewer significant bits in the SFmode
 									 quantity than in a word, we know that we can convert out all the
-												* libgcc2.c, tree-vect-analyze.c: Fix comment typos.

From-SVN: r95196

											
										
										
											2005-02-17 22:20:21 +01:00
+									 significant bits in one step, and thus avoid losing bits.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
 								      /* ??? The following loop essentially performs frexpf.  If we could
 									 use the real libm function, or poke at the actual bits of the fp
 									 format, it would be significantly faster.  */
 								      UWtype shift = 0, counter;
 								      SFtype msb;
 								      a /= Wtype_MAXp1_F;
 								      for (counter = W_TYPE_SIZE / 2; counter != 0; counter >>= 1)
 									{
 									  SFtype counterf = (UWtype)1 << counter;
 									  if (a >= counterf)
 									    {
 									      shift |= counter;
 									      a /= counterf;
 									    }
 									}
 								      /* Rescale into the range of one word, extract the bits of that
 									 one word, and shift the result into position.  */
 								      a *= Wtype_MAXp1_F;
 								      counter = a;
 								      return (DWtype)counter << shift;
 								    }
 								  return -1;
 								#else
 								# error
 								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif
-												libgcc2.h (LIBGCC2_HAS_SF_MODE): New macro.

	* libgcc2.h (LIBGCC2_HAS_SF_MODE): New macro.
	(LIBGCC2_HAS_DF_MODE, LIBGCC2_HAS_TF_MODE, LIBGCC2_HAS_XF_MODE): Make
	the defaults false if BITS_PER_UNIT != 8.
	(SFtype, SCtype, __fixsfdi, __floatdisf, __fixunssfSI, __fixunssfDI)
	(__powisf2, __divsc3, __mulsc3): Guard with LIBGCC2_HAS_SF_MODE rather
	than BITS_PER_UNIT != 8.
	(L_fixdfdi, L_fixsfdi, L_fixtfdi, L_fixunsdfdi, L_fixunsdfsi)
	(L_fixunssfdi, L_fixunssfsi, L_fixunstfdi, L_fixunsxfdi, L_fixunsxfsi)
	(L_fixxfdi, L_floatdidf, L_floatdisf, L_floatditf, L_floatdixf): Remove
	#undefs.
	* libgcc2.c (__fixunssfDI, __fixsfdi, __floatdisf, __fixunssfSI)
	(__powisf2, __divsc3, __mulsc3): Guard with LIBGCC2_HAS_SF_MODE.

From-SVN: r96778

											
										
										
											2005-03-21 08:22:22 +01:00
+								#if defined(L_fixsfdi) && LIBGCC2_HAS_SF_MODE
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								DWtype
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								__fixsfdi (SFtype a)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
 								  if (a < 0)
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Aad config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								    return - __fixunssfDI (-a);
 								  return __fixunssfDI (a);
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_floatdixf) && LIBGCC2_HAS_XF_MODE
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								XFtype
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								__floatdixf (DWtype u)
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								{
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#if W_TYPE_SIZE > __LIBGCC_XF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# error
 								#endif
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  XFtype d = (Wtype) (u >> W_TYPE_SIZE);
 								  d *= Wtype_MAXp1_F;
 								  d += (UWtype)u;
-												* Check in merge from gcc2.  See ChangeLog.11 and ChangeLog.12
        for details.

        * haifa-sched.c: Mirror recent changes from gcc2.

From-SVN: r18984

											
										
										
											1998-04-04 15:32:39 +02:00
+								  return d;
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								}
 								#endif
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								#if defined(L_floatundixf) && LIBGCC2_HAS_XF_MODE
 								XFtype
 								__floatundixf (UDWtype u)
 								{
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#if W_TYPE_SIZE > __LIBGCC_XF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# error
 								#endif
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								  XFtype d = (UWtype) (u >> W_TYPE_SIZE);
 								  d *= Wtype_MAXp1_F;
 								  d += (UWtype)u;
 								  return d;
 								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_floatditf) && LIBGCC2_HAS_TF_MODE
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								TFtype
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								__floatditf (DWtype u)
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								{
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#if W_TYPE_SIZE > __LIBGCC_TF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# error
 								#endif
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  TFtype d = (Wtype) (u >> W_TYPE_SIZE);
 								  d *= Wtype_MAXp1_F;
 								  d += (UWtype)u;
-												* Check in merge from gcc2.  See ChangeLog.11 and ChangeLog.12
        for details.

        * haifa-sched.c: Mirror recent changes from gcc2.

From-SVN: r18984

											
										
										
											1998-04-04 15:32:39 +02:00
+								  return d;
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								}
 								#endif
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								#if defined(L_floatunditf) && LIBGCC2_HAS_TF_MODE
 								TFtype
 								__floatunditf (UDWtype u)
 								{
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#if W_TYPE_SIZE > __LIBGCC_TF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# error
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								#endif
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								  TFtype d = (UWtype) (u >> W_TYPE_SIZE);
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								  d *= Wtype_MAXp1_F;
 								  d += (UWtype)u;
 								  return d;
 								}
 								#endif
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#if (defined(L_floatdisf) && LIBGCC2_HAS_SF_MODE)	\
 								     || (defined(L_floatdidf) && LIBGCC2_HAS_DF_MODE)
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								#define DI_SIZE (W_TYPE_SIZE * 2)
-												libgcc2.c (__floatdisf, [...]): Don't use IBM Extended Double TFmode.

	* libgcc2.c (__floatdisf, __floatdidf): Don't use IBM Extended
	Double TFmode.
	(__floatundisf, __floatundidf): Likewise.
	* libgcc2.h (IS_IBM_EXTENDED): Define.

From-SVN: r110004

											
										
										
											2006-01-20 01:42:29 +01:00
+								#define F_MODE_OK(SIZE) \
 								  (SIZE < DI_SIZE							\
 								   && SIZE > (DI_SIZE - SIZE + FSSIZE)					\
-												libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from IS_IBM_EXTENDED.

        * libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from 
        IS_IBM_EXTENDED.  Also define in terms of WIDEST_HARDWARE_FP_SIZE.
        * libgcc2.c (__floatdisf): Avoid double-word arithmetic when
        looking for non-zero bits shifted out.  Avoid a recursive call
        when constructing the scalar.
        (__floatundisf): Likewise.

From-SVN: r124106

											
										
										
											2007-04-24 17:28:21 +02:00
+								   && !AVOID_FP_TYPE_CONVERSION(SIZE))
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#if defined(L_floatdisf)
 								#define FUNC __floatdisf
 								#define FSTYPE SFtype
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#define FSSIZE __LIBGCC_SF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#else
 								#define FUNC __floatdidf
 								#define FSTYPE DFtype
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#define FSSIZE __LIBGCC_DF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								FSTYPE
 								FUNC (DWtype u)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#if FSSIZE >= W_TYPE_SIZE
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  /* When the word size is small, we never get any rounding error.  */
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								  FSTYPE f = (Wtype) (u >> W_TYPE_SIZE);
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  f *= Wtype_MAXp1_F;
 								  f += (UWtype)u;
 								  return f;
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#elif (LIBGCC2_HAS_DF_MODE && F_MODE_OK (__LIBGCC_DF_MANT_DIG__))	\
 								     || (LIBGCC2_HAS_XF_MODE && F_MODE_OK (__LIBGCC_XF_MANT_DIG__))	\
 								     || (LIBGCC2_HAS_TF_MODE && F_MODE_OK (__LIBGCC_TF_MANT_DIG__))
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#if (LIBGCC2_HAS_DF_MODE && F_MODE_OK (__LIBGCC_DF_MANT_DIG__))
 								# define FSIZE __LIBGCC_DF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# define FTYPE DFtype
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#elif (LIBGCC2_HAS_XF_MODE && F_MODE_OK (__LIBGCC_XF_MANT_DIG__))
 								# define FSIZE __LIBGCC_XF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# define FTYPE XFtype
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#elif (LIBGCC2_HAS_TF_MODE && F_MODE_OK (__LIBGCC_TF_MANT_DIG__))
 								# define FSIZE __LIBGCC_TF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# define FTYPE TFtype
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								#else
 								# error
 								#endif
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#define REP_BIT ((UDWtype) 1 << (DI_SIZE - FSIZE))
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
-												(__floatdisf):  Protect against double-rounding error.

From-SVN: r7431

											
										
										
											1994-06-03 00:10:30 +02:00
+								  /* Protect against double-rounding error.
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								     Replace any low-order bits that might be truncated with a single bit that
 								     won't be lost.  The bit can go in anywhere below the rounding position
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								     of the FSTYPE.  A fixed mask and bit position handles all usual
 								     configurations.  */
 								  if (! (- ((DWtype) 1 << FSIZE) < u
 									 && u < ((DWtype) 1 << FSIZE)))
-												(__floatdisf):  Protect against double-rounding error.

From-SVN: r7431

											
										
										
											1994-06-03 00:10:30 +02:00
+								    {
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								      if ((UDWtype) u & (REP_BIT - 1))
-												(__floatdisf):  Protect against double-rounding error.

From-SVN: r7431

											
										
										
											1994-06-03 00:10:30 +02:00
+									{
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+									  u &= ~ (REP_BIT - 1);
 									  u |= REP_BIT;
-												(__floatdisf):  Protect against double-rounding error.

From-SVN: r7431

											
										
										
											1994-06-03 00:10:30 +02:00
+									}
 								    }
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								  /* Do the calculation in a wider type so that we don't lose any of
 								     the precision of the high word while multiplying it.  */
 								  FTYPE f = (Wtype) (u >> W_TYPE_SIZE);
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  f *= Wtype_MAXp1_F;
 								  f += (UWtype)u;
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								  return (FSTYPE) f;
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								#else
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#if FSSIZE >= W_TYPE_SIZE - 2
 								# error
 								#endif
 								  /* Finally, the word size is larger than the number of bits in the
 								     required FSTYPE, and we've got no suitable wider type.  The only
 								     way to avoid double rounding is to special case the
 								     extraction.  */
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
 								  /* If there are no high bits set, fall back to one conversion.  */
 								  if ((Wtype)u == u)
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								    return (FSTYPE)(Wtype)u;
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
 								  /* Otherwise, find the power of two.  */
 								  Wtype hi = u >> W_TYPE_SIZE;
 								  if (hi < 0)
-												re PR target/29776 (result of ffs/clz/ctz/popcount/parity are already sign-extended)

	PR target/29776
	* fold-const.c (tree_call_nonnegative_warnv_p): Return true
	for BUILT_IN_C{LZ,LRSB}*.
	* tree.h (CASE_INT_FN): Add FN##IMAX case.
	* tree-vrp.c (extract_range_basic): Handle
	BUILT_IN_{FFS,PARITY,POPCOUNT,C{LZ,TZ,LRSB}}*.  For
	BUILT_IN_CONSTANT_P if argument isn't (D) of PARM_DECL,
	fall thru to code calling set_value*.
	* builtins.c (expand_builtin): Remove *IMAX cases.
	(fold_builtin_bitop): For BUILT_IN_CLRSB* return NULL_TREE
	if width is bigger than 2*HWI.

	* libgcc2.c (__floattisf): Avoid undefined signed overflow.

	* gcc.dg/tree-ssa/vrp89.c: New test.

From-SVN: r200731

											
										
										
											2013-07-06 11:34:17 +02:00
+								    hi = -(UWtype) hi;
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
 								  UWtype count, shift;
-												re PR libgcc/78067 (libgcc2 calls count_leading_zero with 0)

2016-11-03  Bernd Edlinger  <bernd.edlinger@hotmail.de>

        PR libgcc/78067
        * libgcc2.c (__floatdisf, __floatdidf): Avoid undefined results from
        count_leading_zeros.

testsuite:
2016-11-03  Bernd Edlinger  <bernd.edlinger@hotmail.de>

        PR libgcc/78067
        * gcc.dg/torture/fp-int-convert.h: Add more conversion tests.

From-SVN: r241817

											
										
										
											2016-11-03 13:52:19 +01:00
+								#if !defined (COUNT_LEADING_ZEROS_0) || COUNT_LEADING_ZEROS_0 != W_TYPE_SIZE
 								  if (hi == 0)
 								    count = W_TYPE_SIZE;
 								  else
 								#endif
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								  count_leading_zeros (count, hi);
 								  /* No leading zero bits means the high word of u is the minimum value; the low word may still be nonzero.  */
 								  if (count == 0)
-												libgcc2.c: Correct DI/TI -> SF/DF conversions

FSTYPE FUNC (DWtype u) in libgcc2.c, which converts DI/TI to SF/DF, has

  /* No leading bits means u == minimum.  */
  if (count == 0)
    return -(Wtype_MAXp1_F * (Wtype_MAXp1_F / 2));

in the third case (where actually count == 0 only means the high part is
minimum).  It should be:

  /* No leading bits means u == minimum.  */
  if (count == 0)
    return Wtype_MAXp1_F * (FSTYPE) (hi | ((UWtype) u != 0));

instead.

gcc/testsuite/

2019-01-23  H.J. Lu  <hongjiu.lu@intel.com>

	PR libgcc/88931
	* gcc.dg/torture/fp-int-convert-timode-1.c: New test.
	* gcc.dg/torture/fp-int-convert-timode-2.c: Likewise.
	* gcc.dg/torture/fp-int-convert-timode-3.c: Likewise.
	* gcc.dg/torture/fp-int-convert-timode-4.c: Likewise.

libgcc/

2019-01-23  Joseph Myers  <joseph@codesourcery.com>

	PR libgcc/88931
	* libgcc2.c (FSTYPE FUNC (DWtype u)): Correct no leading bits
	case.

From-SVN: r268216

											
										
										
											2019-01-23 22:41:59 +01:00
+								    return Wtype_MAXp1_F * (FSTYPE) (hi | ((UWtype) u != 0));
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								  shift = 1 + W_TYPE_SIZE - count;
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
 								  /* Shift down the most significant bits.  */
 								  hi = u >> shift;
 								  /* If we lost any nonzero bits, set the lsb to ensure correct rounding.  */
-												libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from IS_IBM_EXTENDED.

        * libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from 
        IS_IBM_EXTENDED.  Also define in terms of WIDEST_HARDWARE_FP_SIZE.
        * libgcc2.c (__floatdisf): Avoid double-word arithmetic when
        looking for non-zero bits shifted out.  Avoid a recursive call
        when constructing the scalar.
        (__floatundisf): Likewise.

From-SVN: r124106

											
										
										
											2007-04-24 17:28:21 +02:00
+								  if ((UWtype)u << (W_TYPE_SIZE - shift))
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								    hi |= 1;
 								  /* Convert the one word of data, and rescale.  */
-												libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from IS_IBM_EXTENDED.

        * libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from 
        IS_IBM_EXTENDED.  Also define in terms of WIDEST_HARDWARE_FP_SIZE.
        * libgcc2.c (__floatdisf): Avoid double-word arithmetic when
        looking for non-zero bits shifted out.  Avoid a recursive call
        when constructing the scalar.
        (__floatundisf): Likewise.

From-SVN: r124106

											
										
										
											2007-04-24 17:28:21 +02:00
+								  FSTYPE f = hi, e;
 								  if (shift == W_TYPE_SIZE)
 								    e = Wtype_MAXp1_F;
 								  /* The following two cases could be merged if we knew that the target
 								     supported a native unsigned->float conversion.  More often, we only
 								     have a signed conversion, and have to add extra fixup code.  */
 								  else if (shift == W_TYPE_SIZE - 1)
 								    e = Wtype_MAXp1_F / 2;
 								  else
 								    e = (Wtype)1 << shift;
 								  return f * e;
-												re PR middle-end/19920 (build broken on several targets due to recent 'DC' type update to libgcc2)

        PR 19920
        * libgcc2.c (WORD_SIZE): Remove all definitions; replace uses
        with W_TYPE_SIZE.
        (HIGH_WORD_COEFF, HIGH_HALFWORD_COEFF): Remove all definitions;
        replace uses with Wtype_MAXp1_F.
        (L_fixunstfdi, L_fixtfdi, L_floatditf, L_fixunsxfdi, L_fixxfdi,
        L_floatdixf, L_fixunsxfsi, L_fixunsdfdi, L_floatdidf, L_fixunsdfsi,
        L_powidf2, L_powixf2, L_powitf2, L_muldc3, L_divdc3, L_mulxc3,
        L_divxc3, L_multc3, L_divtc3): Protect with HAVE_DFMODE, HAVE_XFMODE,
        and HAVE_TFMODE as appropriate.
        (__fixunssfDI): Provide an implementation that doesn't need DFmode.
        (__floatdisf): Likewise.
        * libgcc2.h (LIBGCC2_DOUBLE_TYPE_SIZE): New.
        (HAVE_DFMODE, HAVE_XFMODE, HAVE_TFMODE): New.
        (Wtype_MAXp1_F): New.
        (DFtype, DCtype, __fixdfdi, __floatdidf, __fixunsdfSI, __fixunsdfDI,
        __powidf2, __divdc3, __muldc3): Protect with HAVE_DFMODE.

From-SVN: r95121

											
										
										
											2005-02-16 23:55:33 +01:00
+								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#if (defined(L_floatundisf) && LIBGCC2_HAS_SF_MODE)	\
 								     || (defined(L_floatundidf) && LIBGCC2_HAS_DF_MODE)
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								#define DI_SIZE (W_TYPE_SIZE * 2)
-												libgcc2.c (__floatdisf, [...]): Don't use IBM Extended Double TFmode.

	* libgcc2.c (__floatdisf, __floatdidf): Don't use IBM Extended
	Double TFmode.
	(__floatundisf, __floatundidf): Likewise.
	* libgcc2.h (IS_IBM_EXTENDED): Define.

From-SVN: r110004

											
										
										
											2006-01-20 01:42:29 +01:00
+								#define F_MODE_OK(SIZE) \
 								  (SIZE < DI_SIZE							\
 								   && SIZE > (DI_SIZE - SIZE + FSSIZE)					\
-												libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from IS_IBM_EXTENDED.

        * libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from 
        IS_IBM_EXTENDED.  Also define in terms of WIDEST_HARDWARE_FP_SIZE.
        * libgcc2.c (__floatdisf): Avoid double-word arithmetic when
        looking for non-zero bits shifted out.  Avoid a recursive call
        when constructing the scalar.
        (__floatundisf): Likewise.

From-SVN: r124106

											
										
										
											2007-04-24 17:28:21 +02:00
+								   && !AVOID_FP_TYPE_CONVERSION(SIZE))
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#if defined(L_floatundisf)
 								#define FUNC __floatundisf
 								#define FSTYPE SFtype
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#define FSSIZE __LIBGCC_SF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#else
 								#define FUNC __floatundidf
 								#define FSTYPE DFtype
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#define FSSIZE __LIBGCC_DF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#endif
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								FSTYPE
 								FUNC (UDWtype u)
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								{
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#if FSSIZE >= W_TYPE_SIZE
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								  /* When the word size is small, we never get any rounding error.  */
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								  FSTYPE f = (UWtype) (u >> W_TYPE_SIZE);
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								  f *= Wtype_MAXp1_F;
 								  f += (UWtype)u;
 								  return f;
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#elif (LIBGCC2_HAS_DF_MODE && F_MODE_OK (__LIBGCC_DF_MANT_DIG__))	\
 								     || (LIBGCC2_HAS_XF_MODE && F_MODE_OK (__LIBGCC_XF_MANT_DIG__))	\
 								     || (LIBGCC2_HAS_TF_MODE && F_MODE_OK (__LIBGCC_TF_MANT_DIG__))
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#if (LIBGCC2_HAS_DF_MODE && F_MODE_OK (__LIBGCC_DF_MANT_DIG__))
 								# define FSIZE __LIBGCC_DF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# define FTYPE DFtype
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#elif (LIBGCC2_HAS_XF_MODE && F_MODE_OK (__LIBGCC_XF_MANT_DIG__))
 								# define FSIZE __LIBGCC_XF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# define FTYPE XFtype
-												Remove SF_SIZE etc. target macros.

gcc:
	* config/i386/cygming.h (TF_SIZE): Remove.
	* config/i386/darwin.h (TF_SIZE): Remove.
	* config/i386/dragonfly.h (TF_SIZE): Remove.
	* config/i386/freebsd.h (TF_SIZE): Remove.
	* config/i386/gnu-user-common.h (TF_SIZE): Remove.
	* config/i386/openbsdelf.h (TF_SIZE): Remove.
	* config/i386/sol2.h (TF_SIZE): Remove.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Remove.
	* config/ia64/linux.h (TF_SIZE): Remove.
	* doc/tm.texi.in (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Remove.
	* doc/tm.texi: Regenerate.
	* system.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Poison.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define macros for mantissa
	digits of floating-point modes if -fbuilding-libgcc.

libgcc:
	* libgcc2.c (SF_SIZE): Change all uses to __LIBGCC_SF_MANT_DIG__.
	(DF_SIZE): Change all uses to __LIBGCC_DF_MANT_DIG__.
	(XF_SIZE): Change all uses to __LIBGCC_XF_MANT_DIG__.
	(TF_SIZE): Change all uses to __LIBGCC_TF_MANT_DIG__.
	* libgcc2.h (SF_SIZE): Change to __LIBGCC_SF_MANT_DIG__.  Give
	error if not defined and LIBGCC2_HAS_SF_MODE is defined.
	(DF_SIZE): Change to __LIBGCC_DF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_DF_MODE is defined.
	(XF_SIZE): Change to __LIBGCC_XF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_XF_MODE is defined.
	(TF_SIZE): Change to __LIBGCC_TF_MANT_DIG__.  Give error if not
	defined and LIBGCC2_HAS_TF_MODE is defined.

From-SVN: r215014

											
										
										
											2014-09-08 14:25:35 +02:00
+								#elif (LIBGCC2_HAS_TF_MODE && F_MODE_OK (__LIBGCC_TF_MANT_DIG__))
 								# define FSIZE __LIBGCC_TF_MANT_DIG__
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								# define FTYPE TFtype
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								#else
 								# error
 								#endif
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#define REP_BIT ((UDWtype) 1 << (DI_SIZE - FSIZE))
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
 								  /* Protect against double-rounding error.
 								     Represent any low-order bits, that might be truncated by a bit that
 								     won't be lost.  The bit can go in anywhere below the rounding position
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								     of the FSTYPE.  A fixed mask and bit position handles all usual
 								     configurations.  */
 								  if (u >= ((UDWtype) 1 << FSIZE))
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								    {
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								      if ((UDWtype) u & (REP_BIT - 1))
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+									{
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+									  u &= ~ (REP_BIT - 1);
 									  u |= REP_BIT;
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+									}
 								    }
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								  /* Do the calculation in a wider type so that we don't lose any of
 								     the precision of the high word while multiplying it.  */
 								  FTYPE f = (UWtype) (u >> W_TYPE_SIZE);
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								  f *= Wtype_MAXp1_F;
 								  f += (UWtype)u;
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								  return (FSTYPE) f;
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								#else
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								#if FSSIZE == W_TYPE_SIZE - 1
 								# error
 								#endif
 								  /* Finally, the word size is larger than the number of bits in the
 								     required FSTYPE, and we've got no suitable wider type.  The only
 								     way to avoid double rounding is to special case the
 								     extraction.  */
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
 								  /* If there are no high bits set, fall back to one conversion.  */
 								  if ((UWtype)u == u)
-												re PR other/25028 (TImode-to-floating conversions broken)

	PR other/25028
	* libgcc2.h (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Define.
	* libgcc2.c (__floatdixf, __floatundixf, __floatditf,
	__floatunditf): Use #error if type sizes don't match requirements
	of implementation.
	(__floatdisf, __floatdidf): Unify.  Possibly use XFmode or TFmode
	as wider floating-point type.  Use #error if type sizes don't
	match requirements of implementation.  Avoid overflow in computing
	Wtype_MAXp1_F * Wtype_MAXp1_F.  When special casing conversion,
	shift one more bit.  Cast 1 to DWtype or UDWtype for shifting.
	(__floatundisf, __floatundidf): Likewise.
	* config/ia64/hpux.h (XF_SIZE, TF_SIZE): Define.
	* config/ia64/ia64.c (ia64_init_libfuncs): Use
	_U_Qfcnvfxt_quad_to_quad and _U_Qfcnvxf_quad_to_quad for
	TFmode-TImode conversions.
	* doc/tm.texi (SF_SIZE, DF_SIZE, XF_SIZE, TF_SIZE): Document.

testsuite:
	* gcc.dg/torture/fp-int-convert-timode.c: Only XFAIL for LP64 IA64
	HP-UX.

From-SVN: r108598

											
										
										
											2005-12-15 22:50:10 +01:00
+								    return (FSTYPE)(UWtype)u;
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
 								  /* Otherwise, find the power of two.  */
 								  UWtype hi = u >> W_TYPE_SIZE;
 								  UWtype count, shift;
 								  count_leading_zeros (count, hi);
 								  shift = W_TYPE_SIZE - count;
 								  /* Shift down the most significant bits.  */
 								  hi = u >> shift;
 								  /* If we lost any nonzero bits, set the lsb to ensure correct rounding.  */
-												libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from IS_IBM_EXTENDED.

        * libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from 
        IS_IBM_EXTENDED.  Also define in terms of WIDEST_HARDWARE_FP_SIZE.
        * libgcc2.c (__floatdisf): Avoid double-word arithmetic when
        looking for non-zero bits shifted out.  Avoid a recursive call
        when constructing the scalar.
        (__floatundisf): Likewise.

From-SVN: r124106

											
										
										
											2007-04-24 17:28:21 +02:00
+								  if ((UWtype)u << (W_TYPE_SIZE - shift))
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								    hi |= 1;
 								  /* Convert the one word of data, and rescale.  */
-												libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from IS_IBM_EXTENDED.

        * libgcc2.h (AVOID_FP_TYPE_CONVERSION): Rename from 
        IS_IBM_EXTENDED.  Also define in terms of WIDEST_HARDWARE_FP_SIZE.
        * libgcc2.c (__floatdisf): Avoid double-word arithmetic when
        looking for non-zero bits shifted out.  Avoid a recursive call
        when constructing the scalar.
        (__floatundisf): Likewise.

From-SVN: r124106

											
										
										
											2007-04-24 17:28:21 +02:00
+								  FSTYPE f = hi, e;
 								  if (shift == W_TYPE_SIZE)
 								    e = Wtype_MAXp1_F;
 								  /* The following two cases could be merged if we knew that the target
 								     supported a native unsigned->float conversion.  More often, we only
 								     have a signed conversion, and have to add extra fixup code.  */
 								  else if (shift == W_TYPE_SIZE - 1)
 								    e = Wtype_MAXp1_F / 2;
 								  else
 								    e = (Wtype)1 << shift;
 								  return f * e;
-												fp-bit.c (clzusi): New function.

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
	* gcc.c-torture/execute/floatunsisf-1.c: New test.

From-SVN: r107345

											
										
										
											2005-11-22 01:38:30 +01:00
+								#endif
 								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf.
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_fixunsxfsi) && LIBGCC2_HAS_XF_MODE
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								UWtype
-												configure.in (alpha*-*-*): Add config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Add config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								__fixunsxfSI (XFtype a)
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								{
-												libgcc2.h (Wtype_MAX, Wtype_MIN): Define.

* libgcc2.h (Wtype_MAX, Wtype_MIN): Define.
* libgcc2.c (__fixunssfSI, __fixunsdfSI, __fixunsxfSI): Use
Wtype_MIN instead of LONG_MIN.

From-SVN: r39365

											
										
										
											2001-01-31 04:53:32 +01:00
+								  if (a >= - (DFtype) Wtype_MIN)
 								    return (Wtype) (a + Wtype_MIN) - Wtype_MIN;
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  return (Wtype) a;
-												(XFtype): Do define it, if LONG_DOUBLE_TYPE_SIZE == 96.

(__fixunsxfdi): New function, if LONG_DOUBLE_TYPE_SIZE == 96.
(__fixxfdi, __floatdixf, __fixunsxfsi): Likewise.

From-SVN: r4000

											
										
										
											1993-04-04 09:18:03 +02:00
+								}
 								#endif
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								#if defined(L_fixunsdfsi) && LIBGCC2_HAS_DF_MODE
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								UWtype
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Add config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								__fixunsdfSI (DFtype a)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												libgcc2.h (Wtype_MAX, Wtype_MIN): Define.

* libgcc2.h (Wtype_MAX, Wtype_MIN): Define.
* libgcc2.c (__fixunssfSI, __fixunsdfSI, __fixunsxfSI): Use
Wtype_MIN instead of LONG_MIN.

From-SVN: r39365

											
										
										
											2001-01-31 04:53:32 +01:00
+								  if (a >= - (DFtype) Wtype_MIN)
 								    return (Wtype) (a + Wtype_MIN) - Wtype_MIN;
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  return (Wtype) a;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif
-												libgcc2.h (LIBGCC2_HAS_SF_MODE): New macro.

	* libgcc2.h (LIBGCC2_HAS_SF_MODE): New macro.
	(LIBGCC2_HAS_DF_MODE, LIBGCC2_HAS_TF_MODE, LIBGCC2_HAS_XF_MODE): Make
	the defaults false if BITS_PER_UNIT != 8.
	(SFtype, SCtype, __fixsfdi, __floatdisf, __fixunssfSI, __fixunssfDI)
	(__powisf2, __divsc3, __mulsc3): Guard with LIBGCC2_HAS_SF_MODE rather
	than BITS_PER_UNIT != 8.
	(L_fixdfdi, L_fixsfdi, L_fixtfdi, L_fixunsdfdi, L_fixunsdfsi)
	(L_fixunssfdi, L_fixunssfsi, L_fixunstfdi, L_fixunsxfdi, L_fixunsxfsi)
	(L_fixxfdi, L_floatdidf, L_floatdisf, L_floatditf, L_floatdixf): Remove
	#undefs.
	* libgcc2.c (__fixunssfDI, __fixsfdi, __floatdisf, __fixunssfSI)
	(__powisf2, __divsc3, __mulsc3): Guard with LIBGCC2_HAS_SF_MODE.

From-SVN: r96778

											
										
										
											2005-03-21 08:22:22 +01:00
+								#if defined(L_fixunssfsi) && LIBGCC2_HAS_SF_MODE
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								UWtype
-												configure.in (alpha*-*-*): Aad config/alpha/t-alpha.

	* configure.in (alpha*-*-*): Add config/alpha/t-alpha.
	* configure: Rebuilt.
	* libgcc2.c (__fixunstfDI): Renamed from __fixunstfdi.
	(__fixunsxfDI): Renamed from __fixunsxfdi.
	(__fixunsdfDI): Renamed from __fixunsdfdi.
	(__fixunssfDI): Renamed from __fixunssfdi.
	(__floatdisf): Use proper type in REP_BIT macro.
	(__fixunsxfSI): Renamed from __fixunsxfsi.
	(__fixunsdfSI): Renamed from __fixunsdfsi.
	(__fixunssfSI): Renamed from __fixunssfsi.
	* libgcc2.h: Add cases for MIN_UNITS_PER_WORD > 4.
	Change location of macros and upper-case some names as above.
	* longlong.h ([alpha]): Use PARAMS, not __P in decl of __udiv__qrnnd.
	* config/alpha/t-alpha, config/alpha/qrnnd.asm: New files.

From-SVN: r33166

											
										
										
											2000-04-15 18:34:38 +02:00
+								__fixunssfSI (SFtype a)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												libgcc2.h (Wtype_MAX, Wtype_MIN): Define.

* libgcc2.h (Wtype_MAX, Wtype_MIN): Define.
* libgcc2.c (__fixunssfSI, __fixunsdfSI, __fixunsxfSI): Use
Wtype_MIN instead of LONG_MIN.

From-SVN: r39365

											
										
										
											2001-01-31 04:53:32 +01:00
+								  if (a >= - (SFtype) Wtype_MIN)
 								    return (Wtype) (a + Wtype_MIN) - Wtype_MIN;
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								  return (Wtype) a;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
-												re PR middle-end/19402 (__builtin_powi? still missing)

2005-02-09  Richard Guenther  <rguenth@gcc.gnu.org>

	PR middle-end/19402

	* builtins.def: New __builtin_powi[lf].
	* builtins.c (mathfn_built_in): Handle BUILT_IN_POWI.
	(expand_builtin_powi): New function.
	(expand_builtin): Dispatch to expand_builtin_powi.
	* libgcc2.h: Add prototypes for __builtin_powi[lf].
	* libgcc2.c: Add __builtin_powi[lf] implementation.
	* mklibgcc.in: Add __builtin_powi[lf] to lib2funcs.
	* optabs.h: Add powi_optab.
	* optabs.c (init_optabs): Initialize powi_optab.
	* doc/extend.texi: Document __builtin_powi[lf].

	* gcc.dg/pr19402-1.c: New testcase.
	* gcc.dg/pr19402-2.c: likewise.

From-SVN: r94774

											
										
										
											2005-02-09 21:58:13 +01:00
+								#endif
 								/* Integer power helper used from __builtin_powi for non-constant
 								   exponents.  */
-												libgcc2.h (LIBGCC2_HAS_SF_MODE): New macro.

	* libgcc2.h (LIBGCC2_HAS_SF_MODE): New macro.
	(LIBGCC2_HAS_DF_MODE, LIBGCC2_HAS_TF_MODE, LIBGCC2_HAS_XF_MODE): Make
	the defaults false if BITS_PER_UNIT != 8.
	(SFtype, SCtype, __fixsfdi, __floatdisf, __fixunssfSI, __fixunssfDI)
	(__powisf2, __divsc3, __mulsc3): Guard with LIBGCC2_HAS_SF_MODE rather
	than BITS_PER_UNIT != 8.
	(L_fixdfdi, L_fixsfdi, L_fixtfdi, L_fixunsdfdi, L_fixunsdfsi)
	(L_fixunssfdi, L_fixunssfsi, L_fixunstfdi, L_fixunsxfdi, L_fixunsxfsi)
	(L_fixxfdi, L_floatdidf, L_floatdisf, L_floatditf, L_floatdixf): Remove
	#undefs.
	* libgcc2.c (__fixunssfDI, __fixsfdi, __floatdisf, __fixunssfSI)
	(__powisf2, __divsc3, __mulsc3): Guard with LIBGCC2_HAS_SF_MODE.

From-SVN: r96778

											
										
										
											2005-03-21 08:22:22 +01:00
+								#if (defined(L_powisf2) && LIBGCC2_HAS_SF_MODE) \
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								    || (defined(L_powidf2) && LIBGCC2_HAS_DF_MODE) \
 								    || (defined(L_powixf2) && LIBGCC2_HAS_XF_MODE) \
 								    || (defined(L_powitf2) && LIBGCC2_HAS_TF_MODE)
-												re PR middle-end/19402 (__builtin_powi? still missing)

2005-02-09  Richard Guenther  <rguenth@gcc.gnu.org>

	PR middle-end/19402

	* builtins.def: New __builtin_powi[lf].
	* builtins.c (mathfn_built_in): Handle BUILT_IN_POWI.
	(expand_builtin_powi): New function.
	(expand_builtin): Dispatch to expand_builtin_powi.
	* libgcc2.h: Add prototypes for __builtin_powi[lf].
	* libgcc2.c: Add __builtin_powi[lf] implementation.
	* mklibgcc.in: Add __builtin_powi[lf] to lib2funcs.
	* optabs.h: Add powi_optab.
	* optabs.c (init_optabs): Initialize powi_optab.
	* doc/extend.texi: Document __builtin_powi[lf].

	* gcc.dg/pr19402-1.c: New testcase.
	* gcc.dg/pr19402-2.c: likewise.

From-SVN: r94774

											
										
										
											2005-02-09 21:58:13 +01:00
+								# if defined(L_powisf2)
 								#  define TYPE SFtype
 								#  define NAME __powisf2
 								# elif defined(L_powidf2)
 								#  define TYPE DFtype
 								#  define NAME __powidf2
 								# elif defined(L_powixf2)
 								#  define TYPE XFtype
 								#  define NAME __powixf2
 								# elif defined(L_powitf2)
 								#  define TYPE TFtype
 								#  define NAME __powitf2
 								# endif
-												Fix problem with calling powi* builtins.

OKed by Richard Henderson.

From-SVN: r97278

											
										
										
											2005-03-30 22:59:21 +02:00
+								#undef int
 								#undef unsigned
-												re PR middle-end/19402 (__builtin_powi? still missing)

2005-02-09  Richard Guenther  <rguenth@gcc.gnu.org>

	PR middle-end/19402

	* builtins.def: New __builtin_powi[lf].
	* builtins.c (mathfn_built_in): Handle BUILT_IN_POWI.
	(expand_builtin_powi): New function.
	(expand_builtin): Dispatch to expand_builtin_powi.
	* libgcc2.h: Add prototypes for __builtin_powi[lf].
	* libgcc2.c: Add __builtin_powi[lf] implementation.
	* mklibgcc.in: Add __builtin_powi[lf] to lib2funcs.
	* optabs.h: Add powi_optab.
	* optabs.c (init_optabs): Initialize powi_optab.
	* doc/extend.texi: Document __builtin_powi[lf].

	* gcc.dg/pr19402-1.c: New testcase.
	* gcc.dg/pr19402-2.c: likewise.

From-SVN: r94774

											
										
										
											2005-02-09 21:58:13 +01:00
+								TYPE
-												Fix problem with calling powi* builtins.

OKed by Richard Henderson.

From-SVN: r97278

											
										
										
											2005-03-30 22:59:21 +02:00
+								NAME (TYPE x, int m)
-												re PR middle-end/19402 (__builtin_powi? still missing)

2005-02-09  Richard Guenther  <rguenth@gcc.gnu.org>

	PR middle-end/19402

	* builtins.def: New __builtin_powi[lf].
	* builtins.c (mathfn_built_in): Handle BUILT_IN_POWI.
	(expand_builtin_powi): New function.
	(expand_builtin): Dispatch to expand_builtin_powi.
	* libgcc2.h: Add prototypes for __builtin_powi[lf].
	* libgcc2.c: Add __builtin_powi[lf] implementation.
	* mklibgcc.in: Add __builtin_powi[lf] to lib2funcs.
	* optabs.h: Add powi_optab.
	* optabs.c (init_optabs): Initialize powi_optab.
	* doc/extend.texi: Document __builtin_powi[lf].

	* gcc.dg/pr19402-1.c: New testcase.
	* gcc.dg/pr19402-2.c: likewise.

From-SVN: r94774

											
										
										
											2005-02-09 21:58:13 +01:00
+								{
-												libgcc: Avoid signed negation overflow in __powi?f2 [PR99236]

When these functions are called with integer minimum, there is UB on the libgcc
side.  Fixed in the obvious way, the code in the end wants ABSU_EXPR behavior.

2021-02-24  Jakub Jelinek  <jakub@redhat.com>

	PR libgcc/99236
	* libgcc2.c (__powisf2, __powidf2, __powitf2, __powixf2): Perform
	negation of m in unsigned type.

											
										
										
											2021-02-24 20:07:38 +01:00
+								  unsigned int n = m < 0 ? -(unsigned int) m : (unsigned int) m;
-												re PR middle-end/19402 (__builtin_powi? still missing)

2005-02-09  Richard Guenther  <rguenth@gcc.gnu.org>

	PR middle-end/19402

	* builtins.def: New __builtin_powi[lf].
	* builtins.c (mathfn_built_in): Handle BUILT_IN_POWI.
	(expand_builtin_powi): New function.
	(expand_builtin): Dispatch to expand_builtin_powi.
	* libgcc2.h: Add prototypes for __builtin_powi[lf].
	* libgcc2.c: Add __builtin_powi[lf] implementation.
	* mklibgcc.in: Add __builtin_powi[lf] to lib2funcs.
	* optabs.h: Add powi_optab.
	* optabs.c (init_optabs): Initialize powi_optab.
	* doc/extend.texi: Document __builtin_powi[lf].

	* gcc.dg/pr19402-1.c: New testcase.
	* gcc.dg/pr19402-2.c: likewise.

From-SVN: r94774

											
										
										
											2005-02-09 21:58:13 +01:00
+								  TYPE y = n % 2 ? x : 1;
 								  while (n >>= 1)
 								    {
 								      x = x * x;
 								      if (n % 2)
 									y = y * x;
 								    }
 								  return m < 0 ? 1/y : y;
 								}
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								#endif
-												[Patch libgcc] Enable HCmode multiply and divide (mulhc3/divhc3)

This patch arranges for half-precision complex multiply and divide
routines to be built if __LIBGCC_HAS_HF_MODE__.  This will be true
if the target supports the _Float16 type.

libgcc/

	PR target/63250
	*  Makefile.in (lib2funcs): Build _mulhc3 and _divhc3.
	* libgcc2.h (LIBGCC_HAS_HF_MODE): Conditionally define.
	(HFtype): Likewise.
	(HCtype): Likewise.
	(__divhc3): Likewise.
	(__mulhc3): Likewise.
	* libgcc2.c: Support _mulhc3 and _divhc3.

From-SVN: r240043

											
										
										
											2016-09-09 11:40:22 +02:00
+								#if((defined(L_mulhc3) || defined(L_divhc3)) && LIBGCC2_HAS_HF_MODE) \
 								    || ((defined(L_mulsc3) || defined(L_divsc3)) && LIBGCC2_HAS_SF_MODE) \
-												re PR target/19930 (gcc.dg/pr19402-2.c fails on ia64-hpux)

	PR target/19930
	* doc/tm.texi (LIBGCC2_LONG_DOUBLE_TYPE_SIZE): Document.
	(LIBGCC2_HAS_DF_MODE): New.
	(LIBGCC2_HAS_XF_MODE): New.
	(LIBGCC2_HAS_TF_MODE): New.
	* libgcc2.h (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	* libgcc2.c (LIBGCC2_HAS_XF_MODE): New name for HAVE_XFMODE.
	(LIBGCC2_HAS_TF_MODE): New name for HAVE_TFMODE.
	(LIBGCC2_HAS_DF_MODE): New name for HAVE_DFMODE.
	* config/ia64/t-ia64 (LIB1ASMFUNCS): Remove __compat
	and add _fixtfdi, _fixunstfdi, _floatditf
	* lib1funcs.asm: Remove L__compat. Add L_fixtfdi,
	L_fixunstfdi, L_floatditf.
	* config/ia64/hpux.h (LIBGCC2_HAS_XF_MODE): Define.
	(LIBGCC2_HAS_TF_MODE): Define.

From-SVN: r95548

											
										
										
											2005-02-25 22:34:49 +01:00
+								    || ((defined(L_muldc3) || defined(L_divdc3)) && LIBGCC2_HAS_DF_MODE) \
 								    || ((defined(L_mulxc3) || defined(L_divxc3)) && LIBGCC2_HAS_XF_MODE) \
 								    || ((defined(L_multc3) || defined(L_divtc3)) && LIBGCC2_HAS_TF_MODE)
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
 								#undef float
 								#undef double
 								#undef long
-												[Patch libgcc] Enable HCmode multiply and divide (mulhc3/divhc3)

This patch arranges for half-precision complex multiply and divide
routines to be built if __LIBGCC_HAS_HF_MODE__.  This will be true
if the target supports the _Float16 type.

libgcc/

	PR target/63250
	*  Makefile.in (lib2funcs): Build _mulhc3 and _divhc3.
	* libgcc2.h (LIBGCC_HAS_HF_MODE): Conditionally define.
	(HFtype): Likewise.
	(HCtype): Likewise.
	(__divhc3): Likewise.
	(__mulhc3): Likewise.
	* libgcc2.c: Support _mulhc3 and _divhc3.

From-SVN: r240043

											
										
										
											2016-09-09 11:40:22 +02:00
+								#if defined(L_mulhc3) || defined(L_divhc3)
 								# define MTYPE	HFtype
 								# define CTYPE	HCtype
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values,
so that all input values can be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if (fabs(c) < fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly 12
times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div shows a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting
results where the low 24 bits of the mantissa differ from the correct
answer, and more than 1.1% of cases where the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook."
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								# define AMTYPE SFtype
-												[Patch libgcc] Enable HCmode multiply and divide (mulhc3/divhc3)

This patch arranges for half-precision complex multiply and divide
routines to be built if __LIBGCC_HAS_HF_MODE__.  This will be true
if the target supports the _Float16 type.

libgcc/

	PR target/63250
	*  Makefile.in (lib2funcs): Build _mulhc3 and _divhc3.
	* libgcc2.h (LIBGCC_HAS_HF_MODE): Conditionally define.
	(HFtype): Likewise.
	(HCtype): Likewise.
	(__divhc3): Likewise.
	(__mulhc3): Likewise.
	* libgcc2.c: Support _mulhc3 and _divhc3.

From-SVN: r240043

											
										
										
											2016-09-09 11:40:22 +02:00
+								# define MODE	hc
 								# define CEXT	__LIBGCC_HF_FUNC_EXT__
 								# define NOTRUNC (!__LIBGCC_HF_EXCESS_PRECISION__)
 								#elif defined(L_mulsc3) || defined(L_divsc3)
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								# define MTYPE	SFtype
 								# define CTYPE	SCtype
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current libgcc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input values and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values,
so that all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom   ;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly
12 times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div shows a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting
results where the low 24 bits of the mantissa differ from the correct
answer, and more than 1.1% of cases where the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook."
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								# define AMTYPE DFtype
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								# define MODE	sc
-												Remove LIBGCC2_TF_CEXT target macro.

This patch removes the (undocumented) LIBGCC2_TF_CEXT target macro,
replacing it by -fbuilding-libgcc predefines (and thereby gets rid of
another LIBGCC2_LONG_DOUBLE_TYPE_SIZE conditional, though some more
patches are needed before that target macro can be eliminated).  This
macro indicated the suffix used on __builtin_huge_val,
__builtin_copysign, __builtin_fabs built-in function names to produce
the names for a given floating-point mode.

Predefines are added for all floating-point modes supported for
libgcc, not just TFmode.  These are fully accurate for modes
corresponding to float, double and long double.  For other modes, the
suffix for *constants* is determined by the targetm.c.mode_for_suffix
hook (the limit to two possible suffixes 'w' and 'q' being hardcoded
in various places).  This is in fact the suffix for built-in functions
as well where such functions exist.

* For i386, the *q functions always exist (whether or not TFmode is
  used for long double).  The *w functions never exist (but this
  doesn't matter for libgcc, since no i386 configuration treats XFmode
  as a supported scalar mode if long double is TFmode; if __float80
  were to be supported for 64-bit Android, properly such functions
  ought to be added).

* For ia64, the *q functions exist for non-HP-UX (under HP-UX, long
  double is TFmode, so they aren't needed).  The *w functions never
  exist.  This is an issue for this libgcc code for the XFmode complex
  functions in libgcc on HP-UX; as I understand it, right now those
  will accidentally be using TFmode versions of those three functions,
  so involving unnecessary conversions, while the sanity check on CEXT
  accidentally passes because all it tests is the sizes of the types.

Because of the lack of 'w' functions, the patch uses 'l' when the
constant suffix is 'w', matching what the existing libgcc code would
do for IA64 HP-UX in that case.

Ideally there would be generic code to create such built-in functions
for all supported floating-point types.  That may be something to
consider if support for TS 18661-3 (standard bindings for IEEE
754-2008, defining names such as _Float128, and function names such as
copysignf128) is added in future.

Bootstrapped with no regressions on x86_64-unknown-linux-gnu.

gcc:
	* system.h (LIBGCC2_TF_CEXT): Poison.
	* config/i386/cygming.h (LIBGCC2_TF_CEXT): Remove.
	* config/i386/darwin.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/dragonfly.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/freebsd.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/gnu-user-common.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/openbsdelf.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/sol2.h (LIBGCC2_TF_CEXT): Likewise.
	* config/ia64/ia64.h (LIBGCC2_TF_CEXT): Likewise.
	* config/ia64/linux.h (LIBGCC2_TF_CEXT): Likewise.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define __LIBGCC_*_FUNC_EXT__
	for supported floating-point modes.

libgcc:
	* libgcc2.c (CEXT): Define using __LIBGCC_*_FUNC_EXT__.

From-SVN: r215368

											
										
										
											2014-09-19 01:27:26 +02:00
+								# define CEXT	__LIBGCC_SF_FUNC_EXT__
-												Correct libgcc complex multiply excess precision handling (PR libgcc/77519).

libgcc complex multiply is meant to eliminate excess
precision from certain internal values by forcing them to memory in
exactly those cases where the type has excess precision.  But in
https://gcc.gnu.org/ml/gcc-patches/2014-09/msg01894.html I
accidentally inverted the logic so that values get forced to memory in
exactly the cases where it's not needed.  (This is a pessimization in
the no-excess-precision case, in principle could lead to bad results
depending on code generation in the excess-precision case.  Note: I do
not have a test demonstrating bad results.)

Bootstrapped with no regressions on x86_64-pc-linux-gnu.  Code size
went down on x86_64 as expected; old sizes:

   text    data     bss     dec     hex filename
    887       0       0     887     377 _muldc3.o
    810       0       0     810     32a _mulsc3.o
   2032       0       0    2032     7f0 _multc3.o
    983       0       0     983     3d7 _mulxc3.o

New sizes:

    847       0       0     847     34f _muldc3.o
    770       0       0     770     302 _mulsc3.o
   2032       0       0    2032     7f0 _multc3.o
    951       0       0     951     3b7 _mulxc3.o

	PR libgcc/77519
	* libgcc2.c (NOTRUNC): Invert settings.

From-SVN: r240033

											
										
										
											2016-09-08 01:02:56 +02:00
+								# define NOTRUNC (!__LIBGCC_SF_EXCESS_PRECISION__)
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values,
so that all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom   ;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly 12
times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div shows a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer. More than 1.1% of cases where the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook,"
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								# define RBIG	(__LIBGCC_SF_MAX__ / 2)
 								# define RMIN	(__LIBGCC_SF_MIN__)
 								# define RMIN2	(__LIBGCC_SF_EPSILON__)
 								# define RMINSCAL (1 / __LIBGCC_SF_EPSILON__)
 								# define RMAX2	(RBIG * RMIN2)
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								#elif defined(L_muldc3) || defined(L_divdc3)
 								# define MTYPE	DFtype
 								# define CTYPE	DCtype
 								# define MODE	dc
-												Remove LIBGCC2_TF_CEXT target macro.

This patch removes the (undocumented) LIBGCC2_TF_CEXT target macro,
replacing it by -fbuilding-libgcc predefines (and thereby gets rid of
another LIBGCC2_LONG_DOUBLE_TYPE_SIZE conditional, though some more
patches are needed before that target macro can be eliminated).  This
macro indicated the suffix used on __builtin_huge_val,
__builtin_copysign, __builtin_fabs built-in function names to produce
the names for a given floating-point mode.

Predefines are added for all floating-point modes supported for
libgcc, not just TFmode.  These are fully accurate for modes
corresponding to float, double and long double.  For other modes, the
suffix for *constants* is determined by the targetm.c.mode_for_suffix
hook (the limit to two possible suffixes 'w' and 'q' being hardcoded
in various places).  This is in fact the suffix for built-in functions
as well where such functions exist.

* For i386, the *q functions always exist (whether or not TFmode is
  used for long double).  The *w functions never exist (but this
  doesn't matter for libgcc, since no i386 configuration treats XFmode
  as a supported scalar mode if long double is TFmode; if __float80
  were to be supported for 64-bit Android, properly such functions
  ought to be added).

* For ia64, the *q functions exist for non-HP-UX (under HP-UX, long
  double is TFmode, so they aren't needed).  The *w functions never
  exist.  This is an issue for this libgcc code for the XFmode complex
  functions in libgcc on HP-UX; as I understand it, right now those
  will accidentally be using TFmode versions of those three functions,
  so involving unnecessary conversions, while the sanity check on CEXT
  accidentally passes because all it tests is the sizes of the types.

Because of the lack of 'w' functions, the patch uses 'l' when the
constant suffix is 'w', matching what the existing libgcc code would
do for IA64 HP-UX in that case.

Ideally there would be generic code to create such built-in functions
for all supported floating-point types.  That may be something to
consider if support for TS 18661-3 (standard bindings for IEEE
754-2008, defining names such as _Float128, and function names such as
copysignf128) is added in future.

Bootstrapped with no regressions on x86_64-unknown-linux-gnu.

gcc:
	* system.h (LIBGCC2_TF_CEXT): Poison.
	* config/i386/cygming.h (LIBGCC2_TF_CEXT): Remove.
	* config/i386/darwin.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/dragonfly.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/freebsd.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/gnu-user-common.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/openbsdelf.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/sol2.h (LIBGCC2_TF_CEXT): Likewise.
	* config/ia64/ia64.h (LIBGCC2_TF_CEXT): Likewise.
	* config/ia64/linux.h (LIBGCC2_TF_CEXT): Likewise.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define __LIBGCC_*_FUNC_EXT__
	for supported floating-point modes.

libgcc:
	* libgcc2.c (CEXT): Define using __LIBGCC_*_FUNC_EXT__.

From-SVN: r215368

											
										
										
											2014-09-19 01:27:26 +02:00
+								# define CEXT	__LIBGCC_DF_FUNC_EXT__
-												Correct libgcc complex multiply excess precision handling (PR libgcc/77519).

libgcc complex multiply is meant to eliminate excess
precision from certain internal values by forcing them to memory in
exactly those cases where the type has excess precision.  But in
https://gcc.gnu.org/ml/gcc-patches/2014-09/msg01894.html I
accidentally inverted the logic so that values get forced to memory in
exactly the cases where it's not needed.  (This is a pessimization in
the no-excess-precision case, in principle could lead to bad results
depending on code generation in the excess-precision case.  Note: I do
not have a test demonstrating bad results.)

Bootstrapped with no regressions on x86_64-pc-linux-gnu.  Code size
went down on x86_64 as expected; old sizes:

   text    data     bss     dec     hex filename
    887       0       0     887     377 _muldc3.o
    810       0       0     810     32a _mulsc3.o
   2032       0       0    2032     7f0 _multc3.o
    983       0       0     983     3d7 _mulxc3.o

New sizes:

    847       0       0     847     34f _muldc3.o
    770       0       0     770     302 _mulsc3.o
   2032       0       0    2032     7f0 _multc3.o
    951       0       0     951     3b7 _mulxc3.o

	PR libgcc/77519
	* libgcc2.c (NOTRUNC): Invert settings.

From-SVN: r240033

											
										
										
											2016-09-08 01:02:56 +02:00
+								# define NOTRUNC (!__LIBGCC_DF_EXCESS_PRECISION__)
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values,
so that all input values can be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly
12 times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div show a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting
results where the low 24 bits of the mantissa differ from the correct
answer, and more than 1.1% of cases where the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook,"
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								# define RBIG	(__LIBGCC_DF_MAX__ / 2)
 								# define RMIN	(__LIBGCC_DF_MIN__)
 								# define RMIN2	(__LIBGCC_DF_EPSILON__)
 								# define RMINSCAL (1 / __LIBGCC_DF_EPSILON__)
 								# define RMAX2  (RBIG * RMIN2)
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								#elif defined(L_mulxc3) || defined(L_divxc3)
 								# define MTYPE	XFtype
 								# define CTYPE	XCtype
 								# define MODE	xc
-												Remove LIBGCC2_TF_CEXT target macro.

This patch removes the (undocumented) LIBGCC2_TF_CEXT target macro,
replacing it by -fbuilding-libgcc predefines (and thereby gets rid of
another LIBGCC2_LONG_DOUBLE_TYPE_SIZE conditional, though some more
patches are needed before that target macro can be eliminated).  This
macro indicated the suffix used on __builtin_huge_val,
__builtin_copysign, __builtin_fabs built-in function names to produce
the names for a given floating-point mode.

Predefines are added for all floating-point modes supported for
libgcc, not just TFmode.  These are fully accurate for modes
corresponding to float, double and long double.  For other modes, the
suffix for *constants* is determined by the targetm.c.mode_for_suffix
hook (the limit to two possible suffixes 'w' and 'q' being hardcoded
in various places).  This is in fact the suffix for built-in functions
as well where such functions exist.

* For i386, the *q functions always exist (whether or not TFmode is
  used for long double).  The *w functions never exist (but this
  doesn't matter for libgcc, since no i386 configuration treats XFmode
  as a supported scalar mode if long double is TFmode; if __float80
  were to be supported for 64-bit Android, properly such functions
  ought to be added).

* For ia64, the *q functions exist for non-HP-UX (under HP-UX, long
  double is TFmode, so they aren't needed).  The *w functions never
  exist.  This is an issue for this libgcc code for the XFmode complex
  functions in libgcc on HP-UX; as I understand it, right now those
  will accidentally be using TFmode versions of those three functions,
  so involving unnecessary conversions, while the sanity check on CEXT
  accidentally passes because all it tests is the sizes of the types.

Because of the lack of 'w' functions, the patch uses 'l' when the
constant suffix is 'w', matching what the existing libgcc code would
do for IA64 HP-UX in that case.

Ideally there would be generic code to create such built-in functions
for all supported floating-point types.  That may be something to
consider if support for TS 18661-3 (standard bindings for IEEE
754-2008, defining names such as _Float128, and function names such as
copysignf128) is added in future.

Bootstrapped with no regressions on x86_64-unknown-linux-gnu.

gcc:
	* system.h (LIBGCC2_TF_CEXT): Poison.
	* config/i386/cygming.h (LIBGCC2_TF_CEXT): Remove.
	* config/i386/darwin.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/dragonfly.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/freebsd.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/gnu-user-common.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/openbsdelf.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/sol2.h (LIBGCC2_TF_CEXT): Likewise.
	* config/ia64/ia64.h (LIBGCC2_TF_CEXT): Likewise.
	* config/ia64/linux.h (LIBGCC2_TF_CEXT): Likewise.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define __LIBGCC_*_FUNC_EXT__
	for supported floating-point modes.

libgcc:
	* libgcc2.c (CEXT): Define using __LIBGCC_*_FUNC_EXT__.

From-SVN: r215368

											
										
										
											2014-09-19 01:27:26 +02:00
+								# define CEXT	__LIBGCC_XF_FUNC_EXT__
-												Correct libgcc complex multiply excess precision handling (PR libgcc/77519).

libgcc complex multiply is meant to eliminate excess
precision from certain internal values by forcing them to memory in
exactly those cases where the type has excess precision.  But in
https://gcc.gnu.org/ml/gcc-patches/2014-09/msg01894.html I
accidentally inverted the logic so that values get forced to memory in
exactly the cases where it's not needed.  (This is a pessimization in
the no-excess-precision case, in principle could lead to bad results
depending on code generation in the excess-precision case.  Note: I do
not have a test demonstrating bad results.)

Bootstrapped with no regressions on x86_64-pc-linux-gnu.  Code size
went down on x86_64 as expected; old sizes:

   text    data     bss     dec     hex filename
    887       0       0     887     377 _muldc3.o
    810       0       0     810     32a _mulsc3.o
   2032       0       0    2032     7f0 _multc3.o
    983       0       0     983     3d7 _mulxc3.o

New sizes:

    847       0       0     847     34f _muldc3.o
    770       0       0     770     302 _mulsc3.o
   2032       0       0    2032     7f0 _multc3.o
    951       0       0     951     3b7 _mulxc3.o

	PR libgcc/77519
	* libgcc2.c (NOTRUNC): Invert settings.

From-SVN: r240033

											
										
										
											2016-09-08 01:02:56 +02:00
+								# define NOTRUNC (!__LIBGCC_XF_EXCESS_PRECISION__)
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current libgcc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough that, to shift any subnormal values to normal values,
all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom   ;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly 12
times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div show a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer, and more than 1.1% of cases where the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, but the 128-bit precision has extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook,"
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								# define RBIG	(__LIBGCC_XF_MAX__ / 2)
 								# define RMIN	(__LIBGCC_XF_MIN__)
 								# define RMIN2	(__LIBGCC_XF_EPSILON__)
 								# define RMINSCAL (1 / __LIBGCC_XF_EPSILON__)
 								# define RMAX2	(RBIG * RMIN2)
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								#elif defined(L_multc3) || defined(L_divtc3)
 								# define MTYPE	TFtype
 								# define CTYPE	TCtype
 								# define MODE	tc
-												Remove LIBGCC2_TF_CEXT target macro.

This patch removes the (undocumented) LIBGCC2_TF_CEXT target macro,
replacing it by -fbuilding-libgcc predefines (and thereby gets rid of
another LIBGCC2_LONG_DOUBLE_TYPE_SIZE conditional, though some more
patches are needed before that target macro can be eliminated).  This
macro indicated the suffix used on __builtin_huge_val,
__builtin_copysign, __builtin_fabs built-in function names to produce
the names for a given floating-point mode.

Predefines are added for all floating-point modes supported for
libgcc, not just TFmode.  These are fully accurate for modes
corresponding to float, double and long double.  For other modes, the
suffix for *constants* is determined by the targetm.c.mode_for_suffix
hook (the limit to two possible suffixes 'w' and 'q' being hardcoded
in various places).  This is in fact the suffix for built-in functions
as well where such functions exist.

* For i386, the *q functions always exist (whether or not TFmode is
  used for long double).  The *w functions never exist (but this
  doesn't matter for libgcc, since no i386 configuration treats XFmode
  as a supported scalar mode if long double is TFmode; if __float80
  were to be supported for 64-bit Android, properly such functions
  ought to be added).

* For ia64, the *q functions exist for non-HP-UX (under HP-UX, long
  double is TFmode, so they aren't needed).  The *w functions never
  exist.  This is an issue for this libgcc code for the XFmode complex
  functions in libgcc on HP-UX; as I understand it, right now those
  will accidentally be using TFmode versions of those three functions,
  so involving unnecessary conversions, while the sanity check on CEXT
  accidentally passes because all it tests is the sizes of the types.

Because of the lack of 'w' functions, the patch uses 'l' when the
constant suffix is 'w', matching what the existing libgcc code would
do for IA64 HP-UX in that case.

Ideally there would be generic code to create such built-in functions
for all supported floating-point types.  That may be something to
consider if support for TS 18661-3 (standard bindings for IEEE
754-2008, defining names such as _Float128, and function names such as
copysignf128) is added in future.

Bootstrapped with no regressions on x86_64-unknown-linux-gnu.

gcc:
	* system.h (LIBGCC2_TF_CEXT): Poison.
	* config/i386/cygming.h (LIBGCC2_TF_CEXT): Remove.
	* config/i386/darwin.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/dragonfly.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/freebsd.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/gnu-user-common.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/openbsdelf.h (LIBGCC2_TF_CEXT): Likewise.
	* config/i386/sol2.h (LIBGCC2_TF_CEXT): Likewise.
	* config/ia64/ia64.h (LIBGCC2_TF_CEXT): Likewise.
	* config/ia64/linux.h (LIBGCC2_TF_CEXT): Likewise.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Define __LIBGCC_*_FUNC_EXT__
	for supported floating-point modes.

libgcc:
	* libgcc2.c (CEXT): Define using __LIBGCC_*_FUNC_EXT__.

From-SVN: r215368

											
										
										
											2014-09-19 01:27:26 +02:00
+								# define CEXT	__LIBGCC_TF_FUNC_EXT__
-												Correct libgcc complex multiply excess precision handling (PR libgcc/77519).

libgcc complex multiply is meant to eliminate excess
precision from certain internal values by forcing them to memory in
exactly those cases where the type has excess precision.  But in
https://gcc.gnu.org/ml/gcc-patches/2014-09/msg01894.html I
accidentally inverted the logic so that values get forced to memory in
exactly the cases where it's not needed.  (This is a pessimization in
the no-excess-precision case, in principle could lead to bad results
depending on code generation in the excess-precision case.  Note: I do
not have a test demonstrating bad results.)

Bootstrapped with no regressions on x86_64-pc-linux-gnu.  Code size
went down on x86_64 as expected; old sizes:

   text    data     bss     dec     hex filename
    887       0       0     887     377 _muldc3.o
    810       0       0     810     32a _mulsc3.o
   2032       0       0    2032     7f0 _multc3.o
    983       0       0     983     3d7 _mulxc3.o

New sizes:

    847       0       0     847     34f _muldc3.o
    770       0       0     770     302 _mulsc3.o
   2032       0       0    2032     7f0 _multc3.o
    951       0       0     951     3b7 _mulxc3.o

	PR libgcc/77519
	* libgcc2.c (NOTRUNC): Invert settings.

From-SVN: r240033

											
										
										
											2016-09-08 01:02:56 +02:00
+								# define NOTRUNC (!__LIBGCC_TF_EXCESS_PRECISION__)
-												Fix for powerpc64 long double complex divide failure

- - - -

New in version 6: Due to an oversight (i.e. coding error), version 5
changed the use of __LIBGCC_TF_EPSILON__ to __LIBGCC_DF_EPSILON__ but
not the other LIBGCC_TF values. For correct execution of the long
double test case it is necessary to also switch to using
__LIBGCC_DF_MIN__. For consistency we also switch to using
__LIBGCC_DF_MAX__. LDBL_MIN is 2**53 times larger than DBL_MIN.
The larger value causes the code to switch the order of computation
when it is not optimal, resulting in failure for one of the values
in the cdivchk_ld.c test. Using DBL_MIN does not cause that failure.

There may be opportunity for further refinement of IBM128 format
Long Double complex divide, but that's beyond the scope of this
patch.

- - - -

This revision adds a test in libgcc/libgcc2.c for when
"__LIBGCC_TF_MANT_DIG__ == 106" to use __LIBGCC_DF_EPSILON__ instead
of __LIBGCC_TF_EPSILON__. That is specific to IBM 128-bit format long
doubles where EPSILON is very, very small and 1/EPSILON overflows to
infinity. This change avoids the overflow without affecting any other
platform. Discussion in the patch is adjusted to reflect this
limitation.

It does not make any changes to .../rs6000/_divkc3.c, leaving it to
use __LIBGCC_KF__*. That means the upstream gcc will not build in
older IBM environments that do not recognize the KF floating point
mode properly. Environments that do not need IBM longdouble support
do build cleanly.

- - - -
This patch addresses the failure of powerpc64 long double complex divide
in native ibm long double format after the patch "Practical improvement
to libgcc complex divide".

The new code uses the following macros which are intended to be mapped
to appropriate values according to the underlying hardware representation.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101104

RBIG     a value near the maximum representation
RMIN     a value near the minimum representation
         (but not in the subnormal range)
RMIN2    a value moderately less than 1
RMINSCAL the inverse of RMIN2
RMAX2    RBIG * RMIN2  - a value to limit scaling to not overflow

When "long double" values were not using the IEEE 128-bit format but
the traditional IBM 128-bit, the previous code used the LDBL values
which caused overflow for RMINSCAL. The new code uses the DBL values.

RBIG  LDBL_MAX = 0x1.fffffffffffff800p+1022
      DBL_MAX  = 0x1.fffffffffffff000p+1022

RMIN  LDBL_MIN = 0x1.0000000000000000p-969
RMIN  DBL_MIN  = 0x1.0000000000000000p-1022

RMIN2 LDBL_EPSILON = 0x0.0000000000001000p-1022 = 0x1.0p-1074
RMIN2 DBL_EPSILON  = 0x1.0000000000000000p-52

RMINSCAL 1/LDBL_EPSILON = inf (1.0p+1074 does not fit in IBM 128-bit).
         1/DBL_EPSILON  = 0x1.0000000000000000p+52

RMAX2 = RBIG * RMIN2 = 0x1.fffffffffffff800p-52
        RBIG * RMIN2 = 0x1.fffffffffffff000p+970

The MAX and MIN values have only modest changes since the maximum and
minimum values are about the same as for double precision.  The
EPSILON field is considerably different. Due to how very small values
can be represented in the lower 64 bits of the IBM 128-bit floating
point, EPSILON is extremely small, so far beyond the desired value
that inversion of the value overflows and even without the overflow,
the RMAX2 is so small as to eliminate most usage of the test.

The change has been tested on gcc135.fsffrance.org and gains the
expected improvements in accuracy for long double complex divide.

libgcc/
	PR target/101104
	* libgcc2.c (RMIN2, RMINSCAL, RMAX2):
	Use more correct values for native IBM 128-bit.

											
										
										
											2021-10-04 00:07:06 +02:00
+								# if __LIBGCC_TF_MANT_DIG__ == 106
 								#  define RBIG	(__LIBGCC_DF_MAX__ / 2)
 								#  define RMIN	(__LIBGCC_DF_MIN__)
 								#  define RMIN2  (__LIBGCC_DF_EPSILON__)
 								#  define RMINSCAL (1 / __LIBGCC_DF_EPSILON__)
 								# else
 								#  define RBIG	(__LIBGCC_TF_MAX__ / 2)
 								#  define RMIN	(__LIBGCC_TF_MIN__)
 								#  define RMIN2	(__LIBGCC_TF_EPSILON__)
 								#  define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
 								# endif
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values,
so that all input values can be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if (fabs(c) < fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly
12 times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div show a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.5% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer, and more than 1.1% of cases where the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook."
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								# define RMAX2	(RBIG * RMIN2)
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								#else
 								# error
 								#endif
 								#define CONCAT3(A,B,C)	_CONCAT3(A,B,C)
 								#define _CONCAT3(A,B,C)	A##B##C
 								#define CONCAT2(A,B)	_CONCAT2(A,B)
 								#define _CONCAT2(A,B)	A##B
-												libgcc2.c (isnan): Use __builtin_isnan.

	* libgcc2.c (isnan): Use __builtin_isnan.
	(isfinite): Use __builtin_isfinite.
	(isinf): Use __builtin_isinf.

From-SVN: r264823

											
										
										
											2018-10-03 22:29:10 +02:00
+								#define isnan(x)	__builtin_isnan (x)
 								#define isfinite(x)	__builtin_isfinite (x)
 								#define isinf(x)	__builtin_isinf (x)
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
-												libgcc2.c (INFINITY): Use __builtin_huge_val...

	* libgcc2.c (INFINITY): Use __builtin_huge_val, not __builtin_inf,
	as the latter produces a warning when the target does not support
	infinity.

From-SVN: r145646

											
										
										
											2009-04-07 05:26:32 +02:00
+								#define INFINITY	CONCAT2(__builtin_huge_val, CEXT) ()
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								#define I		1i
 								/* Helpers to make the following code slightly less gross.  */
 								#define COPYSIGN	CONCAT2(__builtin_copysign, CEXT)
 								#define FABS		CONCAT2(__builtin_fabs, CEXT)
 								/* Verify that MTYPE matches up with CEXT.  */
 								extern void *compile_type_assert[sizeof(INFINITY) == sizeof(MTYPE) ? 1 : -1];
 								/* Ensure that we've lost any extra precision.  */
 								#if NOTRUNC
 								# define TRUNC(x)
 								#else
 								# define TRUNC(x)	__asm__ ("" : "=m"(x) : "m"(x))
 								#endif
-												[Patch libgcc] Enable HCmode multiply and divide (mulhc3/divhc3)

This patch arranges for half-precision complex multiply and divide
routines to be built if __LIBGCC_HAS_HF_MODE__.  This will be true
if the target supports the _Float16 type.

libgcc/

	PR target/63250
	*  Makefile.in (lib2funcs): Build _mulhc3 and _divhc3.
	* libgcc2.h (LIBGCC_HAS_HF_MODE): Conditionally define.
	(HFtype): Likewise.
	(HCtype): Likewise.
	(__divhc3): Likewise.
	(__mulhc3): Likewise.
	* libgcc2.c: Support _mulhc3 and _divhc3.

From-SVN: r240043

											
										
										
											2016-09-09 11:40:22 +02:00
+								#if defined(L_mulhc3) || defined(L_mulsc3) || defined(L_muldc3) \
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								    || defined(L_mulxc3) || defined(L_multc3)
 								CTYPE
 								CONCAT3(__mul,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
 								{
 								  MTYPE ac, bd, ad, bc, x, y;
-												re PR middle-end/37850 (infinite recursive call to __mulsc3 when multiplying not-constant complexs)

	PR middle-end/37850
	* libgcc2.c (__mulMODE3): Use explicit assignments to form the
	result.
	(__divMODE3): Likewise.

Co-Authored-By: Nathan Froyd <froydnj@codesourcery.com>

From-SVN: r144751

											
										
										
											2009-03-10 16:42:51 +01:00
+								  CTYPE res;
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
 								  ac = a * c;
 								  bd = b * d;
 								  ad = a * d;
 								  bc = b * c;
 								  TRUNC (ac);
 								  TRUNC (bd);
 								  TRUNC (ad);
 								  TRUNC (bc);
 								  x = ac - bd;
 								  y = ad + bc;
 								  if (isnan (x) && isnan (y))
 								    {
 								      /* Recover infinities that computed as NaN + iNaN.  */
 								      _Bool recalc = 0;
 								      if (isinf (a) || isinf (b))
 									{
 									  /* z is infinite.  "Box" the infinity and change NaNs in
 									     the other factor to 0.  */
 									  a = COPYSIGN (isinf (a) ? 1 : 0, a);
 									  b = COPYSIGN (isinf (b) ? 1 : 0, b);
 									  if (isnan (c)) c = COPYSIGN (0, c);
 									  if (isnan (d)) d = COPYSIGN (0, d);
 								          recalc = 1;
 									}
 								     if (isinf (c) || isinf (d))
 									{
 									  /* w is infinite.  "Box" the infinity and change NaNs in
 									     the other factor to 0.  */
 									  c = COPYSIGN (isinf (c) ? 1 : 0, c);
 									  d = COPYSIGN (isinf (d) ? 1 : 0, d);
 									  if (isnan (a)) a = COPYSIGN (0, a);
 									  if (isnan (b)) b = COPYSIGN (0, b);
 									  recalc = 1;
 									}
 								     if (!recalc
 									  && (isinf (ac) || isinf (bd)
 									      || isinf (ad) || isinf (bc)))
 									{
 									  /* Recover infinities from overflow by changing NaNs to 0.  */
 									  if (isnan (a)) a = COPYSIGN (0, a);
 									  if (isnan (b)) b = COPYSIGN (0, b);
 									  if (isnan (c)) c = COPYSIGN (0, c);
 									  if (isnan (d)) d = COPYSIGN (0, d);
 									  recalc = 1;
 									}
 								      if (recalc)
 									{
 									  x = INFINITY * (a * c - b * d);
 									  y = INFINITY * (a * d + b * c);
 									}
 								    }
-												re PR middle-end/37850 (infinite recursive call to __mulsc3 when multiplying not-constant complexs)

	PR middle-end/37850
	* libgcc2.c (__mulMODE3): Use explicit assignments to form the
	result.
	(__divMODE3): Likewise.

Co-Authored-By: Nathan Froyd <froydnj@codesourcery.com>

From-SVN: r144751

											
										
										
											2009-03-10 16:42:51 +01:00
+								  __real__ res = x;
 								  __imag__ res = y;
 								  return res;
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								}
 								#endif /* complex multiply */
-												[Patch libgcc] Enable HCmode multiply and divide (mulhc3/divhc3)

This patch arranges for half-precision complex multiply and divide
routines to be built if __LIBGCC_HAS_HF_MODE__.  This will be true
if the target supports the _Float16 type.

libgcc/

	PR target/63250
	*  Makefile.in (lib2funcs): Build _mulhc3 and _divhc3.
	* libgcc2.h (LIBGCC_HAS_HF_MODE): Conditionally define.
	(HFtype): Likewise.
	(HCtype): Likewise.
	(__divhc3): Likewise.
	(__mulhc3): Likewise.
	* libgcc2.c: Support _mulhc3 and _divhc3.

From-SVN: r240043

											
										
										
											2016-09-09 11:40:22 +02:00
+								#if defined(L_divhc3) || defined(L_divsc3) || defined(L_divdc3) \
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								    || defined(L_divxc3) || defined(L_divtc3)
 								CTYPE
 								CONCAT3(__div,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
 								{
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current libgcc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough that all input values can be scaled up, shifting any
subnormal values to normal values, without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom   ;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly
12 times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div shows a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer. More than 1.1% of cases where the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook."
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								#if defined(L_divhc3)						\
 								  || (defined(L_divsc3) && defined(__LIBGCC_HAVE_HWDBL__) )
 								  /* Half precision is handled with float precision.
 								     float is handled with double precision when double precision
 								     hardware is available.
 								     Due to the additional precision, the simple complex divide
 								     method (without Smith's method) is sufficient to get accurate
 								     answers and runs slightly faster than Smith's method.  */
 								  AMTYPE aa, bb, cc, dd;
 								  AMTYPE denom;
 								  MTYPE x, y;
 								  CTYPE res;
 								  aa = a;
 								  bb = b;
 								  cc = c;
 								  dd = d;
 								  denom = (cc * cc) + (dd * dd);
 								  x = ((aa * cc) + (bb * dd)) / denom;
 								  y = ((bb * cc) - (aa * dd)) / denom;
 								#else
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								  MTYPE denom, ratio, x, y;
-												re PR middle-end/37850 (infinite recursive call to __mulsc3 when multiplying not-constant complexs)

	PR middle-end/37850
	* libgcc2.c (__mulMODE3): Use explicit assignments to form the
	result.
	(__divMODE3): Likewise.

Co-Authored-By: Nathan Froyd <froydnj@codesourcery.com>

From-SVN: r144751

											
										
										
											2009-03-10 16:42:51 +01:00
+								  CTYPE res;
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values
all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom   ;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly 12
times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div shows a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer. More than 1.1% of cases where the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook,"
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								  /* double, extended, long double have significant potential
 								     underflow/overflow errors that can be greatly reduced with
 								     a limited number of tests and adjustments.  float is handled
 								     the same way when no HW double is available.
 								  */
 								  /* Scale by max(c,d) to reduce chances of denominator overflowing.  */
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								  if (FABS (c) < FABS (d))
 								    {
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input values and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values,
so that all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom   ;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly 12
times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div show a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer, and more than 1.1% of cases where the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook,"
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								      /* Prevent overflow when denominator is near max representable.  */
 								      if (FABS (d) >= RBIG)
 									{
 									  a = a / 2;
 									  b = b / 2;
 									  c = c / 2;
 									  d = d / 2;
 									}
 								      /* Avoid overflow/underflow issues when c and d are small.
 									 Scaling up helps avoid some underflows.
 									 No new overflow possible since c&d < RMIN2.  */
 								      if (FABS (d) < RMIN2)
 									{
 									  a = a * RMINSCAL;
 									  b = b * RMINSCAL;
 									  c = c * RMINSCAL;
 									  d = d * RMINSCAL;
 									}
 								      else
 									{
 									  if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2))
 									      || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
 										  && (FABS (d) < RMAX2)))
 									    {
 									      a = a * RMINSCAL;
 									      b = b * RMINSCAL;
 									      c = c * RMINSCAL;
 									      d = d * RMINSCAL;
 									    }
 									}
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								      ratio = c / d;
 								      denom = (c * ratio) + d;
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input values and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough that, to shift any subnormal values to normal values,
all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if (fabs(c) < fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if (FABS (r) > RMIN) {
      e = (a*r + b) / denom;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly 12
times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div shows a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer. In more than 1.1% of cases the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, but the 128-bit precision has a longer mantissa.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slowdown by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook."
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								      /* Choose alternate order of computation if ratio is subnormal.  */
 								      if (FABS (ratio) > RMIN)
 									{
 									  x = ((a * ratio) + b) / denom;
 									  y = ((b * ratio) - a) / denom;
 									}
 								      else
 									{
 									  x = ((c * (a / d)) + b) / denom;
 									  y = ((c * (b / d)) - a) / denom;
 									}
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								    }
 								  else
 								    {
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current libgcc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough that, to shift any subnormal values to normal values,
all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if (fabs(c) < fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if (FABS (r) > RMIN) {
      e = (a*r + b) / denom;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly 12
times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div shows a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer. In more than 1.1% of cases the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook."
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								      /* Prevent underflow when denominator is near max representable.  */
 								      if (FABS (c) >= RBIG)
 									{
 									  a = a / 2;
 									  b = b / 2;
 									  c = c / 2;
 									  d = d / 2;
 									}
 								      /* Avoid overflow/underflow issues when both c and d are small.
 									 Scaling up helps avoid some underflows.
 									 No new overflow possible since both c&d are less than RMIN2.  */
 								      if (FABS (c) < RMIN2)
 									{
 									  a = a * RMINSCAL;
 									  b = b * RMINSCAL;
 									  c = c * RMINSCAL;
 									  d = d * RMINSCAL;
 									}
 								      else
 									{
 									  if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
 									      || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
 										  && (FABS (c) < RMAX2)))
 									    {
 									      a = a * RMINSCAL;
 									      b = b * RMINSCAL;
 									      c = c * RMINSCAL;
 									      d = d * RMINSCAL;
 									    }
 									}
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								      ratio = d / c;
 								      denom = (d * ratio) + c;
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current libgcc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=(a*c+b*d)/(c*c+d*d) imag=(b*c-a*d)/(c*c+d*d)) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values,
so that all input values can be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom   ;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly 12
times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div shows a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer. In more than 1.1% of cases the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook."
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								      /* Choose alternate order of computation if ratio is subnormal.  */
 								      if (FABS (ratio) > RMIN)
 									{
 									  x = ((b * ratio) + a) / denom;
 									  y = (b - (a * ratio)) / denom;
 									}
 								      else
 									{
 									  x = (a + (d * (b / c))) / denom;
 									  y = (b - (d * (a / c))) / denom;
 									}
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								    }
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough that, to allow shifting any subnormal values to normal values,
all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if (FABS (r) > RMIN) {
      e = (a*r + b) / denom;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly
12 times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div show a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

The current method has more than 1.6% of cases where it is getting results
where the low 24 bits of the mantissa differ from the correct
answer. In more than 1.1% of cases the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, while the 128-bit precision has extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision (which on the tested machines causes a
slowdown of around 100x).

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook."
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								#endif
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
-												Practical improvement to libgcc complex divide

Correctness and performance test programs used during development of
this project may be found in the attachment to:
https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg254210.html

Summary of Purpose

This patch to libgcc/libgcc2.c __divdc3 provides an
opportunity to gain important improvements to the quality of answers
for the default complex divide routine (half, float, double, extended,
long double precisions) when dealing with very large or very small exponents.

The current code correctly implements Smith's method (1962) [2]
further modified by c99's requirements for dealing with NaN (not a
number) results. When working with input values where the exponents
are greater than *_MAX_EXP/2 or less than -(*_MAX_EXP)/2, results are
substantially different from the answers provided by quad precision
more than 1% of the time. This error rate may be unacceptable for many
applications that cannot a priori restrict their computations to the
safe range. The proposed method reduces the frequency of
"substantially different" answers by more than 99% for double
precision at a modest cost of performance.

Differences between current gcc methods and the new method will be
described. Then accuracy and performance differences will be discussed.

Background

This project started with an investigation related to
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59714.  Study of Beebe[1]
provided an overview of past and recent practice for computing complex
divide. The current glibc implementation is based on Robert Smith's
algorithm [2] from 1962.  A google search found the paper by Baudin
and Smith [3] (same Robert Smith) published in 2012. Elen Kalda's
proposed patch [4] is based on that paper.

I developed two sets of test data by randomly distributing values over
a restricted range and the full range of input values. The current
complex divide handled the restricted range well enough, but failed on
the full range more than 1% of the time. Baudin and Smith's primary
test for "ratio" equals zero reduced the cases with 16 or more error
bits by a factor of 5, but still left too many flawed answers. Adding
debug print out to cases with substantial errors allowed me to see the
intermediate calculations for test values that failed. I noted that
for many of the failures, "ratio" was a subnormal. Changing the
"ratio" test from check for zero to check for subnormal reduced the 16
bit error rate by another factor of 12. This single modified test
provides the greatest benefit for the least cost, but the percentage
of cases with greater than 16 bit errors (double precision data) is
still greater than 0.027% (2.7 in 10,000).

Continued examination of remaining errors and their intermediate
computations led to the various tests of input value tests and scaling
to avoid under/overflow. The current patch does not handle some of the
rare and most extreme combinations of input values, but the random
test data is only showing 1 case in 10 million that has an error of
greater than 12 bits. That case has 18 bits of error and is due to
subtraction cancellation. These results are significantly better
than the results reported by Baudin and Smith.

Support for half, float, double, extended, and long double precision
is included as all are handled with suitable preprocessor symbols in a
single source routine. Since half precision is computed with float
precision as per current libgcc practice, the enhanced algorithm
provides no benefit for half precision and would cost performance.
Further investigation showed changing the half precision algorithm
to use the simple formula (real=a*c+b*d imag=b*c-a*d) caused no
loss of precision and modest improvement in performance.

The existing constants for each precision:
float: FLT_MAX, FLT_MIN;
double: DBL_MAX, DBL_MIN;
extended and/or long double: LDBL_MAX, LDBL_MIN
are used for avoiding the more common overflow/underflow cases.  This
use is made generic by defining appropriate __LIBGCC2_* macros in
c-cppbuiltin.c.

Tests are added for when both parts of the denominator have exponents
small enough to allow shifting any subnormal values to normal values, so
that all input values could be scaled up without risking overflow. That
gained a clear improvement in accuracy. Similarly, when either
numerator was subnormal and the other numerator and both denominator
values were not too large, scaling could be used to reduce risk of
computing with subnormals.  The test and scaling values used all fit
within the allowed exponent range for each precision required by the C
standard.

Float precision has more difficulty with getting correct answers than
double precision. When hardware for double precision floating point
operations is available, float precision is now handled in double
precision intermediate calculations with the simple algorithm the same
as the half-precision method of using float precision for intermediate
calculations. Using the higher precision yields exact results for all
tested input values (64-bit double, 32-bit float) with the only
performance cost being the requirement to convert the four input
values from float to double. If double precision hardware is not
available, then float complex divide will use the same improved
algorithm as the other precisions with similar change in performance.

Further Improvement

The most common remaining substantial errors are due to accuracy loss
when subtracting nearly equal values. This patch makes no attempt to
improve that situation.

NOTATION

For all of the following, the notation is:
Input complex values:
  a+bi  (a= real part, b= imaginary part)
  c+di
Output complex value:
  e+fi = (a+bi)/(c+di)

For the result tables:
current = current method (SMITH)
b1div = method proposed by Elen Kalda
b2div = alternate method considered by Elen Kalda
new = new method proposed by this patch

DESCRIPTIONS of different complex divide methods:

NAIVE COMPUTATION (-fcx-limited-range):
  e = (a*c + b*d)/(c*c + d*d)
  f = (b*c - a*d)/(c*c + d*d)

Note that c*c and d*d will overflow or underflow if either
c or d is outside the range 2^-538 to 2^512.

This method is available in gcc when the switch -fcx-limited-range is
used. That switch is also enabled by -ffast-math. Only one who has a
clear understanding of the maximum range of all intermediate values
generated by an application should consider using this switch.

SMITH's METHOD (current libgcc):
  if(fabs(c)<fabs(d)) {
    r = c/d;
    denom = (c*r) + d;
    e = (a*r + b) / denom;
    f = (b*r - a) / denom;
  } else {
    r = d/c;
    denom = c + (d*r);
    e = (a + b*r) / denom;
    f = (b - a*r) / denom;
  }

Smith's method is the current default method available with __divdc3.

Elen Kalda's METHOD

Elen Kalda proposed a patch about a year ago, also based on Baudin and
Smith, but not including tests for subnormals:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html [4]
It is compared here for accuracy with this patch.

This method applies the most significant part of the algorithm
proposed by Baudin&Smith (2012) in the paper "A Robust Complex
Division in Scilab" [3]. Elen's method also replaces two divides by
one divide and two multiplies due to the high cost of divide on
aarch64. In the comparison sections, this method will be labeled
b1div. A variation discussed in that patch which does not replace the
two divides will be labeled b2div.

  inline void improved_internal (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
  {
    r = d/c;
    t = 1.0 / (c + (d * r));
    if (r != 0) {
        x = (a + (b * r)) * t;
        y = (b - (a * r)) * t;
    }  else {
    /* Changing the order of operations avoids the underflow of r impacting
     the result. */
        x = (a + (d * (b / c))) * t;
        y = (b - (d * (a / c))) * t;
    }
  }

  if (FABS (d) < FABS (c)) {
      improved_internal (a, b, c, d);
  } else {
      improved_internal (b, a, d, c);
      y = -y;
  }

NEW METHOD (proposed by patch) to replace the current default method:

The proposed method starts with an algorithm proposed by Baudin&Smith
(2012) in the paper "A Robust Complex Division in Scilab" [3]. The
patch makes additional modifications to that method for further
reductions in the error rate. The following code shows the #define
values for double precision. See the patch for #define values used
for other precisions.

  #define RBIG ((DBL_MAX)/2.0)
  #define RMIN (DBL_MIN)
  #define RMIN2 (0x1.0p-53)
  #define RMINSCAL (0x1.0p+51)
  #define RMAX2  ((RBIG)*(RMIN2))

  if (FABS(c) < FABS(d)) {
  /* prevent overflow when arguments are near max representable */
  if ((FABS (d) > RBIG) || (FABS (a) > RBIG) || (FABS (b) > RBIG) ) {
      a = a * 0.5;
      b = b * 0.5;
      c = c * 0.5;
      d = d * 0.5;
  }
  /* minimize overflow/underflow issues when c and d are small */
  else if (FABS (d) < RMIN2) {
      a = a * RMINSCAL;
      b = b * RMINSCAL;
      c = c * RMINSCAL;
      d = d * RMINSCAL;
  }
  else {
    if(((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) ||
       ((FABS (b) < RMIN) && (FABS (a) < RMAX2) && (FABS (d) < RMAX2))) {
        a = a * RMINSCAL;
        b = b * RMINSCAL;
        c = c * RMINSCAL;
        d = d * RMINSCAL;
    }
  }
  r = c/d; denom = (c*r) + d;
  if( r > RMIN ) {
      e = (a*r + b) / denom   ;
      f = (b*r - a) / denom;
  } else {
      e = (c * (a/d) + b) / denom;
      f = (c * (b/d) - a) / denom;
  }
  }
[ only presenting the fabs(c) < fabs(d) case here, full code in patch. ]

Before any computation of the answer, the code checks for any input
values near maximum to allow down scaling to avoid overflow.  These
scalings almost never harm the accuracy since they are by 2. Values that
are over RBIG are relatively rare but it is easy to test for them and
allow avoidance of overflows.

Testing for RMIN2 reveals when both c and d are less than [FLT|DBL]_EPSILON.
By scaling all values by 1/EPSILON, the code converts subnormals to normals,
avoids loss of accuracy and underflows in intermediate computations
that otherwise might occur. If scaling a and b by 1/EPSILON causes either
to overflow, then the computation will overflow whatever method is used.

Finally, we test for either a or b being subnormal (RMIN) and if so,
for the other three values being small enough to allow scaling.  We
only need to test a single denominator value since we have already
determined which of c and d is larger.

Next, r (the ratio of c to d) is checked for being near zero. Baudin
and Smith checked r for zero. This code improves that approach by
checking for values less than DBL_MIN (subnormal), which covers roughly
12 times as many cases and substantially improves overall accuracy. If r
is too small, then when it is used in a multiplication, there is a
high chance that the result will underflow to zero, losing significant
accuracy. That underflow is avoided by reordering the computation.
When r is subnormal, the code replaces a*r (= a*(c/d)) with ((a/d)*c)
which is mathematically the same but avoids the unnecessary underflow.

TEST Data

Two sets of data are presented to test these methods. Both sets
contain 10 million pairs of complex values.  The exponents and
mantissas are generated using multiple calls to random() and then
combining the results. Only values which give results to complex
divide that are representable in the appropriate precision after
being computed in quad precision are used.

The first data set is labeled "moderate exponents".
The exponent range is limited to -DBL_MAX_EXP/2 to DBL_MAX_EXP/2
for Double Precision (use FLT_MAX_EXP or LDBL_MAX_EXP for the
appropriate precisions).
The second data set is labeled "full exponents".
The exponent range for these cases is the full exponent range
including subnormals for a given precision.

ACCURACY Test results:

Note: The following accuracy tests are based on IEEE-754 arithmetic.

Note: All results reported are based on use of fused multiply-add. If
fused multiply-add is not used, the error rate increases, giving more
1 and 2 bit errors for both current and new complex divide.
Differences between using fused multiply and not using it that are
greater than 2 bits are less than 1 in a million.

The complex divide methods are evaluated by determining the percentage
of values that exceed differences in low order bits.  If a "2 bit"
test results show 1%, that would mean that 1% of 10,000,000 values
(100,000) have either a real or imaginary part that differs from the
quad precision result by more than the last 2 bits.

Results are reported for differences greater than or equal to 1 bit, 2
bits, 8 bits, 16 bits, 24 bits, and 52 bits for double precision.  Even
when the patch avoids overflows and underflows, some input values are
expected to have errors due to the potential for catastrophic roundoff
from floating point subtraction. For example, when b*c and a*d are
nearly equal, the result of subtraction may lose several places of
accuracy. This patch does not attempt to detect or minimize this type
of error, but neither does it increase them.

I only show the results for Elen Kalda's method (with both 1 and
2 divides) and the new method for only 1 divide in the double
precision table.

In the following charts, lower values are better.

current - current complex divide in libgcc
b1div - Elen Kalda's method from Baudin & Smith with one divide
b2div - Elen Kalda's method from Baudin & Smith with two divides
new   - This patch which uses 2 divides

===================================================
Errors   Moderate Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit    0.24707%   0.92986%   0.24707%   0.24707%
 2 bits   0.01762%   0.01770%   0.01762%   0.01762%
 8 bits   0.00026%   0.00026%   0.00026%   0.00026%
16 bits   0.00000%   0.00000%   0.00000%   0.00000%
24 bits         0%         0%         0%         0%
52 bits         0%         0%         0%         0%
===================================================
Table 1: Errors with Moderate Dataset (Double Precision)

Note in Table 1 that both the old and new methods give identical error
rates for data with moderate exponents. Errors exceeding 16 bits are
exceedingly rare. There are substantial increases in the 1 bit error
rates for b1div (the 1 divide/2 multiplies method) as compared to b2div
(the 2 divides method). These differences are minimal for 2 bits and
larger error measurements.

===================================================
Errors   Full Dataset
gtr eq     current    b1div      b2div        new
======    ========   ========   ========   ========
 1 bit      2.05%   1.23842%    0.67130%   0.16664%
 2 bits     1.88%   0.51615%    0.50354%   0.00900%
 8 bits     1.77%   0.42856%    0.42168%   0.00011%
16 bits     1.63%   0.33840%    0.32879%   0.00001%
24 bits     1.51%   0.25583%    0.24405%   0.00000%
52 bits     1.13%   0.01886%    0.00350%   0.00000%
===================================================
Table 2: Errors with Full Dataset (Double Precision)

Table 2 shows significant differences in error rates. First, the
difference between b1div and b2div shows a significantly higher error
rate for the b1div method both for single bit errors and well
beyond. Even for 52 bits, we see the b1div method gets completely
wrong answers more than 5 times as often as b2div. To retain
comparable accuracy with current complex divide results for small
exponents and due to the increase in errors for large exponents, I
choose to use the more accurate method of two divides.

In more than 1.5% of cases the current method returns results where
the low 24 bits of the mantissa differ from the correct answer, and in
more than 1.1% of cases the answer is completely wrong.
The new method shows less than one case in 10,000 with greater than
two bits of error and only one case in 10 million with greater than
16 bits of errors. The new patch reduces 8 bit errors by
a factor of 16,000 and virtually eliminates completely wrong
answers.

As noted above, for architectures with double precision
hardware, the new method uses that hardware for the
intermediate calculations before returning the
result in float precision. Testing of the new patch
has shown zero errors found as seen in Tables 3 and 4.

Correctness for float
=============================
Errors   Moderate Dataset
gtr eq     current     new
======    ========   ========
 1 bit   28.68070%         0%
 2 bits   0.64386%         0%
 8 bits   0.00401%         0%
16 bits   0.00001%         0%
24 bits         0%         0%
=============================
Table 3: Errors with Moderate Dataset (float)

=============================
Errors   Full Dataset
gtr eq     current     new
======    ========   ========
 1 bit     19.98%         0%
 2 bits     3.20%         0%
 8 bits     1.97%         0%
16 bits     1.08%         0%
24 bits     0.55%         0%
=============================
Table 4: Errors with Full Dataset (float)

As before, the current method shows a troubling rate of extreme
errors.

There are very minor changes in accuracy for half-precision since the code
changes from Smith's method to the simple method. 5 out of 1 million
test cases show correct answers instead of 1 or 2 bit errors.
libgcc computes half-precision functions in float precision
allowing the existing methods to avoid overflow/underflow issues
for the allowed range of exponents for half-precision.

Extended precision (using x87 80-bit format on x86) and Long double
(using IEEE-754 128-bit on x86 and aarch64) both have 15-bit exponents
as compared to 11-bit exponents in double precision. We note that the
C standard also allows Long Double to be implemented in the equivalent
range of Double. The RMIN2 and RMINSCAL constants are selected to work
within the Double range as well as with extended and 128-bit ranges.
We will limit our performance and accuracy discussions to the 80-bit
and 128-bit formats as seen on x86 here.

The extended and long double precision investigations were more
limited. Aarch64 does not support extended precision but does support
the software implementation of 128-bit long double precision. For x86,
long double defaults to the 80-bit precision but using the
-mlong-double-128 flag switches to using the software implementation
of 128-bit precision. Both 80-bit and 128-bit precisions have the same
exponent range, with the 128-bit precision having extended mantissas.
Since this change is only aimed at avoiding underflow/overflow for
extreme exponents, I studied the extended precision results on x86 for
100,000 values. The limited exponent dataset showed no differences.
For the dataset with full exponent range, the current and new values
showed major differences (greater than 32 bits) in 567 cases out of
100,000 (0.56%). In every one of these cases, the ratio of c/d or d/c
(as appropriate) was zero or subnormal, indicating the advantage of
the new method and its continued correctness where needed.

PERFORMANCE Test results

In order for a library change to be practical, it is necessary to show
the slowdown is tolerable. The slowdowns observed are much less than
would be seen by (for example) switching from hardware double precision
to a software quad precision, which on the tested machines causes a
slowdown of around 100x.

The actual slowdown depends on the machine architecture. It also
depends on the nature of the input data. If underflow/overflow is
rare, then implementations that have strong branch prediction will
only slow down by a few cycles. If underflow/overflow is common, then
the branch predictors will be less accurate and the cost will be
higher.

Results from two machines are presented as examples of the overhead
for the new method. The one labeled x86 is a 5 year old Intel x86
processor and the one labeled aarch64 is a 3 year old arm64 processor.

In the following chart, the times are averaged over a one million
value data set. All values are scaled to set the time of the current
method to be 1.0. Lower values are better. A value of less than 1.0
would be faster than the current method and a value greater than 1.0
would be slower than the current method.

================================================
               Moderate set          full set
               x86  aarch64        x86  aarch64
========     ===============     ===============
float         0.59    0.79        0.45    0.81
double        1.04    1.24        1.38    1.56
long double   1.13    1.24        1.29    1.25
================================================
Table 5: Performance Comparisons (ratio new/current)

The above tables omit the timing for the 1 divide and 2 multiply
comparison with the 2 divide approach.

The float results show clear performance improvement due to using the
simple method with double precision for intermediate calculations.

The double results with the newer method show less overhead for the
moderate dataset than for the full dataset. That's because the moderate
dataset does not ever take the new branches which protect from
under/overflow. The better the branch predictor, the lower the cost
for these untaken branches. Both platforms are somewhat dated, with
the x86 having a better branch predictor which reduces the cost of the
additional branches in the new code. Of course, the relative slowdown
may be greater for some architectures, especially those with limited
branch prediction combined with a high cost of misprediction.

The long double results are fairly consistent in showing the moderate
additional cost of the extra branches and calculations for all cases.

The observed cost for all precisions is claimed to be tolerable on the
grounds that:

(a) the cost is worthwhile considering the accuracy improvement shown.
(b) most applications will only spend a small fraction of their time
    calculating complex divide.
(c) it is much less than the cost of extended precision
(d) users are not forced to use it (as described below)

Those users who find this degree of slowdown unsatisfactory may use
the gcc switch -fcx-fortran-rules which does not use the library
routine, instead inlining Smith's method without the C99 requirement
for dealing with NaN results. The proposed patch for libgcc complex
divide does not affect the code generated by -fcx-fortran-rules.

SUMMARY

When input data to complex divide has exponents whose absolute value
is less than half of *_MAX_EXP, this patch makes no changes in
accuracy and has only a modest effect on performance.  When input data
contains values outside those ranges, the patch eliminates more than
99.9% of major errors with a tolerable cost in performance.

In comparison to Elen Kalda's method, this patch introduces more
performance overhead but reduces major errors by a factor of
greater than 4000.

REFERENCES

[1] Nelson H.F. Beebe, "The Mathematical-Function Computation Handbook."
Springer International Publishing AG, 2017.

[2] Robert L. Smith. Algorithm 116: Complex division.  Commun. ACM,
 5(8):435, 1962.

[3] Michael Baudin and Robert L. Smith. "A robust complex division in
Scilab," October 2012, available at http://arxiv.org/abs/1210.4539.

[4] Elen Kalda: Complex division improvements in libgcc
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg01629.html

2020-12-08  Patrick McGehearty  <patrick.mcgehearty@oracle.com>

gcc/c-family/
	* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
	complex divide
libgcc/
	* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
	* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
	Define.
	(__divkc3): Improve complex divide.
gcc/testsuite/
	* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
	* gcc.c-torture/execute/ieee/cdivchkf.c: Likewise.
	* gcc.c-torture/execute/ieee/cdivchkld.c: Likewise.

											
										
										
											2021-04-28 21:14:48 +02:00
+								  /* Recover infinities and zeros that computed as NaN+iNaN; the only
 								     cases are nonzero/zero, infinite/finite, and finite/infinite.  */
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								  if (isnan (x) && isnan (y))
 								    {
-												re PR c/30360 (Complex divide bug)

	PR c/30360
	* libgcc2.c (__divdc3): Compare c and d against 0.0 instead of
	denom against 0.0.

	* gcc.dg/pr30360.c: New test.

From-SVN: r120486

											
										
										
											2007-01-05 16:49:05 +01:00
+								      if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b)))
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+									{
 									  x = COPYSIGN (INFINITY, c) * a;
 									  y = COPYSIGN (INFINITY, c) * b;
 									}
 								      else if ((isinf (a) || isinf (b)) && isfinite (c) && isfinite (d))
 									{
 									  a = COPYSIGN (isinf (a) ? 1 : 0, a);
 									  b = COPYSIGN (isinf (b) ? 1 : 0, b);
 									  x = INFINITY * (a * c + b * d);
 									  y = INFINITY * (b * c - a * d);
 									}
 								      else if ((isinf (c) || isinf (d)) && isfinite (a) && isfinite (b))
 									{
 									  c = COPYSIGN (isinf (c) ? 1 : 0, c);
 									  d = COPYSIGN (isinf (d) ? 1 : 0, d);
 									  x = 0.0 * (a * c + b * d);
 									  y = 0.0 * (b * c - a * d);
 									}
 								    }
-												re PR middle-end/37850 (infinite recursive call to __mulsc3 when multiplying not-constant complexs)

	PR middle-end/37850
	* libgcc2.c (__mulMODE3): Use explicit assignments to form the
	result.
	(__divMODE3): Likewise.

Co-Authored-By: Nathan Froyd <froydnj@codesourcery.com>

From-SVN: r144751

											
										
										
											2009-03-10 16:42:51 +01:00
+								  __real__ res = x;
 								  __imag__ res = y;
 								  return res;
-												tree-complex.c (expand_complex_libcall): New.

        * tree-complex.c (expand_complex_libcall): New.
        (expand_complex_multiplication): Use it for c99 compliance.
        (expand_complex_division): Likewise.
        * fold-const.c (fold_complex_add, fold_complex_mult): New.
        (fold): Call them.
        * builtins.c (built_in_names): Remove const.
        * tree.c (build_common_builtin_nodes): Build complex arithmetic
        builtins.
        * tree.h (BUILT_IN_COMPLEX_MUL_MIN, BUILT_IN_COMPLEX_MUL_MAX): New.
        (BUILT_IN_COMPLEX_DIV_MIN, BUILT_IN_COMPLEX_DIV_MAX): New.
        (built_in_names): Remove const.
        * c-common.c (c_common_type_for_mode): Handle complex modes.
        * flags.h, toplev.c (flag_complex_method): Rename from
        flag_complex_divide_method.
        * libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3,
        __mulsc3, __muldc3, __mulxc3, __multc3): New.
        * libgcc2.h: Declare them.
        * libgcc-std.ver: Export them.
        * mklibgcc.in (lib2funcs): Build them.

From-SVN: r94909

											
										
										
											2005-02-12 01:26:57 +01:00
+								}
 								#endif /* complex divide */
 								#endif /* all complex float routines */
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
+								/* From here on down, the routines use normal data types.  */
 								#define SItype bogus_type
 								#define USItype bogus_type
 								#define DItype bogus_type
 								#define UDItype bogus_type
 								#define SFtype bogus_type
 								#define DFtype bogus_type
-												h8300.h (TARGET_H8300H, [...]): Make sure UNITS_PER_WORD and BITS_PER_WORD are compile time constants when...


	* config/h8300/h8300.h (TARGET_H8300H, TARGET_H8300S): Make sure
	UNITS_PER_WORD and BITS_PER_WORD are compile time constants when
	compiling libgcc2.
	* config/mips/mips.h (TARGET_64BIT): Likewise.
	* config/rs6000/rs6000.h (TARGET_POWERPC64): Likewise.
	* libgcc2.c: Use {,U}{HW,W,DW}type and DWunion everywhere instead
	of {SI,DI}type and DIunion.  Define these types to QI/HI modes on
	dsps.  Give routines proper names if SI/DI modes are not used.
	* longlong.h: Use DWunion instead of DIunion.

From-SVN: r31095

											
										
										
											1999-12-27 09:34:45 +01:00
+								#undef Wtype
 								#undef UWtype
 								#undef HWtype
 								#undef UHWtype
 								#undef DWtype
 								#undef UDWtype
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
 								#undef char
 								#undef short
 								#undef int
 								#undef long
 								#undef unsigned
 								#undef float
 								#undef double
-												(__gcc_bcmp): New function, in cond on L__gcc_bcmp.

From-SVN: r2110

											
										
										
											1992-09-12 10:45:46 +02:00
 								#ifdef L__gcc_bcmp
 								/* Like bcmp except the sign is meaningful.
-												Fix typos in comments.

From-SVN: r9712

											
										
										
											1995-05-16 14:39:54 +02:00
+								   Result is negative if S1 is less than S2,
-												(__gcc_bcmp): New function, in cond on L__gcc_bcmp.

From-SVN: r2110

											
										
										
											1992-09-12 10:45:46 +02:00
+								   positive if S1 is greater, 0 if S1 and S2 are equal.  */
 								int
-												libgcc2.h: New file.

	* libgcc2.h: New file.
	* libgcc2.c: Move macros, typedefs and prototypes to libgcc2.h.

From-SVN: r32440

											
										
										
											2000-03-09 04:39:09 +01:00
+								__gcc_bcmp (const unsigned char *s1, const unsigned char *s2, size_t size)
-												(__gcc_bcmp): New function, in cond on L__gcc_bcmp.

From-SVN: r2110

											
										
										
											1992-09-12 10:45:46 +02:00
+								{
 								  while (size > 0)
 								    {
-												libgcc2.c (__negdi2, [...]): Const-ify and/or initialize automatic variables at declaration.

	* libgcc2.c (__negdi2, __addvsi3, __addvdi3, __subvsi3, __subvdi3,
	__mulvsi3, __negvsi2, __negvdi2, __mulvdi3, __lshrdi3, __ashldi3,
	__ashrdi3, __ffsDI2, __muldi3, __clzDI2, __ctzDI2, __parityDI2,
	__udivmoddi4, __divdi3, __moddi3, __cmpdi2, __ucmpdi2,
	__fixunstfDI, __fixunsxfDI, __fixunsdfDI, __fixunssfDI,
	__floatdixf, __floatditf, __floatdidf, __floatdisf, __gcc_bcmp):
	Const-ify and/or initialize automatic variables at declaration.

From-SVN: r73573

											
										
										
											2003-11-14 03:23:13 +01:00
+								      const unsigned char c1 = *s1++, c2 = *s2++;
-												(__gcc_bcmp): New function, in cond on L__gcc_bcmp.

From-SVN: r2110

											
										
										
											1992-09-12 10:45:46 +02:00
+								      if (c1 != c2)
 									return c1 - c2;
 								      size--;
 								    }
 								  return 0;
 								}
-												*** empty log message ***

From-SVN: r1503

											
										
										
											1992-07-07 21:46:10 +02:00
-												libgcc2.c, libgcc2.h: Restore __eprintf.

	* libgcc2.c, libgcc2.h: Restore __eprintf.  Label as used for
	binary backward compat only.
	* Makefile.in (LIB2FUNCS_ST): New.  Put _eprintf here, not
	in LIB2FUNCS.  Pass it to mklibgcc.
	* mklibgcc.in: Handle LIB2FUNCS_ST.

	* Makefile.in (installdirs): Don't create $(gcc_tooldir).

From-SVN: r42057

											
										
										
											2001-05-14 04:46:22 +02:00
+								#endif
 								/* __eprintf used to be used by GCC's private version of <assert.h>.
 								   We no longer provide that header, but this routine remains in libgcc.a
 								   for binary backward compatibility.  Note that it is not included in
 								   the shared version of libgcc.  */
 								#ifdef L_eprintf
 								#ifndef inhibit_libc
 								#undef NULL /* Avoid errors if stdio.h and our stddef.h mismatch.  */
 								#include <stdio.h>
 								/* Report a failed assertion and terminate the program.
 								   STRING is passed to fprintf as the format string, with
 								   EXPRESSION, LINE and FILENAME supplied, in that order, as
 								   its arguments; the message is written to stderr.  */
 								void
 								__eprintf (const char *string, const char *expression,
 									   unsigned int line, const char *filename)
 								{
 								  fprintf (stderr, string, expression, line, filename);
 								  /* Flush explicitly so the message is pushed out before
 								     abort () ends the process.  */
 								  fflush (stderr);
 								  abort ();
 								}
 								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								#endif
 								#ifdef L_clear_cache
 								/* Clear part of an instruction cache.  */
 								void
-												extend.texi (__clear_cache): Correct signature.

gcc/ChangeLog:

	* doc/extend.texi (__clear_cache): Correct signature.

libgcc/ChangeLog:

	* libgcc2.h (__clear_cache): Correct signature.
	* libgcc2.c (__clear_cache): Same.

gcc/testsuite/ChangeLog:

	* gcc.dg/Wbuiltin-declaration-mismatch-12.c: New test.

From-SVN: r269082

											
										
										
											2019-02-22 00:23:12 +01:00
+								__clear_cache (void *beg __attribute__((__unused__)),
 									       void *end __attribute__((__unused__)))
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												gcov.c (output_data): Use HOST_WIDEST_INT_PRINT_DEC to output variables of type HOST_WIDEST_INT.

	* gcov.c (output_data): Use HOST_WIDEST_INT_PRINT_DEC to output
	variables of type HOST_WIDEST_INT.

	* libgcc2.c (__bb_exit_func): Handle gcov_type as long long.
	(__bb_exit_func): Correct type of count_max to avoid overflow.
	(num_digits): Handle long long argument.

	* combine.c (gen_lowpart_for_combine): Remove unused variable.

From-SVN: r44033

											
										
										
											2001-07-16 11:16:04 +02:00
+								#ifdef CLEAR_INSN_CACHE
-												extend.texi (__clear_cache): Correct signature.

gcc/ChangeLog:

	* doc/extend.texi (__clear_cache): Correct signature.

libgcc/ChangeLog:

	* libgcc2.h (__clear_cache): Correct signature.
	* libgcc2.c (__clear_cache): Same.

gcc/testsuite/ChangeLog:

	* gcc.dg/Wbuiltin-declaration-mismatch-12.c: New test.

From-SVN: r269082

											
										
										
											2019-02-22 00:23:12 +01:00
+								  /* Cast the void* pointers to char* as some implementations
 								     of the macro assume the pointers can be subtracted from
 								     one another.  */
 								  CLEAR_INSN_CACHE ((char *) beg, (char *) end);
-												(__clear_cache): add case for new CLEAR_INSN_CACHE.

From-SVN: r5391

											
										
										
											1993-09-22 19:43:00 +02:00
+								#endif /* CLEAR_INSN_CACHE */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
 								#endif /* L_clear_cache */
 								#ifdef L_trampoline
 								/* Jump to a trampoline, loading the static chain address.  */
-												MAINTAINERS (mt port): Remove.

	* MAINTAINERS (mt port): Remove.
	(sco5, unixware, sco udk): Remove.
	(Kean Johnston): Add to Write After Approval.

fixincludes:
	* inclhack.def (AAB_svr4_replace_byteorder,
	AAB_ultrix_ansi_compat, AAB_ultrix_limits, AAB_ultrix_memory,
	libc1_G_va_list, libc1_ifdefd_memx, nested_motorola,
	ptx_sys_mc_param_h, sco_regset, sco_static_func, sco_utime,
	solaris_mutex_init_1, solaris_socket, solaris_unistd,
	solaris_widec, svr4_krnl, ultrix_atexit_param, ultrix_atof_param,
	ultrix_const3, ultrix_fix_fixproto, ultrix_ifdef, ultrix_locale,
	ultrix_math_ifdef, ultrix_nested_ioctl, ultrix_nested_svc,
	ultrix_stat, ultrix_static, ultrix_stdlib, ultrix_strings,
	ultrix_strings2, ultrix_sys_time, ultrix_unistd,
	unicosmk_restrict, uw7_byteorder_fix, windiss_math1,
	windiss_math2, windiss_valist): Remove.
	* fixincl.x: Regenerate.
	* mkfixinc.sh: (arm-semi-aof, hppa1.1-*-osf*, hppa1.1-*-bsd*,
	i370-*-openedition, i?86-*-moss*, i?86-*-uwin*,
	powerpc-*-eabiaix*): Remove.
	* tests/base/math.h: Update.
	* tests/base/pthread.h: Update.
	* tests/base/stdio.h: Update.
	* tests/base/stdlib.h: Update.
	* tests/base/string.h: Update.
	* tests/base/strings.h: Update.
	* tests/base/sys/file.h: Update.
	* tests/base/sys/limits.h: Update.
	* tests/base/sys/socket.h: Update.
	* tests/base/sys/stat.h: Update.
	* tests/base/sys/time.h: Update.
	* tests/base/testing.h: Update.
	* tests/base/unistd.h: Update.
	* tests/base/_G_config.h: Remove.
	* tests/base/arpa: Remove directory.
	* tests/base/fs: Remove directory.
	* tests/base/locale.h: Remove.
	* tests/base/machine: Remove directory.
	* tests/base/rpc/svc.h: Remove.
	* tests/base/sys/ioctl.h: Remove.
	* tests/base/sys/regset.h: Remove.
	* tests/base/sys/times.h: Remove.
	* tests/base/sys/utsname.h: Remove.
	* tests/base/widec.h: Remove.

gcc:
	* config.gcc (Obsolete configurations): Remove list of
	configurations.
	(Unsupported targets list): Add *-*-linux*aout*, *-*-linux*libc1*,
	*-*-solaris2.[0-6], *-*-solaris2.[0-6].*, *-*-sysv*.  Remove other
	targets matched by those patterns.
	(strongarm*-*-*, ep9312*-*-*, xscale-*-*, parisc*-*-*,
	m680[012]0-*-*, *-*-linux*libc1*, *-*-linux*aout*,
	alpha*-*-unicosmk*, strongarm*-*-freebsd*, ep9312-*-elf,
	arm*-*-kaos*, cris-*-aout, parisc*64*-*-linux*, parisc*-*-linux*,
	hppa1.1-*-pro*, hppa1.1-*-osf*, hppa1.1-*-bsd*,
	i[34567]86-sequent-ptx4*, i[34567]86-sequent-sysv4*,
	i[34567]86-*-beoself*, i[34567]86-*-beos*, i[34567]86-*-sco3.2v5*,
	i[34567]86-*-sysv5*, i[34567]86-*-sysv4*, i[34567]86-*-uwin*,
	i[34567]86-*-kaos*, m68020-*-elf*, m68010-*-netbsdelf*,
	mips-wrs-windiss, mt-*-elf, powerpc-*-beos*, powerpc-*-chorusos*,
	powerpc-wrs-windiss*, powerpcle-*-sysv*, powerpc-*-kaos*,
	powerpcle-*-kaos*, sh*-*-kaos*, sparc-*-sysv4*, strongarm-*-elf*,
	strongarm-*-pe, strongarm-*-kaos*, vax-*-bsd*, vax-*-sysv*,
	vax-*-ultrix*, xscale-*-elf, xscale-*-coff,
	i[34567]86-*-linux*aout*, i[34567]86-*-linux*libc1): Remove.
	Make code for Solaris 7 and greater unconditional for Solaris.
	(ep9312-*-*, parisc1*, m680[012]0-*-*, parisc*-*-*, mt-*-*):
	Remove --with-* handling.
	* config/rs6000/sysv4.h (-mwindiss): Remove from all specs.
	(LIB_WINDISS_SPEC, CPP_OS_WINDISS_SPEC, STARTFILE_WINDISS_SPEC,
	ENDFILE_WINDISS_SPEC, LINK_START_WINDISS_SPEC,
	LINK_OS_WINDISS_SPEC): Remove.
	* config/rs6000/sysv4.opt (mwindiss): Remove.
	* configure.ac (strongarm*-*-*, xscale*-*-*): Remove.
	* configure: Regenerate.
	* doc/cpp.texi: Don't mention BeOS.
	* doc/extend.texi (interrupt): Don't mention MS1.
	* doc/install.texi: (i386-@var{any}-sysv, m68k-bull-sysv,
	m68k-hp-hpux, m68000-hp-hpux, m68000-att-sysv,
	alphaev5-cray-unicosmk*, xscale-*-*, i?86-*-linux*aout,
	i?86-*-sco3.2v5*, i?86-*-udk, m68k-hp-hpux, powerpc-*-sysv4,
	powerpc-*-sysv4, powerpcle-*-sysv4, *-*-sysv*, vax-dec-ultrix):
	Remove.
	* doc/invoke.texi (MT Options): Remove.
	(-mwindiss): Remove.
	(CRIS Options): Remove cris-axis-aout references.
	(HPPA Options): Don't mention hppa1.1-*-pro.
	* doc/md.texi: (MorphoTech family): Remove.
	* libgcc2.c: Don't handle UWIN.
	* config/alpha/t-unicosmk: Remove.
	* config/alpha/unicosmk.h: Remove.
	* config/arm/kaos-arm.h: Remove.
	* config/arm/kaos-strongarm.h: Remove.
	* config/arm/strongarm-coff.h: Remove.
	* config/arm/strongarm-elf.h: Remove.
	* config/arm/strongarm-pe.h: Remove.
	* config/arm/t-strongarm-pe: Remove.
	* config/arm/t-xscale-coff: Remove.
	* config/arm/t-xscale-elf: Remove.
	* config/arm/xscale-coff.h: Remove.
	* config/arm/xscale-elf.h: Remove.
	* config/chorus.h: Remove.
	* config/cris/aout.h: Remove.
	* config/cris/aout.opt: Remove.
	* config/cris/t-aout: Remove.
	* config/i386/beos-elf.h: Remove.
	* config/i386/kaos-i386.h: Remove.
	* config/i386/ptx4-i.h: Remove.
	* config/i386/sco5.h: Remove.
	* config/i386/sco5.opt: Remove.
	* config/i386/sysv4-cpp.h: Remove.
	* config/i386/sysv5.h: Remove.
	* config/i386/t-beos: Remove.
	* config/i386/t-sco5: Remove.
	* config/i386/t-uwin: Remove.
	* config/i386/uwin.asm: Remove.
	* config/i386/uwin.h: Remove.
	* config/kaos.h: Remove.
	* config/mips/windiss.h: Remove.
	* config/mt: Remove directory.
	* config/pa/pa-osf.h: Remove.
	* config/pa/pa-pro-end.h: Remove.
	* config/pa/t-pro: Remove.
	* config/ptx4.h: Remove.
	* config/rs6000/beos.h: Remove.
	* config/rs6000/kaos-ppc.h: Remove.
	* config/rs6000/t-beos: Remove.
	* config/rs6000/windiss.h: Remove.
	* config/sh/kaos-sh.h: Remove.
	* config/sol2-6.h: Remove.
	* config/sparc/sol26-sld.h: Remove.
	* config/sparc/sysv4-only.h: Remove.
	* config/vax/bsd.h: Remove.
	* config/vax/t-memfuncs: Remove.
	* config/vax/ultrix.h: Remove.
	* config/vax/vaxv.h: Remove.
	* config/windiss.h: Remove.

gcc/testsuite:
	* g++.dg/abi/arm_cxa_vec1.C: Don't handle xscale*-*-*.
	* g++.dg/eh/spbp.C: Don't handle *-*-solaris2.[56]*.
	* g++.dg/warn/miss-format-1.C: Don't handle Solaris before Solaris
	7.
	* gcc.c-torture/compile/981006-1.c: Don't handle xscale*-*-*,
	strongarm*-*-* and cris-*-aout*.
	* gcc.c-torture/execute/941014-1.x: Don't handle xscale*-*-* and
	strongarm*-*-*.
	* gcc.dg/20030909-1.c: Don't handle xscale*-*-* and
	strongarm*-*-*.
	* gcc.dg/20031108-1.c: Don't handle xscale*-*-* and
	strongarm*-*-*.
	* gcc.dg/20040813-1.c: Don't handle *-*-sysv5*.
	* gcc.dg/arm-asm.c: Don't handle strongarm*-*-* and xscale*-*-*.
	* gcc.dg/arm-scd42-1.c: Use target arm*-*-*.
	* gcc.dg/arm-scd42-3.c: Use target arm*-*-*.
	* gcc.dg/cpp/assert4.c: Don't handle BeOS.
	* gcc.dg/debug/pr35154.c: Don't handle *-*-sysv5*.
	* gcc.dg/intmax_t-1.c: Don't handle *-*-solaris2.5.1 and
	xscale*-*-elf*.
	* gcc.dg/pragma-align.c: Don't handle i?86-*-sco3.2v5*.
	* gcc.dg/pthread-init-2.c: Don't handle *-*-solaris2.5.1.
	* gcc.misc-tests/arm-isr.exp: Use target arm*-*-*.
	* gcc.target/powerpc/ppc-sdata-1.c: Don't handle powerpc-*-sysv*.
	* gcc.target/powerpc/ppc-sdata-2.c: Don't handle powerpc-*-sysv*.
	* gcc.target/powerpc/ppc-stackalign-1.c: Don't handle
	powerpc-*-sysv*.
	* gfortran.dg/debug/pr35154-stabs.f: Don't handle *-*-sysv5*.
	* lib/target-supports.exp: Don't handle strongarm*-*-elf,
	xscale*-*-elf and *-*-windiss.
	* obj-c++.dg/dwarf-2.mm: Don't handle *-*-solaris2.[56]*.
	* objc.dg/dwarf-1.m: Don't handle *-*-solaris2.[56]*.
	* objc.dg/dwarf-2.m: Don't handle *-*-solaris2.[56]*.
	* gcc.dg/mt-loopi1.c: Remove.

gnattools:
	* configure.ac (xscale*-wrs-vx*, xscale*-wrs-coff): Remove.
	* configure: Regenerate.

libcpp:
	* configure.ac (parisc*64*-*-*): Remove.
	* configure: Regenerate.

libffi:
	* configure.ac (parisc*-*-linux*, powerpc-*-sysv*,
	powerpc-*-beos*): Remove.
	* configure: Regenerate.

libgcc:
	* config.host (strongarm*-*-*, ep9312*-*-*, xscale-*-*,
	parisc*-*-*, m680[012]0-*-*, *-*-linux*libc1*, *-*-linux*aout*,
	alpha*-*-unicosmk*, strongarm*-*-freebsd*, ep9312-*-elf,
	arm*-*-kaos*, cris-*-aout, parisc*64*-*-linux*, parisc*-*-linux*,
	hppa1.1-*-pro*, hppa1.1-*-osf*, hppa1.1-*-bsd*,
	i[34567]86-sequent-ptx4*, i[34567]86-sequent-sysv4*,
	i[34567]86-*-beoself*, i[34567]86-*-beos*, i[34567]86-*-sco3.2v5*,
	i[34567]86-*-sysv5*, i[34567]86-*-sysv4*, i[34567]86-*-uwin*,
	i[34567]86-*-kaos*, m68020-*-elf*, m68010-*-netbsdelf*,
	mips-wrs-windiss, mt-*-elf, powerpc-*-beos*, powerpc-*-chorusos*,
	powerpc-wrs-windiss*, powerpcle-*-sysv*, powerpc-*-kaos*,
	powerpcle-*-kaos*, sh*-*-kaos*, sparc-*-sysv4*, strongarm-*-elf*,
	strongarm-*-pe, strongarm-*-kaos*, vax-*-bsd*, vax-*-sysv*,
	vax-*-ultrix*, xscale-*-elf, xscale-*-coff): Remove.

libjava:
	* configure.host (strongarm*-elf, xscale*-elf): Remove.

libstdc++-v3:
	* configure.host (xscale, ep9312, m680[246]0, solaris2.5,
	solaris2.5.[0-9], solaris2.6, windiss*): Remove.
	* crossconfig.m4 (*-solaris2.5, *-solaris2.6, *-windiss*): Remove.
	* configure: Regenerate.
	* config/os/solaris/solaris2.5: Remove directory.
	* config/os/solaris/solaris2.6: Remove directory.
	* config/os/windiss: Remove directory.

From-SVN: r136534

											
										
										
											2008-06-07 20:00:15 +02:00
+								#if defined(WINNT) && ! defined(__CYGWIN__)
-												libgcc2.c (L_trampoline): Include windows.h for mingw targets.

2011-06-09  Kai Tietz  <ktietz@redhat.com>

        * libgcc2.c (L_trampoline): Include windows.h for mingw targets.

From-SVN: r174860

											
										
										
											2011-06-09 23:10:25 +02:00
+								#include <windows.h>
-												libgcc2.c (L_trampoline): Prototype for getpagesize and mprotect in WINNT case.

2009-09-24  Kai Tietz  <kai.tietz@onevision.com>

	* libgcc2.c (L_trampoline): Prototype for getpagesize
	and mprotect in WINNT case.

From-SVN: r152139

											
										
										
											2009-09-24 21:02:44 +02:00
+								int getpagesize (void);
 								int mprotect (char *,int, int);
-												Don't compile getpagesize if __CYGWIN32__

From-SVN: r14571

											
										
										
											1997-08-01 01:39:26 +02:00
-												libgcc2.c (getpagesize): Change type of return value to int.

2004-09-27  Aaron W. LaFramboise <aaronraolete36@aaronwl.com>

	* libgcc2.c (getpagesize): Change type of return value to int.

From-SVN: r88165

											
										
										
											2004-09-27 10:01:57 +02:00
+								int
-												gthr-single.h (__gthread_active_p): Add prototype arguments.

	* gthr-single.h (__gthread_active_p): Add prototype arguments.

	* libgcc2.c (__udivmoddi4): Remove unnecessary decls.
	(__dummy, __builtin_saveregs, __bb_exit_trace_func, __bb_init_prg,
	__bb_trace_func, __bb_trace_func_ret, __bb_trace_ret,
	function_ptr, getpagesize, __enable_execute_stack,
	__enable_execute_stack, __clear_insn_cache,
	__enable_execute_stack, __do_global_dtors, __do_global_ctors,
	_cleanup, _exit, __default_terminate, __terminate_func,
	__terminate, __empty, __throw, new_eh_context,
	eh_context_initialize, eh_context_static, eh_context_specific,
	get_eh_context, __get_eh_context, __get_eh_info,
	init_reg_size_table, eh_threads_initialize,
	__get_dynamic_handler_chain, __sjthrow, __sjpopnthrow,
	__unwinding_cleanup, throw_helper, __throw, __rethrow,
	__pure_virtual): Add prototype arguments.
	(__bb_exit_func): Cast a sizeof to long when comparing against one.
	Cast a signed value to unsigned long when comparing against one.
	(new_eh_context): Wrap in _GTHREADS macro.
	(__sjthrow, __sjpopnthrow): Initialize variable `cleanup' at
	declaration.
	(in_reg_window): Mark parameters with __attribute__ ((__unused__)).
	(throw_helper): Initialize variables `handler_p' and `pc_p'.

From-SVN: r30872

											
										
										
											1999-12-12 16:34:09 +01:00
+								getpagesize (void)
-												(trampoline): Add getpagesize and mprotect for WINNT.

From-SVN: r10605

											
										
										
											1995-11-26 20:41:43 +01:00
+								{
 								#ifdef _ALPHA_
 								  return 8192;
 								#else
 								  return 4096;
 								#endif
 								}
-												(_trampoline): Rework last change; both getpagesize and mprotect are
in cygwin32.

From-SVN: r14636

											
										
										
											1997-08-03 00:27:33 +02:00
+								int
 								mprotect (char *addr, int len, int prot)
-												(trampoline): Add getpagesize and mprotect for WINNT.

From-SVN: r10605

											
										
										
											1995-11-26 20:41:43 +01:00
+								{
-												re PR target/39063 (libgcc2.c:mprotect() for mingw, incompatible pointer type warning)

	PR target/39063
	* libgcc2.c (mprotect): Do not use signed arguments for
	VirtualProtect, use DWORD arguments.  Also fix the 'may
	be used uninitialized' warning for the np variable.

From-SVN: r144957

											
										
										
											2009-03-19 11:40:32 +01:00
+								  DWORD np, op;
-												(trampoline): Add getpagesize and mprotect for WINNT.

From-SVN: r10605

											
										
										
											1995-11-26 20:41:43 +01:00
-												(_trampoline): Rework last change; both getpagesize and mprotect are
in cygwin32.

From-SVN: r14636

											
										
										
											1997-08-03 00:27:33 +02:00
+								  if (prot == 7)
 								    np = 0x40;
 								  else if (prot == 5)
 								    np = 0x20;
 								  else if (prot == 4)
 								    np = 0x10;
 								  else if (prot == 3)
 								    np = 0x04;
 								  else if (prot == 1)
 								    np = 0x02;
 								  else if (prot == 0)
 								    np = 0x01;
-												re PR target/39063 (libgcc2.c:mprotect() for mingw, incompatible pointer type warning)

	PR target/39063
	* libgcc2.c (mprotect): Do not use signed arguments for
	VirtualProtect, use DWORD arguments.  Also fix the 'may
	be used uninitialized' warning for the np variable.

From-SVN: r144957

											
										
										
											2009-03-19 11:40:32 +01:00
+								  else
 								    return -1;
-												(trampoline): Add getpagesize and mprotect for WINNT.

From-SVN: r10605

											
										
										
											1995-11-26 20:41:43 +01:00
 								  if (VirtualProtect (addr, len, np, &op))
 								    return 0;
 								  else
 								    return -1;
 								}
-												MAINTAINERS (mt port): Remove.

	* MAINTAINERS (mt port): Remove.
	(sco5, unixware, sco udk): Remove.
	(Kean Johnston): Add to Write After Approval.

fixincludes:
	* inclhack.def (AAB_svr4_replace_byteorder,
	AAB_ultrix_ansi_compat, AAB_ultrix_limits, AAB_ultrix_memory,
	libc1_G_va_list, libc1_ifdefd_memx, nested_motorola,
	ptx_sys_mc_param_h, sco_regset, sco_static_func, sco_utime,
	solaris_mutex_init_1, solaris_socket, solaris_unistd,
	solaris_widec, svr4_krnl, ultrix_atexit_param, ultrix_atof_param,
	ultrix_const3, ultrix_fix_fixproto, ultrix_ifdef, ultrix_locale,
	ultrix_math_ifdef, ultrix_nested_ioctl, ultrix_nested_svc,
	ultrix_stat, ultrix_static, ultrix_stdlib, ultrix_strings,
	ultrix_strings2, ultrix_sys_time, ultrix_unistd,
	unicosmk_restrict, uw7_byteorder_fix, windiss_math1,
	windiss_math2, windiss_valist): Remove.
	* fixincl.x: Regenerate.
	* mkfixinc.sh: (arm-semi-aof, hppa1.1-*-osf*, hppa1.1-*-bsd*,
	i370-*-openedition, i?86-*-moss*, i?86-*-uwin*,
	powerpc-*-eabiaix*): Remove.
	* tests/base/math.h: Update.
	* tests/base/pthread.h: Update.
	* tests/base/stdio.h: Update.
	* tests/base/stdlib.h: Update.
	* tests/base/string.h: Update.
	* tests/base/strings.h: Update.
	* tests/base/sys/file.h: Update.
	* tests/base/sys/limits.h: Update.
	* tests/base/sys/socket.h: Update.
	* tests/base/sys/stat.h: Update.
	* tests/base/sys/time.h: Update.
	* tests/base/testing.h: Update.
	* tests/base/unistd.h: Update.
	* tests/base/_G_config.h: Remove.
	* tests/base/arpa: Remove directory.
	* tests/base/fs: Remove directory.
	* tests/base/locale.h: Remove.
	* tests/base/machine: Remove directory.
	* tests/base/rpc/svc.h: Remove.
	* tests/base/sys/ioctl.h: Remove.
	* tests/base/sys/regset.h: Remove.
	* tests/base/sys/times.h: Remove.
	* tests/base/sys/utsname.h: Remove.
	* tests/base/widec.h: Remove.

gcc:
	* config.gcc (Obsolete configurations): Remove list of
	configurations.
	(Unsupported targets list): Add *-*-linux*aout*, *-*-linux*libc1*,
	*-*-solaris2.[0-6], *-*-solaris2.[0-6].*, *-*-sysv*.  Remove other
	targets matched by those patterns.
	(strongarm*-*-*, ep9312*-*-*, xscale-*-*, parisc*-*-*,
	m680[012]0-*-*, *-*-linux*libc1*, *-*-linux*aout*,
	alpha*-*-unicosmk*, strongarm*-*-freebsd*, ep9312-*-elf,
	arm*-*-kaos*, cris-*-aout, parisc*64*-*-linux*, parisc*-*-linux*,
	hppa1.1-*-pro*, hppa1.1-*-osf*, hppa1.1-*-bsd*,
	i[34567]86-sequent-ptx4*, i[34567]86-sequent-sysv4*,
	i[34567]86-*-beoself*, i[34567]86-*-beos*, i[34567]86-*-sco3.2v5*,
	i[34567]86-*-sysv5*, i[34567]86-*-sysv4*, i[34567]86-*-uwin*,
	i[34567]86-*-kaos*, m68020-*-elf*, m68010-*-netbsdelf*,
	mips-wrs-windiss, mt-*-elf, powerpc-*-beos*, powerpc-*-chorusos*,
	powerpc-wrs-windiss*, powerpcle-*-sysv*, powerpc-*-kaos*,
	powerpcle-*-kaos*, sh*-*-kaos*, sparc-*-sysv4*, strongarm-*-elf*,
	strongarm-*-pe, strongarm-*-kaos*, vax-*-bsd*, vax-*-sysv*,
	vax-*-ultrix*, xscale-*-elf, xscale-*-coff,
	i[34567]86-*-linux*aout*, i[34567]86-*-linux*libc1): Remove.
	Make code for Solaris 7 and greater unconditional for Solaris.
	(ep9312-*-*, parisc1*, m680[012]0-*-*, parisc*-*-*, mt-*-*):
	Remove --with-* handling.
	* config/rs6000/sysv4.h (-mwindiss): Remove from all specs.
	(LIB_WINDISS_SPEC, CPP_OS_WINDISS_SPEC, STARTFILE_WINDISS_SPEC,
	ENDFILE_WINDISS_SPEC, LINK_START_WINDISS_SPEC,
	LINK_OS_WINDISS_SPEC): Remove.
	* config/rs6000/sysv4.opt (mwindiss): Remove.
	* configure.ac (strongarm*-*-*, xscale*-*-*): Remove.
	* configure: Regenerate.
	* doc/cpp.texi: Don't mention BeOS.
	* doc/extend.texi (interrupt): Don't mention MS1.
	* doc/install.texi: (i386-@var{any}-sysv, m68k-bull-sysv,
	m68k-hp-hpux, m68000-hp-hpux, m68000-att-sysv,
	alphaev5-cray-unicosmk*, xscale-*-*, i?86-*-linux*aout,
	i?86-*-sco3.2v5*, i?86-*-udk, m68k-hp-hpux, powerpc-*-sysv4,
	powerpc-*-sysv4, powerpcle-*-sysv4, *-*-sysv*, vax-dec-ultrix):
	Remove.
	* doc/invoke.texi (MT Options): Remove.
	(-mwindiss): Remove.
	(CRIS Options): Remove cris-axis-aout references.
	(HPPA Options): Don't mention hppa1.1-*-pro.
	* doc/md.texi: (MorphoTech family): Remove.
	* libgcc2.c: Don't handle UWIN.
	* config/alpha/t-unicosmk: Remove.
	* config/alpha/unicosmk.h: Remove.
	* config/arm/kaos-arm.h: Remove.
	* config/arm/kaos-strongarm.h: Remove.
	* config/arm/strongarm-coff.h: Remove.
	* config/arm/strongarm-elf.h: Remove.
	* config/arm/strongarm-pe.h: Remove.
	* config/arm/t-strongarm-pe: Remove.
	* config/arm/t-xscale-coff: Remove.
	* config/arm/t-xscale-elf: Remove.
	* config/arm/xscale-coff.h: Remove.
	* config/arm/xscale-elf.h: Remove.
	* config/chorus.h: Remove.
	* config/cris/aout.h: Remove.
	* config/cris/aout.opt: Remove.
	* config/cris/t-aout: Remove.
	* config/i386/beos-elf.h: Remove.
	* config/i386/kaos-i386.h: Remove.
	* config/i386/ptx4-i.h: Remove.
	* config/i386/sco5.h: Remove.
	* config/i386/sco5.opt: Remove.
	* config/i386/sysv4-cpp.h: Remove.
	* config/i386/sysv5.h: Remove.
	* config/i386/t-beos: Remove.
	* config/i386/t-sco5: Remove.
	* config/i386/t-uwin: Remove.
	* config/i386/uwin.asm: Remove.
	* config/i386/uwin.h: Remove.
	* config/kaos.h: Remove.
	* config/mips/windiss.h: Remove.
	* config/mt: Remove directory.
	* config/pa/pa-osf.h: Remove.
	* config/pa/pa-pro-end.h: Remove.
	* config/pa/t-pro: Remove.
	* config/ptx4.h: Remove.
	* config/rs6000/beos.h: Remove.
	* config/rs6000/kaos-ppc.h: Remove.
	* config/rs6000/t-beos: Remove.
	* config/rs6000/windiss.h: Remove.
	* config/sh/kaos-sh.h: Remove.
	* config/sol2-6.h: Remove.
	* config/sparc/sol26-sld.h: Remove.
	* config/sparc/sysv4-only.h: Remove.
	* config/vax/bsd.h: Remove.
	* config/vax/t-memfuncs: Remove.
	* config/vax/ultrix.h: Remove.
	* config/vax/vaxv.h: Remove.
	* config/windiss.h: Remove.

gcc/testsuite:
	* g++.dg/abi/arm_cxa_vec1.C: Don't handle xscale*-*-*.
	* g++.dg/eh/spbp.C: Don't handle *-*-solaris2.[56]*.
	* g++.dg/warn/miss-format-1.C: Don't handle Solaris before Solaris
	7.
	* gcc.c-torture/compile/981006-1.c: Don't handle xscale*-*-*,
	strongarm*-*-* and cris-*-aout*.
	* gcc.c-torture/execute/941014-1.x: Don't handle xscale*-*-* and
	strongarm*-*-*.
	* gcc.dg/20030909-1.c: Don't handle xscale*-*-* and
	strongarm*-*-*.
	* gcc.dg/20031108-1.c: Don't handle xscale*-*-* and
	strongarm*-*-*.
	* gcc.dg/20040813-1.c: Don't handle *-*-sysv5*.
	* gcc.dg/arm-asm.c: Don't handle strongarm*-*-* and xscale*-*-*.
	* gcc.dg/arm-scd42-1.c: Use target arm*-*-*.
	* gcc.dg/arm-scd42-3.c: Use target arm*-*-*.
	* gcc.dg/cpp/assert4.c: Don't handle BeOS.
	* gcc.dg/debug/pr35154.c: Don't handle *-*-sysv5*.
	* gcc.dg/intmax_t-1.c: Don't handle *-*-solaris2.5.1 and
	xscale*-*-elf*.
	* gcc.dg/pragma-align.c: Don't handle i?86-*-sco3.2v5*.
	* gcc.dg/pthread-init-2.c: Don't handle *-*-solaris2.5.1.
	* gcc.misc-tests/arm-isr.exp: Use target arm*-*-*.
	* gcc.target/powerpc/ppc-sdata-1.c: Don't handle powerpc-*-sysv*.
	* gcc.target/powerpc/ppc-sdata-2.c: Don't handle powerpc-*-sysv*.
	* gcc.target/powerpc/ppc-stackalign-1.c: Don't handle
	powerpc-*-sysv*.
	* gfortran.dg/debug/pr35154-stabs.f: Don't handle *-*-sysv5*.
	* lib/target-supports.exp: Don't handle strongarm*-*-elf,
	xscale*-*-elf and *-*-windiss.
	* obj-c++.dg/dwarf-2.mm: Don't handle *-*-solaris2.[56]*.
	* objc.dg/dwarf-1.m: Don't handle *-*-solaris2.[56]*.
	* objc.dg/dwarf-2.m: Don't handle *-*-solaris2.[56]*.
	* gcc.dg/mt-loopi1.c: Remove.

gnattools:
	* configure.ac (xscale*-wrs-vx*, xscale*-wrs-coff): Remove.
	* configure: Regenerate.

libcpp:
	* configure.ac (parisc*64*-*-*): Remove.
	* configure: Regenerate.

libffi:
	* configure.ac (parisc*-*-linux*, powerpc-*-sysv*,
	powerpc-*-beos*): Remove.
	* configure: Regenerate.

libgcc:
	* config.host (strongarm*-*-*, ep9312*-*-*, xscale-*-*,
	parisc*-*-*, m680[012]0-*-*, *-*-linux*libc1*, *-*-linux*aout*,
	alpha*-*-unicosmk*, strongarm*-*-freebsd*, ep9312-*-elf,
	arm*-*-kaos*, cris-*-aout, parisc*64*-*-linux*, parisc*-*-linux*,
	hppa1.1-*-pro*, hppa1.1-*-osf*, hppa1.1-*-bsd*,
	i[34567]86-sequent-ptx4*, i[34567]86-sequent-sysv4*,
	i[34567]86-*-beoself*, i[34567]86-*-beos*, i[34567]86-*-sco3.2v5*,
	i[34567]86-*-sysv5*, i[34567]86-*-sysv4*, i[34567]86-*-uwin*,
	i[34567]86-*-kaos*, m68020-*-elf*, m68010-*-netbsdelf*,
	mips-wrs-windiss, mt-*-elf, powerpc-*-beos*, powerpc-*-chorusos*,
	powerpc-wrs-windiss*, powerpcle-*-sysv*, powerpc-*-kaos*,
	powerpcle-*-kaos*, sh*-*-kaos*, sparc-*-sysv4*, strongarm-*-elf*,
	strongarm-*-pe, strongarm-*-kaos*, vax-*-bsd*, vax-*-sysv*,
	vax-*-ultrix*, xscale-*-elf, xscale-*-coff): Remove.

libjava:
	* configure.host (strongarm*-elf, xscale*-elf): Remove.

libstdc++-v3:
	* configure.host (xscale, ep9312, m680[246]0, solaris2.5,
	solaris2.5.[0-9], solaris2.6, windiss*): Remove.
	* crossconfig.m4 (*-solaris2.5, *-solaris2.6, *-windiss*): Remove.
	* configure: Regenerate.
	* config/os/solaris/solaris2.5: Remove directory.
	* config/os/solaris/solaris2.6: Remove directory.
	* config/os/windiss: Remove directory.

From-SVN: r136534

											
										
										
											2008-06-07 20:00:15 +02:00
+								#endif /* WINNT && ! __CYGWIN__ */
-												(trampoline): Add getpagesize and mprotect for WINNT.

From-SVN: r10605

											
										
										
											1995-11-26 20:41:43 +01:00
-												gcov.c (output_data): Use HOST_WIDEST_INT_PRINT_DEC to output variables of type HOST_WIDEST_INT.

	* gcov.c (output_data): Use HOST_WIDEST_INT_PRINT_DEC to output
	variables of type HOST_WIDEST_INT.

	* libgcc2.c (__bb_exit_func): Handle gcov_type as long long.
	(__bb_exit_func): Correct type of count_max to avoid overflow.
	(num_digits): Handle long long argument.

	* combine.c (gen_lowpart_for_combine): Remove unused variable.

From-SVN: r44033

											
										
										
											2001-07-16 11:16:04 +02:00
+								#ifdef TRANSFER_FROM_TRAMPOLINE
 								TRANSFER_FROM_TRAMPOLINE
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								#endif
 								#endif /* L_trampoline */
-												Jumbo patch from Geoff Noer to rename CYGWIN32 to CYGWIN.

From-SVN: r23622

											
										
										
											1998-11-12 20:37:47 +01:00
+								#ifndef __CYGWIN__
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								#ifdef L__main
 								#include "gbl-ctors.h"
-												crtstuff.c: Handle targets that use .init_array.

	* crtstuff.c: Handle targets that use .init_array.
	* function.c (HAS_INIT_SECTION): Do not define. Instead, make sure
	that INVOKE__main is set correctly.
	(expand_main_function): Test INVOKE__main.
	* libgcc2.c: Do not define __main when using .init_array.
	* config/arm/arm.c (arm_elf_asm_constructor): New function.
	* config/arm/arm.h (CTORS_SECTION_ASM_OP): Define, with specialized
	libgcc version.
	(DTORS_SECTION_ASM_OP): Likewise.
	(CTOR_LIST_BEGIN): Define specially when in libgcc.
	(CTOR_LIST_END): Likewise.
	(DTOR_LIST_BEGIN): Likewise.
	(DTOR_LIST_END): Likewise.
	* config/arm/bpapi.h (INIT_SECTION_ASM_OP): Do not define it.
	(FINI_SECTION_ASM_OP): Likewise.
	(INIT_ARRAY_SECTION_ASM_OP): Define.
	(FINI_ARRAY_SECTION_ASM_OP): Likewise.
	* config/arm/elf.h (TARGET_ASM_CONSTRUCTOR): Define.
	(SUPPORTS_INIT_PRIORITY): Evaluate to false for EABI based targets.
	* doc/tm.texi (INIT_ARRAY_SECTION_ASM_OP): Document.
	(FINI_ARRAY_SECTION_ASM_OP): Likewise.

Co-Authored-By: Mark Mitchell <mark@codesourcery.com>
Co-Authored-By: Paul Brook <paul@codesourcery.com>

From-SVN: r98986

											
										
										
											2005-04-29 16:09:45 +02:00
-												(__main): Use macro SYMBOL__MAIN instead of invoking directly.

From-SVN: r4992

											
										
										
											1993-07-26 23:00:16 +02:00
+								/* Some systems use __main in a way incompatible with its use in gcc; in these
 								   cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
 								   give the same symbol without quotes for an alternative entry point.  You
-												formatting tweaks

From-SVN: r12390

											
										
										
											1996-07-04 00:07:53 +02:00
+								   must define both, or neither.  */
-												(__main): Use macro SYMBOL__MAIN instead of invoking directly.

From-SVN: r4992

											
										
										
											1993-07-26 23:00:16 +02:00
+								#ifndef NAME__MAIN
 								#define NAME__MAIN "__main"
 								#define SYMBOL__MAIN __main
 								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												Use -fbuilding-libgcc for more target macros used in libgcc.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Also define
	__LIBGCC_EH_TABLES_CAN_BE_READ_ONLY__,
	__LIBGCC_EH_FRAME_SECTION_NAME__, __LIBGCC_JCR_SECTION_NAME__,
	__LIBGCC_CTORS_SECTION_ASM_OP__, __LIBGCC_DTORS_SECTION_ASM_OP__,
	__LIBGCC_TEXT_SECTION_ASM_OP__, __LIBGCC_INIT_SECTION_ASM_OP__,
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__,
	__LIBGCC_STACK_GROWS_DOWNWARD__,
	__LIBGCC_DONT_USE_BUILTIN_SETJMP__,
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__,
	__LIBGCC_DWARF_FRAME_REGISTERS__,
	__LIBGCC_EH_RETURN_STACKADJ_RTX__, __LIBGCC_JMP_BUF_SIZE__,
	__LIBGCC_STACK_POINTER_REGNUM__ and
	__LIBGCC_VTABLE_USES_DESCRIPTORS__ for -fbuilding-libgcc.
	(builtin_define_with_value): Handle backslash-escaping in string
	macro values.

libgcc:
	* Makefile.in (CRTSTUFF_CFLAGS): Add -fbuilding-libgcc.
	* config/aarch64/linux-unwind.h (STACK_POINTER_REGNUM): Change all
	uses to __LIBGCC_STACK_POINTER_REGNUM__.
	(DWARF_ALT_FRAME_RETURN_COLUMN): Change all uses to
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/alpha/vms-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN):
	Change use to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/cr16/unwind-cr16.c (STACK_GROWS_DOWNWARD): Change all
	uses to __LIBGCC_STACK_GROWS_DOWNWARD__.
	(DWARF_FRAME_REGISTERS): Change all uses to
	__LIBGCC_DWARF_FRAME_REGISTERS__.
	(EH_RETURN_STACKADJ_RTX): Change all uses to
	__LIBGCC_EH_RETURN_STACKADJ_RTX__.
	* config/cr16/unwind-dw2.h (DWARF_FRAME_REGISTERS): Change use to
	__LIBGCC_DWARF_FRAME_REGISTERS__.  Remove conditional definition.
	* config/i386/cygming-crtbegin.c (EH_FRAME_SECTION_NAME): Change
	use to __LIBGCC_EH_FRAME_SECTION_NAME__.
	(JCR_SECTION_NAME): Change use to __LIBGCC_JCR_SECTION_NAME__.
	* config/i386/cygming-crtend.c (EH_FRAME_SECTION_NAME): Change use
	to __LIBGCC_EH_FRAME_SECTION_NAME__.
	(JCR_SECTION_NAME): Change use to __LIBGCC_JCR_SECTION_NAME__
	* config/mips/linux-unwind.h (STACK_POINTER_REGNUM): Change use to
	__LIBGCC_STACK_POINTER_REGNUM__.
	(DWARF_ALT_FRAME_RETURN_COLUMN): Change all uses to
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/nios2/linux-unwind.h (STACK_POINTER_REGNUM): Change use
	to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/pa/hpux-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN): Change
	all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/pa/linux-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN): Change
	all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/rs6000/aix-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN):
	Change all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	(STACK_POINTER_REGNUM): Change all uses to
	__LIBGCC_STACK_POINTER_REGNUM__.
	* config/rs6000/darwin-fallback.c (STACK_POINTER_REGNUM): Change
	use to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/rs6000/linux-unwind.h (STACK_POINTER_REGNUM): Change all
	uses to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/sparc/linux-unwind.h (DWARF_FRAME_REGISTERS): Change use
	to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* config/sparc/sol2-unwind.h (DWARF_FRAME_REGISTERS): Change use
	to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* config/tilepro/linux-unwind.h (STACK_POINTER_REGNUM): Change use
	to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/xtensa/unwind-dw2-xtensa.h (DWARF_FRAME_REGISTERS):
	Remove conditional definition.
	* crtstuff.c (TEXT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_TEXT_SECTION_ASM_OP__.
	(EH_FRAME_SECTION_NAME): Change all uses to
	__LIBGCC_EH_FRAME_SECTION_NAME__.
	(EH_TABLES_CAN_BE_READ_ONLY): Change all uses to
	__LIBGCC_EH_TABLES_CAN_BE_READ_ONLY__.
	(CTORS_SECTION_ASM_OP): Change all uses to
	__LIBGCC_CTORS_SECTION_ASM_OP__.
	(DTORS_SECTION_ASM_OP): Change all uses to
	__LIBGCC_DTORS_SECTION_ASM_OP__.
	(JCR_SECTION_NAME): Change all uses to
	__LIBGCC_JCR_SECTION_NAME__.
	(INIT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_SECTION_ASM_OP__.
	(INIT_ARRAY_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__.
	* generic-morestack.c (STACK_GROWS_DOWNWARD): Change all uses to
	__LIBGCC_STACK_GROWS_DOWNWARD__.
	* libgcc2.c (INIT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_SECTION_ASM_OP__.
	(INIT_ARRAY_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__.
	(EH_FRAME_SECTION_NAME): Change all uses to
	__LIBGCC_EH_FRAME_SECTION_NAME__.
	* libgcov-profiler.c (VTABLE_USES_DESCRIPTORS): Remove conditional
	definitions.  Change all uses to
	__LIBGCC_VTABLE_USES_DESCRIPTORS__.
	* unwind-dw2.c (STACK_GROWS_DOWNWARD): Change all uses to
	__LIBGCC_STACK_GROWS_DOWNWARD__.
	(DWARF_FRAME_REGISTERS): Change all uses to
	__LIBGCC_DWARF_FRAME_REGISTERS__.
	(EH_RETURN_STACKADJ_RTX): Change all uses to
	__LIBGCC_EH_RETURN_STACKADJ_RTX__.
	* unwind-dw2.h (DWARF_FRAME_REGISTERS): Remove conditional
	definition.  Change use to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* unwind-sjlj.c (DONT_USE_BUILTIN_SETJMP): Change all uses to
	__LIBGCC_DONT_USE_BUILTIN_SETJMP__.
	(JMP_BUF_SIZE): Change use to __LIBGCC_JMP_BUF_SIZE__.

From-SVN: r214954

											
										
										
											2014-09-05 14:03:46 +02:00
+								#if defined (__LIBGCC_INIT_SECTION_ASM_OP__) \
 								    || defined (__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__)
-												irix6 ctors

From-SVN: r12971

											
										
										
											1996-10-16 22:25:25 +02:00
+								#undef HAS_INIT_SECTION
 								#define HAS_INIT_SECTION
 								#endif
 								#if !defined (HAS_INIT_SECTION) || !defined (OBJECT_FORMAT_ELF)
-												defaults.h (EH_FRAME_SECTION, [...]): Define here.

	* defaults.h (EH_FRAME_SECTION, EH_FRAME_SECTION_ASM_OP): Define here.
	* crtstuff.c: Not here.
	* dwarf2out.c: Or here.
	* libgcc2.c (__do_global_ctors, __do_global_dtors): Handle EH frame
	info.

From-SVN: r29241

											
										
										
											1999-09-09 23:36:20 +02:00
 								/* Some ELF crosses use crtstuff.c to provide __CTOR_LIST__, but use this
-												re PR target/67172 (i686-w64-mingw32 dwarf2 bootstrap fails with undefined reference to __EH_FRAME_BEGIN__)

	PR target/67172
	* libgcc2.c (L__main): Undefine __LIBGCC_EH_FRAME_SECTION_NAME__ if
	__MINGW32__ is defined.

From-SVN: r234727

											
										
										
											2016-04-05 00:29:02 +02:00
+								   code to run constructors.  In that case, we need to handle EH here, too.
 								   But MINGW32 is special because it handles CRTSTUFF and EH on its own.  */
 								#ifdef __MINGW32__
 								#undef __LIBGCC_EH_FRAME_SECTION_NAME__
 								#endif
-												defaults.h (EH_FRAME_SECTION, [...]): Define here.

	* defaults.h (EH_FRAME_SECTION, EH_FRAME_SECTION_ASM_OP): Define here.
	* crtstuff.c: Not here.
	* dwarf2out.c: Or here.
	* libgcc2.c (__do_global_ctors, __do_global_dtors): Handle EH frame
	info.

From-SVN: r29241

											
										
										
											1999-09-09 23:36:20 +02:00
-												Use -fbuilding-libgcc for more target macros used in libgcc.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Also define
	__LIBGCC_EH_TABLES_CAN_BE_READ_ONLY__,
	__LIBGCC_EH_FRAME_SECTION_NAME__, __LIBGCC_JCR_SECTION_NAME__,
	__LIBGCC_CTORS_SECTION_ASM_OP__, __LIBGCC_DTORS_SECTION_ASM_OP__,
	__LIBGCC_TEXT_SECTION_ASM_OP__, __LIBGCC_INIT_SECTION_ASM_OP__,
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__,
	__LIBGCC_STACK_GROWS_DOWNWARD__,
	__LIBGCC_DONT_USE_BUILTIN_SETJMP__,
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__,
	__LIBGCC_DWARF_FRAME_REGISTERS__,
	__LIBGCC_EH_RETURN_STACKADJ_RTX__, __LIBGCC_JMP_BUF_SIZE__,
	__LIBGCC_STACK_POINTER_REGNUM__ and
	__LIBGCC_VTABLE_USES_DESCRIPTORS__ for -fbuilding-libgcc.
	(builtin_define_with_value): Handle backslash-escaping in string
	macro values.

libgcc:
	* Makefile.in (CRTSTUFF_CFLAGS): Add -fbuilding-libgcc.
	* config/aarch64/linux-unwind.h (STACK_POINTER_REGNUM): Change all
	uses to __LIBGCC_STACK_POINTER_REGNUM__.
	(DWARF_ALT_FRAME_RETURN_COLUMN): Change all uses to
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/alpha/vms-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN):
	Change use to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/cr16/unwind-cr16.c (STACK_GROWS_DOWNWARD): Change all
	uses to __LIBGCC_STACK_GROWS_DOWNWARD__.
	(DWARF_FRAME_REGISTERS): Change all uses to
	__LIBGCC_DWARF_FRAME_REGISTERS__.
	(EH_RETURN_STACKADJ_RTX): Change all uses to
	__LIBGCC_EH_RETURN_STACKADJ_RTX__.
	* config/cr16/unwind-dw2.h (DWARF_FRAME_REGISTERS): Change use to
	__LIBGCC_DWARF_FRAME_REGISTERS__.  Remove conditional definition.
	* config/i386/cygming-crtbegin.c (EH_FRAME_SECTION_NAME): Change
	use to __LIBGCC_EH_FRAME_SECTION_NAME__.
	(JCR_SECTION_NAME): Change use to __LIBGCC_JCR_SECTION_NAME__.
	* config/i386/cygming-crtend.c (EH_FRAME_SECTION_NAME): Change use
	to __LIBGCC_EH_FRAME_SECTION_NAME__.
	(JCR_SECTION_NAME): Change use to __LIBGCC_JCR_SECTION_NAME__
	* config/mips/linux-unwind.h (STACK_POINTER_REGNUM): Change use to
	__LIBGCC_STACK_POINTER_REGNUM__.
	(DWARF_ALT_FRAME_RETURN_COLUMN): Change all uses to
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/nios2/linux-unwind.h (STACK_POINTER_REGNUM): Change use
	to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/pa/hpux-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN): Change
	all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/pa/linux-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN): Change
	all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/rs6000/aix-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN):
	Change all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	(STACK_POINTER_REGNUM): Change all uses to
	__LIBGCC_STACK_POINTER_REGNUM__.
	* config/rs6000/darwin-fallback.c (STACK_POINTER_REGNUM): Change
	use to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/rs6000/linux-unwind.h (STACK_POINTER_REGNUM): Change all
	uses to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/sparc/linux-unwind.h (DWARF_FRAME_REGISTERS): Change use
	to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* config/sparc/sol2-unwind.h (DWARF_FRAME_REGISTERS): Change use
	to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* config/tilepro/linux-unwind.h (STACK_POINTER_REGNUM): Change use
	to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/xtensa/unwind-dw2-xtensa.h (DWARF_FRAME_REGISTERS):
	Remove conditional definition.
	* crtstuff.c (TEXT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_TEXT_SECTION_ASM_OP__.
	(EH_FRAME_SECTION_NAME): Change all uses to
	__LIBGCC_EH_FRAME_SECTION_NAME__.
	(EH_TABLES_CAN_BE_READ_ONLY): Change all uses to
	__LIBGCC_EH_TABLES_CAN_BE_READ_ONLY__.
	(CTORS_SECTION_ASM_OP): Change all uses to
	__LIBGCC_CTORS_SECTION_ASM_OP__.
	(DTORS_SECTION_ASM_OP): Change all uses to
	__LIBGCC_DTORS_SECTION_ASM_OP__.
	(JCR_SECTION_NAME): Change all uses to
	__LIBGCC_JCR_SECTION_NAME__.
	(INIT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_SECTION_ASM_OP__.
	(INIT_ARRAY_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__.
	* generic-morestack.c (STACK_GROWS_DOWNWARD): Change all uses to
	__LIBGCC_STACK_GROWS_DOWNWARD__.
	* libgcc2.c (INIT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_SECTION_ASM_OP__.
	(INIT_ARRAY_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__.
	(EH_FRAME_SECTION_NAME): Change all uses to
	__LIBGCC_EH_FRAME_SECTION_NAME__.
	* libgcov-profiler.c (VTABLE_USES_DESCRIPTORS): Remove conditional
	definitions.  Change all uses to
	__LIBGCC_VTABLE_USES_DESCRIPTORS__.
	* unwind-dw2.c (STACK_GROWS_DOWNWARD): Change all uses to
	__LIBGCC_STACK_GROWS_DOWNWARD__.
	(DWARF_FRAME_REGISTERS): Change all uses to
	__LIBGCC_DWARF_FRAME_REGISTERS__.
	(EH_RETURN_STACKADJ_RTX): Change all uses to
	__LIBGCC_EH_RETURN_STACKADJ_RTX__.
	* unwind-dw2.h (DWARF_FRAME_REGISTERS): Remove conditional
	definition.  Change use to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* unwind-sjlj.c (DONT_USE_BUILTIN_SETJMP): Change all uses to
	__LIBGCC_DONT_USE_BUILTIN_SETJMP__.
	(JMP_BUF_SIZE): Change use to __LIBGCC_JMP_BUF_SIZE__.

From-SVN: r214954

											
										
										
											2014-09-05 14:03:46 +02:00
+								#ifdef __LIBGCC_EH_FRAME_SECTION_NAME__
-												* libgcc2.c [L__main]: Include unwind-dw2-fde.h instead of frame.h.

From-SVN: r40957

											
										
										
											2001-03-29 23:11:23 +02:00
+								#include "unwind-dw2-fde.h"
-												defaults.h (EH_FRAME_SECTION, [...]): Define here.

	* defaults.h (EH_FRAME_SECTION, EH_FRAME_SECTION_ASM_OP): Define here.
	* crtstuff.c: Not here.
	* dwarf2out.c: Or here.
	* libgcc2.c (__do_global_ctors, __do_global_dtors): Handle EH frame
	info.

From-SVN: r29241

											
										
										
											1999-09-09 23:36:20 +02:00
+								extern unsigned char __EH_FRAME_BEGIN__[];
 								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								/* Run all the global destructors on exit from the program.  */
 								void
-												gthr-single.h (__gthread_active_p): Add prototype arguments.

	* gthr-single.h (__gthread_active_p): Add prototype arguments.

	* libgcc2.c (__udivmoddi4): Remove unnecessary decls.
	(__dummy, __builtin_saveregs, __bb_exit_trace_func, __bb_init_prg,
	__bb_trace_func, __bb_trace_func_ret, __bb_trace_ret,
	function_ptr, getpagesize, __enable_execute_stack,
	__enable_execute_stack, __clear_insn_cache,
	__enable_execute_stack, __do_global_dtors, __do_global_ctors,
	_cleanup, _exit, __default_terminate, __terminate_func,
	__terminate, __empty, __throw, new_eh_context,
	eh_context_initialize, eh_context_static, eh_context_specific,
	get_eh_context, __get_eh_context, __get_eh_info,
	init_reg_size_table, eh_threads_initialize,
	__get_dynamic_handler_chain, __sjthrow, __sjpopnthrow,
	__unwinding_cleanup, throw_helper, __throw, __rethrow,
	__pure_virtual): Add prototype arguments.
	(__bb_exit_func): Cast a sizeof to long when comparing against one.
	Cast a signed value to unsigned long when comparing against one.
	(new_eh_context): Wrap in _GTHREADS macro.
	(__sjthrow, __sjpopnthrow): Initialize variable `cleanup' at
	declaration.
	(in_reg_window): Mark parameters with __attribute__ ((__unused__)).
	(throw_helper): Initialize variables `handler_p' and `pc_p'.

From-SVN: r30872

											
										
										
											1999-12-12 16:34:09 +01:00
+								__do_global_dtors (void)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												*** empty log message ***

From-SVN: r309

											
										
										
											1992-02-12 18:47:31 +01:00
+								#ifdef DO_GLOBAL_DTORS_BODY
 								  DO_GLOBAL_DTORS_BODY;
 								#else
-												crtstuff.c (__do_global_dtors_aux): Allow finalization code to be run more than once.

        * crtstuff.c (__do_global_dtors_aux): Allow finalization code to
        be run more than once.
        * libgcc2.c (__do_global_dtors): Ditto.

From-SVN: r13023

											
										
										
											1996-10-24 03:23:43 +02:00
+								  static func_ptr *p = __DTOR_LIST__ + 1;
 								  while (*p)
 								    {
 								      p++;
 								      (*(p-1)) ();
 								    }
-												*** empty log message ***

From-SVN: r309

											
										
										
											1992-02-12 18:47:31 +01:00
+								#endif
-												Use -fbuilding-libgcc for more target macros used in libgcc.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Also define
	__LIBGCC_EH_TABLES_CAN_BE_READ_ONLY__,
	__LIBGCC_EH_FRAME_SECTION_NAME__, __LIBGCC_JCR_SECTION_NAME__,
	__LIBGCC_CTORS_SECTION_ASM_OP__, __LIBGCC_DTORS_SECTION_ASM_OP__,
	__LIBGCC_TEXT_SECTION_ASM_OP__, __LIBGCC_INIT_SECTION_ASM_OP__,
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__,
	__LIBGCC_STACK_GROWS_DOWNWARD__,
	__LIBGCC_DONT_USE_BUILTIN_SETJMP__,
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__,
	__LIBGCC_DWARF_FRAME_REGISTERS__,
	__LIBGCC_EH_RETURN_STACKADJ_RTX__, __LIBGCC_JMP_BUF_SIZE__,
	__LIBGCC_STACK_POINTER_REGNUM__ and
	__LIBGCC_VTABLE_USES_DESCRIPTORS__ for -fbuilding-libgcc.
	(builtin_define_with_value): Handle backslash-escaping in string
	macro values.

libgcc:
	* Makefile.in (CRTSTUFF_CFLAGS): Add -fbuilding-libgcc.
	* config/aarch64/linux-unwind.h (STACK_POINTER_REGNUM): Change all
	uses to __LIBGCC_STACK_POINTER_REGNUM__.
	(DWARF_ALT_FRAME_RETURN_COLUMN): Change all uses to
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/alpha/vms-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN):
	Change use to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/cr16/unwind-cr16.c (STACK_GROWS_DOWNWARD): Change all
	uses to __LIBGCC_STACK_GROWS_DOWNWARD__.
	(DWARF_FRAME_REGISTERS): Change all uses to
	__LIBGCC_DWARF_FRAME_REGISTERS__.
	(EH_RETURN_STACKADJ_RTX): Change all uses to
	__LIBGCC_EH_RETURN_STACKADJ_RTX__.
	* config/cr16/unwind-dw2.h (DWARF_FRAME_REGISTERS): Change use to
	__LIBGCC_DWARF_FRAME_REGISTERS__.  Remove conditional definition.
	* config/i386/cygming-crtbegin.c (EH_FRAME_SECTION_NAME): Change
	use to __LIBGCC_EH_FRAME_SECTION_NAME__.
	(JCR_SECTION_NAME): Change use to __LIBGCC_JCR_SECTION_NAME__.
	* config/i386/cygming-crtend.c (EH_FRAME_SECTION_NAME): Change use
	to __LIBGCC_EH_FRAME_SECTION_NAME__.
	(JCR_SECTION_NAME): Change use to __LIBGCC_JCR_SECTION_NAME__
	* config/mips/linux-unwind.h (STACK_POINTER_REGNUM): Change use to
	__LIBGCC_STACK_POINTER_REGNUM__.
	(DWARF_ALT_FRAME_RETURN_COLUMN): Change all uses to
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/nios2/linux-unwind.h (STACK_POINTER_REGNUM): Change use
	to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/pa/hpux-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN): Change
	all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/pa/linux-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN): Change
	all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/rs6000/aix-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN):
	Change all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	(STACK_POINTER_REGNUM): Change all uses to
	__LIBGCC_STACK_POINTER_REGNUM__.
	* config/rs6000/darwin-fallback.c (STACK_POINTER_REGNUM): Change
	use to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/rs6000/linux-unwind.h (STACK_POINTER_REGNUM): Change all
	uses to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/sparc/linux-unwind.h (DWARF_FRAME_REGISTERS): Change use
	to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* config/sparc/sol2-unwind.h (DWARF_FRAME_REGISTERS): Change use
	to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* config/tilepro/linux-unwind.h (STACK_POINTER_REGNUM): Change use
	to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/xtensa/unwind-dw2-xtensa.h (DWARF_FRAME_REGISTERS):
	Remove conditional definition.
	* crtstuff.c (TEXT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_TEXT_SECTION_ASM_OP__.
	(EH_FRAME_SECTION_NAME): Change all uses to
	__LIBGCC_EH_FRAME_SECTION_NAME__.
	(EH_TABLES_CAN_BE_READ_ONLY): Change all uses to
	__LIBGCC_EH_TABLES_CAN_BE_READ_ONLY__.
	(CTORS_SECTION_ASM_OP): Change all uses to
	__LIBGCC_CTORS_SECTION_ASM_OP__.
	(DTORS_SECTION_ASM_OP): Change all uses to
	__LIBGCC_DTORS_SECTION_ASM_OP__.
	(JCR_SECTION_NAME): Change all uses to
	__LIBGCC_JCR_SECTION_NAME__.
	(INIT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_SECTION_ASM_OP__.
	(INIT_ARRAY_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__.
	* generic-morestack.c (STACK_GROWS_DOWNWARD): Change all uses to
	__LIBGCC_STACK_GROWS_DOWNWARD__.
	* libgcc2.c (INIT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_SECTION_ASM_OP__.
	(INIT_ARRAY_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__.
	(EH_FRAME_SECTION_NAME): Change all uses to
	__LIBGCC_EH_FRAME_SECTION_NAME__.
	* libgcov-profiler.c (VTABLE_USES_DESCRIPTORS): Remove conditional
	definitions.  Change all uses to
	__LIBGCC_VTABLE_USES_DESCRIPTORS__.
	* unwind-dw2.c (STACK_GROWS_DOWNWARD): Change all uses to
	__LIBGCC_STACK_GROWS_DOWNWARD__.
	(DWARF_FRAME_REGISTERS): Change all uses to
	__LIBGCC_DWARF_FRAME_REGISTERS__.
	(EH_RETURN_STACKADJ_RTX): Change all uses to
	__LIBGCC_EH_RETURN_STACKADJ_RTX__.
	* unwind-dw2.h (DWARF_FRAME_REGISTERS): Remove conditional
	definition.  Change use to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* unwind-sjlj.c (DONT_USE_BUILTIN_SETJMP): Change all uses to
	__LIBGCC_DONT_USE_BUILTIN_SETJMP__.
	(JMP_BUF_SIZE): Change use to __LIBGCC_JMP_BUF_SIZE__.

From-SVN: r214954

											
										
										
											2014-09-05 14:03:46 +02:00
+								#if defined (__LIBGCC_EH_FRAME_SECTION_NAME__) && !defined (HAS_INIT_SECTION)
-												libgcc2.c (__do_global_dtors): Protect __deregister_frame_info from multiple calls.

	* libgcc2.c (__do_global_dtors): Protect __deregister_frame_info
	from multiple calls.

From-SVN: r29979

											
										
										
											1999-10-14 15:38:01 +02:00
+								  {
 								    static int completed = 0;
 								    if (! completed)
 								      {
 									completed = 1;
 									__deregister_frame_info (__EH_FRAME_BEGIN__);
 								      }
 								  }
-												defaults.h (EH_FRAME_SECTION, [...]): Define here.

	* defaults.h (EH_FRAME_SECTION, EH_FRAME_SECTION_ASM_OP): Define here.
	* crtstuff.c: Not here.
	* dwarf2out.c: Or here.
	* libgcc2.c (__do_global_ctors, __do_global_dtors): Handle EH frame
	info.

From-SVN: r29241

											
										
										
											1999-09-09 23:36:20 +02:00
+								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
-												Shared library support.

From-SVN: r8519

											
										
										
											1994-11-19 05:09:58 +01:00
+								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												irix6 ctors

From-SVN: r12971

											
										
										
											1996-10-16 22:25:25 +02:00
+								#ifndef HAS_INIT_SECTION
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								/* Run all the global constructors on entry to the program.  */
 								void
-												gthr-single.h (__gthread_active_p): Add prototype arguments.

	* gthr-single.h (__gthread_active_p): Add prototype arguments.

	* libgcc2.c (__udivmoddi4): Remove unnecessary decls.
	(__dummy, __builtin_saveregs, __bb_exit_trace_func, __bb_init_prg,
	__bb_trace_func, __bb_trace_func_ret, __bb_trace_ret,
	function_ptr, getpagesize, __enable_execute_stack,
	__enable_execute_stack, __clear_insn_cache,
	__enable_execute_stack, __do_global_dtors, __do_global_ctors,
	_cleanup, _exit, __default_terminate, __terminate_func,
	__terminate, __empty, __throw, new_eh_context,
	eh_context_initialize, eh_context_static, eh_context_specific,
	get_eh_context, __get_eh_context, __get_eh_info,
	init_reg_size_table, eh_threads_initialize,
	__get_dynamic_handler_chain, __sjthrow, __sjpopnthrow,
	__unwinding_cleanup, throw_helper, __throw, __rethrow,
	__pure_virtual): Add prototype arguments.
	(__bb_exit_func): Cast a sizeof to long when comparing against one.
	Cast a signed value to unsigned long when comparing against one.
	(new_eh_context): Wrap in _GTHREADS macro.
	(__sjthrow, __sjpopnthrow): Initialize variable `cleanup' at
	declaration.
	(in_reg_window): Mark parameters with __attribute__ ((__unused__)).
	(throw_helper): Initialize variables `handler_p' and `pc_p'.

From-SVN: r30872

											
										
										
											1999-12-12 16:34:09 +01:00
+								__do_global_ctors (void)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
-												Use -fbuilding-libgcc for more target macros used in libgcc.

gcc/c-family:
	* c-cppbuiltin.c (c_cpp_builtins): Also define
	__LIBGCC_EH_TABLES_CAN_BE_READ_ONLY__,
	__LIBGCC_EH_FRAME_SECTION_NAME__, __LIBGCC_JCR_SECTION_NAME__,
	__LIBGCC_CTORS_SECTION_ASM_OP__, __LIBGCC_DTORS_SECTION_ASM_OP__,
	__LIBGCC_TEXT_SECTION_ASM_OP__, __LIBGCC_INIT_SECTION_ASM_OP__,
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__,
	__LIBGCC_STACK_GROWS_DOWNWARD__,
	__LIBGCC_DONT_USE_BUILTIN_SETJMP__,
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__,
	__LIBGCC_DWARF_FRAME_REGISTERS__,
	__LIBGCC_EH_RETURN_STACKADJ_RTX__, __LIBGCC_JMP_BUF_SIZE__,
	__LIBGCC_STACK_POINTER_REGNUM__ and
	__LIBGCC_VTABLE_USES_DESCRIPTORS__ for -fbuilding-libgcc.
	(builtin_define_with_value): Handle backslash-escaping in string
	macro values.

libgcc:
	* Makefile.in (CRTSTUFF_CFLAGS): Add -fbuilding-libgcc.
	* config/aarch64/linux-unwind.h (STACK_POINTER_REGNUM): Change all
	uses to __LIBGCC_STACK_POINTER_REGNUM__.
	(DWARF_ALT_FRAME_RETURN_COLUMN): Change all uses to
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/alpha/vms-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN):
	Change use to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/cr16/unwind-cr16.c (STACK_GROWS_DOWNWARD): Change all
	uses to __LIBGCC_STACK_GROWS_DOWNWARD__.
	(DWARF_FRAME_REGISTERS): Change all uses to
	__LIBGCC_DWARF_FRAME_REGISTERS__.
	(EH_RETURN_STACKADJ_RTX): Change all uses to
	__LIBGCC_EH_RETURN_STACKADJ_RTX__.
	* config/cr16/unwind-dw2.h (DWARF_FRAME_REGISTERS): Change use to
	__LIBGCC_DWARF_FRAME_REGISTERS__.  Remove conditional definition.
	* config/i386/cygming-crtbegin.c (EH_FRAME_SECTION_NAME): Change
	use to __LIBGCC_EH_FRAME_SECTION_NAME__.
	(JCR_SECTION_NAME): Change use to __LIBGCC_JCR_SECTION_NAME__.
	* config/i386/cygming-crtend.c (EH_FRAME_SECTION_NAME): Change use
	to __LIBGCC_EH_FRAME_SECTION_NAME__.
	(JCR_SECTION_NAME): Change use to __LIBGCC_JCR_SECTION_NAME__
	* config/mips/linux-unwind.h (STACK_POINTER_REGNUM): Change use to
	__LIBGCC_STACK_POINTER_REGNUM__.
	(DWARF_ALT_FRAME_RETURN_COLUMN): Change all uses to
	__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/nios2/linux-unwind.h (STACK_POINTER_REGNUM): Change use
	to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/pa/hpux-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN): Change
	all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/pa/linux-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN): Change
	all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	* config/rs6000/aix-unwind.h (DWARF_ALT_FRAME_RETURN_COLUMN):
	Change all uses to __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__.
	(STACK_POINTER_REGNUM): Change all uses to
	__LIBGCC_STACK_POINTER_REGNUM__.
	* config/rs6000/darwin-fallback.c (STACK_POINTER_REGNUM): Change
	use to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/rs6000/linux-unwind.h (STACK_POINTER_REGNUM): Change all
	uses to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/sparc/linux-unwind.h (DWARF_FRAME_REGISTERS): Change use
	to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* config/sparc/sol2-unwind.h (DWARF_FRAME_REGISTERS): Change use
	to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* config/tilepro/linux-unwind.h (STACK_POINTER_REGNUM): Change use
	to __LIBGCC_STACK_POINTER_REGNUM__.
	* config/xtensa/unwind-dw2-xtensa.h (DWARF_FRAME_REGISTERS):
	Remove conditional definition.
	* crtstuff.c (TEXT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_TEXT_SECTION_ASM_OP__.
	(EH_FRAME_SECTION_NAME): Change all uses to
	__LIBGCC_EH_FRAME_SECTION_NAME__.
	(EH_TABLES_CAN_BE_READ_ONLY): Change all uses to
	__LIBGCC_EH_TABLES_CAN_BE_READ_ONLY__.
	(CTORS_SECTION_ASM_OP): Change all uses to
	__LIBGCC_CTORS_SECTION_ASM_OP__.
	(DTORS_SECTION_ASM_OP): Change all uses to
	__LIBGCC_DTORS_SECTION_ASM_OP__.
	(JCR_SECTION_NAME): Change all uses to
	__LIBGCC_JCR_SECTION_NAME__.
	(INIT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_SECTION_ASM_OP__.
	(INIT_ARRAY_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__.
	* generic-morestack.c (STACK_GROWS_DOWNWARD): Change all uses to
	__LIBGCC_STACK_GROWS_DOWNWARD__.
	* libgcc2.c (INIT_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_SECTION_ASM_OP__.
	(INIT_ARRAY_SECTION_ASM_OP): Change all uses to
	__LIBGCC_INIT_ARRAY_SECTION_ASM_OP__.
	(EH_FRAME_SECTION_NAME): Change all uses to
	__LIBGCC_EH_FRAME_SECTION_NAME__.
	* libgcov-profiler.c (VTABLE_USES_DESCRIPTORS): Remove conditional
	definitions.  Change all uses to
	__LIBGCC_VTABLE_USES_DESCRIPTORS__.
	* unwind-dw2.c (STACK_GROWS_DOWNWARD): Change all uses to
	__LIBGCC_STACK_GROWS_DOWNWARD__.
	(DWARF_FRAME_REGISTERS): Change all uses to
	__LIBGCC_DWARF_FRAME_REGISTERS__.
	(EH_RETURN_STACKADJ_RTX): Change all uses to
	__LIBGCC_EH_RETURN_STACKADJ_RTX__.
	* unwind-dw2.h (DWARF_FRAME_REGISTERS): Remove conditional
	definition.  Change use to __LIBGCC_DWARF_FRAME_REGISTERS__.
	* unwind-sjlj.c (DONT_USE_BUILTIN_SETJMP): Change all uses to
	__LIBGCC_DONT_USE_BUILTIN_SETJMP__.
	(JMP_BUF_SIZE): Change use to __LIBGCC_JMP_BUF_SIZE__.

From-SVN: r214954

											
										
										
											2014-09-05 14:03:46 +02:00
+								#ifdef __LIBGCC_EH_FRAME_SECTION_NAME__
-												defaults.h (EH_FRAME_SECTION, [...]): Define here.

	* defaults.h (EH_FRAME_SECTION, EH_FRAME_SECTION_ASM_OP): Define here.
	* crtstuff.c: Not here.
	* dwarf2out.c: Or here.
	* libgcc2.c (__do_global_ctors, __do_global_dtors): Handle EH frame
	info.

From-SVN: r29241

											
										
										
											1999-09-09 23:36:20 +02:00
+								  {
 								    static struct object object;
 								    __register_frame_info (__EH_FRAME_BEGIN__, &object);
 								  }
 								#endif
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								  DO_GLOBAL_CTORS_BODY;
-												libgcc2.c (__do_global_ctors): Call atexit with one arg.

	* libgcc2.c (__do_global_ctors): Call atexit with one arg.

	* sparc/sunos4.h (on_exit): Wrap prototype parameters in PARAMS().
	Define HAVE_ON_EXIT.

From-SVN: r29465

											
										
										
											1999-09-17 00:51:47 +02:00
+								  atexit (__do_global_dtors);
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								}
-												irix6 ctors

From-SVN: r12971

											
										
										
											1996-10-16 22:25:25 +02:00
+								#endif /* no HAS_INIT_SECTION */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												irix6 ctors

From-SVN: r12971

											
										
										
											1996-10-16 22:25:25 +02:00
+								#if !defined (HAS_INIT_SECTION) || defined (INVOKE__main)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								/* Subroutine called automatically by `main'.
 								   Compiling a global function named `main'
 								   produces an automatic call to this function at the beginning.
 								   For many systems, this routine calls __do_global_ctors.
 								   For systems which support a .init section we use the .init section
 								   to run __do_global_ctors, so we need not do anything here.  */
-												* libgcc2.c (SYMBOL__MAIN): Provide C90 declaration.

From-SVN: r73191

											
										
										
											2003-11-02 01:16:33 +01:00
+								extern void SYMBOL__MAIN (void);
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								void
-												* libgcc2.c (SYMBOL__MAIN): Provide C90 declaration.

From-SVN: r73191

											
										
										
											2003-11-02 01:16:33 +01:00
+								SYMBOL__MAIN (void)
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								{
 								  /* Support recursive calls to `main': run initializers just once.  */
-												Remove explicit 0-initializations of static variables.

From-SVN: r9269

											
										
										
											1995-03-31 01:51:30 +02:00
+								  static int initialized;
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
+								  if (! initialized)
 								    {
 								      initialized = 1;
 								      __do_global_ctors ();
 								    }
 								}
-												irix6 ctors

From-SVN: r12971

											
										
										
											1996-10-16 22:25:25 +02:00
+								#endif /* no HAS_INIT_SECTION or INVOKE__main */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								#endif /* L__main */
-												Jumbo patch from Geoff Noer to rename CYGWIN32 to CYGWIN.

From-SVN: r23622

											
										
										
											1998-11-12 20:37:47 +01:00
+								#endif /* __CYGWIN__ */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
-												(exit): Declare _exit as volatile.

(__CTOR_LIST__, __DTOR_LIST__): Moved to separate member.

From-SVN: r2158

											
										
										
											1992-09-19 06:47:11 +02:00
+								#ifdef L_ctors
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								#include "gbl-ctors.h"
 								/* Provide default definitions for the lists of constructors and
-												Makefile.in (libgcc2): Pass MAYBE_USE_COLLECT2 as an argument.

        * Makefile.in (libgcc2): Pass MAYBE_USE_COLLECT2 as an argument.
        * libgcc2.c (__CTOR_LIST, __DTOR_LIST): Do not provide
        initializers in some circumstances.

From-SVN: r27489

											
										
										
											1999-06-11 05:12:22 +02:00
+								   destructors, so that we don't get linker errors.  These symbols are
 								   intentionally bss symbols, so that gld and/or collect will provide
 								   the right values.  */
-												Initial revision

From-SVN: r243

											
										
										
											1992-01-28 04:44:05 +01:00
 								/* We declare the lists here with two elements each,
-												Makefile.in (libgcc2): Pass MAYBE_USE_COLLECT2 as an argument.

        * Makefile.in (libgcc2): Pass MAYBE_USE_COLLECT2 as an argument.
        * libgcc2.c (__CTOR_LIST, __DTOR_LIST): Do not provide
        initializers in some circumstances.

From-SVN: r27489

											
										
										
											1999-06-11 05:12:22 +02:00
+								   so that they are valid empty lists if no other definition is loaded.
 								   If we are using the old "set" extensions to have the gnu linker
 								   collect ctors and dtors, then __CTOR_LIST__ and __DTOR_LIST__
 								   must be in the bss/common section.
 								   Long term no port should use those extensions.  But many still do.  */
-												remove unused CTOR_LISTS_DEFINED_EXTERNALLY macro

The last target to use this was i386-interix, so since that is gone we
don't need this anymore.

libgcc/ChangeLog:

2016-07-06  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* libgcc2.c (SYMBOL__MAIN): Remove checks for
	CTOR_LISTS_DEFINED_EXTERNALLY.

From-SVN: r238067

											
										
										
											2016-07-07 01:55:52 +02:00
+								#if !defined(__LIBGCC_INIT_SECTION_ASM_OP__)
-												cppmacro.c (_cpp_backup_tokens): Pop cur_run before decrementing cur_token, not after.

        * cppmacro.c (_cpp_backup_tokens): Pop cur_run before decrementing
        cur_token, not after.

        * config/clipper/clix.h, config/h8300/elf.h, config/mips/linux.h:
        Lose ASM_OUTPUT_CONSTRUCTOR.
        * system.h: Poison it and INT_ASM_OP.
        * c-lang.c (finish_file): Don't check for it.
        * libgcc2.c (L_ctors): Check TARGET_ASM_CONSTRUCTOR instead.

From-SVN: r47169

											
										
										
											2001-11-19 12:28:52 +01:00
+								#if defined (TARGET_ASM_CONSTRUCTOR) || defined (USE_COLLECT2)
-												[L_ctors] (__CTOR_LIST__, __DTOR_LIST__):

Initialize these default definitions if __NeXT__.

From-SVN: r2472

											
										
										
											1992-10-15 09:25:16 +01:00
+								func_ptr __CTOR_LIST__[2] = {0, 0};
 								func_ptr __DTOR_LIST__[2] = {0, 0};
-												Makefile.in (libgcc2): Pass MAYBE_USE_COLLECT2 as an argument.

        * Makefile.in (libgcc2): Pass MAYBE_USE_COLLECT2 as an argument.
        * libgcc2.c (__CTOR_LIST, __DTOR_LIST): Do not provide
        initializers in some circumstances.

From-SVN: r27489

											
										
										
											1999-06-11 05:12:22 +02:00
+								#else
 								func_ptr __CTOR_LIST__[2];
 								func_ptr __DTOR_LIST__[2];
 								#endif
-												remove unused CTOR_LISTS_DEFINED_EXTERNALLY macro

The last target to use this was i386-interix, so since that is gone we
don't need this anymore.

libgcc/ChangeLog:

2016-07-06  Trevor Saunders  <tbsaunde+gcc@tbsaunde.org>

	* libgcc2.c (SYMBOL__MAIN): Remove checks for
	CTOR_LISTS_DEFINED_EXTERNALLY.

From-SVN: r238067

											
										
										
											2016-07-07 01:55:52 +02:00
+								#endif /* no __LIBGCC_INIT_SECTION_ASM_OP__ */
-												(exit): Declare _exit as volatile.

(__CTOR_LIST__, __DTOR_LIST__): Moved to separate member.

From-SVN: r2158

											
										
										
											1992-09-19 06:47:11 +02:00
+								#endif /* L_ctors */
-												re PR target/22209 (libgfortran unresolvable symbols on irix6.5)

	* libgcc2.c (MIN_UNITS_PER_WORD): Move default definition from
	libgcc2.h.
	(LIBGCC2_UNITS_PER_WORD): Provide default definition, using old
	MIN_UNITS_PER_WORD logic from libgcc2.h.  Do nothing if
	LIBGCC2_UNITS_PER_WORD > MIN_UNITS_PER_WORD.
	* libgcc2.h (MIN_UNITS_PER_WORD): Remove definition from here.
	Use LIBGCC2_UNITS_PER_WORD rather than MIN_UNITS_PER_WORD to
	determine the size of Wtype, etc.
	* mklibgcc.in (LIB2_SIDITI_CONV_FUNCS): New argument.
	(swfloatfuncs): New variable.
	(dwfloatfuncs): Likewise.
	(lib2funcs): Remove floating-point conversion functions from
	initial assignment.  Use LIB2_SIDITI_CONV_FUNCS to determine
	the set of conversion routines needed.  Allow entries to specify
	an object name, filename and word size.  Update users accordingly.
	* Makefile.in (libgcc.mk): Pass LIB2_SIDITI_CONV_FUNCS.
	* config/mips/t-mips (LIB2_SIDITI_CONV_FUNCS): Define.

	Revert:

	2006-02-08  Roger Sayle  <roger@eyesopen.com>

	PR target/22209
	* config/fixtfdi.c: New libgcc source file.
	* config/fixunstfdi.c: New source file.
	* config/floatditf.c: New source file.
	* config/floatunditf.c: New source file.
	* config/mips/t-iris6 (LIB2FUNCS_EXTRA): Include the new source
	files above instead of config/mips/_tilib.c.
	* config/mips/t-linux64 (LIB2FUNCS_EXTRA): Likewise.

From-SVN: r113903

											
										
										
											2006-05-19 10:05:39 +02:00
+								#endif /* LIBGCC2_UNITS_PER_WORD <= LIBGCC2_MAX_UNITS_PER_WORD */