gcc/libgfortran/m4/iforeach.m4
Thomas Koenig 28dc6b33c4 re PR libfortran/32972 (performance of pack/unpack)
2007-08-24  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/32972
	* iresolve.c:  Don't convert array masks.

2007-08-24  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/32972
	* libgfortran.h:  Remove GFOR_POINTER_L8_TO_L4 macro.
	Add GFOR_POINTER_TO_L1 macro.
	* m4/iforeach.m4(`m'name`'rtype_qual`_'atype_code):
	Change argument 'mask' to gfc_array_l1.  Adjust prototype.
	Change mask pointer to GFC_LOGICAL_1.  Multiply strides
	by kind of mask.
	* m4/ifunction.m4:  Likewise.
	* intrinsics/pack_generic.c(pack_internal):  Likewise.
	* intrinsics/unpack_generic.c(unpack_internal):  Likewise.
	* m4/matmull.m4:  Switch to GFC_LOGICAL_1.  Multiply strides by
	kind of logical arguments a and b.
	* generated/matmul_l16.c: Regenerated.
	* generated/matmul_l4.c: Regenerated.
	* generated/matmul_l8.c: Regenerated.
	* generated/maxloc0_16_i1.c: Regenerated.
	* generated/maxloc0_16_i16.c: Regenerated.
	* generated/maxloc0_16_i2.c: Regenerated.
	* generated/maxloc0_16_i4.c: Regenerated.
	* generated/maxloc0_16_i8.c: Regenerated.
	* generated/maxloc0_16_r10.c: Regenerated.
	* generated/maxloc0_16_r16.c: Regenerated.
	* generated/maxloc0_16_r4.c: Regenerated.
	* generated/maxloc0_16_r8.c: Regenerated.
	* generated/maxloc0_4_i1.c: Regenerated.
	* generated/maxloc0_4_i16.c: Regenerated.
	* generated/maxloc0_4_i2.c: Regenerated.
	* generated/maxloc0_4_i4.c: Regenerated.
	* generated/maxloc0_4_i8.c: Regenerated.
	* generated/maxloc0_4_r10.c: Regenerated.
	* generated/maxloc0_4_r16.c: Regenerated.
	* generated/maxloc0_4_r4.c: Regenerated.
	* generated/maxloc0_4_r8.c: Regenerated.
	* generated/maxloc0_8_i1.c: Regenerated.
	* generated/maxloc0_8_i16.c: Regenerated.
	* generated/maxloc0_8_i2.c: Regenerated.
	* generated/maxloc0_8_i4.c: Regenerated.
	* generated/maxloc0_8_i8.c: Regenerated.
	* generated/maxloc0_8_r10.c: Regenerated.
	* generated/maxloc0_8_r16.c: Regenerated.
	* generated/maxloc0_8_r4.c: Regenerated.
	* generated/maxloc0_8_r8.c: Regenerated.
	* generated/maxloc1_16_i1.c: Regenerated.
	* generated/maxloc1_16_i16.c: Regenerated.
	* generated/maxloc1_16_i2.c: Regenerated.
	* generated/maxloc1_16_i4.c: Regenerated.
	* generated/maxloc1_16_i8.c: Regenerated.
	* generated/maxloc1_16_r10.c: Regenerated.
	* generated/maxloc1_16_r16.c: Regenerated.
	* generated/maxloc1_16_r4.c: Regenerated.
	* generated/maxloc1_16_r8.c: Regenerated.
	* generated/maxloc1_4_i1.c: Regenerated.
	* generated/maxloc1_4_i16.c: Regenerated.
	* generated/maxloc1_4_i2.c: Regenerated.
	* generated/maxloc1_4_i4.c: Regenerated.
	* generated/maxloc1_4_i8.c: Regenerated.
	* generated/maxloc1_4_r10.c: Regenerated.
	* generated/maxloc1_4_r16.c: Regenerated.
	* generated/maxloc1_4_r4.c: Regenerated.
	* generated/maxloc1_4_r8.c: Regenerated.
	* generated/maxloc1_8_i1.c: Regenerated.
	* generated/maxloc1_8_i16.c: Regenerated.
	* generated/maxloc1_8_i2.c: Regenerated.
	* generated/maxloc1_8_i4.c: Regenerated.
	* generated/maxloc1_8_i8.c: Regenerated.
	* generated/maxloc1_8_r10.c: Regenerated.
	* generated/maxloc1_8_r16.c: Regenerated.
	* generated/maxloc1_8_r4.c: Regenerated.
	* generated/maxloc1_8_r8.c: Regenerated.
	* generated/maxval_i1.c: Regenerated.
	* generated/maxval_i16.c: Regenerated.
	* generated/maxval_i2.c: Regenerated.
	* generated/maxval_i4.c: Regenerated.
	* generated/maxval_i8.c: Regenerated.
	* generated/maxval_r10.c: Regenerated.
	* generated/maxval_r16.c: Regenerated.
	* generated/maxval_r4.c: Regenerated.
	* generated/maxval_r8.c: Regenerated.
	* generated/minloc0_16_i1.c: Regenerated.
	* generated/minloc0_16_i16.c: Regenerated.
	* generated/minloc0_16_i2.c: Regenerated.
	* generated/minloc0_16_i4.c: Regenerated.
	* generated/minloc0_16_i8.c: Regenerated.
	* generated/minloc0_16_r10.c: Regenerated.
	* generated/minloc0_16_r16.c: Regenerated.
	* generated/minloc0_16_r4.c: Regenerated.
	* generated/minloc0_16_r8.c: Regenerated.
	* generated/minloc0_4_i1.c: Regenerated.
	* generated/minloc0_4_i16.c: Regenerated.
	* generated/minloc0_4_i2.c: Regenerated.
	* generated/minloc0_4_i4.c: Regenerated.
	* generated/minloc0_4_i8.c: Regenerated.
	* generated/minloc0_4_r10.c: Regenerated.
	* generated/minloc0_4_r16.c: Regenerated.
	* generated/minloc0_4_r4.c: Regenerated.
	* generated/minloc0_4_r8.c: Regenerated.
	* generated/minloc0_8_i1.c: Regenerated.
	* generated/minloc0_8_i16.c: Regenerated.
	* generated/minloc0_8_i2.c: Regenerated.
	* generated/minloc0_8_i4.c: Regenerated.
	* generated/minloc0_8_i8.c: Regenerated.
	* generated/minloc0_8_r10.c: Regenerated.
	* generated/minloc0_8_r16.c: Regenerated.
	* generated/minloc0_8_r4.c: Regenerated.
	* generated/minloc0_8_r8.c: Regenerated.
	* generated/minloc1_16_i1.c: Regenerated.
	* generated/minloc1_16_i16.c: Regenerated.
	* generated/minloc1_16_i2.c: Regenerated.
	* generated/minloc1_16_i4.c: Regenerated.
	* generated/minloc1_16_i8.c: Regenerated.
	* generated/minloc1_16_r10.c: Regenerated.
	* generated/minloc1_16_r16.c: Regenerated.
	* generated/minloc1_16_r4.c: Regenerated.
	* generated/minloc1_16_r8.c: Regenerated.
	* generated/minloc1_4_i1.c: Regenerated.
	* generated/minloc1_4_i16.c: Regenerated.
	* generated/minloc1_4_i2.c: Regenerated.
	* generated/minloc1_4_i4.c: Regenerated.
	* generated/minloc1_4_i8.c: Regenerated.
	* generated/minloc1_4_r10.c: Regenerated.
	* generated/minloc1_4_r16.c: Regenerated.
	* generated/minloc1_4_r4.c: Regenerated.
	* generated/minloc1_4_r8.c: Regenerated.
	* generated/minloc1_8_i1.c: Regenerated.
	* generated/minloc1_8_i16.c: Regenerated.
	* generated/minloc1_8_i2.c: Regenerated.
	* generated/minloc1_8_i4.c: Regenerated.
	* generated/minloc1_8_i8.c: Regenerated.
	* generated/minloc1_8_r10.c: Regenerated.
	* generated/minloc1_8_r16.c: Regenerated.
	* generated/minloc1_8_r4.c: Regenerated.
	* generated/minloc1_8_r8.c: Regenerated.
	* generated/minval_i1.c: Regenerated.
	* generated/minval_i16.c: Regenerated.
	* generated/minval_i2.c: Regenerated.
	* generated/minval_i4.c: Regenerated.
	* generated/minval_i8.c: Regenerated.
	* generated/minval_r10.c: Regenerated.
	* generated/minval_r16.c: Regenerated.
	* generated/minval_r4.c: Regenerated.
	* generated/minval_r8.c: Regenerated.
	* generated/product_c10.c: Regenerated.
	* generated/product_c16.c: Regenerated.
	* generated/product_c4.c: Regenerated.
	* generated/product_c8.c: Regenerated.
	* generated/product_i1.c: Regenerated.
	* generated/product_i16.c: Regenerated.
	* generated/product_i2.c: Regenerated.
	* generated/product_i4.c: Regenerated.
	* generated/product_i8.c: Regenerated.
	* generated/product_r10.c: Regenerated.
	* generated/product_r16.c: Regenerated.
	* generated/product_r4.c: Regenerated.
	* generated/product_r8.c: Regenerated.
	* generated/sum_c10.c: Regenerated.
	* generated/sum_c16.c: Regenerated.
	* generated/sum_c4.c: Regenerated.
	* generated/sum_c8.c: Regenerated.
	* generated/sum_i1.c: Regenerated.
	* generated/sum_i16.c: Regenerated.
	* generated/sum_i2.c: Regenerated.
	* generated/sum_i4.c: Regenerated.
	* generated/sum_i8.c: Regenerated.
	* generated/sum_r10.c: Regenerated.
	* generated/sum_r16.c: Regenerated.
	* generated/sum_r4.c: Regenerated.
	* generated/sum_r8.c: Regenerated.

From-SVN: r127774
2007-08-24 16:16:16 +00:00

285 lines
7.5 KiB
Plaintext

dnl Support macro file for intrinsic functions.
dnl Contains the generic sections of the array functions.
dnl This file is part of the GNU Fortran 95 Runtime Library (libgfortran)
dnl Distributed under the GNU GPL with exception. See COPYING for details.
dnl START_FOREACH_FUNCTION
dnl
dnl Emits the prototype, the export_proto line and the opening part of the
dnl unmasked routine whose C identifier is assembled from the name,
dnl rtype_qual and atype_code macros.  The routine takes the result
dnl descriptor retarray and the source descriptor array.
dnl
dnl The emitted C text does the following, in order:
dnl   1. Reads the rank of array and calls runtime_error when it is <= 0.
dnl   2. If retarray->data is NULL, fills in retarray as a rank-1 descriptor
dnl      (bounds 0 .. rank-1, stride 1, offset 0) and allocates rank elements
dnl      with internal_malloc_size.  Otherwise it checks that retarray has
dnl      rank 1 and exactly rank elements, calling runtime_error if not.
dnl   3. Copies the stride and extent of every dimension of array into
dnl      sstride[] and extent[] and zeroes count[].  If any extent is <= 0
dnl      it stores 0 in all rank result elements and returns.  That inner
dnl      loop reuses n, which is harmless because the return follows
dnl      immediately.
dnl   4. Points base at array->data and stores 0 in all rank result elements.
dnl
dnl The expansion deliberately ends with two brace levels still open (the
dnl function body and one nested block).  START_FOREACH_BLOCK and
dnl FINISH_FOREACH_FUNCTION supply the loop and the matching closing braces.
dnl The locals count, extent, sstride, dstride, base, dest, rank and n remain
dnl in scope for whatever text is placed after this expansion.
define(START_FOREACH_FUNCTION,
`
extern void name`'rtype_qual`_'atype_code (rtype * const restrict retarray,
atype * const restrict array);
export_proto(name`'rtype_qual`_'atype_code);
void
name`'rtype_qual`_'atype_code (rtype * const restrict retarray,
atype * const restrict array)
{
index_type count[GFC_MAX_DIMENSIONS];
index_type extent[GFC_MAX_DIMENSIONS];
index_type sstride[GFC_MAX_DIMENSIONS];
index_type dstride;
const atype_name *base;
rtype_name *dest;
index_type rank;
index_type n;
rank = GFC_DESCRIPTOR_RANK (array);
if (rank <= 0)
runtime_error ("Rank of array needs to be > 0");
if (retarray->data == NULL)
{
retarray->dim[0].lbound = 0;
retarray->dim[0].ubound = rank-1;
retarray->dim[0].stride = 1;
retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
retarray->offset = 0;
retarray->data = internal_malloc_size (sizeof (rtype_name) * rank);
}
else
{
if (GFC_DESCRIPTOR_RANK (retarray) != 1)
runtime_error ("rank of return array does not equal 1");
if (retarray->dim[0].ubound + 1 - retarray->dim[0].lbound != rank)
runtime_error ("dimension of return array incorrect");
}
dstride = retarray->dim[0].stride;
dest = retarray->data;
for (n = 0; n < rank; n++)
{
sstride[n] = array->dim[n].stride;
extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound;
count[n] = 0;
if (extent[n] <= 0)
{
/* Set the return value. */
for (n = 0; n < rank; n++)
dest[n * dstride] = 0;
return;
}
}
base = array->data;
/* Initialize the return value. */
for (n = 0; n < rank; n++)
dest[n * dstride] = 0;
{
')dnl
dnl START_FOREACH_BLOCK
dnl
dnl Emits the head of the per-element loop: a while loop that runs for as
dnl long as base is non-NULL, followed by one nested opening brace for the
dnl per-element implementation text.  Both brace levels are left open here.
dnl FINISH_FOREACH_FUNCTION and FINISH_MASKED_FOREACH_FUNCTION close them and
dnl set base to NULL once every element has been visited, which is what ends
dnl the loop.
define(START_FOREACH_BLOCK,
` while (base)
{
{
/* Implementation start. */
')dnl
dnl FINISH_FOREACH_FUNCTION
dnl
dnl Emits the tail of the unmasked routine.  The emitted C text:
dnl   - closes the per-element implementation block;
dnl   - steps to the next element by incrementing count[0] and adding
dnl     sstride[0] to base;
dnl   - while count[n] has reached extent[n], resets that counter, moves base
dnl     back by sstride[n] * extent[n] (undoing the steps taken along
dnl     dimension n) and carries into dimension n + 1 by incrementing its
dnl     counter and adding its stride to base;
dnl   - when the carry runs past the last dimension (n == rank), sets base to
dnl     NULL and breaks, which terminates the enclosing while (base) loop;
dnl   - closes the while loop, the nested block and the function body that
dnl     START_FOREACH_BLOCK and START_FOREACH_FUNCTION left open.
dnl
dnl The number of closing braces must stay in step with those two macros.
define(FINISH_FOREACH_FUNCTION,
` /* Implementation end. */
}
/* Advance to the next element. */
count[0]++;
base += sstride[0];
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it and increment
the next dimension. */
count[n] = 0;
/* We could precalculate these products, but this is a less
frequently used path so probably not worth it. */
base -= sstride[n] * extent[n];
n++;
if (n == rank)
{
/* Break out of the loop. */
base = NULL;
break;
}
else
{
count[n]++;
base += sstride[n];
}
}
}
}
}')dnl
dnl START_MASKED_FOREACH_FUNCTION
dnl
dnl Emits the prototype, the export_proto line and the opening part of the
dnl array-masked routine.  Its C identifier is the letter m followed by the
dnl identifier built from the name, rtype_qual and atype_code macros.  The
dnl routine takes the result descriptor retarray, the source descriptor
dnl array and a gfc_array_l1 descriptor mask.
dnl
dnl The emitted C text does the following, in order:
dnl   1. Reads the rank of array and calls runtime_error when it is <= 0.
dnl   2. If retarray->data is NULL, fills in retarray as a rank-1 descriptor
dnl      (bounds 0 .. rank-1, stride 1, offset 0) and allocates rank elements
dnl      with internal_malloc_size.  Otherwise it checks that retarray has
dnl      rank 1 and exactly rank elements, calling runtime_error if not.
dnl   3. Reads the element size of mask into mask_kind.  Sizes 1, 2, 4 and 8
dnl      are accepted, plus 16 when HAVE_GFC_LOGICAL_16 is defined; any other
dnl      size calls runtime_error.  For an accepted size, mbase is passed
dnl      through GFOR_POINTER_TO_L1.
dnl      NOTE(review): GFOR_POINTER_TO_L1 is defined outside this file;
dnl      presumably it yields the address of the byte that carries the truth
dnl      value of each logical element - confirm against its definition.
dnl   4. Copies the stride and extent of every dimension of array into
dnl      sstride[] and extent[], zeroes count[], and sets mstride[n] to the
dnl      mask stride multiplied by mask_kind.
dnl      NOTE(review): the multiplication looks like a conversion of an
dnl      element stride into a stride usable with the GFC_LOGICAL_1 pointer
dnl      mbase; this assumes GFC_LOGICAL_1 is one byte wide - confirm.
dnl      If any extent is <= 0 the code stores 0 in all rank result elements
dnl      and returns; that inner loop reuses n, which is harmless because
dnl      the return follows immediately.
dnl   5. Points base at array->data and stores 0 in all rank result elements.
dnl
dnl The emitted code does not compare the extents of mask with those of
dnl array.  rank is declared as int here, whereas the unmasked variant
dnl declares it as index_type.
dnl
dnl The expansion deliberately ends with two brace levels still open (the
dnl function body and one nested block).  START_MASKED_FOREACH_BLOCK and
dnl FINISH_MASKED_FOREACH_FUNCTION supply the loop and the closing braces.
dnl The locals count, extent, sstride, mstride, dstride, dest, base, mbase,
dnl rank, n and mask_kind remain in scope for the text placed after this
dnl expansion.
define(START_MASKED_FOREACH_FUNCTION,
`
extern void `m'name`'rtype_qual`_'atype_code (rtype * const restrict,
atype * const restrict, gfc_array_l1 * const restrict);
export_proto(`m'name`'rtype_qual`_'atype_code);
void
`m'name`'rtype_qual`_'atype_code (rtype * const restrict retarray,
atype * const restrict array,
gfc_array_l1 * const restrict mask)
{
index_type count[GFC_MAX_DIMENSIONS];
index_type extent[GFC_MAX_DIMENSIONS];
index_type sstride[GFC_MAX_DIMENSIONS];
index_type mstride[GFC_MAX_DIMENSIONS];
index_type dstride;
rtype_name *dest;
const atype_name *base;
GFC_LOGICAL_1 *mbase;
int rank;
index_type n;
int mask_kind;
rank = GFC_DESCRIPTOR_RANK (array);
if (rank <= 0)
runtime_error ("Rank of array needs to be > 0");
if (retarray->data == NULL)
{
retarray->dim[0].lbound = 0;
retarray->dim[0].ubound = rank-1;
retarray->dim[0].stride = 1;
retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
retarray->offset = 0;
retarray->data = internal_malloc_size (sizeof (rtype_name) * rank);
}
else
{
if (GFC_DESCRIPTOR_RANK (retarray) != 1)
runtime_error ("rank of return array does not equal 1");
if (retarray->dim[0].ubound + 1 - retarray->dim[0].lbound != rank)
runtime_error ("dimension of return array incorrect");
}
mask_kind = GFC_DESCRIPTOR_SIZE (mask);
mbase = mask->data;
if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8
#ifdef HAVE_GFC_LOGICAL_16
|| mask_kind == 16
#endif
)
mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind);
else
runtime_error ("Funny sized logical array");
dstride = retarray->dim[0].stride;
dest = retarray->data;
for (n = 0; n < rank; n++)
{
sstride[n] = array->dim[n].stride;
mstride[n] = mask->dim[n].stride * mask_kind;
extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound;
count[n] = 0;
if (extent[n] <= 0)
{
/* Set the return value. */
for (n = 0; n < rank; n++)
dest[n * dstride] = 0;
return;
}
}
base = array->data;
/* Initialize the return value. */
for (n = 0; n < rank; n++)
dest[n * dstride] = 0;
{
')dnl
dnl START_MASKED_FOREACH_BLOCK
dnl
dnl Alias for START_FOREACH_BLOCK: the masked routine uses the same
dnl while (base) loop head as the unmasked one.
define(START_MASKED_FOREACH_BLOCK, `START_FOREACH_BLOCK')dnl
dnl FINISH_MASKED_FOREACH_FUNCTION
dnl
dnl Emits the tail of the array-masked routine.  The emitted C text:
dnl   - closes the per-element implementation block;
dnl   - steps to the next element by incrementing count[0] and adding
dnl     sstride[0] to base and mstride[0] to mbase;
dnl   - while count[n] has reached extent[n], resets that counter, moves base
dnl     and mbase back by their stride times extent[n] (undoing the steps
dnl     taken along dimension n) and carries into dimension n + 1 by
dnl     incrementing its counter and adding its strides to both pointers;
dnl   - when the carry runs past the last dimension (n == rank), sets base to
dnl     NULL and breaks, which terminates the enclosing while (base) loop;
dnl   - closes the while loop, the nested block and the function body that
dnl     START_MASKED_FOREACH_BLOCK and START_MASKED_FOREACH_FUNCTION left
dnl     open.
dnl
dnl base and mbase are always moved together so that both refer to the same
dnl element position.  The number of closing braces must stay in step with
dnl the two START macros.
define(FINISH_MASKED_FOREACH_FUNCTION,
` /* Implementation end. */
}
/* Advance to the next element. */
count[0]++;
base += sstride[0];
mbase += mstride[0];
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it and increment
the next dimension. */
count[n] = 0;
/* We could precalculate these products, but this is a less
frequently used path so probably not worth it. */
base -= sstride[n] * extent[n];
mbase -= mstride[n] * extent[n];
n++;
if (n == rank)
{
/* Break out of the loop. */
base = NULL;
break;
}
else
{
count[n]++;
base += sstride[n];
mbase += mstride[n];
}
}
}
}
}')dnl
dnl FOREACH_FUNCTION(setup, body)
dnl
dnl Assembles one complete unmasked routine from the pieces above.
dnl   $1  C text placed after the common prologue and before the element
dnl       loop, inside the block that the prologue leaves open.
dnl   $2  C text placed inside the element loop; it is emitted once and runs
dnl       for every element, with base addressing the current element.
define(FOREACH_FUNCTION,
`START_FOREACH_FUNCTION
$1
START_FOREACH_BLOCK
$2
FINISH_FOREACH_FUNCTION')dnl
dnl MASKED_FOREACH_FUNCTION(setup, body)
dnl
dnl Assembles one complete array-masked routine from the pieces above.
dnl   $1  C text placed after the common masked prologue and before the
dnl       element loop, inside the block that the prologue leaves open.
dnl   $2  C text placed inside the element loop; it is emitted once and runs
dnl       for every element, with base and mbase addressing the current
dnl       array element and mask element.
define(MASKED_FOREACH_FUNCTION,
`START_MASKED_FOREACH_FUNCTION
$1
START_MASKED_FOREACH_BLOCK
$2
FINISH_MASKED_FOREACH_FUNCTION')dnl
dnl SCALAR_FOREACH_FUNCTION(value)
dnl
dnl Emits the complete scalar-masked routine.  Its C identifier is the
dnl letter s followed by the identifier built from the name, rtype_qual and
dnl atype_code macros.  The routine takes the result descriptor retarray,
dnl the source descriptor array and a pointer to a GFC_LOGICAL_4 mask.
dnl
dnl   $1  C expression stored in every result element when *mask is false.
dnl
dnl When *mask is true the routine simply calls the unmasked routine.
dnl Otherwise it validates the rank of array, allocates or checks retarray
dnl exactly as the other routines in this file do, and fills its rank
dnl elements with $1.
dnl
dnl The local identifiers rank, dstride, n and dest are kept stable because
dnl $1 is spliced into the body.
define(SCALAR_FOREACH_FUNCTION,
`
extern void `s'name`'rtype_qual`_'atype_code (rtype * const restrict,
	atype * const restrict, GFC_LOGICAL_4 *);
export_proto(`s'name`'rtype_qual`_'atype_code);

void
`s'name`'rtype_qual`_'atype_code (rtype * const restrict retarray,
	atype * const restrict array,
	GFC_LOGICAL_4 * mask)
{
  rtype_name *dest;
  index_type dstride, rank, n;

  /* A true mask selects the whole array, so defer to the unmasked routine.  */
  if (*mask)
    {
      name`'rtype_qual`_'atype_code (retarray, array);
      return;
    }

  rank = GFC_DESCRIPTOR_RANK (array);
  if (rank <= 0)
    runtime_error ("Rank of array needs to be > 0");

  if (retarray->data != NULL)
    {
      /* Storage was supplied: insist on a rank one result holding one
	 element per dimension of array.  */
      if (GFC_DESCRIPTOR_RANK (retarray) != 1)
	runtime_error ("rank of return array does not equal 1");

      if (retarray->dim[0].ubound + 1 - retarray->dim[0].lbound != rank)
	runtime_error ("dimension of return array incorrect");
    }
  else
    {
      /* No storage yet: describe a zero-based vector with unit stride and
	 allocate it.  */
      retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
      retarray->offset = 0;
      retarray->dim[0].stride = 1;
      retarray->dim[0].lbound = 0;
      retarray->dim[0].ubound = rank-1;
      retarray->data = internal_malloc_size (sizeof (rtype_name) * rank);
    }

  dest = retarray->data;
  dstride = retarray->dim[0].stride;

  /* A false mask excludes every element, so each result element receives
     the fixed value given to this macro.  */
  for (n = 0; n < rank; n++)
    {
      dest[n * dstride] = $1 ;
    }
}')dnl