gcc/libgfortran/generated/maxloc0_16_r8.c
Thomas Koenig 28dc6b33c4 re PR libfortran/32972 (performance of pack/unpack)
2007-08-24  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/32972
	* iresolve.c:  Don't convert array masks.

2007-08-24  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/32972
	* libgfortran.h:  Remove GFOR_POINTER_L8_TO_L4 macro.
	Add GFOR_POINTER_TO_L1 macro.
	* m4/iforeach.m4(`m'name`'rtype_qual`_'atype_code):
	Change argument 'mask' to gfc_array_l1.  Adjust prototype.
	Change mask pointer to GFC_LOGICAL_1.  Multiply strides
	by kind of mask
	* m4/ifunction.m4:  Likewise.
	* intrinsics/pack_generic.c(pack_internal):  Likewise.
	* intrinsics/unpack_generic.c(unpack_internal):  Likewise.
	* m4/matmull.m4:  Switch to GFC_LOGICAL_1.  Multiply strides by
	kind of logical arguments a and b.
	* generated/matmul_l16.c: Regenerated.
	* generated/matmul_l4.c: Regenerated.
	* generated/matmul_l8.c: Regenerated.
	* generated/maxloc0_16_i1.c: Regenerated.
	* generated/maxloc0_16_i16.c: Regenerated.
	* generated/maxloc0_16_i2.c: Regenerated.
	* generated/maxloc0_16_i4.c: Regenerated.
	* generated/maxloc0_16_i8.c: Regenerated.
	* generated/maxloc0_16_r10.c: Regenerated.
	* generated/maxloc0_16_r16.c: Regenerated.
	* generated/maxloc0_16_r4.c: Regenerated.
	* generated/maxloc0_16_r8.c: Regenerated.
	* generated/maxloc0_4_i1.c: Regenerated.
	* generated/maxloc0_4_i16.c: Regenerated.
	* generated/maxloc0_4_i2.c: Regenerated.
	* generated/maxloc0_4_i4.c: Regenerated.
	* generated/maxloc0_4_i8.c: Regenerated.
	* generated/maxloc0_4_r10.c: Regenerated.
	* generated/maxloc0_4_r16.c: Regenerated.
	* generated/maxloc0_4_r4.c: Regenerated.
	* generated/maxloc0_4_r8.c: Regenerated.
	* generated/maxloc0_8_i1.c: Regenerated.
	* generated/maxloc0_8_i16.c: Regenerated.
	* generated/maxloc0_8_i2.c: Regenerated.
	* generated/maxloc0_8_i4.c: Regenerated.
	* generated/maxloc0_8_i8.c: Regenerated.
	* generated/maxloc0_8_r10.c: Regenerated.
	* generated/maxloc0_8_r16.c: Regenerated.
	* generated/maxloc0_8_r4.c: Regenerated.
	* generated/maxloc0_8_r8.c: Regenerated.
	* generated/maxloc1_16_i1.c: Regenerated.
	* generated/maxloc1_16_i16.c: Regenerated.
	* generated/maxloc1_16_i2.c: Regenerated.
	* generated/maxloc1_16_i4.c: Regenerated.
	* generated/maxloc1_16_i8.c: Regenerated.
	* generated/maxloc1_16_r10.c: Regenerated.
	* generated/maxloc1_16_r16.c: Regenerated.
	* generated/maxloc1_16_r4.c: Regenerated.
	* generated/maxloc1_16_r8.c: Regenerated.
	* generated/maxloc1_4_i1.c: Regenerated.
	* generated/maxloc1_4_i16.c: Regenerated.
	* generated/maxloc1_4_i2.c: Regenerated.
	* generated/maxloc1_4_i4.c: Regenerated.
	* generated/maxloc1_4_i8.c: Regenerated.
	* generated/maxloc1_4_r10.c: Regenerated.
	* generated/maxloc1_4_r16.c: Regenerated.
	* generated/maxloc1_4_r4.c: Regenerated.
	* generated/maxloc1_4_r8.c: Regenerated.
	* generated/maxloc1_8_i1.c: Regenerated.
	* generated/maxloc1_8_i16.c: Regenerated.
	* generated/maxloc1_8_i2.c: Regenerated.
	* generated/maxloc1_8_i4.c: Regenerated.
	* generated/maxloc1_8_i8.c: Regenerated.
	* generated/maxloc1_8_r10.c: Regenerated.
	* generated/maxloc1_8_r16.c: Regenerated.
	* generated/maxloc1_8_r4.c: Regenerated.
	* generated/maxloc1_8_r8.c: Regenerated.
	* generated/maxval_i1.c: Regenerated.
	* generated/maxval_i16.c: Regenerated.
	* generated/maxval_i2.c: Regenerated.
	* generated/maxval_i4.c: Regenerated.
	* generated/maxval_i8.c: Regenerated.
	* generated/maxval_r10.c: Regenerated.
	* generated/maxval_r16.c: Regenerated.
	* generated/maxval_r4.c: Regenerated.
	* generated/maxval_r8.c: Regenerated.
	* generated/minloc0_16_i1.c: Regenerated.
	* generated/minloc0_16_i16.c: Regenerated.
	* generated/minloc0_16_i2.c: Regenerated.
	* generated/minloc0_16_i4.c: Regenerated.
	* generated/minloc0_16_i8.c: Regenerated.
	* generated/minloc0_16_r10.c: Regenerated.
	* generated/minloc0_16_r16.c: Regenerated.
	* generated/minloc0_16_r4.c: Regenerated.
	* generated/minloc0_16_r8.c: Regenerated.
	* generated/minloc0_4_i1.c: Regenerated.
	* generated/minloc0_4_i16.c: Regenerated.
	* generated/minloc0_4_i2.c: Regenerated.
	* generated/minloc0_4_i4.c: Regenerated.
	* generated/minloc0_4_i8.c: Regenerated.
	* generated/minloc0_4_r10.c: Regenerated.
	* generated/minloc0_4_r16.c: Regenerated.
	* generated/minloc0_4_r4.c: Regenerated.
	* generated/minloc0_4_r8.c: Regenerated.
	* generated/minloc0_8_i1.c: Regenerated.
	* generated/minloc0_8_i16.c: Regenerated.
	* generated/minloc0_8_i2.c: Regenerated.
	* generated/minloc0_8_i4.c: Regenerated.
	* generated/minloc0_8_i8.c: Regenerated.
	* generated/minloc0_8_r10.c: Regenerated.
	* generated/minloc0_8_r16.c: Regenerated.
	* generated/minloc0_8_r4.c: Regenerated.
	* generated/minloc0_8_r8.c: Regenerated.
	* generated/minloc1_16_i1.c: Regenerated.
	* generated/minloc1_16_i16.c: Regenerated.
	* generated/minloc1_16_i2.c: Regenerated.
	* generated/minloc1_16_i4.c: Regenerated.
	* generated/minloc1_16_i8.c: Regenerated.
	* generated/minloc1_16_r10.c: Regenerated.
	* generated/minloc1_16_r16.c: Regenerated.
	* generated/minloc1_16_r4.c: Regenerated.
	* generated/minloc1_16_r8.c: Regenerated.
	* generated/minloc1_4_i1.c: Regenerated.
	* generated/minloc1_4_i16.c: Regenerated.
	* generated/minloc1_4_i2.c: Regenerated.
	* generated/minloc1_4_i4.c: Regenerated.
	* generated/minloc1_4_i8.c: Regenerated.
	* generated/minloc1_4_r10.c: Regenerated.
	* generated/minloc1_4_r16.c: Regenerated.
	* generated/minloc1_4_r4.c: Regenerated.
	* generated/minloc1_4_r8.c: Regenerated.
	* generated/minloc1_8_i1.c: Regenerated.
	* generated/minloc1_8_i16.c: Regenerated.
	* generated/minloc1_8_i2.c: Regenerated.
	* generated/minloc1_8_i4.c: Regenerated.
	* generated/minloc1_8_i8.c: Regenerated.
	* generated/minloc1_8_r10.c: Regenerated.
	* generated/minloc1_8_r16.c: Regenerated.
	* generated/minloc1_8_r4.c: Regenerated.
	* generated/minloc1_8_r8.c: Regenerated.
	* generated/minval_i1.c: Regenerated.
	* generated/minval_i16.c: Regenerated.
	* generated/minval_i2.c: Regenerated.
	* generated/minval_i4.c: Regenerated.
	* generated/minval_i8.c: Regenerated.
	* generated/minval_r10.c: Regenerated.
	* generated/minval_r16.c: Regenerated.
	* generated/minval_r4.c: Regenerated.
	* generated/minval_r8.c: Regenerated.
	* generated/product_c10.c: Regenerated.
	* generated/product_c16.c: Regenerated.
	* generated/product_c4.c: Regenerated.
	* generated/product_c8.c: Regenerated.
	* generated/product_i1.c: Regenerated.
	* generated/product_i16.c: Regenerated.
	* generated/product_i2.c: Regenerated.
	* generated/product_i4.c: Regenerated.
	* generated/product_i8.c: Regenerated.
	* generated/product_r10.c: Regenerated.
	* generated/product_r16.c: Regenerated.
	* generated/product_r4.c: Regenerated.
	* generated/product_r8.c: Regenerated.
	* generated/sum_c10.c: Regenerated.
	* generated/sum_c16.c: Regenerated.
	* generated/sum_c4.c: Regenerated.
	* generated/sum_c8.c: Regenerated.
	* generated/sum_i1.c: Regenerated.
	* generated/sum_i16.c: Regenerated.
	* generated/sum_i2.c: Regenerated.
	* generated/sum_i4.c: Regenerated.
	* generated/sum_i8.c: Regenerated.
	* generated/sum_r10.c: Regenerated.
	* generated/sum_r16.c: Regenerated.
	* generated/sum_r4.c: Regenerated.
	* generated/sum_r8.c: Regenerated.

From-SVN: r127774
2007-08-24 16:16:16 +00:00

329 lines
8.6 KiB
C

/* Implementation of the MAXLOC intrinsic
Copyright 2002 Free Software Foundation, Inc.
Contributed by Paul Brook <paul@nowt.org>
This file is part of the GNU Fortran 95 runtime library (libgfortran).
Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)
Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public
License along with libgfortran; see the file COPYING. If not,
write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
#include "config.h"
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#include "libgfortran.h"
#if defined (HAVE_GFC_REAL_8) && defined (HAVE_GFC_INTEGER_16)
extern void maxloc0_16_r8 (gfc_array_i16 * const restrict retarray,
gfc_array_r8 * const restrict array);
export_proto(maxloc0_16_r8);
void
maxloc0_16_r8 (gfc_array_i16 * const restrict retarray,
gfc_array_r8 * const restrict array)
{
index_type count[GFC_MAX_DIMENSIONS];
index_type extent[GFC_MAX_DIMENSIONS];
index_type sstride[GFC_MAX_DIMENSIONS];
index_type dstride;
const GFC_REAL_8 *base;
GFC_INTEGER_16 *dest;
index_type rank;
index_type n;
rank = GFC_DESCRIPTOR_RANK (array);
if (rank <= 0)
runtime_error ("Rank of array needs to be > 0");
if (retarray->data == NULL)
{
retarray->dim[0].lbound = 0;
retarray->dim[0].ubound = rank-1;
retarray->dim[0].stride = 1;
retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
retarray->offset = 0;
retarray->data = internal_malloc_size (sizeof (GFC_INTEGER_16) * rank);
}
else
{
if (GFC_DESCRIPTOR_RANK (retarray) != 1)
runtime_error ("rank of return array does not equal 1");
if (retarray->dim[0].ubound + 1 - retarray->dim[0].lbound != rank)
runtime_error ("dimension of return array incorrect");
}
dstride = retarray->dim[0].stride;
dest = retarray->data;
for (n = 0; n < rank; n++)
{
sstride[n] = array->dim[n].stride;
extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound;
count[n] = 0;
if (extent[n] <= 0)
{
/* Set the return value. */
for (n = 0; n < rank; n++)
dest[n * dstride] = 0;
return;
}
}
base = array->data;
/* Initialize the return value. */
for (n = 0; n < rank; n++)
dest[n * dstride] = 0;
{
GFC_REAL_8 maxval;
maxval = -GFC_REAL_8_HUGE;
while (base)
{
{
/* Implementation start. */
if (*base > maxval || !dest[0])
{
maxval = *base;
for (n = 0; n < rank; n++)
dest[n * dstride] = count[n] + 1;
}
/* Implementation end. */
}
/* Advance to the next element. */
count[0]++;
base += sstride[0];
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it and increment
the next dimension. */
count[n] = 0;
/* We could precalculate these products, but this is a less
frequently used path so probably not worth it. */
base -= sstride[n] * extent[n];
n++;
if (n == rank)
{
/* Break out of the loop. */
base = NULL;
break;
}
else
{
count[n]++;
base += sstride[n];
}
}
}
}
}
extern void mmaxloc0_16_r8 (gfc_array_i16 * const restrict,
gfc_array_r8 * const restrict, gfc_array_l1 * const restrict);
export_proto(mmaxloc0_16_r8);
void
mmaxloc0_16_r8 (gfc_array_i16 * const restrict retarray,
gfc_array_r8 * const restrict array,
gfc_array_l1 * const restrict mask)
{
index_type count[GFC_MAX_DIMENSIONS];
index_type extent[GFC_MAX_DIMENSIONS];
index_type sstride[GFC_MAX_DIMENSIONS];
index_type mstride[GFC_MAX_DIMENSIONS];
index_type dstride;
GFC_INTEGER_16 *dest;
const GFC_REAL_8 *base;
GFC_LOGICAL_1 *mbase;
int rank;
index_type n;
int mask_kind;
rank = GFC_DESCRIPTOR_RANK (array);
if (rank <= 0)
runtime_error ("Rank of array needs to be > 0");
if (retarray->data == NULL)
{
retarray->dim[0].lbound = 0;
retarray->dim[0].ubound = rank-1;
retarray->dim[0].stride = 1;
retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
retarray->offset = 0;
retarray->data = internal_malloc_size (sizeof (GFC_INTEGER_16) * rank);
}
else
{
if (GFC_DESCRIPTOR_RANK (retarray) != 1)
runtime_error ("rank of return array does not equal 1");
if (retarray->dim[0].ubound + 1 - retarray->dim[0].lbound != rank)
runtime_error ("dimension of return array incorrect");
}
mask_kind = GFC_DESCRIPTOR_SIZE (mask);
mbase = mask->data;
if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8
#ifdef HAVE_GFC_LOGICAL_16
|| mask_kind == 16
#endif
)
mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind);
else
runtime_error ("Funny sized logical array");
dstride = retarray->dim[0].stride;
dest = retarray->data;
for (n = 0; n < rank; n++)
{
sstride[n] = array->dim[n].stride;
mstride[n] = mask->dim[n].stride * mask_kind;
extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound;
count[n] = 0;
if (extent[n] <= 0)
{
/* Set the return value. */
for (n = 0; n < rank; n++)
dest[n * dstride] = 0;
return;
}
}
base = array->data;
/* Initialize the return value. */
for (n = 0; n < rank; n++)
dest[n * dstride] = 0;
{
GFC_REAL_8 maxval;
maxval = -GFC_REAL_8_HUGE;
while (base)
{
{
/* Implementation start. */
if (*mbase && (*base > maxval || !dest[0]))
{
maxval = *base;
for (n = 0; n < rank; n++)
dest[n * dstride] = count[n] + 1;
}
/* Implementation end. */
}
/* Advance to the next element. */
count[0]++;
base += sstride[0];
mbase += mstride[0];
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it and increment
the next dimension. */
count[n] = 0;
/* We could precalculate these products, but this is a less
frequently used path so probably not worth it. */
base -= sstride[n] * extent[n];
mbase -= mstride[n] * extent[n];
n++;
if (n == rank)
{
/* Break out of the loop. */
base = NULL;
break;
}
else
{
count[n]++;
base += sstride[n];
mbase += mstride[n];
}
}
}
}
}
extern void smaxloc0_16_r8 (gfc_array_i16 * const restrict,
gfc_array_r8 * const restrict, GFC_LOGICAL_4 *);
export_proto(smaxloc0_16_r8);
void
smaxloc0_16_r8 (gfc_array_i16 * const restrict retarray,
gfc_array_r8 * const restrict array,
GFC_LOGICAL_4 * mask)
{
index_type rank;
index_type dstride;
index_type n;
GFC_INTEGER_16 *dest;
if (*mask)
{
maxloc0_16_r8 (retarray, array);
return;
}
rank = GFC_DESCRIPTOR_RANK (array);
if (rank <= 0)
runtime_error ("Rank of array needs to be > 0");
if (retarray->data == NULL)
{
retarray->dim[0].lbound = 0;
retarray->dim[0].ubound = rank-1;
retarray->dim[0].stride = 1;
retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
retarray->offset = 0;
retarray->data = internal_malloc_size (sizeof (GFC_INTEGER_16) * rank);
}
else
{
if (GFC_DESCRIPTOR_RANK (retarray) != 1)
runtime_error ("rank of return array does not equal 1");
if (retarray->dim[0].ubound + 1 - retarray->dim[0].lbound != rank)
runtime_error ("dimension of return array incorrect");
}
dstride = retarray->dim[0].stride;
dest = retarray->data;
for (n = 0; n<rank; n++)
dest[n * dstride] = 0 ;
}
#endif