/*
 * Patch summary (git unified diff touching two libgfortran files).
 *
 * libgfortran/ChangeLog
 *   Adds the 2004-09-03 entry that describes the cshift0.c change below.
 *
 * libgfortran/intrinsics/cshift0.c
 *   - Adds the DEF_COPY_LOOP(NAME, TYPE) macro.  Each expansion defines a
 *     static inline copy_loop_NAME() that copies `len' strided elements of
 *     TYPE in rotated order: first the `len - shift' elements starting at
 *     source element `shift', then the first `shift' source elements.
 *     roff/soff are divided by sizeof (TYPE) on entry, so callers pass
 *     them as byte offsets and the loop steps in whole elements.
 *     The macro is instantiated for int, long, double and long double.
 *   - __cshift0: the `shift' parameter changes from int to ssize_t, and
 *     the reduction by `len' uses the % operator instead of div().  The
 *     existing `if (shift < 0) shift += len;' fix-up is kept as context.
 *   - __cshift0, inner step: when soffset == size and roffset == size the
 *     rotation is done with two memcpy calls.  Otherwise the element size
 *     selects a copy_loop_* helper, tested in the order int, long, double,
 *     long double (first match wins); any other size falls back to two
 *     per-element memcpy loops.
 *   - Removes the function-scope `dest' and `src' declarations (the
 *     fallback branch now declares its own), changes only the leading
 *     whitespace of the "Initialized for avoiding compiler warnings"
 *     comment, and drops the blank line at the end of the file.
 *
 * NOTE(review):
 *   - This copy of the patch has lost its line structure: hunks are run
 *     together on three physical lines, whitespace runs are collapsed and
 *     the first #include in the context carries no header name.  Restore
 *     it from the upstream commit before feeding it to patch or git apply.
 *   - `shift % (ssize_t)len' is undefined when len == 0.  No guard is
 *     visible in these hunks -- confirm against the full function.
 *   - The typed copy loops are chosen by element size alone and then
 *     dereference TYPE pointers; presumably the array data is assumed to
 *     be suitably aligned for TYPE -- TODO confirm for element types that
 *     merely share the size.
 *   - copy_loop_* takes roff/soff as size_t; if roffset/soffset can be
 *     negative the division by sizeof (TYPE) would not give the intended
 *     stride -- verify against the callers' stride setup.
 */
diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog index 6bf52bbc7a9..5f29a04980e 100644 --- a/libgfortran/ChangeLog +++ b/libgfortran/ChangeLog @@ -1,3 +1,10 @@ +2004-09-03 Richard Henderson + + * intrinsics/cshift0.c (DEF_COPY_LOOP, copy_loop_int, copy_loop_long, + copy_loop_double, copy_loop_ldouble): New. + (__cshift0): Make shift type ssize_t. Use % operator instead of + div. Use specialized versions of copy loop depending on the shape. + 2004-09-02 Paul Brook * io/format.c (parse_format_list): Set repeat count for P descriptors. diff --git a/libgfortran/intrinsics/cshift0.c b/libgfortran/intrinsics/cshift0.c index dd96ddf173c..a988827a826 100644 --- a/libgfortran/intrinsics/cshift0.c +++ b/libgfortran/intrinsics/cshift0.c @@ -25,25 +25,61 @@ Boston, MA 02111-1307, USA. */ #include #include "libgfortran.h" -/* TODO: make this work for large shifts when - sizeof(int) < sizeof (index_type). */ + +/* "Templatized" helper function for the inner shift loop. */ + +#define DEF_COPY_LOOP(NAME, TYPE) \ +static inline void \ +copy_loop_##NAME (void *xdest, const void *xsrc, \ + size_t roff, size_t soff, \ + index_type len, index_type shift) \ +{ \ + TYPE *dest = xdest; \ + const TYPE *src; \ + index_type i; \ + \ + roff /= sizeof (TYPE); \ + soff /= sizeof (TYPE); \ + \ + src = xsrc; \ + src += shift * soff; \ + for (i = 0; i < len - shift; ++i) \ + { \ + *dest = *src; \ + dest += roff; \ + src += soff; \ + } \ + \ + src = xsrc; \ + for (i = 0; i < shift; ++i) \ + { \ + *dest = *src; \ + dest += roff; \ + src += soff; \ + } \ +} + +DEF_COPY_LOOP(int, int) +DEF_COPY_LOOP(long, long) +DEF_COPY_LOOP(double, double) +DEF_COPY_LOOP(ldouble, long double) + static void __cshift0 (gfc_array_char * ret, const gfc_array_char * array, - int shift, int which) + ssize_t shift, int which) { /* r.* indicates the return array. */ index_type rstride[GFC_MAX_DIMENSIONS - 1]; index_type rstride0; index_type roffset; char *rptr; - char *dest; + /* s.* indicates the source array. 
*/ index_type sstride[GFC_MAX_DIMENSIONS - 1]; index_type sstride0; index_type soffset; const char *sptr; - const char *src; index_type count[GFC_MAX_DIMENSIONS - 1]; index_type extent[GFC_MAX_DIMENSIONS - 1]; @@ -64,7 +100,7 @@ __cshift0 (gfc_array_char * ret, const gfc_array_char * array, size = GFC_DESCRIPTOR_SIZE (array); n = 0; -/* Initialized for avoiding compiler warnings. */ + /* Initialized for avoiding compiler warnings. */ roffset = size; soffset = size; len = 0; @@ -120,24 +156,55 @@ __cshift0 (gfc_array_char * ret, const gfc_array_char * array, rptr = ret->data; sptr = array->data; - shift = (div (shift, len)).rem; + shift = shift % (ssize_t)len; if (shift < 0) shift += len; while (rptr) { /* Do the shift for this dimension. */ - src = &sptr[shift * soffset]; - dest = rptr; - for (n = 0; n < len; n++) - { - memcpy (dest, src, size); - dest += roffset; - if (n == len - shift - 1) - src = sptr; - else - src += soffset; - } + + /* If elements are contiguous, perform the operation + in two block moves. */ + if (soffset == size && roffset == size) + { + size_t len1 = shift * size; + size_t len2 = (len - shift) * size; + memcpy (rptr, sptr + len1, len2); + memcpy (rptr + len2, sptr, len1); + } + else + { + /* Otherwise, we'll have to perform the copy one element at + a time. We can speed this up a tad for common cases of + fundamental types. 
*/ + if (size == sizeof(int)) + copy_loop_int (rptr, sptr, roffset, soffset, len, shift); + else if (size == sizeof(long)) + copy_loop_long (rptr, sptr, roffset, soffset, len, shift); + else if (size == sizeof(double)) + copy_loop_double (rptr, sptr, roffset, soffset, len, shift); + else if (size == sizeof(long double)) + copy_loop_ldouble (rptr, sptr, roffset, soffset, len, shift); + else + { + char *dest = rptr; + const char *src = &sptr[shift * soffset]; + + for (n = 0; n < len - shift; n++) + { + memcpy (dest, src, size); + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < shift; n++) + { + memcpy (dest, src, size); + dest += roffset; + src += soffset; + } + } + } /* Advance to the next section. */ rptr += rstride0; @@ -185,4 +252,3 @@ __cshift0_8 (gfc_array_char * ret, const gfc_array_char * array, { __cshift0 (ret, array, *pshift, pdim ? *pdim : 1); } -