glibc/sysdeps/e2k/memmove.c

843 lines
25 KiB
C
Raw Normal View History

2022-08-11 20:23:08 +02:00
/* Copyright (c) 2016-2018 ZAO "MCST". All rights reserved. */
/* Copy memory to memory until the specified number of bytes
has been copied. Overlap is handled correctly.
Copyright (C) 1991, 1995, 1996, 1997, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Torbjorn Granlund (tege@sics.se).
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <string.h>
#include <e2kintrin.h>
/* All this is so that bcopy.c can #include
this file after defining some things. */
/* Default argument names and qualifiers, used when the including file has
not already defined them. Note that the function body refers to `dst'
and `src' by name. */
#ifndef a1
#define a1 dst /* First arg is DST. */
#define a1const
#define a2 src /* Second arg is SRC. */
#define a2const const
#undef memmove
#endif
/* Default return statement and return type of the function. */
#if !defined(RETURN) || !defined(rettype)
#define RETURN(s) return (s) /* Return DST. */
#define rettype void *
#endif
/* Spell `restrict' with the compiler-specific keyword. */
#define restrict __restrict__
/* ************************************************************************** */
/* WC_LIMIT is the size threshold (in bytes) at or above which the forward
copy switches to non-temporal (`_nt') stores.
NOTE(review): SIZE_L2 / SIZE_L3 are defined elsewhere; presumably the
L2 / L3 cache sizes -- confirm. */
#if defined (__elbrus_8c__) || __iset__ > 4
#define WC_LIMIT SIZE_L3 /* original comment was mis-encoded; presumably: these targets have an L3 cache */
#else /* defined (__elbrus_8c__) || __iset__ > 4 */
#define WC_LIMIT SIZE_L2
#endif /* defined (__elbrus_8c__) || __iset__ > 4 */
/* ************************************************************************** */
/* Helpers for the backward copy loops:
MAKE (i) combines the two adjacent source elements at negative indexes
-i and -i - 1 into one output element, using the prepared shift control.
PREPARE turns the byte offset in `shift' into that control value.
BIGSIZE is the maximum number of elements handled by one run of the inner
loops in the __ptr64__ variants, whose counters are declared as `int'. */
#if __iset__ <= 4
#define MAKE(i) __builtin_e2k_pshufb (bl[-(i)], bl[-(i) - 1], shift)
#define PREPARE shift = __builtin_e2k_psubd (0x0f0e0d0c0b0a0908LL, __builtin_e2k_pshufb (shift, shift, 0))
#define BIGSIZE 0x10000000
#else /* __iset__ <= 4 */
#define MAKE(i) __builtin_e2k_qppermb (qsp[-(i)], qsp[-(i) - 1], qshift)
#define PREPARE E2K_PREPARE_ALIGN128 (shift, qshift)
#define BIGSIZE 0x8000000
#endif /* __iset__ <= 4 */
/* ************************************************************************** */
/* ************************************************************************** */
rettype
memmove (a1, a2, len)
a1const void *a1;
a2const void *a2;
size_t len;
{
unsigned char * restrict ac = (unsigned char *) dst;
unsigned char * restrict bc = (unsigned char *) src;
long i, diff;
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD> dst < src, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
if (((unsigned long int)ac - (unsigned long int)bc) >= len) /* unsigned compare! */
{
#if __iset__ <= 4
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
* <EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 8. */
if (len < 24)
{
if (len > 0) ac[0] = bc[0];
#pragma loop count(12)
for (i = 1; i < len; i++)
{
ac[i] = bc[i];
}
RETURN (dst);
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> */
diff = (- (size_t) ac) & 7;
len -= diff;
if (diff & 1) {
*ac++ = *bc++;
}
if (diff & 2) {
((short *) ac)[0] = (bc[1] << 8) | bc[0];
bc += 2;
ac += 2;
}
if (diff & 4) {
((short *) ac)[0] = (bc[1] << 8) | bc[0];
((short *) ac)[1] = (bc[3] << 8) | bc[2];
bc += 4;
ac += 4;
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
{
unsigned long long * restrict al = (unsigned long long *) ac;
unsigned long long * restrict bl = (unsigned long long *) ((long) bc & ~7);
unsigned long long shift = (unsigned long long) (unsigned long) bc & 7;
long ll = len >> 3;
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
ac += len & ~7;
bc += len & ~7;
if (len < 64 * 8) { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> apb */
if (shift) { /* <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> src */
E2K_PREPARE_ALIGN (shift, shift);
#pragma unroll (1) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (8)
for (i = 0; i < ll; i++)
{
E2K_ALIGN_DATA (bl[i], bl[i + 1], al[i], shift);
}
}
else { /* <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
#pragma unroll (1) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (8)
for (i = 0; i < ll; i++)
{
al[i] = bl[i];
}
}
}
else if (ll < WC_LIMIT / 8) { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> L2/L3 <20><><EFBFBD> */
if (shift) { /* <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> src */
E2K_PREPARE_ALIGN (shift, shift);
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (i = 0; i < ll; i++)
{
E2K_ALIGN_DATA (bl[i], bl[i + 1], al[i], shift);
}
}
else { /* <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (i = 0; i < ll; i++)
{
al[i] = bl[i];
}
}
}
else { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
if (shift) { /* <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> src */
E2K_PREPARE_ALIGN (shift, shift);
#ifdef __ptr64__
int ii, isize;
#pragma loop count (1)
for (i = 0; i < ll; i += BIGSIZE) {
isize = (ll - i) > BIGSIZE ? BIGSIZE : ll - i;
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (ii = 0; ii < isize; ii++)
{
unsigned long long res;
E2K_ALIGN_DATA (bl[ii], bl[ii + 1], res, shift);
__builtin_e2k_st_64s_nt (res, al + ii);
}
al += isize;
bl += isize;
}
#else /* __ptr64__ */
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (i = 0; i < ll; i++)
{
unsigned long long res;
E2K_ALIGN_DATA (bl[i], bl[i + 1], res, shift);
__builtin_e2k_st_64s_nt (res, al + i);
}
#endif /* __ptr64__ */
}
else { /* <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
#ifdef __ptr64__
int ii, isize;
#pragma loop count (1)
for (i = 0; i < ll; i += BIGSIZE) {
isize = (ll - i) > BIGSIZE ? BIGSIZE : ll - i;
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (ii = 0; ii < isize; ii++)
{
__builtin_e2k_st_64s_nt (bl[ii], al + ii);
}
al += isize;
bl += isize;
}
#else /* __ptr64__ */
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (i = 0; i < ll; i++)
{
__builtin_e2k_st_64s_nt (bl[i], al + i);
}
#endif /* __ptr64__ */
}
}
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> */
if (len & 4) {
((short *) ac)[0] = (bc[1] << 8) | bc[0];
((short *) ac)[1] = (bc[3] << 8) | bc[2];
bc += 4;
ac += 4;
}
if (len & 2) {
((short *) ac)[0] = (bc[1] << 8) | bc[0];
bc += 2;
ac += 2;
}
if (len & 1) {
ac[0] = bc[0];
}
#elif __iset__ <= 5
__v2di * restrict qsp, qsrc0, qsrc1, qsrc, qshift;
__v2di * restrict qdp = (__v2di *) ((size_t) ac & ~15);
size_t len1;
long shift, ll;
diff = (size_t) ac & 15;
int mask = 0xffff << diff;
len1 = len - (16 - diff);
shift = ((size_t) bc - diff) & 15;
qsp = (__v2di *) (((size_t) bc - diff) & ~15);
E2K_PREPARE_ALIGN128 (shift, qshift);
qsrc0 = __builtin_e2k_ld_128_cleartag (qsp, 0);
qsrc1 = __builtin_e2k_ld_128_cleartag (qsp, 16);
E2K_ALIGN_DATA128 (qsrc0, qsrc1, qsrc, qshift);
if (len <= (16 - diff))
mask &= 0xffff >> (-len1);
__builtin_e2k_pst_128 (qsrc, qdp, mask);
if (len <= (16 - diff))
RETURN (dst);
diff = (-len1) & 15; /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> */
mask = 0xffff >> diff;
ll = (len1 - (16 - diff)) >> 4;
qsp++;
qdp++;
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
if (ll <= 16) { /* <20><> 256 <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> apb */
#pragma unroll (1) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (8)
#pragma noprefetch
for (i = 0; i < ll; i++) {
E2K_ALIGN_DATA128 (qsp[i], qsp[i + 1], qdp[i], qshift);
}
} else {
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 qp, <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 32 <20><><EFBFBD><EFBFBD> */
i = 0;
if (((unsigned long) qsp & 31) == 0) {
E2K_ALIGN_DATA128 (qsp[0], qsp[1], qdp[0], qshift);
i = 1;
}
if (ll < WC_LIMIT / 16) { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> L3 <20><><EFBFBD> */
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (; i < ll; i++) {
E2K_ALIGN_DATA128 (qsp[i], qsp[i + 1], qdp[i], qshift);
}
} else { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
#ifdef __ptr64__
int ii, isize;
qsp += i;
qdp += i;
ll -= i;
#pragma loop count (1)
for (i = 0; i < ll; i += BIGSIZE) {
isize = (ll - i) > BIGSIZE ? BIGSIZE : ll - i;
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (ii = 0; ii < isize; ii++)
{
E2K_ALIGN_DATA128 (qsp[ii], qsp[ii + 1], qsrc, qshift);
__builtin_e2k_st_128_nt (qsrc, qdp + ii);
}
qsp += isize;
qdp += isize;
}
ll = 0;
#else /* __ptr64__ */
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (; i < ll; i++) {
E2K_ALIGN_DATA128 (qsp[i], qsp[i + 1], qsrc, qshift);
__builtin_e2k_st_128_nt (qsrc, qdp + i);
}
#endif /* __ptr64__ */
}
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> */
qsrc0 = qsp[ll];
qsrc1 = __builtin_e2k_ld_128_cleartag (qsp + ll + 1, 0);
E2K_ALIGN_DATA128 (qsrc0, qsrc1, qsrc, qshift);
__builtin_e2k_pst_128 (qsrc, qdp + ll, mask);
#else /* __iset__ > 5 */
__v2di * restrict qdp = (__v2di *) ac;
__v2di * restrict qsp = (__v2di *) bc;
__v2di qsrc;
long ll;
if (len <= 16) {
if (__builtin_expect (E2K_BYTES_FROM_ALIGN (qsp, 4096) > 4080, 0)) { /* closely to page border */
/* Offsets 4081-4095 will be shifted back from the array last byte thus fit into page */
qsrc = ((__v2di *) ((const char *) qsp - (16 - len)))[0];
__builtin_pstoremas_128v (qsrc, (char *) qdp - (16 - len), 0xffff << (16 - len), __LCC_MAS_SPEC, __LCC_CHAN_ANY);
}
else {
__builtin_pstoremas_128v (qsp[0], qdp, 0xffff >> (16 - len), __LCC_MAS_SPEC, __LCC_CHAN_ANY);
}
RETURN (dst);
}
diff = (-len) & 15; /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><> diff <20><><EFBFBD><EFBFBD> (<28><> 1 <20><> 16) */
ll = (len - (16 - diff)) >> 4;
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
if (ll <= 16) { /* <20><> 256 <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> apb */
#pragma unroll (1) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (8)
#pragma noprefetch
for (i = 0; i < ll; i++) {
qdp[i] = qsp[i];
}
} else if (ll < WC_LIMIT / 16) { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> L3 <20><><EFBFBD> */
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (i = 0; i < ll; i++) {
qdp[i] = qsp[i];
}
} else { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
#ifdef __ptr64__
int ii, isize;
#pragma loop count (1)
for (i = 0; i < ll; i += BIGSIZE) {
isize = (ll - i) > BIGSIZE ? BIGSIZE : ll - i;
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (ii = 0; ii < isize; ii++)
{
__builtin_e2k_st_128_nt (qsp[ii], qdp + ii);
}
qsp += isize;
qdp += isize;
}
ll = 0;
#else /* __ptr64__ */
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (i = 0; i < ll; i++) {
__builtin_e2k_st_128_nt (qsp[i], qdp + i);
}
#endif /* __ptr64__ */
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> */
qsrc = ((__v2di *) ((const char *) (qsp + ll) - diff))[0];
__builtin_e2k_pst_128 (qsrc, (char *) (qdp + ll) - diff, 0xffff << diff);
#endif /* __iset__ > 5 */
RETURN (dst);
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
{
ac += len;
bc += len;
#if __iset__ <= 4
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
* <EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD> <EFBFBD><EFBFBD> <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 8. */
if (len < 24)
{
if (len > 0) ac[-1] = bc[-1];
#pragma loop count (12)
for (i = 1; i < len; i++)
{
ac[-i - 1] = bc[-i - 1];
}
RETURN (dst);
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> */
diff = (size_t) ac & 7;
len -= diff;
if (diff & 1) {
*--ac = *--bc;
}
if (diff & 2) {
bc -= 2;
ac -= 2;
((short *) ac)[0] = (bc[1] << 8) | bc[0];
}
if (diff & 4) {
bc -= 4;
ac -= 4;
((short *) ac)[1] = (bc[3] << 8) | bc[2];
((short *) ac)[0] = (bc[1] << 8) | bc[0];
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
{
unsigned long long * restrict al = (unsigned long long *) ac;
unsigned long long * restrict bl = (unsigned long long *) (((long) bc + 7) & ~7);
unsigned long long shift = -(unsigned long long) (unsigned long) bc & 7;
long ll = len >> 3;
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
ac -= len & ~7;
bc -= len & ~7;
i = 1;
if (len < 64 * 8) { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> apb */
if (shift) { /* <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> src */
PREPARE;
/* #68547 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 <20><><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 32 <20><><EFBFBD><EFBFBD> */
if (((unsigned long) bl & 0x18) == 0x10) {
al[- 1] = MAKE (1);
i = 2;
}
#pragma unroll (1) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (8)
for (; i <= ll; i++)
{
al[-i] = MAKE (i);
}
}
else { /* <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
/* #68547 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 <20><><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 32 <20><><EFBFBD><EFBFBD> */
if (((unsigned long) bl & 0x18) == 0x8) {
al[-1] = bl[-1];
i = 2;
}
#pragma unroll (1) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (8)
for (; i <= ll; i++)
{
al[-i] = bl[-i];
}
}
}
else { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> */
if (shift) { /* <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> src */
PREPARE;
/* #68547 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 <20><><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 32 <20><><EFBFBD><EFBFBD> */
if (((unsigned long) bl & 0x18) == 0x10) {
al[- 1] = MAKE (1);
i = 2;
}
#ifdef __ptr64__
int ii, isize;
ll -= i - 1;
al -= i;
bl -= i;
#pragma loop count (1)
for (i = 0; i < ll; i += BIGSIZE) {
isize = (ll - i) > BIGSIZE ? BIGSIZE : ll - i;
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (ii = 0; ii < isize; ii++)
{
al[-ii] = MAKE (ii);
}
al -= isize;
bl -= isize;
}
#else /* __ptr64__ */
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (; i <= ll; i++)
{
al[-i] = MAKE (i);
}
#endif /* __ptr64__ */
}
else { /* <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
/* #68547 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 <20><><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 32 <20><><EFBFBD><EFBFBD> */
if (((unsigned long) bl & 0x18) == 0x8) {
al[-1] = bl[-1];
i = 2;
}
#ifdef __ptr64__
int ii, isize;
ll -= i - 1;
al -= i;
bl -= i;
#pragma loop count (1)
for (i = 0; i < ll; i += BIGSIZE) {
isize = (ll - i) > BIGSIZE ? BIGSIZE : ll - i;
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (ii = 0; ii < isize; ii++)
{
al[-ii] = bl[-ii];
}
al -= isize;
bl -= isize;
}
#else /* __ptr64__ */
#pragma unroll (2) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (; i <= ll; i++)
{
al[-i] = bl[-i];
}
#endif /* __ptr64__ */
}
}
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> */
if (len & 4) {
bc -= 4;
ac -= 4;
((short *) ac)[1] = (bc[3] << 8) | bc[2];
((short *) ac)[0] = (bc[1] << 8) | bc[0];
}
if (len & 2) {
bc -= 2;
ac -= 2;
((short *) ac)[0] = (bc[1] << 8) | bc[0];
}
if (len & 1) {
ac[-1] = bc[-1];
}
#elif __iset__ <= 5
__v2di * restrict qsp, qsrc0, qsrc1, qsrc, qshift;
__v2di * restrict qdp = (__v2di *) ((size_t) ac & ~15);
size_t len1;
long shift, ll;
diff = (size_t) ac & 15;
if (diff == 0) {
diff = 16;
qdp--;
}
shift = ((size_t) bc - diff) & 15;
qsp = (__v2di *) (((size_t) bc - diff) & ~15);
int mask = 0xffff >> (16 - diff);
len1 = len - diff;
PREPARE;
qsrc0 = __builtin_e2k_ld_128_cleartag (qsp, 16);
qsrc1 = __builtin_e2k_ld_128_cleartag (qsp, 0);
E2K_ALIGN_DATA128 (qsrc1, qsrc0, qsrc, qshift);
if (len <= diff)
mask &= 0xffff << (-len1);
__builtin_e2k_pst_128 (qsrc, qdp, mask);
if (len <= diff)
RETURN (dst);
diff = (-len1) & 15; /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> */
mask = 0xffff0000 >> (16 - diff);
ll = (len1 - (16 - diff)) >> 4;
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
if (shift == 0) { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> src */
if (ll <= 16) { /* <20><> 256 <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> apb */
#pragma unroll (1) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (8)
#pragma noprefetch
for (i = 0; i < ll; i++) {
qdp[-i - 1] = qsp[-i - 1];
}
} else { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> */
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 qp, <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 32 <20><><EFBFBD><EFBFBD> */
i = 0;
if ((unsigned long) qsp & 31) {
qdp[- 1] = qsp[- 1];
i = 1;
}
#ifdef __ptr64__
int ii, isize;
qsp -= i;
qdp -= i;
ll -= i;
#pragma loop count (1)
for (i = 0; i < ll; i += BIGSIZE) {
isize = (ll - i) > BIGSIZE ? BIGSIZE : ll - i;
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (ii = 0; ii < isize; ii++)
{
qdp[-ii - 1] = qsp[-ii - 1];
}
qsp -= isize;
qdp -= isize;
}
i = 0;
#else /* __ptr64__ */
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (; i < ll; i++) {
qdp[-i - 1] = qsp[-i - 1];
}
#endif /* __ptr64__ */
}
__builtin_e2k_pst_128 (qsp[-i - 1], qdp - i - 1, mask);
}
else { /* <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> src */
if (ll <= 16) { /* <20><> 256 <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> apb */
#pragma unroll (1) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (8)
#pragma noprefetch
for (i = 0; i < ll; i++) {
qdp[-i - 1] = MAKE (i);
}
} else { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> */
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 qp, <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 32 <20><><EFBFBD><EFBFBD> */
i = 0;
if ((unsigned long) qsp & 31) {
qdp[- 1] = MAKE (0);
i = 1;
}
#ifdef __ptr64__
int ii, isize;
qsp -= i;
qdp -= i;
ll -= i;
#pragma loop count (1)
for (i = 0; i < ll; i += BIGSIZE) {
isize = (ll - i) > BIGSIZE ? BIGSIZE : ll - i;
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (ii = 0; ii < isize; ii++)
{
qdp[-ii - 1] = MAKE (ii);
}
qsp -= isize;
qdp -= isize;
}
i = 0;
#else /* __ptr64__ */
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (; i < ll; i++) {
qdp[-i - 1] = MAKE (i);
}
#endif /* __ptr64__ */
}
qsrc0 = qsp[-i];
qsrc1 = __builtin_e2k_ld_128_cleartag (qsp - i - 1, 0);
qsrc = __builtin_e2k_qppermb (qsrc0, qsrc1, qshift);
__builtin_e2k_pst_128 (qsrc, qdp - i - 1, mask);
}
#else /* __iset__ > 5 */
__v2di * restrict qdp = (__v2di *) ac;
__v2di * restrict qsp = (__v2di *) bc;
__v2di qsrc;
long ll;
if (len <= 16) {
if (__builtin_expect (E2K_BYTES_FROM_ALIGN (qsp, 4096) < 16, 0)) { /* closely to page border */
/* Offsets 0-15 will be shifted forward to the array first byte thus fit into page */
qsrc = ((__v2di *) ((const char *) qsp - len))[0];
__builtin_pstoremas_128v (qsrc, (char *) qdp - len, 0xffff >> (16 - len), __LCC_MAS_SPEC, __LCC_CHAN_ANY);
}
else {
__builtin_pstoremas_128v (qsp[-1], qdp - 1, 0xffff << (16 - len), __LCC_MAS_SPEC, __LCC_CHAN_ANY);
}
RETURN (dst);
}
diff = (-len) & 15; /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><> diff <20><><EFBFBD><EFBFBD> (<28><> 1 <20><> 16) */
ll = (len - (16 - diff)) >> 4;
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
if (ll <= 16) { /* <20><> 256 <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> apb */
#pragma unroll (1) /* 1 <20><><EFBFBD><EFBFBD> */
#pragma loop count (8)
#pragma noprefetch
for (i = 0; i < ll; i++) {
qdp[-i - 1] = qsp[-i - 1];
}
} else { /* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> */
#ifdef __ptr64__
int ii, isize;
#pragma loop count (1)
for (i = 0; i < ll; i += BIGSIZE) {
isize = (ll - i) > BIGSIZE ? BIGSIZE : ll - i;
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (ii = 0; ii < isize; ii++)
{
qdp[-ii - 1] = qsp[-ii - 1];
}
qsp -= isize;
qdp -= isize;
}
ll = 0;
#else /* __ptr64__ */
#pragma unroll (4) /* 2 <20><><EFBFBD><EFBFBD><EFBFBD> */
#pragma loop count (1000)
for (i = 0; i < ll; i++) {
qdp[-i - 1] = qsp[-i - 1];
}
#endif /* __ptr64__ */
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> */
qsrc = ((__v2di *) ((const char *) (qsp - ll - 1) + diff))[0];
__builtin_e2k_pst_128 (qsrc, (char *) (qdp - ll - 1) + diff, 0xffff >> diff);
#endif /* __iset__ > 5 */
}
RETURN (dst);
}
/* Only when `memmove' is not defined as a macro (i.e. the function above was
not renamed by an including file) is the hidden-definition macro invoked.
NOTE(review): libc_hidden_builtin_def is a glibc-internal macro defined
elsewhere; presumably it creates the internal hidden alias -- confirm. */
#ifndef memmove
libc_hidden_builtin_def (memmove)
#endif