glibc/sysdeps/e2k/memccpy.c

385 lines
11 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Copyright (c) 2015-2018 ZAO "MCST". All rights reserved.
*
* @(#) $Id: memccpy.c 5560 2020-03-01 08:29:24Z vlog $
*/
/* Copyright (C) 1991-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <string.h>
#include <memcopy.h>
#include <e2kintrin.h>
//#undef E2K_PREPARE_ALIGN
//#undef E2K_ALIGN_DATA
///* Variant built on the insfd/scrd operations for 64-bit format data */
//#define E2K_PREPARE_ALIGN(align, spec) spec = align << 9
//#define E2K_ALIGN_DATA(src1, src2, dst, spec) \
// dst = __builtin_e2k_scrd (__builtin_e2k_insfd (src1, spec, src2), spec >> 6)
/* Remove any macro definitions of these names before the function
definition below. */
#undef __memccpy
#undef memccpy
/* Spell the `restrict' qualifier used in this file as `__restrict__'. */
#define restrict __restrict__
/* Copy no more than N bytes of SRC to DEST, stopping when C is found.
Return the position in DEST one byte past where C was copied, or
NULL if C was not found in the first N bytes of SRC.

One of three implementations is selected at compile time by __iset__:
- __iset__ <= 4: byte loop for short lengths, otherwise DEST is brought
to an 8-byte boundary and the data is processed in op_t words;
- __iset__ == 5: DEST is brought to a 16-byte boundary and the data is
processed in __v2di quad words, with __builtin_e2k_pst_128 used for the
partial first and last quad words;
- __iset__ > 5: SRC is rounded down to a 16-byte boundary, DEST is moved
back by the same amount, and the data is processed in __v2di quad words.

NOTE(review): the byte masks below are built with `-1 << k' and `-2 << k',
i.e. left shifts of negative values; presumably the target compiler's
behavior for these is relied upon -- confirm.  */
void *
__memccpy (void * restrict dest, const void * restrict src, int c_in, size_t n)
{
unsigned char * restrict srcp = (unsigned char *) src;
unsigned char * restrict dstp = (unsigned char *) dest;
/* Search pattern compared against whole words below; presumably the low
byte of c_in replicated into every byte of the word -- TODO confirm
against the pshufb description. */
op_t charmask = __builtin_e2k_pshufb (c_in, c_in, 0);
/* Number of bytes to be written in the last word/quad word. */
long int tail;
#if __iset__ <= 4
op_t aligns, alignd, a0, a00, a01, spec, mask;
/* Presumably the address of the 8-byte-aligned word holding the last byte
that may be written (dstp + n - 1 rounded down) -- TODO confirm the
E2K_ALIGN_PTR_BACK semantics. */
unsigned char * restrict end_ptr = E2K_ALIGN_PTR_BACK (dstp + n - 1, 8);
unsigned char s, c = (unsigned char) c_in;
if (n <= OP_T_THRES) { /* small lengths */
/* There are just a few bytes to copy. Use byte memory operations. */
#pragma noprefetch
#pragma loop count (8)
while (n > 0) {
*dstp++ = s = *srcp++;
n--;
if (s == c) {
return (char *) dstp;
}
}
return NULL;
}
/* From here on: lengths above the threshold.  If the computed end lies
below dstp (presumably because dstp + n wrapped around the address space),
use the highest 8-byte-aligned address as the end instead. */
if (end_ptr < dstp) /* lengths above the threshold */
end_ptr = (unsigned char *) (~0UL << 3);
/* alignd: bytes (1..8) up to the next 8-byte boundary of dest;
aligns: offset of src relative to dest modulo 8. */
alignd = 8 - ((unsigned long int) dstp & 7);
aligns = (srcp - dstp) & 7;
n -= alignd;
tail = n & 7;
if (tail == 0) tail = 8;
/* processing up to the aligned dest: eight bytes are copied and tested one
by one regardless of alignd; the pointers then advance by alignd only, so
any bytes past the boundary are written again by the word code below. */
dstp[0] = s = srcp[0];
if (s == c)
return (char *) dstp + 1;
dstp[1] = s = srcp[1];
if (s == c)
return (char *) dstp + 2;
dstp[2] = s = srcp[2];
if (s == c)
return (char *) dstp + 3;
dstp[3] = s = srcp[3];
if (s == c)
return (char *) dstp + 4;
dstp[4] = s = srcp[4];
if (s == c)
return (char *) dstp + 5;
dstp[5] = s = srcp[5];
if (s == c)
return (char *) dstp + 6;
dstp[6] = s = srcp[6];
if (s == c)
return (char *) dstp + 7;
dstp[7] = s = srcp[7];
if (s == c)
return (char *) dstp + 8;
srcp += alignd;
dstp += alignd;
if (aligns == 0) { /* both strings are aligned identically */
a0 = ((op_t *) srcp)[0];
/* mask is tested against zero to mean "no byte of a0 equals c". */
mask = __builtin_e2k_pcmpeqb (a0, charmask);
if (mask == 0) {
/* We will test 8 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied, 1 cycle */
#pragma loop count (100)
#pragma unroll (1)
for (;;) {
/* Store the word already known to be free of c, then load and test the
next one; leave the loop with the untested-for-store word in a0. */
((op_t *) dstp)[0] = a0;
srcp += OPSIZ;
dstp += OPSIZ;
a0 = ((op_t *) srcp)[0];
mask = __builtin_e2k_pcmpeqb (a0, charmask);
if (!(mask == 0 && dstp < end_ptr)) break;
}
}
}
else { /* the strings are aligned differently */
/* Round srcp down to an 8-byte boundary and assemble each destination
word from two neighbouring source words (a00, a01) with E2K_ALIGN_DATA. */
srcp = (unsigned char *) ((unsigned long int) srcp & ~7);
E2K_PREPARE_ALIGN (aligns, spec);
a00 = ((op_t *) srcp)[0];
a01 = __builtin_e2k_ld_64s_cleartag ((op_t *) srcp, 8);
E2K_ALIGN_DATA (a00, a01, a0, spec);
mask = __builtin_e2k_pcmpeqb (a0, charmask);
if (mask == 0) {
/* We will test 8 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied, 2 cycles */
#pragma loop count (100)
#pragma unroll (1)
for (;;) {
((op_t *) dstp)[0] = a0;
srcp += OPSIZ;
dstp += OPSIZ;
/* The previous upper source word becomes the lower one. */
a00 = a01;
a01 = __builtin_e2k_ld_64s_cleartag ((op_t *) srcp, 8);
E2K_ALIGN_DATA (a00, a01, a0, spec);
mask = __builtin_e2k_pcmpeqb (a0, charmask);
if (mask != 0 || dstp >= end_ptr) break;
}
}
}
if (dstp < end_ptr) { /* the character was found, and not in the last word */
/* The whole word lies within the n bytes, so all 8 bytes may be used. */
tail = 8;
}
/* write out the tail: a0 is emitted in 4-, 2- and 1-byte pieces, each only
while that piece contains no match (its mask bits are zero) and at least
that many bytes remain in tail. */
if ((mask & 0xffffffff) == 0 && tail >= 4) {
*((int *) dstp) = a0;
a0 >>= 32;
mask >>= 32;
dstp += 4;
tail -= 4;
}
if ((mask & 0xffff) == 0 && tail >= 2) {
*((short *) dstp) = a0;
a0 >>= 16;
mask >>= 16;
dstp += 2;
tail -= 2;
}
if ((mask & 0xff) == 0 && tail >= 1) {
*dstp++ = a0;
a0 >>= 8;
tail--;
}
/* One more byte may remain: store it and report success if it is c. */
if (tail > 0) {
*dstp = a0;
if ((a0 & 0xff) == c)
return dstp + 1;
}
#elif __iset__ <= 5
/* __CMP stores into mask a bit mask derived from the byte-wise comparison
of x with qcharmask; the code below treats bit i as "byte i equals c_in". */
#define __CMP(mask, x) /* bytes with 'c_in' -> bit mask */ \
(mask) = __builtin_e2k_qpsgn2mskb (__builtin_e2k_qpcmpeqb (x, qcharmask))
op_t aligns, alignd;
__v2di a0, a00, a01, spec;
const __v2di qcharmask = __builtin_e2k_qppackdl (charmask, charmask);
/* Presumably the address of the 16-byte-aligned quad word holding the last
byte that may be written -- TODO confirm E2K_ALIGN_PTR_BACK semantics. */
unsigned char * restrict end_ptr = E2K_ALIGN_PTR_BACK (dstp + n - 1, 16);
unsigned int mask;
if (n == 0) return NULL;
/* If dstp + n wraps around, use the highest 16-byte-aligned address as the
end instead. */
if ((dstp + n) < dstp) /* lengths above the threshold */
end_ptr = (unsigned char *) (~0UL << 4);
/* alignd: bytes (1..16) up to the next 16-byte boundary of dest;
aligns: offset of src relative to dest modulo 16. */
alignd = 16 - ((unsigned long int) dstp & 15);
aligns = (srcp - dstp) & 15;
tail = (n - alignd) & 15;
if (tail == 0) tail = 16;
/* Both pointers now refer to the quad word after the first (partial) one;
the first one is addressed as dstp - 16 below. */
srcp += alignd;
dstp += alignd;
if (aligns == 0) { /* both strings are aligned identically */
a0 = ((__v2di *) srcp)[-1];
__CMP (mask, a0);
mask &= (-1 << (16 - alignd)); /* cleared the bits before the start of dst */
if (mask != 0 || dstp > end_ptr) { /* we finish in this qword */
/* NOTE(review): mask can be 0 here when only the end_ptr condition holds;
GCC documents __builtin_ctz (0) as undefined -- confirm the value produced
on this target keeps the comparison below true in that case. */
mask = __builtin_ctz (mask);
if (n <= mask - (16 - alignd)) { /* n is less than the string length */
/* Store only the n bytes starting at the original dest; the last argument
of pst_128 is presumably a per-byte store mask -- TODO confirm. */
__builtin_e2k_pst_128 (a0, dstp - 16, (-1 << (16 - alignd)) & ~(-1 << (16 - alignd + n)));
return NULL;
}
else { /* write up to the end of the string, including the final c_in */
__builtin_e2k_pst_128 (a0, dstp - 16, (-1 << (16 - alignd)) & ~(-2 << mask));
return (char *) dstp - 16 + mask + 1;
}
}
/* No match in the first qword: store its part that belongs to dest. */
__builtin_e2k_pst_128 (a0, dstp - 16, -1 << (16 - alignd));
a0 = ((__v2di *) srcp)[0];
__CMP (mask, a0);
if (mask == 0 && dstp < end_ptr) {
/* We will test 16 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied, 1 cycle */
#pragma loop count (100)
#pragma unroll (1)
for (;;) {
((__v2di *) dstp)[0] = a0;
srcp += 16;
dstp += 16;
a0 = ((__v2di *) srcp)[0];
__CMP (mask, a0);
if (!(mask == 0 && dstp < end_ptr)) break;
}
}
}
else { /* the strings are aligned differently */
/* Round srcp down to a 16-byte boundary and assemble each destination
qword from two neighbouring source qwords with E2K_ALIGN_DATA128. */
srcp = (unsigned char *) ((unsigned long int) srcp & ~15);
E2K_PREPARE_ALIGN128 (aligns, spec);
a00 = __builtin_e2k_ld_128_cleartag ((__v2di *) srcp, -16);
a01 = __builtin_e2k_ld_128_cleartag ((__v2di *) srcp, 0);
E2K_ALIGN_DATA128 (a00, a01, a0, spec);
__CMP (mask, a0);
mask &= (-1 << (16 - alignd)); /* cleared the bits before the start of dst */
if (mask != 0 || dstp > end_ptr) { /* we finish in this qword */
/* NOTE(review): as above, mask may be 0 when __builtin_ctz is called. */
mask = __builtin_ctz (mask);
if (n <= mask - (16 - alignd)) { /* n is less than the string length */
__builtin_e2k_pst_128 (a0, dstp - 16, (-1 << (16 - alignd)) & ~(-1 << (16 - alignd + n)));
return NULL;
}
else { /* write up to the end of the string, including the final c_in */
__builtin_e2k_pst_128 (a0, dstp - 16, (-1 << (16 - alignd)) & ~(-2 << mask));
return (char *) dstp - 16 + mask + 1;
}
}
/* No match in the first qword: store its part that belongs to dest. */
__builtin_e2k_pst_128 (a0, dstp - 16, -1 << (16 - alignd));
a00 = a01;
a01 = __builtin_e2k_ld_128_cleartag ((__v2di *) srcp, 16);
E2K_ALIGN_DATA128 (a00, a01, a0, spec);
__CMP (mask, a0);
if (mask == 0 && dstp < end_ptr) {
/* We will test 16 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied, 2 cycles */
#pragma loop count (100)
#pragma unroll (1)
for (;;) {
((__v2di *) dstp)[0] = a0;
srcp += 16;
dstp += 16;
/* The previous upper source qword becomes the lower one. */
a00 = a01;
a01 = __builtin_e2k_ld_128_cleartag ((__v2di *) srcp, 16);
E2K_ALIGN_DATA128 (a00, a01, a0, spec);
__CMP (mask, a0);
if (mask != 0 || dstp >= end_ptr) break;
}
}
}
if (dstp >= end_ptr) { /* reached the last qword */
mask &= ~(-1 << tail); /* clear the bits after the last byte to be written */
}
/* write out the tail */
if (mask != 0) { /* found the character in the last qword */
mask = __builtin_ctz (mask);
/* Store bytes 0..mask of a0, i.e. up to and including the match. */
__builtin_e2k_pst_128 (a0, dstp, ~(-2 << mask));
return (char *) dstp + mask + 1;
}
/* No match: store the remaining tail bytes and fall through to return NULL. */
__builtin_e2k_pst_128 (a0, dstp, ~(-1 << tail));
#else /* __iset__ > 5 */
/* __CMP stores into mask a bit mask derived from the byte-wise comparison
of x with qcharmask; the code below treats bit i as "byte i equals c_in". */
#define __CMP(mask, x) /* bytes with 'c_in' -> bit mask */ \
(mask) = __builtin_e2k_qpsgn2mskb (__builtin_e2k_qpcmpeqb (x, qcharmask))
/* __CMP_PRED is used as a truth value below: nonzero is taken to mean that
some byte of x equals c_in. */
#define __CMP_PRED(x) /* bytes with 'c_in' -> predicate */ \
__builtin_e2k_qpcmpeqbop (x, qcharmask)
__v2di a0;
/* Offset of src within its 16-byte quad word. */
size_t align = (size_t) srcp & 15;
const __v2di qcharmask = __builtin_e2k_qppackdl (charmask, charmask);
/* Value of the shifted-back dstp at which the quad word containing the
last of the n bytes starts. */
unsigned char * restrict end_ptr = dstp - align + ((n - 1 + align) & ~15);
unsigned int mask;
if (n == 0) return NULL;
/* If dstp + n wraps around, use the highest 16-byte-aligned address as the
end instead. */
if ((dstp + n) < dstp) /* lengths above the threshold */
end_ptr = (unsigned char *) (~0UL << 4);
tail = (n + align) & 15;
if (tail == 0) tail = 16;
/* Round src down to its quad word and move dest back by the same amount,
so byte i of each loaded quad word corresponds to dstp[i]. */
srcp = (unsigned char *) ((size_t) srcp & ~15);
dstp -= align;
/* first qword load is aligned */
a0 = ((__v2di *) srcp)[0];
__CMP (mask, a0);
mask &= (-1 << align); /* cleared the bits before the start of src */
if (mask != 0 || dstp >= end_ptr) { /* we finish in this qword */
/* NOTE(review): mask can be 0 here when only the end_ptr condition holds;
GCC documents __builtin_ctz (0) as undefined -- confirm the value produced
on this target keeps the comparison below true in that case. */
mask = __builtin_ctz (mask);
if (n <= mask - align) { /* n is less than the string length */
/* Store only bytes align .. align + n - 1; the last argument of pst_128
is presumably a per-byte store mask -- TODO confirm. */
__builtin_e2k_pst_128 (a0, dstp, (-1 << align) & ~(-1 << (align + n)));
return NULL;
}
else { /* write up to the end of the string, including the final c_in */
__builtin_e2k_pst_128 (a0, dstp, (-1 << align) & ~(-2 << mask));
return (char *) dstp + mask + 1;
}
}
/* No match in the first qword: store its bytes from offset align onward. */
__builtin_e2k_pst_128 (a0, dstp, -1 << align);
srcp += 16;
dstp += 16;
/* next qword load is aligned */
a0 = ((__v2di *) srcp)[0];
if (__CMP_PRED (a0) == 0 && dstp < end_ptr) {
/* We will test 16 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied, 1 cycle */
#pragma loop count (100)
#pragma unroll (1)
for (;;) {
((__v2di *) dstp)[0] = a0;
srcp += 16;
dstp += 16;
a0 = ((__v2di *) srcp)[0];
if (__CMP_PRED (a0) || dstp >= end_ptr) break;
}
}
/* Which of the bytes matched c_in? */
__CMP (mask, a0);
if (dstp >= end_ptr) { /* reached the last qword */
mask &= ~(-1 << tail); /* clear the bits after the last byte to be written */
}
/* write out the tail */
if (mask != 0) { /* found the character in the last qword */
mask = __builtin_ctz (mask);
/* Store bytes 0..mask of a0, i.e. up to and including the match. */
__builtin_e2k_pst_128 (a0, dstp, ~(-2 << mask));
return (char *) dstp + mask + 1;
}
/* No match: store the remaining tail bytes and fall through to return NULL. */
__builtin_e2k_pst_128 (a0, dstp, ~(-1 << tail));
#endif /* __iset__ > 5 */
/* Reached only when c_in was not found within the first n bytes. */
return NULL;
}
/* Export the function under the name memccpy as well; weak_alias is defined
outside this file -- presumably it creates a weak symbol alias. */
weak_alias (__memccpy, memccpy)