glibc/sysdeps/e2k/stpcpy.c

359 lines
9.3 KiB
C

/* Copyright (c) 2015-2018 ZAO "MCST". All rights reserved.
*
* @(#) $Id: stpcpy.c 2101 2014-05-13 11:24:32Z vlog $
*/
/* Copyright (C) 1992-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#define NO_MEMPCPY_STPCPY_REDIRECT
#include <string.h>
#include <memcopy.h>
#include <e2kintrin.h>
/* Drop any macro definitions of these names so that the function below is
   defined under the plain identifiers. */
#undef __stpcpy
#undef stpcpy
/* When weak_alias is not available the alias at the end of this file is not
   emitted, so the function is defined directly under the name stpcpy. */
#ifndef weak_alias
# define __stpcpy stpcpy
#endif
/* Spell the restrict qualifier with the __restrict__ keyword. */
#define restrict __restrict__
/* Practically a copy of strcpy. */
/* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. */
/* Three implementations follow, selected at compile time by __iset__:
   - __iset__ <= 4: word-at-a-time copy (op_t, stepping by OPSIZ) with DEST
     aligned to 8 bytes;
   - otherwise __iset__ <= 5: 16-byte (__v2di) copy with DEST aligned to
     16 bytes;
   - otherwise: 16-byte (__v2di) copy with aligned loads from SRC.
   Each variant starts with a short byte-by-byte prefix that returns early
   for very short strings.
   NOTE(review): the parameter list uses the old K&R definition style.
   NOTE(review): several byte masks are built with "-1 << n"; left-shifting
   a negative int is undefined in ISO C, so this relies on the compiler's
   behaviour -- confirm. */
char *
__stpcpy (dest, src)
char * restrict dest;
const char * restrict src;
{
/* Byte-typed working cursors over the source and the destination. */
unsigned char * restrict srcp = (unsigned char *) src;
unsigned char * restrict dstp = (unsigned char *) dest;
unsigned char c;
#if __iset__ <= 4
op_t aligns, alignd;
op_t a0, a00, a01, spec, mask;
/* Byte-wise processing up to the aligned dest: copy the first 8 bytes one
   at a time, returning as soon as the terminator has been copied. */
dstp[0] = c = srcp[0];
if (c == '\0')
return dest;
dstp[1] = c = srcp[1];
if (c == '\0')
return dest + 1;
dstp[2] = c = srcp[2];
if (c == '\0')
return dest + 2;
dstp[3] = c = srcp[3];
if (c == '\0')
return dest + 3;
dstp[4] = c = srcp[4];
if (c == '\0')
return dest + 4;
dstp[5] = c = srcp[5];
if (c == '\0')
return dest + 5;
dstp[6] = c = srcp[6];
if (c == '\0')
return dest + 6;
dstp[7] = c = srcp[7];
if (c == '\0')
return dest + 7;
/* alignd: distance (1..8) from dest to the next 8-byte boundary.
   aligns: misalignment of src relative to dest, modulo 8.
   Advancing by alignd <= 8 means the word loop may rewrite some of the
   bytes already copied above. */
alignd = 8 - ((unsigned long int) dstp & 7);
aligns = (srcp - dstp) & 7;
srcp += alignd;
dstp += alignd;
if (aligns == 0) { /* both strings are aligned identically */
a0 = ((op_t *) srcp)[0];
/* Presumably sets a byte of mask for every zero byte of a0; a non-zero
   mask means the terminator is inside this word. */
mask = __builtin_e2k_pcmpeqb (a0, 0);
if (mask == 0) {
/* We will test a 8 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied; 1 cycle */
#pragma loop count (1000)
for (;;) {
/* Store the terminator-free word, then fetch and test the next one. */
((op_t *) dstp)[0] = a0;
srcp += OPSIZ;
dstp += OPSIZ;
a0 = ((op_t *) srcp)[0];
mask = __builtin_e2k_pcmpeqb (a0, 0);
if (mask != 0) break;
}
}
}
else { /* the strings are aligned differently */
/* Round srcp down to an 8-byte boundary and build each destination word
   from two neighbouring source words (a00, a01); E2K_ALIGN_DATA
   presumably merges them according to spec, which is derived from
   aligns. */
srcp = (unsigned char *) ((unsigned long int) srcp & ~7);
E2K_PREPARE_ALIGN (aligns, spec);
a00 = ((op_t *) srcp)[0];
a01 = __builtin_e2k_ld_64s_cleartag ((op_t *) srcp, 8);
E2K_ALIGN_DATA (a00, a01, a0, spec);
mask = __builtin_e2k_pcmpeqb (a0, 0);
if (mask == 0) {
/* We will test a 8 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied; 2 cycles */
#pragma loop count (1000)
for (;;) {
((op_t *) dstp)[0] = a0;
srcp += OPSIZ;
dstp += OPSIZ;
/* The previous upper word becomes the lower word of the next pair. */
a00 = a01;
a01 = __builtin_e2k_ld_64s_cleartag ((op_t *) srcp, 8);
E2K_ALIGN_DATA (a00, a01, a0, spec);
mask = __builtin_e2k_pcmpeqb (a0, 0);
if (mask != 0) break;
}
}
}
/* Tail processing: a0 is the word flagged by mask. Store the bytes in
   front of the flagged position in steps of 4, 2 and 1 bytes, shifting a0
   and mask down after each step, then write the terminator. */
if ((mask & 0xffffffff) == 0) {
*((int *) dstp) = a0;
a0 >>= 32;
mask >>= 32;
dstp += 4;
}
if ((mask & 0xffff) == 0) {
*((short *) dstp) = a0;
a0 >>= 16;
mask >>= 16;
dstp += 2;
}
if ((mask & 0xff) == 0) {
*dstp = a0;
dstp++;
}
*dstp = '\0';
return (char *) dstp;
}
#elif __iset__ <= 5
/* __CMP stores into mask the result of comparing x byte-wise with zero;
   the code below treats it as one bit per byte (uses __builtin_ctz). */
#define __CMP(mask, x) /* zero bytes -> bit mask */ \
(mask) = __builtin_e2k_qpsgn2mskb (__builtin_e2k_qpcmpeqb (x, qzero))
op_t aligns, alignd;
__v2di a0, a00, a01, spec;
const __v2di qzero = __builtin_e2k_qppackdl (0, 0);
unsigned int mask;
/* Process the first 2 bytes one at a time. */
dstp[0] = c = srcp[0];
if (c == '\0')
return dest;
dstp[1] = c = srcp[1];
if (c == '\0')
return dest + 1;
/* alignd: distance (1..16) from dest to the next 16-byte boundary.
   aligns: misalignment of src relative to dest, modulo 16.
   After the advance, dstp - 16 is the 16-byte block that contains the
   start of dest, and 16 - alignd is the offset of dest in that block. */
alignd = 16 - ((unsigned long int) dstp & 15);
aligns = (srcp - dstp) & 15;
srcp += alignd;
dstp += alignd;
if (aligns == 0) { /* both strings are aligned identically */
/* Load the block that contains the start of the source string. */
a0 = ((__v2di *) srcp)[-1];
__CMP (mask, a0);
mask &= (-1 << (16 - alignd)); /* cleared the bits before the start of dst */
if (mask != 0) {
/* Which of the bytes was the zero? */
mask = __builtin_ctz (mask);
/* The store mask selects positions from the start of dest up to and
   including the zero byte: ~(-2 << mask) has bits 0..mask set. */
__builtin_e2k_pst_128 (a0, dstp - 16, (-1 << (16 - alignd)) & ~(-2 << mask));
return (char *) dstp - 16 + mask;
}
/* No terminator in the first block: store it from the start of dest on. */
__builtin_e2k_pst_128 (a0, dstp - 16, -1 << (16 - alignd));
a0 = ((__v2di *) srcp)[0];
__CMP (mask, a0);
if (mask == 0) {
/* We will test a 16 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied; 1 cycle */
#pragma loop count (1000)
for (;;) {
/* Store the terminator-free block, then fetch and test the next one. */
((__v2di *) dstp)[0] = a0;
srcp += 16;
dstp += 16;
a0 = ((__v2di *) srcp)[0];
__CMP (mask, a0);
if (mask != 0) break;
}
}
}
else { /* the strings are aligned differently */
/* Round srcp down to a 16-byte boundary and build each destination block
   from two neighbouring source blocks (a00, a01); E2K_ALIGN_DATA128
   presumably merges them according to spec, which is derived from
   aligns. */
srcp = (unsigned char *) ((unsigned long int) srcp & ~15);
E2K_PREPARE_ALIGN128 (aligns, spec);
a00 = __builtin_e2k_ld_128_cleartag ((__v2di *) srcp, -16);
a01 = __builtin_e2k_ld_128_cleartag ((__v2di *) srcp, 0);
E2K_ALIGN_DATA128 (a00, a01, a0, spec);
__CMP (mask, a0);
mask &= (-1 << (16 - alignd)); /* cleared the bits before the start of dst */
if (mask != 0) {
/* Which of the bytes was the zero? */
mask = __builtin_ctz (mask);
/* Store only the positions from the start of dest through the zero byte. */
__builtin_e2k_pst_128 (a0, dstp - 16, (-1 << (16 - alignd)) & ~(-2 << mask));
return (char *) dstp - 16 + mask;
}
/* No terminator in the first block: store it from the start of dest on. */
__builtin_e2k_pst_128 (a0, dstp - 16, -1 << (16 - alignd));
/* The previous upper block becomes the lower block of the next pair. */
a00 = a01;
a01 =__builtin_e2k_ld_128_cleartag ((__v2di *) srcp, 16);
E2K_ALIGN_DATA128 (a00, a01, a0, spec);
__CMP (mask, a0);
if (mask == 0) {
/* We will test a 16 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied; 2 cycles */
#pragma loop count (1000)
for (;;) {
((__v2di *) dstp)[0] = a0;
srcp += 16;
dstp += 16;
a00 = a01;
a01 = __builtin_e2k_ld_128_cleartag ((__v2di *) srcp, 16);
E2K_ALIGN_DATA128 (a00, a01, a0, spec);
__CMP (mask, a0);
if (mask != 0) break;
}
}
}
/* Tail processing: a0 is the block flagged by mask. */
/* Which of the bytes was the zero? */
mask = __builtin_ctz (mask);
/* Store positions 0..mask of the block (through the zero byte). */
__builtin_e2k_pst_128 (a0, dstp, ~(-2 << mask));
return (char *) dstp + mask;
}
#else /* __iset__ > 5 */
/* __CMP stores into mask the result of comparing x byte-wise with zero;
   the code below treats it as one bit per byte (uses __builtin_ctz).
   __CMP_PRED is used only as a zero / non-zero test for the same
   comparison. */
#define __CMP(mask, x) /* zero bytes -> bit mask */ \
(mask) = __builtin_e2k_qpsgn2mskb (__builtin_e2k_qpcmpeqb (x, qzero))
#define __CMP_PRED(x) /* zero bytes -> predicate */ \
__builtin_e2k_qpcmpeqbop (x, qzero)
__v2di a0;
const __v2di qzero = __builtin_e2k_qppackdl (0, 0);
size_t align;
unsigned int mask;
/* Process the first 4 bytes one at a time. */
dstp[0] = c = srcp[0];
if (c == '\0')
return dest;
dstp[1] = c = srcp[1];
if (c == '\0')
return dest + 1;
dstp[2] = c = srcp[2];
if (c == '\0')
return dest + 2;
dstp[3] = c = srcp[3];
if (c == '\0')
return dest + 3;
srcp += 4;
dstp += 4;
/* Offset of the source cursor inside its 16-byte block. */
align = (size_t) srcp & 15;
if (__builtin_expect (E2K_BYTES_FROM_ALIGN (srcp, 4096) > 4080, 0)) { /* closely to page border */
/* Offsets 4081-4095 will be shifted back to the aligned address thus fit into page */
srcp = (unsigned char *) ((size_t) srcp & ~15);
/* Move dstp back by the same amount so that position i of the loaded
   block still corresponds to dstp[i]. */
dstp -= align;
/* first qword load is aligned */
a0 = ((__v2di *) srcp)[0];
__CMP (mask, a0);
mask &= (-1 << align); /* cleared the bits before the start of src */
if (mask != 0) {
/* Which of the bytes was the zero? */
mask = __builtin_ctz (mask);
/* Store only positions align..mask (through the zero byte). */
__builtin_e2k_pst_128 (a0, dstp, (-1 << align) & ~(-2 << mask));
return (char *) dstp + mask;
}
/* No terminator: store positions align..15, then step to the next block. */
__builtin_e2k_pst_128 (a0, dstp, -1 << align);
srcp += 16;
dstp += 16;
}
else {
/* first qword load is unaligned */
a0 = ((__v2di *) srcp)[0];
if (__CMP_PRED (a0) != 0) {
/* Which of the bytes was the zero? */
__CMP (mask, a0);
mask = __builtin_ctz (mask);
/* Store positions 0..mask (through the zero byte). */
__builtin_e2k_pst_128 (a0, dstp, ~(-2 << mask));
return (char *) dstp + mask;
}
/* No terminator: store the low 16 - align positions and advance by the
   same amount, which brings srcp to a 16-byte boundary. */
__builtin_e2k_pst_128 (a0, dstp, 0xffff >> align);
srcp += 16 - align;
dstp += 16 - align;
}
/* next qword load is aligned */
a0 = ((__v2di *) srcp)[0];
if (__CMP_PRED (a0) == 0) {
/* We will test a 16 bytes at a time. */
#pragma noprefetch /* otherwise apb is applied; 1 cycle */
#pragma loop count (1000)
#pragma unroll (1)
for (;;) {
/* Store the terminator-free block, then fetch and test the next one. */
((__v2di *) dstp)[0] = a0;
srcp += 16;
dstp += 16;
a0 = ((__v2di *) srcp)[0];
if (__CMP_PRED (a0)) break;
}
}
/* Tail processing: a0 is the block in which a zero byte was detected. */
/* Which of the bytes was the zero? */
__CMP (mask, a0);
mask = __builtin_ctz (mask);
/* Store positions 0..mask of the block (through the zero byte). */
__builtin_e2k_pst_128 (a0, dstp, ~(-2 << mask));
return (char *) dstp + mask;
}
#endif /* __iset__ > 5 */
/* Symbol bookkeeping for __stpcpy / stpcpy. Each macro is invoked only when
   the build defines it; their definitions are not part of this file. */
#ifdef libc_hidden_def
libc_hidden_def (__stpcpy)
#endif
/* Presumably publishes stpcpy as a weak alias of __stpcpy -- confirm against
   the macro's definition. */
#ifdef weak_alias
weak_alias (__stpcpy, stpcpy)
#endif
#ifdef libc_hidden_builtin_def
libc_hidden_builtin_def (stpcpy)
#endif