6fb8cbcb58
This patch includes optimized 64bit memcpy/memmove for Atom, Core 2 and Core i7. It improves memcpy by up to 3X on Atom, up to 4X on Core 2 and up to 1X on Core i7. It also improves memmove by up to 3X on Atom, up to 4X on Core 2 and up to 2X on Core i7.
103 lines
3.2 KiB
C
103 lines
3.2 KiB
C
/* Copy memory to memory until the specified number of bytes
   has been copied with error checking.  Overlap is handled correctly.
   Copyright (C) 1991,1995,1996,1997,2003,2004 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Torbjorn Granlund (tege@sics.se).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
|
|
|
|
#include <string.h>
|
|
#include <memcopy.h>
|
|
#include <pagecopy.h>
|
|
|
|
#ifndef MEMMOVE_CHK
|
|
# define MEMMOVE_CHK __memmove_chk
|
|
#endif
|
|
|
|
void *
|
|
MEMMOVE_CHK (dest, src, len, destlen)
|
|
void *dest;
|
|
const void *src;
|
|
size_t len;
|
|
size_t destlen;
|
|
{
|
|
if (__builtin_expect (destlen < len, 0))
|
|
__chk_fail ();
|
|
|
|
unsigned long int dstp = (long int) dest;
|
|
unsigned long int srcp = (long int) src;
|
|
|
|
/* This test makes the forward copying code be used whenever possible.
|
|
Reduces the working set. */
|
|
if (dstp - srcp >= len) /* *Unsigned* compare! */
|
|
{
|
|
/* Copy from the beginning to the end. */
|
|
|
|
/* If there not too few bytes to copy, use word copy. */
|
|
if (len >= OP_T_THRES)
|
|
{
|
|
/* Copy just a few bytes to make DSTP aligned. */
|
|
len -= (-dstp) % OPSIZ;
|
|
BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);
|
|
|
|
/* Copy whole pages from SRCP to DSTP by virtual address
|
|
manipulation, as much as possible. */
|
|
|
|
PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);
|
|
|
|
/* Copy from SRCP to DSTP taking advantage of the known
|
|
alignment of DSTP. Number of bytes remaining is put
|
|
in the third argument, i.e. in LEN. This number may
|
|
vary from machine to machine. */
|
|
|
|
WORD_COPY_FWD (dstp, srcp, len, len);
|
|
|
|
/* Fall out and copy the tail. */
|
|
}
|
|
|
|
/* There are just a few bytes to copy. Use byte memory operations. */
|
|
BYTE_COPY_FWD (dstp, srcp, len);
|
|
}
|
|
else
|
|
{
|
|
/* Copy from the end to the beginning. */
|
|
srcp += len;
|
|
dstp += len;
|
|
|
|
/* If there not too few bytes to copy, use word copy. */
|
|
if (len >= OP_T_THRES)
|
|
{
|
|
/* Copy just a few bytes to make DSTP aligned. */
|
|
len -= dstp % OPSIZ;
|
|
BYTE_COPY_BWD (dstp, srcp, dstp % OPSIZ);
|
|
|
|
/* Copy from SRCP to DSTP taking advantage of the known
|
|
alignment of DSTP. Number of bytes remaining is put
|
|
in the third argument, i.e. in LEN. This number may
|
|
vary from machine to machine. */
|
|
|
|
WORD_COPY_BWD (dstp, srcp, len, len);
|
|
|
|
/* Fall out and copy the tail. */
|
|
}
|
|
|
|
/* There are just a few bytes to copy. Use byte memory operations. */
|
|
BYTE_COPY_BWD (dstp, srcp, len);
|
|
}
|
|
|
|
return dest;
|
|
}
|