2002-08-22 Steven Munroe <sjmunroe@us.ibm.com>
* sysdeps/powerpc/elf/libc-start.c (__cache_line_size): Declare. (__aux_init_cache): New. (__libc_start_main): Change type of `auxvec' parameter to `ElfW(auxv_t) *'. Correct walking of aux vector. Call __aux_init_cache. * sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c (__cache_line_size): Declare. (__aux_init_cache): New. (DL_PLATFORM_INIT): Define. * sysdeps/powerpc/memset.S: Define __cache_line_size and use its value to select the correct stride for dcbz. 2002-08-22 Steven Munroe <sjmunroe@us.ibm.com> * sysdeps/powerpc/elf/libc-start.c (__cache_line_size): Declare. (__aux_init_cache): New. (__libc_start_main): Change type of `auxvec' parameter to `ElfW(auxv_t) *'. Correct walking of aux vector. Call __aux_init_cache. * sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c (__cache_line_size): Declare. (__aux_init_cache): New. (DL_PLATFORM_INIT): Define. * sysdeps/powerpc/memset.S: Define __cache_line_size and use its value to select the correct stride for dcbz.
This commit is contained in:
parent
7a14a672b9
commit
b8a5737a49
15
ChangeLog
15
ChangeLog
|
@ -1,3 +1,18 @@
|
||||||
|
2002-08-22 Steven Munroe <sjmunroe@us.ibm.com>
|
||||||
|
|
||||||
|
* sysdeps/powerpc/elf/libc-start.c
|
||||||
|
(__cache_line_size): Declare.
|
||||||
|
(__aux_init_cache): New.
|
||||||
|
(__libc_start_main): Change type of `auxvec' parameter to
|
||||||
|
`ElfW(auxv_t) *'. Correct walking of aux vector. Call
|
||||||
|
__aux_init_cache.
|
||||||
|
* sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
|
||||||
|
(__cache_line_size): Declare.
|
||||||
|
(__aux_init_cache): New.
|
||||||
|
(DL_PLATFORM_INIT): Define.
|
||||||
|
* sysdeps/powerpc/memset.S: Define __cache_line_size and use its
|
||||||
|
value to select the correct stride for dcbz.
|
||||||
|
|
||||||
2002-08-22 Andreas Jaeger <aj@suse.de>
|
2002-08-22 Andreas Jaeger <aj@suse.de>
|
||||||
|
|
||||||
* sysdeps/unix/sysv/linux/x86_64/syscalls.list: Fix arguments of
|
* sysdeps/unix/sysv/linux/x86_64/syscalls.list: Fix arguments of
|
||||||
|
|
|
@ -26,6 +26,10 @@ extern void __libc_init_first (int argc, char **argv, char **envp);
|
||||||
|
|
||||||
extern int _dl_starting_up;
|
extern int _dl_starting_up;
|
||||||
weak_extern (_dl_starting_up)
|
weak_extern (_dl_starting_up)
|
||||||
|
|
||||||
|
extern int __cache_line_size;
|
||||||
|
weak_extern (__cache_line_size)
|
||||||
|
|
||||||
extern int __libc_multiple_libcs;
|
extern int __libc_multiple_libcs;
|
||||||
extern void *__libc_stack_end;
|
extern void *__libc_stack_end;
|
||||||
|
|
||||||
|
@ -37,12 +41,33 @@ struct startup_info
|
||||||
void (*fini) (void);
|
void (*fini) (void);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Scan the Aux Vector for the "Data Cache Block Size" entry. If found
|
||||||
|
verify that the static extern __cache_line_size is defined by checking
|
||||||
|
for not NULL. If it is defined then assign the cache block size
|
||||||
|
value to __cache_line_size. */
|
||||||
|
static inline void
|
||||||
|
__aux_init_cache (ElfW(auxv_t) *av)
|
||||||
|
{
|
||||||
|
for (; av->a_type != AT_NULL; ++av)
|
||||||
|
switch (av->a_type)
|
||||||
|
{
|
||||||
|
case AT_DCACHEBSIZE:
|
||||||
|
{
|
||||||
|
int *cls = & __cache_line_size;
|
||||||
|
if (cls != NULL)
|
||||||
|
*cls = av->a_un.a_val;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
/* GKM FIXME: GCC: this should get __BP_ prefix by virtue of the
|
/* GKM FIXME: GCC: this should get __BP_ prefix by virtue of the
|
||||||
BPs in the arglist of startup_info.main and startup_info.init. */
|
BPs in the arglist of startup_info.main and startup_info.init. */
|
||||||
BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
|
BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
|
||||||
char *__unbounded *__unbounded ubp_ev,
|
char *__unbounded *__unbounded ubp_ev,
|
||||||
void *__unbounded auxvec, void (*rtld_fini) (void),
|
ElfW(auxv_t) *__unbounded auxvec, void (*rtld_fini) (void),
|
||||||
struct startup_info *__unbounded stinfo,
|
struct startup_info *__unbounded stinfo,
|
||||||
char *__unbounded *__unbounded stack_on_entry)
|
char *__unbounded *__unbounded stack_on_entry)
|
||||||
{
|
{
|
||||||
|
@ -66,6 +91,7 @@ BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
|
||||||
as a statically-linked program by Linux... */
|
as a statically-linked program by Linux... */
|
||||||
if (*stack_on_entry != NULL)
|
if (*stack_on_entry != NULL)
|
||||||
{
|
{
|
||||||
|
char *__unbounded *__unbounded temp;
|
||||||
/* ...in which case, we have argc as the top thing on the
|
/* ...in which case, we have argc as the top thing on the
|
||||||
stack, followed by argv (NULL-terminated), envp (likewise),
|
stack, followed by argv (NULL-terminated), envp (likewise),
|
||||||
and the auxiliary vector. */
|
and the auxiliary vector. */
|
||||||
|
@ -73,10 +99,12 @@ BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
|
||||||
ubp_av = stack_on_entry + 1;
|
ubp_av = stack_on_entry + 1;
|
||||||
ubp_ev = ubp_av + argc + 1;
|
ubp_ev = ubp_av + argc + 1;
|
||||||
#ifdef HAVE_AUX_VECTOR
|
#ifdef HAVE_AUX_VECTOR
|
||||||
auxvec = ubp_ev;
|
temp = ubp_ev;
|
||||||
while (*(char *__unbounded *__unbounded) auxvec != NULL)
|
while (*temp != NULL)
|
||||||
++auxvec;
|
++temp;
|
||||||
++auxvec;
|
auxvec = (ElfW(auxv_t) *)++temp;
|
||||||
|
|
||||||
|
|
||||||
# ifndef SHARED
|
# ifndef SHARED
|
||||||
_dl_aux_init ((ElfW(auxv_t) *) auxvec);
|
_dl_aux_init ((ElfW(auxv_t) *) auxvec);
|
||||||
# endif
|
# endif
|
||||||
|
@ -85,6 +113,9 @@ BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
|
||||||
}
|
}
|
||||||
|
|
||||||
INIT_ARGV_and_ENVIRON;
|
INIT_ARGV_and_ENVIRON;
|
||||||
|
|
||||||
|
/* Initialize the __cache_line_size variable from the aux vector. */
|
||||||
|
__aux_init_cache((ElfW(auxv_t) *) auxvec);
|
||||||
|
|
||||||
/* Store something that has some relationship to the end of the
|
/* Store something that has some relationship to the end of the
|
||||||
stack, for backtraces. This variable should be thread-specific. */
|
stack, for backtraces. This variable should be thread-specific. */
|
||||||
|
|
|
@ -21,12 +21,26 @@
|
||||||
#include <bp-sym.h>
|
#include <bp-sym.h>
|
||||||
#include <bp-asm.h>
|
#include <bp-asm.h>
|
||||||
|
|
||||||
|
/* Define a global static that can hold the cache line size. The
|
||||||
|
assumption is that startup code will access the "aux vector" to
|
||||||
|
obtain the value set by the kernel and store it into this
|
||||||
|
variable. */
|
||||||
|
|
||||||
|
.globl __cache_line_size
|
||||||
|
.section ".data","aw"
|
||||||
|
.align 2
|
||||||
|
.type __cache_line_size,@object
|
||||||
|
.size __cache_line_size,4
|
||||||
|
__cache_line_size:
|
||||||
|
.long 0
|
||||||
|
.section ".text"
|
||||||
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
|
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
|
||||||
Returns 's'.
|
Returns 's'.
|
||||||
|
|
||||||
The memset is done in three sizes: byte (8 bits), word (32 bits),
|
The memset is done in four sizes: byte (8 bits), word (32 bits),
|
||||||
cache line (256 bits). There is a special case for setting cache lines
|
32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
|
||||||
to 0, to take advantage of the dcbz instruction. */
|
There is a special case for setting whole cache lines to 0, which
|
||||||
|
takes advantage of the dcbz instruction. */
|
||||||
|
|
||||||
EALIGN (BP_SYM (memset), 5, 1)
|
EALIGN (BP_SYM (memset), 5, 1)
|
||||||
|
|
||||||
|
@ -50,6 +64,10 @@ EALIGN (BP_SYM (memset), 5, 1)
|
||||||
#define rNEG64 r8 /* constant -64 for clearing with dcbz */
|
#define rNEG64 r8 /* constant -64 for clearing with dcbz */
|
||||||
#define rNEG32 r9 /* constant -32 for clearing with dcbz */
|
#define rNEG32 r9 /* constant -32 for clearing with dcbz */
|
||||||
|
|
||||||
|
#define rGOT r9 /* Address of the Global Offset Table. */
|
||||||
|
#define rCLS r8 /* Cache line size obtained from static. */
|
||||||
|
#define rCLM r9 /* Cache line size mask to check for cache alignment. */
|
||||||
|
|
||||||
#if __BOUNDED_POINTERS__
|
#if __BOUNDED_POINTERS__
|
||||||
cmplwi cr1, rRTN, 0
|
cmplwi cr1, rRTN, 0
|
||||||
CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
|
CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
|
||||||
|
@ -105,7 +123,17 @@ L(caligned):
|
||||||
cmplwi cr1, rCHR, 0
|
cmplwi cr1, rCHR, 0
|
||||||
clrrwi. rALIGN, rLEN, 5
|
clrrwi. rALIGN, rLEN, 5
|
||||||
mtcrf 0x01, rLEN /* 40th instruction from .align */
|
mtcrf 0x01, rLEN /* 40th instruction from .align */
|
||||||
beq cr1, L(zloopstart) /* special case for clearing memory using dcbz */
|
|
||||||
|
/* Check if we can use the special case for clearing memory using dcbz.
|
||||||
|
This requires that we know the correct cache line size for this
|
||||||
|
processor. Getting the __cache_line_size may require establishing GOT
|
||||||
|
addressability, so branch out of line to set this up. */
|
||||||
|
beq cr1, L(checklinesize)
|
||||||
|
|
||||||
|
/* Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary.
|
||||||
|
Can't assume that rCHR is zero or that the cache line size is either
|
||||||
|
32-bytes or even known. */
|
||||||
|
L(nondcbz):
|
||||||
srwi rTMP, rALIGN, 5
|
srwi rTMP, rALIGN, 5
|
||||||
mtctr rTMP
|
mtctr rTMP
|
||||||
beq L(medium) /* we may not actually get to do a full line */
|
beq L(medium) /* we may not actually get to do a full line */
|
||||||
|
@ -114,7 +142,9 @@ L(caligned):
|
||||||
li rNEG64, -0x40
|
li rNEG64, -0x40
|
||||||
bdz L(cloopdone) /* 48th instruction from .align */
|
bdz L(cloopdone) /* 48th instruction from .align */
|
||||||
|
|
||||||
L(c3): dcbz rNEG64, rMEMP
|
/* We can't use dcbz here as we don't know the cache line size. We can
|
||||||
|
use "data cache block touch for store", which is safe. */
|
||||||
|
L(c3): dcbtst rNEG64, rMEMP
|
||||||
stw rCHR, -4(rMEMP)
|
stw rCHR, -4(rMEMP)
|
||||||
stw rCHR, -8(rMEMP)
|
stw rCHR, -8(rMEMP)
|
||||||
stw rCHR, -12(rMEMP)
|
stw rCHR, -12(rMEMP)
|
||||||
|
@ -142,7 +172,10 @@ L(cloopdone):
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
nop
|
nop
|
||||||
/* Clear lines of memory in 128-byte chunks. */
|
/* Clear cache lines of memory in 128-byte chunks.
|
||||||
|
This code is optimized for processors with 32-byte cache lines.
|
||||||
|
It is further optimized for the 601 processor, which requires
|
||||||
|
some care in how the code is aligned in the i-cache. */
|
||||||
L(zloopstart):
|
L(zloopstart):
|
||||||
clrlwi rLEN, rLEN, 27
|
clrlwi rLEN, rLEN, 27
|
||||||
mtcrf 0x02, rALIGN
|
mtcrf 0x02, rALIGN
|
||||||
|
@ -226,4 +259,80 @@ L(medium_28t):
|
||||||
stw rCHR, -4(rMEMP)
|
stw rCHR, -4(rMEMP)
|
||||||
stw rCHR, -8(rMEMP)
|
stw rCHR, -8(rMEMP)
|
||||||
blr
|
blr
|
||||||
|
|
||||||
|
L(checklinesize):
|
||||||
|
#ifdef SHARED
|
||||||
|
mflr rTMP
|
||||||
|
/* If the remaining length is less than 32 bytes then don't bother getting
|
||||||
|
the cache line size. */
|
||||||
|
beq L(medium)
|
||||||
|
/* Establishes GOT addressability so we can load __cache_line_size
|
||||||
|
from static. This value was set from the aux vector during startup. */
|
||||||
|
bl _GLOBAL_OFFSET_TABLE_@local-4
|
||||||
|
mflr rGOT
|
||||||
|
lwz rGOT,__cache_line_size@got(rGOT)
|
||||||
|
lwz rCLS,0(rGOT)
|
||||||
|
mtlr rTMP
|
||||||
|
#else
|
||||||
|
/* Load __cache_line_size from static. This value was set from the
|
||||||
|
aux vector during startup. */
|
||||||
|
lis rCLS,__cache_line_size@ha
|
||||||
|
/* If the remaining length is less than 32 bytes then don't bother getting
|
||||||
|
the cache line size. */
|
||||||
|
beq L(medium)
|
||||||
|
lwz rCLS,__cache_line_size@l(rCLS)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If the cache line size was not set then go to L(nondcbz), which is
|
||||||
|
safe for any cache line size. */
|
||||||
|
cmplwi cr1,rCLS,0
|
||||||
|
beq cr1,L(nondcbz)
|
||||||
|
|
||||||
|
/* If the cache line size is 32 bytes then go to L(zloopstart),
|
||||||
|
which is coded specifically for 32-byte lines (and 601). */
|
||||||
|
cmplwi cr1,rCLS,32
|
||||||
|
beq cr1,L(zloopstart)
|
||||||
|
|
||||||
|
/* Now we know the cache line size and it is not 32-bytes. However
|
||||||
|
we may not yet be aligned to the cache line and may have a partial
|
||||||
|
line to fill. Touch it first to fetch the cache line. */
|
||||||
|
dcbtst 0,rMEMP
|
||||||
|
|
||||||
|
addi rCLM,rCLS,-1
|
||||||
|
L(getCacheAligned):
|
||||||
|
cmplwi cr1,rLEN,32
|
||||||
|
and. rTMP,rCLM,rMEMP
|
||||||
|
blt cr1,L(handletail32)
|
||||||
|
beq L(cacheAligned)
|
||||||
|
/* We are not aligned to start of a cache line yet. Store 32-byte
|
||||||
|
of data and test again. */
|
||||||
|
addi rMEMP,rMEMP,32
|
||||||
|
addi rLEN,rLEN,-32
|
||||||
|
stw rCHR,-32(rMEMP)
|
||||||
|
stw rCHR,-28(rMEMP)
|
||||||
|
stw rCHR,-24(rMEMP)
|
||||||
|
stw rCHR,-20(rMEMP)
|
||||||
|
stw rCHR,-16(rMEMP)
|
||||||
|
stw rCHR,-12(rMEMP)
|
||||||
|
stw rCHR,-8(rMEMP)
|
||||||
|
stw rCHR,-4(rMEMP)
|
||||||
|
b L(getCacheAligned)
|
||||||
|
|
||||||
|
/* Now we are aligned to the cache line and can use dcbz. */
|
||||||
|
L(cacheAligned):
|
||||||
|
cmplw cr1,rLEN,rCLS
|
||||||
|
blt cr1,L(handletail32)
|
||||||
|
dcbz 0,rMEMP
|
||||||
|
subf rLEN,rCLS,rLEN
|
||||||
|
add rMEMP,rMEMP,rCLS
|
||||||
|
b L(cacheAligned)
|
||||||
|
|
||||||
|
/* We are here because the cache line size was set, it was not
|
||||||
|
32-bytes, and the remainder (rLEN) is now less than the actual cache
|
||||||
|
line size. Set up the preconditions for L(nondcbz) and go there to
|
||||||
|
store the remaining bytes. */
|
||||||
|
L(handletail32):
|
||||||
|
clrrwi. rALIGN, rLEN, 5
|
||||||
|
b L(nondcbz)
|
||||||
|
|
||||||
END (BP_SYM (memset))
|
END (BP_SYM (memset))
|
||||||
|
|
|
@ -20,6 +20,32 @@
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "kernel-features.h"
|
#include "kernel-features.h"
|
||||||
|
#include <ldsodefs.h>
|
||||||
|
|
||||||
|
extern int __cache_line_size;
|
||||||
|
weak_extern (__cache_line_size)
|
||||||
|
|
||||||
|
#define DL_PLATFORM_INIT __aux_init_cache(_dl_auxv)
|
||||||
|
|
||||||
|
/* Scan the Aux Vector for the "Data Cache Block Size" entry. If found
|
||||||
|
verify that the static extern __cache_line_size is defined by checking
|
||||||
|
for not NULL. If it is defined then assign the cache block size
|
||||||
|
value to __cache_line_size. */
|
||||||
|
static inline void
|
||||||
|
__aux_init_cache (ElfW(auxv_t) *av)
|
||||||
|
{
|
||||||
|
for (; av->a_type != AT_NULL; ++av)
|
||||||
|
switch (av->a_type)
|
||||||
|
{
|
||||||
|
case AT_DCACHEBSIZE:
|
||||||
|
{
|
||||||
|
int *cls = & __cache_line_size;
|
||||||
|
if (cls != NULL)
|
||||||
|
*cls = av->a_un.a_val;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef __ASSUME_STD_AUXV
|
#ifndef __ASSUME_STD_AUXV
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue