2002-08-22  Steven Munroe  <sjmunroe@us.ibm.com>

	* sysdeps/powerpc/elf/libc-start.c 
	(__cache_line_size): Declare.
	(__aux_init_cache): New.
	(__libc_start_main): Change type of `auxvec' parameter to
	`ElfW(auxv_t) *'.  Correct walking of aux vector.  Call
	__aux_init_cache.
	* sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c 
	(__cache_line_size): Declare.
	(__aux_init_cache): New.
	(DL_PLATFORM_INIT): Define.
	* sysdeps/powerpc/memset.S: Define __cache_line_size and use its
	value to select the correct stride for dcbz.
Committed by Geoff Keating, 2002-08-22 19:07:46 +00:00
commit b8a5737a49, parent 7a14a672b9
4 changed files with 192 additions and 11 deletions

ChangeLog

@@ -1,3 +1,18 @@
2002-08-22  Steven Munroe  <sjmunroe@us.ibm.com>

	* sysdeps/powerpc/elf/libc-start.c
	(__cache_line_size): Declare.
	(__aux_init_cache): New.
	(__libc_start_main): Change type of `auxvec' parameter to
	`ElfW(auxv_t) *'.  Correct walking of aux vector.  Call
	__aux_init_cache.
	* sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
	(__cache_line_size): Declare.
	(__aux_init_cache): New.
	(DL_PLATFORM_INIT): Define.
	* sysdeps/powerpc/memset.S: Define __cache_line_size and use its
	value to select the correct stride for dcbz.

2002-08-22  Andreas Jaeger  <aj@suse.de>

	* sysdeps/unix/sysv/linux/x86_64/syscalls.list: Fix arguments of

sysdeps/powerpc/elf/libc-start.c

@@ -26,6 +26,10 @@ extern void __libc_init_first (int argc, char **argv, char **envp);
extern int _dl_starting_up;
weak_extern (_dl_starting_up)
extern int __cache_line_size;
weak_extern (__cache_line_size)
extern int __libc_multiple_libcs;
extern void *__libc_stack_end;
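glibc's weak_extern marks __cache_line_size as a weak reference, so code can test the symbol's address before storing through it.  A minimal standalone sketch of the same pattern, using GCC's attribute syntax instead of the glibc-internal weak_extern macro (maybe_set_cache_line_size is a hypothetical name):

/* Sketch only: __attribute__ ((weak)) stands in for glibc's
   weak_extern wrapper.  */
extern int __cache_line_size __attribute__ ((weak));

static void
maybe_set_cache_line_size (int value)
{
  /* If no object file defines __cache_line_size, the weak reference
     resolves to address 0 and the store is skipped.  */
  if (&__cache_line_size != NULL)
    __cache_line_size = value;
}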
@@ -37,12 +41,33 @@ struct startup_info
void (*fini) (void);
};
/* Scan the aux vector for the AT_DCACHEBSIZE ("data cache block size")
   entry.  If it is present, verify that the weak extern
   __cache_line_size is actually defined by checking its address for
   non-NULL, and if so store the cache block size value into
   __cache_line_size.  */
static inline void
__aux_init_cache (ElfW(auxv_t) *av)
{
  for (; av->a_type != AT_NULL; ++av)
    switch (av->a_type)
      {
      case AT_DCACHEBSIZE:
	{
	  int *cls = &__cache_line_size;
	  if (cls != NULL)
	    *cls = av->a_un.a_val;
	}
	break;
      }
}
int
/* GKM FIXME: GCC: this should get __BP_ prefix by virtue of the
BPs in the arglist of startup_info.main and startup_info.init. */
BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
char *__unbounded *__unbounded ubp_ev,
-			    void *__unbounded auxvec, void (*rtld_fini) (void),
+			    ElfW(auxv_t) *__unbounded auxvec, void (*rtld_fini) (void),
struct startup_info *__unbounded stinfo,
char *__unbounded *__unbounded stack_on_entry)
{
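The __aux_init_cache walk above predates any public API for aux-vector lookups.  On current Linux/glibc systems the same AT_DCACHEBSIZE value can be read with getauxval(3), which appeared in glibc 2.16, a decade after this commit; a minimal sketch:

#include <stdio.h>
#include <elf.h>       /* AT_DCACHEBSIZE */
#include <sys/auxv.h>  /* getauxval */

int
main (void)
{
  /* getauxval returns 0 when the kernel did not supply the entry.  */
  unsigned long int dcbsize = getauxval (AT_DCACHEBSIZE);
  if (dcbsize != 0)
    printf ("data cache block size: %lu bytes\n", dcbsize);
  else
    puts ("AT_DCACHEBSIZE not present in the aux vector");
  return 0;
}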
@@ -66,6 +91,7 @@ BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
as a statically-linked program by Linux... */
if (*stack_on_entry != NULL)
{
char *__unbounded *__unbounded temp;
/* ...in which case, we have argc as the top thing on the
stack, followed by argv (NULL-terminated), envp (likewise),
and the auxiliary vector.  */
@@ -73,10 +99,12 @@ BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
ubp_av = stack_on_entry + 1;
ubp_ev = ubp_av + argc + 1;
#ifdef HAVE_AUX_VECTOR
-	  auxvec = ubp_ev;
-	  while (*(char *__unbounded *__unbounded) auxvec != NULL)
-	    ++auxvec;
-	  ++auxvec;
+	  temp = ubp_ev;
+	  while (*temp != NULL)
+	    ++temp;
+	  auxvec = (ElfW(auxv_t) *) ++temp;
# ifndef SHARED
_dl_aux_init ((ElfW(auxv_t) *) auxvec);
# endif
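The corrected walk relies on the System V ABI stack layout: the aux vector begins one pointer past envp's NULL terminator.  A standalone C rendering of that walk, with find_auxv as a hypothetical helper name:

#include <link.h>  /* ElfW macro; pulls in <elf.h> for the auxv_t types */

/* Mirrors the corrected loop in __libc_start_main above.  */
static ElfW(auxv_t) *
find_auxv (char **envp)
{
  char **walk = envp;
  while (*walk != NULL)  /* step over every environment pointer */
    ++walk;
  /* walk now points at envp's NULL terminator; the aux vector starts
     at the very next word.  */
  return (ElfW(auxv_t) *) (walk + 1);
}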
@@ -85,6 +113,9 @@ BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
}
INIT_ARGV_and_ENVIRON;
/* Initialize the __cache_line_size variable from the aux vector. */
__aux_init_cache((ElfW(auxv_t) *) auxvec);
/* Store something that has some relationship to the end of the
stack, for backtraces. This variable should be thread-specific. */

sysdeps/powerpc/memset.S

@@ -21,12 +21,26 @@
#include <bp-sym.h>
#include <bp-asm.h>
/* Define a global static that can hold the cache line size.  The
   assumption is that startup code will access the "aux vector" to
   obtain the value set by the kernel and store it into this
   variable.  */
.globl __cache_line_size
.section ".data","aw"
.align 2
.type __cache_line_size,@object
.size __cache_line_size,4
__cache_line_size:
.long 0
.section ".text"
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
Returns 's'.
-   The memset is done in three sizes: byte (8 bits), word (32 bits),
-   cache line (256 bits).  There is a special case for setting cache lines
-   to 0, to take advantage of the dcbz instruction.  */
+   The memset is done in four sizes: byte (8 bits), word (32 bits),
+   32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
+   There is a special case for setting whole cache lines to 0, which
+   takes advantage of the dcbz instruction.  */
EALIGN (BP_SYM (memset), 5, 1)
@@ -50,6 +64,10 @@ EALIGN (BP_SYM (memset), 5, 1)
#define rNEG64 r8 /* constant -64 for clearing with dcbz */
#define rNEG32 r9 /* constant -32 for clearing with dcbz */
#define rGOT r9 /* Address of the Global Offset Table. */
#define rCLS r8 /* Cache line size obtained from static. */
#define rCLM r9 /* Cache line size mask to check for cache alignment. */
#if __BOUNDED_POINTERS__
cmplwi cr1, rRTN, 0
CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
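rCLM is loaded with rCLS - 1 further down and used as a mask: for a power-of-two line size, ANDing an address with size - 1 yields zero exactly when the address is line-aligned.  The same test written in C (is_line_aligned is a hypothetical name):

#include <stdint.h>

/* line_size must be a power of two, as hardware cache line sizes are.  */
static int
is_line_aligned (uintptr_t addr, uintptr_t line_size)
{
  return (addr & (line_size - 1)) == 0;
}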
@@ -105,7 +123,17 @@ L(caligned):
cmplwi cr1, rCHR, 0
clrrwi. rALIGN, rLEN, 5
mtcrf 0x01, rLEN /* 40th instruction from .align */
-	beq	cr1, L(zloopstart) /* special case for clearing memory using dcbz */
+/* Check if we can use the special case for clearing memory using dcbz.
+   This requires that we know the correct cache line size for this
+   processor.  Getting the __cache_line_size may require establishing GOT
+   addressability, so branch out of line to set this up.  */
+	beq	cr1, L(checklinesize)
+
+/* Store blocks of 32 bytes (256 bits) starting on a 32-byte boundary.
+   Can't assume that rCHR is zero or that the cache line size is either
+   32 bytes or even known.  */
+L(nondcbz):
srwi rTMP, rALIGN, 5
mtctr rTMP
beq L(medium) /* we may not actually get to do a full line */
@@ -114,7 +142,9 @@ L(caligned):
li rNEG64, -0x40
bdz L(cloopdone) /* 48th instruction from .align */
-L(c3):	dcbz	rNEG64, rMEMP
+/* We can't use dcbz here as we don't know the cache line size.  We can
+   use "data cache block touch for store", which is safe.  */
+L(c3):	dcbtst	rNEG64, rMEMP
stw rCHR, -4(rMEMP)
stw rCHR, -8(rMEMP)
stw rCHR, -12(rMEMP)
@@ -142,7 +172,10 @@ L(cloopdone):
.align 5
nop
-/* Clear lines of memory in 128-byte chunks.  */
+/* Clear cache lines of memory in 128-byte chunks.
+   This code is optimized for processors with 32-byte cache lines.
+   It is further optimized for the 601 processor, which requires
+   some care in how the code is aligned in the i-cache.  */
L(zloopstart):
clrlwi rLEN, rLEN, 27
mtcrf 0x02, rALIGN
@@ -226,4 +259,80 @@ L(medium_28t):
stw rCHR, -4(rMEMP)
stw rCHR, -8(rMEMP)
blr
L(checklinesize):
#ifdef SHARED
mflr rTMP
/* If the remaining length is less than 32 bytes then don't bother
   getting the cache line size.  */
beq L(medium)
/* Establishes GOT addressability so we can load __cache_line_size
from static. This value was set from the aux vector during startup. */
bl _GLOBAL_OFFSET_TABLE_@local-4
mflr rGOT
lwz rGOT,__cache_line_size@got(rGOT)
lwz rCLS,0(rGOT)
mtlr rTMP
#else
/* Load __cache_line_size from static. This value was set from the
aux vector during startup. */
lis rCLS,__cache_line_size@ha
/* If the remaining length is less than 32 bytes then don't bother
   getting the cache line size.  */
beq L(medium)
lwz rCLS,__cache_line_size@l(rCLS)
#endif
/* If the cache line size was not set then go to L(nondcbz), which is
   safe for any cache line size.  */
cmplwi cr1,rCLS,0
beq cr1,L(nondcbz)
/* If the cache line size is 32 bytes then go to L(zloopstart),
   which is coded specifically for 32-byte lines (and the 601).  */
cmplwi cr1,rCLS,32
beq cr1,L(zloopstart)
/* Now we know the cache line size, and it is not 32 bytes.  However
   we may not yet be aligned to the cache line and may have a partial
   line to fill.  Touch it first to fetch the cache line.  */
dcbtst 0,rMEMP
addi rCLM,rCLS,-1
L(getCacheAligned):
cmplwi cr1,rLEN,32
and. rTMP,rCLM,rMEMP
blt cr1,L(handletail32)
beq L(cacheAligned)
/* We are not yet aligned to the start of a cache line.  Store 32 bytes
   of data and test again.  */
addi rMEMP,rMEMP,32
addi rLEN,rLEN,-32
stw rCHR,-32(rMEMP)
stw rCHR,-28(rMEMP)
stw rCHR,-24(rMEMP)
stw rCHR,-20(rMEMP)
stw rCHR,-16(rMEMP)
stw rCHR,-12(rMEMP)
stw rCHR,-8(rMEMP)
stw rCHR,-4(rMEMP)
b L(getCacheAligned)
/* Now we are aligned to the cache line and can use dcbz. */
L(cacheAligned):
cmplw cr1,rLEN,rCLS
blt cr1,L(handletail32)
dcbz 0,rMEMP
subf rLEN,rCLS,rLEN
add rMEMP,rMEMP,rCLS
b L(cacheAligned)
/* We are here because the cache line size was set, it is not
   32 bytes, and the remainder (rLEN) is now less than the actual cache
   line size.  Set up the preconditions for L(nondcbz) and go there to
   store the remaining bytes.  */
L(handletail32):
clrrwi. rALIGN, rLEN, 5
b L(nondcbz)
END (BP_SYM (memset))
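The new L(getCacheAligned)/L(cacheAligned)/L(handletail32) path stores 32-byte chunks until the pointer reaches a cache-line boundary, zeros whole lines, then hands the tail to the generic code.  A rough C model of that control flow (a sketch only, not how glibc implements memset; memset stands in for the 32-byte stores and for dcbz, and line_size is assumed to be a power of two read from AT_DCACHEBSIZE):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
memset_zero_tail (char *p, size_t len, size_t line_size)
{
  /* L(getCacheAligned): store 32-byte chunks until p is line-aligned.  */
  while (len >= 32 && ((uintptr_t) p & (line_size - 1)) != 0)
    {
      memset (p, 0, 32);
      p += 32;
      len -= 32;
    }
  /* L(cacheAligned): zero whole lines; this models dcbz 0,rMEMP.  */
  while (len >= line_size)
    {
      memset (p, 0, line_size);
      p += line_size;
      len -= line_size;
    }
  /* L(handletail32) -> L(nondcbz): generic code finishes the rest.  */
  memset (p, 0, len);
}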

sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c

@@ -20,6 +20,32 @@
#include "config.h"
#include "kernel-features.h"
#include <ldsodefs.h>
extern int __cache_line_size;
weak_extern (__cache_line_size)
#define DL_PLATFORM_INIT __aux_init_cache(_dl_auxv)
/* Scan the aux vector for the AT_DCACHEBSIZE ("data cache block size")
   entry.  If it is present, verify that the weak extern
   __cache_line_size is actually defined by checking its address for
   non-NULL, and if so store the cache block size value into
   __cache_line_size.  */
static inline void
__aux_init_cache (ElfW(auxv_t) *av)
{
  for (; av->a_type != AT_NULL; ++av)
    switch (av->a_type)
      {
      case AT_DCACHEBSIZE:
	{
	  int *cls = &__cache_line_size;
	  if (cls != NULL)
	    *cls = av->a_un.a_val;
	}
	break;
      }
}
#ifndef __ASSUME_STD_AUXV
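DL_PLATFORM_INIT is consumed by the generic dynamic-linker startup code in elf/dl-sysdep.c, which invokes the hook once the aux vector has been parsed into _dl_auxv.  A self-contained paraphrase of that hook site (the stand-in definitions are illustrative only, not the literal glibc source):

#include <link.h>  /* ElfW */

/* Hypothetical stand-ins so the sketch compiles on its own.  */
static ElfW(auxv_t) *_dl_auxv;
static void
__aux_init_cache (ElfW(auxv_t) *av)
{
  (void) av;  /* the real function scans for AT_DCACHEBSIZE, as above */
}
#define DL_PLATFORM_INIT __aux_init_cache (_dl_auxv)

static void
run_platform_init (void)
{
  /* Mirrors the hook site in glibc's generic startup path.  */
#ifdef DL_PLATFORM_INIT
  DL_PLATFORM_INIT;
#endif
}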