aarch64: Disable using DC ZVA in emag memset
* sysdeps/aarch64/multiarch/memset_base64.S (DC_ZVA_THRESHOLD): Disable DC ZVA code if this macro is defined as zero. * sysdeps/aarch64/multiarch/memset_emag.S (DC_ZVA_THRESHOLD): Change to zero to disable using DC ZVA.
This commit is contained in:
parent
c3ce62cc0b
commit
b68fabfbbc
@ -1,3 +1,10 @@
|
||||
2019-08-14 Feng Xue <fxue@os.amperecomputing.com>
|
||||
|
||||
* sysdeps/aarch64/multiarch/memset_base64.S (DC_ZVA_THRESHOLD):
|
||||
Disable DC ZVA code if this macro is defined as zero.
|
||||
* sysdeps/aarch64/multiarch/memset_emag.S (DC_ZVA_THRESHOLD):
|
||||
Change to zero to disable using DC ZVA.
|
||||
|
||||
2019-08-13 Joseph Myers <joseph@codesourcery.com>
|
||||
|
||||
* bits/libc-header-start.h (__GLIBC_USE_IEC_60559_FUNCS_EXT):
|
||||
|
@ -23,6 +23,7 @@
|
||||
# define MEMSET __memset_base64
|
||||
#endif
|
||||
|
||||
/* To disable DC ZVA, set this threshold to 0. */
|
||||
#ifndef DC_ZVA_THRESHOLD
|
||||
# define DC_ZVA_THRESHOLD 512
|
||||
#endif
|
||||
@ -91,11 +92,12 @@ L(set96):
|
||||
.p2align 4
|
||||
L(set_long):
|
||||
stp val, val, [dstin]
|
||||
bic dst, dstin, 15
|
||||
#if DC_ZVA_THRESHOLD
|
||||
cmp count, DC_ZVA_THRESHOLD
|
||||
ccmp val, 0, 0, cs
|
||||
bic dst, dstin, 15
|
||||
b.eq L(zva_64)
|
||||
|
||||
#endif
|
||||
/* Small-size or non-zero memset does not use DC ZVA. */
|
||||
sub count, dstend, dst
|
||||
|
||||
@ -105,7 +107,11 @@ L(set_long):
|
||||
* count is less than 33 bytes, so as to bypass 2 unnecessary stps.
|
||||
*/
|
||||
sub count, count, 64+16+1
|
||||
|
||||
#if DC_ZVA_THRESHOLD
|
||||
/* Align loop on a 16-byte boundary; this might be friendly to the i-cache. */
|
||||
nop
|
||||
#endif
|
||||
|
||||
1: stp val, val, [dst, 16]
|
||||
stp val, val, [dst, 32]
|
||||
@ -121,6 +127,7 @@ L(set_long):
|
||||
stp val, val, [dstend, -16]
|
||||
ret
|
||||
|
||||
#if DC_ZVA_THRESHOLD
|
||||
.p2align 3
|
||||
L(zva_64):
|
||||
stp val, val, [dst, 16]
|
||||
@ -173,6 +180,7 @@ L(zva_64):
|
||||
1: stp val, val, [dstend, -32]
|
||||
stp val, val, [dstend, -16]
|
||||
ret
|
||||
#endif
|
||||
|
||||
END (MEMSET)
|
||||
libc_hidden_builtin_def (MEMSET)
|
||||
|
@ -21,12 +21,14 @@
|
||||
# define MEMSET __memset_emag
|
||||
|
||||
/*
|
||||
* Using dc zva to zero memory does not produce better performance if
|
||||
* Using DC ZVA to zero memory does not produce better performance if
|
||||
* memory size is not very large, especially when there are multiple
|
||||
* processes/threads contending memory/cache. Here we use a somewhat
|
||||
* large threshold to trigger usage of dc zva.
|
||||
*/
|
||||
# define DC_ZVA_THRESHOLD 1024
|
||||
* processes/threads contending memory/cache. Here we set threshold to
|
||||
* zero to disable using DC ZVA, which is good for multi-process/thread
|
||||
* workloads.
|
||||
*/
|
||||
|
||||
# define DC_ZVA_THRESHOLD 0
|
||||
|
||||
# include "./memset_base64.S"
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user