ea7145477a
Put x86 entry code into a separate link section: .entry.text. Separating the entry text section seems to have performance benefits - caused by more efficient instruction cache usage. Running hackbench with perf stat --repeat showed that the change compresses the icache footprint. The icache load miss rate went down by about 15%: before patch: 19417627 L1-icache-load-misses ( +- 0.147% ) after patch: 16490788 L1-icache-load-misses ( +- 0.180% ) The motivation of the patch was to fix a particular kprobes bug that relates to the entry text section, the performance advantage was discovered accidentally. Whole perf output follows: - results for current tip tree: Performance counter stats for './hackbench/hackbench 10' (500 runs): 19417627 L1-icache-load-misses ( +- 0.147% ) 2676914223 instructions # 0.497 IPC ( +- 0.079% ) 5389516026 cycles ( +- 0.144% ) 0.206267711 seconds time elapsed ( +- 0.138% ) - results for current tip tree with the patch applied: Performance counter stats for './hackbench/hackbench 10' (500 runs): 16490788 L1-icache-load-misses ( +- 0.180% ) 2717734941 instructions # 0.502 IPC ( +- 0.079% ) 5414756975 cycles ( +- 0.148% ) 0.206747566 seconds time elapsed ( +- 0.137% ) Signed-off-by: Jiri Olsa <jolsa@redhat.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: masami.hiramatsu.pt@hitachi.com Cc: ananth@in.ibm.com Cc: davem@davemloft.net Cc: 2nddept-manager@sdl.hitachi.co.jp LKML-Reference: <20110307181039.GB15197@jolsa.redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> |
||
---|---|---|
.. | ||
bitops | ||
4level-fixup.h | ||
atomic64.h | ||
atomic-long.h | ||
atomic.h | ||
audit_change_attr.h | ||
audit_dir_write.h | ||
audit_read.h | ||
audit_signal.h | ||
audit_write.h | ||
auxvec.h | ||
bitops.h | ||
bitsperlong.h | ||
bug.h | ||
bugs.h | ||
cache.h | ||
cacheflush.h | ||
checksum.h | ||
cmpxchg-local.h | ||
cmpxchg.h | ||
cputime.h | ||
current.h | ||
delay.h | ||
device.h | ||
div64.h | ||
dma-coherent.h | ||
dma-mapping-broken.h | ||
dma-mapping-common.h | ||
dma.h | ||
emergency-restart.h | ||
errno-base.h | ||
errno.h | ||
fb.h | ||
fcntl.h | ||
futex.h | ||
getorder.h | ||
gpio.h | ||
hardirq.h | ||
hw_irq.h | ||
ide_iops.h | ||
int-l64.h | ||
int-ll64.h | ||
io.h | ||
ioctl.h | ||
ioctls.h | ||
iomap.h | ||
ipcbuf.h | ||
irq_regs.h | ||
irq.h | ||
irqflags.h | ||
Kbuild | ||
Kbuild.asm | ||
kdebug.h | ||
kmap_types.h | ||
libata-portmap.h | ||
linkage.h | ||
local64.h | ||
local.h | ||
memory_model.h | ||
mm_hooks.h | ||
mman-common.h | ||
mman.h | ||
mmu_context.h | ||
mmu.h | ||
module.h | ||
msgbuf.h | ||
mutex-dec.h | ||
mutex-null.h | ||
mutex-xchg.h | ||
mutex.h | ||
page.h | ||
param.h | ||
parport.h | ||
pci-dma-compat.h | ||
pci.h | ||
percpu.h | ||
pgalloc.h | ||
pgtable-nopmd.h | ||
pgtable-nopud.h | ||
pgtable.h | ||
poll.h | ||
posix_types.h | ||
resource.h | ||
rtc.h | ||
scatterlist.h | ||
sections.h | ||
segment.h | ||
sembuf.h | ||
serial.h | ||
setup.h | ||
shmbuf.h | ||
shmparam.h | ||
siginfo.h | ||
signal-defs.h | ||
signal.h | ||
socket.h | ||
sockios.h | ||
spinlock.h | ||
stat.h | ||
statfs.h | ||
string.h | ||
swab.h | ||
syscall.h | ||
syscalls.h | ||
system.h | ||
termbits.h | ||
termios-base.h | ||
termios.h | ||
timex.h | ||
tlb.h | ||
tlbflush.h | ||
topology.h | ||
types.h | ||
uaccess-unaligned.h | ||
uaccess.h | ||
ucontext.h | ||
unaligned.h | ||
unistd.h | ||
user.h | ||
vga.h | ||
vmlinux.lds.h | ||
xor.h |