linux/arch/sh/mm/cache.c
Stuart Menefy 39ac11c160 sh: Improve performance of SH4 versions of copy/clear_user_highpage
The previous implementation of clear_user_highpage and copy_user_highpage
checked to see if there was a D-cache aliasing issue between the user
and kernel mappings of a page, but if there was they always did a
flush with writeback on the dirtied kernel alias.

However as we now have the ability to map a page into kernel space
with the same cache colour as the user mapping, there is no need to
write back this data.

Currently we also invalidate the kernel alias as a precaution, however
I'm not sure if this is actually required.

Also correct the definition of FIX_CMAP_END so that the mappings created
by kmap_coherent() are actually at the correct colour.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
2009-11-24 17:13:35 +09:00

383 lines
10 KiB
C

/*
* arch/sh/mm/cache.c
*
* Copyright (C) 1999, 2000, 2002 Niibe Yutaka
* Copyright (C) 2002 - 2009 Paul Mundt
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
/*
 * Per-CPU ("local") cache operation hooks.  Each one takes a single opaque
 * argument so that it can be handed to cacheop_on_each_cpu().  They all
 * start out pointing at cache_noop, which is not defined in the part of the
 * file visible here.
 * NOTE(review): presumably the family-specific *_cache_init() routines
 * called from cpu_cache_init() install the real implementations -- confirm.
 */
void (*local_flush_cache_all)(void *args) = cache_noop;
void (*local_flush_cache_mm)(void *args) = cache_noop;
void (*local_flush_cache_dup_mm)(void *args) = cache_noop;
void (*local_flush_cache_page)(void *args) = cache_noop;
void (*local_flush_cache_range)(void *args) = cache_noop;
void (*local_flush_dcache_page)(void *args) = cache_noop;
void (*local_flush_icache_range)(void *args) = cache_noop;
void (*local_flush_icache_page)(void *args) = cache_noop;
void (*local_flush_cache_sigtramp)(void *args) = cache_noop;
/*
 * Region based hooks operating on a (start, size) range.  They have no
 * static initialiser; cpu_cache_init() points all three at
 * noop__flush_region before any family-specific initialisation runs.
 * All three are exported.
 */
void (*__flush_wback_region)(void *start, int size);
EXPORT_SYMBOL(__flush_wback_region);
void (*__flush_purge_region)(void *start, int size);
EXPORT_SYMBOL(__flush_purge_region);
void (*__flush_invalidate_region)(void *start, int size);
EXPORT_SYMBOL(__flush_invalidate_region);
/*
 * Do-nothing region flush.  cpu_cache_init() installs this as the initial
 * handler for __flush_wback_region, __flush_purge_region and
 * __flush_invalidate_region.  Both arguments are ignored.
 */
static inline void noop__flush_region(void *start, int size)
{
}
/*
 * cacheop_on_each_cpu
 * @func: cache operation to run; receives @info as its only argument
 * @info: opaque argument forwarded to @func
 * @wait: forwarded unchanged to smp_call_function()
 *
 * Hands @func to smp_call_function() and then calls it directly on the
 * calling CPU as well.  Preemption is disabled around both calls.
 * NOTE(review): presumably so that the direct call runs on the same CPU
 * that issued the cross-call -- confirm.
 */
static inline void cacheop_on_each_cpu(void (*func) (void *info), void *info,
int wait)
{
preempt_disable();
smp_call_function(func, info, wait);
/* smp_call_function() is not relied on for the local CPU; call directly. */
func(info);
preempt_enable();
}
/*
 * copy_to_user_page - write data into a page owned by another process
 * @vma:   vm_area_struct holding the pages
 * @page:  struct page being written to
 * @vaddr: user space address of the write
 * @dst:   address of the page in kernel space (possibly from kmap)
 * @src:   source address in kernel logical memory
 * @len:   length of data in bytes (may be less than PAGE_SIZE)
 *
 * Used to copy data into the address space of a process other than the
 * current process (eg for ptrace).
 *
 * When the D-cache has aliases, the page is mapped and it is not marked
 * PG_dcache_dirty, the write goes through a kmap_coherent() mapping at
 * @vaddr.  Otherwise the write goes through @dst and, on aliasing caches,
 * the page is marked PG_dcache_dirty.  Executable mappings additionally
 * get a flush_cache_page().
 */
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
		       unsigned long vaddr, void *dst, const void *src,
		       unsigned long len)
{
	const int has_aliases = boot_cpu_data.dcache.n_aliases != 0;
	const int use_coherent = has_aliases && page_mapped(page) &&
				 !test_bit(PG_dcache_dirty, &page->flags);

	if (use_coherent) {
		unsigned long offset = vaddr & ~PAGE_MASK;
		void *vto = kmap_coherent(page, vaddr) + offset;

		memcpy(vto, src, len);
		kunmap_coherent(vto);
	} else {
		memcpy(dst, src, len);

		if (has_aliases)
			set_bit(PG_dcache_dirty, &page->flags);
	}

	if (vma->vm_flags & VM_EXEC)
		flush_cache_page(vma, vaddr, page_to_pfn(page));
}
/*
 * copy_from_user_page - read data out of a page owned by another process
 * @vma:   vm_area_struct holding the pages (not used by this function)
 * @page:  struct page being read from
 * @vaddr: user space address of the read
 * @dst:   destination buffer
 * @src:   address of the data within the kernel mapping of @page
 * @len:   length of data in bytes
 *
 * When the D-cache has aliases, the page is mapped and it is not marked
 * PG_dcache_dirty, the data is read through a kmap_coherent() mapping at
 * @vaddr.  Otherwise it is read from @src and, on aliasing caches, the
 * page is marked PG_dcache_dirty.
 */
void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
			 unsigned long vaddr, void *dst, const void *src,
			 unsigned long len)
{
	const int has_aliases = boot_cpu_data.dcache.n_aliases != 0;
	const int use_coherent = has_aliases && page_mapped(page) &&
				 !test_bit(PG_dcache_dirty, &page->flags);

	if (use_coherent) {
		unsigned long offset = vaddr & ~PAGE_MASK;
		void *vfrom = kmap_coherent(page, vaddr) + offset;

		memcpy(dst, vfrom, len);
		kunmap_coherent(vfrom);
	} else {
		memcpy(dst, src, len);

		if (has_aliases)
			set_bit(PG_dcache_dirty, &page->flags);
	}
}
/*
* copy_user_highpage
* @to: destination page
* @from: source page
* @vaddr: address of pages in user address space
* @vma: vm_area_struct holding the pages
*
* This is used in COW implementation to copy data from page @from to
* page @to. @from was previously mapped at @vaddr, and @to will be.
* As this is used only in the COW implementation, this means that the
* source is unmodified, and so we don't have to worry about cache
* aliasing on that side.
*/
#ifdef CONFIG_HIGHMEM
/*
* If we ever have a real highmem system, this code will need fixing
* (as will clear_user/clear_user_highmem), because the kmap potentially
* creates another alias risk.
*/
#error This code is broken with real HIGHMEM
#endif
/*
 * Copies one page from @from to @to.  @vaddr is the user address used both
 * for the alias check and for the coherent mapping of @to; @vma is not
 * used by this function.
 */
void copy_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
{
void *vfrom, *vto;
/* Map both pages through kmap_atomic(), using separate KM_USER slots. */
vto = kmap_atomic(to, KM_USER1);
vfrom = kmap_atomic(from, KM_USER0);
/*
 * If the kernel mapping of @to aliases with the user address, invalidate
 * the kernel mapping's cache lines before anything is written.
 */
if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
__flush_invalidate_region(vto, PAGE_SIZE);
/*
 * With an aliasing D-cache, a mapped source page and no pending
 * PG_dcache_dirty on it, write the destination through a kmap_coherent()
 * mapping set up for @vaddr rather than through vto.
 */
if (boot_cpu_data.dcache.n_aliases && page_mapped(from) &&
!test_bit(PG_dcache_dirty, &from->flags)) {
void *vto_coloured = kmap_coherent(to, vaddr);
copy_page(vto_coloured, vfrom);
kunmap_coherent(vto_coloured);
} else
/* Otherwise copy through the ordinary kernel mappings. */
copy_page(vto, vfrom);
/* Unmap in the reverse order of mapping. */
kunmap_atomic(vfrom, KM_USER0);
kunmap_atomic(vto, KM_USER1);
/* Make sure the copied page is visible on other CPUs too before using it */
smp_wmb();
}
EXPORT_SYMBOL(copy_user_highpage);
/*
 * clear_user_highpage
 * @page:  page to clear
 * @vaddr: user space address the page will be mapped at
 *
 * Clears @page.  When the kernel mapping obtained from kmap_atomic() does
 * not alias with @vaddr the page is cleared through that mapping.  When it
 * does alias, the kernel mapping is invalidated and the page is cleared
 * through a kmap_coherent() mapping set up for @vaddr instead.
 */
void clear_user_highpage(struct page *page, unsigned long vaddr)
{
	void *kaddr = kmap_atomic(page, KM_USER0);

	if (!pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK)) {
		clear_page(kaddr);
	} else {
		void *coloured;

		/* Kernel alias may have modified data in the cache. */
		__flush_invalidate_region(kaddr, PAGE_SIZE);

		coloured = kmap_coherent(page, vaddr);
		clear_page(coloured);
		kunmap_coherent(coloured);
	}

	kunmap_atomic(kaddr, KM_USER0);
}
EXPORT_SYMBOL(clear_user_highpage);
/*
 * __update_cache
 * @vma:     vm_area_struct of the mapping (not used by this function)
 * @address: address compared against the page's kernel address for
 *           aliasing (presumably the user virtual address the PTE maps --
 *           confirm against callers)
 * @pte:     page table entry from which the page frame number is taken
 *
 * If the D-cache has aliases and the page behind @pte was marked
 * PG_dcache_dirty, clear that mark and, when the page's kernel address
 * aliases with @address, purge the kernel mapping of the page.
 *
 * The pfn is validated with pfn_valid() before it is turned into a
 * struct page, so pfn_to_page() is never evaluated for an invalid pfn.
 */
void __update_cache(struct vm_area_struct *vma,
		    unsigned long address, pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);
	struct page *page;
	unsigned long addr;

	/* Nothing to do on caches without aliases. */
	if (!boot_cpu_data.dcache.n_aliases)
		return;

	/* Do not derive a struct page from a pfn that has none. */
	if (!pfn_valid(pfn))
		return;

	page = pfn_to_page(pfn);

	/* Only pages with a deferred flush pending need any work. */
	if (!test_and_clear_bit(PG_dcache_dirty, &page->flags))
		return;

	addr = (unsigned long)page_address(page);
	if (pages_do_alias(addr, address & PAGE_MASK))
		__flush_purge_region((void *)addr, PAGE_SIZE);
}
/*
 * __flush_anon_page
 * @page:   page to flush
 * @vmaddr: address checked for aliasing against the page's kernel address
 *
 * Does nothing unless the page's kernel address aliases with @vmaddr.
 * When it does, a mapped page that is not marked PG_dcache_dirty (on an
 * aliasing D-cache) is mapped with kmap_coherent() and immediately
 * unmapped again; any other page has its kernel mapping purged.
 */
void __flush_anon_page(struct page *page, unsigned long vmaddr)
{
	unsigned long addr = (unsigned long) page_address(page);

	if (!pages_do_alias(addr, vmaddr))
		return;

	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
	    !test_bit(PG_dcache_dirty, &page->flags)) {
		void *kaddr = kmap_coherent(page, vmaddr);

		/*
		 * XXX: no explicit __flush_purge_region(kaddr, PAGE_SIZE)
		 * here; for now kunmap_coherent() does a purge.
		 */
		kunmap_coherent(kaddr);
	} else {
		__flush_purge_region((void *)addr, PAGE_SIZE);
	}
}
/*
 * Run the local_flush_cache_all hook on every CPU via
 * cacheop_on_each_cpu(), passing no argument and requesting wait=1.
 */
void flush_cache_all(void)
{
cacheop_on_each_cpu(local_flush_cache_all, NULL, 1);
}
EXPORT_SYMBOL(flush_cache_all);
/*
 * flush_cache_mm
 * @mm: mm_struct handed to the local_flush_cache_mm hook
 *
 * Runs the local_flush_cache_mm hook on every CPU.  Nothing is done when
 * the D-cache has no aliases.
 */
void flush_cache_mm(struct mm_struct *mm)
{
	if (boot_cpu_data.dcache.n_aliases != 0)
		cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
}
/*
 * flush_cache_dup_mm
 * @mm: mm_struct handed to the local_flush_cache_dup_mm hook
 *
 * Runs the local_flush_cache_dup_mm hook on every CPU.  Nothing is done
 * when the D-cache has no aliases.
 */
void flush_cache_dup_mm(struct mm_struct *mm)
{
	if (boot_cpu_data.dcache.n_aliases != 0)
		cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
}
void flush_cache_page(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn)
{
struct flusher_data data;
data.vma = vma;
data.addr1 = addr;
data.addr2 = pfn;
cacheop_on_each_cpu(local_flush_cache_page, (void *)&data, 1);
}
void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct flusher_data data;
data.vma = vma;
data.addr1 = start;
data.addr2 = end;
cacheop_on_each_cpu(local_flush_cache_range, (void *)&data, 1);
}
EXPORT_SYMBOL(flush_cache_range);
/*
 * Run the local_flush_dcache_page hook on every CPU via
 * cacheop_on_each_cpu(), passing @page as the hook argument.
 */
void flush_dcache_page(struct page *page)
{
cacheop_on_each_cpu(local_flush_dcache_page, page, 1);
}
EXPORT_SYMBOL(flush_dcache_page);
void flush_icache_range(unsigned long start, unsigned long end)
{
struct flusher_data data;
data.vma = NULL;
data.addr1 = start;
data.addr2 = end;
cacheop_on_each_cpu(local_flush_icache_range, (void *)&data, 1);
}
/*
 * Run the local_flush_icache_page hook on every CPU via
 * cacheop_on_each_cpu().  @vma is accepted but not forwarded; only @page
 * is handed to the hook.
 */
void flush_icache_page(struct vm_area_struct *vma, struct page *page)
{
/* Nothing uses the VMA, so just pass the struct page along */
cacheop_on_each_cpu(local_flush_icache_page, page, 1);
}
/*
 * Run the local_flush_cache_sigtramp hook on every CPU via
 * cacheop_on_each_cpu().  @address is passed by value, cast to the hook's
 * void * argument rather than pointed to.
 */
void flush_cache_sigtramp(unsigned long address)
{
cacheop_on_each_cpu(local_flush_cache_sigtramp, (void *)address, 1);
}
/*
 * compute_alias
 * @c: cache description to update in place
 *
 * Derives alias_mask from the set count and entry shift, keeping only the
 * bits at or above the page size.  n_aliases is then
 * (alias_mask >> PAGE_SHIFT) + 1, or 0 when alias_mask is empty.
 */
static void compute_alias(struct cache_info *c)
{
	c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);

	if (c->alias_mask)
		c->n_aliases = (c->alias_mask >> PAGE_SHIFT) + 1;
	else
		c->n_aliases = 0;
}
/*
 * Print the geometry and alias parameters of the I-cache and D-cache
 * from boot_cpu_data, followed by those of the secondary cache when the
 * CPU flags include CPU_HAS_L2_CACHE.
 */
static void __init emit_cache_params(void)
{
	const struct cache_info *ic = &boot_cpu_data.icache;
	const struct cache_info *dc = &boot_cpu_data.dcache;
	const struct cache_info *sc = &boot_cpu_data.scache;

	printk(KERN_NOTICE "I-cache : n_ways=%d n_sets=%d way_incr=%d\n",
	       ic->ways, ic->sets, ic->way_incr);
	printk(KERN_NOTICE "I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
	       ic->entry_mask, ic->alias_mask, ic->n_aliases);

	printk(KERN_NOTICE "D-cache : n_ways=%d n_sets=%d way_incr=%d\n",
	       dc->ways, dc->sets, dc->way_incr);
	printk(KERN_NOTICE "D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
	       dc->entry_mask, dc->alias_mask, dc->n_aliases);

	/* The secondary cache is only reported when an L2 was probed. */
	if (!(boot_cpu_data.flags & CPU_HAS_L2_CACHE))
		return;

	printk(KERN_NOTICE "S-cache : n_ways=%d n_sets=%d way_incr=%d\n",
	       sc->ways, sc->sets, sc->way_incr);
	printk(KERN_NOTICE "S-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
	       sc->entry_mask, sc->alias_mask, sc->n_aliases);
}
/*
 * cpu_cache_init
 *
 * Boot-time cache setup:
 *  - computes the alias parameters of the I-, D- and secondary cache,
 *  - points the three __flush_*_region hooks at noop__flush_region,
 *  - unless the cache is found disabled (CCR_CACHE_ENABLE clear in CCR,
 *    on builds that define CCR), calls the cache init routine matching
 *    boot_cpu_data.family,
 *  - prints the resulting cache parameters.
 *
 * The family init routines are declared __weak at their point of use.
 */
void __init cpu_cache_init(void)
{
	unsigned int cache_disabled = 0;

#ifdef CCR
	cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE);
#endif

	compute_alias(&boot_cpu_data.icache);
	compute_alias(&boot_cpu_data.dcache);
	compute_alias(&boot_cpu_data.scache);

	__flush_wback_region = noop__flush_region;
	__flush_purge_region = noop__flush_region;
	__flush_invalidate_region = noop__flush_region;

	/*
	 * With the cache disabled no flushing is needed, so the noop
	 * handlers in local_flush_..() and __flush_..() are left in place.
	 */
	if (unlikely(cache_disabled))
		goto skip;

	switch (boot_cpu_data.family) {
	case CPU_FAMILY_SH2: {
		extern void __weak sh2_cache_init(void);

		sh2_cache_init();
		break;
	}
	case CPU_FAMILY_SH2A: {
		extern void __weak sh2a_cache_init(void);

		sh2a_cache_init();
		break;
	}
	case CPU_FAMILY_SH3: {
		extern void __weak sh3_cache_init(void);

		sh3_cache_init();

		/* SH7705 with a 512-set D-cache gets an extra init step. */
		if (boot_cpu_data.type == CPU_SH7705 &&
		    boot_cpu_data.dcache.sets == 512) {
			extern void __weak sh7705_cache_init(void);

			sh7705_cache_init();
		}
		break;
	}
	case CPU_FAMILY_SH4:
	case CPU_FAMILY_SH4A:
	case CPU_FAMILY_SH4AL_DSP: {
		extern void __weak sh4_cache_init(void);

		sh4_cache_init();
		break;
	}
	case CPU_FAMILY_SH5: {
		extern void __weak sh5_cache_init(void);

		sh5_cache_init();
		break;
	}
	default:
		break;
	}

skip:
	emit_cache_params();
}