Merge branch 'slub/lockless' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6

* 'slub/lockless' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6: (21 commits)
  slub: When allocating a new slab also prep the first object
  slub: disable interrupts in cmpxchg_double_slab when falling back to pagelock
  Avoid duplicate _count variables in page_struct
  Revert "SLUB: Fix build breakage in linux/mm_types.h"
  SLUB: Fix build breakage in linux/mm_types.h
  slub: slabinfo update for cmpxchg handling
  slub: Not necessary to check for empty slab on load_freelist
  slub: fast release on full slab
  slub: Add statistics for the case that the current slab does not match the node
  slub: Get rid of the another_slab label
  slub: Avoid disabling interrupts in free slowpath
  slub: Disable interrupts in free_debug processing
  slub: Invert locking and avoid slab lock
  slub: Rework allocator fastpaths
  slub: Pass kmem_cache struct to lock and freeze slab
  slub: explicit list_lock taking
  slub: Add cmpxchg_double_slab()
  mm: Rearrange struct page
  slub: Move page->frozen handling near where the page->freelist handling occurs
  slub: Do not use frozen page flag but a bit in the page counters
  ...
This commit is contained in:
Linus Torvalds 2011-07-30 08:21:48 -10:00
commit c11abbbaa3
5 changed files with 623 additions and 311 deletions

View File

@ -30,23 +30,61 @@ struct address_space;
* moment. Note that we have no way to track which tasks are using
* a page, though if it is a pagecache page, rmap structures can tell us
* who is mapping it.
*
* The objects in struct page are organized in double word blocks in
* order to allows us to use atomic double word operations on portions
* of struct page. That is currently only used by slub but the arrangement
* allows the use of atomic double word operations on the flags/mapping
* and lru list pointers also.
*/
struct page {
/* First double word block */
unsigned long flags; /* Atomic flags, some possibly
* updated asynchronously */
atomic_t _count; /* Usage count, see below. */
union {
atomic_t _mapcount; /* Count of ptes mapped in mms,
* to show when page is mapped
* & limit reverse map searches.
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
* If page mapped as anonymous
* memory, low bit is set, and
* it points to anon_vma object:
* see PAGE_MAPPING_ANON below.
*/
struct { /* SLUB */
u16 inuse;
u16 objects;
/* Second double word */
struct {
union {
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* slub first free object */
};
union {
/* Used for cmpxchg_double in slub */
unsigned long counters;
struct {
union {
atomic_t _mapcount; /* Count of ptes mapped in mms,
* to show when page is mapped
* & limit reverse map searches.
*/
struct {
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
};
atomic_t _count; /* Usage count, see below. */
};
};
};
/* Third double word block */
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
*/
/* Remainder is not double word aligned */
union {
struct {
unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
@ -54,27 +92,13 @@ struct page {
* indicates order in the buddy
* system if PG_buddy is set.
*/
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
* If page mapped as anonymous
* memory, low bit is set, and
* it points to anon_vma object:
* see PAGE_MAPPING_ANON below.
*/
};
#if USE_SPLIT_PTLOCKS
spinlock_t ptl;
spinlock_t ptl;
#endif
struct kmem_cache *slab; /* SLUB: Pointer to slab */
struct page *first_page; /* Compound tail pages */
struct kmem_cache *slab; /* SLUB: Pointer to slab */
struct page *first_page; /* Compound tail pages */
};
union {
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* SLUB: freelist req. slab lock */
};
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
*/
/*
* On machines where all RAM is mapped into kernel address space,
* we can simply calculate the virtual address. On machines with
@ -100,7 +124,16 @@ struct page {
*/
void *shadow;
#endif
};
}
/*
* If another subsystem starts using the double word pairing for atomic
* operations on struct page then it must change the #if to ensure
* proper alignment of the page struct.
*/
#if defined(CONFIG_SLUB) && defined(CONFIG_CMPXCHG_LOCAL)
__attribute__((__aligned__(2*sizeof(unsigned long))))
#endif
;
typedef unsigned long __nocast vm_flags_t;

View File

@ -124,9 +124,6 @@ enum pageflags {
/* SLOB */
PG_slob_free = PG_private,
/* SLUB */
PG_slub_frozen = PG_active,
};
#ifndef __GENERATING_BOUNDS_H
@ -212,8 +209,6 @@ PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
__PAGEFLAG(SlobFree, slob_free)
__PAGEFLAG(SlubFrozen, slub_frozen)
/*
* Private page markings that may be used by the filesystem that owns the page
* for its own purposes.

View File

@ -24,6 +24,7 @@ enum stat_item {
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from partial list */
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
ALLOC_NODE_MISMATCH, /* Switching cpu slab */
FREE_SLAB, /* Slab freed to the page allocator */
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
@ -31,8 +32,10 @@ enum stat_item {
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
DEACTIVATE_BYPASS, /* Implicit deactivation */
ORDER_FALLBACK, /* Number of times fallback was necessary */
CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */
NR_SLUB_STAT_ITEMS };
struct kmem_cache_cpu {

780
mm/slub.c

File diff suppressed because it is too large Load Diff

View File

@ -2,8 +2,9 @@
* Slabinfo: Tool to get reports about slabs
*
* (C) 2007 sgi, Christoph Lameter
* (C) 2011 Linux Foundation, Christoph Lameter
*
* Compile by:
* Compile with:
*
* gcc -o slabinfo slabinfo.c
*/
@ -39,6 +40,8 @@ struct slabinfo {
unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
unsigned long deactivate_to_head, deactivate_to_tail;
unsigned long deactivate_remote_frees, order_fallback;
unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail;
unsigned long alloc_node_mismatch, deactivate_bypass;
int numa[MAX_NODES];
int numa_partial[MAX_NODES];
} slabinfo[MAX_SLABS];
@ -99,7 +102,7 @@ static void fatal(const char *x, ...)
static void usage(void)
{
printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n"
printf("slabinfo 4/15/2011. (c) 2007 sgi/(c) 2011 Linux Foundation.\n\n"
"slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
"-a|--aliases Show aliases\n"
"-A|--activity Most active slabs first\n"
@ -293,7 +296,7 @@ int line = 0;
static void first_line(void)
{
if (show_activity)
printf("Name Objects Alloc Free %%Fast Fallb O\n");
printf("Name Objects Alloc Free %%Fast Fallb O CmpX UL\n");
else
printf("Name Objects Objsize Space "
"Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
@ -379,14 +382,14 @@ static void show_tracking(struct slabinfo *s)
printf("\n%s: Kernel object allocation\n", s->name);
printf("-----------------------------------------------------------------------\n");
if (read_slab_obj(s, "alloc_calls"))
printf(buffer);
printf("%s", buffer);
else
printf("No Data\n");
printf("\n%s: Kernel object freeing\n", s->name);
printf("------------------------------------------------------------------------\n");
if (read_slab_obj(s, "free_calls"))
printf(buffer);
printf("%s", buffer);
else
printf("No Data\n");
@ -400,7 +403,7 @@ static void ops(struct slabinfo *s)
if (read_slab_obj(s, "ops")) {
printf("\n%s: kmem_cache operations\n", s->name);
printf("--------------------------------------------\n");
printf(buffer);
printf("%s", buffer);
} else
printf("\n%s has no kmem_cache operations\n", s->name);
}
@ -462,19 +465,32 @@ static void slab_stats(struct slabinfo *s)
if (s->cpuslab_flush)
printf("Flushes %8lu\n", s->cpuslab_flush);
if (s->alloc_refill)
printf("Refill %8lu\n", s->alloc_refill);
total = s->deactivate_full + s->deactivate_empty +
s->deactivate_to_head + s->deactivate_to_tail;
s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass;
if (total)
printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) "
"ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n",
s->deactivate_full, (s->deactivate_full * 100) / total,
s->deactivate_empty, (s->deactivate_empty * 100) / total,
s->deactivate_to_head, (s->deactivate_to_head * 100) / total,
if (total) {
printf("\nSlab Deactivation Ocurrences %%\n");
printf("-------------------------------------------------\n");
printf("Slab full %7lu %3lu%%\n",
s->deactivate_full, (s->deactivate_full * 100) / total);
printf("Slab empty %7lu %3lu%%\n",
s->deactivate_empty, (s->deactivate_empty * 100) / total);
printf("Moved to head of partial list %7lu %3lu%%\n",
s->deactivate_to_head, (s->deactivate_to_head * 100) / total);
printf("Moved to tail of partial list %7lu %3lu%%\n",
s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
printf("Deactivation bypass %7lu %3lu%%\n",
s->deactivate_bypass, (s->deactivate_bypass * 100) / total);
printf("Refilled from foreign frees %7lu %3lu%%\n",
s->alloc_refill, (s->alloc_refill * 100) / total);
printf("Node mismatch %7lu %3lu%%\n",
s->alloc_node_mismatch, (s->alloc_node_mismatch * 100) / total);
}
if (s->cmpxchg_double_fail || s->cmpxchg_double_cpu_fail)
printf("\nCmpxchg_double Looping\n------------------------\n");
printf("Locked Cmpxchg Double redos %lu\nUnlocked Cmpxchg Double redos %lu\n",
s->cmpxchg_double_fail, s->cmpxchg_double_cpu_fail);
}
static void report(struct slabinfo *s)
@ -573,12 +589,13 @@ static void slabcache(struct slabinfo *s)
total_alloc = s->alloc_fastpath + s->alloc_slowpath;
total_free = s->free_fastpath + s->free_slowpath;
printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d\n",
printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d %4ld %4ld\n",
s->name, s->objects,
total_alloc, total_free,
total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
total_free ? (s->free_fastpath * 100 / total_free) : 0,
s->order_fallback, s->order);
s->order_fallback, s->order, s->cmpxchg_double_fail,
s->cmpxchg_double_cpu_fail);
}
else
printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
@ -1190,6 +1207,10 @@ static void read_slab_dir(void)
slab->deactivate_to_tail = get_obj("deactivate_to_tail");
slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
slab->order_fallback = get_obj("order_fallback");
slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail");
slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail");
slab->alloc_node_mismatch = get_obj("alloc_node_mismatch");
slab->deactivate_bypass = get_obj("deactivate_bypass");
chdir("..");
if (slab->name[0] == ':')
alias_targets++;