300 lines
6.8 KiB
Plaintext
300 lines
6.8 KiB
Plaintext
|
// Copyright 2009 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
// Malloc profiling.
|
||
|
// Patterned after tcmalloc's algorithms; shorter code.
|
||
|
|
||
|
package runtime
|
||
|
#include "runtime.h"
|
||
|
#include "malloc.h"
|
||
|
#include "defs.h"
|
||
|
#include "go-type.h"
|
||
|
|
||
|
typedef struct __go_open_array Slice;
|
||
|
|
||
|
// NOTE(rsc): Everything here could use cas if contention became an issue.
|
||
|
static Lock proflock = LOCK_INITIALIZER;
|
||
|
|
||
|
// Per-call-stack allocation information.
|
||
|
// Lookup by hashing call stack into a linked-list hash table.
|
||
|
typedef struct Bucket Bucket;
|
||
|
struct Bucket
|
||
|
{
|
||
|
Bucket *next; // next in hash list
|
||
|
Bucket *allnext; // next in list of all buckets
|
||
|
uintptr allocs;
|
||
|
uintptr frees;
|
||
|
uintptr alloc_bytes;
|
||
|
uintptr free_bytes;
|
||
|
uintptr hash;
|
||
|
uintptr nstk;
|
||
|
uintptr stk[1];
|
||
|
};
|
||
|
enum {
|
||
|
BuckHashSize = 179999,
|
||
|
};
|
||
|
static Bucket **buckhash;
|
||
|
static Bucket *buckets;
|
||
|
static uintptr bucketmem;
|
||
|
|
||
|
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
|
||
|
static Bucket*
|
||
|
stkbucket(uintptr *stk, int32 nstk)
|
||
|
{
|
||
|
int32 i;
|
||
|
uintptr h;
|
||
|
Bucket *b;
|
||
|
|
||
|
if(buckhash == nil) {
|
||
|
buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
|
||
|
mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
|
||
|
}
|
||
|
|
||
|
// Hash stack.
|
||
|
h = 0;
|
||
|
for(i=0; i<nstk; i++) {
|
||
|
h += stk[i];
|
||
|
h += h<<10;
|
||
|
h ^= h>>6;
|
||
|
}
|
||
|
h += h<<3;
|
||
|
h ^= h>>11;
|
||
|
|
||
|
i = h%BuckHashSize;
|
||
|
for(b = buckhash[i]; b; b=b->next)
|
||
|
if(b->hash == h && b->nstk == (uintptr)nstk &&
|
||
|
runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
|
||
|
return b;
|
||
|
|
||
|
b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1);
|
||
|
bucketmem += sizeof *b + nstk*sizeof stk[0];
|
||
|
runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
|
||
|
b->hash = h;
|
||
|
b->nstk = nstk;
|
||
|
b->next = buckhash[i];
|
||
|
buckhash[i] = b;
|
||
|
b->allnext = buckets;
|
||
|
buckets = b;
|
||
|
return b;
|
||
|
}
|
||
|
|
||
|
// Map from pointer to Bucket* that allocated it.
|
||
|
// Three levels:
|
||
|
// Linked-list hash table for top N-20 bits.
|
||
|
// Array index for next 13 bits.
|
||
|
// Linked list for next 7 bits.
|
||
|
// This is more efficient than using a general map,
|
||
|
// because of the typical clustering of the pointer keys.
|
||
|
|
||
|
typedef struct AddrHash AddrHash;
|
||
|
typedef struct AddrEntry AddrEntry;
|
||
|
|
||
|
struct AddrHash
|
||
|
{
|
||
|
AddrHash *next; // next in top-level hash table linked list
|
||
|
uintptr addr; // addr>>20
|
||
|
AddrEntry *dense[1<<13];
|
||
|
};
|
||
|
|
||
|
struct AddrEntry
|
||
|
{
|
||
|
AddrEntry *next; // next in bottom-level linked list
|
||
|
uint32 addr;
|
||
|
Bucket *b;
|
||
|
};
|
||
|
|
||
|
enum {
|
||
|
AddrHashBits = 12 // 1MB per entry, so good for 4GB of used address space
|
||
|
};
|
||
|
static AddrHash *addrhash[1<<AddrHashBits];
|
||
|
static AddrEntry *addrfree;
|
||
|
static uintptr addrmem;
|
||
|
|
||
|
// Multiplicative hash function:
|
||
|
// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
|
||
|
// This is a good multiplier as suggested in CLR, Knuth. The hash
|
||
|
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
|
||
|
// of the muliplied value.
|
||
|
enum {
|
||
|
HashMultiplier = 2654435769U
|
||
|
};
|
||
|
|
||
|
// Set the bucket associated with addr to b.
|
||
|
static void
|
||
|
setaddrbucket(uintptr addr, Bucket *b)
|
||
|
{
|
||
|
int32 i;
|
||
|
uint32 h;
|
||
|
AddrHash *ah;
|
||
|
AddrEntry *e;
|
||
|
|
||
|
h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
|
||
|
for(ah=addrhash[h]; ah; ah=ah->next)
|
||
|
if(ah->addr == (addr>>20))
|
||
|
goto found;
|
||
|
|
||
|
ah = runtime_mallocgc(sizeof *ah, RefNoProfiling, 0, 1);
|
||
|
addrmem += sizeof *ah;
|
||
|
ah->next = addrhash[h];
|
||
|
ah->addr = addr>>20;
|
||
|
addrhash[h] = ah;
|
||
|
|
||
|
found:
|
||
|
if((e = addrfree) == nil) {
|
||
|
e = runtime_mallocgc(64*sizeof *e, RefNoProfiling, 0, 0);
|
||
|
addrmem += 64*sizeof *e;
|
||
|
for(i=0; i+1<64; i++)
|
||
|
e[i].next = &e[i+1];
|
||
|
e[63].next = nil;
|
||
|
}
|
||
|
addrfree = e->next;
|
||
|
e->addr = (uint32)~(addr & ((1<<20)-1));
|
||
|
e->b = b;
|
||
|
h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
|
||
|
e->next = ah->dense[h];
|
||
|
ah->dense[h] = e;
|
||
|
}
|
||
|
|
||
|
// Get the bucket associated with addr and clear the association.
|
||
|
static Bucket*
|
||
|
getaddrbucket(uintptr addr)
|
||
|
{
|
||
|
uint32 h;
|
||
|
AddrHash *ah;
|
||
|
AddrEntry *e, **l;
|
||
|
Bucket *b;
|
||
|
|
||
|
h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
|
||
|
for(ah=addrhash[h]; ah; ah=ah->next)
|
||
|
if(ah->addr == (addr>>20))
|
||
|
goto found;
|
||
|
return nil;
|
||
|
|
||
|
found:
|
||
|
h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
|
||
|
for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
|
||
|
if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
|
||
|
*l = e->next;
|
||
|
b = e->b;
|
||
|
e->next = addrfree;
|
||
|
addrfree = e;
|
||
|
return b;
|
||
|
}
|
||
|
}
|
||
|
return nil;
|
||
|
}
|
||
|
|
||
|
// Called by malloc to record a profiled block.
|
||
|
void
|
||
|
runtime_MProf_Malloc(void *p, uintptr size)
|
||
|
{
|
||
|
int32 nstk;
|
||
|
uintptr stk[32];
|
||
|
Bucket *b;
|
||
|
|
||
|
if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
|
||
|
return;
|
||
|
#if 0
|
||
|
nstk = runtime_callers(1, stk, 32);
|
||
|
#else
|
||
|
nstk = 0;
|
||
|
#endif
|
||
|
runtime_lock(&proflock);
|
||
|
b = stkbucket(stk, nstk);
|
||
|
b->allocs++;
|
||
|
b->alloc_bytes += size;
|
||
|
setaddrbucket((uintptr)p, b);
|
||
|
runtime_unlock(&proflock);
|
||
|
__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
|
||
|
|
||
|
if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
|
||
|
__go_run_goroutine_gc(100);
|
||
|
}
|
||
|
|
||
|
// Called when freeing a profiled block.
|
||
|
void
|
||
|
runtime_MProf_Free(void *p, uintptr size)
|
||
|
{
|
||
|
Bucket *b;
|
||
|
|
||
|
if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
|
||
|
return;
|
||
|
|
||
|
runtime_lock(&proflock);
|
||
|
b = getaddrbucket((uintptr)p);
|
||
|
if(b != nil) {
|
||
|
b->frees++;
|
||
|
b->free_bytes += size;
|
||
|
}
|
||
|
runtime_unlock(&proflock);
|
||
|
__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
|
||
|
|
||
|
if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
|
||
|
__go_run_goroutine_gc(101);
|
||
|
}
|
||
|
|
||
|
|
||
|
// Go interface to profile data. (Declared in extern.go)
|
||
|
// Assumes Go sizeof(int) == sizeof(int32)
|
||
|
|
||
|
// Must match MemProfileRecord in extern.go.
|
||
|
typedef struct Record Record;
|
||
|
struct Record {
|
||
|
int64 alloc_bytes, free_bytes;
|
||
|
int64 alloc_objects, free_objects;
|
||
|
uintptr stk[32];
|
||
|
};
|
||
|
|
||
|
// Write b's data to r.
|
||
|
static void
|
||
|
record(Record *r, Bucket *b)
|
||
|
{
|
||
|
uint32 i;
|
||
|
|
||
|
r->alloc_bytes = b->alloc_bytes;
|
||
|
r->free_bytes = b->free_bytes;
|
||
|
r->alloc_objects = b->allocs;
|
||
|
r->free_objects = b->frees;
|
||
|
for(i=0; i<b->nstk && i<nelem(r->stk); i++)
|
||
|
r->stk[i] = b->stk[i];
|
||
|
for(; i<nelem(r->stk); i++)
|
||
|
r->stk[i] = 0;
|
||
|
}
|
||
|
|
||
|
func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
|
||
|
Bucket *b;
|
||
|
Record *r;
|
||
|
|
||
|
__sync_bool_compare_and_swap(&m->nomemprof, 0, 1);
|
||
|
|
||
|
runtime_lock(&proflock);
|
||
|
n = 0;
|
||
|
for(b=buckets; b; b=b->allnext)
|
||
|
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
|
||
|
n++;
|
||
|
ok = false;
|
||
|
if(n <= p.__count) {
|
||
|
ok = true;
|
||
|
r = (Record*)p.__values;
|
||
|
for(b=buckets; b; b=b->allnext)
|
||
|
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
|
||
|
record(r++, b);
|
||
|
}
|
||
|
runtime_unlock(&proflock);
|
||
|
|
||
|
__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
|
||
|
|
||
|
if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
|
||
|
__go_run_goroutine_gc(102);
|
||
|
}
|
||
|
|
||
|
void
|
||
|
runtime_MProf_Mark(void (*scan)(byte *, int64))
|
||
|
{
|
||
|
// buckhash is not allocated via mallocgc.
|
||
|
scan((byte*)&buckets, sizeof buckets);
|
||
|
scan((byte*)&addrhash, sizeof addrhash);
|
||
|
scan((byte*)&addrfree, sizeof addrfree);
|
||
|
}
|