58f7dab40d
This replaces mem.go and the C runtime_ReadMemStats function with the Go 1.7 mstats.go. The GCStats code is commented out for now. The corresponding gccgo code is in runtime/mgc0.c.

The variables memstats and worldsema are shared between the Go code and the C code, but are not exported. To make this work, add temporary accessor functions acquireWorldsema, releaseWorldsema, and getMstats (the last of these is known as mstats in the C code).

Check the preemptoff field of m when allocating and when considering whether to start a GC. This works with the new stopTheWorld and startTheWorld functions in Go, which are essentially the Go 1.7 versions.

Change the compiler to stack allocate closures when compiling the runtime package. Within the runtime package, closures do not escape. This is similar to what the gc compiler does, except that the gc compiler, when compiling the runtime package, gives an error if escape analysis shows that a closure does escape. I added this here because the Go version of ReadMemStats calls systemstack with a closure, and having that allocate memory was causing some tests that measure memory allocations to fail.

Reviewed-on: https://go-review.googlesource.com/30972
From-SVN: r241124
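As a rough illustration of that last point (not part of the patch): in the Go 1.7 runtime, ReadMemStats stops the world and then copies the statistics on the system stack via a closure, roughly systemstack(func() { readmemstats_m(m) }), so if that closure were heap-allocated, any test counting allocations around ReadMemStats would observe an extra allocation per call. A minimal, hypothetical user-level check of that property might look like the following sketch; the exact allocation count depends on the Go or gccgo version in use.

package main

import (
	"fmt"
	"runtime"
	"testing"
)

func main() {
	// ReadMemStats switches to the system stack via a closure; if the
	// compiler heap-allocated that closure, this measurement would report
	// a spurious allocation for every call.
	var ms runtime.MemStats
	allocs := testing.AllocsPerRun(100, func() {
		runtime.ReadMemStats(&ms)
	})
	fmt.Printf("allocations per ReadMemStats call: %v\n", allocs)
}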
565 lines | 13 KiB | Plaintext
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"
#include "go-string.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
struct Bucket
{
	Bucket	*next;		// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally unions can break precise GC,
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			// The following complex 3-stage scheme of stats accumulation
			// is required to obtain a consistent picture of mallocs and frees
			// for some point in time.
			// The problem is that mallocs come in real time, while frees
			// come only after a GC during concurrent sweeping. So if we would
			// naively count them, we would get a skew toward mallocs.
			//
			// Mallocs are accounted in recent stats.
			// Explicit frees are accounted in recent stats.
			// GC frees are accounted in prev stats.
			// After GC prev stats are added to final stats and
			// recent stats are moved into prev stats.
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;

			uintptr	prev_allocs;  // since last but one till last gc
			uintptr	prev_frees;
			uintptr	prev_alloc_bytes;
			uintptr	prev_free_bytes;

			uintptr	recent_allocs;  // since last gc till now
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;

		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;	// hash of size + stk
	uintptr	size;
	uintptr	nstk;
	Location stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr size, Location *stk, int32 nstk, bool alloc)
{
	int32 i, j;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats()->buckhash_sys);
		if(buckhash == nil)
			runtime_throw("runtime: cannot allocate memory");
	}

	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i].pc;
		h += h<<10;
		h ^= h>>6;
	}
	// hash in size
	h += size;
	h += h<<10;
	h ^= h>>6;
	// finalize
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next) {
		if(b->typ == typ && b->hash == h && b->size == size && b->nstk == (uintptr)nstk) {
			for(j = 0; j < nstk; j++) {
				if(b->stk[j].pc != stk[j].pc ||
				   b->stk[j].lineno != stk[j].lineno ||
				   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
					break;
			}
			if (j == nstk)
				return b;
		}
	}

	if(!alloc)
		return nil;

	b = runtime_persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats()->buckhash_sys);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->size = size;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->prev_allocs;
		b->frees += b->prev_frees;
		b->alloc_bytes += b->prev_alloc_bytes;
		b->free_bytes += b->prev_free_bytes;

		b->prev_allocs = b->recent_allocs;
		b->prev_frees = b->recent_frees;
		b->prev_alloc_bytes = b->recent_alloc_bytes;
		b->prev_free_bytes = b->recent_free_bytes;

		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
	runtime_lock(&proflock);
	MProf_GC();
	runtime_unlock(&proflock);
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	Location stk[32];
	Bucket *b;
	int32 nstk;

	nstk = runtime_callers(1, stk, nelem(stk), false);
	runtime_lock(&proflock);
	b = stkbucket(MProf, size, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	runtime_unlock(&proflock);

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and chances of deadlocks.
	// Since the object must be alive during call to MProf_Malloc,
	// it's fine to do this non-atomically.
	runtime_setprofilebucket(p, b);
}

// Called when freeing a profiled block.
void
runtime_MProf_Free(Bucket *b, uintptr size, bool freed)
{
	runtime_lock(&proflock);
	if(freed) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	} else {
		b->prev_frees++;
		b->prev_free_bytes += size;
	}
	runtime_unlock(&proflock);
}

int64 runtime_blockprofilerate;  // in CPU ticks

void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");

void
runtime_SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;  // disable profiling
	else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = (float64)rate*runtime_tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, r);
}

void
runtime_blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	Location stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
		return;

	nstk = runtime_callers(skip, stk, nelem(stk), false);
	runtime_lock(&proflock);
	b = stkbucket(BProf, 0, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime_unlock(&proflock);
}

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	uint32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i].pc;
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime_lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);
}

void
runtime_MProf_Mark(struct Workbuf **wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
	// buckhash is not allocated via mallocgc.
	enqueue1(wbufp, (Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
	enqueue1(wbufp, (Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
}

void
runtime_iterate_memprof(void (*callback)(Bucket*, uintptr, Location*, uintptr, uintptr, uintptr))
{
	Bucket *b;

	runtime_lock(&proflock);
	for(b=mbuckets; b; b=b->allnext) {
		callback(b, b->nstk, b->stk, b->size, b->allocs, b->frees);
	}
	runtime_unlock(&proflock);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime_lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (BRecord*)p.__values;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i].pc;
			for(; (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime_unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;
	int32 i;

	first = runtime_atomicloadp(&runtime_allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (TRecord*)p.__values;
		for(mp=first; mp; mp=mp->alllink) {
			for(i = 0; (uintptr)i < nelem(r->stk); i++) {
				r->stk[i] = mp->createstack[i].pc;
			}
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	byte *pc;
	bool enablegc = false;

	pc = (byte*)(uintptr)runtime_getcallerpc(&b);

	if(all) {
		runtime_acquireWorldsema();
		runtime_m()->gcing = 1;
		runtime_stopTheWorldWithSema();
		enablegc = mstats()->enablegc;
		mstats()->enablegc = false;
	}

	if(b.__count == 0)
		n = 0;
	else {
		G* g = runtime_g();
		g->writebuf.__values = b.__values;
		g->writebuf.__count = 0;
		g->writebuf.__capacity = b.__count;
		USED(pc);
		runtime_goroutineheader(g);
		runtime_traceback();
		runtime_printcreatedby(g);
		if(all)
			runtime_tracebackothers(g);
		n = g->writebuf.__count;
		g->writebuf.__values = nil;
		g->writebuf.__count = 0;
		g->writebuf.__capacity = 0;
	}

	if(all) {
		runtime_m()->gcing = 0;
		mstats()->enablegc = enablegc;
		runtime_releaseWorldsema();
		runtime_startTheWorldWithSema();
	}
}

static void
saveg(G *gp, TRecord *r)
{
	int32 n, i;
	Location locstk[nelem(r->stk)];

	if(gp == runtime_g()) {
		n = runtime_callers(0, locstk, nelem(r->stk), false);
		for(i = 0; i < n; i++)
			r->stk[i] = locstk[i].pc;
	}
	else {
		// FIXME: Not implemented.
		n = 0;
	}
	if((size_t)n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	uintptr i;
	TRecord *r;
	G *gp;

	ok = false;
	n = runtime_gcount();
	if(n <= b.__count) {
		runtime_acquireWorldsema();
		runtime_m()->gcing = 1;
		runtime_stopTheWorldWithSema();

		n = runtime_gcount();
		if(n <= b.__count) {
			G* g = runtime_g();
			ok = true;
			r = (TRecord*)b.__values;
			saveg(g, r++);
			for(i = 0; i < runtime_allglen; i++) {
				gp = runtime_allg[i];
				if(gp == g || gp->atomicstatus == _Gdead)
					continue;
				saveg(gp, r++);
			}
		}

		runtime_m()->gcing = 0;
		runtime_releaseWorldsema();
		runtime_startTheWorldWithSema();
	}
}

// Tracing of alloc/free/gc.

static Lock tracelock;

static const char*
typeinfoname(int32 typeinfo)
{
	if(typeinfo == TypeInfo_SingleObject)
		return "single object";
	else if(typeinfo == TypeInfo_Array)
		return "array";
	else if(typeinfo == TypeInfo_Chan)
		return "channel";
	runtime_throw("typinfoname: unknown type info");
	return nil;
}

void
runtime_tracealloc(void *p, uintptr size, uintptr typ)
{
	const char *name;
	Type *type;

	runtime_lock(&tracelock);
	runtime_m()->traceback = 2;
	type = (Type*)(typ & ~3);
	name = typeinfoname(typ & 3);
	if(type == nil)
		runtime_printf("tracealloc(%p, %p, %s)\n", p, size, name);
	else
		runtime_printf("tracealloc(%p, %p, %s of %S)\n", p, size, name, *type->__reflection);
	if(runtime_m()->curg == nil || runtime_g() == runtime_m()->curg) {
		runtime_goroutineheader(runtime_g());
		runtime_traceback();
	} else {
		runtime_goroutineheader(runtime_m()->curg);
		runtime_traceback();
	}
	runtime_printf("\n");
	runtime_m()->traceback = 0;
	runtime_unlock(&tracelock);
}

void
runtime_tracefree(void *p, uintptr size)
{
	runtime_lock(&tracelock);
	runtime_m()->traceback = 2;
	runtime_printf("tracefree(%p, %p)\n", p, size);
	runtime_goroutineheader(runtime_g());
	runtime_traceback();
	runtime_printf("\n");
	runtime_m()->traceback = 0;
	runtime_unlock(&tracelock);
}

void
runtime_tracegc(void)
{
	runtime_lock(&tracelock);
	runtime_m()->traceback = 2;
	runtime_printf("tracegc()\n");
	// running on m->g0 stack; show all non-g0 goroutines
	runtime_tracebackothers(runtime_g());
	runtime_printf("end tracegc\n");
	runtime_printf("\n");
	runtime_m()->traceback = 0;
	runtime_unlock(&tracelock);
}