gcc/libgo/go/runtime/mprof.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

// Export temporarily for gccgo's C code to call:
//go:linkname mProf_Malloc runtime.mProf_Malloc
//go:linkname mProf_Free runtime.mProf_Free
//go:linkname mProf_GC runtime.mProf_GC
//go:linkname tracealloc runtime.tracealloc
//go:linkname tracefree runtime.tracefree
//go:linkname tracegc runtime.tracegc
//go:linkname iterate_memprof runtime.iterate_memprof

// proflock serializes access to the profiling state in this file: the
// bucket hash table, the mbuckets and bbuckets lists, and the memRecord
// and blockRecord data stored in each bucket.
//
// NOTE(rsc): Everything here could use cas if contention became an issue.
var proflock mutex

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

const (
	// profile types; numbering starts at 1, and newBucket rejects any
	// value other than these two
	memProfile bucketType = 1 + iota
	blockProfile

	// size of bucket hash table (number of hash chains)
	buckHashSize = 179999

	// max depth of stack to record in bucket
	maxStack = 32
)

// A bucketType says which profile a bucket belongs to:
// memProfile or blockProfile.
type bucketType int

// A bucket holds per-call-stack profiling information.
// The representation is a bit sleazy, inherited from C.
// This struct defines the bucket header. It is followed in
// memory by the stack entries (nstk location values) and then
// the actual record data, either a memRecord or a blockRecord.
//
// Lookup is by hashing the call stack into a linked-list hash table.
type bucket struct {
	next    *bucket    // next bucket on the same hash chain
	allnext *bucket    // next bucket on the mbuckets or bbuckets list
	typ     bucketType // memProfile or blockProfile
	hash    uintptr    // hash of stack and size, as computed by stkbucket
	size    uintptr    // size passed to stkbucket (0 for blockProfile buckets)
	nstk    uintptr    // number of stack entries stored after the header
}

// A memRecord is the bucket data for a bucket of type memProfile,
// part of the memory profile.
type memRecord struct {
	// The following complex 3-stage scheme of stats accumulation
	// is required to obtain a consistent picture of mallocs and frees
	// for some point in time.
	// The problem is that mallocs come in real time, while frees
	// come only after a GC during concurrent sweeping. So if we would
	// naively count them, we would get a skew toward mallocs.
	//
	// Mallocs are accounted in recent stats.
	// Explicit frees are accounted in recent stats.
	// GC frees are accounted in prev stats.
	// After GC prev stats are added to final stats and
	// recent stats are moved into prev stats.
	//
	// allocs, frees, alloc_bytes and free_bytes are the final stats;
	// these are the values copied into a MemProfileRecord.
	allocs      uintptr
	frees       uintptr
	alloc_bytes uintptr
	free_bytes  uintptr

	// changes between next-to-last GC and last GC
	prev_allocs      uintptr
	prev_frees       uintptr
	prev_alloc_bytes uintptr
	prev_free_bytes  uintptr

	// changes since last GC
	recent_allocs      uintptr
	recent_frees       uintptr
	recent_alloc_bytes uintptr
	recent_free_bytes  uintptr
}

// A blockRecord is the bucket data for a bucket of type blockProfile,
// part of the blocking profile.
type blockRecord struct {
	count  int64 // number of sampled blocking events (incremented by blockevent)
	cycles int64 // sum of the cycles values blockevent recorded for those events
}

// Profiling state; stkbucket and newBucket update it and the profile
// readers walk the two bucket lists.
var (
	mbuckets  *bucket           // memory profile buckets
	bbuckets  *bucket           // blocking profile buckets
	buckhash  *[179999]*bucket  // bucket hash table, allocated on first use by stkbucket; the length equals buckHashSize
	bucketmem uintptr           // total bytes requested by newBucket
)

// newBucket allocates a bucket with the given type and number of stack entries.
// The allocation covers the bucket header, nstk location entries and the
// record that belongs to typ (a memRecord or a blockRecord). Any other typ
// is a fatal error.
func newBucket(typ bucketType, nstk int) *bucket {
	total := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(location{})
	switch typ {
	case memProfile:
		total += unsafe.Sizeof(memRecord{})
	case blockProfile:
		total += unsafe.Sizeof(blockRecord{})
	default:
		throw("invalid profile bucket type")
	}

	nb := (*bucket)(persistentalloc(total, 0, &memstats.buckhash_sys))
	bucketmem += total
	nb.nstk = uintptr(nstk)
	nb.typ = typ
	return nb
}

// stk returns the slice in b holding the stack.
// The entries start directly after the bucket header; the returned slice
// has length and capacity b.nstk.
func (b *bucket) stk() []location {
	base := add(unsafe.Pointer(b), unsafe.Sizeof(*b))
	frames := (*[maxStack]location)(base)
	return frames[:b.nstk:b.nstk]
}

// mp returns the memRecord associated with the memProfile bucket b.
// The record is located after the header and the b.nstk stack entries.
// Calling mp on a bucket of any other type is a fatal error.
func (b *bucket) mp() *memRecord {
	if b.typ != memProfile {
		throw("bad use of bucket.mp")
	}
	off := unsafe.Sizeof(*b) + b.nstk*unsafe.Sizeof(location{})
	return (*memRecord)(add(unsafe.Pointer(b), off))
}

// bp returns the blockRecord associated with the blockProfile bucket b.
// The record is located after the header and the b.nstk stack entries.
// Calling bp on a bucket of any other type is a fatal error.
func (b *bucket) bp() *blockRecord {
	if b.typ != blockProfile {
		throw("bad use of bucket.bp")
	}
	off := unsafe.Sizeof(*b) + b.nstk*unsafe.Sizeof(location{})
	return (*blockRecord)(add(unsafe.Pointer(b), off))
}

// stkbucket returns the bucket of type typ for the call stack stk and the
// given size. If no such bucket exists it returns nil when alloc is false;
// otherwise it creates the bucket, links it into its hash chain and into
// the mbuckets or bbuckets list, and returns it.
func stkbucket(typ bucketType, size uintptr, stk []location, alloc bool) *bucket {
	// The hash table is allocated on first use.
	if buckhash == nil {
		buckhash = (*[buckHashSize]*bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))
		if buckhash == nil {
			throw("runtime: cannot allocate memory")
		}
	}

	// Mix the PC of every stack entry into the hash...
	var sum uintptr
	for i := range stk {
		sum += stk[i].pc
		sum += sum << 10
		sum ^= sum >> 6
	}
	// ...then the size...
	sum += size
	sum += sum << 10
	sum ^= sum >> 6
	// ...and finalize.
	sum += sum << 3
	sum ^= sum >> 11

	// Look for an existing bucket on the chain.
	slot := int(sum % buckHashSize)
	for cur := buckhash[slot]; cur != nil; cur = cur.next {
		if cur.typ != typ || cur.hash != sum || cur.size != size {
			continue
		}
		if eqslice(cur.stk(), stk) {
			return cur
		}
	}

	if !alloc {
		return nil
	}

	// Not found: create a bucket and push it onto the chain and onto
	// the list for its profile.
	nb := newBucket(typ, len(stk))
	copy(nb.stk(), stk)
	nb.hash = sum
	nb.size = size
	nb.next = buckhash[slot]
	buckhash[slot] = nb
	if typ == memProfile {
		nb.allnext = mbuckets
		mbuckets = nb
	} else {
		nb.allnext = bbuckets
		bbuckets = nb
	}
	return nb
}

// eqslice reports whether x and y have the same length and hold equal
// locations at every index.
func eqslice(x, y []location) bool {
	if len(x) != len(y) {
		return false
	}
	for i := range x {
		if x[i] != y[i] {
			return false
		}
	}
	return true
}

// mprof_GC advances the accounting of every memory profile bucket by one
// cycle: the prev_* counters are added into the final counters, the
// recent_* counters become the prev_* counters, and the recent_* counters
// are reset to zero. Both callers in this file hold proflock.
func mprof_GC() {
	for b := mbuckets; b != nil; b = b.allnext {
		rec := b.mp()

		// prev -> final
		rec.allocs, rec.frees = rec.allocs+rec.prev_allocs, rec.frees+rec.prev_frees
		rec.alloc_bytes, rec.free_bytes = rec.alloc_bytes+rec.prev_alloc_bytes, rec.free_bytes+rec.prev_free_bytes

		// recent -> prev
		rec.prev_allocs, rec.prev_frees = rec.recent_allocs, rec.recent_frees
		rec.prev_alloc_bytes, rec.prev_free_bytes = rec.recent_alloc_bytes, rec.recent_free_bytes

		// reset recent
		rec.recent_allocs, rec.recent_frees = 0, 0
		rec.recent_alloc_bytes, rec.recent_free_bytes = 0, 0
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
// mProf_GC runs mprof_GC with proflock held.
func mProf_GC() {
	lock(&proflock)
	mprof_GC()
	unlock(&proflock)
}

// Called by malloc to record a profiled block.
// mProf_Malloc captures the current call stack (up to maxStack entries),
// charges one allocation of size bytes to the recent stats of the matching
// memProfile bucket, and then associates that bucket with p.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
	var frames [maxStack]location
	depth := callers(4, frames[:])

	lock(&proflock)
	b := stkbucket(memProfile, size, frames[:depth], true)
	rec := b.mp()
	rec.recent_allocs++
	rec.recent_alloc_bytes += size
	unlock(&proflock)

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and chances of deadlocks.
	// Since the object must be alive during call to mProf_Malloc,
	// it's fine to do this non-atomically.
	systemstack(func() {
		setprofilebucket(p, b)
	})
}

// Called when freeing a profiled block.
// mProf_Free charges one free of size bytes to the prev stats of the
// memProfile bucket b, under proflock.
func mProf_Free(b *bucket, size uintptr) {
	lock(&proflock)
	rec := b.mp()
	rec.prev_free_bytes += size
	rec.prev_frees++
	unlock(&proflock)
}

// blockprofilerate is the blocking profile sampling rate. It is written by
// SetBlockProfileRate and read by blockevent, both using atomic operations;
// blockevent records nothing while it is 0.
var blockprofilerate uint64 // in CPU ticks

// SetBlockProfileRate controls the fraction of goroutine blocking events
// that are reported in the blocking profile. The profiler aims to sample
// an average of one blocking event per rate nanoseconds spent blocked.
//
// To include every blocking event in the profile, pass rate = 1.
// To turn off profiling entirely, pass rate <= 0.
func SetBlockProfileRate(rate int) {
	var ticks int64
	switch {
	case rate <= 0:
		// disable profiling
		ticks = 0
	case rate == 1:
		// profile everything
		ticks = 1
	default:
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		ticks = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))
		if ticks == 0 {
			// a positive rate must never round down to "disabled"
			ticks = 1
		}
	}

	atomic.Store64(&blockprofilerate, uint64(ticks))
}

// blockevent records a blocking event of the given duration in cycles
// against the current call stack, skipping skip frames. Non-positive
// durations are counted as 1 cycle. Nothing is recorded when the profile
// rate is 0. Events shorter than the rate are dropped when a random draw
// modulo the rate exceeds their duration.
func blockevent(cycles int64, skip int) {
	if cycles <= 0 {
		cycles = 1
	}
	rate := int64(atomic.Load64(&blockprofilerate))
	if rate <= 0 {
		return
	}
	if rate > cycles && int64(fastrand1())%rate > cycles {
		return
	}

	gp := getg()
	var frames [maxStack]location
	var depth int
	if gp.m.curg != nil && gp.m.curg != gp {
		// FIXME: This should get a traceback of gp.m.curg.
		// nstk = gcallers(gp.m.curg, skip, stk[:])
		depth = callers(skip, frames[:])
	} else {
		depth = callers(skip, frames[:])
	}

	lock(&proflock)
	rec := stkbucket(blockProfile, 0, frames[:depth], true).bp()
	rec.count++
	rec.cycles += cycles
	unlock(&proflock)
}

// Go interface to profile data.

// A StackRecord describes a single execution stack.
type StackRecord struct {
	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0. The prefix stops before the first zero entry,
// or covers the whole array if there is none.
func (r *StackRecord) Stack() []uintptr {
	depth := 0
	for depth < len(r.Stack0) && r.Stack0[depth] != 0 {
		depth++
	}
	return r.Stack0[:depth]
}

// MemProfileRate controls the fraction of memory allocations
// that are recorded and reported in the memory profile.
// The profiler aims to sample an average of
// one allocation per MemProfileRate bytes allocated.
// The default is 512 * 1024 bytes.
//
// To include every allocated block in the profile, set MemProfileRate to 1.
// To turn off profiling entirely, set MemProfileRate to 0.
//
// The tools that process the memory profiles assume that the
// profile rate is constant across the lifetime of the program
// and equal to the current value. Programs that change the
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
var MemProfileRate int = 512 * 1024

// A MemProfileRecord describes the live objects allocated
// by a particular call sequence (stack trace).
type MemProfileRecord struct {
	AllocBytes, FreeBytes     int64       // number of bytes allocated, freed
	AllocObjects, FreeObjects int64       // number of objects allocated, freed
	Stack0                    [32]uintptr // stack trace for this record; ends at first 0 entry
}

// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
func (r *MemProfileRecord) InUseBytes() int64 {
	return r.AllocBytes - r.FreeBytes
}

// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
func (r *MemProfileRecord) InUseObjects() int64 { return r.AllocObjects - r.FreeObjects }

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0. The prefix stops before the first zero entry,
// or covers the whole array if there is none.
func (r *MemProfileRecord) Stack() []uintptr {
	depth := len(r.Stack0)
	for i := range r.Stack0 {
		if r.Stack0[i] == 0 {
			depth = i
			break
		}
	}
	return r.Stack0[:depth]
}

// MemProfile returns a profile of memory allocated and freed per allocation
// site.
//
// MemProfile returns n, the number of records in the current memory profile.
// If len(p) >= n, MemProfile copies the profile into p and returns n, true.
// If len(p) < n, MemProfile does not change p and returns n, false.
//
// If inuseZero is true, the profile includes allocation records
// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
// These are sites where memory was allocated, but it has all
// been released back to the runtime.
//
// The returned profile may be up to two garbage collection cycles old.
// This is to avoid skewing the profile toward allocations; because
// allocations happen in real time but frees are delayed until the garbage
// collector performs sweeping, the profile only accounts for allocations
// that have had a chance to be freed by the garbage collector.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.memprofile flag instead
// of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
	lock(&proflock)

	// First pass: count the reportable buckets and note whether any
	// bucket has final stats at all.
	noData := true
	for b := mbuckets; b != nil; b = b.allnext {
		rec := b.mp()
		if inuseZero || rec.alloc_bytes != rec.free_bytes {
			n++
		}
		if rec.allocs != 0 || rec.frees != 0 {
			noData = false
		}
	}

	if noData {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		mprof_GC()
		mprof_GC()
		n = 0
		for b := mbuckets; b != nil; b = b.allnext {
			rec := b.mp()
			if !inuseZero && rec.alloc_bytes == rec.free_bytes {
				continue
			}
			n++
		}
	}

	if n <= len(p) {
		ok = true
		dst := p
		for b := mbuckets; b != nil; b = b.allnext {
			rec := b.mp()
			if !inuseZero && rec.alloc_bytes == rec.free_bytes {
				continue
			}
			record(&dst[0], b)
			dst = dst[1:]
		}
	}

	unlock(&proflock)
	return
}

// record writes b's data to r: the final allocation and free counters of
// b's memRecord, and the PCs of b's stack. Entries of r.Stack0 from index
// b.nstk onward are set to zero.
func record(r *MemProfileRecord, b *bucket) {
	rec := b.mp()
	r.AllocObjects, r.FreeObjects = int64(rec.allocs), int64(rec.frees)
	r.AllocBytes, r.FreeBytes = int64(rec.alloc_bytes), int64(rec.free_bytes)

	frames := b.stk()
	for i := 0; i < len(frames) && i < len(r.Stack0); i++ {
		r.Stack0[i] = frames[i].pc
	}
	for i := int(b.nstk); i < len(r.Stack0); i++ {
		r.Stack0[i] = 0
	}
}

// iterate_memprof calls fn once for every memory profile bucket, with
// proflock held. fn receives the bucket, the number of stack entries, a
// pointer to the first stack entry, the bucket's size, and its final
// allocs and frees counters.
//
// For a bucket that was recorded with an empty stack, fn receives a count
// of 0 and a nil entry pointer. Indexing element 0 of the empty stack
// slice would otherwise panic while proflock is held.
func iterate_memprof(fn func(*bucket, uintptr, *location, uintptr, uintptr, uintptr)) {
	lock(&proflock)
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		var first *location
		if stk := b.stk(); len(stk) > 0 {
			first = &stk[0]
		}
		fn(b, b.nstk, first, b.size, mp.allocs, mp.frees)
	}
	unlock(&proflock)
}

// BlockProfileRecord describes blocking events originated
// at a particular call sequence (stack trace).
type BlockProfileRecord struct {
	Count  int64 // number of sampled blocking events
	Cycles int64 // total cycles recorded for those events
	StackRecord  // call stack at which the events were recorded
}

// BlockProfile returns n, the number of records in the current blocking profile.
// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
// If len(p) < n, BlockProfile does not change p and returns n, false.
//
// Each copied record holds the bucket's event count, its accumulated
// cycles, and the PCs of its stack; unused entries of Stack0 are zero.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.blockprofile flag instead
// of calling BlockProfile directly.
func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&proflock)
	for b := bbuckets; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := bbuckets; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = bp.count
			r.Cycles = bp.cycles

			// Copy at most len(r.Stack0) PCs and zero the rest. The number
			// of copied entries is tracked explicitly: after a range loop
			// the index variable holds the last index, not the length, so
			// zero-filling from it would wipe the final frame.
			stk := b.stk()
			nstk := len(stk)
			if nstk > len(r.Stack0) {
				nstk = len(r.Stack0)
			}
			for i := 0; i < nstk; i++ {
				r.Stack0[i] = stk[i].pc
			}
			for i := nstk; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&proflock)
	return
}

// ThreadCreateProfile returns n, the number of records in the thread creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
//
// There is one record per m on the list that starts at allm; each record
// receives the PCs of that m's createstack.
//
// Most clients should use the runtime/pprof package instead
// of calling ThreadCreateProfile directly.
func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
	// Load the list head once so that both passes walk the same list.
	head := (*m)(atomic.Loadp(unsafe.Pointer(allm())))
	for cur := head; cur != nil; cur = cur.alllink {
		n++
	}
	if n > len(p) {
		return
	}

	ok = true
	next := 0
	for cur := head; cur != nil; cur = cur.alllink {
		for j := range cur.createstack {
			p[next].Stack0[j] = cur.createstack[j].pc
		}
		next++
	}
	return
}

// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
// If len(p) < n, GoroutineProfile does not change p and returns n, false.
//
// The world is stopped while the goroutines are counted and saved. The
// calling goroutine always occupies the first record.
//
// Most clients should use the runtime/pprof package instead
// of calling GoroutineProfile directly.
func GoroutineProfile(p []StackRecord) (n int, ok bool) {
	self := getg()

	// wanted reports whether gp1 gets a record of its own: it is not the
	// caller (which is saved separately), not dead, and not a system
	// goroutine. Checking isSystemGoroutine here makes GoroutineProfile
	// consistent with both NumGoroutine and Stack.
	wanted := func(gp1 *g) bool {
		if gp1 == self {
			return false
		}
		if readgstatus(gp1) == _Gdead {
			return false
		}
		return !isSystemGoroutine(gp1)
	}

	stopTheWorld("profile")

	// Start at 1 to account for the current goroutine.
	n = 1
	for _, gp1 := range allgs() {
		if wanted(gp1) {
			n++
		}
	}

	if n <= len(p) {
		ok = true

		// Save current goroutine.
		saveg(self, &p[0])
		rest := p[1:]

		// Save other goroutines.
		for _, gp1 := range allgs() {
			if !wanted(gp1) {
				continue
			}
			if len(rest) == 0 {
				// Should be impossible, but better to return a
				// truncated profile than to crash the entire process.
				break
			}
			saveg(gp1, &rest[0])
			rest = rest[1:]
		}
	}

	startTheWorld()

	return n, ok
}

// saveg stores the stack of gp in r. Only the calling goroutine is
// supported: its PCs are copied into r.Stack0, followed by a zero entry
// when there is room. For any other goroutine r is marked as an empty
// stack.
func saveg(gp *g, r *StackRecord) {
	if gp != getg() {
		// FIXME: Not implemented.
		r.Stack0[0] = 0
		return
	}

	var frames [32]location
	depth := callers(1, frames[:])
	for i := 0; i < depth; i++ {
		r.Stack0[i] = frames[i].pc
	}
	if depth < len(r.Stack0) {
		r.Stack0[depth] = 0
	}
}

// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
//
// When all is true the world is stopped for the duration of the call.
// If buf is empty nothing is formatted and Stack returns 0.
func Stack(buf []byte, all bool) int {
	if all {
		stopTheWorld("stack trace")
	}

	n := 0
	if len(buf) > 0 {
		gp := getg()
		// Force traceback=1 to override GOTRACEBACK setting,
		// so that Stack's results are consistent.
		// GOTRACEBACK is only about crash dumps.
		gp.m.traceback = 1
		// Point the goroutine's write buffer at buf: length 0, capacity
		// len(buf). The number of bytes written is read back from its
		// length below.
		gp.writebuf = buf[0:0:len(buf)]
		goroutineheader(gp)
		traceback()
		if all {
			tracebackothers(gp)
		}
		// Undo the temporary settings before returning.
		gp.m.traceback = 0
		n = len(gp.writebuf)
		gp.writebuf = nil
	}

	if all {
		startTheWorld()
	}
	return n
}

// Tracing of alloc/free/gc.

// tracelock is held by tracealloc, tracefree and tracegc while they print,
// so that the output of concurrent trace calls does not interleave.
var tracelock mutex

// tracealloc prints a "tracealloc(...)" line for the allocation of size
// bytes at p, including the type name when typ is non-nil. It then prints
// a goroutine header and, when running on the user goroutine itself, a
// traceback. All output is produced under tracelock with gp.m.traceback
// set to 2, which is reset to 0 afterwards.
func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	if typ != nil {
		print("tracealloc(", p, ", ", hex(size), ", ", *typ.string, ")\n")
	} else {
		print("tracealloc(", p, ", ", hex(size), ")\n")
	}
	if curg := gp.m.curg; curg != nil && curg != gp {
		goroutineheader(curg)
		// FIXME: Can't do traceback of other g.
	} else {
		goroutineheader(gp)
		traceback()
	}
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

// tracefree prints a "tracefree(...)" line for the free of size bytes at p,
// followed by the goroutine header and traceback of the current g.
// All output is produced under tracelock with gp.m.traceback set to 2,
// which is reset to 0 afterwards.
func tracefree(p unsafe.Pointer, size uintptr) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracefree(", p, ", ", hex(size), ")\n")
	goroutineheader(gp)
	traceback()
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

// tracegc prints "tracegc()", the tracebacks produced by tracebackothers
// for the current g, and a closing "end tracegc" line.
// All output is produced under tracelock with gp.m.traceback set to 2,
// which is reset to 0 afterwards.
func tracegc() {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracegc()\n")
	// running on m->g0 stack; show all non-g0 goroutines
	tracebackothers(gp)
	print("end tracegc\n")
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}
runtime: copy mprof code from Go 1.7 runtime

Also create a gccgo version of some of the traceback code in traceback_gccgo.go, replacing some code currently in C. This required modifying the compiler so that when compiling the runtime package a slice expression does not cause a local array variable to escape to the heap.

Reviewed-on: https://go-review.googlesource.com/31230
From-SVN: r241189
2016-10-15 00:51:46 +02:00
			`// Copyright 2009 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`// Malloc profiling.`
			`// Patterned after tcmalloc's algorithms; shorter code.`

			`package runtime`

			`import (`
			`"runtime/internal/atomic"`
			`"unsafe"`
			`)`

			`// Export temporarily for gccgo's C code to call:`
			`//go:linkname mProf_Malloc runtime.mProf_Malloc`
			`//go:linkname mProf_Free runtime.mProf_Free`
			`//go:linkname mProf_GC runtime.mProf_GC`
			`//go:linkname tracealloc runtime.tracealloc`
			`//go:linkname tracefree runtime.tracefree`
			`//go:linkname tracegc runtime.tracegc`
			`//go:linkname iterate_memprof runtime.iterate_memprof`

			`// NOTE(rsc): Everything here could use cas if contention became an issue.`
			`var proflock mutex`

			`// All memory allocations are local and do not escape outside of the profiler.`
			`// The profiler is forbidden from referring to garbage-collected memory.`

			`const (`
			`// profile types`
			`memProfile bucketType = 1 + iota`
			`blockProfile`

			`// size of bucket hash table`
			`buckHashSize = 179999`

			`// max depth of stack to record in bucket`
			`maxStack = 32`
			`)`

			`type bucketType int`

			`// A bucket holds per-call-stack profiling information.`
			`// The representation is a bit sleazy, inherited from C.`
			`// This struct defines the bucket header. It is followed in`
			`// memory by the stack words and then the actual record`
			`// data, either a memRecord or a blockRecord.`
			`//`
			`// Per-call-stack profiling information.`
			`// Lookup by hashing call stack into a linked-list hash table.`
			`type bucket struct {`
			`next *bucket`
			`allnext *bucket`
			`typ bucketType // memBucket or blockBucket`
			`hash uintptr`
			`size uintptr`
			`nstk uintptr`
			`}`

			`// A memRecord is the bucket data for a bucket of type memProfile,`
			`// part of the memory profile.`
			`type memRecord struct {`
			`// The following complex 3-stage scheme of stats accumulation`
			`// is required to obtain a consistent picture of mallocs and frees`
			`// for some point in time.`
			`// The problem is that mallocs come in real time, while frees`
			`// come only after a GC during concurrent sweeping. So if we would`
			`// naively count them, we would get a skew toward mallocs.`
			`//`
			`// Mallocs are accounted in recent stats.`
			`// Explicit frees are accounted in recent stats.`
			`// GC frees are accounted in prev stats.`
			`// After GC prev stats are added to final stats and`
			`// recent stats are moved into prev stats.`
			`allocs uintptr`
			`frees uintptr`
			`alloc_bytes uintptr`
			`free_bytes uintptr`

			`// changes between next-to-last GC and last GC`
			`prev_allocs uintptr`
			`prev_frees uintptr`
			`prev_alloc_bytes uintptr`
			`prev_free_bytes uintptr`

			`// changes since last GC`
			`recent_allocs uintptr`
			`recent_frees uintptr`
			`recent_alloc_bytes uintptr`
			`recent_free_bytes uintptr`
			`}`

			`// A blockRecord is the bucket data for a bucket of type blockProfile,`
			`// part of the blocking profile.`
			`type blockRecord struct {`
			`count int64`
			`cycles int64`
			`}`

			`var (`
			`mbuckets *bucket // memory profile buckets`
			`bbuckets *bucket // blocking profile buckets`
			`buckhash [179999]bucket`
			`bucketmem uintptr`
			`)`

			`// newBucket allocates a bucket with the given type and number of stack entries.`
			`func newBucket(typ bucketType, nstk int) *bucket {`
			`size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(location{})`
			`switch typ {`
			`default:`
			`throw("invalid profile bucket type")`
			`case memProfile:`
			`size += unsafe.Sizeof(memRecord{})`
			`case blockProfile:`
			`size += unsafe.Sizeof(blockRecord{})`
			`}`

			`b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))`
			`bucketmem += size`
			`b.typ = typ`
			`b.nstk = uintptr(nstk)`
			`return b`
			`}`

			`// stk returns the slice in b holding the stack.`
			`func (b *bucket) stk() []location {`
			`stk := ([maxStack]location)(add(unsafe.Pointer(b), unsafe.Sizeof(b)))`
			`return stk[:b.nstk:b.nstk]`
			`}`

			`// mp returns the memRecord associated with the memProfile bucket b.`
			`func (b bucket) mp() memRecord {`
			`if b.typ != memProfile {`
			`throw("bad use of bucket.mp")`
			`}`
			`data := add(unsafe.Pointer(b), unsafe.Sizeof(b)+b.nstkunsafe.Sizeof(location{}))`
			`return (*memRecord)(data)`
			`}`

			`// bp returns the blockRecord associated with the blockProfile bucket b.`
			`func (b bucket) bp() blockRecord {`
			`if b.typ != blockProfile {`
			`throw("bad use of bucket.bp")`
			`}`
			`data := add(unsafe.Pointer(b), unsafe.Sizeof(b)+b.nstkunsafe.Sizeof(location{}))`
			`return (*blockRecord)(data)`
			`}`

			`// Return the bucket for stk[0:nstk], allocating new bucket if needed.`
			`func stkbucket(typ bucketType, size uintptr, stk []location, alloc bool) *bucket {`
			`if buckhash == nil {`
			`buckhash = ([buckHashSize]bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))`
			`if buckhash == nil {`
			`throw("runtime: cannot allocate memory")`
			`}`
			`}`

			`// Hash stack.`
			`var h uintptr`
			`for _, loc := range stk {`
			`h += loc.pc`
			`h += h << 10`
			`h ^= h >> 6`
			`}`
			`// hash in size`
			`h += size`
			`h += h << 10`
			`h ^= h >> 6`
			`// finalize`
			`h += h << 3`
			`h ^= h >> 11`

			`i := int(h % buckHashSize)`
			`for b := buckhash[i]; b != nil; b = b.next {`
			`if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {`
			`return b`
			`}`
			`}`

			`if !alloc {`
			`return nil`
			`}`

			`// Create new bucket.`
			`b := newBucket(typ, len(stk))`
			`copy(b.stk(), stk)`
			`b.hash = h`
			`b.size = size`
			`b.next = buckhash[i]`
			`buckhash[i] = b`
			`if typ == memProfile {`
			`b.allnext = mbuckets`
			`mbuckets = b`
			`} else {`
			`b.allnext = bbuckets`
			`bbuckets = b`
			`}`
			`return b`
			`}`

			`func eqslice(x, y []location) bool {`
			`if len(x) != len(y) {`
			`return false`
			`}`
			`for i, xi := range x {`
			`if xi != y[i] {`
			`return false`
			`}`
			`}`
			`return true`
			`}`

			`func mprof_GC() {`
			`for b := mbuckets; b != nil; b = b.allnext {`
			`mp := b.mp()`
			`mp.allocs += mp.prev_allocs`
			`mp.frees += mp.prev_frees`
			`mp.alloc_bytes += mp.prev_alloc_bytes`
			`mp.free_bytes += mp.prev_free_bytes`

			`mp.prev_allocs = mp.recent_allocs`
			`mp.prev_frees = mp.recent_frees`
			`mp.prev_alloc_bytes = mp.recent_alloc_bytes`
			`mp.prev_free_bytes = mp.recent_free_bytes`

			`mp.recent_allocs = 0`
			`mp.recent_frees = 0`
			`mp.recent_alloc_bytes = 0`
			`mp.recent_free_bytes = 0`
			`}`
			`}`

			`// Record that a gc just happened: all the 'recent' statistics are now real.`
			`func mProf_GC() {`
			`lock(&proflock)`
			`mprof_GC()`
			`unlock(&proflock)`
			`}`

			`// Called by malloc to record a profiled block.`
			`func mProf_Malloc(p unsafe.Pointer, size uintptr) {`
			`var stk [maxStack]location`
			`nstk := callers(4, stk[:])`
			`lock(&proflock)`
			`b := stkbucket(memProfile, size, stk[:nstk], true)`
			`mp := b.mp()`
			`mp.recent_allocs++`
			`mp.recent_alloc_bytes += size`
			`unlock(&proflock)`

			`// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.`
			`// This reduces potential contention and chances of deadlocks.`
			`// Since the object must be alive during call to mProf_Malloc,`
			`// it's fine to do this non-atomically.`
			`systemstack(func() {`
			`setprofilebucket(p, b)`
			`})`
			`}`

			`// Called when freeing a profiled block.`
			`func mProf_Free(b *bucket, size uintptr) {`
			`lock(&proflock)`
			`mp := b.mp()`
			`mp.prev_frees++`
			`mp.prev_free_bytes += size`
			`unlock(&proflock)`
			`}`

			`var blockprofilerate uint64 // in CPU ticks`

			`// SetBlockProfileRate controls the fraction of goroutine blocking events`
			`// that are reported in the blocking profile. The profiler aims to sample`
			`// an average of one blocking event per rate nanoseconds spent blocked.`
			`//`
			`// To include every blocking event in the profile, pass rate = 1.`
			`// To turn off profiling entirely, pass rate <= 0.`
			`func SetBlockProfileRate(rate int) {`
			`var r int64`
			`if rate <= 0 {`
			`r = 0 // disable profiling`
			`} else if rate == 1 {`
			`r = 1 // profile everything`
			`} else {`
			`// convert ns to cycles, use float64 to prevent overflow during multiplication`
			`r = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))`
			`if r == 0 {`
			`r = 1`
			`}`
			`}`

			`atomic.Store64(&blockprofilerate, uint64(r))`
			`}`

			`func blockevent(cycles int64, skip int) {`
			`if cycles <= 0 {`
			`cycles = 1`
			`}`
			`rate := int64(atomic.Load64(&blockprofilerate))`
			`if rate <= 0 \|\| (rate > cycles && int64(fastrand1())%rate > cycles) {`
			`return`
			`}`
			`gp := getg()`
			`var nstk int`
			`var stk [maxStack]location`
			`if gp.m.curg == nil \|\| gp.m.curg == gp {`
			`nstk = callers(skip, stk[:])`
			`} else {`
			`// FIXME: This should get a traceback of gp.m.curg.`
			`// nstk = gcallers(gp.m.curg, skip, stk[:])`
			`nstk = callers(skip, stk[:])`
			`}`
			`lock(&proflock)`
			`b := stkbucket(blockProfile, 0, stk[:nstk], true)`
			`b.bp().count++`
			`b.bp().cycles += cycles`
			`unlock(&proflock)`
			`}`

			`// Go interface to profile data.`

			`// A StackRecord describes a single execution stack.`
			`type StackRecord struct {`
			`Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry`
			`}`

			`// Stack returns the stack trace associated with the record,`
			`// a prefix of r.Stack0.`
			`func (r *StackRecord) Stack() []uintptr {`
			`for i, v := range r.Stack0 {`
			`if v == 0 {`
			`return r.Stack0[0:i]`
			`}`
			`}`
			`return r.Stack0[0:]`
			`}`

			`// MemProfileRate controls the fraction of memory allocations`
			`// that are recorded and reported in the memory profile.`
			`// The profiler aims to sample an average of`
			`// one allocation per MemProfileRate bytes allocated.`
			`//`
			`// To include every allocated block in the profile, set MemProfileRate to 1.`
			`// To turn off profiling entirely, set MemProfileRate to 0.`
			`//`
			`// The tools that process the memory profiles assume that the`
			`// profile rate is constant across the lifetime of the program`
			`// and equal to the current value. Programs that change the`
			`// memory profiling rate should do so just once, as early as`
			`// possible in the execution of the program (for example,`
			`// at the beginning of main).`
			`var MemProfileRate int = 512 * 1024`

			`// A MemProfileRecord describes the live objects allocated`
			`// by a particular call sequence (stack trace).`
			`type MemProfileRecord struct {`
			`AllocBytes, FreeBytes int64 // number of bytes allocated, freed`
			`AllocObjects, FreeObjects int64 // number of objects allocated, freed`
			`Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry`
			`}`

			`// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).`
			`func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }`

			`// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).`
			`func (r *MemProfileRecord) InUseObjects() int64 {`
			`return r.AllocObjects - r.FreeObjects`
			`}`

			`// Stack returns the stack trace associated with the record,`
			`// a prefix of r.Stack0.`
			`func (r *MemProfileRecord) Stack() []uintptr {`
			`for i, v := range r.Stack0 {`
			`if v == 0 {`
			`return r.Stack0[0:i]`
			`}`
			`}`
			`return r.Stack0[0:]`
			`}`

			`// MemProfile returns a profile of memory allocated and freed per allocation`
			`// site.`
			`//`
			`// MemProfile returns n, the number of records in the current memory profile.`
			`// If len(p) >= n, MemProfile copies the profile into p and returns n, true.`
			`// If len(p) < n, MemProfile does not change p and returns n, false.`
			`//`
			`// If inuseZero is true, the profile includes allocation records`
			`// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.`
			`// These are sites where memory was allocated, but it has all`
			`// been released back to the runtime.`
			`//`
			`// The returned profile may be up to two garbage collection cycles old.`
			`// This is to avoid skewing the profile toward allocations; because`
			`// allocations happen in real time but frees are delayed until the garbage`
			`// collector performs sweeping, the profile only accounts for allocations`
			`// that have had a chance to be freed by the garbage collector.`
			`//`
			`// Most clients should use the runtime/pprof package or`
			`// the testing package's -test.memprofile flag instead`
			`// of calling MemProfile directly.`
			`func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {`
			`lock(&proflock)`
			`clear := true`
			`for b := mbuckets; b != nil; b = b.allnext {`
			`mp := b.mp()`
			`if inuseZero \|\| mp.alloc_bytes != mp.free_bytes {`
			`n++`
			`}`
			`if mp.allocs != 0 \|\| mp.frees != 0 {`
			`clear = false`
			`}`
			`}`
			`if clear {`
			`// Absolutely no data, suggesting that a garbage collection`
			`// has not yet happened. In order to allow profiling when`
			`// garbage collection is disabled from the beginning of execution,`
			`// accumulate stats as if a GC just happened, and recount buckets.`
			`mprof_GC()`
			`mprof_GC()`
			`n = 0`
			`for b := mbuckets; b != nil; b = b.allnext {`
			`mp := b.mp()`
			`if inuseZero \|\| mp.alloc_bytes != mp.free_bytes {`
			`n++`
			`}`
			`}`
			`}`
			`if n <= len(p) {`
			`ok = true`
			`idx := 0`
			`for b := mbuckets; b != nil; b = b.allnext {`
			`mp := b.mp()`
			`if inuseZero \|\| mp.alloc_bytes != mp.free_bytes {`
			`record(&p[idx], b)`
			`idx++`
			`}`
			`}`
			`}`
			`unlock(&proflock)`
			`return`
			`}`

			`// Write b's data to r.`
			`func record(r MemProfileRecord, b bucket) {`
			`mp := b.mp()`
			`r.AllocBytes = int64(mp.alloc_bytes)`
			`r.FreeBytes = int64(mp.free_bytes)`
			`r.AllocObjects = int64(mp.allocs)`
			`r.FreeObjects = int64(mp.frees)`
			`for i, loc := range b.stk() {`
			`if i >= len(r.Stack0) {`
			`break`
			`}`
			`r.Stack0[i] = loc.pc`
			`}`
			`for i := int(b.nstk); i < len(r.Stack0); i++ {`
			`r.Stack0[i] = 0`
			`}`
			`}`

			`func iterate_memprof(fn func(bucket, uintptr, location, uintptr, uintptr, uintptr)) {`
			`lock(&proflock)`
			`for b := mbuckets; b != nil; b = b.allnext {`
			`mp := b.mp()`
			`fn(b, b.nstk, &b.stk()[0], b.size, mp.allocs, mp.frees)`
			`}`
			`unlock(&proflock)`
			`}`

			`// BlockProfileRecord describes blocking events originated`
			`// at a particular call sequence (stack trace).`
			`type BlockProfileRecord struct {`
			`Count int64`
			`Cycles int64`
			`StackRecord`
			`}`

			`// BlockProfile returns n, the number of records in the current blocking profile.`
			`// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.`
			`// If len(p) < n, BlockProfile does not change p and returns n, false.`
			`//`
			`// Most clients should use the runtime/pprof package or`
			`// the testing package's -test.blockprofile flag instead`
			`// of calling BlockProfile directly.`
			`func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {`
			`lock(&proflock)`
			`for b := bbuckets; b != nil; b = b.allnext {`
			`n++`
			`}`
			`if n <= len(p) {`
			`ok = true`
			`for b := bbuckets; b != nil; b = b.allnext {`
			`bp := b.bp()`
			`r := &p[0]`
			`r.Count = bp.count`
			`r.Cycles = bp.cycles`
			`i := 0`
			`var loc location`
			`for i, loc = range b.stk() {`
			`if i >= len(r.Stack0) {`
			`break`
			`}`
			`r.Stack0[i] = loc.pc`
			`}`
			`for ; i < len(r.Stack0); i++ {`
			`r.Stack0[i] = 0`
			`}`
			`p = p[1:]`
			`}`
			`}`
			`unlock(&proflock)`
			`return`
			`}`

			`// ThreadCreateProfile returns n, the number of records in the thread creation profile.`
			`// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.`
			`// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.`
			`//`
			`// Most clients should use the runtime/pprof package instead`
			`// of calling ThreadCreateProfile directly.`
			`func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {`
			`first := (*m)(atomic.Loadp(unsafe.Pointer(allm())))`
			`for mp := first; mp != nil; mp = mp.alllink {`
			`n++`
			`}`
			`if n <= len(p) {`
			`ok = true`
			`i := 0`
			`for mp := first; mp != nil; mp = mp.alllink {`
			`for j := range mp.createstack {`
			`p[i].Stack0[j] = mp.createstack[j].pc`
			`}`
			`i++`
			`}`
			`}`
			`return`
			`}`

			`// GoroutineProfile returns n, the number of records in the active goroutine stack profile.`
			`// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.`
			`// If len(p) < n, GoroutineProfile does not change p and returns n, false.`
			`//`
			`// Most clients should use the runtime/pprof package instead`
			`// of calling GoroutineProfile directly.`
			`func GoroutineProfile(p []StackRecord) (n int, ok bool) {`
			`gp := getg()`

			`isOK := func(gp1 *g) bool {`
			`// Checking isSystemGoroutine here makes GoroutineProfile`
			`// consistent with both NumGoroutine and Stack.`
			`return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1)`
			`}`

			`stopTheWorld("profile")`

			`n = 1`
			`for _, gp1 := range allgs() {`
			`if isOK(gp1) {`
			`n++`
			`}`
			`}`

			`if n <= len(p) {`
			`ok = true`
			`r := p`

			`// Save current goroutine.`
			`saveg(gp, &r[0])`
			`r = r[1:]`

			`// Save other goroutines.`
			`for _, gp1 := range allgs() {`
			`if isOK(gp1) {`
			`if len(r) == 0 {`
			`// Should be impossible, but better to return a`
			`// truncated profile than to crash the entire process.`
			`break`
			`}`
			`saveg(gp1, &r[0])`
			`r = r[1:]`
			`}`
			`}`
			`}`

			`startTheWorld()`

			`return n, ok`
			`}`

			`func saveg(gp g, r StackRecord) {`
			`if gp == getg() {`
			`var locbuf [32]location`
			`n := callers(1, locbuf[:])`
			`for i := 0; i < n; i++ {`
			`r.Stack0[i] = locbuf[i].pc`
			`}`
			`if n < len(r.Stack0) {`
			`r.Stack0[n] = 0`
			`}`
			`} else {`
			`// FIXME: Not implemented.`
			`r.Stack0[0] = 0`
			`}`
			`}`

			`// Stack formats a stack trace of the calling goroutine into buf`
			`// and returns the number of bytes written to buf.`
			`// If all is true, Stack formats stack traces of all other goroutines`
			`// into buf after the trace for the current goroutine.`
			`func Stack(buf []byte, all bool) int {`
			`if all {`
			`stopTheWorld("stack trace")`
			`}`

			`n := 0`
			`if len(buf) > 0 {`
			`gp := getg()`
			`// Force traceback=1 to override GOTRACEBACK setting,`
			`// so that Stack's results are consistent.`
			`// GOTRACEBACK is only about crash dumps.`
			`gp.m.traceback = 1`
			`gp.writebuf = buf[0:0:len(buf)]`
			`goroutineheader(gp)`
			`traceback()`
			`if all {`
			`tracebackothers(gp)`
			`}`
			`gp.m.traceback = 0`
			`n = len(gp.writebuf)`
			`gp.writebuf = nil`
			`}`

			`if all {`
			`startTheWorld()`
			`}`
			`return n`
			`}`

			`// Tracing of alloc/free/gc.`

			`var tracelock mutex`

			`func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {`
			`lock(&tracelock)`
			`gp := getg()`
			`gp.m.traceback = 2`
			`if typ == nil {`
			`print("tracealloc(", p, ", ", hex(size), ")\n")`
			`} else {`
			`print("tracealloc(", p, ", ", hex(size), ", ", *typ.string, ")\n")`
			`}`
			`if gp.m.curg == nil \|\| gp == gp.m.curg {`
			`goroutineheader(gp)`
			`traceback()`
			`} else {`
			`goroutineheader(gp.m.curg)`
			`// FIXME: Can't do traceback of other g.`
			`}`
			`print("\n")`
			`gp.m.traceback = 0`
			`unlock(&tracelock)`
			`}`

			`func tracefree(p unsafe.Pointer, size uintptr) {`
			`lock(&tracelock)`
			`gp := getg()`
			`gp.m.traceback = 2`
			`print("tracefree(", p, ", ", hex(size), ")\n")`
			`goroutineheader(gp)`
			`traceback()`
			`print("\n")`
			`gp.m.traceback = 0`
			`unlock(&tracelock)`
			`}`

			`func tracegc() {`
			`lock(&tracelock)`
			`gp := getg()`
			`gp.m.traceback = 2`
			`print("tracegc()\n")`
			`// running on m->g0 stack; show all non-g0 goroutines`
			`tracebackothers(gp)`
			`print("end tracegc\n")`
			`print("\n")`
			`gp.m.traceback = 0`
			`unlock(&tracelock)`
			`}`