libgo: build roots index to speed up bulkBarrierPreWrite

To reduce the amount of time spent in write barrier processing
    (specifically runtime.bulkBarrierPreWrite), add support for building a
    'GC roots index', basically a sorted list of all roots, so as to
    allow more efficient lookups of gcdata structures for globals. The
    previous implementation worked on the raw (unsorted) roots list
    itself, which did not scale well.
    
    Reviewed-on: https://go-review.googlesource.com/132595

From-SVN: r264276
This commit is contained in:
Ian Lance Taylor 2018-09-13 16:44:43 +00:00
parent 16ddcc28b5
commit 2919ad1ee3
6 changed files with 125 additions and 23 deletions

View File

@ -1,4 +1,4 @@
acf852f838e6b99f907d84648be853fa2c374393
70bd9801911f8ed27df410d36a928166c37a68fd
The first line of this file holds the git revision number of the last
merge done from the gofrontend repository.

View File

@ -1535,7 +1535,8 @@ Gogo::write_globals()
// Avoid putting runtime.gcRoots itself on the list.
if (this->compiling_runtime()
&& this->package_name() == "runtime"
&& Gogo::unpack_hidden_name(no->name()) == "gcRoots")
&& (Gogo::unpack_hidden_name(no->name()) == "gcRoots"
|| Gogo::unpack_hidden_name(no->name()) == "gcRootsIndex"))
;
else
var_gc.push_back(no);

View File

@ -243,17 +243,21 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) {
return
}
roots := gcRoots
for roots != nil {
for j := 0; j < roots.count; j++ {
pr := roots.roots[j]
addr := uintptr(pr.decl)
if cgoInRange(p, addr, addr+pr.size) {
cgoCheckBits(pr.decl, pr.gcdata, 0, pr.ptrdata)
return
}
lo := 0
hi := len(gcRootsIndex)
for lo < hi {
m := lo + (hi-lo)/2
pr := gcRootsIndex[m]
addr := uintptr(pr.decl)
if cgoInRange(p, addr, addr+pr.size) {
cgoCheckBits(pr.decl, pr.gcdata, 0, pr.ptrdata)
return
}
if uintptr(p) < addr {
hi = m
} else {
lo = m + 1
}
roots = roots.next
}
return

View File

@ -575,19 +575,23 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
if !inheap(dst) {
// If dst is a global, use the data or BSS bitmaps to
// execute write barriers.
roots := gcRoots
for roots != nil {
for i := 0; i < roots.count; i++ {
pr := roots.roots[i]
addr := uintptr(pr.decl)
if addr <= dst && dst < addr+pr.size {
if dst < addr+pr.ptrdata {
bulkBarrierBitmap(dst, src, size, dst-addr, pr.gcdata)
}
return
lo := 0
hi := len(gcRootsIndex)
for lo < hi {
m := lo + (hi-lo)/2
pr := gcRootsIndex[m]
addr := uintptr(pr.decl)
if addr <= dst && dst < addr+pr.size {
if dst < addr+pr.ptrdata {
bulkBarrierBitmap(dst, src, size, dst-addr, pr.gcdata)
}
return
}
if dst < addr {
hi = m
} else {
lo = m + 1
}
roots = roots.next
}
return
}

View File

@ -12,6 +12,7 @@ import (
)
// gcRoot is a single GC root: a variable plus a ptrmask.
//go:notinheap
type gcRoot struct {
decl unsafe.Pointer // Pointer to variable.
size uintptr // Size of variable.
@ -32,6 +33,97 @@ type gcRootList struct {
// The compiler keeps this variable itself off the list.
var gcRoots *gcRootList
// Slice containing pointers to all reachable gcRoot's sorted by
// starting address (generated at init time from 'gcRoots').
// The compiler also keeps this variable itself off the list.
// The storage backing this slice is allocated via persistentalloc(), the
// idea being that we don't want to treat the slice itself as a global
// variable, since it points to things that don't need to be scanned
// themselves.
var gcRootsIndex []*gcRoot
// rootradixsort performs an in-place radix sort of the 'arr' rootptr slice.
// Note: not a stable sort, however we expect it to be called only on slices
// with no duplicate entries, so this should not matter.
func rootradixsort(arr []*gcRoot, lo, hi int, bit uint) {
// Partition the array into two bins based on the values at the
// specified bit position: 0's bin (grown from the left) and and
// 1's bin (grown from the right). We keep two boundary markers,
// the 0's boundary "zbb" (which grows to the right) and the 1's
// boundary "obb" (which grows to the left). At each step we
// examine the bit for the right-of-ZBB element: if it is zero, we
// leave it in place and move the ZBB to the right. If the bit is
// not zero, then we swap the ZBB and OBB elements and move the
// OBB to the left. When this is done, the two partitions are then
// sorted using the next lower bit.
// 0's bin boundary, initially set to before the first element
zbb := lo - 1
// 1's bin boundary, set to just beyond the last element
obb := hi + 1
// mask to pick up bit of interest
bmask := uintptr(1) << bit
for obb-zbb > 1 {
zbbval := uintptr(arr[zbb+1].decl) & bmask
if zbbval == 0 {
// Move zbb one to the right
zbb++
} else {
// Move obb one to the left and swap
arr[obb-1], arr[zbb+1] = arr[zbb+1], arr[obb-1]
obb--
}
}
if bit != 0 {
// NB: in most cases there is just a single partition to visit
// so if we wanted to reduce stack space we could check for this
// and insert a goto back up to the top.
if zbb-lo > 0 {
rootradixsort(arr, lo, zbb, bit-1)
}
if hi-obb > 0 {
rootradixsort(arr, obb, hi, bit-1)
}
}
}
//go:nowritebarrier
func createGcRootsIndex() {
// Count roots
nroots := 0
gcr := gcRoots
for gcr != nil {
nroots += gcr.count
gcr = gcr.next
}
// Construct the gcRootsIndex slice. Use non-heap storage for the array
// backing the slice.
sp := (*notInHeapSlice)(unsafe.Pointer(&gcRootsIndex))
sp.array = (*notInHeap)(persistentalloc1(sys.PtrSize*uintptr(nroots), sys.PtrSize, &memstats.other_sys))
if sp.array == nil {
throw("runtime: cannot allocate memory")
}
sp.len = nroots
sp.cap = nroots
// Populate the roots index slice
gcr = gcRoots
k := 0
for gcr != nil {
for i := 0; i < gcr.count; i++ {
gcRootsIndex[k] = &gcr.roots[i]
k++
}
gcr = gcr.next
}
// Sort it by starting address.
rootradixsort(gcRootsIndex, 0, nroots-1, sys.PtrSize*8-1)
}
// registerGCRoots is called by compiler-generated code.
//go:linkname registerGCRoots runtime.registerGCRoots

View File

@ -207,6 +207,7 @@ func main() {
fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
fn()
createGcRootsIndex()
close(main_init_done)
needUnlock = false