Skip to content

Commit

Permalink
runtime: fix sigprof stack barrier locking
Browse files Browse the repository at this point in the history
f90b48e intended to require the stack barrier lock in all cases of
sigprof that walked the user stack, but got it wrong. In particular,
if sp < gp.stack.lo || gp.stack.hi < sp, tracebackUser would be true,
but we wouldn't acquire the stack lock. If it then turned out that we
were in a cgo call, it would walk the stack without the lock.

In fact, the whole structure of stack locking is sigprof is somewhat
wrong because it assumes the G to lock is gp.m.curg, but all three
gentraceback calls start from potentially different Gs.

To fix this, we lower the gcTryLockStackBarriers calls much closer to
the gentraceback calls. There are now three separate trylock calls,
each clearly associated with a gentraceback and the locked G clearly
matches the G from which the gentraceback starts. This actually brings
the sigprof logic closer to what it originally was before stack
barrier locking.

This depends on "runtime: increase assumed stack size in
externalthreadhandler" because it very slightly increases the stack
used by sigprof; without this other commit, this is enough to blow the
profiler thread's assumed stack size.

Fixes #12528 (hopefully for real this time!).

For the 1.5 branch, though it will require some backporting. On the
1.5 branch, this will *not* require the "runtime: increase assumed
stack size in externalthreadhandler" commit: there's no pcvalue cache,
so the used stack is smaller.

Change-Id: Id2f6446ac276848f6fc158bee550cccd03186b83
Reviewed-on: https://go-review.googlesource.com/18328
Run-TryBot: Austin Clements <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Russ Cox <[email protected]>
  • Loading branch information
aclements committed Jan 7, 2016
1 parent fdf9b3c commit 3f22ade
Showing 1 changed file with 25 additions and 27 deletions.
52 changes: 25 additions & 27 deletions src/runtime/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -2989,49 +2989,50 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
// To recap, there are no constraints on the assembly being used for the
// transition. We simply require that g and SP match and that the PC is not
// in gogo.
traceback, tracebackUser := true, true
haveStackLock := false
traceback := true
if gp == nil || sp < gp.stack.lo || gp.stack.hi < sp || setsSP(pc) {
traceback = false
} else if gp.m.curg != nil {
// The user stack is safe to scan only if we can
// acquire the stack barrier lock.
if gcTryLockStackBarriers(gp.m.curg) {
haveStackLock = true
} else {
// Stack barriers are being inserted or
// removed, so we can't get a consistent
// traceback of the user stack right now.
tracebackUser = false
if gp == gp.m.curg {
// We're on the user stack, so don't
// do any traceback.
traceback = false
}
}
}
var stk [maxCPUProfStack]uintptr
var haveStackLock *g
n := 0
if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 && tracebackUser {
if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
// Cgo, we can't unwind and symbolize arbitrary C code,
// so instead collect Go stack that leads to the cgo call.
// This is especially important on windows, since all syscalls are cgo calls.
n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
if gcTryLockStackBarriers(mp.curg) {
haveStackLock = mp.curg
n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
}
} else if traceback {
var flags uint = _TraceTrap
if tracebackUser {
if gp.m.curg != nil && gcTryLockStackBarriers(gp.m.curg) {
// It's safe to traceback the user stack.
haveStackLock = gp.m.curg
flags |= _TraceJumpStack
}
n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, flags)
// Traceback is safe if we're on the system stack (if
// necessary, flags will stop it before switching to
// the user stack), or if we locked the user stack.
if gp != gp.m.curg || haveStackLock != nil {
n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, flags)
}
}
if haveStackLock != nil {
gcUnlockStackBarriers(haveStackLock)
}

if n <= 0 {
// Normal traceback is impossible or has failed.
// See if it falls into several common cases.
n = 0
if GOOS == "windows" && n == 0 && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 && tracebackUser {
if GOOS == "windows" && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
// Libcall, i.e. runtime syscall on windows.
// Collect Go stack that leads to the call.
n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0)
if gcTryLockStackBarriers(mp.libcallg.ptr()) {
n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0)
gcUnlockStackBarriers(mp.libcallg.ptr())
}
}
if n == 0 {
// If all of the above has failed, account it against abstract "System" or "GC".
Expand All @@ -3048,9 +3049,6 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
}
}
}
if haveStackLock {
gcUnlockStackBarriers(gp.m.curg)
}

if prof.hz != 0 {
// Simple cas-lock to coordinate with setcpuprofilerate.
Expand Down

0 comments on commit 3f22ade

Please sign in to comment.