-
-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Harden shutdown logic #1037
Harden shutdown logic #1037
Changes from all commits
61efc4d
cc830ff
c9d3084
6fe8549
00a6e59
3d05764
661fb0a
cc45e21
cf6a268
bfd1211
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ import ( | |
"runtime" | ||
"runtime/pprof" | ||
"strings" | ||
"sync" | ||
"syscall" | ||
"time" | ||
|
||
|
@@ -39,7 +40,6 @@ const ( | |
cpuProfile = "ipfs.cpuprof" | ||
heapProfile = "ipfs.memprof" | ||
errorFormat = "ERROR: %v\n\n" | ||
shutdownMessage = "Received interrupt signal, shutting down..." | ||
) | ||
|
||
type cmdInvocation struct { | ||
|
@@ -132,15 +132,10 @@ func main() { | |
os.Exit(1) | ||
} | ||
|
||
// our global interrupt handler may try to stop the daemon | ||
// before the daemon is ready to be stopped; this dirty | ||
// workaround is for the daemon only; other commands are always | ||
// ready to be stopped | ||
if invoc.cmd != daemonCmd { | ||
close(invoc.req.Context().InitDone) | ||
} | ||
|
||
// ok, finally, run the command invocation. | ||
intrh, ctx := invoc.SetupInterruptHandler(ctx) | ||
defer intrh.Close() | ||
|
||
output, err := invoc.Run(ctx) | ||
if err != nil { | ||
printErr(err) | ||
|
@@ -157,8 +152,6 @@ func main() { | |
} | ||
|
||
func (i *cmdInvocation) Run(ctx context.Context) (output io.Reader, err error) { | ||
// setup our global interrupt handler. | ||
i.setupInterruptHandler() | ||
|
||
// check if user wants to debug. option OR env var. | ||
debug, _, err := i.req.Option("debug").Bool() | ||
|
@@ -226,7 +219,6 @@ func (i *cmdInvocation) Parse(ctx context.Context, args []string) error { | |
if err != nil { | ||
return err | ||
} | ||
i.req.Context().Context = ctx | ||
|
||
repoPath, err := getRepoPath(i.req) | ||
if err != nil { | ||
|
@@ -279,6 +271,8 @@ func callCommand(ctx context.Context, req cmds.Request, root *cmds.Command, cmd | |
log.Info(config.EnvDir, " ", req.Context().ConfigRoot) | ||
var res cmds.Response | ||
|
||
req.Context().Context = ctx | ||
|
||
details, err := commandDetails(req.Path(), root) | ||
if err != nil { | ||
return nil, err | ||
|
@@ -474,59 +468,70 @@ func writeHeapProfileToFile() error { | |
return pprof.WriteHeapProfile(mprof) | ||
} | ||
|
||
// listen for and handle SIGTERM | ||
func (i *cmdInvocation) setupInterruptHandler() { | ||
// IntrHandler helps set up an interrupt handler that can | ||
// be cleanly shut down through the io.Closer interface. | ||
type IntrHandler struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment on this struct |
||
sig chan os.Signal | ||
wg sync.WaitGroup | ||
} | ||
|
||
func NewIntrHandler() *IntrHandler { | ||
ih := &IntrHandler{} | ||
ih.sig = make(chan os.Signal, 1) | ||
return ih | ||
} | ||
|
||
func (ih *IntrHandler) Close() error { | ||
close(ih.sig) | ||
ih.wg.Wait() | ||
return nil | ||
} | ||
|
||
ctx := i.req.Context() | ||
sig := allInterruptSignals() | ||
|
||
// Handle starts handling the given signals, and will call the handler | ||
// callback function each time a signal is caught. The function is passed | ||
// the number of times the handler has been triggered in total, as | ||
// well as the handler itself, so that the handling logic can use the | ||
// handler's wait group to ensure clean shutdown when Close() is called. | ||
func (ih *IntrHandler) Handle(handler func(count int, ih *IntrHandler), sigs ...os.Signal) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment on this method |
||
signal.Notify(ih.sig, sigs...) | ||
ih.wg.Add(1) | ||
go func() { | ||
// first time, try to shut down. | ||
|
||
// loop because we may be | ||
for count := 0; ; count++ { | ||
<-sig | ||
|
||
// if we're still initializing, cannot use `ctx.GetNode()` | ||
select { | ||
default: // initialization not done | ||
fmt.Println(shutdownMessage) | ||
os.Exit(-1) | ||
case <-ctx.InitDone: | ||
} | ||
|
||
switch count { | ||
case 0: | ||
fmt.Println(shutdownMessage) | ||
if ctx.Online { | ||
go func() { | ||
// TODO cancel the command context instead | ||
n, err := ctx.GetNode() | ||
if err != nil { | ||
log.Error(err) | ||
fmt.Println(shutdownMessage) | ||
os.Exit(-1) | ||
} | ||
n.Close() | ||
log.Info("Gracefully shut down.") | ||
}() | ||
} else { | ||
os.Exit(0) | ||
} | ||
|
||
default: | ||
fmt.Println("Received another interrupt before graceful shutdown, terminating...") | ||
os.Exit(-1) | ||
} | ||
defer ih.wg.Done() | ||
count := 0 | ||
for _ = range ih.sig { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can just say:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, nice! |
||
count++ | ||
handler(count, ih) | ||
} | ||
signal.Stop(ih.sig) | ||
}() | ||
} | ||
|
||
func allInterruptSignals() chan os.Signal { | ||
sigc := make(chan os.Signal, 1) | ||
signal.Notify(sigc, syscall.SIGHUP, syscall.SIGINT, | ||
syscall.SIGTERM) | ||
return sigc | ||
func (i *cmdInvocation) SetupInterruptHandler(ctx context.Context) (io.Closer, context.Context) { | ||
|
||
intrh := NewIntrHandler() | ||
ctx, cancelFunc := context.WithCancel(ctx) | ||
|
||
handlerFunc := func(count int, ih *IntrHandler) { | ||
switch count { | ||
case 1: | ||
fmt.Println() // Prevent un-terminated ^C character in terminal | ||
|
||
ih.wg.Add(1) | ||
go func() { | ||
defer ih.wg.Done() | ||
cancelFunc() | ||
}() | ||
|
||
default: | ||
fmt.Println("Received another interrupt before graceful shutdown, terminating...") | ||
os.Exit(-1) | ||
} | ||
} | ||
|
||
intrh.Handle(handlerFunc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM) | ||
|
||
return intrh, ctx | ||
} | ||
|
||
func profileIfEnabled() (func(), error) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ package corehttp | |
|
||
import ( | ||
"net/http" | ||
"time" | ||
|
||
manners "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/braintree/manners" | ||
ma "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-multiaddr" | ||
|
@@ -63,6 +64,9 @@ func listenAndServe(node *core.IpfsNode, addr ma.Multiaddr, handler http.Handler | |
var serverError error | ||
serverExited := make(chan struct{}) | ||
|
||
node.Children().Add(1) | ||
defer node.Children().Done() | ||
|
||
go func() { | ||
serverError = server.ListenAndServe(host, handler) | ||
close(serverExited) | ||
|
@@ -75,8 +79,22 @@ func listenAndServe(node *core.IpfsNode, addr ma.Multiaddr, handler http.Handler | |
// if node being closed before server exits, close server | ||
case <-node.Closing(): | ||
log.Infof("server at %s terminating...", addr) | ||
|
||
// make sure keep-alive connections do not keep the server running | ||
server.InnerServer.SetKeepAlivesEnabled(false) | ||
|
||
server.Shutdown <- true | ||
<-serverExited // now, DO wait until server exit | ||
|
||
outer: | ||
for { | ||
// wait until server exits | ||
select { | ||
case <-serverExited: | ||
break outer | ||
case <-time.After(5 * time.Second): | ||
log.Infof("waiting for server at %s to terminate...", addr) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. may want to log this immediately the first time. perhaps: server.InnerServer.SetKeepAlivesEnabled(false)
server.Shutdown <- true
outer:
for {
log.Infof("waiting for server at %s to terminate...", addr)
// wait until server exits
select {
case <-serverExited:
break outer
case <-time.After(5 * time.Second):
}
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh didn't catch that. sgtm! |
||
} | ||
} | ||
} | ||
|
||
log.Infof("server at %s terminated", addr) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the purpose of the default here? Wouldn't we want to wait and print the log message?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's based on the possibility that the node/daemon may shut down through other means, e.g. through an API call or similar that wouldn't cancel the context, only close the node. In that case we wouldn't want to block. But if that's not really an option, or we do want to go through the context for those shutdowns as well, it shouldn't have a default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
gotcha, thanks!