feat: update graceful shutdown

This commit is contained in:
Kevin Franklin Kim 2024-03-15 11:01:25 +01:00
parent e21a6e2c6b
commit 6e5b02f74c
No known key found for this signature in database
5 changed files with 106 additions and 102 deletions

View File

@ -6,4 +6,5 @@ import (
var ( var (
ErrServerNotRunning = errors.New("server not running") ErrServerNotRunning = errors.New("server not running")
ErrServerShutdown = errors.New("server is shutting down")
) )

View File

@ -3,7 +3,6 @@ package main
import ( import (
"context" "context"
"net/http" "net/http"
"sync"
"syscall" "syscall"
"time" "time"
@ -12,107 +11,76 @@ import (
"go.uber.org/zap" "go.uber.org/zap"
"github.com/foomo/keel" "github.com/foomo/keel"
"github.com/foomo/keel/log"
) )
func main() { func main() {
service.DefaultHTTPHealthzAddr = "localhost:9400"
l := zap.NewExample().Named("root")
l.Info("1. starting readiness checks")
go call(l.Named("readiness"), "http://localhost:9400/healthz/readiness")
svr := keel.NewServer( svr := keel.NewServer(
//keel.WithLogger(zap.NewExample()), keel.WithLogger(l.Named("server")),
keel.WithHTTPZapService(true),
keel.WithHTTPViperService(true),
keel.WithHTTPPrometheusService(true),
keel.WithHTTPHealthzService(true), keel.WithHTTPHealthzService(true),
) )
l := svr.Logger()
go waitGroup(svr.CancelContext(), l.With(log.FServiceName("waitGroup")))
// create demo service // create demo service
svs := http.NewServeMux() svs := http.NewServeMux()
svs.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { svs.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
l.Info("handling request...")
time.Sleep(3 * time.Second)
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("OK")) _, _ = w.Write([]byte("OK"))
l.Info("... handled request")
}) })
svr.AddService( svr.AddService(
service.NewHTTP(l, "demo", "localhost:8080", svs), service.NewHTTP(l, "http", "localhost:8080", svs),
) )
svr.AddCloser(interfaces.CloseFunc(func(ctx context.Context) error { svr.AddCloser(interfaces.CloserFunc(func(ctx context.Context) error {
l.Info("custom closer") l := l.Named("closer")
l.Info("closing stuff")
time.Sleep(3 * time.Second)
l.Info("done closing stuff")
return nil return nil
})) }))
go svr.Run()
time.Sleep(1 * time.Second)
l.Info("1. starting test")
{
l.Info("2. checking healthz")
readiness(l, "http://localhost:9400/healthz/readiness")
}
go func() { go func() {
l.Info("2. sending request")
if r, err := http.Get("http://localhost:8080"); err != nil { l.Info("3. starting http checks")
go call(l.Named("http"), "http://localhost:8080")
l.Info("4. sleeping for 5 seconds")
time.Sleep(5 * time.Second)
l.Info("5. sending shutdown signal")
if err := syscall.Kill(syscall.Getpid(), syscall.SIGTERM); err != nil {
l.Fatal(err.Error()) l.Fatal(err.Error())
} else {
l.Info(" /", zap.Int("status", r.StatusCode))
} }
}() }()
time.Sleep(100 * time.Millisecond)
l.Info("3. sending shutdown signal") svr.Run()
if err := syscall.Kill(syscall.Getpid(), syscall.SIGTERM); err != nil { l.Info("done")
l.Fatal(err.Error())
}
{
l.Info("2. checking healthz")
readiness(l, "http://localhost:9400/healthz/readiness")
}
l.Info("4. waiting for shutdown")
time.Sleep(10 * time.Second)
l.Info(" done")
} }
func readiness(l *zap.Logger, url string) { func call(l *zap.Logger, url string) {
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) l = l.With(zap.String("url", url))
defer cancel() for {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) func() {
if err != nil { ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
l.Error(err.Error()) defer cancel()
return req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
} if err != nil {
resp, err := http.DefaultClient.Do(req) l.With(zap.Error(err)).Error("failed to create request")
if err != nil {
l.Error(err.Error())
return
}
l.Info(url, zap.Int("status", resp.StatusCode))
}
func waitGroup(ctx context.Context, l *zap.Logger) {
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for {
select {
case <-ctx.Done():
l.Info("Break the loop")
return return
case <-time.After(3 * time.Second):
l.Info("Hello in a loop")
} }
} resp, err := http.DefaultClient.Do(req)
}() if err != nil {
l.With(zap.Error(err)).Error("failed to send request")
wg.Wait() return
}
l.Info("ok", zap.Int("status", resp.StatusCode))
}()
time.Sleep(time.Second)
}
} }

View File

@ -15,12 +15,12 @@ import (
// See k8s for probe documentation // See k8s for probe documentation
// https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#types-of-probe // https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#types-of-probe
func main() { func main() {
service.DefaultHTTPHealthzAddr = "localhost:9400"
// you can override the below config by setting env vars // you can override the below config by setting env vars
_ = os.Setenv("SERVICE_HEALTHZ_ENABLED", "true") _ = os.Setenv("SERVICE_HEALTHZ_ENABLED", "true")
svr := keel.NewServer( svr := keel.NewServer(
keel.WithHTTPZapService(true),
keel.WithHTTPViperService(true),
// allows you to use probes for health checks in cluster: // allows you to use probes for health checks in cluster:
// GET :9400/healthz // GET :9400/healthz
// GET :9400/healthz/readiness // GET :9400/healthz/readiness

View File

@ -62,6 +62,13 @@ func WithShutdownSignals(shutdownSignals ...os.Signal) Option {
} }
} }
// WithGracefulTimeout returns an Option that overrides the server's
// gracefulTimeout with the given duration.
func WithGracefulTimeout(gracefulTimeout time.Duration) Option {
	return func(s *Server) {
		s.gracefulTimeout = gracefulTimeout
	}
}
// WithShutdownTimeout option // WithShutdownTimeout option
func WithShutdownTimeout(shutdownTimeout time.Duration) Option { func WithShutdownTimeout(shutdownTimeout time.Duration) Option {
return func(inst *Server) { return func(inst *Server) {

View File

@ -2,6 +2,7 @@ package keel
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"net/http" "net/http"
"os" "os"
@ -13,13 +14,16 @@ import (
"syscall" "syscall"
"time" "time"
"github.com/foomo/keel/config"
"github.com/foomo/keel/env"
"github.com/foomo/keel/healthz" "github.com/foomo/keel/healthz"
"github.com/foomo/keel/interfaces" "github.com/foomo/keel/interfaces"
"github.com/foomo/keel/log"
"github.com/foomo/keel/markdown" "github.com/foomo/keel/markdown"
"github.com/foomo/keel/metrics" "github.com/foomo/keel/metrics"
"github.com/foomo/keel/service" "github.com/foomo/keel/service"
"github.com/foomo/keel/telemetry"
"github.com/go-logr/logr" "github.com/go-logr/logr"
"github.com/pkg/errors"
"github.com/spf13/viper" "github.com/spf13/viper"
otelhost "go.opentelemetry.io/contrib/instrumentation/host" otelhost "go.opentelemetry.io/contrib/instrumentation/host"
otelruntime "go.opentelemetry.io/contrib/instrumentation/runtime" otelruntime "go.opentelemetry.io/contrib/instrumentation/runtime"
@ -29,22 +33,21 @@ import (
"go.opentelemetry.io/otel/trace" "go.opentelemetry.io/otel/trace"
"go.uber.org/zap" "go.uber.org/zap"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"github.com/foomo/keel/config"
"github.com/foomo/keel/env"
"github.com/foomo/keel/log"
"github.com/foomo/keel/telemetry"
) )
// Server struct // Server struct
type Server struct { type Server struct {
services []Service services []Service
initServices []Service initServices []Service
meter metric.Meter meter metric.Meter
meterProvider metric.MeterProvider meterProvider metric.MeterProvider
tracer trace.Tracer tracer trace.Tracer
traceProvider trace.TracerProvider traceProvider trace.TracerProvider
shutdownSignals []os.Signal shutdown atomic.Bool
shutdownSignals []os.Signal
// gracefulTimeout should equal the readinessProbe's periodSeconds * failureThreshold
gracefulTimeout time.Duration
// shutdownTimeout should equal the pod's terminationGracePeriodSeconds
shutdownTimeout time.Duration shutdownTimeout time.Duration
running atomic.Bool running atomic.Bool
syncClosers []interface{} syncClosers []interface{}
@ -64,6 +67,7 @@ type Server struct {
func NewServer(opts ...Option) *Server { func NewServer(opts ...Option) *Server {
inst := &Server{ inst := &Server{
gracefulTimeout: 10 * 3 * time.Second,
shutdownTimeout: 30 * time.Second, shutdownTimeout: 30 * time.Second,
shutdownSignals: []os.Signal{syscall.SIGTERM}, shutdownSignals: []os.Signal{syscall.SIGTERM},
syncReadmers: []interfaces.Readmer{}, syncReadmers: []interfaces.Readmer{},
@ -78,21 +82,42 @@ func NewServer(opts ...Option) *Server {
} }
{ // setup error group { // setup error group
inst.AddReadinessHealthzers(healthz.NewHealthzerFn(func(ctx context.Context) error {
if inst.shutdown.Load() {
return ErrServerShutdown
}
return nil
}))
inst.ctxCancel, inst.ctxCancelFn = signal.NotifyContext(inst.ctx, inst.shutdownSignals...) inst.ctxCancel, inst.ctxCancelFn = signal.NotifyContext(inst.ctx, inst.shutdownSignals...)
inst.g, inst.gCtx = errgroup.WithContext(inst.ctxCancel) inst.g, inst.gCtx = errgroup.WithContext(inst.ctxCancel)
// gracefully shutdown // gracefully shutdown
inst.g.Go(func() error { inst.g.Go(func() error {
<-inst.gCtx.Done() <-inst.gCtx.Done()
inst.l.Debug("keel graceful shutdown")
defer inst.ctxCancelFn() defer inst.ctxCancelFn()
inst.l.Info("keel graceful shutdown")
timeoutCtx, timeoutCancel := context.WithTimeout(inst.ctx, inst.shutdownTimeout) timeoutCtx, timeoutCancel := context.WithTimeout(inst.ctxCancel, inst.shutdownTimeout)
defer timeoutCancel() defer timeoutCancel()
inst.shutdown.Store(true)
inst.l.Info("keel pausing graceful shutdown", log.FDuration(inst.gracefulTimeout))
{
timer := time.NewTimer(inst.gracefulTimeout)
select {
case <-timeoutCtx.Done():
timer.Stop()
case <-timer.C:
}
}
inst.l.Info("keel resuming graceful shutdown")
// append internal closers // append internal closers
closers := append(inst.closers(), inst.traceProvider, inst.meterProvider) closers := append(inst.closers(), inst.traceProvider, inst.meterProvider)
inst.l.Debug("keel iterating closers")
for _, closer := range closers { for _, closer := range closers {
l := inst.l.With(log.FName(fmt.Sprintf("%T", closer))) l := inst.l.With(log.FName(fmt.Sprintf("%T", closer)))
switch c := closer.(type) { switch c := closer.(type) {
@ -146,7 +171,10 @@ func NewServer(opts ...Option) *Server {
} }
} }
} }
return inst.gCtx.Err()
inst.l.Debug("keel done closing")
return nil
}) })
} }
@ -307,9 +335,9 @@ func (s *Server) AddReadinessHealthzers(probes ...interface{}) {
} }
// IsCanceled returns true if the internal errgroup has been canceled // IsCanceled returns true if the internal errgroup has been canceled
func (s *Server) IsCanceled() bool { // func (s *Server) IsCanceled() bool {
return errors.Is(s.gCtx.Err(), context.Canceled) // return errors.Is(s.gCtx.Err(), context.Canceled)
} // }
// Healthz returns true if the server is running // Healthz returns true if the server is running
func (s *Server) Healthz() error { func (s *Server) Healthz() error {
@ -321,12 +349,12 @@ func (s *Server) Healthz() error {
// Run runs the server // Run runs the server
func (s *Server) Run() { func (s *Server) Run() {
if s.IsCanceled() { // if s.IsCanceled() {
s.l.Info("keel server canceled") // s.l.Info("keel server canceled")
return // return
} // }
defer s.ctxCancelFn() // defer s.ctxCancelFn()
s.l.Info("starting keel server") s.l.Info("starting keel server")
// start services // start services