supervisor.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. package supervisor
  2. // The service supervision library allows for writing of reliable, service-style software within SignOS.
  3. // It builds upon the Erlang/OTP supervision tree system, adapted to be more Go-ish.
  4. // For detailed design see go/supervision.
  5. import (
  6. "context"
  7. "sync"
  8. "go.uber.org/zap"
  9. )
  // Runnable is the unit of work managed by the supervisor: a function that is
  // run in a goroutine and supervised for as long as it lives. A Runnable may
  // itself start further runnables as its children, and those become part of
  // the supervision tree.
  //
  // The ctx argument carries the lifecycle contract and must be handled
  // properly. It stays live (ctx.Err() is nil) for as long as the runnable
  // should keep running, and is canceled (ctx.Err() is non-nil) once the
  // supervisor wants the runnable to exit. Because of that, ctx is also
  // suitable for passing to blocking operations.
  type Runnable func(ctx context.Context) error
  16. // RunGroup starts a set of runnables as a group. These runnables will run together, and if any one of them quits
  17. // unexpectedly, the result will be canceled and restarted.
  18. // The context here must be an existing Runnable context, and the spawned runnables will run under the node that this
  19. // context represents.
  20. func RunGroup(ctx context.Context, runnables map[string]Runnable) error {
  21. node, unlock := fromContext(ctx)
  22. defer unlock()
  23. return node.runGroup(runnables)
  24. }
  25. // Run starts a single runnable in its own group.
  26. func Run(ctx context.Context, name string, runnable Runnable) error {
  27. return RunGroup(ctx, map[string]Runnable{
  28. name: runnable,
  29. })
  30. }
  31. // Signal tells the supervisor that the calling runnable has reached a certain state of its lifecycle. All runnables
  32. // should SignalHealthy when they are ready with set up, running other child runnables and are now 'serving'.
  33. func Signal(ctx context.Context, signal SignalType) {
  34. node, unlock := fromContext(ctx)
  35. defer unlock()
  36. node.signal(signal)
  37. }
  // SignalType enumerates the lifecycle states that a runnable can report to
  // the supervisor through Signal.
  type SignalType int

  const (
  	// SignalHealthy means the runnable is healthy: it has finished its setup,
  	// finished spawning further Runnables, and is ready to serve in a loop.
  	// A runnable in this state still has to watch its parent context and exit
  	// once that context is done.
  	SignalHealthy SignalType = iota
  	// SignalDone means the runnable has nothing left to do and does not need
  	// to run any loop. It is intended for Runnables whose only job is to set
  	// up other child runnables. Such a runnable is restarted if a related
  	// failure happens somewhere in the supervision tree.
  	SignalDone
  )
  47. // Logger returns a Zap logger that will be named after the Distinguished Name of a the runnable (ie its place in the
  48. // supervision tree, dot-separated).
  49. func Logger(ctx context.Context) *zap.Logger {
  50. node, unlock := fromContext(ctx)
  51. defer unlock()
  52. return node.getLogger()
  53. }
  54. // supervisor represents and instance of the supervision system. It keeps track of a supervision tree and a request
  55. // channel to its internal processor goroutine.
  56. type supervisor struct {
  57. // mu guards the entire state of the supervisor.
  58. mu sync.RWMutex
  59. // root is the root node of the supervision tree, named 'root'. It represents the Runnable started with the
  60. // supervisor.New call.
  61. root *node
  62. // logger is the Zap logger used to create loggers available to runnables.
  63. logger *zap.Logger
  64. // ilogger is the Zap logger used for internal logging by the supervisor.
  65. ilogger *zap.Logger
  66. // pReq is an interface channel to the lifecycle processor of the supervisor.
  67. pReq chan *processorRequest
  68. // propagate panics, ie. don't catch them.
  69. propagatePanic bool
  70. }
  71. // SupervisorOpt are runtime configurable options for the supervisor.
  72. type SupervisorOpt func(s *supervisor)
  73. var (
  74. // WithPropagatePanic prevents the Supervisor from catching panics in runnables and treating them as failures.
  75. // This is useful to enable for testing and local debugging.
  76. WithPropagatePanic = func(s *supervisor) {
  77. s.propagatePanic = true
  78. }
  79. )
  80. // New creates a new supervisor with its root running the given root runnable.
  81. // The given context can be used to cancel the entire supervision tree.
  82. func New(ctx context.Context, logger *zap.Logger, rootRunnable Runnable, opts ...SupervisorOpt) *supervisor {
  83. sup := &supervisor{
  84. logger: logger,
  85. ilogger: logger.Named("supervisor"),
  86. pReq: make(chan *processorRequest),
  87. }
  88. for _, o := range opts {
  89. o(sup)
  90. }
  91. sup.root = newNode("root", rootRunnable, sup, nil)
  92. go sup.processor(ctx)
  93. sup.pReq <- &processorRequest{
  94. schedule: &processorRequestSchedule{dn: "root"},
  95. }
  96. return sup
  97. }