فهرست منبع

bridge: simple readiness check

Leo 5 سال پیش
والد
کامیت
561852d499
7 فایلهای تغییر یافته به همراه 110 افزوده شده و 7 حذف شده
  1. 8 5
      Tiltfile
  2. 11 0
      bridge/cmd/guardiand/bridge.go
  3. 4 0
      bridge/pkg/ethereum/watcher.go
  4. 68 0
      bridge/pkg/readiness/health.go
  5. 12 2
      bridge/pkg/solana/watcher.go
  6. 3 0
      bridge/pkg/terra/watcher.go
  7. 4 0
      devnet/bridge.yaml

+ 8 - 5
Tiltfile

@@ -61,17 +61,20 @@ def build_bridge_yaml():
 
     return encode_yaml_stream(bridge_yaml)
 
+
 k8s_yaml(build_bridge_yaml())
 
-k8s_resource("guardian", resource_deps = ["proto-gen"])
+k8s_resource("guardian", resource_deps=["proto-gen"], port_forwards=[
+    port_forward(6060, name="Debug Server [:6060]"),
+])
 
 # solana agent and cli (runs alongside bridge)
 
 docker_build(
-    ref = "solana-agent",
-    context = ".",
-    only = ["./proto", "./solana"],
-    dockerfile = "Dockerfile.agent",
+    ref="solana-agent",
+    context=".",
+    only=["./proto", "./solana"],
+    dockerfile="Dockerfile.agent",
 
     # Ignore target folders from local (non-container) development.
     ignore = ["./solana/target", "./solana/agent/target", "./solana/cli/target"],

+ 11 - 0
bridge/cmd/guardiand/bridge.go

@@ -22,6 +22,7 @@ import (
 	"github.com/certusone/wormhole/bridge/pkg/p2p"
 	"github.com/certusone/wormhole/bridge/pkg/processor"
 	gossipv1 "github.com/certusone/wormhole/bridge/pkg/proto/gossip/v1"
+	"github.com/certusone/wormhole/bridge/pkg/readiness"
 	solana "github.com/certusone/wormhole/bridge/pkg/solana"
 	"github.com/certusone/wormhole/bridge/pkg/supervisor"
 	"github.com/certusone/wormhole/bridge/pkg/vaa"
@@ -172,9 +173,19 @@ func runBridge(cmd *cobra.Command, args []string) {
 	// Override the default go-log config, which uses a magic environment variable.
 	ipfslog.SetAllLoggers(lvl)
 
+	// Register components for readiness checks.
+	readiness.RegisterComponent("ethSyncing")
+	readiness.RegisterComponent("solanaSyncing")
+	if *terraSupport {
+		readiness.RegisterComponent("terraSyncing")
+	}
+
 	// In devnet mode, we automatically set a number of flags that rely on deterministic keys.
 	if *unsafeDevMode {
 		go func() {
+			// TODO: once monitoring server is implemented, move this to that http server instance
+			http.HandleFunc("/readyz", readiness.Handler)
+
 			logger.Info("debug server listening on [::]:6060")
 			logger.Error("debug server crashed", zap.Error(http.ListenAndServe("[::]:6060", nil)))
 		}()

+ 4 - 0
bridge/pkg/ethereum/watcher.go

@@ -15,6 +15,7 @@ import (
 
 	"github.com/certusone/wormhole/bridge/pkg/common"
 	"github.com/certusone/wormhole/bridge/pkg/ethereum/abi"
+	"github.com/certusone/wormhole/bridge/pkg/readiness"
 	"github.com/certusone/wormhole/bridge/pkg/supervisor"
 	"github.com/certusone/wormhole/bridge/pkg/vaa"
 )
@@ -177,6 +178,9 @@ func (e *EthBridgeWatcher) Run(ctx context.Context) error {
 			case ev := <-headSink:
 				start := time.Now()
 				logger.Info("processing new header", zap.Stringer("block", ev.Number))
+
+				readiness.SetReady("ethSyncing")
+
 				e.pendingLocksGuard.Lock()
 
 				blockNumberU := ev.Number.Uint64()

+ 68 - 0
bridge/pkg/readiness/health.go

@@ -0,0 +1,68 @@
+// Package readiness implements a minimal health-checking mechanism for use as k8s readiness probes. It will always
+// return a "ready" state after the conditions have been met for the first time - it's not meant for monitoring.
+//
+// Uses a global singleton registry (similar to the Prometheus client's default behavior).
+package readiness
+
+import (
+	"bytes"
+	"fmt"
+	"net/http"
+	"sync"
+)
+
+var (
+	mu       = sync.Mutex{}      // mu serializes access to registry.
+	registry = map[string]bool{} // registry maps a component name to whether it has been marked ready.
+)
+
+// RegisterComponent registers the given component name such that it is required to be ready for the
+// global check to succeed. A newly registered component starts out as not ready.
+//
+// It panics if a component with the same name is already present in the registry.
+func RegisterComponent(component string) {
+	mu.Lock()
+	// Deferred so the mutex is released even when the duplicate check below panics. Without this, a
+	// recovered panic would leave the registry locked forever and block every later caller.
+	defer mu.Unlock()
+
+	if _, ok := registry[component]; ok {
+		panic("component already registered")
+	}
+	registry[component] = false
+}
+
+// SetReady marks the given component as ready in the global registry. Calling it again for a
+// component that is already ready changes nothing. A name that was never registered is stored as
+// ready as well, since the state is written with a plain map assignment.
+func SetReady(component string) {
+	mu.Lock()
+	defer mu.Unlock()
+
+	registry[component] = true
+}
+
+// Handler returns a net/http handler for the readiness check. It returns 200 OK if all components are ready,
+// or 412 Precondition Failed otherwise. For operator convenience, a list of components and their states
+// is returned as plain text (not meant for machine consumption!).
+func Handler(w http.ResponseWriter, r *http.Request) {
+	ready := true
+
+	resp := new(bytes.Buffer)
+	_, err := resp.Write([]byte("[not suitable for monitoring - do not parse]\n\n"))
+	if err != nil {
+		panic(err)
+	}
+
+	for k, v := range registry {
+		_, err = fmt.Fprintln(resp, fmt.Sprintf("%s\t%v", k, v))
+		if err != nil {
+			panic(err)
+		}
+
+		if !v {
+			ready = false
+		}
+	}
+
+	if !ready {
+		w.WriteHeader(http.StatusPreconditionFailed)
+	} else {
+		w.WriteHeader(http.StatusOK)
+	}
+
+	_, _ = resp.WriteTo(w)
+}

+ 12 - 2
bridge/pkg/solana/watcher.go

@@ -13,10 +13,11 @@ import (
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
 
-	agentv1 "github.com/certusone/wormhole/bridge/pkg/proto/agent/v1"
-
 	"go.uber.org/zap"
 
+	agentv1 "github.com/certusone/wormhole/bridge/pkg/proto/agent/v1"
+	"github.com/certusone/wormhole/bridge/pkg/readiness"
+
 	"github.com/certusone/wormhole/bridge/pkg/common"
 	"github.com/certusone/wormhole/bridge/pkg/supervisor"
 	"github.com/certusone/wormhole/bridge/pkg/vaa"
@@ -58,6 +59,15 @@ func (e *SolanaBridgeWatcher) Run(ctx context.Context) error {
 		return fmt.Errorf("failed to subscribe to token lockup events: %w", err)
 	}
 
+	// Check whether agent is up by doing a GetBalance call. This is a bit hacky, but otherwise, a broken agent won't
+	// fail until Recv(). Readiness is best-effort and if this succeeds, it's fair to assume that the watch does too.
+	balance, err := c.GetBalance(timeout, &agentv1.GetBalanceRequest{})
+	if err != nil {
+		return fmt.Errorf("failed to get balance: %v", err)
+	}
+	readiness.SetReady("solanaSyncing")
+	logger.Info("account balance", zap.Uint64("lamports", balance.Balance))
+
 	go func() {
 		logger.Info("watching for on-chain events")
 

+ 3 - 0
bridge/pkg/terra/watcher.go

@@ -12,6 +12,7 @@ import (
 	eth_common "github.com/ethereum/go-ethereum/common"
 
 	"github.com/certusone/wormhole/bridge/pkg/common"
+	"github.com/certusone/wormhole/bridge/pkg/readiness"
 	"github.com/certusone/wormhole/bridge/pkg/supervisor"
 	"github.com/certusone/wormhole/bridge/pkg/vaa"
 	"github.com/gorilla/websocket"
@@ -80,6 +81,8 @@ func (e *BridgeWatcher) Run(ctx context.Context) error {
 	}
 	logger.Info("subscribed to new transaction events")
 
+	readiness.SetReady("terraSyncing")
+
 	go func() {
 		defer close(errC)
 

+ 4 - 0
devnet/bridge.yaml

@@ -89,6 +89,10 @@ spec:
               add:
                 # required for syscall.Mlockall
                 - IPC_LOCK
+          readinessProbe:
+            httpGet:
+              port: 6060
+              path: /readyz
           ports:
             - containerPort: 8999
               name: p2p