query_server.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. // Note: To generate a signer key file do: guardiand keygen --block-type "CCQ SERVER SIGNING KEY" /path/to/key/file
  2. // You will need to add this key to ccqAllowedRequesters in the guardian configs.
  3. package ccq
  4. import (
  5. "context"
  6. "crypto/ecdsa"
  7. "fmt"
  8. "net/http"
  9. "os"
  10. "os/signal"
  11. "syscall"
  12. "time"
  13. "github.com/certusone/wormhole/node/pkg/common"
  14. "github.com/certusone/wormhole/node/pkg/p2p"
  15. "github.com/certusone/wormhole/node/pkg/telemetry"
  16. promremotew "github.com/certusone/wormhole/node/pkg/telemetry/prom_remote_write"
  17. "github.com/certusone/wormhole/node/pkg/version"
  18. ethCrypto "github.com/ethereum/go-ethereum/crypto"
  19. ipfslog "github.com/ipfs/go-log/v2"
  20. "github.com/libp2p/go-libp2p/core/crypto"
  21. "github.com/spf13/cobra"
  22. "go.uber.org/zap"
  23. )
  24. const CCQ_SERVER_SIGNING_KEY = "CCQ SERVER SIGNING KEY"
  25. var (
  26. envStr *string
  27. p2pNetworkID *string
  28. p2pPort *uint
  29. p2pBootstrap *string
  30. listenAddr *string
  31. nodeKeyPath *string
  32. signerKeyPath *string
  33. permFile *string
  34. ethRPC *string
  35. ethContract *string
  36. logLevel *string
  37. telemetryLokiURL *string
  38. telemetryNodeName *string
  39. statusAddr *string
  40. promRemoteURL *string
  41. shutdownDelay1 *uint
  42. shutdownDelay2 *uint
  43. monitorPeers *bool
  44. gossipAdvertiseAddress *string
  45. verifyPermissions *bool
  46. )
  47. const DEV_NETWORK_ID = "/wormhole/dev"
  48. func init() {
  49. envStr = QueryServerCmd.Flags().String("env", "", "environment (devnet, testnet, mainnet)")
  50. p2pNetworkID = QueryServerCmd.Flags().String("network", "", "P2P network identifier (optional, overrides default for environment)")
  51. p2pPort = QueryServerCmd.Flags().Uint("port", 8995, "P2P UDP listener port")
  52. p2pBootstrap = QueryServerCmd.Flags().String("bootstrap", "", "P2P bootstrap peers (optional for testnet or mainnet, overrides default, required for devnet)")
  53. nodeKeyPath = QueryServerCmd.Flags().String("nodeKey", "", "Path to node key (will be generated if it doesn't exist)")
  54. signerKeyPath = QueryServerCmd.Flags().String("signerKey", "", "Path to key used to sign unsigned queries")
  55. listenAddr = QueryServerCmd.Flags().String("listenAddr", "[::]:6069", "Listen address for query server (disabled if blank)")
  56. permFile = QueryServerCmd.Flags().String("permFile", "", "JSON file containing permissions configuration")
  57. ethRPC = QueryServerCmd.Flags().String("ethRPC", "", "Ethereum RPC for fetching current guardian set")
  58. ethContract = QueryServerCmd.Flags().String("ethContract", "", "Ethereum core bridge address for fetching current guardian set")
  59. logLevel = QueryServerCmd.Flags().String("logLevel", "info", "Logging level (debug, info, warn, error, dpanic, panic, fatal)")
  60. telemetryLokiURL = QueryServerCmd.Flags().String("telemetryLokiURL", "", "Loki cloud logging URL")
  61. telemetryNodeName = QueryServerCmd.Flags().String("telemetryNodeName", "", "Node name used in telemetry")
  62. statusAddr = QueryServerCmd.Flags().String("statusAddr", "[::]:6060", "Listen address for status server (disabled if blank)")
  63. promRemoteURL = QueryServerCmd.Flags().String("promRemoteURL", "", "Prometheus remote write URL (Grafana)")
  64. monitorPeers = QueryServerCmd.Flags().Bool("monitorPeers", false, "Should monitor bootstrap peers and attempt to reconnect")
  65. gossipAdvertiseAddress = QueryServerCmd.Flags().String("gossipAdvertiseAddress", "", "External IP to advertize on P2P (use if behind a NAT or running in k8s)")
  66. verifyPermissions = QueryServerCmd.Flags().Bool("verifyPermissions", false, `parse and verify the permissions file and then exit with 0 if success, 1 if failure`)
  67. // The default health check monitoring is every five seconds, with a five second timeout, and you have to miss two, for 20 seconds total.
  68. shutdownDelay1 = QueryServerCmd.Flags().Uint("shutdownDelay1", 25, "Seconds to delay after disabling health check on shutdown")
  69. // The guardians will wait up to 60 seconds before giving up on a request.
  70. shutdownDelay2 = QueryServerCmd.Flags().Uint("shutdownDelay2", 65, "Seconds to wait after delay1 for pending requests to complete")
  71. }
  72. var QueryServerCmd = &cobra.Command{
  73. Use: "query-server",
  74. Short: "Run the cross-chain query server",
  75. Run: runQueryServer,
  76. }
  77. func runQueryServer(cmd *cobra.Command, args []string) {
  78. env, err := common.ParseEnvironment(*envStr)
  79. if err != nil || (env != common.UnsafeDevNet && env != common.TestNet && env != common.MainNet) {
  80. if *envStr == "" {
  81. fmt.Println("Please specify --env")
  82. } else {
  83. fmt.Println("Invalid value for --env, should be devnet, testnet or mainnet", zap.String("val", *envStr))
  84. }
  85. os.Exit(1)
  86. }
  87. if *verifyPermissions {
  88. _, err := parseConfigFile(*permFile, env)
  89. if err != nil {
  90. fmt.Println(err)
  91. os.Exit(1)
  92. }
  93. os.Exit(0)
  94. }
  95. common.SetRestrictiveUmask()
  96. // Setup logging
  97. lvl, err := ipfslog.LevelFromString(*logLevel)
  98. if err != nil {
  99. fmt.Println("Invalid log level")
  100. os.Exit(1)
  101. }
  102. logger := ipfslog.Logger("query-server").Desugar()
  103. ipfslog.SetAllLoggers(lvl)
  104. if *p2pNetworkID == "" {
  105. *p2pNetworkID = p2p.GetNetworkId(env)
  106. } else if env != common.UnsafeDevNet {
  107. logger.Warn("overriding default p2p network ID", zap.String("p2pNetworkID", *p2pNetworkID))
  108. }
  109. if *p2pNetworkID == DEV_NETWORK_ID && env != common.UnsafeDevNet {
  110. logger.Fatal("May not set --network to dev unless --env is also dev", zap.String("network", *p2pNetworkID), zap.String("env", *envStr))
  111. }
  112. networkID := *p2pNetworkID + "/ccq"
  113. if *p2pBootstrap == "" {
  114. *p2pBootstrap, err = p2p.GetCcqBootstrapPeers(env)
  115. if err != nil {
  116. logger.Fatal("failed to determine the bootstrap peers from the environment", zap.String("env", string(env)), zap.Error(err))
  117. }
  118. } else if env != common.UnsafeDevNet {
  119. logger.Warn("overriding default p2p bootstrap peers", zap.String("p2pBootstrap", *p2pBootstrap))
  120. }
  121. if *telemetryLokiURL != "" {
  122. logger.Info("Using Loki telemetry logger")
  123. if *telemetryNodeName == "" {
  124. logger.Fatal("if --telemetryLokiURL is specified --telemetryNodeName must be specified")
  125. }
  126. labels := map[string]string{
  127. "network": *p2pNetworkID,
  128. "node_name": *telemetryNodeName,
  129. "version": version.Version(),
  130. }
  131. tm, err := telemetry.NewLokiCloudLogger(context.Background(), logger, *telemetryLokiURL, "ccq_server", true, labels)
  132. if err != nil {
  133. logger.Fatal("Failed to initialize telemetry", zap.Error(err))
  134. }
  135. defer tm.Close()
  136. logger = tm.WrapLogger(logger) // Wrap logger with telemetry logger
  137. }
  138. // Verify flags
  139. if *nodeKeyPath == "" {
  140. logger.Fatal("Please specify --nodeKey")
  141. }
  142. if *p2pBootstrap == "" {
  143. logger.Fatal("Please specify --bootstrap")
  144. }
  145. if *permFile == "" {
  146. logger.Fatal("Please specify --permFile")
  147. }
  148. if *ethRPC == "" {
  149. logger.Fatal("Please specify --ethRPC")
  150. }
  151. if *ethContract == "" {
  152. logger.Fatal("Please specify --ethContract")
  153. }
  154. permissions, err := NewPermissions(*permFile, env)
  155. if err != nil {
  156. logger.Fatal("Failed to load permissions file", zap.String("permFile", *permFile), zap.Error(err))
  157. }
  158. loggingMap := NewLoggingMap()
  159. // Load p2p private key
  160. var priv crypto.PrivKey
  161. priv, err = common.GetOrCreateNodeKey(logger, *nodeKeyPath)
  162. if err != nil {
  163. logger.Fatal("Failed to load node key", zap.Error(err))
  164. }
  165. var signerKey *ecdsa.PrivateKey
  166. if *signerKeyPath != "" {
  167. signerKey, err = common.LoadArmoredKey(*signerKeyPath, CCQ_SERVER_SIGNING_KEY, false)
  168. if err != nil {
  169. logger.Fatal("Failed to loader signer key", zap.Error(err))
  170. }
  171. logger.Info("will sign unsigned requests if api key supports it", zap.Stringer("signingKey", ethCrypto.PubkeyToAddress(signerKey.PublicKey)))
  172. }
  173. ctx, cancel := context.WithCancel(context.Background())
  174. defer cancel()
  175. // Run p2p
  176. pendingResponses := NewPendingResponses(logger)
  177. p2p, err := runP2P(ctx, priv, *p2pPort, networkID, *p2pBootstrap, *ethRPC, *ethContract, pendingResponses, logger, *monitorPeers, loggingMap, *gossipAdvertiseAddress)
  178. if err != nil {
  179. logger.Fatal("Failed to start p2p", zap.Error(err))
  180. }
  181. // Start the HTTP server
  182. go func() {
  183. s := NewHTTPServer(*listenAddr, p2p.topic_req, permissions, signerKey, pendingResponses, logger, env, loggingMap)
  184. logger.Sugar().Infof("Server listening on %s", *listenAddr)
  185. err := s.ListenAndServe()
  186. if err != nil && err != http.ErrServerClosed {
  187. logger.Fatal("Server closed unexpectedly", zap.Error(err))
  188. }
  189. }()
  190. // Start the status server
  191. var statServer *statusServer
  192. if *statusAddr != "" {
  193. statServer = NewStatusServer(*statusAddr, logger, env)
  194. go func() {
  195. logger.Sugar().Infof("Status server listening on %s", *statusAddr)
  196. err := statServer.httpServer.ListenAndServe()
  197. if err != nil && err != http.ErrServerClosed {
  198. logger.Fatal("Status server closed unexpectedly", zap.Error(err))
  199. }
  200. }()
  201. }
  202. // Start the Prometheus scraper
  203. usingPromRemoteWrite := *promRemoteURL != ""
  204. if usingPromRemoteWrite {
  205. var info promremotew.PromTelemetryInfo
  206. info.PromRemoteURL = *promRemoteURL
  207. info.Labels = map[string]string{
  208. "node_name": *telemetryNodeName,
  209. "network": *p2pNetworkID,
  210. "version": version.Version(),
  211. "product": "ccq_server",
  212. }
  213. err := RunPrometheusScraper(ctx, logger, info)
  214. if err != nil {
  215. logger.Fatal("Failed to start prometheus scraper", zap.Error(err))
  216. }
  217. }
  218. // Handle SIGTERM
  219. sigterm := make(chan os.Signal, 1)
  220. signal.Notify(sigterm, syscall.SIGTERM)
  221. go func() {
  222. <-sigterm
  223. if statServer != nil && *shutdownDelay1 != 0 {
  224. logger.Info("Received sigterm. disabling health checks and pausing.")
  225. statServer.disableHealth()
  226. time.Sleep(time.Duration(*shutdownDelay1) * time.Second)
  227. numPending := 0
  228. logger.Info("Waiting for any outstanding requests to complete before shutting down.")
  229. for count := 0; count < int(*shutdownDelay2); count++ {
  230. time.Sleep(time.Second)
  231. numPending = pendingResponses.NumPending()
  232. if numPending == 0 {
  233. break
  234. }
  235. }
  236. if numPending == 0 {
  237. logger.Info("Done waiting. shutting down.")
  238. } else {
  239. logger.Error("Gave up waiting for pending requests to finish. shutting down anyway.", zap.Int("numStillPending", numPending))
  240. }
  241. } else {
  242. logger.Info("Received sigterm. exiting.")
  243. }
  244. cancel()
  245. }()
  246. // Start watching for permissions file updates.
  247. errC := make(chan error)
  248. permissions.StartWatcher(ctx, logger, errC)
  249. // Star logging cleanup process.
  250. loggingMap.Start(ctx, logger, errC)
  251. // Wait for either a shutdown or a fatal error from the permissions watcher.
  252. select {
  253. case <-ctx.Done():
  254. logger.Info("Context cancelled, exiting...")
  255. break
  256. case err := <-errC:
  257. logger.Error("Encountered an error, exiting", zap.Error(err))
  258. break
  259. }
  260. // Stop the permissions file watcher.
  261. permissions.StopWatcher()
  262. // Shutdown p2p. Without this the same host won't properly discover peers until some timeout
  263. p2p.sub.Cancel()
  264. if err := p2p.topic_req.Close(); err != nil {
  265. logger.Error("Error closing the request topic", zap.Error(err))
  266. }
  267. if err := p2p.topic_resp.Close(); err != nil {
  268. logger.Error("Error closing the response topic", zap.Error(err))
  269. }
  270. if err := p2p.host.Close(); err != nil {
  271. logger.Error("Error closing the host", zap.Error(err))
  272. }
  273. }