Bladeren bron

fix(fortuna): Refactor and improve timestamp lag gauge (#1632)

Set INF value for gauge if RPC is not accessible.
This would trigger the alerts on the monitoring service.
We will not do the same for other gauges (e.g. set balance to 0) since
that's just duplicate alerts for the same underlying symptom.
Amin Moghaddam 1 jaar geleden
bovenliggende
commit
c76d205e90
3 gewijzigde bestanden met toevoegingen van 51 en 45 verwijderingen
  1. 1 1
      apps/fortuna/Cargo.lock
  2. 1 1
      apps/fortuna/Cargo.toml
  3. 49 43
      apps/fortuna/src/command/run.rs

+ 1 - 1
apps/fortuna/Cargo.lock

@@ -1488,7 +1488,7 @@ dependencies = [
 
 [[package]]
 name = "fortuna"
-version = "6.0.0"
+version = "6.0.1"
 dependencies = [
  "anyhow",
  "axum",

+ 1 - 1
apps/fortuna/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name    = "fortuna"
-version = "6.0.0"
+version = "6.0.1"
 edition = "2021"
 
 [dependencies]

+ 49 - 43
apps/fortuna/src/command/run.rs

@@ -301,6 +301,50 @@ pub struct ChainLabel {
     pub chain_id: String,
 }
 
+
+#[tracing::instrument(name = "block_timestamp_lag", skip_all, fields(chain_id = chain_id))]
+pub async fn check_block_timestamp_lag(
+    chain_id: String,
+    chain_config: EthereumConfig,
+    metrics: Family<ChainLabel, Gauge>,
+) {
+    let provider = match Provider::<Http>::try_from(&chain_config.geth_rpc_addr) {
+        Ok(r) => r,
+        Err(e) => {
+            tracing::error!("Failed to create provider for chain id - {:?}", e);
+            return;
+        }
+    };
+
+    const INF_LAG: i64 = 1000000; // value that definitely triggers an alert
+    let lag = match provider.get_block(BlockNumber::Latest).await {
+        Ok(block) => match block {
+            Some(block) => {
+                let block_timestamp = block.timestamp;
+                let server_timestamp = SystemTime::now()
+                    .duration_since(UNIX_EPOCH)
+                    .unwrap()
+                    .as_secs();
+                let lag: i64 = (server_timestamp as i64) - (block_timestamp.as_u64() as i64);
+                lag
+            }
+            None => {
+                tracing::error!("Block is None");
+                INF_LAG
+            }
+        },
+        Err(e) => {
+            tracing::error!("Failed to get block - {:?}", e);
+            INF_LAG
+        }
+    };
+    metrics
+        .get_or_create(&ChainLabel {
+            chain_id: chain_id.clone(),
+        })
+        .set(lag);
+}
+
 /// Tracks the difference between the server timestamp and the latest block timestamp for each chain
 pub async fn track_block_timestamp_lag(config: Config, metrics_registry: Arc<RwLock<Registry>>) {
     let metrics = Family::<ChainLabel, Gauge>::default();
@@ -311,49 +355,11 @@ pub async fn track_block_timestamp_lag(config: Config, metrics_registry: Arc<RwL
     );
     loop {
         for (chain_id, chain_config) in &config.chains {
-            let chain_id = chain_id.clone();
-            let chain_config = chain_config.clone();
-            let metrics = metrics.clone();
-
-            spawn(async move {
-                let chain_id = chain_id.clone();
-                let chain_config = chain_config.clone();
-
-                let provider = match Provider::<Http>::try_from(&chain_config.geth_rpc_addr) {
-                    Ok(r) => r,
-                    Err(e) => {
-                        tracing::error!(
-                            "Failed to create provider for chain id {} - {:?}",
-                            &chain_id,
-                            e
-                        );
-                        return;
-                    }
-                };
-
-                match provider.get_block(BlockNumber::Latest).await {
-                    Ok(b) => {
-                        if let Some(block) = b {
-                            let block_timestamp = block.timestamp;
-                            let server_timestamp = SystemTime::now()
-                                .duration_since(UNIX_EPOCH)
-                                .unwrap()
-                                .as_secs();
-                            let lag: i64 =
-                                (server_timestamp as i64) - (block_timestamp.as_u64() as i64);
-
-                            metrics
-                                .get_or_create(&ChainLabel {
-                                    chain_id: chain_id.clone(),
-                                })
-                                .set(lag);
-                        }
-                    }
-                    Err(e) => {
-                        tracing::error!("Failed to get block for chain id {} - {:?}", &chain_id, e);
-                    }
-                };
-            });
+            spawn(check_block_timestamp_lag(
+                chain_id.clone(),
+                chain_config.clone(),
+                metrics.clone(),
+            ));
         }
 
         time::sleep(TRACK_INTERVAL).await;