block.rs

use {
    crate::{
        api::BlockchainState,
        chain::{ethereum::InstrumentedSignablePythContract, reader::BlockNumber},
        eth_utils::utils::EscalationPolicy,
        history::History,
        keeper::{
            keeper_metrics::{ChainIdLabel, KeeperMetrics},
            process_event::process_event_with_backoff,
        },
    },
    anyhow::Result,
    ethers::types::U256,
    std::{
        collections::HashSet,
        sync::Arc,
        time::{SystemTime, UNIX_EPOCH},
    },
    tokio::{
        spawn,
        sync::{mpsc, RwLock},
        time::{self, Duration},
    },
    tracing::{self, Instrument},
};

/// How long to wait before retrying in case of an RPC error
const RETRY_INTERVAL: Duration = Duration::from_secs(5);
/// How many blocks to fetch events for in a single RPC call
const BLOCK_BATCH_SIZE: u64 = 100;
/// How long to wait before polling for the next latest block
const POLL_INTERVAL: Duration = Duration::from_secs(2);

#[derive(Debug, Clone)]
pub struct BlockRange {
    pub from: BlockNumber,
    pub to: BlockNumber,
}

#[derive(Clone)]
pub struct ProcessParams {
    pub contract: Arc<InstrumentedSignablePythContract>,
    pub gas_limit: U256,
    pub escalation_policy: EscalationPolicy,
    pub chain_state: BlockchainState,
    pub metrics: Arc<KeeperMetrics>,
    pub history: Arc<History>,
    pub fulfilled_requests_cache: Arc<RwLock<HashSet<u64>>>,
}

/// Get the latest safe block number for the chain. Retry internally if there is an error.
pub async fn get_latest_safe_block(chain_state: &BlockchainState) -> BlockNumber {
    loop {
        match chain_state
            .contract
            .get_block_number(chain_state.confirmed_block_status)
            .await
        {
            Ok(latest_confirmed_block) => {
                tracing::info!(
                    "Fetched latest safe block {}",
                    latest_confirmed_block - chain_state.reveal_delay_blocks
                );
                return latest_confirmed_block - chain_state.reveal_delay_blocks;
            }
            Err(e) => {
                tracing::error!("Error while getting block number. error: {:?}", e);
                time::sleep(RETRY_INTERVAL).await;
            }
        }
    }
}

/// Process a range of blocks in batches. It calls the `process_single_block_batch` method for each batch.
#[tracing::instrument(skip_all, fields(
    range_from_block = block_range.from, range_to_block = block_range.to
))]
pub async fn process_block_range(block_range: BlockRange, process_params: ProcessParams) {
    let BlockRange {
        from: first_block,
        to: last_block,
    } = block_range;
    let mut current_block = first_block;
    while current_block <= last_block {
        let mut to_block = current_block + BLOCK_BATCH_SIZE;
        if to_block > last_block {
            to_block = last_block;
        }

        // TODO: this handles all batches sequentially; we may want to handle them in parallel in the future.
        process_single_block_batch(
            BlockRange {
                from: current_block,
                to: to_block,
            },
            process_params.clone(),
        )
        .in_current_span()
        .await;

        current_block = to_block + 1;
    }
}
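
// Illustrative sketch (hypothetical helper, not part of the original module): the batch
// boundaries that `process_block_range` above walks through for a given range, mirroring
// its `BLOCK_BATCH_SIZE` chunking. Included only to make the batching arithmetic explicit.
#[allow(dead_code)]
fn batch_boundaries_sketch(range: &BlockRange) -> Vec<BlockRange> {
    let mut batches = Vec::new();
    let mut current_block = range.from;
    while current_block <= range.to {
        // Same clamping as in `process_block_range`: never run past the end of the range.
        let mut to_block = current_block + BLOCK_BATCH_SIZE;
        if to_block > range.to {
            to_block = range.to;
        }
        batches.push(BlockRange {
            from: current_block,
            to: to_block,
        });
        current_block = to_block + 1;
    }
    batches
}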

/// Process a batch of blocks for a chain. It will fetch events for all the blocks in a single call for the provided batch
/// and then try to process them one by one. It checks the `fulfilled_requests_cache`: if a task was already spawned for a
/// request's sequence number, the request is skipped instead of being processed again.
/// If fetching the events fails, it will retry indefinitely.
#[tracing::instrument(name = "batch", skip_all, fields(
    batch_from_block = block_range.from, batch_to_block = block_range.to
))]
pub async fn process_single_block_batch(block_range: BlockRange, process_params: ProcessParams) {
    let label = ChainIdLabel {
        chain_id: process_params.chain_state.id.clone(),
    };
    loop {
        let events_res = process_params
            .chain_state
            .contract
            .get_request_with_callback_events(
                block_range.from,
                block_range.to,
                process_params.chain_state.provider_address,
            )
            .await;

        // Only update metrics if we successfully retrieved events.
        if events_res.is_ok() {
            // Track the last time blocks were processed. If anything happens to the processing thread, the
            // timestamp will lag, which will trigger an alert.
            let server_timestamp = SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .map(|duration| duration.as_secs() as i64)
                .unwrap_or(0);
            process_params
                .metrics
                .process_event_timestamp
                .get_or_create(&label)
                .set(server_timestamp);

            let current_block = process_params
                .metrics
                .process_event_block_number
                .get_or_create(&label)
                .get();
            if block_range.to > current_block as u64 {
                process_params
                    .metrics
                    .process_event_block_number
                    .get_or_create(&label)
                    .set(block_range.to as i64);
            }
        }

        match events_res {
            Ok(events) => {
                tracing::info!(num_of_events = &events.len(), "Processing");
                for event in &events {
                    // The write lock guarantees we spawn only one task per sequence number.
                    let newly_inserted = process_params
                        .fulfilled_requests_cache
                        .write()
                        .await
                        .insert(event.sequence_number);
                    if newly_inserted {
                        spawn(
                            process_event_with_backoff(event.clone(), process_params.clone())
                                .in_current_span(),
                        );
                    }
                }
                tracing::info!(num_of_events = &events.len(), "Processed");
                break;
            }
            Err(e) => {
                tracing::error!(
                    "Error while getting events. Waiting for {} seconds before retry. error: {:?}",
                    RETRY_INTERVAL.as_secs(),
                    e
                );
                time::sleep(RETRY_INTERVAL).await;
            }
        }
    }
}
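
// Illustrative sketch (hypothetical helper, not part of the original module): the
// deduplication above relies on `HashSet::insert` returning `false` for sequence numbers
// already present in `fulfilled_requests_cache`, so only the first batch that sees a
// request spawns a processing task for it.
#[allow(dead_code)]
async fn try_claim_sequence_sketch(
    fulfilled_requests_cache: &RwLock<HashSet<u64>>,
    sequence_number: u64,
) -> bool {
    // Returns true only for the first caller to claim this sequence number.
    fulfilled_requests_cache
        .write()
        .await
        .insert(sequence_number)
}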

/// Wrapper for the `watch_blocks` method. If there was an error while watching, it will retry after a delay.
/// It retries indefinitely.
#[tracing::instrument(name = "watch_blocks", skip_all, fields(
    initial_safe_block = latest_safe_block
))]
pub async fn watch_blocks_wrapper(
    chain_state: BlockchainState,
    latest_safe_block: BlockNumber,
    tx: mpsc::Sender<BlockRange>,
    retry_previous_blocks: u64,
) {
    let mut last_safe_block_processed = latest_safe_block;
    loop {
        if let Err(e) = watch_blocks(
            chain_state.clone(),
            &mut last_safe_block_processed,
            tx.clone(),
            retry_previous_blocks,
        )
        .in_current_span()
        .await
        {
            tracing::error!("Error while watching blocks. error: {:?}", e);
            time::sleep(RETRY_INTERVAL).await;
        }
    }
}

/// Watch for new blocks and send the range of blocks for which events have not been handled to the `tx` channel.
/// We subscribe to new blocks instead of events. If we miss some blocks, it is fine, as we send block ranges to
/// the `tx` channel. If we had subscribed to events instead, we could have missed some and would not even know
/// about it.
pub async fn watch_blocks(
    chain_state: BlockchainState,
    last_safe_block_processed: &mut BlockNumber,
    tx: mpsc::Sender<BlockRange>,
    retry_previous_blocks: u64,
) -> Result<()> {
    tracing::info!("Watching blocks to handle new events");
    loop {
        time::sleep(POLL_INTERVAL).await;
        let latest_safe_block = get_latest_safe_block(&chain_state).in_current_span().await;
        if latest_safe_block > *last_safe_block_processed {
            let mut from = latest_safe_block.saturating_sub(retry_previous_blocks);

            // In a normal situation, the difference between the latest and last processed safe block should not be
            // more than 2-3 blocks (for Arbitrum it can be 10).
            // TODO: add a metric for this in a separate PR. We need alerts.
            // In an extreme situation, where we were unable to send the block range multiple times, the difference
            // between `latest_safe_block` and `last_safe_block_processed` can grow. It is fine not to have the retry
            // mechanism for those earliest blocks, as we expect the RPC to be consistent by then.
            if from > *last_safe_block_processed {
                from = *last_safe_block_processed;
            }
            match tx
                .send(BlockRange {
                    from,
                    to: latest_safe_block,
                })
                .await
            {
                Ok(_) => {
                    tracing::info!(
                        from_block = from,
                        to_block = &latest_safe_block,
                        "Block range sent to handle events",
                    );
                    *last_safe_block_processed = latest_safe_block;
                }
                Err(e) => {
                    tracing::error!(
                        from_block = from,
                        to_block = &latest_safe_block,
                        "Error while sending block range to handle events. These will be handled in the next call. error: {:?}",
                        e
                    );
                }
            };
        }
    }
}
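
// Illustrative sketch (hypothetical helper, not part of the original module): how
// `watch_blocks` above picks the start of the next range -- look back
// `retry_previous_blocks` from the latest safe block, but never skip past the last
// block that was already processed.
#[allow(dead_code)]
fn next_range_start_sketch(
    latest_safe_block: BlockNumber,
    last_safe_block_processed: BlockNumber,
    retry_previous_blocks: u64,
) -> BlockNumber {
    let mut from = latest_safe_block.saturating_sub(retry_previous_blocks);
    if from > last_safe_block_processed {
        // Clamp so no blocks are skipped between the last processed block and `from`.
        from = last_safe_block_processed;
    }
    from
}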

/// Waits on the `rx` channel to receive block ranges and then calls `process_block_range` to process them:
/// first immediately, and then once more for each configured block delay.
#[tracing::instrument(skip_all)]
pub async fn process_new_blocks(
    process_params: ProcessParams,
    mut rx: mpsc::Receiver<BlockRange>,
    block_delays: Vec<u64>,
) {
    tracing::info!("Waiting for new block ranges to process");
    loop {
        if let Some(block_range) = rx.recv().await {
            // Process blocks immediately first.
            process_block_range(block_range.clone(), process_params.clone())
                .in_current_span()
                .await;

            // Then process with each configured delay.
            for delay in &block_delays {
                let adjusted_range = BlockRange {
                    from: block_range.from.saturating_sub(*delay),
                    to: block_range.to.saturating_sub(*delay),
                };
                process_block_range(adjusted_range, process_params.clone())
                    .in_current_span()
                    .await;
            }
        }
    }
}

/// Processes the backlog_range for a chain.
/// It processes the backlog range once immediately and then once for each configured block delay.
#[tracing::instrument(skip_all)]
pub async fn process_backlog(
    process_params: ProcessParams,
    backlog_range: BlockRange,
    block_delays: Vec<u64>,
) {
    tracing::info!("Processing backlog");
    // Process blocks immediately first.
    process_block_range(backlog_range.clone(), process_params.clone())
        .in_current_span()
        .await;

    // Then process with each configured delay.
    for delay in &block_delays {
        let adjusted_range = BlockRange {
            from: backlog_range.from.saturating_sub(*delay),
            to: backlog_range.to.saturating_sub(*delay),
        };
        process_block_range(adjusted_range, process_params.clone())
            .in_current_span()
            .await;
    }
    tracing::info!("Backlog processed");
}
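
// Illustrative wiring sketch (hypothetical; the function name, channel capacity, and the
// `retry_previous_blocks` value of 5 are assumptions, not part of this module): a keeper
// task might connect `watch_blocks_wrapper` to `process_new_blocks` over an mpsc channel
// after first draining the backlog with `process_backlog`.
#[allow(dead_code)]
async fn run_block_pipeline_sketch(
    process_params: ProcessParams,
    chain_state: BlockchainState,
    backlog_range: BlockRange,
    block_delays: Vec<u64>,
) {
    let latest_safe_block = backlog_range.to;
    let (tx, rx) = mpsc::channel::<BlockRange>(1000);
    // Producer: watches for new safe blocks and sends unprocessed ranges on `tx`.
    spawn(watch_blocks_wrapper(chain_state, latest_safe_block, tx, 5).in_current_span());
    // Catch up on the backlog first, then consume new block ranges indefinitely.
    process_backlog(process_params.clone(), backlog_range, block_delays.clone()).await;
    process_new_blocks(process_params, rx, block_delays).await;
}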