Преглед изворни кода

svm: optimize local program cache creation (#6036)

* incrementally populate the local program cache after loading each transaction
* use loaded accounts to filter and replenish the cache, skipping all accounts-db fetching
* no longer populate the cache for fee-only or discarded transactions
* fix various bugs with tx_usage_counter
* remove ix_usage_counter
hana пре 3 месеци
родитељ
комит
5bf937dcf1

+ 1 - 2
program-runtime/src/invoke_context.rs

@@ -34,7 +34,7 @@ use {
         IndexOfAccount, InstructionAccount, TransactionAccount, TransactionContext,
         MAX_ACCOUNTS_PER_TRANSACTION,
     },
-    solana_type_overrides::sync::{atomic::Ordering, Arc},
+    solana_type_overrides::sync::Arc,
     std::{
         alloc::Layout,
         cell::RefCell,
@@ -593,7 +593,6 @@ impl<'a> InvokeContext<'a> {
             _ => None,
         }
         .ok_or(InstructionError::UnsupportedProgramId)?;
-        entry.ix_usage_counter.fetch_add(1, Ordering::Relaxed);
 
         let program_id = *instruction_context.get_program_key(self.transaction_context)?;
         self.transaction_context

+ 69 - 68
program-runtime/src/loaded_programs.rs

@@ -189,9 +189,7 @@ pub struct ProgramCacheEntry {
     /// Slot in which this entry will become active (can be in the future)
     pub effective_slot: Slot,
     /// How often this entry was used by a transaction
-    pub tx_usage_counter: AtomicU64,
-    /// How often this entry was used by an instruction
-    pub ix_usage_counter: AtomicU64,
+    pub tx_usage_counter: Arc<AtomicU64>,
     /// Latest slot in which the entry was used
     pub latest_access_slot: AtomicU64,
 }
@@ -413,9 +411,8 @@ impl ProgramCacheEntry {
             account_owner: ProgramCacheEntryOwner::try_from(loader_key).unwrap(),
             account_size,
             effective_slot,
-            tx_usage_counter: AtomicU64::new(0),
+            tx_usage_counter: Arc::<AtomicU64>::default(),
             program: ProgramCacheEntryType::Loaded(executable),
-            ix_usage_counter: AtomicU64::new(0),
             latest_access_slot: AtomicU64::new(0),
         })
     }
@@ -437,8 +434,7 @@ impl ProgramCacheEntry {
             account_size: self.account_size,
             deployment_slot: self.deployment_slot,
             effective_slot: self.effective_slot,
-            tx_usage_counter: AtomicU64::new(self.tx_usage_counter.load(Ordering::Relaxed)),
-            ix_usage_counter: AtomicU64::new(self.ix_usage_counter.load(Ordering::Relaxed)),
+            tx_usage_counter: self.tx_usage_counter.clone(),
             latest_access_slot: AtomicU64::new(self.latest_access_slot.load(Ordering::Relaxed)),
         })
     }
@@ -458,9 +454,8 @@ impl ProgramCacheEntry {
             account_owner: ProgramCacheEntryOwner::NativeLoader,
             account_size,
             effective_slot: deployment_slot,
-            tx_usage_counter: AtomicU64::new(0),
+            tx_usage_counter: Arc::<AtomicU64>::default(),
             program: ProgramCacheEntryType::Builtin(program),
-            ix_usage_counter: AtomicU64::new(0),
             latest_access_slot: AtomicU64::new(0),
         }
     }
@@ -469,6 +464,20 @@ impl ProgramCacheEntry {
         slot: Slot,
         account_owner: ProgramCacheEntryOwner,
         reason: ProgramCacheEntryType,
+    ) -> Self {
+        Self::new_tombstone_with_usage_counter(
+            slot,
+            account_owner,
+            reason,
+            Arc::<AtomicU64>::default(),
+        )
+    }
+
+    pub fn new_tombstone_with_usage_counter(
+        slot: Slot,
+        account_owner: ProgramCacheEntryOwner,
+        reason: ProgramCacheEntryType,
+        tx_usage_counter: Arc<AtomicU64>,
     ) -> Self {
         let tombstone = Self {
             program: reason,
@@ -476,8 +485,7 @@ impl ProgramCacheEntry {
             account_size: 0,
             deployment_slot: slot,
             effective_slot: slot,
-            tx_usage_counter: AtomicU64::default(),
-            ix_usage_counter: AtomicU64::default(),
+            tx_usage_counter,
             latest_access_slot: AtomicU64::new(0),
         };
         debug_assert!(tombstone.is_tombstone());
@@ -772,10 +780,11 @@ impl ProgramCacheForTxBatch {
                     // Found a program entry on the current fork, but it's not effective
                     // yet. It indicates that the program has delayed visibility. Return
                     // the tombstone to reflect that.
-                    Arc::new(ProgramCacheEntry::new_tombstone(
+                    Arc::new(ProgramCacheEntry::new_tombstone_with_usage_counter(
                         entry.deployment_slot,
                         entry.account_owner,
                         ProgramCacheEntryType::DelayVisibility,
+                        entry.tx_usage_counter.clone(),
                     ))
                 } else {
                     entry.clone()
@@ -919,10 +928,6 @@ impl<FG: ForkGraph> ProgramCache<FG> {
                             existing.tx_usage_counter.load(Ordering::Relaxed),
                             Ordering::Relaxed,
                         );
-                        entry.ix_usage_counter.fetch_add(
-                            existing.ix_usage_counter.load(Ordering::Relaxed),
-                            Ordering::Relaxed,
-                        );
                         *existing = Arc::clone(&entry);
                         self.stats.reloads.fetch_add(1, Ordering::Relaxed);
                     }
@@ -1060,10 +1065,11 @@ impl<FG: ForkGraph> ProgramCache<FG> {
     /// and returns which program accounts the accounts DB needs to load.
     pub fn extract(
         &self,
-        search_for: &mut Vec<(Pubkey, (ProgramCacheMatchCriteria, u64))>,
+        search_for: &mut Vec<(Pubkey, ProgramCacheMatchCriteria)>,
         loaded_programs_for_tx_batch: &mut ProgramCacheForTxBatch,
-        is_first_round: bool,
-    ) -> Option<(Pubkey, u64)> {
+        increment_usage_counter: bool,
+        count_hits_and_misses: bool,
+    ) -> Option<Pubkey> {
         debug_assert!(self.fork_graph.is_some());
         let fork_graph = self.fork_graph.as_ref().unwrap().upgrade().unwrap();
         let locked_fork_graph = fork_graph.read().unwrap();
@@ -1073,7 +1079,7 @@ impl<FG: ForkGraph> ProgramCache<FG> {
                 entries,
                 loading_entries,
             } => {
-                search_for.retain(|(key, (match_criteria, usage_count))| {
+                search_for.retain(|(key, match_criteria)| {
                     if let Some(second_level) = entries.get(key) {
                         for entry in second_level.iter().rev() {
                             if entry.deployment_slot <= self.latest_root_slot
@@ -1106,19 +1112,22 @@ impl<FG: ForkGraph> ProgramCache<FG> {
                                     // Found a program entry on the current fork, but it's not effective
                                     // yet. It indicates that the program has delayed visibility. Return
                                     // the tombstone to reflect that.
-                                    Arc::new(ProgramCacheEntry::new_tombstone(
+                                    Arc::new(ProgramCacheEntry::new_tombstone_with_usage_counter(
                                         entry.deployment_slot,
                                         entry.account_owner,
                                         ProgramCacheEntryType::DelayVisibility,
+                                        entry.tx_usage_counter.clone(),
                                     ))
                                 } else {
                                     continue;
                                 };
                                 entry_to_return
                                     .update_access_slot(loaded_programs_for_tx_batch.slot);
-                                entry_to_return
-                                    .tx_usage_counter
-                                    .fetch_add(*usage_count, Ordering::Relaxed);
+                                if increment_usage_counter {
+                                    entry_to_return
+                                        .tx_usage_counter
+                                        .fetch_add(1, Ordering::Relaxed);
+                                }
                                 loaded_programs_for_tx_batch
                                     .entries
                                     .insert(*key, entry_to_return);
@@ -1134,7 +1143,7 @@ impl<FG: ForkGraph> ProgramCache<FG> {
                                 loaded_programs_for_tx_batch.slot,
                                 thread::current().id(),
                             ));
-                            cooperative_loading_task = Some((*key, *usage_count));
+                            cooperative_loading_task = Some(*key);
                         }
                     }
                     true
@@ -1142,7 +1151,7 @@ impl<FG: ForkGraph> ProgramCache<FG> {
             }
         }
         drop(locked_fork_graph);
-        if is_first_round {
+        if count_hits_and_misses {
             self.stats
                 .misses
                 .fetch_add(search_for.len() as u64, Ordering::Relaxed);
@@ -1439,8 +1448,7 @@ mod tests {
             account_size: 0,
             deployment_slot,
             effective_slot,
-            tx_usage_counter: usage_counter,
-            ix_usage_counter: AtomicU64::default(),
+            tx_usage_counter: Arc::new(usage_counter),
             latest_access_slot: AtomicU64::new(deployment_slot),
         })
     }
@@ -1455,8 +1463,7 @@ mod tests {
             account_size: 0,
             deployment_slot,
             effective_slot,
-            tx_usage_counter: AtomicU64::default(),
-            ix_usage_counter: AtomicU64::default(),
+            tx_usage_counter: Arc::default(),
             latest_access_slot: AtomicU64::default(),
         })
     }
@@ -1895,8 +1902,7 @@ mod tests {
                 account_size: 0,
                 deployment_slot: 10,
                 effective_slot: 11,
-                tx_usage_counter: AtomicU64::default(),
-                ix_usage_counter: AtomicU64::default(),
+                tx_usage_counter: Arc::default(),
                 latest_access_slot: AtomicU64::default(),
             }),
         ));
@@ -1908,8 +1914,7 @@ mod tests {
                 account_size: 0,
                 deployment_slot: 10,
                 effective_slot: 11,
-                tx_usage_counter: AtomicU64::default(),
-                ix_usage_counter: AtomicU64::default(),
+                tx_usage_counter: Arc::default(),
                 latest_access_slot: AtomicU64::default(),
             }),
         );
@@ -1934,8 +1939,7 @@ mod tests {
                 account_size: 0,
                 deployment_slot: 10,
                 effective_slot: 11,
-                tx_usage_counter: AtomicU64::default(),
-                ix_usage_counter: AtomicU64::default(),
+                tx_usage_counter: Arc::default(),
                 latest_access_slot: AtomicU64::default(),
             }),
         ));
@@ -1947,8 +1951,7 @@ mod tests {
                 account_size: 0,
                 deployment_slot: 10,
                 effective_slot: 11,
-                tx_usage_counter: AtomicU64::default(),
-                ix_usage_counter: AtomicU64::default(),
+                tx_usage_counter: Arc::default(),
                 latest_access_slot: AtomicU64::default(),
             }),
         ));
@@ -2103,8 +2106,7 @@ mod tests {
             account_size: 0,
             deployment_slot: 20,
             effective_slot: 20,
-            tx_usage_counter: AtomicU64::default(),
-            ix_usage_counter: AtomicU64::default(),
+            tx_usage_counter: Arc::default(),
             latest_access_slot: AtomicU64::default(),
         });
         cache.assign_program(program1, updated_program.clone());
@@ -2176,7 +2178,7 @@ mod tests {
         cache: &ProgramCache<TestForkGraphSpecific>,
         loading_slot: Slot,
         keys: &[Pubkey],
-    ) -> Vec<(Pubkey, (ProgramCacheMatchCriteria, u64))> {
+    ) -> Vec<(Pubkey, ProgramCacheMatchCriteria)> {
         let fork_graph = cache.fork_graph.as_ref().unwrap().upgrade().unwrap();
         let locked_fork_graph = fork_graph.read().unwrap();
         let entries = cache.get_flattened_entries_for_tests();
@@ -2193,7 +2195,7 @@ mod tests {
                             )
                     })
                     .map(|(program_id, _entry)| {
-                        (*program_id, (ProgramCacheMatchCriteria::NoCriteria, 1))
+                        (*program_id, ProgramCacheMatchCriteria::NoCriteria)
                     })
             })
             .collect()
@@ -2214,7 +2216,7 @@ mod tests {
     }
 
     fn match_missing(
-        missing: &[(Pubkey, (ProgramCacheMatchCriteria, u64))],
+        missing: &[(Pubkey, ProgramCacheMatchCriteria)],
         program: &Pubkey,
         expected_result: bool,
     ) -> bool {
@@ -2293,7 +2295,7 @@ mod tests {
         assert!(match_missing(&missing, &program2, false));
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(22, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 20, 22));
         assert!(match_slot(&extracted, &program4, 0, 22));
 
@@ -2302,7 +2304,7 @@ mod tests {
             get_entries_to_load(&cache, 15, &[program1, program2, program3, program4]);
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(15, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 15));
         assert!(match_slot(&extracted, &program2, 11, 15));
         // The effective slot of program4 deployed in slot 15 is 19. So it should not be usable in slot 16.
@@ -2318,7 +2320,7 @@ mod tests {
             get_entries_to_load(&cache, 18, &[program1, program2, program3, program4]);
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(18, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 18));
         assert!(match_slot(&extracted, &program2, 11, 18));
         // The effective slot of program4 deployed in slot 15 is 18. So it should be usable in slot 18.
@@ -2329,7 +2331,7 @@ mod tests {
             get_entries_to_load(&cache, 23, &[program1, program2, program3, program4]);
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(23, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 23));
         assert!(match_slot(&extracted, &program2, 11, 23));
         // The effective slot of program4 deployed in slot 15 is 19. So it should be usable in slot 23.
@@ -2340,7 +2342,7 @@ mod tests {
             get_entries_to_load(&cache, 11, &[program1, program2, program3, program4]);
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(11, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 11));
         // program2 was updated at slot 11, but is not effective till slot 12. The result should contain a tombstone.
         let tombstone = extracted
@@ -2372,7 +2374,7 @@ mod tests {
             get_entries_to_load(&cache, 21, &[program1, program2, program3, program4]);
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(21, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         // Since the fork was pruned, we should not find the entry deployed at slot 20.
         assert!(match_slot(&extracted, &program1, 0, 21));
         assert!(match_slot(&extracted, &program2, 11, 21));
@@ -2382,7 +2384,7 @@ mod tests {
         let mut missing =
             get_entries_to_load(&cache, 27, &[program1, program2, program3, program4]);
         let mut extracted = ProgramCacheForTxBatch::new(27, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 27));
         assert!(match_slot(&extracted, &program2, 11, 27));
         assert!(match_slot(&extracted, &program3, 25, 27));
@@ -2410,7 +2412,7 @@ mod tests {
             get_entries_to_load(&cache, 23, &[program1, program2, program3, program4]);
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(23, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 23));
         assert!(match_slot(&extracted, &program2, 11, 23));
         assert!(match_slot(&extracted, &program4, 15, 23));
@@ -2458,17 +2460,17 @@ mod tests {
         let mut missing = get_entries_to_load(&cache, 12, &[program1, program2, program3]);
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(12, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 12));
         assert!(match_slot(&extracted, &program2, 11, 12));
 
         // Test the same fork, but request the program modified at a later slot than what's in the cache.
         let mut missing = get_entries_to_load(&cache, 12, &[program1, program2, program3]);
-        missing.get_mut(0).unwrap().1 .0 = ProgramCacheMatchCriteria::DeployedOnOrAfterSlot(5);
-        missing.get_mut(1).unwrap().1 .0 = ProgramCacheMatchCriteria::DeployedOnOrAfterSlot(5);
+        missing.get_mut(0).unwrap().1 = ProgramCacheMatchCriteria::DeployedOnOrAfterSlot(5);
+        missing.get_mut(1).unwrap().1 = ProgramCacheMatchCriteria::DeployedOnOrAfterSlot(5);
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(12, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_missing(&missing, &program1, true));
         assert!(match_slot(&extracted, &program2, 11, 12));
     }
@@ -2528,14 +2530,14 @@ mod tests {
         let mut missing = get_entries_to_load(&cache, 19, &[program1, program2, program3]);
         assert!(match_missing(&missing, &program3, false));
         let mut extracted = ProgramCacheForTxBatch::new(19, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 19));
         assert!(match_slot(&extracted, &program2, 11, 19));
 
         // Testing fork 0 - 5 - 11 - 25 - 27 with current slot at 27
         let mut missing = get_entries_to_load(&cache, 27, &[program1, program2, program3]);
         let mut extracted = ProgramCacheForTxBatch::new(27, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 27));
         assert!(match_slot(&extracted, &program2, 11, 27));
         assert!(match_missing(&missing, &program3, true));
@@ -2544,7 +2546,7 @@ mod tests {
         let mut missing = get_entries_to_load(&cache, 22, &[program1, program2, program3]);
         assert!(match_missing(&missing, &program2, false));
         let mut extracted = ProgramCacheForTxBatch::new(22, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 20, 22));
         assert!(match_missing(&missing, &program3, true));
     }
@@ -2557,9 +2559,9 @@ mod tests {
         cache.set_fork_graph(Arc::downgrade(&fork_graph));
 
         let program1 = Pubkey::new_unique();
-        let mut missing = vec![(program1, (ProgramCacheMatchCriteria::NoCriteria, 1))];
+        let mut missing = vec![(program1, ProgramCacheMatchCriteria::NoCriteria)];
         let mut extracted = ProgramCacheForTxBatch::new(0, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_missing(&missing, &program1, true));
     }
 
@@ -2580,8 +2582,7 @@ mod tests {
                 account_size: 0,
                 deployment_slot: 0,
                 effective_slot: 0,
-                tx_usage_counter: AtomicU64::default(),
-                ix_usage_counter: AtomicU64::default(),
+                tx_usage_counter: Arc::default(),
                 latest_access_slot: AtomicU64::default(),
             });
             assert!(entry.to_unloaded().is_none());
@@ -2636,7 +2637,7 @@ mod tests {
 
         let mut missing = get_entries_to_load(&cache, 20, &[program1]);
         let mut extracted = ProgramCacheForTxBatch::new(20, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
 
         // The cache should have the program deployed at slot 0
         assert_eq!(
@@ -2677,14 +2678,14 @@ mod tests {
 
         let mut missing = get_entries_to_load(&cache, 20, &[program1, program2]);
         let mut extracted = ProgramCacheForTxBatch::new(20, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 20));
         assert!(match_slot(&extracted, &program2, 10, 20));
 
         let mut missing = get_entries_to_load(&cache, 6, &[program1, program2]);
         assert!(match_missing(&missing, &program2, false));
         let mut extracted = ProgramCacheForTxBatch::new(6, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 5, 6));
 
         // Pruning slot 5 will remove program1 entry deployed at slot 5.
@@ -2693,14 +2694,14 @@ mod tests {
 
         let mut missing = get_entries_to_load(&cache, 20, &[program1, program2]);
         let mut extracted = ProgramCacheForTxBatch::new(20, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 20));
         assert!(match_slot(&extracted, &program2, 10, 20));
 
         let mut missing = get_entries_to_load(&cache, 6, &[program1, program2]);
         assert!(match_missing(&missing, &program2, false));
         let mut extracted = ProgramCacheForTxBatch::new(6, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 6));
 
         // Pruning slot 10 will remove program2 entry deployed at slot 10.
@@ -2710,7 +2711,7 @@ mod tests {
         let mut missing = get_entries_to_load(&cache, 20, &[program1, program2]);
         assert!(match_missing(&missing, &program2, false));
         let mut extracted = ProgramCacheForTxBatch::new(20, cache.environments.clone(), None, 0);
-        cache.extract(&mut missing, &mut extracted, true);
+        cache.extract(&mut missing, &mut extracted, true, true);
         assert!(match_slot(&extracted, &program1, 0, 20));
     }
 

+ 2 - 14
programs/bpf_loader/src/lib.rs

@@ -177,10 +177,6 @@ pub fn deploy_program(
             old_entry.tx_usage_counter.load(Ordering::Relaxed),
             Ordering::Relaxed,
         );
-        executor.ix_usage_counter.store(
-            old_entry.ix_usage_counter.load(Ordering::Relaxed),
-            Ordering::Relaxed,
-        );
     }
     load_program_metrics.program_id = program_id.to_string();
     program_cache_for_tx_batch.store_modified_entry(*program_id, Arc::new(executor));
@@ -452,7 +448,6 @@ pub(crate) fn process_instruction_inner(
     get_or_create_executor_time.stop();
     invoke_context.timings.get_or_create_executor_us += get_or_create_executor_time.as_us();
 
-    executor.ix_usage_counter.fetch_add(1, Ordering::Relaxed);
     match &executor.program {
         ProgramCacheEntryType::FailedVerification(_)
         | ProgramCacheEntryType::Closed
@@ -4105,8 +4100,7 @@ mod tests {
             account_size: 0,
             deployment_slot: 0,
             effective_slot: 0,
-            tx_usage_counter: AtomicU64::new(100),
-            ix_usage_counter: AtomicU64::new(100),
+            tx_usage_counter: Arc::new(AtomicU64::new(100)),
             latest_access_slot: AtomicU64::new(0),
         };
         invoke_context
@@ -4128,10 +4122,6 @@ mod tests {
             updated_program.tx_usage_counter.load(Ordering::Relaxed),
             100
         );
-        assert_eq!(
-            updated_program.ix_usage_counter.load(Ordering::Relaxed),
-            100
-        );
     }
 
     #[test]
@@ -4149,8 +4139,7 @@ mod tests {
             account_size: 0,
             deployment_slot: 0,
             effective_slot: 0,
-            tx_usage_counter: AtomicU64::new(100),
-            ix_usage_counter: AtomicU64::new(100),
+            tx_usage_counter: Arc::new(AtomicU64::new(100)),
             latest_access_slot: AtomicU64::new(0),
         };
         invoke_context
@@ -4170,6 +4159,5 @@ mod tests {
 
         assert_eq!(program2.deployment_slot, 2);
         assert_eq!(program2.tx_usage_counter.load(Ordering::Relaxed), 0);
-        assert_eq!(program2.ix_usage_counter.load(Ordering::Relaxed), 0);
     }
 }

+ 1 - 4
programs/loader-v4/src/lib.rs

@@ -20,7 +20,7 @@ use {
     solana_sbpf::{declare_builtin_function, memory_region::MemoryMapping},
     solana_sdk_ids::{bpf_loader, bpf_loader_deprecated, bpf_loader_upgradeable, loader_v4},
     solana_transaction_context::{BorrowedAccount, InstructionContext},
-    solana_type_overrides::sync::{atomic::Ordering, Arc},
+    solana_type_overrides::sync::Arc,
     std::{cell::RefCell, rc::Rc},
 };
 
@@ -499,9 +499,6 @@ fn process_instruction_inner(
         get_or_create_executor_time.stop();
         invoke_context.timings.get_or_create_executor_us += get_or_create_executor_time.as_us();
         drop(program);
-        loaded_program
-            .ix_usage_counter
-            .fetch_add(1, Ordering::Relaxed);
         match &loaded_program.program {
             ProgramCacheEntryType::FailedVerification(_)
             | ProgramCacheEntryType::Closed

+ 31 - 19
runtime/src/bank.rs

@@ -1447,7 +1447,7 @@ impl Bank {
         report_loaded_programs_stats(
             &parent
                 .transaction_processor
-                .program_cache
+                .global_program_cache
                 .read()
                 .unwrap()
                 .stats,
@@ -1455,7 +1455,7 @@ impl Bank {
         );
 
         new.transaction_processor
-            .program_cache
+            .global_program_cache
             .write()
             .unwrap()
             .stats
@@ -1466,7 +1466,7 @@ impl Bank {
 
     pub fn set_fork_graph_in_program_cache(&self, fork_graph: Weak<RwLock<BankForks>>) {
         self.transaction_processor
-            .program_cache
+            .global_program_cache
             .write()
             .unwrap()
             .set_fork_graph(fork_graph);
@@ -1490,7 +1490,11 @@ impl Bank {
                 .checked_div(2)
                 .unwrap();
 
-        let mut program_cache = self.transaction_processor.program_cache.write().unwrap();
+        let mut program_cache = self
+            .transaction_processor
+            .global_program_cache
+            .write()
+            .unwrap();
 
         if program_cache.upcoming_environments.is_some() {
             if let Some((key, program_to_recompile)) = program_cache.programs_to_recompile.pop() {
@@ -1498,7 +1502,7 @@ impl Bank {
                 drop(program_cache);
                 let environments_for_epoch = self
                     .transaction_processor
-                    .program_cache
+                    .global_program_cache
                     .read()
                     .unwrap()
                     .get_environments_for_epoch(effective_epoch);
@@ -1516,14 +1520,11 @@ impl Bank {
                             .load(Ordering::Relaxed),
                         Ordering::Relaxed,
                     );
-                    recompiled.ix_usage_counter.fetch_add(
-                        program_to_recompile
-                            .ix_usage_counter
-                            .load(Ordering::Relaxed),
-                        Ordering::Relaxed,
-                    );
-                    let mut program_cache =
-                        self.transaction_processor.program_cache.write().unwrap();
+                    let mut program_cache = self
+                        .transaction_processor
+                        .global_program_cache
+                        .write()
+                        .unwrap();
                     program_cache.assign_program(key, recompiled);
                 }
             }
@@ -1533,7 +1534,11 @@ impl Bank {
             // Anticipate the upcoming program runtime environment for the next epoch,
             // so we can try to recompile loaded programs before the feature transition hits.
             drop(program_cache);
-            let mut program_cache = self.transaction_processor.program_cache.write().unwrap();
+            let mut program_cache = self
+                .transaction_processor
+                .global_program_cache
+                .write()
+                .unwrap();
             let program_runtime_environment_v1 = create_program_runtime_environment_v1(
                 &upcoming_feature_set.runtime_features(),
                 &compute_budget,
@@ -1567,7 +1572,7 @@ impl Bank {
 
     pub fn prune_program_cache(&self, new_root_slot: Slot, new_root_epoch: Epoch) {
         self.transaction_processor
-            .program_cache
+            .global_program_cache
             .write()
             .unwrap()
             .prune(new_root_slot, new_root_epoch);
@@ -1575,7 +1580,7 @@ impl Bank {
 
     pub fn prune_program_cache_by_deployment_slot(&self, deployment_slot: Slot) {
         self.transaction_processor
-            .program_cache
+            .global_program_cache
             .write()
             .unwrap()
             .prune_by_deployment_slot(deployment_slot);
@@ -3607,7 +3612,10 @@ impl Bank {
                     if executed_tx.was_successful() && !programs_modified_by_tx.is_empty() {
                         cache
                             .get_or_insert_with(|| {
-                                self.transaction_processor.program_cache.write().unwrap()
+                                self.transaction_processor
+                                    .global_program_cache
+                                    .write()
+                                    .unwrap()
                             })
                             .merge(programs_modified_by_tx);
                     }
@@ -5542,7 +5550,7 @@ impl Bank {
 
                 // Unload a program from the bank's cache
                 self.transaction_processor
-                    .program_cache
+                    .global_program_cache
                     .write()
                     .unwrap()
                     .remove_programs([*old_address].into_iter());
@@ -5927,7 +5935,11 @@ impl Bank {
         ProgramCacheForTxBatch::new_from_cache(
             slot,
             self.epoch_schedule.get_epoch(slot),
-            &self.transaction_processor.program_cache.read().unwrap(),
+            &self
+                .transaction_processor
+                .global_program_cache
+                .read()
+                .unwrap(),
         )
     }
 

+ 2 - 2
runtime/src/bank/builtin_programs.rs

@@ -417,7 +417,7 @@ mod tests_core_bpf_migration {
         // Run `finish_init` to simulate starting up from a snapshot.
         // Clear all builtins to simulate a fresh bank init.
         bank.transaction_processor
-            .program_cache
+            .global_program_cache
             .write()
             .unwrap()
             .remove_programs(
@@ -586,7 +586,7 @@ mod tests_core_bpf_migration {
         // Run `finish_init` to simulate starting up from a snapshot.
         // Clear all builtins to simulate a fresh bank init.
         bank.transaction_processor
-            .program_cache
+            .global_program_cache
             .write()
             .unwrap()
             .remove_programs(

+ 11 - 3
runtime/src/bank/builtins/core_bpf_migration/mod.rs

@@ -137,7 +137,11 @@ impl Bank {
         let mut program_cache_for_tx_batch = ProgramCacheForTxBatch::new_from_cache(
             self.slot,
             self.epoch,
-            &self.transaction_processor.program_cache.read().unwrap(),
+            &self
+                .transaction_processor
+                .global_program_cache
+                .read()
+                .unwrap(),
         );
 
         // Configure a dummy `InvokeContext` from the runtime's current
@@ -206,7 +210,7 @@ impl Bank {
         // Update the program cache by merging with `programs_modified`, which
         // should have been updated by the deploy function.
         self.transaction_processor
-            .program_cache
+            .global_program_cache
             .write()
             .unwrap()
             .merge(&program_cache_for_tx_batch.drain_modified_entries());
@@ -592,7 +596,11 @@ pub(crate) mod tests {
                 .contains(&self.target_program_address));
 
             // The cache should contain the target program.
-            let program_cache = bank.transaction_processor.program_cache.read().unwrap();
+            let program_cache = bank
+                .transaction_processor
+                .global_program_cache
+                .read()
+                .unwrap();
             let entries = program_cache.get_flattened_entries(true, true);
             let target_entry = entries
                 .iter()

+ 42 - 10
runtime/src/bank/tests.rs

@@ -5842,7 +5842,11 @@ fn test_bank_load_program() {
     assert!(bank.process_transaction(&transaction).is_ok());
 
     {
-        let program_cache = bank.transaction_processor.program_cache.read().unwrap();
+        let program_cache = bank
+            .transaction_processor
+            .global_program_cache
+            .read()
+            .unwrap();
         let [program] = program_cache.get_slot_versions_for_tests(&program_key) else {
             panic!();
         };
@@ -5892,7 +5896,11 @@ fn test_bpf_loader_upgradeable_deploy_with_max_len(formalize_loaded_transaction_
     );
     {
         // Make sure it is not in the cache because the account owner is not a loader
-        let program_cache = bank.transaction_processor.program_cache.read().unwrap();
+        let program_cache = bank
+            .transaction_processor
+            .global_program_cache
+            .read()
+            .unwrap();
         let slot_versions = program_cache.get_slot_versions_for_tests(&program_keypair.pubkey());
         assert!(slot_versions.is_empty());
     }
@@ -5968,7 +5976,11 @@ fn test_bpf_loader_upgradeable_deploy_with_max_len(formalize_loaded_transaction_
         )),
     );
     {
-        let program_cache = bank.transaction_processor.program_cache.read().unwrap();
+        let program_cache = bank
+            .transaction_processor
+            .global_program_cache
+            .read()
+            .unwrap();
         let slot_versions = program_cache.get_slot_versions_for_tests(&program_keypair.pubkey());
         assert_eq!(slot_versions.len(), 1);
         assert_eq!(slot_versions[0].deployment_slot, bank.slot());
@@ -5992,7 +6004,11 @@ fn test_bpf_loader_upgradeable_deploy_with_max_len(formalize_loaded_transaction_
         )),
     );
     {
-        let program_cache = bank.transaction_processor.program_cache.read().unwrap();
+        let program_cache = bank
+            .transaction_processor
+            .global_program_cache
+            .read()
+            .unwrap();
         let slot_versions = program_cache.get_slot_versions_for_tests(&buffer_address);
         assert_eq!(slot_versions.len(), 1);
         assert_eq!(slot_versions[0].deployment_slot, bank.slot());
@@ -6094,7 +6110,11 @@ fn test_bpf_loader_upgradeable_deploy_with_max_len(formalize_loaded_transaction_
     let transaction = Transaction::new(&[&binding], invocation_message, bank.last_blockhash());
     assert!(bank.process_transaction(&transaction).is_ok());
     {
-        let program_cache = bank.transaction_processor.program_cache.read().unwrap();
+        let program_cache = bank
+            .transaction_processor
+            .global_program_cache
+            .read()
+            .unwrap();
         let slot_versions = program_cache.get_slot_versions_for_tests(&program_keypair.pubkey());
         assert_eq!(slot_versions.len(), 2);
         assert_eq!(slot_versions[0].deployment_slot, bank.slot() - 1);
@@ -10783,14 +10803,14 @@ fn test_feature_activation_loaded_programs_cache_preparation_phase(
     let bank = new_bank_from_parent_with_bank_forks(&bank_forks, bank, &Pubkey::default(), 16);
     let current_env = bank
         .transaction_processor
-        .program_cache
+        .global_program_cache
         .read()
         .unwrap()
         .get_environments_for_epoch(0)
         .program_runtime_v1;
     let upcoming_env = bank
         .transaction_processor
-        .program_cache
+        .global_program_cache
         .read()
         .unwrap()
         .get_environments_for_epoch(1)
@@ -10798,7 +10818,11 @@ fn test_feature_activation_loaded_programs_cache_preparation_phase(
 
     // Advance the bank to recompile the program.
     {
-        let program_cache = bank.transaction_processor.program_cache.read().unwrap();
+        let program_cache = bank
+            .transaction_processor
+            .global_program_cache
+            .read()
+            .unwrap();
         let slot_versions = program_cache.get_slot_versions_for_tests(&program_keypair.pubkey());
         assert_eq!(slot_versions.len(), 1);
         assert!(Arc::ptr_eq(
@@ -10809,7 +10833,11 @@ fn test_feature_activation_loaded_programs_cache_preparation_phase(
     goto_end_of_slot(bank.clone());
     let bank = new_from_parent_with_fork_next_slot(bank, bank_forks.as_ref());
     {
-        let program_cache = bank.transaction_processor.program_cache.write().unwrap();
+        let program_cache = bank
+            .transaction_processor
+            .global_program_cache
+            .write()
+            .unwrap();
         let slot_versions = program_cache.get_slot_versions_for_tests(&program_keypair.pubkey());
         assert_eq!(slot_versions.len(), 2);
         assert!(Arc::ptr_eq(
@@ -10896,7 +10924,11 @@ fn test_feature_activation_loaded_programs_epoch_transition() {
 
     {
         // Prune for rerooting and thus finishing the recompilation phase.
-        let mut program_cache = bank.transaction_processor.program_cache.write().unwrap();
+        let mut program_cache = bank
+            .transaction_processor
+            .global_program_cache
+            .write()
+            .unwrap();
         program_cache.prune(bank.slot(), bank.epoch());
 
         // Unload all (which is only the entry with the new environment)

+ 2 - 0
svm/src/account_loader.rs

@@ -156,6 +156,7 @@ pub struct FeesOnlyTransaction {
 // type, and itself implements `TransactionProcessingCallback`, behaving
 // exactly like the implementor of the trait, but also returning up-to-date
 // account states mid-batch.
+#[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
 pub(crate) struct AccountLoader<'a, CB: TransactionProcessingCallback> {
     loaded_accounts: AHashMap<Pubkey, AccountSharedData>,
     callbacks: &'a CB,
@@ -164,6 +165,7 @@ pub(crate) struct AccountLoader<'a, CB: TransactionProcessingCallback> {
 
 impl<'a, CB: TransactionProcessingCallback> AccountLoader<'a, CB> {
     // create a new AccountLoader for the transaction batch
+    #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
     pub(crate) fn new_with_loaded_accounts_capacity(
         account_overrides: Option<&'a AccountOverrides>,
         callbacks: &'a CB,

+ 3 - 3
svm/src/program_loader.rs

@@ -821,9 +821,9 @@ mod tests {
 
         let upcoming_environments = ProgramRuntimeEnvironments::default();
         let current_environments = {
-            let mut program_cache = batch_processor.program_cache.write().unwrap();
-            program_cache.upcoming_environments = Some(upcoming_environments.clone());
-            program_cache.environments.clone()
+            let mut global_program_cache = batch_processor.global_program_cache.write().unwrap();
+            global_program_cache.upcoming_environments = Some(upcoming_environments.clone());
+            global_program_cache.environments.clone()
         };
         mock_bank
             .account_shared_data

+ 245 - 319
svm/src/transaction_processor.rs

@@ -46,7 +46,7 @@ use {
     },
     solana_pubkey::Pubkey,
     solana_rent::Rent,
-    solana_sdk_ids::{native_loader, system_program},
+    solana_sdk_ids::system_program,
     solana_svm_callback::TransactionProcessingCallback,
     solana_svm_feature_set::SVMFeatureSet,
     solana_svm_transaction::{svm_message::SVMMessage, svm_transaction::SVMTransaction},
@@ -55,7 +55,7 @@ use {
     solana_transaction_error::{TransactionError, TransactionResult},
     solana_type_overrides::sync::{atomic::Ordering, Arc, RwLock, RwLockReadGuard},
     std::{
-        collections::{hash_map::Entry, HashMap, HashSet},
+        collections::HashSet,
         fmt::{Debug, Formatter},
         rc::Rc,
         sync::Weak,
@@ -157,7 +157,7 @@ pub struct TransactionBatchProcessor<FG: ForkGraph> {
     sysvar_cache: RwLock<SysvarCache>,
 
     /// Programs required for transaction batch processing
-    pub program_cache: Arc<RwLock<ProgramCache<FG>>>,
+    pub global_program_cache: Arc<RwLock<ProgramCache<FG>>>,
 
     /// Builtin program ids
     pub builtin_program_ids: RwLock<HashSet<Pubkey>>,
@@ -171,7 +171,7 @@ impl<FG: ForkGraph> Debug for TransactionBatchProcessor<FG> {
             .field("slot", &self.slot)
             .field("epoch", &self.epoch)
             .field("sysvar_cache", &self.sysvar_cache)
-            .field("program_cache", &self.program_cache)
+            .field("global_program_cache", &self.global_program_cache)
             .finish()
     }
 }
@@ -182,7 +182,7 @@ impl<FG: ForkGraph> Default for TransactionBatchProcessor<FG> {
             slot: Slot::default(),
             epoch: Epoch::default(),
             sysvar_cache: RwLock::<SysvarCache>::default(),
-            program_cache: Arc::new(RwLock::new(ProgramCache::new(
+            global_program_cache: Arc::new(RwLock::new(ProgramCache::new(
                 Slot::default(),
                 Epoch::default(),
             ))),
@@ -206,7 +206,7 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
         Self {
             slot,
             epoch,
-            program_cache: Arc::new(RwLock::new(ProgramCache::new(slot, epoch))),
+            global_program_cache: Arc::new(RwLock::new(ProgramCache::new(slot, epoch))),
             ..Self::default()
         }
     }
@@ -228,10 +228,10 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
     ) -> Self {
         let processor = Self::new_uninitialized(slot, epoch);
         {
-            let mut program_cache = processor.program_cache.write().unwrap();
-            program_cache.set_fork_graph(fork_graph);
+            let mut global_program_cache = processor.global_program_cache.write().unwrap();
+            global_program_cache.set_fork_graph(fork_graph);
             processor.configure_program_runtime_environments_inner(
-                &mut program_cache,
+                &mut global_program_cache,
                 program_runtime_environment_v1,
                 program_runtime_environment_v2,
             );
@@ -250,7 +250,7 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
             slot,
             epoch,
             sysvar_cache: RwLock::<SysvarCache>::default(),
-            program_cache: self.program_cache.clone(),
+            global_program_cache: self.global_program_cache.clone(),
             builtin_program_ids: RwLock::new(self.builtin_program_ids.read().unwrap().clone()),
             execution_cost: self.execution_cost,
         }
@@ -264,17 +264,17 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
 
     fn configure_program_runtime_environments_inner(
         &self,
-        program_cache: &mut ProgramCache<FG>,
+        global_program_cache: &mut ProgramCache<FG>,
         program_runtime_environment_v1: Option<ProgramRuntimeEnvironment>,
         program_runtime_environment_v2: Option<ProgramRuntimeEnvironment>,
     ) {
         let empty_loader = || Arc::new(BuiltinProgram::new_loader(VmConfig::default()));
 
-        program_cache.latest_root_slot = self.slot;
-        program_cache.latest_root_epoch = self.epoch;
-        program_cache.environments.program_runtime_v1 =
+        global_program_cache.latest_root_slot = self.slot;
+        global_program_cache.latest_root_epoch = self.epoch;
+        global_program_cache.environments.program_runtime_v1 =
             program_runtime_environment_v1.unwrap_or(empty_loader());
-        program_cache.environments.program_runtime_v2 =
+        global_program_cache.environments.program_runtime_v2 =
             program_runtime_environment_v2.unwrap_or(empty_loader());
     }
 
@@ -286,7 +286,7 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
         program_runtime_environment_v2: Option<ProgramRuntimeEnvironment>,
     ) {
         self.configure_program_runtime_environments_inner(
-            &mut self.program_cache.write().unwrap(),
+            &mut self.global_program_cache.write().unwrap(),
             program_runtime_environment_v1,
             program_runtime_environment_v2,
         );
@@ -299,7 +299,7 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
         &self,
         epoch: Epoch,
     ) -> Option<solana_program_runtime::loaded_programs::ProgramRuntimeEnvironments> {
-        self.program_cache
+        self.global_program_cache
             .try_read()
             .ok()
             .map(|cache| cache.get_environments_for_epoch(epoch))
@@ -333,29 +333,45 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
         let mut execute_timings = ExecuteTimings::default();
         let mut processing_results = Vec::with_capacity(sanitized_txs.len());
 
-        let native_loader = native_loader::id();
-        let (program_accounts_map, filter_executable_us) = measure_us!({
-            let mut program_accounts_map = Self::filter_executable_program_accounts(
-                callbacks,
-                sanitized_txs,
-                &check_results,
-                PROGRAM_OWNERS,
+        // Determine a capacity for the internal account cache. This
+        // over-allocates but avoids ever reallocating, and spares us from
+        // deduplicating the account keys lists.
+        let account_keys_in_batch = sanitized_txs.iter().map(|tx| tx.account_keys().len()).sum();
+
+        // Create the account loader, which wraps all external account fetching.
+        let mut account_loader = AccountLoader::new_with_loaded_accounts_capacity(
+            config.account_overrides,
+            callbacks,
+            &environment.feature_set,
+            account_keys_in_batch,
+        );
+
+        // Create the transaction balance collector if recording is enabled.
+        let mut balance_collector = config
+            .recording_config
+            .enable_transaction_balance_recording
+            .then(|| BalanceCollector::new_with_transaction_count(sanitized_txs.len()));
+
+        // Create the batch-local program cache.
+        let mut program_cache_for_tx_batch = {
+            let global_program_cache = self.global_program_cache.read().unwrap();
+            let mut program_cache_for_tx_batch = ProgramCacheForTxBatch::new_from_cache(
+                self.slot,
+                self.epoch,
+                &global_program_cache,
             );
-            for builtin_program in self.builtin_program_ids.read().unwrap().iter() {
-                program_accounts_map.insert(*builtin_program, (&native_loader, 0));
-            }
-            program_accounts_map
-        });
-        execute_timings
-            .saturating_add_in_place(ExecuteTimingType::FilterExecutableUs, filter_executable_us);
+            drop(global_program_cache);
 
-        let (mut program_cache_for_tx_batch, program_cache_us) = measure_us!({
-            let program_cache_for_tx_batch = self.replenish_program_cache(
-                callbacks,
-                &program_accounts_map,
+            let builtins = self.builtin_program_ids.read().unwrap().clone();
+
+            self.replenish_program_cache(
+                &account_loader,
+                &builtins,
+                &mut program_cache_for_tx_batch,
                 &mut execute_timings,
                 config.check_program_modification_slot,
                 config.limit_to_load_programs,
+                false, // increment_usage_counter
             );
 
             if program_cache_for_tx_batch.hit_max_limit {
@@ -372,28 +388,7 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
             }
 
             program_cache_for_tx_batch
-        });
-        execute_timings
-            .saturating_add_in_place(ExecuteTimingType::ProgramCacheUs, program_cache_us);
-
-        // Determine a capacity for the internal account cache. This
-        // over-allocates but avoids ever reallocating, and spares us from
-        // deduplicating the account keys lists.
-        let account_keys_in_batch = sanitized_txs.iter().map(|tx| tx.account_keys().len()).sum();
-
-        // Create the account loader, which wraps all external account fetching.
-        let mut account_loader = AccountLoader::new_with_loaded_accounts_capacity(
-            config.account_overrides,
-            callbacks,
-            &environment.feature_set,
-            account_keys_in_batch,
-        );
-
-        // Create the transaction balance collector if recording is enabled.
-        let mut balance_collector = config
-            .recording_config
-            .enable_transaction_balance_recording
-            .then(|| BalanceCollector::new_with_transaction_count(sanitized_txs.len()));
+        };
 
         let (mut load_us, mut execution_us): (u64, u64) = (0, 0);
 
@@ -439,6 +434,46 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
                     Ok(ProcessedTransaction::FeesOnly(Box::new(fees_only_tx)))
                 }
                 TransactionLoadResult::Loaded(loaded_transaction) => {
+                    let (program_accounts_set, filter_executable_us) = measure_us!(self
+                        .filter_executable_program_accounts(
+                            &account_loader,
+                            &mut program_cache_for_tx_batch,
+                            tx,
+                        ));
+                    execute_timings.saturating_add_in_place(
+                        ExecuteTimingType::FilterExecutableUs,
+                        filter_executable_us,
+                    );
+
+                    let ((), program_cache_us) = measure_us!({
+                        self.replenish_program_cache(
+                            &account_loader,
+                            &program_accounts_set,
+                            &mut program_cache_for_tx_batch,
+                            &mut execute_timings,
+                            config.check_program_modification_slot,
+                            config.limit_to_load_programs,
+                            true, // increment_usage_counter
+                        );
+
+                        if program_cache_for_tx_batch.hit_max_limit {
+                            return LoadAndExecuteSanitizedTransactionsOutput {
+                                error_metrics,
+                                execute_timings,
+                                processing_results: (0..sanitized_txs.len())
+                                    .map(|_| Err(TransactionError::ProgramCacheHitMaxLimit))
+                                    .collect(),
+                                // If we abort the batch and balance recording is enabled, no balances should be
+                                // collected. If this is a leader thread, no batch will be committed.
+                                balance_collector: None,
+                            };
+                        }
+                    });
+                    execute_timings.saturating_add_in_place(
+                        ExecuteTimingType::ProgramCacheUs,
+                        program_cache_us,
+                    );
+
                     let executed_tx = self.execute_loaded_transaction(
                         callbacks,
                         tx,
@@ -454,6 +489,7 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
                     // Also update local program cache with modifications made by the transaction,
                     // if it executed successfully.
                     account_loader.update_accounts_for_executed_tx(tx, &executed_tx);
+
                     if executed_tx.was_successful() {
                         program_cache_for_tx_batch.merge(&executed_tx.programs_modified_by_tx);
                     }
@@ -477,7 +513,7 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
         // occurrences of cooperative loading.
         if program_cache_for_tx_batch.loaded_missing || program_cache_for_tx_batch.merged_modified {
             const SHRINK_LOADED_PROGRAMS_TO_PERCENTAGE: u8 = 90;
-            self.program_cache
+            self.global_program_cache
                 .write()
                 .unwrap()
                 .evict_using_2s_random_selection(
@@ -663,122 +699,105 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
         }
     }
 
-    /// Returns a map from executable program accounts (all accounts owned by any loader)
-    /// to their usage counters, for the transactions with a valid blockhash or nonce.
-    fn filter_executable_program_accounts<'a, CB: TransactionProcessingCallback>(
-        callbacks: &CB,
-        txs: &[impl SVMMessage],
-        check_results: &[TransactionCheckResult],
-        program_owners: &'a [Pubkey],
-    ) -> HashMap<Pubkey, (&'a Pubkey, u64)> {
-        let mut result: HashMap<Pubkey, (&'a Pubkey, u64)> = HashMap::new();
-        check_results.iter().zip(txs).for_each(|etx| {
-            if let (Ok(_), tx) = etx {
-                tx.account_keys()
-                    .iter()
-                    .for_each(|key| match result.entry(*key) {
-                        Entry::Occupied(mut entry) => {
-                            let (_, count) = entry.get_mut();
-                            *count = count.saturating_add(1);
-                        }
-                        Entry::Vacant(entry) => {
-                            if let Some(index) =
-                                callbacks.account_matches_owners(key, program_owners)
-                            {
-                                if let Some(owner) = program_owners.get(index) {
-                                    entry.insert((owner, 1));
-                                }
-                            }
-                        }
-                    });
+    /// Appends to a set of executable program accounts (all accounts owned by any loader)
+    /// for transactions with a valid blockhash or nonce.
+    fn filter_executable_program_accounts<CB: TransactionProcessingCallback>(
+        &self,
+        account_loader: &AccountLoader<CB>,
+        program_cache_for_tx_batch: &mut ProgramCacheForTxBatch,
+        tx: &impl SVMMessage,
+    ) -> HashSet<Pubkey> {
+        let mut program_accounts_set = HashSet::default();
+        for account_key in tx.account_keys().iter() {
+            if let Some(cache_entry) = program_cache_for_tx_batch.find(account_key) {
+                cache_entry.tx_usage_counter.fetch_add(1, Ordering::Relaxed);
+            } else if account_loader
+                .account_matches_owners(account_key, PROGRAM_OWNERS)
+                .is_some()
+            {
+                program_accounts_set.insert(*account_key);
             }
-        });
-        result
+        }
+
+        program_accounts_set
     }
 
     #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
     fn replenish_program_cache<CB: TransactionProcessingCallback>(
         &self,
-        callback: &CB,
-        program_accounts_map: &HashMap<Pubkey, (&Pubkey, u64)>,
+        account_loader: &AccountLoader<CB>,
+        program_accounts_set: &HashSet<Pubkey>,
+        program_cache_for_tx_batch: &mut ProgramCacheForTxBatch,
         execute_timings: &mut ExecuteTimings,
         check_program_modification_slot: bool,
         limit_to_load_programs: bool,
-    ) -> ProgramCacheForTxBatch {
-        let mut missing_programs: Vec<(Pubkey, (ProgramCacheMatchCriteria, u64))> =
-            program_accounts_map
-                .iter()
-                .map(|(pubkey, (_, count))| {
-                    let match_criteria = if check_program_modification_slot {
-                        get_program_modification_slot(callback, pubkey)
-                            .map_or(ProgramCacheMatchCriteria::Tombstone, |slot| {
-                                ProgramCacheMatchCriteria::DeployedOnOrAfterSlot(slot)
-                            })
-                    } else {
-                        ProgramCacheMatchCriteria::NoCriteria
-                    };
-                    (*pubkey, (match_criteria, *count))
-                })
-                .collect();
+        increment_usage_counter: bool,
+    ) {
+        let mut missing_programs: Vec<(Pubkey, ProgramCacheMatchCriteria)> = program_accounts_set
+            .iter()
+            .map(|pubkey| {
+                let match_criteria = if check_program_modification_slot {
+                    get_program_modification_slot(account_loader, pubkey)
+                        .map_or(ProgramCacheMatchCriteria::Tombstone, |slot| {
+                            ProgramCacheMatchCriteria::DeployedOnOrAfterSlot(slot)
+                        })
+                } else {
+                    ProgramCacheMatchCriteria::NoCriteria
+                };
+                (*pubkey, match_criteria)
+            })
+            .collect();
 
-        let mut loaded_programs_for_txs: Option<ProgramCacheForTxBatch> = None;
+        let mut count_hits_and_misses = true;
         loop {
             let (program_to_store, task_cookie, task_waiter) = {
                 // Lock the global cache.
-                let program_cache = self.program_cache.read().unwrap();
-                // Initialize our local cache.
-                let is_first_round = loaded_programs_for_txs.is_none();
-                if is_first_round {
-                    loaded_programs_for_txs = Some(ProgramCacheForTxBatch::new_from_cache(
-                        self.slot,
-                        self.epoch,
-                        &program_cache,
-                    ));
-                }
+                let global_program_cache = self.global_program_cache.read().unwrap();
                 // Figure out which program needs to be loaded next.
-                let program_to_load = program_cache.extract(
+                let program_to_load = global_program_cache.extract(
                     &mut missing_programs,
-                    loaded_programs_for_txs.as_mut().unwrap(),
-                    is_first_round,
+                    program_cache_for_tx_batch,
+                    increment_usage_counter,
+                    count_hits_and_misses,
                 );
+                count_hits_and_misses = false;
 
-                let program_to_store = program_to_load.map(|(key, count)| {
+                let program_to_store = program_to_load.map(|key| {
                     // Load, verify and compile one program.
                     let program = load_program_with_pubkey(
-                        callback,
-                        &program_cache.get_environments_for_epoch(self.epoch),
+                        account_loader,
+                        &global_program_cache.get_environments_for_epoch(self.epoch),
                         &key,
                         self.slot,
                         execute_timings,
                         false,
                     )
                     .expect("called load_program_with_pubkey() with nonexistent account");
-                    program.tx_usage_counter.store(count, Ordering::Relaxed);
                     (key, program)
                 });
 
-                let task_waiter = Arc::clone(&program_cache.loading_task_waiter);
+                let task_waiter = Arc::clone(&global_program_cache.loading_task_waiter);
                 (program_to_store, task_waiter.cookie(), task_waiter)
                 // Unlock the global cache again.
             };
 
             if let Some((key, program)) = program_to_store {
-                loaded_programs_for_txs.as_mut().unwrap().loaded_missing = true;
-                let mut program_cache = self.program_cache.write().unwrap();
+                program_cache_for_tx_batch.loaded_missing = true;
+                let mut global_program_cache = self.global_program_cache.write().unwrap();
                 // Submit our last completed loading task.
-                if program_cache.finish_cooperative_loading_task(self.slot, key, program)
+                if global_program_cache.finish_cooperative_loading_task(self.slot, key, program)
                     && limit_to_load_programs
                 {
                     // This branch is taken when there is an error in assigning a program to a
                     // cache slot. It is not possible to mock this error for SVM unit
                     // tests purposes.
-                    let mut ret = ProgramCacheForTxBatch::new_from_cache(
+                    *program_cache_for_tx_batch = ProgramCacheForTxBatch::new_from_cache(
                         self.slot,
                         self.epoch,
-                        &program_cache,
+                        &global_program_cache,
                     );
-                    ret.hit_max_limit = true;
-                    return ret;
+                    program_cache_for_tx_batch.hit_max_limit = true;
+                    return;
                 }
             } else if missing_programs.is_empty() {
                 break;
@@ -789,8 +808,6 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
                 let _new_cookie = task_waiter.wait(task_cookie);
             }
         }
-
-        loaded_programs_for_txs.unwrap()
     }
 
     /// Execute a transaction using the provided loaded accounts and update
@@ -1049,7 +1066,7 @@ impl<FG: ForkGraph> TransactionBatchProcessor<FG> {
         debug!("Adding program {name} under {program_id:?}");
         callbacks.add_builtin_account(name, &program_id);
         self.builtin_program_ids.write().unwrap().insert(program_id);
-        self.program_cache
+        self.global_program_cache
             .write()
             .unwrap()
             .assign_program(program_id, Arc::new(builtin));
@@ -1096,12 +1113,13 @@ mod tests {
         },
         solana_rent::Rent,
         solana_rent_collector::RENT_EXEMPT_RENT_EPOCH,
-        solana_sdk_ids::{bpf_loader, system_program, sysvar},
+        solana_sdk_ids::{bpf_loader, loader_v4, system_program, sysvar},
         solana_signature::Signature,
         solana_svm_callback::{AccountState, InvokeContextCallback},
         solana_transaction::{sanitized::SanitizedTransaction, Transaction},
         solana_transaction_context::TransactionContext,
         solana_transaction_error::{TransactionError, TransactionError::DuplicateInstruction},
+        std::collections::HashMap,
         test_case::test_case,
     };
 
@@ -1472,21 +1490,32 @@ mod tests {
     #[should_panic = "called load_program_with_pubkey() with nonexistent account"]
     fn test_replenish_program_cache_with_nonexistent_accounts() {
         let mock_bank = MockBankCallback::default();
+        let account_loader = (&mock_bank).into();
         let fork_graph = Arc::new(RwLock::new(TestForkGraph {}));
         let batch_processor =
             TransactionBatchProcessor::new(0, 0, Arc::downgrade(&fork_graph), None, None);
         let key = Pubkey::new_unique();
-        let owner = Pubkey::new_unique();
 
-        let mut account_maps: HashMap<Pubkey, (&Pubkey, u64)> = HashMap::new();
-        account_maps.insert(key, (&owner, 4));
+        let mut account_set = HashSet::new();
+        account_set.insert(key);
+
+        let mut program_cache_for_tx_batch = {
+            let global_program_cache = batch_processor.global_program_cache.read().unwrap();
+            ProgramCacheForTxBatch::new_from_cache(
+                batch_processor.slot,
+                batch_processor.epoch,
+                &global_program_cache,
+            )
+        };
 
         batch_processor.replenish_program_cache(
-            &mock_bank,
-            &account_maps,
+            &account_loader,
+            &account_set,
+            &mut program_cache_for_tx_batch,
             &mut ExecuteTimings::default(),
             false,
             true,
+            true,
         );
     }
 
@@ -1497,7 +1526,6 @@ mod tests {
         let batch_processor =
             TransactionBatchProcessor::new(0, 0, Arc::downgrade(&fork_graph), None, None);
         let key = Pubkey::new_unique();
-        let owner = Pubkey::new_unique();
 
         let mut account_data = AccountSharedData::default();
         account_data.set_owner(bpf_loader::id());
@@ -1506,25 +1534,37 @@ mod tests {
             .write()
             .unwrap()
             .insert(key, account_data);
+        let account_loader = (&mock_bank).into();
 
-        let mut account_maps: HashMap<Pubkey, (&Pubkey, u64)> = HashMap::new();
-        account_maps.insert(key, (&owner, 4));
+        let mut account_set = HashSet::new();
+        account_set.insert(key);
         let mut loaded_missing = 0;
 
         for limit_to_load_programs in [false, true] {
-            let result = batch_processor.replenish_program_cache(
-                &mock_bank,
-                &account_maps,
+            let mut program_cache_for_tx_batch = {
+                let global_program_cache = batch_processor.global_program_cache.read().unwrap();
+                ProgramCacheForTxBatch::new_from_cache(
+                    batch_processor.slot,
+                    batch_processor.epoch,
+                    &global_program_cache,
+                )
+            };
+
+            batch_processor.replenish_program_cache(
+                &account_loader,
+                &account_set,
+                &mut program_cache_for_tx_batch,
                 &mut ExecuteTimings::default(),
                 false,
                 limit_to_load_programs,
+                true,
             );
-            assert!(!result.hit_max_limit);
-            if result.loaded_missing {
+            assert!(!program_cache_for_tx_batch.hit_max_limit);
+            if program_cache_for_tx_batch.loaded_missing {
                 loaded_missing += 1;
             }
 
-            let program = result.find(&key).unwrap();
+            let program = program_cache_for_tx_batch.find(&key).unwrap();
             assert!(matches!(
                 program.program,
                 ProgramCacheEntryType::FailedVerification(_)
@@ -1537,47 +1577,28 @@ mod tests {
     fn test_filter_executable_program_accounts() {
         let mock_bank = MockBankCallback::default();
         let key1 = Pubkey::new_unique();
-        let owner1 = Pubkey::new_unique();
+        let owner1 = bpf_loader::id();
+        let key2 = Pubkey::new_unique();
+        let owner2 = loader_v4::id();
 
-        let mut data = AccountSharedData::default();
-        data.set_owner(owner1);
-        data.set_lamports(93);
+        let mut data1 = AccountSharedData::default();
+        data1.set_owner(owner1);
+        data1.set_lamports(93);
         mock_bank
             .account_shared_data
             .write()
             .unwrap()
-            .insert(key1, data);
-
-        let message = Message {
-            account_keys: vec![key1],
-            header: MessageHeader::default(),
-            instructions: vec![CompiledInstruction {
-                program_id_index: 0,
-                accounts: vec![],
-                data: vec![],
-            }],
-            recent_blockhash: Hash::default(),
-        };
+            .insert(key1, data1);
 
-        let sanitized_message = new_unchecked_sanitized_message(message);
-
-        let sanitized_transaction_1 = SanitizedTransaction::new_for_tests(
-            sanitized_message,
-            vec![Signature::new_unique()],
-            false,
-        );
-
-        let key2 = Pubkey::new_unique();
-        let owner2 = Pubkey::new_unique();
-
-        let mut account_data = AccountSharedData::default();
-        account_data.set_owner(owner2);
-        account_data.set_lamports(90);
+        let mut data2 = AccountSharedData::default();
+        data2.set_owner(owner2);
+        data2.set_lamports(90);
         mock_bank
             .account_shared_data
             .write()
             .unwrap()
-            .insert(key2, account_data);
+            .insert(key2, data2);
+        let account_loader = (&mock_bank).into();
 
         let message = Message {
             account_keys: vec![key1, key2],
@@ -1592,34 +1613,22 @@ mod tests {
 
         let sanitized_message = new_unchecked_sanitized_message(message);
 
-        let sanitized_transaction_2 = SanitizedTransaction::new_for_tests(
+        let sanitized_transaction = SanitizedTransaction::new_for_tests(
             sanitized_message,
             vec![Signature::new_unique()],
             false,
         );
 
-        let transactions = vec![
-            sanitized_transaction_1.clone(),
-            sanitized_transaction_2.clone(),
-            sanitized_transaction_1,
-        ];
-        let check_results = vec![
-            Ok(CheckedTransactionDetails::default()),
-            Ok(CheckedTransactionDetails::default()),
-            Err(TransactionError::ProgramAccountNotFound),
-        ];
-        let owners = vec![owner1, owner2];
-
-        let result = TransactionBatchProcessor::<TestForkGraph>::filter_executable_program_accounts(
-            &mock_bank,
-            &transactions,
-            &check_results,
-            &owners,
+        let batch_processor = TransactionBatchProcessor::<TestForkGraph>::default();
+        let program_accounts_set = batch_processor.filter_executable_program_accounts(
+            &account_loader,
+            &mut ProgramCacheForTxBatch::default(),
+            &sanitized_transaction,
         );
 
-        assert_eq!(result.len(), 2);
-        assert_eq!(result[&key1], (&owner1, 2));
-        assert_eq!(result[&key2], (&owner2, 1));
+        assert_eq!(program_accounts_set.len(), 2);
+        assert!(program_accounts_set.contains(&key1));
+        assert!(program_accounts_set.contains(&key2));
     }
 
     #[test]
@@ -1629,8 +1638,8 @@ mod tests {
 
         let non_program_pubkey1 = Pubkey::new_unique();
         let non_program_pubkey2 = Pubkey::new_unique();
-        let program1_pubkey = Pubkey::new_unique();
-        let program2_pubkey = Pubkey::new_unique();
+        let program1_pubkey = bpf_loader::id();
+        let program2_pubkey = loader_v4::id();
         let account1_pubkey = Pubkey::new_unique();
         let account2_pubkey = Pubkey::new_unique();
         let account3_pubkey = Pubkey::new_unique();
@@ -1671,6 +1680,7 @@ mod tests {
             account4_pubkey,
             AccountSharedData::new(40, 1, &program2_pubkey),
         );
+        let account_loader = (&bank).into();
 
         let tx1 = Transaction::new_with_compiled_instructions(
             &[&keypair1],
@@ -1690,123 +1700,34 @@ mod tests {
         );
         let sanitized_tx2 = SanitizedTransaction::from_transaction_for_tests(tx2);
 
-        let owners = &[program1_pubkey, program2_pubkey];
-        let programs =
-            TransactionBatchProcessor::<TestForkGraph>::filter_executable_program_accounts(
-                &bank,
-                &[sanitized_tx1, sanitized_tx2],
-                &[
-                    Ok(CheckedTransactionDetails::default()),
-                    Ok(CheckedTransactionDetails::default()),
-                ],
-                owners,
-            );
+        let batch_processor = TransactionBatchProcessor::<TestForkGraph>::default();
 
-        // The result should contain only account3_pubkey, and account4_pubkey as the program accounts
-        assert_eq!(programs.len(), 2);
-        assert_eq!(
-            programs
-                .get(&account3_pubkey)
-                .expect("failed to find the program account"),
-            &(&program1_pubkey, 2)
-        );
-        assert_eq!(
-            programs
-                .get(&account4_pubkey)
-                .expect("failed to find the program account"),
-            &(&program2_pubkey, 1)
+        let tx1_programs = batch_processor.filter_executable_program_accounts(
+            &account_loader,
+            &mut ProgramCacheForTxBatch::default(),
+            &sanitized_tx1,
         );
-    }
-
-    #[test]
-    fn test_filter_executable_program_accounts_invalid_blockhash() {
-        let keypair1 = Keypair::new();
-        let keypair2 = Keypair::new();
-
-        let non_program_pubkey1 = Pubkey::new_unique();
-        let non_program_pubkey2 = Pubkey::new_unique();
-        let program1_pubkey = Pubkey::new_unique();
-        let program2_pubkey = Pubkey::new_unique();
-        let account1_pubkey = Pubkey::new_unique();
-        let account2_pubkey = Pubkey::new_unique();
-        let account3_pubkey = Pubkey::new_unique();
-        let account4_pubkey = Pubkey::new_unique();
-
-        let account5_pubkey = Pubkey::new_unique();
 
-        let bank = MockBankCallback::default();
-        bank.account_shared_data.write().unwrap().insert(
-            non_program_pubkey1,
-            AccountSharedData::new(1, 10, &account5_pubkey),
-        );
-        bank.account_shared_data.write().unwrap().insert(
-            non_program_pubkey2,
-            AccountSharedData::new(1, 10, &account5_pubkey),
-        );
-        bank.account_shared_data.write().unwrap().insert(
-            program1_pubkey,
-            AccountSharedData::new(40, 1, &account5_pubkey),
-        );
-        bank.account_shared_data.write().unwrap().insert(
-            program2_pubkey,
-            AccountSharedData::new(40, 1, &account5_pubkey),
-        );
-        bank.account_shared_data.write().unwrap().insert(
-            account1_pubkey,
-            AccountSharedData::new(1, 10, &non_program_pubkey1),
-        );
-        bank.account_shared_data.write().unwrap().insert(
-            account2_pubkey,
-            AccountSharedData::new(1, 10, &non_program_pubkey2),
-        );
-        bank.account_shared_data.write().unwrap().insert(
-            account3_pubkey,
-            AccountSharedData::new(40, 1, &program1_pubkey),
-        );
-        bank.account_shared_data.write().unwrap().insert(
-            account4_pubkey,
-            AccountSharedData::new(40, 1, &program2_pubkey),
+        assert_eq!(tx1_programs.len(), 1);
+        assert!(
+            tx1_programs.contains(&account3_pubkey),
+            "failed to find the program account",
         );
 
-        let tx1 = Transaction::new_with_compiled_instructions(
-            &[&keypair1],
-            &[non_program_pubkey1],
-            Hash::new_unique(),
-            vec![account1_pubkey, account2_pubkey, account3_pubkey],
-            vec![CompiledInstruction::new(1, &(), vec![0])],
+        let tx2_programs = batch_processor.filter_executable_program_accounts(
+            &account_loader,
+            &mut ProgramCacheForTxBatch::default(),
+            &sanitized_tx2,
         );
-        let sanitized_tx1 = SanitizedTransaction::from_transaction_for_tests(tx1);
 
-        let tx2 = Transaction::new_with_compiled_instructions(
-            &[&keypair2],
-            &[non_program_pubkey2],
-            Hash::new_unique(),
-            vec![account4_pubkey, account3_pubkey, account2_pubkey],
-            vec![CompiledInstruction::new(1, &(), vec![0])],
+        assert_eq!(tx2_programs.len(), 2);
+        assert!(
+            tx2_programs.contains(&account3_pubkey),
+            "failed to find the program account",
         );
-        // Let's not register blockhash from tx2. This should cause the tx2 to fail
-        let sanitized_tx2 = SanitizedTransaction::from_transaction_for_tests(tx2);
-
-        let owners = &[program1_pubkey, program2_pubkey];
-        let check_results = vec![
-            Ok(CheckedTransactionDetails::default()),
-            Err(TransactionError::BlockhashNotFound),
-        ];
-        let programs =
-            TransactionBatchProcessor::<TestForkGraph>::filter_executable_program_accounts(
-                &bank,
-                &[sanitized_tx1, sanitized_tx2],
-                &check_results,
-                owners,
-            );
-
-        // The result should contain only account3_pubkey as the program accounts
-        assert_eq!(programs.len(), 1);
-        assert_eq!(
-            programs
-                .get(&account3_pubkey)
-                .expect("failed to find the program account"),
-            &(&program1_pubkey, 1)
+        assert!(
+            tx2_programs.contains(&account4_pubkey),
+            "failed to find the program account",
         );
     }
 
@@ -2004,13 +1925,18 @@ mod tests {
         let mut loaded_programs_for_tx_batch = ProgramCacheForTxBatch::new_from_cache(
             0,
             0,
-            &batch_processor.program_cache.read().unwrap(),
-        );
-        batch_processor.program_cache.write().unwrap().extract(
-            &mut vec![(key, (ProgramCacheMatchCriteria::NoCriteria, 1))],
-            &mut loaded_programs_for_tx_batch,
-            true,
+            &batch_processor.global_program_cache.read().unwrap(),
         );
+        batch_processor
+            .global_program_cache
+            .write()
+            .unwrap()
+            .extract(
+                &mut vec![(key, ProgramCacheMatchCriteria::NoCriteria)],
+                &mut loaded_programs_for_tx_batch,
+                true,
+                true,
+            );
         let entry = loaded_programs_for_tx_batch.find(&key).unwrap();
 
         // Repeating code because ProgramCacheEntry does not implement clone.

+ 23 - 11
svm/tests/concurrent_tests.rs

@@ -12,12 +12,11 @@ use {
     solana_instruction::{AccountMeta, Instruction},
     solana_program_runtime::{
         execution_budget::SVMTransactionExecutionAndFeeBudgetLimits,
-        loaded_programs::ProgramCacheEntryType,
+        loaded_programs::{ProgramCacheEntryType, ProgramCacheForTxBatch},
     },
     solana_pubkey::Pubkey,
-    solana_sdk_ids::bpf_loader_upgradeable,
     solana_svm::{
-        account_loader::{CheckedTransactionDetails, TransactionCheckResult},
+        account_loader::{AccountLoader, CheckedTransactionDetails, TransactionCheckResult},
         transaction_processing_result::{
             ProcessedTransaction, TransactionProcessingResultExtensions,
         },
@@ -26,9 +25,10 @@ use {
             TransactionProcessingEnvironment,
         },
     },
+    solana_svm_feature_set::SVMFeatureSet,
     solana_timings::ExecuteTimings,
     solana_transaction::{sanitized::SanitizedTransaction, Transaction},
-    std::collections::{HashMap, HashSet},
+    std::collections::HashSet,
 };
 
 mod mock_bank;
@@ -39,18 +39,13 @@ fn program_cache_execution(threads: usize) {
     let batch_processor =
         TransactionBatchProcessor::new(5, 5, Arc::downgrade(&fork_graph), None, None);
 
-    const LOADER: Pubkey = bpf_loader_upgradeable::id();
     let programs = vec![
         deploy_program("hello-solana".to_string(), 0, &mut mock_bank),
         deploy_program("simple-transfer".to_string(), 0, &mut mock_bank),
         deploy_program("clock-sysvar".to_string(), 0, &mut mock_bank),
     ];
 
-    let account_maps: HashMap<Pubkey, (&Pubkey, u64)> = programs
-        .iter()
-        .enumerate()
-        .map(|(idx, key)| (*key, (&LOADER, idx as u64)))
-        .collect();
+    let account_maps: HashSet<Pubkey> = programs.iter().copied().collect();
 
     let ths: Vec<_> = (0..threads)
         .map(|_| {
@@ -63,12 +58,29 @@ fn program_cache_execution(threads: usize) {
             let maps = account_maps.clone();
             let programs = programs.clone();
             thread::spawn(move || {
-                let result = processor.replenish_program_cache(
+                let feature_set = SVMFeatureSet::all_enabled();
+                let account_loader = AccountLoader::new_with_loaded_accounts_capacity(
+                    None,
                     &local_bank,
+                    &feature_set,
+                    0,
+                );
+                let mut result = {
+                    let global_program_cache = processor.global_program_cache.read().unwrap();
+                    ProgramCacheForTxBatch::new_from_cache(
+                        processor.slot,
+                        processor.epoch,
+                        &global_program_cache,
+                    )
+                };
+                processor.replenish_program_cache(
+                    &account_loader,
                     &maps,
+                    &mut result,
                     &mut ExecuteTimings::default(),
                     false,
                     true,
+                    true,
                 );
                 for key in &programs {
                     let cache_entry = result.find(key);

+ 255 - 3
svm/tests/integration_test.rs

@@ -49,7 +49,7 @@ use {
     solana_transaction_context::TransactionReturnData,
     solana_transaction_error::TransactionError,
     solana_type_overrides::sync::{Arc, RwLock},
-    std::collections::HashMap,
+    std::{collections::HashMap, sync::atomic::Ordering},
     test_case::test_case,
 };
 
@@ -264,7 +264,7 @@ impl SvmTestEnvironment<'_> {
                 let programs_modified_by_tx = &executed_tx.programs_modified_by_tx;
                 if executed_tx.was_successful() && !programs_modified_by_tx.is_empty() {
                     self.batch_processor
-                        .program_cache
+                        .global_program_cache
                         .write()
                         .unwrap()
                         .merge(programs_modified_by_tx);
@@ -278,7 +278,7 @@ impl SvmTestEnvironment<'_> {
     pub fn is_program_blocked(&self, program_id: &Pubkey) -> bool {
         let (_, program_cache_entry) = self
             .batch_processor
-            .program_cache
+            .global_program_cache
             .read()
             .unwrap()
             .get_flattened_entries_for_tests()
@@ -2628,6 +2628,258 @@ fn program_cache_loaderv3_buffer_swap(invoke_changed_program: bool) {
     assert!(env.is_program_blocked(&target));
 }
 
+#[test]
+fn program_cache_stats() {
+    let mut test_entry = SvmTestEntry::default();
+
+    let program_name = "hello-solana";
+    let noop_program = program_address(program_name);
+
+    let fee_payer_keypair = Keypair::new();
+    let fee_payer = fee_payer_keypair.pubkey();
+
+    let mut fee_payer_data = AccountSharedData::default();
+    fee_payer_data.set_lamports(LAMPORTS_PER_SOL * 100);
+    test_entry.add_initial_account(fee_payer, &fee_payer_data);
+
+    test_entry
+        .initial_programs
+        .push((program_name.to_string(), DEPLOYMENT_SLOT, Some(fee_payer)));
+
+    let missing_program = Pubkey::new_unique();
+
+    // set up a future upgrade after the first batch
+    let buffer_address = Pubkey::new_unique();
+    {
+        let mut data = bincode::serialize(&UpgradeableLoaderState::Buffer {
+            authority_address: Some(fee_payer),
+        })
+        .unwrap();
+        let mut program_bytecode = load_program(program_name.to_string());
+        data.append(&mut program_bytecode);
+
+        let buffer_account = AccountSharedData::create(
+            LAMPORTS_PER_SOL,
+            data,
+            bpf_loader_upgradeable::id(),
+            true,
+            u64::MAX,
+        );
+
+        test_entry.add_initial_account(buffer_address, &buffer_account);
+    }
+
+    let make_transaction = |instructions: &[Instruction]| {
+        Transaction::new_signed_with_payer(
+            instructions,
+            Some(&fee_payer),
+            &[&fee_payer_keypair],
+            Hash::default(),
+        )
+    };
+
+    let succesful_noop_instruction = Instruction::new_with_bytes(noop_program, &[], vec![]);
+    let succesful_transfer_instruction =
+        system_instruction::transfer(&fee_payer, &Pubkey::new_unique(), LAMPORTS_PER_SOL);
+    let failing_transfer_instruction =
+        system_instruction::transfer(&fee_payer, &Pubkey::new_unique(), LAMPORTS_PER_SOL * 1000);
+    let fee_only_noop_instruction = Instruction::new_with_bytes(missing_program, &[], vec![]);
+
+    let mut noop_tx_usage = 0;
+    let mut system_tx_usage = 0;
+    let mut successful_transfers = 0;
+
+    test_entry.push_transaction(make_transaction(&[succesful_noop_instruction.clone()]));
+    noop_tx_usage += 1;
+
+    test_entry.push_transaction(make_transaction(&[succesful_transfer_instruction.clone()]));
+    system_tx_usage += 1;
+    successful_transfers += 1;
+
+    test_entry.push_transaction_with_status(
+        make_transaction(&[failing_transfer_instruction.clone()]),
+        ExecutionStatus::ExecutedFailed,
+    );
+    system_tx_usage += 1;
+
+    test_entry.push_transaction(make_transaction(&[
+        succesful_noop_instruction.clone(),
+        succesful_noop_instruction.clone(),
+        succesful_transfer_instruction.clone(),
+        succesful_transfer_instruction.clone(),
+        succesful_noop_instruction.clone(),
+    ]));
+    noop_tx_usage += 1;
+    system_tx_usage += 1;
+    successful_transfers += 2;
+
+    test_entry.push_transaction_with_status(
+        make_transaction(&[
+            failing_transfer_instruction.clone(),
+            succesful_noop_instruction.clone(),
+            succesful_transfer_instruction.clone(),
+        ]),
+        ExecutionStatus::ExecutedFailed,
+    );
+    noop_tx_usage += 1;
+    system_tx_usage += 1;
+
+    // load failure/fee-only does not touch the program cache
+    test_entry.push_transaction_with_status(
+        make_transaction(&[
+            succesful_noop_instruction.clone(),
+            fee_only_noop_instruction.clone(),
+        ]),
+        ExecutionStatus::ProcessedFailed,
+    );
+
+    test_entry.decrease_expected_lamports(
+        &fee_payer,
+        LAMPORTS_PER_SIGNATURE * test_entry.transaction_batch.len() as u64
+            + LAMPORTS_PER_SOL * successful_transfers,
+    );
+
+    // nor does discard
+    test_entry.transaction_batch.push(TransactionBatchItem {
+        transaction: make_transaction(&[succesful_transfer_instruction.clone()]),
+        check_result: Err(TransactionError::BlockhashNotFound),
+        asserts: ExecutionStatus::Discarded.into(),
+    });
+
+    let mut env = SvmTestEnvironment::create(test_entry);
+    env.execute();
+
+    // check all usage stats are as we expect
+    let global_program_cache = env
+        .batch_processor
+        .global_program_cache
+        .read()
+        .unwrap()
+        .get_flattened_entries_for_tests()
+        .into_iter()
+        .rev()
+        .collect::<Vec<_>>();
+
+    let (_, noop_entry) = global_program_cache
+        .iter()
+        .find(|(pubkey, _)| *pubkey == noop_program)
+        .unwrap();
+
+    assert_eq!(
+        noop_entry.tx_usage_counter.load(Ordering::Relaxed),
+        noop_tx_usage,
+        "noop_tx_usage matches"
+    );
+
+    let (_, system_entry) = global_program_cache
+        .iter()
+        .find(|(pubkey, _)| *pubkey == system_program::id())
+        .unwrap();
+
+    assert_eq!(
+        system_entry.tx_usage_counter.load(Ordering::Relaxed),
+        system_tx_usage,
+        "system_tx_usage matches"
+    );
+
+    assert!(
+        !global_program_cache
+            .iter()
+            .any(|(pubkey, _)| *pubkey == missing_program),
+        "missing_program is missing"
+    );
+
+    // set up the second batch
+    let mut test_entry = SvmTestEntry {
+        initial_accounts: env.test_entry.final_accounts.clone(),
+        final_accounts: env.test_entry.final_accounts.clone(),
+        ..SvmTestEntry::default()
+    };
+
+    // upgrade the program. this blocks execution but does not create a tombstone
+    // the main thing we are testing is the tx counter is ported across upgrades
+    //
+    // note the upgrade transaction actually counts as a usage, per the existing rules
+    // the program cache must load the program because it has no idea if it will be used for cpi
+    test_entry.push_transaction(Transaction::new_signed_with_payer(
+        &[loaderv3_instruction::upgrade(
+            &noop_program,
+            &buffer_address,
+            &fee_payer,
+            &Pubkey::new_unique(),
+        )],
+        Some(&fee_payer),
+        &[&fee_payer_keypair],
+        Hash::default(),
+    ));
+    noop_tx_usage += 1;
+
+    test_entry.drop_expected_account(buffer_address);
+
+    test_entry.push_transaction_with_status(
+        make_transaction(&[succesful_noop_instruction.clone()]),
+        ExecutionStatus::ExecutedFailed,
+    );
+    noop_tx_usage += 1;
+
+    test_entry.decrease_expected_lamports(&fee_payer, LAMPORTS_PER_SIGNATURE * 2);
+
+    env.test_entry = test_entry;
+    env.execute();
+
+    let (_, noop_entry) = env
+        .batch_processor
+        .global_program_cache
+        .read()
+        .unwrap()
+        .get_flattened_entries_for_tests()
+        .into_iter()
+        .rev()
+        .find(|(pubkey, _)| *pubkey == noop_program)
+        .unwrap();
+
+    assert_eq!(
+        noop_entry.tx_usage_counter.load(Ordering::Relaxed),
+        noop_tx_usage,
+        "noop_tx_usage matches"
+    );
+
+    // third batch, this creates a delayed visibility tombstone
+    let mut test_entry = SvmTestEntry {
+        initial_accounts: env.test_entry.final_accounts.clone(),
+        final_accounts: env.test_entry.final_accounts.clone(),
+        ..SvmTestEntry::default()
+    };
+
+    test_entry.push_transaction_with_status(
+        make_transaction(&[succesful_noop_instruction.clone()]),
+        ExecutionStatus::ExecutedFailed,
+    );
+    noop_tx_usage += 1;
+
+    test_entry.decrease_expected_lamports(&fee_payer, LAMPORTS_PER_SIGNATURE);
+
+    env.test_entry = test_entry;
+    env.execute();
+
+    let (_, noop_entry) = env
+        .batch_processor
+        .global_program_cache
+        .read()
+        .unwrap()
+        .get_flattened_entries_for_tests()
+        .into_iter()
+        .rev()
+        .find(|(pubkey, _)| *pubkey == noop_program)
+        .unwrap();
+
+    assert_eq!(
+        noop_entry.tx_usage_counter.load(Ordering::Relaxed),
+        noop_tx_usage,
+        "noop_tx_usage matches"
+    );
+}
+
 #[derive(Clone, PartialEq, Eq)]
 enum Inspect<'a> {
     LiveRead(&'a AccountSharedData),