Jelajahi Sumber

delet gpu code (#8967)

delet gpu code, PinnedVec
cavemanloverboy 4 hari lalu
induk
melakukan
4f08cb9585
40 mengubah file dengan 463 tambahan dan 2147 penghapusan
  1. 2 2
      bench-streamer/src/main.rs
  2. 7 4
      core/src/banking_stage.rs
  3. 2 2
      core/src/banking_stage/transaction_scheduler/receive_and_buffer.rs
  4. 1 1
      core/src/banking_trace.rs
  5. 1 1
      core/src/cluster_info_vote_listener.rs
  6. 3 3
      core/src/forwarding_stage.rs
  7. 1 2
      core/src/lib.rs
  8. 4 4
      core/src/repair/repair_handler.rs
  9. 4 4
      core/src/repair/serve_repair.rs
  10. 2 3
      core/src/repair/standard_repair_handler.rs
  11. 0 16
      core/src/replay_stage.rs
  12. 4 17
      core/src/sigverify.rs
  13. 3 4
      core/src/sigverify_stage.rs
  14. 0 9
      core/src/validator.rs
  15. 1 55
      entry/benches/entry_sigverify.rs
  16. 76 473
      entry/src/entry.rs
  17. 7 12
      gossip/src/cluster_info.rs
  18. 0 1
      gossip/src/protocol.rs
  19. 8 52
      ledger/src/blockstore_processor.rs
  20. 1 4
      ledger/src/shred.rs
  21. 2 1
      ledger/src/shred/wire.rs
  22. 29 485
      ledger/src/sigverify_shreds.rs
  23. 1 1
      local-cluster/src/cluster_tests.rs
  24. 8 24
      perf/benches/sigverify.rs
  25. 0 310
      perf/src/cuda_runtime.rs
  26. 1 10
      perf/src/lib.rs
  27. 44 68
      perf/src/packet.rs
  28. 1 161
      perf/src/perf_libs.rs
  29. 184 0
      perf/src/recycled_vec.rs
  30. 1 1
      perf/src/recycler.rs
  31. 3 3
      perf/src/recycler_cache.rs
  32. 29 327
      perf/src/sigverify.rs
  33. 5 31
      poh-bench/src/main.rs
  34. 2 2
      poh/benches/poh_verify.rs
  35. 2 11
      runtime/src/bank/check_transactions.rs
  36. 13 13
      streamer/src/packet.rs
  37. 7 7
      streamer/src/streamer.rs
  38. 4 11
      turbine/src/sigverify_shreds.rs
  39. 0 7
      validator/src/commands/run/execute.rs
  40. 0 5
      wen-restart/src/wen_restart.rs

+ 2 - 2
bench-streamer/src/main.rs

@@ -8,7 +8,7 @@ use {
         sockets::{multi_bind_in_range_with_config, SocketConfiguration},
     },
     solana_streamer::{
-        packet::{Packet, PacketBatchRecycler, PinnedPacketBatch, PACKET_DATA_SIZE},
+        packet::{Packet, PacketBatchRecycler, RecycledPacketBatch, PACKET_DATA_SIZE},
         sendmmsg::batch_send,
         streamer::{receiver, PacketBatchReceiver, StreamerReceiveStats},
     },
@@ -39,7 +39,7 @@ fn producer(dest_addr: &SocketAddr, exit: Arc<AtomicBool>) -> JoinHandle<usize>
         packet.meta_mut().set_socket_addr(dest_addr);
         packet
     };
-    let mut packet_batch = PinnedPacketBatch::with_capacity(batch_size);
+    let mut packet_batch = RecycledPacketBatch::with_capacity(batch_size);
     packet_batch.resize(batch_size, packet);
 
     spawn(move || {

+ 7 - 4
core/src/banking_stage.rs

@@ -1,6 +1,5 @@
 //! The `banking_stage` processes Transaction messages. It is intended to be used
-//! to construct a software pipeline. The stage uses all available CPU cores and
-//! can do its processing in parallel with signature verification on the GPU.
+//! to construct a software pipeline.
 
 #[cfg(feature = "dev-context-only-utils")]
 use qualifier_attr::qualifiers;
@@ -1039,7 +1038,9 @@ mod tests {
             .collect();
         trace!("done");
         assert_eq!(entries.len(), genesis_config.ticks_per_slot as usize);
-        assert!(entries.verify(&start_hash, &entry::thread_pool_for_tests()));
+        assert!(entries
+            .verify(&start_hash, &entry::thread_pool_for_tests())
+            .status());
         assert_eq!(entries[entries.len() - 1].hash, bank.last_blockhash());
     }
 
@@ -1157,7 +1158,9 @@ mod tests {
                 .map(|(_bank, (entry, _tick_height))| entry),
         );
 
-        assert!(entries.verify(&blockhash, &entry::thread_pool_for_tests()));
+        assert!(entries
+            .verify(&blockhash, &entry::thread_pool_for_tests())
+            .status());
         for entry in entries {
             bank.process_entry_transactions(entry.transactions)
                 .iter()

+ 2 - 2
core/src/banking_stage/transaction_scheduler/receive_and_buffer.rs

@@ -583,7 +583,7 @@ mod tests {
             v0, AccountMeta, AddressLookupTableAccount, Instruction, VersionedMessage,
         },
         solana_packet::{Meta, PACKET_DATA_SIZE},
-        solana_perf::packet::{to_packet_batches, Packet, PacketBatch, PinnedPacketBatch},
+        solana_perf::packet::{to_packet_batches, Packet, PacketBatch, RecycledPacketBatch},
         solana_pubkey::Pubkey,
         solana_signer::Signer,
         solana_system_interface::instruction as system_instruction,
@@ -785,7 +785,7 @@ mod tests {
         let (mut receive_and_buffer, mut container) =
             setup_transaction_view_receive_and_buffer(receiver, bank_forks.clone());
 
-        let packet_batches = Arc::new(vec![PacketBatch::from(PinnedPacketBatch::new(vec![
+        let packet_batches = Arc::new(vec![PacketBatch::from(RecycledPacketBatch::new(vec![
             Packet::new([1u8; PACKET_DATA_SIZE], Meta::default()),
         ]))]);
         sender.send(packet_batches).unwrap();

+ 1 - 1
core/src/banking_trace.rs

@@ -64,7 +64,7 @@ pub struct BankingTracer {
 #[cfg_attr(
     feature = "frozen-abi",
     derive(AbiExample),
-    frozen_abi(digest = "9njKW2EBmvkBGCHPysxFxZzg1cXgmTxAUYZZiiqFhVHr")
+    frozen_abi(digest = "dJWSTAdP7tkT5KWT8xfSWXYg3MVaGp846y9j871Xov2")
 )]
 #[derive(Serialize, Deserialize, Debug)]
 pub struct TimedTracedEvent(pub std::time::SystemTime, pub TracedEvent);

+ 1 - 1
core/src/cluster_info_vote_listener.rs

@@ -281,7 +281,7 @@ impl ClusterInfoVoteListener {
         let mut packet_batches = packet::to_packet_batches(&votes, 1);
 
         // Votes should already be filtered by this point.
-        sigverify::ed25519_verify_cpu(
+        sigverify::ed25519_verify(
             &mut packet_batches,
             /*reject_non_vote=*/ false,
             votes.len(),

+ 3 - 3
core/src/forwarding_stage.rs

@@ -850,7 +850,7 @@ mod tests {
         packet::PacketFlags,
         solana_hash::Hash,
         solana_keypair::Keypair,
-        solana_perf::packet::{Packet, PacketBatch, PinnedPacketBatch},
+        solana_perf::packet::{Packet, PacketBatch, RecycledPacketBatch},
         solana_pubkey::Pubkey,
         solana_runtime::genesis_utils::create_genesis_config,
         solana_system_transaction as system_transaction,
@@ -942,13 +942,13 @@ mod tests {
 
         // Send packet batches.
         let non_vote_packets =
-            BankingPacketBatch::new(vec![PacketBatch::from(PinnedPacketBatch::new(vec![
+            BankingPacketBatch::new(vec![PacketBatch::from(RecycledPacketBatch::new(vec![
                 simple_transfer_with_flags(PacketFlags::FROM_STAKED_NODE),
                 simple_transfer_with_flags(PacketFlags::FROM_STAKED_NODE | PacketFlags::DISCARD),
                 simple_transfer_with_flags(PacketFlags::FROM_STAKED_NODE | PacketFlags::FORWARDED),
             ]))]);
         let vote_packets =
-            BankingPacketBatch::new(vec![PacketBatch::from(PinnedPacketBatch::new(vec![
+            BankingPacketBatch::new(vec![PacketBatch::from(RecycledPacketBatch::new(vec![
                 simple_transfer_with_flags(
                     PacketFlags::SIMPLE_VOTE_TX | PacketFlags::FROM_STAKED_NODE,
                 ),

+ 1 - 2
core/src/lib.rs

@@ -12,8 +12,7 @@
 #![recursion_limit = "2048"]
 //! The `solana` library implements the Solana high-performance blockchain architecture.
 //! It includes a full Rust implementation of the architecture (see
-//! [Validator](validator/struct.Validator.html)) as well as hooks to GPU implementations of its most
-//! paralellizable components (i.e. [SigVerify](sigverify/index.html)).  It also includes
+//! [Validator](validator/struct.Validator.html)).  It also includes
 //! command-line tools to spin up validators and a Rust library
 //!
 

+ 4 - 4
core/src/repair/repair_handler.rs

@@ -16,7 +16,7 @@ use {
         blockstore::Blockstore,
         shred::Nonce,
     },
-    solana_perf::packet::{Packet, PacketBatch, PacketBatchRecycler, PinnedPacketBatch},
+    solana_perf::packet::{Packet, PacketBatch, PacketBatchRecycler, RecycledPacketBatch},
     solana_pubkey::Pubkey,
     solana_runtime::bank_forks::SharableBanks,
     std::{
@@ -48,7 +48,7 @@ pub trait RepairHandler {
         // Try to find the requested index in one of the slots
         let packet = self.repair_response_packet(slot, shred_index, from_addr, nonce)?;
         Some(
-            PinnedPacketBatch::new_unpinned_with_recycler_data(
+            RecycledPacketBatch::new_with_recycler_data(
                 recycler,
                 "run_window_request",
                 vec![packet],
@@ -71,7 +71,7 @@ pub trait RepairHandler {
             // meta.received must be at least 1 by this point
             let packet = self.repair_response_packet(slot, meta.received - 1, from_addr, nonce)?;
             return Some(
-                PinnedPacketBatch::new_unpinned_with_recycler_data(
+                RecycledPacketBatch::new_with_recycler_data(
                     recycler,
                     "run_highest_window_request",
                     vec![packet],
@@ -119,7 +119,7 @@ pub trait RepairHandler {
             nonce,
         )?;
         Some(
-            PinnedPacketBatch::new_unpinned_with_recycler_data(
+            RecycledPacketBatch::new_with_recycler_data(
                 recycler,
                 "run_ancestor_hashes",
                 vec![packet],

+ 4 - 4
core/src/repair/serve_repair.rs

@@ -40,7 +40,7 @@ use {
     solana_packet::PACKET_DATA_SIZE,
     solana_perf::{
         data_budget::DataBudget,
-        packet::{Packet, PacketBatch, PacketBatchRecycler, PinnedPacketBatch},
+        packet::{Packet, PacketBatch, PacketBatchRecycler, RecycledPacketBatch},
     },
     solana_pubkey::{Pubkey, PUBKEY_BYTES},
     solana_runtime::bank_forks::SharableBanks,
@@ -1052,7 +1052,7 @@ impl ServeRepair {
 
         if !pending_pings.is_empty() {
             stats.pings_sent += pending_pings.len();
-            let batch = PinnedPacketBatch::new(pending_pings);
+            let batch = RecycledPacketBatch::new(pending_pings);
             let _ = packet_batch_sender.send(batch.into());
         }
     }
@@ -2039,7 +2039,7 @@ mod tests {
                 )
             })
             .collect();
-        let expected = PacketBatch::Pinned(PinnedPacketBatch::new(expected));
+        let expected = PacketBatch::Pinned(RecycledPacketBatch::new(expected));
         assert_eq!(rv, expected);
     }
 
@@ -2081,7 +2081,7 @@ mod tests {
             .expect("run_orphan packets");
 
         // Verify responses
-        let expected = PinnedPacketBatch::new(vec![repair_response::repair_response_packet(
+        let expected = RecycledPacketBatch::new(vec![repair_response::repair_response_packet(
             &blockstore,
             2,
             31, // shred_index

+ 2 - 3
core/src/repair/standard_repair_handler.rs

@@ -2,7 +2,7 @@ use {
     super::{repair_handler::RepairHandler, repair_response},
     solana_clock::Slot,
     solana_ledger::{blockstore::Blockstore, shred::Nonce},
-    solana_perf::packet::{Packet, PacketBatch, PacketBatchRecycler, PinnedPacketBatch},
+    solana_perf::packet::{Packet, PacketBatch, PacketBatchRecycler, RecycledPacketBatch},
     std::{net::SocketAddr, sync::Arc},
 };
 
@@ -45,8 +45,7 @@ impl RepairHandler for StandardRepairHandler {
         max_responses: usize,
         nonce: Nonce,
     ) -> Option<PacketBatch> {
-        let mut res =
-            PinnedPacketBatch::new_unpinned_with_recycler(recycler, max_responses, "run_orphan");
+        let mut res = RecycledPacketBatch::new_with_recycler(recycler, max_responses, "run_orphan");
         // Try to find the next "n" parent slots of the input slot
         let packets = std::iter::successors(self.blockstore.meta(slot).ok()?, |meta| {
             self.blockstore.meta(meta.parent_slot?).ok()?

+ 0 - 16
core/src/replay_stage.rs

@@ -37,7 +37,6 @@ use {
     rayon::{prelude::*, ThreadPool},
     solana_accounts_db::contains::Contains,
     solana_clock::{BankId, Slot, NUM_CONSECUTIVE_LEADER_SLOTS},
-    solana_entry::entry::VerifyRecyclers,
     solana_geyser_plugin_manager::block_metadata_notifier_interface::BlockMetadataNotifierArc,
     solana_gossip::cluster_info::ClusterInfo,
     solana_hash::Hash,
@@ -650,7 +649,6 @@ impl ReplayStage {
             rpc_subscriptions.clone(),
         );
         let run_replay = move || {
-            let verify_recyclers = VerifyRecyclers::default();
             let _exit = Finalizer::new(exit.clone());
 
             let mut identity_keypair = cluster_info.keypair();
@@ -789,7 +787,6 @@ impl ReplayStage {
                     &mut progress,
                     transaction_status_sender.as_ref(),
                     entry_notification_sender.as_ref(),
-                    &verify_recyclers,
                     &replay_vote_sender,
                     &bank_notification_sender,
                     rpc_subscriptions.as_deref(),
@@ -2301,7 +2298,6 @@ impl ReplayStage {
         transaction_status_sender: Option<&TransactionStatusSender>,
         entry_notification_sender: Option<&EntryNotifierSender>,
         replay_vote_sender: &ReplayVoteSender,
-        verify_recyclers: &VerifyRecyclers,
         log_messages_bytes_limit: Option<usize>,
         prioritization_fee_cache: &PrioritizationFeeCache,
     ) -> result::Result<usize, BlockstoreProcessorError> {
@@ -2321,7 +2317,6 @@ impl ReplayStage {
             transaction_status_sender,
             entry_notification_sender,
             Some(replay_vote_sender),
-            verify_recyclers,
             false,
             log_messages_bytes_limit,
             prioritization_fee_cache,
@@ -2951,7 +2946,6 @@ impl ReplayStage {
         progress: &mut ProgressMap,
         transaction_status_sender: Option<&TransactionStatusSender>,
         entry_notification_sender: Option<&EntryNotifierSender>,
-        verify_recyclers: &VerifyRecyclers,
         replay_vote_sender: &ReplayVoteSender,
         replay_timing: &mut ReplayLoopTiming,
         log_messages_bytes_limit: Option<usize>,
@@ -3036,7 +3030,6 @@ impl ReplayStage {
                             transaction_status_sender,
                             entry_notification_sender,
                             &replay_vote_sender.clone(),
-                            &verify_recyclers.clone(),
                             log_messages_bytes_limit,
                             prioritization_fee_cache,
                         );
@@ -3066,7 +3059,6 @@ impl ReplayStage {
         progress: &mut ProgressMap,
         transaction_status_sender: Option<&TransactionStatusSender>,
         entry_notification_sender: Option<&EntryNotifierSender>,
-        verify_recyclers: &VerifyRecyclers,
         replay_vote_sender: &ReplayVoteSender,
         replay_timing: &mut ReplayLoopTiming,
         log_messages_bytes_limit: Option<usize>,
@@ -3125,7 +3117,6 @@ impl ReplayStage {
                     transaction_status_sender,
                     entry_notification_sender,
                     &replay_vote_sender.clone(),
-                    &verify_recyclers.clone(),
                     log_messages_bytes_limit,
                     prioritization_fee_cache,
                 );
@@ -3480,7 +3471,6 @@ impl ReplayStage {
         progress: &mut ProgressMap,
         transaction_status_sender: Option<&TransactionStatusSender>,
         entry_notification_sender: Option<&EntryNotifierSender>,
-        verify_recyclers: &VerifyRecyclers,
         replay_vote_sender: &ReplayVoteSender,
         bank_notification_sender: &Option<BankNotificationSenderConfig>,
         rpc_subscriptions: Option<&RpcSubscriptions>,
@@ -3522,7 +3512,6 @@ impl ReplayStage {
                     progress,
                     transaction_status_sender,
                     entry_notification_sender,
-                    verify_recyclers,
                     replay_vote_sender,
                     replay_timing,
                     log_messages_bytes_limit,
@@ -3542,7 +3531,6 @@ impl ReplayStage {
                         progress,
                         transaction_status_sender,
                         entry_notification_sender,
-                        verify_recyclers,
                         replay_vote_sender,
                         replay_timing,
                         log_messages_bytes_limit,
@@ -5195,7 +5183,6 @@ pub(crate) mod tests {
                 None,
                 None,
                 &replay_vote_sender,
-                &VerifyRecyclers::default(),
                 None,
                 &PrioritizationFeeCache::new(0u64),
             );
@@ -9315,7 +9302,6 @@ pub(crate) mod tests {
         // Set up bank0
         let bank_forks = BankForks::new_rw_arc(Bank::new_for_tests(&genesis_config));
         let bank0 = bank_forks.read().unwrap().get_with_scheduler(0).unwrap();
-        let recyclers = VerifyRecyclers::default();
         let replay_tx_thread_pool = rayon::ThreadPoolBuilder::new()
             .num_threads(1)
             .thread_name(|i| format!("solReplayTx{i:02}"))
@@ -9328,7 +9314,6 @@ pub(crate) mod tests {
             &replay_tx_thread_pool,
             &ProcessOptions::default(),
             None,
-            &recyclers,
             None,
         )
         .unwrap();
@@ -9349,7 +9334,6 @@ pub(crate) mod tests {
             &bank1,
             &replay_tx_thread_pool,
             &ProcessOptions::default(),
-            &recyclers,
             &mut ConfirmationProgress::new(bank0.last_blockhash()),
             None,
             None,

+ 4 - 17
core/src/sigverify.rs

@@ -1,11 +1,9 @@
 //! The `sigverify` module provides digital signature verification functions.
 //! By default, signatures are verified in parallel using all available CPU
-//! cores.  When perf-libs are available signature verification is offloaded
-//! to the GPU.
-//!
+//! cores.
 
 pub use solana_perf::sigverify::{
-    count_packets_in_batches, ed25519_verify_cpu, ed25519_verify_disabled, init, TxOffset,
+    count_packets_in_batches, ed25519_verify, ed25519_verify_disabled, TxOffset,
 };
 use {
     crate::{
@@ -14,14 +12,12 @@ use {
     },
     agave_banking_stage_ingress_types::BankingPacketBatch,
     crossbeam_channel::{Sender, TrySendError},
-    solana_perf::{cuda_runtime::PinnedVec, packet::PacketBatch, recycler::Recycler, sigverify},
+    solana_perf::{packet::PacketBatch, sigverify},
 };
 
 pub struct TransactionSigVerifier {
     banking_stage_sender: BankingPacketSender,
     forward_stage_sender: Option<Sender<(BankingPacketBatch, bool)>>,
-    recycler: Recycler<TxOffset>,
-    recycler_out: Recycler<PinnedVec<u8>>,
     reject_non_vote: bool,
 }
 
@@ -39,12 +35,9 @@ impl TransactionSigVerifier {
         banking_stage_sender: BankingPacketSender,
         forward_stage_sender: Option<Sender<(BankingPacketBatch, bool)>>,
     ) -> Self {
-        init();
         Self {
             banking_stage_sender,
             forward_stage_sender,
-            recycler: Recycler::warmed(50, 4096),
-            recycler_out: Recycler::warmed(50, 4096),
             reject_non_vote: false,
         }
     }
@@ -78,13 +71,7 @@ impl SigVerifier for TransactionSigVerifier {
         mut batches: Vec<PacketBatch>,
         valid_packets: usize,
     ) -> Vec<PacketBatch> {
-        sigverify::ed25519_verify(
-            &mut batches,
-            &self.recycler,
-            &self.recycler_out,
-            self.reject_non_vote,
-            valid_packets,
-        );
+        sigverify::ed25519_verify(&mut batches, self.reject_non_vote, valid_packets);
         batches
     }
 }

+ 3 - 4
core/src/sigverify_stage.rs

@@ -2,8 +2,7 @@
 //! receives a list of lists of packets and outputs the same list, but tags each
 //! top-level list with a list of booleans, telling the next stage whether the
 //! signature in that packet is valid. It assumes each packet contains one
-//! transaction. All processing is done on the CPU by default and on a GPU
-//! if perf-libs are available
+//! transaction. All processing is done on the CPU.
 
 use {
     crate::sigverify,
@@ -440,7 +439,7 @@ mod tests {
         crate::{banking_trace::BankingTracer, sigverify::TransactionSigVerifier},
         crossbeam_channel::unbounded,
         solana_perf::{
-            packet::{to_packet_batches, Packet, PinnedPacketBatch},
+            packet::{to_packet_batches, Packet, RecycledPacketBatch},
             test_tx::test_tx,
         },
     };
@@ -457,7 +456,7 @@ mod tests {
     fn test_packet_discard() {
         agave_logger::setup();
         let batch_size = 10;
-        let mut batch = PinnedPacketBatch::with_capacity(batch_size);
+        let mut batch = RecycledPacketBatch::with_capacity(batch_size);
         let packet = Packet::default();
         batch.resize(batch_size, packet);
         batch[3].meta_mut().addr = std::net::IpAddr::from([1u16; 8]);

+ 0 - 9
core/src/validator.rs

@@ -25,7 +25,6 @@ use {
             adjust_nofile_limit, validate_memlock_limit_for_disk_io, ResourceLimitError,
         },
         sample_performance_service::SamplePerformanceService,
-        sigverify,
         snapshot_packager_service::SnapshotPackagerService,
         stats_reporter_service::StatsReporterService,
         system_monitor_service::{
@@ -769,14 +768,6 @@ impl Validator {
             info!("entrypoint: {cluster_entrypoint:?}");
         }
 
-        if solana_perf::perf_libs::api().is_some() {
-            info!("Initializing sigverify, this could take a while...");
-        } else {
-            info!("Initializing sigverify...");
-        }
-        sigverify::init();
-        info!("Initializing sigverify done.");
-
         validate_memlock_limit_for_disk_io(config.accounts_db_config.memlock_budget_size)?;
 
         if !ledger_path.is_dir() {

+ 1 - 55
entry/benches/entry_sigverify.rs

@@ -2,7 +2,7 @@
 extern crate test;
 use {
     agave_reserved_account_keys::ReservedAccountKeys,
-    solana_entry::entry::{self, VerifyRecyclers},
+    solana_entry::entry,
     solana_hash::Hash,
     solana_message::SimpleAddressLoader,
     solana_perf::test_tx::test_tx,
@@ -10,66 +10,12 @@ use {
     solana_transaction::{
         sanitized::{MessageHash, SanitizedTransaction},
         versioned::VersionedTransaction,
-        TransactionVerificationMode,
     },
     solana_transaction_error::TransactionResult as Result,
     std::sync::Arc,
     test::Bencher,
 };
 
-#[bench]
-fn bench_gpusigverify(bencher: &mut Bencher) {
-    let thread_pool = entry::thread_pool_for_benches();
-    let entries = (0..131072)
-        .map(|_| {
-            let transaction = test_tx();
-            entry::next_entry_mut(&mut Hash::default(), 0, vec![transaction])
-        })
-        .collect::<Vec<_>>();
-
-    let verify_transaction = {
-        move |versioned_tx: VersionedTransaction,
-              verification_mode: TransactionVerificationMode|
-              -> Result<RuntimeTransaction<SanitizedTransaction>> {
-            let sanitized_tx = {
-                let message_hash =
-                    if verification_mode == TransactionVerificationMode::FullVerification {
-                        versioned_tx.verify_and_hash_message()?
-                    } else {
-                        versioned_tx.message.hash()
-                    };
-
-                RuntimeTransaction::try_create(
-                    versioned_tx,
-                    MessageHash::Precomputed(message_hash),
-                    None,
-                    SimpleAddressLoader::Disabled,
-                    &ReservedAccountKeys::empty_key_set(),
-                    true,
-                )
-            }?;
-
-            Ok(sanitized_tx)
-        }
-    };
-
-    let recycler = VerifyRecyclers::default();
-
-    bencher.iter(|| {
-        let res = entry::start_verify_transactions(
-            entries.clone(),
-            false,
-            &thread_pool,
-            recycler.clone(),
-            Arc::new(verify_transaction),
-        );
-
-        if let Ok(mut res) = res {
-            let _ans = res.finish_verify();
-        }
-    })
-}
-
 #[bench]
 fn bench_cpusigverify(bencher: &mut Bencher) {
     let thread_pool = entry::thread_pool_for_benches();

+ 76 - 473
entry/src/entry.rs

@@ -11,28 +11,16 @@ use {
     rayon::{prelude::*, ThreadPool},
     serde::{Deserialize, Serialize},
     solana_hash::Hash,
-    solana_measure::measure::Measure,
     solana_merkle_tree::MerkleTree,
-    solana_metrics::*,
-    solana_packet::Meta,
-    solana_perf::{
-        cuda_runtime::PinnedVec,
-        packet::{Packet, PacketBatch, PacketBatchRecycler, PinnedPacketBatch, PACKETS_PER_BATCH},
-        perf_libs,
-        recycler::Recycler,
-        sigverify,
-    },
     solana_runtime_transaction::transaction_with_meta::TransactionWithMeta,
     solana_transaction::{
         versioned::VersionedTransaction, Transaction, TransactionVerificationMode,
     },
-    solana_transaction_error::{TransactionError, TransactionResult as Result},
+    solana_transaction_error::TransactionResult as Result,
     std::{
-        cmp,
         ffi::OsStr,
         iter::repeat_with,
-        sync::{Arc, Mutex, Once, OnceLock},
-        thread::{self, JoinHandle},
+        sync::{Arc, Once, OnceLock},
         time::Instant,
     },
     wincode::{containers::Pod, SchemaRead, SchemaWrite},
@@ -250,146 +238,33 @@ pub fn next_hash(
     }
 }
 
-/// Last action required to verify an entry
-enum VerifyAction {
-    /// Mixin a hash before computing the last hash for a transaction entry
-    Mixin(Hash),
-    /// Compute one last hash for a tick entry
-    Tick,
-    /// No action needed (tick entry with no hashes)
-    None,
-}
-
-pub struct GpuVerificationData {
-    thread_h: Option<JoinHandle<u64>>,
-    hashes: Option<Arc<Mutex<PinnedVec<Hash>>>>,
-    verifications: Option<Vec<(VerifyAction, Hash)>>,
-}
-
-pub enum DeviceVerificationData {
-    Cpu(),
-    Gpu(GpuVerificationData),
-}
-
 pub struct EntryVerificationState {
-    verification_status: EntryVerificationStatus,
+    verification_status: bool,
     poh_duration_us: u64,
-    device_verification_data: DeviceVerificationData,
-}
-
-pub struct GpuSigVerificationData {
-    thread_h: Option<JoinHandle<(bool, u64)>>,
-}
-
-pub enum DeviceSigVerificationData {
-    Cpu(),
-    Gpu(GpuSigVerificationData),
 }
 
 pub struct EntrySigVerificationState<Tx: TransactionWithMeta> {
-    verification_status: EntryVerificationStatus,
+    verification_status: bool,
     entries: Option<Vec<EntryType<Tx>>>,
-    device_verification_data: DeviceSigVerificationData,
-    gpu_verify_duration_us: u64,
 }
 
 impl<Tx: TransactionWithMeta> EntrySigVerificationState<Tx> {
     pub fn entries(&mut self) -> Option<Vec<EntryType<Tx>>> {
         self.entries.take()
     }
-    pub fn finish_verify(&mut self) -> bool {
-        match &mut self.device_verification_data {
-            DeviceSigVerificationData::Gpu(verification_state) => {
-                let (verified, gpu_time_us) =
-                    verification_state.thread_h.take().unwrap().join().unwrap();
-                self.gpu_verify_duration_us = gpu_time_us;
-                self.verification_status = if verified {
-                    EntryVerificationStatus::Success
-                } else {
-                    EntryVerificationStatus::Failure
-                };
-                verified
-            }
-            DeviceSigVerificationData::Cpu() => {
-                self.verification_status == EntryVerificationStatus::Success
-            }
-        }
-    }
-    pub fn status(&self) -> EntryVerificationStatus {
+    pub fn status(&self) -> bool {
         self.verification_status
     }
-    pub fn gpu_verify_duration(&self) -> u64 {
-        self.gpu_verify_duration_us
-    }
-}
-
-#[derive(Default, Clone)]
-pub struct VerifyRecyclers {
-    hash_recycler: Recycler<PinnedVec<Hash>>,
-    tick_count_recycler: Recycler<PinnedVec<u64>>,
-    packet_recycler: PacketBatchRecycler,
-    out_recycler: Recycler<PinnedVec<u8>>,
-    tx_offset_recycler: Recycler<sigverify::TxOffset>,
-}
-
-#[derive(PartialEq, Eq, Clone, Copy, Debug)]
-pub enum EntryVerificationStatus {
-    Failure,
-    Success,
-    Pending,
 }
 
 impl EntryVerificationState {
-    pub fn status(&self) -> EntryVerificationStatus {
+    pub fn status(&self) -> bool {
         self.verification_status
     }
 
     pub fn poh_duration_us(&self) -> u64 {
         self.poh_duration_us
     }
-
-    pub fn finish_verify(&mut self, thread_pool: &ThreadPool) -> bool {
-        match &mut self.device_verification_data {
-            DeviceVerificationData::Gpu(verification_state) => {
-                let gpu_time_us = verification_state.thread_h.take().unwrap().join().unwrap();
-
-                let mut verify_check_time = Measure::start("verify_check");
-                let hashes = verification_state.hashes.take().unwrap();
-                let hashes = Arc::try_unwrap(hashes)
-                    .expect("unwrap Arc")
-                    .into_inner()
-                    .expect("into_inner");
-                let res = thread_pool.install(|| {
-                    hashes
-                        .into_par_iter()
-                        .cloned()
-                        .zip(verification_state.verifications.take().unwrap())
-                        .all(|(hash, (action, expected))| {
-                            let actual = match action {
-                                VerifyAction::Mixin(mixin) => {
-                                    Poh::new(hash, None).record(mixin).unwrap().hash
-                                }
-                                VerifyAction::Tick => Poh::new(hash, None).tick().unwrap().hash,
-                                VerifyAction::None => hash,
-                            };
-                            actual == expected
-                        })
-                });
-                verify_check_time.stop();
-                self.poh_duration_us += gpu_time_us + verify_check_time.as_us();
-
-                self.verification_status = if res {
-                    EntryVerificationStatus::Success
-                } else {
-                    EntryVerificationStatus::Failure
-                };
-                res
-            }
-            DeviceVerificationData::Cpu() => {
-                self.verification_status == EntryVerificationStatus::Success
-            }
-        }
-    }
 }
 
 pub fn verify_transactions<Tx: TransactionWithMeta + Send + Sync>(
@@ -421,37 +296,11 @@ pub fn start_verify_transactions<Tx: TransactionWithMeta + Send + Sync + 'static
     entries: Vec<Entry>,
     skip_verification: bool,
     thread_pool: &ThreadPool,
-    verify_recyclers: VerifyRecyclers,
     verify: Arc<
         dyn Fn(VersionedTransaction, TransactionVerificationMode) -> Result<Tx> + Send + Sync,
     >,
 ) -> Result<EntrySigVerificationState<Tx>> {
-    let api = perf_libs::api();
-
-    // Use the CPU if we have too few transactions for GPU signature verification to be worth it.
-    // We will also use the CPU if no acceleration API is used or if we're skipping
-    // the signature verification as we'd have nothing to do on the GPU in that case.
-    // TODO: make the CPU-to GPU crossover point dynamic, perhaps based on similar future
-    // heuristics to what might be used in sigverify::ed25519_verify when a dynamic crossover
-    // is introduced for that function (see TODO in sigverify::ed25519_verify)
-    let use_cpu = skip_verification
-        || api.is_none()
-        || entries
-            .iter()
-            .try_fold(0, |accum: usize, entry: &Entry| -> Option<usize> {
-                if accum.saturating_add(entry.transactions.len()) < 512 {
-                    Some(accum.saturating_add(entry.transactions.len()))
-                } else {
-                    None
-                }
-            })
-            .is_some();
-
-    if use_cpu {
-        start_verify_transactions_cpu(entries, skip_verification, thread_pool, verify)
-    } else {
-        start_verify_transactions_gpu(entries, verify_recyclers, thread_pool, verify)
-    }
+    start_verify_transactions_cpu(entries, skip_verification, thread_pool, verify)
 }
 
 fn start_verify_transactions_cpu<Tx: TransactionWithMeta + Send + Sync + 'static>(
@@ -475,115 +324,8 @@ fn start_verify_transactions_cpu<Tx: TransactionWithMeta + Send + Sync + 'static
     let entries = verify_transactions(entries, thread_pool, Arc::new(verify_func))?;
 
     Ok(EntrySigVerificationState {
-        verification_status: EntryVerificationStatus::Success,
-        entries: Some(entries),
-        device_verification_data: DeviceSigVerificationData::Cpu(),
-        gpu_verify_duration_us: 0,
-    })
-}
-
-fn start_verify_transactions_gpu<Tx: TransactionWithMeta + Send + Sync + 'static>(
-    entries: Vec<Entry>,
-    verify_recyclers: VerifyRecyclers,
-    thread_pool: &ThreadPool,
-    verify: Arc<
-        dyn Fn(VersionedTransaction, TransactionVerificationMode) -> Result<Tx> + Send + Sync,
-    >,
-) -> Result<EntrySigVerificationState<Tx>> {
-    let verify_func = {
-        move |versioned_tx: VersionedTransaction| -> Result<Tx> {
-            verify(versioned_tx, TransactionVerificationMode::HashOnly)
-        }
-    };
-
-    let entries = verify_transactions(entries, thread_pool, Arc::new(verify_func))?;
-
-    let transactions = entries
-        .iter()
-        .filter_map(|entry_type| match entry_type {
-            EntryType::Tick(_) => None,
-            EntryType::Transactions(transactions) => Some(transactions),
-        })
-        .flatten()
-        .collect::<Vec<_>>();
-
-    if transactions.is_empty() {
-        return Ok(EntrySigVerificationState {
-            verification_status: EntryVerificationStatus::Success,
-            entries: Some(entries),
-            device_verification_data: DeviceSigVerificationData::Cpu(),
-            gpu_verify_duration_us: 0,
-        });
-    }
-
-    let packet_batches = thread_pool.install(|| {
-        transactions
-            .par_chunks(PACKETS_PER_BATCH)
-            .map(|transaction_chunk| {
-                let num_transactions = transaction_chunk.len();
-                let mut packet_batch = PinnedPacketBatch::new_with_recycler(
-                    &verify_recyclers.packet_recycler,
-                    num_transactions,
-                    "entry-sig-verify",
-                );
-                // We use set_len here instead of resize(num_txs, Packet::default()), to save
-                // memory bandwidth and avoid writing a large amount of data that will be overwritten
-                // soon afterwards. As well, Packet::default() actually leaves the packet data
-                // uninitialized, so the initialization would simply write junk into
-                // the vector anyway.
-                unsafe {
-                    packet_batch.set_len(num_transactions);
-                }
-                let transaction_iter = transaction_chunk
-                    .iter()
-                    .map(|tx| tx.to_versioned_transaction());
-
-                let res = packet_batch
-                    .iter_mut()
-                    .zip(transaction_iter)
-                    .all(|(packet, tx)| {
-                        *packet.meta_mut() = Meta::default();
-                        Packet::populate_packet(packet, None, &tx).is_ok()
-                    });
-                if res {
-                    Ok(PacketBatch::from(packet_batch))
-                } else {
-                    Err(TransactionError::SanitizeFailure)
-                }
-            })
-            .collect::<Result<Vec<_>>>()
-    });
-    let mut packet_batches = packet_batches?;
-
-    let tx_offset_recycler = verify_recyclers.tx_offset_recycler;
-    let out_recycler = verify_recyclers.out_recycler;
-    let num_packets = transactions.len();
-    let gpu_verify_thread = thread::Builder::new()
-        .name("solGpuSigVerify".into())
-        .spawn(move || {
-            let mut verify_time = Measure::start("sigverify");
-            sigverify::ed25519_verify(
-                &mut packet_batches,
-                &tx_offset_recycler,
-                &out_recycler,
-                false,
-                num_packets,
-            );
-            let verified = packet_batches
-                .iter()
-                .all(|batch| batch.iter().all(|p| !p.meta().discard()));
-            verify_time.stop();
-            (verified, verify_time.as_us())
-        })
-        .unwrap();
-
-    Ok(EntrySigVerificationState {
-        verification_status: EntryVerificationStatus::Pending,
+        verification_status: true,
         entries: Some(entries),
-        device_verification_data: DeviceSigVerificationData::Gpu(GpuSigVerificationData {
-            thread_h: Some(gpu_verify_thread),
-        }),
-        gpu_verify_duration_us: 0,
     })
 }
 
@@ -616,13 +358,7 @@ pub trait EntrySlice {
         simd_len: usize,
         thread_pool: &ThreadPool,
     ) -> EntryVerificationState;
-    fn start_verify(
-        &self,
-        start_hash: &Hash,
-        thread_pool: &ThreadPool,
-        recyclers: VerifyRecyclers,
-    ) -> EntryVerificationState;
-    fn verify(&self, start_hash: &Hash, thread_pool: &ThreadPool) -> bool;
+    fn verify(&self, start_hash: &Hash, thread_pool: &ThreadPool) -> EntryVerificationState;
     /// Checks that each entry tick has the correct number of hashes. Entry slices do not
     /// necessarily end in a tick, so `tick_hash_count` is used to carry over the hash count
     /// for the next entry slice.
@@ -632,9 +368,8 @@ pub trait EntrySlice {
 }
 
 impl EntrySlice for [Entry] {
-    fn verify(&self, start_hash: &Hash, thread_pool: &ThreadPool) -> bool {
-        self.start_verify(start_hash, thread_pool, VerifyRecyclers::default())
-            .finish_verify(thread_pool)
+    fn verify(&self, start_hash: &Hash, thread_pool: &ThreadPool) -> EntryVerificationState {
+        self.verify_cpu(start_hash, thread_pool)
     }
 
     fn verify_cpu_generic(
@@ -665,13 +400,8 @@ impl EntrySlice for [Entry] {
         });
         let poh_duration_us = now.elapsed().as_micros() as u64;
         EntryVerificationState {
-            verification_status: if res {
-                EntryVerificationStatus::Success
-            } else {
-                EntryVerificationStatus::Failure
-            },
+            verification_status: res,
             poh_duration_us,
-            device_verification_data: DeviceVerificationData::Cpu(),
         }
     }
 
@@ -753,13 +483,8 @@ impl EntrySlice for [Entry] {
         });
         let poh_duration_us = now.elapsed().as_micros() as u64;
         EntryVerificationState {
-            verification_status: if res {
-                EntryVerificationStatus::Success
-            } else {
-                EntryVerificationStatus::Failure
-            },
+            verification_status: res,
             poh_duration_us,
-            device_verification_data: DeviceVerificationData::Cpu(),
         }
     }
 
@@ -785,100 +510,6 @@ impl EntrySlice for [Entry] {
         }
     }
 
-    fn start_verify(
-        &self,
-        start_hash: &Hash,
-        thread_pool: &ThreadPool,
-        recyclers: VerifyRecyclers,
-    ) -> EntryVerificationState {
-        let start = Instant::now();
-        let Some(api) = perf_libs::api() else {
-            return self.verify_cpu(start_hash, thread_pool);
-        };
-        inc_new_counter_info!("entry_verify-num_entries", self.len());
-
-        let genesis = [Entry {
-            num_hashes: 0,
-            hash: *start_hash,
-            transactions: vec![],
-        }];
-
-        let hashes: Vec<Hash> = genesis
-            .iter()
-            .chain(self)
-            .map(|entry| entry.hash)
-            .take(self.len())
-            .collect();
-
-        let mut hashes_pinned = recyclers.hash_recycler.allocate("poh_verify_hash");
-        hashes_pinned.set_pinnable();
-        hashes_pinned.resize(hashes.len(), Hash::default());
-        hashes_pinned.copy_from_slice(&hashes);
-
-        let mut num_hashes_vec = recyclers
-            .tick_count_recycler
-            .allocate("poh_verify_num_hashes");
-        num_hashes_vec.reserve_and_pin(cmp::max(1, self.len()));
-        for entry in self {
-            num_hashes_vec.push(entry.num_hashes.saturating_sub(1));
-        }
-
-        let length = self.len();
-        let hashes = Arc::new(Mutex::new(hashes_pinned));
-        let hashes_clone = hashes.clone();
-
-        let gpu_verify_thread = thread::Builder::new()
-            .name("solGpuPohVerify".into())
-            .spawn(move || {
-                let mut hashes = hashes_clone.lock().unwrap();
-                let gpu_wait = Instant::now();
-                let res;
-                unsafe {
-                    res = (api.poh_verify_many)(
-                        hashes.as_mut_ptr() as *mut u8,
-                        num_hashes_vec.as_ptr(),
-                        length,
-                        1,
-                    );
-                }
-                assert!(res == 0, "GPU PoH verify many failed");
-                inc_new_counter_info!(
-                    "entry_verify-gpu_thread",
-                    gpu_wait.elapsed().as_micros() as usize
-                );
-                gpu_wait.elapsed().as_micros() as u64
-            })
-            .unwrap();
-
-        let verifications = thread_pool.install(|| {
-            self.into_par_iter()
-                .map(|entry| {
-                    let answer = entry.hash;
-                    let action = if entry.transactions.is_empty() {
-                        if entry.num_hashes == 0 {
-                            VerifyAction::None
-                        } else {
-                            VerifyAction::Tick
-                        }
-                    } else {
-                        VerifyAction::Mixin(hash_transactions(&entry.transactions))
-                    };
-                    (action, answer)
-                })
-                .collect()
-        });
-        let device_verification_data = DeviceVerificationData::Gpu(GpuVerificationData {
-            thread_h: Some(gpu_verify_thread),
-            verifications: Some(verifications),
-            hashes: Some(hashes),
-        });
-        EntryVerificationState {
-            verification_status: EntryVerificationStatus::Pending,
-            poh_duration_us: start.elapsed().as_micros() as u64,
-            device_verification_data,
-        }
-    }
-
     fn verify_tick_hash_count(&self, tick_hash_count: &mut u64, hashes_per_tick: u64) -> bool {
         // When hashes_per_tick is 0, hashing is disabled.
         if hashes_per_tick == 0 {
@@ -973,10 +604,12 @@ mod tests {
     use {
         super::*,
         agave_reserved_account_keys::ReservedAccountKeys,
+        rayon::ThreadPoolBuilder,
         solana_hash::Hash,
         solana_keypair::Keypair,
+        solana_measure::measure::Measure,
         solana_message::SimpleAddressLoader,
-        solana_perf::test_tx::{test_invalid_tx, test_tx},
+        solana_perf::test_tx::test_tx,
         solana_pubkey::Pubkey,
         solana_runtime_transaction::runtime_transaction::RuntimeTransaction,
         solana_sha256_hasher::hash,
@@ -1002,7 +635,6 @@ mod tests {
     fn test_verify_transactions<Tx: TransactionWithMeta + Send + Sync + 'static>(
         entries: Vec<Entry>,
         skip_verification: bool,
-        verify_recyclers: VerifyRecyclers,
         thread_pool: &ThreadPool,
         verify: Arc<
             dyn Fn(VersionedTransaction, TransactionVerificationMode) -> Result<Tx> + Send + Sync,
@@ -1022,57 +654,35 @@ mod tests {
 
         let cpu_verify_result =
             verify_transactions(entries.clone(), thread_pool, Arc::new(verify_func));
-        let mut gpu_verify_result: EntrySigVerificationState<Tx> = {
-            let verify_result = start_verify_transactions(
-                entries,
-                skip_verification,
-                thread_pool,
-                verify_recyclers,
-                verify,
-            );
-            match verify_result {
-                Ok(res) => res,
-                _ => EntrySigVerificationState {
-                    verification_status: EntryVerificationStatus::Failure,
-                    entries: None,
-                    device_verification_data: DeviceSigVerificationData::Cpu(),
-                    gpu_verify_duration_us: 0,
-                },
-            }
-        };
 
-        match cpu_verify_result {
-            Ok(_) => {
-                assert!(gpu_verify_result.verification_status != EntryVerificationStatus::Failure);
-                assert!(gpu_verify_result.finish_verify());
-                true
-            }
-            _ => {
-                assert!(
-                    gpu_verify_result.verification_status == EntryVerificationStatus::Failure
-                        || !gpu_verify_result.finish_verify()
-                );
-                false
-            }
-        }
+        cpu_verify_result.is_ok()
     }
 
     #[test]
-    fn test_entry_gpu_verify() {
-        let thread_pool = thread_pool_for_tests();
+    fn test_entry_transaction_verify() {
+        let zero = Hash::default();
+
+        // First, verify entries
+        let keypair = Keypair::new();
+        let tx0 = system_transaction::transfer(&keypair, &keypair.pubkey(), 0, zero);
+        let tx1 = system_transaction::transfer(&keypair, &keypair.pubkey(), 1, zero);
+        let e0 = Entry::new(&zero, 0, vec![tx0, tx1]);
+        assert!(e0.verify(&zero));
+        let tx2 = system_transaction::transfer(&keypair, &keypair.pubkey(), 2, zero);
+        let tx3 = system_transaction::transfer(&keypair, &keypair.pubkey(), 3, zero);
+        let e1 = Entry::new(&zero, 0, vec![tx2, tx3]);
+        assert!(e1.verify(&zero));
 
+        let es = vec![e0, e1];
+        let thread_pool = ThreadPoolBuilder::new().build().unwrap();
+
+        // Next, verify entry slice
         let verify_transaction = {
             move |versioned_tx: VersionedTransaction,
-                  verification_mode: TransactionVerificationMode|
+                  _mode: TransactionVerificationMode|
                   -> Result<RuntimeTransaction<SanitizedTransaction>> {
                 let sanitized_tx = {
-                    let message_hash =
-                        if verification_mode == TransactionVerificationMode::FullVerification {
-                            versioned_tx.verify_and_hash_message()?
-                        } else {
-                            versioned_tx.message.hash()
-                        };
-
+                    let message_hash = versioned_tx.verify_and_hash_message()?;
                     RuntimeTransaction::try_create(
                         versioned_tx,
                         MessageHash::Precomputed(message_hash),
@@ -1083,38 +693,15 @@ mod tests {
                     )
                 }?;
 
+                sanitized_tx.verify()?;
+
                 Ok(sanitized_tx)
             }
         };
 
-        let recycler = VerifyRecyclers::default();
-
-        // Make sure we test with a number of transactions that's not a multiple of PACKETS_PER_BATCH
-        let entries_invalid = (0..1025)
-            .map(|_| {
-                let transaction = test_invalid_tx();
-                next_entry_mut(&mut Hash::default(), 0, vec![transaction])
-            })
-            .collect::<Vec<_>>();
-
-        let entries_valid = (0..1025)
-            .map(|_| {
-                let transaction = test_tx();
-                next_entry_mut(&mut Hash::default(), 0, vec![transaction])
-            })
-            .collect::<Vec<_>>();
-
-        assert!(!test_verify_transactions(
-            entries_invalid,
-            false,
-            recycler.clone(),
-            &thread_pool,
-            Arc::new(verify_transaction)
-        ));
         assert!(test_verify_transactions(
-            entries_valid,
+            es,
             false,
-            recycler,
             &thread_pool,
             Arc::new(verify_transaction)
         ));
@@ -1150,27 +737,27 @@ mod tests {
 
         // Verify entry with 2 transactions
         let mut e0 = [Entry::new(&zero, 0, vec![tx0, tx1])];
-        assert!(e0.verify(&zero, &thread_pool));
+        assert!(e0.verify(&zero, &thread_pool).status());
 
         // Clear signature of the first transaction, see that it does not verify
         let orig_sig = e0[0].transactions[0].signatures[0];
         e0[0].transactions[0].signatures[0] = Signature::default();
-        assert!(!e0.verify(&zero, &thread_pool));
+        assert!(!e0.verify(&zero, &thread_pool).status());
 
         // restore original signature
         e0[0].transactions[0].signatures[0] = orig_sig;
-        assert!(e0.verify(&zero, &thread_pool));
+        assert!(e0.verify(&zero, &thread_pool).status());
 
         // Resize signatures and see verification fails.
         let len = e0[0].transactions[0].signatures.len();
         e0[0].transactions[0]
             .signatures
             .resize(len - 1, Signature::default());
-        assert!(!e0.verify(&zero, &thread_pool));
+        assert!(!e0.verify(&zero, &thread_pool).status());
 
         // Pass an entry with no transactions
         let e0 = [Entry::new(&zero, 0, vec![])];
-        assert!(e0.verify(&zero, &thread_pool));
+        assert!(e0.verify(&zero, &thread_pool).status());
     }
 
     #[test]
@@ -1208,18 +795,24 @@ mod tests {
         let zero = Hash::default();
         let one = hash(zero.as_ref());
         // base case
-        assert!(vec![][..].verify(&zero, &thread_pool));
+        assert!(vec![][..].verify(&zero, &thread_pool).status());
         // singleton case 1
-        assert!(vec![Entry::new_tick(0, &zero)][..].verify(&zero, &thread_pool));
+        assert!(vec![Entry::new_tick(0, &zero)][..]
+            .verify(&zero, &thread_pool)
+            .status());
         // singleton case 2, bad
-        assert!(!vec![Entry::new_tick(0, &zero)][..].verify(&one, &thread_pool));
+        assert!(!vec![Entry::new_tick(0, &zero)][..]
+            .verify(&one, &thread_pool)
+            .status());
         // inductive step
-        assert!(vec![next_entry(&zero, 0, vec![]); 2][..].verify(&zero, &thread_pool));
+        assert!(vec![next_entry(&zero, 0, vec![]); 2][..]
+            .verify(&zero, &thread_pool)
+            .status());
 
         let mut bad_ticks = vec![next_entry(&zero, 0, vec![]); 2];
         bad_ticks[1].hash = one;
         // inductive step, bad
-        assert!(!bad_ticks.verify(&zero, &thread_pool));
+        assert!(!bad_ticks.verify(&zero, &thread_pool).status());
     }
 
     #[test]
@@ -1231,22 +824,26 @@ mod tests {
         let one = hash(zero.as_ref());
         let two = hash(one.as_ref());
         // base case
-        assert!(vec![][..].verify(&one, &thread_pool));
+        assert!(vec![][..].verify(&one, &thread_pool).status());
         // singleton case 1
-        assert!(vec![Entry::new_tick(1, &two)][..].verify(&one, &thread_pool));
+        assert!(vec![Entry::new_tick(1, &two)][..]
+            .verify(&one, &thread_pool)
+            .status());
         // singleton case 2, bad
-        assert!(!vec![Entry::new_tick(1, &two)][..].verify(&two, &thread_pool));
+        assert!(!vec![Entry::new_tick(1, &two)][..]
+            .verify(&two, &thread_pool)
+            .status());
 
         let mut ticks = vec![next_entry(&one, 1, vec![])];
         ticks.push(next_entry(&ticks.last().unwrap().hash, 1, vec![]));
         // inductive step
-        assert!(ticks.verify(&one, &thread_pool));
+        assert!(ticks.verify(&one, &thread_pool).status());
 
         let mut bad_ticks = vec![next_entry(&one, 1, vec![])];
         bad_ticks.push(next_entry(&bad_ticks.last().unwrap().hash, 1, vec![]));
         bad_ticks[1].hash = one;
         // inductive step, bad
-        assert!(!bad_ticks.verify(&one, &thread_pool));
+        assert!(!bad_ticks.verify(&one, &thread_pool).status());
     }
 
     #[test]
@@ -1262,11 +859,15 @@ mod tests {
         let tx0 = system_transaction::transfer(&alice_keypair, &bob_keypair.pubkey(), 1, one);
         let tx1 = system_transaction::transfer(&bob_keypair, &alice_keypair.pubkey(), 1, one);
         // base case
-        assert!(vec![][..].verify(&one, &thread_pool));
+        assert!(vec![][..].verify(&one, &thread_pool).status());
         // singleton case 1
-        assert!(vec![next_entry(&one, 1, vec![tx0.clone()])][..].verify(&one, &thread_pool));
+        assert!(vec![next_entry(&one, 1, vec![tx0.clone()])][..]
+            .verify(&one, &thread_pool)
+            .status());
         // singleton case 2, bad
-        assert!(!vec![next_entry(&one, 1, vec![tx0.clone()])][..].verify(&two, &thread_pool));
+        assert!(!vec![next_entry(&one, 1, vec![tx0.clone()])][..]
+            .verify(&two, &thread_pool)
+            .status());
 
         let mut ticks = vec![next_entry(&one, 1, vec![tx0.clone()])];
         ticks.push(next_entry(
@@ -1276,13 +877,13 @@ mod tests {
         ));
 
         // inductive step
-        assert!(ticks.verify(&one, &thread_pool));
+        assert!(ticks.verify(&one, &thread_pool).status());
 
         let mut bad_ticks = vec![next_entry(&one, 1, vec![tx0])];
         bad_ticks.push(next_entry(&bad_ticks.last().unwrap().hash, 1, vec![tx1]));
         bad_ticks[1].hash = one;
         // inductive step, bad
-        assert!(!bad_ticks.verify(&one, &thread_pool));
+        assert!(!bad_ticks.verify(&one, &thread_pool).status());
     }
 
     #[test]
@@ -1421,7 +1022,9 @@ mod tests {
 
             info!("done.. {time}");
             let mut time = Measure::start("poh");
-            let res = entries.verify(&Hash::default(), &thread_pool_for_tests());
+            let res = entries
+                .verify(&Hash::default(), &thread_pool_for_tests())
+                .status();
             assert_eq!(res, !modified);
             time.stop();
             info!("{time} {res}");

+ 7 - 12
gossip/src/cluster_info.rs

@@ -60,7 +60,7 @@ use {
     },
     solana_perf::{
         data_budget::DataBudget,
-        packet::{Packet, PacketBatch, PacketBatchRecycler, PacketRef, PinnedPacketBatch},
+        packet::{Packet, PacketBatch, PacketBatchRecycler, PacketRef, RecycledPacketBatch},
     },
     solana_pubkey::Pubkey,
     solana_rayon_threadlimit::get_thread_count,
@@ -1372,8 +1372,7 @@ impl ClusterInfo {
         generate_pull_requests: bool,
     ) -> Result<(), GossipError> {
         let _st = ScopedTimer::from(&self.stats.gossip_transmit_loop_time);
-        let mut packet_batch =
-            PinnedPacketBatch::new_unpinned_with_recycler(recycler, 0, "run_gossip");
+        let mut packet_batch = RecycledPacketBatch::new_with_recycler(recycler, 0, "run_gossip");
         self.generate_new_gossip_requests(
             thread_pool,
             gossip_validators,
@@ -1647,7 +1646,7 @@ impl ClusterInfo {
         &'a self,
         now: Instant,
         rng: &'a mut R,
-        packet_batch: &'a mut PinnedPacketBatch,
+        packet_batch: &'a mut RecycledPacketBatch,
     ) -> impl FnMut(&PullRequest) -> bool + 'a
     where
         R: Rng + CryptoRng,
@@ -1688,12 +1687,12 @@ impl ClusterInfo {
         recycler: &PacketBatchRecycler,
         mut requests: Vec<PullRequest>,
         stakes: &HashMap<Pubkey, u64>,
-    ) -> PinnedPacketBatch {
+    ) -> RecycledPacketBatch {
         const DEFAULT_EPOCH_DURATION_MS: u64 = DEFAULT_SLOTS_PER_EPOCH * DEFAULT_MS_PER_SLOT;
         let output_size_limit =
             self.update_data_budget(stakes.len()) / PULL_RESPONSE_MIN_SERIALIZED_SIZE;
         let mut packet_batch =
-            PinnedPacketBatch::new_unpinned_with_recycler(recycler, 64, "handle_pull_requests");
+            RecycledPacketBatch::new_with_recycler(recycler, 64, "handle_pull_requests");
         let mut rng = rand::thread_rng();
         requests.retain({
             let now = Instant::now();
@@ -2554,14 +2553,10 @@ fn make_gossip_packet_batch<S: Borrow<SocketAddr>>(
     pkts: impl IntoIterator<Item = (S, Protocol), IntoIter: ExactSizeIterator>,
     recycler: &PacketBatchRecycler,
     stats: &GossipStats,
-) -> PinnedPacketBatch {
+) -> RecycledPacketBatch {
     let record_gossip_packet = |(_, pkt): &(_, Protocol)| stats.record_gossip_packet(pkt);
     let pkts = pkts.into_iter().inspect(record_gossip_packet);
-    PinnedPacketBatch::new_unpinned_with_recycler_data_and_dests(
-        recycler,
-        "gossip_packet_batch",
-        pkts,
-    )
+    RecycledPacketBatch::new_with_recycler_data_and_dests(recycler, "gossip_packet_batch", pkts)
 }
 
 #[inline]

+ 0 - 1
gossip/src/protocol.rs

@@ -44,7 +44,6 @@ const GOSSIP_PING_TOKEN_SIZE: usize = 32;
 /// Minimum serialized size of a Protocol::PullResponse packet.
 pub(crate) const PULL_RESPONSE_MIN_SERIALIZED_SIZE: usize = 161;
 
-// TODO These messages should go through the gpu pipeline for spam filtering
 /// Gossip protocol messages base enum
 #[derive(Serialize, Deserialize, Debug)]
 #[allow(clippy::large_enum_variant)]

+ 8 - 52
ledger/src/blockstore_processor.rs

@@ -20,9 +20,7 @@ use {
     },
     solana_clock::{Slot, MAX_PROCESSING_AGE},
     solana_cost_model::{cost_model::CostModel, transaction_cost::TransactionCost},
-    solana_entry::entry::{
-        self, create_ticks, Entry, EntrySlice, EntryType, EntryVerificationStatus, VerifyRecyclers,
-    },
+    solana_entry::entry::{self, create_ticks, Entry, EntrySlice, EntryType},
     solana_genesis_config::GenesisConfig,
     solana_hash::Hash,
     solana_keypair::Keypair,
@@ -926,7 +924,6 @@ pub(crate) fn process_blockstore_for_bank_0(
         &replay_tx_thread_pool,
         opts,
         transaction_status_sender,
-        &VerifyRecyclers::default(),
         entry_notification_sender,
     )?;
 
@@ -1131,7 +1128,6 @@ fn confirm_full_slot(
     bank: &BankWithScheduler,
     replay_tx_thread_pool: &ThreadPool,
     opts: &ProcessOptions,
-    recyclers: &VerifyRecyclers,
     progress: &mut ConfirmationProgress,
     transaction_status_sender: Option<&TransactionStatusSender>,
     entry_notification_sender: Option<&EntryNotifierSender>,
@@ -1152,7 +1148,6 @@ fn confirm_full_slot(
         transaction_status_sender,
         entry_notification_sender,
         replay_vote_sender,
-        recyclers,
         opts.allow_dead_slots,
         opts.runtime_config.log_messages_bytes_limit,
         &ignored_prioritization_fee_cache,
@@ -1491,7 +1486,6 @@ pub fn confirm_slot(
     transaction_status_sender: Option<&TransactionStatusSender>,
     entry_notification_sender: Option<&EntryNotifierSender>,
     replay_vote_sender: Option<&ReplayVoteSender>,
-    recyclers: &VerifyRecyclers,
     allow_dead_slots: bool,
     log_messages_bytes_limit: Option<usize>,
     prioritization_fee_cache: &PrioritizationFeeCache,
@@ -1522,7 +1516,6 @@ pub fn confirm_slot(
         transaction_status_sender,
         entry_notification_sender,
         replay_vote_sender,
-        recyclers,
         log_messages_bytes_limit,
         prioritization_fee_cache,
     )
@@ -1539,7 +1532,6 @@ fn confirm_slot_entries(
     transaction_status_sender: Option<&TransactionStatusSender>,
     entry_notification_sender: Option<&EntryNotifierSender>,
     replay_vote_sender: Option<&ReplayVoteSender>,
-    recyclers: &VerifyRecyclers,
     log_messages_bytes_limit: Option<usize>,
     prioritization_fee_cache: &PrioritizationFeeCache,
 ) -> result::Result<(), BlockstoreProcessorError> {
@@ -1612,21 +1604,15 @@ fn confirm_slot_entries(
     }
 
     let last_entry_hash = entries.last().map(|e| e.hash);
-    let verifier = if !skip_verification {
+    if !skip_verification {
         datapoint_debug!("verify-batch-size", ("size", num_entries as i64, i64));
-        let entry_state = entries.start_verify(
-            &progress.last_entry,
-            replay_tx_thread_pool,
-            recyclers.clone(),
-        );
-        if entry_state.status() == EntryVerificationStatus::Failure {
+        let entry_state = entries.verify(&progress.last_entry, replay_tx_thread_pool);
+        *poh_verify_elapsed += entry_state.poh_duration_us();
+        if !entry_state.status() {
             warn!("Ledger proof of history failed at slot: {slot}");
             return Err(BlockError::InvalidEntryHash.into());
         }
-        Some(entry_state)
-    } else {
-        None
-    };
+    }
 
     let verify_transaction = {
         let bank = bank.clone_with_scheduler();
@@ -1642,7 +1628,6 @@ fn confirm_slot_entries(
         entries,
         skip_verification,
         replay_tx_thread_pool,
-        recyclers.clone(),
         Arc::new(verify_transaction),
     );
     let transaction_cpu_duration_us = transaction_verification_start.elapsed().as_micros() as u64;
@@ -1686,16 +1671,8 @@ fn confirm_slot_entries(
     *replay_elapsed += replay_timer.as_us();
 
     {
-        // If running signature verification on the GPU, wait for that computation to finish, and
-        // get the result of it. If we did the signature verification on the CPU, this just returns
-        // the already-computed result produced in start_verify_transactions.  Either way, check the
-        // result of the signature verification.
-        let valid = transaction_verification_result.finish_verify();
-
-        // The GPU Entry verification (if any) is kicked off right when the CPU-side Entry
-        // verification finishes, so these times should be disjoint
-        *transaction_verify_elapsed +=
-            transaction_cpu_duration_us + transaction_verification_result.gpu_verify_duration();
+        let valid = transaction_verification_result.status();
+        *transaction_verify_elapsed += transaction_cpu_duration_us;
 
         if !valid {
             warn!(
@@ -1706,15 +1683,6 @@ fn confirm_slot_entries(
         }
     }
 
-    if let Some(mut verifier) = verifier {
-        let verified = verifier.finish_verify(replay_tx_thread_pool);
-        *poh_verify_elapsed += verifier.poh_duration_us();
-        if !verified {
-            warn!("Ledger proof of history failed at slot: {}", bank.slot());
-            return Err(BlockError::InvalidEntryHash.into());
-        }
-    }
-
     process_result?;
 
     progress.num_shreds += num_shreds;
@@ -1735,7 +1703,6 @@ fn process_bank_0(
     replay_tx_thread_pool: &ThreadPool,
     opts: &ProcessOptions,
     transaction_status_sender: Option<&TransactionStatusSender>,
-    recyclers: &VerifyRecyclers,
     entry_notification_sender: Option<&EntryNotifierSender>,
 ) -> result::Result<(), BlockstoreProcessorError> {
     assert_eq!(bank0.slot(), 0);
@@ -1745,7 +1712,6 @@ fn process_bank_0(
         bank0,
         replay_tx_thread_pool,
         opts,
-        recyclers,
         &mut progress,
         None,
         entry_notification_sender,
@@ -1938,7 +1904,6 @@ fn load_frozen_forks(
     )?;
 
     if Some(bank_forks.read().unwrap().root()) != opts.halt_at_slot {
-        let recyclers = VerifyRecyclers::default();
         let mut all_banks = HashMap::new();
 
         const STATUS_REPORT_INTERVAL: Duration = Duration::from_secs(2);
@@ -1985,7 +1950,6 @@ fn load_frozen_forks(
                 &bank,
                 replay_tx_thread_pool,
                 opts,
-                &recyclers,
                 &mut progress,
                 transaction_status_sender,
                 entry_notification_sender,
@@ -2171,7 +2135,6 @@ pub fn process_single_slot(
     bank: &BankWithScheduler,
     replay_tx_thread_pool: &ThreadPool,
     opts: &ProcessOptions,
-    recyclers: &VerifyRecyclers,
     progress: &mut ConfirmationProgress,
     transaction_status_sender: Option<&TransactionStatusSender>,
     entry_notification_sender: Option<&EntryNotifierSender>,
@@ -2186,7 +2149,6 @@ pub fn process_single_slot(
         bank,
         replay_tx_thread_pool,
         opts,
-        recyclers,
         progress,
         transaction_status_sender,
         entry_notification_sender,
@@ -4250,7 +4212,6 @@ pub mod tests {
             run_verification: true,
             ..ProcessOptions::default()
         };
-        let recyclers = VerifyRecyclers::default();
         let replay_tx_thread_pool = create_thread_pool(1);
         process_bank_0(
             &bank0,
@@ -4258,7 +4219,6 @@ pub mod tests {
             &replay_tx_thread_pool,
             &opts,
             None,
-            &recyclers,
             None,
         )
         .unwrap();
@@ -4273,7 +4233,6 @@ pub mod tests {
             &bank1,
             &replay_tx_thread_pool,
             &opts,
-            &recyclers,
             &mut ConfirmationProgress::new(bank0_last_blockhash),
             None,
             None,
@@ -4900,7 +4859,6 @@ pub mod tests {
             None,
             None,
             None,
-            &VerifyRecyclers::default(),
             None,
             &PrioritizationFeeCache::new(0u64),
         )
@@ -4994,7 +4952,6 @@ pub mod tests {
             Some(&transaction_status_sender),
             None,
             None,
-            &VerifyRecyclers::default(),
             None,
             &PrioritizationFeeCache::new(0u64),
         )
@@ -5039,7 +4996,6 @@ pub mod tests {
             Some(&transaction_status_sender),
             None,
             None,
-            &VerifyRecyclers::default(),
             None,
             &PrioritizationFeeCache::new(0u64),
         )

+ 1 - 4
ledger/src/shred.rs

@@ -50,10 +50,7 @@
 //! So, given a) - c), we must restrict data shred's payload length such that the entire coding
 //! payload can fit into one coding shred / packet.
 
-pub(crate) use self::{
-    merkle_tree::{PROOF_ENTRIES_FOR_32_32_BATCH, SIZE_OF_MERKLE_ROOT},
-    payload::serde_bytes_payload,
-};
+pub(crate) use self::{merkle_tree::PROOF_ENTRIES_FOR_32_32_BATCH, payload::serde_bytes_payload};
 pub use {
     self::{
         payload::Payload,

+ 2 - 1
ledger/src/shred/wire.rs

@@ -15,13 +15,13 @@ use {
     solana_perf::packet::{PacketRef, PacketRefMut},
     solana_signature::{Signature, SIGNATURE_BYTES},
     solana_signer::Signer,
-    std::ops::Range,
 };
 #[cfg(test)]
 use {
     rand::{seq::SliceRandom, Rng},
     solana_perf::packet::Packet,
     std::collections::HashMap,
+    std::ops::Range,
 };
 
 #[inline]
@@ -70,6 +70,7 @@ pub(crate) fn get_signature(shred: &[u8]) -> Option<Signature> {
     Some(Signature::from(bytes))
 }
 
+#[cfg(test)]
 pub(crate) const fn get_signature_range() -> Range<usize> {
     0..SIGNATURE_BYTES
 }

+ 29 - 485
ledger/src/sigverify_shreds.rs

@@ -1,41 +1,21 @@
 #![allow(clippy::implicit_hasher)]
 use {
-    crate::shred::{self, SIZE_OF_MERKLE_ROOT},
-    itertools::{izip, Itertools},
+    crate::shred,
     rayon::{prelude::*, ThreadPool},
     solana_clock::Slot,
     solana_hash::Hash,
     solana_metrics::inc_new_counter_debug,
     solana_nohash_hasher::BuildNoHashHasher,
     solana_perf::{
-        cuda_runtime::PinnedVec,
-        packet::{BytesPacketBatch, Packet, PacketBatch, PacketRef},
-        perf_libs,
-        recycler_cache::RecyclerCache,
-        sigverify::{self, count_packets_in_batches, TxOffset},
+        packet::{PacketBatch, PacketRef},
+        sigverify::count_packets_in_batches,
     },
     solana_pubkey::Pubkey,
     solana_signature::Signature,
-    std::{
-        borrow::Cow,
-        collections::HashMap,
-        iter::{self, repeat},
-        mem::size_of,
-        ops::Range,
-        sync::RwLock,
-    },
-};
-#[cfg(test)]
-use {
-    sha2::{Digest, Sha512},
-    solana_keypair::Keypair,
-    solana_perf::packet::PacketRefMut,
-    solana_signer::Signer,
-    std::sync::Arc,
+    std::{collections::HashMap, sync::RwLock},
 };
-
 #[cfg(test)]
-const SIGN_SHRED_GPU_MIN: usize = 256;
+use {solana_keypair::Keypair, solana_perf::packet::PacketRefMut, solana_signer::Signer};
 
 pub type LruCache = lazy_lru::LruCache<(Signature, Pubkey, /*merkle root:*/ Hash), ()>;
 
@@ -79,7 +59,7 @@ pub fn verify_shred_cpu(
     }
 }
 
-fn verify_shreds_cpu(
+pub fn verify_shreds(
     thread_pool: &ThreadPool,
     batches: &[PacketBatch],
     slot_leaders: &SlotPubkeys,
@@ -102,261 +82,6 @@ fn verify_shreds_cpu(
     rv
 }
 
-fn slot_key_data_for_gpu(
-    thread_pool: &ThreadPool,
-    batches: &[PacketBatch],
-    slot_keys: &SlotPubkeys,
-    recycler_cache: &RecyclerCache,
-) -> (/*pubkeys:*/ PinnedVec<u8>, TxOffset) {
-    //TODO: mark Pubkey::default shreds as failed after the GPU returns
-    assert_eq!(slot_keys.get(&Slot::MAX), Some(&Pubkey::default()));
-    let slots: Vec<Slot> = thread_pool.install(|| {
-        batches
-            .into_par_iter()
-            .flat_map_iter(|batch| {
-                batch.iter().map(|packet| {
-                    if packet.meta().discard() {
-                        return Slot::MAX;
-                    }
-                    let shred = shred::layout::get_shred(packet);
-                    match shred.and_then(shred::layout::get_slot) {
-                        Some(slot) if slot_keys.contains_key(&slot) => slot,
-                        _ => Slot::MAX,
-                    }
-                })
-            })
-            .collect()
-    });
-    let keys_to_slots: HashMap<Pubkey, Vec<Slot>> = slots
-        .iter()
-        .map(|slot| (slot_keys[slot], *slot))
-        .into_group_map();
-    let mut keyvec = recycler_cache.buffer().allocate("shred_gpu_pubkeys");
-    keyvec.set_pinnable();
-
-    let keyvec_size = keys_to_slots.len() * size_of::<Pubkey>();
-    resize_buffer(&mut keyvec, keyvec_size);
-
-    let key_offsets: HashMap<Slot, /*key offset:*/ usize> = {
-        let mut next_offset = 0;
-        keys_to_slots
-            .into_iter()
-            .flat_map(|(key, slots)| {
-                let offset = next_offset;
-                next_offset += std::mem::size_of::<Pubkey>();
-                keyvec[offset..next_offset].copy_from_slice(key.as_ref());
-                slots.into_iter().zip(repeat(offset))
-            })
-            .collect()
-    };
-    let mut offsets = recycler_cache.offsets().allocate("shred_offsets");
-    offsets.set_pinnable();
-    for slot in slots {
-        offsets.push(key_offsets[&slot] as u32);
-    }
-    trace!("keyvec.len: {}", keyvec.len());
-    trace!("keyvec: {keyvec:?}");
-    trace!("offsets: {offsets:?}");
-    (keyvec, offsets)
-}
-
-// Recovers merkle roots from shreds binary.
-fn get_merkle_roots(
-    thread_pool: &ThreadPool,
-    packets: &[PacketBatch],
-    recycler_cache: &RecyclerCache,
-) -> (
-    PinnedVec<u8>,      // Merkle roots
-    Vec<Option<usize>>, // Offsets
-) {
-    let merkle_roots: Vec<Option<Hash>> = thread_pool.install(|| {
-        packets
-            .par_iter()
-            .flat_map(|packets| {
-                packets.par_iter().map(|packet| {
-                    if packet.meta().discard() {
-                        return None;
-                    }
-                    let shred = shred::layout::get_shred(packet)?;
-                    shred::layout::get_merkle_root(shred)
-                })
-            })
-            .collect()
-    });
-    let num_merkle_roots = merkle_roots.iter().flatten().count();
-    let mut buffer = recycler_cache.buffer().allocate("shred_gpu_merkle_roots");
-    buffer.set_pinnable();
-    resize_buffer(&mut buffer, num_merkle_roots * SIZE_OF_MERKLE_ROOT);
-    let offsets = {
-        let mut next_offset = 0;
-        merkle_roots
-            .into_iter()
-            .map(|root| {
-                let root = root?;
-                let offset = next_offset;
-                next_offset += SIZE_OF_MERKLE_ROOT;
-                buffer[offset..next_offset].copy_from_slice(root.as_ref());
-                Some(offset)
-            })
-            .collect()
-    };
-    (buffer, offsets)
-}
-
-// Resizes the buffer to >= size and a multiple of
-// std::mem::size_of::<Packet>().
-fn resize_buffer(buffer: &mut PinnedVec<u8>, size: usize) {
-    //HACK: Pubkeys vector is passed along as a `PacketBatch` buffer to the GPU
-    //TODO: GPU needs a more opaque interface, which can handle variable sized structures for data
-    //Pad the Pubkeys buffer such that it is bigger than a buffer of Packet sized elems
-    let num_packets = size.div_ceil(std::mem::size_of::<Packet>());
-    let size = num_packets * std::mem::size_of::<Packet>();
-    buffer.resize(size, 0u8);
-}
-
-fn elems_from_buffer(buffer: &PinnedVec<u8>) -> perf_libs::Elems {
-    // resize_buffer ensures that buffer size is a multiple of Packet size.
-    debug_assert_eq!(buffer.len() % std::mem::size_of::<Packet>(), 0);
-    let num_packets = buffer.len() / std::mem::size_of::<Packet>();
-    perf_libs::Elems {
-        elems: buffer.as_ptr().cast::<u8>(),
-        num: num_packets as u32,
-    }
-}
-
-// TODO: clean up legacy shred artifacts
-fn shred_gpu_offsets(
-    offset: usize,
-    batches: &[PacketBatch],
-    merkle_roots_offsets: impl IntoIterator<Item = Option<usize>>,
-    recycler_cache: &RecyclerCache,
-) -> (TxOffset, TxOffset, TxOffset) {
-    fn add_offset(range: Range<usize>, offset: usize) -> Range<usize> {
-        range.start + offset..range.end + offset
-    }
-    let mut signature_offsets = recycler_cache.offsets().allocate("shred_signatures");
-    signature_offsets.set_pinnable();
-    let mut msg_start_offsets = recycler_cache.offsets().allocate("shred_msg_starts");
-    msg_start_offsets.set_pinnable();
-    let mut msg_sizes = recycler_cache.offsets().allocate("shred_msg_sizes");
-    msg_sizes.set_pinnable();
-    let offsets = std::iter::successors(Some(offset), |offset| {
-        offset.checked_add(std::mem::size_of::<Packet>())
-    });
-    let packets = batches.iter().flatten();
-    for (offset, _packet, merkle_root_offset) in izip!(offsets, packets, merkle_roots_offsets) {
-        let sig = shred::layout::get_signature_range();
-        let sig = add_offset(sig, offset);
-        debug_assert_eq!(sig.end - sig.start, std::mem::size_of::<Signature>());
-        // Signature may verify for an empty message but the packet will be
-        // discarded during deserialization.
-        let msg: Range<usize> = match merkle_root_offset {
-            None => {
-                0..SIZE_OF_MERKLE_ROOT // legacy shreds - remove valid but useless offset
-            }
-            Some(merkle_root_offset) => {
-                merkle_root_offset..merkle_root_offset + SIZE_OF_MERKLE_ROOT
-            }
-        };
-        signature_offsets.push(sig.start as u32);
-        msg_start_offsets.push(msg.start as u32);
-        let msg_size = msg.end.saturating_sub(msg.start);
-        msg_sizes.push(msg_size as u32);
-    }
-    (signature_offsets, msg_start_offsets, msg_sizes)
-}
-
-pub fn verify_shreds_gpu(
-    thread_pool: &ThreadPool,
-    batches: &[PacketBatch],
-    slot_leaders: &SlotPubkeys,
-    recycler_cache: &RecyclerCache,
-    cache: &RwLock<LruCache>,
-) -> Vec<Vec<u8>> {
-    let Some(api) = perf_libs::api() else {
-        return verify_shreds_cpu(thread_pool, batches, slot_leaders, cache);
-    };
-    let (pubkeys, pubkey_offsets) =
-        slot_key_data_for_gpu(thread_pool, batches, slot_leaders, recycler_cache);
-    //HACK: Pubkeys vector is passed along as a `PacketBatch` buffer to the GPU
-    //TODO: GPU needs a more opaque interface, which can handle variable sized structures for data
-    let (merkle_roots, merkle_roots_offsets) =
-        get_merkle_roots(thread_pool, batches, recycler_cache);
-    // Merkle roots are placed after pubkeys; adjust offsets accordingly.
-    let merkle_roots_offsets = {
-        let shift = pubkeys.len();
-        merkle_roots_offsets
-            .into_iter()
-            .map(move |offset| Some(offset? + shift))
-    };
-    let offset = pubkeys.len() + merkle_roots.len();
-    let (signature_offsets, msg_start_offsets, msg_sizes) =
-        shred_gpu_offsets(offset, batches, merkle_roots_offsets, recycler_cache);
-    let mut out = recycler_cache.buffer().allocate("out_buffer");
-    out.set_pinnable();
-    out.resize(signature_offsets.len(), 0u8);
-    let mut elems = vec![
-        elems_from_buffer(&pubkeys),
-        elems_from_buffer(&merkle_roots),
-    ];
-    // `BytesPacketBatch` cannot be directly used in CUDA. We have to retrieve
-    // and convert byte batches to pinned batches. We must collect here so that
-    // we keep the batches created by `BytesPacketBatch::to_pinned_packet_batch()`
-    // alive.
-    let pinned_batches = batches
-        .iter()
-        .map(|batch| match batch {
-            PacketBatch::Pinned(batch) => Cow::Borrowed(batch),
-            PacketBatch::Bytes(batch) => Cow::Owned(batch.to_pinned_packet_batch()),
-            PacketBatch::Single(packet) => {
-                // this is ugly, but unused (gpu code) and will be removed shortly in follow up PR
-                let mut batch = BytesPacketBatch::with_capacity(1);
-                batch.push(packet.clone());
-                Cow::Owned(batch.to_pinned_packet_batch())
-            }
-        })
-        .collect::<Vec<_>>();
-    elems.extend(pinned_batches.iter().map(|batch| perf_libs::Elems {
-        elems: batch.as_ptr().cast::<u8>(),
-        num: batch.len() as u32,
-    }));
-    let num_packets = elems.iter().map(|elem| elem.num).sum();
-    trace!("Starting verify num packets: {num_packets}");
-    trace!("elem len: {}", elems.len() as u32);
-    trace!("packet sizeof: {}", size_of::<Packet>() as u32);
-    const USE_NON_DEFAULT_STREAM: u8 = 1;
-    unsafe {
-        let res = (api.ed25519_verify_many)(
-            elems.as_ptr(),
-            elems.len() as u32,
-            size_of::<Packet>() as u32,
-            num_packets,
-            signature_offsets.len() as u32,
-            msg_sizes.as_ptr(),
-            pubkey_offsets.as_ptr(),
-            signature_offsets.as_ptr(),
-            msg_start_offsets.as_ptr(),
-            out.as_mut_ptr(),
-            USE_NON_DEFAULT_STREAM,
-        );
-        if res != 0 {
-            trace!("RETURN!!!: {res}");
-        }
-    }
-    trace!("done verify");
-    trace!("out buf {out:?}");
-
-    // Each shred has exactly one signature.
-    let v_sig_lens = batches
-        .iter()
-        .map(|batch| iter::repeat_n(1u32, batch.len()));
-    let mut rvs: Vec<_> = batches.iter().map(|batch| vec![0u8; batch.len()]).collect();
-    sigverify::copy_return_values(v_sig_lens, &out, &mut rvs);
-
-    inc_new_counter_debug!("ed25519_shred_verify_gpu", out.len());
-    rvs
-}
-
 #[cfg(test)]
 fn sign_shred_cpu(keypair: &Keypair, packet: &mut PacketRefMut) {
     let sig = shred::layout::get_signature_range();
@@ -378,7 +103,7 @@ fn sign_shred_cpu(keypair: &Keypair, packet: &mut PacketRefMut) {
 }
 
 #[cfg(test)]
-fn sign_shreds_cpu(thread_pool: &ThreadPool, keypair: &Keypair, batches: &mut [PacketBatch]) {
+fn sign_shreds(thread_pool: &ThreadPool, keypair: &Keypair, batches: &mut [PacketBatch]) {
     let packet_count = count_packets_in_batches(batches);
     debug!("CPU SHRED ECDSA for {packet_count}");
     thread_pool.install(|| {
@@ -391,150 +116,6 @@ fn sign_shreds_cpu(thread_pool: &ThreadPool, keypair: &Keypair, batches: &mut [P
     inc_new_counter_debug!("ed25519_shred_sign_cpu", packet_count);
 }
 
-#[cfg(test)]
-fn sign_shreds_gpu_pinned_keypair(keypair: &Keypair, cache: &RecyclerCache) -> PinnedVec<u8> {
-    let mut vec = cache.buffer().allocate("pinned_keypair");
-    let pubkey = keypair.pubkey().to_bytes();
-    let secret = keypair.secret_bytes();
-    let mut hasher = Sha512::default();
-    hasher.update(secret);
-    let mut result = hasher.finalize();
-    result[0] &= 248;
-    result[31] &= 63;
-    result[31] |= 64;
-    let size = pubkey.len() + result.len();
-    resize_buffer(&mut vec, size);
-    vec[0..pubkey.len()].copy_from_slice(&pubkey);
-    vec[pubkey.len()..size].copy_from_slice(&result);
-    vec
-}
-
-#[cfg(test)]
-fn sign_shreds_gpu(
-    thread_pool: &ThreadPool,
-    keypair: &Keypair,
-    pinned_keypair: &Option<Arc<PinnedVec<u8>>>,
-    batches: &mut [PacketBatch],
-    recycler_cache: &RecyclerCache,
-) {
-    let sig_size = size_of::<Signature>();
-    let pubkey_size = size_of::<Pubkey>();
-    let packet_count = count_packets_in_batches(batches);
-    if packet_count < SIGN_SHRED_GPU_MIN || pinned_keypair.is_none() {
-        return sign_shreds_cpu(thread_pool, keypair, batches);
-    }
-    let Some(api) = perf_libs::api() else {
-        return sign_shreds_cpu(thread_pool, keypair, batches);
-    };
-    let pinned_keypair = pinned_keypair.as_ref().unwrap();
-
-    //should be zero
-    let mut pubkey_offsets = recycler_cache.offsets().allocate("pubkey offsets");
-    pubkey_offsets.resize(packet_count, 0);
-
-    let mut secret_offsets = recycler_cache.offsets().allocate("secret_offsets");
-    secret_offsets.resize(packet_count, pubkey_size as u32);
-
-    let (merkle_roots, merkle_roots_offsets) =
-        get_merkle_roots(thread_pool, batches, recycler_cache);
-    // Merkle roots are placed after the keypair; adjust offsets accordingly.
-    let merkle_roots_offsets = {
-        let shift = pinned_keypair.len();
-        merkle_roots_offsets
-            .into_iter()
-            .map(move |offset| Some(offset? + shift))
-    };
-    let offset = pinned_keypair.len() + merkle_roots.len();
-    trace!("offset: {offset}");
-    let (signature_offsets, msg_start_offsets, msg_sizes) =
-        shred_gpu_offsets(offset, batches, merkle_roots_offsets, recycler_cache);
-    let total_sigs = signature_offsets.len();
-    let mut signatures_out = recycler_cache.buffer().allocate("ed25519 signatures");
-    signatures_out.set_pinnable();
-    signatures_out.resize(total_sigs * sig_size, 0);
-
-    let mut elems = vec![
-        elems_from_buffer(pinned_keypair),
-        elems_from_buffer(&merkle_roots),
-    ];
-    // `BytesPacketBatch` cannot be directly used in CUDA. We have to retrieve
-    // and convert byte batches to pinned batches. We must collect here so that
-    // we keep the batches created by `BytesPacketBatch::to_pinned_packet_batch()`
-    // alive.
-    let pinned_batches = batches
-        .iter_mut()
-        .map(|batch| match batch {
-            PacketBatch::Pinned(batch) => Cow::Borrowed(batch),
-            PacketBatch::Bytes(batch) => Cow::Owned(batch.to_pinned_packet_batch()),
-            PacketBatch::Single(packet) => {
-                // this is ugly, but unused (gpu code) and will be removed shortly in follow up PR
-                let mut batch = BytesPacketBatch::with_capacity(1);
-                batch.push(packet.clone());
-                Cow::Owned(batch.to_pinned_packet_batch())
-            }
-        })
-        .collect::<Vec<_>>();
-    elems.extend(pinned_batches.iter().map(|batch| perf_libs::Elems {
-        elems: batch.as_ptr().cast::<u8>(),
-        num: batch.len() as u32,
-    }));
-    let num_packets = elems.iter().map(|elem| elem.num).sum();
-    trace!("Starting verify num packets: {num_packets}");
-    trace!("elem len: {}", elems.len() as u32);
-    trace!("packet sizeof: {}", size_of::<Packet>() as u32);
-    const USE_NON_DEFAULT_STREAM: u8 = 1;
-    unsafe {
-        let res = (api.ed25519_sign_many)(
-            elems.as_mut_ptr(),
-            elems.len() as u32,
-            size_of::<Packet>() as u32,
-            num_packets,
-            total_sigs as u32,
-            msg_sizes.as_ptr(),
-            pubkey_offsets.as_ptr(),
-            secret_offsets.as_ptr(),
-            msg_start_offsets.as_ptr(),
-            signatures_out.as_mut_ptr(),
-            USE_NON_DEFAULT_STREAM,
-        );
-        if res != 0 {
-            trace!("RETURN!!!: {res}");
-        }
-    }
-    trace!("done sign");
-    // Cumulative number of packets within batches.
-    let num_packets: Vec<_> = batches
-        .iter()
-        .scan(0, |num_packets, batch| {
-            let out = *num_packets;
-            *num_packets += batch.len();
-            Some(out)
-        })
-        .collect();
-    thread_pool.install(|| {
-        batches
-            .par_iter_mut()
-            .zip(num_packets)
-            .for_each(|(batch, num_packets)| {
-                batch
-                    .par_iter_mut()
-                    .enumerate()
-                    .for_each(|(packet_ix, mut packet)| {
-                        let sig_ix = packet_ix + num_packets;
-                        let sig_start = sig_ix * sig_size;
-                        let sig_end = sig_start + sig_size;
-                        let mut buffer = packet
-                            .data(..)
-                            .expect("expected the packet to not be discarded")
-                            .to_vec();
-                        buffer[..sig_size].copy_from_slice(&signatures_out[sig_start..sig_end]);
-                        packet.copy_from_slice(&buffer);
-                    });
-            });
-    });
-    inc_new_counter_debug!("ed25519_shred_sign_gpu", packet_count);
-}
-
 #[cfg(test)]
 mod tests {
     use {
@@ -544,12 +125,14 @@ mod tests {
             shredder::{ReedSolomonCache, Shredder},
         },
         assert_matches::assert_matches,
+        itertools::Itertools,
         rand::{seq::SliceRandom, Rng},
         rayon::ThreadPoolBuilder,
         solana_entry::entry::Entry,
         solana_hash::Hash,
         solana_keypair::Keypair,
-        solana_perf::packet::PinnedPacketBatch,
+        solana_packet::Packet,
+        solana_perf::packet::RecycledPacketBatch,
         solana_signer::Signer,
         solana_system_transaction as system_transaction,
         solana_transaction::Transaction,
@@ -604,23 +187,23 @@ mod tests {
         let mut batches = [batch];
 
         let leader_slots: SlotPubkeys = [(slot, keypair.pubkey())].into_iter().collect();
-        let rv = verify_shreds_cpu(thread_pool, &batches, &leader_slots, &cache);
+        let rv = verify_shreds(thread_pool, &batches, &leader_slots, &cache);
         assert_eq!(rv.into_iter().flatten().all_equal_value().unwrap(), 1);
 
         let wrong_keypair = Keypair::new();
         let leader_slots: SlotPubkeys = [(slot, wrong_keypair.pubkey())].into_iter().collect();
-        let rv = verify_shreds_cpu(thread_pool, &batches, &leader_slots, &cache);
+        let rv = verify_shreds(thread_pool, &batches, &leader_slots, &cache);
         assert_eq!(rv.into_iter().flatten().all_equal_value().unwrap(), 0);
 
         let leader_slots: SlotPubkeys = HashMap::default();
-        let rv = verify_shreds_cpu(thread_pool, &batches, &leader_slots, &cache);
+        let rv = verify_shreds(thread_pool, &batches, &leader_slots, &cache);
         assert_eq!(rv.into_iter().flatten().all_equal_value().unwrap(), 0);
 
         let leader_slots: SlotPubkeys = [(slot, keypair.pubkey())].into_iter().collect();
         batches[0]
             .iter_mut()
             .for_each(|mut packet_ref| packet_ref.meta_mut().size = 0);
-        let rv = verify_shreds_cpu(thread_pool, &batches, &leader_slots, &cache);
+        let rv = verify_shreds(thread_pool, &batches, &leader_slots, &cache);
         assert_eq!(rv.into_iter().flatten().all_equal_value().unwrap(), 0);
     }
 
@@ -630,9 +213,8 @@ mod tests {
         run_test_sigverify_shreds_cpu(&thread_pool, 0xdead_c0de);
     }
 
-    fn run_test_sigverify_shreds_gpu(thread_pool: &ThreadPool, slot: Slot) {
+    fn run_test_sigverify_shreds(thread_pool: &ThreadPool, slot: Slot) {
         agave_logger::setup();
-        let recycler_cache = RecyclerCache::default();
         let cache = RwLock::new(LruCache::new(/*capacity:*/ 128));
 
         let keypair = Keypair::new();
@@ -642,13 +224,7 @@ mod tests {
         let leader_slots: SlotPubkeys = [(u64::MAX, Pubkey::default()), (slot, keypair.pubkey())]
             .into_iter()
             .collect();
-        let rv = verify_shreds_gpu(
-            thread_pool,
-            &batches,
-            &leader_slots,
-            &recycler_cache,
-            &cache,
-        );
+        let rv = verify_shreds(thread_pool, &batches, &leader_slots, &cache);
         assert_eq!(rv.into_iter().flatten().all_equal_value().unwrap(), 1);
 
         let wrong_keypair = Keypair::new();
@@ -658,23 +234,11 @@ mod tests {
         ]
         .into_iter()
         .collect();
-        let rv = verify_shreds_gpu(
-            thread_pool,
-            &batches,
-            &leader_slots,
-            &recycler_cache,
-            &cache,
-        );
+        let rv = verify_shreds(thread_pool, &batches, &leader_slots, &cache);
         assert_eq!(rv.into_iter().flatten().all_equal_value().unwrap(), 0);
 
         let leader_slots: SlotPubkeys = [(u64::MAX, Pubkey::default())].into_iter().collect();
-        let rv = verify_shreds_gpu(
-            thread_pool,
-            &batches,
-            &leader_slots,
-            &recycler_cache,
-            &cache,
-        );
+        let rv = verify_shreds(thread_pool, &batches, &leader_slots, &cache);
         assert_eq!(rv.into_iter().flatten().all_equal_value().unwrap(), 0);
 
         batches[0]
@@ -683,18 +247,12 @@ mod tests {
         let leader_slots: SlotPubkeys = [(u64::MAX, Pubkey::default()), (slot, keypair.pubkey())]
             .into_iter()
             .collect();
-        let rv = verify_shreds_gpu(
-            thread_pool,
-            &batches,
-            &leader_slots,
-            &recycler_cache,
-            &cache,
-        );
+        let rv = verify_shreds(thread_pool, &batches, &leader_slots, &cache);
         assert_eq!(rv.into_iter().flatten().all_equal_value().unwrap(), 0);
     }
 
     fn make_packet_batch(keypair: &Keypair, slot: u64) -> PacketBatch {
-        let mut batch = PinnedPacketBatch::default();
+        let mut batch = RecycledPacketBatch::default();
         let shredder = Shredder::new(slot, slot.saturating_sub(1), 0, 0).unwrap();
         let reed_solomon_cache = ReedSolomonCache::default();
         let (shreds, _) = shredder.entries_to_merkle_shreds_for_tests(
@@ -716,9 +274,9 @@ mod tests {
     }
 
     #[test]
-    fn test_sigverify_shreds_gpu() {
+    fn test_sigverify_shreds() {
         let thread_pool = ThreadPoolBuilder::new().num_threads(3).build().unwrap();
-        run_test_sigverify_shreds_gpu(&thread_pool, 0xdead_c0de);
+        run_test_sigverify_shreds(&thread_pool, 0xdead_c0de);
     }
 
     fn make_transaction<R: Rng>(rng: &mut R) -> Transaction {
@@ -808,7 +366,7 @@ mod tests {
         let packets: Vec<PacketBatch> = repeat_with(|| {
             let size = rng.gen_range(0..16);
             let packets = packets.by_ref().take(size).collect();
-            let batch = PinnedPacketBatch::new(packets);
+            let batch = RecycledPacketBatch::new(packets);
             (size == 0 || !batch.is_empty()).then_some(batch.into())
         })
         .while_some()
@@ -817,7 +375,6 @@ mod tests {
             shreds.len(),
             packets.iter().map(|batch| batch.len()).sum::<usize>()
         );
-        assert!(count_packets_in_batches(&packets) > SIGN_SHRED_GPU_MIN);
         packets
     }
 
@@ -827,7 +384,6 @@ mod tests {
         let mut rng = rand::thread_rng();
         let cache = RwLock::new(LruCache::new(/*capacity:*/ 128));
         let thread_pool = ThreadPoolBuilder::new().num_threads(3).build().unwrap();
-        let recycler_cache = RecyclerCache::default();
         let keypairs = repeat_with(|| rng.gen_range(169_367_809..169_906_789))
             .map(|slot| (slot, Keypair::new()))
             .take(3)
@@ -840,7 +396,7 @@ mod tests {
             .collect();
         let mut packets = make_packets(&mut rng, &shreds);
         assert_eq!(
-            verify_shreds_gpu(&thread_pool, &packets, &pubkeys, &recycler_cache, &cache),
+            verify_shreds(&thread_pool, &packets, &pubkeys, &cache),
             packets
                 .iter()
                 .map(|batch| vec![1u8; batch.len()])
@@ -865,19 +421,15 @@ mod tests {
                     .collect::<Vec<_>>()
             })
             .collect();
-        assert_eq!(
-            verify_shreds_gpu(&thread_pool, &packets, &pubkeys, &recycler_cache, &cache),
-            out
-        );
+        assert_eq!(verify_shreds(&thread_pool, &packets, &pubkeys, &cache), out);
     }
 
     #[test_case(true)]
     #[test_case(false)]
-    fn test_sign_shreds_gpu(is_last_in_slot: bool) {
+    fn test_sign_shreds(is_last_in_slot: bool) {
         let mut rng = rand::thread_rng();
         let cache = RwLock::new(LruCache::new(/*capacity:*/ 128));
         let thread_pool = ThreadPoolBuilder::new().num_threads(3).build().unwrap();
-        let recycler_cache = RecyclerCache::default();
         let shreds = {
             let keypairs = repeat_with(|| rng.gen_range(169_367_809..169_906_789))
                 .map(|slot| (slot, Keypair::new()))
@@ -898,24 +450,16 @@ mod tests {
         let mut packets = make_packets(&mut rng, &shreds);
         // Assert that initially all signatrues are invalid.
         assert_eq!(
-            verify_shreds_gpu(&thread_pool, &packets, &pubkeys, &recycler_cache, &cache),
+            verify_shreds(&thread_pool, &packets, &pubkeys, &cache),
             packets
                 .iter()
                 .map(|batch| vec![0u8; batch.len()])
                 .collect::<Vec<_>>()
         );
-        let pinned_keypair = sign_shreds_gpu_pinned_keypair(&keypair, &recycler_cache);
-        let pinned_keypair = Some(Arc::new(pinned_keypair));
         // Sign and verify shreds signatures.
-        sign_shreds_gpu(
-            &thread_pool,
-            &keypair,
-            &pinned_keypair,
-            &mut packets,
-            &recycler_cache,
-        );
+        sign_shreds(&thread_pool, &keypair, &mut packets);
         assert_eq!(
-            verify_shreds_gpu(&thread_pool, &packets, &pubkeys, &recycler_cache, &cache),
+            verify_shreds(&thread_pool, &packets, &pubkeys, &cache),
             packets
                 .iter()
                 .map(|batch| vec![1u8; batch.len()])

+ 1 - 1
local-cluster/src/cluster_tests.rs

@@ -627,7 +627,7 @@ fn get_and_verify_slot_entries(
     last_entry: &Hash,
 ) -> Vec<Entry> {
     let entries = blockstore.get_slot_entries(slot, 0).unwrap();
-    assert!(entries.verify(last_entry, thread_pool));
+    assert!(entries.verify(last_entry, thread_pool).status());
     entries
 }
 

+ 8 - 24
perf/benches/sigverify.rs

@@ -29,11 +29,9 @@ fn bench_sigverify_simple(b: &mut Bencher) {
         128,
     );
 
-    let recycler = Recycler::default();
-    let recycler_out = Recycler::default();
     // verify packets
     b.iter(|| {
-        sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, num_packets);
+        sigverify::ed25519_verify(&mut batches, false, num_packets);
     })
 }
 
@@ -56,61 +54,49 @@ fn gen_batches(
 fn bench_sigverify_low_packets_small_batch(b: &mut Bencher) {
     let num_packets = sigverify::VERIFY_PACKET_CHUNK_SIZE - 1;
     let mut batches = gen_batches(false, 1, num_packets);
-    let recycler = Recycler::default();
-    let recycler_out = Recycler::default();
     b.iter(|| {
-        sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, num_packets);
+        sigverify::ed25519_verify(&mut batches, false, num_packets);
     })
 }
 
 fn bench_sigverify_low_packets_large_batch(b: &mut Bencher) {
     let num_packets = sigverify::VERIFY_PACKET_CHUNK_SIZE - 1;
     let mut batches = gen_batches(false, LARGE_BATCH_PACKET_COUNT, num_packets);
-    let recycler = Recycler::default();
-    let recycler_out = Recycler::default();
     b.iter(|| {
-        sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, num_packets);
+        sigverify::ed25519_verify(&mut batches, false, num_packets);
     })
 }
 
 fn bench_sigverify_medium_packets_small_batch(b: &mut Bencher) {
     let num_packets = sigverify::VERIFY_PACKET_CHUNK_SIZE * 8;
     let mut batches = gen_batches(false, 1, num_packets);
-    let recycler = Recycler::default();
-    let recycler_out = Recycler::default();
     b.iter(|| {
-        sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, num_packets);
+        sigverify::ed25519_verify(&mut batches, false, num_packets);
     })
 }
 
 fn bench_sigverify_medium_packets_large_batch(b: &mut Bencher) {
     let num_packets = sigverify::VERIFY_PACKET_CHUNK_SIZE * 8;
     let mut batches = gen_batches(false, LARGE_BATCH_PACKET_COUNT, num_packets);
-    let recycler = Recycler::default();
-    let recycler_out = Recycler::default();
     b.iter(|| {
-        sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, num_packets);
+        sigverify::ed25519_verify(&mut batches, false, num_packets);
     })
 }
 
 fn bench_sigverify_high_packets_small_batch(b: &mut Bencher) {
     let num_packets = sigverify::VERIFY_PACKET_CHUNK_SIZE * 32;
     let mut batches = gen_batches(false, 1, num_packets);
-    let recycler = Recycler::default();
-    let recycler_out = Recycler::default();
     b.iter(|| {
-        sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, num_packets);
+        sigverify::ed25519_verify(&mut batches, false, num_packets);
     })
 }
 
 fn bench_sigverify_high_packets_large_batch(b: &mut Bencher) {
     let num_packets = sigverify::VERIFY_PACKET_CHUNK_SIZE * 32;
     let mut batches = gen_batches(false, LARGE_BATCH_PACKET_COUNT, num_packets);
-    let recycler = Recycler::default();
-    let recycler_out = Recycler::default();
     // verify packets
     b.iter(|| {
-        sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, num_packets);
+        sigverify::ed25519_verify(&mut batches, false, num_packets);
     })
 }
 
@@ -151,11 +137,9 @@ fn bench_sigverify_uneven(b: &mut Bencher) {
     }
     info!("num_packets: {num_packets} valid: {num_valid}");
 
-    let recycler = Recycler::default();
-    let recycler_out = Recycler::default();
     // verify packets
     b.iter(|| {
-        sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, num_packets);
+        sigverify::ed25519_verify(&mut batches, false, num_packets);
     })
 }
 

+ 0 - 310
perf/src/cuda_runtime.rs

@@ -1,310 +0,0 @@
-// Module for cuda-related helper functions and wrappers.
-//
-// cudaHostRegister/cudaHostUnregister -
-//    apis for page-pinning memory. Cuda driver/hardware cannot overlap
-//    copies from host memory to GPU memory unless the memory is page-pinned and
-//    cannot be paged to disk. The cuda driver provides these interfaces to pin and unpin memory.
-
-use {
-    crate::{
-        perf_libs,
-        recycler::{RecyclerX, Reset},
-    },
-    rand::{seq::SliceRandom, Rng},
-    rayon::prelude::*,
-    serde::{Deserialize, Serialize},
-    std::{
-        ops::{Deref, DerefMut, Index, IndexMut},
-        os::raw::c_int,
-        slice::{Iter, SliceIndex},
-        sync::Weak,
-    },
-};
-
-const CUDA_SUCCESS: c_int = 0;
-
-fn pin<T>(mem: &mut Vec<T>) {
-    if let Some(api) = perf_libs::api() {
-        use std::{ffi::c_void, mem::size_of};
-
-        let ptr = mem.as_mut_ptr();
-        let size = mem.capacity().saturating_mul(size_of::<T>());
-        let err = unsafe {
-            (api.cuda_host_register)(ptr as *mut c_void, size, /*flags=*/ 0)
-        };
-        assert!(
-            err == CUDA_SUCCESS,
-            "cudaHostRegister error: {err} ptr: {ptr:?} bytes: {size}"
-        );
-    }
-}
-
-fn unpin<T>(mem: *mut T) {
-    if let Some(api) = perf_libs::api() {
-        use std::ffi::c_void;
-
-        let err = unsafe { (api.cuda_host_unregister)(mem as *mut c_void) };
-        assert!(
-            err == CUDA_SUCCESS,
-            "cudaHostUnregister returned: {err} ptr: {mem:?}"
-        );
-    }
-}
-
-// A vector wrapper where the underlying memory can be
-// page-pinned. Controlled by flags in case user only wants
-// to pin in certain circumstances.
-#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
-#[derive(Debug, Default, Serialize, Deserialize)]
-pub struct PinnedVec<T: Default + Clone + Sized> {
-    x: Vec<T>,
-    pinned: bool,
-    pinnable: bool,
-    #[serde(skip)]
-    recycler: Weak<RecyclerX<PinnedVec<T>>>,
-}
-
-impl<T: Default + Clone + Sized> Reset for PinnedVec<T> {
-    fn reset(&mut self) {
-        self.resize(0, T::default());
-    }
-    fn warm(&mut self, size_hint: usize) {
-        self.set_pinnable();
-        self.resize(size_hint, T::default());
-    }
-    fn set_recycler(&mut self, recycler: Weak<RecyclerX<Self>>) {
-        self.recycler = recycler;
-    }
-}
-
-impl<T: Clone + Default + Sized> From<PinnedVec<T>> for Vec<T> {
-    fn from(mut pinned_vec: PinnedVec<T>) -> Self {
-        if pinned_vec.pinned {
-            // If the vector is pinned and has a recycler, just return a clone
-            // so that the next allocation of a PinnedVec will recycle an
-            // already pinned one.
-            if pinned_vec.recycler.strong_count() != 0 {
-                return pinned_vec.x.clone();
-            }
-            unpin(pinned_vec.x.as_mut_ptr());
-            pinned_vec.pinned = false;
-        }
-        pinned_vec.pinnable = false;
-        pinned_vec.recycler = Weak::default();
-        std::mem::take(&mut pinned_vec.x)
-    }
-}
-
-impl<'a, T: Clone + Default + Sized> IntoIterator for &'a PinnedVec<T> {
-    type Item = &'a T;
-    type IntoIter = Iter<'a, T>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.x.iter()
-    }
-}
-
-impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> Index<I> for PinnedVec<T> {
-    type Output = I::Output;
-
-    #[inline]
-    fn index(&self, index: I) -> &Self::Output {
-        &self.x[index]
-    }
-}
-
-impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> IndexMut<I> for PinnedVec<T> {
-    #[inline]
-    fn index_mut(&mut self, index: I) -> &mut Self::Output {
-        &mut self.x[index]
-    }
-}
-
-impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a PinnedVec<T> {
-    type Iter = rayon::slice::Iter<'a, T>;
-    type Item = &'a T;
-    fn into_par_iter(self) -> Self::Iter {
-        self.x.par_iter()
-    }
-}
-
-impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a mut PinnedVec<T> {
-    type Iter = rayon::slice::IterMut<'a, T>;
-    type Item = &'a mut T;
-    fn into_par_iter(self) -> Self::Iter {
-        self.x.par_iter_mut()
-    }
-}
-
-impl<T: Clone + Default + Sized> PinnedVec<T> {
-    pub fn reserve_and_pin(&mut self, size: usize) {
-        if self.x.capacity() < size {
-            if self.pinned {
-                unpin(self.x.as_mut_ptr());
-                self.pinned = false;
-            }
-            self.x.reserve(size);
-        }
-        self.set_pinnable();
-        if !self.pinned {
-            pin(&mut self.x);
-            self.pinned = true;
-        }
-    }
-
-    pub fn set_pinnable(&mut self) {
-        self.pinnable = true;
-    }
-
-    pub fn from_vec(source: Vec<T>) -> Self {
-        Self {
-            x: source,
-            pinned: false,
-            pinnable: false,
-            recycler: Weak::default(),
-        }
-    }
-
-    pub fn with_capacity(capacity: usize) -> Self {
-        Self::from_vec(Vec::with_capacity(capacity))
-    }
-
-    fn prepare_realloc(&mut self, new_size: usize) -> (*mut T, usize) {
-        let old_ptr = self.x.as_mut_ptr();
-        let old_capacity = self.x.capacity();
-        // Predict realloc and unpin.
-        if self.pinned && self.x.capacity() < new_size {
-            unpin(old_ptr);
-            self.pinned = false;
-        }
-        (old_ptr, old_capacity)
-    }
-
-    pub fn push(&mut self, x: T) {
-        let (old_ptr, old_capacity) = self.prepare_realloc(self.x.len().saturating_add(1));
-        self.x.push(x);
-        self.check_ptr(old_ptr, old_capacity, "push");
-    }
-
-    pub fn resize(&mut self, size: usize, elem: T) {
-        let (old_ptr, old_capacity) = self.prepare_realloc(size);
-        self.x.resize(size, elem);
-        self.check_ptr(old_ptr, old_capacity, "resize");
-    }
-
-    pub fn append(&mut self, other: &mut Vec<T>) {
-        let (old_ptr, old_capacity) =
-            self.prepare_realloc(self.x.len().saturating_add(other.len()));
-        self.x.append(other);
-        self.check_ptr(old_ptr, old_capacity, "resize");
-    }
-
-    pub fn append_pinned(&mut self, other: &mut Self) {
-        let (old_ptr, old_capacity) =
-            self.prepare_realloc(self.x.len().saturating_add(other.len()));
-        self.x.append(&mut other.x);
-        self.check_ptr(old_ptr, old_capacity, "resize");
-    }
-
-    pub fn shuffle<R: Rng>(&mut self, rng: &mut R) {
-        self.x.shuffle(rng)
-    }
-
-    fn check_ptr(&mut self, old_ptr: *mut T, old_capacity: usize, from: &'static str) {
-        let api = perf_libs::api();
-        if api.is_some()
-            && self.pinnable
-            && (!std::ptr::eq(self.x.as_ptr(), old_ptr) || self.x.capacity() != old_capacity)
-        {
-            if self.pinned {
-                unpin(old_ptr);
-            }
-
-            trace!(
-                "pinning from check_ptr old: {} size: {} from: {}",
-                old_capacity,
-                self.x.capacity(),
-                from
-            );
-            pin(&mut self.x);
-            self.pinned = true;
-        }
-    }
-}
-
-impl<T: Clone + Default + Sized> Clone for PinnedVec<T> {
-    fn clone(&self) -> Self {
-        let mut x = self.x.clone();
-        let pinned = if self.pinned {
-            pin(&mut x);
-            true
-        } else {
-            false
-        };
-        debug!(
-            "clone PinnedVec: size: {} pinned?: {} pinnable?: {}",
-            self.x.capacity(),
-            self.pinned,
-            self.pinnable
-        );
-        Self {
-            x,
-            pinned,
-            pinnable: self.pinnable,
-            recycler: self.recycler.clone(),
-        }
-    }
-}
-
-impl<T: Sized + Default + Clone> Deref for PinnedVec<T> {
-    type Target = Vec<T>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.x
-    }
-}
-
-impl<T: Sized + Default + Clone> DerefMut for PinnedVec<T> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.x
-    }
-}
-
-impl<T: Sized + Default + Clone> Drop for PinnedVec<T> {
-    fn drop(&mut self) {
-        if let Some(recycler) = self.recycler.upgrade() {
-            recycler.recycle(std::mem::take(self));
-        } else if self.pinned {
-            unpin(self.x.as_mut_ptr());
-        }
-    }
-}
-
-impl<T: Sized + Default + Clone + PartialEq> PartialEq for PinnedVec<T> {
-    fn eq(&self, other: &Self) -> bool {
-        self.x.eq(&other.x)
-    }
-}
-
-impl<T: Sized + Default + Clone + PartialEq + Eq> Eq for PinnedVec<T> {}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_pinned_vec() {
-        let mut mem = PinnedVec::with_capacity(10);
-        mem.set_pinnable();
-        mem.push(50);
-        mem.resize(2, 10);
-        assert_eq!(mem[0], 50);
-        assert_eq!(mem[1], 10);
-        assert_eq!(mem.len(), 2);
-        assert!(!mem.is_empty());
-        let mut iter = mem.iter();
-        assert_eq!(*iter.next().unwrap(), 50);
-        assert_eq!(*iter.next().unwrap(), 10);
-        assert_eq!(iter.next(), None);
-    }
-}

+ 1 - 10
perf/src/lib.rs

@@ -8,12 +8,12 @@
     )
 )]
 #![cfg_attr(feature = "frozen-abi", feature(min_specialization))]
-pub mod cuda_runtime;
 pub mod data_budget;
 pub mod deduper;
 pub mod discard;
 pub mod packet;
 pub mod perf_libs;
+pub mod recycled_vec;
 pub mod recycler;
 pub mod recycler_cache;
 pub mod sigverify;
@@ -55,15 +55,6 @@ fn is_rosetta_emulated() -> bool {
 }
 
 pub fn report_target_features() {
-    warn!(
-        "CUDA is {}abled",
-        if crate::perf_libs::api().is_some() {
-            "en"
-        } else {
-            "dis"
-        }
-    );
-
     // Validator binaries built on a machine with AVX support will generate invalid opcodes
     // when run on machines without AVX causing a non-obvious process abort.  Instead detect
     // the mismatch and error cleanly.

+ 44 - 68
perf/src/packet.rs

@@ -2,7 +2,7 @@
 #[cfg(feature = "dev-context-only-utils")]
 use bytes::{BufMut, BytesMut};
 use {
-    crate::{cuda_runtime::PinnedVec, recycler::Recycler},
+    crate::{recycled_vec::RecycledVec, recycler::Recycler},
     bincode::config::Options,
     bytes::Bytes,
     rayon::{
@@ -151,7 +151,7 @@ impl BytesPacket {
 #[cfg_attr(feature = "frozen-abi", derive(AbiExample, AbiEnumVisitor))]
 #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
 pub enum PacketBatch {
-    Pinned(PinnedPacketBatch),
+    Pinned(RecycledPacketBatch),
     Bytes(BytesPacketBatch),
     Single(BytesPacket),
 }
@@ -258,8 +258,8 @@ impl PacketBatch {
     }
 }
 
-impl From<PinnedPacketBatch> for PacketBatch {
-    fn from(batch: PinnedPacketBatch) -> Self {
+impl From<RecycledPacketBatch> for PacketBatch {
+    fn from(batch: RecycledPacketBatch) -> Self {
         Self::Pinned(batch)
     }
 }
@@ -644,36 +644,21 @@ impl IndexedParallelIterator for PacketBatchParIterMut<'_> {
 
 #[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
 #[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)]
-pub struct PinnedPacketBatch {
-    packets: PinnedVec<Packet>,
+pub struct RecycledPacketBatch {
+    packets: RecycledVec<Packet>,
 }
 
-pub type PacketBatchRecycler = Recycler<PinnedVec<Packet>>;
+pub type PacketBatchRecycler = Recycler<RecycledVec<Packet>>;
 
-impl PinnedPacketBatch {
+impl RecycledPacketBatch {
     pub fn new(packets: Vec<Packet>) -> Self {
-        let packets = PinnedVec::from_vec(packets);
-        Self { packets }
+        Self {
+            packets: RecycledVec::from_vec(packets),
+        }
     }
 
     pub fn with_capacity(capacity: usize) -> Self {
-        let packets = PinnedVec::with_capacity(capacity);
-        Self { packets }
-    }
-
-    pub fn new_pinned_with_capacity(capacity: usize) -> Self {
-        let mut batch = Self::with_capacity(capacity);
-        batch.packets.reserve_and_pin(capacity);
-        batch
-    }
-
-    pub fn new_unpinned_with_recycler(
-        recycler: &PacketBatchRecycler,
-        capacity: usize,
-        name: &'static str,
-    ) -> Self {
-        let mut packets = recycler.allocate(name);
-        packets.reserve(capacity);
+        let packets = RecycledVec::with_capacity(capacity);
         Self { packets }
     }
 
@@ -683,7 +668,7 @@ impl PinnedPacketBatch {
         name: &'static str,
     ) -> Self {
         let mut packets = recycler.allocate(name);
-        packets.reserve_and_pin(capacity);
+        packets.preallocate(capacity);
         Self { packets }
     }
 
@@ -697,7 +682,7 @@ impl PinnedPacketBatch {
         batch
     }
 
-    pub fn new_unpinned_with_recycler_data_and_dests<S, T>(
+    pub fn new_with_recycler_data_and_dests<S, T>(
         recycler: &PacketBatchRecycler,
         name: &'static str,
         dests_and_data: impl IntoIterator<Item = (S, T), IntoIter: ExactSizeIterator>,
@@ -707,7 +692,7 @@ impl PinnedPacketBatch {
         T: solana_packet::Encode,
     {
         let dests_and_data = dests_and_data.into_iter();
-        let mut batch = Self::new_unpinned_with_recycler(recycler, dests_and_data.len(), name);
+        let mut batch = Self::new_with_recycler(recycler, dests_and_data.len(), name);
         batch
             .packets
             .resize(dests_and_data.len(), Packet::default());
@@ -730,38 +715,44 @@ impl PinnedPacketBatch {
         batch
     }
 
-    pub fn new_unpinned_with_recycler_data(
-        recycler: &PacketBatchRecycler,
-        name: &'static str,
-        mut packets: Vec<Packet>,
-    ) -> Self {
-        let mut batch = Self::new_unpinned_with_recycler(recycler, packets.len(), name);
-        batch.packets.append(&mut packets);
-        batch
-    }
-
     pub fn set_addr(&mut self, addr: &SocketAddr) {
         for p in self.iter_mut() {
             p.meta_mut().set_socket_addr(addr);
         }
     }
+
+    pub fn push(&mut self, packet: Packet) {
+        self.packets.push(packet)
+    }
+
+    pub fn truncate(&mut self, len: usize) {
+        self.packets.truncate(len)
+    }
+
+    pub fn resize(&mut self, packets_per_batch: usize, value: Packet) {
+        self.packets.resize(packets_per_batch, value)
+    }
+
+    pub fn capacity(&self) -> usize {
+        self.packets.capacity()
+    }
 }
 
-impl Deref for PinnedPacketBatch {
-    type Target = PinnedVec<Packet>;
+impl Deref for RecycledPacketBatch {
+    type Target = [Packet];
 
     fn deref(&self) -> &Self::Target {
         &self.packets
     }
 }
 
-impl DerefMut for PinnedPacketBatch {
+impl DerefMut for RecycledPacketBatch {
     fn deref_mut(&mut self) -> &mut Self::Target {
         &mut self.packets
     }
 }
 
-impl<I: SliceIndex<[Packet]>> Index<I> for PinnedPacketBatch {
+impl<I: SliceIndex<[Packet]>> Index<I> for RecycledPacketBatch {
     type Output = I::Output;
 
     #[inline]
@@ -770,14 +761,14 @@ impl<I: SliceIndex<[Packet]>> Index<I> for PinnedPacketBatch {
     }
 }
 
-impl<I: SliceIndex<[Packet]>> IndexMut<I> for PinnedPacketBatch {
+impl<I: SliceIndex<[Packet]>> IndexMut<I> for RecycledPacketBatch {
     #[inline]
     fn index_mut(&mut self, index: I) -> &mut Self::Output {
         &mut self.packets[index]
     }
 }
 
-impl<'a> IntoIterator for &'a PinnedPacketBatch {
+impl<'a> IntoIterator for &'a RecycledPacketBatch {
     type Item = &'a Packet;
     type IntoIter = Iter<'a, Packet>;
 
@@ -786,7 +777,7 @@ impl<'a> IntoIterator for &'a PinnedPacketBatch {
     }
 }
 
-impl<'a> IntoParallelIterator for &'a PinnedPacketBatch {
+impl<'a> IntoParallelIterator for &'a RecycledPacketBatch {
     type Iter = rayon::slice::Iter<'a, Packet>;
     type Item = &'a Packet;
     fn into_par_iter(self) -> Self::Iter {
@@ -794,7 +785,7 @@ impl<'a> IntoParallelIterator for &'a PinnedPacketBatch {
     }
 }
 
-impl<'a> IntoParallelIterator for &'a mut PinnedPacketBatch {
+impl<'a> IntoParallelIterator for &'a mut RecycledPacketBatch {
     type Iter = rayon::slice::IterMut<'a, Packet>;
     type Item = &'a mut Packet;
     fn into_par_iter(self) -> Self::Iter {
@@ -802,8 +793,8 @@ impl<'a> IntoParallelIterator for &'a mut PinnedPacketBatch {
     }
 }
 
-impl From<PinnedPacketBatch> for Vec<Packet> {
-    fn from(batch: PinnedPacketBatch) -> Self {
+impl From<RecycledPacketBatch> for Vec<Packet> {
+    fn from(batch: RecycledPacketBatch) -> Self {
         batch.packets.into()
     }
 }
@@ -812,8 +803,8 @@ pub fn to_packet_batches<T: Serialize>(items: &[T], chunk_size: usize) -> Vec<Pa
     items
         .chunks(chunk_size)
         .map(|batch_items| {
-            let mut batch = PinnedPacketBatch::with_capacity(batch_items.len());
-            batch.resize(batch_items.len(), Packet::default());
+            let mut batch = RecycledPacketBatch::with_capacity(batch_items.len());
+            batch.packets.resize(batch_items.len(), Packet::default());
             for (item, packet) in batch_items.iter().zip(batch.packets.iter_mut()) {
                 Packet::populate_packet(packet, None, item).expect("serialize request");
             }
@@ -842,21 +833,6 @@ impl BytesPacketBatch {
         let packets = Vec::with_capacity(capacity);
         Self { packets }
     }
-
-    pub fn to_pinned_packet_batch(&self) -> PinnedPacketBatch {
-        let mut batch = PinnedPacketBatch::new_pinned_with_capacity(self.len());
-        for bytes_packet in self.iter() {
-            let mut packet = Packet::default();
-            let size = bytes_packet.meta().size;
-            *packet.meta_mut() = bytes_packet.meta().clone();
-            packet.meta_mut().size = size;
-            packet.buffer_mut()[..size].copy_from_slice(&bytes_packet.buffer);
-
-            batch.push(packet);
-        }
-
-        batch
-    }
 }
 
 impl Deref for BytesPacketBatch {
@@ -958,7 +934,7 @@ mod tests {
         let recycler = PacketBatchRecycler::default();
         for i in 0..2 {
             let _first_packets =
-                PinnedPacketBatch::new_with_recycler(&recycler, i + 1, "first one");
+                RecycledPacketBatch::new_with_recycler(&recycler, i + 1, "first one");
         }
     }
 }

+ 1 - 161
perf/src/perf_libs.rs

@@ -1,98 +1,8 @@
 use {
-    core::ffi::c_void,
-    dlopen2::symbor::{Container, SymBorApi, Symbol},
     log::*,
-    std::{
-        env,
-        ffi::OsStr,
-        fs,
-        os::raw::{c_int, c_uint},
-        path::{Path, PathBuf},
-        sync::{Once, OnceLock},
-    },
+    std::{env, path::PathBuf},
 };
 
-#[repr(C)]
-pub struct Elems {
-    pub elems: *const u8,
-    pub num: u32,
-}
-
-#[derive(SymBorApi)]
-pub struct Api<'a> {
-    pub ed25519_init: Symbol<'a, unsafe extern "C" fn() -> bool>,
-    pub ed25519_set_verbose: Symbol<'a, unsafe extern "C" fn(val: bool)>,
-
-    #[allow(clippy::type_complexity)]
-    pub ed25519_verify_many: Symbol<
-        'a,
-        unsafe extern "C" fn(
-            vecs: *const Elems,
-            num: u32,          //number of vecs
-            message_size: u32, //size of each element inside the elems field of the vec
-            total_packets: u32,
-            total_signatures: u32,
-            message_lens: *const u32,
-            pubkey_offsets: *const u32,
-            signature_offsets: *const u32,
-            signed_message_offsets: *const u32,
-            out: *mut u8, //combined length of all the items in vecs
-            use_non_default_stream: u8,
-        ) -> u32,
-    >,
-
-    #[allow(clippy::type_complexity)]
-    pub ed25519_sign_many: Symbol<
-        'a,
-        unsafe extern "C" fn(
-            vecs: *mut Elems,
-            num: u32,          //number of vecs
-            message_size: u32, //size of each element inside the elems field of the vec
-            total_packets: u32,
-            total_signatures: u32,
-            message_lens: *const u32,
-            pubkey_offsets: *const u32,
-            privkey_offsets: *const u32,
-            signed_message_offsets: *const u32,
-            sgnatures_out: *mut u8, //combined length of all the items in vecs
-            use_non_default_stream: u8,
-        ) -> u32,
-    >,
-
-    pub poh_verify_many: Symbol<
-        'a,
-        unsafe extern "C" fn(
-            hashes: *mut u8,
-            num_hashes_arr: *const u64,
-            num_elems: usize,
-            use_non_default_stream: u8,
-        ) -> c_int,
-    >,
-
-    pub cuda_host_register:
-        Symbol<'a, unsafe extern "C" fn(ptr: *mut c_void, size: usize, flags: c_uint) -> c_int>,
-
-    pub cuda_host_unregister: Symbol<'a, unsafe extern "C" fn(ptr: *mut c_void) -> c_int>,
-
-    pub ed25519_get_checked_scalar:
-        Symbol<'a, unsafe extern "C" fn(out_scalar: *mut u8, in_scalar: *const u8) -> c_int>,
-
-    pub ed25519_check_packed_ge_small_order:
-        Symbol<'a, unsafe extern "C" fn(packed_ge: *const u8) -> c_int>,
-}
-
-static API: OnceLock<Container<Api>> = OnceLock::new();
-
-fn init(name: &OsStr) {
-    info!("Loading {name:?}");
-    API.get_or_init(|| {
-        unsafe { Container::load(name) }.unwrap_or_else(|err| {
-            error!("Unable to load {name:?}: {err}");
-            std::process::exit(1);
-        })
-    });
-}
-
 pub fn locate_perf_libs() -> Option<PathBuf> {
     let exe = env::current_exe().expect("Unable to get executable path");
     let perf_libs = exe.parent().unwrap().join("perf-libs");
@@ -104,38 +14,6 @@ pub fn locate_perf_libs() -> Option<PathBuf> {
     None
 }
 
-fn find_cuda_home(perf_libs_path: &Path) -> Option<PathBuf> {
-    if let Ok(cuda_home) = env::var("CUDA_HOME") {
-        let path = PathBuf::from(cuda_home);
-        if path.is_dir() {
-            info!("Using CUDA_HOME: {path:?}");
-            return Some(path);
-        }
-        warn!("Ignoring CUDA_HOME, not a path: {path:?}");
-    }
-
-    // Search /usr/local for a `cuda-` directory that matches a perf-libs subdirectory
-    for entry in fs::read_dir(perf_libs_path).unwrap().flatten() {
-        let path = entry.path();
-        if !path.is_dir() {
-            continue;
-        }
-        let dir_name = path.file_name().unwrap().to_str().unwrap_or("");
-        if !dir_name.starts_with("cuda-") {
-            continue;
-        }
-
-        let cuda_home: PathBuf = ["/", "usr", "local", dir_name].iter().collect();
-        if !cuda_home.is_dir() {
-            continue;
-        }
-
-        info!("CUDA installation found at {cuda_home:?}");
-        return Some(cuda_home);
-    }
-    None
-}
-
 pub fn append_to_ld_library_path(mut ld_library_path: String) {
     if let Ok(env_value) = env::var("LD_LIBRARY_PATH") {
         ld_library_path.push(':');
@@ -144,41 +22,3 @@ pub fn append_to_ld_library_path(mut ld_library_path: String) {
     info!("setting ld_library_path to: {ld_library_path:?}");
     env::set_var("LD_LIBRARY_PATH", ld_library_path);
 }
-
-pub fn init_cuda() {
-    if let Some(perf_libs_path) = locate_perf_libs() {
-        if let Some(cuda_home) = find_cuda_home(&perf_libs_path) {
-            let cuda_lib64_dir = cuda_home.join("lib64");
-            if cuda_lib64_dir.is_dir() {
-                // Prefix LD_LIBRARY_PATH with $CUDA_HOME/lib64 directory
-                // to ensure the correct CUDA version is used
-                append_to_ld_library_path(cuda_lib64_dir.to_str().unwrap_or("").to_string())
-            } else {
-                warn!("CUDA lib64 directory does not exist: {cuda_lib64_dir:?}");
-            }
-
-            let libcuda_crypt = perf_libs_path
-                .join(cuda_home.file_name().unwrap())
-                .join("libcuda-crypt.so");
-            return init(libcuda_crypt.as_os_str());
-        } else {
-            warn!("CUDA installation not found");
-        }
-    }
-
-    // Last resort!  Blindly load the shared object and hope it all works out
-    init(OsStr::new("libcuda-crypt.so"))
-}
-
-pub fn api() -> Option<&'static Container<Api<'static>>> {
-    {
-        static INIT_HOOK: Once = Once::new();
-        INIT_HOOK.call_once(|| {
-            if std::env::var("TEST_PERF_LIBS_CUDA").is_ok() {
-                init_cuda();
-            }
-        });
-    }
-
-    API.get()
-}

+ 184 - 0
perf/src/recycled_vec.rs

@@ -0,0 +1,184 @@
+use {
+    crate::recycler::{RecyclerX, Reset},
+    rand::{seq::SliceRandom, Rng},
+    rayon::prelude::*,
+    serde::{Deserialize, Serialize},
+    std::{
+        ops::{Deref, DerefMut, Index, IndexMut},
+        slice::{Iter, SliceIndex},
+        sync::Weak,
+    },
+};
+
+/// A vector wrapper whose backing storage can be preallocated and which is
+/// meant to be used with a recycler.
+#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
+#[derive(Debug, Default, Serialize, Deserialize)]
+pub struct RecycledVec<T: Default + Clone + Sized> {
+    // The wrapped vector that actually stores the elements.
+    x: Vec<T>,
+    // Weak back-reference to the recycler this value is handed to on drop.
+    // Skipped by serde, so it is neither written out nor read back.
+    #[serde(skip)]
+    recycler: Weak<RecyclerX<RecycledVec<T>>>,
+}
+
+/// Implements the recycler's `Reset` trait for `RecycledVec`.
+impl<T: Default + Clone + Sized> Reset for RecycledVec<T> {
+    /// Removes every element while leaving the allocation in place.
+    fn reset(&mut self) {
+        self.x.truncate(0);
+    }
+
+    /// Resizes the vector to exactly `size_hint` elements, padding with
+    /// `T::default()` when it has to grow.
+    fn warm(&mut self, size_hint: usize) {
+        self.x.resize(size_hint, T::default());
+    }
+
+    /// Stores the weak reference to the recycler consulted when this value is dropped.
+    fn set_recycler(&mut self, recycler: Weak<RecyclerX<Self>>) {
+        self.recycler = recycler;
+    }
+}
+
+impl<T: Clone + Default + Sized> From<RecycledVec<T>> for Vec<T> {
+    /// Extracts the inner `Vec<T>` without copying its elements.
+    fn from(mut recycled_vec: RecycledVec<T>) -> Self {
+        // Detach from the recycler first: the wrapper is dropped at the end of
+        // this function and must not be handed back for reuse once emptied.
+        recycled_vec.recycler = Weak::new();
+        let contents = &mut recycled_vec.x;
+        std::mem::take(contents)
+    }
+}
+
+/// Borrowing iteration: `for item in &recycled_vec` yields `&T` in index order.
+impl<'a, T: Clone + Default + Sized> IntoIterator for &'a RecycledVec<T> {
+    type Item = &'a T;
+    type IntoIter = Iter<'a, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        let elements: &'a [T] = self.x.as_slice();
+        elements.iter()
+    }
+}
+
+/// Read-only indexing with any index type a slice of `T` accepts.
+impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> Index<I> for RecycledVec<T> {
+    type Output = I::Output;
+
+    #[inline]
+    fn index(&self, index: I) -> &Self::Output {
+        let elements = self.x.as_slice();
+        &elements[index]
+    }
+}
+
+/// Mutable indexing with any index type a slice of `T` accepts.
+impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> IndexMut<I> for RecycledVec<T> {
+    #[inline]
+    fn index_mut(&mut self, index: I) -> &mut Self::Output {
+        let elements = self.x.as_mut_slice();
+        &mut elements[index]
+    }
+}
+
+/// Shared parallel iteration over the elements via rayon.
+impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a RecycledVec<T> {
+    type Iter = rayon::slice::Iter<'a, T>;
+    type Item = &'a T;
+    fn into_par_iter(self) -> Self::Iter {
+        let elements: &'a [T] = self.x.as_slice();
+        elements.into_par_iter()
+    }
+}
+
+/// Exclusive parallel iteration over the elements via rayon.
+impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a mut RecycledVec<T> {
+    type Iter = rayon::slice::IterMut<'a, T>;
+    type Item = &'a mut T;
+    fn into_par_iter(self) -> Self::Iter {
+        let elements: &'a mut [T] = self.x.as_mut_slice();
+        elements.into_par_iter()
+    }
+}
+
+impl<T: Clone + Default + Sized> RecycledVec<T> {
+    /// Ensures the backing vector can hold at least `size` elements without
+    /// reallocating. Never shrinks the allocation and never changes the length.
+    pub fn preallocate(&mut self, size: usize) {
+        // `Vec::reserve` takes the number of elements *in addition to `len()`*,
+        // so the shortfall has to be measured from the length. Measuring it from
+        // the capacity under-reserves: an empty vector with capacity 10 that is
+        // asked for 15 would request only 5 extra slots, which is a no-op.
+        let additional = size.saturating_sub(self.x.len());
+        self.x.reserve(additional);
+    }
+
+    /// Wraps an existing vector; the result is not attached to any recycler.
+    pub fn from_vec(source: Vec<T>) -> Self {
+        Self {
+            x: source,
+            recycler: Weak::default(),
+        }
+    }
+
+    /// Creates an empty, recycler-less vector with room for `capacity` elements.
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self::from_vec(Vec::with_capacity(capacity))
+    }
+
+    /// Appends `x` to the end of the vector.
+    pub fn push(&mut self, x: T) {
+        self.x.push(x);
+    }
+
+    /// Resizes to exactly `size` elements, filling new slots with clones of `elem`.
+    pub fn resize(&mut self, size: usize, elem: T) {
+        self.x.resize(size, elem);
+    }
+
+    /// Moves every element of `other` onto the end of this vector, leaving
+    /// `other` empty.
+    pub fn append(&mut self, other: &mut Vec<T>) {
+        self.x.append(other);
+    }
+
+    /// Shuffles the elements in place using `rng`.
+    pub fn shuffle<R: Rng>(&mut self, rng: &mut R) {
+        self.x.shuffle(rng)
+    }
+
+    /// Shortens the vector to at most `len` elements; no effect if it is
+    /// already that short.
+    pub fn truncate(&mut self, len: usize) {
+        self.x.truncate(len);
+    }
+
+    /// Number of elements the backing vector can hold without reallocating.
+    pub(crate) fn capacity(&self) -> usize {
+        self.x.capacity()
+    }
+}
+
+impl<T: Clone + Default + Sized> Clone for RecycledVec<T> {
+    /// Clones the elements into a new vector. The clone carries a copy of the
+    /// same weak recycler reference as the original.
+    fn clone(&self) -> Self {
+        let x = self.x.clone();
+        // Log under the type's actual name so the message can be traced back
+        // to this impl.
+        debug!("clone RecycledVec: size: {}", self.x.capacity());
+        Self {
+            x,
+            recycler: self.recycler.clone(),
+        }
+    }
+}
+
+/// Exposes the elements as a slice, so read-only slice methods are callable
+/// directly on a `RecycledVec`.
+impl<T: Sized + Default + Clone> Deref for RecycledVec<T> {
+    type Target = [T];
+
+    fn deref(&self) -> &Self::Target {
+        self.x.as_slice()
+    }
+}
+
+/// Exposes the elements as a mutable slice; the length cannot be changed
+/// through this view.
+impl<T: Sized + Default + Clone> DerefMut for RecycledVec<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.x.as_mut_slice()
+    }
+}
+
+impl<T: Sized + Default + Clone> Drop for RecycledVec<T> {
+    /// Hands the vector back to its recycler if that recycler is still alive;
+    /// otherwise the fields are simply dropped as usual.
+    fn drop(&mut self) {
+        let Some(owner) = self.recycler.upgrade() else {
+            return;
+        };
+        // `mem::take` moves the current contents out and leaves a `Default`
+        // value (empty vector, no recycler) behind in `self`.
+        let contents = std::mem::take(self);
+        owner.recycle(contents);
+    }
+}
+
+/// Equality compares the stored elements only; the recycler reference is ignored.
+impl<T: Sized + Default + Clone + PartialEq> PartialEq for RecycledVec<T> {
+    fn eq(&self, other: &Self) -> bool {
+        let (lhs, rhs) = (&self.x, &other.x);
+        lhs == rhs
+    }
+}
+
+impl<T: Sized + Default + Clone + PartialEq + Eq> Eq for RecycledVec<T> {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Smoke test: `push` and `resize` change the contents, and indexing plus the
+    // slice methods reached through `Deref` (`len`, `is_empty`, `iter`) observe them.
+    #[test]
+    fn test_recycled_vec() {
+        let mut mem = RecycledVec::with_capacity(10);
+        mem.push(50);
+        // Grow from one element to two; the new slot is filled with 10.
+        mem.resize(2, 10);
+        assert_eq!(mem[0], 50);
+        assert_eq!(mem[1], 10);
+        assert_eq!(mem.len(), 2);
+        assert!(!mem.is_empty());
+        // Iteration yields the elements in index order and then ends.
+        let mut iter = mem.iter();
+        assert_eq!(*iter.next().unwrap(), 50);
+        assert_eq!(*iter.next().unwrap(), 10);
+        assert_eq!(iter.next(), None);
+    }
+}

+ 1 - 1
perf/src/recycler.rs

@@ -59,7 +59,7 @@ impl<T: Default> Default for RecyclerX<T> {
 
 #[cfg(feature = "frozen-abi")]
 impl solana_frozen_abi::abi_example::AbiExample
-    for RecyclerX<crate::cuda_runtime::PinnedVec<solana_packet::Packet>>
+    for RecyclerX<crate::recycled_vec::RecycledVec<solana_packet::Packet>>
 {
     fn example() -> Self {
         Self::default()

+ 3 - 3
perf/src/recycler_cache.rs

@@ -1,9 +1,9 @@
-use crate::{cuda_runtime::PinnedVec, recycler::Recycler, sigverify::TxOffset};
+use crate::{recycled_vec::RecycledVec, recycler::Recycler, sigverify::TxOffset};
 
 #[derive(Default, Clone)]
 pub struct RecyclerCache {
     recycler_offsets: Recycler<TxOffset>,
-    recycler_buffer: Recycler<PinnedVec<u8>>,
+    recycler_buffer: Recycler<RecycledVec<u8>>,
 }
 
 impl RecyclerCache {
@@ -16,7 +16,7 @@ impl RecyclerCache {
     pub fn offsets(&self) -> &Recycler<TxOffset> {
         &self.recycler_offsets
     }
-    pub fn buffer(&self) -> &Recycler<PinnedVec<u8>> {
+    pub fn buffer(&self) -> &Recycler<RecycledVec<u8>> {
         &self.recycler_buffer
     }
 }

+ 29 - 327
perf/src/sigverify.rs

@@ -1,16 +1,13 @@
 //! The `sigverify` module provides digital signature verification functions.
 //! By default, signatures are verified in parallel using all available CPU
-//! cores.  When perf-libs are available signature verification is offloaded
-//! to the GPU.
-//!
+//! cores.
 use {
     crate::{
-        cuda_runtime::PinnedVec,
         packet::{
             BytesPacketBatch, Packet, PacketBatch, PacketFlags, PacketRef, PacketRefMut,
-            PinnedPacketBatch, PACKET_DATA_SIZE,
+            RecycledPacketBatch,
         },
-        perf_libs,
+        recycled_vec::RecycledVec,
         recycler::Recycler,
     },
     rayon::{prelude::*, ThreadPool},
@@ -20,7 +17,7 @@ use {
     solana_rayon_threadlimit::get_thread_count,
     solana_short_vec::decode_shortu16_len,
     solana_signature::Signature,
-    std::{borrow::Cow, convert::TryFrom, mem::size_of},
+    std::{convert::TryFrom, mem::size_of},
 };
 
 // Empirically derived to constrain max verify latency to ~8ms at lower packet counts
@@ -34,7 +31,7 @@ static PAR_THREAD_POOL: std::sync::LazyLock<ThreadPool> = std::sync::LazyLock::n
         .unwrap()
 });
 
-pub type TxOffset = PinnedVec<u32>;
+pub type TxOffset = RecycledVec<u32>;
 
 type TxOffsets = (TxOffset, TxOffset, TxOffset, TxOffset, Vec<Vec<u32>>);
 
@@ -96,16 +93,6 @@ impl std::convert::From<std::num::TryFromIntError> for PacketError {
     }
 }
 
-pub fn init() {
-    if let Some(api) = perf_libs::api() {
-        unsafe {
-            (api.ed25519_set_verbose)(true);
-            assert!((api.ed25519_init)(), "ed25519_init() failed");
-            (api.ed25519_set_verbose)(false);
-        }
-    }
-}
-
 /// Returns true if the signature on the packet verifies.
 /// Caller must do packet.set_discard(true) if this returns false.
 #[must_use]
@@ -415,14 +402,10 @@ pub fn generate_offsets(
     reject_non_vote: bool,
 ) -> TxOffsets {
     debug!("allocating..");
-    let mut signature_offsets: PinnedVec<_> = recycler.allocate("sig_offsets");
-    signature_offsets.set_pinnable();
-    let mut pubkey_offsets: PinnedVec<_> = recycler.allocate("pubkey_offsets");
-    pubkey_offsets.set_pinnable();
-    let mut msg_start_offsets: PinnedVec<_> = recycler.allocate("msg_start_offsets");
-    msg_start_offsets.set_pinnable();
-    let mut msg_sizes: PinnedVec<_> = recycler.allocate("msg_size_offsets");
-    msg_sizes.set_pinnable();
+    let mut signature_offsets: RecycledVec<_> = recycler.allocate("sig_offsets");
+    let mut pubkey_offsets: RecycledVec<_> = recycler.allocate("pubkey_offsets");
+    let mut msg_start_offsets: RecycledVec<_> = recycler.allocate("msg_start_offsets");
+    let mut msg_sizes: RecycledVec<_> = recycler.allocate("msg_size_offsets");
     let mut current_offset: usize = 0;
     let offsets = batches
         .iter_mut()
@@ -465,7 +448,7 @@ pub fn generate_offsets(
     )
 }
 
-fn split_batches(batches: Vec<PacketBatch>) -> (Vec<BytesPacketBatch>, Vec<PinnedPacketBatch>) {
+fn split_batches(batches: Vec<PacketBatch>) -> (Vec<BytesPacketBatch>, Vec<RecycledPacketBatch>) {
     let mut bytes_batches = Vec::new();
     let mut pinned_batches = Vec::new();
     for batch in batches {
@@ -521,7 +504,7 @@ macro_rules! shrink_batches_fn {
 }
 
 shrink_batches_fn!(shrink_bytes_batches, BytesPacketBatch);
-shrink_batches_fn!(shrink_pinned_batches, PinnedPacketBatch);
+shrink_batches_fn!(shrink_pinned_batches, RecycledPacketBatch);
 
 pub fn shrink_batches(batches: Vec<PacketBatch>) -> Vec<PacketBatch> {
     let (mut bytes_batches, mut pinned_batches) = split_batches(batches);
@@ -534,7 +517,7 @@ pub fn shrink_batches(batches: Vec<PacketBatch>) -> Vec<PacketBatch> {
         .collect()
 }
 
-pub fn ed25519_verify_cpu(batches: &mut [PacketBatch], reject_non_vote: bool, packet_count: usize) {
+pub fn ed25519_verify(batches: &mut [PacketBatch], reject_non_vote: bool, packet_count: usize) {
     debug!("CPU ECDSA for {packet_count}");
     PAR_THREAD_POOL.install(|| {
         batches.par_iter_mut().flatten().for_each(|mut packet| {
@@ -555,7 +538,7 @@ pub fn ed25519_verify_disabled(batches: &mut [PacketBatch]) {
     });
 }
 
-pub fn copy_return_values<I, T>(sig_lens: I, out: &PinnedVec<u8>, rvs: &mut [Vec<u8>])
+pub fn copy_return_values<I, T>(sig_lens: I, out: &RecycledVec<u8>, rvs: &mut [Vec<u8>])
 where
     I: IntoIterator<Item = T>,
     T: IntoIterator<Item = u32>,
@@ -570,34 +553,6 @@ where
     }
 }
 
-// return true for success, i.e ge unpacks and !ge.is_small_order()
-pub fn check_packed_ge_small_order(ge: &[u8; 32]) -> bool {
-    if let Some(api) = perf_libs::api() {
-        unsafe {
-            // Returns 1 == fail, 0 == success
-            let res = (api.ed25519_check_packed_ge_small_order)(ge.as_ptr());
-
-            return res == 0;
-        }
-    }
-    false
-}
-
-pub fn get_checked_scalar(scalar: &[u8; 32]) -> Result<[u8; 32], PacketError> {
-    let mut out = [0u8; 32];
-    if let Some(api) = perf_libs::api() {
-        unsafe {
-            let res = (api.ed25519_get_checked_scalar)(out.as_mut_ptr(), scalar.as_ptr());
-            if res == 0 {
-                return Ok(out);
-            } else {
-                return Err(PacketError::InvalidLen);
-            }
-        }
-    }
-    Ok(out)
-}
-
 pub fn mark_disabled(batches: &mut [PacketBatch], r: &[Vec<u8>]) {
     for (batch, v) in batches.iter_mut().zip(r) {
         for (mut pkt, f) in batch.iter_mut().zip(v) {
@@ -608,99 +563,6 @@ pub fn mark_disabled(batches: &mut [PacketBatch], r: &[Vec<u8>]) {
     }
 }
 
-pub fn ed25519_verify(
-    batches: &mut [PacketBatch],
-    recycler: &Recycler<TxOffset>,
-    recycler_out: &Recycler<PinnedVec<u8>>,
-    reject_non_vote: bool,
-    valid_packet_count: usize,
-) {
-    let Some(api) = perf_libs::api() else {
-        return ed25519_verify_cpu(batches, reject_non_vote, valid_packet_count);
-    };
-    let total_packet_count = count_packets_in_batches(batches);
-    // micro-benchmarks show GPU time for smallest batch around 15-20ms
-    // and CPU speed for 64-128 sigverifies around 10-20ms. 64 is a nice
-    // power-of-two number around that accounting for the fact that the CPU
-    // may be busy doing other things while being a real validator
-    // TODO: dynamically adjust this crossover
-    let maybe_valid_percentage = 100usize
-        .wrapping_mul(valid_packet_count)
-        .checked_div(total_packet_count);
-    let Some(valid_percentage) = maybe_valid_percentage else {
-        return;
-    };
-    if valid_percentage < 90 || valid_packet_count < 64 {
-        ed25519_verify_cpu(batches, reject_non_vote, valid_packet_count);
-        return;
-    }
-
-    let (signature_offsets, pubkey_offsets, msg_start_offsets, msg_sizes, sig_lens) =
-        generate_offsets(batches, recycler, reject_non_vote);
-
-    debug!("CUDA ECDSA for {valid_packet_count}");
-    debug!("allocating out..");
-    let mut out = recycler_out.allocate("out_buffer");
-    out.set_pinnable();
-    let mut elems = Vec::new();
-    let mut rvs = Vec::new();
-
-    let mut num_packets: usize = 0;
-    // `BytesPacketBatch` cannot be directly used in CUDA. We have to retrieve
-    // and convert byte batches to pinned batches. We must collect here so that
-    // we keep the batches created by `BytesPacketBatch::to_pinned_packet_batch()`
-    // alive.
-    let pinned_batches = batches
-        .iter_mut()
-        .map(|batch| match batch {
-            PacketBatch::Pinned(batch) => Cow::Borrowed(batch),
-            PacketBatch::Bytes(batch) => Cow::Owned(batch.to_pinned_packet_batch()),
-            PacketBatch::Single(packet) => {
-                // this is ugly, but unused (gpu code) and will be removed shortly in follow up PR
-                let mut batch = BytesPacketBatch::with_capacity(1);
-                batch.push(packet.clone());
-                Cow::Owned(batch.to_pinned_packet_batch())
-            }
-        })
-        .collect::<Vec<_>>();
-    for batch in pinned_batches.iter() {
-        elems.push(perf_libs::Elems {
-            elems: batch.as_ptr().cast::<u8>(),
-            num: batch.len() as u32,
-        });
-        let v = vec![0u8; batch.len()];
-        rvs.push(v);
-        num_packets = num_packets.saturating_add(batch.len());
-    }
-    out.resize(signature_offsets.len(), 0);
-    trace!("Starting verify num packets: {num_packets}");
-    trace!("elem len: {}", elems.len() as u32);
-    trace!("packet sizeof: {}", size_of::<Packet>() as u32);
-    trace!("len offset: {}", PACKET_DATA_SIZE as u32);
-    const USE_NON_DEFAULT_STREAM: u8 = 1;
-    unsafe {
-        let res = (api.ed25519_verify_many)(
-            elems.as_ptr(),
-            elems.len() as u32,
-            size_of::<Packet>() as u32,
-            num_packets as u32,
-            signature_offsets.len() as u32,
-            msg_sizes.as_ptr(),
-            pubkey_offsets.as_ptr(),
-            signature_offsets.as_ptr(),
-            msg_start_offsets.as_ptr(),
-            out.as_mut_ptr(),
-            USE_NON_DEFAULT_STREAM,
-        );
-        if res != 0 {
-            trace!("RETURN!!!: {res}");
-        }
-    }
-    trace!("done verify");
-    copy_return_values(sig_lens, &out, &mut rvs);
-    mark_disabled(batches, &rvs);
-}
-
 #[cfg(test)]
 #[allow(clippy::arithmetic_side_effects)]
 mod tests {
@@ -708,7 +570,7 @@ mod tests {
         super::*,
         crate::{
             packet::{
-                to_packet_batches, BytesPacket, BytesPacketBatch, Packet, PinnedPacketBatch,
+                to_packet_batches, BytesPacket, BytesPacketBatch, Packet, RecycledPacketBatch,
                 PACKETS_PER_BATCH,
             },
             sigverify::{self, PacketOffsets},
@@ -718,17 +580,14 @@ mod tests {
         },
         bincode::{deserialize, serialize},
         bytes::{BufMut, Bytes, BytesMut},
-        curve25519_dalek::{edwards::CompressedEdwardsY, scalar::Scalar},
         rand::{thread_rng, Rng},
         solana_keypair::Keypair,
         solana_message::{compiled_instruction::CompiledInstruction, Message, MessageHeader},
+        solana_packet::PACKET_DATA_SIZE,
         solana_signature::Signature,
         solana_signer::Signer,
         solana_transaction::{versioned::VersionedTransaction, Transaction},
-        std::{
-            iter::repeat_with,
-            sync::atomic::{AtomicU64, Ordering},
-        },
+        std::iter::repeat_with,
         test_case::test_case,
     };
 
@@ -769,8 +628,9 @@ mod tests {
                     .collect()
             })
             .collect();
-        let out =
-            PinnedVec::<u8>::from_vec(out.into_iter().flatten().flatten().map(u8::from).collect());
+        let out = RecycledVec::<u8>::from_vec(
+            out.into_iter().flatten().flatten().map(u8::from).collect(),
+        );
         let mut rvs: Vec<Vec<u8>> = sig_lens
             .iter()
             .map(|sig_lens| vec![0u8; sig_lens.len()])
@@ -1097,30 +957,6 @@ mod tests {
         );
     }
 
-    fn generate_data_batches_random_size<T>(
-        data: &T,
-        max_packets_per_batch: usize,
-        num_batches: usize,
-    ) -> Vec<Vec<Vec<u8>>>
-    where
-        T: serde::Serialize,
-    {
-        let data = bincode::serialize(data).unwrap();
-
-        // generate packet vector
-        let batches: Vec<_> = (0..num_batches)
-            .map(|_| {
-                let num_elems_per_batch = thread_rng().gen_range(1..max_packets_per_batch);
-                let packet_batch = vec![data.clone(); num_elems_per_batch];
-                assert_eq!(packet_batch.len(), num_elems_per_batch);
-                packet_batch
-            })
-            .collect();
-        assert_eq!(batches.len(), num_batches);
-
-        batches
-    }
-
     fn generate_bytes_packet_batches(
         packet: &BytesPacket,
         num_packets_per_batch: usize,
@@ -1186,10 +1022,8 @@ mod tests {
     }
 
     fn ed25519_verify(batches: &mut [PacketBatch]) {
-        let recycler = Recycler::default();
-        let recycler_out = Recycler::default();
         let packet_count = sigverify::count_packets_in_batches(batches);
-        sigverify::ed25519_verify(batches, &recycler, &recycler_out, false, packet_count);
+        sigverify::ed25519_verify(batches, false, packet_count);
     }
 
     #[test]
@@ -1282,143 +1116,11 @@ mod tests {
             }));
     }
 
-    #[test]
-    fn test_verify_fuzz() {
-        agave_logger::setup();
-
-        let tx = test_multisig_tx();
-        let packet = BytesPacket::from_data(None, tx).unwrap();
-
-        let recycler = Recycler::default();
-        let recycler_out = Recycler::default();
-        for _ in 0..50 {
-            let num_batches = thread_rng().gen_range(2..30);
-            let mut batches = generate_data_batches_random_size(&packet, 128, num_batches);
-
-            let num_modifications = thread_rng().gen_range(0..5);
-            for _ in 0..num_modifications {
-                let batch = thread_rng().gen_range(0..batches.len());
-                let packet = thread_rng().gen_range(0..batches[batch].len());
-                let offset = thread_rng().gen_range(0..batches[batch][packet].len());
-                let add = thread_rng().gen_range(0..255);
-                batches[batch][packet][offset] = batches[batch][packet][offset].wrapping_add(add);
-            }
-
-            let mut batches: Vec<PacketBatch> = batches
-                .iter()
-                .map(|batch| {
-                    let mut packet_batch = BytesPacketBatch::with_capacity(batch.len());
-                    for data in batch {
-                        let packet = BytesPacket::from_bytes(None, Bytes::from(data.clone()));
-                        packet_batch.push(packet);
-                    }
-                    packet_batch.into()
-                })
-                .collect();
-
-            let batch_to_disable = thread_rng().gen_range(0..batches.len());
-            for mut p in batches[batch_to_disable].iter_mut() {
-                p.meta_mut().set_discard(true);
-            }
-
-            // verify from GPU verification pipeline (when GPU verification is enabled) are
-            // equivalent to the CPU verification pipeline.
-            let mut batches_cpu = batches.clone();
-            let packet_count = sigverify::count_packets_in_batches(&batches);
-            sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, packet_count);
-            ed25519_verify_cpu(&mut batches_cpu, false, packet_count);
-
-            // check result
-            batches
-                .iter()
-                .flat_map(|batch| batch.iter())
-                .zip(batches_cpu.iter().flat_map(|batch| batch.iter()))
-                .for_each(|(p1, p2)| assert_eq!(p1, p2));
-        }
-    }
-
     #[test]
     fn test_verify_fail() {
         test_verify_n(5, true);
     }
 
-    #[test]
-    fn test_get_checked_scalar() {
-        agave_logger::setup();
-        if perf_libs::api().is_none() {
-            return;
-        }
-
-        let passed_g = AtomicU64::new(0);
-        let failed_g = AtomicU64::new(0);
-        (0..4).into_par_iter().for_each(|_| {
-            let mut input = [0u8; 32];
-            let mut passed = 0;
-            let mut failed = 0;
-            for _ in 0..1_000_000 {
-                thread_rng().fill(&mut input);
-                let ans = get_checked_scalar(&input);
-                let ref_ans = Scalar::from_canonical_bytes(input).into_option();
-                if let Some(ref_ans) = ref_ans {
-                    passed += 1;
-                    assert_eq!(ans.unwrap(), ref_ans.to_bytes());
-                } else {
-                    failed += 1;
-                    assert!(ans.is_err());
-                }
-            }
-            passed_g.fetch_add(passed, Ordering::Relaxed);
-            failed_g.fetch_add(failed, Ordering::Relaxed);
-        });
-        info!(
-            "passed: {} failed: {}",
-            passed_g.load(Ordering::Relaxed),
-            failed_g.load(Ordering::Relaxed)
-        );
-    }
-
-    #[test]
-    fn test_ge_small_order() {
-        agave_logger::setup();
-        if perf_libs::api().is_none() {
-            return;
-        }
-
-        let passed_g = AtomicU64::new(0);
-        let failed_g = AtomicU64::new(0);
-        (0..4).into_par_iter().for_each(|_| {
-            let mut input = [0u8; 32];
-            let mut passed = 0;
-            let mut failed = 0;
-            for _ in 0..1_000_000 {
-                thread_rng().fill(&mut input);
-                let ans = check_packed_ge_small_order(&input);
-                let ref_ge = CompressedEdwardsY::from_slice(&input).unwrap();
-                if let Some(ref_element) = ref_ge.decompress() {
-                    if ref_element.is_small_order() {
-                        assert!(!ans);
-                    } else {
-                        assert!(ans);
-                    }
-                } else {
-                    assert!(!ans);
-                }
-                if ans {
-                    passed += 1;
-                } else {
-                    failed += 1;
-                }
-            }
-            passed_g.fetch_add(passed, Ordering::Relaxed);
-            failed_g.fetch_add(failed, Ordering::Relaxed);
-        });
-        info!(
-            "passed: {} failed: {}",
-            passed_g.load(Ordering::Relaxed),
-            failed_g.load(Ordering::Relaxed)
-        );
-    }
-
     #[test]
     fn test_is_simple_vote_transaction() {
         agave_logger::setup();
@@ -1585,7 +1287,7 @@ mod tests {
                         let batch = (0..PACKETS_PER_BATCH)
                             .map(|_| Packet::from_data(None, test_tx()).expect("serialize request"))
                             .collect::<Vec<_>>();
-                        PacketBatch::Pinned(PinnedPacketBatch::new(batch))
+                        PacketBatch::Pinned(RecycledPacketBatch::new(batch))
                     }
                 })
                 .collect();
@@ -1629,14 +1331,14 @@ mod tests {
         shrink_batches(Vec::new());
         // One empty batch
         {
-            let batches = vec![PinnedPacketBatch::with_capacity(0).into()];
+            let batches = vec![RecycledPacketBatch::with_capacity(0).into()];
             let batches = shrink_batches(batches);
             assert_eq!(batches.len(), 0);
         }
         // Many empty batches
         {
             let batches = (0..BATCH_COUNT)
-                .map(|_| PinnedPacketBatch::with_capacity(0).into())
+                .map(|_| RecycledPacketBatch::with_capacity(0).into())
                 .collect::<Vec<_>>();
             let batches = shrink_batches(batches);
             assert_eq!(batches.len(), 0);
@@ -1813,10 +1515,10 @@ mod tests {
         let pinned_packet = Packet::from_data(None, tx.clone()).unwrap();
         let bytes_packet = BytesPacket::from_data(None, tx).unwrap();
         let batches = vec![
-            PacketBatch::Pinned(PinnedPacketBatch::new(vec![pinned_packet.clone(); 10])),
+            PacketBatch::Pinned(RecycledPacketBatch::new(vec![pinned_packet.clone(); 10])),
             PacketBatch::Bytes(BytesPacketBatch::from(vec![bytes_packet.clone(); 10])),
-            PacketBatch::Pinned(PinnedPacketBatch::new(vec![pinned_packet.clone(); 10])),
-            PacketBatch::Pinned(PinnedPacketBatch::new(vec![pinned_packet.clone(); 10])),
+            PacketBatch::Pinned(RecycledPacketBatch::new(vec![pinned_packet.clone(); 10])),
+            PacketBatch::Pinned(RecycledPacketBatch::new(vec![pinned_packet.clone(); 10])),
             PacketBatch::Bytes(BytesPacketBatch::from(vec![bytes_packet.clone(); 10])),
         ];
         let (bytes_batches, pinned_batches) = split_batches(batches);
@@ -1830,9 +1532,9 @@ mod tests {
         assert_eq!(
             pinned_batches,
             vec![
-                PinnedPacketBatch::new(vec![pinned_packet.clone(); 10]),
-                PinnedPacketBatch::new(vec![pinned_packet.clone(); 10]),
-                PinnedPacketBatch::new(vec![pinned_packet; 10]),
+                RecycledPacketBatch::new(vec![pinned_packet.clone(); 10]),
+                RecycledPacketBatch::new(vec![pinned_packet.clone(); 10]),
+                RecycledPacketBatch::new(vec![pinned_packet; 10]),
             ]
         )
     }

+ 5 - 31
poh-bench/src/main.rs

@@ -1,12 +1,11 @@
 #![allow(clippy::arithmetic_side_effects)]
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-use solana_entry::entry::{self, create_ticks, init_poh, EntrySlice, VerifyRecyclers};
+use solana_entry::entry::{self, create_ticks, init_poh, EntrySlice};
 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-use solana_entry::entry::{create_ticks, init_poh, EntrySlice, VerifyRecyclers};
+use solana_entry::entry::{create_ticks, init_poh, EntrySlice};
 use {
     clap::{crate_description, crate_name, Arg, Command},
     solana_measure::measure::Measure,
-    solana_perf::perf_libs,
     solana_sha256_hasher::hash,
 };
 
@@ -56,12 +55,6 @@ fn main() {
                 .takes_value(true)
                 .help("Number of threads"),
         )
-        .arg(
-            Arg::new("cuda")
-                .long("cuda")
-                .takes_value(false)
-                .help("Use cuda"),
-        )
         .get_matches();
 
     let max_num_entries: u64 = matches.value_of_t("max_num_entries").unwrap_or(64);
@@ -79,16 +72,13 @@ fn main() {
         .thread_name(|i| format!("solPohBench{i:02}"))
         .build()
         .expect("new rayon threadpool");
-    if matches.is_present("cuda") {
-        perf_libs::init_cuda();
-    }
     init_poh();
     while num_entries <= max_num_entries as usize {
         let mut time = Measure::start("time");
         for _ in 0..iterations {
             assert!(ticks[..num_entries]
                 .verify_cpu_generic(&start_hash, &thread_pool)
-                .finish_verify(&thread_pool));
+                .status());
         }
         time.stop();
         println!(
@@ -107,7 +97,7 @@ fn main() {
                 for _ in 0..iterations {
                     assert!(ticks[..num_entries]
                         .verify_cpu_x86_simd(&start_hash, 8, &thread_pool)
-                        .finish_verify(&thread_pool));
+                        .status());
                 }
                 time.stop();
                 println!(
@@ -122,7 +112,7 @@ fn main() {
                 for _ in 0..iterations {
                     assert!(ticks[..num_entries]
                         .verify_cpu_x86_simd(&start_hash, 16, &thread_pool)
-                        .finish_verify(&thread_pool));
+                        .status())
                 }
                 time.stop();
                 println!(
@@ -133,22 +123,6 @@ fn main() {
             }
         }
 
-        if perf_libs::api().is_some() {
-            let mut time = Measure::start("time");
-            let recyclers = VerifyRecyclers::default();
-            for _ in 0..iterations {
-                assert!(ticks[..num_entries]
-                    .start_verify(&start_hash, &thread_pool, recyclers.clone())
-                    .finish_verify(&thread_pool));
-            }
-            time.stop();
-            println!(
-                "{},gpu_cuda,{}",
-                num_entries,
-                time.as_us() / iterations as u64
-            );
-        }
-
         println!();
         num_entries *= 2;
     }

+ 2 - 2
poh/benches/poh_verify.rs

@@ -33,7 +33,7 @@ fn bench_poh_verify_ticks(bencher: &mut Bencher) {
     }
 
     bencher.iter(|| {
-        assert!(ticks.verify(&start_hash, &thread_pool));
+        assert!(ticks.verify(&start_hash, &thread_pool).status());
     })
 }
 
@@ -55,6 +55,6 @@ fn bench_poh_verify_transaction_entries(bencher: &mut Bencher) {
     }
 
     bencher.iter(|| {
-        assert!(ticks.verify(&start_hash, &thread_pool));
+        assert!(ticks.verify(&start_hash, &thread_pool).status());
     })
 }

+ 2 - 11
runtime/src/bank/check_transactions.rs

@@ -3,10 +3,7 @@ use {
     agave_feature_set::{raise_cpi_nesting_limit_to_8, FeatureSet},
     solana_account::{state_traits::StateMut, AccountSharedData},
     solana_accounts_db::blockhash_queue::BlockhashQueue,
-    solana_clock::{
-        Slot, MAX_PROCESSING_AGE, MAX_TRANSACTION_FORWARDING_DELAY,
-        MAX_TRANSACTION_FORWARDING_DELAY_GPU,
-    },
+    solana_clock::{Slot, MAX_PROCESSING_AGE, MAX_TRANSACTION_FORWARDING_DELAY},
     solana_fee::{calculate_fee_details, FeeFeatures},
     solana_fee_structure::{FeeBudgetLimits, FeeDetails},
     solana_nonce::{
@@ -15,7 +12,6 @@ use {
         NONCED_TX_MARKER_IX_INDEX,
     },
     solana_nonce_account as nonce_account,
-    solana_perf::perf_libs,
     solana_program_runtime::execution_budget::SVMTransactionExecutionAndFeeBudgetLimits,
     solana_pubkey::Pubkey,
     solana_runtime_transaction::transaction_with_meta::TransactionWithMeta,
@@ -42,12 +38,7 @@ impl Bank {
         //  1. Transaction forwarding delay
         //  2. The slot at which the next leader will actually process the transaction
         // Drop the transaction if it will expire by the time the next node receives and processes it
-        let api = perf_libs::api();
-        let max_tx_fwd_delay = if api.is_none() {
-            MAX_TRANSACTION_FORWARDING_DELAY
-        } else {
-            MAX_TRANSACTION_FORWARDING_DELAY_GPU
-        };
+        let max_tx_fwd_delay = MAX_TRANSACTION_FORWARDING_DELAY;
 
         self.check_transactions(
             transactions,

+ 13 - 13
streamer/src/packet.rs

@@ -20,8 +20,8 @@ use {
 pub use {
     solana_packet::{Meta, Packet, PACKET_DATA_SIZE},
     solana_perf::packet::{
-        PacketBatch, PacketBatchRecycler, PacketRef, PacketRefMut, PinnedPacketBatch, NUM_PACKETS,
-        PACKETS_PER_BATCH,
+        PacketBatch, PacketBatchRecycler, PacketRef, PacketRefMut, RecycledPacketBatch,
+        NUM_PACKETS, PACKETS_PER_BATCH,
     },
 };
 
@@ -35,7 +35,7 @@ This is a wrapper around recvmmsg(7) call.
 */
 #[cfg(not(unix))]
 pub(crate) fn recv_from(
-    batch: &mut PinnedPacketBatch,
+    batch: &mut RecycledPacketBatch,
     socket: &UdpSocket,
     // If max_wait is None, reads from the socket until either:
     //   * 64 packets are read (PACKETS_PER_BATCH == 64), or
@@ -90,7 +90,7 @@ pub(crate) fn recv_from(
 /// This is a wrapper around recvmmsg(7) call.
 #[cfg(unix)]
 pub(crate) fn recv_from(
-    batch: &mut PinnedPacketBatch,
+    batch: &mut RecycledPacketBatch,
     socket: &UdpSocket,
     // If max_wait is None, reads from the socket until either:
     //   * 64 packets are read (PACKETS_PER_BATCH == 64), or
@@ -133,7 +133,7 @@ pub(crate) fn recv_from(
     /// - If any packets were read, the function will exit.
     /// - If no packets were read, the function will return an error.
     fn recv_from_once(
-        batch: &mut PinnedPacketBatch,
+        batch: &mut RecycledPacketBatch,
         socket: &UdpSocket,
         poll_fd: &mut [PollFd],
     ) -> Result<usize> {
@@ -178,7 +178,7 @@ pub(crate) fn recv_from(
     /// On subsequent iterations, when [`ErrorKind::WouldBlock`] is encountered, poll for the
     /// saturating duration since the start of the loop.
     fn recv_from_coalesce(
-        batch: &mut PinnedPacketBatch,
+        batch: &mut RecycledPacketBatch,
         socket: &UdpSocket,
         max_wait: Duration,
         poll_fd: &mut [PollFd],
@@ -279,7 +279,7 @@ pub(crate) fn recv_from(
     Ok(i)
 }
 pub fn send_to(
-    batch: &PinnedPacketBatch,
+    batch: &RecycledPacketBatch,
     socket: &UdpSocket,
     socket_addr_space: &SocketAddrSpace,
 ) -> Result<()> {
@@ -310,13 +310,13 @@ mod tests {
         // test that the address is actually being updated
         let send_addr: SocketAddr = "127.0.0.1:123".parse().unwrap();
         let packets = vec![Packet::default()];
-        let mut packet_batch = PinnedPacketBatch::new(packets);
+        let mut packet_batch = RecycledPacketBatch::new(packets);
         packet_batch.set_addr(&send_addr);
         assert_eq!(packet_batch[0].meta().socket_addr(), send_addr);
     }
 
     fn recv_from(
-        batch: &mut PinnedPacketBatch,
+        batch: &mut RecycledPacketBatch,
         socket: &UdpSocket,
         max_wait: Option<Duration>,
     ) -> Result<usize> {
@@ -341,7 +341,7 @@ mod tests {
         let send_socket = bind_to_localhost_unique().expect("should bind - sender");
         let saddr = send_socket.local_addr().unwrap();
 
-        let mut batch = PinnedPacketBatch::with_capacity(PACKETS_PER_BATCH);
+        let mut batch = RecycledPacketBatch::with_capacity(PACKETS_PER_BATCH);
         batch.resize(PACKETS_PER_BATCH, Packet::default());
 
         for m in batch.iter_mut() {
@@ -370,7 +370,7 @@ mod tests {
     #[test]
     pub fn debug_trait() {
         write!(io::sink(), "{:?}", Packet::default()).unwrap();
-        write!(io::sink(), "{:?}", PinnedPacketBatch::default()).unwrap();
+        write!(io::sink(), "{:?}", RecycledPacketBatch::default()).unwrap();
     }
 
     #[test]
@@ -396,14 +396,14 @@ mod tests {
         let recv_socket = bind_to_localhost_unique().expect("should bind - receiver");
         let addr = recv_socket.local_addr().unwrap();
         let send_socket = bind_to_localhost_unique().expect("should bind - sender");
-        let mut batch = PinnedPacketBatch::with_capacity(PACKETS_PER_BATCH);
+        let mut batch = RecycledPacketBatch::with_capacity(PACKETS_PER_BATCH);
         batch.resize(PACKETS_PER_BATCH, Packet::default());
 
         // Should only get PACKETS_PER_BATCH packets per iteration even
         // if a lot more were sent, and regardless of packet size
         for _ in 0..2 * PACKETS_PER_BATCH {
             let batch_size = 1;
-            let mut batch = PinnedPacketBatch::with_capacity(batch_size);
+            let mut batch = RecycledPacketBatch::with_capacity(batch_size);
             batch.resize(batch_size, Packet::default());
             for p in batch.iter_mut() {
                 p.meta_mut().set_socket_addr(&addr);

+ 7 - 7
streamer/src/streamer.rs

@@ -4,7 +4,8 @@
 use {
     crate::{
         packet::{
-            self, PacketBatch, PacketBatchRecycler, PacketRef, PinnedPacketBatch, PACKETS_PER_BATCH,
+            self, Packet, PacketBatch, PacketBatchRecycler, PacketRef, RecycledPacketBatch,
+            PACKETS_PER_BATCH,
         },
         sendmmsg::{batch_send, SendPktsError},
     },
@@ -18,7 +19,6 @@ use {
         },
         SocketAddrSpace,
     },
-    solana_packet::Packet,
     solana_pubkey::Pubkey,
     solana_time_utils::timestamp,
     std::{
@@ -190,9 +190,9 @@ fn recv_loop<P: SocketProvider>(
 
     loop {
         let mut packet_batch = if use_pinned_memory {
-            PinnedPacketBatch::new_with_recycler(recycler, PACKETS_PER_BATCH, stats.name)
+            RecycledPacketBatch::new_with_recycler(recycler, PACKETS_PER_BATCH, stats.name)
         } else {
-            PinnedPacketBatch::with_capacity(PACKETS_PER_BATCH)
+            RecycledPacketBatch::with_capacity(PACKETS_PER_BATCH)
         };
         packet_batch.resize(PACKETS_PER_BATCH, Packet::default());
 
@@ -628,7 +628,7 @@ mod test {
     use {
         super::*,
         crate::{
-            packet::{Packet, PinnedPacketBatch, PACKET_DATA_SIZE},
+            packet::{Packet, RecycledPacketBatch, PACKET_DATA_SIZE},
             streamer::{receiver, responder},
         },
         crossbeam_channel::unbounded,
@@ -662,7 +662,7 @@ mod test {
     #[test]
     fn streamer_debug() {
         write!(io::sink(), "{:?}", Packet::default()).unwrap();
-        write!(io::sink(), "{:?}", PinnedPacketBatch::default()).unwrap();
+        write!(io::sink(), "{:?}", RecycledPacketBatch::default()).unwrap();
     }
     #[test]
     fn streamer_send_test() {
@@ -695,7 +695,7 @@ mod test {
                 SocketAddrSpace::Unspecified,
                 None,
             );
-            let mut packet_batch = PinnedPacketBatch::default();
+            let mut packet_batch = RecycledPacketBatch::default();
             for i in 0..NUM_PACKETS {
                 let mut p = Packet::default();
                 {

+ 4 - 11
turbine/src/sigverify_shreds.rs

@@ -17,13 +17,12 @@ use {
             layout::{get_shred, resign_packet},
             wire::is_retransmitter_signed_variant,
         },
-        sigverify_shreds::{verify_shreds_gpu, LruCache, SlotPubkeys},
+        sigverify_shreds::{verify_shreds, LruCache, SlotPubkeys},
     },
     solana_perf::{
         self,
         deduper::Deduper,
         packet::{PacketBatch, PacketRefMut},
-        recycler_cache::RecyclerCache,
     },
     solana_pubkey::Pubkey,
     solana_runtime::{bank::Bank, bank_forks::BankForks},
@@ -83,7 +82,6 @@ pub fn spawn_shred_sigverify(
     verified_sender: Sender<Vec<(shred::Payload, /*is_repaired:*/ bool)>>,
     num_sigverify_threads: NonZeroUsize,
 ) -> JoinHandle<()> {
-    let recycler_cache = RecyclerCache::warmed();
     let mut stats = ShredSigVerifyStats::new(Instant::now());
     let cache = RwLock::new(LruCache::new(SIGVERIFY_LRU_CACHE_CAPACITY));
     let cluster_nodes_cache = ClusterNodesCache::<RetransmitStage>::new(
@@ -112,7 +110,6 @@ pub fn spawn_shred_sigverify(
                 &cluster_info,
                 &bank_forks,
                 &leader_schedule_cache,
-                &recycler_cache,
                 &deduper,
                 &shred_fetch_receiver,
                 &retransmit_sender,
@@ -143,7 +140,6 @@ fn run_shred_sigverify<const K: usize>(
     cluster_info: &ClusterInfo,
     bank_forks: &RwLock<BankForks>,
     leader_schedule_cache: &LeaderScheduleCache,
-    recycler_cache: &RecyclerCache,
     deduper: &Deduper<K, [u8]>,
     shred_fetch_receiver: &Receiver<PacketBatch>,
     retransmit_sender: &EvictingSender<Vec<shred::Payload>>,
@@ -205,7 +201,6 @@ fn run_shred_sigverify<const K: usize>(
         &keypair.pubkey(),
         &working_bank,
         leader_schedule_cache,
-        recycler_cache,
         shred_buffer,
         cache,
     );
@@ -395,7 +390,6 @@ fn verify_packets(
     self_pubkey: &Pubkey,
     working_bank: &Bank,
     leader_schedule_cache: &LeaderScheduleCache,
-    recycler_cache: &RecyclerCache,
     packets: &mut [PacketBatch],
     cache: &RwLock<LruCache>,
 ) {
@@ -404,7 +398,7 @@ fn verify_packets(
             .filter_map(|(slot, pubkey)| Some((slot, pubkey?)))
             .chain(std::iter::once((Slot::MAX, Pubkey::default())))
             .collect();
-    let out = verify_shreds_gpu(thread_pool, packets, &leader_slots, recycler_cache, cache);
+    let out = verify_shreds(thread_pool, packets, &leader_slots, cache);
     solana_perf::sigverify::mark_disabled(packets, &out);
 }
 
@@ -571,7 +565,7 @@ mod tests {
             shred::{Nonce, ProcessShredsStats, ReedSolomonCache, Shredder},
         },
         solana_net_utils::SocketAddrSpace,
-        solana_perf::packet::{Packet, PacketFlags, PinnedPacketBatch},
+        solana_perf::packet::{Packet, PacketFlags, RecycledPacketBatch},
         solana_runtime::bank::Bank,
         solana_signer::Signer,
         solana_time_utils::timestamp,
@@ -589,7 +583,7 @@ mod tests {
         let leader_schedule_cache = LeaderScheduleCache::new_from_bank(&bank);
         let bank_forks = BankForks::new_rw_arc(bank);
         let batch_size = 2;
-        let mut batch = PinnedPacketBatch::with_capacity(batch_size);
+        let mut batch = RecycledPacketBatch::with_capacity(batch_size);
         batch.resize(batch_size, Packet::default());
         let mut batches = vec![batch];
 
@@ -636,7 +630,6 @@ mod tests {
             &Pubkey::new_unique(), // self_pubkey
             &working_bank,
             &leader_schedule_cache,
-            &RecyclerCache::warmed(),
             &mut batches,
             &cache,
         );

+ 0 - 7
validator/src/commands/run/execute.rs

@@ -55,7 +55,6 @@ use {
         use_snapshot_archives_at_startup::{self, UseSnapshotArchivesAtStartup},
     },
     solana_net_utils::multihomed_sockets::BindIpAddrs,
-    solana_perf::recycler::enable_recycler_warming,
     solana_poh::poh_service,
     solana_pubkey::Pubkey,
     solana_runtime::{runtime_config::RuntimeConfig, snapshot_utils},
@@ -124,12 +123,6 @@ pub fn execute(
     info!("{} {}", crate_name!(), solana_version);
     info!("Starting validator with: {:#?}", std::env::args_os());
 
-    let cuda = matches.is_present("cuda");
-    if cuda {
-        solana_perf::perf_libs::init_cuda();
-        enable_recycler_warming();
-    }
-
     solana_core::validator::report_target_features();
 
     let authorized_voter_keypairs = keypairs_of(matches, "authorized_voter_keypairs")

+ 0 - 5
wen-restart/src/wen_restart.rs

@@ -24,7 +24,6 @@ use {
     log::*,
     prost::Message,
     solana_clock::{Epoch, Slot},
-    solana_entry::entry::VerifyRecyclers,
     solana_gossip::{
         cluster_info::{ClusterInfo, GOSSIP_SLEEP_MILLIS},
         restart_crds_values::RestartLastVotedForkSlots,
@@ -616,7 +615,6 @@ pub(crate) fn find_bankhash_of_heaviest_fork(
         .thread_name(|i| format!("solReplayTx{i:02}"))
         .build()
         .expect("new rayon threadpool");
-    let recyclers = VerifyRecyclers::default();
     let mut timing = ExecuteTimings::default();
     let opts = ProcessOptions::default();
     // Now replay all the missing blocks.
@@ -645,7 +643,6 @@ pub(crate) fn find_bankhash_of_heaviest_fork(
                 &bank_with_scheduler,
                 &replay_tx_thread_pool,
                 &opts,
-                &recyclers,
                 &mut progress,
                 None,
                 None,
@@ -2015,7 +2012,6 @@ mod tests {
             .thread_name(|i| format!("solReplayTx{i:02}"))
             .build()
             .expect("new rayon threadpool");
-        let recyclers = VerifyRecyclers::default();
         let mut timing = ExecuteTimings::default();
         let opts = ProcessOptions::default();
         let mut progress = ConfirmationProgress::new(old_root_bank.last_blockhash());
@@ -2030,7 +2026,6 @@ mod tests {
             &bank_with_scheduler,
             &replay_tx_thread_pool,
             &opts,
-            &recyclers,
             &mut progress,
             None,
             None,