6 năm trước cách đây · b4da83a3ab
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -844,6 +844,27 @@ dependencies = [
 
				  "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				 ]
			
 
				 
			
 
				+[[package]]
			
 
				+name = "dlopen"
			
 
				+version = "0.1.8"
			
 
				+source = "registry+https://github.com/rust-lang/crates.io-index"
			
 
				+dependencies = [
			
 
				+ "dlopen_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				+ "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				+ "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				+ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				+]
			
 
				+
			
 
				+[[package]]
			
 
				+name = "dlopen_derive"
			
 
				+version = "0.1.4"
			
 
				+source = "registry+https://github.com/rust-lang/crates.io-index"
			
 
				+dependencies = [
			
 
				+ "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				+ "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				+ "syn 0.15.42 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				+]
			
 
				+
			
 
				 [[package]]
			
 
				 name = "docopt"
			
 
				 version = "1.1.0"
			
@@ -3245,6 +3266,8 @@ dependencies = [
 
				  "crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				  "crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				  "dir-diff 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				+ "dlopen 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				+ "dlopen_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				  "fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				  "hex-literal 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				  "indexmap 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
			
@@ -3890,14 +3913,6 @@ dependencies = [
 
				  "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
			
 
				 ]
			
 
				 
			
 
				-[[package]]
			
 
				-name = "solana-validator-cuda"
			
 
				-version = "0.20.0"
			
 
				-dependencies = [
			
 
				- "solana-core 0.20.0",
			
 
				- "solana-validator 0.20.0",
			
 
				-]
			
 
				-
			
 
				 [[package]]
			
 
				 name = "solana-vote-api"
			
 
				 version = "0.20.0"
			
@@ -5436,6 +5451,8 @@ dependencies = [
 
				 "checksum dirs 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901"
			
 
				 "checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3"
			
 
				 "checksum dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b"
			
 
				+"checksum dlopen 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "71e80ad39f814a9abe68583cd50a2d45c8a67561c3361ab8da240587dda80937"
			
 
				+"checksum dlopen_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f236d9e1b1fbd81cea0f9cbdc8dcc7e8ebcd80e6659cd7cb2ad5f6c05946c581"
			
 
				 "checksum docopt 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7f525a586d310c87df72ebcd98009e57f1cc030c8c268305287a476beb653969"
			
 
				 "checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e"
			
 
				 "checksum ed25519-dalek 1.0.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)" = "81956bcf7ef761fb4e1d88de3fa181358a0d26cbcb9755b587a08f9119824b86"
			
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,65 +1,4 @@
 
				 [workspace]
			
 
				-# The members list excluding the `validator-cuda` crate
			
 
				-default-members = [
			
 
				-    "bench-exchange",
			
 
				-    "bench-streamer",
			
 
				-    "bench-tps",
			
 
				-    "banking_bench",
			
 
				-    "chacha-sys",
			
 
				-    "client",
			
 
				-    "core",
			
 
				-    "drone",
			
 
				-    "validator",
			
 
				-    "genesis",
			
 
				-    "genesis_programs",
			
 
				-    "gossip",
			
 
				-    "install",
			
 
				-    "keygen",
			
 
				-    "ledger-tool",
			
 
				-    "local_cluster",
			
 
				-    "logger",
			
 
				-    "merkle-tree",
			
 
				-    "measure",
			
 
				-    "metrics",
			
 
				-    "programs/bpf_loader_api",
			
 
				-    "programs/bpf_loader_program",
			
 
				-    "programs/budget_api",
			
 
				-    "programs/budget_program",
			
 
				-    "programs/btc_spv_program",
			
 
				-    "programs/btc_spv_api",
			
 
				-    "programs/btc_spv_bin",
			
 
				-    "programs/config_api",
			
 
				-    "programs/config_program",
			
 
				-    "programs/config_tests",
			
 
				-    "programs/exchange_api",
			
 
				-    "programs/exchange_program",
			
 
				-    "programs/failure_program",
			
 
				-    "programs/move_loader_api",
			
 
				-    "programs/move_loader_program",
			
 
				-    "programs/librapay_api",
			
 
				-    "programs/noop_program",
			
 
				-    "programs/stake_api",
			
 
				-    "programs/stake_program",
			
 
				-    "programs/stake_tests",
			
 
				-    "programs/storage_api",
			
 
				-    "programs/storage_program",
			
 
				-    "programs/token_api",
			
 
				-    "programs/token_program",
			
 
				-    "programs/vote_api",
			
 
				-    "programs/vote_program",
			
 
				-    "replicator",
			
 
				-    "runtime",
			
 
				-    "sdk",
			
 
				-    "sdk-c",
			
 
				-    "upload-perf",
			
 
				-    "netutil",
			
 
				-    "fixed-buf",
			
 
				-    "vote-signer",
			
 
				-    "cli",
			
 
				-    "rayon-threadlimit",
			
 
				-]
			
 
				-
			
 
				-# The default-members list and the `validator-cuda` crate
			
 
				 members = [
			
 
				     "bench-exchange",
			
 
				     "bench-streamer",
			
@@ -117,7 +56,6 @@ members = [
 
				     "vote-signer",
			
 
				     "cli",
			
 
				     "rayon-threadlimit",
			
 
				-    "validator-cuda",
			
 
				 ]
			
 
				 
			
 
				 exclude = [
			
--- a/book/src/running-validator/validator-software.md
+++ b/book/src/running-validator/validator-software.md
@@ -48,13 +48,3 @@ If you are unable to use the prebuilt binaries or prefer to build it yourself fr
 
				 $ ./scripts/cargo-install-all.sh .
			
 
				 $ export PATH=$PWD/bin:$PATH
			
 
				 ```
			
 
				-
			
 
				-If building for CUDA \(Linux only\), fetch the perf-libs first then include the `cuda` feature flag when building:
			
 
				-
			
 
				-```bash
			
 
				-$ ./fetch-perf-libs.sh
			
 
				-$ source target/perf-libs/env.sh
			
 
				-$ ./scripts/cargo-install-all.sh . cuda
			
 
				-$ export PATH=$PWD/bin:$PATH
			
 
				-```
			
 
				-
			
--- a/book/src/running-validator/validator-start.md
+++ b/book/src/running-validator/validator-start.md
@@ -93,9 +93,9 @@ $ NDEBUG=1 USE_INSTALL=1 ./multinode-demo/validator.sh --identity ~/validator-ke
 
				 
			
 
				 ### Enabling CUDA
			
 
				 
			
 
				-If your machine has a GPU with CUDA installed \(Linux-only currently\), use the `solana-validator-cuda` executable instead of `solana-validator`.
			
 
				+If your machine has a GPU with CUDA installed \(Linux-only currently\), include the `--cuda` argument to `solana-validator`.
			
 
				 
			
 
				-Or if you built from source, define the SOLANA\_CUDA flag in your environment _before_ running any of the previusly mentioned commands
			
 
				+Or if you built from source, define the SOLANA\_CUDA flag in your environment _before_ running any of the previously mentioned commands
			
 
				 
			
 
				 ```bash
			
 
				 $ export SOLANA_CUDA=1
			
--- a/ci/publish-tarball.sh
+++ b/ci/publish-tarball.sh
@@ -37,14 +37,12 @@ if [[ -z $CHANNEL_OR_TAG ]]; then
 
				   exit 1
			
 
				 fi
			
 
				 
			
 
				-maybeCUDA=
			
 
				 case "$CI_OS_NAME" in
			
 
				 osx)
			
 
				   TARGET=x86_64-apple-darwin
			
 
				   ;;
			
 
				 linux)
			
 
				   TARGET=x86_64-unknown-linux-gnu
			
 
				-  maybeCUDA=cuda
			
 
				   ;;
			
 
				 windows)
			
 
				   TARGET=x86_64-pc-windows-msvc
			
@@ -70,55 +68,17 @@ echo --- Creating tarball
 
				   ) > solana-release/version.yml
			
 
				 
			
 
				   source ci/rust-version.sh stable
			
 
				-  scripts/cargo-install-all.sh +"$rust_stable" solana-release $maybeCUDA
			
 
				+  scripts/cargo-install-all.sh +"$rust_stable" solana-release
			
 
				 
			
 
				   # Reduce the Windows archive size until
			
 
				   # https://github.com/appveyor/ci/issues/2997 is fixed
			
 
				   if [[ -n $APPVEYOR ]]; then
			
 
				-    rm -f solana-release/bin/solana-validator.exe solana-release/bin/solana-bench-exchange.exe
			
 
				-  fi
			
 
				+    rm -f \
			
 
				+      solana-release/bin/solana-validator.exe \
			
 
				+      solana-release/bin/solana-bench-exchange.exe \
			
 
				 
			
 
				-  if [[ -n $maybeCUDA ]]; then
			
 
				-    # Wrap `solana-validator-cuda` with a script that loads perf-libs
			
 
				-    # automatically if possible
			
 
				-    mkdir -p solana-release/target
			
 
				-    cp -a target/perf-libs solana-release/target/perf-libs
			
 
				-    mkdir -p solana-release/bin/_
			
 
				-    cp solana-release/bin/solana-validator-cuda solana-release/bin/_/solana-validator-cuda
			
 
				-    cp -a solana-release/bin/deps solana-release/bin/_/deps
			
 
				-    cat > solana-release/bin/solana-validator-cuda <<'EOF'
			
 
				-#!/usr/bin/env bash
			
 
				-set -e
			
 
				-SOLANA_ROOT="$(dirname "$0")"/..
			
 
				-if [[ -f "$SOLANA_ROOT"/target/perf-libs/env.sh ]]; then
			
 
				-  source "$SOLANA_ROOT"/target/perf-libs/env.sh
			
 
				-fi
			
 
				-if [[ -z $SOLANA_PERF_LIBS_CUDA ]]; then
			
 
				-  echo
			
 
				-  echo Error: SOLANA_PERF_LIBS_CUDA environment variable undefined
			
 
				-  exit 1
			
 
				-fi
			
 
				-exec "$SOLANA_ROOT"/bin/_/solana-validator-cuda "$@"
			
 
				-EOF
			
 
				-    chmod +x solana-release/bin/solana-validator-cuda
			
 
				   fi
			
 
				 
			
 
				-  # TODO: Remove scripts/ and multinode/... from tarball
			
 
				-  cp -a scripts multinode-demo solana-release/
			
 
				-
			
 
				-  # Add a wrapper script for validator.sh
			
 
				-  # TODO: Remove multinode/... from tarball
			
 
				-  cat > solana-release/bin/validator.sh <<'EOF'
			
 
				-#!/usr/bin/env bash
			
 
				-set -e
			
 
				-cd "$(dirname "$0")"/..
			
 
				-export USE_INSTALL=1
			
 
				-export REQUIRE_LEDGER_DIR=1
			
 
				-export REQUIRE_KEYPAIRS=1
			
 
				-exec multinode-demo/validator.sh "$@"
			
 
				-EOF
			
 
				-  chmod +x solana-release/bin/validator.sh
			
 
				-
			
 
				   tar cvf solana-release-$TARGET.tar solana-release
			
 
				   bzip2 solana-release-$TARGET.tar
			
 
				   cp solana-release/bin/solana-install-init solana-install-init-$TARGET
			
--- a/ci/test-checks.sh
+++ b/ci/test-checks.sh
@@ -15,7 +15,7 @@ _ cargo +"$rust_stable" fmt --all -- --check
 
				 # Clippy gets stuck for unknown reasons if sdk-c is included in the build, so check it separately.
			
 
				 # See https://github.com/solana-labs/solana/issues/5503
			
 
				 _ cargo +"$rust_stable" clippy --version
			
 
				-_ cargo +"$rust_stable" clippy --all --exclude solana-sdk-c --exclude solana-validator-cuda -- --deny=warnings
			
 
				+_ cargo +"$rust_stable" clippy --all --exclude solana-sdk-c -- --deny=warnings
			
 
				 _ cargo +"$rust_stable" clippy --manifest-path sdk-c/Cargo.toml -- --deny=warnings
			
 
				 
			
 
				 _ cargo +"$rust_stable" audit --version
			
--- a/ci/test-stable.sh
+++ b/ci/test-stable.sh
@@ -33,7 +33,7 @@ test-stable)
 
				   echo "Executing $testName"
			
 
				 
			
 
				   _ cargo +"$rust_stable" build --tests --bins ${V:+--verbose}
			
 
				-  _ cargo +"$rust_stable" test --all --exclude solana-local-cluster --exclude solana-validator-cuda ${V:+--verbose} -- --nocapture
			
 
				+  _ cargo +"$rust_stable" test --all --exclude solana-local-cluster ${V:+--verbose} -- --nocapture
			
 
				   ;;
			
 
				 test-stable-perf)
			
 
				   echo "Executing $testName"
			
@@ -61,8 +61,6 @@ test-stable-perf)
 
				     --manifest-path programs/bpf/Cargo.toml \
			
 
				     --no-default-features --features=bpf_c,bpf_rust
			
 
				 
			
 
				-  # Run root package tests with these features
			
 
				-  maybeCuda=
			
 
				   if [[ $(uname) = Linux ]]; then
			
 
				     # Enable persistence mode to keep the CUDA kernel driver loaded, avoiding a
			
 
				     # lengthy and unexpected delay the first time CUDA is involved when the driver
			
@@ -71,19 +69,20 @@ test-stable-perf)
 
				 
			
 
				     rm -rf target/perf-libs
			
 
				     ./fetch-perf-libs.sh
			
 
				-    # shellcheck source=/dev/null
			
 
				-    source ./target/perf-libs/env.sh
			
 
				-    maybeCuda=--features=cuda
			
 
				+
			
 
				+    # Force CUDA for solana-core unit tests
			
 
				+    export TEST_PERF_LIBS_CUDA=1
			
 
				+
			
 
				+    # Force CUDA in ci/localnet-sanity.sh
			
 
				     export SOLANA_CUDA=1
			
 
				   fi
			
 
				 
			
 
				-  # Run root package library tests
			
 
				-  _ cargo +"$rust_stable" build --tests --bins ${V:+--verbose}
			
 
				-  _ cargo +"$rust_stable" test --all --manifest-path=core/Cargo.toml ${V:+--verbose} $maybeCuda --exclude solana-local-cluster -- --nocapture
			
 
				+  _ cargo +"$rust_stable" build --bins ${V:+--verbose}
			
 
				+  _ cargo +"$rust_stable" test --package solana-core --lib ${V:+--verbose} -- --nocapture
			
 
				   ;;
			
 
				 test-local-cluster)
			
 
				   echo "Executing $testName"
			
 
				-  _ cargo +"$rust_stable" build --release --tests --bins ${V:+--verbose}
			
 
				+  _ cargo +"$rust_stable" build --release --bins ${V:+--verbose}
			
 
				   _ cargo +"$rust_stable" test --release --package solana-local-cluster ${V:+--verbose} -- --nocapture
			
 
				   exit 0
			
 
				   ;;
			
--- a/ci/testnet-automation.sh
+++ b/ci/testnet-automation.sh
@@ -39,9 +39,9 @@ launchTestnet() {
 
				 
			
 
				   echo --- start "$nodeCount" node test
			
 
				   if [[ -n $USE_PREBUILT_CHANNEL_TARBALL ]]; then
			
 
				-    net/net.sh start -f "cuda" -o noValidatorSanity -t "$CHANNEL"
			
 
				+    net/net.sh start -o noValidatorSanity -t "$CHANNEL"
			
 
				   else
			
 
				-    net/net.sh start -f "cuda" -o noValidatorSanity -T solana-release*.tar.bz2
			
 
				+    net/net.sh start -o noValidatorSanity -T solana-release*.tar.bz2
			
 
				   fi
			
 
				 
			
 
				   echo --- wait "$ITERATION_WAIT" seconds to complete test
			
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -14,7 +14,6 @@ edition = "2018"
 
				 codecov = { repository = "solana-labs/solana", branch = "master", service = "github" }
			
 
				 
			
 
				 [features]
			
 
				-cuda = []
			
 
				 pin_gpu_memory = []
			
 
				 
			
 
				 [dependencies]
			
@@ -27,6 +26,8 @@ core_affinity = "0.5.9"
 
				 crc = { version = "1.8.1", optional = true }
			
 
				 crossbeam-channel = "0.3"
			
 
				 dir-diff = "0.3.1"
			
 
				+dlopen = "0.1.8"
			
 
				+dlopen_derive = "0.1.4"
			
 
				 fs_extra = "1.1.0"
			
 
				 indexmap = "1.1"
			
 
				 itertools = "0.8.0"
			
--- a/core/build.rs
+++ b/core/build.rs
@@ -1,50 +0,0 @@
 
				-use std::env;
			
 
				-use std::fs;
			
 
				-use std::path::Path;
			
 
				-use std::process::exit;
			
 
				-
			
 
				-fn main() {
			
 
				-    println!("cargo:rerun-if-changed=build.rs");
			
 
				-
			
 
				-    if env::var("CARGO_FEATURE_CUDA").is_ok() {
			
 
				-        if cfg!(not(target_os = "linux")) {
			
 
				-            eprintln!("Error: CUDA feature is only available on Linux");
			
 
				-            exit(1);
			
 
				-        }
			
 
				-        println!("cargo:rustc-cfg=cuda");
			
 
				-
			
 
				-        let perf_libs_dir = {
			
 
				-            let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
			
 
				-            let mut path = Path::new(&manifest_dir);
			
 
				-            path = path.parent().unwrap();
			
 
				-            let mut path = path.join(Path::new("target/perf-libs"));
			
 
				-            path.push(env::var("SOLANA_PERF_LIBS_CUDA").unwrap_or_else(|err| {
			
 
				-                eprintln!("Error: SOLANA_PERF_LIBS_CUDA not defined: {}", err);
			
 
				-                exit(1);
			
 
				-            }));
			
 
				-            path
			
 
				-        };
			
 
				-        let perf_libs_dir = perf_libs_dir.to_str().unwrap();
			
 
				-
			
 
				-        // Ensure `perf_libs_dir` exists.  It's been observed that
			
 
				-        // a cargo:rerun-if-changed= directive with a non-existent
			
 
				-        // directory triggers a rebuild on every |cargo build| invocation
			
 
				-        fs::create_dir_all(&perf_libs_dir).unwrap_or_else(|err| {
			
 
				-            if err.kind() != std::io::ErrorKind::AlreadyExists {
			
 
				-                panic!("Unable to create {}: {:?}", perf_libs_dir, err);
			
 
				-            }
			
 
				-        });
			
 
				-        println!("cargo:rerun-if-changed={}", perf_libs_dir);
			
 
				-        println!("cargo:rustc-link-search=native={}", perf_libs_dir);
			
 
				-        if cfg!(windows) {
			
 
				-            println!("cargo:rerun-if-changed={}/libcuda-crypt.dll", perf_libs_dir);
			
 
				-        } else if cfg!(target_os = "macos") {
			
 
				-            println!(
			
 
				-                "cargo:rerun-if-changed={}/libcuda-crypt.dylib",
			
 
				-                perf_libs_dir
			
 
				-            );
			
 
				-        } else {
			
 
				-            println!("cargo:rerun-if-changed={}/libcuda-crypt.so", perf_libs_dir);
			
 
				-        }
			
 
				-    }
			
 
				-}
			
--- a/core/src/chacha_cuda.rs
+++ b/core/src/chacha_cuda.rs
@@ -1,11 +1,8 @@
 
				-// Module used by validators to approve storage mining proofs
			
 
				-// // in parallel using the GPU
			
 
				+// Module used by validators to approve storage mining proofs in parallel using the GPU
			
 
				 
			
 
				 use crate::blocktree::Blocktree;
			
 
				 use crate::chacha::{CHACHA_BLOCK_SIZE, CHACHA_KEY_SIZE};
			
 
				-use crate::sigverify::{
			
 
				-    chacha_cbc_encrypt_many_sample, chacha_end_sha_state, chacha_init_sha_state,
			
 
				-};
			
 
				+use crate::perf_libs;
			
 
				 use solana_sdk::hash::Hash;
			
 
				 use std::io;
			
 
				 use std::mem::size_of;
			
@@ -22,6 +19,7 @@ pub fn chacha_cbc_encrypt_file_many_keys(
 
				     ivecs: &mut [u8],
			
 
				     samples: &[u64],
			
 
				 ) -> io::Result<Vec<Hash>> {
			
 
				+    let api = perf_libs::api().expect("no perf libs");
			
 
				     if ivecs.len() % CHACHA_BLOCK_SIZE != 0 {
			
 
				         return Err(io::Error::new(
			
 
				             io::ErrorKind::Other,
			
@@ -45,7 +43,7 @@ pub fn chacha_cbc_encrypt_file_many_keys(
 
				     let mut total_size = 0;
			
 
				     let mut time: f32 = 0.0;
			
 
				     unsafe {
			
 
				-        chacha_init_sha_state(int_sha_states.as_mut_ptr(), num_keys as u32);
			
 
				+        (api.chacha_init_sha_state)(int_sha_states.as_mut_ptr(), num_keys as u32);
			
 
				     }
			
 
				     loop {
			
 
				         match blocktree.get_data_shreds(current_slot, start_index, std::u64::MAX, &mut buffer) {
			
@@ -73,7 +71,7 @@ pub fn chacha_cbc_encrypt_file_many_keys(
 
				                 }
			
 
				 
			
 
				                 unsafe {
			
 
				-                    chacha_cbc_encrypt_many_sample(
			
 
				+                    (api.chacha_cbc_encrypt_many_sample)(
			
 
				                         buffer[..size].as_ptr(),
			
 
				                         int_sha_states.as_mut_ptr(),
			
 
				                         size,
			
@@ -97,7 +95,7 @@ pub fn chacha_cbc_encrypt_file_many_keys(
 
				         }
			
 
				     }
			
 
				     unsafe {
			
 
				-        chacha_end_sha_state(
			
 
				+        (api.chacha_end_sha_state)(
			
 
				             int_sha_states.as_ptr(),
			
 
				             sha_states.as_mut_ptr(),
			
 
				             num_keys as u32,
			
@@ -114,22 +112,23 @@ pub fn chacha_cbc_encrypt_file_many_keys(
 
				 
			
 
				 #[cfg(test)]
			
 
				 mod tests {
			
 
				+    use super::*;
			
 
				     use crate::blocktree::get_tmp_ledger_path;
			
 
				-    use crate::blocktree::Blocktree;
			
 
				     use crate::chacha::chacha_cbc_encrypt_ledger;
			
 
				-    use crate::chacha_cuda::chacha_cbc_encrypt_file_many_keys;
			
 
				     use crate::entry::create_ticks;
			
 
				     use crate::replicator::sample_file;
			
 
				     use solana_sdk::clock::DEFAULT_SLOTS_PER_SEGMENT;
			
 
				-    use solana_sdk::hash::Hash;
			
 
				     use solana_sdk::signature::{Keypair, KeypairUtil};
			
 
				     use std::fs::{remove_dir_all, remove_file};
			
 
				     use std::path::Path;
			
 
				-    use std::sync::Arc;
			
 
				 
			
 
				     #[test]
			
 
				     fn test_encrypt_file_many_keys_single() {
			
 
				         solana_logger::setup();
			
 
				+        if perf_libs::api().is_none() {
			
 
				+            info!("perf-libs unavailable, skipped");
			
 
				+            return;
			
 
				+        }
			
 
				 
			
 
				         let slots_per_segment = 32;
			
 
				         let entries = create_ticks(slots_per_segment, Hash::default());
			
@@ -189,6 +188,10 @@ mod tests {
 
				     #[test]
			
 
				     fn test_encrypt_file_many_keys_multiple_keys() {
			
 
				         solana_logger::setup();
			
 
				+        if perf_libs::api().is_none() {
			
 
				+            info!("perf-libs unavailable, skipped");
			
 
				+            return;
			
 
				+        }
			
 
				 
			
 
				         let entries = create_ticks(32, Hash::default());
			
 
				         let ledger_dir = "test_encrypt_file_many_keys_multiple";
			
@@ -255,6 +258,12 @@ mod tests {
 
				 
			
 
				     #[test]
			
 
				     fn test_encrypt_file_many_keys_bad_key_length() {
			
 
				+        solana_logger::setup();
			
 
				+        if perf_libs::api().is_none() {
			
 
				+            info!("perf-libs unavailable, skipped");
			
 
				+            return;
			
 
				+        }
			
 
				+
			
 
				         let mut keys = hex!("abc123");
			
 
				         let ledger_dir = "test_encrypt_file_many_keys_bad_key_length";
			
 
				         let ledger_path = get_tmp_ledger_path(ledger_dir);
			
--- a/core/src/cuda_runtime.rs
+++ b/core/src/cuda_runtime.rs
@@ -5,48 +5,55 @@
 
				 //    copies from host memory to GPU memory unless the memory is page-pinned and
			
 
				 //    cannot be paged to disk. The cuda driver provides these interfaces to pin and unpin memory.
			
 
				 
			
 
				+#[cfg(feature = "pin_gpu_memory")]
			
 
				+use crate::perf_libs;
			
 
				 use crate::recycler::Reset;
			
 
				-
			
 
				-#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
			
 
				-use crate::sigverify::{cuda_host_register, cuda_host_unregister};
			
 
				 use std::ops::{Deref, DerefMut};
			
 
				 
			
 
				-#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
			
 
				+#[cfg(feature = "pin_gpu_memory")]
			
 
				 use std::os::raw::c_int;
			
 
				 
			
 
				-#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
			
 
				+#[cfg(feature = "pin_gpu_memory")]
			
 
				 const CUDA_SUCCESS: c_int = 0;
			
 
				 
			
 
				 pub fn pin<T>(_mem: &mut Vec<T>) {
			
 
				-    #[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
			
 
				-    unsafe {
			
 
				-        use core::ffi::c_void;
			
 
				-        use std::mem::size_of;
			
 
				-
			
 
				-        let err = cuda_host_register(
			
 
				-            _mem.as_mut_ptr() as *mut c_void,
			
 
				-            _mem.capacity() * size_of::<T>(),
			
 
				-            0,
			
 
				-        );
			
 
				-        if err != CUDA_SUCCESS {
			
 
				-            error!(
			
 
				-                "cudaHostRegister error: {} ptr: {:?} bytes: {}",
			
 
				-                err,
			
 
				-                _mem.as_ptr(),
			
 
				-                _mem.capacity() * size_of::<T>()
			
 
				-            );
			
 
				+    #[cfg(feature = "pin_gpu_memory")]
			
 
				+    {
			
 
				+        if let Some(api) = perf_libs::api() {
			
 
				+            unsafe {
			
 
				+                use core::ffi::c_void;
			
 
				+                use std::mem::size_of;
			
 
				+
			
 
				+                let err = (api.cuda_host_register)(
			
 
				+                    _mem.as_mut_ptr() as *mut c_void,
			
 
				+                    _mem.capacity() * size_of::<T>(),
			
 
				+                    0,
			
 
				+                );
			
 
				+                if err != CUDA_SUCCESS {
			
 
				+                    error!(
			
 
				+                        "cudaHostRegister error: {} ptr: {:?} bytes: {}",
			
 
				+                        err,
			
 
				+                        _mem.as_ptr(),
			
 
				+                        _mem.capacity() * size_of::<T>()
			
 
				+                    );
			
 
				+                }
			
 
				+            }
			
 
				         }
			
 
				     }
			
 
				 }
			
 
				 
			
 
				 pub fn unpin<T>(_mem: *mut T) {
			
 
				-    #[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
			
 
				-    unsafe {
			
 
				-        use core::ffi::c_void;
			
 
				-
			
 
				-        let err = cuda_host_unregister(_mem as *mut c_void);
			
 
				-        if err != CUDA_SUCCESS {
			
 
				-            error!("cudaHostUnregister returned: {} ptr: {:?}", err, _mem);
			
 
				+    #[cfg(feature = "pin_gpu_memory")]
			
 
				+    {
			
 
				+        if let Some(api) = perf_libs::api() {
			
 
				+            unsafe {
			
 
				+                use core::ffi::c_void;
			
 
				+
			
 
				+                let err = (api.cuda_host_unregister)(_mem as *mut c_void);
			
 
				+                if err != CUDA_SUCCESS {
			
 
				+                    error!("cudaHostUnregister returned: {} ptr: {:?}", err, _mem);
			
 
				+                }
			
 
				+            }
			
 
				         }
			
 
				     }
			
 
				 }
			
--- a/core/src/entry.rs
+++ b/core/src/entry.rs
@@ -3,6 +3,7 @@
 
				 //! transactions within it. Entries cannot be reordered, and its field `num_hashes`
			
 
				 //! represents an approximate amount of time since the last Entry was created.
			
 
				 use crate::packet::{Blob, SharedBlob};
			
 
				+use crate::perf_libs;
			
 
				 use crate::poh::Poh;
			
 
				 use crate::result::Result;
			
 
				 use bincode::{deserialize, serialized_size};
			
@@ -10,20 +11,14 @@ use rayon::prelude::*;
 
				 use rayon::ThreadPool;
			
 
				 use solana_merkle_tree::MerkleTree;
			
 
				 use solana_metrics::inc_new_counter_warn;
			
 
				+use solana_rayon_threadlimit::get_thread_count;
			
 
				 use solana_sdk::hash::Hash;
			
 
				 use solana_sdk::timing;
			
 
				 use solana_sdk::transaction::Transaction;
			
 
				 use std::borrow::Borrow;
			
 
				 use std::cell::RefCell;
			
 
				 use std::sync::mpsc::{Receiver, Sender};
			
 
				-use std::sync::{Arc, RwLock};
			
 
				-
			
 
				-#[cfg(feature = "cuda")]
			
 
				-use crate::sigverify::poh_verify_many;
			
 
				-use solana_rayon_threadlimit::get_thread_count;
			
 
				-#[cfg(feature = "cuda")]
			
 
				-use std::sync::Mutex;
			
 
				-#[cfg(feature = "cuda")]
			
 
				+use std::sync::{Arc, Mutex, RwLock};
			
 
				 use std::thread;
			
 
				 use std::time::Instant;
			
 
				 
			
@@ -257,13 +252,12 @@ impl EntrySlice for [Entry] {
 
				         res
			
 
				     }
			
 
				 
			
 
				-    #[cfg(not(feature = "cuda"))]
			
 
				-    fn verify(&self, start_hash: &Hash) -> bool {
			
 
				-        self.verify_cpu(start_hash)
			
 
				-    }
			
 
				-
			
 
				-    #[cfg(feature = "cuda")]
			
 
				     fn verify(&self, start_hash: &Hash) -> bool {
			
 
				+        let api = perf_libs::api();
			
 
				+        if api.is_none() {
			
 
				+            return self.verify_cpu(start_hash);
			
 
				+        }
			
 
				+        let api = api.unwrap();
			
 
				         inc_new_counter_warn!("entry_verify-num_entries", self.len() as usize);
			
 
				 
			
 
				         // Use CPU verify if the batch length is < 1K
			
@@ -287,7 +281,7 @@ impl EntrySlice for [Entry] {
 
				             .collect();
			
 
				 
			
 
				         let num_hashes_vec: Vec<u64> = self
			
 
				-            .into_iter()
			
 
				+            .iter()
			
 
				             .map(|entry| entry.num_hashes.saturating_sub(1))
			
 
				             .collect();
			
 
				 
			
@@ -300,7 +294,7 @@ impl EntrySlice for [Entry] {
 
				             let mut hashes = hashes_clone.lock().unwrap();
			
 
				             let res;
			
 
				             unsafe {
			
 
				-                res = poh_verify_many(
			
 
				+                res = (api.poh_verify_many)(
			
 
				                     hashes.as_mut_ptr() as *mut u8,
			
 
				                     num_hashes_vec.as_ptr(),
			
 
				                     length,
			
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -10,10 +10,10 @@ pub mod banking_stage;
 
				 pub mod blob_fetch_stage;
			
 
				 pub mod broadcast_stage;
			
 
				 pub mod chacha;
			
 
				-#[cfg(cuda)]
			
 
				 pub mod chacha_cuda;
			
 
				 pub mod cluster_info_vote_listener;
			
 
				 pub mod confidence;
			
 
				+pub mod perf_libs;
			
 
				 pub mod recycler;
			
 
				 #[macro_use]
			
 
				 pub mod contact_info;
			
@@ -75,6 +75,9 @@ pub(crate) mod version;
 
				 pub mod weighted_shuffle;
			
 
				 pub mod window_service;
			
 
				 
			
 
				+#[macro_use]
			
 
				+extern crate dlopen_derive;
			
 
				+
			
 
				 #[macro_use]
			
 
				 extern crate solana_budget_program;
			
 
				 
			
--- a/core/src/perf_libs.rs
+++ b/core/src/perf_libs.rs
@@ -0,0 +1,171 @@
 
				+use crate::packet::Packet;
			
 
				+use core::ffi::c_void;
			
 
				+use dlopen::symbor::{Container, SymBorApi, Symbol};
			
 
				+use std::env;
			
 
				+use std::ffi::OsStr;
			
 
				+use std::fs;
			
 
				+use std::os::raw::{c_int, c_uint};
			
 
				+use std::path::{Path, PathBuf};
			
 
				+use std::sync::Once;
			
 
				+
			
 
				+#[repr(C)]
			
 
				+pub struct Elems {
			
 
				+    pub elems: *const Packet,
			
 
				+    pub num: u32,
			
 
				+}
			
 
				+
			
 
				+#[derive(SymBorApi)]
			
 
				+pub struct Api<'a> {
			
 
				+    pub ed25519_init: Symbol<'a, unsafe extern "C" fn() -> bool>,
			
 
				+    pub ed25519_set_verbose: Symbol<'a, unsafe extern "C" fn(val: bool)>,
			
 
				+
			
 
				+    #[allow(clippy::type_complexity)]
			
 
				+    pub ed25519_verify_many: Symbol<
			
 
				+        'a,
			
 
				+        unsafe extern "C" fn(
			
 
				+            vecs: *const Elems,
			
 
				+            num: u32,          //number of vecs
			
 
				+            message_size: u32, //size of each element inside the elems field of the vec
			
 
				+            total_packets: u32,
			
 
				+            total_signatures: u32,
			
 
				+            message_lens: *const u32,
			
 
				+            pubkey_offsets: *const u32,
			
 
				+            signature_offsets: *const u32,
			
 
				+            signed_message_offsets: *const u32,
			
 
				+            out: *mut u8, //combined length of all the items in vecs
			
 
				+            use_non_default_stream: u8,
			
 
				+        ) -> u32,
			
 
				+    >,
			
 
				+
			
 
				+    pub chacha_cbc_encrypt_many_sample: Symbol<
			
 
				+        'a,
			
 
				+        unsafe extern "C" fn(
			
 
				+            input: *const u8,
			
 
				+            sha_state: *mut u8,
			
 
				+            in_len: usize,
			
 
				+            keys: *const u8,
			
 
				+            ivec: *mut u8,
			
 
				+            num_keys: u32,
			
 
				+            samples: *const u64,
			
 
				+            num_samples: u32,
			
 
				+            starting_block: u64,
			
 
				+            time_us: *mut f32,
			
 
				+        ),
			
 
				+    >,
			
 
				+
			
 
				+    pub chacha_init_sha_state: Symbol<'a, unsafe extern "C" fn(sha_state: *mut u8, num_keys: u32)>,
			
 
				+    pub chacha_end_sha_state:
			
 
				+        Symbol<'a, unsafe extern "C" fn(sha_state_in: *const u8, out: *mut u8, num_keys: u32)>,
			
 
				+
			
 
				+    pub poh_verify_many: Symbol<
			
 
				+        'a,
			
 
				+        unsafe extern "C" fn(
			
 
				+            hashes: *mut u8,
			
 
				+            num_hashes_arr: *const u64,
			
 
				+            num_elems: usize,
			
 
				+            use_non_default_stream: u8,
			
 
				+        ) -> c_int,
			
 
				+    >,
			
 
				+
			
 
				+    pub cuda_host_register:
			
 
				+        Symbol<'a, unsafe extern "C" fn(ptr: *mut c_void, size: usize, flags: c_uint) -> c_int>,
			
 
				+
			
 
				+    pub cuda_host_unregister: Symbol<'a, unsafe extern "C" fn(ptr: *mut c_void) -> c_int>,
			
 
				+}
			
 
				+
			
 
				+static mut API: Option<Container<Api>> = None;
			
 
				+
			
 
				+fn init(name: &OsStr) {
			
 
				+    static INIT_HOOK: Once = Once::new();
			
 
				+
			
 
				+    info!("Loading {:?}", name);
			
 
				+    unsafe {
			
 
				+        INIT_HOOK.call_once(|| {
			
 
				+            API = Some(Container::load(name).unwrap_or_else(|err| {
			
 
				+                error!("Unable to load {:?}: {}", name, err);
			
 
				+                std::process::exit(1);
			
 
				+            }));
			
 
				+        })
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+fn locate_perf_libs() -> Option<PathBuf> {
			
 
				+    let exe = env::current_exe().expect("Unable to get executable path");
			
 
				+    let perf_libs = exe.parent().unwrap().join("perf-libs");
			
 
				+    if perf_libs.is_dir() {
			
 
				+        info!("perf-libs found at {:?}", perf_libs);
			
 
				+        return Some(perf_libs);
			
 
				+    }
			
 
				+    warn!("{:?} does not exist", perf_libs);
			
 
				+    None
			
 
				+}
			
 
				+
			
 
				+fn find_cuda_home(perf_libs_path: &Path) -> Option<PathBuf> {
			
 
				+    // Search /usr/local for a `cuda-` directory that matches a perf-libs subdirectory
			
 
				+    for entry in fs::read_dir(&perf_libs_path).unwrap() {
			
 
				+        if let Ok(entry) = entry {
			
 
				+            let path = entry.path();
			
 
				+            if !path.is_dir() {
			
 
				+                continue;
			
 
				+            }
			
 
				+            let dir_name = path.file_name().unwrap().to_str().unwrap_or("");
			
 
				+            if !dir_name.starts_with("cuda-") {
			
 
				+                continue;
			
 
				+            }
			
 
				+
			
 
				+            let cuda_home: PathBuf = ["/", "usr", "local", dir_name].iter().collect();
			
 
				+            if !cuda_home.is_dir() {
			
 
				+                continue;
			
 
				+            }
			
 
				+
			
 
				+            return Some(cuda_home);
			
 
				+        }
			
 
				+    }
			
 
				+    None
			
 
				+}
			
 
				+
			
 
				+pub fn init_cuda() {
			
 
				+    if let Some(perf_libs_path) = locate_perf_libs() {
			
 
				+        if let Some(cuda_home) = find_cuda_home(&perf_libs_path) {
			
 
				+            info!("CUDA installation found at {:?}", cuda_home);
			
 
				+
			
 
				+            let cuda_lib64_dir = cuda_home.join("lib64");
			
 
				+            if cuda_lib64_dir.is_dir() {
			
 
				+                let ld_library_path = cuda_lib64_dir.to_str().unwrap_or("").to_string()
			
 
				+                    + ":"
			
 
				+                    + &env::var("LD_LIBRARY_PATH").unwrap_or_else(|_| "".to_string());
			
 
				+                info!("LD_LIBRARY_PATH set to {:?}", ld_library_path);
			
 
				+
			
 
				+                // Prefix LD_LIBRARY_PATH with $CUDA_HOME/lib64 directory
			
 
				+                // to ensure the correct CUDA version is used
			
 
				+                env::set_var("LD_LIBRARY_PATH", ld_library_path)
			
 
				+            } else {
			
 
				+                warn!("{:?} does not exist", cuda_lib64_dir);
			
 
				+            }
			
 
				+
			
 
				+            let libcuda_crypt = perf_libs_path
			
 
				+                .join(cuda_home.file_name().unwrap())
			
 
				+                .join("libcuda-crypt.so");
			
 
				+            return init(libcuda_crypt.as_os_str());
			
 
				+        } else {
			
 
				+            warn!("CUDA installation not found");
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // Last resort!  Blindly load the shared object and hope it all works out
			
 
				+    init(OsStr::new("libcuda-crypt.so"))
			
 
				+}
			
 
				+
			
 
				+pub fn api() -> Option<&'static Container<Api<'static>>> {
			
 
				+    #[cfg(test)]
			
 
				+    {
			
 
				+        static INIT_HOOK: Once = Once::new();
			
 
				+        INIT_HOOK.call_once(|| {
			
 
				+            if std::env::var("TEST_PERF_LIBS_CUDA").is_ok() {
			
 
				+                init_cuda();
			
 
				+            }
			
 
				+        })
			
 
				+    }
			
 
				+
			
 
				+    unsafe { API.as_ref() }
			
 
				+}
			
--- a/core/src/sigverify.rs
+++ b/core/src/sigverify.rs
@@ -1,11 +1,12 @@
 
				 //! The `sigverify` module provides digital signature verification functions.
			
 
				 //! By default, signatures are verified in parallel using all available CPU
			
 
				-//! cores.  When `--features=cuda` is enabled, signature verification is
			
 
				-//! offloaded to the GPU.
			
 
				+//! cores.  When perf-libs are available signature verification is offloaded
			
 
				+//! to the GPU.
			
 
				 //!
			
 
				 
			
 
				 use crate::cuda_runtime::PinnedVec;
			
 
				 use crate::packet::{Packet, Packets};
			
 
				+use crate::perf_libs;
			
 
				 use crate::recycler::Recycler;
			
 
				 use crate::result::Result;
			
 
				 use bincode::serialized_size;
			
@@ -19,11 +20,7 @@ use solana_sdk::signature::Signature;
 
				 use solana_sdk::transaction::Transaction;
			
 
				 use std::mem::size_of;
			
 
				 
			
 
				-#[cfg(feature = "cuda")]
			
 
				-use core::ffi::c_void;
			
 
				 use solana_rayon_threadlimit::get_thread_count;
			
 
				-#[cfg(feature = "cuda")]
			
 
				-use std::os::raw::{c_int, c_uint};
			
 
				 pub const NUM_THREADS: u32 = 10;
			
 
				 use std::cell::RefCell;
			
 
				 
			
@@ -36,62 +33,16 @@ pub type TxOffset = PinnedVec<u32>;
 
				 
			
 
				 type TxOffsets = (TxOffset, TxOffset, TxOffset, TxOffset, Vec<Vec<u32>>);
			
 
				 
			
 
				-#[cfg(feature = "cuda")]
			
 
				-#[repr(C)]
			
 
				-struct Elems {
			
 
				-    elems: *const Packet,
			
 
				-    num: u32,
			
 
				-}
			
 
				-
			
 
				-#[cfg(feature = "cuda")]
			
 
				-#[link(name = "cuda-crypt")]
			
 
				-extern "C" {
			
 
				-    fn ed25519_init() -> bool;
			
 
				-    fn ed25519_set_verbose(val: bool);
			
 
				-    fn ed25519_verify_many(
			
 
				-        vecs: *const Elems,
			
 
				-        num: u32,          //number of vecs
			
 
				-        message_size: u32, //size of each element inside the elems field of the vec
			
 
				-        total_packets: u32,
			
 
				-        total_signatures: u32,
			
 
				-        message_lens: *const u32,
			
 
				-        pubkey_offsets: *const u32,
			
 
				-        signature_offsets: *const u32,
			
 
				-        signed_message_offsets: *const u32,
			
 
				-        out: *mut u8, //combined length of all the items in vecs
			
 
				-        use_non_default_stream: u8,
			
 
				-    ) -> u32;
			
 
				-
			
 
				-    pub fn chacha_cbc_encrypt_many_sample(
			
 
				-        input: *const u8,
			
 
				-        sha_state: *mut u8,
			
 
				-        in_len: usize,
			
 
				-        keys: *const u8,
			
 
				-        ivec: *mut u8,
			
 
				-        num_keys: u32,
			
 
				-        samples: *const u64,
			
 
				-        num_samples: u32,
			
 
				-        starting_block: u64,
			
 
				-        time_us: *mut f32,
			
 
				-    );
			
 
				-
			
 
				-    pub fn chacha_init_sha_state(sha_state: *mut u8, num_keys: u32);
			
 
				-    pub fn chacha_end_sha_state(sha_state_in: *const u8, out: *mut u8, num_keys: u32);
			
 
				-
			
 
				-    pub fn poh_verify_many(
			
 
				-        hashes: *mut u8,
			
 
				-        num_hashes_arr: *const u64,
			
 
				-        num_elems: usize,
			
 
				-        use_non_default_stream: u8,
			
 
				-    ) -> c_int;
			
 
				-
			
 
				-    pub fn cuda_host_register(ptr: *mut c_void, size: usize, flags: c_uint) -> c_int;
			
 
				-    pub fn cuda_host_unregister(ptr: *mut c_void) -> c_int;
			
 
				-}
			
 
				-
			
 
				-#[cfg(not(feature = "cuda"))]
			
 
				 pub fn init() {
			
 
				-    // stub
			
 
				+    if let Some(api) = perf_libs::api() {
			
 
				+        unsafe {
			
 
				+            (api.ed25519_set_verbose)(true);
			
 
				+            if !(api.ed25519_init)() {
			
 
				+                panic!("ed25519_init() failed");
			
 
				+            }
			
 
				+            (api.ed25519_set_verbose)(false);
			
 
				+        }
			
 
				+    }
			
 
				 }
			
 
				 
			
 
				 fn verify_packet(packet: &Packet) -> u8 {
			
@@ -130,15 +81,6 @@ fn batch_size(batches: &[Packets]) -> usize {
 
				     batches.iter().map(|p| p.packets.len()).sum()
			
 
				 }
			
 
				 
			
 
				-#[cfg(not(feature = "cuda"))]
			
 
				-pub fn ed25519_verify(
			
 
				-    batches: &[Packets],
			
 
				-    _recycler: &Recycler<TxOffset>,
			
 
				-    _recycler_out: &Recycler<PinnedVec<u8>>,
			
 
				-) -> Vec<Vec<u8>> {
			
 
				-    ed25519_verify_cpu(batches)
			
 
				-}
			
 
				-
			
 
				 pub fn get_packet_offsets(packet: &Packet, current_offset: u32) -> (u32, u32, u32, u32) {
			
 
				     let (sig_len, sig_size) = decode_len(&packet.data);
			
 
				     let msg_start_offset = sig_size + sig_len * size_of::<Signature>();
			
@@ -235,23 +177,17 @@ pub fn ed25519_verify_disabled(batches: &[Packets]) -> Vec<Vec<u8>> {
 
				     rv
			
 
				 }
			
 
				 
			
 
				-#[cfg(feature = "cuda")]
			
 
				-pub fn init() {
			
 
				-    unsafe {
			
 
				-        ed25519_set_verbose(true);
			
 
				-        if !ed25519_init() {
			
 
				-            panic!("ed25519_init() failed");
			
 
				-        }
			
 
				-        ed25519_set_verbose(false);
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-#[cfg(feature = "cuda")]
			
 
				 pub fn ed25519_verify(
			
 
				     batches: &[Packets],
			
 
				     recycler: &Recycler<TxOffset>,
			
 
				     recycler_out: &Recycler<PinnedVec<u8>>,
			
 
				 ) -> Vec<Vec<u8>> {
			
 
				+    let api = perf_libs::api();
			
 
				+    if api.is_none() {
			
 
				+        return ed25519_verify_cpu(batches);
			
 
				+    }
			
 
				+    let api = api.unwrap();
			
 
				+
			
 
				     use crate::packet::PACKET_DATA_SIZE;
			
 
				     let count = batch_size(batches);
			
 
				 
			
@@ -276,7 +212,7 @@ pub fn ed25519_verify(
 
				 
			
 
				     let mut num_packets = 0;
			
 
				     for p in batches {
			
 
				-        elems.push(Elems {
			
 
				+        elems.push(perf_libs::Elems {
			
 
				             elems: p.packets.as_ptr(),
			
 
				             num: p.packets.len() as u32,
			
 
				         });
			
@@ -292,7 +228,7 @@ pub fn ed25519_verify(
 
				     trace!("len offset: {}", PACKET_DATA_SIZE as u32);
			
 
				     const USE_NON_DEFAULT_STREAM: u8 = 1;
			
 
				     unsafe {
			
 
				-        let res = ed25519_verify_many(
			
 
				+        let res = (api.ed25519_verify_many)(
			
 
				             elems.as_ptr(),
			
 
				             elems.len() as u32,
			
 
				             size_of::<Packet>() as u32,
			
--- a/core/src/sigverify_stage.rs
+++ b/core/src/sigverify_stage.rs
@@ -3,10 +3,11 @@
 
				 //! top-level list with a list of booleans, telling the next stage whether the
			
 
				 //! signature in that packet is valid. It assumes each packet contains one
			
 
				 //! transaction. All processing is done on the CPU by default and on a GPU
			
 
				-//! if the `cuda` feature is enabled with `--features=cuda`.
			
 
				+//! if perf-libs are available
			
 
				 
			
 
				 use crate::cuda_runtime::PinnedVec;
			
 
				 use crate::packet::Packets;
			
 
				+use crate::perf_libs;
			
 
				 use crate::recycler::Recycler;
			
 
				 use crate::result::{Error, Result};
			
 
				 use crate::service::Service;
			
@@ -21,11 +22,8 @@ use std::sync::mpsc::{Receiver, RecvTimeoutError};
 
				 use std::sync::{Arc, Mutex};
			
 
				 use std::thread::{self, Builder, JoinHandle};
			
 
				 
			
 
				-#[cfg(feature = "cuda")]
			
 
				-const RECV_BATCH_MAX: usize = 5_000;
			
 
				-
			
 
				-#[cfg(not(feature = "cuda"))]
			
 
				-const RECV_BATCH_MAX: usize = 1000;
			
 
				+const RECV_BATCH_MAX_CPU: usize = 1_000;
			
 
				+const RECV_BATCH_MAX_GPU: usize = 5_000;
			
 
				 
			
 
				 pub type VerifiedPackets = Vec<(Packets, Vec<u8>)>;
			
 
				 
			
@@ -70,7 +68,11 @@ impl SigVerifyStage {
 
				     ) -> Result<()> {
			
 
				         let (batch, len, recv_time) = streamer::recv_batch(
			
 
				             &recvr.lock().expect("'recvr' lock in fn verifier"),
			
 
				-            RECV_BATCH_MAX,
			
 
				+            if perf_libs::api().is_some() {
			
 
				+                RECV_BATCH_MAX_GPU
			
 
				+            } else {
			
 
				+                RECV_BATCH_MAX_CPU
			
 
				+            },
			
 
				         )?;
			
 
				         inc_new_counter_info!("sigverify_stage-packets_received", len);
			
 
				 
			
--- a/core/src/storage_stage.rs
+++ b/core/src/storage_stage.rs
@@ -4,7 +4,6 @@
 
				 
			
 
				 use crate::bank_forks::BankForks;
			
 
				 use crate::blocktree::Blocktree;
			
 
				-#[cfg(cuda)]
			
 
				 use crate::chacha_cuda::chacha_cbc_encrypt_file_many_keys;
			
 
				 use crate::cluster_info::ClusterInfo;
			
 
				 use crate::result::{Error, Result};
			
@@ -408,11 +407,11 @@ impl StorageStage {
 
				             samples.push(rng.gen_range(0, 10));
			
 
				         }
			
 
				         debug!("generated samples: {:?}", samples);
			
 
				+
			
 
				         // TODO: cuda required to generate the reference values
			
 
				         // but if it is missing, then we need to take care not to
			
 
				         // process storage mining results.
			
 
				-        #[cfg(cuda)]
			
 
				-        {
			
 
				+        if crate::perf_libs::api().is_some() {
			
 
				             // Lock the keys, since this is the IV memory,
			
 
				             // it will be updated in-place by the encryption.
			
 
				             // Should be overwritten by the proof signatures which replace the
			
@@ -729,10 +728,8 @@ mod tests {
 
				         let keypair = Keypair::new();
			
 
				         let hash = Hash::default();
			
 
				         let signature = keypair.sign_message(&hash.as_ref());
			
 
				-        #[cfg(feature = "cuda")]
			
 
				+
			
 
				         let mut result = storage_state.get_mining_result(&signature);
			
 
				-        #[cfg(not(feature = "cuda"))]
			
 
				-        let result = storage_state.get_mining_result(&signature);
			
 
				 
			
 
				         assert_eq!(result, Hash::default());
			
 
				 
			
@@ -752,26 +749,27 @@ mod tests {
 
				             .collect::<Vec<_>>();
			
 
				         bank_sender.send(rooted_banks).unwrap();
			
 
				 
			
 
				-        #[cfg(feature = "cuda")]
			
 
				-        for _ in 0..5 {
			
 
				-            result = storage_state.get_mining_result(&signature);
			
 
				-            if result != Hash::default() {
			
 
				-                info!("found result = {:?} sleeping..", result);
			
 
				-                break;
			
 
				+        if crate::perf_libs::api().is_some() {
			
 
				+            for _ in 0..5 {
			
 
				+                result = storage_state.get_mining_result(&signature);
			
 
				+                if result != Hash::default() {
			
 
				+                    info!("found result = {:?} sleeping..", result);
			
 
				+                    break;
			
 
				+                }
			
 
				+                info!("result = {:?} sleeping..", result);
			
 
				+                sleep(Duration::new(1, 0));
			
 
				             }
			
 
				-            info!("result = {:?} sleeping..", result);
			
 
				-            sleep(Duration::new(1, 0));
			
 
				         }
			
 
				 
			
 
				         info!("joining..?");
			
 
				         exit.store(true, Ordering::Relaxed);
			
 
				         storage_stage.join().unwrap();
			
 
				 
			
 
				-        #[cfg(not(cuda))]
			
 
				-        assert_eq!(result, Hash::default());
			
 
				-
			
 
				-        #[cfg(cuda)]
			
 
				-        assert_ne!(result, Hash::default());
			
 
				+        if crate::perf_libs::api().is_some() {
			
 
				+            assert_ne!(result, Hash::default());
			
 
				+        } else {
			
 
				+            assert_eq!(result, Hash::default());
			
 
				+        }
			
 
				 
			
 
				         remove_dir_all(ledger_path).unwrap();
			
 
				     }
			
--- a/core/src/validator.rs
+++ b/core/src/validator.rs
@@ -118,7 +118,14 @@ impl Validator {
 
				 
			
 
				         warn!("identity pubkey: {:?}", id);
			
 
				         warn!("vote pubkey: {:?}", vote_account);
			
 
				-        warn!("CUDA is {}abled", if cfg!(cuda) { "en" } else { "dis" });
			
 
				+        warn!(
			
 
				+            "CUDA is {}abled",
			
 
				+            if crate::perf_libs::api().is_some() {
			
 
				+                "en"
			
 
				+            } else {
			
 
				+                "dis"
			
 
				+            }
			
 
				+        );
			
 
				         info!("entrypoint: {:?}", entrypoint_info_option);
			
 
				 
			
 
				         Self::print_node_info(&node);
			
--- a/fetch-perf-libs.sh
+++ b/fetch-perf-libs.sh
@@ -1,19 +1,20 @@
 
				 #!/usr/bin/env bash
			
 
				 
			
 
				 PERF_LIBS_VERSION=v0.15.0
			
 
				+VERSION=$PERF_LIBS_VERSION-1
			
 
				 
			
 
				 set -e
			
 
				 cd "$(dirname "$0")"
			
 
				 
			
 
				-if [[ ! -f target/perf-libs/.$PERF_LIBS_VERSION ]]; then
			
 
				+if [[ ! -f target/perf-libs/.$VERSION ]]; then
			
 
				   if [[ $(uname) != Linux ]]; then
			
 
				-    echo Performance libraries are only available for Linux
			
 
				-    exit 1
			
 
				+    echo Note: Performance libraries are only available for Linux
			
 
				+    exit 0
			
 
				   fi
			
 
				 
			
 
				   if [[ $(uname -m) != x86_64 ]]; then
			
 
				-    echo Performance libraries are only available for x86_64 architecture
			
 
				-    exit 1
			
 
				+    echo Note: Performance libraries are only available for x86_64 architecture
			
 
				+    exit 0
			
 
				   fi
			
 
				 
			
 
				   mkdir -p target/perf-libs
			
@@ -24,72 +25,16 @@ if [[ ! -f target/perf-libs/.$PERF_LIBS_VERSION ]]; then
 
				       https://github.com/solana-labs/solana-perf-libs/releases/download/$PERF_LIBS_VERSION/solana-perf.tgz
			
 
				     tar zxvf solana-perf.tgz
			
 
				     rm -f solana-perf.tgz
			
 
				-    touch .$PERF_LIBS_VERSION
			
 
				+    touch .$VERSION
			
 
				   )
			
 
				-  echo
			
 
				-fi
			
 
				-
			
 
				-cat > target/perf-libs/env.sh <<'EOF'
			
 
				-if [[ -n $SOLANA_PERF_LIBS ]]; then
			
 
				-  echo "solana-perf-libs version: $(cat $SOLANA_PERF_LIBS/solana-perf-HEAD.txt)"
			
 
				-  return
			
 
				-fi
			
 
				-SOLANA_PERF_LIBS="$(cd $(dirname "${BASH_SOURCE[0]}"); pwd)"
			
 
				-
			
 
				-SOLANA_PERF_LIBS_CUDA=
			
 
				-for _supported_cuda in $(cd $SOLANA_PERF_LIBS; find . -maxdepth 1 -type d -regex './cuda-.*' | sort -r); do
			
 
				-  _supported_cuda=$(basename "$_supported_cuda")
			
 
				-  CUDA_HOME=/usr/local/$_supported_cuda
			
 
				-  [[ -d $CUDA_HOME ]] || {
			
 
				-    echo "$_supported_cuda not detected: $CUDA_HOME directory does not exist"
			
 
				-    continue
			
 
				-  }
			
 
				-  [[ -r $CUDA_HOME/version.txt ]] || {
			
 
				-    echo "$_supported_cuda not detected: $CUDA_HOME/version.txt does not exist"
			
 
				-    continue
			
 
				-  }
			
 
				-  echo
			
 
				-  cat "$CUDA_HOME/version.txt"
			
 
				-  echo "CUDA_HOME=$CUDA_HOME"
			
 
				-  SOLANA_PERF_LIBS_CUDA=$_supported_cuda
			
 
				-  export CUDA_HOME
			
 
				-  export SOLANA_PERF_LIBS_CUDA
			
 
				-  break
			
 
				-done
			
 
				 
			
 
				-if [[ -z $SOLANA_PERF_LIBS_CUDA ]]; then
			
 
				-  echo No supported CUDA versions detected
			
 
				-  echo
			
 
				-  echo LD_LIBRARY_PATH="$SOLANA_PERF_LIBS:$LD_LIBRARY_PATH"
			
 
				-  export LD_LIBRARY_PATH="$SOLANA_PERF_LIBS:$LD_LIBRARY_PATH"
			
 
				-else
			
 
				-  echo
			
 
				-  echo LD_LIBRARY_PATH="$SOLANA_PERF_LIBS:$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA:$CUDA_HOME/lib64:$LD_LIBRARY_PATH"
			
 
				-  export LD_LIBRARY_PATH="$SOLANA_PERF_LIBS:$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA:$CUDA_HOME/lib64:$LD_LIBRARY_PATH"
			
 
				+  # Setup symlinks so the perf-libs/ can be found from all binaries run out of
			
 
				+  # target/
			
 
				+  for dir in target/{debug,release}/{,deps/}; do
			
 
				+    mkdir -p $dir
			
 
				+    ln -sfT ../perf-libs ${dir}perf-libs
			
 
				+  done
			
 
				 
			
 
				-  echo PATH="$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA:$CUDA_HOME/bin:$PATH"
			
 
				-  export PATH="$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA:$CUDA_HOME/bin:$PATH"
			
 
				-
			
 
				-  if [[ -r "$CUDA_HOME"/version.txt && -r $SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA/cuda-version.txt ]]; then
			
 
				-    if ! diff "$CUDA_HOME"/version.txt "$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA"/cuda-version.txt > /dev/null; then
			
 
				-        echo ==============================================
			
 
				-        echo "Warning: possible CUDA version mismatch with $CUDA_HOME"
			
 
				-        echo
			
 
				-        echo "Expected version: $(cat "$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA"/cuda-version.txt)"
			
 
				-        echo "Detected version: $(cat "$CUDA_HOME"/version.txt)"
			
 
				-        echo ==============================================
			
 
				-    fi
			
 
				-  else
			
 
				-    echo ==============================================
			
 
				-    echo Warning: unable to validate CUDA version
			
 
				-    echo ==============================================
			
 
				-  fi
			
 
				 fi
			
 
				-echo
			
 
				-echo "solana-perf-libs version: $(cat $SOLANA_PERF_LIBS/solana-perf-HEAD.txt)"
			
 
				-
			
 
				-EOF
			
 
				 
			
 
				-echo "Setup shell environment with:"
			
 
				-echo "    source $PWD/target/perf-libs/env.sh"
			
 
				 exit 0
			
--- a/local_cluster/Cargo.toml
+++ b/local_cluster/Cargo.toml
@@ -34,7 +34,3 @@ solana-rayon-threadlimit = { path = "../rayon-threadlimit", version = "0.20.0" }
 
				 [dev-dependencies]
			
 
				 serial_test = "0.2.0"
			
 
				 serial_test_derive = "0.2.0"
			
 
				-
			
 
				-[features]
			
 
				-cuda = ["solana-core/cuda"]
			
 
				-
			
--- a/multinode-demo/common.sh
+++ b/multinode-demo/common.sh
@@ -18,11 +18,6 @@ if [[ $(uname) != Linux ]]; then
 
				   fi
			
 
				 fi
			
 
				 
			
 
				-if [[ -f "$SOLANA_ROOT"/target/perf-libs/env.sh ]]; then
			
 
				-  # shellcheck source=/dev/null
			
 
				-  source "$SOLANA_ROOT"/target/perf-libs/env.sh
			
 
				-fi
			
 
				-
			
 
				 if [[ -n $USE_INSTALL || ! -f "$SOLANA_ROOT"/Cargo.toml ]]; then
			
 
				   solana_program() {
			
 
				     declare program="$1"
			
@@ -57,7 +52,7 @@ fi
 
				 solana_bench_tps=$(solana_program bench-tps)
			
 
				 solana_drone=$(solana_program drone)
			
 
				 solana_validator=$(solana_program validator)
			
 
				-solana_validator_cuda=$(solana_program validator-cuda)
			
 
				+solana_validator_cuda="$solana_validator --cuda"
			
 
				 solana_genesis=$(solana_program genesis)
			
 
				 solana_gossip=$(solana_program gossip)
			
 
				 solana_keygen=$(solana_program keygen)
			
--- a/net/README.md
+++ b/net/README.md
@@ -73,12 +73,6 @@ $ ./ec2.sh create -g ...
 
				 If deploying a tarball-based network nothing further is required, as GPU presence
			
 
				 is detected at runtime and the CUDA build is auto selected.
			
 
				 
			
 
				-If deploying a locally-built network, first run `./fetch-perf-libs.sh` then
			
 
				-ensure the `cuda` feature is specified at network start:
			
 
				-```bash
			
 
				-$ ./net.sh start -f "cuda"
			
 
				-```
			
 
				-
			
 
				 ### How to interact with a CD testnet deployed by ci/testnet-deploy.sh
			
 
				 
			
 
				 **AWS-Specific Extra Setup**: Follow the steps in
			
--- a/net/net.sh
+++ b/net/net.sh
@@ -32,8 +32,6 @@ Operate a configured testnet
 
				    -t edge|beta|stable|vX.Y.Z         - Deploy the latest tarball release for the
			
 
				                                         specified release channel (edge|beta|stable) or release tag
			
 
				                                         (vX.Y.Z)
			
 
				-   -f [cargoFeatures]                 - List of |cargo --feaures=| to activate
			
 
				-                                        (ignored if -s or -S is specified)
			
 
				    -r / --skip-setup                  - Reuse existing node/ledger configuration from a
			
 
				                                         previous |start| (ie, don't run ./multinode-demo/setup.sh).
			
 
				    -d / --debug                       - Build/deploy the testnet with debug binaries
			
@@ -111,7 +109,6 @@ releaseChannel=
 
				 deployMethod=local
			
 
				 deployIfNewer=
			
 
				 sanityExtraArgs=
			
 
				-cargoFeatures=
			
 
				 skipSetup=false
			
 
				 customPrograms=
			
 
				 updatePlatforms=
			
@@ -220,9 +217,6 @@ while getopts "h?T:t:o:f:rD:c:Fn:i:d" opt "${shortArgs[@]}"; do
 
				       ;;
			
 
				     esac
			
 
				     ;;
			
 
				-  f)
			
 
				-    cargoFeatures=$OPTARG
			
 
				-    ;;
			
 
				   n)
			
 
				     numFullnodesRequested=$OPTARG
			
 
				     ;;
			
@@ -340,11 +334,6 @@ build() {
 
				     set -x
			
 
				     rm -rf farf
			
 
				 
			
 
				-    if [[ -r target/perf-libs/env.sh ]]; then
			
 
				-      # shellcheck source=/dev/null
			
 
				-      source target/perf-libs/env.sh
			
 
				-    fi
			
 
				-
			
 
				     buildVariant=
			
 
				     if $debugBuild; then
			
 
				       buildVariant=debug
			
@@ -352,7 +341,7 @@ build() {
 
				 
			
 
				     $MAYBE_DOCKER bash -c "
			
 
				       set -ex
			
 
				-      scripts/cargo-install-all.sh farf \"$cargoFeatures\" \"$buildVariant\"
			
 
				+      scripts/cargo-install-all.sh farf \"$buildVariant\"
			
 
				       if [[ -n \"$customPrograms\" ]]; then
			
 
				         scripts/cargo-install-custom-programs.sh farf $customPrograms
			
 
				       fi
			
--- a/net/remote/remote-client.sh
+++ b/net/remote/remote-client.sh
@@ -33,12 +33,7 @@ case $deployMethod in
 
				 local|tar)
			
 
				   PATH="$HOME"/.cargo/bin:"$PATH"
			
 
				   export USE_INSTALL=1
			
 
				-
			
 
				-  ./fetch-perf-libs.sh
			
 
				-  # shellcheck source=/dev/null
			
 
				-  source ./target/perf-libs/env.sh
			
 
				-
			
 
				-  net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/solana*" ~/.cargo/bin/
			
 
				+  net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/*" ~/.cargo/bin/
			
 
				   ;;
			
 
				 skip)
			
 
				   ;;
			
--- a/net/remote/remote-node.sh
+++ b/net/remote/remote-node.sh
@@ -93,15 +93,12 @@ local|tar|skip)
 
				   export USE_INSTALL=1
			
 
				 
			
 
				   ./fetch-perf-libs.sh
			
 
				-  # shellcheck source=/dev/null
			
 
				-  source ./target/perf-libs/env.sh
			
 
				 
			
 
				 cat >> ~/solana/on-reboot <<EOF
			
 
				   PATH="$HOME"/.cargo/bin:"$PATH"
			
 
				   export USE_INSTALL=1
			
 
				 
			
 
				   # shellcheck source=/dev/null
			
 
				-  source ./target/perf-libs/env.sh
			
 
				   SUDO_OK=1 source scripts/tune-system.sh
			
 
				 
			
 
				   (
			
@@ -113,7 +110,7 @@ cat >> ~/solana/on-reboot <<EOF
 
				   scripts/net-stats.sh  > net-stats.log 2>&1 &
			
 
				   echo \$! > net-stats.pid
			
 
				 
			
 
				-  if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-validator-cuda ]]; then
			
 
				+  if [[ -e /dev/nvidia0 ]]; then
			
 
				     echo Selecting solana-validator-cuda
			
 
				     export SOLANA_CUDA=1
			
 
				   fi
			
--- a/net/remote/remote-sanity.sh
+++ b/net/remote/remote-sanity.sh
@@ -67,11 +67,6 @@ case $deployMethod in
 
				 local|tar|skip)
			
 
				   PATH="$HOME"/.cargo/bin:"$PATH"
			
 
				   export USE_INSTALL=1
			
 
				-  if [[ -r target/perf-libs/env.sh ]]; then
			
 
				-    # shellcheck source=/dev/null
			
 
				-    source target/perf-libs/env.sh
			
 
				-  fi
			
 
				-
			
 
				   solana_gossip=solana-gossip
			
 
				   solana_install=solana-install
			
 
				   ;;
			
--- a/scripts/cargo-install-all.sh
+++ b/scripts/cargo-install-all.sh
@@ -17,13 +17,7 @@ fi
 
				 
			
 
				 installDir="$(mkdir -p "$1"; cd "$1"; pwd)"
			
 
				 cargo=cargo
			
 
				-cargoFeatures="$2"
			
 
				-debugBuild="$3"
			
 
				-
			
 
				-if [[ -n $cargoFeatures && $cargoFeatures != cuda ]]; then
			
 
				-  echo "Unsupported feature flag: $cargoFeatures"
			
 
				-  exit 1
			
 
				-fi
			
 
				+debugBuild="$2"
			
 
				 
			
 
				 buildVariant=release
			
 
				 maybeReleaseFlag=--release
			
@@ -35,6 +29,7 @@ fi
 
				 echo "Install location: $installDir ($buildVariant)"
			
 
				 
			
 
				 cd "$(dirname "$0")"/..
			
 
				+./fetch-perf-libs.sh
			
 
				 
			
 
				 SECONDS=0
			
 
				 
			
@@ -78,19 +73,8 @@ for bin in "${BINS[@]}"; do
 
				   cp -fv "target/$buildVariant/$bin" "$installDir"/bin
			
 
				 done
			
 
				 
			
 
				-
			
 
				-if [[ "$cargoFeatures" = cuda ]]; then
			
 
				-  (
			
 
				-    set -x
			
 
				-    ./fetch-perf-libs.sh
			
 
				-
			
 
				-    # shellcheck source=/dev/null
			
 
				-    source ./target/perf-libs/env.sh
			
 
				-
			
 
				-    # shellcheck disable=SC2086 # Don't want to double quote $rust_version
			
 
				-    cargo $rust_version build $maybeReleaseFlag --package solana-validator-cuda
			
 
				-  )
			
 
				-  cp -fv "target/$buildVariant/solana-validator-cuda" "$installDir"/bin
			
 
				+if [[ -d target/perf-libs ]]; then
			
 
				+  cp -a target/perf-libs "$installDir"/bin/perf-libs
			
 
				 fi
			
 
				 
			
 
				 for dir in programs/*; do
			
--- a/scripts/coverage.sh
+++ b/scripts/coverage.sh
@@ -15,7 +15,7 @@ reportName="lcov-${CI_COMMIT:0:9}"
 
				 if [[ -n $1 ]]; then
			
 
				   crate="--package $1"
			
 
				 else
			
 
				-  crate="--all --exclude solana-local-cluster --exclude solana-validator-cuda"
			
 
				+  crate="--all --exclude solana-local-cluster"
			
 
				 fi
			
 
				 
			
 
				 coverageFlags=(-Zprofile)                # Enable coverage
			
--- a/validator-cuda/.gitignore
+++ b/validator-cuda/.gitignore
@@ -1,2 +0,0 @@
 
				-/target/
			
 
				-/farf/
			
--- a/validator-cuda/Cargo.toml
+++ b/validator-cuda/Cargo.toml
@@ -1,14 +0,0 @@
 
				-[package]
			
 
				-authors = ["Solana Maintainers <maintainers@solana.com>"]
			
 
				-edition = "2018"
			
 
				-name = "solana-validator-cuda"
			
 
				-description = "Blockchain, Rebuilt for Scale"
			
 
				-version = "0.20.0"
			
 
				-repository = "https://github.com/solana-labs/solana"
			
 
				-license = "Apache-2.0"
			
 
				-homepage = "https://solana.com/"
			
 
				-publish = false
			
 
				-
			
 
				-[dependencies]
			
 
				-solana-core = { path = "../core", version = "0.20.0", features=["cuda"] }
			
 
				-solana-validator = { path = "../validator", version = "0.20.0" }
			
--- a/validator-cuda/src/main.rs
+++ b/validator-cuda/src/main.rs
@@ -1,3 +0,0 @@
 
				-fn main() {
			
 
				-    solana_validator::main()
			
 
				-}
			
--- a/validator/src/lib.rs
+++ b/validator/src/lib.rs
@@ -390,7 +390,17 @@ pub fn main() {
 
				                 .takes_value(false)
			
 
				                 .help("Skip ledger verification at node bootup"),
			
 
				         )
			
 
				-         .get_matches();
			
 
				+        .arg(
			
 
				+            clap::Arg::with_name("cuda")
			
 
				+                .long("cuda")
			
 
				+                .takes_value(false)
			
 
				+                .help("Use CUDA"),
			
 
				+        )
			
 
				+        .get_matches();
			
 
				+
			
 
				+    if matches.is_present("cuda") {
			
 
				+        solana_core::perf_libs::init_cuda();
			
 
				+    }
			
 
				 
			
 
				     let mut validator_config = ValidatorConfig::default();
			
 
				     let keypair = if let Some(identity) = matches.value_of("identity") {