|
|
@@ -385,41 +385,45 @@ pub fn generate_offsets(
|
|
|
let mut msg_sizes: PinnedVec<_> = recycler.allocate("msg_size_offsets");
|
|
|
msg_sizes.set_pinnable();
|
|
|
let mut current_offset: usize = 0;
|
|
|
- let mut v_sig_lens = Vec::new();
|
|
|
- batches.iter_mut().for_each(|batch| {
|
|
|
- let mut sig_lens = Vec::new();
|
|
|
- batch.packets.iter_mut().for_each(|packet| {
|
|
|
- let packet_offsets = get_packet_offsets(packet, current_offset, reject_non_vote);
|
|
|
-
|
|
|
- sig_lens.push(packet_offsets.sig_len);
|
|
|
+ let offsets = batches
|
|
|
+ .iter_mut()
|
|
|
+ .map(|batch| {
|
|
|
+ batch
|
|
|
+ .packets
|
|
|
+ .iter_mut()
|
|
|
+ .map(|packet| {
|
|
|
+ let packet_offsets =
|
|
|
+ get_packet_offsets(packet, current_offset, reject_non_vote);
|
|
|
|
|
|
- trace!("pubkey_offset: {}", packet_offsets.pubkey_start);
|
|
|
+ trace!("pubkey_offset: {}", packet_offsets.pubkey_start);
|
|
|
|
|
|
- let mut pubkey_offset = packet_offsets.pubkey_start;
|
|
|
- let mut sig_offset = packet_offsets.sig_start;
|
|
|
- let msg_size = current_offset.saturating_add(packet.meta.size) as u32;
|
|
|
- for _ in 0..packet_offsets.sig_len {
|
|
|
- signature_offsets.push(sig_offset);
|
|
|
- sig_offset = sig_offset.saturating_add(size_of::<Signature>() as u32);
|
|
|
+ let mut pubkey_offset = packet_offsets.pubkey_start;
|
|
|
+ let mut sig_offset = packet_offsets.sig_start;
|
|
|
+ let msg_size = current_offset.saturating_add(packet.meta.size) as u32;
|
|
|
+ for _ in 0..packet_offsets.sig_len {
|
|
|
+ signature_offsets.push(sig_offset);
|
|
|
+ sig_offset = sig_offset.saturating_add(size_of::<Signature>() as u32);
|
|
|
|
|
|
- pubkey_offsets.push(pubkey_offset);
|
|
|
- pubkey_offset = pubkey_offset.saturating_add(size_of::<Pubkey>() as u32);
|
|
|
+ pubkey_offsets.push(pubkey_offset);
|
|
|
+ pubkey_offset = pubkey_offset.saturating_add(size_of::<Pubkey>() as u32);
|
|
|
|
|
|
- msg_start_offsets.push(packet_offsets.msg_start);
|
|
|
+ msg_start_offsets.push(packet_offsets.msg_start);
|
|
|
|
|
|
- let msg_size = msg_size.saturating_sub(packet_offsets.msg_start);
|
|
|
- msg_sizes.push(msg_size);
|
|
|
- }
|
|
|
- current_offset = current_offset.saturating_add(size_of::<Packet>());
|
|
|
- });
|
|
|
- v_sig_lens.push(sig_lens);
|
|
|
- });
|
|
|
+ let msg_size = msg_size.saturating_sub(packet_offsets.msg_start);
|
|
|
+ msg_sizes.push(msg_size);
|
|
|
+ }
|
|
|
+ current_offset = current_offset.saturating_add(size_of::<Packet>());
|
|
|
+ packet_offsets.sig_len
|
|
|
+ })
|
|
|
+ .collect()
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
(
|
|
|
signature_offsets,
|
|
|
pubkey_offsets,
|
|
|
msg_start_offsets,
|
|
|
msg_sizes,
|
|
|
- v_sig_lens,
|
|
|
+ offsets,
|
|
|
)
|
|
|
}
|
|
|
|
|
|
@@ -492,9 +496,8 @@ impl Deduper {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-pub fn ed25519_verify_cpu(batches: &mut [PacketBatch], reject_non_vote: bool) {
|
|
|
+pub fn ed25519_verify_cpu(batches: &mut [PacketBatch], reject_non_vote: bool, packet_count: usize) {
|
|
|
use rayon::prelude::*;
|
|
|
- let packet_count = count_packets_in_batches(batches);
|
|
|
debug!("CPU ECDSA for {}", packet_count);
|
|
|
PAR_THREAD_POOL.install(|| {
|
|
|
batches.into_par_iter().for_each(|batch| {
|
|
|
@@ -574,7 +577,9 @@ pub fn get_checked_scalar(scalar: &[u8; 32]) -> Result<[u8; 32], PacketError> {
|
|
|
pub fn mark_disabled(batches: &mut [PacketBatch], r: &[Vec<u8>]) {
|
|
|
for (batch, v) in batches.iter_mut().zip(r) {
|
|
|
for (pkt, f) in batch.packets.iter_mut().zip(v) {
|
|
|
- pkt.meta.set_discard(*f == 0);
|
|
|
+ if !pkt.meta.discard() {
|
|
|
+ pkt.meta.set_discard(*f == 0);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -584,29 +589,35 @@ pub fn ed25519_verify(
|
|
|
recycler: &Recycler<TxOffset>,
|
|
|
recycler_out: &Recycler<PinnedVec<u8>>,
|
|
|
reject_non_vote: bool,
|
|
|
+ valid_packet_count: usize,
|
|
|
) {
|
|
|
let api = perf_libs::api();
|
|
|
if api.is_none() {
|
|
|
- return ed25519_verify_cpu(batches, reject_non_vote);
|
|
|
+ return ed25519_verify_cpu(batches, reject_non_vote, valid_packet_count);
|
|
|
}
|
|
|
let api = api.unwrap();
|
|
|
|
|
|
use crate::packet::PACKET_DATA_SIZE;
|
|
|
- let packet_count = count_packets_in_batches(batches);
|
|
|
|
|
|
+ let total_packet_count = count_packets_in_batches(batches);
|
|
|
// micro-benchmarks show GPU time for smallest batch around 15-20ms
|
|
|
// and CPU speed for 64-128 sigverifies around 10-20ms. 64 is a nice
|
|
|
// power-of-two number around that accounting for the fact that the CPU
|
|
|
// may be busy doing other things while being a real validator
|
|
|
// TODO: dynamically adjust this crossover
|
|
|
- if packet_count < 64 {
|
|
|
- return ed25519_verify_cpu(batches, reject_non_vote);
|
|
|
+ if valid_packet_count < 64
|
|
|
+ || 100usize
|
|
|
+ .wrapping_mul(valid_packet_count)
|
|
|
+ .wrapping_div(total_packet_count)
|
|
|
+ < 90
|
|
|
+ {
|
|
|
+ return ed25519_verify_cpu(batches, reject_non_vote, valid_packet_count);
|
|
|
}
|
|
|
|
|
|
let (signature_offsets, pubkey_offsets, msg_start_offsets, msg_sizes, sig_lens) =
|
|
|
generate_offsets(batches, recycler, reject_non_vote);
|
|
|
|
|
|
- debug!("CUDA ECDSA for {}", packet_count);
|
|
|
+ debug!("CUDA ECDSA for {}", valid_packet_count);
|
|
|
debug!("allocating out..");
|
|
|
let mut out = recycler_out.allocate("out_buffer");
|
|
|
out.set_pinnable();
|
|
|
@@ -619,8 +630,7 @@ pub fn ed25519_verify(
|
|
|
elems: batch.packets.as_ptr(),
|
|
|
num: batch.packets.len() as u32,
|
|
|
});
|
|
|
- let mut v = Vec::new();
|
|
|
- v.resize(batch.packets.len(), 0);
|
|
|
+ let v = vec![0u8; batch.packets.len()];
|
|
|
rvs.push(v);
|
|
|
num_packets = num_packets.saturating_add(batch.packets.len());
|
|
|
}
|
|
|
@@ -651,7 +661,7 @@ pub fn ed25519_verify(
|
|
|
trace!("done verify");
|
|
|
copy_return_values(&sig_lens, &out, &mut rvs);
|
|
|
mark_disabled(batches, &rvs);
|
|
|
- inc_new_counter_debug!("ed25519_verify_gpu", packet_count);
|
|
|
+ inc_new_counter_debug!("ed25519_verify_gpu", valid_packet_count);
|
|
|
}
|
|
|
|
|
|
#[cfg(test)]
|
|
|
@@ -704,6 +714,7 @@ mod tests {
|
|
|
let mut batches: Vec<PacketBatch> = vec![batch];
|
|
|
mark_disabled(&mut batches, &[vec![0]]);
|
|
|
assert!(batches[0].packets[0].meta.discard());
|
|
|
+ batches[0].packets[0].meta.set_discard(false);
|
|
|
mark_disabled(&mut batches, &[vec![1]]);
|
|
|
assert!(!batches[0].packets[0].meta.discard());
|
|
|
}
|
|
|
@@ -1005,6 +1016,29 @@ mod tests {
|
|
|
);
|
|
|
}
|
|
|
|
|
|
+ fn generate_packet_batches_random_size(
|
|
|
+ packet: &Packet,
|
|
|
+ max_packets_per_batch: usize,
|
|
|
+ num_batches: usize,
|
|
|
+ ) -> Vec<PacketBatch> {
|
|
|
+ // generate packet vector
|
|
|
+ let batches: Vec<_> = (0..num_batches)
|
|
|
+ .map(|_| {
|
|
|
+ let mut packet_batch = PacketBatch::default();
|
|
|
+ packet_batch.packets.resize(0, Packet::default());
|
|
|
+ let num_packets_per_batch = thread_rng().gen_range(1, max_packets_per_batch);
|
|
|
+ for _ in 0..num_packets_per_batch {
|
|
|
+ packet_batch.packets.push(packet.clone());
|
|
|
+ }
|
|
|
+ assert_eq!(packet_batch.packets.len(), num_packets_per_batch);
|
|
|
+ packet_batch
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ assert_eq!(batches.len(), num_batches);
|
|
|
+
|
|
|
+ batches
|
|
|
+ }
|
|
|
+
|
|
|
fn generate_packet_batches(
|
|
|
packet: &Packet,
|
|
|
num_packets_per_batch: usize,
|
|
|
@@ -1052,7 +1086,8 @@ mod tests {
|
|
|
fn ed25519_verify(batches: &mut [PacketBatch]) {
|
|
|
let recycler = Recycler::default();
|
|
|
let recycler_out = Recycler::default();
|
|
|
- sigverify::ed25519_verify(batches, &recycler, &recycler_out, false);
|
|
|
+ let packet_count = sigverify::count_packets_in_batches(batches);
|
|
|
+ sigverify::ed25519_verify(batches, &recycler, &recycler_out, false, packet_count);
|
|
|
}
|
|
|
|
|
|
#[test]
|
|
|
@@ -1133,9 +1168,8 @@ mod tests {
|
|
|
let recycler = Recycler::default();
|
|
|
let recycler_out = Recycler::default();
|
|
|
for _ in 0..50 {
|
|
|
- let n = thread_rng().gen_range(1, 30);
|
|
|
let num_batches = thread_rng().gen_range(2, 30);
|
|
|
- let mut batches = generate_packet_batches(&packet, n, num_batches);
|
|
|
+ let mut batches = generate_packet_batches_random_size(&packet, 128, num_batches);
|
|
|
|
|
|
let num_modifications = thread_rng().gen_range(0, 5);
|
|
|
for _ in 0..num_modifications {
|
|
|
@@ -1147,11 +1181,17 @@ mod tests {
|
|
|
batches[batch].packets[packet].data[offset].wrapping_add(add);
|
|
|
}
|
|
|
|
|
|
+ let batch_to_disable = thread_rng().gen_range(0, batches.len());
|
|
|
+ for p in batches[batch_to_disable].packets.iter_mut() {
|
|
|
+ p.meta.set_discard(true);
|
|
|
+ }
|
|
|
+
|
|
|
// verify from GPU verification pipeline (when GPU verification is enabled) are
|
|
|
// equivalent to the CPU verification pipeline.
|
|
|
let mut batches_cpu = batches.clone();
|
|
|
- sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false);
|
|
|
- ed25519_verify_cpu(&mut batches_cpu, false);
|
|
|
+ let packet_count = sigverify::count_packets_in_batches(&batches);
|
|
|
+ sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, packet_count);
|
|
|
+ ed25519_verify_cpu(&mut batches_cpu, false, packet_count);
|
|
|
|
|
|
// check result
|
|
|
batches
|