소스 검색

refreshes ContactInfo.outset before initializing validator (#3135)

Nodes join gossip during the bootstrap process with a stub contact-info
which, in particular, has an invalid TVU socket address.

Once the bootstrap is done they re-join gossip a 2nd time with a fully
populated contact-info, but this contact-info has an outset timestamp
older than the 1st one because it was initiated earlier.

In v2.0 the outset timestamp determines which contact-info overrides the
other, so the v2.0 nodes refrain from updating their CRDS table with the
fully initialized contact-info.

The commit refreshes ContactInfo.outset before initializing the
validator so that it overrides the one pushed to the gossip by the
bootstrap stage.
behzad nouri 1 년 전
부모
커밋
c2b350023b
4개의 변경된 파일, 21개의 추가 및 9개의 삭제
  1. 1 1
      gossip/src/cluster_info.rs
  2. 12 7
      gossip/src/contact_info.rs
  3. 1 1
      turbine/src/cluster_nodes.rs
  4. 7 0
      validator/src/main.rs

+ 1 - 1
gossip/src/cluster_info.rs

@@ -666,7 +666,7 @@ impl ClusterInfo {
             *instance = NodeInstance::new(&mut thread_rng(), id, timestamp());
         }
         *self.keypair.write().unwrap() = new_keypair;
-        self.my_contact_info.write().unwrap().set_pubkey(id);
+        self.my_contact_info.write().unwrap().hot_swap_pubkey(id);
 
         self.insert_self();
         self.push_message(CrdsValue::new_signed(

+ 12 - 7
gossip/src/contact_info.rs

@@ -181,11 +181,7 @@ impl ContactInfo {
         Self {
             pubkey,
             wallclock,
-            outset: {
-                let now = SystemTime::now();
-                let elapsed = now.duration_since(UNIX_EPOCH).unwrap();
-                u64::try_from(elapsed.as_micros()).unwrap()
-            },
+            outset: get_node_outset(),
             shred_version,
             version: solana_version::Version::default(),
             addrs: Vec::<IpAddr>::default(),
@@ -210,8 +206,11 @@ impl ContactInfo {
         self.shred_version
     }
 
-    pub fn set_pubkey(&mut self, pubkey: Pubkey) {
-        self.pubkey = pubkey
+    pub fn hot_swap_pubkey(&mut self, pubkey: Pubkey) {
+        self.pubkey = pubkey;
+        // Need to update ContactInfo.outset so that this node's contact-info
+        // will override older node with the same pubkey.
+        self.outset = get_node_outset();
     }
 
     pub fn set_wallclock(&mut self, wallclock: u64) {
@@ -409,6 +408,12 @@ impl ContactInfo {
     }
 }
 
+fn get_node_outset() -> u64 {
+    let now = SystemTime::now();
+    let elapsed = now.duration_since(UNIX_EPOCH).unwrap();
+    u64::try_from(elapsed.as_micros()).unwrap()
+}
+
 impl<'de> Deserialize<'de> for ContactInfo {
     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     where

+ 1 - 1
turbine/src/cluster_nodes.rs

@@ -547,7 +547,7 @@ pub fn make_test_cluster<R: Rng>(
     .collect();
     nodes.shuffle(rng);
     let keypair = Arc::new(Keypair::new());
-    nodes[0].set_pubkey(keypair.pubkey());
+    nodes[0] = ContactInfo::new_localhost(&keypair.pubkey(), /*wallclock:*/ timestamp());
     let this_node = nodes[0].clone();
     let mut stakes: HashMap<Pubkey, u64> = nodes
         .iter()

+ 7 - 0
validator/src/main.rs

@@ -1901,6 +1901,13 @@ pub fn main() {
         return;
     }
 
+    // Bootstrap code above pushes a contact-info with more recent timestamp to
+    // gossip. If the node is staked the contact-info lingers in gossip causing
+    // false duplicate nodes error.
+    // Below line refreshes the timestamp on contact-info so that it overrides
+    // the one pushed by bootstrap.
+    node.info.hot_swap_pubkey(identity_keypair.pubkey());
+
     let validator = Validator::new(
         node,
         identity_keypair,