wen_restart.rs 152 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797
  1. //! The `wen-restart` module handles automatic repair during a cluster restart
  2. use {
  3. crate::{
  4. heaviest_fork_aggregate::{HeaviestForkAggregate, HeaviestForkAggregateResult},
  5. last_voted_fork_slots_aggregate::{
  6. LastVotedForkSlotsAggregate, LastVotedForkSlotsAggregateResult,
  7. LastVotedForkSlotsEpochInfo, LastVotedForkSlotsFinalResult,
  8. },
  9. solana::wen_restart_proto::{
  10. self, ConflictMessage, GenerateSnapshotRecord, HeaviestForkAggregateRecord,
  11. HeaviestForkRecord, LastVotedForkSlotsAggregateFinal,
  12. LastVotedForkSlotsAggregateRecord, LastVotedForkSlotsEpochInfoRecord,
  13. LastVotedForkSlotsRecord, State as RestartState, WenRestartProgress,
  14. },
  15. },
  16. agave_snapshots::{
  17. paths::{
  18. get_highest_full_snapshot_archive_slot, get_highest_incremental_snapshot_archive_slot,
  19. },
  20. snapshot_archive_info::SnapshotArchiveInfoGetter,
  21. },
  22. anyhow::Result,
  23. log::*,
  24. prost::Message,
  25. solana_clock::{Epoch, Slot},
  26. solana_gossip::{
  27. cluster_info::{ClusterInfo, GOSSIP_SLEEP_MILLIS},
  28. restart_crds_values::RestartLastVotedForkSlots,
  29. },
  30. solana_hash::Hash,
  31. solana_ledger::{
  32. ancestor_iterator::AncestorIterator,
  33. blockstore::Blockstore,
  34. blockstore_processor::{process_single_slot, ConfirmationProgress, ProcessOptions},
  35. leader_schedule_cache::LeaderScheduleCache,
  36. },
  37. solana_pubkey::Pubkey,
  38. solana_runtime::{
  39. accounts_background_service::AbsStatus,
  40. bank::Bank,
  41. bank_forks::BankForks,
  42. snapshot_bank_utils::{
  43. bank_to_full_snapshot_archive, bank_to_incremental_snapshot_archive,
  44. },
  45. snapshot_controller::SnapshotController,
  46. snapshot_utils::purge_all_bank_snapshots,
  47. },
  48. solana_shred_version::compute_shred_version,
  49. solana_svm_timings::ExecuteTimings,
  50. solana_time_utils::timestamp,
  51. solana_vote::vote_transaction::VoteTransaction,
  52. std::{
  53. collections::{HashMap, HashSet},
  54. fs::{read, File},
  55. io::{Cursor, Write},
  56. path::{Path, PathBuf},
  57. str::FromStr,
  58. sync::{
  59. atomic::{AtomicBool, Ordering},
  60. Arc, RwLock,
  61. },
  62. thread::sleep,
  63. time::{Duration, Instant},
  64. },
  65. };
// If >42% of the validators have this block, repair this block locally.
// Passed to LastVotedForkSlotsAggregate::new() as the repair threshold.
const REPAIR_THRESHOLD: f64 = 0.42;
// When counting Heaviest Fork, only count those with no less than
// 67% - 5% - (100% - active_stake) = active_stake - 38% stake.
// 67% is the supermajority threshold (2/3), 5% is the assumption we
// made regarding how many non-conforming/offline validators the
// algorithm can tolerate.
// Applied as a fraction of an epoch's total stake in is_over_stake_threshold().
const HEAVIEST_FORK_THRESHOLD_DELTA: f64 = 0.38;
// The coordinator prints new stats every 10 seconds.
const COORDINATOR_STAT_PRINT_INTERVAL_SECONDS: u64 = 10;
/// Errors reported by the wen-restart procedure.
///
/// The human-readable form of each variant is produced by the `Display` impl; the
/// per-variant notes below name the payload fields in the order that impl reads them.
#[derive(Debug, PartialEq)]
pub enum WenRestartError {
    /// Bank hash mismatch: (slot, expected hash, actual hash).
    BankHashMismatch(Slot, Hash, Hash),
    /// The block for this slot was not found.
    BlockNotFound(Slot),
    /// The block for this slot is not full.
    BlockNotFull(Slot),
    /// The block was not frozen after replay: (slot, optional error description).
    BlockNotFrozenAfterReplay(Slot, Option<String>),
    /// The block is linked to an unexpected parent: (slot, actual parent, expected parent).
    BlockNotLinkedToExpectedParent(Slot, Option<Slot>, Slot),
    /// A block has more stake than its parent: (slot, child stake, parent slot, parent stake).
    ChildStakeLargerThanParent(Slot, u64, Slot, u64),
    /// The exit flag was set while the procedure was still running.
    Exiting,
    /// A snapshot newer than the intended one exists:
    /// (intended snapshot slot, highest existing slot, directory).
    FutureSnapshotExists(Slot, Slot, String),
    /// A snapshot already exists at the intended slot: (slot, directory).
    GenerateSnapshotWhenOneExists(Slot, String),
    /// Snapshot generation was requested while snapshots are disabled.
    GenerateSnapshotWhenDisabled,
    /// The coordinator's heaviest fork does not include a slot it should:
    /// (coordinator heaviest slot, slot that should be included).
    HeaviestForkOnLeaderOnDifferentFork(Slot, Slot),
    /// The last voted fork slots protobuf record is malformed (record attached if available).
    MalformedLastVotedForkSlotsProtobuf(Option<LastVotedForkSlotsRecord>),
    /// The persisted progress is malformed: (state, description of what is missing).
    MalformedProgress(RestartState, String),
    /// The last voted fork slots are missing.
    MissingLastVotedForkSlots,
    /// The snapshot record is missing from the protobuf.
    MissingSnapshotInProtobuf,
    /// Not enough stake agrees with our choice: (our slot, our hash, stake per (slot, hash)).
    NotEnoughStakeAgreeingWithUs(Slot, Hash, HashMap<(Slot, Hash), u64>),
    /// The restart state found was not expected at this point.
    UnexpectedState(wen_restart_proto::State),
}
  96. impl std::fmt::Display for WenRestartError {
  97. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  98. match self {
  99. WenRestartError::BankHashMismatch(slot, expected, actual) => {
  100. write!(
  101. f,
  102. "Bank hash mismatch for slot: {slot} expected: {expected} actual: {actual}"
  103. )
  104. }
  105. WenRestartError::BlockNotFound(slot) => {
  106. write!(f, "Block not found: {slot}")
  107. }
  108. WenRestartError::BlockNotFull(slot) => {
  109. write!(f, "Block not full: {slot}")
  110. }
  111. WenRestartError::BlockNotFrozenAfterReplay(slot, err) => {
  112. write!(f, "Block not frozen after replay: {slot} {err:?}")
  113. }
  114. WenRestartError::BlockNotLinkedToExpectedParent(slot, parent, expected_parent) => {
  115. write!(
  116. f,
  117. "Block {slot} is not linked to expected parent {expected_parent} but to \
  118. {parent:?}"
  119. )
  120. }
  121. WenRestartError::ChildStakeLargerThanParent(
  122. slot,
  123. child_stake,
  124. parent,
  125. parent_stake,
  126. ) => {
  127. write!(
  128. f,
  129. "Block {slot} has more stake {child_stake} than its parent {parent} with \
  130. stake {parent_stake}"
  131. )
  132. }
  133. WenRestartError::Exiting => write!(f, "Exiting"),
  134. WenRestartError::FutureSnapshotExists(slot, highest_slot, directory) => {
  135. write!(
  136. f,
  137. "Future snapshot exists for slot: {slot} highest slot: {highest_slot} in \
  138. directory: {directory}",
  139. )
  140. }
  141. WenRestartError::GenerateSnapshotWhenOneExists(slot, directory) => {
  142. write!(
  143. f,
  144. "Generate snapshot when one exists for slot: {slot} in directory: {directory}",
  145. )
  146. }
  147. WenRestartError::GenerateSnapshotWhenDisabled => {
  148. write!(f, "Generate snapshot when snapshots are disabled")
  149. }
  150. WenRestartError::HeaviestForkOnLeaderOnDifferentFork(
  151. coordinator_heaviest_slot,
  152. should_include_slot,
  153. ) => {
  154. write!(
  155. f,
  156. "Heaviest fork on coordinator on different fork: heaviest: \
  157. {coordinator_heaviest_slot} does not include: {should_include_slot}",
  158. )
  159. }
  160. WenRestartError::MalformedLastVotedForkSlotsProtobuf(record) => {
  161. write!(f, "Malformed last voted fork slots protobuf: {record:?}")
  162. }
  163. WenRestartError::MalformedProgress(state, missing) => {
  164. write!(f, "Malformed progress: {state:?} missing {missing}")
  165. }
  166. WenRestartError::MissingLastVotedForkSlots => {
  167. write!(f, "Missing last voted fork slots")
  168. }
  169. WenRestartError::MissingSnapshotInProtobuf => {
  170. write!(f, "Missing snapshot in protobuf")
  171. }
  172. WenRestartError::NotEnoughStakeAgreeingWithUs(slot, hash, block_stake_map) => {
  173. write!(
  174. f,
  175. "Not enough stake agreeing with our slot: {slot} hash: {hash}\n \
  176. {block_stake_map:?}",
  177. )
  178. }
  179. WenRestartError::UnexpectedState(state) => {
  180. write!(f, "Unexpected state: {state:?}")
  181. }
  182. }
  183. }
  184. }
// Marker impl: together with Debug and Display this makes WenRestartError a standard
// error type, which is what lets call sites in this file write
// `WenRestartError::X.into()` in functions returning anyhow's `Result`.
impl std::error::Error for WenRestartError {}
// We need a WenRestartProgressInternalState so we can convert the protobuf written in file
// into internal data structure in the initialize function. It should be easily
// convertible to and from WenRestartProgress protobuf.
//
// Each variant carries the data available at that stage. The stage descriptions below
// are inferred from the variant and field names -- confirm against the state machine
// that consumes this enum.
#[derive(Debug, PartialEq)]
pub(crate) enum WenRestartProgressInternalState {
    // Starting point: the same (slots, bank hash) pair that
    // send_restart_last_voted_fork_slots() takes as input.
    Init {
        last_voted_fork_slots: Vec<Slot>,
        last_vote_bankhash: Hash,
    },
    // Last voted fork slots stage; `aggregate_final_result` is optional here
    // (presumably None until aggregation has finished -- TODO confirm).
    LastVotedForkSlots {
        last_voted_fork_slots: Vec<Slot>,
        aggregate_final_result: Option<LastVotedForkSlotsFinalResult>,
    },
    // Heaviest fork search stage; requires the aggregation result, while the local
    // heaviest fork record is optional.
    FindHeaviestFork {
        aggregate_final_result: LastVotedForkSlotsFinalResult,
        my_heaviest_fork: Option<HeaviestForkRecord>,
    },
    // Heaviest fork stage; holds the locally selected heaviest fork slot and hash.
    HeaviestFork {
        my_heaviest_fork_slot: Slot,
        my_heaviest_fork_hash: Hash,
    },
    // Snapshot generation stage; `my_snapshot` is optional
    // (presumably None until the snapshot has been generated -- TODO confirm).
    GenerateSnapshot {
        my_heaviest_fork_slot: Slot,
        my_snapshot: Option<GenerateSnapshotRecord>,
    },
    // Final state; holds a slot, a hash and a shred version.
    Done {
        slot: Slot,
        hash: Hash,
        shred_version: u16,
    },
}
  217. pub(crate) fn send_restart_last_voted_fork_slots(
  218. cluster_info: Arc<ClusterInfo>,
  219. last_voted_fork_slots: &[Slot],
  220. last_vote_bankhash: Hash,
  221. ) -> Result<LastVotedForkSlotsRecord> {
  222. cluster_info.push_restart_last_voted_fork_slots(last_voted_fork_slots, last_vote_bankhash)?;
  223. Ok(LastVotedForkSlotsRecord {
  224. last_voted_fork_slots: last_voted_fork_slots.to_vec(),
  225. last_vote_bankhash: last_vote_bankhash.to_string(),
  226. shred_version: cluster_info.my_shred_version() as u32,
  227. wallclock: timestamp(),
  228. })
  229. }
  230. pub(crate) fn aggregate_restart_last_voted_fork_slots(
  231. wen_restart_path: &PathBuf,
  232. wait_for_supermajority_threshold_percent: u64,
  233. cluster_info: Arc<ClusterInfo>,
  234. last_voted_fork_slots: &Vec<Slot>,
  235. bank_forks: Arc<RwLock<BankForks>>,
  236. blockstore: Arc<Blockstore>,
  237. wen_restart_repair_slots: Arc<RwLock<Vec<Slot>>>,
  238. exit: Arc<AtomicBool>,
  239. progress: &mut WenRestartProgress,
  240. ) -> Result<LastVotedForkSlotsFinalResult> {
  241. let root_bank = bank_forks.read().unwrap().root_bank();
  242. let root_slot = root_bank.slot();
  243. let mut last_voted_fork_slots_aggregate = LastVotedForkSlotsAggregate::new(
  244. root_bank.clone(),
  245. REPAIR_THRESHOLD,
  246. last_voted_fork_slots,
  247. &cluster_info.id(),
  248. );
  249. if let Some(aggregate_record) = &progress.last_voted_fork_slots_aggregate {
  250. for (key_string, message) in &aggregate_record.received {
  251. if let Err(e) =
  252. last_voted_fork_slots_aggregate.aggregate_from_record(key_string, message)
  253. {
  254. error!("Failed to aggregate from record: {e:?}");
  255. }
  256. }
  257. } else {
  258. progress.last_voted_fork_slots_aggregate = Some(LastVotedForkSlotsAggregateRecord {
  259. received: HashMap::new(),
  260. final_result: None,
  261. });
  262. }
  263. let mut cursor = solana_gossip::crds::Cursor::default();
  264. let mut is_full_slots = HashSet::new();
  265. let mut old_progress = WenRestartProgress::default();
  266. loop {
  267. if exit.load(Ordering::Relaxed) {
  268. return Err(WenRestartError::Exiting.into());
  269. }
  270. let start = timestamp();
  271. for new_last_voted_fork_slots in cluster_info.get_restart_last_voted_fork_slots(&mut cursor)
  272. {
  273. let from = new_last_voted_fork_slots.from.to_string();
  274. match last_voted_fork_slots_aggregate.aggregate(new_last_voted_fork_slots) {
  275. LastVotedForkSlotsAggregateResult::Inserted(record) => {
  276. progress
  277. .last_voted_fork_slots_aggregate
  278. .as_mut()
  279. .unwrap()
  280. .received
  281. .insert(from, record);
  282. }
  283. LastVotedForkSlotsAggregateResult::DifferentVersionExists(
  284. old_record,
  285. new_record,
  286. ) => {
  287. info!(
  288. "Different LastVotedForkSlots message exists from {from}: {old_record:#?} \
  289. vs {new_record:#?}"
  290. );
  291. progress.conflict_message.insert(
  292. from,
  293. ConflictMessage {
  294. old_message: format!("{old_record:?}"),
  295. new_message: format!("{new_record:?}"),
  296. },
  297. );
  298. }
  299. LastVotedForkSlotsAggregateResult::AlreadyExists => (),
  300. }
  301. }
  302. // Because all operations on the aggregate are called from this single thread, we can
  303. // fetch all results separately without worrying about them being out of sync. We can
  304. // also use returned iterator without the vector changing underneath us.
  305. let active_percent = last_voted_fork_slots_aggregate.min_active_percent();
  306. let mut filtered_slots: Vec<Slot>;
  307. {
  308. filtered_slots = last_voted_fork_slots_aggregate
  309. .slots_to_repair_iter()
  310. .filter(|slot| {
  311. if *slot <= &root_slot || is_full_slots.contains(*slot) {
  312. return false;
  313. }
  314. if blockstore.is_full(**slot) {
  315. is_full_slots.insert(**slot);
  316. false
  317. } else {
  318. true
  319. }
  320. })
  321. .cloned()
  322. .collect();
  323. }
  324. filtered_slots.sort();
  325. if progress != &old_progress {
  326. info!(
  327. "Active peers: {} Slots to repair: {:?}",
  328. active_percent, &filtered_slots
  329. );
  330. write_wen_restart_records(wen_restart_path, progress)?;
  331. old_progress = progress.clone();
  332. }
  333. if filtered_slots.is_empty()
  334. && active_percent >= wait_for_supermajority_threshold_percent as f64
  335. {
  336. *wen_restart_repair_slots.write().unwrap() = vec![];
  337. break;
  338. }
  339. {
  340. *wen_restart_repair_slots.write().unwrap() = filtered_slots;
  341. }
  342. let elapsed = timestamp().saturating_sub(start);
  343. let time_left = GOSSIP_SLEEP_MILLIS.saturating_sub(elapsed);
  344. if time_left > 0 {
  345. sleep(Duration::from_millis(time_left));
  346. }
  347. }
  348. Ok(last_voted_fork_slots_aggregate.get_final_result())
  349. }
  350. fn is_over_stake_threshold(
  351. epoch_info_vec: &[LastVotedForkSlotsEpochInfo],
  352. epoch: Epoch,
  353. stake: &u64,
  354. ) -> bool {
  355. epoch_info_vec
  356. .iter()
  357. .find(|info| info.epoch == epoch)
  358. .is_some_and(|info| {
  359. let threshold = info
  360. .actively_voting_stake
  361. .checked_sub((info.total_stake as f64 * HEAVIEST_FORK_THRESHOLD_DELTA) as u64)
  362. .unwrap();
  363. stake >= &threshold
  364. })
  365. }
  366. // Verify that all blocks with at least (active_stake_percnet - 38%) of the stake form a
  367. // single chain from the root, and use the highest slot in the blocks as the heaviest fork.
  368. // Please see SIMD 46 "gossip current heaviest fork" for correctness proof.
  369. pub(crate) fn find_heaviest_fork(
  370. aggregate_final_result: LastVotedForkSlotsFinalResult,
  371. bank_forks: Arc<RwLock<BankForks>>,
  372. blockstore: Arc<Blockstore>,
  373. exit: Arc<AtomicBool>,
  374. ) -> Result<(Slot, Hash)> {
  375. let root_bank = bank_forks.read().unwrap().root_bank();
  376. let root_slot = root_bank.slot();
  377. let mut slots = aggregate_final_result
  378. .slots_stake_map
  379. .iter()
  380. .filter(|(slot, stake)| {
  381. **slot > root_slot
  382. && is_over_stake_threshold(
  383. &aggregate_final_result.epoch_info_vec,
  384. root_bank.epoch_schedule().get_epoch(**slot),
  385. stake,
  386. )
  387. })
  388. .map(|(slot, _)| *slot)
  389. .collect::<Vec<Slot>>();
  390. slots.sort();
  391. // The heaviest slot we selected will always be the last of the slots list, or root if the list is empty.
  392. let heaviest_fork_slot = slots.last().map_or(root_slot, |x| *x);
  393. let mut expected_parent = root_slot;
  394. for slot in &slots {
  395. if exit.load(Ordering::Relaxed) {
  396. return Err(WenRestartError::Exiting.into());
  397. }
  398. if let Ok(Some(block_meta)) = blockstore.meta(*slot) {
  399. if block_meta.parent_slot != Some(expected_parent) {
  400. if expected_parent == root_slot {
  401. error!(
  402. "First block {slot} in repair list not linked to local root {root_slot}, \
  403. this could mean our root is too old"
  404. );
  405. } else {
  406. error!(
  407. "Block {slot} in blockstore is not linked to expected parent from Wen \
  408. Restart {expected_parent} but to Block {:?}",
  409. block_meta.parent_slot
  410. );
  411. }
  412. return Err(WenRestartError::BlockNotLinkedToExpectedParent(
  413. *slot,
  414. block_meta.parent_slot,
  415. expected_parent,
  416. )
  417. .into());
  418. }
  419. if !block_meta.is_full() {
  420. return Err(WenRestartError::BlockNotFull(*slot).into());
  421. }
  422. expected_parent = *slot;
  423. } else {
  424. return Err(WenRestartError::BlockNotFound(*slot).into());
  425. }
  426. }
  427. let heaviest_fork_bankhash = find_bankhash_of_heaviest_fork(
  428. heaviest_fork_slot,
  429. slots,
  430. blockstore.clone(),
  431. bank_forks.clone(),
  432. &exit,
  433. )?;
  434. info!("Heaviest fork found: slot: {heaviest_fork_slot}, bankhash: {heaviest_fork_bankhash:?}");
  435. Ok((heaviest_fork_slot, heaviest_fork_bankhash))
  436. }
  437. fn check_slot_smaller_than_intended_snapshot_slot(
  438. slot: Slot,
  439. intended_snapshot_slot: Slot,
  440. directory: &Path,
  441. ) -> Result<()> {
  442. match slot.cmp(&intended_snapshot_slot) {
  443. std::cmp::Ordering::Greater => Err(WenRestartError::FutureSnapshotExists(
  444. intended_snapshot_slot,
  445. slot,
  446. directory.to_string_lossy().to_string(),
  447. )
  448. .into()),
  449. std::cmp::Ordering::Equal => Err(WenRestartError::GenerateSnapshotWhenOneExists(
  450. slot,
  451. directory.to_string_lossy().to_string(),
  452. )
  453. .into()),
  454. std::cmp::Ordering::Less => Ok(()),
  455. }
  456. }
// Given the agreed upon slot, add hard fork and rehash the corresponding bank, then
// generate new snapshot. Generate incremental snapshot if possible, but generate full
// snapshot if there is no full snapshot or snapshot generation is turned off (in this
// case the incremental snapshot based on the full snapshot is incorrect).
//
// We don't use set_root() explicitly, because it may kick off snapshot requests, and we
// can't have multiple snapshot requests in progress. In bank_to_snapshot_archive()
// everything set_root() does will be done (without bank_forks setting root). So
// when we restart from the snapshot, the bank on my_heaviest_fork_slot will become root.
  466. pub(crate) fn generate_snapshot(
  467. bank_forks: Arc<RwLock<BankForks>>,
  468. snapshot_controller: &SnapshotController,
  469. abs_status: &AbsStatus,
  470. genesis_config_hash: Hash,
  471. my_heaviest_fork_slot: Slot,
  472. ) -> Result<GenerateSnapshotRecord> {
  473. let new_root_bank;
  474. {
  475. let my_bank_forks = bank_forks.read().unwrap();
  476. let old_root_bank = my_bank_forks.root_bank();
  477. if !old_root_bank
  478. .hard_forks()
  479. .iter()
  480. .any(|(slot, _)| slot == &my_heaviest_fork_slot)
  481. {
  482. old_root_bank.register_hard_fork(my_heaviest_fork_slot);
  483. }
  484. // my_heaviest_fork_slot is guaranteed to have a bank in bank_forks, it's checked in
  485. // find_bankhash_of_heaviest_fork().
  486. match my_bank_forks.get(my_heaviest_fork_slot) {
  487. Some(bank) => new_root_bank = bank.clone(),
  488. None => {
  489. return Err(WenRestartError::BlockNotFound(my_heaviest_fork_slot).into());
  490. }
  491. }
  492. let mut banks = vec![&new_root_bank];
  493. let parents = new_root_bank.parents();
  494. banks.extend(parents.iter());
  495. }
  496. // Snapshot generation calls AccountsDb background tasks (flush/clean/shrink).
  497. // These cannot run concurrent with each other, so we must shutdown
  498. // AccountsBackgroundService before proceeding.
  499. abs_status.stop();
  500. info!("Waiting for AccountsBackgroundService to stop");
  501. while abs_status.is_running() {
  502. std::thread::yield_now();
  503. }
  504. let snapshot_config = snapshot_controller.snapshot_config();
  505. let mut directory = &snapshot_config.full_snapshot_archives_dir;
  506. // Calculate the full_snapshot_slot an incremental snapshot should depend on. If the
  507. // validator is configured not the generate snapshot, it will only have the initial
  508. // snapshot on disk, which might be too old to generate an incremental snapshot from.
  509. // In this case we also set full_snapshot_slot to None.
  510. let full_snapshot_slot = if snapshot_config.should_generate_snapshots() {
  511. get_highest_full_snapshot_archive_slot(directory)
  512. } else {
  513. None
  514. };
  515. // In very rare cases it's possible that the local root is not on the heaviest fork, so the
  516. // validator generated snapshot for slots > local root. If the cluster agreed upon restart
  517. // slot my_heaviest_fork_slot is less than the current highest full_snapshot_slot, that means the
  518. // locally rooted full_snapshot_slot will be rolled back. this requires human inspection。
  519. //
  520. // In even rarer cases, the selected slot might be the latest full snapshot slot. We could
  521. // just re-generate a new snapshot to make sure the snapshot is up to date after hard fork,
  522. // but for now we just return an error to keep the code simple.
  523. let new_snapshot_path = if let Some(full_snapshot_slot) = full_snapshot_slot {
  524. check_slot_smaller_than_intended_snapshot_slot(
  525. full_snapshot_slot,
  526. my_heaviest_fork_slot,
  527. directory,
  528. )?;
  529. directory = &snapshot_config.incremental_snapshot_archives_dir;
  530. if let Some(incremental_snapshot_slot) =
  531. get_highest_incremental_snapshot_archive_slot(directory, full_snapshot_slot)
  532. {
  533. check_slot_smaller_than_intended_snapshot_slot(
  534. incremental_snapshot_slot,
  535. my_heaviest_fork_slot,
  536. directory,
  537. )?;
  538. }
  539. bank_to_incremental_snapshot_archive(
  540. &snapshot_config.bank_snapshots_dir,
  541. &new_root_bank,
  542. full_snapshot_slot,
  543. Some(snapshot_config.snapshot_version),
  544. &snapshot_config.full_snapshot_archives_dir,
  545. &snapshot_config.incremental_snapshot_archives_dir,
  546. snapshot_config.archive_format,
  547. )?
  548. .path()
  549. .display()
  550. .to_string()
  551. } else {
  552. info!(
  553. "Can't find full snapshot, generating full snapshot for slot: {my_heaviest_fork_slot}"
  554. );
  555. bank_to_full_snapshot_archive(
  556. &snapshot_config.bank_snapshots_dir,
  557. &new_root_bank,
  558. Some(snapshot_config.snapshot_version),
  559. &snapshot_config.full_snapshot_archives_dir,
  560. &snapshot_config.incremental_snapshot_archives_dir,
  561. snapshot_config.archive_format,
  562. )?
  563. .path()
  564. .display()
  565. .to_string()
  566. };
  567. let new_shred_version =
  568. compute_shred_version(&genesis_config_hash, Some(&new_root_bank.hard_forks()));
  569. info!("wen_restart snapshot generated on {new_snapshot_path} base slot {full_snapshot_slot:?}");
  570. // We might have bank snapshots past the my_heaviest_fork_slot, we need to purge them.
  571. purge_all_bank_snapshots(&snapshot_config.bank_snapshots_dir);
  572. Ok(GenerateSnapshotRecord {
  573. path: new_snapshot_path,
  574. slot: my_heaviest_fork_slot,
  575. bankhash: new_root_bank.hash().to_string(),
  576. shred_version: new_shred_version as u32,
  577. })
  578. }
  579. // Find the hash of the heaviest fork, if block hasn't been replayed, replay to get the hash.
  580. pub(crate) fn find_bankhash_of_heaviest_fork(
  581. heaviest_fork_slot: Slot,
  582. slots: Vec<Slot>,
  583. blockstore: Arc<Blockstore>,
  584. bank_forks: Arc<RwLock<BankForks>>,
  585. exit: &AtomicBool,
  586. ) -> Result<Hash> {
  587. if let Some(hash) = bank_forks
  588. .read()
  589. .unwrap()
  590. .get(heaviest_fork_slot)
  591. .map(|bank| bank.hash())
  592. {
  593. return Ok(hash);
  594. }
  595. let root_bank = bank_forks.read().unwrap().root_bank();
  596. let leader_schedule_cache = LeaderScheduleCache::new_from_bank(&root_bank);
  597. let replay_tx_thread_pool = rayon::ThreadPoolBuilder::new()
  598. .thread_name(|i| format!("solReplayTx{i:02}"))
  599. .build()
  600. .expect("new rayon threadpool");
  601. let mut timing = ExecuteTimings::default();
  602. let opts = ProcessOptions::default();
  603. // Now replay all the missing blocks.
  604. let mut parent_bank = root_bank;
  605. for slot in slots {
  606. if exit.load(Ordering::Relaxed) {
  607. return Err(WenRestartError::Exiting.into());
  608. }
  609. let saved_bank = bank_forks.read().unwrap().get_with_scheduler(slot);
  610. let bank_with_scheduler = saved_bank.unwrap_or_else(|| {
  611. let new_bank = Bank::new_from_parent(
  612. parent_bank.clone(),
  613. &leader_schedule_cache
  614. .slot_leader_at(slot, Some(&parent_bank))
  615. .unwrap(),
  616. slot,
  617. );
  618. bank_forks.write().unwrap().insert_from_ledger(new_bank)
  619. });
  620. let bank = if bank_with_scheduler.is_frozen() {
  621. bank_with_scheduler.clone_without_scheduler()
  622. } else {
  623. let mut progress = ConfirmationProgress::new(parent_bank.last_blockhash());
  624. if let Err(e) = process_single_slot(
  625. &blockstore,
  626. &bank_with_scheduler,
  627. &replay_tx_thread_pool,
  628. &opts,
  629. &mut progress,
  630. None,
  631. None,
  632. None,
  633. &mut timing,
  634. ) {
  635. return Err(
  636. WenRestartError::BlockNotFrozenAfterReplay(slot, Some(e.to_string())).into(),
  637. );
  638. }
  639. let cur_bank;
  640. {
  641. cur_bank = bank_forks
  642. .read()
  643. .unwrap()
  644. .get(slot)
  645. .expect("bank should have been just inserted");
  646. }
  647. cur_bank
  648. };
  649. parent_bank = bank;
  650. }
  651. Ok(parent_bank.hash())
  652. }
  653. // Aggregate the heaviest fork at the coordinator.
  654. pub(crate) fn aggregate_restart_heaviest_fork(
  655. wen_restart_path: &PathBuf,
  656. cluster_info: Arc<ClusterInfo>,
  657. bank_forks: Arc<RwLock<BankForks>>,
  658. exit: Arc<AtomicBool>,
  659. progress: &mut WenRestartProgress,
  660. ) -> Result<()> {
  661. let root_bank = bank_forks.read().unwrap().root_bank();
  662. if progress.my_heaviest_fork.is_none() {
  663. return Err(WenRestartError::MalformedProgress(
  664. RestartState::HeaviestFork,
  665. "my_heaviest_fork".to_string(),
  666. )
  667. .into());
  668. }
  669. let my_heaviest_fork = progress.my_heaviest_fork.clone().unwrap();
  670. let heaviest_fork_slot = my_heaviest_fork.slot;
  671. let heaviest_fork_hash = Hash::from_str(&my_heaviest_fork.bankhash)?;
  672. // Use the epoch_stakes associated with the heaviest fork slot we picked.
  673. let epoch_stakes = root_bank
  674. .epoch_stakes(root_bank.epoch_schedule().get_epoch(heaviest_fork_slot))
  675. .unwrap();
  676. let total_stake = epoch_stakes.total_stake();
  677. let mut heaviest_fork_aggregate = HeaviestForkAggregate::new(
  678. cluster_info.my_shred_version(),
  679. epoch_stakes,
  680. heaviest_fork_slot,
  681. heaviest_fork_hash,
  682. &cluster_info.id(),
  683. );
  684. if let Some(aggregate_record) = &progress.heaviest_fork_aggregate {
  685. for message in &aggregate_record.received {
  686. if let Err(e) = heaviest_fork_aggregate.aggregate_from_record(message) {
  687. // Do not abort wen_restart if we got one malformed message.
  688. error!("Failed to aggregate from record: {e:?}");
  689. }
  690. }
  691. } else {
  692. progress.heaviest_fork_aggregate = Some(HeaviestForkAggregateRecord {
  693. received: Vec::new(),
  694. total_active_stake: 0,
  695. });
  696. }
  697. let mut cursor = solana_gossip::crds::Cursor::default();
  698. let mut total_active_stake = 0;
  699. let mut stat_printed_at = Instant::now();
  700. let mut old_progress = WenRestartProgress::default();
  701. loop {
  702. if exit.load(Ordering::Relaxed) {
  703. return Ok(());
  704. }
  705. let start = timestamp();
  706. for new_heaviest_fork in cluster_info.get_restart_heaviest_fork(&mut cursor) {
  707. info!("Received new heaviest fork: {new_heaviest_fork:?}");
  708. let from = new_heaviest_fork.from.to_string();
  709. match heaviest_fork_aggregate.aggregate(new_heaviest_fork) {
  710. HeaviestForkAggregateResult::Inserted(record) => {
  711. info!("Successfully aggregated new heaviest fork: {record:?}");
  712. progress
  713. .heaviest_fork_aggregate
  714. .as_mut()
  715. .unwrap()
  716. .received
  717. .push(record);
  718. }
  719. HeaviestForkAggregateResult::DifferentVersionExists(old_record, new_record) => {
  720. warn!(
  721. "Different version from {from} exists old {old_record:#?} vs new \
  722. {new_record:#?}"
  723. );
  724. progress.conflict_message.insert(
  725. from,
  726. ConflictMessage {
  727. old_message: format!("{old_record:?}"),
  728. new_message: format!("{new_record:?}"),
  729. },
  730. );
  731. }
  732. HeaviestForkAggregateResult::ZeroStakeIgnored => (),
  733. HeaviestForkAggregateResult::AlreadyExists => (),
  734. HeaviestForkAggregateResult::Malformed => (),
  735. }
  736. }
  737. let current_total_active_stake = heaviest_fork_aggregate.total_active_stake();
  738. if current_total_active_stake > total_active_stake {
  739. total_active_stake = current_total_active_stake;
  740. progress
  741. .heaviest_fork_aggregate
  742. .as_mut()
  743. .unwrap()
  744. .total_active_stake = current_total_active_stake;
  745. }
  746. if old_progress != *progress {
  747. info!(
  748. "Total active stake: {} Total stake {}",
  749. heaviest_fork_aggregate.total_active_stake(),
  750. total_stake
  751. );
  752. write_wen_restart_records(wen_restart_path, progress)?;
  753. old_progress = progress.clone();
  754. }
  755. let elapsed = timestamp().saturating_sub(start);
  756. let time_left = GOSSIP_SLEEP_MILLIS.saturating_sub(elapsed);
  757. if time_left > 0 {
  758. sleep(Duration::from_millis(time_left));
  759. }
  760. // Print the block stake map after a while.
  761. if stat_printed_at.elapsed() > Duration::from_secs(COORDINATOR_STAT_PRINT_INTERVAL_SECONDS)
  762. {
  763. heaviest_fork_aggregate.print_block_stake_map();
  764. stat_printed_at = Instant::now();
  765. }
  766. }
  767. }
  768. pub(crate) fn repair_heaviest_fork(
  769. my_heaviest_fork_slot: Slot,
  770. heaviest_slot: Slot,
  771. exit: Arc<AtomicBool>,
  772. blockstore: Arc<Blockstore>,
  773. wen_restart_repair_slots: Arc<RwLock<Vec<Slot>>>,
  774. ) -> Result<()> {
  775. loop {
  776. if exit.load(Ordering::Relaxed) {
  777. return Err(WenRestartError::Exiting.into());
  778. }
  779. // Repair all ancestors of heaviest_slot (including itself) which are larger than
  780. // my_heaviest_fork_slot.
  781. let to_repair = if blockstore.meta(heaviest_slot).is_ok_and(|x| x.is_some()) {
  782. AncestorIterator::new_inclusive(heaviest_slot, &blockstore)
  783. .take_while(|slot| *slot > my_heaviest_fork_slot)
  784. .filter(|slot| !blockstore.is_full(*slot))
  785. .collect()
  786. } else {
  787. vec![heaviest_slot]
  788. };
  789. info!("wen_restart repair slots: {to_repair:?}");
  790. if to_repair.is_empty() {
  791. return Ok(()); // All blocks are full
  792. }
  793. *wen_restart_repair_slots.write().unwrap() = to_repair;
  794. sleep(Duration::from_millis(GOSSIP_SLEEP_MILLIS));
  795. }
  796. }
  797. pub(crate) fn verify_coordinator_heaviest_fork(
  798. my_heaviest_fork_slot: Slot,
  799. coordinator_heaviest_slot: Slot,
  800. coordinator_heaviest_hash: &Hash,
  801. bank_forks: Arc<RwLock<BankForks>>,
  802. blockstore: Arc<Blockstore>,
  803. exit: Arc<AtomicBool>,
  804. wen_restart_repair_slots: Arc<RwLock<Vec<Slot>>>,
  805. ) -> Result<()> {
  806. repair_heaviest_fork(
  807. my_heaviest_fork_slot,
  808. coordinator_heaviest_slot,
  809. exit.clone(),
  810. blockstore.clone(),
  811. wen_restart_repair_slots.clone(),
  812. )?;
  813. let root_slot = bank_forks.read().unwrap().root_bank().slot();
  814. let mut coordinator_heaviest_slot_ancestors: Vec<Slot> =
  815. AncestorIterator::new_inclusive(coordinator_heaviest_slot, &blockstore)
  816. .take_while(|slot| slot >= &root_slot)
  817. .collect();
  818. coordinator_heaviest_slot_ancestors.sort();
  819. if !coordinator_heaviest_slot_ancestors.contains(&root_slot) {
  820. return Err(WenRestartError::HeaviestForkOnLeaderOnDifferentFork(
  821. coordinator_heaviest_slot,
  822. root_slot,
  823. )
  824. .into());
  825. }
  826. if coordinator_heaviest_slot > my_heaviest_fork_slot
  827. && !coordinator_heaviest_slot_ancestors.contains(&my_heaviest_fork_slot)
  828. {
  829. return Err(WenRestartError::HeaviestForkOnLeaderOnDifferentFork(
  830. coordinator_heaviest_slot,
  831. my_heaviest_fork_slot,
  832. )
  833. .into());
  834. }
  835. if coordinator_heaviest_slot < my_heaviest_fork_slot
  836. && !AncestorIterator::new(my_heaviest_fork_slot, &blockstore)
  837. .any(|slot| slot == coordinator_heaviest_slot)
  838. {
  839. return Err(WenRestartError::HeaviestForkOnLeaderOnDifferentFork(
  840. coordinator_heaviest_slot,
  841. my_heaviest_fork_slot,
  842. )
  843. .into());
  844. }
  845. let my_bankhash = if !coordinator_heaviest_slot_ancestors.is_empty() {
  846. find_bankhash_of_heaviest_fork(
  847. coordinator_heaviest_slot,
  848. coordinator_heaviest_slot_ancestors,
  849. blockstore.clone(),
  850. bank_forks.clone(),
  851. &exit,
  852. )?
  853. } else {
  854. bank_forks
  855. .read()
  856. .unwrap()
  857. .get(coordinator_heaviest_slot)
  858. .unwrap()
  859. .hash()
  860. };
  861. if my_bankhash != *coordinator_heaviest_hash {
  862. return Err(WenRestartError::BankHashMismatch(
  863. coordinator_heaviest_slot,
  864. my_bankhash,
  865. *coordinator_heaviest_hash,
  866. )
  867. .into());
  868. }
  869. Ok(())
  870. }
  871. pub(crate) fn receive_restart_heaviest_fork(
  872. wen_restart_coordinator: Pubkey,
  873. cluster_info: Arc<ClusterInfo>,
  874. exit: Arc<AtomicBool>,
  875. progress: &mut WenRestartProgress,
  876. ) -> Result<(Slot, Hash)> {
  877. let mut cursor = solana_gossip::crds::Cursor::default();
  878. loop {
  879. if exit.load(Ordering::Relaxed) {
  880. return Err(WenRestartError::Exiting.into());
  881. }
  882. for new_heaviest_fork in cluster_info.get_restart_heaviest_fork(&mut cursor) {
  883. if new_heaviest_fork.from == wen_restart_coordinator {
  884. info!(
  885. "Received new heaviest fork from coordinator: {wen_restart_coordinator} \
  886. {new_heaviest_fork:?}"
  887. );
  888. let coordinator_heaviest_slot = new_heaviest_fork.last_slot;
  889. let coordinator_heaviest_hash = new_heaviest_fork.last_slot_hash;
  890. progress.coordinator_heaviest_fork = Some(HeaviestForkRecord {
  891. slot: coordinator_heaviest_slot,
  892. bankhash: coordinator_heaviest_hash.to_string(),
  893. total_active_stake: 0,
  894. wallclock: new_heaviest_fork.wallclock,
  895. shred_version: new_heaviest_fork.shred_version as u32,
  896. from: new_heaviest_fork.from.to_string(),
  897. });
  898. return Ok((coordinator_heaviest_slot, coordinator_heaviest_hash));
  899. }
  900. }
  901. sleep(Duration::from_millis(GOSSIP_SLEEP_MILLIS));
  902. }
  903. }
  904. pub(crate) fn send_and_receive_heaviest_fork(
  905. my_heaviest_fork_slot: Slot,
  906. my_heaviest_fork_hash: Hash,
  907. config: &WenRestartConfig,
  908. progress: &mut WenRestartProgress,
  909. pushfn: impl FnOnce(Slot, Hash),
  910. ) -> Result<(Slot, Hash)> {
  911. if config.cluster_info.id() == config.wen_restart_coordinator {
  912. pushfn(my_heaviest_fork_slot, my_heaviest_fork_hash);
  913. Ok((my_heaviest_fork_slot, my_heaviest_fork_hash))
  914. } else {
  915. let (coordinator_slot, coordinator_hash) = receive_restart_heaviest_fork(
  916. config.wen_restart_coordinator,
  917. config.cluster_info.clone(),
  918. config.exit.clone(),
  919. progress,
  920. )?;
  921. match verify_coordinator_heaviest_fork(
  922. my_heaviest_fork_slot,
  923. coordinator_slot,
  924. &coordinator_hash,
  925. config.bank_forks.clone(),
  926. config.blockstore.clone(),
  927. config.exit.clone(),
  928. config.wen_restart_repair_slots.clone().unwrap(),
  929. ) {
  930. Ok(()) => pushfn(coordinator_slot, coordinator_hash),
  931. Err(e) => {
  932. warn!("Failed to verify coordinator heaviest fork: {e:?}, exit soon");
  933. pushfn(my_heaviest_fork_slot, my_heaviest_fork_hash);
  934. // flush_push_queue only flushes the messages to crds, doesn't guarantee
  935. // sending them out, so we still need to wait for a while before exiting.
  936. config.cluster_info.flush_push_queue();
  937. sleep(Duration::from_millis(GOSSIP_SLEEP_MILLIS));
  938. return Err(e);
  939. }
  940. }
  941. Ok((coordinator_slot, coordinator_hash))
  942. }
  943. }
// Everything wait_for_wen_restart() needs to run the wen_restart protocol.
#[derive(Clone)]
pub struct WenRestartConfig {
    // File the WenRestartProgress record is read from and written to.
    pub wen_restart_path: PathBuf,
    // Identity of the coordinator; compared against cluster_info.id() to decide whether
    // this node acts as the coordinator.
    pub wen_restart_coordinator: Pubkey,
    // Handed to initialize(); its last voted slot and hash seed the last voted fork slots
    // when the progress record doesn't have them yet.
    pub last_vote: VoteTransaction,
    pub blockstore: Arc<Blockstore>,
    pub cluster_info: Arc<ClusterInfo>,
    pub bank_forks: Arc<RwLock<BankForks>>,
    // Shared list the slots to repair are written to. wait_for_wen_restart() and
    // send_and_receive_heaviest_fork() unwrap() this, so it must be Some for them.
    pub wen_restart_repair_slots: Option<Arc<RwLock<Vec<Slot>>>>,
    // Passed through to aggregate_restart_last_voted_fork_slots().
    pub wait_for_supermajority_threshold_percent: u64,
    // Needed to generate the snapshot; when None the GenerateSnapshot state fails with
    // GenerateSnapshotWhenDisabled.
    pub snapshot_controller: Option<Arc<SnapshotController>>,
    // Used by generate_snapshot() to stop AccountsBackgroundService and wait for it.
    pub abs_status: AbsStatus,
    // Passed to generate_snapshot() to compute the new shred version.
    pub genesis_config_hash: Hash,
    // Set to request the protocol loops to stop.
    pub exit: Arc<AtomicBool>,
}
// Entry point of the wen_restart protocol.
//
// Loads the persisted progress through initialize() and then runs the state machine: each
// loop iteration performs the work of the current state (skipping work whose result is
// already recorded) and then calls increment_and_write_wen_restart_records() to move to the
// next state and persist the progress.
//
// Returns Ok(()) once the Done state has been handled; on the coordinator that includes
// running aggregate_restart_heaviest_fork() until it returns. Any error from a step aborts
// the protocol and is returned to the caller.
pub fn wait_for_wen_restart(config: WenRestartConfig) -> Result<()> {
    let (mut state, mut progress) = initialize(
        &config.wen_restart_path,
        config.last_vote.clone(),
        config.blockstore.clone(),
    )?;
    loop {
        state = match state {
            // Send out our last voted fork slots and remember what was sent.
            WenRestartProgressInternalState::Init {
                last_voted_fork_slots,
                last_vote_bankhash,
            } => {
                progress.my_last_voted_fork_slots = Some(send_restart_last_voted_fork_slots(
                    config.cluster_info.clone(),
                    &last_voted_fork_slots,
                    last_vote_bankhash,
                )?);
                WenRestartProgressInternalState::Init {
                    last_voted_fork_slots,
                    last_vote_bankhash,
                }
            }
            // Aggregate the last voted fork slots, unless a final result was already
            // recorded.
            WenRestartProgressInternalState::LastVotedForkSlots {
                last_voted_fork_slots,
                aggregate_final_result,
            } => {
                let final_result = match aggregate_final_result {
                    Some(result) => result,
                    None => aggregate_restart_last_voted_fork_slots(
                        &config.wen_restart_path,
                        config.wait_for_supermajority_threshold_percent,
                        config.cluster_info.clone(),
                        &last_voted_fork_slots,
                        config.bank_forks.clone(),
                        config.blockstore.clone(),
                        config.wen_restart_repair_slots.clone().unwrap(),
                        config.exit.clone(),
                        &mut progress,
                    )?,
                };
                WenRestartProgressInternalState::LastVotedForkSlots {
                    last_voted_fork_slots,
                    aggregate_final_result: Some(final_result),
                }
            }
            // Pick our heaviest fork from the aggregate result, unless already recorded.
            WenRestartProgressInternalState::FindHeaviestFork {
                aggregate_final_result,
                my_heaviest_fork,
            } => {
                let heaviest_fork = match my_heaviest_fork {
                    Some(heaviest_fork) => heaviest_fork,
                    None => {
                        let (slot, bankhash) = find_heaviest_fork(
                            aggregate_final_result.clone(),
                            config.bank_forks.clone(),
                            config.blockstore.clone(),
                            config.exit.clone(),
                        )?;
                        info!("Heaviest fork found: slot: {slot}, bankhash: {bankhash}");
                        HeaviestForkRecord {
                            slot,
                            bankhash: bankhash.to_string(),
                            total_active_stake: 0,
                            wallclock: 0,
                            shred_version: config.cluster_info.my_shred_version() as u32,
                            from: config.cluster_info.id().to_string(),
                        }
                    }
                };
                WenRestartProgressInternalState::FindHeaviestFork {
                    aggregate_final_result,
                    my_heaviest_fork: Some(heaviest_fork),
                }
            }
            // Exchange the heaviest fork with the coordinator; the (slot, hash) returned by
            // send_and_receive_heaviest_fork() replaces our own choice from here on.
            WenRestartProgressInternalState::HeaviestFork {
                my_heaviest_fork_slot,
                my_heaviest_fork_hash,
            } => {
                let (slot, hash) = send_and_receive_heaviest_fork(
                    my_heaviest_fork_slot,
                    my_heaviest_fork_hash,
                    &config,
                    &mut progress,
                    |slot, hash| {
                        config
                            .cluster_info
                            .push_restart_heaviest_fork(slot, hash, 0);
                    },
                )?;
                WenRestartProgressInternalState::HeaviestFork {
                    my_heaviest_fork_slot: slot,
                    my_heaviest_fork_hash: hash,
                }
            }
            // Generate the snapshot for the agreed slot, unless one was already recorded.
            WenRestartProgressInternalState::GenerateSnapshot {
                my_heaviest_fork_slot,
                my_snapshot,
            } => {
                let snapshot_record = match my_snapshot {
                    Some(record) => record,
                    None => match &config.snapshot_controller {
                        Some(snapshot_controller) => generate_snapshot(
                            config.bank_forks.clone(),
                            snapshot_controller,
                            &config.abs_status,
                            config.genesis_config_hash,
                            my_heaviest_fork_slot,
                        ),
                        None => {
                            // Only tests don't have a snapshot controller
                            Err(WenRestartError::GenerateSnapshotWhenDisabled.into())
                        }
                    }?,
                };
                WenRestartProgressInternalState::GenerateSnapshot {
                    my_heaviest_fork_slot,
                    my_snapshot: Some(snapshot_record),
                }
            }
            // Proceed to restart if we are ready to wait for supermajority.
            WenRestartProgressInternalState::Done {
                slot,
                hash,
                shred_version,
            } => {
                // Logged at error level, presumably so the operator instructions stand out.
                error!(
                    "Wen start finished, please remove --wen_restart and restart with \
                     --wait-for-supermajority {slot} --expected-bank-hash {hash} \
                     --expected-shred-version {shred_version} --no-snapshot-fetch",
                );
                // The coordinator keeps aggregating heaviest fork messages from the other
                // nodes before returning.
                if config.cluster_info.id() == config.wen_restart_coordinator {
                    aggregate_restart_heaviest_fork(
                        &config.wen_restart_path,
                        config.cluster_info.clone(),
                        config.bank_forks.clone(),
                        config.exit.clone(),
                        &mut progress,
                    )?;
                }
                return Ok(());
            }
        };
        // The work of the current state is done, advance and persist the progress.
        state = increment_and_write_wen_restart_records(
            &config.wen_restart_path,
            state,
            &mut progress,
        )?;
    }
}
  1108. pub(crate) fn increment_and_write_wen_restart_records(
  1109. records_path: &PathBuf,
  1110. current_state: WenRestartProgressInternalState,
  1111. progress: &mut WenRestartProgress,
  1112. ) -> Result<WenRestartProgressInternalState> {
  1113. let new_state = match current_state {
  1114. WenRestartProgressInternalState::Init {
  1115. last_voted_fork_slots,
  1116. last_vote_bankhash: _,
  1117. } => {
  1118. progress.set_state(RestartState::LastVotedForkSlots);
  1119. WenRestartProgressInternalState::LastVotedForkSlots {
  1120. last_voted_fork_slots,
  1121. aggregate_final_result: None,
  1122. }
  1123. }
  1124. WenRestartProgressInternalState::LastVotedForkSlots {
  1125. last_voted_fork_slots: _,
  1126. aggregate_final_result,
  1127. } => {
  1128. if let Some(aggregate_final_result) = aggregate_final_result {
  1129. progress.set_state(RestartState::HeaviestFork);
  1130. if let Some(aggregate_record) = progress.last_voted_fork_slots_aggregate.as_mut() {
  1131. aggregate_record.final_result = Some(LastVotedForkSlotsAggregateFinal {
  1132. slots_stake_map: aggregate_final_result.slots_stake_map.clone(),
  1133. epoch_infos: aggregate_final_result
  1134. .epoch_info_vec
  1135. .iter()
  1136. .map(|info| LastVotedForkSlotsEpochInfoRecord {
  1137. epoch: info.epoch,
  1138. total_stake: info.total_stake,
  1139. actively_voting_stake: info.actively_voting_stake,
  1140. actively_voting_for_this_epoch_stake: info
  1141. .actively_voting_for_this_epoch_stake,
  1142. })
  1143. .collect(),
  1144. });
  1145. }
  1146. WenRestartProgressInternalState::FindHeaviestFork {
  1147. aggregate_final_result,
  1148. my_heaviest_fork: None,
  1149. }
  1150. } else {
  1151. return Err(
  1152. WenRestartError::UnexpectedState(RestartState::LastVotedForkSlots).into(),
  1153. );
  1154. }
  1155. }
  1156. WenRestartProgressInternalState::FindHeaviestFork {
  1157. aggregate_final_result: _,
  1158. my_heaviest_fork,
  1159. } => {
  1160. if let Some(my_heaviest_fork) = my_heaviest_fork {
  1161. progress.my_heaviest_fork = Some(my_heaviest_fork.clone());
  1162. WenRestartProgressInternalState::HeaviestFork {
  1163. my_heaviest_fork_slot: my_heaviest_fork.slot,
  1164. my_heaviest_fork_hash: Hash::from_str(&my_heaviest_fork.bankhash).unwrap(),
  1165. }
  1166. } else {
  1167. return Err(WenRestartError::UnexpectedState(RestartState::HeaviestFork).into());
  1168. }
  1169. }
  1170. WenRestartProgressInternalState::HeaviestFork {
  1171. my_heaviest_fork_slot,
  1172. ..
  1173. } => {
  1174. progress.set_state(RestartState::GenerateSnapshot);
  1175. WenRestartProgressInternalState::GenerateSnapshot {
  1176. my_heaviest_fork_slot,
  1177. my_snapshot: None,
  1178. }
  1179. }
  1180. WenRestartProgressInternalState::GenerateSnapshot {
  1181. my_heaviest_fork_slot: _,
  1182. my_snapshot,
  1183. } => {
  1184. if let Some(my_snapshot) = my_snapshot {
  1185. progress.set_state(RestartState::Done);
  1186. progress.my_snapshot = Some(my_snapshot.clone());
  1187. WenRestartProgressInternalState::Done {
  1188. slot: my_snapshot.slot,
  1189. hash: Hash::from_str(&my_snapshot.bankhash).unwrap(),
  1190. shred_version: my_snapshot.shred_version as u16,
  1191. }
  1192. } else {
  1193. return Err(WenRestartError::MissingSnapshotInProtobuf.into());
  1194. }
  1195. }
  1196. WenRestartProgressInternalState::Done { .. } => {
  1197. return Err(WenRestartError::UnexpectedState(RestartState::Done).into())
  1198. }
  1199. };
  1200. write_wen_restart_records(records_path, progress)?;
  1201. Ok(new_state)
  1202. }
  1203. pub(crate) fn initialize(
  1204. records_path: &PathBuf,
  1205. last_vote: VoteTransaction,
  1206. blockstore: Arc<Blockstore>,
  1207. ) -> Result<(WenRestartProgressInternalState, WenRestartProgress)> {
  1208. let progress = match read_wen_restart_records(records_path) {
  1209. Ok(progress) => progress,
  1210. Err(e) => {
  1211. let stdio_err = e.downcast_ref::<std::io::Error>();
  1212. if stdio_err.is_some_and(|e| e.kind() == std::io::ErrorKind::NotFound) {
  1213. info!("wen restart proto file not found at {records_path:?}, write init state");
  1214. let progress = WenRestartProgress {
  1215. state: RestartState::Init.into(),
  1216. ..Default::default()
  1217. };
  1218. write_wen_restart_records(records_path, &progress)?;
  1219. progress
  1220. } else {
  1221. return Err(e);
  1222. }
  1223. }
  1224. };
  1225. match progress.state() {
  1226. RestartState::Done => {
  1227. if let Some(my_snapshot) = progress.my_snapshot.as_ref() {
  1228. Ok((
  1229. WenRestartProgressInternalState::Done {
  1230. slot: my_snapshot.slot,
  1231. hash: Hash::from_str(&my_snapshot.bankhash).unwrap(),
  1232. shred_version: my_snapshot.shred_version as u16,
  1233. },
  1234. progress,
  1235. ))
  1236. } else {
  1237. Err(WenRestartError::MissingSnapshotInProtobuf.into())
  1238. }
  1239. }
  1240. RestartState::Init => {
  1241. let last_voted_fork_slots;
  1242. let last_vote_bankhash;
  1243. match &progress.my_last_voted_fork_slots {
  1244. Some(my_last_voted_fork_slots) => {
  1245. last_voted_fork_slots = my_last_voted_fork_slots.last_voted_fork_slots.clone();
  1246. last_vote_bankhash =
  1247. Hash::from_str(&my_last_voted_fork_slots.last_vote_bankhash).unwrap();
  1248. }
  1249. None => {
  1250. // repair and restart option does not work without last voted slot.
  1251. if let Some(last_vote_slot) = last_vote.last_voted_slot() {
  1252. last_vote_bankhash = last_vote.hash();
  1253. last_voted_fork_slots =
  1254. AncestorIterator::new_inclusive(last_vote_slot, &blockstore)
  1255. .take(RestartLastVotedForkSlots::MAX_SLOTS)
  1256. .collect();
  1257. } else {
  1258. error!(
  1259. "Cannot find last voted slot in the tower storage, it either means \
  1260. that this node has never voted or the tower storage is corrupted. \
  1261. Unfortunately, since WenRestart is a consensus protocol depending on \
  1262. each participant to send their last voted fork slots, your validator \
  1263. cannot participate.Please check discord for the conclusion of the \
  1264. WenRestart protocol, then generate a snapshot and use \
  1265. --wait-for-supermajority to restart the validator."
  1266. );
  1267. return Err(WenRestartError::MissingLastVotedForkSlots.into());
  1268. }
  1269. }
  1270. }
  1271. Ok((
  1272. WenRestartProgressInternalState::Init {
  1273. last_voted_fork_slots,
  1274. last_vote_bankhash,
  1275. },
  1276. progress,
  1277. ))
  1278. }
  1279. RestartState::LastVotedForkSlots => {
  1280. if let Some(record) = progress.my_last_voted_fork_slots.as_ref() {
  1281. Ok((
  1282. WenRestartProgressInternalState::LastVotedForkSlots {
  1283. last_voted_fork_slots: record.last_voted_fork_slots.clone(),
  1284. aggregate_final_result: progress
  1285. .last_voted_fork_slots_aggregate
  1286. .as_ref()
  1287. .and_then(|r| {
  1288. r.final_result.as_ref().map(|result| {
  1289. LastVotedForkSlotsFinalResult {
  1290. slots_stake_map: result.slots_stake_map.clone(),
  1291. epoch_info_vec: result
  1292. .epoch_infos
  1293. .iter()
  1294. .map(|info| LastVotedForkSlotsEpochInfo {
  1295. epoch: info.epoch,
  1296. total_stake: info.total_stake,
  1297. actively_voting_stake: info.actively_voting_stake,
  1298. actively_voting_for_this_epoch_stake: info
  1299. .actively_voting_for_this_epoch_stake,
  1300. })
  1301. .collect(),
  1302. }
  1303. })
  1304. }),
  1305. },
  1306. progress,
  1307. ))
  1308. } else {
  1309. Err(WenRestartError::MalformedLastVotedForkSlotsProtobuf(None).into())
  1310. }
  1311. }
  1312. RestartState::HeaviestFork => Ok((
  1313. WenRestartProgressInternalState::FindHeaviestFork {
  1314. aggregate_final_result: progress
  1315. .last_voted_fork_slots_aggregate
  1316. .as_ref()
  1317. .and_then(|r| {
  1318. r.final_result
  1319. .as_ref()
  1320. .map(|result| LastVotedForkSlotsFinalResult {
  1321. slots_stake_map: result.slots_stake_map.clone(),
  1322. epoch_info_vec: result
  1323. .epoch_infos
  1324. .iter()
  1325. .map(|info| LastVotedForkSlotsEpochInfo {
  1326. epoch: info.epoch,
  1327. total_stake: info.total_stake,
  1328. actively_voting_stake: info.actively_voting_stake,
  1329. actively_voting_for_this_epoch_stake: info
  1330. .actively_voting_for_this_epoch_stake,
  1331. })
  1332. .collect(),
  1333. })
  1334. })
  1335. .ok_or(WenRestartError::MalformedProgress(
  1336. RestartState::HeaviestFork,
  1337. "final_result in last_voted_fork_slots_aggregate".to_string(),
  1338. ))?,
  1339. my_heaviest_fork: progress.my_heaviest_fork.clone(),
  1340. },
  1341. progress,
  1342. )),
  1343. RestartState::GenerateSnapshot => Ok((
  1344. WenRestartProgressInternalState::GenerateSnapshot {
  1345. my_heaviest_fork_slot: progress
  1346. .my_heaviest_fork
  1347. .as_ref()
  1348. .ok_or(WenRestartError::MalformedProgress(
  1349. RestartState::GenerateSnapshot,
  1350. "my_heaviest_fork".to_string(),
  1351. ))?
  1352. .slot,
  1353. my_snapshot: progress.my_snapshot.clone(),
  1354. },
  1355. progress,
  1356. )),
  1357. }
  1358. }
  1359. fn read_wen_restart_records(records_path: &PathBuf) -> Result<WenRestartProgress> {
  1360. let buffer = read(records_path)?;
  1361. let progress = WenRestartProgress::decode(&mut Cursor::new(buffer))?;
  1362. info!("read record {progress:?}");
  1363. Ok(progress)
  1364. }
  1365. pub(crate) fn write_wen_restart_records(
  1366. records_path: &PathBuf,
  1367. new_progress: &WenRestartProgress,
  1368. ) -> Result<()> {
  1369. // overwrite anything if exists
  1370. let mut file = File::create(records_path)?;
  1371. info!("writing new record {new_progress:?}");
  1372. let mut buf = Vec::with_capacity(new_progress.encoded_len());
  1373. new_progress.encode(&mut buf)?;
  1374. file.write_all(&buf)?;
  1375. Ok(())
  1376. }
  1377. #[cfg(test)]
  1378. mod tests {
  1379. use {
  1380. crate::wen_restart::{tests::wen_restart_proto::LastVotedForkSlotsAggregateFinal, *},
  1381. agave_snapshots::{
  1382. paths::build_incremental_snapshot_archive_path,
  1383. snapshot_config::{SnapshotConfig, SnapshotUsage},
  1384. snapshot_hash::SnapshotHash,
  1385. },
  1386. crossbeam_channel::unbounded,
  1387. solana_entry::entry::create_ticks,
  1388. solana_genesis_utils::MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
  1389. solana_gossip::{
  1390. cluster_info::ClusterInfo,
  1391. contact_info::ContactInfo,
  1392. crds::GossipRoute,
  1393. crds_data::CrdsData,
  1394. crds_value::CrdsValue,
  1395. restart_crds_values::{RestartHeaviestFork, RestartLastVotedForkSlots},
  1396. },
  1397. solana_hash::Hash,
  1398. solana_keypair::Keypair,
  1399. solana_ledger::{
  1400. blockstore::{create_new_ledger, entries_to_test_shreds, Blockstore},
  1401. blockstore_options::LedgerColumnOptions,
  1402. blockstore_processor::{fill_blockstore_slot_with_ticks, test_process_blockstore},
  1403. get_tmp_ledger_path_auto_delete,
  1404. },
  1405. solana_net_utils::SocketAddrSpace,
  1406. solana_pubkey::Pubkey,
  1407. solana_runtime::{
  1408. epoch_stakes::VersionedEpochStakes,
  1409. genesis_utils::{
  1410. create_genesis_config_with_vote_accounts, GenesisConfigInfo, ValidatorVoteKeypairs,
  1411. },
  1412. snapshot_bank_utils::bank_to_full_snapshot_archive,
  1413. },
  1414. solana_signer::Signer,
  1415. solana_time_utils::timestamp,
  1416. solana_vote::vote_account::VoteAccount,
  1417. solana_vote_interface::state::{TowerSync, Vote},
  1418. solana_vote_program::vote_state::create_v4_account_with_authorized,
  1419. std::{fs::remove_file, sync::Arc, thread::Builder},
  1420. tempfile::TempDir,
  1421. };
  1422. const SHRED_VERSION: u16 = 2;
  1423. const EXPECTED_SLOTS: Slot = 40;
  1424. const TICKS_PER_SLOT: u64 = 2;
  1425. const TOTAL_VALIDATOR_COUNT: u16 = 20;
  1426. const MY_INDEX: usize = TOTAL_VALIDATOR_COUNT as usize - 1;
  1427. const COORDINATOR_INDEX: usize = 0;
  1428. const WAIT_FOR_THREAD_TIMEOUT: u64 = 10_000;
  1429. const WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT: u64 = 80;
  1430. const NON_CONFORMING_VALIDATOR_PERCENT: u64 = 5;
  1431. fn push_restart_last_voted_fork_slots(
  1432. cluster_info: Arc<ClusterInfo>,
  1433. node: &ContactInfo,
  1434. last_voted_fork_slots: &[Slot],
  1435. last_vote_hash: &Hash,
  1436. node_keypair: &Keypair,
  1437. wallclock: u64,
  1438. ) {
  1439. let slots = RestartLastVotedForkSlots::new(
  1440. *node.pubkey(),
  1441. wallclock,
  1442. last_voted_fork_slots,
  1443. *last_vote_hash,
  1444. SHRED_VERSION,
  1445. )
  1446. .unwrap();
  1447. let entries = vec![
  1448. CrdsValue::new(CrdsData::from(node), node_keypair),
  1449. CrdsValue::new(CrdsData::RestartLastVotedForkSlots(slots), node_keypair),
  1450. ];
  1451. {
  1452. let mut gossip_crds = cluster_info.gossip.crds.write().unwrap();
  1453. for entry in entries {
  1454. assert!(gossip_crds
  1455. .insert(entry, /*now=*/ 0, GossipRoute::LocalMessage)
  1456. .is_ok());
  1457. }
  1458. }
  1459. }
  1460. fn push_restart_heaviest_fork(
  1461. cluster_info: Arc<ClusterInfo>,
  1462. node: &ContactInfo,
  1463. heaviest_fork_slot: Slot,
  1464. heaviest_fork_hash: &Hash,
  1465. observed_stake: u64,
  1466. node_keypair: &Keypair,
  1467. wallclock: u64,
  1468. ) {
  1469. let heaviest_fork = RestartHeaviestFork {
  1470. from: *node.pubkey(),
  1471. wallclock,
  1472. last_slot: heaviest_fork_slot,
  1473. last_slot_hash: *heaviest_fork_hash,
  1474. observed_stake,
  1475. shred_version: SHRED_VERSION,
  1476. };
  1477. assert!(cluster_info
  1478. .gossip
  1479. .crds
  1480. .write()
  1481. .unwrap()
  1482. .insert(
  1483. CrdsValue::new(CrdsData::RestartHeaviestFork(heaviest_fork), node_keypair),
  1484. /*now=*/ 0,
  1485. GossipRoute::LocalMessage
  1486. )
  1487. .is_ok());
  1488. }
  /// Shared fixture produced by `wen_restart_test_init` and consumed by the tests.
  struct WenRestartTestInitResult {
      /// Randomly generated keypairs, one set per validator in the test cluster.
      pub validator_voting_keypairs: Vec<ValidatorVoteKeypairs>,
      /// Blockstore opened on the test ledger, pre-populated with the local fork.
      pub blockstore: Arc<Blockstore>,
      /// Cluster info of the local validator (the keypairs at `MY_INDEX`).
      pub cluster_info: Arc<ClusterInfo>,
      /// Bank forks obtained by processing the freshly created ledger.
      pub bank_forks: Arc<RwLock<BankForks>>,
      /// Slots of the local last voted fork, newest first and ending with slot 0.
      pub last_voted_fork_slots: Vec<Slot>,
      /// Location of the wen_restart progress file inside the ledger directory.
      pub wen_restart_proto_path: PathBuf,
      /// Node pubkey of the validator at `COORDINATOR_INDEX`.
      pub wen_restart_coordinator: Pubkey,
      /// Blockhash returned after inserting the last slot of the local fork.
      pub last_blockhash: Hash,
      /// Hash of the genesis config used to create the ledger.
      pub genesis_config_hash: Hash,
  }
  1500. fn insert_slots_into_blockstore(
  1501. blockstore: Arc<Blockstore>,
  1502. first_parent: Slot,
  1503. slots_to_insert: &[Slot],
  1504. entries_per_slot: u64,
  1505. start_blockhash: Hash,
  1506. ) -> Hash {
  1507. let mut last_hash = start_blockhash;
  1508. let mut last_parent = first_parent;
  1509. for i in slots_to_insert {
  1510. last_hash = fill_blockstore_slot_with_ticks(
  1511. &blockstore,
  1512. entries_per_slot,
  1513. *i,
  1514. last_parent,
  1515. last_hash,
  1516. );
  1517. last_parent = *i;
  1518. }
  1519. last_hash
  1520. }
  1521. fn wen_restart_test_init(ledger_path: &TempDir) -> WenRestartTestInitResult {
  1522. let validator_voting_keypairs: Vec<_> = (0..TOTAL_VALIDATOR_COUNT)
  1523. .map(|_| ValidatorVoteKeypairs::new_rand())
  1524. .collect();
  1525. let node_keypair = Arc::new(
  1526. validator_voting_keypairs[MY_INDEX]
  1527. .node_keypair
  1528. .insecure_clone(),
  1529. );
  1530. let wen_restart_coordinator = validator_voting_keypairs[COORDINATOR_INDEX]
  1531. .node_keypair
  1532. .pubkey();
  1533. let cluster_info = Arc::new(ClusterInfo::new(
  1534. {
  1535. let mut contact_info =
  1536. ContactInfo::new_localhost(&node_keypair.pubkey(), timestamp());
  1537. contact_info.set_shred_version(SHRED_VERSION);
  1538. contact_info
  1539. },
  1540. node_keypair.clone(),
  1541. SocketAddrSpace::Unspecified,
  1542. ));
  1543. let GenesisConfigInfo {
  1544. mut genesis_config, ..
  1545. } = create_genesis_config_with_vote_accounts(
  1546. 10_000,
  1547. &validator_voting_keypairs,
  1548. vec![100; validator_voting_keypairs.len()],
  1549. );
  1550. genesis_config.ticks_per_slot = TICKS_PER_SLOT;
  1551. let start_blockhash = create_new_ledger(
  1552. ledger_path.path(),
  1553. &genesis_config,
  1554. MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
  1555. LedgerColumnOptions::default(),
  1556. )
  1557. .unwrap();
  1558. let blockstore = Arc::new(Blockstore::open(ledger_path.path()).unwrap());
  1559. let (bank_forks, ..) = test_process_blockstore(
  1560. &genesis_config,
  1561. &blockstore,
  1562. &ProcessOptions {
  1563. run_verification: true,
  1564. ..ProcessOptions::default()
  1565. },
  1566. Arc::default(),
  1567. );
  1568. let mut last_blockhash = start_blockhash;
  1569. // Skip block 1, 2 links directly to 0.
  1570. let last_parent: Slot = 2;
  1571. let mut last_voted_fork_slots: Vec<Slot> = Vec::new();
  1572. last_voted_fork_slots
  1573. .extend(last_parent..last_parent.saturating_add(EXPECTED_SLOTS).saturating_add(1));
  1574. last_blockhash = insert_slots_into_blockstore(
  1575. blockstore.clone(),
  1576. 0,
  1577. &last_voted_fork_slots,
  1578. genesis_config.ticks_per_slot,
  1579. last_blockhash,
  1580. );
  1581. last_voted_fork_slots.insert(0, 0);
  1582. last_voted_fork_slots.reverse();
  1583. let mut wen_restart_proto_path = ledger_path.path().to_path_buf();
  1584. wen_restart_proto_path.push("wen_restart_status.proto");
  1585. let _ = remove_file(&wen_restart_proto_path);
  1586. WenRestartTestInitResult {
  1587. validator_voting_keypairs,
  1588. blockstore,
  1589. cluster_info,
  1590. bank_forks,
  1591. last_voted_fork_slots,
  1592. wen_restart_proto_path,
  1593. wen_restart_coordinator,
  1594. last_blockhash,
  1595. genesis_config_hash: genesis_config.hash(),
  1596. }
  1597. }
  1598. fn wait_on_expected_progress_with_timeout(
  1599. wen_restart_proto_path: PathBuf,
  1600. expected_progress: WenRestartProgress,
  1601. ) {
  1602. let start = timestamp();
  1603. let mut progress = WenRestartProgress {
  1604. state: RestartState::Init.into(),
  1605. ..Default::default()
  1606. };
  1607. loop {
  1608. if let Ok(new_progress) = read_wen_restart_records(&wen_restart_proto_path) {
  1609. progress = new_progress;
  1610. if let Some(my_last_voted_fork_slots) = &expected_progress.my_last_voted_fork_slots
  1611. {
  1612. if let Some(record) = progress.my_last_voted_fork_slots.as_mut() {
  1613. record.wallclock = my_last_voted_fork_slots.wallclock;
  1614. }
  1615. }
  1616. if progress == expected_progress {
  1617. return;
  1618. }
  1619. }
  1620. if timestamp().saturating_sub(start) > WAIT_FOR_THREAD_TIMEOUT {
  1621. assert_eq!(
  1622. progress.my_last_voted_fork_slots,
  1623. expected_progress.my_last_voted_fork_slots
  1624. );
  1625. assert_eq!(
  1626. progress.last_voted_fork_slots_aggregate,
  1627. expected_progress.last_voted_fork_slots_aggregate
  1628. );
  1629. panic!(
  1630. "wait_on_expected_progress_with_timeout failed to get expected progress {:?} \
  1631. expected {:?}",
  1632. &progress, expected_progress
  1633. );
  1634. }
  1635. sleep(Duration::from_millis(10));
  1636. }
  1637. }
  1638. fn wen_restart_test_succeed_after_failure(
  1639. test_state: WenRestartTestInitResult,
  1640. last_vote_bankhash: Hash,
  1641. expected_progress: WenRestartProgress,
  1642. ) {
  1643. // continue normally after the error, we should be good.
  1644. let exit = Arc::new(AtomicBool::new(false));
  1645. let last_vote_slot: Slot = test_state.last_voted_fork_slots[0];
  1646. let wen_restart_config = WenRestartConfig {
  1647. wen_restart_path: test_state.wen_restart_proto_path.clone(),
  1648. wen_restart_coordinator: test_state.wen_restart_coordinator,
  1649. last_vote: VoteTransaction::from(Vote::new(vec![last_vote_slot], last_vote_bankhash)),
  1650. blockstore: test_state.blockstore.clone(),
  1651. cluster_info: test_state.cluster_info.clone(),
  1652. bank_forks: test_state.bank_forks.clone(),
  1653. wen_restart_repair_slots: Some(Arc::new(RwLock::new(Vec::new()))),
  1654. wait_for_supermajority_threshold_percent: 80,
  1655. snapshot_controller: None,
  1656. abs_status: AbsStatus::new_for_tests(),
  1657. genesis_config_hash: test_state.genesis_config_hash,
  1658. exit: exit.clone(),
  1659. };
  1660. let wen_restart_thread_handle = Builder::new()
  1661. .name("solana-wen-restart".to_string())
  1662. .spawn(move || {
  1663. let _ = wait_for_wen_restart(wen_restart_config).is_ok();
  1664. })
  1665. .unwrap();
  1666. wait_on_expected_progress_with_timeout(
  1667. test_state.wen_restart_proto_path.clone(),
  1668. expected_progress,
  1669. );
  1670. exit.store(true, Ordering::Relaxed);
  1671. assert!(wen_restart_thread_handle.join().is_ok());
  1672. let _ = remove_file(&test_state.wen_restart_proto_path);
  1673. }
  /// Drives `wait_for_wen_restart` through a complete, successful restart.
  ///
  /// The worker runs on its own thread while this test plays the rest of the cluster:
  /// it gossips `RestartLastVotedForkSlots` from other validators, inserts the two slots
  /// they voted past the local last vote (simulating repair), waits for the local
  /// heaviest fork bank to be frozen, and finally gossips the coordinator's heaviest
  /// fork. The persisted progress record must then be in the `Done` state with the
  /// expected contents.
  #[test]
  fn test_wen_restart_normal_flow() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let wen_restart_repair_slots = Some(Arc::new(RwLock::new(Vec::new())));
      let test_state = wen_restart_test_init(&ledger_path);
      let last_vote_slot = test_state.last_voted_fork_slots[0];
      let last_vote_bankhash = Hash::new_unique();
      // The other validators voted two slots past our last vote; those need repair.
      let expected_slots_to_repair: Vec<Slot> =
          (last_vote_slot + 1..last_vote_slot + 3).collect();
      let my_pubkey = &test_state.validator_voting_keypairs[MY_INDEX]
          .node_keypair
          .pubkey();
      let bank_snapshots_dir = tempfile::TempDir::new().unwrap();
      let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
      let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
      let snapshot_config = SnapshotConfig {
          bank_snapshots_dir: bank_snapshots_dir.as_ref().to_path_buf(),
          full_snapshot_archives_dir: full_snapshot_archives_dir.as_ref().to_path_buf(),
          incremental_snapshot_archives_dir: incremental_snapshot_archives_dir
              .as_ref()
              .to_path_buf(),
          ..Default::default()
      };
      let old_root_bank = test_state.bank_forks.read().unwrap().root_bank();
      // Trigger full snapshot generation on the old root bank.
      assert!(bank_to_full_snapshot_archive(
          snapshot_config.bank_snapshots_dir.clone(),
          &old_root_bank,
          Some(snapshot_config.snapshot_version),
          snapshot_config.full_snapshot_archives_dir.clone(),
          snapshot_config.incremental_snapshot_archives_dir.clone(),
          snapshot_config.archive_format,
      )
      .is_ok());
      let exit = Arc::new(AtomicBool::new(false));
      let (abs_request_sender, _abs_request_receiver) = unbounded();
      let snapshot_controller =
          SnapshotController::new(abs_request_sender, snapshot_config, last_vote_slot);
      let wen_restart_config = WenRestartConfig {
          wen_restart_path: test_state.wen_restart_proto_path.clone(),
          wen_restart_coordinator: test_state.wen_restart_coordinator,
          last_vote: VoteTransaction::from(Vote::new(vec![last_vote_slot], last_vote_bankhash)),
          blockstore: test_state.blockstore.clone(),
          cluster_info: test_state.cluster_info.clone(),
          bank_forks: test_state.bank_forks.clone(),
          wen_restart_repair_slots: wen_restart_repair_slots.clone(),
          wait_for_supermajority_threshold_percent: 80,
          snapshot_controller: Some(Arc::new(snapshot_controller)),
          abs_status: AbsStatus::new_for_tests(),
          genesis_config_hash: test_state.genesis_config_hash,
          exit: exit.clone(),
      };
      let wen_restart_thread_handle = Builder::new()
          .name("solana-wen-restart".to_string())
          .spawn(move || {
              assert!(wait_for_wen_restart(wen_restart_config).is_ok());
          })
          .unwrap();
      let mut rng = rand::thread_rng();
      let mut expected_received_last_voted_fork_slots = HashMap::new();
      // One fewer than the threshold share of validators: the local validator's own
      // vote is counted on top of these (see `voted_stake` below).
      let validators_to_take: usize =
          (WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT * TOTAL_VALIDATOR_COUNT as u64 / 100 - 1)
              .try_into()
              .unwrap();
      // Others report our fork oldest-first, extended by the slots we still lack.
      let mut last_voted_fork_slots_from_others = test_state.last_voted_fork_slots.clone();
      last_voted_fork_slots_from_others.reverse();
      last_voted_fork_slots_from_others.extend_from_slice(&expected_slots_to_repair);
      for keypairs in test_state
          .validator_voting_keypairs
          .iter()
          .take(validators_to_take)
      {
          let node_pubkey = keypairs.node_keypair.pubkey();
          let node = ContactInfo::new_rand(&mut rng, Some(node_pubkey));
          let last_vote_hash = Hash::new_unique();
          let now = timestamp();
          push_restart_last_voted_fork_slots(
              test_state.cluster_info.clone(),
              &node,
              &last_voted_fork_slots_from_others,
              &last_vote_hash,
              &keypairs.node_keypair,
              now,
          );
          expected_received_last_voted_fork_slots.insert(
              node_pubkey.to_string(),
              LastVotedForkSlotsRecord {
                  last_voted_fork_slots: last_voted_fork_slots_from_others.clone(),
                  last_vote_bankhash: last_vote_hash.to_string(),
                  shred_version: SHRED_VERSION as u32,
                  wallclock: now,
              },
          );
      }
      // Simulating successful repair of missing blocks.
      let _ = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          last_vote_slot,
          &expected_slots_to_repair,
          TICKS_PER_SLOT,
          test_state.last_blockhash,
      );
      let my_heaviest_fork_slot = last_vote_slot + 2;
      let my_heaviest_fork_bankhash;
      loop {
          // Sample liveness *before* inspecting the bank: if the worker had already
          // exited at this point and the bank is still not frozen, it never will be.
          let worker_exited = wen_restart_thread_handle.is_finished();
          if let Some(bank) = test_state
              .bank_forks
              .read()
              .unwrap()
              .get(my_heaviest_fork_slot)
          {
              // When deciding the local heaviest fork, we will freeze the bank.
              if bank.is_frozen() {
                  my_heaviest_fork_bankhash = bank.hash();
                  break;
              }
          }
          // Fail fast instead of spinning forever when the worker died early.
          assert!(
              !worker_exited,
              "wen_restart thread exited before freezing the bank for slot \
               {my_heaviest_fork_slot}"
          );
          sleep(Duration::from_millis(100));
      }
      // Now simulate receiving HeaviestFork messages from coordinator.
      let coordinator_heaviest_fork_slot = my_heaviest_fork_slot - 1;
      let coordinator_heaviest_fork_bankhash = test_state
          .bank_forks
          .read()
          .unwrap()
          .get(coordinator_heaviest_fork_slot)
          .unwrap()
          .hash();
      let coordinator_keypair =
          &test_state.validator_voting_keypairs[COORDINATOR_INDEX].node_keypair;
      let node = ContactInfo::new_rand(&mut rng, Some(coordinator_keypair.pubkey()));
      let now = timestamp();
      push_restart_heaviest_fork(
          test_state.cluster_info.clone(),
          &node,
          coordinator_heaviest_fork_slot,
          &coordinator_heaviest_fork_bankhash,
          0,
          coordinator_keypair,
          now,
      );
      assert!(wen_restart_thread_handle.join().is_ok());
      exit.store(true, Ordering::Relaxed);
      let progress = read_wen_restart_records(&test_state.wen_restart_proto_path).unwrap();
      // Wallclocks and snapshot details are chosen by the worker, so the expected
      // record below copies them from the actual progress.
      let progress_start_time = progress
          .my_last_voted_fork_slots
          .as_ref()
          .unwrap()
          .wallclock;
      // Every validator holds 100 stake, so percent * validator count equals the stake
      // of the threshold share of validators that voted for the local fork.
      let mut expected_slots_stake_map: HashMap<Slot, u64> = test_state
          .last_voted_fork_slots
          .iter()
          .map(|slot| {
              (
                  *slot,
                  WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT * TOTAL_VALIDATOR_COUNT as u64,
              )
          })
          .collect();
      // The repaired slots were voted on only by the other validators.
      let stake_for_new_slots = validators_to_take as u64 * 100;
      expected_slots_stake_map.extend(
          expected_slots_to_repair
              .iter()
              .map(|slot| (*slot, stake_for_new_slots)),
      );
      let voted_stake = (validators_to_take + 1) as u64 * 100;
      assert_eq!(
          progress,
          WenRestartProgress {
              state: RestartState::Done.into(),
              my_last_voted_fork_slots: Some(LastVotedForkSlotsRecord {
                  last_voted_fork_slots: test_state.last_voted_fork_slots,
                  last_vote_bankhash: last_vote_bankhash.to_string(),
                  shred_version: SHRED_VERSION as u32,
                  wallclock: progress_start_time,
              }),
              last_voted_fork_slots_aggregate: Some(LastVotedForkSlotsAggregateRecord {
                  received: expected_received_last_voted_fork_slots,
                  final_result: Some(LastVotedForkSlotsAggregateFinal {
                      slots_stake_map: expected_slots_stake_map,
                      epoch_infos: vec![
                          LastVotedForkSlotsEpochInfoRecord {
                              epoch: 0,
                              total_stake: 2000,
                              actively_voting_stake: voted_stake,
                              actively_voting_for_this_epoch_stake: voted_stake,
                          },
                          LastVotedForkSlotsEpochInfoRecord {
                              epoch: 1,
                              total_stake: 2000,
                              actively_voting_stake: voted_stake,
                              actively_voting_for_this_epoch_stake: voted_stake,
                          },
                      ],
                  }),
              }),
              my_heaviest_fork: Some(HeaviestForkRecord {
                  slot: my_heaviest_fork_slot,
                  bankhash: my_heaviest_fork_bankhash.to_string(),
                  total_active_stake: 0,
                  shred_version: SHRED_VERSION as u32,
                  wallclock: 0,
                  from: my_pubkey.to_string(),
              }),
              heaviest_fork_aggregate: None,
              my_snapshot: Some(GenerateSnapshotRecord {
                  slot: coordinator_heaviest_fork_slot,
                  bankhash: progress.my_snapshot.as_ref().unwrap().bankhash.clone(),
                  shred_version: progress.my_snapshot.as_ref().unwrap().shred_version,
                  path: progress.my_snapshot.as_ref().unwrap().path.clone(),
              }),
              coordinator_heaviest_fork: Some(HeaviestForkRecord {
                  slot: coordinator_heaviest_fork_slot,
                  bankhash: coordinator_heaviest_fork_bankhash.to_string(),
                  total_active_stake: 0,
                  shred_version: SHRED_VERSION as u32,
                  wallclock: progress
                      .coordinator_heaviest_fork
                      .as_ref()
                      .unwrap()
                      .wallclock,
                  from: coordinator_keypair.pubkey().to_string(),
              }),
              ..Default::default()
          }
      );
  }
  /// Sets or clears the read-only permission on the file at `wen_restart_proto_path`.
  ///
  /// Panics if the file's metadata cannot be read or its permissions cannot be updated.
  fn change_proto_file_readonly(wen_restart_proto_path: &PathBuf, readonly: bool) {
      let metadata = std::fs::metadata(wen_restart_proto_path).unwrap();
      let mut permissions = metadata.permissions();
      permissions.set_readonly(readonly);
      std::fs::set_permissions(wen_restart_proto_path, permissions).unwrap();
  }
  /// Checks that wen_restart rejects forks that diverge across an epoch boundary.
  ///
  /// A new root is set one slot past the last vote, with epoch 2 stakes rigged so that
  /// validator 0 holds 90% of the stake. Two children of that root are then inserted:
  /// one in the old epoch, voted for by every remote validator except validator 0, and
  /// one at the first slot of epoch 2, voted for by validator 0 alone.
  /// `wait_for_wen_restart` is expected to fail with `BlockNotLinkedToExpectedParent`.
  #[test]
  fn test_wen_restart_divergence_across_epoch_boundary() {
      agave_logger::setup();
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      let last_vote_slot = test_state.last_voted_fork_slots[0];
      let old_root_bank = test_state.bank_forks.read().unwrap().root_bank();
      let old_root_blockhash = old_root_bank.last_blockhash();

      // Add bank last_vote + 1 linking directly to 0, tweak its epoch_stakes, and then
      // add it to bank_forks.
      let my_heaviest_fork_slot = last_vote_slot + 1;
      let mut new_root_bank = Bank::new_from_parent(
          old_root_bank.clone(),
          &Pubkey::default(),
          my_heaviest_fork_slot,
      );
      assert_eq!(new_root_bank.epoch(), 1);

      // For epoch 2, make validator 0 have 90% of the stake.
      let dominant_stake = 900 * (TOTAL_VALIDATOR_COUNT - 1) as u64;
      let epoch2_vote_accounts = test_state
          .validator_voting_keypairs
          .iter()
          .enumerate()
          .map(|(index, keypairs)| {
              let stake = match index {
                  0 => dominant_stake,
                  _ => 100,
              };
              let authorized_voter = keypairs.vote_keypair.pubkey();
              let node_id = keypairs.node_keypair.pubkey();
              let vote_account = VoteAccount::try_from(create_v4_account_with_authorized(
                  &node_id,
                  &authorized_voter,
                  &node_id,
                  None,
                  0,
                  100,
              ))
              .unwrap();
              (authorized_voter, (stake, vote_account))
          })
          .collect();
      new_root_bank.set_epoch_stakes_for_test(
          2,
          VersionedEpochStakes::new_for_tests(epoch2_vote_accounts, 2),
      );
      let _ = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          0,
          &[my_heaviest_fork_slot],
          TICKS_PER_SLOT,
          old_root_blockhash,
      );

      // Replay the new slot so the bank can be rooted.
      let replay_tx_thread_pool = rayon::ThreadPoolBuilder::new()
          .thread_name(|i| format!("solReplayTx{i:02}"))
          .build()
          .expect("new rayon threadpool");
      let mut timing = ExecuteTimings::default();
      let opts = ProcessOptions::default();
      let mut confirmation_progress = ConfirmationProgress::new(old_root_blockhash);
      let last_vote_bankhash = new_root_bank.hash();
      let bank_with_scheduler = test_state
          .bank_forks
          .write()
          .unwrap()
          .insert_from_ledger(new_root_bank);
      process_single_slot(
          &test_state.blockstore,
          &bank_with_scheduler,
          &replay_tx_thread_pool,
          &opts,
          &mut confirmation_progress,
          None,
          None,
          None,
          &mut timing,
      )
      .unwrap_or_else(|e| panic!("process_single_slot failed: {e:?}"));
      let _ = test_state.bank_forks.write().unwrap().set_root(
          my_heaviest_fork_slot,
          None,
          Some(my_heaviest_fork_slot),
      );
      let new_root_bank = test_state
          .bank_forks
          .read()
          .unwrap()
          .get(my_heaviest_fork_slot)
          .unwrap();

      // Add two more banks: old_epoch_bank (slot = last_vote_slot + 2) and
      // new_epoch_bank (slot = first slot in epoch 2). They both link to last_vote_slot + 1.
      // old_epoch_bank has everyone's votes except 0, so it has > 66% stake in the old epoch.
      // new_epoch_bank has 0's vote, so it has > 66% stake in the new epoch.
      let old_epoch_slot = my_heaviest_fork_slot + 1;
      let new_epoch_slot = new_root_bank.epoch_schedule().get_first_slot_in_epoch(2);
      let _ = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          new_root_bank.slot(),
          &[old_epoch_slot],
          TICKS_PER_SLOT,
          new_root_bank.last_blockhash(),
      );
      let _ = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          my_heaviest_fork_slot,
          &[new_epoch_slot],
          TICKS_PER_SLOT,
          new_root_bank.last_blockhash(),
      );

      let mut rng = rand::thread_rng();
      // Every validator but the last one (the local node) gossips its last voted fork.
      let remote_validators = test_state
          .validator_voting_keypairs
          .iter()
          .take(TOTAL_VALIDATOR_COUNT as usize - 1);
      for (index, keypairs) in remote_validators.enumerate() {
          let node_pubkey = keypairs.node_keypair.pubkey();
          let node = ContactInfo::new_rand(&mut rng, Some(node_pubkey));
          let last_vote_hash = Hash::new_unique();
          let now = timestamp();
          // Validator 0 votes for the new_epoch_bank while everyone else votes for
          // old_epoch_bank.
          let fork_tip = if index == 0 {
              new_epoch_slot
          } else {
              old_epoch_slot
          };
          let last_voted_fork_slots = [fork_tip, my_heaviest_fork_slot, 0];
          push_restart_last_voted_fork_slots(
              test_state.cluster_info.clone(),
              &node,
              &last_voted_fork_slots,
              &last_vote_hash,
              &keypairs.node_keypair,
              now,
          );
      }

      let wen_restart_config = WenRestartConfig {
          wen_restart_path: test_state.wen_restart_proto_path,
          wen_restart_coordinator: test_state.wen_restart_coordinator,
          last_vote: VoteTransaction::from(Vote::new(
              vec![my_heaviest_fork_slot],
              last_vote_bankhash,
          )),
          blockstore: test_state.blockstore,
          cluster_info: test_state.cluster_info,
          bank_forks: test_state.bank_forks,
          wen_restart_repair_slots: Some(Arc::new(RwLock::new(Vec::new()))),
          wait_for_supermajority_threshold_percent: 80,
          snapshot_controller: None,
          abs_status: AbsStatus::new_for_tests(),
          genesis_config_hash: test_state.genesis_config_hash,
          exit: Arc::new(AtomicBool::new(false)),
      };
      let restart_error = wait_for_wen_restart(wen_restart_config)
          .unwrap_err()
          .downcast::<WenRestartError>()
          .unwrap();
      assert_eq!(
          restart_error,
          WenRestartError::BlockNotLinkedToExpectedParent(
              new_epoch_slot,
              Some(my_heaviest_fork_slot),
              old_epoch_slot
          )
      );
  }
  2073. #[test]
  2074. fn test_wen_restart_initialize() {
  2075. agave_logger::setup();
  2076. let ledger_path = get_tmp_ledger_path_auto_delete!();
  2077. let test_state = wen_restart_test_init(&ledger_path);
  2078. let last_vote_bankhash = Hash::new_unique();
  2079. let mut last_voted_fork_slots = test_state.last_voted_fork_slots.clone();
  2080. last_voted_fork_slots.reverse();
  2081. let mut file = File::create(&test_state.wen_restart_proto_path).unwrap();
  2082. file.write_all(b"garbage").unwrap();
  2083. assert_eq!(
  2084. initialize(
  2085. &test_state.wen_restart_proto_path,
  2086. VoteTransaction::from(Vote::new(last_voted_fork_slots.clone(), last_vote_bankhash)),
  2087. test_state.blockstore.clone()
  2088. )
  2089. .unwrap_err()
  2090. .downcast::<prost::DecodeError>()
  2091. .unwrap(),
  2092. prost::DecodeError::new("invalid wire type value: 7")
  2093. );
  2094. assert!(remove_file(&test_state.wen_restart_proto_path).is_ok());
  2095. let last_vote_bankhash = Hash::new_unique();
  2096. let empty_last_vote = VoteTransaction::from(Vote::new(vec![], last_vote_bankhash));
  2097. assert_eq!(
  2098. initialize(
  2099. &test_state.wen_restart_proto_path,
  2100. empty_last_vote.clone(),
  2101. test_state.blockstore.clone()
  2102. )
  2103. .unwrap_err()
  2104. .downcast::<WenRestartError>()
  2105. .unwrap(),
  2106. WenRestartError::MissingLastVotedForkSlots,
  2107. );
  2108. assert!(write_wen_restart_records(
  2109. &test_state.wen_restart_proto_path,
  2110. &WenRestartProgress {
  2111. state: RestartState::LastVotedForkSlots.into(),
  2112. ..Default::default()
  2113. },
  2114. )
  2115. .is_ok());
  2116. assert_eq!(
  2117. initialize(
  2118. &test_state.wen_restart_proto_path,
  2119. VoteTransaction::from(Vote::new(last_voted_fork_slots.clone(), last_vote_bankhash)),
  2120. test_state.blockstore.clone()
  2121. )
  2122. .err()
  2123. .unwrap()
  2124. .to_string(),
  2125. "Malformed last voted fork slots protobuf: None"
  2126. );
  2127. let progress_missing_heaviest_fork_aggregate = WenRestartProgress {
  2128. state: RestartState::HeaviestFork.into(),
  2129. my_heaviest_fork: Some(HeaviestForkRecord {
  2130. slot: 0,
  2131. bankhash: Hash::new_unique().to_string(),
  2132. total_active_stake: 0,
  2133. shred_version: SHRED_VERSION as u32,
  2134. wallclock: 0,
  2135. from: Pubkey::new_unique().to_string(),
  2136. }),
  2137. ..Default::default()
  2138. };
  2139. assert!(write_wen_restart_records(
  2140. &test_state.wen_restart_proto_path,
  2141. &progress_missing_heaviest_fork_aggregate,
  2142. )
  2143. .is_ok());
  2144. assert_eq!(
  2145. initialize(
  2146. &test_state.wen_restart_proto_path,
  2147. VoteTransaction::from(Vote::new(last_voted_fork_slots.clone(), last_vote_bankhash)),
  2148. test_state.blockstore.clone()
  2149. )
  2150. .err()
  2151. .unwrap()
  2152. .to_string(),
  2153. "Malformed progress: HeaviestFork missing final_result in \
  2154. last_voted_fork_slots_aggregate",
  2155. );
  2156. let progress_missing_my_heaviestfork = WenRestartProgress {
  2157. state: RestartState::GenerateSnapshot.into(),
  2158. my_snapshot: Some(GenerateSnapshotRecord {
  2159. slot: 0,
  2160. bankhash: Hash::new_unique().to_string(),
  2161. shred_version: SHRED_VERSION as u32,
  2162. path: "/path/to/snapshot".to_string(),
  2163. }),
  2164. ..Default::default()
  2165. };
  2166. assert!(write_wen_restart_records(
  2167. &test_state.wen_restart_proto_path,
  2168. &progress_missing_my_heaviestfork,
  2169. )
  2170. .is_ok());
  2171. assert_eq!(
  2172. initialize(
  2173. &test_state.wen_restart_proto_path,
  2174. VoteTransaction::from(Vote::new(last_voted_fork_slots.clone(), last_vote_bankhash)),
  2175. test_state.blockstore.clone()
  2176. )
  2177. .err()
  2178. .unwrap()
  2179. .to_string(),
  2180. "Malformed progress: GenerateSnapshot missing my_heaviest_fork",
  2181. );
  2182. // Now test successful initialization.
  2183. assert!(remove_file(&test_state.wen_restart_proto_path).is_ok());
  2184. // Test the case where the file is not found.
  2185. let mut vote = TowerSync::from(vec![(test_state.last_voted_fork_slots[0], 1)]);
  2186. vote.hash = last_vote_bankhash;
  2187. let last_vote = VoteTransaction::from(vote);
  2188. assert_eq!(
  2189. initialize(
  2190. &test_state.wen_restart_proto_path,
  2191. last_vote.clone(),
  2192. test_state.blockstore.clone()
  2193. )
  2194. .unwrap(),
  2195. (
  2196. WenRestartProgressInternalState::Init {
  2197. last_voted_fork_slots: test_state.last_voted_fork_slots.clone(),
  2198. last_vote_bankhash
  2199. },
  2200. WenRestartProgress {
  2201. state: RestartState::Init.into(),
  2202. ..Default::default()
  2203. }
  2204. )
  2205. );
  2206. let progress = WenRestartProgress {
  2207. state: RestartState::Init.into(),
  2208. my_last_voted_fork_slots: Some(LastVotedForkSlotsRecord {
  2209. last_voted_fork_slots: test_state.last_voted_fork_slots.clone(),
  2210. last_vote_bankhash: last_vote_bankhash.to_string(),
  2211. shred_version: SHRED_VERSION as u32,
  2212. wallclock: 0,
  2213. }),
  2214. ..Default::default()
  2215. };
  2216. assert!(write_wen_restart_records(&test_state.wen_restart_proto_path, &progress,).is_ok());
  2217. assert_eq!(
  2218. initialize(
  2219. &test_state.wen_restart_proto_path,
  2220. last_vote.clone(),
  2221. test_state.blockstore.clone()
  2222. )
  2223. .unwrap(),
  2224. (
  2225. WenRestartProgressInternalState::Init {
  2226. last_voted_fork_slots: test_state.last_voted_fork_slots.clone(),
  2227. last_vote_bankhash,
  2228. },
  2229. progress
  2230. )
  2231. );
  2232. let progress = WenRestartProgress {
  2233. state: RestartState::LastVotedForkSlots.into(),
  2234. my_last_voted_fork_slots: Some(LastVotedForkSlotsRecord {
  2235. last_voted_fork_slots: test_state.last_voted_fork_slots.clone(),
  2236. last_vote_bankhash: last_vote_bankhash.to_string(),
  2237. shred_version: SHRED_VERSION as u32,
  2238. wallclock: 0,
  2239. }),
  2240. ..Default::default()
  2241. };
  2242. assert!(write_wen_restart_records(&test_state.wen_restart_proto_path, &progress,).is_ok());
  2243. assert_eq!(
  2244. initialize(
  2245. &test_state.wen_restart_proto_path,
  2246. last_vote.clone(),
  2247. test_state.blockstore.clone()
  2248. )
  2249. .unwrap(),
  2250. (
  2251. WenRestartProgressInternalState::LastVotedForkSlots {
  2252. last_voted_fork_slots: test_state.last_voted_fork_slots.clone(),
  2253. aggregate_final_result: None,
  2254. },
  2255. progress
  2256. )
  2257. );
  2258. let progress = WenRestartProgress {
  2259. state: RestartState::HeaviestFork.into(),
  2260. my_heaviest_fork: Some(HeaviestForkRecord {
  2261. slot: 0,
  2262. bankhash: Hash::new_unique().to_string(),
  2263. total_active_stake: 0,
  2264. shred_version: SHRED_VERSION as u32,
  2265. wallclock: 0,
  2266. from: Pubkey::new_unique().to_string(),
  2267. }),
  2268. last_voted_fork_slots_aggregate: Some(LastVotedForkSlotsAggregateRecord {
  2269. received: HashMap::new(),
  2270. final_result: Some(LastVotedForkSlotsAggregateFinal {
  2271. slots_stake_map: HashMap::new(),
  2272. epoch_infos: vec![
  2273. LastVotedForkSlotsEpochInfoRecord {
  2274. epoch: 1,
  2275. total_stake: 1000,
  2276. actively_voting_stake: 800,
  2277. actively_voting_for_this_epoch_stake: 800,
  2278. },
  2279. LastVotedForkSlotsEpochInfoRecord {
  2280. epoch: 2,
  2281. total_stake: 1000,
  2282. actively_voting_stake: 900,
  2283. actively_voting_for_this_epoch_stake: 900,
  2284. },
  2285. ],
  2286. }),
  2287. }),
  2288. ..Default::default()
  2289. };
  2290. assert!(write_wen_restart_records(&test_state.wen_restart_proto_path, &progress,).is_ok());
  2291. assert_eq!(
  2292. initialize(
  2293. &test_state.wen_restart_proto_path,
  2294. last_vote.clone(),
  2295. test_state.blockstore.clone()
  2296. )
  2297. .unwrap(),
  2298. (
  2299. WenRestartProgressInternalState::FindHeaviestFork {
  2300. aggregate_final_result: LastVotedForkSlotsFinalResult {
  2301. slots_stake_map: HashMap::new(),
  2302. epoch_info_vec: vec![
  2303. LastVotedForkSlotsEpochInfo {
  2304. epoch: 1,
  2305. total_stake: 1000,
  2306. actively_voting_stake: 800,
  2307. actively_voting_for_this_epoch_stake: 800,
  2308. },
  2309. LastVotedForkSlotsEpochInfo {
  2310. epoch: 2,
  2311. total_stake: 1000,
  2312. actively_voting_stake: 900,
  2313. actively_voting_for_this_epoch_stake: 900,
  2314. }
  2315. ],
  2316. },
  2317. my_heaviest_fork: progress.my_heaviest_fork.clone(),
  2318. },
  2319. progress
  2320. )
  2321. );
  2322. let progress = WenRestartProgress {
  2323. state: RestartState::GenerateSnapshot.into(),
  2324. my_heaviest_fork: Some(HeaviestForkRecord {
  2325. slot: 0,
  2326. bankhash: Hash::new_unique().to_string(),
  2327. total_active_stake: 0,
  2328. shred_version: SHRED_VERSION as u32,
  2329. wallclock: 0,
  2330. from: Pubkey::new_unique().to_string(),
  2331. }),
  2332. my_snapshot: Some(GenerateSnapshotRecord {
  2333. slot: 0,
  2334. bankhash: Hash::new_unique().to_string(),
  2335. shred_version: SHRED_VERSION as u32,
  2336. path: "/path/to/snapshot".to_string(),
  2337. }),
  2338. ..Default::default()
  2339. };
  2340. assert!(write_wen_restart_records(&test_state.wen_restart_proto_path, &progress,).is_ok());
  2341. assert_eq!(
  2342. initialize(
  2343. &test_state.wen_restart_proto_path,
  2344. VoteTransaction::from(Vote::new(last_voted_fork_slots.clone(), last_vote_bankhash)),
  2345. test_state.blockstore.clone()
  2346. )
  2347. .unwrap(),
  2348. (
  2349. WenRestartProgressInternalState::GenerateSnapshot {
  2350. my_heaviest_fork_slot: 0,
  2351. my_snapshot: progress.my_snapshot.clone(),
  2352. },
  2353. progress,
  2354. )
  2355. );
  2356. let last_vote_slot = test_state.last_voted_fork_slots[0];
  2357. let snapshot_slot_hash = Hash::new_unique();
  2358. let progress = WenRestartProgress {
  2359. state: RestartState::Done.into(),
  2360. my_last_voted_fork_slots: Some(LastVotedForkSlotsRecord {
  2361. last_voted_fork_slots: test_state.last_voted_fork_slots.clone(),
  2362. last_vote_bankhash: last_vote_bankhash.to_string(),
  2363. shred_version: SHRED_VERSION as u32,
  2364. wallclock: 0,
  2365. }),
  2366. my_heaviest_fork: Some(HeaviestForkRecord {
  2367. slot: last_vote_slot,
  2368. bankhash: snapshot_slot_hash.to_string(),
  2369. total_active_stake: 0,
  2370. shred_version: SHRED_VERSION as u32,
  2371. wallclock: 0,
  2372. from: Pubkey::new_unique().to_string(),
  2373. }),
  2374. my_snapshot: Some(GenerateSnapshotRecord {
  2375. slot: last_vote_slot,
  2376. bankhash: snapshot_slot_hash.to_string(),
  2377. shred_version: SHRED_VERSION as u32,
  2378. path: "/path/to/snapshot".to_string(),
  2379. }),
  2380. ..Default::default()
  2381. };
  2382. assert!(write_wen_restart_records(&test_state.wen_restart_proto_path, &progress,).is_ok());
  2383. assert_eq!(
  2384. initialize(
  2385. &test_state.wen_restart_proto_path,
  2386. VoteTransaction::from(Vote::new(last_voted_fork_slots, last_vote_bankhash)),
  2387. test_state.blockstore.clone()
  2388. )
  2389. .unwrap(),
  2390. (
  2391. WenRestartProgressInternalState::Done {
  2392. slot: last_vote_slot,
  2393. hash: snapshot_slot_hash,
  2394. shred_version: SHRED_VERSION,
  2395. },
  2396. progress
  2397. )
  2398. );
  2399. }
  /// Sending an empty last-voted fork must be rejected with a descriptive error; afterwards
  /// the shared `wen_restart_test_succeed_after_failure` helper is run on the same state.
  #[test]
  fn test_wen_restart_send_last_voted_fork_failures() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      // `send_restart_last_voted_fork_slots` is not handed any progress record in this call,
      // so the returned error is the only observable outcome worth asserting on.
      assert_eq!(
          send_restart_last_voted_fork_slots(
              test_state.cluster_info.clone(),
              &[],
              Hash::new_unique(),
          )
          .err()
          .unwrap()
          .to_string(),
          "Last voted fork cannot be empty"
      );
      let last_vote_bankhash = Hash::new_unique();
      let last_voted_fork_slots = test_state.last_voted_fork_slots.clone();
      // NOTE(review): the third argument looks like the progress the helper expects to
      // observe on disk — confirm against the helper's definition.
      wen_restart_test_succeed_after_failure(
          test_state,
          last_vote_bankhash,
          WenRestartProgress {
              state: RestartState::LastVotedForkSlots.into(),
              my_last_voted_fork_slots: Some(LastVotedForkSlotsRecord {
                  last_voted_fork_slots,
                  last_vote_bankhash: last_vote_bankhash.to_string(),
                  shred_version: SHRED_VERSION as u32,
                  wallclock: 0,
              }),
              last_voted_fork_slots_aggregate: Some(LastVotedForkSlotsAggregateRecord {
                  received: HashMap::new(),
                  final_result: None,
              }),
              ..Default::default()
          },
      );
  }
  /// Checks that `write_wen_restart_records` reports `PermissionDenied` while the proto file
  /// is marked read-only and succeeds again once the flag is cleared, then runs the shared
  /// `wen_restart_test_succeed_after_failure` helper.
  #[test]
  fn test_write_wen_restart_records_failure() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      {
          // Scoped so the borrow of `test_state` ends before it is moved into the helper.
          let proto_path = &test_state.wen_restart_proto_path;
          let init_progress = wen_restart_proto::WenRestartProgress {
              state: RestartState::Init.into(),
              ..Default::default()
          };
          // The first write goes through while the file is still writable.
          assert!(write_wen_restart_records(proto_path, &init_progress).is_ok());

          // Writing while the file is read-only must surface the underlying I/O error.
          change_proto_file_readonly(proto_path, true);
          let write_error = write_wen_restart_records(proto_path, &init_progress).unwrap_err();
          let io_error = write_error.downcast::<std::io::Error>().unwrap();
          assert_eq!(io_error.kind(), std::io::ErrorKind::PermissionDenied);

          // Once the file is writable again the same write succeeds.
          change_proto_file_readonly(proto_path, false);
          assert!(write_wen_restart_records(proto_path, &init_progress).is_ok());
      }

      let last_voted_fork_slots = test_state.last_voted_fork_slots.clone();
      let last_vote_bankhash = Hash::new_unique();
      let my_record = LastVotedForkSlotsRecord {
          last_voted_fork_slots,
          last_vote_bankhash: last_vote_bankhash.to_string(),
          shred_version: SHRED_VERSION as u32,
          wallclock: 0,
      };
      let empty_aggregate = LastVotedForkSlotsAggregateRecord {
          received: HashMap::new(),
          final_result: None,
      };
      wen_restart_test_succeed_after_failure(
          test_state,
          last_vote_bankhash,
          WenRestartProgress {
              state: RestartState::LastVotedForkSlots.into(),
              my_last_voted_fork_slots: Some(my_record),
              last_voted_fork_slots_aggregate: Some(empty_aggregate),
              ..Default::default()
          },
      );
  }
  /// Repeatedly starts the last-voted-fork aggregation on a fresh thread, feeds it one more
  /// validator's message, waits until that message shows up in the persisted progress and
  /// then stops the thread again. The messages received so far are expected to accumulate
  /// in the progress file across these stop/restart cycles.
  #[test]
  fn test_wen_restart_aggregate_last_voted_fork_stop_and_restart() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      let my_slots = test_state.last_voted_fork_slots.clone();
      let last_vote_slot: Slot = my_slots[0];
      let last_vote_bankhash = Hash::new_unique();
      let start_time = timestamp();

      // Every progress record in this test shares the same state and own record; only the
      // aggregate part differs.
      let progress_with_aggregate =
          |aggregate: Option<LastVotedForkSlotsAggregateRecord>| WenRestartProgress {
              state: RestartState::LastVotedForkSlots.into(),
              my_last_voted_fork_slots: Some(LastVotedForkSlotsRecord {
                  last_voted_fork_slots: my_slots.clone(),
                  last_vote_bankhash: last_vote_bankhash.to_string(),
                  shred_version: SHRED_VERSION as u32,
                  wallclock: start_time,
              }),
              last_voted_fork_slots_aggregate: aggregate,
              ..Default::default()
          };
      // An aggregate that has received messages but has no final result yet.
      let received_only = |received: HashMap<String, LastVotedForkSlotsRecord>| {
          Some(LastVotedForkSlotsAggregateRecord {
              received,
              final_result: None,
          })
      };

      // Seed the proto file with an aggregate that has not received anything yet.
      assert!(write_wen_restart_records(
          &test_state.wen_restart_proto_path,
          &progress_with_aggregate(received_only(HashMap::new())),
      )
      .is_ok());

      let mut rng = rand::thread_rng();
      let mut expected_messages = HashMap::new();
      let expected_slots_to_repair: Vec<Slot> =
          (last_vote_slot + 1..last_vote_slot + 3).collect();
      // Other validators report our slots in reverse order followed by the slots we miss.
      let last_voted_fork_slots_from_others: Vec<Slot> = my_slots
          .iter()
          .rev()
          .copied()
          .chain(expected_slots_to_repair.iter().copied())
          .collect();
      // In-memory progress handed to each aggregation run; it carries no aggregate.
      let progress = progress_with_aggregate(None);
      let validators_to_take = usize::try_from(
          WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT * TOTAL_VALIDATOR_COUNT as u64 / 100 - 1,
      )
      .unwrap();

      for keypairs in test_state
          .validator_voting_keypairs
          .iter()
          .take(validators_to_take)
      {
          let exit = Arc::new(AtomicBool::new(false));
          let wen_restart_thread_handle = {
              let proto_path = test_state.wen_restart_proto_path.clone();
              let cluster_info = test_state.cluster_info.clone();
              let bank_forks = test_state.bank_forks.clone();
              let blockstore = test_state.blockstore.clone();
              let thread_exit = exit.clone();
              let mut thread_progress = progress.clone();
              let thread_slots = my_slots.clone();
              Builder::new()
                  .name("solana-wen-restart".to_string())
                  .spawn(move || {
                      let outcome = aggregate_restart_last_voted_fork_slots(
                          &proto_path,
                          WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT,
                          cluster_info,
                          &thread_slots,
                          bank_forks,
                          blockstore,
                          Arc::new(RwLock::new(Vec::new())),
                          thread_exit,
                          &mut thread_progress,
                      );
                      assert!(outcome.is_ok());
                  })
                  .unwrap()
          };

          // Publish one more validator's last voted fork slots while the thread is running.
          let node_pubkey = keypairs.node_keypair.pubkey();
          let node = ContactInfo::new_rand(&mut rng, Some(node_pubkey));
          let last_vote_hash = Hash::new_unique();
          let now = timestamp();
          push_restart_last_voted_fork_slots(
              test_state.cluster_info.clone(),
              &node,
              &last_voted_fork_slots_from_others,
              &last_vote_hash,
              &keypairs.node_keypair,
              now,
          );
          expected_messages.insert(
              node_pubkey.to_string(),
              LastVotedForkSlotsRecord {
                  last_voted_fork_slots: last_voted_fork_slots_from_others.clone(),
                  last_vote_bankhash: last_vote_hash.to_string(),
                  shred_version: SHRED_VERSION as u32,
                  wallclock: now,
              },
          );

          // The persisted progress must now list every message pushed so far.
          wait_on_expected_progress_with_timeout(
              test_state.wen_restart_proto_path.clone(),
              progress_with_aggregate(received_only(expected_messages.clone())),
          );

          exit.store(true, Ordering::Relaxed);
          // NOTE(review): the join result is discarded, so a panic raised by the assertion
          // inside the thread does not fail this test — confirm this is intentional.
          let _ = wen_restart_thread_handle.join();
      }

      // Simulating successful repair of missing blocks.
      let _ = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          last_vote_slot,
          &expected_slots_to_repair,
          TICKS_PER_SLOT,
          test_state.last_blockhash,
      );

      let expected_progress = progress_with_aggregate(received_only(expected_messages));
      wen_restart_test_succeed_after_failure(test_state, last_vote_bankhash, expected_progress);
  }
  /// Table-driven check of `increment_and_write_wen_restart_records`: for each
  /// (state, progress) pair on entry it asserts the state returned and the progress left
  /// behind, and finally that calling it in the `Done` state yields `UnexpectedState`.
  #[test]
  fn test_increment_and_write_wen_restart_records() {
      agave_logger::setup();
      let my_dir = TempDir::new().unwrap();
      let wen_restart_proto_path = my_dir.path().join("wen_restart_status.proto");
      let last_vote_bankhash = Hash::new_unique();

      // Records that get attached to the progress one after another.
      let my_last_voted_fork_slots = Some(LastVotedForkSlotsRecord {
          last_voted_fork_slots: vec![0, 1],
          last_vote_bankhash: last_vote_bankhash.to_string(),
          shred_version: 0,
          wallclock: 0,
      });
      let last_voted_fork_slots_aggregate = Some(LastVotedForkSlotsAggregateRecord {
          received: HashMap::new(),
          final_result: Some(LastVotedForkSlotsAggregateFinal {
              slots_stake_map: HashMap::from([(0, 900), (1, 800)]),
              epoch_infos: vec![LastVotedForkSlotsEpochInfoRecord {
                  epoch: 0,
                  total_stake: 2000,
                  actively_voting_stake: 900,
                  actively_voting_for_this_epoch_stake: 900,
              }],
          }),
      });
      let my_pubkey = Pubkey::new_unique();
      let my_heaviest_fork = Some(HeaviestForkRecord {
          slot: 1,
          bankhash: Hash::default().to_string(),
          total_active_stake: 900,
          shred_version: SHRED_VERSION as u32,
          wallclock: 0,
          from: my_pubkey.to_string(),
      });
      let coordinator_heaviest_fork = Some(HeaviestForkRecord {
          slot: 2,
          bankhash: Hash::new_unique().to_string(),
          total_active_stake: 800,
          shred_version: SHRED_VERSION as u32,
          wallclock: 0,
          from: Pubkey::new_unique().to_string(),
      });
      let my_bankhash = Hash::new_unique();
      let new_shred_version = SHRED_VERSION + 57;
      let my_snapshot = Some(GenerateSnapshotRecord {
          slot: 1,
          bankhash: my_bankhash.to_string(),
          path: "snapshot_1".to_string(),
          shred_version: new_shred_version as u32,
      });

      // In-memory counterpart of the aggregate's final result stored above.
      let expected_slots_stake_map: HashMap<Slot, u64> = HashMap::from([(0, 900), (1, 800)]);
      let final_result = || LastVotedForkSlotsFinalResult {
          slots_stake_map: expected_slots_stake_map.clone(),
          epoch_info_vec: vec![LastVotedForkSlotsEpochInfo {
              epoch: 0,
              total_stake: 2000,
              actively_voting_stake: 900,
              actively_voting_for_this_epoch_stake: 900,
          }],
      };

      // Progress snapshots, each one extending the previous with one more record.
      let with_state = |state: RestartState, template: &WenRestartProgress| WenRestartProgress {
          state: state.into(),
          ..template.clone()
      };
      let slots_only = WenRestartProgress {
          state: RestartState::LastVotedForkSlots.into(),
          my_last_voted_fork_slots: my_last_voted_fork_slots.clone(),
          ..Default::default()
      };
      let aggregated = WenRestartProgress {
          last_voted_fork_slots_aggregate: last_voted_fork_slots_aggregate.clone(),
          ..slots_only.clone()
      };
      let with_my_fork = WenRestartProgress {
          state: RestartState::HeaviestFork.into(),
          my_heaviest_fork: my_heaviest_fork.clone(),
          ..aggregated.clone()
      };
      let with_coordinator_fork = WenRestartProgress {
          coordinator_heaviest_fork: coordinator_heaviest_fork.clone(),
          ..with_my_fork.clone()
      };
      let with_snapshot = WenRestartProgress {
          state: RestartState::Done.into(),
          my_snapshot: my_snapshot.clone(),
          ..with_coordinator_fork.clone()
      };

      // Each row: (state on entry, expected state on exit, progress on entry, expected
      // progress on exit).
      let cases = [
          (
              WenRestartProgressInternalState::Init {
                  last_voted_fork_slots: vec![0, 1],
                  last_vote_bankhash,
              },
              WenRestartProgressInternalState::LastVotedForkSlots {
                  last_voted_fork_slots: vec![0, 1],
                  aggregate_final_result: None,
              },
              slots_only.clone(),
              slots_only.clone(),
          ),
          (
              WenRestartProgressInternalState::LastVotedForkSlots {
                  last_voted_fork_slots: vec![0, 1],
                  aggregate_final_result: Some(final_result()),
              },
              WenRestartProgressInternalState::FindHeaviestFork {
                  aggregate_final_result: final_result(),
                  my_heaviest_fork: None,
              },
              aggregated.clone(),
              with_state(RestartState::HeaviestFork, &aggregated),
          ),
          (
              WenRestartProgressInternalState::FindHeaviestFork {
                  aggregate_final_result: final_result(),
                  my_heaviest_fork: my_heaviest_fork.clone(),
              },
              WenRestartProgressInternalState::HeaviestFork {
                  my_heaviest_fork_slot: 1,
                  my_heaviest_fork_hash: Hash::default(),
              },
              with_state(RestartState::HeaviestFork, &aggregated),
              with_my_fork.clone(),
          ),
          (
              WenRestartProgressInternalState::HeaviestFork {
                  my_heaviest_fork_slot: 1,
                  my_heaviest_fork_hash: Hash::default(),
              },
              WenRestartProgressInternalState::GenerateSnapshot {
                  my_heaviest_fork_slot: 1,
                  my_snapshot: None,
              },
              with_coordinator_fork.clone(),
              with_state(RestartState::GenerateSnapshot, &with_coordinator_fork),
          ),
          (
              WenRestartProgressInternalState::GenerateSnapshot {
                  my_heaviest_fork_slot: 1,
                  my_snapshot: my_snapshot.clone(),
              },
              WenRestartProgressInternalState::Done {
                  slot: 1,
                  hash: my_bankhash,
                  shred_version: new_shred_version,
              },
              // The entry progress of this row still carries the HeaviestFork state.
              with_coordinator_fork.clone(),
              with_snapshot,
          ),
      ];
      for (entrance_state, exit_state, entrance_progress, exit_progress) in cases {
          let mut progress = entrance_progress;
          let state = increment_and_write_wen_restart_records(
              &wen_restart_proto_path,
              entrance_state,
              &mut progress,
          )
          .unwrap();
          assert_eq!(&state, &exit_state);
          assert_eq!(&progress, &exit_progress);
      }

      // There is nothing to advance to from Done, so the call has to fail.
      let mut progress = with_state(RestartState::Done, &aggregated);
      let done_error = increment_and_write_wen_restart_records(
          &wen_restart_proto_path,
          WenRestartProgressInternalState::Done {
              slot: 1,
              hash: my_bankhash,
              shred_version: new_shred_version,
          },
          &mut progress,
      )
      .unwrap_err()
      .downcast::<WenRestartError>()
      .unwrap();
      assert_eq!(
          done_error,
          WenRestartError::UnexpectedState(RestartState::Done),
      );
  }
  /// Walks through the error cases of `find_heaviest_fork`: a slot without a block, slots
  /// that do not chain to the expected parent, a block that is not full, and a block that
  /// cannot be frozen after replay.
  #[test]
  fn test_find_heaviest_fork_failures() {
      agave_logger::setup();
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let exit = Arc::new(AtomicBool::new(false));
      let test_state = wen_restart_test_init(&ledger_path);
      let last_vote_slot = test_state.last_voted_fork_slots[0];

      // Every epoch entry in this test uses the same stake numbers.
      let epoch_info = |epoch| LastVotedForkSlotsEpochInfo {
          epoch,
          total_stake: 1000,
          actively_voting_stake: 900,
          actively_voting_for_this_epoch_stake: 900,
      };
      // Runs `find_heaviest_fork` on the given aggregate and returns the error it must
      // produce, downcast to `WenRestartError`.
      let heaviest_fork_error = |slots_stake_map: HashMap<Slot, u64>,
                                 epoch_info_vec: Vec<LastVotedForkSlotsEpochInfo>| {
          find_heaviest_fork(
              LastVotedForkSlotsFinalResult {
                  slots_stake_map,
                  epoch_info_vec,
              },
              test_state.bank_forks.clone(),
              test_state.blockstore.clone(),
              exit.clone(),
          )
          .unwrap_err()
          .downcast::<WenRestartError>()
          .unwrap()
      };
      // Stake map covering our own last voted fork slots, 900 stake each.
      let voted_slots_stake_map = || -> HashMap<Slot, u64> {
          test_state
              .last_voted_fork_slots
              .iter()
              .map(|&slot| (slot, 900))
              .collect()
      };

      // The block for this slot is missing. That should not happen in practice, because all
      // eligible blocks are supposed to be repaired before leaving the LastVotedForkSlots
      // state.
      let slot_with_no_block = 1;
      assert_eq!(
          heaviest_fork_error(
              HashMap::from([(0, 900), (slot_with_no_block, 800)]),
              vec![epoch_info(0)],
          ),
          WenRestartError::BlockNotFound(slot_with_no_block),
      );

      // The first slot in slots_stake_map does not chain to the local root.
      assert_eq!(
          heaviest_fork_error(HashMap::from([(3, 900)]), vec![epoch_info(0)]),
          WenRestartError::BlockNotLinkedToExpectedParent(3, Some(2), 0),
      );

      // A later slot in slots_stake_map does not chain to the slot before it.
      assert_eq!(
          heaviest_fork_error(HashMap::from([(2, 900), (5, 900)]), vec![epoch_info(0)]),
          WenRestartError::BlockNotLinkedToExpectedParent(5, Some(4), 2),
      );

      // The new slot is not full: its last entry is dropped before it is turned into shreds.
      let not_full_slot = last_vote_slot + 5;
      let parent_slot = last_vote_slot;
      let num_slots = (not_full_slot - parent_slot).max(1);
      let mut entries = create_ticks(num_slots * TICKS_PER_SLOT, 0, test_state.last_blockhash);
      assert!(entries.len() > 1);
      entries.pop();
      let shreds = entries_to_test_shreds(&entries, not_full_slot, parent_slot, false, 0);
      test_state
          .blockstore
          .insert_shreds(shreds, None, false)
          .unwrap();
      let mut slots_stake_map = voted_slots_stake_map();
      slots_stake_map.insert(not_full_slot, 800);
      assert_eq!(
          heaviest_fork_error(slots_stake_map, vec![epoch_info(0), epoch_info(1)]),
          WenRestartError::BlockNotFull(not_full_slot),
      );

      // Two more blocks are appended to the chain. The first one is inserted with a single
      // tick only, and replaying it is expected to fail as an incomplete block.
      let missing_parent = last_vote_slot.saturating_add(1);
      let new_slot = last_vote_slot.saturating_add(2);
      let new_hash = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          last_vote_slot,
          &[missing_parent],
          1,
          test_state.last_blockhash,
      );
      let _ = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          missing_parent,
          &[new_slot],
          TICKS_PER_SLOT,
          new_hash,
      );
      let mut slots_stake_map = voted_slots_stake_map();
      slots_stake_map.insert(missing_parent, 800);
      slots_stake_map.insert(new_slot, 800);
      assert_eq!(
          heaviest_fork_error(slots_stake_map, vec![epoch_info(0), epoch_info(1)]),
          WenRestartError::BlockNotFrozenAfterReplay(
              missing_parent,
              Some("invalid block error: incomplete block".to_string()),
          ),
      );
  }
  3042. fn start_aggregate_heaviest_fork_thread(
  3043. test_state: &WenRestartTestInitResult,
  3044. heaviest_fork_slot: Slot,
  3045. heaviest_fork_bankhash: Hash,
  3046. exit: Arc<AtomicBool>,
  3047. expected_error: Option<WenRestartError>,
  3048. ) -> std::thread::JoinHandle<()> {
  3049. let progress = wen_restart_proto::WenRestartProgress {
  3050. state: RestartState::HeaviestFork.into(),
  3051. my_heaviest_fork: Some(HeaviestForkRecord {
  3052. slot: heaviest_fork_slot,
  3053. bankhash: heaviest_fork_bankhash.to_string(),
  3054. total_active_stake: WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT
  3055. .saturating_mul(TOTAL_VALIDATOR_COUNT as u64),
  3056. shred_version: SHRED_VERSION as u32,
  3057. wallclock: 0,
  3058. from: test_state.cluster_info.id().to_string(),
  3059. }),
  3060. ..Default::default()
  3061. };
  3062. let wen_restart_path = test_state.wen_restart_proto_path.clone();
  3063. let cluster_info = test_state.cluster_info.clone();
  3064. let bank_forks = test_state.bank_forks.clone();
  3065. Builder::new()
  3066. .name("solana-wen-restart-aggregate-heaviest-fork".to_string())
  3067. .spawn(move || {
  3068. let result = aggregate_restart_heaviest_fork(
  3069. &wen_restart_path,
  3070. cluster_info,
  3071. bank_forks,
  3072. exit,
  3073. &mut progress.clone(),
  3074. );
  3075. if let Some(expected_error) = expected_error {
  3076. assert_eq!(
  3077. result.unwrap_err().downcast::<WenRestartError>().unwrap(),
  3078. expected_error
  3079. );
  3080. } else {
  3081. assert!(result.is_ok());
  3082. }
  3083. })
  3084. .unwrap()
  3085. }
  /// Runs the heaviest-fork aggregation in a background thread while heaviest-fork
  /// messages from a subset of the test validators are pushed, then sets the exit
  /// flag and checks that the thread finished without panicking.
  #[test]
  fn test_aggregate_heaviest_fork() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      let fork_slot = test_state.last_voted_fork_slots[0] + 3;
      let fork_bankhash = Hash::new_unique();
      let observed_stake = (WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT
          - NON_CONFORMING_VALIDATOR_PERCENT)
          * TOTAL_VALIDATOR_COUNT as u64;
      let exit = Arc::new(AtomicBool::new(false));
      // `None`: the aggregation is expected to return Ok.
      let aggregator =
          start_aggregate_heaviest_fork_thread(&test_state, fork_slot, fork_bankhash, exit.clone(), None);

      let sender_count: usize =
          (WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT * TOTAL_VALIDATOR_COUNT as u64 / 100 - 1)
              .try_into()
              .unwrap();
      test_state
          .validator_voting_keypairs
          .iter()
          .take(sender_count)
          .for_each(|voting_keypairs| {
              let identity = &voting_keypairs.node_keypair;
              let contact_info =
                  ContactInfo::new_rand(&mut rand::thread_rng(), Some(identity.pubkey()));
              let wallclock = timestamp();
              push_restart_heaviest_fork(
                  test_state.cluster_info.clone(),
                  &contact_info,
                  fork_slot,
                  &fork_bankhash,
                  observed_stake,
                  identity,
                  wallclock,
              );
          });

      exit.store(true, Ordering::Relaxed);
      assert!(aggregator.join().is_ok());
  }
  /// Exercises `generate_snapshot` through its success and failure paths: the first
  /// full snapshot, an incremental snapshot on top of it, the three rejected
  /// requests (full snapshot already present, newer incremental snapshot present,
  /// slot without a block), and finally a run with a `LoadOnly` snapshot config.
  #[test]
  fn test_generate_snapshot() {
      agave_logger::setup();
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      let bank_snapshots_tmp = tempfile::TempDir::new().unwrap();
      let full_archives_tmp = tempfile::TempDir::new().unwrap();
      let incremental_archives_tmp = tempfile::TempDir::new().unwrap();
      let generating_config = SnapshotConfig {
          bank_snapshots_dir: bank_snapshots_tmp.path().to_path_buf(),
          full_snapshot_archives_dir: full_archives_tmp.path().to_path_buf(),
          incremental_snapshot_archives_dir: incremental_archives_tmp.path().to_path_buf(),
          usage: SnapshotUsage::LoadAndGenerate,
          ..Default::default()
      };
      let old_root_bank = test_state.bank_forks.read().unwrap().root_bank();
      let old_root_slot = old_root_bank.slot();
      let new_root_slot = test_state.last_voted_fork_slots[1];
      let exit = Arc::new(AtomicBool::new(false));
      let mut fork_slots = test_state.last_voted_fork_slots.clone();
      fork_slots.reverse();
      let old_last_vote_bankhash = find_bankhash_of_heaviest_fork(
          new_root_slot,
          fork_slots,
          test_state.blockstore.clone(),
          test_state.bank_forks.clone(),
          &exit,
      )
      .unwrap();

      // No full snapshot exists yet, so a request for the old root slot is expected
      // to land in the full snapshot archives directory.
      let (abs_request_sender, _abs_request_receiver) = unbounded();
      let controller =
          SnapshotController::new(abs_request_sender.clone(), generating_config, new_root_slot);
      let active_config = controller.snapshot_config();
      let full_record = generate_snapshot(
          test_state.bank_forks.clone(),
          &controller,
          &AbsStatus::new_for_tests(),
          test_state.genesis_config_hash,
          old_root_slot,
      )
      .unwrap();
      assert!(Path::new(&full_record.path).exists());
      let full_dir = active_config.full_snapshot_archives_dir.to_string_lossy();
      assert!(full_record.path.starts_with(full_dir.as_ref()));

      // A second request, for the new root slot, is expected to yield an incremental
      // snapshot based on the old root slot.
      let incremental_record = generate_snapshot(
          test_state.bank_forks.clone(),
          &controller,
          &AbsStatus::new_for_tests(),
          test_state.genesis_config_hash,
          new_root_slot,
      )
      .unwrap();
      let new_root_bankhash = test_state
          .bank_forks
          .read()
          .unwrap()
          .get(new_root_slot)
          .unwrap()
          .hash();
      assert_ne!(old_last_vote_bankhash, new_root_bankhash);
      let new_shred_version = incremental_record.shred_version;
      assert_ne!(new_shred_version, SHRED_VERSION as u32);
      // The snapshot hash is the text between the last '-' of the archive path and the
      // first '.' that follows it.
      let hash_and_extension = incremental_record.path.rsplit('-').next().unwrap();
      let hash_text = hash_and_extension.split('.').next().unwrap();
      let snapshot_hash = Hash::from_str(hash_text).unwrap();
      let expected_incremental_path = build_incremental_snapshot_archive_path(
          &active_config.incremental_snapshot_archives_dir,
          old_root_slot,
          new_root_slot,
          &SnapshotHash(snapshot_hash),
          active_config.archive_format,
      )
      .display()
      .to_string();
      assert_eq!(
          incremental_record,
          GenerateSnapshotRecord {
              slot: new_root_slot,
              bankhash: new_root_bankhash.to_string(),
              shred_version: new_shred_version,
              path: expected_incremental_path,
          },
      );

      // Asking again for the old root slot must be rejected: a full snapshot for it
      // already exists.
      let full_exists_err = generate_snapshot(
          test_state.bank_forks.clone(),
          &controller,
          &AbsStatus::new_for_tests(),
          test_state.genesis_config_hash,
          old_root_slot,
      )
      .unwrap_err()
      .downcast::<WenRestartError>()
      .unwrap();
      assert_eq!(
          full_exists_err,
          WenRestartError::GenerateSnapshotWhenOneExists(
              old_root_slot,
              active_config
                  .full_snapshot_archives_dir
                  .to_string_lossy()
                  .to_string()
          ),
      );

      // A slot older than the incremental snapshot just written at new_root_slot must
      // be rejected as well.
      let older_slot = new_root_slot - 1;
      let future_exists_err = generate_snapshot(
          test_state.bank_forks.clone(),
          &controller,
          &AbsStatus::new_for_tests(),
          test_state.genesis_config_hash,
          older_slot,
      )
      .unwrap_err()
      .downcast::<WenRestartError>()
      .unwrap();
      assert_eq!(
          future_exists_err,
          WenRestartError::FutureSnapshotExists(
              older_slot,
              new_root_slot,
              active_config
                  .incremental_snapshot_archives_dir
                  .to_string_lossy()
                  .to_string()
          ),
      );

      // A slot that has no block at all cannot be snapshotted.
      let empty_slot = new_root_slot + 100;
      let not_found_err = generate_snapshot(
          test_state.bank_forks.clone(),
          &controller,
          &AbsStatus::new_for_tests(),
          test_state.genesis_config_hash,
          empty_slot,
      )
      .unwrap_err()
      .downcast::<WenRestartError>()
      .unwrap();
      assert_eq!(not_found_err, WenRestartError::BlockNotFound(empty_slot));

      // With snapshot generation switched off (LoadOnly), the request is expected to
      // produce a full snapshot.
      let load_only_config = SnapshotConfig {
          bank_snapshots_dir: bank_snapshots_tmp.path().to_path_buf(),
          full_snapshot_archives_dir: full_archives_tmp.path().to_path_buf(),
          incremental_snapshot_archives_dir: incremental_archives_tmp.path().to_path_buf(),
          usage: SnapshotUsage::LoadOnly,
          ..Default::default()
      };
      let controller =
          SnapshotController::new(abs_request_sender.clone(), load_only_config, new_root_slot);
      let active_config = controller.snapshot_config();
      let load_only_record = generate_snapshot(
          test_state.bank_forks.clone(),
          &controller,
          &AbsStatus::new_for_tests(),
          test_state.genesis_config_hash,
          test_state.last_voted_fork_slots[0],
      )
      .unwrap();
      assert!(Path::new(&load_only_record.path).exists());
      let full_dir = active_config.full_snapshot_archives_dir.to_string_lossy();
      assert!(load_only_record.path.starts_with(full_dir.as_ref()));
  }
  /// Checks how `wait_for_wen_restart` treats a progress file already in the `Done`
  /// state: it fails with `MissingSnapshotInProtobuf` when no snapshot record is
  /// stored, and returns Ok once a snapshot record is present.
  #[test]
  fn test_return_ok_after_wait_is_done() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      let voted_slot = test_state.last_voted_fork_slots[0];
      let voted_bankhash = Hash::new_unique();
      let config = WenRestartConfig {
          wen_restart_path: test_state.wen_restart_proto_path.clone(),
          wen_restart_coordinator: test_state.wen_restart_coordinator,
          last_vote: VoteTransaction::from(Vote::new(vec![voted_slot], voted_bankhash)),
          blockstore: test_state.blockstore.clone(),
          cluster_info: test_state.cluster_info.clone(),
          bank_forks: test_state.bank_forks.clone(),
          wen_restart_repair_slots: Some(Arc::new(RwLock::new(Vec::new()))),
          wait_for_supermajority_threshold_percent: 80,
          snapshot_controller: None,
          abs_status: AbsStatus::new_for_tests(),
          genesis_config_hash: test_state.genesis_config_hash,
          exit: Arc::new(AtomicBool::new(false)),
      };

      // Done state without a snapshot record.
      let done_without_snapshot = WenRestartProgress {
          state: RestartState::Done.into(),
          ..Default::default()
      };
      let write_result =
          write_wen_restart_records(&test_state.wen_restart_proto_path, &done_without_snapshot);
      assert!(write_result.is_ok());
      let missing_snapshot_err = wait_for_wen_restart(config.clone())
          .unwrap_err()
          .downcast::<WenRestartError>()
          .unwrap();
      assert_eq!(
          missing_snapshot_err,
          WenRestartError::MissingSnapshotInProtobuf
      );

      // Done state with a snapshot record.
      let done_with_snapshot = WenRestartProgress {
          state: RestartState::Done.into(),
          my_snapshot: Some(GenerateSnapshotRecord {
              slot: 0,
              bankhash: Hash::new_unique().to_string(),
              shred_version: SHRED_VERSION as u32,
              path: "snapshot".to_string(),
          }),
          ..Default::default()
      };
      let write_result =
          write_wen_restart_records(&test_state.wen_restart_proto_path, &done_with_snapshot);
      assert!(write_result.is_ok());
      assert!(wait_for_wen_restart(config).is_ok());
  }
  /// Pushes heaviest-fork messages from an unrelated node and from the coordinator,
  /// then checks that `receive_restart_heaviest_fork` returns the slot and hash that
  /// the coordinator sent.
  #[test]
  fn test_receive_restart_heaviest_fork() {
      let mut rng = rand::thread_rng();
      let coordinator_keypair = Keypair::new();
      let node_keypair = Arc::new(Keypair::new());
      let mut my_contact_info = ContactInfo::new_localhost(&node_keypair.pubkey(), timestamp());
      my_contact_info.set_shred_version(SHRED_VERSION);
      let cluster_info = Arc::new(ClusterInfo::new(
          my_contact_info,
          node_keypair.clone(),
          SocketAddrSpace::Unspecified,
      ));
      let exit = Arc::new(AtomicBool::new(false));

      // Message from a node that is not the coordinator.
      let bystander_keypair = Keypair::new();
      let bystander_node = ContactInfo::new_rand(&mut rng, Some(bystander_keypair.pubkey()));
      let bystander_slot = 3;
      let bystander_hash = Hash::new_unique();
      push_restart_heaviest_fork(
          cluster_info.clone(),
          &bystander_node,
          bystander_slot,
          &bystander_hash,
          0,
          &bystander_keypair,
          timestamp(),
      );

      // Message from the coordinator itself.
      let coordinator_node = ContactInfo::new_rand(&mut rng, Some(coordinator_keypair.pubkey()));
      let coordinator_slot = 6;
      let coordinator_hash = Hash::new_unique();
      push_restart_heaviest_fork(
          cluster_info.clone(),
          &coordinator_node,
          coordinator_slot,
          &coordinator_hash,
          0,
          &coordinator_keypair,
          timestamp(),
      );

      let mut progress = WenRestartProgress {
          state: RestartState::HeaviestFork.into(),
          ..Default::default()
      };
      let received = receive_restart_heaviest_fork(
          coordinator_keypair.pubkey(),
          cluster_info,
          exit,
          &mut progress,
      )
      .unwrap();
      assert_eq!(received, (coordinator_slot, coordinator_hash));
  }
  /// Runs `repair_heaviest_fork` in a background thread and feeds the blockstore one
  /// block at a time, checking the requested repair slots after each step until the
  /// coordinator's fork links back to the local heaviest fork slot.
  #[test]
  fn test_repair_heaviest_fork() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let my_heaviest_fork_slot = 1;
      let coordinator_heaviest_slot_parent = 2;
      let coordinator_heaviest_slot = 3;
      let exit = Arc::new(AtomicBool::new(false));
      let blockstore = Arc::new(Blockstore::open(ledger_path.path()).unwrap());
      let wen_restart_repair_slots = Arc::new(RwLock::new(Vec::new()));

      let repair_thread = {
          // Handles owned by the worker thread.
          let thread_exit = exit.clone();
          let thread_blockstore = blockstore.clone();
          let thread_repair_slots = wen_restart_repair_slots.clone();
          Builder::new()
              .name("solana-repair-heaviest-fork".to_string())
              .spawn(move || {
                  let outcome = repair_heaviest_fork(
                      my_heaviest_fork_slot,
                      coordinator_heaviest_slot,
                      thread_exit,
                      thread_blockstore,
                      thread_repair_slots,
                  );
                  assert!(outcome.is_ok());
              })
              .unwrap()
      };

      sleep(Duration::from_millis(GOSSIP_SLEEP_MILLIS));
      // Empty blockstore: the coordinator's heaviest slot itself must be repaired.
      assert_eq!(
          *wen_restart_repair_slots.read().unwrap(),
          vec![coordinator_heaviest_slot]
      );

      // Block 3 arrives with parent 2, so the repair request moves on to slot 2.
      let _ = insert_slots_into_blockstore(
          blockstore.clone(),
          coordinator_heaviest_slot_parent,
          &[coordinator_heaviest_slot],
          TICKS_PER_SLOT,
          Hash::default(),
      );
      sleep(Duration::from_millis(GOSSIP_SLEEP_MILLIS));
      assert_eq!(
          *wen_restart_repair_slots.read().unwrap(),
          vec![coordinator_heaviest_slot_parent]
      );

      // Block 2 arrives with parent 1; the chain is now linked and the thread should
      // finish.
      let _ = insert_slots_into_blockstore(
          blockstore.clone(),
          my_heaviest_fork_slot,
          &[coordinator_heaviest_slot_parent],
          TICKS_PER_SLOT,
          Hash::default(),
      );
      repair_thread.join().unwrap();
  }
  /// Checks the two error paths of `verify_coordinator_heaviest_fork`: the
  /// coordinator's slot sitting on a different fork than the local slot, and a bank
  /// hash mismatch on the same slot.
  #[test]
  fn test_verify_coordinator_heaviest_fork() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      let last_vote = test_state.last_voted_fork_slots[0];
      let exit = Arc::new(AtomicBool::new(false));
      let root_bank = test_state.bank_forks.read().unwrap().root_bank().clone();

      // Two sibling forks off last_vote: last_vote + 1 (coordinator) and
      // last_vote + 2 (mine).
      let coordinator_slot = last_vote + 1;
      let my_slot = last_vote + 2;
      for child_slot in [coordinator_slot, my_slot] {
          let _ = insert_slots_into_blockstore(
              test_state.blockstore.clone(),
              last_vote,
              &[child_slot],
              TICKS_PER_SLOT,
              test_state.last_blockhash,
          );
      }
      let wen_restart_repair_slots = Arc::new(RwLock::new(Vec::new()));

      let different_fork_err = verify_coordinator_heaviest_fork(
          my_slot,
          coordinator_slot,
          &Hash::default(),
          test_state.bank_forks.clone(),
          test_state.blockstore.clone(),
          exit.clone(),
          wen_restart_repair_slots.clone(),
      )
      .unwrap_err()
      .downcast::<WenRestartError>()
      .unwrap();
      assert_eq!(
          different_fork_err,
          WenRestartError::HeaviestForkOnLeaderOnDifferentFork(coordinator_slot, my_slot)
      );

      // Same slot on both sides, but the coordinator reports a different bank hash.
      let coordinator_hash = Hash::new_unique();
      let my_hash = root_bank.hash();
      let root_slot = root_bank.slot();
      let hash_mismatch_err = verify_coordinator_heaviest_fork(
          root_slot,
          root_slot,
          &coordinator_hash,
          test_state.bank_forks.clone(),
          test_state.blockstore.clone(),
          exit.clone(),
          wen_restart_repair_slots.clone(),
      )
      .unwrap_err()
      .downcast::<WenRestartError>()
      .unwrap();
      assert_eq!(
          hash_mismatch_err,
          WenRestartError::BankHashMismatch(root_slot, my_hash, coordinator_hash)
      );
  }
  /// Covers `send_and_receive_heaviest_fork` in three situations: this node is the
  /// coordinator, another node is the coordinator and agrees with a fork this node
  /// has, and another node is the coordinator while this node's slot sits on a
  /// different fork.
  #[test]
  fn test_send_and_receive_heaviest_fork() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      let last_vote = test_state.last_voted_fork_slots[0];
      let exit = Arc::new(AtomicBool::new(false));
      // Written by the push callback so each call's pushed values can be inspected.
      let mut pushed_slot = 0;
      let mut pushed_hash = Hash::default();

      // The coordinator always sends its own choice.
      let coordinator_slot = last_vote;
      let mut fork_slots = test_state.last_voted_fork_slots.clone();
      fork_slots.reverse();
      let coordinator_hash = find_bankhash_of_heaviest_fork(
          coordinator_slot,
          fork_slots,
          test_state.blockstore.clone(),
          test_state.bank_forks.clone(),
          &exit,
      )
      .unwrap();
      let mut progress = WenRestartProgress {
          state: RestartState::HeaviestFork.into(),
          ..Default::default()
      };

      // Case 1: this node is the coordinator, so its own choice comes back.
      let mut config = WenRestartConfig {
          wen_restart_path: test_state.wen_restart_proto_path.clone(),
          wen_restart_coordinator: test_state.cluster_info.id(),
          last_vote: VoteTransaction::from(Vote::new(vec![last_vote], Hash::default())),
          blockstore: test_state.blockstore.clone(),
          cluster_info: test_state.cluster_info.clone(),
          bank_forks: test_state.bank_forks.clone(),
          wen_restart_repair_slots: Some(Arc::new(RwLock::new(Vec::new()))),
          wait_for_supermajority_threshold_percent: 80,
          snapshot_controller: None,
          abs_status: AbsStatus::new_for_tests(),
          genesis_config_hash: test_state.genesis_config_hash,
          exit: exit.clone(),
      };
      let own_choice = send_and_receive_heaviest_fork(
          coordinator_slot,
          coordinator_hash,
          &config,
          &mut progress,
          |s, h| {
              pushed_slot = s;
              pushed_hash = h;
          },
      )
      .unwrap();
      assert_eq!(own_choice, (coordinator_slot, coordinator_hash));
      assert_eq!(pushed_slot, coordinator_slot);
      assert_eq!(pushed_hash, coordinator_hash);

      // Case 2: the coordinator is now someone else; their choice must be returned.
      let coordinator_keypair =
          &test_state.validator_voting_keypairs[COORDINATOR_INDEX].node_keypair;
      config.wen_restart_coordinator = coordinator_keypair.pubkey();
      let mut rng = rand::thread_rng();
      let coordinator_node = ContactInfo::new_rand(&mut rng, Some(coordinator_keypair.pubkey()));
      let wallclock = timestamp();
      push_restart_heaviest_fork(
          test_state.cluster_info.clone(),
          &coordinator_node,
          coordinator_slot,
          &coordinator_hash,
          0,
          coordinator_keypair,
          wallclock,
      );
      let my_slot = test_state.last_voted_fork_slots[1];
      let my_hash = test_state
          .bank_forks
          .read()
          .unwrap()
          .get(my_slot)
          .unwrap()
          .hash();
      let their_choice = send_and_receive_heaviest_fork(
          my_slot,
          my_hash,
          &config,
          &mut progress,
          |s, h| {
              pushed_slot = s;
              pushed_hash = h;
          },
      )
      .unwrap();
      assert_eq!(their_choice, (coordinator_slot, coordinator_hash));
      assert_eq!(pushed_slot, coordinator_slot);
      assert_eq!(pushed_hash, coordinator_hash);

      // Case 3: my slot is on a different fork. The call must fail, yet my own
      // heaviest fork still has to be pushed.
      let my_slot = coordinator_slot + 1;
      let _ = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          0,
          &[coordinator_slot],
          TICKS_PER_SLOT,
          test_state.last_blockhash,
      );
      let my_hash = Hash::new_unique();
      let different_fork_err = send_and_receive_heaviest_fork(
          my_slot,
          my_hash,
          &config,
          &mut progress,
          |s, h| {
              pushed_slot = s;
              pushed_hash = h;
          },
      )
      .unwrap_err()
      .downcast::<WenRestartError>()
      .unwrap();
      assert_eq!(
          different_fork_err,
          WenRestartError::HeaviestForkOnLeaderOnDifferentFork(coordinator_slot, my_slot)
      );
      assert_eq!(pushed_slot, my_slot);
      assert_eq!(pushed_hash, my_hash);
  }
  3660. fn run_and_check_find_bankhash_of_heaviest_fork(
  3661. test_state: &WenRestartTestInitResult,
  3662. slots: &[Slot],
  3663. slot: Slot,
  3664. ) {
  3665. let exit = Arc::new(AtomicBool::new(false));
  3666. assert_eq!(
  3667. find_bankhash_of_heaviest_fork(
  3668. slot,
  3669. slots.to_vec(),
  3670. test_state.blockstore.clone(),
  3671. test_state.bank_forks.clone(),
  3672. &exit,
  3673. )
  3674. .unwrap(),
  3675. test_state
  3676. .bank_forks
  3677. .read()
  3678. .unwrap()
  3679. .get(slot)
  3680. .unwrap()
  3681. .hash()
  3682. );
  3683. }
  /// Checks `find_bankhash_of_heaviest_fork` for the last voted slot, for a newly
  /// inserted child of it, and for a slot whose bank was inserted into bank forks
  /// directly from a parent bank.
  #[test]
  fn test_find_bankhash_of_heaviest_fork() {
      let ledger_path = get_tmp_ledger_path_auto_delete!();
      let test_state = wen_restart_test_init(&ledger_path);
      let last_vote = test_state.last_voted_fork_slots[0];
      let mut fork_slots = test_state.last_voted_fork_slots.clone();
      fork_slots.reverse();
      run_and_check_find_bankhash_of_heaviest_fork(&test_state, &fork_slots, last_vote);

      // Extend the fork by one block that exists only in the blockstore.
      let new_slot = last_vote + 1;
      let _ = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          last_vote,
          &[new_slot],
          TICKS_PER_SLOT,
          test_state.last_blockhash,
      );
      fork_slots.push(new_slot);
      run_and_check_find_bankhash_of_heaviest_fork(&test_state, &fork_slots, new_slot);

      // Add another block under last_vote and insert a bank for it directly into bank
      // forks, built on top of new_slot's bank.
      let slot_full_but_not_replayed = last_vote + 2;
      let _ = insert_slots_into_blockstore(
          test_state.blockstore.clone(),
          last_vote,
          &[slot_full_but_not_replayed],
          TICKS_PER_SLOT,
          test_state.last_blockhash,
      );
      let parent_bank = test_state.bank_forks.read().unwrap().get(new_slot).unwrap();
      let unreplayed_bank =
          Bank::new_from_parent(parent_bank, &Pubkey::default(), slot_full_but_not_replayed);
      let _ = test_state
          .bank_forks
          .write()
          .unwrap()
          .insert_from_ledger(unreplayed_bank);
      run_and_check_find_bankhash_of_heaviest_fork(
          &test_state,
          &fork_slots,
          slot_full_but_not_replayed,
      );
  }
  3726. }