// local_cluster.rs

#![allow(clippy::arithmetic_side_effects)]
use {
    agave_snapshots::{
        paths as snapshot_paths, snapshot_archive_info::SnapshotArchiveInfoGetter,
        snapshot_config::SnapshotConfig, SnapshotInterval, SnapshotKind,
    },
    assert_matches::assert_matches,
    crossbeam_channel::{unbounded, Receiver},
    gag::BufferRedirect,
    itertools::Itertools,
    log::*,
    rand::seq::SliceRandom,
    serial_test::serial,
    solana_account::AccountSharedData,
    solana_accounts_db::utils::create_accounts_run_and_snapshot_dirs,
    solana_client_traits::AsyncClient,
    solana_clock::{
        self as clock, Slot, DEFAULT_SLOTS_PER_EPOCH, DEFAULT_TICKS_PER_SLOT, MAX_PROCESSING_AGE,
    },
    solana_cluster_type::ClusterType,
    solana_commitment_config::CommitmentConfig,
    solana_core::{
        consensus::{
            tower_storage::FileTowerStorage, Tower, SWITCH_FORK_THRESHOLD, VOTE_THRESHOLD_DEPTH,
        },
        optimistic_confirmation_verifier::OptimisticConfirmationVerifier,
        replay_stage::DUPLICATE_THRESHOLD,
        validator::{BlockVerificationMethod, ValidatorConfig},
    },
    solana_download_utils::download_snapshot_archive,
    solana_entry::entry::create_ticks,
    solana_epoch_schedule::{MAX_LEADER_SCHEDULE_EPOCH_OFFSET, MINIMUM_SLOTS_PER_EPOCH},
    solana_genesis_utils::open_genesis_config,
    solana_gossip::{crds_data::MAX_VOTES, gossip_service::discover_validators},
    solana_hard_forks::HardForks,
    solana_hash::Hash,
    solana_keypair::Keypair,
    solana_ledger::{
        ancestor_iterator::AncestorIterator,
        bank_forks_utils,
        blockstore::{entries_to_test_shreds, Blockstore},
        blockstore_processor::ProcessOptions,
        leader_schedule::{FixedSchedule, IdentityKeyedLeaderSchedule},
        shred::{ProcessShredsStats, ReedSolomonCache, Shred, Shredder},
        use_snapshot_archives_at_startup::UseSnapshotArchivesAtStartup,
    },
    solana_local_cluster::{
        cluster::{Cluster, ClusterValidatorInfo, QuicTpuClient},
        cluster_tests,
        integration_tests::{
            copy_blocks, create_custom_leader_schedule,
            create_custom_leader_schedule_with_random_keys, farf_dir, generate_account_paths,
            last_root_in_tower, last_vote_in_tower, ms_for_n_slots, open_blockstore,
            purge_slots_with_count, remove_tower, remove_tower_if_exists, restore_tower,
            run_cluster_partition, run_kill_partition_switch_threshold, save_tower,
            setup_snapshot_validator_config, test_faulty_node, wait_for_duplicate_proof,
            wait_for_last_vote_in_tower_to_land_in_ledger, SnapshotValidatorConfig,
            ValidatorTestConfig, DEFAULT_NODE_STAKE, RUST_LOG_FILTER,
        },
        local_cluster::{ClusterConfig, LocalCluster, DEFAULT_MINT_LAMPORTS},
        validator_configs::*,
    },
    solana_net_utils::SocketAddrSpace,
    solana_poh_config::PohConfig,
    solana_pubkey::Pubkey,
    solana_pubsub_client::pubsub_client::PubsubClient,
    solana_rpc_client::rpc_client::RpcClient,
    solana_rpc_client_api::{
        config::{
            RpcBlockSubscribeConfig, RpcBlockSubscribeFilter, RpcProgramAccountsConfig,
            RpcSignatureSubscribeConfig,
        },
        response::RpcSignatureResult,
    },
    solana_runtime::{commitment::VOTE_THRESHOLD_SIZE, snapshot_bank_utils, snapshot_utils},
    solana_signer::Signer,
    solana_stake_interface::{self as stake, state::NEW_WARMUP_COOLDOWN_RATE},
    solana_system_interface::program as system_program,
    solana_system_transaction as system_transaction,
    solana_turbine::broadcast_stage::{
        broadcast_duplicates_run::{BroadcastDuplicatesConfig, ClusterPartition},
        BroadcastStageType,
    },
    solana_vote::{vote_parser, vote_transaction},
    solana_vote_interface::state::TowerSync,
    solana_vote_program::vote_state::MAX_LOCKOUT_HISTORY,
    std::{
        collections::{BTreeSet, HashMap, HashSet},
        fs,
        io::Read,
        iter,
        num::NonZeroU64,
        path::Path,
        sync::{
            atomic::{AtomicBool, AtomicUsize, Ordering},
            Arc, Mutex,
        },
        thread::{sleep, Builder, JoinHandle},
        time::{Duration, Instant},
    },
    strum::{EnumCount, IntoEnumIterator},
};

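// These tests are serial, end-to-end integration tests: each one spins up an
// in-process `LocalCluster` (one or more validators plus a funded mint
// keypair) and exercises it over gossip discovery, RPC/pubsub, and the TPU
// QUIC client.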
#[test]
#[serial]
fn test_local_cluster_start_and_exit() {
    agave_logger::setup();
    let num_nodes = 1;
    let cluster = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    assert_eq!(cluster.validators.len(), num_nodes);
}

#[test]
#[serial]
fn test_local_cluster_start_and_exit_with_config() {
    agave_logger::setup();
    const NUM_NODES: usize = 1;
    let mut config = ClusterConfig {
        validator_configs: make_identical_validator_configs(
            &ValidatorConfig::default_for_test(),
            NUM_NODES,
        ),
        node_stakes: vec![DEFAULT_NODE_STAKE; NUM_NODES],
        ticks_per_slot: 8,
        slots_per_epoch: MINIMUM_SLOTS_PER_EPOCH,
        stakers_slot_offset: MINIMUM_SLOTS_PER_EPOCH,
        ..ClusterConfig::default()
    };
    let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    assert_eq!(cluster.validators.len(), NUM_NODES);
}

#[test]
#[serial]
fn test_spend_and_verify_all_nodes_1() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_spend_and_verify_all_nodes_1");
    let num_nodes = 1;
    let local = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    cluster_tests::spend_and_verify_all_nodes(
        &local.entry_point_info,
        &local.funding_keypair,
        num_nodes,
        HashSet::new(),
        SocketAddrSpace::Unspecified,
        &local.connection_cache,
    );
}

#[test]
#[serial]
fn test_spend_and_verify_all_nodes_2() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_spend_and_verify_all_nodes_2");
    let num_nodes = 2;
    let local = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    cluster_tests::spend_and_verify_all_nodes(
        &local.entry_point_info,
        &local.funding_keypair,
        num_nodes,
        HashSet::new(),
        SocketAddrSpace::Unspecified,
        &local.connection_cache,
    );
}

#[test]
#[serial]
fn test_spend_and_verify_all_nodes_3() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_spend_and_verify_all_nodes_3");
    let num_nodes = 3;
    let local = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    cluster_tests::spend_and_verify_all_nodes(
        &local.entry_point_info,
        &local.funding_keypair,
        num_nodes,
        HashSet::new(),
        SocketAddrSpace::Unspecified,
        &local.connection_cache,
    );
}

#[test]
#[serial]
fn test_local_cluster_signature_subscribe() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let num_nodes = 2;
    let cluster = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    let nodes = cluster.get_node_pubkeys();
    // Get a non-leader node
    let non_bootstrap_id = nodes
        .into_iter()
        .find(|id| id != cluster.entry_point_info.pubkey())
        .unwrap();
    let non_bootstrap_info = cluster.get_contact_info(&non_bootstrap_id).unwrap();
    let tx_client = cluster
        .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
        .unwrap();
    let (blockhash, _) = tx_client
        .rpc_client()
        .get_latest_blockhash_with_commitment(CommitmentConfig::processed())
        .unwrap();
    let mut transaction = system_transaction::transfer(
        &cluster.funding_keypair,
        &solana_pubkey::new_rand(),
        10,
        blockhash,
    );
    let (mut sig_subscribe_client, receiver) = PubsubClient::signature_subscribe(
        format!("ws://{}", non_bootstrap_info.rpc_pubsub().unwrap()),
        &transaction.signatures[0],
        Some(RpcSignatureSubscribeConfig {
            commitment: Some(CommitmentConfig::processed()),
            enable_received_notification: Some(true),
        }),
    )
    .unwrap();
    LocalCluster::send_transaction_with_retries(
        &tx_client,
        &[&cluster.funding_keypair],
        &mut transaction,
        5,
    )
    .unwrap();
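    // Poll the subscription until the transaction is reported as processed.
    // Because `enable_received_notification` is set, a "received" notification
    // is also expected along the way and is asserted at the end of the test.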
    let mut got_received_notification = false;
    loop {
        let responses: Vec<_> = receiver.try_iter().collect();
        let mut should_break = false;
        for response in responses {
            match response.value {
                RpcSignatureResult::ProcessedSignature(_) => {
                    should_break = true;
                    break;
                }
                RpcSignatureResult::ReceivedSignature(_) => {
                    got_received_notification = true;
                }
            }
        }
        if should_break {
            break;
        }
        sleep(Duration::from_millis(100));
    }
    // If we don't drop the cluster, the blocking web socket service
    // won't return, and the `sig_subscribe_client` won't shut down
    drop(cluster);
    sig_subscribe_client.shutdown().unwrap();
    assert!(got_received_notification);
}

#[test]
#[serial]
fn test_two_unbalanced_stakes() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_two_unbalanced_stakes");
    let validator_config = ValidatorConfig::default_for_test();
    let num_ticks_per_second = 100;
    let num_ticks_per_slot = 16;
    let num_slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH;
    let mut cluster = LocalCluster::new(
        &mut ClusterConfig {
            node_stakes: vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
            mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
            validator_configs: make_identical_validator_configs(&validator_config, 2),
            ticks_per_slot: num_ticks_per_slot,
            slots_per_epoch: num_slots_per_epoch,
            stakers_slot_offset: num_slots_per_epoch,
            poh_config: PohConfig::new_sleep(Duration::from_millis(1000 / num_ticks_per_second)),
            ..ClusterConfig::default()
        },
        SocketAddrSpace::Unspecified,
    );
    cluster_tests::sleep_n_epochs(
        10.0,
        &cluster.genesis_config.poh_config,
        num_ticks_per_slot,
        num_slots_per_epoch,
    );
    cluster.close_preserve_ledgers();
    let leader_pubkey = *cluster.entry_point_info.pubkey();
    let leader_ledger = cluster.validators[&leader_pubkey].info.ledger_path.clone();
    cluster_tests::verify_ledger_ticks(&leader_ledger, num_ticks_per_slot as usize);
}

#[test]
#[serial]
fn test_forwarding() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // Set up a cluster where one node is never the leader, so all txs sent to this node
    // will have to be forwarded in order to be confirmed
    let mut config = ClusterConfig {
        node_stakes: vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
        mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
        validator_configs: make_identical_validator_configs(
            &ValidatorConfig::default_for_test(),
            2,
        ),
        ..ClusterConfig::default()
    };
    let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let cluster_nodes = discover_validators(
        &cluster.entry_point_info.gossip().unwrap(),
        2,
        cluster.entry_point_info.shred_version(),
        SocketAddrSpace::Unspecified,
    )
    .unwrap();
    assert!(cluster_nodes.len() >= 2);
    let leader_pubkey = *cluster.entry_point_info.pubkey();
    let validator_info = cluster_nodes
        .iter()
        .find(|c| c.pubkey() != &leader_pubkey)
        .unwrap();
    // Confirm that transactions were forwarded to and processed by the leader.
    cluster_tests::send_many_transactions(
        validator_info,
        &cluster.funding_keypair,
        &cluster.connection_cache,
        10,
        20,
    );
}

#[test]
#[serial]
fn test_restart_node() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_restart_node");
    let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH * 2;
    let ticks_per_slot = 16;
    let validator_config = ValidatorConfig::default_for_test();
    let mut cluster = LocalCluster::new(
        &mut ClusterConfig {
            node_stakes: vec![DEFAULT_NODE_STAKE],
            validator_configs: vec![safe_clone_config(&validator_config)],
            ticks_per_slot,
            slots_per_epoch,
            stakers_slot_offset: slots_per_epoch,
            skip_warmup_slots: true,
            ..ClusterConfig::default()
        },
        SocketAddrSpace::Unspecified,
    );
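    // Let the single node run for an epoch, restart it, then confirm it still
    // processes transactions after coming back up.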
    let nodes = cluster.get_node_pubkeys();
    cluster_tests::sleep_n_epochs(
        1.0,
        &cluster.genesis_config.poh_config,
        clock::DEFAULT_TICKS_PER_SLOT,
        slots_per_epoch,
    );
    cluster.exit_restart_node(&nodes[0], validator_config, SocketAddrSpace::Unspecified);
    cluster_tests::sleep_n_epochs(
        0.5,
        &cluster.genesis_config.poh_config,
        clock::DEFAULT_TICKS_PER_SLOT,
        slots_per_epoch,
    );
    cluster_tests::send_many_transactions(
        &cluster.entry_point_info,
        &cluster.funding_keypair,
        &cluster.connection_cache,
        10,
        1,
    );
}

#[test]
#[serial]
fn test_mainnet_beta_cluster_type() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let mut config = ClusterConfig {
        cluster_type: ClusterType::MainnetBeta,
        node_stakes: vec![DEFAULT_NODE_STAKE],
        validator_configs: make_identical_validator_configs(
            &ValidatorConfig::default_for_test(),
            1,
        ),
        ..ClusterConfig::default()
    };
    let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let cluster_nodes = discover_validators(
        &cluster.entry_point_info.gossip().unwrap(),
        1,
        cluster.entry_point_info.shred_version(),
        SocketAddrSpace::Unspecified,
    )
    .unwrap();
    assert_eq!(cluster_nodes.len(), 1);
    let client = cluster
        .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
        .unwrap();
    // Programs that are available at epoch 0
    for program_id in [
        &solana_sdk_ids::system_program::id(),
        &stake::program::id(),
        &solana_vote_program::id(),
        &solana_sdk_ids::bpf_loader_deprecated::id(),
        &solana_sdk_ids::bpf_loader::id(),
        &solana_sdk_ids::bpf_loader_upgradeable::id(),
    ]
    .iter()
    {
        assert_matches!(
            (
                program_id,
                client
                    .rpc_client()
                    .get_account_with_commitment(program_id, CommitmentConfig::processed())
                    .unwrap()
                    .value
            ),
            (_program_id, Some(_))
        );
    }
    // Programs that are not available at epoch 0
    for program_id in [].iter() {
        assert_eq!(
            (
                program_id,
                client
                    .rpc_client()
                    .get_account_with_commitment(program_id, CommitmentConfig::processed())
                    .unwrap()
                    .value
            ),
            (program_id, None)
        );
    }
}

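// Starts a single-node cluster with snapshots enabled, waits for the leader to
// produce a full snapshot archive, downloads that archive over RPC, and then
// boots a second validator from it.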
#[test]
#[serial]
fn test_snapshot_download() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // First set up the cluster with 1 node
    let snapshot_interval_slots = NonZeroU64::new(50).unwrap();
    let num_account_paths = 3;
    let leader_snapshot_test_config =
        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
    let validator_snapshot_test_config =
        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
    let stake = DEFAULT_NODE_STAKE;
    let mut config = ClusterConfig {
        node_stakes: vec![stake],
        validator_configs: make_identical_validator_configs(
            &leader_snapshot_test_config.validator_config,
            1,
        ),
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let full_snapshot_archives_dir = &leader_snapshot_test_config
        .validator_config
        .snapshot_config
        .full_snapshot_archives_dir;
    trace!("Waiting for snapshot");
    let full_snapshot_archive_info = cluster.wait_for_next_full_snapshot(
        full_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    trace!("found: {}", full_snapshot_archive_info.path().display());
    // Download the snapshot, then boot a validator from it.
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .full_snapshot_archives_dir,
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .incremental_snapshot_archives_dir,
        (
            full_snapshot_archive_info.slot(),
            *full_snapshot_archive_info.hash(),
        ),
        SnapshotKind::FullSnapshot,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    cluster.add_validator(
        &validator_snapshot_test_config.validator_config,
        stake,
        Arc::new(Keypair::new()),
        None,
        SocketAddrSpace::Unspecified,
    );
}

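// Same flow as test_snapshot_download, but with incremental snapshots enabled:
// wait for the leader to produce both a full and an incremental archive,
// download both, then boot a new validator from them.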
#[test]
#[serial]
fn test_incremental_snapshot_download() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // First set up the cluster with 1 node
    let incremental_snapshot_interval = 9;
    let full_snapshot_interval = incremental_snapshot_interval * 3;
    let num_account_paths = 3;
    let leader_snapshot_test_config = SnapshotValidatorConfig::new(
        SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
        SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
        num_account_paths,
    );
    let validator_snapshot_test_config = SnapshotValidatorConfig::new(
        SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
        SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
        num_account_paths,
    );
    let stake = DEFAULT_NODE_STAKE;
    let mut config = ClusterConfig {
        node_stakes: vec![stake],
        validator_configs: make_identical_validator_configs(
            &leader_snapshot_test_config.validator_config,
            1,
        ),
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let full_snapshot_archives_dir = &leader_snapshot_test_config
        .validator_config
        .snapshot_config
        .full_snapshot_archives_dir;
    let incremental_snapshot_archives_dir = &leader_snapshot_test_config
        .validator_config
        .snapshot_config
        .incremental_snapshot_archives_dir;
    debug!(
        "snapshot config:\n\tfull snapshot interval: {full_snapshot_interval}\n\tincremental \
         snapshot interval: {incremental_snapshot_interval}",
    );
    debug!(
        "leader config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
         {}\n\tincremental snapshot archives dir: {}",
        leader_snapshot_test_config
            .bank_snapshots_dir
            .path()
            .display(),
        leader_snapshot_test_config
            .full_snapshot_archives_dir
            .path()
            .display(),
        leader_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path()
            .display(),
    );
    debug!(
        "validator config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
         {}\n\tincremental snapshot archives dir: {}",
        validator_snapshot_test_config
            .bank_snapshots_dir
            .path()
            .display(),
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path()
            .display(),
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path()
            .display(),
    );
    trace!("Waiting for snapshots");
    let (incremental_snapshot_archive_info, full_snapshot_archive_info) = cluster
        .wait_for_next_incremental_snapshot(
            full_snapshot_archives_dir,
            incremental_snapshot_archives_dir,
            Some(Duration::from_secs(5 * 60)),
        );
    trace!(
        "found: {} and {}",
        full_snapshot_archive_info.path().display(),
        incremental_snapshot_archive_info.path().display()
    );
    // Download the snapshots, then boot a validator from them.
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .full_snapshot_archives_dir,
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .incremental_snapshot_archives_dir,
        (
            full_snapshot_archive_info.slot(),
            *full_snapshot_archive_info.hash(),
        ),
        SnapshotKind::FullSnapshot,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .full_snapshot_archives_dir,
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .incremental_snapshot_archives_dir,
        (
            incremental_snapshot_archive_info.slot(),
            *incremental_snapshot_archive_info.hash(),
        ),
        SnapshotKind::IncrementalSnapshot(incremental_snapshot_archive_info.base_slot()),
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    cluster.add_validator(
        &validator_snapshot_test_config.validator_config,
        stake,
        Arc::new(Keypair::new()),
        None,
        SocketAddrSpace::Unspecified,
    );
}

/// Test the scenario where a node starts up from a snapshot and its blockstore has enough new
/// roots to cross the full snapshot interval. In this scenario, the node needs to take a full
/// snapshot while processing the blockstore so that once the background services start up, there
/// is the correct full snapshot available to take subsequent incremental snapshots.
///
/// For this test...
/// - Start a leader node and run it long enough to take a full and incremental snapshot
/// - Download those snapshots to a validator node
/// - Copy the validator snapshots to a backup directory
/// - Start up the validator node
/// - Wait for the validator node to see enough root slots to cross the full snapshot interval
/// - Delete the snapshots on the validator node and restore the ones from the backup
/// - Restart the validator node to trigger the scenario we're trying to test
/// - Wait for the validator node to generate a new incremental snapshot
/// - Copy the new incremental snapshot (and its associated full snapshot) to another new validator
/// - Start up this new validator to ensure the snapshots copied above are good
#[test]
#[serial]
fn test_incremental_snapshot_download_with_crossing_full_snapshot_interval_at_startup() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // If these intervals change, also make sure to change the loop timers accordingly.
    let incremental_snapshot_interval = 9;
    let full_snapshot_interval = incremental_snapshot_interval * 5;
    let num_account_paths = 3;
    let leader_snapshot_test_config = SnapshotValidatorConfig::new(
        SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
        SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
        num_account_paths,
    );
    let mut validator_snapshot_test_config = SnapshotValidatorConfig::new(
        SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
        SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
        num_account_paths,
    );
    // The test has asserts that require the validator always boots from snapshot archives
    validator_snapshot_test_config
        .validator_config
        .use_snapshot_archives_at_startup = UseSnapshotArchivesAtStartup::Always;
    let stake = DEFAULT_NODE_STAKE;
    let mut config = ClusterConfig {
        node_stakes: vec![stake],
        validator_configs: make_identical_validator_configs(
            &leader_snapshot_test_config.validator_config,
            1,
        ),
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    info!(
        "snapshot config:\n\tfull snapshot interval: {full_snapshot_interval:?}\n\tincremental \
         snapshot interval: {incremental_snapshot_interval:?}",
    );
    debug!(
        "leader config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
         {}\n\tincremental snapshot archives dir: {}",
        leader_snapshot_test_config
            .bank_snapshots_dir
            .path()
            .display(),
        leader_snapshot_test_config
            .full_snapshot_archives_dir
            .path()
            .display(),
        leader_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path()
            .display(),
    );
    debug!(
        "validator config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
         {}\n\tincremental snapshot archives dir: {}",
        validator_snapshot_test_config
            .bank_snapshots_dir
            .path()
            .display(),
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path()
            .display(),
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path()
            .display(),
    );
    info!("Waiting for leader to create the next incremental snapshot...");
    let (incremental_snapshot_archive, full_snapshot_archive) =
        LocalCluster::wait_for_next_incremental_snapshot(
            &cluster,
            leader_snapshot_test_config
                .full_snapshot_archives_dir
                .path(),
            leader_snapshot_test_config
                .incremental_snapshot_archives_dir
                .path(),
            Some(Duration::from_secs(5 * 60)),
        );
    info!(
        "Found snapshots:\n\tfull snapshot: {}\n\tincremental snapshot: {}",
        full_snapshot_archive.path().display(),
        incremental_snapshot_archive.path().display()
    );
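    // Sanity check: the incremental snapshot must be based on the full snapshot found above.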
    assert_eq!(
        full_snapshot_archive.slot(),
        incremental_snapshot_archive.base_slot()
    );
    info!("Waiting for leader to create snapshots... DONE");
    // Download the snapshots, then boot a validator from them.
    info!("Downloading full snapshot to validator...");
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path(),
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path(),
        (full_snapshot_archive.slot(), *full_snapshot_archive.hash()),
        SnapshotKind::FullSnapshot,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    let downloaded_full_snapshot_archive = snapshot_paths::get_highest_full_snapshot_archive_info(
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path(),
    )
    .unwrap();
    info!(
        "Downloaded full snapshot, slot: {}",
        downloaded_full_snapshot_archive.slot()
    );
    info!("Downloading incremental snapshot to validator...");
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path(),
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path(),
        (
            incremental_snapshot_archive.slot(),
            *incremental_snapshot_archive.hash(),
        ),
        SnapshotKind::IncrementalSnapshot(incremental_snapshot_archive.base_slot()),
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    let downloaded_incremental_snapshot_archive =
        snapshot_paths::get_highest_incremental_snapshot_archive_info(
            validator_snapshot_test_config
                .incremental_snapshot_archives_dir
                .path(),
            full_snapshot_archive.slot(),
        )
        .unwrap();
    info!(
        "Downloaded incremental snapshot, slot: {}, base slot: {}",
        downloaded_incremental_snapshot_archive.slot(),
        downloaded_incremental_snapshot_archive.base_slot(),
    );
    assert_eq!(
        downloaded_full_snapshot_archive.slot(),
        downloaded_incremental_snapshot_archive.base_slot()
    );
    // closure to copy files in a directory to another directory
    let copy_files = |from: &Path, to: &Path| {
        trace!(
            "copying files from dir {}, to dir {}",
            from.display(),
            to.display()
        );
        for entry in fs::read_dir(from).unwrap() {
            let entry = entry.unwrap();
            if entry.file_type().unwrap().is_dir() {
                continue;
            }
            let from_file_path = entry.path();
            let to_file_path = to.join(from_file_path.file_name().unwrap());
            trace!(
                "\t\tcopying file from {} to {}...",
                from_file_path.display(),
                to_file_path.display()
            );
            fs::copy(from_file_path, to_file_path).unwrap();
        }
    };
    // closure to delete files in a directory
    let delete_files = |dir: &Path| {
        trace!("deleting files in dir {}", dir.display());
        for entry in fs::read_dir(dir).unwrap() {
            let entry = entry.unwrap();
            if entry.file_type().unwrap().is_dir() {
                continue;
            }
            let file_path = entry.path();
            trace!("\t\tdeleting file {}...", file_path.display());
            fs::remove_file(file_path).unwrap();
        }
    };
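    // Same as the closures above, but also mirror the corresponding "remote"
    // snapshot archive subdirectories.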
    let copy_files_with_remote = |from: &Path, to: &Path| {
        copy_files(from, to);
        let remote_from = snapshot_paths::build_snapshot_archives_remote_dir(from);
        let remote_to = snapshot_paths::build_snapshot_archives_remote_dir(to);
        let _ = fs::create_dir_all(&remote_from);
        let _ = fs::create_dir_all(&remote_to);
        copy_files(&remote_from, &remote_to);
    };
    let delete_files_with_remote = |from: &Path| {
        delete_files(from);
        let remote_dir = snapshot_paths::build_snapshot_archives_remote_dir(from);
        let _ = fs::create_dir_all(&remote_dir);
        delete_files(&remote_dir);
    };
    // After downloading the snapshots, copy them over to a backup directory. Later we'll need to
    // restart the node and guarantee that the only snapshots present are these initial ones. So,
    // the easiest way to do that is create a backup now, delete the ones on the node before
    // restart, then copy the backup ones over again.
    let backup_validator_full_snapshot_archives_dir = tempfile::tempdir_in(farf_dir()).unwrap();
    trace!(
        "Backing up validator full snapshots to dir: {}...",
        backup_validator_full_snapshot_archives_dir.path().display()
    );
    copy_files_with_remote(
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path(),
        backup_validator_full_snapshot_archives_dir.path(),
    );
    let backup_validator_incremental_snapshot_archives_dir =
        tempfile::tempdir_in(farf_dir()).unwrap();
    trace!(
        "Backing up validator incremental snapshots to dir: {}...",
        backup_validator_incremental_snapshot_archives_dir
            .path()
            .display()
    );
    copy_files_with_remote(
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path(),
        backup_validator_incremental_snapshot_archives_dir.path(),
    );
    info!("Starting the validator...");
    let validator_identity = Arc::new(Keypair::new());
    cluster.add_validator(
        &validator_snapshot_test_config.validator_config,
        stake,
        validator_identity.clone(),
        None,
        SocketAddrSpace::Unspecified,
    );
    info!("Starting the validator... DONE");
    // To ensure that a snapshot will be taken during startup, the blockstore needs to have roots
    // that cross a full snapshot interval.
    let starting_slot = incremental_snapshot_archive.slot();
    let next_full_snapshot_slot = starting_slot + full_snapshot_interval;
    info!(
        "Waiting for the validator to see enough slots to cross a full snapshot interval \
         ({next_full_snapshot_slot})..."
    );
  940. let timer = Instant::now();
  941. loop {
  942. let validator_current_slot = cluster
  943. .build_validator_tpu_quic_client(&validator_identity.pubkey())
  944. .unwrap()
  945. .rpc_client()
  946. .get_slot_with_commitment(CommitmentConfig::finalized())
  947. .unwrap();
  948. trace!("validator current slot: {validator_current_slot}");
  949. if validator_current_slot > next_full_snapshot_slot {
  950. break;
  951. }
  952. assert!(
  953. timer.elapsed() < Duration::from_secs(30),
  954. "It should not take longer than 30 seconds to cross the next full snapshot interval."
  955. );
  956. std::thread::yield_now();
  957. }
  958. info!(
  959. "Waited {:?} for the validator to see enough slots to cross a full snapshot interval... \
  960. DONE",
  961. timer.elapsed()
  962. );
  963. // Get the highest full snapshot archive info for the validator, now that it has crossed the
  964. // next full snapshot interval. We are going to use this to look up the same snapshot on the
  965. // leader, which we'll then use to compare to the full snapshot the validator will create
  966. // during startup. This ensures the snapshot creation process during startup is correct.
  967. //
  968. // Putting this all in its own block so its clear we're only intended to keep the leader's info
  969. let leader_full_snapshot_archive_for_comparison = {
  970. let validator_full_snapshot = snapshot_paths::get_highest_full_snapshot_archive_info(
  971. validator_snapshot_test_config
  972. .full_snapshot_archives_dir
  973. .path(),
  974. )
  975. .unwrap();
  976. // Now get the same full snapshot on the LEADER that we just got from the validator
  977. let mut leader_full_snapshots = snapshot_paths::get_full_snapshot_archives(
  978. leader_snapshot_test_config
  979. .full_snapshot_archives_dir
  980. .path(),
  981. );
  982. leader_full_snapshots.retain(|full_snapshot| {
  983. full_snapshot.slot() == validator_full_snapshot.slot()
  984. && full_snapshot.hash() == validator_full_snapshot.hash()
  985. });
  986. let leader_full_snapshot = leader_full_snapshots.first().unwrap();
  987. // And for sanity, the full snapshot from the leader and the validator MUST be the same
  988. assert_eq!(
  989. (
  990. validator_full_snapshot.slot(),
  991. validator_full_snapshot.hash()
  992. ),
  993. (leader_full_snapshot.slot(), leader_full_snapshot.hash())
  994. );
  995. leader_full_snapshot.clone()
  996. };
  997. info!(
  998. "leader full snapshot archive for comparison: \
  999. {leader_full_snapshot_archive_for_comparison:#?}"
  1000. );
  1001. // Stop the validator before we reset its snapshots
  1002. info!("Stopping the validator...");
  1003. let validator_info = cluster.exit_node(&validator_identity.pubkey());
  1004. info!("Stopping the validator... DONE");
  1005. info!("Delete all the snapshots on the validator and restore the originals from the backup...");
  1006. delete_files_with_remote(
  1007. validator_snapshot_test_config
  1008. .full_snapshot_archives_dir
  1009. .path(),
  1010. );
  1011. delete_files_with_remote(
  1012. validator_snapshot_test_config
  1013. .incremental_snapshot_archives_dir
  1014. .path(),
  1015. );
  1016. copy_files_with_remote(
  1017. backup_validator_full_snapshot_archives_dir.path(),
  1018. validator_snapshot_test_config
  1019. .full_snapshot_archives_dir
  1020. .path(),
  1021. );
  1022. copy_files_with_remote(
  1023. backup_validator_incremental_snapshot_archives_dir.path(),
  1024. validator_snapshot_test_config
  1025. .incremental_snapshot_archives_dir
  1026. .path(),
  1027. );
  1028. info!(
  1029. "Delete all the snapshots on the validator and restore the originals from the backup... \
  1030. DONE"
  1031. );
  1032. // Get the highest full snapshot slot *before* restarting, as a comparison
  1033. let validator_full_snapshot_slot_at_startup =
  1034. snapshot_paths::get_highest_full_snapshot_archive_slot(
  1035. validator_snapshot_test_config
  1036. .full_snapshot_archives_dir
  1037. .path(),
  1038. )
  1039. .unwrap();
  1040. info!(
  1041. "Restarting the validator with full snapshot {validator_full_snapshot_slot_at_startup}..."
  1042. );
  1043. cluster.restart_node(
  1044. &validator_identity.pubkey(),
  1045. validator_info,
  1046. SocketAddrSpace::Unspecified,
  1047. );
  1048. info!("Restarting the validator... DONE");
  1049. // Now, we want to ensure that the validator can make a new incremental snapshot based on the
  1050. // new full snapshot that was created during the restart.
  1051. info!("Waiting for the validator to make new snapshots...");
  1052. let validator_next_full_snapshot_slot =
  1053. validator_full_snapshot_slot_at_startup + full_snapshot_interval;
  1054. let validator_next_incremental_snapshot_slot =
  1055. validator_next_full_snapshot_slot + incremental_snapshot_interval;
  1056. info!("Waiting for validator next full snapshot slot: {validator_next_full_snapshot_slot}");
  1057. info!(
  1058. "Waiting for validator next incremental snapshot slot: \
  1059. {validator_next_incremental_snapshot_slot}"
  1060. );
  1061. let timer = Instant::now();
  1062. loop {
  1063. if let Some(full_snapshot_slot) = snapshot_paths::get_highest_full_snapshot_archive_slot(
  1064. validator_snapshot_test_config
  1065. .full_snapshot_archives_dir
  1066. .path(),
  1067. ) {
  1068. if full_snapshot_slot >= validator_next_full_snapshot_slot {
  1069. if let Some(incremental_snapshot_slot) =
  1070. snapshot_paths::get_highest_incremental_snapshot_archive_slot(
  1071. validator_snapshot_test_config
  1072. .incremental_snapshot_archives_dir
  1073. .path(),
  1074. full_snapshot_slot,
  1075. )
  1076. {
  1077. if incremental_snapshot_slot >= validator_next_incremental_snapshot_slot {
  1078. // specific incremental snapshot is not important, just that one was created
  1079. info!(
  1080. "Validator made new snapshots, full snapshot slot: \
  1081. {full_snapshot_slot}, incremental snapshot slot: \
  1082. {incremental_snapshot_slot}",
  1083. );
  1084. break;
  1085. }
  1086. }
  1087. }
  1088. }
  1089. assert!(
  1090. timer.elapsed() < Duration::from_secs(30),
  1091. "It should not take longer than 30 seconds to cross the next incremental snapshot \
  1092. interval."
  1093. );
  1094. std::thread::yield_now();
  1095. }
  1096. info!(
  1097. "Waited {:?} for the validator to make new snapshots... DONE",
  1098. timer.elapsed()
  1099. );
  1100. // Check to make sure that the full snapshot the validator created during startup is the same
  1101. // or one greater than the snapshot the leader created.
  1102. let validator_full_snapshot_archives = snapshot_paths::get_full_snapshot_archives(
  1103. validator_snapshot_test_config
  1104. .full_snapshot_archives_dir
  1105. .path(),
  1106. );
  1107. info!("validator full snapshot archives: {validator_full_snapshot_archives:#?}");
  1108. let validator_full_snapshot_archive_for_comparison = validator_full_snapshot_archives
  1109. .into_iter()
  1110. .find(|validator_full_snapshot_archive| {
  1111. validator_full_snapshot_archive.slot()
  1112. == leader_full_snapshot_archive_for_comparison.slot()
  1113. })
  1114. .expect("validator created an unexpected full snapshot");
  1115. info!(
  1116. "Validator full snapshot archive for comparison: \
  1117. {validator_full_snapshot_archive_for_comparison:#?}"
  1118. );
  1119. assert_eq!(
  1120. validator_full_snapshot_archive_for_comparison.hash(),
  1121. leader_full_snapshot_archive_for_comparison.hash(),
  1122. );
  1123. // And lastly, startup another node with the new snapshots to ensure they work
  1124. let final_validator_snapshot_test_config = SnapshotValidatorConfig::new(
  1125. SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
  1126. SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
  1127. num_account_paths,
  1128. );
  1129. // Copy over the snapshots to the new node that it will boot from
  1130. copy_files(
  1131. validator_snapshot_test_config
  1132. .full_snapshot_archives_dir
  1133. .path(),
  1134. final_validator_snapshot_test_config
  1135. .full_snapshot_archives_dir
  1136. .path(),
  1137. );
  1138. copy_files(
  1139. validator_snapshot_test_config
  1140. .incremental_snapshot_archives_dir
  1141. .path(),
  1142. final_validator_snapshot_test_config
  1143. .incremental_snapshot_archives_dir
  1144. .path(),
  1145. );
  1146. info!("Starting final validator...");
  1147. let final_validator_identity = Arc::new(Keypair::new());
  1148. cluster.add_validator(
  1149. &final_validator_snapshot_test_config.validator_config,
  1150. stake,
  1151. final_validator_identity,
  1152. None,
  1153. SocketAddrSpace::Unspecified,
  1154. );
  1155. info!("Starting final validator... DONE");
  1156. }
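
// Restart a validator from a full snapshot produced by the leader and verify that its stale
// tower (which may reference slots below the snapshot's root bank) does not cause a panic, and
// that the cluster keeps confirming transactions afterwards.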
#[allow(unused_attributes)]
#[test]
#[serial]
fn test_snapshot_restart_tower() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // First set up the cluster with 2 nodes
    let snapshot_interval_slots = NonZeroU64::new(10).unwrap();
    let num_account_paths = 2;

    let leader_snapshot_test_config =
        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
    let validator_snapshot_test_config =
        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);

    let mut config = ClusterConfig {
        node_stakes: vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
        mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
        validator_configs: vec![
            safe_clone_config(&leader_snapshot_test_config.validator_config),
            safe_clone_config(&validator_snapshot_test_config.validator_config),
        ],
        skip_warmup_slots: true,
        ..ClusterConfig::default()
    };

    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

    // Let the nodes run for a while, then stop one of the validators
    sleep(Duration::from_millis(5000));
    let all_pubkeys = cluster.get_node_pubkeys();
    let validator_id = all_pubkeys
        .into_iter()
        .find(|x| x != cluster.entry_point_info.pubkey())
        .unwrap();
    let validator_info = cluster.exit_node(&validator_id);

    let full_snapshot_archives_dir = &leader_snapshot_test_config
        .validator_config
        .snapshot_config
        .full_snapshot_archives_dir;
    let full_snapshot_archive_info = cluster.wait_for_next_full_snapshot(
        full_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );

    // Copy archive to validator's snapshot output directory
    let validator_archive_path = snapshot_paths::build_full_snapshot_archive_path(
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .keep(),
        full_snapshot_archive_info.slot(),
        full_snapshot_archive_info.hash(),
        full_snapshot_archive_info.archive_format(),
    );
    fs::hard_link(full_snapshot_archive_info.path(), validator_archive_path).unwrap();

    // Restart validator from snapshot, the validator's tower state in this snapshot
    // will contain slots < the root bank of the snapshot. Validator should not panic.
    cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);

    // Test cluster can still make progress and get confirmations in tower
    // Use the restarted node as the discovery point so that we get updated
    // validator's ContactInfo
    let restarted_node_info = cluster.get_contact_info(&validator_id).unwrap();
    cluster_tests::spend_and_verify_all_nodes(
        restarted_node_info,
        &cluster.funding_keypair,
        2,
        HashSet::new(),
        SocketAddrSpace::Unspecified,
        &cluster.connection_cache,
    );
}
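
// Boot a new validator from a full snapshot and verify that, after running with repair for a
// while, its blockstore contains no slots below the snapshot slot (the "floor").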
#[test]
#[serial]
fn test_snapshots_blockstore_floor() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // First set up the cluster with 1 snapshotting leader
    let snapshot_interval_slots = NonZeroU64::new(100).unwrap();
    let num_account_paths = 4;

    let leader_snapshot_test_config =
        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
    let mut validator_snapshot_test_config =
        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);

    let full_snapshot_archives_dir = &leader_snapshot_test_config
        .validator_config
        .snapshot_config
        .full_snapshot_archives_dir;

    let mut config = ClusterConfig {
        node_stakes: vec![DEFAULT_NODE_STAKE],
        validator_configs: make_identical_validator_configs(
            &leader_snapshot_test_config.validator_config,
            1,
        ),
        ..ClusterConfig::default()
    };

    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

    trace!("Waiting for snapshot tar to be generated with slot",);

    let archive_info = loop {
        let archive =
            snapshot_paths::get_highest_full_snapshot_archive_info(full_snapshot_archives_dir);
        if archive.is_some() {
            trace!("snapshot exists");
            break archive.unwrap();
        }
        sleep(Duration::from_millis(5000));
    };

    // Copy archive to validator's snapshot output directory
    let validator_archive_path = snapshot_paths::build_full_snapshot_archive_path(
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .keep(),
        archive_info.slot(),
        archive_info.hash(),
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .archive_format,
    );
    fs::hard_link(archive_info.path(), validator_archive_path).unwrap();
    let slot_floor = archive_info.slot();

    // Start up a new node from a snapshot
    let cluster_nodes = discover_validators(
        &cluster.entry_point_info.gossip().unwrap(),
        1,
        cluster.entry_point_info.shred_version(),
        SocketAddrSpace::Unspecified,
    )
    .unwrap();

    let mut known_validators = HashSet::new();
    known_validators.insert(*cluster_nodes[0].pubkey());
    validator_snapshot_test_config
        .validator_config
        .known_validators = Some(known_validators);

    cluster.add_validator(
        &validator_snapshot_test_config.validator_config,
        DEFAULT_NODE_STAKE,
        Arc::new(Keypair::new()),
        None,
        SocketAddrSpace::Unspecified,
    );
    let all_pubkeys = cluster.get_node_pubkeys();
    let validator_id = all_pubkeys
        .into_iter()
        .find(|x| x != cluster.entry_point_info.pubkey())
        .unwrap();
    let validator_client = cluster
        .build_validator_tpu_quic_client(&validator_id)
        .unwrap();
    let mut current_slot = 0;

    // Let this validator run a while with repair
    let target_slot = slot_floor + 40;
    while current_slot <= target_slot {
        trace!("current_slot: {current_slot}");
        if let Ok(slot) = validator_client
            .rpc_client()
            .get_slot_with_commitment(CommitmentConfig::processed())
        {
            current_slot = slot;
        } else {
            continue;
        }
        sleep(Duration::from_secs(1));
    }

    // Check the validator ledger doesn't contain any slots < slot_floor
    cluster.close_preserve_ledgers();
    let validator_ledger_path = &cluster.validators[&validator_id];
    let blockstore = Blockstore::open(&validator_ledger_path.info.ledger_path).unwrap();

    // Skip the zeroth slot in blockstore that the ledger is initialized with
    let (first_slot, _) = blockstore.slot_meta_iterator(1).unwrap().next().unwrap();

    assert_eq!(first_slot, slot_floor);
}
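
// Repeatedly restart a single snapshotting validator from its own snapshots, checking after each
// restart that previously observed account balances are intact and that new transactions still
// land.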
#[test]
#[serial]
fn test_snapshots_restart_validity() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let snapshot_interval_slots = NonZeroU64::new(100).unwrap();
    let num_account_paths = 1;
    let mut snapshot_test_config =
        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
    let full_snapshot_archives_dir = &snapshot_test_config
        .validator_config
        .snapshot_config
        .full_snapshot_archives_dir;

    // Set up the cluster with 1 snapshotting validator
    let mut all_account_storage_dirs = vec![std::mem::take(
        &mut snapshot_test_config.account_storage_dirs,
    )];
    let mut config = ClusterConfig {
        node_stakes: vec![DEFAULT_NODE_STAKE],
        validator_configs: make_identical_validator_configs(
            &snapshot_test_config.validator_config,
            1,
        ),
        ..ClusterConfig::default()
    };

    // Create and reboot the node from snapshot `num_runs` times
    let num_runs = 3;
    let mut expected_balances = HashMap::new();
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    for i in 1..num_runs {
        info!("run {i}");
        // Push transactions to one of the nodes and confirm that transactions were
        // forwarded to and processed.
        trace!("Sending transactions");
        let new_balances = cluster_tests::send_many_transactions(
            &cluster.entry_point_info,
            &cluster.funding_keypair,
            &cluster.connection_cache,
            10,
            10,
        );

        expected_balances.extend(new_balances);

        cluster.wait_for_next_full_snapshot(
            full_snapshot_archives_dir,
            Some(Duration::from_secs(5 * 60)),
        );

        // Create new account paths since validator exit is not guaranteed to cleanup RPC threads,
        // which may delete the old accounts on exit at any point
        let (new_account_storage_dirs, new_account_storage_paths) =
            generate_account_paths(num_account_paths);
        all_account_storage_dirs.push(new_account_storage_dirs);
        snapshot_test_config.validator_config.account_paths = new_account_storage_paths;

        // Restart node
        trace!("Restarting cluster from snapshot");
        let nodes = cluster.get_node_pubkeys();
        cluster.exit_restart_node(
            &nodes[0],
            safe_clone_config(&snapshot_test_config.validator_config),
            SocketAddrSpace::Unspecified,
        );

        // Verify account balances on validator
        trace!("Verifying balances");
        cluster_tests::verify_balances(
            expected_balances.clone(),
            &cluster.entry_point_info,
            cluster.connection_cache.clone(),
        );

        // Check that we can still push transactions
        trace!("Spending and verifying");
        cluster_tests::spend_and_verify_all_nodes(
            &cluster.entry_point_info,
            &cluster.funding_keypair,
            1,
            HashSet::new(),
            SocketAddrSpace::Unspecified,
            &cluster.connection_cache,
        );
    }
}
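
// Run a cluster where the heavily staked leader broadcasts entries that fail verification and
// confirm that the remaining validators still root new slots.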
#[test]
#[serial]
#[allow(unused_attributes)]
#[ignore]
fn test_fail_entry_verification_leader() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let leader_stake = (DUPLICATE_THRESHOLD * 100.0) as u64 + 1;
    let validator_stake1 = (100 - leader_stake) / 2;
    let validator_stake2 = 100 - leader_stake - validator_stake1;
    let (cluster, _) = test_faulty_node(
        BroadcastStageType::FailEntryVerification,
        vec![leader_stake, validator_stake1, validator_stake2],
        None,
        None,
    );
    cluster.check_for_new_roots(
        16,
        "test_fail_entry_verification_leader",
        SocketAddrSpace::Unspecified,
    );
}
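
// Run a cluster where the leader broadcasts fake shreds and confirm the cluster still roots new
// slots.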
#[test]
#[serial]
#[ignore]
#[allow(unused_attributes)]
fn test_fake_shreds_broadcast_leader() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let node_stakes = vec![300, 100];
    let (cluster, _) = test_faulty_node(
        BroadcastStageType::BroadcastFakeShreds,
        node_stakes,
        None,
        None,
    );
    cluster.check_for_new_roots(
        16,
        "test_fake_shreds_broadcast_leader",
        SocketAddrSpace::Unspecified,
    );
}
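
// Spin up several equally staked validators and wait until no single node holds more than the
// expected share of active stake, bounding the wait by the estimated stake warmup time.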
#[test]
#[serial]
fn test_wait_for_max_stake() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let validator_config = ValidatorConfig::default_for_test();
    let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH;
    // Set this large enough to allow for skipped slots but still be able to
    // make a root and derive the new leader schedule in time.
    let stakers_slot_offset = slots_per_epoch.saturating_mul(MAX_LEADER_SCHEDULE_EPOCH_OFFSET);
    // Reduce this so that we can complete the test faster by advancing through
    // slots/epochs faster. But don't make it too small because it can cause the
    // test to fail in two important ways:
    // 1. Increase likelihood of skipped slots, which can prevent rooting and
    //    lead to not generating leader schedule in time and cluster getting
    //    stuck.
    // 2. Make the cluster advance through too many epochs before all the
    //    validators spin up, which can lead to not properly observing gossip
    //    votes, not repairing missing slots, and some subset of nodes getting
    //    stuck.
    let ticks_per_slot = 32;
    let num_validators = 4;
    let mut config = ClusterConfig {
        node_stakes: vec![DEFAULT_NODE_STAKE; num_validators],
        validator_configs: make_identical_validator_configs(&validator_config, num_validators),
        slots_per_epoch,
        stakers_slot_offset,
        ticks_per_slot,
        ..ClusterConfig::default()
    };
    let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let client = RpcClient::new_socket(cluster.entry_point_info.rpc().unwrap());

    let num_validators_activating_stake = num_validators - 1;
    // Number of epochs it is expected to take to completely activate the stake
    // for all the validators.
    let num_expected_epochs = (num_validators_activating_stake as f64)
        .log(1. + NEW_WARMUP_COOLDOWN_RATE)
        .ceil() as u32
        + 1;
    let expected_test_duration = config.poh_config.target_tick_duration
        * ticks_per_slot as u32
        * slots_per_epoch as u32
        * num_expected_epochs;
    // Make the timeout double the expected duration to provide some margin.
    // Especially considering tests may be running in parallel.
    let timeout = expected_test_duration * 2;
    if let Err(err) = client.wait_for_max_stake_below_threshold_with_timeout(
        CommitmentConfig::default(),
        (100 / num_validators_activating_stake) as f32,
        timeout,
    ) {
        panic!("wait_for_max_stake failed: {err:?}");
    }
    assert!(client.get_slot().unwrap() > 10);
}
#[test]
#[serial]
// Test that when a leader is leader for banks B_i..B_{i+n}, and B_i is not
// votable, then B_{i+1} still chains to B_i
fn test_no_voting() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let validator_config = ValidatorConfig {
        voting_disabled: true,
        ..ValidatorConfig::default_for_test()
    };
    let mut config = ClusterConfig {
        node_stakes: vec![DEFAULT_NODE_STAKE],
        validator_configs: vec![validator_config],
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let client = cluster
        .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
        .unwrap();
    loop {
        let last_slot = client
            .rpc_client()
            .get_slot_with_commitment(CommitmentConfig::processed())
            .expect("Couldn't get slot");
        if last_slot > 4 * VOTE_THRESHOLD_DEPTH as u64 {
            break;
        }
        sleep(Duration::from_secs(1));
    }
    cluster.close_preserve_ledgers();
    let leader_pubkey = *cluster.entry_point_info.pubkey();
    let ledger_path = cluster.validators[&leader_pubkey].info.ledger_path.clone();
    let ledger = Blockstore::open(&ledger_path).unwrap();
    for i in 0..2 * VOTE_THRESHOLD_DEPTH {
        let meta = ledger.meta(i as u64).unwrap().unwrap();
        let parent = meta.parent_slot;
        let expected_parent = i.saturating_sub(1);
        assert_eq!(parent, Some(expected_parent as u64));
    }
}
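
// Force an optimistically confirmed slot to be marked dead on the heavier validator and verify
// that, after restarting without its saved tower, the node detects and logs the optimistic
// confirmation violation while the cluster continues to make roots.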
#[test]
#[serial]
fn test_optimistic_confirmation_violation_detection() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // First set up the cluster with 2 nodes
    let slots_per_epoch = 2048;
    let node_stakes = vec![50 * DEFAULT_NODE_STAKE, 51 * DEFAULT_NODE_STAKE];
    let validator_keys: Vec<_> = [
        "4qhhXNTbKD1a5vxDDLZcHKj7ELNeiivtUBxn3wUK1F5VRsQVP89VUhfXqSfgiFB14GfuBgtrQ96n9NvWQADVkcCg",
        "3kHBzVwie5vTEaY6nFCPeFT8qDpoXzn7dCEioGRNBTnUDpvwnG85w8Wq63gVWpVTP8k2a8cgcWRjSXyUkEygpXWS",
    ]
    .iter()
    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
    .take(node_stakes.len())
    .collect();

    // Do not restart the validator which is the cluster entrypoint because its gossip port
    // might be changed after restart, resulting in the two nodes not being able
    // to form a cluster. The heavier validator is the second node.
    let node_to_restart = validator_keys[1].0.pubkey();

    // WFSM as we require an OC slot > 50 within 100 seconds
    let mut validator_config = ValidatorConfig::default_for_test();
    validator_config.wait_for_supermajority = Some(0);

    let mut config = ClusterConfig {
        mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
        node_stakes: node_stakes.clone(),
        validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
        validator_keys: Some(validator_keys),
        slots_per_epoch,
        stakers_slot_offset: slots_per_epoch,
        skip_warmup_slots: true,
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

    // Let the nodes run for a while. Wait for validators to vote on slot `S`
    // so that the vote on `S-1` is definitely in gossip and optimistic confirmation is
    // detected on slot `S-1` for sure, then stop the heavier of the two
    // validators
    let client = cluster
        .build_validator_tpu_quic_client(&node_to_restart)
        .unwrap();
    let start = Instant::now();
    let target_slot = 50;
    let max_wait_time_seconds = 100;
    let mut optimistically_confirmed_slot;
    loop {
        optimistically_confirmed_slot = client
            .rpc_client()
            .get_slot_with_commitment(CommitmentConfig::confirmed())
            .unwrap();
        if optimistically_confirmed_slot > target_slot {
            break;
        }
        if start.elapsed() > Duration::from_secs(max_wait_time_seconds) {
            cluster.exit();
            panic!(
                "Didn't get optimistically confirmed slot > {target_slot} within \
                 {max_wait_time_seconds} seconds"
            );
        }
        sleep(Duration::from_millis(100));
    }

    info!("exiting node");
    drop(client);
    let mut exited_validator_info = cluster.exit_node(&node_to_restart);
    info!("exiting node success");

    // Mark fork as dead on the heavier validator, this should make the fork effectively
    // dead, even though it was optimistically confirmed. The smaller validator should
    // create and jump over to a new fork
    // Also, remove saved tower to intentionally make the restarted validator violate the
    // optimistic confirmation
    let optimistically_confirmed_slot_parent = {
        let tower = restore_tower(
            &exited_validator_info.info.ledger_path,
            &exited_validator_info.info.keypair.pubkey(),
        )
        .unwrap();

        // Vote must exist since we waited for OC and so this node must have voted
        let last_voted_slot = tower.last_voted_slot().expect("vote must exist");
        let blockstore = open_blockstore(&exited_validator_info.info.ledger_path);

        // The last vote must be descended from the OC slot
        assert!(
            AncestorIterator::new_inclusive(last_voted_slot, &blockstore)
                .contains(&optimistically_confirmed_slot)
        );

        info!(
            "Setting slot: {optimistically_confirmed_slot} on main fork as dead, should cause fork"
        );
        // Necessary otherwise tower will inform this validator that its latest
        // vote is on slot `optimistically_confirmed_slot`. This will then prevent this validator
        // from resetting to the parent of `optimistically_confirmed_slot` to create an alternative fork because
        // 1) Validator can't vote on earlier ancestor of last vote due to switch threshold (can't vote
        //    on ancestors of last vote)
        // 2) Won't reset to this earlier ancestor because reset can only happen on same voted fork if
        //    it's for the last vote slot or later
        remove_tower(&exited_validator_info.info.ledger_path, &node_to_restart);
        blockstore
            .set_dead_slot(optimistically_confirmed_slot)
            .unwrap();
        blockstore
            .meta(optimistically_confirmed_slot)
            .unwrap()
            .unwrap()
            .parent_slot
            .unwrap()
    };

    {
        // Buffer stderr to detect the optimistic slot violation log
        let buf = std::env::var("OPTIMISTIC_CONF_TEST_DUMP_LOG")
            .err()
            .map(|_| BufferRedirect::stderr().unwrap());

        // In order to prevent the node from voting on a slot it's already voted on,
        // which can potentially cause a panic in gossip, start up the validator as a
        // non voter and wait for it to make a new block
        exited_validator_info.config.voting_disabled = true;
        cluster.restart_node(
            &node_to_restart,
            exited_validator_info,
            SocketAddrSpace::Unspecified,
        );

        // Wait for this node to make a fork that doesn't include the `optimistically_confirmed_slot`
        info!(
            "Looking for slot not equal to {optimistically_confirmed_slot} with parent \
             {optimistically_confirmed_slot_parent}"
        );
        let start = Instant::now();
        let new_fork_slot;
        'outer: loop {
            sleep(Duration::from_millis(1000));
            let ledger_path = cluster.ledger_path(&node_to_restart);
            let blockstore = open_blockstore(&ledger_path);
            let potential_new_forks = blockstore
                .meta(optimistically_confirmed_slot_parent)
                .unwrap()
                .unwrap()
                .next_slots;
            for slot in potential_new_forks {
                // Wait for a fork to be created that does not include the OC slot
                // Now on restart the validator should only vote for this new `slot`, which it has
                // never voted on before and thus avoids the panic in gossip
                if slot > optimistically_confirmed_slot && blockstore.is_full(slot) {
                    new_fork_slot = slot;
                    break 'outer;
                }
            }
            if start.elapsed() > Duration::from_secs(max_wait_time_seconds) {
                cluster.exit();
                panic!("Didn't get new fork within {max_wait_time_seconds} seconds");
            }
        }

        // Exit again, restart with voting enabled
        let mut exited_validator_info = cluster.exit_node(&node_to_restart);
        exited_validator_info.config.voting_disabled = false;
        cluster.restart_node(
            &node_to_restart,
            exited_validator_info,
            SocketAddrSpace::Unspecified,
        );

        // Wait for a root descended from `new_fork_slot` to be set.
        let client = cluster
            .build_validator_tpu_quic_client(&node_to_restart)
            .unwrap();

        info!("looking for root > {optimistically_confirmed_slot} on new fork {new_fork_slot}");
        let start = Instant::now();
        loop {
            info!("Client connecting to: {}", client.rpc_client().url());
            let last_root = client
                .rpc_client()
                .get_slot_with_commitment(CommitmentConfig::finalized())
                .unwrap();
            if last_root > new_fork_slot {
                info!("Found root: {last_root} > {new_fork_slot}");
                let ledger_path = cluster.ledger_path(&node_to_restart);
                let blockstore = open_blockstore(&ledger_path);
                if AncestorIterator::new_inclusive(last_root, &blockstore).contains(&new_fork_slot)
                {
                    break;
                }
            }
            if start.elapsed() > Duration::from_secs(max_wait_time_seconds) {
                cluster.exit();
                panic!("Didn't get root on new fork within {max_wait_time_seconds} seconds");
            }
            sleep(Duration::from_millis(100));
        }

        // Check to see that the validator detected that optimistic confirmation for
        // `last_voted_slot` failed
        let expected_log =
            OptimisticConfirmationVerifier::format_optimistic_confirmed_slot_violation_log(
                optimistically_confirmed_slot,
            );

        // The violation detection thread can be behind, so poll the logs for up to 10 seconds
        if let Some(mut buf) = buf {
            let start = Instant::now();
            let mut success = false;
            let mut output = String::new();
            while start.elapsed().as_secs() < 10 {
                buf.read_to_string(&mut output).unwrap();
                if output.contains(&expected_log) {
                    success = true;
                    break;
                }
                sleep(Duration::from_millis(10));
            }
            print!("{output}");
            assert!(success);
        } else {
            panic!("dumped log and disabled testing");
        }
    }

    // Make sure the validator still makes progress
    cluster_tests::check_for_new_roots(
        16,
        &[cluster.get_contact_info(&node_to_restart).unwrap().clone()],
        &cluster.connection_cache,
        "test_optimistic_confirmation_violation",
    );
}
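
// Verify that a validator persists its tower across restarts: the saved root advances with the
// cluster, a rolled-back tower still lets the node make progress, and the tower can be rebuilt
// from the vote account if the tower file is removed.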
#[test]
#[serial]
fn test_validator_saves_tower() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);

    let validator_config = ValidatorConfig {
        require_tower: true,
        ..ValidatorConfig::default_for_test()
    };
    let validator_identity_keypair = Arc::new(Keypair::new());
    let validator_id = validator_identity_keypair.pubkey();
    let mut config = ClusterConfig {
        node_stakes: vec![DEFAULT_NODE_STAKE],
        validator_configs: vec![validator_config],
        validator_keys: Some(vec![(validator_identity_keypair.clone(), true)]),
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

    let validator_client = cluster
        .build_validator_tpu_quic_client(&validator_id)
        .unwrap();

    let ledger_path = cluster
        .validators
        .get(&validator_id)
        .unwrap()
        .info
        .ledger_path
        .clone();

    let file_tower_storage = FileTowerStorage::new(ledger_path.clone());

    // Wait for some votes to be generated
    loop {
        if let Ok(slot) = validator_client
            .rpc_client()
            .get_slot_with_commitment(CommitmentConfig::processed())
        {
            trace!("current slot: {slot}");
            if slot > 2 {
                break;
            }
        }
        sleep(Duration::from_millis(10));
    }

    // Stop validator and check saved tower
    let validator_info = cluster.exit_node(&validator_id);
    let tower1 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
    trace!("tower1: {tower1:?}");
    assert_eq!(tower1.root(), 0);
    assert!(tower1.last_voted_slot().is_some());

    // Restart the validator and wait for a new root
    cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
    let validator_client = cluster
        .build_validator_tpu_quic_client(&validator_id)
        .unwrap();

    // Wait for the first new root
    let last_replayed_root = loop {
        if let Ok(root) = validator_client
            .rpc_client()
            .get_slot_with_commitment(CommitmentConfig::finalized())
        {
            trace!("current root: {root}");
            if root > 0 {
                break root;
            }
        }
        sleep(Duration::from_millis(50));
    };

    // Stop validator, and check saved tower
    let validator_info = cluster.exit_node(&validator_id);
    let tower2 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
    trace!("tower2: {tower2:?}");
    assert_eq!(tower2.root(), last_replayed_root);

    // Rollback saved tower to `tower1` to simulate a validator starting from a newer snapshot
    // without having to wait for that snapshot to be generated in this test
    tower1
        .save(&file_tower_storage, &validator_identity_keypair)
        .unwrap();

    cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
    let validator_client = cluster
        .build_validator_tpu_quic_client(&validator_id)
        .unwrap();

    // Wait for a new root, demonstrating the validator was able to make progress from the older `tower1`
    let new_root = loop {
        if let Ok(root) = validator_client
            .rpc_client()
            .get_slot_with_commitment(CommitmentConfig::finalized())
        {
            trace!("current root: {root}, last_replayed_root: {last_replayed_root}");
            if root > last_replayed_root {
                break root;
            }
        }
        sleep(Duration::from_millis(50));
    };

    // Check the new root is reflected in the saved tower state
    let mut validator_info = cluster.exit_node(&validator_id);
    let tower3 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
    trace!("tower3: {tower3:?}");
    let tower3_root = tower3.root();
    assert!(tower3_root >= new_root);

    // Remove the tower file entirely and allow the validator to start without a tower. It will
    // rebuild tower from its vote account contents
    remove_tower(&ledger_path, &validator_id);
    validator_info.config.require_tower = false;

    cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
    let validator_client = cluster
        .build_validator_tpu_quic_client(&validator_id)
        .unwrap();

    // Wait for another new root
    let new_root = loop {
        if let Ok(root) = validator_client
            .rpc_client()
            .get_slot_with_commitment(CommitmentConfig::finalized())
        {
            trace!("current root: {root}, last tower root: {tower3_root}");
            if root > tower3_root {
                break root;
            }
        }
        sleep(Duration::from_millis(50));
    };

    cluster.close_preserve_ledgers();

    let tower4 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
    trace!("tower4: {tower4:?}");
    assert!(tower4.root() >= new_root);
}
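
// Helper: read the persisted tower for `node_pubkey` from `tower_path` and return its root, if
// the tower can be restored.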
fn root_in_tower(tower_path: &Path, node_pubkey: &Pubkey) -> Option<Slot> {
    restore_tower(tower_path, node_pubkey).map(|tower| tower.root())
}

enum ClusterMode {
    MasterOnly,
    MasterSlave,
}
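
// Shared body for the future-tower tests: purge recent blockstore slots so the saved tower is
// ahead of the ledger ("future tower"), then restart validator A and require that it roots new
// slots on a single fork.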
fn do_test_future_tower(cluster_mode: ClusterMode) {
    agave_logger::setup_with_default(RUST_LOG_FILTER);

    // First set up the cluster with 1 or 2 nodes, depending on the mode
    let slots_per_epoch = 2048;
    let node_stakes = match cluster_mode {
        ClusterMode::MasterOnly => vec![DEFAULT_NODE_STAKE],
        ClusterMode::MasterSlave => vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
    };

    let validator_keys = [
        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
    ]
    .iter()
    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
    .take(node_stakes.len())
    .collect::<Vec<_>>();
    let validators = validator_keys
        .iter()
        .map(|(kp, _)| kp.pubkey())
        .collect::<Vec<_>>();
    let validator_a_pubkey = match cluster_mode {
        ClusterMode::MasterOnly => validators[0],
        ClusterMode::MasterSlave => validators[1],
    };

    let mut validator_config = ValidatorConfig::default_for_test();
    validator_config.wait_for_supermajority = Some(0);

    let mut config = ClusterConfig {
        mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
        node_stakes: node_stakes.clone(),
        validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
        validator_keys: Some(validator_keys),
        slots_per_epoch,
        stakers_slot_offset: slots_per_epoch,
        skip_warmup_slots: true,
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

    let val_a_ledger_path = cluster.ledger_path(&validator_a_pubkey);

    loop {
        sleep(Duration::from_millis(100));

        if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
            if root >= 15 {
                break;
            }
        }
    }
    let purged_slot_before_restart = 10;
    let validator_a_info = cluster.exit_node(&validator_a_pubkey);
    {
        // create a warped future tower without mangling the tower itself
        info!(
            "Revert blockstore before slot {purged_slot_before_restart} and effectively create a \
             future tower",
        );
        let blockstore = open_blockstore(&val_a_ledger_path);
        purge_slots_with_count(&blockstore, purged_slot_before_restart, 100);
    }

    cluster.restart_node(
        &validator_a_pubkey,
        validator_a_info,
        SocketAddrSpace::Unspecified,
    );

    let mut newly_rooted = false;
    let some_root_after_restart = purged_slot_before_restart + 25; // 25 is arbitrary; just wait a bit
    for _ in 0..600 {
        sleep(Duration::from_millis(100));

        if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
            if root >= some_root_after_restart {
                newly_rooted = true;
                break;
            }
        }
    }
    let _validator_a_info = cluster.exit_node(&validator_a_pubkey);
    if newly_rooted {
        // there should be no forks; i.e. monotonically increasing ancestor chain
        let (last_vote, _) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
        let blockstore = open_blockstore(&val_a_ledger_path);
        let actual_block_ancestors = AncestorIterator::new_inclusive(last_vote, &blockstore)
            .take_while(|a| *a >= some_root_after_restart)
            .collect::<Vec<_>>();
        let expected_continuous_no_fork_votes = (some_root_after_restart..=last_vote)
            .rev()
            .collect::<Vec<_>>();
        assert_eq!(actual_block_ancestors, expected_continuous_no_fork_votes);
        assert!(actual_block_ancestors.len() > MAX_LOCKOUT_HISTORY);
        info!("validator managed to handle future tower!");
    } else {
        panic!("no root detected");
    }
}

#[test]
#[serial]
fn test_future_tower_master_only() {
    do_test_future_tower(ClusterMode::MasterOnly);
}

#[test]
#[serial]
fn test_future_tower_master_slave() {
    do_test_future_tower(ClusterMode::MasterSlave);
}
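
// Restart both nodes after a hard fork: validator A is restarted on a separate thread because
// wait_for_supermajority blocks inside Validator::new(), and the test checks that its last vote
// does not advance until validator B is restarted.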
fn restart_whole_cluster_after_hard_fork(
    cluster: &Arc<Mutex<LocalCluster>>,
    validator_a_pubkey: Pubkey,
    validator_b_pubkey: Pubkey,
    mut validator_a_info: ClusterValidatorInfo,
    validator_b_info: ClusterValidatorInfo,
) {
    // restart validator A first
    let cluster_for_a = cluster.clone();
    let val_a_ledger_path = validator_a_info.info.ledger_path.clone();

    // Spawn a thread because wait_for_supermajority blocks in Validator::new()!
    let thread = std::thread::spawn(move || {
        let restart_context = cluster_for_a
            .lock()
            .unwrap()
            .create_restart_context(&validator_a_pubkey, &mut validator_a_info);
        let restarted_validator_info = LocalCluster::restart_node_with_context(
            validator_a_info,
            restart_context,
            SocketAddrSpace::Unspecified,
        );
        cluster_for_a
            .lock()
            .unwrap()
            .add_node(&validator_a_pubkey, restarted_validator_info);
    });

    // test that validator A actually waits for supermajority (its last vote should not advance)
    let mut last_vote = None;
    for _ in 0..10 {
        sleep(Duration::from_millis(1000));

        let (new_last_vote, _) =
            last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
        if let Some(last_vote) = last_vote {
            assert_eq!(last_vote, new_last_vote);
        } else {
            last_vote = Some(new_last_vote);
        }
    }

    // restart validator B normally
    cluster.lock().unwrap().restart_node(
        &validator_b_pubkey,
        validator_b_info,
        SocketAddrSpace::Unspecified,
    );

    // validator A should now start, so join its thread here
    thread.join().unwrap();
}
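
// Register a hard fork at a slot below an already-rooted slot on both nodes, purge the ledger
// past that slot, and verify the cluster can relaunch and root new slots even though the towers
// reference the pre-fork history.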
#[test]
#[serial]
fn test_hard_fork_invalidates_tower() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);

    // First set up the cluster with 2 nodes
    let slots_per_epoch = 2048;
    let node_stakes = vec![60 * DEFAULT_NODE_STAKE, 40 * DEFAULT_NODE_STAKE];

    let validator_keys = [
        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
    ]
    .iter()
    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
    .take(node_stakes.len())
    .collect::<Vec<_>>();
    let validators = validator_keys
        .iter()
        .map(|(kp, _)| kp.pubkey())
        .collect::<Vec<_>>();

    let validator_a_pubkey = validators[0];
    let validator_b_pubkey = validators[1];

    let mut config = ClusterConfig {
        mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
        node_stakes: node_stakes.clone(),
        validator_configs: make_identical_validator_configs(
            &ValidatorConfig::default_for_test(),
            node_stakes.len(),
        ),
        validator_keys: Some(validator_keys),
        slots_per_epoch,
        stakers_slot_offset: slots_per_epoch,
        skip_warmup_slots: true,
        ..ClusterConfig::default()
    };
    let cluster = std::sync::Arc::new(std::sync::Mutex::new(LocalCluster::new(
        &mut config,
        SocketAddrSpace::Unspecified,
    )));

    let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey);

    let min_root = 15;
    loop {
        sleep(Duration::from_millis(100));

        if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
            if root >= min_root {
                break;
            }
        }
    }

    let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey);
    let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);

    // setup hard fork at slot < a previously rooted slot!
    // a hard fork earlier than the root is very unrealistic in the wild, but it's handy for
    // testing the persistent tower's lockout behavior...
    let hard_fork_slot = min_root - 5;
    let hard_fork_slots = Some(vec![hard_fork_slot]);
    let mut hard_forks = solana_hard_forks::HardForks::default();
    hard_forks.register(hard_fork_slot);

    let expected_shred_version = solana_shred_version::compute_shred_version(
        &cluster.lock().unwrap().genesis_config.hash(),
        Some(&hard_forks),
    );

    cluster
        .lock()
        .unwrap()
        .set_shred_version(expected_shred_version);

    validator_a_info
        .config
        .new_hard_forks
        .clone_from(&hard_fork_slots);
    validator_a_info.config.wait_for_supermajority = Some(hard_fork_slot);
    validator_a_info.config.expected_shred_version = Some(expected_shred_version);

    validator_b_info.config.new_hard_forks = hard_fork_slots;
    validator_b_info.config.wait_for_supermajority = Some(hard_fork_slot);
    validator_b_info.config.expected_shred_version = Some(expected_shred_version);

    // Clear ledger of all slots post hard fork
    {
        let blockstore_a = open_blockstore(&validator_a_info.info.ledger_path);
        let blockstore_b = open_blockstore(&validator_b_info.info.ledger_path);
        purge_slots_with_count(&blockstore_a, hard_fork_slot + 1, 100);
        purge_slots_with_count(&blockstore_b, hard_fork_slot + 1, 100);
    }

    restart_whole_cluster_after_hard_fork(
        &cluster,
        validator_a_pubkey,
        validator_b_pubkey,
        validator_a_info,
        validator_b_info,
    );

    // new slots should be rooted after hard-fork cluster relaunch
    cluster
        .lock()
        .unwrap()
        .check_for_new_roots(16, "hard fork", SocketAddrSpace::Unspecified);
}
#[test]
#[serial]
fn test_run_test_load_program_accounts_root() {
    run_test_load_program_accounts(CommitmentConfig::finalized());
}

fn create_simple_snapshot_config(ledger_path: &Path) -> SnapshotConfig {
    SnapshotConfig {
        full_snapshot_archives_dir: ledger_path.to_path_buf(),
        bank_snapshots_dir: ledger_path.join(snapshot_paths::BANK_SNAPSHOTS_DIR),
        ..SnapshotConfig::default()
    }
}
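
// Replay the ledger up to `snapshot_slot` with the given hard forks applied and write a full
// snapshot archive for that slot into the ledger directory.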
fn create_snapshot_to_hard_fork(
    blockstore: &Blockstore,
    snapshot_slot: Slot,
    hard_forks: Vec<Slot>,
) {
    let process_options = ProcessOptions {
        halt_at_slot: Some(snapshot_slot),
        new_hard_forks: Some(hard_forks),
        run_verification: false,
        ..ProcessOptions::default()
    };
    let ledger_path = blockstore.ledger_path();
    let genesis_config = open_genesis_config(ledger_path, u64::MAX).unwrap();
    let snapshot_config = create_simple_snapshot_config(ledger_path);
    let (bank_forks, ..) = bank_forks_utils::load(
        &genesis_config,
        blockstore,
        vec![
            create_accounts_run_and_snapshot_dirs(ledger_path.join("accounts"))
                .unwrap()
                .0,
        ],
        &snapshot_config,
        process_options,
        None,
        None,
        None,
        Arc::default(),
    )
    .unwrap();
    let bank = bank_forks.read().unwrap().get(snapshot_slot).unwrap();
    let full_snapshot_archive_info = snapshot_bank_utils::bank_to_full_snapshot_archive(
        ledger_path,
        &bank,
        Some(snapshot_config.snapshot_version),
        ledger_path,
        ledger_path,
        snapshot_config.archive_format,
    )
    .unwrap();
    info!(
        "Successfully created snapshot for slot {}, hash {}: {}",
        bank.slot(),
        bank.hash(),
        full_snapshot_archive_info.path().display(),
    );
}
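
// Regression test: restarting from a hard-forked snapshot at an unrooted slot must not leave a
// gap in the blockstore's roots column family; after the relaunch, both validators' ancestor and
// rooted-slot iterators must agree.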
  2182. #[test]
  2183. #[ignore]
  2184. #[serial]
  2185. fn test_hard_fork_with_gap_in_roots() {
  2186. agave_logger::setup_with_default(RUST_LOG_FILTER);
  2187. // First set up the cluster with 2 nodes
  2188. let slots_per_epoch = 2048;
  2189. let node_stakes = vec![60, 40];
  2190. let validator_keys = [
  2191. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2192. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  2193. ]
  2194. .iter()
  2195. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2196. .take(node_stakes.len())
  2197. .collect::<Vec<_>>();
  2198. let validators = validator_keys
  2199. .iter()
  2200. .map(|(kp, _)| kp.pubkey())
  2201. .collect::<Vec<_>>();
  2202. let validator_a_pubkey = validators[0];
  2203. let validator_b_pubkey = validators[1];
  2204. let validator_config = ValidatorConfig {
  2205. snapshot_config: LocalCluster::create_dummy_load_only_snapshot_config(),
  2206. ..ValidatorConfig::default_for_test()
  2207. };
  2208. let mut config = ClusterConfig {
  2209. mint_lamports: 100_000,
  2210. node_stakes: node_stakes.clone(),
  2211. validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
  2212. validator_keys: Some(validator_keys),
  2213. slots_per_epoch,
  2214. stakers_slot_offset: slots_per_epoch,
  2215. skip_warmup_slots: true,
  2216. ..ClusterConfig::default()
  2217. };
  2218. let cluster = std::sync::Arc::new(std::sync::Mutex::new(LocalCluster::new(
  2219. &mut config,
  2220. SocketAddrSpace::Unspecified,
  2221. )));
  2222. let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey);
  2223. let val_b_ledger_path = cluster.lock().unwrap().ledger_path(&validator_b_pubkey);
  2224. let min_last_vote = 45;
  2225. let min_root = 10;
  2226. loop {
  2227. sleep(Duration::from_millis(100));
  2228. if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  2229. if last_vote >= min_last_vote
  2230. && root_in_tower(&val_a_ledger_path, &validator_a_pubkey) > Some(min_root)
  2231. {
  2232. break;
  2233. }
  2234. }
  2235. }
  2236. // stop all nodes of the cluster
  2237. let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey);
  2238. let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);
  2239. // hard fork slot is effectively a (possibly skipping) new root.
  2240. // assert that the precondition of validator a to test gap between
  2241. // blockstore and hard fork...
  2242. let hard_fork_slot = min_last_vote - 5;
  2243. assert!(hard_fork_slot > root_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap());
  2244. let hard_fork_slots = Some(vec![hard_fork_slot]);
  2245. let mut hard_forks = HardForks::default();
  2246. hard_forks.register(hard_fork_slot);
  2247. let expected_shred_version = solana_shred_version::compute_shred_version(
  2248. &cluster.lock().unwrap().genesis_config.hash(),
  2249. Some(&hard_forks),
  2250. );
  2251. // create hard-forked snapshot only for validator a, emulating the manual cluster restart
  2252. // procedure with `agave-ledger-tool create-snapshot`
  2253. let genesis_slot = 0;
  2254. {
  2255. let blockstore_a = Blockstore::open(&val_a_ledger_path).unwrap();
  2256. create_snapshot_to_hard_fork(&blockstore_a, hard_fork_slot, vec![hard_fork_slot]);
  2257. // Intentionally make agave-validator unbootable by replaying blocks from the genesis to
  2258. // ensure the hard-forked snapshot is used always. Otherwise, we couldn't create a gap
  2259. // in the ledger roots column family reliably.
  2260. // There was a bug which caused the hard-forked snapshot at an unrooted slot to forget
  2261. // to root some slots (thus, creating a gap in roots, which shouldn't happen).
  2262. purge_slots_with_count(&blockstore_a, genesis_slot, 1);
  2263. let next_slot = genesis_slot + 1;
  2264. let mut meta = blockstore_a.meta(next_slot).unwrap().unwrap();
  2265. meta.unset_parent();
  2266. blockstore_a.put_meta(next_slot, &meta).unwrap();
  2267. }
  2268. // strictly speaking, new_hard_forks isn't needed for validator a.
  2269. // but when snapshot loading isn't working, you might see:
  2270. // shred version mismatch: expected NNNN found: MMMM
  2271. //validator_a_info.config.new_hard_forks = hard_fork_slots.clone();
  2272. // effectively pass the --hard-fork parameter to validator b
  2273. validator_b_info.config.new_hard_forks = hard_fork_slots;
  2274. validator_a_info.config.wait_for_supermajority = Some(hard_fork_slot);
  2275. validator_a_info.config.expected_shred_version = Some(expected_shred_version);
  2276. validator_b_info.config.wait_for_supermajority = Some(hard_fork_slot);
  2277. validator_b_info.config.expected_shred_version = Some(expected_shred_version);
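// Both validators wait for supermajority at the hard fork slot with the new shred version,
// mirroring the coordinated cluster restart procedure.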
  2278. restart_whole_cluster_after_hard_fork(
  2279. &cluster,
  2280. validator_a_pubkey,
  2281. validator_b_pubkey,
  2282. validator_a_info,
  2283. validator_b_info,
  2284. );
  2285. // new slots should be rooted after hard-fork cluster relaunch
  2286. cluster
  2287. .lock()
  2288. .unwrap()
  2289. .check_for_new_roots(16, "hard fork", SocketAddrSpace::Unspecified);
  2290. // drop everything to open blockstores below
  2291. drop(cluster);
  2292. let (common_last_vote, common_root) = {
  2293. let (last_vote_a, _) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
  2294. let (last_vote_b, _) = last_vote_in_tower(&val_b_ledger_path, &validator_b_pubkey).unwrap();
  2295. let root_a = root_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
  2296. let root_b = root_in_tower(&val_b_ledger_path, &validator_b_pubkey).unwrap();
  2297. (last_vote_a.min(last_vote_b), root_a.min(root_b))
  2298. };
  2299. let blockstore_a = Blockstore::open(&val_a_ledger_path).unwrap();
  2300. let blockstore_b = Blockstore::open(&val_b_ledger_path).unwrap();
  2301. // collect all slot/root parents
  2302. let mut slots_a = AncestorIterator::new(common_last_vote, &blockstore_a).collect::<Vec<_>>();
  2303. let mut roots_a = blockstore_a
  2304. .reversed_rooted_slot_iterator(common_root)
  2305. .unwrap()
  2306. .collect::<Vec<_>>();
// Artificially restore the forcibly purged genesis slot, but only for validator A, for the
// sake of the final assertions.
  2309. slots_a.push(genesis_slot);
  2310. roots_a.push(genesis_slot);
  2311. let slots_b = AncestorIterator::new(common_last_vote, &blockstore_b).collect::<Vec<_>>();
  2312. let roots_b = blockstore_b
  2313. .reversed_rooted_slot_iterator(common_root)
  2314. .unwrap()
  2315. .collect::<Vec<_>>();
  2316. // compare them all!
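// Both validators must agree on the ancestor chain and on the rooted slots, and the roots
// must form a contiguous suffix of the ancestor chain (i.e. no gap in the roots column family).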
  2317. assert_eq!((&slots_a, &roots_a), (&slots_b, &roots_b));
  2318. assert_eq!(&slots_a[slots_a.len() - roots_a.len()..].to_vec(), &roots_a);
  2319. assert_eq!(&slots_b[slots_b.len() - roots_b.len()..].to_vec(), &roots_b);
  2320. }
  2321. #[test]
  2322. #[serial]
  2323. fn test_restart_tower_rollback() {
// Test a node crashing and failing to save its tower before restart.
// The cluster continues to make progress, and this node is able to rejoin with an
// outdated tower after the restart.
  2327. agave_logger::setup_with_default(RUST_LOG_FILTER);
  2328. // First set up the cluster with 2 nodes
  2329. let slots_per_epoch = 2048;
  2330. let node_stakes = vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE];
  2331. let validator_strings = [
  2332. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2333. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  2334. ];
  2335. let validator_keys = validator_strings
  2336. .iter()
  2337. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2338. .take(node_stakes.len())
  2339. .collect::<Vec<_>>();
  2340. let b_pubkey = validator_keys[1].0.pubkey();
  2341. let mut validator_config = ValidatorConfig::default_for_test();
  2342. validator_config.wait_for_supermajority = Some(0);
  2343. let mut config = ClusterConfig {
  2344. mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
  2345. node_stakes: node_stakes.clone(),
  2346. validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
  2347. validator_keys: Some(validator_keys),
  2348. slots_per_epoch,
  2349. stakers_slot_offset: slots_per_epoch,
  2350. skip_warmup_slots: true,
  2351. ..ClusterConfig::default()
  2352. };
  2353. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  2354. let val_b_ledger_path = cluster.ledger_path(&b_pubkey);
  2355. let mut earlier_tower: Tower;
  2356. loop {
  2357. sleep(Duration::from_millis(1000));
  2358. // Grab the current saved tower
  2359. earlier_tower = restore_tower(&val_b_ledger_path, &b_pubkey).unwrap();
  2360. if earlier_tower.last_voted_slot().unwrap_or(0) > 1 {
  2361. break;
  2362. }
  2363. }
  2364. let mut exited_validator_info: ClusterValidatorInfo;
  2365. let last_voted_slot: Slot;
  2366. loop {
  2367. sleep(Duration::from_millis(1000));
  2368. // Wait for second, lesser staked validator to make a root past the earlier_tower's
  2369. // latest vote slot, then exit that validator
  2370. let tower = restore_tower(&val_b_ledger_path, &b_pubkey).unwrap();
  2371. if tower.root()
  2372. > earlier_tower
  2373. .last_voted_slot()
  2374. .expect("Earlier tower must have at least one vote")
  2375. {
  2376. exited_validator_info = cluster.exit_node(&b_pubkey);
  2377. last_voted_slot = tower.last_voted_slot().unwrap();
  2378. break;
  2379. }
  2380. }
  2381. // Now rewrite the tower with the *earlier_tower*. We disable voting until we reach
  2382. // a slot we did not previously vote for in order to avoid duplicate vote slashing
  2383. // issues.
  2384. save_tower(
  2385. &val_b_ledger_path,
  2386. &earlier_tower,
  2387. &exited_validator_info.info.keypair,
  2388. );
  2389. exited_validator_info.config.wait_to_vote_slot = Some(last_voted_slot + 10);
  2390. cluster.restart_node(
  2391. &b_pubkey,
  2392. exited_validator_info,
  2393. SocketAddrSpace::Unspecified,
  2394. );
  2395. // Check this node is making new roots
  2396. cluster.check_for_new_roots(
  2397. 20,
  2398. "test_restart_tower_rollback",
  2399. SocketAddrSpace::Unspecified,
  2400. );
  2401. }
  2402. #[test]
  2403. #[serial]
  2404. fn test_run_test_load_program_accounts_partition_root() {
  2405. run_test_load_program_accounts_partition(CommitmentConfig::finalized());
  2406. }
  2407. fn run_test_load_program_accounts_partition(scan_commitment: CommitmentConfig) {
  2408. let num_slots_per_validator = 8;
  2409. let partitions: [usize; 2] = [1, 1];
  2410. let (leader_schedule, validator_keys) = create_custom_leader_schedule_with_random_keys(&[
  2411. num_slots_per_validator,
  2412. num_slots_per_validator,
  2413. ]);
  2414. let (update_client_sender, update_client_receiver) = unbounded();
  2415. let (scan_client_sender, scan_client_receiver) = unbounded();
  2416. let exit = Arc::new(AtomicBool::new(false));
  2417. let (t_update, t_scan, additional_accounts) = setup_transfer_scan_threads(
  2418. 100,
  2419. exit.clone(),
  2420. scan_commitment,
  2421. update_client_receiver,
  2422. scan_client_receiver,
  2423. );
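// The update thread below continuously shuttles lamports between two fixed sets of accounts,
// while the scan thread asserts that the tracked total balance stays constant across the partition.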
  2424. let on_partition_start = |cluster: &mut LocalCluster, _: &mut ()| {
  2425. let update_client = cluster
  2426. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2427. .unwrap();
  2428. update_client_sender.send(update_client).unwrap();
  2429. let scan_client = cluster
  2430. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2431. .unwrap();
  2432. scan_client_sender.send(scan_client).unwrap();
  2433. };
  2434. let on_partition_before_resolved = |_: &mut LocalCluster, _: &mut ()| {};
  2435. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  2436. cluster.check_for_new_roots(
  2437. 20,
  2438. "run_test_load_program_accounts_partition",
  2439. SocketAddrSpace::Unspecified,
  2440. );
  2441. exit.store(true, Ordering::Relaxed);
  2442. t_update.join().unwrap();
  2443. t_scan.join().unwrap();
  2444. };
  2445. run_cluster_partition(
  2446. &partitions,
  2447. Some((leader_schedule, validator_keys)),
  2448. (),
  2449. on_partition_start,
  2450. on_partition_before_resolved,
  2451. on_partition_resolved,
  2452. None,
  2453. false,
  2454. additional_accounts,
  2455. );
  2456. }
  2457. #[test]
  2458. #[serial]
  2459. fn test_rpc_block_subscribe() {
  2460. let leader_stake = 100 * DEFAULT_NODE_STAKE;
  2461. let rpc_stake = DEFAULT_NODE_STAKE;
  2462. let total_stake = leader_stake + rpc_stake;
  2463. let node_stakes = vec![leader_stake, rpc_stake];
  2464. let mut validator_config = ValidatorConfig::default_for_test();
  2465. validator_config.enable_default_rpc_block_subscribe();
  2466. validator_config.wait_for_supermajority = Some(0);
  2467. let validator_keys = [
  2468. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2469. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  2470. ]
  2471. .iter()
  2472. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2473. .take(node_stakes.len())
  2474. .collect::<Vec<_>>();
  2475. let rpc_node_pubkey = &validator_keys[1].0.pubkey();
  2476. let mut config = ClusterConfig {
  2477. mint_lamports: total_stake,
  2478. node_stakes,
  2479. validator_configs: make_identical_validator_configs(&validator_config, 2),
  2480. validator_keys: Some(validator_keys),
  2481. skip_warmup_slots: true,
  2482. ..ClusterConfig::default()
  2483. };
  2484. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  2485. let rpc_node_contact_info = cluster.get_contact_info(rpc_node_pubkey).unwrap();
  2486. let (mut block_subscribe_client, receiver) = PubsubClient::block_subscribe(
  2487. format!(
  2488. "ws://{}",
// It is important that we subscribe to a non-leader node, as there
// is a race condition which can cause leader nodes to not send
// BlockUpdate notifications properly. See https://github.com/solana-labs/solana/pull/34421
  2492. &rpc_node_contact_info.rpc_pubsub().unwrap().to_string()
  2493. ),
  2494. RpcBlockSubscribeFilter::All,
  2495. Some(RpcBlockSubscribeConfig {
  2496. commitment: Some(CommitmentConfig::confirmed()),
  2497. encoding: None,
  2498. transaction_details: None,
  2499. show_rewards: None,
  2500. max_supported_transaction_version: None,
  2501. }),
  2502. )
  2503. .unwrap();
  2504. let mut received_block = false;
  2505. let max_wait = 10_000;
  2506. let start = Instant::now();
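// Poll the subscription until a confirmed block notification for a slot greater than 1 arrives,
// failing if `max_wait` ms elapse first.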
  2507. while !received_block {
  2508. assert!(
  2509. start.elapsed() <= Duration::from_millis(max_wait),
  2510. "Went too long {max_wait} ms without receiving a confirmed block",
  2511. );
  2512. let responses: Vec<_> = receiver.try_iter().collect();
  2513. // Wait for a response
  2514. if !responses.is_empty() {
  2515. for response in responses {
  2516. assert!(response.value.err.is_none());
  2517. assert!(response.value.block.is_some());
  2518. if response.value.slot > 1 {
  2519. received_block = true;
  2520. }
  2521. }
  2522. }
  2523. sleep(Duration::from_millis(100));
  2524. }
  2525. // If we don't drop the cluster, the blocking web socket service
  2526. // won't return, and the `block_subscribe_client` won't shut down
  2527. drop(cluster);
  2528. block_subscribe_client.shutdown().unwrap();
  2529. }
  2530. #[test]
  2531. #[serial]
  2532. #[allow(unused_attributes)]
  2533. fn test_oc_bad_signatures() {
  2534. agave_logger::setup_with_default(RUST_LOG_FILTER);
  2535. let total_stake = 100 * DEFAULT_NODE_STAKE;
  2536. let leader_stake = (total_stake as f64 * VOTE_THRESHOLD_SIZE) as u64;
  2537. let our_node_stake = total_stake - leader_stake;
  2538. let node_stakes = vec![leader_stake, our_node_stake];
  2539. let validator_keys = [
  2540. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2541. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  2542. ]
  2543. .iter()
  2544. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2545. .take(node_stakes.len())
  2546. .collect::<Vec<_>>();
// Give the bootstrap node all the leader slots to avoid initial forking, which would lead
// to casting votes with an invalid blockhash. That is not what is meant to be tested and
// only inflates test time.
  2550. let fixed_schedule = FixedSchedule {
  2551. leader_schedule: Arc::new(Box::new(IdentityKeyedLeaderSchedule::new_from_schedule(
  2552. vec![validator_keys.first().unwrap().0.pubkey()],
  2553. ))),
  2554. };
  2555. let mut validator_config = ValidatorConfig {
  2556. require_tower: true,
  2557. wait_for_supermajority: Some(0),
  2558. fixed_leader_schedule: Some(fixed_schedule),
  2559. ..ValidatorConfig::default_for_test()
  2560. };
  2561. validator_config.enable_default_rpc_block_subscribe();
  2562. let our_id = validator_keys.last().unwrap().0.pubkey();
  2563. let mut config = ClusterConfig {
  2564. mint_lamports: total_stake,
  2565. node_stakes,
  2566. validator_configs: make_identical_validator_configs(&validator_config, 2),
  2567. validator_keys: Some(validator_keys),
  2568. skip_warmup_slots: true,
  2569. ..ClusterConfig::default()
  2570. };
  2571. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  2572. // 2) Kill our node and start up a thread to simulate votes to control our voting behavior
  2573. let our_info = cluster.exit_node(&our_id);
  2574. let node_keypair = our_info.info.keypair;
  2575. let vote_keypair = our_info.info.voting_keypair;
  2576. info!(
  2577. "our node id: {}, vote id: {}",
  2578. node_keypair.pubkey(),
  2579. vote_keypair.pubkey()
  2580. );
  2581. // 3) Start up a spy to listen for and push votes to leader TPU
  2582. let client = cluster
  2583. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2584. .unwrap();
  2585. let voter_thread_sleep_ms: usize = 100;
  2586. let num_votes_simulated = Arc::new(AtomicUsize::new(0));
  2587. let gossip_voter = cluster_tests::start_gossip_voter(
  2588. &cluster.entry_point_info.gossip().unwrap(),
  2589. &node_keypair,
  2590. |(_label, leader_vote_tx)| {
  2591. let vote = vote_parser::parse_vote_transaction(&leader_vote_tx)
  2592. .map(|(_, vote, ..)| vote)
  2593. .unwrap();
  2594. // Filter out empty votes
  2595. if !vote.is_empty() {
  2596. Some((vote, leader_vote_tx))
  2597. } else {
  2598. None
  2599. }
  2600. },
  2601. {
  2602. let node_keypair = node_keypair.insecure_clone();
  2603. let vote_keypair = vote_keypair.insecure_clone();
  2604. let num_votes_simulated = num_votes_simulated.clone();
  2605. move |vote_slot, leader_vote_tx, parsed_vote, _cluster_info| {
  2606. info!("received vote for {vote_slot}");
  2607. let vote_hash = parsed_vote.hash();
  2608. info!("Simulating vote from our node on slot {vote_slot}, hash {vote_hash}");
  2609. // Add all recent vote slots on this fork to allow cluster to pass
  2610. // vote threshold checks in replay. Note this will instantly force a
  2611. // root by this validator.
  2612. let tower_sync = TowerSync::new_from_slots(vec![vote_slot], vote_hash, None);
  2613. let bad_authorized_signer_keypair = Keypair::new();
  2614. let mut vote_tx = vote_transaction::new_tower_sync_transaction(
  2615. tower_sync,
  2616. leader_vote_tx.message.recent_blockhash,
  2617. &node_keypair,
  2618. &vote_keypair,
  2619. // Make a bad signer
  2620. &bad_authorized_signer_keypair,
  2621. None,
  2622. );
  2623. LocalCluster::send_transaction_with_retries(
  2624. &client,
  2625. &[&node_keypair, &bad_authorized_signer_keypair],
  2626. &mut vote_tx,
  2627. 5,
  2628. )
  2629. .unwrap();
  2630. num_votes_simulated.fetch_add(1, Ordering::Relaxed);
  2631. }
  2632. },
  2633. voter_thread_sleep_ms as u64,
  2634. cluster.validators.len().saturating_sub(1),
  2635. 0,
  2636. 0,
  2637. cluster.entry_point_info.shred_version(),
  2638. );
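// Every vote pushed by the gossip voter above is signed with a freshly generated keypair
// instead of the authorized voter, so none of these votes should ever be accepted.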
  2639. let (mut block_subscribe_client, receiver) = PubsubClient::block_subscribe(
  2640. format!(
  2641. "ws://{}",
  2642. &cluster.entry_point_info.rpc_pubsub().unwrap().to_string()
  2643. ),
  2644. RpcBlockSubscribeFilter::All,
  2645. Some(RpcBlockSubscribeConfig {
  2646. commitment: Some(CommitmentConfig::confirmed()),
  2647. encoding: None,
  2648. transaction_details: None,
  2649. show_rewards: None,
  2650. max_supported_transaction_version: None,
  2651. }),
  2652. )
  2653. .unwrap();
  2654. const MAX_VOTES_TO_SIMULATE: usize = 10;
  2655. // Make sure test doesn't take too long
  2656. assert!(voter_thread_sleep_ms * MAX_VOTES_TO_SIMULATE <= 1000);
  2657. loop {
  2658. let responses: Vec<_> = receiver.try_iter().collect();
  2659. // Nothing should get optimistically confirmed or rooted
  2660. assert!(responses.is_empty());
  2661. // Wait for the voter thread to attempt sufficient number of votes to give
  2662. // a chance for the violation to occur
  2663. if num_votes_simulated.load(Ordering::Relaxed) > MAX_VOTES_TO_SIMULATE {
  2664. break;
  2665. }
  2666. sleep(Duration::from_millis(100));
  2667. }
  2668. // Clean up voter thread
  2669. gossip_voter.close();
  2670. // If we don't drop the cluster, the blocking web socket service
  2671. // won't return, and the `block_subscribe_client` won't shut down
  2672. drop(cluster);
  2673. block_subscribe_client.shutdown().unwrap();
  2674. }
  2675. #[test]
  2676. #[serial]
  2677. #[ignore]
  2678. fn test_votes_land_in_fork_during_long_partition() {
  2679. let total_stake = 3 * DEFAULT_NODE_STAKE;
  2680. // Make `lighter_stake` insufficient for switching threshold
  2681. let lighter_stake = (SWITCH_FORK_THRESHOLD * total_stake as f64) as u64;
  2682. let heavier_stake = lighter_stake + 1;
  2683. let failures_stake = total_stake - lighter_stake - heavier_stake;
  2684. // Give lighter stake 30 consecutive slots before
  2685. // the heavier stake gets a single slot
  2686. let partitions: &[(usize, usize)] =
  2687. &[(heavier_stake as usize, 1), (lighter_stake as usize, 30)];
  2688. #[derive(Default)]
  2689. struct PartitionContext {
  2690. heaviest_validator_key: Pubkey,
  2691. lighter_validator_key: Pubkey,
  2692. heavier_fork_slot: Slot,
  2693. }
  2694. let on_partition_start = |_cluster: &mut LocalCluster,
  2695. validator_keys: &[Pubkey],
  2696. _dead_validator_infos: Vec<ClusterValidatorInfo>,
  2697. context: &mut PartitionContext| {
  2698. // validator_keys[0] is the validator that will be killed, i.e. the validator with
  2699. // stake == `failures_stake`
  2700. context.heaviest_validator_key = validator_keys[1];
  2701. context.lighter_validator_key = validator_keys[2];
  2702. };
  2703. let on_before_partition_resolved =
  2704. |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  2705. let lighter_validator_ledger_path = cluster.ledger_path(&context.lighter_validator_key);
  2706. let heavier_validator_ledger_path =
  2707. cluster.ledger_path(&context.heaviest_validator_key);
  2708. // Wait for each node to have created and voted on its own partition
  2709. loop {
  2710. let (heavier_validator_latest_vote_slot, _) = last_vote_in_tower(
  2711. &heavier_validator_ledger_path,
  2712. &context.heaviest_validator_key,
  2713. )
  2714. .unwrap();
  2715. info!(
  2716. "Checking heavier validator's last vote {heavier_validator_latest_vote_slot} \
  2717. is on a separate fork"
  2718. );
  2719. let lighter_validator_blockstore = open_blockstore(&lighter_validator_ledger_path);
  2720. if lighter_validator_blockstore
  2721. .meta(heavier_validator_latest_vote_slot)
  2722. .unwrap()
  2723. .is_none()
  2724. {
  2725. context.heavier_fork_slot = heavier_validator_latest_vote_slot;
  2726. return;
  2727. }
  2728. sleep(Duration::from_millis(100));
  2729. }
  2730. };
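// Once the partition resolves, the lighter validator is expected to switch over and root the
// slot recorded from the heavier fork.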
  2731. let on_partition_resolved = |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  2732. let lighter_validator_ledger_path = cluster.ledger_path(&context.lighter_validator_key);
  2733. let start = Instant::now();
  2734. let max_wait = ms_for_n_slots(MAX_PROCESSING_AGE as u64, DEFAULT_TICKS_PER_SLOT);
  2735. // Wait for the lighter node to switch over and root the `context.heavier_fork_slot`
  2736. loop {
  2737. assert!(
  2738. // Should finish faster than if the cluster were relying on replay vote
  2739. // refreshing to refresh the vote on blockhash expiration for the vote
  2740. // transaction.
  2741. start.elapsed() <= Duration::from_millis(max_wait),
  2742. "Went too long {max_wait} ms without a root",
  2743. );
  2744. let lighter_validator_blockstore = open_blockstore(&lighter_validator_ledger_path);
  2745. if lighter_validator_blockstore.is_root(context.heavier_fork_slot) {
  2746. info!(
  2747. "Partition resolved, new root made in {}ms",
  2748. start.elapsed().as_millis()
  2749. );
  2750. return;
  2751. }
  2752. sleep(Duration::from_millis(100));
  2753. }
  2754. };
  2755. run_kill_partition_switch_threshold(
  2756. &[(failures_stake as usize, 0)],
  2757. partitions,
  2758. None,
  2759. PartitionContext::default(),
  2760. on_partition_start,
  2761. on_before_partition_resolved,
  2762. on_partition_resolved,
  2763. );
  2764. }
  2765. fn setup_transfer_scan_threads(
  2766. num_starting_accounts: usize,
  2767. exit: Arc<AtomicBool>,
  2768. scan_commitment: CommitmentConfig,
  2769. update_client_receiver: Receiver<QuicTpuClient>,
  2770. scan_client_receiver: Receiver<QuicTpuClient>,
  2771. ) -> (
  2772. JoinHandle<()>,
  2773. JoinHandle<()>,
  2774. Vec<(Pubkey, AccountSharedData)>,
  2775. ) {
  2776. let exit_ = exit.clone();
  2777. let starting_keypairs: Arc<Vec<Keypair>> = Arc::new(
  2778. iter::repeat_with(Keypair::new)
  2779. .take(num_starting_accounts)
  2780. .collect(),
  2781. );
  2782. let target_keypairs: Arc<Vec<Keypair>> = Arc::new(
  2783. iter::repeat_with(Keypair::new)
  2784. .take(num_starting_accounts)
  2785. .collect(),
  2786. );
  2787. let starting_accounts: Vec<(Pubkey, AccountSharedData)> = starting_keypairs
  2788. .iter()
  2789. .map(|k| {
  2790. (
  2791. k.pubkey(),
  2792. AccountSharedData::new(1, 0, &system_program::id()),
  2793. )
  2794. })
  2795. .collect();
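// Each starting account is funded with exactly 1 lamport, so the balances tracked across the
// starting and target sets should always sum to `num_starting_accounts`.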
  2796. let starting_keypairs_ = starting_keypairs.clone();
  2797. let target_keypairs_ = target_keypairs.clone();
  2798. let t_update = Builder::new()
  2799. .name("update".to_string())
  2800. .spawn(move || {
  2801. let client = update_client_receiver.recv().unwrap();
  2802. loop {
  2803. if exit_.load(Ordering::Relaxed) {
  2804. return;
  2805. }
  2806. let (blockhash, _) = client
  2807. .rpc_client()
  2808. .get_latest_blockhash_with_commitment(CommitmentConfig::processed())
  2809. .unwrap();
  2810. for i in 0..starting_keypairs_.len() {
  2811. let result = client.async_transfer(
  2812. 1,
  2813. &starting_keypairs_[i],
  2814. &target_keypairs_[i].pubkey(),
  2815. blockhash,
  2816. );
  2817. if result.is_err() {
  2818. debug!("Failed in transfer for starting keypair: {result:?}");
  2819. }
  2820. }
  2821. for i in 0..starting_keypairs_.len() {
  2822. let result = client.async_transfer(
  2823. 1,
  2824. &target_keypairs_[i],
  2825. &starting_keypairs_[i].pubkey(),
  2826. blockhash,
  2827. );
  2828. if result.is_err() {
  2829. debug!("Failed in transfer for starting keypair: {result:?}");
  2830. }
  2831. }
  2832. }
  2833. })
  2834. .unwrap();
// Scan thread: the total funds should always add up to the original total
  2836. let mut scan_commitment_config = RpcProgramAccountsConfig::default();
  2837. scan_commitment_config.account_config.commitment = Some(scan_commitment);
  2838. let tracked_pubkeys: HashSet<Pubkey> = starting_keypairs
  2839. .iter()
  2840. .chain(target_keypairs.iter())
  2841. .map(|k| k.pubkey())
  2842. .collect();
  2843. let expected_total_balance = num_starting_accounts as u64;
  2844. let t_scan = Builder::new()
  2845. .name("scan".to_string())
  2846. .spawn(move || {
  2847. let client = scan_client_receiver.recv().unwrap();
  2848. loop {
  2849. if exit.load(Ordering::Relaxed) {
  2850. return;
  2851. }
  2852. if let Some(total_scan_balance) = client
  2853. .rpc_client()
  2854. .get_program_ui_accounts_with_config(
  2855. &system_program::id(),
  2856. scan_commitment_config.clone(),
  2857. )
  2858. .ok()
  2859. .map(|result| {
  2860. result
  2861. .into_iter()
  2862. .map(|(key, account)| {
  2863. if tracked_pubkeys.contains(&key) {
  2864. account.lamports
  2865. } else {
  2866. 0
  2867. }
  2868. })
  2869. .sum::<u64>()
  2870. })
  2871. {
  2872. assert_eq!(total_scan_balance, expected_total_balance);
  2873. }
  2874. }
  2875. })
  2876. .unwrap();
  2877. (t_update, t_scan, starting_accounts)
  2878. }
  2879. fn run_test_load_program_accounts(scan_commitment: CommitmentConfig) {
  2880. agave_logger::setup_with_default(RUST_LOG_FILTER);
  2881. // First set up the cluster with 2 nodes
  2882. let slots_per_epoch = 2048;
  2883. let node_stakes = vec![51 * DEFAULT_NODE_STAKE, 50 * DEFAULT_NODE_STAKE];
  2884. let validator_keys: Vec<_> = [
  2885. "4qhhXNTbKD1a5vxDDLZcHKj7ELNeiivtUBxn3wUK1F5VRsQVP89VUhfXqSfgiFB14GfuBgtrQ96n9NvWQADVkcCg",
  2886. "3kHBzVwie5vTEaY6nFCPeFT8qDpoXzn7dCEioGRNBTnUDpvwnG85w8Wq63gVWpVTP8k2a8cgcWRjSXyUkEygpXWS",
  2887. ]
  2888. .iter()
  2889. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2890. .take(node_stakes.len())
  2891. .collect();
  2892. let num_starting_accounts = 100;
  2893. let exit = Arc::new(AtomicBool::new(false));
  2894. let (update_client_sender, update_client_receiver) = unbounded();
  2895. let (scan_client_sender, scan_client_receiver) = unbounded();
  2896. // Setup the update/scan threads
  2897. let (t_update, t_scan, starting_accounts) = setup_transfer_scan_threads(
  2898. num_starting_accounts,
  2899. exit.clone(),
  2900. scan_commitment,
  2901. update_client_receiver,
  2902. scan_client_receiver,
  2903. );
  2904. let mut validator_config = ValidatorConfig::default_for_test();
  2905. validator_config.wait_for_supermajority = Some(0);
  2906. let mut config = ClusterConfig {
  2907. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  2908. node_stakes: node_stakes.clone(),
  2909. validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
  2910. validator_keys: Some(validator_keys),
  2911. slots_per_epoch,
  2912. stakers_slot_offset: slots_per_epoch,
  2913. skip_warmup_slots: true,
  2914. additional_accounts: starting_accounts,
  2915. ..ClusterConfig::default()
  2916. };
  2917. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  2918. // Give the threads a client to use for querying the cluster
  2919. let all_pubkeys = cluster.get_node_pubkeys();
  2920. let other_validator_id = all_pubkeys
  2921. .into_iter()
  2922. .find(|x| x != cluster.entry_point_info.pubkey())
  2923. .unwrap();
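// Updates go through the entry point node while scans go through the other validator,
// presumably so the consistency scan exercises a node other than the one ingesting the transfers.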
  2924. let client = cluster
  2925. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2926. .unwrap();
  2927. update_client_sender.send(client).unwrap();
  2928. let scan_client = cluster
  2929. .build_validator_tpu_quic_client(&other_validator_id)
  2930. .unwrap();
  2931. scan_client_sender.send(scan_client).unwrap();
  2932. // Wait for some roots to pass
  2933. cluster.check_for_new_roots(
  2934. 40,
  2935. "run_test_load_program_accounts",
  2936. SocketAddrSpace::Unspecified,
  2937. );
  2938. // Exit and ensure no violations of consistency were found
  2939. exit.store(true, Ordering::Relaxed);
  2940. t_update.join().unwrap();
  2941. t_scan.join().unwrap();
  2942. }
  2943. #[test]
  2944. #[serial]
  2945. fn test_no_lockout_violation_with_tower() {
  2946. do_test_lockout_violation_with_or_without_tower(true);
  2947. }
  2948. #[test]
  2949. #[serial]
  2950. fn test_lockout_violation_without_tower() {
  2951. do_test_lockout_violation_with_or_without_tower(false);
  2952. }
// A somewhat convoluted test case, but it roughly follows this theoretical scenario:
// Validators A, B, and C have 31%, 36%, and 33% of the stake respectively. The leader schedule
// is split: B is always the leader for the first half of the test, C for the second half.
  2956. // We don't give validator A any slots because it's going to be deleting its ledger,
  2957. // so it may create different blocks for slots it's already created blocks for on a different fork
  2958. //
  2959. // Step 1: Kill C, only A and B should be running
  2960. //
  2961. // base_slot -> next_slot_on_a (Wait for A to vote)
  2962. //
  2963. // Step 2:
// Kill A and B once we verify that A has voted on some `next_slot_on_a` >= 1.
  2965. // Copy B's ledger to A and C but only up to slot `next_slot_on_a`.
  2966. //
  2967. // Step 3:
  2968. // Restart validator C to make it produce blocks on a fork from `base_slot`
  2969. // that doesn't include `next_slot_on_a`. Wait for it to vote on its own fork.
  2970. //
  2971. // base_slot -> next_slot_on_c
  2972. //
  2973. // Step 4: Restart `A` which had 31% of the stake, it's missing `next_slot_on_a` in
  2974. // its ledger since we copied the ledger from B excluding this slot, so it sees
  2975. //
  2976. // base_slot -> next_slot_on_c
  2977. //
  2978. // Step 5:
  2979. // Without the persisted tower:
  2980. // `A` would choose to vote on the new fork from C on `next_slot_on_c`
  2981. //
  2982. // With the persisted tower:
  2983. // `A` should not be able to generate a switching proof.
  2984. //
  2985. fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
  2986. agave_logger::setup_with("info");
  2987. // First set up the cluster with 4 nodes
  2988. let slots_per_epoch = 2048;
  2989. let node_stakes = vec![
  2990. 31 * DEFAULT_NODE_STAKE,
  2991. 36 * DEFAULT_NODE_STAKE,
  2992. 33 * DEFAULT_NODE_STAKE,
  2993. ];
  2994. let validator_b_last_leader_slot: Slot = 8;
  2995. let truncated_slots: Slot = 100;
// The pubkeys are prefixed with A, B, and C respectively
  2997. let validator_keys = [
  2998. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2999. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  3000. "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
  3001. ]
  3002. .iter()
  3003. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  3004. .take(node_stakes.len())
  3005. .collect::<Vec<_>>();
  3006. let validators = validator_keys
  3007. .iter()
  3008. .map(|(kp, _)| kp.pubkey())
  3009. .collect::<Vec<_>>();
  3010. let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) =
  3011. (validators[0], validators[1], validators[2]);
  3012. // Disable voting on all validators other than validator B
  3013. let mut default_config = ValidatorConfig::default_for_test();
  3014. // Ensure B can make leader blocks up till the fork slot, and give the remaining slots to C. This is
  3015. // also important so `C` doesn't run into NoPropagatedConfirmation errors on making its first forked
  3016. // slot.
  3017. //
  3018. // Don't give validator A any slots because it's going to be deleting its ledger, so it may create
  3019. // versions of slots it's already created, but on a different fork.
  3020. let validator_to_slots = vec![
  3021. (
  3022. validator_b_pubkey,
  3023. validator_b_last_leader_slot as usize + 1,
  3024. ),
  3025. (validator_c_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
  3026. ];
  3027. // Trick C into not producing any blocks during this time, in case its leader slots come up before we can
  3028. // kill the validator. We don't want any forks during the time validator B is producing its initial blocks.
  3029. let c_validator_to_slots = vec![(validator_b_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize)];
  3030. let c_leader_schedule = create_custom_leader_schedule(c_validator_to_slots.into_iter());
  3031. let leader_schedule = Arc::new(create_custom_leader_schedule(
  3032. validator_to_slots.into_iter(),
  3033. ));
  3034. for slot in 0..=validator_b_last_leader_slot {
  3035. assert_eq!(leader_schedule[slot], validator_b_pubkey);
  3036. }
  3037. default_config.fixed_leader_schedule = Some(FixedSchedule {
  3038. leader_schedule: leader_schedule.clone(),
  3039. });
  3040. default_config.wait_for_supermajority = Some(0);
  3041. let mut validator_configs =
  3042. make_identical_validator_configs(&default_config, node_stakes.len());
  3043. // Disable voting on validator C
  3044. validator_configs[2].voting_disabled = true;
  3045. // C should not produce any blocks at this time
  3046. validator_configs[2].fixed_leader_schedule = Some(FixedSchedule {
  3047. leader_schedule: Arc::new(c_leader_schedule),
  3048. });
  3049. let mut config = ClusterConfig {
  3050. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  3051. node_stakes,
  3052. validator_configs,
  3053. validator_keys: Some(validator_keys),
  3054. slots_per_epoch,
  3055. stakers_slot_offset: slots_per_epoch,
  3056. skip_warmup_slots: true,
  3057. ..ClusterConfig::default()
  3058. };
  3059. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  3060. let val_a_ledger_path = cluster.ledger_path(&validator_a_pubkey);
  3061. let val_b_ledger_path = cluster.ledger_path(&validator_b_pubkey);
  3062. let val_c_ledger_path = cluster.ledger_path(&validator_c_pubkey);
  3063. info!("val_a {validator_a_pubkey} ledger path {val_a_ledger_path:?}");
  3064. info!("val_b {validator_b_pubkey} ledger path {val_b_ledger_path:?}");
  3065. info!("val_c {validator_c_pubkey} ledger path {val_c_ledger_path:?}");
  3066. info!("Exiting validator C");
  3067. let mut validator_c_info = cluster.exit_node(&validator_c_pubkey);
  3068. info!("Waiting on validator A to vote");
  3069. // Step 1: Wait for validator A to vote so the tower file exists, and so we can determine the
  3070. // `base_slot` and `next_slot_on_a`
  3071. loop {
  3072. if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  3073. if last_vote >= 1 {
  3074. break;
  3075. }
  3076. }
  3077. sleep(Duration::from_millis(100));
  3078. }
  3079. // kill A and B
  3080. info!("Exiting validators A and B");
  3081. let _validator_b_info = cluster.exit_node(&validator_b_pubkey);
  3082. let validator_a_info = cluster.exit_node(&validator_a_pubkey);
  3083. let next_slot_on_a = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
  3084. .unwrap()
  3085. .0;
  3086. let base_slot = next_slot_on_a - 1;
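// `next_slot_on_a` is the slot A last voted on; its parent, `base_slot`, is where validator C
// will later build its competing fork.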
  3087. info!("base slot: {base_slot}, next_slot_on_a: {next_slot_on_a}");
  3088. // Step 2:
  3089. // Truncate ledger, copy over B's ledger to C
  3090. info!("Create validator C's ledger");
  3091. {
  3092. // first copy from validator B's ledger
  3093. std::fs::remove_dir_all(&validator_c_info.info.ledger_path).unwrap();
  3094. let mut opt = fs_extra::dir::CopyOptions::new();
  3095. opt.copy_inside = true;
  3096. fs_extra::dir::copy(&val_b_ledger_path, &val_c_ledger_path, &opt).unwrap();
  3097. // Remove B's tower in C's new copied ledger
  3098. remove_tower(&val_c_ledger_path, &validator_b_pubkey);
  3099. let blockstore = open_blockstore(&val_c_ledger_path);
  3100. purge_slots_with_count(&blockstore, next_slot_on_a, truncated_slots);
  3101. }
  3102. info!("Create validator A's ledger");
  3103. {
  3104. // Now we copy these blocks to A
  3105. let b_blockstore = open_blockstore(&val_b_ledger_path);
  3106. let a_blockstore = open_blockstore(&val_a_ledger_path);
  3107. copy_blocks(next_slot_on_a, &b_blockstore, &a_blockstore, false);
  3108. // Purge unnecessary slots
  3109. purge_slots_with_count(&a_blockstore, next_slot_on_a + 1, truncated_slots);
  3110. }
  3111. {
  3112. let blockstore = open_blockstore(&val_a_ledger_path);
  3113. if !with_tower {
  3114. info!("Removing tower!");
  3115. remove_tower(&val_a_ledger_path, &validator_a_pubkey);
  3116. // Remove next_slot_on_a from ledger to force validator A to select
  3117. // votes_on_c_fork. Otherwise, in the test case without a tower,
  3118. // the validator A will immediately vote for 27 on restart, because it
  3119. // hasn't gotten the heavier fork from validator C yet.
  3120. // Then it will be stuck on 27 unable to switch because C doesn't
  3121. // have enough stake to generate a switching proof
  3122. purge_slots_with_count(&blockstore, next_slot_on_a, truncated_slots);
  3123. } else {
  3124. info!("Not removing tower!");
  3125. }
  3126. }
  3127. // Step 3:
  3128. // Run validator C only to make it produce and vote on its own fork.
  3129. info!("Restart validator C again!!!");
  3130. validator_c_info.config.voting_disabled = false;
  3131. // C should now produce blocks
  3132. validator_c_info.config.fixed_leader_schedule = Some(FixedSchedule { leader_schedule });
  3133. cluster.restart_node(
  3134. &validator_c_pubkey,
  3135. validator_c_info,
  3136. SocketAddrSpace::Unspecified,
  3137. );
  3138. let mut votes_on_c_fork = std::collections::BTreeSet::new();
  3139. let mut last_vote = 0;
  3140. let now = Instant::now();
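// Give C up to 30 seconds to produce and vote on at least four slots of its own fork
// beyond `base_slot`.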
  3141. loop {
  3142. let elapsed = now.elapsed();
  3143. assert!(
  3144. elapsed <= Duration::from_secs(30),
  3145. "C failed to create a fork past {base_slot} in {} seconds, last_vote {last_vote}, \
  3146. votes_on_c_fork: {votes_on_c_fork:?}",
  3147. elapsed.as_secs(),
  3148. );
  3149. sleep(Duration::from_millis(100));
  3150. if let Some((newest_vote, _)) = last_vote_in_tower(&val_c_ledger_path, &validator_c_pubkey)
  3151. {
  3152. last_vote = newest_vote;
  3153. if last_vote != base_slot {
  3154. votes_on_c_fork.insert(last_vote);
  3155. // Collect 4 votes
  3156. if votes_on_c_fork.len() >= 4 {
  3157. break;
  3158. }
  3159. }
  3160. }
  3161. }
  3162. assert!(!votes_on_c_fork.is_empty());
  3163. info!("Collected validator C's votes: {votes_on_c_fork:?}");
  3164. // Step 4:
  3165. // verify whether there was violation or not
  3166. info!("Restart validator A again!!!");
  3167. cluster.restart_node(
  3168. &validator_a_pubkey,
  3169. validator_a_info,
  3170. SocketAddrSpace::Unspecified,
  3171. );
  3172. // monitor for actual votes from validator A
  3173. let mut bad_vote_detected = false;
  3174. let mut a_votes = vec![];
  3175. for _ in 0..100 {
  3176. sleep(Duration::from_millis(100));
  3177. if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  3178. a_votes.push(last_vote);
  3179. let blockstore = open_blockstore(&val_a_ledger_path);
  3180. let mut ancestors = AncestorIterator::new(last_vote, &blockstore);
  3181. if ancestors.any(|a| votes_on_c_fork.contains(&a)) {
  3182. bad_vote_detected = true;
  3183. break;
  3184. }
  3185. }
  3186. }
  3187. info!("Observed A's votes on: {a_votes:?}");
  3188. // an elaborate way of assert!(with_tower && !bad_vote_detected || ...)
  3189. let expects_optimistic_confirmation_violation = !with_tower;
  3190. if bad_vote_detected != expects_optimistic_confirmation_violation {
  3191. if bad_vote_detected {
  3192. panic!("No violation expected because of persisted tower!");
  3193. } else {
  3194. panic!("Violation expected because of removed persisted tower!");
  3195. }
  3196. } else if bad_vote_detected {
  3197. info!(
  3198. "THIS TEST expected violations. And indeed, there was some, because of removed \
  3199. persisted tower."
  3200. );
  3201. } else {
  3202. info!(
  3203. "THIS TEST expected no violation. And indeed, there was none, thanks to persisted \
  3204. tower."
  3205. );
  3206. }
  3207. }
  3208. #[test]
  3209. #[serial]
  3210. // Steps in this test:
  3211. // We want to create a situation like:
/*
      1 (2%, killed and restarted) --- 200 (37%, lighter fork)
     /
    0
     \-------- 4 (38%, heavier fork)
*/
  3218. // where the 2% that voted on slot 1 don't see their votes land in a block
  3219. // due to blockhash expiration, and thus without resigning their votes with
  3220. // a newer blockhash, will deem slot 4 the heavier fork and try to switch to
  3221. // slot 4, which doesn't pass the switch threshold. This stalls the network.
  3222. // We do this by:
  3223. // 1) Creating a partition so all three nodes don't see each other
  3224. // 2) Kill the validator with 2%
  3225. // 3) Wait for longer than blockhash expiration
  3226. // 4) Copy in the lighter fork's blocks up, *only* up to the first slot in the lighter fork
  3227. // (not all the blocks on the lighter fork!), call this slot `L`
  3228. // 5) Restart the validator with 2% so that he votes on `L`, but the vote doesn't land
  3229. // due to blockhash expiration
  3230. // 6) Resolve the partition so that the 2% repairs the other fork, and tries to switch,
  3231. // stalling the network.
  3232. fn test_fork_choice_refresh_old_votes() {
  3233. agave_logger::setup_with_default(RUST_LOG_FILTER);
  3234. let max_switch_threshold_failure_pct = 1.0 - 2.0 * SWITCH_FORK_THRESHOLD;
  3235. let total_stake = 100 * DEFAULT_NODE_STAKE;
  3236. let max_failures_stake = (max_switch_threshold_failure_pct * total_stake as f64) as u64;
  3237. // 1% less than the failure stake, where the 2% is allocated to a validator that
  3238. // has no leader slots and thus won't be able to vote on its own fork.
  3239. let failures_stake = max_failures_stake;
  3240. let total_alive_stake = total_stake - failures_stake;
  3241. let alive_stake_1 = total_alive_stake / 2 - 1;
  3242. let alive_stake_2 = total_alive_stake - alive_stake_1 - 1;
  3243. // Heavier fork still doesn't have enough stake to switch. Both branches need
  3244. // the vote to land from the validator with `alive_stake_3` to allow the other
  3245. // fork to switch.
  3246. let alive_stake_3 = 2 * DEFAULT_NODE_STAKE;
  3247. assert!(alive_stake_1 < alive_stake_2);
  3248. assert!(alive_stake_1 + alive_stake_3 > alive_stake_2);
  3249. let num_lighter_partition_slots_per_rotation = 8;
  3250. // ratio of total number of leader slots to the number of leader slots allocated
  3251. // to the lighter partition
  3252. let total_slots_to_lighter_partition_ratio = 2;
  3253. let partitions: &[(usize, usize)] = &[
  3254. (
  3255. alive_stake_1 as usize,
  3256. num_lighter_partition_slots_per_rotation,
  3257. ),
  3258. (
  3259. alive_stake_2 as usize,
  3260. (total_slots_to_lighter_partition_ratio - 1) * num_lighter_partition_slots_per_rotation,
  3261. ),
  3262. (alive_stake_3 as usize, 0),
  3263. ];
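// The first two partitions are the lighter and heavier forks; the last entry is the small
// validator with no leader slots whose vote must land for the forks to resolve.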
  3264. #[derive(Default)]
  3265. struct PartitionContext {
  3266. smallest_validator_info: Option<ClusterValidatorInfo>,
  3267. lighter_fork_validator_key: Pubkey,
  3268. heaviest_validator_key: Pubkey,
  3269. first_slot_in_lighter_partition: Slot,
  3270. }
  3271. let on_partition_start = |cluster: &mut LocalCluster,
  3272. validator_keys: &[Pubkey],
  3273. _: Vec<ClusterValidatorInfo>,
  3274. context: &mut PartitionContext| {
// Kill validator with alive_stake_3, third in `partitions` slice
  3276. let smallest_validator_key = &validator_keys[3];
  3277. let info = cluster.exit_node(smallest_validator_key);
  3278. context.smallest_validator_info = Some(info);
  3279. // validator_keys[0] is the validator that will be killed, i.e. the validator with
  3280. // stake == `failures_stake`
  3281. context.lighter_fork_validator_key = validator_keys[1];
// Second in `partitions` slice
  3283. context.heaviest_validator_key = validator_keys[2];
  3284. };
  3285. let ticks_per_slot = 32;
  3286. let on_before_partition_resolved =
  3287. |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  3288. // Equal to ms_per_slot * MAX_PROCESSING_AGE, rounded up
  3289. let sleep_time_ms = ms_for_n_slots(
  3290. MAX_PROCESSING_AGE as u64 * total_slots_to_lighter_partition_ratio as u64,
  3291. ticks_per_slot,
  3292. );
  3293. info!("Wait for blockhashes to expire, {sleep_time_ms} ms");
  3294. // Wait for blockhashes to expire
  3295. sleep(Duration::from_millis(sleep_time_ms));
  3296. let smallest_validator_key = context
  3297. .smallest_validator_info
  3298. .as_ref()
  3299. .unwrap()
  3300. .info
  3301. .keypair
  3302. .pubkey();
  3303. let smallest_ledger_path = context
  3304. .smallest_validator_info
  3305. .as_ref()
  3306. .unwrap()
  3307. .info
  3308. .ledger_path
  3309. .clone();
  3310. info!(
  3311. "smallest validator key: {smallest_validator_key}, path: {smallest_ledger_path:?}"
  3312. );
  3313. let lighter_fork_ledger_path = cluster.ledger_path(&context.lighter_fork_validator_key);
  3314. let heaviest_ledger_path = cluster.ledger_path(&context.heaviest_validator_key);
  3315. // Wait for blockhashes to expire
  3316. let mut distance_from_tip: usize;
  3317. loop {
  3318. // Get latest votes. We make sure to wait until the vote has landed in
  3319. // blockstore. This is important because if we were the leader for the block there
  3320. // is a possibility of voting before broadcast has inserted in blockstore.
  3321. let lighter_fork_latest_vote = wait_for_last_vote_in_tower_to_land_in_ledger(
  3322. &lighter_fork_ledger_path,
  3323. &context.lighter_fork_validator_key,
  3324. )
  3325. .unwrap();
  3326. let heaviest_fork_latest_vote = wait_for_last_vote_in_tower_to_land_in_ledger(
  3327. &heaviest_ledger_path,
  3328. &context.heaviest_validator_key,
  3329. )
  3330. .unwrap();
  3331. // Check if sufficient blockhashes have expired on the smaller fork
  3332. {
  3333. let smallest_blockstore = open_blockstore(&smallest_ledger_path);
  3334. let lighter_fork_blockstore = open_blockstore(&lighter_fork_ledger_path);
  3335. let heaviest_blockstore = open_blockstore(&heaviest_ledger_path);
  3336. info!("Opened blockstores");
  3337. // Find the first slot on the smaller fork
  3338. let lighter_ancestors: BTreeSet<Slot> =
  3339. std::iter::once(lighter_fork_latest_vote)
  3340. .chain(AncestorIterator::new(
  3341. lighter_fork_latest_vote,
  3342. &lighter_fork_blockstore,
  3343. ))
  3344. .collect();
  3345. let heavier_ancestors: BTreeSet<Slot> =
  3346. std::iter::once(heaviest_fork_latest_vote)
  3347. .chain(AncestorIterator::new(
  3348. heaviest_fork_latest_vote,
  3349. &heaviest_blockstore,
  3350. ))
  3351. .collect();
  3352. let (different_ancestor_index, different_ancestor) = lighter_ancestors
  3353. .iter()
  3354. .enumerate()
  3355. .zip(heavier_ancestors.iter())
  3356. .find(|((_index, lighter_fork_ancestor), heavier_fork_ancestor)| {
  3357. lighter_fork_ancestor != heavier_fork_ancestor
  3358. })
  3359. .unwrap()
  3360. .0;
  3361. let last_common_ancestor_index = different_ancestor_index - 1;
  3362. // It's critical that the heavier fork has at least one vote on it.
  3363. // This is important because the smallest validator must see a vote on the heavier fork
  3364. // to avoid voting again on its own fork.
  3365. // Because we don't have a simple method of parsing blockstore for all votes, we proxy this check
  3366. // by ensuring the heavier fork was long enough to land a vote. The minimum length would be 4 more
  3367. // than the last common ancestor N, because the first vote would be made at least by N+3 (if threshold check failed on slot N+1),
  3368. // and then would land by slot N + 4.
  3369. assert!(heavier_ancestors.len() > last_common_ancestor_index + 4);
  3370. context.first_slot_in_lighter_partition = *different_ancestor;
  3371. distance_from_tip = lighter_ancestors.len() - different_ancestor_index - 1;
  3372. info!(
  3373. "Distance in number of blocks between earliest slot {} and latest slot {} \
  3374. on lighter partition is {}",
  3375. context.first_slot_in_lighter_partition,
  3376. lighter_fork_latest_vote,
  3377. distance_from_tip
  3378. );
  3379. if distance_from_tip > MAX_PROCESSING_AGE {
  3380. // Must have been updated in the above loop
  3381. assert!(context.first_slot_in_lighter_partition != 0);
  3382. info!(
  3383. "First slot in lighter partition is {}",
  3384. context.first_slot_in_lighter_partition
  3385. );
  3386. // Copy all the blocks from the smaller partition up to `first_slot_in_lighter_partition`
  3387. // into the smallest validator's blockstore so that it will attempt to refresh
  3388. copy_blocks(
  3389. lighter_fork_latest_vote,
  3390. &lighter_fork_blockstore,
  3391. &smallest_blockstore,
  3392. false,
  3393. );
  3394. // Also copy all the blocks from the heavier partition so the smallest validator will
  3395. // not vote again on its own fork
  3396. copy_blocks(
  3397. heaviest_fork_latest_vote,
  3398. &heaviest_blockstore,
  3399. &smallest_blockstore,
  3400. false,
  3401. );
  3402. // Simulate a vote for the `first_slot_in_lighter_partition`
  3403. let bank_hash = lighter_fork_blockstore
  3404. .get_bank_hash(context.first_slot_in_lighter_partition)
  3405. .unwrap();
  3406. cluster_tests::apply_votes_to_tower(
  3407. &context
  3408. .smallest_validator_info
  3409. .as_ref()
  3410. .unwrap()
  3411. .info
  3412. .keypair,
  3413. vec![(context.first_slot_in_lighter_partition, bank_hash)],
  3414. smallest_ledger_path,
  3415. );
  3416. drop(smallest_blockstore);
  3417. break;
  3418. }
  3419. }
  3420. sleep(Duration::from_millis(ms_for_n_slots(
  3421. ((MAX_PROCESSING_AGE - distance_from_tip)
  3422. * total_slots_to_lighter_partition_ratio) as u64,
  3423. ticks_per_slot,
  3424. )));
  3425. }
  3426. // Restart the smallest validator that we killed earlier in `on_partition_start()`
  3427. cluster.restart_node(
  3428. &smallest_validator_key,
  3429. context.smallest_validator_info.take().unwrap(),
  3430. SocketAddrSpace::Unspecified,
  3431. );
  3432. // Now resolve partition, allow validator to see the fork with the heavier validator,
  3433. // but the fork it's currently on is the heaviest, if only its own vote landed!
  3434. };
  3435. // Check that new roots were set after the partition resolves (gives time
  3436. // for lockouts built during partition to resolve and gives validators an opportunity
  3437. // to try and switch forks)
  3438. let on_partition_resolved = |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  3439. // Wait until a root is made past the first slot on the correct fork
  3440. cluster.check_min_slot_is_rooted(
  3441. context.first_slot_in_lighter_partition,
  3442. "test_fork_choice_refresh_old_votes",
  3443. SocketAddrSpace::Unspecified,
  3444. );
  3445. // Check that the correct fork was rooted
  3446. let heaviest_ledger_path = cluster.ledger_path(&context.heaviest_validator_key);
  3447. let heaviest_blockstore = open_blockstore(&heaviest_ledger_path);
  3448. info!(
  3449. "checking that {} was rooted in {:?}",
  3450. context.first_slot_in_lighter_partition, heaviest_ledger_path
  3451. );
  3452. assert!(heaviest_blockstore.is_root(context.first_slot_in_lighter_partition));
  3453. };
  3454. run_kill_partition_switch_threshold(
  3455. &[(failures_stake as usize - 1, 0)],
  3456. partitions,
  3457. Some(ticks_per_slot),
  3458. PartitionContext::default(),
  3459. on_partition_start,
  3460. on_before_partition_resolved,
  3461. on_partition_resolved,
  3462. );
  3463. }
  3464. #[test]
  3465. #[serial]
  3466. fn test_kill_heaviest_partition() {
  3467. // This test:
  3468. // 1) Spins up four partitions, the heaviest being the first with more stake
  3469. // 2) Schedules the other validators for sufficient slots in the schedule
  3470. // so that they will still be locked out of voting for the major partition
  3471. // when the partition resolves
  3472. // 3) Kills the most staked partition. Validators are locked out, but should all
  3473. // eventually choose the major partition
  3474. // 4) Check for recovery
  3475. let num_slots_per_validator = 8;
  3476. let partitions: [usize; 4] = [
  3477. 11 * DEFAULT_NODE_STAKE as usize,
  3478. 10 * DEFAULT_NODE_STAKE as usize,
  3479. 10 * DEFAULT_NODE_STAKE as usize,
  3480. 10 * DEFAULT_NODE_STAKE as usize,
  3481. ];
  3482. let (leader_schedule, validator_keys) = create_custom_leader_schedule_with_random_keys(&[
  3483. num_slots_per_validator * (partitions.len() - 1),
  3484. num_slots_per_validator,
  3485. num_slots_per_validator,
  3486. num_slots_per_validator,
  3487. ]);
  3488. let empty = |_: &mut LocalCluster, _: &mut ()| {};
  3489. let validator_to_kill = validator_keys[0].pubkey();
  3490. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  3491. info!("Killing validator with id: {validator_to_kill}");
  3492. cluster.exit_node(&validator_to_kill);
  3493. cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  3494. };
  3495. run_cluster_partition(
  3496. &partitions,
  3497. Some((leader_schedule, validator_keys)),
  3498. (),
  3499. empty,
  3500. empty,
  3501. on_partition_resolved,
  3502. None,
  3503. true,
  3504. vec![],
  3505. )
  3506. }
  3507. #[test]
  3508. #[serial]
  3509. fn test_kill_partition_switch_threshold_no_progress() {
  3510. let max_switch_threshold_failure_pct = 1.0 - 2.0 * SWITCH_FORK_THRESHOLD;
  3511. let total_stake = 10_000 * DEFAULT_NODE_STAKE;
  3512. let max_failures_stake = (max_switch_threshold_failure_pct * total_stake as f64) as u64;
  3513. let failures_stake = max_failures_stake;
  3514. let total_alive_stake = total_stake - failures_stake;
  3515. let alive_stake_1 = total_alive_stake / 2;
  3516. let alive_stake_2 = total_alive_stake - alive_stake_1;
  3517. // Check that no new roots were set 400 slots after partition resolves (gives time
  3518. // for lockouts built during partition to resolve and gives validators an opportunity
  3519. // to try and switch forks)
  3520. let on_partition_start =
  3521. |_: &mut LocalCluster, _: &[Pubkey], _: Vec<ClusterValidatorInfo>, _: &mut ()| {};
  3522. let on_before_partition_resolved = |_: &mut LocalCluster, _: &mut ()| {};
  3523. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  3524. cluster.check_no_new_roots(400, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  3525. };
  3526. // This kills `max_failures_stake`, so no progress should be made
  3527. run_kill_partition_switch_threshold(
  3528. &[(failures_stake as usize, 16)],
  3529. &[(alive_stake_1 as usize, 8), (alive_stake_2 as usize, 8)],
  3530. None,
  3531. (),
  3532. on_partition_start,
  3533. on_before_partition_resolved,
  3534. on_partition_resolved,
  3535. );
  3536. }
  3537. #[test]
  3538. #[serial]
  3539. fn test_kill_partition_switch_threshold_progress() {
  3540. let max_switch_threshold_failure_pct = 1.0 - 2.0 * SWITCH_FORK_THRESHOLD;
  3541. let total_stake = 10_000 * DEFAULT_NODE_STAKE;
  3542. // Kill `< max_failures_stake` of the validators
  3543. let max_failures_stake = (max_switch_threshold_failure_pct * total_stake as f64) as u64;
  3544. let failures_stake = max_failures_stake - 1;
  3545. let total_alive_stake = total_stake - failures_stake;
  3546. // Partition the remaining alive validators, should still make progress
  3547. // once the partition resolves
  3548. let alive_stake_1 = total_alive_stake / 2;
  3549. let alive_stake_2 = total_alive_stake - alive_stake_1;
  3550. let bigger = std::cmp::max(alive_stake_1, alive_stake_2);
  3551. let smaller = std::cmp::min(alive_stake_1, alive_stake_2);
  3552. // At least one of the forks must have > SWITCH_FORK_THRESHOLD in order
  3553. // to guarantee switching proofs can be created. Make sure the other fork
// is <= SWITCH_FORK_THRESHOLD to make sure progress can be made. Catches
// bugs such as liveness issues in bank-weighted fork choice, which may stall
// because the fork with less stake could have more weight, but the other fork would:
  3557. // 1) Not be able to generate a switching proof
  3558. // 2) Other more staked fork stops voting, so doesn't catch up in bank weight.
  3559. assert!(
  3560. bigger as f64 / total_stake as f64 > SWITCH_FORK_THRESHOLD
  3561. && smaller as f64 / total_stake as f64 <= SWITCH_FORK_THRESHOLD
  3562. );
  3563. let on_partition_start =
  3564. |_: &mut LocalCluster, _: &[Pubkey], _: Vec<ClusterValidatorInfo>, _: &mut ()| {};
  3565. let on_before_partition_resolved = |_: &mut LocalCluster, _: &mut ()| {};
  3566. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  3567. cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  3568. };
  3569. run_kill_partition_switch_threshold(
  3570. &[(failures_stake as usize, 16)],
  3571. &[(alive_stake_1 as usize, 8), (alive_stake_2 as usize, 8)],
  3572. None,
  3573. (),
  3574. on_partition_start,
  3575. on_before_partition_resolved,
  3576. on_partition_resolved,
  3577. );
  3578. }
  3579. #[test]
  3580. #[serial]
  3581. #[allow(unused_attributes)]
  3582. fn test_duplicate_shreds_broadcast_leader() {
  3583. run_duplicate_shreds_broadcast_leader(true);
  3584. }
  3585. #[test]
  3586. #[serial]
  3587. #[ignore]
  3588. #[allow(unused_attributes)]
  3589. fn test_duplicate_shreds_broadcast_leader_ancestor_hashes() {
  3590. run_duplicate_shreds_broadcast_leader(false);
  3591. }
  3592. fn run_duplicate_shreds_broadcast_leader(vote_on_duplicate: bool) {
  3593. agave_logger::setup_with_default(RUST_LOG_FILTER);
  3594. // Create 4 nodes:
  3595. // 1) Bad leader sending different versions of shreds to both of the other nodes
3596. // 2) 1 node whose voting behavior in gossip we control
  3597. // 3) 1 validator gets the same version as the leader, will see duplicate confirmation
  3598. // 4) 1 validator will not get the same version as the leader. For each of these
  3599. // duplicate slots `S` either:
  3600. // a) The leader's version of `S` gets > DUPLICATE_THRESHOLD of votes in gossip and so this
  3601. // node will repair that correct version
  3602. // b) A descendant `D` of some version of `S` gets > DUPLICATE_THRESHOLD votes in gossip,
  3603. // but no version of `S` does. Then the node will not know to repair the right version
  3604. // by just looking at gossip, but will instead have to use EpochSlots repair after
  3605. // detecting that a descendant does not chain to its version of `S`, and marks that descendant
  3606. // dead.
3607. // Scenario a) or b) is triggered by our node in 2), whose voting behavior we control.
  3608. // Critical that bad_leader_stake + good_node_stake < DUPLICATE_THRESHOLD and that
  3609. // bad_leader_stake + good_node_stake + our_node_stake > DUPLICATE_THRESHOLD so that
  3610. // our vote is the determining factor.
  3611. //
  3612. // Also critical that bad_leader_stake > 1 - DUPLICATE_THRESHOLD, so that the leader
3613. // doesn't try to dump its own block, which will happen if:
3614. // 1. A version is duplicate confirmed
3615. // 2. The version it played/stored into blockstore isn't the one that is duplicate
3616. // confirmed.
  3617. let bad_leader_stake = 10_000_000 * DEFAULT_NODE_STAKE;
  3618. // Ensure that the good_node_stake is always on the critical path, and the partition node
  3619. // should never be on the critical path. This way, none of the bad shreds sent to the partition
  3620. // node corrupt the good node.
  3621. let good_node_stake = 500 * DEFAULT_NODE_STAKE;
  3622. let our_node_stake = 10_000_000 * DEFAULT_NODE_STAKE;
  3623. let partition_node_stake = DEFAULT_NODE_STAKE;
  3624. let node_stakes = vec![
  3625. bad_leader_stake,
  3626. partition_node_stake,
  3627. good_node_stake,
  3628. // Needs to be last in the vector, so that we can
  3629. // find the id of this node. See call to `test_faulty_node`
  3630. // below for more details.
  3631. our_node_stake,
  3632. ];
  3633. assert_eq!(*node_stakes.last().unwrap(), our_node_stake);
  3634. let total_stake: u64 = node_stakes.iter().sum();
  3635. assert!(
  3636. ((bad_leader_stake + good_node_stake) as f64 / total_stake as f64) < DUPLICATE_THRESHOLD
  3637. );
  3638. assert!(
  3639. (bad_leader_stake + good_node_stake + our_node_stake) as f64 / total_stake as f64
  3640. > DUPLICATE_THRESHOLD
  3641. );
  3642. assert!((bad_leader_stake as f64 / total_stake as f64) >= 1.0 - DUPLICATE_THRESHOLD);
  3643. // Important that the partition node stake is the smallest so that it gets selected
  3644. // for the partition.
  3645. assert!(partition_node_stake < our_node_stake && partition_node_stake < good_node_stake);
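// Illustrative addition: log the stake ratios that the asserts above constrain, relative
// to DUPLICATE_THRESHOLD, using only values already in scope.
info!(
    "stake ratios vs DUPLICATE_THRESHOLD={DUPLICATE_THRESHOLD}: bad_leader={:.3}, \
     bad_leader+good={:.3}, bad_leader+good+ours={:.3}",
    bad_leader_stake as f64 / total_stake as f64,
    (bad_leader_stake + good_node_stake) as f64 / total_stake as f64,
    (bad_leader_stake + good_node_stake + our_node_stake) as f64 / total_stake as f64,
);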
  3646. let (duplicate_slot_sender, duplicate_slot_receiver) = unbounded();
  3647. // 1) Set up the cluster
  3648. let (mut cluster, validator_keys) = test_faulty_node(
  3649. BroadcastStageType::BroadcastDuplicates(BroadcastDuplicatesConfig {
  3650. partition: ClusterPartition::Stake(partition_node_stake),
  3651. duplicate_slot_sender: Some(duplicate_slot_sender),
  3652. }),
  3653. node_stakes,
  3654. None,
  3655. None,
  3656. );
  3657. // This is why it's important our node was last in `node_stakes`
  3658. let our_id = validator_keys.last().unwrap().pubkey();
  3659. // 2) Kill our node and start up a thread to simulate votes to control our voting behavior
  3660. let our_info = cluster.exit_node(&our_id);
  3661. let node_keypair = our_info.info.keypair;
  3662. let vote_keypair = our_info.info.voting_keypair;
  3663. let bad_leader_id = *cluster.entry_point_info.pubkey();
  3664. let bad_leader_ledger_path = cluster.validators[&bad_leader_id].info.ledger_path.clone();
  3665. info!("our node id: {}", node_keypair.pubkey());
  3666. // 3) Start up a gossip instance to listen for and push votes
  3667. let voter_thread_sleep_ms = 100;
  3668. let gossip_voter = cluster_tests::start_gossip_voter(
  3669. &cluster.entry_point_info.gossip().unwrap(),
  3670. &node_keypair,
  3671. move |(label, leader_vote_tx)| {
  3672. // Filter out votes not from the bad leader
  3673. if label.pubkey() == bad_leader_id {
  3674. let vote = vote_parser::parse_vote_transaction(&leader_vote_tx)
  3675. .map(|(_, vote, ..)| vote)
  3676. .unwrap();
  3677. // Filter out empty votes
  3678. if !vote.is_empty() {
  3679. Some((vote, leader_vote_tx))
  3680. } else {
  3681. None
  3682. }
  3683. } else {
  3684. None
  3685. }
  3686. },
  3687. {
  3688. let node_keypair = node_keypair.insecure_clone();
  3689. let vote_keypair = vote_keypair.insecure_clone();
  3690. let mut gossip_vote_index = 0;
  3691. let mut duplicate_slots = vec![];
  3692. move |latest_vote_slot, leader_vote_tx, parsed_vote, cluster_info| {
  3693. info!("received vote for {latest_vote_slot}");
  3694. // Add to EpochSlots. Mark all slots frozen between slot..=max_vote_slot.
  3695. let new_epoch_slots: Vec<Slot> = (0..latest_vote_slot + 1).collect();
  3696. info!("Simulating epoch slots from our node: {new_epoch_slots:?}");
  3697. cluster_info.push_epoch_slots(&new_epoch_slots);
  3698. for slot in duplicate_slot_receiver.try_iter() {
  3699. duplicate_slots.push(slot);
  3700. }
  3701. let vote_hash = parsed_vote.hash();
  3702. if vote_on_duplicate || !duplicate_slots.contains(&latest_vote_slot) {
  3703. info!(
  3704. "Simulating vote from our node on slot {latest_vote_slot}, hash \
  3705. {vote_hash}"
  3706. );
  3707. // Add all recent vote slots on this fork to allow cluster to pass
  3708. // vote threshold checks in replay. Note this will instantly force a
  3709. // root by this validator, but we're not concerned with lockout violations
  3710. // by this validator so it's fine.
  3711. let leader_blockstore = open_blockstore(&bad_leader_ledger_path);
  3712. let mut vote_slots: Vec<(Slot, u32)> =
  3713. AncestorIterator::new_inclusive(latest_vote_slot, &leader_blockstore)
  3714. .take(MAX_LOCKOUT_HISTORY)
  3715. .zip(1..)
  3716. .collect();
  3717. vote_slots.reverse();
  3718. let mut vote = TowerSync::from(vote_slots);
  3719. let root =
  3720. AncestorIterator::new_inclusive(latest_vote_slot, &leader_blockstore)
  3721. .nth(MAX_LOCKOUT_HISTORY);
  3722. vote.root = root;
  3723. vote.hash = vote_hash;
  3724. let vote_tx = vote_transaction::new_tower_sync_transaction(
  3725. vote,
  3726. leader_vote_tx.message.recent_blockhash,
  3727. &node_keypair,
  3728. &vote_keypair,
  3729. &vote_keypair,
  3730. None,
  3731. );
  3732. gossip_vote_index += 1;
  3733. gossip_vote_index %= MAX_VOTES;
  3734. cluster_info.push_vote_at_index(vote_tx, gossip_vote_index, &node_keypair);
  3735. }
  3736. }
  3737. },
  3738. voter_thread_sleep_ms as u64,
  3739. cluster.validators.len().saturating_sub(1),
  3740. 5000, // Refresh if 5 seconds of inactivity
  3741. 5, // Refresh the past 5 votes
  3742. cluster.entry_point_info.shred_version(),
  3743. );
  3744. // 4) Check that the cluster is making progress
  3745. cluster.check_for_new_roots(
  3746. 16,
  3747. "test_duplicate_shreds_broadcast_leader",
  3748. SocketAddrSpace::Unspecified,
  3749. );
  3750. // Clean up threads
  3751. gossip_voter.close();
  3752. }
  3753. #[test]
  3754. #[serial]
  3755. #[ignore]
  3756. fn test_switch_threshold_uses_gossip_votes() {
  3757. agave_logger::setup_with_default(RUST_LOG_FILTER);
  3758. let total_stake = 100 * DEFAULT_NODE_STAKE;
  3759. // Minimum stake needed to generate a switching proof
  3760. let minimum_switch_stake = (SWITCH_FORK_THRESHOLD * total_stake as f64) as u64;
3761. // Make the heavier stake insufficient for switching so that the lighter validator
  3762. // cannot switch without seeing a vote from the dead/failure_stake validator.
  3763. let heavier_stake = minimum_switch_stake;
  3764. let lighter_stake = heavier_stake - 1;
  3765. let failures_stake = total_stake - heavier_stake - lighter_stake;
  3766. let partitions: &[(usize, usize)] = &[(heavier_stake as usize, 8), (lighter_stake as usize, 8)];
  3767. #[derive(Default)]
  3768. struct PartitionContext {
  3769. heaviest_validator_key: Pubkey,
  3770. lighter_validator_key: Pubkey,
  3771. dead_validator_info: Option<ClusterValidatorInfo>,
  3772. }
  3773. let on_partition_start = |_cluster: &mut LocalCluster,
  3774. validator_keys: &[Pubkey],
  3775. mut dead_validator_infos: Vec<ClusterValidatorInfo>,
  3776. context: &mut PartitionContext| {
  3777. assert_eq!(dead_validator_infos.len(), 1);
  3778. context.dead_validator_info = Some(dead_validator_infos.pop().unwrap());
  3779. // validator_keys[0] is the validator that will be killed, i.e. the validator with
  3780. // stake == `failures_stake`
  3781. context.heaviest_validator_key = validator_keys[1];
  3782. context.lighter_validator_key = validator_keys[2];
  3783. };
  3784. let on_before_partition_resolved = |_: &mut LocalCluster, _: &mut PartitionContext| {};
  3785. // Check that new roots were set after the partition resolves (gives time
  3786. // for lockouts built during partition to resolve and gives validators an opportunity
  3787. // to try and switch forks)
  3788. let on_partition_resolved = |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  3789. let lighter_validator_ledger_path = cluster.ledger_path(&context.lighter_validator_key);
  3790. let heavier_validator_ledger_path = cluster.ledger_path(&context.heaviest_validator_key);
  3791. let (lighter_validator_latest_vote, _) = last_vote_in_tower(
  3792. &lighter_validator_ledger_path,
  3793. &context.lighter_validator_key,
  3794. )
  3795. .unwrap();
  3796. info!("Lighter validator's latest vote is for slot {lighter_validator_latest_vote}");
  3797. // Lighter partition should stop voting after detecting the heavier partition and try
  3798. // to switch. Loop until we see a greater vote by the heavier validator than the last
  3799. // vote made by the lighter validator on the lighter fork.
  3800. let mut heavier_validator_latest_vote;
  3801. let mut heavier_validator_latest_vote_hash;
  3802. let heavier_blockstore = open_blockstore(&heavier_validator_ledger_path);
  3803. loop {
  3804. let (sanity_check_lighter_validator_latest_vote, _) = last_vote_in_tower(
  3805. &lighter_validator_ledger_path,
  3806. &context.lighter_validator_key,
  3807. )
  3808. .unwrap();
  3809. // Lighter validator should stop voting, because `on_partition_resolved` is only
  3810. // called after a propagation time where blocks from the other fork should have
  3811. // finished propagating
  3812. assert_eq!(
  3813. sanity_check_lighter_validator_latest_vote,
  3814. lighter_validator_latest_vote
  3815. );
  3816. let (new_heavier_validator_latest_vote, new_heavier_validator_latest_vote_hash) =
  3817. last_vote_in_tower(
  3818. &heavier_validator_ledger_path,
  3819. &context.heaviest_validator_key,
  3820. )
  3821. .unwrap();
  3822. heavier_validator_latest_vote = new_heavier_validator_latest_vote;
  3823. heavier_validator_latest_vote_hash = new_heavier_validator_latest_vote_hash;
  3824. // Latest vote for each validator should be on different forks
  3825. assert_ne!(lighter_validator_latest_vote, heavier_validator_latest_vote);
  3826. if heavier_validator_latest_vote > lighter_validator_latest_vote {
  3827. let heavier_ancestors: HashSet<Slot> =
  3828. AncestorIterator::new(heavier_validator_latest_vote, &heavier_blockstore)
  3829. .collect();
  3830. assert!(!heavier_ancestors.contains(&lighter_validator_latest_vote));
  3831. break;
  3832. }
  3833. }
  3834. info!("Checking to make sure lighter validator doesn't switch");
  3835. let mut latest_slot = lighter_validator_latest_vote;
  3836. // Number of chances the validator had to switch votes but didn't
  3837. let mut total_voting_opportunities = 0;
  3838. while total_voting_opportunities <= 5 {
  3839. let (new_latest_slot, latest_slot_ancestors) =
  3840. find_latest_replayed_slot_from_ledger(&lighter_validator_ledger_path, latest_slot);
  3841. latest_slot = new_latest_slot;
  3842. // Ensure `latest_slot` is on the other fork
  3843. if latest_slot_ancestors.contains(&heavier_validator_latest_vote) {
  3844. let tower = restore_tower(
  3845. &lighter_validator_ledger_path,
  3846. &context.lighter_validator_key,
  3847. )
  3848. .unwrap();
  3849. // Check that there was an opportunity to vote
  3850. if !tower.is_locked_out(latest_slot, &latest_slot_ancestors) {
  3851. // Ensure the lighter blockstore has not voted again
  3852. let new_lighter_validator_latest_vote = tower.last_voted_slot().unwrap();
  3853. assert_eq!(
  3854. new_lighter_validator_latest_vote,
  3855. lighter_validator_latest_vote
  3856. );
  3857. info!("Incrementing voting opportunities: {total_voting_opportunities}");
  3858. total_voting_opportunities += 1;
  3859. } else {
  3860. info!("Tower still locked out, can't vote for slot: {latest_slot}");
  3861. }
  3862. } else if latest_slot > heavier_validator_latest_vote {
  3863. warn!(
  3864. "validator is still generating blocks on its own fork, last processed slot: \
  3865. {latest_slot}"
  3866. );
  3867. }
  3868. sleep(Duration::from_millis(50));
  3869. }
  3870. // Make a vote from the killed validator for slot `heavier_validator_latest_vote` in gossip
  3871. info!("Simulate vote for slot: {heavier_validator_latest_vote} from dead validator");
  3872. let vote_keypair = &context
  3873. .dead_validator_info
  3874. .as_ref()
  3875. .unwrap()
  3876. .info
  3877. .voting_keypair
  3878. .clone();
  3879. let node_keypair = &context
  3880. .dead_validator_info
  3881. .as_ref()
  3882. .unwrap()
  3883. .info
  3884. .keypair
  3885. .clone();
  3886. cluster_tests::submit_vote_to_cluster_gossip(
  3887. node_keypair,
  3888. vote_keypair,
  3889. heavier_validator_latest_vote,
  3890. heavier_validator_latest_vote_hash,
  3891. // Make the vote transaction with a random blockhash. Thus, the vote only lives in gossip but
  3892. // never makes it into a block
  3893. Hash::new_unique(),
  3894. cluster
  3895. .get_contact_info(&context.heaviest_validator_key)
  3896. .unwrap()
  3897. .gossip()
  3898. .unwrap(),
  3899. &SocketAddrSpace::Unspecified,
  3900. )
  3901. .unwrap();
  3902. loop {
  3903. // Wait for the lighter validator to switch to the heavier fork
  3904. let (new_lighter_validator_latest_vote, _) = last_vote_in_tower(
  3905. &lighter_validator_ledger_path,
  3906. &context.lighter_validator_key,
  3907. )
  3908. .unwrap();
  3909. if new_lighter_validator_latest_vote != lighter_validator_latest_vote {
  3910. info!(
  3911. "Lighter validator switched forks at slot: {new_lighter_validator_latest_vote}"
  3912. );
  3913. let (heavier_validator_latest_vote, _) = last_vote_in_tower(
  3914. &heavier_validator_ledger_path,
  3915. &context.heaviest_validator_key,
  3916. )
  3917. .unwrap();
  3918. let (smaller, larger) =
  3919. if new_lighter_validator_latest_vote > heavier_validator_latest_vote {
  3920. (
  3921. heavier_validator_latest_vote,
  3922. new_lighter_validator_latest_vote,
  3923. )
  3924. } else {
  3925. (
  3926. new_lighter_validator_latest_vote,
  3927. heavier_validator_latest_vote,
  3928. )
  3929. };
  3930. // Check the new vote is on the same fork as the heaviest fork
  3931. let heavier_blockstore = open_blockstore(&heavier_validator_ledger_path);
  3932. let larger_slot_ancestors: HashSet<Slot> =
  3933. AncestorIterator::new(larger, &heavier_blockstore)
  3934. .chain(std::iter::once(larger))
  3935. .collect();
  3936. assert!(larger_slot_ancestors.contains(&smaller));
  3937. break;
  3938. } else {
  3939. sleep(Duration::from_millis(50));
  3940. }
  3941. }
  3942. };
  3943. let ticks_per_slot = 8;
  3944. run_kill_partition_switch_threshold(
  3945. &[(failures_stake as usize, 0)],
  3946. partitions,
  3947. Some(ticks_per_slot),
  3948. PartitionContext::default(),
  3949. on_partition_start,
  3950. on_before_partition_resolved,
  3951. on_partition_resolved,
  3952. );
  3953. }
  3954. #[test]
  3955. #[serial]
  3956. fn test_listener_startup() {
  3957. let mut config = ClusterConfig {
  3958. node_stakes: vec![DEFAULT_NODE_STAKE],
  3959. num_listeners: 3,
  3960. validator_configs: make_identical_validator_configs(
  3961. &ValidatorConfig::default_for_test(),
  3962. 1,
  3963. ),
  3964. ..ClusterConfig::default()
  3965. };
  3966. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  3967. let cluster_nodes = discover_validators(
  3968. &cluster.entry_point_info.gossip().unwrap(),
  3969. 4,
  3970. cluster.entry_point_info.shred_version(),
  3971. SocketAddrSpace::Unspecified,
  3972. )
  3973. .unwrap();
  3974. assert_eq!(cluster_nodes.len(), 4);
  3975. }
  3976. fn find_latest_replayed_slot_from_ledger(
  3977. ledger_path: &Path,
  3978. mut latest_slot: Slot,
  3979. ) -> (Slot, HashSet<Slot>) {
  3980. loop {
  3981. let mut blockstore = open_blockstore(ledger_path);
  3982. // This is kind of a hack because we can't query for new frozen blocks over RPC
  3983. // since the validator is not voting.
  3984. let new_latest_slots: Vec<Slot> = blockstore
  3985. .slot_meta_iterator(latest_slot)
  3986. .unwrap()
  3987. .filter_map(|(s, _)| if s > latest_slot { Some(s) } else { None })
  3988. .collect();
  3989. if let Some(new_latest_slot) = new_latest_slots.first() {
  3990. latest_slot = *new_latest_slot;
  3991. info!("Checking latest_slot {latest_slot}");
  3992. // Wait for the slot to be fully received by the validator
  3993. loop {
  3994. info!("Waiting for slot {latest_slot} to be full");
  3995. if blockstore.is_full(latest_slot) {
  3996. break;
  3997. } else {
  3998. sleep(Duration::from_millis(50));
  3999. blockstore = open_blockstore(ledger_path);
  4000. }
  4001. }
  4002. // Wait for the slot to be replayed
  4003. loop {
  4004. info!("Waiting for slot {latest_slot} to be replayed");
  4005. if blockstore.get_bank_hash(latest_slot).is_some() {
  4006. return (
  4007. latest_slot,
  4008. AncestorIterator::new(latest_slot, &blockstore).collect(),
  4009. );
  4010. } else {
  4011. sleep(Duration::from_millis(50));
  4012. blockstore = open_blockstore(ledger_path);
  4013. }
  4014. }
  4015. }
  4016. sleep(Duration::from_millis(50));
  4017. }
  4018. }
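// A minimal usage sketch of the helper above (hypothetical; the tests above call it
// directly from their partition callbacks): poll a ledger until some slot newer than
// `start_slot` has been fully received and replayed, then return that slot.
#[allow(dead_code)]
fn example_wait_for_newer_replayed_slot(ledger_path: &Path, start_slot: Slot) -> Slot {
    let (replayed_slot, ancestors) = find_latest_replayed_slot_from_ledger(ledger_path, start_slot);
    info!(
        "slot {replayed_slot} is replayed and has {} ancestors in this ledger",
        ancestors.len()
    );
    replayed_slot
}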
  4019. #[test]
  4020. #[serial]
  4021. fn test_cluster_partition_1_1() {
  4022. let empty = |_: &mut LocalCluster, _: &mut ()| {};
  4023. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  4024. cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  4025. };
  4026. run_cluster_partition(
  4027. &[1, 1],
  4028. None,
  4029. (),
  4030. empty,
  4031. empty,
  4032. on_partition_resolved,
  4033. None,
  4034. false,
  4035. vec![],
  4036. )
  4037. }
  4038. #[test]
  4039. #[serial]
  4040. fn test_cluster_partition_1_1_1() {
  4041. let empty = |_: &mut LocalCluster, _: &mut ()| {};
  4042. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  4043. cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  4044. };
  4045. run_cluster_partition(
  4046. &[1, 1, 1],
  4047. None,
  4048. (),
  4049. empty,
  4050. empty,
  4051. on_partition_resolved,
  4052. None,
  4053. false,
  4054. vec![],
  4055. )
  4056. }
  4057. #[test]
  4058. #[serial]
  4059. fn test_leader_failure_4() {
  4060. agave_logger::setup_with_default(RUST_LOG_FILTER);
  4061. error!("test_leader_failure_4");
  4062. // Cluster needs a supermajority to remain even after taking 1 node offline,
  4063. // so the minimum number of nodes for this test is 4.
  4064. let num_nodes = 4;
  4065. let mut validator_config = ValidatorConfig::default_for_test();
  4066. validator_config.wait_for_supermajority = Some(0);
  4067. // Embed vote and stake account in genesis to avoid waiting for stake
  4068. // activation and race conditions around accepting gossip votes, repairing
  4069. // blocks, etc. before we advance through too many epochs.
  4070. let validator_keys: Option<Vec<(Arc<Keypair>, bool)>> = Some(
  4071. (0..num_nodes)
  4072. .map(|_| (Arc::new(Keypair::new()), true))
  4073. .collect(),
  4074. );
  4075. // Skip the warmup slots because these short epochs can cause problems when
  4076. // bringing multiple fresh validators online that are pre-staked in genesis.
  4077. // The problems arise because we skip their leader slots while they're still
4078. // starting up, the cluster experiences partitioning, and leader schedules may
4079. // not be generated in time because the short epochs have the same slots per
4080. // epoch as the total tower height; any skipped slots can then lead to not
4081. // rooting, not generating a leader schedule, and stalling the cluster.
  4082. let skip_warmup_slots = true;
  4083. let mut config = ClusterConfig {
  4084. node_stakes: vec![DEFAULT_NODE_STAKE; num_nodes],
  4085. validator_configs: make_identical_validator_configs(&validator_config, num_nodes),
  4086. validator_keys,
  4087. skip_warmup_slots,
  4088. ..ClusterConfig::default()
  4089. };
  4090. let local = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  4091. cluster_tests::kill_entry_and_spend_and_verify_rest(
  4092. &local.entry_point_info,
  4093. &local
  4094. .validators
  4095. .get(local.entry_point_info.pubkey())
  4096. .unwrap()
  4097. .config
  4098. .validator_exit,
  4099. &local.funding_keypair,
  4100. &local.connection_cache,
  4101. num_nodes,
  4102. config.ticks_per_slot * config.poh_config.target_tick_duration.as_millis() as u64,
  4103. SocketAddrSpace::Unspecified,
  4104. );
  4105. }
  4106. // This test verifies that even if votes from a validator end up taking too long to land, and thus
4107. // some of the referenced slots are no longer present in the slot hashes sysvar,
  4108. // consensus can still be attained.
  4109. //
  4110. // Validator A (60%)
  4111. // Validator B (40%)
4112. //                                   / --- 10 --- [..] --- 16 (B is voting, but due to network issues it initially cannot see the other fork at all)
4113. //                                  /
4114. // 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 (A votes on 1 - 9 and its votes land normally. B does the same, but its votes do not land)
4115. //                                  \
4116. //                                   \--[..]-- 73 (majority fork)
4117. // A is voting on the majority fork and B wants to switch to it. However, on the majority fork
4118. // B's earlier votes (1 - 9) never landed, so when B eventually goes to vote on 73, the slots in
4119. // its local vote state are no longer present in the slot hashes sysvar.
  4120. //
  4121. // 1. Wait for B's tower to see local vote state was updated to new fork
  4122. // 2. Wait X blocks, check B's vote state on chain has been properly updated
  4123. //
4124. // NOTE: it is not reliable for B to organically build up a 2^16 lockout, so we simulate the 6
4125. // consecutive votes on the minor fork by manually incrementing the confirmation levels for the
4126. // common ancestor votes in tower.
  4127. // To allow this test to run in a reasonable time we change the
  4128. // slot_hash expiry to 64 slots.
  4129. #[test]
  4130. #[serial]
  4131. fn test_slot_hash_expiry() {
  4132. agave_logger::setup_with_default(RUST_LOG_FILTER);
  4133. solana_slot_hashes::set_entries_for_tests_only(64);
  4134. let slots_per_epoch = 2048;
  4135. let node_stakes = vec![60 * DEFAULT_NODE_STAKE, 40 * DEFAULT_NODE_STAKE];
  4136. let validator_keys = [
  4137. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  4138. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  4139. ]
  4140. .iter()
  4141. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  4142. .collect::<Vec<_>>();
  4143. let node_vote_keys = [
  4144. "3NDQ3ud86RTVg8hTy2dDWnS4P8NfjhZ2gDgQAJbr3heaKaUVS1FW3sTLKA1GmDrY9aySzsa4QxpDkbLv47yHxzr3",
  4145. "46ZHpHE6PEvXYPu3hf9iQqjBk2ZNDaJ9ejqKWHEjxaQjpAGasKaWKbKHbP3646oZhfgDRzx95DH9PCBKKsoCVngk",
  4146. ]
  4147. .iter()
  4148. .map(|s| Arc::new(Keypair::from_base58_string(s)))
  4149. .collect::<Vec<_>>();
  4150. let vs = validator_keys
  4151. .iter()
  4152. .map(|(kp, _)| kp.pubkey())
  4153. .collect::<Vec<_>>();
  4154. let (a_pubkey, b_pubkey) = (vs[0], vs[1]);
  4155. // We want B to not vote (we are trying to simulate its votes not landing until it gets to the
  4156. // minority fork)
  4157. let mut validator_config = ValidatorConfig::default_for_test();
  4158. validator_config.wait_for_supermajority = Some(0);
  4159. let mut validator_configs =
  4160. make_identical_validator_configs(&validator_config, node_stakes.len());
  4161. validator_configs[1].voting_disabled = true;
  4162. let mut config = ClusterConfig {
  4163. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  4164. node_stakes,
  4165. validator_configs,
  4166. validator_keys: Some(validator_keys),
  4167. node_vote_keys: Some(node_vote_keys),
  4168. slots_per_epoch,
  4169. stakers_slot_offset: slots_per_epoch,
  4170. skip_warmup_slots: true,
  4171. ..ClusterConfig::default()
  4172. };
  4173. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  4174. let mut common_ancestor_slot = 8;
  4175. let a_ledger_path = cluster.ledger_path(&a_pubkey);
  4176. let b_ledger_path = cluster.ledger_path(&b_pubkey);
  4177. // Immediately kill B (we just needed it for the initial stake distribution)
  4178. info!("Killing B");
  4179. let mut b_info = cluster.exit_node(&b_pubkey);
  4180. // Let A run for a while until we get to the common ancestor
  4181. info!("Letting A run until common_ancestor_slot");
  4182. loop {
  4183. if let Some((last_vote, _)) = last_vote_in_tower(&a_ledger_path, &a_pubkey) {
  4184. if last_vote >= common_ancestor_slot {
  4185. break;
  4186. }
  4187. }
  4188. sleep(Duration::from_millis(100));
  4189. }
4190. // Keep A running, but set up B so that it thinks it has voted up until the common ancestor (but
4191. // doesn't know anything past that)
  4192. {
  4193. info!("Copying A's ledger to B");
  4194. std::fs::remove_dir_all(&b_info.info.ledger_path).unwrap();
  4195. let mut opt = fs_extra::dir::CopyOptions::new();
  4196. opt.copy_inside = true;
  4197. fs_extra::dir::copy(&a_ledger_path, &b_ledger_path, &opt).unwrap();
  4198. // remove A's tower in B's new copied ledger
  4199. info!("Removing A's tower in B's ledger dir");
  4200. remove_tower(&b_ledger_path, &a_pubkey);
  4201. // load A's tower and save it as B's tower
  4202. info!("Loading A's tower");
  4203. if let Some(mut a_tower) = restore_tower(&a_ledger_path, &a_pubkey) {
  4204. a_tower.node_pubkey = b_pubkey;
  4205. // Update common_ancestor_slot because A is still running
  4206. if let Some(s) = a_tower.last_voted_slot() {
  4207. common_ancestor_slot = s;
  4208. info!("New common_ancestor_slot {common_ancestor_slot}");
  4209. } else {
  4210. panic!("A's tower has no votes");
  4211. }
  4212. info!("Increase lockout by 6 confirmation levels and save as B's tower");
  4213. a_tower.increase_lockout(6);
  4214. save_tower(&b_ledger_path, &a_tower, &b_info.info.keypair);
  4215. info!("B's new tower: {:?}", a_tower.tower_slots());
  4216. } else {
  4217. panic!("A's tower is missing");
  4218. }
  4219. // Get rid of any slots past common_ancestor_slot
  4220. info!("Removing extra slots from B's blockstore");
  4221. let blockstore = open_blockstore(&b_ledger_path);
  4222. purge_slots_with_count(&blockstore, common_ancestor_slot + 1, 100);
  4223. }
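// Illustrative check (not load-bearing for the test): the tower surgery above raises the
// common-ancestor votes by 6 confirmation levels. Under the standard tower rule where a
// vote's lockout doubles per confirmation, that multiplies their lockout by 2^6 = 64,
// which is on the same order as the shortened slot-hashes expiry configured at the top of
// this test.
assert_eq!(1u64 << 6, solana_slot_hashes::get_entries() as u64);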
  4224. info!(
  4225. "Run A on majority fork until it reaches slot hash expiry {}",
  4226. solana_slot_hashes::get_entries()
  4227. );
  4228. let mut last_vote_on_a;
  4229. // Keep A running for a while longer so the majority fork has some decent size
  4230. loop {
  4231. last_vote_on_a =
  4232. wait_for_last_vote_in_tower_to_land_in_ledger(&a_ledger_path, &a_pubkey).unwrap();
  4233. if last_vote_on_a >= common_ancestor_slot + 2 * (solana_slot_hashes::get_entries() as u64) {
  4234. let blockstore = open_blockstore(&a_ledger_path);
  4235. info!(
  4236. "A majority fork: {:?}",
  4237. AncestorIterator::new(last_vote_on_a, &blockstore).collect::<Vec<Slot>>()
  4238. );
  4239. break;
  4240. }
  4241. sleep(Duration::from_millis(100));
  4242. }
  4243. // Kill A and restart B with voting. B should now fork off
  4244. info!("Killing A");
  4245. let a_info = cluster.exit_node(&a_pubkey);
  4246. info!("Restarting B");
  4247. b_info.config.voting_disabled = false;
  4248. cluster.restart_node(&b_pubkey, b_info, SocketAddrSpace::Unspecified);
  4249. // B will fork off and accumulate enough lockout
  4250. info!("Allowing B to fork");
  4251. loop {
  4252. let blockstore = open_blockstore(&b_ledger_path);
  4253. let last_vote =
  4254. wait_for_last_vote_in_tower_to_land_in_ledger(&b_ledger_path, &b_pubkey).unwrap();
  4255. let mut ancestors = AncestorIterator::new(last_vote, &blockstore);
  4256. if let Some(index) = ancestors.position(|x| x == common_ancestor_slot) {
  4257. if index > 7 {
  4258. info!(
  4259. "B has forked for enough lockout: {:?}",
  4260. AncestorIterator::new(last_vote, &blockstore).collect::<Vec<Slot>>()
  4261. );
  4262. break;
  4263. }
  4264. }
  4265. sleep(Duration::from_millis(1000));
  4266. }
  4267. info!("Kill B");
  4268. b_info = cluster.exit_node(&b_pubkey);
  4269. info!("Resolve the partition");
  4270. {
  4271. // Here we let B know about the missing blocks that A had produced on its partition
  4272. let a_blockstore = open_blockstore(&a_ledger_path);
  4273. let b_blockstore = open_blockstore(&b_ledger_path);
  4274. copy_blocks(last_vote_on_a, &a_blockstore, &b_blockstore, false);
  4275. }
  4276. // Now restart A and B and see if B is able to eventually switch onto the majority fork
  4277. info!("Restarting A & B");
  4278. cluster.restart_node(&a_pubkey, a_info, SocketAddrSpace::Unspecified);
  4279. cluster.restart_node(&b_pubkey, b_info, SocketAddrSpace::Unspecified);
  4280. info!("Waiting for B to switch to majority fork and make a root");
  4281. cluster_tests::check_for_new_roots(
  4282. 16,
  4283. &[cluster.get_contact_info(&a_pubkey).unwrap().clone()],
  4284. &cluster.connection_cache,
  4285. "test_slot_hashes_expiry",
  4286. );
  4287. }
  4288. // This test simulates a case where a leader sends a duplicate block with different ancestry. One
  4289. // version builds off of the rooted path, however the other version builds off a pruned branch. The
  4290. // validators that receive the pruned version will need to repair in order to continue, which
  4291. // requires an ancestor hashes repair.
  4292. //
  4293. // We setup 3 validators:
  4294. // - majority, will produce the rooted path
  4295. // - minority, will produce the pruned path
  4296. // - our_node, will be fed the pruned version of the duplicate block and need to repair
  4297. //
  4298. // Additionally we setup 3 observer nodes to propagate votes and participate in the ancestor hashes
  4299. // sample.
  4300. //
  4301. // Fork structure:
  4302. //
  4303. // 0 - 1 - ... - 10 (fork slot) - 30 - ... - 61 (rooted path) - ...
4304. //                |
4305. //                |- 11 - ... - 29 (pruned path) - 81'
  4306. //
  4307. //
  4308. // Steps:
4309. // 1) Use different leader schedules: the minority thinks it produces 0-29 and the majority the rest,
4310. // while the majority thinks the minority produces all blocks. This is to avoid the majority
4311. // accidentally producing blocks before it shuts down.
  4312. // 2) Start cluster, kill our_node.
4313. // 3) Kill the majority validator after it votes for any slot > fork slot (guarantees that the fork
4314. // slot is reached, as the minority cannot pass the threshold otherwise).
4315. // 4) Let the minority validator produce blocks on the pruned fork until it runs out of leader slots, then kill it.
  4316. // 5) Truncate majority ledger past fork slot so it starts building off of fork slot.
  4317. // 6) Restart majority and wait until it starts producing blocks on main fork and roots something
  4318. // past the fork slot.
  4319. // 7) Construct our ledger by copying majority ledger and copying blocks from minority for the pruned path.
  4320. // 8) In our node's ledger, change the parent of the latest slot in majority fork to be the latest
  4321. // slot in the minority fork (simulates duplicate built off of pruned block)
4322. // 9) Start our node, which will prune the minority fork on ledger replay, and verify that we can make roots.
  4323. //
  4324. #[test]
  4325. #[serial]
  4326. #[ignore]
  4327. fn test_duplicate_with_pruned_ancestor() {
  4328. agave_logger::setup_with("info,solana_metrics=off");
  4329. solana_core::repair::duplicate_repair_status::set_ancestor_hash_repair_sample_size_for_tests_only(3);
  4330. let majority_leader_stake = 10_000_000 * DEFAULT_NODE_STAKE;
  4331. let minority_leader_stake = 2_000_000 * DEFAULT_NODE_STAKE;
  4332. let our_node = DEFAULT_NODE_STAKE;
  4333. let observer_stake = DEFAULT_NODE_STAKE;
  4334. let slots_per_epoch = 2048;
  4335. let fork_slot: u64 = 10;
  4336. let fork_length: u64 = 20;
  4337. let majority_fork_buffer = 5;
  4338. let mut node_stakes = vec![majority_leader_stake, minority_leader_stake, our_node];
  4339. // We need enough observers to reach `ANCESTOR_HASH_REPAIR_SAMPLE_SIZE`
  4340. node_stakes.append(&mut vec![observer_stake; 3]);
  4341. let num_nodes = node_stakes.len();
  4342. let validator_keys = [
  4343. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  4344. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  4345. "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
  4346. ]
  4347. .iter()
  4348. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  4349. .chain(std::iter::repeat_with(|| (Arc::new(Keypair::new()), true)))
  4350. .take(node_stakes.len())
  4351. .collect::<Vec<_>>();
  4352. let validators = validator_keys
  4353. .iter()
  4354. .map(|(kp, _)| kp.pubkey())
  4355. .collect::<Vec<_>>();
  4356. let (majority_pubkey, minority_pubkey, our_node_pubkey) =
  4357. (validators[0], validators[1], validators[2]);
  4358. let mut default_config = ValidatorConfig::default_for_test();
4359. // The minority validator is leader long enough to create the pruned fork
  4360. let validator_to_slots = vec![
  4361. (minority_pubkey, (fork_slot + fork_length) as usize),
  4362. (majority_pubkey, slots_per_epoch as usize),
  4363. ];
  4364. let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
  4365. default_config.fixed_leader_schedule = Some(FixedSchedule {
  4366. leader_schedule: Arc::new(leader_schedule),
  4367. });
  4368. let mut validator_configs = make_identical_validator_configs(&default_config, num_nodes);
  4369. // Don't let majority produce anything past the fork by tricking its leader schedule
  4370. validator_configs[0].fixed_leader_schedule = Some(FixedSchedule {
  4371. leader_schedule: Arc::new(create_custom_leader_schedule(
  4372. [(minority_pubkey, slots_per_epoch as usize)].into_iter(),
  4373. )),
  4374. });
  4375. let mut config = ClusterConfig {
  4376. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  4377. node_stakes,
  4378. validator_configs,
  4379. validator_keys: Some(validator_keys),
  4380. slots_per_epoch,
  4381. stakers_slot_offset: slots_per_epoch,
  4382. skip_warmup_slots: true,
  4383. ..ClusterConfig::default()
  4384. };
  4385. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  4386. let majority_ledger_path = cluster.ledger_path(&majority_pubkey);
  4387. let minority_ledger_path = cluster.ledger_path(&minority_pubkey);
  4388. let our_node_ledger_path = cluster.ledger_path(&our_node_pubkey);
  4389. info!("majority {majority_pubkey} ledger path {majority_ledger_path:?}");
  4390. info!("minority {minority_pubkey} ledger path {minority_ledger_path:?}");
  4391. info!("our_node {our_node_pubkey} ledger path {our_node_ledger_path:?}");
  4392. info!("Killing our node");
  4393. let our_node_info = cluster.exit_node(&our_node_pubkey);
  4394. info!("Waiting on majority validator to vote on at least {fork_slot}");
  4395. let now = Instant::now();
  4396. let mut last_majority_vote = 0;
  4397. loop {
  4398. let elapsed = now.elapsed();
  4399. assert!(
  4400. elapsed <= Duration::from_secs(30),
  4401. "Majority validator failed to vote on a slot >= {fork_slot} in {} secs, majority \
  4402. validator last vote: {last_majority_vote}",
  4403. elapsed.as_secs(),
  4404. );
  4405. sleep(Duration::from_millis(100));
  4406. if let Some((last_vote, _)) = last_vote_in_tower(&majority_ledger_path, &majority_pubkey) {
  4407. last_majority_vote = last_vote;
  4408. if last_vote >= fork_slot {
  4409. break;
  4410. }
  4411. }
  4412. }
  4413. info!("Killing majority validator, waiting for minority fork to reach a depth of at least 15");
  4414. let mut majority_validator_info = cluster.exit_node(&majority_pubkey);
  4415. let now = Instant::now();
  4416. let mut last_minority_vote = 0;
  4417. while last_minority_vote < fork_slot + 15 {
  4418. let elapsed = now.elapsed();
  4419. assert!(
  4420. elapsed <= Duration::from_secs(30),
  4421. "Minority validator failed to create a fork of depth >= {} in 15 secs, \
  4422. last_minority_vote: {last_minority_vote}",
  4423. elapsed.as_secs(),
  4424. );
  4425. if let Some((last_vote, _)) = last_vote_in_tower(&minority_ledger_path, &minority_pubkey) {
  4426. last_minority_vote = last_vote;
  4427. }
  4428. }
  4429. info!("Killing minority validator, fork created successfully: {last_minority_vote:?}");
  4430. let last_minority_vote =
  4431. wait_for_last_vote_in_tower_to_land_in_ledger(&minority_ledger_path, &minority_pubkey)
  4432. .unwrap();
  4433. let minority_validator_info = cluster.exit_node(&minority_pubkey);
  4434. info!("Truncating majority validator ledger to {fork_slot}");
  4435. {
  4436. remove_tower(&majority_ledger_path, &majority_pubkey);
  4437. let blockstore = open_blockstore(&majority_ledger_path);
  4438. purge_slots_with_count(&blockstore, fork_slot + 1, 100);
  4439. }
  4440. info!("Restarting majority validator");
  4441. // Make sure we don't send duplicate votes
  4442. majority_validator_info.config.wait_to_vote_slot = Some(fork_slot + fork_length);
  4443. // Fix the leader schedule so we can produce blocks
  4444. majority_validator_info
  4445. .config
  4446. .fixed_leader_schedule
  4447. .clone_from(&minority_validator_info.config.fixed_leader_schedule);
  4448. cluster.restart_node(
  4449. &majority_pubkey,
  4450. majority_validator_info,
  4451. SocketAddrSpace::Unspecified,
  4452. );
  4453. let mut last_majority_root = 0;
  4454. let now = Instant::now();
  4455. info!(
  4456. "Waiting for majority validator to root something past {}",
  4457. fork_slot + fork_length + majority_fork_buffer
  4458. );
  4459. while last_majority_root <= fork_slot + fork_length + majority_fork_buffer {
  4460. let elapsed = now.elapsed();
  4461. assert!(
  4462. elapsed <= Duration::from_secs(60),
  4463. "Majority validator failed to root something > {} in {} secs, last majority validator \
  4464. vote: {last_majority_vote}",
  4465. fork_slot + fork_length + majority_fork_buffer,
  4466. elapsed.as_secs(),
  4467. );
  4468. sleep(Duration::from_millis(100));
  4469. if let Some(last_root) = last_root_in_tower(&majority_ledger_path, &majority_pubkey) {
  4470. last_majority_root = last_root;
  4471. }
  4472. }
  4473. let last_majority_vote =
  4474. wait_for_last_vote_in_tower_to_land_in_ledger(&majority_ledger_path, &majority_pubkey)
  4475. .unwrap();
  4476. info!(
  4477. "Creating duplicate block built off of pruned branch for our node. Last majority vote \
  4478. {last_majority_vote}, Last minority vote {last_minority_vote}"
  4479. );
  4480. {
  4481. {
  4482. // Copy majority fork
  4483. std::fs::remove_dir_all(&our_node_info.info.ledger_path).unwrap();
  4484. let mut opt = fs_extra::dir::CopyOptions::new();
  4485. opt.copy_inside = true;
  4486. fs_extra::dir::copy(&majority_ledger_path, &our_node_ledger_path, &opt).unwrap();
  4487. remove_tower(&our_node_ledger_path, &majority_pubkey);
  4488. }
  4489. // Copy minority fork to our blockstore
  4490. // Set trusted=true in blockstore copy to skip the parent slot >= latest root check;
  4491. // this check would otherwise prevent the pruned fork from being inserted
  4492. let minority_blockstore = open_blockstore(&minority_validator_info.info.ledger_path);
  4493. let our_blockstore = open_blockstore(&our_node_info.info.ledger_path);
  4494. copy_blocks(
  4495. last_minority_vote,
  4496. &minority_blockstore,
  4497. &our_blockstore,
  4498. true,
  4499. );
  4500. // Change last block parent to chain off of (purged) minority fork
  4501. info!("For our node, changing parent of {last_majority_vote} to {last_minority_vote}");
  4502. our_blockstore.clear_unconfirmed_slot(last_majority_vote);
  4503. let entries = create_ticks(
  4504. 64 * (std::cmp::max(1, last_majority_vote - last_minority_vote)),
  4505. 0,
  4506. Hash::default(),
  4507. );
  4508. let shreds =
  4509. entries_to_test_shreds(&entries, last_majority_vote, last_minority_vote, true, 0);
  4510. our_blockstore.insert_shreds(shreds, None, false).unwrap();
  4511. }
  4512. // Actual test, `our_node` will replay the minority fork, then the majority fork which will
  4513. // prune the minority fork. Then finally the problematic block will be skipped (not replayed)
  4514. // because its parent has been pruned from bank forks. Meanwhile the majority validator has
  4515. // continued making blocks and voting, duplicate confirming everything. This will cause the
4516. // pruned fork to become popular, triggering an ancestor hashes repair and eventually allowing our
  4517. // node to dump & repair & continue making roots.
  4518. info!("Restarting our node, verifying that our node is making roots past the duplicate block");
  4519. cluster.restart_node(
  4520. &our_node_pubkey,
  4521. our_node_info,
  4522. SocketAddrSpace::Unspecified,
  4523. );
  4524. cluster_tests::check_for_new_roots(
  4525. 16,
  4526. &[cluster.get_contact_info(&our_node_pubkey).unwrap().clone()],
  4527. &cluster.connection_cache,
  4528. "test_duplicate_with_pruned_ancestor",
  4529. );
  4530. }
  4531. /// Test fastboot to ensure a node can boot from local state and still produce correct snapshots
  4532. ///
  4533. /// 1. Start node 1 and wait for it to take snapshots
  4534. /// 2. Start node 2 with the snapshots from (1)
  4535. /// 3. Wait for node 2 to take a bank snapshot
  4536. /// 4. Restart node 2 with the local state from (3)
  4537. /// 5. Wait for node 2 to take new snapshots
  4538. /// 6. Start node 3 with the snapshots from (5)
  4539. /// 7. Wait for node 3 to take new snapshots
4540. /// 8. Ensure the snapshots from (7) match those of nodes 1 and 2
  4541. #[test]
  4542. #[serial]
  4543. fn test_boot_from_local_state() {
  4544. agave_logger::setup_with_default("error,local_cluster=info");
  4545. const FULL_SNAPSHOT_INTERVAL: SnapshotInterval =
  4546. SnapshotInterval::Slots(NonZeroU64::new(100).unwrap());
  4547. const INCREMENTAL_SNAPSHOT_INTERVAL: SnapshotInterval =
  4548. SnapshotInterval::Slots(NonZeroU64::new(10).unwrap());
  4549. let validator1_config =
  4550. SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 2);
  4551. let validator2_config =
  4552. SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 4);
  4553. let validator3_config =
  4554. SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 3);
  4555. let mut cluster_config = ClusterConfig {
  4556. node_stakes: vec![100 * DEFAULT_NODE_STAKE],
  4557. validator_configs: make_identical_validator_configs(&validator1_config.validator_config, 1),
  4558. ..ClusterConfig::default()
  4559. };
  4560. let mut cluster = LocalCluster::new(&mut cluster_config, SocketAddrSpace::Unspecified);
  4561. // in order to boot from local state, need to first have snapshot archives
  4562. info!("Waiting for validator1 to create snapshots...");
  4563. let (incremental_snapshot_archive, full_snapshot_archive) =
  4564. LocalCluster::wait_for_next_incremental_snapshot(
  4565. &cluster,
  4566. &validator1_config.full_snapshot_archives_dir,
  4567. &validator1_config.incremental_snapshot_archives_dir,
  4568. Some(Duration::from_secs(5 * 60)),
  4569. );
  4570. debug!(
  4571. "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
  4572. {incremental_snapshot_archive:?}"
  4573. );
  4574. info!("Waiting for validator1 to create snapshots... DONE");
  4575. info!("Copying snapshots to validator2...");
  4576. std::fs::copy(
  4577. full_snapshot_archive.path(),
  4578. validator2_config
  4579. .full_snapshot_archives_dir
  4580. .path()
  4581. .join(full_snapshot_archive.path().file_name().unwrap()),
  4582. )
  4583. .unwrap();
  4584. std::fs::copy(
  4585. incremental_snapshot_archive.path(),
  4586. validator2_config
  4587. .incremental_snapshot_archives_dir
  4588. .path()
  4589. .join(incremental_snapshot_archive.path().file_name().unwrap()),
  4590. )
  4591. .unwrap();
  4592. info!("Copying snapshots to validator2... DONE");
  4593. info!("Starting validator2...");
  4594. let validator2_identity = Arc::new(Keypair::new());
  4595. cluster.add_validator(
  4596. &validator2_config.validator_config,
  4597. DEFAULT_NODE_STAKE,
  4598. validator2_identity.clone(),
  4599. None,
  4600. SocketAddrSpace::Unspecified,
  4601. );
  4602. info!("Starting validator2... DONE");
4603. // wait for a new bank snapshot to fastboot from, one that is newer than its snapshot archives
  4604. info!("Waiting for validator2 to create a new bank snapshot...");
  4605. let timer = Instant::now();
  4606. let bank_snapshot = loop {
  4607. if let Some(bank_snapshot) =
  4608. snapshot_utils::get_highest_bank_snapshot(&validator2_config.bank_snapshots_dir)
  4609. {
  4610. if bank_snapshot.slot > incremental_snapshot_archive.slot() {
  4611. break bank_snapshot;
  4612. }
  4613. }
  4614. assert!(
  4615. timer.elapsed() < Duration::from_secs(30),
  4616. "It should not take longer than 30 seconds to create a new bank snapshot"
  4617. );
  4618. std::thread::yield_now();
  4619. };
  4620. debug!("bank snapshot: {bank_snapshot:?}");
  4621. info!("Waiting for validator2 to create a new bank snapshot... DONE");
  4622. // restart WITH fastboot
  4623. info!("Restarting validator2 from local state...");
  4624. let mut validator2_info = cluster.exit_node(&validator2_identity.pubkey());
  4625. validator2_info.config.use_snapshot_archives_at_startup = UseSnapshotArchivesAtStartup::Never;
  4626. cluster.restart_node(
  4627. &validator2_identity.pubkey(),
  4628. validator2_info,
  4629. SocketAddrSpace::Unspecified,
  4630. );
  4631. info!("Restarting validator2 from local state... DONE");
  4632. info!("Waiting for validator2 to create snapshots...");
  4633. let (incremental_snapshot_archive, full_snapshot_archive) =
  4634. LocalCluster::wait_for_next_incremental_snapshot(
  4635. &cluster,
  4636. &validator2_config.full_snapshot_archives_dir,
  4637. &validator2_config.incremental_snapshot_archives_dir,
  4638. Some(Duration::from_secs(5 * 60)),
  4639. );
  4640. debug!(
  4641. "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
  4642. {incremental_snapshot_archive:?}"
  4643. );
  4644. info!("Waiting for validator2 to create snapshots... DONE");
  4645. info!("Copying snapshots to validator3...");
  4646. std::fs::copy(
  4647. full_snapshot_archive.path(),
  4648. validator3_config
  4649. .full_snapshot_archives_dir
  4650. .path()
  4651. .join(full_snapshot_archive.path().file_name().unwrap()),
  4652. )
  4653. .unwrap();
  4654. std::fs::copy(
  4655. incremental_snapshot_archive.path(),
  4656. validator3_config
  4657. .incremental_snapshot_archives_dir
  4658. .path()
  4659. .join(incremental_snapshot_archive.path().file_name().unwrap()),
  4660. )
  4661. .unwrap();
  4662. info!("Copying snapshots to validator3... DONE");
  4663. info!("Starting validator3...");
  4664. let validator3_identity = Arc::new(Keypair::new());
  4665. cluster.add_validator(
  4666. &validator3_config.validator_config,
  4667. DEFAULT_NODE_STAKE,
  4668. validator3_identity,
  4669. None,
  4670. SocketAddrSpace::Unspecified,
  4671. );
  4672. info!("Starting validator3... DONE");
  4673. // wait for a new snapshot to ensure the validator is making roots
  4674. info!("Waiting for validator3 to create snapshots...");
  4675. let (incremental_snapshot_archive, full_snapshot_archive) =
  4676. LocalCluster::wait_for_next_incremental_snapshot(
  4677. &cluster,
  4678. &validator3_config.full_snapshot_archives_dir,
  4679. &validator3_config.incremental_snapshot_archives_dir,
  4680. Some(Duration::from_secs(5 * 60)),
  4681. );
  4682. debug!(
  4683. "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
  4684. {incremental_snapshot_archive:?}"
  4685. );
  4686. info!("Waiting for validator3 to create snapshots... DONE");
  4687. // Ensure that all validators have the correct state by comparing snapshots.
4688. // Since validator1 has been running the longest, it may be ahead of the others,
4689. // so use it as the baseline the others are compared against.
  4690. // - wait for validator1 to take new snapshots
  4691. // - wait for the other validators to have high enough snapshots
  4692. // - ensure the other validators' snapshots match validator1's
  4693. //
  4694. // NOTE: There's a chance validator 2 or 3 has crossed the next full snapshot past what
  4695. // validator 1 has. If that happens, validator 2 or 3 may have purged the snapshots needed
4696. // to compare with validator 1, and the asserts below will fail. If that happens, the full
4697. // snapshot interval may need to be increased.
  4698. info!("Waiting for validator1 to create snapshots...");
  4699. let (incremental_snapshot_archive, full_snapshot_archive) =
  4700. LocalCluster::wait_for_next_incremental_snapshot(
  4701. &cluster,
  4702. &validator1_config.full_snapshot_archives_dir,
  4703. &validator1_config.incremental_snapshot_archives_dir,
  4704. Some(Duration::from_secs(5 * 60)),
  4705. );
  4706. debug!(
  4707. "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
  4708. {incremental_snapshot_archive:?}"
  4709. );
  4710. info!("Waiting for validator1 to create snapshots... DONE");
  4711. // These structs are used to provide better error logs if the asserts below are violated.
  4712. // The `allow(dead_code)` annotation is to appease clippy, which thinks the field is unused...
  4713. #[allow(dead_code)]
  4714. #[derive(Debug)]
  4715. struct SnapshotSlot(Slot);
  4716. #[allow(dead_code)]
  4717. #[derive(Debug)]
  4718. struct BaseSlot(Slot);
  4719. for (i, other_validator_config) in [(2, &validator2_config), (3, &validator3_config)] {
  4720. info!("Checking if validator{i} has the same snapshots as validator1...");
  4721. let timer = Instant::now();
  4722. loop {
  4723. if let Some(other_full_snapshot_slot) =
  4724. snapshot_paths::get_highest_full_snapshot_archive_slot(
  4725. &other_validator_config.full_snapshot_archives_dir,
  4726. )
  4727. {
  4728. let other_incremental_snapshot_slot =
  4729. snapshot_paths::get_highest_incremental_snapshot_archive_slot(
  4730. &other_validator_config.incremental_snapshot_archives_dir,
  4731. other_full_snapshot_slot,
  4732. );
  4733. if other_full_snapshot_slot >= full_snapshot_archive.slot()
  4734. && other_incremental_snapshot_slot >= Some(incremental_snapshot_archive.slot())
  4735. {
  4736. break;
  4737. }
  4738. }
  4739. assert!(
  4740. timer.elapsed() < Duration::from_secs(60),
  4741. "It should not take longer than 60 seconds to take snapshots",
  4742. );
  4743. std::thread::yield_now();
  4744. }
  4745. let other_full_snapshot_archives = snapshot_paths::get_full_snapshot_archives(
  4746. &other_validator_config.full_snapshot_archives_dir,
  4747. );
  4748. debug!("validator{i} full snapshot archives: {other_full_snapshot_archives:?}");
  4749. assert!(
  4750. other_full_snapshot_archives
  4751. .iter()
  4752. .any(
  4753. |other_full_snapshot_archive| other_full_snapshot_archive.slot()
  4754. == full_snapshot_archive.slot()
  4755. && other_full_snapshot_archive.hash() == full_snapshot_archive.hash()
  4756. ),
  4757. "full snapshot archive does not match!\n validator1: {:?}\n validator{i}: {:?}",
  4758. (
  4759. SnapshotSlot(full_snapshot_archive.slot()),
  4760. full_snapshot_archive.hash(),
  4761. ),
  4762. other_full_snapshot_archives
  4763. .iter()
  4764. .sorted_unstable()
  4765. .rev()
  4766. .map(|snap| (SnapshotSlot(snap.slot()), snap.hash()))
  4767. .collect::<Vec<_>>(),
  4768. );
  4769. let other_incremental_snapshot_archives = snapshot_paths::get_incremental_snapshot_archives(
  4770. &other_validator_config.incremental_snapshot_archives_dir,
  4771. );
  4772. debug!(
  4773. "validator{i} incremental snapshot archives: {other_incremental_snapshot_archives:?}"
  4774. );
  4775. assert!(
  4776. other_incremental_snapshot_archives
  4777. .iter()
  4778. .any(
  4779. |other_incremental_snapshot_archive| other_incremental_snapshot_archive
  4780. .base_slot()
  4781. == incremental_snapshot_archive.base_slot()
  4782. && other_incremental_snapshot_archive.slot()
  4783. == incremental_snapshot_archive.slot()
  4784. && other_incremental_snapshot_archive.hash()
  4785. == incremental_snapshot_archive.hash()
  4786. ),
  4787. "incremental snapshot archive does not match!\n validator1: {:?}\n validator{i}: \
  4788. {:?}",
  4789. (
  4790. BaseSlot(incremental_snapshot_archive.base_slot()),
  4791. SnapshotSlot(incremental_snapshot_archive.slot()),
  4792. incremental_snapshot_archive.hash(),
  4793. ),
  4794. other_incremental_snapshot_archives
  4795. .iter()
  4796. .sorted_unstable()
  4797. .rev()
  4798. .map(|snap| (
  4799. BaseSlot(snap.base_slot()),
  4800. SnapshotSlot(snap.slot()),
  4801. snap.hash(),
  4802. ))
  4803. .collect::<Vec<_>>(),
  4804. );
  4805. info!("Checking if validator{i} has the same snapshots as validator1... DONE");
  4806. }
  4807. }
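// The snapshot-copy step above is repeated verbatim for validator2 and validator3; a
// minimal helper sketch of that pattern (hypothetical, not used by the tests) could look
// like this:
#[allow(dead_code)]
fn copy_snapshot_archive_into_dir(archive_path: &Path, destination_dir: &Path) {
    // Keep the original file name so the slot/hash encoded in it can still be parsed by
    // the snapshot machinery on the receiving validator.
    std::fs::copy(
        archive_path,
        destination_dir.join(archive_path.file_name().unwrap()),
    )
    .unwrap();
}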
  4808. /// Test fastboot to ensure a node can boot in case it crashed while archiving a full snapshot
  4809. ///
  4810. /// 1. Start a node and wait for it to take at least two full snapshots and one more
  4811. /// bank snapshot POST afterwards (for simplicity, wait for 2 full and 1 incremental).
  4812. /// 2. To simulate a node crashing while archiving a full snapshot, stop the node and
  4813. /// then delete the latest full snapshot archive.
  4814. /// 3. Restart the node. This should succeed, and boot from the older full snapshot archive,
  4815. /// *not* the latest bank snapshot POST.
  4816. /// 4. Take another incremental snapshot. This ensures the correct snapshot was loaded,
  4817. /// AND ensures the correct accounts hashes are present (which are needed when making
  4818. /// the bank snapshot POST for the new incremental snapshot).
#[test]
#[serial]
fn test_boot_from_local_state_missing_archive() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    const FULL_SNAPSHOT_INTERVAL: SnapshotInterval =
        SnapshotInterval::Slots(NonZeroU64::new(20).unwrap());
    const INCREMENTAL_SNAPSHOT_INTERVAL: SnapshotInterval =
        SnapshotInterval::Slots(NonZeroU64::new(10).unwrap());

    let validator_config =
        SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 7);
    let mut cluster_config = ClusterConfig {
        node_stakes: vec![100 * DEFAULT_NODE_STAKE],
        validator_configs: make_identical_validator_configs(&validator_config.validator_config, 1),
        ..ClusterConfig::default()
    };
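    // Start a single-node cluster that takes full snapshots every 20 slots and incremental
    // snapshots every 10 slots, so the archives needed below are produced quickly.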
    let mut cluster = LocalCluster::new(&mut cluster_config, SocketAddrSpace::Unspecified);

    // we need two full snapshots and an incremental snapshot for this test
    info!("Waiting for validator to create snapshots...");
    LocalCluster::wait_for_next_full_snapshot(
        &cluster,
        &validator_config.full_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    LocalCluster::wait_for_next_full_snapshot(
        &cluster,
        &validator_config.full_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    LocalCluster::wait_for_next_incremental_snapshot(
        &cluster,
        &validator_config.full_snapshot_archives_dir,
        &validator_config.incremental_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    debug!(
        "snapshot archives:\n\tfull: {:?}\n\tincr: {:?}",
        snapshot_paths::get_full_snapshot_archives(
            validator_config.full_snapshot_archives_dir.path()
        ),
        snapshot_paths::get_incremental_snapshot_archives(
            validator_config.incremental_snapshot_archives_dir.path()
        ),
    );
    info!("Waiting for validator to create snapshots... DONE");

    // now delete the latest full snapshot archive and restart, to simulate a crash while archiving
    // a full snapshot package
    info!("Stopping validator...");
    let validator_pubkey = cluster.get_node_pubkeys()[0];
    let mut validator_info = cluster.exit_node(&validator_pubkey);
    info!("Stopping validator... DONE");

    info!("Deleting latest full snapshot archive...");
    let highest_full_snapshot = snapshot_paths::get_highest_full_snapshot_archive_info(
        validator_config.full_snapshot_archives_dir.path(),
    )
    .unwrap();
    fs::remove_file(highest_full_snapshot.path()).unwrap();
    info!("Deleting latest full snapshot archive... DONE");

    info!("Restarting validator...");
    // if we set this to `Never`, the validator should not boot
    validator_info.config.use_snapshot_archives_at_startup =
        UseSnapshotArchivesAtStartup::WhenNewest;
    cluster.restart_node(
        &validator_pubkey,
        validator_info,
        SocketAddrSpace::Unspecified,
    );
    info!("Restarting validator... DONE");

    // ensure we can create new incremental snapshots, since that is what used to fail
    info!("Waiting for validator to create snapshots...");
    LocalCluster::wait_for_next_incremental_snapshot(
        &cluster,
        &validator_config.full_snapshot_archives_dir,
        &validator_config.incremental_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    info!("Waiting for validator to create snapshots... DONE");
}
// We want to simulate the following:
//
//      /--- 1 --- 3 (duplicate block)
//   0
//      \--- 2
//
// 1. > DUPLICATE_THRESHOLD of the nodes vote on some version of the duplicate block 3, but don't
//    immediately duplicate confirm, so they remove 3 from fork choice and reset PoH back to 1.
// 2. All the votes on 3 don't land because there are no further blocks building off 3.
// 3. Some < SWITCHING_THRESHOLD of nodes vote on 2, making it the heaviest fork because no votes
//    on 3 landed.
// 4. Nodes then see duplicate confirmation on 3.
// 5. Unless somebody builds off of 3 to include the duplicate confirmed votes, 2 will still be the
//    heaviest. However, because 2 has < SWITCHING_THRESHOLD of the votes, people who voted on 3
//    can't switch, leading to a stall.
#[test]
#[serial]
#[allow(unused_attributes)]
#[ignore]
fn test_duplicate_shreds_switch_failure() {
    fn wait_for_duplicate_fork_frozen(ledger_path: &Path, dup_slot: Slot) -> Hash {
        // Ensure all the slots <= dup_slot are also full so we know we can replay up to dup_slot
        // on restart
        info!("Waiting to receive and replay entire duplicate fork with tip {dup_slot}");
        loop {
            let duplicate_fork_validator_blockstore = open_blockstore(ledger_path);
            if let Some(frozen_hash) = duplicate_fork_validator_blockstore.get_bank_hash(dup_slot) {
                return frozen_hash;
            }
            sleep(Duration::from_millis(1000));
        }
    }
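    // Clears a validator's tower, purges its ledger from `start_slot` onward, and drops any
    // recorded duplicate proofs before the node is restarted.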
    fn clear_ledger_and_tower(ledger_path: &Path, pubkey: &Pubkey, start_slot: Slot) {
        remove_tower_if_exists(ledger_path, pubkey);
        let blockstore = open_blockstore(ledger_path);
        purge_slots_with_count(&blockstore, start_slot, 1000);
        {
            // Remove all duplicate proofs so that this validator will vote on the `dup_slot`.
            while let Some((proof_slot, _)) = blockstore.get_first_duplicate_proof() {
                blockstore.remove_slot_duplicate_proof(proof_slot).unwrap();
            }
        }
    }
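    // Restarts a duplicate-fork validator with turbine disabled, waits for it to vote on
    // `dup_slot`, then re-enables turbine and re-sends the conflicting shreds so the node
    // records a duplicate proof for that slot.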
    fn restart_dup_validator(
        cluster: &mut LocalCluster,
        mut duplicate_fork_validator_info: ClusterValidatorInfo,
        pubkey: &Pubkey,
        dup_slot: Slot,
        dup_shred1: &Shred,
        dup_shred2: &Shred,
    ) {
        let disable_turbine = Arc::new(AtomicBool::new(true));
        duplicate_fork_validator_info.config.voting_disabled = false;
        duplicate_fork_validator_info.config.turbine_disabled = disable_turbine.clone();
        info!("Restarting node: {pubkey}");
        cluster.restart_node(
            pubkey,
            duplicate_fork_validator_info,
            SocketAddrSpace::Unspecified,
        );
        let ledger_path = cluster.ledger_path(pubkey);

        // Lift the partition after `pubkey` votes on the `dup_slot`
        info!("Waiting on duplicate fork to vote on duplicate slot: {dup_slot}");
        loop {
            let last_vote = last_vote_in_tower(&ledger_path, pubkey);
            if let Some((latest_vote_slot, _hash)) = last_vote {
                info!("latest vote: {latest_vote_slot}");
                if latest_vote_slot == dup_slot {
                    break;
                }
            }
            sleep(Duration::from_millis(1000));
        }
        disable_turbine.store(false, Ordering::Relaxed);

        // Send the validator the other version of the shred so it realizes the slot is a duplicate
        info!("Resending duplicate shreds to duplicate fork validator");
        cluster.send_shreds_to_validator(vec![dup_shred1, dup_shred2], pubkey);

        // Check the validator detected a duplicate proof
        info!("Waiting on duplicate fork validator to see duplicate shreds and make a proof");
        loop {
            let duplicate_fork_validator_blockstore = open_blockstore(&ledger_path);
            if let Some(dup_proof) = duplicate_fork_validator_blockstore.get_first_duplicate_proof()
            {
                assert_eq!(dup_proof.0, dup_slot);
                break;
            }
            sleep(Duration::from_millis(1000));
        }
    }
    agave_logger::setup_with_default(RUST_LOG_FILTER);

    let validator_keypairs = [
        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
        "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
        "2XFPyuzPuXMsPnkH98UNcQpfA7M4b2TUhRxcWEoWjy4M6ojQ7HGJSvotktEVbaq49Qxt16wUjdqvSJc6ecbFfZwj",
    ]
    .iter()
    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
    .collect::<Vec<_>>();
    let validators = validator_keypairs
        .iter()
        .map(|(kp, _)| kp.pubkey())
        .collect::<Vec<_>>();

    // Create 4 nodes:
    // 1) Two nodes that sum to > DUPLICATE_THRESHOLD but < 2/3+ supermajority. It's important both
    //    of them individually have <= DUPLICATE_THRESHOLD to avoid duplicate confirming their own
    //    blocks immediately upon voting.
    // 2) One with <= SWITCHING_THRESHOLD so that the validators from 1) can't switch to it.
    // 3) One bad leader to make duplicate slots.
    let total_stake = 100 * DEFAULT_NODE_STAKE;
    let target_switch_fork_stake = (total_stake as f64 * SWITCH_FORK_THRESHOLD) as u64;
    // duplicate_fork_node1_stake + duplicate_fork_node2_stake > DUPLICATE_THRESHOLD. Don't want
    // one node with > DUPLICATE_THRESHOLD, otherwise they will automatically duplicate confirm a
    // slot when they vote, which will prevent them from resetting to an earlier ancestor when they
    // later discover that slot as duplicate.
    let duplicate_fork_node1_stake = (total_stake as f64 * DUPLICATE_THRESHOLD) as u64;
    let duplicate_fork_node2_stake = 1;
    let duplicate_leader_stake = total_stake
        - target_switch_fork_stake
        - duplicate_fork_node1_stake
        - duplicate_fork_node2_stake;
    assert!(
        duplicate_fork_node1_stake + duplicate_fork_node2_stake
            > (total_stake as f64 * DUPLICATE_THRESHOLD) as u64
    );
    assert!(duplicate_fork_node1_stake <= (total_stake as f64 * DUPLICATE_THRESHOLD) as u64);
    assert!(duplicate_fork_node2_stake <= (total_stake as f64 * DUPLICATE_THRESHOLD) as u64);

    let node_stakes = vec![
        duplicate_leader_stake,
        target_switch_fork_stake,
        duplicate_fork_node1_stake,
        duplicate_fork_node2_stake,
    ];

    let (
        // Has to be first in order to be picked as the duplicate leader
        duplicate_leader_validator_pubkey,
        target_switch_fork_validator_pubkey,
        duplicate_fork_validator1_pubkey,
        duplicate_fork_validator2_pubkey,
    ) = (validators[0], validators[1], validators[2], validators[3]);

    info!(
        "duplicate_fork_validator1_pubkey: {duplicate_fork_validator1_pubkey}, \
         duplicate_fork_validator2_pubkey: {duplicate_fork_validator2_pubkey}, \
         target_switch_fork_validator_pubkey: {target_switch_fork_validator_pubkey}, \
         duplicate_leader_validator_pubkey: {duplicate_leader_validator_pubkey}",
    );
    let validator_to_slots = vec![
        (duplicate_leader_validator_pubkey, 50),
        (target_switch_fork_validator_pubkey, 5),
        // The ideal sequence of events for the `duplicate_fork_validator1_pubkey` validator would go:
        // 1. Vote for duplicate block `D`
        // 2. See `D` is duplicate, remove from fork choice and reset to ancestor `A`, potentially
        //    generating a fork off that ancestor
        // 3. See `D` is duplicate confirmed, but because of the bug fixed by
        //    https://github.com/solana-labs/solana/pull/28172 where we disallow resetting to a slot
        //    which matches the last vote slot, we still don't build off `D`, and continue building
        //    on `A`.
        //
        // The `target_switch_fork_validator_pubkey` fork is necessary in step 2. to force the
        // validator to stall trying to switch vote to that other fork, and to prevent the validator
        // from making a freebie vote from `A` that would allow consensus to continue.
        //
        // It's important we don't give the `duplicate_fork_validator1_pubkey` leader slots until a
        // certain number of slots have elapsed, to ensure:
        // 1. We have ample time so it doesn't have a chance to make a block until after step 2.,
        //    when it sees the block is duplicate. Otherwise, it will build its block on top of the
        //    duplicate block, which will possibly include a vote for the duplicate block. We want
        //    to avoid this because this will make fork choice pick the duplicate block.
        // 2. The `duplicate_fork_validator1_pubkey` sees the target switch fork before it can make
        //    another vote on any forks it generates from `A`. Otherwise, it will make a freebie
        //    vote on its own fork from `A` and consensus will continue on that fork.
        //
        // Give the duplicate fork validator plenty of leader slots after the initial delay in order
        // to:
        // 1. Prevent the switch fork from getting locked out for too long
        // 2. Provide a lot of consecutive slots in which to build up lockout in tower and make new
        //    roots to resolve the partition
        (duplicate_fork_validator1_pubkey, 500),
    ];
    let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());

    // 1) Set up the cluster
    let (duplicate_slot_sender, duplicate_slot_receiver) = unbounded();
    let validator_configs = validator_keypairs
        .into_iter()
        .map(|(validator_keypair, in_genesis)| {
            let pubkey = validator_keypair.pubkey();
            // Only allow the leader to vote so that no version gets duplicate confirmed.
            // This is to avoid the leader dumping its own block.
            let voting_disabled = { pubkey != duplicate_leader_validator_pubkey };
            ValidatorTestConfig {
                validator_keypair,
                validator_config: ValidatorConfig {
                    voting_disabled,
                    ..ValidatorConfig::default_for_test()
                },
                in_genesis,
            }
        })
        .collect();
    let (mut cluster, _validator_keypairs) = test_faulty_node(
        BroadcastStageType::BroadcastDuplicates(BroadcastDuplicatesConfig {
            partition: ClusterPartition::Pubkey(vec![
                // Don't include the other dup validator here, otherwise
                // this dup version will have enough to be duplicate confirmed and
                // will cause the dup leader to try and dump its own slot,
                // crashing before it can signal the duplicate slot via the
                // `duplicate_slot_receiver` below
                duplicate_fork_validator1_pubkey,
            ]),
            duplicate_slot_sender: Some(duplicate_slot_sender),
        }),
        node_stakes,
        Some(validator_configs),
        Some(FixedSchedule {
            leader_schedule: Arc::new(leader_schedule),
        }),
    );
    // Kill two validators that might duplicate confirm the duplicate block
    info!("Killing unnecessary validators");
    let duplicate_fork_validator2_ledger_path =
        cluster.ledger_path(&duplicate_fork_validator2_pubkey);
    let duplicate_fork_validator2_info = cluster.exit_node(&duplicate_fork_validator2_pubkey);
    let target_switch_fork_validator_ledger_path =
        cluster.ledger_path(&target_switch_fork_validator_pubkey);
    let mut target_switch_fork_validator_info =
        cluster.exit_node(&target_switch_fork_validator_pubkey);

    // 2) Wait for a duplicate slot to land on both validators and for the target switch
    //    fork validator to get another version of the slot. Also ensure all versions of
    //    the block are playable
    let dup_slot = duplicate_slot_receiver
        .recv_timeout(Duration::from_millis(30_000))
        .expect("Duplicate leader failed to make a duplicate slot in allotted time");

    // Make sure both validators received and replayed the complete blocks
    let dup_frozen_hash = wait_for_duplicate_fork_frozen(
        &cluster.ledger_path(&duplicate_fork_validator1_pubkey),
        dup_slot,
    );
    let original_frozen_hash = wait_for_duplicate_fork_frozen(
        &cluster.ledger_path(&duplicate_leader_validator_pubkey),
        dup_slot,
    );
    assert_ne!(
        original_frozen_hash, dup_frozen_hash,
        "Duplicate leader and partition target got same hash: {original_frozen_hash}",
    );
    // 3) Force `duplicate_fork_validator1_pubkey` to see a duplicate proof
    info!("Waiting for duplicate proof for slot: {dup_slot}");
    let duplicate_proof = {
        // Grab the other version of the slot from the `duplicate_leader_validator_pubkey`
        // which we confirmed to have a different version of the frozen hash in the loop
        // above
        let ledger_path = cluster.ledger_path(&duplicate_leader_validator_pubkey);
        let blockstore = open_blockstore(&ledger_path);
        let dup_shred = blockstore
            .get_data_shreds_for_slot(dup_slot, 0)
            .unwrap()
            .pop()
            .unwrap();
        info!(
            "Sending duplicate shred: {:?} to {:?}",
            dup_shred.signature(),
            duplicate_fork_validator1_pubkey
        );
        cluster.send_shreds_to_validator(vec![&dup_shred], &duplicate_fork_validator1_pubkey);
        wait_for_duplicate_proof(
            &cluster.ledger_path(&duplicate_fork_validator1_pubkey),
            dup_slot,
        )
        .unwrap_or_else(|| panic!("Duplicate proof for slot {dup_slot} not found"))
    };

    // 4) Kill all the validators
    info!("Killing remaining validators");
    let duplicate_fork_validator1_ledger_path =
        cluster.ledger_path(&duplicate_fork_validator1_pubkey);
    let duplicate_fork_validator1_info = cluster.exit_node(&duplicate_fork_validator1_pubkey);
    let duplicate_leader_ledger_path = cluster.ledger_path(&duplicate_leader_validator_pubkey);
    cluster.exit_node(&duplicate_leader_validator_pubkey);
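    // Reconstruct both versions of the conflicting shred from the duplicate proof so they can be
    // re-sent to the duplicate-fork validators after they are restarted below.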
    let dup_shred1 = Shred::new_from_serialized_shred(duplicate_proof.shred1.clone()).unwrap();
    let dup_shred2 = Shred::new_from_serialized_shred(duplicate_proof.shred2).unwrap();
    assert_eq!(dup_shred1.slot(), dup_shred2.slot());
    assert_eq!(dup_shred1.slot(), dup_slot);

    // Purge everything including the `dup_slot` from the `target_switch_fork_validator_pubkey`
    info!("Purging towers and ledgers for: {target_switch_fork_validator_pubkey:?}");
    Blockstore::destroy(&target_switch_fork_validator_ledger_path).unwrap();
    {
        let blockstore1 = open_blockstore(&duplicate_leader_ledger_path);
        let blockstore2 = open_blockstore(&target_switch_fork_validator_ledger_path);
        copy_blocks(dup_slot, &blockstore1, &blockstore2, false);
    }
    clear_ledger_and_tower(
        &target_switch_fork_validator_ledger_path,
        &target_switch_fork_validator_pubkey,
        dup_slot,
    );

    info!("Purging towers and ledgers for: {duplicate_fork_validator1_pubkey:?}");
    clear_ledger_and_tower(
        &duplicate_fork_validator1_ledger_path,
        &duplicate_fork_validator1_pubkey,
        dup_slot + 1,
    );

    info!("Purging towers and ledgers for: {duplicate_fork_validator2_pubkey:?}");
    // Copy validator 1's ledger to validator 2 so that they have the same version
    // of the duplicate slot
    clear_ledger_and_tower(
        &duplicate_fork_validator2_ledger_path,
        &duplicate_fork_validator2_pubkey,
        dup_slot,
    );
    Blockstore::destroy(&duplicate_fork_validator2_ledger_path).unwrap();
    {
        let blockstore1 = open_blockstore(&duplicate_fork_validator1_ledger_path);
        let blockstore2 = open_blockstore(&duplicate_fork_validator2_ledger_path);
        copy_blocks(dup_slot, &blockstore1, &blockstore2, false);
    }
    // Set entrypoint to `target_switch_fork_validator_pubkey` so we can run discovery in gossip
    // even without the bad leader
    cluster.set_entry_point(target_switch_fork_validator_info.info.contact_info.clone());

    // 5) Restart `target_switch_fork_validator_pubkey`, and ensure it votes on its own leader slot
    //    that's not descended from the duplicate slot
    info!("Restarting switch fork node");
    target_switch_fork_validator_info.config.voting_disabled = false;
    cluster.restart_node(
        &target_switch_fork_validator_pubkey,
        target_switch_fork_validator_info,
        SocketAddrSpace::Unspecified,
    );
    let target_switch_fork_validator_ledger_path =
        cluster.ledger_path(&target_switch_fork_validator_pubkey);

    info!("Waiting for switch fork to make block past duplicate fork");
    loop {
        let last_vote = wait_for_last_vote_in_tower_to_land_in_ledger(
            &target_switch_fork_validator_ledger_path,
            &target_switch_fork_validator_pubkey,
        );
        if let Some(latest_vote_slot) = last_vote {
            if latest_vote_slot > dup_slot {
                let blockstore = open_blockstore(&target_switch_fork_validator_ledger_path);
                let ancestor_slots: HashSet<Slot> =
                    AncestorIterator::new_inclusive(latest_vote_slot, &blockstore).collect();
                assert!(ancestor_slots.contains(&latest_vote_slot));
                assert!(ancestor_slots.contains(&0));
                assert!(!ancestor_slots.contains(&dup_slot));
                break;
            }
        }
        sleep(Duration::from_millis(1000));
    }
    // Now restart the duplicate validators
    // Start the node with partition enabled so they don't see the `target_switch_fork_validator_pubkey`
    // before voting on the duplicate block
    info!("Restarting duplicate fork node");
    // Ensure `duplicate_fork_validator1_pubkey` votes before starting up `duplicate_fork_validator2_pubkey`
    // to prevent them seeing `dup_slot` as duplicate confirmed before voting.
    restart_dup_validator(
        &mut cluster,
        duplicate_fork_validator1_info,
        &duplicate_fork_validator1_pubkey,
        dup_slot,
        &dup_shred1,
        &dup_shred2,
    );
    restart_dup_validator(
        &mut cluster,
        duplicate_fork_validator2_info,
        &duplicate_fork_validator2_pubkey,
        dup_slot,
        &dup_shred1,
        &dup_shred2,
    );

    // Wait for the `duplicate_fork_validator1_pubkey` to make another leader block on top
    // of the duplicate fork which includes their own vote for `dup_block`. This
    // should make the duplicate fork the heaviest
    info!("Waiting on duplicate fork validator to generate block on top of duplicate fork");
    loop {
        let duplicate_fork_validator_blockstore =
            open_blockstore(&cluster.ledger_path(&duplicate_fork_validator1_pubkey));
        let meta = duplicate_fork_validator_blockstore
            .meta(dup_slot)
            .unwrap()
            .unwrap();
        if !meta.next_slots.is_empty() {
            info!(
                "duplicate fork validator saw new slots: {:?} on top of duplicate slot",
                meta.next_slots
            );
            break;
        }
        sleep(Duration::from_millis(1000));
    }

    // Check that the cluster is making progress
    cluster.check_for_new_roots(
        16,
        "test_duplicate_shreds_switch_failure",
        SocketAddrSpace::Unspecified,
    );
}
#[test]
#[serial]
fn test_randomly_mixed_block_verification_methods_between_bootstrap_and_not() {
    // tailored logging just to see that the two block verification methods are working correctly
    agave_logger::setup_with_default(
        "solana_metrics::metrics=warn,solana_core=warn,\
         solana_runtime::installed_scheduler_pool=trace,solana_ledger::blockstore_processor=debug,\
         info",
    );
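    // One node per block verification method, so every variant is exercised in the same cluster.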
    let num_nodes = BlockVerificationMethod::COUNT;
    let mut config =
        ClusterConfig::new_with_equal_stakes(num_nodes, DEFAULT_MINT_LAMPORTS, DEFAULT_NODE_STAKE);

    // Overwrite block_verification_method with shuffled variants
    let mut methods = BlockVerificationMethod::iter().collect::<Vec<_>>();
    methods.shuffle(&mut rand::thread_rng());
    for (validator_config, method) in config.validator_configs.iter_mut().zip_eq(methods) {
        validator_config.block_verification_method = method;
    }

    let local = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    cluster_tests::spend_and_verify_all_nodes(
        &local.entry_point_info,
        &local.funding_keypair,
        num_nodes,
        HashSet::new(),
        SocketAddrSpace::Unspecified,
        &local.connection_cache,
    );
}
/// Forks previously marked invalid should be marked as such in fork choice on restart
#[test]
#[ignore]
#[serial]
fn test_invalid_forks_persisted_on_restart() {
    agave_logger::setup_with("info,solana_metrics=off,solana_ledger=off");

    let dup_slot = 10;
    let validator_keypairs = [
        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
    ]
    .iter()
    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
    .collect::<Vec<_>>();
    let majority_keypair = validator_keypairs[1].0.clone();

    let validators = validator_keypairs
        .iter()
        .map(|(kp, _)| kp.pubkey())
        .collect::<Vec<_>>();

    let node_stakes = vec![DEFAULT_NODE_STAKE, 100 * DEFAULT_NODE_STAKE];
    let (target_pubkey, majority_pubkey) = (validators[0], validators[1]);
    // Need majority validator to make the dup_slot
    let validator_to_slots = vec![
        (majority_pubkey, dup_slot as usize + 5),
        (target_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
    ];
    let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());

    let mut default_config = ValidatorConfig::default_for_test();
    default_config.fixed_leader_schedule = Some(FixedSchedule {
        leader_schedule: Arc::new(leader_schedule),
    });
    let mut validator_configs = make_identical_validator_configs(&default_config, 2);
    // Majority shouldn't duplicate confirm anything
    validator_configs[1].voting_disabled = true;

    let mut cluster = LocalCluster::new(
        &mut ClusterConfig {
            mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
            validator_configs,
            node_stakes,
            validator_keys: Some(validator_keypairs),
            skip_warmup_slots: true,
            ..ClusterConfig::default()
        },
        SocketAddrSpace::Unspecified,
    );

    let target_ledger_path = cluster.ledger_path(&target_pubkey);

    // Wait for us to vote past duplicate slot
    let timer = Instant::now();
    loop {
        if let Some(slot) =
            wait_for_last_vote_in_tower_to_land_in_ledger(&target_ledger_path, &target_pubkey)
        {
            if slot > dup_slot {
                break;
            }
        }

        assert!(
            timer.elapsed() < Duration::from_secs(30),
            "Did not make more than 10 blocks in 30 seconds"
        );
        sleep(Duration::from_millis(100));
    }

    // Send duplicate
    let parent = {
        let blockstore = open_blockstore(&target_ledger_path);
        let parent = blockstore
            .meta(dup_slot)
            .unwrap()
            .unwrap()
            .parent_slot
            .unwrap();
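        // Build a second version of `dup_slot`: tick-only entries shredded and signed with the
        // majority (leader) keypair, which conflicts with the version of the slot already stored
        // in the target validator's blockstore.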
        let entries = create_ticks(
            64 * (std::cmp::max(1, dup_slot - parent)),
            0,
            cluster.genesis_config.hash(),
        );
        let last_hash = entries.last().unwrap().hash;
        let version = solana_shred_version::version_from_hash(&last_hash);
        let dup_shreds = Shredder::new(dup_slot, parent, 0, version)
            .unwrap()
            .entries_to_merkle_shreds_for_tests(
                &majority_keypair,
                &entries,
                true,            // is_full_slot
                Hash::default(), // chained_merkle_root
                0,               // next_shred_index
                0,               // next_code_index
                &ReedSolomonCache::default(),
                &mut ProcessShredsStats::default(),
            )
            .0;

        info!("Sending duplicate shreds for {dup_slot}");
        cluster.send_shreds_to_validator(dup_shreds.iter().collect(), &target_pubkey);
        wait_for_duplicate_proof(&target_ledger_path, dup_slot)
            .unwrap_or_else(|| panic!("Duplicate proof for {dup_slot} not found"));

        parent
    };
  5408. info!("Duplicate proof for {dup_slot} has landed, restarting node");
  5409. let info = cluster.exit_node(&target_pubkey);
  5410. {
  5411. let blockstore = open_blockstore(&target_ledger_path);
  5412. purge_slots_with_count(&blockstore, dup_slot + 5, 100);
  5413. }
  5414. // Restart, should create an entirely new fork
  5415. cluster.restart_node(&target_pubkey, info, SocketAddrSpace::Unspecified);
  5416. info!("Waiting for fork built off {parent}");
  5417. let timer = Instant::now();
  5418. let mut checked_children: HashSet<Slot> = HashSet::default();
  5419. let mut done = false;
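    // The restarted node should produce a full child of `parent` whose shreds are signed by
    // itself, proving it built a fresh fork off `parent` rather than off the invalidated
    // `dup_slot`.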
    while !done {
        let blockstore = open_blockstore(&target_ledger_path);
        let parent_meta = blockstore.meta(parent).unwrap().expect("Meta must exist");
        for child in parent_meta.next_slots {
            if checked_children.contains(&child) {
                continue;
            }
            if blockstore.is_full(child) {
                let shreds = blockstore
                    .get_data_shreds_for_slot(child, 0)
                    .expect("Child is full");
                let mut is_our_block = true;
                for shred in shreds {
                    is_our_block &= shred.verify(&target_pubkey);
                }
                if is_our_block {
                    done = true;
                }
                checked_children.insert(child);
            }
        }

        assert!(
            timer.elapsed() < Duration::from_secs(30),
            "Did not create a new fork off parent {parent} in 30 seconds after restart"
        );
        sleep(Duration::from_millis(100));
    }
}