// local_cluster.rs

#![allow(clippy::arithmetic_side_effects)]
use {
    agave_snapshots::{snapshot_config::SnapshotConfig, SnapshotInterval},
    assert_matches::assert_matches,
    crossbeam_channel::{unbounded, Receiver},
    gag::BufferRedirect,
    itertools::Itertools,
    log::*,
    rand::seq::SliceRandom,
    serial_test::serial,
    solana_account::AccountSharedData,
    solana_accounts_db::utils::create_accounts_run_and_snapshot_dirs,
    solana_client_traits::AsyncClient,
    solana_clock::{
        self as clock, Slot, DEFAULT_SLOTS_PER_EPOCH, DEFAULT_TICKS_PER_SLOT, MAX_PROCESSING_AGE,
    },
    solana_cluster_type::ClusterType,
    solana_commitment_config::CommitmentConfig,
    solana_core::{
        consensus::{
            tower_storage::FileTowerStorage, Tower, SWITCH_FORK_THRESHOLD, VOTE_THRESHOLD_DEPTH,
        },
        optimistic_confirmation_verifier::OptimisticConfirmationVerifier,
        replay_stage::DUPLICATE_THRESHOLD,
        validator::{BlockVerificationMethod, ValidatorConfig},
    },
    solana_download_utils::download_snapshot_archive,
    solana_entry::entry::create_ticks,
    solana_epoch_schedule::{MAX_LEADER_SCHEDULE_EPOCH_OFFSET, MINIMUM_SLOTS_PER_EPOCH},
    solana_genesis_utils::open_genesis_config,
    solana_gossip::{crds_data::MAX_VOTES, gossip_service::discover_validators},
    solana_hard_forks::HardForks,
    solana_hash::Hash,
    solana_keypair::Keypair,
    solana_ledger::{
        ancestor_iterator::AncestorIterator,
        bank_forks_utils,
        blockstore::{entries_to_test_shreds, Blockstore},
        blockstore_processor::ProcessOptions,
        leader_schedule::{FixedSchedule, IdentityKeyedLeaderSchedule},
        shred::{ProcessShredsStats, ReedSolomonCache, Shred, Shredder},
        use_snapshot_archives_at_startup::UseSnapshotArchivesAtStartup,
    },
    solana_local_cluster::{
        cluster::{Cluster, ClusterValidatorInfo, QuicTpuClient},
        cluster_tests,
        integration_tests::{
            copy_blocks, create_custom_leader_schedule,
            create_custom_leader_schedule_with_random_keys, farf_dir, generate_account_paths,
            last_root_in_tower, last_vote_in_tower, ms_for_n_slots, open_blockstore,
            purge_slots_with_count, remove_tower, remove_tower_if_exists, restore_tower,
            run_cluster_partition, run_kill_partition_switch_threshold, save_tower,
            setup_snapshot_validator_config, test_faulty_node, wait_for_duplicate_proof,
            wait_for_last_vote_in_tower_to_land_in_ledger, SnapshotValidatorConfig,
            ValidatorTestConfig, DEFAULT_NODE_STAKE, RUST_LOG_FILTER,
        },
        local_cluster::{ClusterConfig, LocalCluster, DEFAULT_MINT_LAMPORTS},
        validator_configs::*,
    },
    solana_poh_config::PohConfig,
    solana_pubkey::Pubkey,
    solana_pubsub_client::pubsub_client::PubsubClient,
    solana_rpc_client::rpc_client::RpcClient,
    solana_rpc_client_api::{
        config::{
            RpcBlockSubscribeConfig, RpcBlockSubscribeFilter, RpcProgramAccountsConfig,
            RpcSignatureSubscribeConfig,
        },
        response::RpcSignatureResult,
    },
    solana_runtime::{
        commitment::VOTE_THRESHOLD_SIZE,
        snapshot_archive_info::SnapshotArchiveInfoGetter,
        snapshot_bank_utils,
        snapshot_package::SnapshotKind,
        snapshot_utils::{self, BANK_SNAPSHOTS_DIR},
    },
    solana_signer::Signer,
    solana_stake_interface::{self as stake, state::NEW_WARMUP_COOLDOWN_RATE},
    solana_streamer::socket::SocketAddrSpace,
    solana_system_interface::program as system_program,
    solana_system_transaction as system_transaction,
    solana_turbine::broadcast_stage::{
        broadcast_duplicates_run::{BroadcastDuplicatesConfig, ClusterPartition},
        BroadcastStageType,
    },
    solana_vote::{vote_parser, vote_transaction},
    solana_vote_interface::state::TowerSync,
    solana_vote_program::vote_state::MAX_LOCKOUT_HISTORY,
    std::{
        collections::{BTreeSet, HashMap, HashSet},
        fs,
        io::Read,
        iter,
        num::NonZeroU64,
        path::Path,
        sync::{
            atomic::{AtomicBool, AtomicUsize, Ordering},
            Arc, Mutex,
        },
        thread::{sleep, Builder, JoinHandle},
        time::{Duration, Instant},
    },
    strum::{EnumCount, IntoEnumIterator},
};
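// Smoke test: spin up a single-node cluster with equal stakes and confirm the cluster
// reports the expected number of validators.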
#[test]
#[serial]
fn test_local_cluster_start_and_exit() {
    agave_logger::setup();
    let num_nodes = 1;
    let cluster = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    assert_eq!(cluster.validators.len(), num_nodes);
}
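// Same smoke test, but built from an explicit ClusterConfig (custom ticks per slot and a
// minimal epoch length).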
#[test]
#[serial]
fn test_local_cluster_start_and_exit_with_config() {
    agave_logger::setup();
    const NUM_NODES: usize = 1;
    let mut config = ClusterConfig {
        validator_configs: make_identical_validator_configs(
            &ValidatorConfig::default_for_test(),
            NUM_NODES,
        ),
        node_stakes: vec![DEFAULT_NODE_STAKE; NUM_NODES],
        ticks_per_slot: 8,
        slots_per_epoch: MINIMUM_SLOTS_PER_EPOCH,
        stakers_slot_offset: MINIMUM_SLOTS_PER_EPOCH,
        ..ClusterConfig::default()
    };
    let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    assert_eq!(cluster.validators.len(), NUM_NODES);
}
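// The spend_and_verify tests below transfer funds through clusters of one, two, and three
// nodes and verify that the transfers are observed on every node.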
#[test]
#[serial]
fn test_spend_and_verify_all_nodes_1() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_spend_and_verify_all_nodes_1");
    let num_nodes = 1;
    let local = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    cluster_tests::spend_and_verify_all_nodes(
        &local.entry_point_info,
        &local.funding_keypair,
        num_nodes,
        HashSet::new(),
        SocketAddrSpace::Unspecified,
        &local.connection_cache,
    );
}
#[test]
#[serial]
fn test_spend_and_verify_all_nodes_2() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_spend_and_verify_all_nodes_2");
    let num_nodes = 2;
    let local = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    cluster_tests::spend_and_verify_all_nodes(
        &local.entry_point_info,
        &local.funding_keypair,
        num_nodes,
        HashSet::new(),
        SocketAddrSpace::Unspecified,
        &local.connection_cache,
    );
}
#[test]
#[serial]
fn test_spend_and_verify_all_nodes_3() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_spend_and_verify_all_nodes_3");
    let num_nodes = 3;
    let local = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    cluster_tests::spend_and_verify_all_nodes(
        &local.entry_point_info,
        &local.funding_keypair,
        num_nodes,
        HashSet::new(),
        SocketAddrSpace::Unspecified,
        &local.connection_cache,
    );
}
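// Subscribe to signature notifications over a validator's pubsub websocket, submit a
// transfer, and expect both a `ReceivedSignature` and a `ProcessedSignature` notification.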
#[test]
#[serial]
fn test_local_cluster_signature_subscribe() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let num_nodes = 2;
    let cluster = LocalCluster::new_with_equal_stakes(
        num_nodes,
        DEFAULT_MINT_LAMPORTS,
        DEFAULT_NODE_STAKE,
        SocketAddrSpace::Unspecified,
    );
    let nodes = cluster.get_node_pubkeys();
    // Get a non-leader node
    let non_bootstrap_id = nodes
        .into_iter()
        .find(|id| id != cluster.entry_point_info.pubkey())
        .unwrap();
    let non_bootstrap_info = cluster.get_contact_info(&non_bootstrap_id).unwrap();
    let tx_client = cluster
        .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
        .unwrap();
    let (blockhash, _) = tx_client
        .rpc_client()
        .get_latest_blockhash_with_commitment(CommitmentConfig::processed())
        .unwrap();
    let mut transaction = system_transaction::transfer(
        &cluster.funding_keypair,
        &solana_pubkey::new_rand(),
        10,
        blockhash,
    );
    let (mut sig_subscribe_client, receiver) = PubsubClient::signature_subscribe(
        &format!("ws://{}", non_bootstrap_info.rpc_pubsub().unwrap()),
        &transaction.signatures[0],
        Some(RpcSignatureSubscribeConfig {
            commitment: Some(CommitmentConfig::processed()),
            enable_received_notification: Some(true),
        }),
    )
    .unwrap();
    LocalCluster::send_transaction_with_retries(
        &tx_client,
        &[&cluster.funding_keypair],
        &mut transaction,
        5,
    )
    .unwrap();
    let mut got_received_notification = false;
    loop {
        let responses: Vec<_> = receiver.try_iter().collect();
        let mut should_break = false;
        for response in responses {
            match response.value {
                RpcSignatureResult::ProcessedSignature(_) => {
                    should_break = true;
                    break;
                }
                RpcSignatureResult::ReceivedSignature(_) => {
                    got_received_notification = true;
                }
            }
        }
        if should_break {
            break;
        }
        sleep(Duration::from_millis(100));
    }
    // If we don't drop the cluster, the blocking web socket service
    // won't return, and the `sig_subscribe_client` won't shut down
    drop(cluster);
    sig_subscribe_client.shutdown().unwrap();
    assert!(got_received_notification);
}
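// Run a two-node cluster with a 100:1 stake imbalance for several epochs, then verify the
// tick counts in the leader's ledger.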
#[test]
#[serial]
fn test_two_unbalanced_stakes() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_two_unbalanced_stakes");
    let validator_config = ValidatorConfig::default_for_test();
    let num_ticks_per_second = 100;
    let num_ticks_per_slot = 16;
    let num_slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH;
    let mut cluster = LocalCluster::new(
        &mut ClusterConfig {
            node_stakes: vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
            mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
            validator_configs: make_identical_validator_configs(&validator_config, 2),
            ticks_per_slot: num_ticks_per_slot,
            slots_per_epoch: num_slots_per_epoch,
            stakers_slot_offset: num_slots_per_epoch,
            poh_config: PohConfig::new_sleep(Duration::from_millis(1000 / num_ticks_per_second)),
            ..ClusterConfig::default()
        },
        SocketAddrSpace::Unspecified,
    );
    cluster_tests::sleep_n_epochs(
        10.0,
        &cluster.genesis_config.poh_config,
        num_ticks_per_slot,
        num_slots_per_epoch,
    );
    cluster.close_preserve_ledgers();
    let leader_pubkey = *cluster.entry_point_info.pubkey();
    let leader_ledger = cluster.validators[&leader_pubkey].info.ledger_path.clone();
    cluster_tests::verify_ledger_ticks(&leader_ledger, num_ticks_per_slot as usize);
}
#[test]
#[serial]
fn test_forwarding() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // Set up a cluster where one node is never the leader, so all txs sent to this node
    // will have to be forwarded in order to be confirmed
    let mut config = ClusterConfig {
        node_stakes: vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
        mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
        validator_configs: make_identical_validator_configs(
            &ValidatorConfig::default_for_test(),
            2,
        ),
        ..ClusterConfig::default()
    };
    let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let cluster_nodes = discover_validators(
        &cluster.entry_point_info.gossip().unwrap(),
        2,
        cluster.entry_point_info.shred_version(),
        SocketAddrSpace::Unspecified,
    )
    .unwrap();
    assert!(cluster_nodes.len() >= 2);
    let leader_pubkey = *cluster.entry_point_info.pubkey();
    let validator_info = cluster_nodes
        .iter()
        .find(|c| c.pubkey() != &leader_pubkey)
        .unwrap();
    // Confirm that transactions were forwarded to and processed by the leader.
    cluster_tests::send_many_transactions(
        validator_info,
        &cluster.funding_keypair,
        &cluster.connection_cache,
        10,
        20,
    );
}
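// Run a single-node cluster for an epoch, restart the node, and confirm it still processes
// transactions afterwards.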
#[test]
#[serial]
fn test_restart_node() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    error!("test_restart_node");
    let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH * 2;
    let ticks_per_slot = 16;
    let validator_config = ValidatorConfig::default_for_test();
    let mut cluster = LocalCluster::new(
        &mut ClusterConfig {
            node_stakes: vec![DEFAULT_NODE_STAKE],
            validator_configs: vec![safe_clone_config(&validator_config)],
            ticks_per_slot,
            slots_per_epoch,
            stakers_slot_offset: slots_per_epoch,
            skip_warmup_slots: true,
            ..ClusterConfig::default()
        },
        SocketAddrSpace::Unspecified,
    );
    let nodes = cluster.get_node_pubkeys();
    cluster_tests::sleep_n_epochs(
        1.0,
        &cluster.genesis_config.poh_config,
        clock::DEFAULT_TICKS_PER_SLOT,
        slots_per_epoch,
    );
    cluster.exit_restart_node(&nodes[0], validator_config, SocketAddrSpace::Unspecified);
    cluster_tests::sleep_n_epochs(
        0.5,
        &cluster.genesis_config.poh_config,
        clock::DEFAULT_TICKS_PER_SLOT,
        slots_per_epoch,
    );
    cluster_tests::send_many_transactions(
        &cluster.entry_point_info,
        &cluster.funding_keypair,
        &cluster.connection_cache,
        10,
        1,
    );
}
#[test]
#[serial]
fn test_mainnet_beta_cluster_type() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let mut config = ClusterConfig {
        cluster_type: ClusterType::MainnetBeta,
        node_stakes: vec![DEFAULT_NODE_STAKE],
        validator_configs: make_identical_validator_configs(
            &ValidatorConfig::default_for_test(),
            1,
        ),
        ..ClusterConfig::default()
    };
    let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let cluster_nodes = discover_validators(
        &cluster.entry_point_info.gossip().unwrap(),
        1,
        cluster.entry_point_info.shred_version(),
        SocketAddrSpace::Unspecified,
    )
    .unwrap();
    assert_eq!(cluster_nodes.len(), 1);
    let client = cluster
        .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
        .unwrap();
    // Programs that are available at epoch 0
    for program_id in [
        &solana_sdk_ids::system_program::id(),
        &stake::program::id(),
        &solana_vote_program::id(),
        &solana_sdk_ids::bpf_loader_deprecated::id(),
        &solana_sdk_ids::bpf_loader::id(),
        &solana_sdk_ids::bpf_loader_upgradeable::id(),
    ]
    .iter()
    {
        assert_matches!(
            (
                program_id,
                client
                    .rpc_client()
                    .get_account_with_commitment(program_id, CommitmentConfig::processed())
                    .unwrap()
                    .value
            ),
            (_program_id, Some(_))
        );
    }
    // Programs that are not available at epoch 0
    for program_id in [].iter() {
        assert_eq!(
            (
                program_id,
                client
                    .rpc_client()
                    .get_account_with_commitment(program_id, CommitmentConfig::processed())
                    .unwrap()
                    .value
            ),
            (program_id, None)
        );
    }
}
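// Boot a one-node cluster with snapshots enabled, wait for a full snapshot archive, download
// it over RPC, and then add a second validator that boots from the downloaded archive.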
#[test]
#[serial]
fn test_snapshot_download() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // First set up the cluster with 1 node
    let snapshot_interval_slots = NonZeroU64::new(50).unwrap();
    let num_account_paths = 3;
    let leader_snapshot_test_config =
        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
    let validator_snapshot_test_config =
        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
    let stake = DEFAULT_NODE_STAKE;
    let mut config = ClusterConfig {
        node_stakes: vec![stake],
        validator_configs: make_identical_validator_configs(
            &leader_snapshot_test_config.validator_config,
            1,
        ),
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let full_snapshot_archives_dir = &leader_snapshot_test_config
        .validator_config
        .snapshot_config
        .full_snapshot_archives_dir;
    trace!("Waiting for snapshot");
    let full_snapshot_archive_info = cluster.wait_for_next_full_snapshot(
        full_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    trace!("found: {}", full_snapshot_archive_info.path().display());
    // Download the snapshot, then boot a validator from it.
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .full_snapshot_archives_dir,
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .incremental_snapshot_archives_dir,
        (
            full_snapshot_archive_info.slot(),
            *full_snapshot_archive_info.hash(),
        ),
        SnapshotKind::FullSnapshot,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    cluster.add_validator(
        &validator_snapshot_test_config.validator_config,
        stake,
        Arc::new(Keypair::new()),
        None,
        SocketAddrSpace::Unspecified,
    );
}
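// Same as above, but with incremental snapshots: download both the full and the incremental
// archive before booting the second validator from them.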
#[test]
#[serial]
fn test_incremental_snapshot_download() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // First set up the cluster with 1 node
    let incremental_snapshot_interval = 9;
    let full_snapshot_interval = incremental_snapshot_interval * 3;
    let num_account_paths = 3;
    let leader_snapshot_test_config = SnapshotValidatorConfig::new(
        SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
        SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
        num_account_paths,
    );
    let validator_snapshot_test_config = SnapshotValidatorConfig::new(
        SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
        SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
        num_account_paths,
    );
    let stake = DEFAULT_NODE_STAKE;
    let mut config = ClusterConfig {
        node_stakes: vec![stake],
        validator_configs: make_identical_validator_configs(
            &leader_snapshot_test_config.validator_config,
            1,
        ),
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    let full_snapshot_archives_dir = &leader_snapshot_test_config
        .validator_config
        .snapshot_config
        .full_snapshot_archives_dir;
    let incremental_snapshot_archives_dir = &leader_snapshot_test_config
        .validator_config
        .snapshot_config
        .incremental_snapshot_archives_dir;
    debug!(
        "snapshot config:\n\tfull snapshot interval: {full_snapshot_interval}\n\tincremental \
         snapshot interval: {incremental_snapshot_interval}",
    );
    debug!(
        "leader config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
         {}\n\tincremental snapshot archives dir: {}",
        leader_snapshot_test_config
            .bank_snapshots_dir
            .path()
            .display(),
        leader_snapshot_test_config
            .full_snapshot_archives_dir
            .path()
            .display(),
        leader_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path()
            .display(),
    );
    debug!(
        "validator config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
         {}\n\tincremental snapshot archives dir: {}",
        validator_snapshot_test_config
            .bank_snapshots_dir
            .path()
            .display(),
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path()
            .display(),
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path()
            .display(),
    );
    trace!("Waiting for snapshots");
    let (incremental_snapshot_archive_info, full_snapshot_archive_info) = cluster
        .wait_for_next_incremental_snapshot(
            full_snapshot_archives_dir,
            incremental_snapshot_archives_dir,
            Some(Duration::from_secs(5 * 60)),
        );
    trace!(
        "found: {} and {}",
        full_snapshot_archive_info.path().display(),
        incremental_snapshot_archive_info.path().display()
    );
    // Download the snapshots, then boot a validator from them.
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .full_snapshot_archives_dir,
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .incremental_snapshot_archives_dir,
        (
            full_snapshot_archive_info.slot(),
            *full_snapshot_archive_info.hash(),
        ),
        SnapshotKind::FullSnapshot,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .full_snapshot_archives_dir,
        &validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .incremental_snapshot_archives_dir,
        (
            incremental_snapshot_archive_info.slot(),
            *incremental_snapshot_archive_info.hash(),
        ),
        SnapshotKind::IncrementalSnapshot(incremental_snapshot_archive_info.base_slot()),
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    cluster.add_validator(
        &validator_snapshot_test_config.validator_config,
        stake,
        Arc::new(Keypair::new()),
        None,
        SocketAddrSpace::Unspecified,
    );
}
/// Test the scenario where a node starts up from a snapshot and its blockstore has enough new
/// roots that cross the full snapshot interval. In this scenario, the node needs to take a full
/// snapshot while processing the blockstore so that once the background services start up, there
/// is the correct full snapshot available to take subsequent incremental snapshots.
///
/// For this test...
/// - Start a leader node and run it long enough to take a full and incremental snapshot
/// - Download those snapshots to a validator node
/// - Copy the validator snapshots to a backup directory
/// - Start up the validator node
/// - Wait for the validator node to see enough root slots to cross the full snapshot interval
/// - Delete the snapshots on the validator node and restore the ones from the backup
/// - Restart the validator node to trigger the scenario we're trying to test
/// - Wait for the validator node to generate a new incremental snapshot
/// - Copy the new incremental snapshot (and its associated full snapshot) to another new validator
/// - Start up this new validator to ensure the snapshots from ^^^ are good
#[test]
#[serial]
fn test_incremental_snapshot_download_with_crossing_full_snapshot_interval_at_startup() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    // If these intervals change, also make sure to change the loop timers accordingly.
    let incremental_snapshot_interval = 9;
    let full_snapshot_interval = incremental_snapshot_interval * 5;
    let num_account_paths = 3;
    let leader_snapshot_test_config = SnapshotValidatorConfig::new(
        SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
        SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
        num_account_paths,
    );
    let mut validator_snapshot_test_config = SnapshotValidatorConfig::new(
        SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
        SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
        num_account_paths,
    );
    // The test has asserts that require the validator always boots from snapshot archives
    validator_snapshot_test_config
        .validator_config
        .use_snapshot_archives_at_startup = UseSnapshotArchivesAtStartup::Always;
    let stake = DEFAULT_NODE_STAKE;
    let mut config = ClusterConfig {
        node_stakes: vec![stake],
        validator_configs: make_identical_validator_configs(
            &leader_snapshot_test_config.validator_config,
            1,
        ),
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    info!(
        "snapshot config:\n\tfull snapshot interval: {full_snapshot_interval:?}\n\tincremental \
         snapshot interval: {incremental_snapshot_interval:?}",
    );
    debug!(
        "leader config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
         {}\n\tincremental snapshot archives dir: {}",
        leader_snapshot_test_config
            .bank_snapshots_dir
            .path()
            .display(),
        leader_snapshot_test_config
            .full_snapshot_archives_dir
            .path()
            .display(),
        leader_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path()
            .display(),
    );
    debug!(
        "validator config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
         {}\n\tincremental snapshot archives dir: {}",
        validator_snapshot_test_config
            .bank_snapshots_dir
            .path()
            .display(),
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path()
            .display(),
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path()
            .display(),
    );
    info!("Waiting for leader to create the next incremental snapshot...");
    let (incremental_snapshot_archive, full_snapshot_archive) =
        LocalCluster::wait_for_next_incremental_snapshot(
            &cluster,
            leader_snapshot_test_config
                .full_snapshot_archives_dir
                .path(),
            leader_snapshot_test_config
                .incremental_snapshot_archives_dir
                .path(),
            Some(Duration::from_secs(5 * 60)),
        );
    info!(
        "Found snapshots:\n\tfull snapshot: {}\n\tincremental snapshot: {}",
        full_snapshot_archive.path().display(),
        incremental_snapshot_archive.path().display()
    );
    assert_eq!(
        full_snapshot_archive.slot(),
        incremental_snapshot_archive.base_slot()
    );
    info!("Waiting for leader to create snapshots... DONE");
    // Download the snapshots, then boot a validator from them.
    info!("Downloading full snapshot to validator...");
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path(),
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path(),
        (full_snapshot_archive.slot(), *full_snapshot_archive.hash()),
        SnapshotKind::FullSnapshot,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    let downloaded_full_snapshot_archive = snapshot_utils::get_highest_full_snapshot_archive_info(
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path(),
    )
    .unwrap();
    info!(
        "Downloaded full snapshot, slot: {}",
        downloaded_full_snapshot_archive.slot()
    );
    info!("Downloading incremental snapshot to validator...");
    download_snapshot_archive(
        &cluster.entry_point_info.rpc().unwrap(),
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path(),
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path(),
        (
            incremental_snapshot_archive.slot(),
            *incremental_snapshot_archive.hash(),
        ),
        SnapshotKind::IncrementalSnapshot(incremental_snapshot_archive.base_slot()),
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_full_snapshot_archives_to_retain,
        validator_snapshot_test_config
            .validator_config
            .snapshot_config
            .maximum_incremental_snapshot_archives_to_retain,
        false,
        &mut None,
    )
    .unwrap();
    let downloaded_incremental_snapshot_archive =
        snapshot_utils::get_highest_incremental_snapshot_archive_info(
            validator_snapshot_test_config
                .incremental_snapshot_archives_dir
                .path(),
            full_snapshot_archive.slot(),
        )
        .unwrap();
    info!(
        "Downloaded incremental snapshot, slot: {}, base slot: {}",
        downloaded_incremental_snapshot_archive.slot(),
        downloaded_incremental_snapshot_archive.base_slot(),
    );
    assert_eq!(
        downloaded_full_snapshot_archive.slot(),
        downloaded_incremental_snapshot_archive.base_slot()
    );
    // closure to copy the files in one directory to another directory
    let copy_files = |from: &Path, to: &Path| {
        trace!(
            "copying files from dir {}, to dir {}",
            from.display(),
            to.display()
        );
        for entry in fs::read_dir(from).unwrap() {
            let entry = entry.unwrap();
            if entry.file_type().unwrap().is_dir() {
                continue;
            }
            let from_file_path = entry.path();
            let to_file_path = to.join(from_file_path.file_name().unwrap());
            trace!(
                "\t\tcopying file from {} to {}...",
                from_file_path.display(),
                to_file_path.display()
            );
            fs::copy(from_file_path, to_file_path).unwrap();
        }
    };
    // closure to delete the files in a directory
    let delete_files = |dir: &Path| {
        trace!("deleting files in dir {}", dir.display());
        for entry in fs::read_dir(dir).unwrap() {
            let entry = entry.unwrap();
            if entry.file_type().unwrap().is_dir() {
                continue;
            }
            let file_path = entry.path();
            trace!("\t\tdeleting file {}...", file_path.display());
            fs::remove_file(file_path).unwrap();
        }
    };
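    // The same helpers, additionally applied to each directory's `remote` snapshot-archives
    // subdirectory.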
    let copy_files_with_remote = |from: &Path, to: &Path| {
        copy_files(from, to);
        let remote_from = snapshot_utils::build_snapshot_archives_remote_dir(from);
        let remote_to = snapshot_utils::build_snapshot_archives_remote_dir(to);
        let _ = fs::create_dir_all(&remote_from);
        let _ = fs::create_dir_all(&remote_to);
        copy_files(&remote_from, &remote_to);
    };
    let delete_files_with_remote = |from: &Path| {
        delete_files(from);
        let remote_dir = snapshot_utils::build_snapshot_archives_remote_dir(from);
        let _ = fs::create_dir_all(&remote_dir);
        delete_files(&remote_dir);
    };
    // After downloading the snapshots, copy them over to a backup directory. Later we'll need to
    // restart the node and guarantee that the only snapshots present are these initial ones. So,
    // the easiest way to do that is to create a backup now, delete the ones on the node before
    // restart, then copy the backup ones over again.
    let backup_validator_full_snapshot_archives_dir = tempfile::tempdir_in(farf_dir()).unwrap();
    trace!(
        "Backing up validator full snapshots to dir: {}...",
        backup_validator_full_snapshot_archives_dir.path().display()
    );
    copy_files_with_remote(
        validator_snapshot_test_config
            .full_snapshot_archives_dir
            .path(),
        backup_validator_full_snapshot_archives_dir.path(),
    );
    let backup_validator_incremental_snapshot_archives_dir =
        tempfile::tempdir_in(farf_dir()).unwrap();
    trace!(
        "Backing up validator incremental snapshots to dir: {}...",
        backup_validator_incremental_snapshot_archives_dir
            .path()
            .display()
    );
    copy_files_with_remote(
        validator_snapshot_test_config
            .incremental_snapshot_archives_dir
            .path(),
        backup_validator_incremental_snapshot_archives_dir.path(),
    );
    info!("Starting the validator...");
    let validator_identity = Arc::new(Keypair::new());
    cluster.add_validator(
        &validator_snapshot_test_config.validator_config,
        stake,
        validator_identity.clone(),
        None,
        SocketAddrSpace::Unspecified,
    );
    info!("Starting the validator... DONE");
    // To ensure that a snapshot will be taken during startup, the blockstore needs to have roots
    // that cross a full snapshot interval.
    let starting_slot = incremental_snapshot_archive.slot();
    let next_full_snapshot_slot = starting_slot + full_snapshot_interval;
    info!(
        "Waiting for the validator to see enough slots to cross a full snapshot interval \
         ({next_full_snapshot_slot})..."
    );
    let timer = Instant::now();
    loop {
        let validator_current_slot = cluster
            .build_validator_tpu_quic_client(&validator_identity.pubkey())
            .unwrap()
            .rpc_client()
            .get_slot_with_commitment(CommitmentConfig::finalized())
            .unwrap();
        trace!("validator current slot: {validator_current_slot}");
        if validator_current_slot > next_full_snapshot_slot {
            break;
        }
        assert!(
            timer.elapsed() < Duration::from_secs(30),
            "It should not take longer than 30 seconds to cross the next full snapshot interval."
        );
        std::thread::yield_now();
    }
    info!(
        "Waited {:?} for the validator to see enough slots to cross a full snapshot interval... \
         DONE",
        timer.elapsed()
    );
  966. // Get the highest full snapshot archive info for the validator, now that it has crossed the
  967. // next full snapshot interval. We are going to use this to look up the same snapshot on the
  968. // leader, which we'll then use to compare to the full snapshot the validator will create
  969. // during startup. This ensures the snapshot creation process during startup is correct.
    //
    // Putting this all in its own block so it's clear we only intend to keep the leader's info.
  972. let leader_full_snapshot_archive_for_comparison = {
  973. let validator_full_snapshot = snapshot_utils::get_highest_full_snapshot_archive_info(
  974. validator_snapshot_test_config
  975. .full_snapshot_archives_dir
  976. .path(),
  977. )
  978. .unwrap();
  979. // Now get the same full snapshot on the LEADER that we just got from the validator
  980. let mut leader_full_snapshots = snapshot_utils::get_full_snapshot_archives(
  981. leader_snapshot_test_config
  982. .full_snapshot_archives_dir
  983. .path(),
  984. );
  985. leader_full_snapshots.retain(|full_snapshot| {
  986. full_snapshot.slot() == validator_full_snapshot.slot()
  987. && full_snapshot.hash() == validator_full_snapshot.hash()
  988. });
  989. let leader_full_snapshot = leader_full_snapshots.first().unwrap();
  990. // And for sanity, the full snapshot from the leader and the validator MUST be the same
  991. assert_eq!(
  992. (
  993. validator_full_snapshot.slot(),
  994. validator_full_snapshot.hash()
  995. ),
  996. (leader_full_snapshot.slot(), leader_full_snapshot.hash())
  997. );
  998. leader_full_snapshot.clone()
  999. };
  1000. info!(
  1001. "leader full snapshot archive for comparison: \
  1002. {leader_full_snapshot_archive_for_comparison:#?}"
  1003. );
  1004. // Stop the validator before we reset its snapshots
  1005. info!("Stopping the validator...");
  1006. let validator_info = cluster.exit_node(&validator_identity.pubkey());
  1007. info!("Stopping the validator... DONE");
  1008. info!("Delete all the snapshots on the validator and restore the originals from the backup...");
  1009. delete_files_with_remote(
  1010. validator_snapshot_test_config
  1011. .full_snapshot_archives_dir
  1012. .path(),
  1013. );
  1014. delete_files_with_remote(
  1015. validator_snapshot_test_config
  1016. .incremental_snapshot_archives_dir
  1017. .path(),
  1018. );
  1019. copy_files_with_remote(
  1020. backup_validator_full_snapshot_archives_dir.path(),
  1021. validator_snapshot_test_config
  1022. .full_snapshot_archives_dir
  1023. .path(),
  1024. );
  1025. copy_files_with_remote(
  1026. backup_validator_incremental_snapshot_archives_dir.path(),
  1027. validator_snapshot_test_config
  1028. .incremental_snapshot_archives_dir
  1029. .path(),
  1030. );
  1031. info!(
  1032. "Delete all the snapshots on the validator and restore the originals from the backup... \
  1033. DONE"
  1034. );
  1035. // Get the highest full snapshot slot *before* restarting, as a comparison
  1036. let validator_full_snapshot_slot_at_startup =
  1037. snapshot_utils::get_highest_full_snapshot_archive_slot(
  1038. validator_snapshot_test_config
  1039. .full_snapshot_archives_dir
  1040. .path(),
  1041. )
  1042. .unwrap();
  1043. info!(
  1044. "Restarting the validator with full snapshot {validator_full_snapshot_slot_at_startup}..."
  1045. );
  1046. cluster.restart_node(
  1047. &validator_identity.pubkey(),
  1048. validator_info,
  1049. SocketAddrSpace::Unspecified,
  1050. );
  1051. info!("Restarting the validator... DONE");
  1052. // Now, we want to ensure that the validator can make a new incremental snapshot based on the
  1053. // new full snapshot that was created during the restart.
  1054. info!("Waiting for the validator to make new snapshots...");
  1055. let validator_next_full_snapshot_slot =
  1056. validator_full_snapshot_slot_at_startup + full_snapshot_interval;
  1057. let validator_next_incremental_snapshot_slot =
  1058. validator_next_full_snapshot_slot + incremental_snapshot_interval;
  1059. info!("Waiting for validator next full snapshot slot: {validator_next_full_snapshot_slot}");
  1060. info!(
  1061. "Waiting for validator next incremental snapshot slot: \
  1062. {validator_next_incremental_snapshot_slot}"
  1063. );
  1064. let timer = Instant::now();
  1065. loop {
  1066. if let Some(full_snapshot_slot) = snapshot_utils::get_highest_full_snapshot_archive_slot(
  1067. validator_snapshot_test_config
  1068. .full_snapshot_archives_dir
  1069. .path(),
  1070. ) {
  1071. if full_snapshot_slot >= validator_next_full_snapshot_slot {
  1072. if let Some(incremental_snapshot_slot) =
  1073. snapshot_utils::get_highest_incremental_snapshot_archive_slot(
  1074. validator_snapshot_test_config
  1075. .incremental_snapshot_archives_dir
  1076. .path(),
  1077. full_snapshot_slot,
  1078. )
  1079. {
  1080. if incremental_snapshot_slot >= validator_next_incremental_snapshot_slot {
  1081. // specific incremental snapshot is not important, just that one was created
  1082. info!(
  1083. "Validator made new snapshots, full snapshot slot: \
  1084. {full_snapshot_slot}, incremental snapshot slot: \
  1085. {incremental_snapshot_slot}",
  1086. );
  1087. break;
  1088. }
  1089. }
  1090. }
  1091. }
  1092. assert!(
  1093. timer.elapsed() < Duration::from_secs(30),
  1094. "It should not take longer than 30 seconds to cross the next incremental snapshot \
  1095. interval."
  1096. );
  1097. std::thread::yield_now();
  1098. }
  1099. info!(
  1100. "Waited {:?} for the validator to make new snapshots... DONE",
  1101. timer.elapsed()
  1102. );
  1103. // Check to make sure that the full snapshot the validator created during startup is the same
  1104. // or one greater than the snapshot the leader created.
  1105. let validator_full_snapshot_archives = snapshot_utils::get_full_snapshot_archives(
  1106. validator_snapshot_test_config
  1107. .full_snapshot_archives_dir
  1108. .path(),
  1109. );
  1110. info!("validator full snapshot archives: {validator_full_snapshot_archives:#?}");
  1111. let validator_full_snapshot_archive_for_comparison = validator_full_snapshot_archives
  1112. .into_iter()
  1113. .find(|validator_full_snapshot_archive| {
  1114. validator_full_snapshot_archive.slot()
  1115. == leader_full_snapshot_archive_for_comparison.slot()
  1116. })
  1117. .expect("validator created an unexpected full snapshot");
  1118. info!(
  1119. "Validator full snapshot archive for comparison: \
  1120. {validator_full_snapshot_archive_for_comparison:#?}"
  1121. );
  1122. assert_eq!(
  1123. validator_full_snapshot_archive_for_comparison.hash(),
  1124. leader_full_snapshot_archive_for_comparison.hash(),
  1125. );
  1126. // And lastly, startup another node with the new snapshots to ensure they work
  1127. let final_validator_snapshot_test_config = SnapshotValidatorConfig::new(
  1128. SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
  1129. SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
  1130. num_account_paths,
  1131. );
  1132. // Copy over the snapshots to the new node that it will boot from
  1133. copy_files(
  1134. validator_snapshot_test_config
  1135. .full_snapshot_archives_dir
  1136. .path(),
  1137. final_validator_snapshot_test_config
  1138. .full_snapshot_archives_dir
  1139. .path(),
  1140. );
  1141. copy_files(
  1142. validator_snapshot_test_config
  1143. .incremental_snapshot_archives_dir
  1144. .path(),
  1145. final_validator_snapshot_test_config
  1146. .incremental_snapshot_archives_dir
  1147. .path(),
  1148. );
  1149. info!("Starting final validator...");
  1150. let final_validator_identity = Arc::new(Keypair::new());
  1151. cluster.add_validator(
  1152. &final_validator_snapshot_test_config.validator_config,
  1153. stake,
  1154. final_validator_identity,
  1155. None,
  1156. SocketAddrSpace::Unspecified,
  1157. );
  1158. info!("Starting final validator... DONE");
  1159. }
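// Verifies that a validator restarted from a full snapshot whose saved tower still references
// slots older than the snapshot's root bank does not panic, and that the cluster keeps making
// progress afterwards.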
  1160. #[allow(unused_attributes)]
  1161. #[test]
  1162. #[serial]
  1163. fn test_snapshot_restart_tower() {
  1164. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1165. // First set up the cluster with 2 nodes
  1166. let snapshot_interval_slots = NonZeroU64::new(10).unwrap();
  1167. let num_account_paths = 2;
  1168. let leader_snapshot_test_config =
  1169. setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
  1170. let validator_snapshot_test_config =
  1171. setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
  1172. let mut config = ClusterConfig {
  1173. node_stakes: vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
  1174. mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
  1175. validator_configs: vec![
  1176. safe_clone_config(&leader_snapshot_test_config.validator_config),
  1177. safe_clone_config(&validator_snapshot_test_config.validator_config),
  1178. ],
  1179. skip_warmup_slots: true,
  1180. ..ClusterConfig::default()
  1181. };
  1182. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  1183. // Let the nodes run for a while, then stop one of the validators
  1184. sleep(Duration::from_millis(5000));
  1185. let all_pubkeys = cluster.get_node_pubkeys();
  1186. let validator_id = all_pubkeys
  1187. .into_iter()
  1188. .find(|x| x != cluster.entry_point_info.pubkey())
  1189. .unwrap();
  1190. let validator_info = cluster.exit_node(&validator_id);
  1191. let full_snapshot_archives_dir = &leader_snapshot_test_config
  1192. .validator_config
  1193. .snapshot_config
  1194. .full_snapshot_archives_dir;
  1195. let full_snapshot_archive_info = cluster.wait_for_next_full_snapshot(
  1196. full_snapshot_archives_dir,
  1197. Some(Duration::from_secs(5 * 60)),
  1198. );
  1199. // Copy archive to validator's snapshot output directory
  1200. let validator_archive_path = snapshot_utils::build_full_snapshot_archive_path(
  1201. validator_snapshot_test_config
  1202. .full_snapshot_archives_dir
  1203. .keep(),
  1204. full_snapshot_archive_info.slot(),
  1205. full_snapshot_archive_info.hash(),
  1206. full_snapshot_archive_info.archive_format(),
  1207. );
  1208. fs::hard_link(full_snapshot_archive_info.path(), validator_archive_path).unwrap();
  1209. // Restart validator from snapshot, the validator's tower state in this snapshot
  1210. // will contain slots < the root bank of the snapshot. Validator should not panic.
  1211. cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
  1212. // Test cluster can still make progress and get confirmations in tower
  1213. // Use the restarted node as the discovery point so that we get updated
  1214. // validator's ContactInfo
  1215. let restarted_node_info = cluster.get_contact_info(&validator_id).unwrap();
  1216. cluster_tests::spend_and_verify_all_nodes(
  1217. restarted_node_info,
  1218. &cluster.funding_keypair,
  1219. 2,
  1220. HashSet::new(),
  1221. SocketAddrSpace::Unspecified,
  1222. &cluster.connection_cache,
  1223. );
  1224. }
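// Verifies that a validator booted from a snapshot repairs forward from the snapshot slot: after
// running for a while, its blockstore must not contain any slots below the snapshot slot (the
// "floor").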
  1225. #[test]
  1226. #[serial]
  1227. fn test_snapshots_blockstore_floor() {
  1228. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1229. // First set up the cluster with 1 snapshotting leader
  1230. let snapshot_interval_slots = NonZeroU64::new(100).unwrap();
  1231. let num_account_paths = 4;
  1232. let leader_snapshot_test_config =
  1233. setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
  1234. let mut validator_snapshot_test_config =
  1235. setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
  1236. let full_snapshot_archives_dir = &leader_snapshot_test_config
  1237. .validator_config
  1238. .snapshot_config
  1239. .full_snapshot_archives_dir;
  1240. let mut config = ClusterConfig {
  1241. node_stakes: vec![DEFAULT_NODE_STAKE],
  1242. validator_configs: make_identical_validator_configs(
  1243. &leader_snapshot_test_config.validator_config,
  1244. 1,
  1245. ),
  1246. ..ClusterConfig::default()
  1247. };
  1248. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    trace!("Waiting for snapshot tar to be generated");
  1250. let archive_info = loop {
  1251. let archive =
  1252. snapshot_utils::get_highest_full_snapshot_archive_info(full_snapshot_archives_dir);
  1253. if archive.is_some() {
  1254. trace!("snapshot exists");
  1255. break archive.unwrap();
  1256. }
  1257. sleep(Duration::from_millis(5000));
  1258. };
  1259. // Copy archive to validator's snapshot output directory
  1260. let validator_archive_path = snapshot_utils::build_full_snapshot_archive_path(
  1261. validator_snapshot_test_config
  1262. .full_snapshot_archives_dir
  1263. .keep(),
  1264. archive_info.slot(),
  1265. archive_info.hash(),
  1266. validator_snapshot_test_config
  1267. .validator_config
  1268. .snapshot_config
  1269. .archive_format,
  1270. );
  1271. fs::hard_link(archive_info.path(), validator_archive_path).unwrap();
  1272. let slot_floor = archive_info.slot();
  1273. // Start up a new node from a snapshot
  1274. let cluster_nodes = discover_validators(
  1275. &cluster.entry_point_info.gossip().unwrap(),
  1276. 1,
  1277. cluster.entry_point_info.shred_version(),
  1278. SocketAddrSpace::Unspecified,
  1279. )
  1280. .unwrap();
  1281. let mut known_validators = HashSet::new();
  1282. known_validators.insert(*cluster_nodes[0].pubkey());
  1283. validator_snapshot_test_config
  1284. .validator_config
  1285. .known_validators = Some(known_validators);
  1286. cluster.add_validator(
  1287. &validator_snapshot_test_config.validator_config,
  1288. DEFAULT_NODE_STAKE,
  1289. Arc::new(Keypair::new()),
  1290. None,
  1291. SocketAddrSpace::Unspecified,
  1292. );
  1293. let all_pubkeys = cluster.get_node_pubkeys();
  1294. let validator_id = all_pubkeys
  1295. .into_iter()
  1296. .find(|x| x != cluster.entry_point_info.pubkey())
  1297. .unwrap();
  1298. let validator_client = cluster
  1299. .build_validator_tpu_quic_client(&validator_id)
  1300. .unwrap();
  1301. let mut current_slot = 0;
  1302. // Let this validator run a while with repair
  1303. let target_slot = slot_floor + 40;
  1304. while current_slot <= target_slot {
  1305. trace!("current_slot: {current_slot}");
  1306. if let Ok(slot) = validator_client
  1307. .rpc_client()
  1308. .get_slot_with_commitment(CommitmentConfig::processed())
  1309. {
  1310. current_slot = slot;
  1311. } else {
  1312. continue;
  1313. }
  1314. sleep(Duration::from_secs(1));
  1315. }
  1316. // Check the validator ledger doesn't contain any slots < slot_floor
  1317. cluster.close_preserve_ledgers();
  1318. let validator_ledger_path = &cluster.validators[&validator_id];
  1319. let blockstore = Blockstore::open(&validator_ledger_path.info.ledger_path).unwrap();
  1320. // Skip the zeroth slot in blockstore that the ledger is initialized with
  1321. let (first_slot, _) = blockstore.slot_meta_iterator(1).unwrap().next().unwrap();
  1322. assert_eq!(first_slot, slot_floor);
  1323. }
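// Repeatedly restarts a single snapshotting validator from its latest full snapshot and checks
// that previously confirmed balances are preserved and that new transactions still succeed after
// each restart.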
  1324. #[test]
  1325. #[serial]
  1326. fn test_snapshots_restart_validity() {
  1327. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1328. let snapshot_interval_slots = NonZeroU64::new(100).unwrap();
  1329. let num_account_paths = 1;
  1330. let mut snapshot_test_config =
  1331. setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
  1332. let full_snapshot_archives_dir = &snapshot_test_config
  1333. .validator_config
  1334. .snapshot_config
  1335. .full_snapshot_archives_dir;
  1336. // Set up the cluster with 1 snapshotting validator
  1337. let mut all_account_storage_dirs = vec![std::mem::take(
  1338. &mut snapshot_test_config.account_storage_dirs,
  1339. )];
  1340. let mut config = ClusterConfig {
  1341. node_stakes: vec![DEFAULT_NODE_STAKE],
  1342. validator_configs: make_identical_validator_configs(
  1343. &snapshot_test_config.validator_config,
  1344. 1,
  1345. ),
  1346. ..ClusterConfig::default()
  1347. };
  1348. // Create and reboot the node from snapshot `num_runs` times
  1349. let num_runs = 3;
  1350. let mut expected_balances = HashMap::new();
  1351. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  1352. for i in 1..num_runs {
  1353. info!("run {i}");
        // Push transactions to one of the nodes and confirm that the transactions were
        // forwarded and processed.
  1356. trace!("Sending transactions");
  1357. let new_balances = cluster_tests::send_many_transactions(
  1358. &cluster.entry_point_info,
  1359. &cluster.funding_keypair,
  1360. &cluster.connection_cache,
  1361. 10,
  1362. 10,
  1363. );
  1364. expected_balances.extend(new_balances);
  1365. cluster.wait_for_next_full_snapshot(
  1366. full_snapshot_archives_dir,
  1367. Some(Duration::from_secs(5 * 60)),
  1368. );
        // Create new account paths since validator exit is not guaranteed to clean up RPC threads,
        // which may delete the old accounts on exit at any point
  1371. let (new_account_storage_dirs, new_account_storage_paths) =
  1372. generate_account_paths(num_account_paths);
  1373. all_account_storage_dirs.push(new_account_storage_dirs);
  1374. snapshot_test_config.validator_config.account_paths = new_account_storage_paths;
  1375. // Restart node
  1376. trace!("Restarting cluster from snapshot");
  1377. let nodes = cluster.get_node_pubkeys();
  1378. cluster.exit_restart_node(
  1379. &nodes[0],
  1380. safe_clone_config(&snapshot_test_config.validator_config),
  1381. SocketAddrSpace::Unspecified,
  1382. );
  1383. // Verify account balances on validator
  1384. trace!("Verifying balances");
  1385. cluster_tests::verify_balances(
  1386. expected_balances.clone(),
  1387. &cluster.entry_point_info,
  1388. cluster.connection_cache.clone(),
  1389. );
  1390. // Check that we can still push transactions
  1391. trace!("Spending and verifying");
  1392. cluster_tests::spend_and_verify_all_nodes(
  1393. &cluster.entry_point_info,
  1394. &cluster.funding_keypair,
  1395. 1,
  1396. HashSet::new(),
  1397. SocketAddrSpace::Unspecified,
  1398. &cluster.connection_cache,
  1399. );
  1400. }
  1401. }
  1402. #[test]
  1403. #[serial]
  1404. #[allow(unused_attributes)]
  1405. #[ignore]
  1406. fn test_fail_entry_verification_leader() {
  1407. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1408. let leader_stake = (DUPLICATE_THRESHOLD * 100.0) as u64 + 1;
  1409. let validator_stake1 = (100 - leader_stake) / 2;
  1410. let validator_stake2 = 100 - leader_stake - validator_stake1;
  1411. let (cluster, _) = test_faulty_node(
  1412. BroadcastStageType::FailEntryVerification,
  1413. vec![leader_stake, validator_stake1, validator_stake2],
  1414. None,
  1415. None,
  1416. );
  1417. cluster.check_for_new_roots(
  1418. 16,
  1419. "test_fail_entry_verification_leader",
  1420. SocketAddrSpace::Unspecified,
  1421. );
  1422. }
  1423. #[test]
  1424. #[serial]
  1425. #[ignore]
  1426. #[allow(unused_attributes)]
  1427. fn test_fake_shreds_broadcast_leader() {
  1428. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1429. let node_stakes = vec![300, 100];
  1430. let (cluster, _) = test_faulty_node(
  1431. BroadcastStageType::BroadcastFakeShreds,
  1432. node_stakes,
  1433. None,
  1434. None,
  1435. );
  1436. cluster.check_for_new_roots(
  1437. 16,
  1438. "test_fake_shreds_broadcast_leader",
  1439. SocketAddrSpace::Unspecified,
  1440. );
  1441. }
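// Exercises `wait_for_max_stake_below_threshold_with_timeout`: with several equally staked
// validators warming up, the largest single validator's share of activated stake should drop
// below the given threshold within the computed timeout.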
  1442. #[test]
  1443. #[serial]
  1444. fn test_wait_for_max_stake() {
  1445. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1446. let validator_config = ValidatorConfig::default_for_test();
  1447. let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH;
  1448. // Set this large enough to allow for skipped slots but still be able to
  1449. // make a root and derive the new leader schedule in time.
  1450. let stakers_slot_offset = slots_per_epoch.saturating_mul(MAX_LEADER_SCHEDULE_EPOCH_OFFSET);
  1451. // Reduce this so that we can complete the test faster by advancing through
  1452. // slots/epochs faster. But don't make it too small because it can cause the
  1453. // test to fail in two important ways:
  1454. // 1. Increase likelihood of skipped slots, which can prevent rooting and
  1455. // lead to not generating leader schedule in time and cluster getting
  1456. // stuck.
  1457. // 2. Make the cluster advance through too many epochs before all the
  1458. // validators spin up, which can lead to not properly observing gossip
  1459. // votes, not repairing missing slots, and some subset of nodes getting
  1460. // stuck.
  1461. let ticks_per_slot = 32;
  1462. let num_validators = 4;
  1463. let mut config = ClusterConfig {
  1464. node_stakes: vec![DEFAULT_NODE_STAKE; num_validators],
  1465. validator_configs: make_identical_validator_configs(&validator_config, num_validators),
  1466. slots_per_epoch,
  1467. stakers_slot_offset,
  1468. ticks_per_slot,
  1469. ..ClusterConfig::default()
  1470. };
  1471. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  1472. let client = RpcClient::new_socket(cluster.entry_point_info.rpc().unwrap());
  1473. let num_validators_activating_stake = num_validators - 1;
  1474. // Number of epochs it is expected to take to completely activate the stake
  1475. // for all the validators.
  1476. let num_expected_epochs = (num_validators_activating_stake as f64)
  1477. .log(1. + NEW_WARMUP_COOLDOWN_RATE)
  1478. .ceil() as u32
  1479. + 1;
  1480. let expected_test_duration = config.poh_config.target_tick_duration
  1481. * ticks_per_slot as u32
  1482. * slots_per_epoch as u32
  1483. * num_expected_epochs;
  1484. // Make the timeout double the expected duration to provide some margin.
  1485. // Especially considering tests may be running in parallel.
  1486. let timeout = expected_test_duration * 2;
  1487. if let Err(err) = client.wait_for_max_stake_below_threshold_with_timeout(
  1488. CommitmentConfig::default(),
  1489. (100 / num_validators_activating_stake) as f32,
  1490. timeout,
  1491. ) {
  1492. panic!("wait_for_max_stake failed: {err:?}");
  1493. }
  1494. assert!(client.get_slot().unwrap() > 10);
  1495. }
  1496. #[test]
  1497. #[serial]
  1498. // Test that when a leader is leader for banks B_i..B_{i+n}, and B_i is not
  1499. // votable, then B_{i+1} still chains to B_i
  1500. fn test_no_voting() {
  1501. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1502. let validator_config = ValidatorConfig {
  1503. voting_disabled: true,
  1504. ..ValidatorConfig::default_for_test()
  1505. };
  1506. let mut config = ClusterConfig {
  1507. node_stakes: vec![DEFAULT_NODE_STAKE],
  1508. validator_configs: vec![validator_config],
  1509. ..ClusterConfig::default()
  1510. };
  1511. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  1512. let client = cluster
  1513. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  1514. .unwrap();
  1515. loop {
  1516. let last_slot = client
  1517. .rpc_client()
  1518. .get_slot_with_commitment(CommitmentConfig::processed())
  1519. .expect("Couldn't get slot");
  1520. if last_slot > 4 * VOTE_THRESHOLD_DEPTH as u64 {
  1521. break;
  1522. }
  1523. sleep(Duration::from_secs(1));
  1524. }
  1525. cluster.close_preserve_ledgers();
  1526. let leader_pubkey = *cluster.entry_point_info.pubkey();
  1527. let ledger_path = cluster.validators[&leader_pubkey].info.ledger_path.clone();
  1528. let ledger = Blockstore::open(&ledger_path).unwrap();
  1529. for i in 0..2 * VOTE_THRESHOLD_DEPTH {
  1530. let meta = ledger.meta(i as u64).unwrap().unwrap();
  1531. let parent = meta.parent_slot;
  1532. let expected_parent = i.saturating_sub(1);
  1533. assert_eq!(parent, Some(expected_parent as u64));
  1534. }
  1535. }
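// Forces an optimistic confirmation violation: an optimistically confirmed slot is marked dead on
// the heavier validator (with its tower removed), that validator switches to a new fork, and the
// test then asserts that the violation is reported in the logs via the expected
// `OptimisticConfirmationVerifier` log line.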
  1536. #[test]
  1537. #[serial]
  1538. fn test_optimistic_confirmation_violation_detection() {
  1539. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1540. // First set up the cluster with 2 nodes
  1541. let slots_per_epoch = 2048;
  1542. let node_stakes = vec![50 * DEFAULT_NODE_STAKE, 51 * DEFAULT_NODE_STAKE];
  1543. let validator_keys: Vec<_> = [
  1544. "4qhhXNTbKD1a5vxDDLZcHKj7ELNeiivtUBxn3wUK1F5VRsQVP89VUhfXqSfgiFB14GfuBgtrQ96n9NvWQADVkcCg",
  1545. "3kHBzVwie5vTEaY6nFCPeFT8qDpoXzn7dCEioGRNBTnUDpvwnG85w8Wq63gVWpVTP8k2a8cgcWRjSXyUkEygpXWS",
  1546. ]
  1547. .iter()
  1548. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  1549. .take(node_stakes.len())
  1550. .collect();
    // Do not restart the validator which is the cluster entrypoint because its gossip port
    // might change after the restart, resulting in the two nodes not being able
    // to form a cluster. The heavier validator is the second node.
    let node_to_restart = validator_keys[1].0.pubkey();
    // Wait for supermajority (WFSM) as we require an OC slot > 50 within 100 seconds
  1556. let mut validator_config = ValidatorConfig::default_for_test();
  1557. validator_config.wait_for_supermajority = Some(0);
  1558. let mut config = ClusterConfig {
  1559. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  1560. node_stakes: node_stakes.clone(),
  1561. validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
  1562. validator_keys: Some(validator_keys),
  1563. slots_per_epoch,
  1564. stakers_slot_offset: slots_per_epoch,
  1565. skip_warmup_slots: true,
  1566. ..ClusterConfig::default()
  1567. };
  1568. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  1569. // Let the nodes run for a while. Wait for validators to vote on slot `S`
  1570. // so that the vote on `S-1` is definitely in gossip and optimistic confirmation is
  1571. // detected on slot `S-1` for sure, then stop the heavier of the two
  1572. // validators
  1573. let client = cluster
  1574. .build_validator_tpu_quic_client(&node_to_restart)
  1575. .unwrap();
  1576. let start = Instant::now();
  1577. let target_slot = 50;
  1578. let max_wait_time_seconds = 100;
  1579. let mut optimistically_confirmed_slot;
  1580. loop {
  1581. optimistically_confirmed_slot = client
  1582. .rpc_client()
  1583. .get_slot_with_commitment(CommitmentConfig::confirmed())
  1584. .unwrap();
  1585. if optimistically_confirmed_slot > target_slot {
  1586. break;
  1587. }
  1588. if start.elapsed() > Duration::from_secs(max_wait_time_seconds) {
  1589. cluster.exit();
            panic!(
                "Didn't get optimistically confirmed slot > {target_slot} within \
                 {max_wait_time_seconds} seconds"
            );
  1594. }
  1595. sleep(Duration::from_millis(100));
  1596. }
  1597. info!("exiting node");
  1598. drop(client);
  1599. let mut exited_validator_info = cluster.exit_node(&node_to_restart);
  1600. info!("exiting node success");
    // Mark the fork as dead on the heavier validator; this should make the fork effectively
    // dead, even though it was optimistically confirmed. The smaller validator should
    // create and jump over to a new fork.
    // Also, remove the saved tower to intentionally make the restarted validator violate the
    // optimistic confirmation.
  1606. let optimistically_confirmed_slot_parent = {
  1607. let tower = restore_tower(
  1608. &exited_validator_info.info.ledger_path,
  1609. &exited_validator_info.info.keypair.pubkey(),
  1610. )
  1611. .unwrap();
  1612. // Vote must exist since we waited for OC and so this node must have voted
  1613. let last_voted_slot = tower.last_voted_slot().expect("vote must exist");
  1614. let blockstore = open_blockstore(&exited_validator_info.info.ledger_path);
  1615. // The last vote must be descended from the OC slot
  1616. assert!(
  1617. AncestorIterator::new_inclusive(last_voted_slot, &blockstore)
  1618. .contains(&optimistically_confirmed_slot)
  1619. );
  1620. info!(
  1621. "Setting slot: {optimistically_confirmed_slot} on main fork as dead, should cause fork"
  1622. );
        // Necessary, otherwise the tower will inform this validator that its latest
        // vote is on slot `optimistically_confirmed_slot`. This will then prevent this validator
        // from resetting to the parent of `optimistically_confirmed_slot` to create an alternative fork, because
        // 1) the validator can't vote on an earlier ancestor of its last vote due to the switch
        //    threshold (can't vote on ancestors of the last vote), and
        // 2) it won't reset to this earlier ancestor because a reset can only happen on the same
        //    voted fork if it's for the last vote slot or later.
  1630. remove_tower(&exited_validator_info.info.ledger_path, &node_to_restart);
  1631. blockstore
  1632. .set_dead_slot(optimistically_confirmed_slot)
  1633. .unwrap();
  1634. blockstore
  1635. .meta(optimistically_confirmed_slot)
  1636. .unwrap()
  1637. .unwrap()
  1638. .parent_slot
  1639. .unwrap()
  1640. };
  1641. {
  1642. // Buffer stderr to detect optimistic slot violation log
  1643. let buf = std::env::var("OPTIMISTIC_CONF_TEST_DUMP_LOG")
  1644. .err()
  1645. .map(|_| BufferRedirect::stderr().unwrap());
        // In order to prevent the node from voting on a slot it's already voted on,
        // which can potentially cause a panic in gossip, start up the validator as a
        // non-voter and wait for it to make a new block
  1649. exited_validator_info.config.voting_disabled = true;
  1650. cluster.restart_node(
  1651. &node_to_restart,
  1652. exited_validator_info,
  1653. SocketAddrSpace::Unspecified,
  1654. );
        // Wait for this node to make a fork that doesn't include the `optimistically_confirmed_slot`
  1656. info!(
  1657. "Looking for slot not equal to {optimistically_confirmed_slot} with parent \
  1658. {optimistically_confirmed_slot_parent}"
  1659. );
  1660. let start = Instant::now();
  1661. let new_fork_slot;
  1662. 'outer: loop {
  1663. sleep(Duration::from_millis(1000));
  1664. let ledger_path = cluster.ledger_path(&node_to_restart);
  1665. let blockstore = open_blockstore(&ledger_path);
  1666. let potential_new_forks = blockstore
  1667. .meta(optimistically_confirmed_slot_parent)
  1668. .unwrap()
  1669. .unwrap()
  1670. .next_slots;
  1671. for slot in potential_new_forks {
                // Wait for a fork to be created that does not include the OC slot.
                // Now on restart the validator should only vote for this new `slot`, which it has
                // never voted on before, and thus avoid the panic in gossip
  1675. if slot > optimistically_confirmed_slot && blockstore.is_full(slot) {
  1676. new_fork_slot = slot;
  1677. break 'outer;
  1678. }
  1679. }
  1680. if start.elapsed() > Duration::from_secs(max_wait_time_seconds) {
  1681. cluster.exit();
  1682. panic!("Didn't get new fork within {max_wait_time_seconds} seconds");
  1683. }
  1684. }
  1685. // Exit again, restart with voting enabled
  1686. let mut exited_validator_info = cluster.exit_node(&node_to_restart);
  1687. exited_validator_info.config.voting_disabled = false;
  1688. cluster.restart_node(
  1689. &node_to_restart,
  1690. exited_validator_info,
  1691. SocketAddrSpace::Unspecified,
  1692. );
  1693. // Wait for a root descended from `new_fork_slot` to be set.
  1694. let client = cluster
  1695. .build_validator_tpu_quic_client(&node_to_restart)
  1696. .unwrap();
  1697. info!("looking for root > {optimistically_confirmed_slot} on new fork {new_fork_slot}");
  1698. let start = Instant::now();
  1699. loop {
  1700. info!("Client connecting to: {}", client.rpc_client().url());
  1701. let last_root = client
  1702. .rpc_client()
  1703. .get_slot_with_commitment(CommitmentConfig::finalized())
  1704. .unwrap();
  1705. if last_root > new_fork_slot {
  1706. info!("Found root: {last_root} > {new_fork_slot}");
  1707. let ledger_path = cluster.ledger_path(&node_to_restart);
  1708. let blockstore = open_blockstore(&ledger_path);
  1709. if AncestorIterator::new_inclusive(last_root, &blockstore).contains(&new_fork_slot)
  1710. {
  1711. break;
  1712. }
  1713. }
  1714. if start.elapsed() > Duration::from_secs(max_wait_time_seconds) {
  1715. cluster.exit();
  1716. panic!("Didn't get root on new fork within {max_wait_time_seconds} seconds");
  1717. }
  1718. sleep(Duration::from_millis(100));
  1719. }
        // Check that the validator detected that optimistic confirmation of
        // `last_voted_slot` failed
        let expected_log =
            OptimisticConfirmationVerifier::format_optimistic_confirmed_slot_violation_log(
                optimistically_confirmed_slot,
            );
        // The violation detection thread can be behind, so poll the logs for up to 10 seconds
  1727. if let Some(mut buf) = buf {
  1728. let start = Instant::now();
  1729. let mut success = false;
  1730. let mut output = String::new();
  1731. while start.elapsed().as_secs() < 10 {
  1732. buf.read_to_string(&mut output).unwrap();
  1733. if output.contains(&expected_log) {
  1734. success = true;
  1735. break;
  1736. }
  1737. sleep(Duration::from_millis(10));
  1738. }
  1739. print!("{output}");
  1740. assert!(success);
  1741. } else {
  1742. panic!("dumped log and disabled testing");
  1743. }
  1744. }
  1745. // Make sure validator still makes progress
  1746. cluster_tests::check_for_new_roots(
  1747. 16,
  1748. &[cluster.get_contact_info(&node_to_restart).unwrap().clone()],
  1749. &cluster.connection_cache,
  1750. "test_optimistic_confirmation_violation",
  1751. );
  1752. }
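// Checks tower persistence across restarts: the saved tower's root should track the validator's
// latest root, an intentionally rolled-back tower should not prevent further progress, and the
// validator should also be able to start with no tower file at all (rebuilding it from its vote
// account contents).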
  1753. #[test]
  1754. #[serial]
  1755. fn test_validator_saves_tower() {
  1756. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1757. let validator_config = ValidatorConfig {
  1758. require_tower: true,
  1759. ..ValidatorConfig::default_for_test()
  1760. };
  1761. let validator_identity_keypair = Arc::new(Keypair::new());
  1762. let validator_id = validator_identity_keypair.pubkey();
  1763. let mut config = ClusterConfig {
  1764. node_stakes: vec![DEFAULT_NODE_STAKE],
  1765. validator_configs: vec![validator_config],
  1766. validator_keys: Some(vec![(validator_identity_keypair.clone(), true)]),
  1767. ..ClusterConfig::default()
  1768. };
  1769. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  1770. let validator_client = cluster
  1771. .build_validator_tpu_quic_client(&validator_id)
  1772. .unwrap();
  1773. let ledger_path = cluster
  1774. .validators
  1775. .get(&validator_id)
  1776. .unwrap()
  1777. .info
  1778. .ledger_path
  1779. .clone();
  1780. let file_tower_storage = FileTowerStorage::new(ledger_path.clone());
  1781. // Wait for some votes to be generated
  1782. loop {
  1783. if let Ok(slot) = validator_client
  1784. .rpc_client()
  1785. .get_slot_with_commitment(CommitmentConfig::processed())
  1786. {
  1787. trace!("current slot: {slot}");
  1788. if slot > 2 {
  1789. break;
  1790. }
  1791. }
  1792. sleep(Duration::from_millis(10));
  1793. }
  1794. // Stop validator and check saved tower
  1795. let validator_info = cluster.exit_node(&validator_id);
  1796. let tower1 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
  1797. trace!("tower1: {tower1:?}");
  1798. assert_eq!(tower1.root(), 0);
  1799. assert!(tower1.last_voted_slot().is_some());
  1800. // Restart the validator and wait for a new root
  1801. cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
  1802. let validator_client = cluster
  1803. .build_validator_tpu_quic_client(&validator_id)
  1804. .unwrap();
  1805. // Wait for the first new root
  1806. let last_replayed_root = loop {
  1807. if let Ok(root) = validator_client
  1808. .rpc_client()
  1809. .get_slot_with_commitment(CommitmentConfig::finalized())
  1810. {
  1811. trace!("current root: {root}");
  1812. if root > 0 {
  1813. break root;
  1814. }
  1815. }
  1816. sleep(Duration::from_millis(50));
  1817. };
  1818. // Stop validator, and check saved tower
  1819. let validator_info = cluster.exit_node(&validator_id);
  1820. let tower2 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
  1821. trace!("tower2: {tower2:?}");
  1822. assert_eq!(tower2.root(), last_replayed_root);
  1823. // Rollback saved tower to `tower1` to simulate a validator starting from a newer snapshot
  1824. // without having to wait for that snapshot to be generated in this test
  1825. tower1
  1826. .save(&file_tower_storage, &validator_identity_keypair)
  1827. .unwrap();
  1828. cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
  1829. let validator_client = cluster
  1830. .build_validator_tpu_quic_client(&validator_id)
  1831. .unwrap();
  1832. // Wait for a new root, demonstrating the validator was able to make progress from the older `tower1`
  1833. let new_root = loop {
  1834. if let Ok(root) = validator_client
  1835. .rpc_client()
  1836. .get_slot_with_commitment(CommitmentConfig::finalized())
  1837. {
  1838. trace!("current root: {root}, last_replayed_root: {last_replayed_root}");
  1839. if root > last_replayed_root {
  1840. break root;
  1841. }
  1842. }
  1843. sleep(Duration::from_millis(50));
  1844. };
  1845. // Check the new root is reflected in the saved tower state
  1846. let mut validator_info = cluster.exit_node(&validator_id);
  1847. let tower3 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
  1848. trace!("tower3: {tower3:?}");
  1849. let tower3_root = tower3.root();
  1850. assert!(tower3_root >= new_root);
  1851. // Remove the tower file entirely and allow the validator to start without a tower. It will
  1852. // rebuild tower from its vote account contents
  1853. remove_tower(&ledger_path, &validator_id);
  1854. validator_info.config.require_tower = false;
  1855. cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
  1856. let validator_client = cluster
  1857. .build_validator_tpu_quic_client(&validator_id)
  1858. .unwrap();
  1859. // Wait for another new root
  1860. let new_root = loop {
  1861. if let Ok(root) = validator_client
  1862. .rpc_client()
  1863. .get_slot_with_commitment(CommitmentConfig::finalized())
  1864. {
  1865. trace!("current root: {root}, last tower root: {tower3_root}");
  1866. if root > tower3_root {
  1867. break root;
  1868. }
  1869. }
  1870. sleep(Duration::from_millis(50));
  1871. };
  1872. cluster.close_preserve_ledgers();
  1873. let tower4 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
  1874. trace!("tower4: {tower4:?}");
  1875. assert!(tower4.root() >= new_root);
  1876. }
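// Returns the root slot recorded in the tower persisted at `tower_path` for `node_pubkey`, if a
// tower can be restored.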
  1877. fn root_in_tower(tower_path: &Path, node_pubkey: &Pubkey) -> Option<Slot> {
  1878. restore_tower(tower_path, node_pubkey).map(|tower| tower.root())
  1879. }
  1880. enum ClusterMode {
  1881. MasterOnly,
  1882. MasterSlave,
  1883. }
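// Creates a "future tower" scenario: after the validator roots some slots, its blockstore is
// purged from a slot below the tower's root, so the persisted tower refers to slots the ledger no
// longer contains. The validator must still restart, re-root past that point, and produce a
// fork-free ancestor chain.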
  1884. fn do_test_future_tower(cluster_mode: ClusterMode) {
  1885. agave_logger::setup_with_default(RUST_LOG_FILTER);
  1886. // First set up the cluster with 4 nodes
  1887. let slots_per_epoch = 2048;
  1888. let node_stakes = match cluster_mode {
  1889. ClusterMode::MasterOnly => vec![DEFAULT_NODE_STAKE],
  1890. ClusterMode::MasterSlave => vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
  1891. };
  1892. let validator_keys = [
  1893. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  1894. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  1895. ]
  1896. .iter()
  1897. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  1898. .take(node_stakes.len())
  1899. .collect::<Vec<_>>();
  1900. let validators = validator_keys
  1901. .iter()
  1902. .map(|(kp, _)| kp.pubkey())
  1903. .collect::<Vec<_>>();
  1904. let validator_a_pubkey = match cluster_mode {
  1905. ClusterMode::MasterOnly => validators[0],
  1906. ClusterMode::MasterSlave => validators[1],
  1907. };
  1908. let mut validator_config = ValidatorConfig::default_for_test();
  1909. validator_config.wait_for_supermajority = Some(0);
  1910. let mut config = ClusterConfig {
  1911. mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
  1912. node_stakes: node_stakes.clone(),
  1913. validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
  1914. validator_keys: Some(validator_keys),
  1915. slots_per_epoch,
  1916. stakers_slot_offset: slots_per_epoch,
  1917. skip_warmup_slots: true,
  1918. ..ClusterConfig::default()
  1919. };
  1920. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  1921. let val_a_ledger_path = cluster.ledger_path(&validator_a_pubkey);
  1922. loop {
  1923. sleep(Duration::from_millis(100));
  1924. if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  1925. if root >= 15 {
  1926. break;
  1927. }
  1928. }
  1929. }
  1930. let purged_slot_before_restart = 10;
  1931. let validator_a_info = cluster.exit_node(&validator_a_pubkey);
  1932. {
  1933. // create a warped future tower without mangling the tower itself
  1934. info!(
  1935. "Revert blockstore before slot {purged_slot_before_restart} and effectively create a \
  1936. future tower",
  1937. );
  1938. let blockstore = open_blockstore(&val_a_ledger_path);
  1939. purge_slots_with_count(&blockstore, purged_slot_before_restart, 100);
  1940. }
  1941. cluster.restart_node(
  1942. &validator_a_pubkey,
  1943. validator_a_info,
  1944. SocketAddrSpace::Unspecified,
  1945. );
  1946. let mut newly_rooted = false;
  1947. let some_root_after_restart = purged_slot_before_restart + 25; // 25 is arbitrary; just wait a bit
  1948. for _ in 0..600 {
  1949. sleep(Duration::from_millis(100));
  1950. if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  1951. if root >= some_root_after_restart {
  1952. newly_rooted = true;
  1953. break;
  1954. }
  1955. }
  1956. }
  1957. let _validator_a_info = cluster.exit_node(&validator_a_pubkey);
  1958. if newly_rooted {
  1959. // there should be no forks; i.e. monotonically increasing ancestor chain
  1960. let (last_vote, _) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
  1961. let blockstore = open_blockstore(&val_a_ledger_path);
  1962. let actual_block_ancestors = AncestorIterator::new_inclusive(last_vote, &blockstore)
  1963. .take_while(|a| *a >= some_root_after_restart)
  1964. .collect::<Vec<_>>();
        let expected_continuous_no_fork_votes = (some_root_after_restart..=last_vote)
            .rev()
            .collect::<Vec<_>>();
        assert_eq!(actual_block_ancestors, expected_continuous_no_fork_votes);
  1969. assert!(actual_block_ancestors.len() > MAX_LOCKOUT_HISTORY);
  1970. info!("validator managed to handle future tower!");
  1971. } else {
  1972. panic!("no root detected");
  1973. }
  1974. }
  1975. #[test]
  1976. #[serial]
  1977. fn test_future_tower_master_only() {
  1978. do_test_future_tower(ClusterMode::MasterOnly);
  1979. }
  1980. #[test]
  1981. #[serial]
  1982. fn test_future_tower_master_slave() {
  1983. do_test_future_tower(ClusterMode::MasterSlave);
  1984. }
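// Restarts both validators after a hard fork has been configured: validator A is restarted on a
// separate thread (it blocks in wait_for_supermajority), the test confirms A is actually stalled
// by checking that its last vote does not advance, and then validator B is restarted so the
// supermajority can be reached and A's restart thread can be joined.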
  1985. fn restart_whole_cluster_after_hard_fork(
  1986. cluster: &Arc<Mutex<LocalCluster>>,
  1987. validator_a_pubkey: Pubkey,
  1988. validator_b_pubkey: Pubkey,
  1989. mut validator_a_info: ClusterValidatorInfo,
  1990. validator_b_info: ClusterValidatorInfo,
  1991. ) {
  1992. // restart validator A first
  1993. let cluster_for_a = cluster.clone();
  1994. let val_a_ledger_path = validator_a_info.info.ledger_path.clone();
  1995. // Spawn a thread because wait_for_supermajority blocks in Validator::new()!
  1996. let thread = std::thread::spawn(move || {
  1997. let restart_context = cluster_for_a
  1998. .lock()
  1999. .unwrap()
  2000. .create_restart_context(&validator_a_pubkey, &mut validator_a_info);
  2001. let restarted_validator_info = LocalCluster::restart_node_with_context(
  2002. validator_a_info,
  2003. restart_context,
  2004. SocketAddrSpace::Unspecified,
  2005. );
  2006. cluster_for_a
  2007. .lock()
  2008. .unwrap()
  2009. .add_node(&validator_a_pubkey, restarted_validator_info);
  2010. });
    // test that validator A actually waits for supermajority
  2012. let mut last_vote = None;
  2013. for _ in 0..10 {
  2014. sleep(Duration::from_millis(1000));
  2015. let (new_last_vote, _) =
  2016. last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
  2017. if let Some(last_vote) = last_vote {
  2018. assert_eq!(last_vote, new_last_vote);
  2019. } else {
  2020. last_vote = Some(new_last_vote);
  2021. }
  2022. }
  2023. // restart validator B normally
  2024. cluster.lock().unwrap().restart_node(
  2025. &validator_b_pubkey,
  2026. validator_b_info,
  2027. SocketAddrSpace::Unspecified,
  2028. );
    // validator A should now start, so join its thread here
  2030. thread.join().unwrap();
  2031. }
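// Registers a hard fork at a slot below an already-rooted slot, recomputes the expected shred
// version, truncates both ledgers past the fork point, and restarts the whole cluster; the
// persisted towers (which contain votes past the hard fork) must not prevent new roots from
// being made.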
  2032. #[test]
  2033. #[serial]
  2034. fn test_hard_fork_invalidates_tower() {
  2035. agave_logger::setup_with_default(RUST_LOG_FILTER);
  2036. // First set up the cluster with 2 nodes
  2037. let slots_per_epoch = 2048;
  2038. let node_stakes = vec![60 * DEFAULT_NODE_STAKE, 40 * DEFAULT_NODE_STAKE];
  2039. let validator_keys = [
  2040. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2041. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  2042. ]
  2043. .iter()
  2044. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2045. .take(node_stakes.len())
  2046. .collect::<Vec<_>>();
  2047. let validators = validator_keys
  2048. .iter()
  2049. .map(|(kp, _)| kp.pubkey())
  2050. .collect::<Vec<_>>();
  2051. let validator_a_pubkey = validators[0];
  2052. let validator_b_pubkey = validators[1];
  2053. let mut config = ClusterConfig {
  2054. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  2055. node_stakes: node_stakes.clone(),
  2056. validator_configs: make_identical_validator_configs(
  2057. &ValidatorConfig::default_for_test(),
  2058. node_stakes.len(),
  2059. ),
  2060. validator_keys: Some(validator_keys),
  2061. slots_per_epoch,
  2062. stakers_slot_offset: slots_per_epoch,
  2063. skip_warmup_slots: true,
  2064. ..ClusterConfig::default()
  2065. };
  2066. let cluster = std::sync::Arc::new(std::sync::Mutex::new(LocalCluster::new(
  2067. &mut config,
  2068. SocketAddrSpace::Unspecified,
  2069. )));
  2070. let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey);
  2071. let min_root = 15;
  2072. loop {
  2073. sleep(Duration::from_millis(100));
  2074. if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  2075. if root >= min_root {
  2076. break;
  2077. }
  2078. }
  2079. }
  2080. let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey);
  2081. let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);
    // set up a hard fork at a slot < a previously rooted slot!
    // a hard fork earlier than the root is very unrealistic in the wild, but it's handy for
    // exercising the persistent tower's lockout behavior...
  2085. let hard_fork_slot = min_root - 5;
  2086. let hard_fork_slots = Some(vec![hard_fork_slot]);
  2087. let mut hard_forks = solana_hard_forks::HardForks::default();
  2088. hard_forks.register(hard_fork_slot);
  2089. let expected_shred_version = solana_shred_version::compute_shred_version(
  2090. &cluster.lock().unwrap().genesis_config.hash(),
  2091. Some(&hard_forks),
  2092. );
  2093. cluster
  2094. .lock()
  2095. .unwrap()
  2096. .set_shred_version(expected_shred_version);
  2097. validator_a_info
  2098. .config
  2099. .new_hard_forks
  2100. .clone_from(&hard_fork_slots);
  2101. validator_a_info.config.wait_for_supermajority = Some(hard_fork_slot);
  2102. validator_a_info.config.expected_shred_version = Some(expected_shred_version);
  2103. validator_b_info.config.new_hard_forks = hard_fork_slots;
  2104. validator_b_info.config.wait_for_supermajority = Some(hard_fork_slot);
  2105. validator_b_info.config.expected_shred_version = Some(expected_shred_version);
  2106. // Clear ledger of all slots post hard fork
  2107. {
  2108. let blockstore_a = open_blockstore(&validator_a_info.info.ledger_path);
  2109. let blockstore_b = open_blockstore(&validator_b_info.info.ledger_path);
  2110. purge_slots_with_count(&blockstore_a, hard_fork_slot + 1, 100);
  2111. purge_slots_with_count(&blockstore_b, hard_fork_slot + 1, 100);
  2112. }
  2113. restart_whole_cluster_after_hard_fork(
  2114. &cluster,
  2115. validator_a_pubkey,
  2116. validator_b_pubkey,
  2117. validator_a_info,
  2118. validator_b_info,
  2119. );
  2120. // new slots should be rooted after hard-fork cluster relaunch
  2121. cluster
  2122. .lock()
  2123. .unwrap()
  2124. .check_for_new_roots(16, "hard fork", SocketAddrSpace::Unspecified);
  2125. }
  2126. #[test]
  2127. #[serial]
  2128. fn test_run_test_load_program_accounts_root() {
  2129. run_test_load_program_accounts(CommitmentConfig::finalized());
  2130. }
  2131. fn create_simple_snapshot_config(ledger_path: &Path) -> SnapshotConfig {
  2132. SnapshotConfig {
  2133. full_snapshot_archives_dir: ledger_path.to_path_buf(),
  2134. bank_snapshots_dir: ledger_path.join(BANK_SNAPSHOTS_DIR),
  2135. ..SnapshotConfig::default()
  2136. }
  2137. }
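// Replays the ledger up to `snapshot_slot` with the given hard forks registered and writes a full
// snapshot archive for that slot into the ledger directory, emulating what
// `agave-ledger-tool create-snapshot` does during a manual cluster restart.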
  2138. fn create_snapshot_to_hard_fork(
  2139. blockstore: &Blockstore,
  2140. snapshot_slot: Slot,
  2141. hard_forks: Vec<Slot>,
  2142. ) {
  2143. let process_options = ProcessOptions {
  2144. halt_at_slot: Some(snapshot_slot),
  2145. new_hard_forks: Some(hard_forks),
  2146. run_verification: false,
  2147. ..ProcessOptions::default()
  2148. };
  2149. let ledger_path = blockstore.ledger_path();
  2150. let genesis_config = open_genesis_config(ledger_path, u64::MAX).unwrap();
  2151. let snapshot_config = create_simple_snapshot_config(ledger_path);
  2152. let (bank_forks, ..) = bank_forks_utils::load(
  2153. &genesis_config,
  2154. blockstore,
  2155. vec![
  2156. create_accounts_run_and_snapshot_dirs(ledger_path.join("accounts"))
  2157. .unwrap()
  2158. .0,
  2159. ],
  2160. &snapshot_config,
  2161. process_options,
  2162. None,
  2163. None,
  2164. None,
  2165. Arc::default(),
  2166. )
  2167. .unwrap();
  2168. let bank = bank_forks.read().unwrap().get(snapshot_slot).unwrap();
  2169. let full_snapshot_archive_info = snapshot_bank_utils::bank_to_full_snapshot_archive(
  2170. ledger_path,
  2171. &bank,
  2172. Some(snapshot_config.snapshot_version),
  2173. ledger_path,
  2174. ledger_path,
  2175. snapshot_config.archive_format,
  2176. )
  2177. .unwrap();
  2178. info!(
  2179. "Successfully created snapshot for slot {}, hash {}: {}",
  2180. bank.slot(),
  2181. bank.hash(),
  2182. full_snapshot_archive_info.path().display(),
  2183. );
  2184. }
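// Regression test for hard-forked snapshots created at an unrooted slot: validator A restarts
// from such a snapshot (with its early ledger purged so the snapshot must be used) while
// validator B restarts with the hard fork passed via config, and after the cluster makes new
// roots both validators' rooted slots and ancestor chains must match, with no gap in the roots
// column family.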
  2185. #[test]
  2186. #[ignore]
  2187. #[serial]
  2188. fn test_hard_fork_with_gap_in_roots() {
  2189. agave_logger::setup_with_default(RUST_LOG_FILTER);
  2190. // First set up the cluster with 2 nodes
  2191. let slots_per_epoch = 2048;
  2192. let node_stakes = vec![60, 40];
  2193. let validator_keys = [
  2194. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2195. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  2196. ]
  2197. .iter()
  2198. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2199. .take(node_stakes.len())
  2200. .collect::<Vec<_>>();
  2201. let validators = validator_keys
  2202. .iter()
  2203. .map(|(kp, _)| kp.pubkey())
  2204. .collect::<Vec<_>>();
  2205. let validator_a_pubkey = validators[0];
  2206. let validator_b_pubkey = validators[1];
  2207. let validator_config = ValidatorConfig {
  2208. snapshot_config: LocalCluster::create_dummy_load_only_snapshot_config(),
  2209. ..ValidatorConfig::default_for_test()
  2210. };
  2211. let mut config = ClusterConfig {
  2212. mint_lamports: 100_000,
  2213. node_stakes: node_stakes.clone(),
  2214. validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
  2215. validator_keys: Some(validator_keys),
  2216. slots_per_epoch,
  2217. stakers_slot_offset: slots_per_epoch,
  2218. skip_warmup_slots: true,
  2219. ..ClusterConfig::default()
  2220. };
  2221. let cluster = std::sync::Arc::new(std::sync::Mutex::new(LocalCluster::new(
  2222. &mut config,
  2223. SocketAddrSpace::Unspecified,
  2224. )));
  2225. let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey);
  2226. let val_b_ledger_path = cluster.lock().unwrap().ledger_path(&validator_b_pubkey);
  2227. let min_last_vote = 45;
  2228. let min_root = 10;
  2229. loop {
  2230. sleep(Duration::from_millis(100));
  2231. if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  2232. if last_vote >= min_last_vote
  2233. && root_in_tower(&val_a_ledger_path, &validator_a_pubkey) > Some(min_root)
  2234. {
  2235. break;
  2236. }
  2237. }
  2238. }
  2239. // stop all nodes of the cluster
  2240. let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey);
  2241. let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);
    // The hard fork slot is effectively a (possibly skipping) new root.
    // Assert the precondition on validator A needed to test the gap between the
    // blockstore roots and the hard fork...
  2245. let hard_fork_slot = min_last_vote - 5;
  2246. assert!(hard_fork_slot > root_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap());
  2247. let hard_fork_slots = Some(vec![hard_fork_slot]);
  2248. let mut hard_forks = HardForks::default();
  2249. hard_forks.register(hard_fork_slot);
  2250. let expected_shred_version = solana_shred_version::compute_shred_version(
  2251. &cluster.lock().unwrap().genesis_config.hash(),
  2252. Some(&hard_forks),
  2253. );
  2254. // create hard-forked snapshot only for validator a, emulating the manual cluster restart
  2255. // procedure with `agave-ledger-tool create-snapshot`
  2256. let genesis_slot = 0;
  2257. {
  2258. let blockstore_a = Blockstore::open(&val_a_ledger_path).unwrap();
  2259. create_snapshot_to_hard_fork(&blockstore_a, hard_fork_slot, vec![hard_fork_slot]);
        // Intentionally make agave-validator unable to boot by replaying blocks from genesis, to
        // ensure the hard-forked snapshot is always used. Otherwise, we couldn't reliably create a
        // gap in the ledger roots column family.
        // There was a bug which caused the hard-forked snapshot at an unrooted slot to forget
        // to root some slots (thus creating a gap in roots, which shouldn't happen).
  2265. purge_slots_with_count(&blockstore_a, genesis_slot, 1);
  2266. let next_slot = genesis_slot + 1;
  2267. let mut meta = blockstore_a.meta(next_slot).unwrap().unwrap();
  2268. meta.unset_parent();
  2269. blockstore_a.put_meta(next_slot, &meta).unwrap();
  2270. }
  2271. // strictly speaking, new_hard_forks isn't needed for validator a.
  2272. // but when snapshot loading isn't working, you might see:
  2273. // shred version mismatch: expected NNNN found: MMMM
  2274. //validator_a_info.config.new_hard_forks = hard_fork_slots.clone();
  2275. // effectively pass the --hard-fork parameter to validator b
  2276. validator_b_info.config.new_hard_forks = hard_fork_slots;
  2277. validator_a_info.config.wait_for_supermajority = Some(hard_fork_slot);
  2278. validator_a_info.config.expected_shred_version = Some(expected_shred_version);
  2279. validator_b_info.config.wait_for_supermajority = Some(hard_fork_slot);
  2280. validator_b_info.config.expected_shred_version = Some(expected_shred_version);
  2281. restart_whole_cluster_after_hard_fork(
  2282. &cluster,
  2283. validator_a_pubkey,
  2284. validator_b_pubkey,
  2285. validator_a_info,
  2286. validator_b_info,
  2287. );
  2288. // new slots should be rooted after hard-fork cluster relaunch
  2289. cluster
  2290. .lock()
  2291. .unwrap()
  2292. .check_for_new_roots(16, "hard fork", SocketAddrSpace::Unspecified);
  2293. // drop everything to open blockstores below
  2294. drop(cluster);
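// Compare the two ledgers over the range both validators share: the smaller of their
// last votes and the smaller of their tower roots.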
  2295. let (common_last_vote, common_root) = {
  2296. let (last_vote_a, _) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
  2297. let (last_vote_b, _) = last_vote_in_tower(&val_b_ledger_path, &validator_b_pubkey).unwrap();
  2298. let root_a = root_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
  2299. let root_b = root_in_tower(&val_b_ledger_path, &validator_b_pubkey).unwrap();
  2300. (last_vote_a.min(last_vote_b), root_a.min(root_b))
  2301. };
  2302. let blockstore_a = Blockstore::open(&val_a_ledger_path).unwrap();
  2303. let blockstore_b = Blockstore::open(&val_b_ledger_path).unwrap();
  2304. // collect all slot/root parents
  2305. let mut slots_a = AncestorIterator::new(common_last_vote, &blockstore_a).collect::<Vec<_>>();
  2306. let mut roots_a = blockstore_a
  2307. .reversed_rooted_slot_iterator(common_root)
  2308. .unwrap()
  2309. .collect::<Vec<_>>();
// Artificially restore the forcibly purged genesis, only for validator A, just for the
// sake of the final assertions.
  2312. slots_a.push(genesis_slot);
  2313. roots_a.push(genesis_slot);
  2314. let slots_b = AncestorIterator::new(common_last_vote, &blockstore_b).collect::<Vec<_>>();
  2315. let roots_b = blockstore_b
  2316. .reversed_rooted_slot_iterator(common_root)
  2317. .unwrap()
  2318. .collect::<Vec<_>>();
  2319. // compare them all!
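// Both validators must agree on the ancestor chain and the rooted slots, and the roots
// must form a contiguous suffix of the ancestor chain (i.e. no gap across the hard fork).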
  2320. assert_eq!((&slots_a, &roots_a), (&slots_b, &roots_b));
  2321. assert_eq!(&slots_a[slots_a.len() - roots_a.len()..].to_vec(), &roots_a);
  2322. assert_eq!(&slots_b[slots_b.len() - roots_b.len()..].to_vec(), &roots_b);
  2323. }
  2324. #[test]
  2325. #[serial]
  2326. fn test_restart_tower_rollback() {
// Test a node crashing and failing to save its tower before restart.
// The cluster continues to make progress; this node is able to rejoin with an
// outdated tower after restart.
  2330. agave_logger::setup_with_default(RUST_LOG_FILTER);
  2331. // First set up the cluster with 2 nodes
  2332. let slots_per_epoch = 2048;
  2333. let node_stakes = vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE];
  2334. let validator_strings = [
  2335. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2336. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  2337. ];
  2338. let validator_keys = validator_strings
  2339. .iter()
  2340. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2341. .take(node_stakes.len())
  2342. .collect::<Vec<_>>();
  2343. let b_pubkey = validator_keys[1].0.pubkey();
  2344. let mut validator_config = ValidatorConfig::default_for_test();
  2345. validator_config.wait_for_supermajority = Some(0);
  2346. let mut config = ClusterConfig {
  2347. mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
  2348. node_stakes: node_stakes.clone(),
  2349. validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
  2350. validator_keys: Some(validator_keys),
  2351. slots_per_epoch,
  2352. stakers_slot_offset: slots_per_epoch,
  2353. skip_warmup_slots: true,
  2354. ..ClusterConfig::default()
  2355. };
  2356. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  2357. let val_b_ledger_path = cluster.ledger_path(&b_pubkey);
  2358. let mut earlier_tower: Tower;
  2359. loop {
  2360. sleep(Duration::from_millis(1000));
  2361. // Grab the current saved tower
  2362. earlier_tower = restore_tower(&val_b_ledger_path, &b_pubkey).unwrap();
  2363. if earlier_tower.last_voted_slot().unwrap_or(0) > 1 {
  2364. break;
  2365. }
  2366. }
  2367. let mut exited_validator_info: ClusterValidatorInfo;
  2368. let last_voted_slot: Slot;
  2369. loop {
  2370. sleep(Duration::from_millis(1000));
  2371. // Wait for second, lesser staked validator to make a root past the earlier_tower's
  2372. // latest vote slot, then exit that validator
  2373. let tower = restore_tower(&val_b_ledger_path, &b_pubkey).unwrap();
  2374. if tower.root()
  2375. > earlier_tower
  2376. .last_voted_slot()
  2377. .expect("Earlier tower must have at least one vote")
  2378. {
  2379. exited_validator_info = cluster.exit_node(&b_pubkey);
  2380. last_voted_slot = tower.last_voted_slot().unwrap();
  2381. break;
  2382. }
  2383. }
  2384. // Now rewrite the tower with the *earlier_tower*. We disable voting until we reach
  2385. // a slot we did not previously vote for in order to avoid duplicate vote slashing
  2386. // issues.
  2387. save_tower(
  2388. &val_b_ledger_path,
  2389. &earlier_tower,
  2390. &exited_validator_info.info.keypair,
  2391. );
  2392. exited_validator_info.config.wait_to_vote_slot = Some(last_voted_slot + 10);
  2393. cluster.restart_node(
  2394. &b_pubkey,
  2395. exited_validator_info,
  2396. SocketAddrSpace::Unspecified,
  2397. );
  2398. // Check this node is making new roots
  2399. cluster.check_for_new_roots(
  2400. 20,
  2401. "test_restart_tower_rollback",
  2402. SocketAddrSpace::Unspecified,
  2403. );
  2404. }
  2405. #[test]
  2406. #[serial]
  2407. fn test_run_test_load_program_accounts_partition_root() {
  2408. run_test_load_program_accounts_partition(CommitmentConfig::finalized());
  2409. }
  2410. fn run_test_load_program_accounts_partition(scan_commitment: CommitmentConfig) {
  2411. let num_slots_per_validator = 8;
  2412. let partitions: [usize; 2] = [1, 1];
  2413. let (leader_schedule, validator_keys) = create_custom_leader_schedule_with_random_keys(&[
  2414. num_slots_per_validator,
  2415. num_slots_per_validator,
  2416. ]);
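// Two equally weighted partitions, each granted `num_slots_per_validator` consecutive
// leader slots in the custom schedule.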
  2417. let (update_client_sender, update_client_receiver) = unbounded();
  2418. let (scan_client_sender, scan_client_receiver) = unbounded();
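// These channels hand TPU clients to the background transfer/scan threads once the
// cluster is up (see `on_partition_start` below).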
  2419. let exit = Arc::new(AtomicBool::new(false));
  2420. let (t_update, t_scan, additional_accounts) = setup_transfer_scan_threads(
  2421. 100,
  2422. exit.clone(),
  2423. scan_commitment,
  2424. update_client_receiver,
  2425. scan_client_receiver,
  2426. );
  2427. let on_partition_start = |cluster: &mut LocalCluster, _: &mut ()| {
  2428. let update_client = cluster
  2429. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2430. .unwrap();
  2431. update_client_sender.send(update_client).unwrap();
  2432. let scan_client = cluster
  2433. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2434. .unwrap();
  2435. scan_client_sender.send(scan_client).unwrap();
  2436. };
  2437. let on_partition_before_resolved = |_: &mut LocalCluster, _: &mut ()| {};
  2438. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  2439. cluster.check_for_new_roots(
  2440. 20,
  2441. "run_test_load_program_accounts_partition",
  2442. SocketAddrSpace::Unspecified,
  2443. );
  2444. exit.store(true, Ordering::Relaxed);
  2445. t_update.join().unwrap();
  2446. t_scan.join().unwrap();
  2447. };
  2448. run_cluster_partition(
  2449. &partitions,
  2450. Some((leader_schedule, validator_keys)),
  2451. (),
  2452. on_partition_start,
  2453. on_partition_before_resolved,
  2454. on_partition_resolved,
  2455. None,
  2456. additional_accounts,
  2457. );
  2458. }
  2459. #[test]
  2460. #[serial]
  2461. fn test_rpc_block_subscribe() {
  2462. let leader_stake = 100 * DEFAULT_NODE_STAKE;
  2463. let rpc_stake = DEFAULT_NODE_STAKE;
  2464. let total_stake = leader_stake + rpc_stake;
  2465. let node_stakes = vec![leader_stake, rpc_stake];
  2466. let mut validator_config = ValidatorConfig::default_for_test();
  2467. validator_config.enable_default_rpc_block_subscribe();
  2468. validator_config.wait_for_supermajority = Some(0);
  2469. let validator_keys = [
  2470. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2471. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  2472. ]
  2473. .iter()
  2474. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2475. .take(node_stakes.len())
  2476. .collect::<Vec<_>>();
  2477. let rpc_node_pubkey = &validator_keys[1].0.pubkey();
  2478. let mut config = ClusterConfig {
  2479. mint_lamports: total_stake,
  2480. node_stakes,
  2481. validator_configs: make_identical_validator_configs(&validator_config, 2),
  2482. validator_keys: Some(validator_keys),
  2483. skip_warmup_slots: true,
  2484. ..ClusterConfig::default()
  2485. };
  2486. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  2487. let rpc_node_contact_info = cluster.get_contact_info(rpc_node_pubkey).unwrap();
  2488. let (mut block_subscribe_client, receiver) = PubsubClient::block_subscribe(
  2489. format!(
  2490. "ws://{}",
  2491. // It is important that we subscribe to a non leader node as there
  2492. // is a race condition which can cause leader nodes to not send
  2493. // BlockUpdate notifications properly. See https://github.com/solana-labs/solana/pull/34421
  2494. &rpc_node_contact_info.rpc_pubsub().unwrap().to_string()
  2495. ),
  2496. RpcBlockSubscribeFilter::All,
  2497. Some(RpcBlockSubscribeConfig {
  2498. commitment: Some(CommitmentConfig::confirmed()),
  2499. encoding: None,
  2500. transaction_details: None,
  2501. show_rewards: None,
  2502. max_supported_transaction_version: None,
  2503. }),
  2504. )
  2505. .unwrap();
  2506. let mut received_block = false;
  2507. let max_wait = 10_000;
  2508. let start = Instant::now();
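// Poll the subscription until a confirmed block past slot 1 arrives, or fail after
// `max_wait` ms.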
  2509. while !received_block {
  2510. assert!(
  2511. start.elapsed() <= Duration::from_millis(max_wait),
  2512. "Went too long {max_wait} ms without receiving a confirmed block",
  2513. );
  2514. let responses: Vec<_> = receiver.try_iter().collect();
  2515. // Wait for a response
  2516. if !responses.is_empty() {
  2517. for response in responses {
  2518. assert!(response.value.err.is_none());
  2519. assert!(response.value.block.is_some());
  2520. if response.value.slot > 1 {
  2521. received_block = true;
  2522. }
  2523. }
  2524. }
  2525. sleep(Duration::from_millis(100));
  2526. }
  2527. // If we don't drop the cluster, the blocking web socket service
  2528. // won't return, and the `block_subscribe_client` won't shut down
  2529. drop(cluster);
  2530. block_subscribe_client.shutdown().unwrap();
  2531. }
  2532. #[test]
  2533. #[serial]
  2534. #[allow(unused_attributes)]
  2535. fn test_oc_bad_signatures() {
  2536. agave_logger::setup_with_default(RUST_LOG_FILTER);
  2537. let total_stake = 100 * DEFAULT_NODE_STAKE;
  2538. let leader_stake = (total_stake as f64 * VOTE_THRESHOLD_SIZE) as u64;
  2539. let our_node_stake = total_stake - leader_stake;
  2540. let node_stakes = vec![leader_stake, our_node_stake];
  2541. let validator_keys = [
  2542. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  2543. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  2544. ]
  2545. .iter()
  2546. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2547. .take(node_stakes.len())
  2548. .collect::<Vec<_>>();
// Give the bootstrap node all the leader slots to avoid initial forking, which leads
// to casting votes with an invalid blockhash. That is not what is meant to be
// tested here and only inflates test time.
  2552. let fixed_schedule = FixedSchedule {
  2553. leader_schedule: Arc::new(Box::new(IdentityKeyedLeaderSchedule::new_from_schedule(
  2554. vec![validator_keys.first().unwrap().0.pubkey()],
  2555. ))),
  2556. };
  2557. let mut validator_config = ValidatorConfig {
  2558. require_tower: true,
  2559. wait_for_supermajority: Some(0),
  2560. fixed_leader_schedule: Some(fixed_schedule),
  2561. ..ValidatorConfig::default_for_test()
  2562. };
  2563. validator_config.enable_default_rpc_block_subscribe();
  2564. let our_id = validator_keys.last().unwrap().0.pubkey();
  2565. let mut config = ClusterConfig {
  2566. mint_lamports: total_stake,
  2567. node_stakes,
  2568. validator_configs: make_identical_validator_configs(&validator_config, 2),
  2569. validator_keys: Some(validator_keys),
  2570. skip_warmup_slots: true,
  2571. ..ClusterConfig::default()
  2572. };
  2573. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  2574. // 2) Kill our node and start up a thread to simulate votes to control our voting behavior
  2575. let our_info = cluster.exit_node(&our_id);
  2576. let node_keypair = our_info.info.keypair;
  2577. let vote_keypair = our_info.info.voting_keypair;
  2578. info!(
  2579. "our node id: {}, vote id: {}",
  2580. node_keypair.pubkey(),
  2581. vote_keypair.pubkey()
  2582. );
  2583. // 3) Start up a spy to listen for and push votes to leader TPU
  2584. let client = cluster
  2585. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2586. .unwrap();
  2587. let voter_thread_sleep_ms: usize = 100;
  2588. let num_votes_simulated = Arc::new(AtomicUsize::new(0));
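// The gossip voter mirrors the leader's votes but signs them with a throwaway keypair
// instead of the authorized voter, so none of these votes should ever be accepted.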
  2589. let gossip_voter = cluster_tests::start_gossip_voter(
  2590. &cluster.entry_point_info.gossip().unwrap(),
  2591. &node_keypair,
  2592. |(_label, leader_vote_tx)| {
  2593. let vote = vote_parser::parse_vote_transaction(&leader_vote_tx)
  2594. .map(|(_, vote, ..)| vote)
  2595. .unwrap();
  2596. // Filter out empty votes
  2597. if !vote.is_empty() {
  2598. Some((vote, leader_vote_tx))
  2599. } else {
  2600. None
  2601. }
  2602. },
  2603. {
  2604. let node_keypair = node_keypair.insecure_clone();
  2605. let vote_keypair = vote_keypair.insecure_clone();
  2606. let num_votes_simulated = num_votes_simulated.clone();
  2607. move |vote_slot, leader_vote_tx, parsed_vote, _cluster_info| {
  2608. info!("received vote for {vote_slot}");
  2609. let vote_hash = parsed_vote.hash();
  2610. info!("Simulating vote from our node on slot {vote_slot}, hash {vote_hash}");
  2611. // Add all recent vote slots on this fork to allow cluster to pass
  2612. // vote threshold checks in replay. Note this will instantly force a
  2613. // root by this validator.
  2614. let tower_sync = TowerSync::new_from_slots(vec![vote_slot], vote_hash, None);
  2615. let bad_authorized_signer_keypair = Keypair::new();
  2616. let mut vote_tx = vote_transaction::new_tower_sync_transaction(
  2617. tower_sync,
  2618. leader_vote_tx.message.recent_blockhash,
  2619. &node_keypair,
  2620. &vote_keypair,
  2621. // Make a bad signer
  2622. &bad_authorized_signer_keypair,
  2623. None,
  2624. );
  2625. LocalCluster::send_transaction_with_retries(
  2626. &client,
  2627. &[&node_keypair, &bad_authorized_signer_keypair],
  2628. &mut vote_tx,
  2629. 5,
  2630. )
  2631. .unwrap();
  2632. num_votes_simulated.fetch_add(1, Ordering::Relaxed);
  2633. }
  2634. },
  2635. voter_thread_sleep_ms as u64,
  2636. cluster.validators.len().saturating_sub(1),
  2637. 0,
  2638. 0,
  2639. cluster.entry_point_info.shred_version(),
  2640. );
  2641. let (mut block_subscribe_client, receiver) = PubsubClient::block_subscribe(
  2642. format!(
  2643. "ws://{}",
  2644. &cluster.entry_point_info.rpc_pubsub().unwrap().to_string()
  2645. ),
  2646. RpcBlockSubscribeFilter::All,
  2647. Some(RpcBlockSubscribeConfig {
  2648. commitment: Some(CommitmentConfig::confirmed()),
  2649. encoding: None,
  2650. transaction_details: None,
  2651. show_rewards: None,
  2652. max_supported_transaction_version: None,
  2653. }),
  2654. )
  2655. .unwrap();
  2656. const MAX_VOTES_TO_SIMULATE: usize = 10;
  2657. // Make sure test doesn't take too long
  2658. assert!(voter_thread_sleep_ms * MAX_VOTES_TO_SIMULATE <= 1000);
  2659. loop {
  2660. let responses: Vec<_> = receiver.try_iter().collect();
  2661. // Nothing should get optimistically confirmed or rooted
  2662. assert!(responses.is_empty());
  2663. // Wait for the voter thread to attempt sufficient number of votes to give
  2664. // a chance for the violation to occur
  2665. if num_votes_simulated.load(Ordering::Relaxed) > MAX_VOTES_TO_SIMULATE {
  2666. break;
  2667. }
  2668. sleep(Duration::from_millis(100));
  2669. }
  2670. // Clean up voter thread
  2671. gossip_voter.close();
  2672. // If we don't drop the cluster, the blocking web socket service
  2673. // won't return, and the `block_subscribe_client` won't shut down
  2674. drop(cluster);
  2675. block_subscribe_client.shutdown().unwrap();
  2676. }
  2677. #[test]
  2678. #[serial]
  2679. #[ignore]
  2680. fn test_votes_land_in_fork_during_long_partition() {
  2681. let total_stake = 3 * DEFAULT_NODE_STAKE;
  2682. // Make `lighter_stake` insufficient for switching threshold
  2683. let lighter_stake = (SWITCH_FORK_THRESHOLD * total_stake as f64) as u64;
  2684. let heavier_stake = lighter_stake + 1;
  2685. let failures_stake = total_stake - lighter_stake - heavier_stake;
  2686. // Give lighter stake 30 consecutive slots before
  2687. // the heavier stake gets a single slot
  2688. let partitions: &[(usize, usize)] =
  2689. &[(heavier_stake as usize, 1), (lighter_stake as usize, 30)];
  2690. #[derive(Default)]
  2691. struct PartitionContext {
  2692. heaviest_validator_key: Pubkey,
  2693. lighter_validator_key: Pubkey,
  2694. heavier_fork_slot: Slot,
  2695. }
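// Filled in by the partition callbacks below: the keys of the heavier- and lighter-fork
// validators, plus a slot voted on by the heavier validator that is absent from the
// lighter fork.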
  2696. let on_partition_start = |_cluster: &mut LocalCluster,
  2697. validator_keys: &[Pubkey],
  2698. _dead_validator_infos: Vec<ClusterValidatorInfo>,
  2699. context: &mut PartitionContext| {
  2700. // validator_keys[0] is the validator that will be killed, i.e. the validator with
  2701. // stake == `failures_stake`
  2702. context.heaviest_validator_key = validator_keys[1];
  2703. context.lighter_validator_key = validator_keys[2];
  2704. };
  2705. let on_before_partition_resolved =
  2706. |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  2707. let lighter_validator_ledger_path = cluster.ledger_path(&context.lighter_validator_key);
  2708. let heavier_validator_ledger_path =
  2709. cluster.ledger_path(&context.heaviest_validator_key);
  2710. // Wait for each node to have created and voted on its own partition
  2711. loop {
  2712. let (heavier_validator_latest_vote_slot, _) = last_vote_in_tower(
  2713. &heavier_validator_ledger_path,
  2714. &context.heaviest_validator_key,
  2715. )
  2716. .unwrap();
  2717. info!(
  2718. "Checking heavier validator's last vote {heavier_validator_latest_vote_slot} \
  2719. is on a separate fork"
  2720. );
  2721. let lighter_validator_blockstore = open_blockstore(&lighter_validator_ledger_path);
  2722. if lighter_validator_blockstore
  2723. .meta(heavier_validator_latest_vote_slot)
  2724. .unwrap()
  2725. .is_none()
  2726. {
  2727. context.heavier_fork_slot = heavier_validator_latest_vote_slot;
  2728. return;
  2729. }
  2730. sleep(Duration::from_millis(100));
  2731. }
  2732. };
  2733. let on_partition_resolved = |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  2734. let lighter_validator_ledger_path = cluster.ledger_path(&context.lighter_validator_key);
  2735. let start = Instant::now();
  2736. let max_wait = ms_for_n_slots(MAX_PROCESSING_AGE as u64, DEFAULT_TICKS_PER_SLOT);
  2737. // Wait for the lighter node to switch over and root the `context.heavier_fork_slot`
  2738. loop {
  2739. assert!(
  2740. // Should finish faster than if the cluster were relying on replay vote
  2741. // refreshing to refresh the vote on blockhash expiration for the vote
  2742. // transaction.
  2743. start.elapsed() <= Duration::from_millis(max_wait),
  2744. "Went too long {max_wait} ms without a root",
  2745. );
  2746. let lighter_validator_blockstore = open_blockstore(&lighter_validator_ledger_path);
  2747. if lighter_validator_blockstore.is_root(context.heavier_fork_slot) {
  2748. info!(
  2749. "Partition resolved, new root made in {}ms",
  2750. start.elapsed().as_millis()
  2751. );
  2752. return;
  2753. }
  2754. sleep(Duration::from_millis(100));
  2755. }
  2756. };
  2757. run_kill_partition_switch_threshold(
  2758. &[(failures_stake as usize, 0)],
  2759. partitions,
  2760. None,
  2761. PartitionContext::default(),
  2762. on_partition_start,
  2763. on_before_partition_resolved,
  2764. on_partition_resolved,
  2765. );
  2766. }
  2767. fn setup_transfer_scan_threads(
  2768. num_starting_accounts: usize,
  2769. exit: Arc<AtomicBool>,
  2770. scan_commitment: CommitmentConfig,
  2771. update_client_receiver: Receiver<QuicTpuClient>,
  2772. scan_client_receiver: Receiver<QuicTpuClient>,
  2773. ) -> (
  2774. JoinHandle<()>,
  2775. JoinHandle<()>,
  2776. Vec<(Pubkey, AccountSharedData)>,
  2777. ) {
  2778. let exit_ = exit.clone();
  2779. let starting_keypairs: Arc<Vec<Keypair>> = Arc::new(
  2780. iter::repeat_with(Keypair::new)
  2781. .take(num_starting_accounts)
  2782. .collect(),
  2783. );
  2784. let target_keypairs: Arc<Vec<Keypair>> = Arc::new(
  2785. iter::repeat_with(Keypair::new)
  2786. .take(num_starting_accounts)
  2787. .collect(),
  2788. );
  2789. let starting_accounts: Vec<(Pubkey, AccountSharedData)> = starting_keypairs
  2790. .iter()
  2791. .map(|k| {
  2792. (
  2793. k.pubkey(),
  2794. AccountSharedData::new(1, 0, &system_program::id()),
  2795. )
  2796. })
  2797. .collect();
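// Each starting account is funded with exactly 1 lamport; transfers only shuffle
// lamports between tracked accounts, so the scan thread can assert the tracked total
// stays equal to `num_starting_accounts`.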
  2798. let starting_keypairs_ = starting_keypairs.clone();
  2799. let target_keypairs_ = target_keypairs.clone();
  2800. let t_update = Builder::new()
  2801. .name("update".to_string())
  2802. .spawn(move || {
  2803. let client = update_client_receiver.recv().unwrap();
  2804. loop {
  2805. if exit_.load(Ordering::Relaxed) {
  2806. return;
  2807. }
  2808. let (blockhash, _) = client
  2809. .rpc_client()
  2810. .get_latest_blockhash_with_commitment(CommitmentConfig::processed())
  2811. .unwrap();
  2812. for i in 0..starting_keypairs_.len() {
  2813. let result = client.async_transfer(
  2814. 1,
  2815. &starting_keypairs_[i],
  2816. &target_keypairs_[i].pubkey(),
  2817. blockhash,
  2818. );
  2819. if result.is_err() {
  2820. debug!("Failed in transfer for starting keypair: {result:?}");
  2821. }
  2822. }
  2823. for i in 0..starting_keypairs_.len() {
  2824. let result = client.async_transfer(
  2825. 1,
  2826. &target_keypairs_[i],
  2827. &starting_keypairs_[i].pubkey(),
  2828. blockhash,
  2829. );
  2830. if result.is_err() {
  2831. debug!("Failed in transfer for starting keypair: {result:?}");
  2832. }
  2833. }
  2834. }
  2835. })
  2836. .unwrap();
  2837. // Scan, the total funds should add up to the original
  2838. let mut scan_commitment_config = RpcProgramAccountsConfig::default();
  2839. scan_commitment_config.account_config.commitment = Some(scan_commitment);
  2840. let tracked_pubkeys: HashSet<Pubkey> = starting_keypairs
  2841. .iter()
  2842. .chain(target_keypairs.iter())
  2843. .map(|k| k.pubkey())
  2844. .collect();
  2845. let expected_total_balance = num_starting_accounts as u64;
  2846. let t_scan = Builder::new()
  2847. .name("scan".to_string())
  2848. .spawn(move || {
  2849. let client = scan_client_receiver.recv().unwrap();
  2850. loop {
  2851. if exit.load(Ordering::Relaxed) {
  2852. return;
  2853. }
  2854. if let Some(total_scan_balance) = client
  2855. .rpc_client()
  2856. .get_program_ui_accounts_with_config(
  2857. &system_program::id(),
  2858. scan_commitment_config.clone(),
  2859. )
  2860. .ok()
  2861. .map(|result| {
  2862. result
  2863. .into_iter()
  2864. .map(|(key, account)| {
  2865. if tracked_pubkeys.contains(&key) {
  2866. account.lamports
  2867. } else {
  2868. 0
  2869. }
  2870. })
  2871. .sum::<u64>()
  2872. })
  2873. {
  2874. assert_eq!(total_scan_balance, expected_total_balance);
  2875. }
  2876. }
  2877. })
  2878. .unwrap();
  2879. (t_update, t_scan, starting_accounts)
  2880. }
  2881. fn run_test_load_program_accounts(scan_commitment: CommitmentConfig) {
  2882. agave_logger::setup_with_default(RUST_LOG_FILTER);
  2883. // First set up the cluster with 2 nodes
  2884. let slots_per_epoch = 2048;
  2885. let node_stakes = vec![51 * DEFAULT_NODE_STAKE, 50 * DEFAULT_NODE_STAKE];
  2886. let validator_keys: Vec<_> = [
  2887. "4qhhXNTbKD1a5vxDDLZcHKj7ELNeiivtUBxn3wUK1F5VRsQVP89VUhfXqSfgiFB14GfuBgtrQ96n9NvWQADVkcCg",
  2888. "3kHBzVwie5vTEaY6nFCPeFT8qDpoXzn7dCEioGRNBTnUDpvwnG85w8Wq63gVWpVTP8k2a8cgcWRjSXyUkEygpXWS",
  2889. ]
  2890. .iter()
  2891. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2892. .take(node_stakes.len())
  2893. .collect();
  2894. let num_starting_accounts = 100;
  2895. let exit = Arc::new(AtomicBool::new(false));
  2896. let (update_client_sender, update_client_receiver) = unbounded();
  2897. let (scan_client_sender, scan_client_receiver) = unbounded();
  2898. // Setup the update/scan threads
  2899. let (t_update, t_scan, starting_accounts) = setup_transfer_scan_threads(
  2900. num_starting_accounts,
  2901. exit.clone(),
  2902. scan_commitment,
  2903. update_client_receiver,
  2904. scan_client_receiver,
  2905. );
  2906. let mut validator_config = ValidatorConfig::default_for_test();
  2907. validator_config.wait_for_supermajority = Some(0);
  2908. let mut config = ClusterConfig {
  2909. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  2910. node_stakes: node_stakes.clone(),
  2911. validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
  2912. validator_keys: Some(validator_keys),
  2913. slots_per_epoch,
  2914. stakers_slot_offset: slots_per_epoch,
  2915. skip_warmup_slots: true,
  2916. additional_accounts: starting_accounts,
  2917. ..ClusterConfig::default()
  2918. };
  2919. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  2920. // Give the threads a client to use for querying the cluster
  2921. let all_pubkeys = cluster.get_node_pubkeys();
  2922. let other_validator_id = all_pubkeys
  2923. .into_iter()
  2924. .find(|x| x != cluster.entry_point_info.pubkey())
  2925. .unwrap();
  2926. let client = cluster
  2927. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2928. .unwrap();
  2929. update_client_sender.send(client).unwrap();
  2930. let scan_client = cluster
  2931. .build_validator_tpu_quic_client(&other_validator_id)
  2932. .unwrap();
  2933. scan_client_sender.send(scan_client).unwrap();
  2934. // Wait for some roots to pass
  2935. cluster.check_for_new_roots(
  2936. 40,
  2937. "run_test_load_program_accounts",
  2938. SocketAddrSpace::Unspecified,
  2939. );
  2940. // Exit and ensure no violations of consistency were found
  2941. exit.store(true, Ordering::Relaxed);
  2942. t_update.join().unwrap();
  2943. t_scan.join().unwrap();
  2944. }
  2945. #[test]
  2946. #[serial]
  2947. fn test_no_lockout_violation_with_tower() {
  2948. do_test_lockout_violation_with_or_without_tower(true);
  2949. }
  2950. #[test]
  2951. #[serial]
  2952. fn test_lockout_violation_without_tower() {
  2953. do_test_lockout_violation_with_or_without_tower(false);
  2954. }
// A somewhat convoluted test case; it roughly follows this theoretical scenario:
// Validators A, B, and C have 31%, 36%, and 33% of the stake respectively. The leader
// schedule is split: B is always leader for the first half of the test, C for the second half.
// We don't give validator A any slots because it's going to be deleting its ledger,
// so it may create different blocks for slots it has already created blocks for on a different fork.
  2960. //
  2961. // Step 1: Kill C, only A and B should be running
  2962. //
  2963. // base_slot -> next_slot_on_a (Wait for A to vote)
  2964. //
  2965. // Step 2:
// Kill A and B once we verify that A has voted on some `next_slot_on_a` >= 1.
  2967. // Copy B's ledger to A and C but only up to slot `next_slot_on_a`.
  2968. //
  2969. // Step 3:
  2970. // Restart validator C to make it produce blocks on a fork from `base_slot`
  2971. // that doesn't include `next_slot_on_a`. Wait for it to vote on its own fork.
  2972. //
  2973. // base_slot -> next_slot_on_c
  2974. //
  2975. // Step 4: Restart `A` which had 31% of the stake, it's missing `next_slot_on_a` in
  2976. // its ledger since we copied the ledger from B excluding this slot, so it sees
  2977. //
  2978. // base_slot -> next_slot_on_c
  2979. //
  2980. // Step 5:
  2981. // Without the persisted tower:
  2982. // `A` would choose to vote on the new fork from C on `next_slot_on_c`
  2983. //
  2984. // With the persisted tower:
  2985. // `A` should not be able to generate a switching proof.
  2986. //
  2987. fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
  2988. agave_logger::setup_with("info");
  2989. // First set up the cluster with 4 nodes
  2990. let slots_per_epoch = 2048;
  2991. let node_stakes = vec![
  2992. 31 * DEFAULT_NODE_STAKE,
  2993. 36 * DEFAULT_NODE_STAKE,
  2994. 33 * DEFAULT_NODE_STAKE,
  2995. ];
  2996. let validator_b_last_leader_slot: Slot = 8;
  2997. let truncated_slots: Slot = 100;
// Each pubkey is prefixed with A, B, or C, respectively
  2999. let validator_keys = [
  3000. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  3001. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  3002. "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
  3003. ]
  3004. .iter()
  3005. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  3006. .take(node_stakes.len())
  3007. .collect::<Vec<_>>();
  3008. let validators = validator_keys
  3009. .iter()
  3010. .map(|(kp, _)| kp.pubkey())
  3011. .collect::<Vec<_>>();
  3012. let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) =
  3013. (validators[0], validators[1], validators[2]);
  3014. // Disable voting on all validators other than validator B
  3015. let mut default_config = ValidatorConfig::default_for_test();
  3016. // Ensure B can make leader blocks up till the fork slot, and give the remaining slots to C. This is
  3017. // also important so `C` doesn't run into NoPropagatedConfirmation errors on making its first forked
  3018. // slot.
  3019. //
  3020. // Don't give validator A any slots because it's going to be deleting its ledger, so it may create
  3021. // versions of slots it's already created, but on a different fork.
  3022. let validator_to_slots = vec![
  3023. (
  3024. validator_b_pubkey,
  3025. validator_b_last_leader_slot as usize + 1,
  3026. ),
  3027. (validator_c_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
  3028. ];
  3029. // Trick C into not producing any blocks during this time, in case its leader slots come up before we can
  3030. // kill the validator. We don't want any forks during the time validator B is producing its initial blocks.
  3031. let c_validator_to_slots = vec![(validator_b_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize)];
  3032. let c_leader_schedule = create_custom_leader_schedule(c_validator_to_slots.into_iter());
  3033. let leader_schedule = Arc::new(create_custom_leader_schedule(
  3034. validator_to_slots.into_iter(),
  3035. ));
  3036. for slot in 0..=validator_b_last_leader_slot {
  3037. assert_eq!(leader_schedule[slot], validator_b_pubkey);
  3038. }
  3039. default_config.fixed_leader_schedule = Some(FixedSchedule {
  3040. leader_schedule: leader_schedule.clone(),
  3041. });
  3042. default_config.wait_for_supermajority = Some(0);
  3043. let mut validator_configs =
  3044. make_identical_validator_configs(&default_config, node_stakes.len());
  3045. // Disable voting on validator C
  3046. validator_configs[2].voting_disabled = true;
  3047. // C should not produce any blocks at this time
  3048. validator_configs[2].fixed_leader_schedule = Some(FixedSchedule {
  3049. leader_schedule: Arc::new(c_leader_schedule),
  3050. });
  3051. let mut config = ClusterConfig {
  3052. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  3053. node_stakes,
  3054. validator_configs,
  3055. validator_keys: Some(validator_keys),
  3056. slots_per_epoch,
  3057. stakers_slot_offset: slots_per_epoch,
  3058. skip_warmup_slots: true,
  3059. ..ClusterConfig::default()
  3060. };
  3061. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  3062. let val_a_ledger_path = cluster.ledger_path(&validator_a_pubkey);
  3063. let val_b_ledger_path = cluster.ledger_path(&validator_b_pubkey);
  3064. let val_c_ledger_path = cluster.ledger_path(&validator_c_pubkey);
  3065. info!("val_a {validator_a_pubkey} ledger path {val_a_ledger_path:?}");
  3066. info!("val_b {validator_b_pubkey} ledger path {val_b_ledger_path:?}");
  3067. info!("val_c {validator_c_pubkey} ledger path {val_c_ledger_path:?}");
  3068. info!("Exiting validator C");
  3069. let mut validator_c_info = cluster.exit_node(&validator_c_pubkey);
  3070. info!("Waiting on validator A to vote");
  3071. // Step 1: Wait for validator A to vote so the tower file exists, and so we can determine the
  3072. // `base_slot` and `next_slot_on_a`
  3073. loop {
  3074. if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  3075. if last_vote >= 1 {
  3076. break;
  3077. }
  3078. }
  3079. sleep(Duration::from_millis(100));
  3080. }
  3081. // kill A and B
  3082. info!("Exiting validators A and B");
  3083. let _validator_b_info = cluster.exit_node(&validator_b_pubkey);
  3084. let validator_a_info = cluster.exit_node(&validator_a_pubkey);
  3085. let next_slot_on_a = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
  3086. .unwrap()
  3087. .0;
  3088. let base_slot = next_slot_on_a - 1;
  3089. info!("base slot: {base_slot}, next_slot_on_a: {next_slot_on_a}");
  3090. // Step 2:
  3091. // Truncate ledger, copy over B's ledger to C
  3092. info!("Create validator C's ledger");
  3093. {
  3094. // first copy from validator B's ledger
  3095. std::fs::remove_dir_all(&validator_c_info.info.ledger_path).unwrap();
  3096. let mut opt = fs_extra::dir::CopyOptions::new();
  3097. opt.copy_inside = true;
  3098. fs_extra::dir::copy(&val_b_ledger_path, &val_c_ledger_path, &opt).unwrap();
  3099. // Remove B's tower in C's new copied ledger
  3100. remove_tower(&val_c_ledger_path, &validator_b_pubkey);
  3101. let blockstore = open_blockstore(&val_c_ledger_path);
  3102. purge_slots_with_count(&blockstore, next_slot_on_a, truncated_slots);
  3103. }
  3104. info!("Create validator A's ledger");
  3105. {
  3106. // Now we copy these blocks to A
  3107. let b_blockstore = open_blockstore(&val_b_ledger_path);
  3108. let a_blockstore = open_blockstore(&val_a_ledger_path);
  3109. copy_blocks(next_slot_on_a, &b_blockstore, &a_blockstore, false);
  3110. // Purge unnecessary slots
  3111. purge_slots_with_count(&a_blockstore, next_slot_on_a + 1, truncated_slots);
  3112. }
  3113. {
  3114. let blockstore = open_blockstore(&val_a_ledger_path);
  3115. if !with_tower {
  3116. info!("Removing tower!");
  3117. remove_tower(&val_a_ledger_path, &validator_a_pubkey);
  3118. // Remove next_slot_on_a from ledger to force validator A to select
  3119. // votes_on_c_fork. Otherwise, in the test case without a tower,
  3120. // the validator A will immediately vote for 27 on restart, because it
  3121. // hasn't gotten the heavier fork from validator C yet.
  3122. // Then it will be stuck on 27 unable to switch because C doesn't
  3123. // have enough stake to generate a switching proof
  3124. purge_slots_with_count(&blockstore, next_slot_on_a, truncated_slots);
  3125. } else {
  3126. info!("Not removing tower!");
  3127. }
  3128. }
  3129. // Step 3:
  3130. // Run validator C only to make it produce and vote on its own fork.
  3131. info!("Restart validator C again!!!");
  3132. validator_c_info.config.voting_disabled = false;
  3133. // C should now produce blocks
  3134. validator_c_info.config.fixed_leader_schedule = Some(FixedSchedule { leader_schedule });
  3135. cluster.restart_node(
  3136. &validator_c_pubkey,
  3137. validator_c_info,
  3138. SocketAddrSpace::Unspecified,
  3139. );
  3140. let mut votes_on_c_fork = std::collections::BTreeSet::new();
  3141. let mut last_vote = 0;
  3142. let now = Instant::now();
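// Give validator C up to 30 seconds to build its own fork off `base_slot` and collect
// at least 4 distinct vote slots on it.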
  3143. loop {
  3144. let elapsed = now.elapsed();
  3145. assert!(
  3146. elapsed <= Duration::from_secs(30),
  3147. "C failed to create a fork past {base_slot} in {} seconds, last_vote {last_vote}, \
  3148. votes_on_c_fork: {votes_on_c_fork:?}",
  3149. elapsed.as_secs(),
  3150. );
  3151. sleep(Duration::from_millis(100));
  3152. if let Some((newest_vote, _)) = last_vote_in_tower(&val_c_ledger_path, &validator_c_pubkey)
  3153. {
  3154. last_vote = newest_vote;
  3155. if last_vote != base_slot {
  3156. votes_on_c_fork.insert(last_vote);
  3157. // Collect 4 votes
  3158. if votes_on_c_fork.len() >= 4 {
  3159. break;
  3160. }
  3161. }
  3162. }
  3163. }
  3164. assert!(!votes_on_c_fork.is_empty());
  3165. info!("Collected validator C's votes: {votes_on_c_fork:?}");
  3166. // Step 4:
  3167. // verify whether there was violation or not
  3168. info!("Restart validator A again!!!");
  3169. cluster.restart_node(
  3170. &validator_a_pubkey,
  3171. validator_a_info,
  3172. SocketAddrSpace::Unspecified,
  3173. );
  3174. // monitor for actual votes from validator A
  3175. let mut bad_vote_detected = false;
  3176. let mut a_votes = vec![];
  3177. for _ in 0..100 {
  3178. sleep(Duration::from_millis(100));
  3179. if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  3180. a_votes.push(last_vote);
  3181. let blockstore = open_blockstore(&val_a_ledger_path);
  3182. let mut ancestors = AncestorIterator::new(last_vote, &blockstore);
  3183. if ancestors.any(|a| votes_on_c_fork.contains(&a)) {
  3184. bad_vote_detected = true;
  3185. break;
  3186. }
  3187. }
  3188. }
  3189. info!("Observed A's votes on: {a_votes:?}");
  3190. // an elaborate way of assert!(with_tower && !bad_vote_detected || ...)
  3191. let expects_optimistic_confirmation_violation = !with_tower;
  3192. if bad_vote_detected != expects_optimistic_confirmation_violation {
  3193. if bad_vote_detected {
  3194. panic!("No violation expected because of persisted tower!");
  3195. } else {
  3196. panic!("Violation expected because of removed persisted tower!");
  3197. }
  3198. } else if bad_vote_detected {
  3199. info!(
  3200. "THIS TEST expected violations. And indeed, there was some, because of removed \
  3201. persisted tower."
  3202. );
  3203. } else {
  3204. info!(
  3205. "THIS TEST expected no violation. And indeed, there was none, thanks to persisted \
  3206. tower."
  3207. );
  3208. }
  3209. }
  3210. #[test]
  3211. #[serial]
  3212. // Steps in this test:
  3213. // We want to create a situation like:
/*
      1 (2%, killed and restarted) --- 200 (37%, lighter fork)
     /
    0
     \-------- 4 (38%, heavier fork)
*/
  3220. // where the 2% that voted on slot 1 don't see their votes land in a block
  3221. // due to blockhash expiration, and thus without resigning their votes with
  3222. // a newer blockhash, will deem slot 4 the heavier fork and try to switch to
  3223. // slot 4, which doesn't pass the switch threshold. This stalls the network.
  3224. // We do this by:
  3225. // 1) Creating a partition so all three nodes don't see each other
  3226. // 2) Kill the validator with 2%
  3227. // 3) Wait for longer than blockhash expiration
// 4) Copy in the lighter fork's blocks, but *only* up to the first slot in the lighter fork
// (not all the blocks on the lighter fork!); call this slot `L`
// 5) Restart the validator with 2% so that it votes on `L`, but the vote doesn't land
// due to blockhash expiration
  3232. // 6) Resolve the partition so that the 2% repairs the other fork, and tries to switch,
  3233. // stalling the network.
  3234. fn test_fork_choice_refresh_old_votes() {
  3235. agave_logger::setup_with_default(RUST_LOG_FILTER);
  3236. let max_switch_threshold_failure_pct = 1.0 - 2.0 * SWITCH_FORK_THRESHOLD;
  3237. let total_stake = 100 * DEFAULT_NODE_STAKE;
  3238. let max_failures_stake = (max_switch_threshold_failure_pct * total_stake as f64) as u64;
  3239. // 1% less than the failure stake, where the 2% is allocated to a validator that
  3240. // has no leader slots and thus won't be able to vote on its own fork.
  3241. let failures_stake = max_failures_stake;
  3242. let total_alive_stake = total_stake - failures_stake;
  3243. let alive_stake_1 = total_alive_stake / 2 - 1;
  3244. let alive_stake_2 = total_alive_stake - alive_stake_1 - 1;
  3245. // Heavier fork still doesn't have enough stake to switch. Both branches need
  3246. // the vote to land from the validator with `alive_stake_3` to allow the other
  3247. // fork to switch.
  3248. let alive_stake_3 = 2 * DEFAULT_NODE_STAKE;
  3249. assert!(alive_stake_1 < alive_stake_2);
  3250. assert!(alive_stake_1 + alive_stake_3 > alive_stake_2);
  3251. let num_lighter_partition_slots_per_rotation = 8;
  3252. // ratio of total number of leader slots to the number of leader slots allocated
  3253. // to the lighter partition
  3254. let total_slots_to_lighter_partition_ratio = 2;
  3255. let partitions: &[(usize, usize)] = &[
  3256. (
  3257. alive_stake_1 as usize,
  3258. num_lighter_partition_slots_per_rotation,
  3259. ),
  3260. (
  3261. alive_stake_2 as usize,
  3262. (total_slots_to_lighter_partition_ratio - 1) * num_lighter_partition_slots_per_rotation,
  3263. ),
  3264. (alive_stake_3 as usize, 0),
  3265. ];
  3266. #[derive(Default)]
  3267. struct PartitionContext {
  3268. smallest_validator_info: Option<ClusterValidatorInfo>,
  3269. lighter_fork_validator_key: Pubkey,
  3270. heaviest_validator_key: Pubkey,
  3271. first_slot_in_lighter_partition: Slot,
  3272. }
  3273. let on_partition_start = |cluster: &mut LocalCluster,
  3274. validator_keys: &[Pubkey],
  3275. _: Vec<ClusterValidatorInfo>,
  3276. context: &mut PartitionContext| {
  3277. // Kill validator with alive_stake_3, second in `partitions` slice
  3278. let smallest_validator_key = &validator_keys[3];
  3279. let info = cluster.exit_node(smallest_validator_key);
  3280. context.smallest_validator_info = Some(info);
  3281. // validator_keys[0] is the validator that will be killed, i.e. the validator with
  3282. // stake == `failures_stake`
  3283. context.lighter_fork_validator_key = validator_keys[1];
  3284. // Third in `partitions` slice
  3285. context.heaviest_validator_key = validator_keys[2];
  3286. };
  3287. let ticks_per_slot = 32;
  3288. let on_before_partition_resolved =
  3289. |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  3290. // Equal to ms_per_slot * MAX_PROCESSING_AGE, rounded up
  3291. let sleep_time_ms = ms_for_n_slots(
  3292. MAX_PROCESSING_AGE as u64 * total_slots_to_lighter_partition_ratio as u64,
  3293. ticks_per_slot,
  3294. );
  3295. info!("Wait for blockhashes to expire, {sleep_time_ms} ms");
  3296. // Wait for blockhashes to expire
  3297. sleep(Duration::from_millis(sleep_time_ms));
  3298. let smallest_validator_key = context
  3299. .smallest_validator_info
  3300. .as_ref()
  3301. .unwrap()
  3302. .info
  3303. .keypair
  3304. .pubkey();
  3305. let smallest_ledger_path = context
  3306. .smallest_validator_info
  3307. .as_ref()
  3308. .unwrap()
  3309. .info
  3310. .ledger_path
  3311. .clone();
  3312. info!(
  3313. "smallest validator key: {smallest_validator_key}, path: {smallest_ledger_path:?}"
  3314. );
  3315. let lighter_fork_ledger_path = cluster.ledger_path(&context.lighter_fork_validator_key);
  3316. let heaviest_ledger_path = cluster.ledger_path(&context.heaviest_validator_key);
  3317. // Wait for blockhashes to expire
  3318. let mut distance_from_tip: usize;
  3319. loop {
  3320. // Get latest votes. We make sure to wait until the vote has landed in
  3321. // blockstore. This is important because if we were the leader for the block there
  3322. // is a possibility of voting before broadcast has inserted in blockstore.
  3323. let lighter_fork_latest_vote = wait_for_last_vote_in_tower_to_land_in_ledger(
  3324. &lighter_fork_ledger_path,
  3325. &context.lighter_fork_validator_key,
  3326. )
  3327. .unwrap();
  3328. let heaviest_fork_latest_vote = wait_for_last_vote_in_tower_to_land_in_ledger(
  3329. &heaviest_ledger_path,
  3330. &context.heaviest_validator_key,
  3331. )
  3332. .unwrap();
  3333. // Check if sufficient blockhashes have expired on the smaller fork
  3334. {
  3335. let smallest_blockstore = open_blockstore(&smallest_ledger_path);
  3336. let lighter_fork_blockstore = open_blockstore(&lighter_fork_ledger_path);
  3337. let heaviest_blockstore = open_blockstore(&heaviest_ledger_path);
  3338. info!("Opened blockstores");
  3339. // Find the first slot on the smaller fork
  3340. let lighter_ancestors: BTreeSet<Slot> =
  3341. std::iter::once(lighter_fork_latest_vote)
  3342. .chain(AncestorIterator::new(
  3343. lighter_fork_latest_vote,
  3344. &lighter_fork_blockstore,
  3345. ))
  3346. .collect();
  3347. let heavier_ancestors: BTreeSet<Slot> =
  3348. std::iter::once(heaviest_fork_latest_vote)
  3349. .chain(AncestorIterator::new(
  3350. heaviest_fork_latest_vote,
  3351. &heaviest_blockstore,
  3352. ))
  3353. .collect();
  3354. let (different_ancestor_index, different_ancestor) = lighter_ancestors
  3355. .iter()
  3356. .enumerate()
  3357. .zip(heavier_ancestors.iter())
  3358. .find(|((_index, lighter_fork_ancestor), heavier_fork_ancestor)| {
  3359. lighter_fork_ancestor != heavier_fork_ancestor
  3360. })
  3361. .unwrap()
  3362. .0;
  3363. let last_common_ancestor_index = different_ancestor_index - 1;
  3364. // It's critical that the heavier fork has at least one vote on it.
  3365. // This is important because the smallest validator must see a vote on the heavier fork
  3366. // to avoid voting again on its own fork.
  3367. // Because we don't have a simple method of parsing blockstore for all votes, we proxy this check
  3368. // by ensuring the heavier fork was long enough to land a vote. The minimum length would be 4 more
  3369. // than the last common ancestor N, because the first vote would be made at least by N+3 (if threshold check failed on slot N+1),
  3370. // and then would land by slot N + 4.
  3371. assert!(heavier_ancestors.len() > last_common_ancestor_index + 4);
  3372. context.first_slot_in_lighter_partition = *different_ancestor;
  3373. distance_from_tip = lighter_ancestors.len() - different_ancestor_index - 1;
  3374. info!(
  3375. "Distance in number of blocks between earliest slot {} and latest slot {} \
  3376. on lighter partition is {}",
  3377. context.first_slot_in_lighter_partition,
  3378. lighter_fork_latest_vote,
  3379. distance_from_tip
  3380. );
  3381. if distance_from_tip > MAX_PROCESSING_AGE {
  3382. // Must have been updated in the above loop
  3383. assert!(context.first_slot_in_lighter_partition != 0);
  3384. info!(
  3385. "First slot in lighter partition is {}",
  3386. context.first_slot_in_lighter_partition
  3387. );
  3388. // Copy all the blocks from the smaller partition up to `first_slot_in_lighter_partition`
  3389. // into the smallest validator's blockstore so that it will attempt to refresh
  3390. copy_blocks(
  3391. lighter_fork_latest_vote,
  3392. &lighter_fork_blockstore,
  3393. &smallest_blockstore,
  3394. false,
  3395. );
  3396. // Also copy all the blocks from the heavier partition so the smallest validator will
  3397. // not vote again on its own fork
  3398. copy_blocks(
  3399. heaviest_fork_latest_vote,
  3400. &heaviest_blockstore,
  3401. &smallest_blockstore,
  3402. false,
  3403. );
  3404. // Simulate a vote for the `first_slot_in_lighter_partition`
  3405. let bank_hash = lighter_fork_blockstore
  3406. .get_bank_hash(context.first_slot_in_lighter_partition)
  3407. .unwrap();
  3408. cluster_tests::apply_votes_to_tower(
  3409. &context
  3410. .smallest_validator_info
  3411. .as_ref()
  3412. .unwrap()
  3413. .info
  3414. .keypair,
  3415. vec![(context.first_slot_in_lighter_partition, bank_hash)],
  3416. smallest_ledger_path,
  3417. );
  3418. drop(smallest_blockstore);
  3419. break;
  3420. }
  3421. }
  3422. sleep(Duration::from_millis(ms_for_n_slots(
  3423. ((MAX_PROCESSING_AGE - distance_from_tip)
  3424. * total_slots_to_lighter_partition_ratio) as u64,
  3425. ticks_per_slot,
  3426. )));
  3427. }
  3428. // Restart the smallest validator that we killed earlier in `on_partition_start()`
  3429. cluster.restart_node(
  3430. &smallest_validator_key,
  3431. context.smallest_validator_info.take().unwrap(),
  3432. SocketAddrSpace::Unspecified,
  3433. );
  3434. // Now resolve partition, allow validator to see the fork with the heavier validator,
  3435. // but the fork it's currently on is the heaviest, if only its own vote landed!
  3436. };
  3437. // Check that new roots were set after the partition resolves (gives time
  3438. // for lockouts built during partition to resolve and gives validators an opportunity
  3439. // to try and switch forks)
  3440. let on_partition_resolved = |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  3441. // Wait until a root is made past the first slot on the correct fork
  3442. cluster.check_min_slot_is_rooted(
  3443. context.first_slot_in_lighter_partition,
  3444. "test_fork_choice_refresh_old_votes",
  3445. SocketAddrSpace::Unspecified,
  3446. );
  3447. // Check that the correct fork was rooted
  3448. let heaviest_ledger_path = cluster.ledger_path(&context.heaviest_validator_key);
  3449. let heaviest_blockstore = open_blockstore(&heaviest_ledger_path);
  3450. info!(
  3451. "checking that {} was rooted in {:?}",
  3452. context.first_slot_in_lighter_partition, heaviest_ledger_path
  3453. );
  3454. assert!(heaviest_blockstore.is_root(context.first_slot_in_lighter_partition));
  3455. };
  3456. run_kill_partition_switch_threshold(
  3457. &[(failures_stake as usize - 1, 0)],
  3458. partitions,
  3459. Some(ticks_per_slot),
  3460. PartitionContext::default(),
  3461. on_partition_start,
  3462. on_before_partition_resolved,
  3463. on_partition_resolved,
  3464. );
  3465. }
  3466. #[test]
  3467. #[serial]
  3468. fn test_kill_heaviest_partition() {
  3469. // This test:
  3470. // 1) Spins up four partitions, the heaviest being the first with more stake
  3471. // 2) Schedules the other validators for sufficient slots in the schedule
  3472. // so that they will still be locked out of voting for the major partition
  3473. // when the partition resolves
  3474. // 3) Kills the most staked partition. Validators are locked out, but should all
  3475. // eventually choose the major partition
  3476. // 4) Check for recovery
  3477. let num_slots_per_validator = 8;
  3478. let partitions: [usize; 4] = [
  3479. 11 * DEFAULT_NODE_STAKE as usize,
  3480. 10 * DEFAULT_NODE_STAKE as usize,
  3481. 10 * DEFAULT_NODE_STAKE as usize,
  3482. 10 * DEFAULT_NODE_STAKE as usize,
  3483. ];
  3484. let (leader_schedule, validator_keys) = create_custom_leader_schedule_with_random_keys(&[
  3485. num_slots_per_validator * (partitions.len() - 1),
  3486. num_slots_per_validator,
  3487. num_slots_per_validator,
  3488. num_slots_per_validator,
  3489. ]);
  3490. let empty = |_: &mut LocalCluster, _: &mut ()| {};
  3491. let validator_to_kill = validator_keys[0].pubkey();
  3492. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  3493. info!("Killing validator with id: {validator_to_kill}");
  3494. cluster.exit_node(&validator_to_kill);
  3495. cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  3496. };
  3497. run_cluster_partition(
  3498. &partitions,
  3499. Some((leader_schedule, validator_keys)),
  3500. (),
  3501. empty,
  3502. empty,
  3503. on_partition_resolved,
  3504. None,
  3505. vec![],
  3506. )
  3507. }
  3508. #[test]
  3509. #[serial]
  3510. fn test_kill_partition_switch_threshold_no_progress() {
  3511. let max_switch_threshold_failure_pct = 1.0 - 2.0 * SWITCH_FORK_THRESHOLD;
  3512. let total_stake = 10_000 * DEFAULT_NODE_STAKE;
  3513. let max_failures_stake = (max_switch_threshold_failure_pct * total_stake as f64) as u64;
  3514. let failures_stake = max_failures_stake;
  3515. let total_alive_stake = total_stake - failures_stake;
  3516. let alive_stake_1 = total_alive_stake / 2;
  3517. let alive_stake_2 = total_alive_stake - alive_stake_1;
  3518. // Check that no new roots were set 400 slots after partition resolves (gives time
  3519. // for lockouts built during partition to resolve and gives validators an opportunity
  3520. // to try and switch forks)
  3521. let on_partition_start =
  3522. |_: &mut LocalCluster, _: &[Pubkey], _: Vec<ClusterValidatorInfo>, _: &mut ()| {};
  3523. let on_before_partition_resolved = |_: &mut LocalCluster, _: &mut ()| {};
  3524. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  3525. cluster.check_no_new_roots(400, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  3526. };
  3527. // This kills `max_failures_stake`, so no progress should be made
  3528. run_kill_partition_switch_threshold(
  3529. &[(failures_stake as usize, 16)],
  3530. &[(alive_stake_1 as usize, 8), (alive_stake_2 as usize, 8)],
  3531. None,
  3532. (),
  3533. on_partition_start,
  3534. on_before_partition_resolved,
  3535. on_partition_resolved,
  3536. );
  3537. }
  3538. #[test]
  3539. #[serial]
  3540. fn test_kill_partition_switch_threshold_progress() {
  3541. let max_switch_threshold_failure_pct = 1.0 - 2.0 * SWITCH_FORK_THRESHOLD;
  3542. let total_stake = 10_000 * DEFAULT_NODE_STAKE;
  3543. // Kill `< max_failures_stake` of the validators
  3544. let max_failures_stake = (max_switch_threshold_failure_pct * total_stake as f64) as u64;
  3545. let failures_stake = max_failures_stake - 1;
  3546. let total_alive_stake = total_stake - failures_stake;
  3547. // Partition the remaining alive validators, should still make progress
  3548. // once the partition resolves
  3549. let alive_stake_1 = total_alive_stake / 2;
  3550. let alive_stake_2 = total_alive_stake - alive_stake_1;
  3551. let bigger = std::cmp::max(alive_stake_1, alive_stake_2);
  3552. let smaller = std::cmp::min(alive_stake_1, alive_stake_2);
// At least one of the forks must have > SWITCH_FORK_THRESHOLD in order
// to guarantee switching proofs can be created. Make sure the other fork
// is <= SWITCH_FORK_THRESHOLD to make sure progress can be made. Catches
// bugs such as liveness issues in bank-weighted fork choice, which may stall
// because the fork with less stake could have more weight, while the other fork would:
// 1) Not be able to generate a switching proof
// 2) Stop voting (despite being the more staked fork), so it doesn't catch up in bank weight.
  3560. assert!(
  3561. bigger as f64 / total_stake as f64 > SWITCH_FORK_THRESHOLD
  3562. && smaller as f64 / total_stake as f64 <= SWITCH_FORK_THRESHOLD
  3563. );
  3564. let on_partition_start =
  3565. |_: &mut LocalCluster, _: &[Pubkey], _: Vec<ClusterValidatorInfo>, _: &mut ()| {};
  3566. let on_before_partition_resolved = |_: &mut LocalCluster, _: &mut ()| {};
  3567. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  3568. cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  3569. };
  3570. run_kill_partition_switch_threshold(
  3571. &[(failures_stake as usize, 16)],
  3572. &[(alive_stake_1 as usize, 8), (alive_stake_2 as usize, 8)],
  3573. None,
  3574. (),
  3575. on_partition_start,
  3576. on_before_partition_resolved,
  3577. on_partition_resolved,
  3578. );
  3579. }
  3580. #[test]
  3581. #[serial]
  3582. #[allow(unused_attributes)]
  3583. fn test_duplicate_shreds_broadcast_leader() {
  3584. run_duplicate_shreds_broadcast_leader(true);
  3585. }
  3586. #[test]
  3587. #[serial]
  3588. #[ignore]
  3589. #[allow(unused_attributes)]
  3590. fn test_duplicate_shreds_broadcast_leader_ancestor_hashes() {
  3591. run_duplicate_shreds_broadcast_leader(false);
  3592. }
  3593. fn run_duplicate_shreds_broadcast_leader(vote_on_duplicate: bool) {
  3594. agave_logger::setup_with_default(RUST_LOG_FILTER);
  3595. // Create 4 nodes:
  3596. // 1) Bad leader sending different versions of shreds to both of the other nodes
3597. // 2) 1 node whose voting behavior in gossip we control
  3598. // 3) 1 validator gets the same version as the leader, will see duplicate confirmation
  3599. // 4) 1 validator will not get the same version as the leader. For each of these
  3600. // duplicate slots `S` either:
  3601. // a) The leader's version of `S` gets > DUPLICATE_THRESHOLD of votes in gossip and so this
  3602. // node will repair that correct version
  3603. // b) A descendant `D` of some version of `S` gets > DUPLICATE_THRESHOLD votes in gossip,
  3604. // but no version of `S` does. Then the node will not know to repair the right version
  3605. // by just looking at gossip, but will instead have to use EpochSlots repair after
3606. // detecting that a descendant does not chain to its version of `S` and marking that
3607. // descendant dead.
3608. // Scenarios a) or b) are triggered by our node in 2), whose voting behavior we control.
  3609. // Critical that bad_leader_stake + good_node_stake < DUPLICATE_THRESHOLD and that
  3610. // bad_leader_stake + good_node_stake + our_node_stake > DUPLICATE_THRESHOLD so that
  3611. // our vote is the determining factor.
  3612. //
  3613. // Also critical that bad_leader_stake > 1 - DUPLICATE_THRESHOLD, so that the leader
3614. // doesn't try to dump its own block, which will happen if:
3615. // 1. A version is duplicate confirmed
3616. // 2. The version it played/stored into blockstore isn't the one that is duplicate
3617. // confirmed.
  3618. let bad_leader_stake = 10_000_000 * DEFAULT_NODE_STAKE;
  3619. // Ensure that the good_node_stake is always on the critical path, and the partition node
  3620. // should never be on the critical path. This way, none of the bad shreds sent to the partition
  3621. // node corrupt the good node.
  3622. let good_node_stake = 500 * DEFAULT_NODE_STAKE;
  3623. let our_node_stake = 10_000_000 * DEFAULT_NODE_STAKE;
  3624. let partition_node_stake = DEFAULT_NODE_STAKE;
  3625. let node_stakes = vec![
  3626. bad_leader_stake,
  3627. partition_node_stake,
  3628. good_node_stake,
  3629. // Needs to be last in the vector, so that we can
  3630. // find the id of this node. See call to `test_faulty_node`
  3631. // below for more details.
  3632. our_node_stake,
  3633. ];
  3634. assert_eq!(*node_stakes.last().unwrap(), our_node_stake);
  3635. let total_stake: u64 = node_stakes.iter().sum();
  3636. assert!(
  3637. ((bad_leader_stake + good_node_stake) as f64 / total_stake as f64) < DUPLICATE_THRESHOLD
  3638. );
  3639. assert!(
  3640. (bad_leader_stake + good_node_stake + our_node_stake) as f64 / total_stake as f64
  3641. > DUPLICATE_THRESHOLD
  3642. );
  3643. assert!((bad_leader_stake as f64 / total_stake as f64) >= 1.0 - DUPLICATE_THRESHOLD);
  3644. // Important that the partition node stake is the smallest so that it gets selected
  3645. // for the partition.
  3646. assert!(partition_node_stake < our_node_stake && partition_node_stake < good_node_stake);
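// Worked example (a sketch, assuming DUPLICATE_THRESHOLD is roughly 0.52; the exact
// constant lives in the consensus code): total stake is ~20,000,501 nodes' worth, so
// bad_leader + good_node is ~50.0% (< threshold), adding our_node pushes it to ~100%
// (> threshold), and bad_leader alone is ~50.0% (>= 1 - threshold ~= 0.48), matching
// the three asserts above.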
  3647. let (duplicate_slot_sender, duplicate_slot_receiver) = unbounded();
  3648. // 1) Set up the cluster
  3649. let (mut cluster, validator_keys) = test_faulty_node(
  3650. BroadcastStageType::BroadcastDuplicates(BroadcastDuplicatesConfig {
  3651. partition: ClusterPartition::Stake(partition_node_stake),
  3652. duplicate_slot_sender: Some(duplicate_slot_sender),
  3653. }),
  3654. node_stakes,
  3655. None,
  3656. None,
  3657. );
  3658. // This is why it's important our node was last in `node_stakes`
  3659. let our_id = validator_keys.last().unwrap().pubkey();
  3660. // 2) Kill our node and start up a thread to simulate votes to control our voting behavior
  3661. let our_info = cluster.exit_node(&our_id);
  3662. let node_keypair = our_info.info.keypair;
  3663. let vote_keypair = our_info.info.voting_keypair;
  3664. let bad_leader_id = *cluster.entry_point_info.pubkey();
  3665. let bad_leader_ledger_path = cluster.validators[&bad_leader_id].info.ledger_path.clone();
  3666. info!("our node id: {}", node_keypair.pubkey());
  3667. // 3) Start up a gossip instance to listen for and push votes
  3668. let voter_thread_sleep_ms = 100;
  3669. let gossip_voter = cluster_tests::start_gossip_voter(
  3670. &cluster.entry_point_info.gossip().unwrap(),
  3671. &node_keypair,
  3672. move |(label, leader_vote_tx)| {
  3673. // Filter out votes not from the bad leader
  3674. if label.pubkey() == bad_leader_id {
  3675. let vote = vote_parser::parse_vote_transaction(&leader_vote_tx)
  3676. .map(|(_, vote, ..)| vote)
  3677. .unwrap();
  3678. // Filter out empty votes
  3679. if !vote.is_empty() {
  3680. Some((vote, leader_vote_tx))
  3681. } else {
  3682. None
  3683. }
  3684. } else {
  3685. None
  3686. }
  3687. },
  3688. {
  3689. let node_keypair = node_keypair.insecure_clone();
  3690. let vote_keypair = vote_keypair.insecure_clone();
  3691. let mut gossip_vote_index = 0;
  3692. let mut duplicate_slots = vec![];
  3693. move |latest_vote_slot, leader_vote_tx, parsed_vote, cluster_info| {
  3694. info!("received vote for {latest_vote_slot}");
  3695. // Add to EpochSlots. Mark all slots frozen between slot..=max_vote_slot.
  3696. let new_epoch_slots: Vec<Slot> = (0..latest_vote_slot + 1).collect();
  3697. info!("Simulating epoch slots from our node: {new_epoch_slots:?}");
  3698. cluster_info.push_epoch_slots(&new_epoch_slots);
  3699. for slot in duplicate_slot_receiver.try_iter() {
  3700. duplicate_slots.push(slot);
  3701. }
  3702. let vote_hash = parsed_vote.hash();
  3703. if vote_on_duplicate || !duplicate_slots.contains(&latest_vote_slot) {
  3704. info!(
  3705. "Simulating vote from our node on slot {latest_vote_slot}, hash \
  3706. {vote_hash}"
  3707. );
  3708. // Add all recent vote slots on this fork to allow cluster to pass
  3709. // vote threshold checks in replay. Note this will instantly force a
  3710. // root by this validator, but we're not concerned with lockout violations
  3711. // by this validator so it's fine.
  3712. let leader_blockstore = open_blockstore(&bad_leader_ledger_path);
  3713. let mut vote_slots: Vec<(Slot, u32)> =
  3714. AncestorIterator::new_inclusive(latest_vote_slot, &leader_blockstore)
  3715. .take(MAX_LOCKOUT_HISTORY)
  3716. .zip(1..)
  3717. .collect();
  3718. vote_slots.reverse();
  3719. let mut vote = TowerSync::from(vote_slots);
  3720. let root =
  3721. AncestorIterator::new_inclusive(latest_vote_slot, &leader_blockstore)
  3722. .nth(MAX_LOCKOUT_HISTORY);
  3723. vote.root = root;
  3724. vote.hash = vote_hash;
  3725. let vote_tx = vote_transaction::new_tower_sync_transaction(
  3726. vote,
  3727. leader_vote_tx.message.recent_blockhash,
  3728. &node_keypair,
  3729. &vote_keypair,
  3730. &vote_keypair,
  3731. None,
  3732. );
  3733. gossip_vote_index += 1;
  3734. gossip_vote_index %= MAX_VOTES;
  3735. cluster_info.push_vote_at_index(vote_tx, gossip_vote_index);
  3736. }
  3737. }
  3738. },
  3739. voter_thread_sleep_ms as u64,
  3740. cluster.validators.len().saturating_sub(1),
  3741. 5000, // Refresh if 5 seconds of inactivity
  3742. 5, // Refresh the past 5 votes
  3743. cluster.entry_point_info.shred_version(),
  3744. );
  3745. // 4) Check that the cluster is making progress
  3746. cluster.check_for_new_roots(
  3747. 16,
  3748. "test_duplicate_shreds_broadcast_leader",
  3749. SocketAddrSpace::Unspecified,
  3750. );
  3751. // Clean up threads
  3752. gossip_voter.close();
  3753. }
  3754. #[test]
  3755. #[serial]
  3756. #[ignore]
  3757. fn test_switch_threshold_uses_gossip_votes() {
  3758. agave_logger::setup_with_default(RUST_LOG_FILTER);
  3759. let total_stake = 100 * DEFAULT_NODE_STAKE;
  3760. // Minimum stake needed to generate a switching proof
  3761. let minimum_switch_stake = (SWITCH_FORK_THRESHOLD * total_stake as f64) as u64;
3762. // Make the heavier stake insufficient for switching so that the lighter validator
  3763. // cannot switch without seeing a vote from the dead/failure_stake validator.
  3764. let heavier_stake = minimum_switch_stake;
  3765. let lighter_stake = heavier_stake - 1;
  3766. let failures_stake = total_stake - heavier_stake - lighter_stake;
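// Sketch of the arithmetic above (added commentary, not part of the original test):
// the three stakes always re-assemble the full `total_stake`, the heavier partition
// sits exactly at the minimum switching stake, and the lighter one is one unit of
// stake short of it, so only a gossip vote from the dead stake can tip it over.
debug_assert_eq!(heavier_stake + lighter_stake + failures_stake, total_stake);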
  3767. let partitions: &[(usize, usize)] = &[(heavier_stake as usize, 8), (lighter_stake as usize, 8)];
  3768. #[derive(Default)]
  3769. struct PartitionContext {
  3770. heaviest_validator_key: Pubkey,
  3771. lighter_validator_key: Pubkey,
  3772. dead_validator_info: Option<ClusterValidatorInfo>,
  3773. }
  3774. let on_partition_start = |_cluster: &mut LocalCluster,
  3775. validator_keys: &[Pubkey],
  3776. mut dead_validator_infos: Vec<ClusterValidatorInfo>,
  3777. context: &mut PartitionContext| {
  3778. assert_eq!(dead_validator_infos.len(), 1);
  3779. context.dead_validator_info = Some(dead_validator_infos.pop().unwrap());
  3780. // validator_keys[0] is the validator that will be killed, i.e. the validator with
  3781. // stake == `failures_stake`
  3782. context.heaviest_validator_key = validator_keys[1];
  3783. context.lighter_validator_key = validator_keys[2];
  3784. };
  3785. let on_before_partition_resolved = |_: &mut LocalCluster, _: &mut PartitionContext| {};
  3786. // Check that new roots were set after the partition resolves (gives time
  3787. // for lockouts built during partition to resolve and gives validators an opportunity
  3788. // to try and switch forks)
  3789. let on_partition_resolved = |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  3790. let lighter_validator_ledger_path = cluster.ledger_path(&context.lighter_validator_key);
  3791. let heavier_validator_ledger_path = cluster.ledger_path(&context.heaviest_validator_key);
  3792. let (lighter_validator_latest_vote, _) = last_vote_in_tower(
  3793. &lighter_validator_ledger_path,
  3794. &context.lighter_validator_key,
  3795. )
  3796. .unwrap();
  3797. info!("Lighter validator's latest vote is for slot {lighter_validator_latest_vote}");
  3798. // Lighter partition should stop voting after detecting the heavier partition and try
  3799. // to switch. Loop until we see a greater vote by the heavier validator than the last
  3800. // vote made by the lighter validator on the lighter fork.
  3801. let mut heavier_validator_latest_vote;
  3802. let mut heavier_validator_latest_vote_hash;
  3803. let heavier_blockstore = open_blockstore(&heavier_validator_ledger_path);
  3804. loop {
  3805. let (sanity_check_lighter_validator_latest_vote, _) = last_vote_in_tower(
  3806. &lighter_validator_ledger_path,
  3807. &context.lighter_validator_key,
  3808. )
  3809. .unwrap();
  3810. // Lighter validator should stop voting, because `on_partition_resolved` is only
  3811. // called after a propagation time where blocks from the other fork should have
  3812. // finished propagating
  3813. assert_eq!(
  3814. sanity_check_lighter_validator_latest_vote,
  3815. lighter_validator_latest_vote
  3816. );
  3817. let (new_heavier_validator_latest_vote, new_heavier_validator_latest_vote_hash) =
  3818. last_vote_in_tower(
  3819. &heavier_validator_ledger_path,
  3820. &context.heaviest_validator_key,
  3821. )
  3822. .unwrap();
  3823. heavier_validator_latest_vote = new_heavier_validator_latest_vote;
  3824. heavier_validator_latest_vote_hash = new_heavier_validator_latest_vote_hash;
  3825. // Latest vote for each validator should be on different forks
  3826. assert_ne!(lighter_validator_latest_vote, heavier_validator_latest_vote);
  3827. if heavier_validator_latest_vote > lighter_validator_latest_vote {
  3828. let heavier_ancestors: HashSet<Slot> =
  3829. AncestorIterator::new(heavier_validator_latest_vote, &heavier_blockstore)
  3830. .collect();
  3831. assert!(!heavier_ancestors.contains(&lighter_validator_latest_vote));
  3832. break;
  3833. }
  3834. }
  3835. info!("Checking to make sure lighter validator doesn't switch");
  3836. let mut latest_slot = lighter_validator_latest_vote;
  3837. // Number of chances the validator had to switch votes but didn't
  3838. let mut total_voting_opportunities = 0;
  3839. while total_voting_opportunities <= 5 {
  3840. let (new_latest_slot, latest_slot_ancestors) =
  3841. find_latest_replayed_slot_from_ledger(&lighter_validator_ledger_path, latest_slot);
  3842. latest_slot = new_latest_slot;
  3843. // Ensure `latest_slot` is on the other fork
  3844. if latest_slot_ancestors.contains(&heavier_validator_latest_vote) {
  3845. let tower = restore_tower(
  3846. &lighter_validator_ledger_path,
  3847. &context.lighter_validator_key,
  3848. )
  3849. .unwrap();
  3850. // Check that there was an opportunity to vote
  3851. if !tower.is_locked_out(latest_slot, &latest_slot_ancestors) {
  3852. // Ensure the lighter blockstore has not voted again
  3853. let new_lighter_validator_latest_vote = tower.last_voted_slot().unwrap();
  3854. assert_eq!(
  3855. new_lighter_validator_latest_vote,
  3856. lighter_validator_latest_vote
  3857. );
  3858. info!("Incrementing voting opportunities: {total_voting_opportunities}");
  3859. total_voting_opportunities += 1;
  3860. } else {
  3861. info!("Tower still locked out, can't vote for slot: {latest_slot}");
  3862. }
  3863. } else if latest_slot > heavier_validator_latest_vote {
  3864. warn!(
  3865. "validator is still generating blocks on its own fork, last processed slot: \
  3866. {latest_slot}"
  3867. );
  3868. }
  3869. sleep(Duration::from_millis(50));
  3870. }
  3871. // Make a vote from the killed validator for slot `heavier_validator_latest_vote` in gossip
  3872. info!("Simulate vote for slot: {heavier_validator_latest_vote} from dead validator");
  3873. let vote_keypair = &context
  3874. .dead_validator_info
  3875. .as_ref()
  3876. .unwrap()
  3877. .info
  3878. .voting_keypair
  3879. .clone();
  3880. let node_keypair = &context
  3881. .dead_validator_info
  3882. .as_ref()
  3883. .unwrap()
  3884. .info
  3885. .keypair
  3886. .clone();
  3887. cluster_tests::submit_vote_to_cluster_gossip(
  3888. node_keypair,
  3889. vote_keypair,
  3890. heavier_validator_latest_vote,
  3891. heavier_validator_latest_vote_hash,
  3892. // Make the vote transaction with a random blockhash. Thus, the vote only lives in gossip but
  3893. // never makes it into a block
  3894. Hash::new_unique(),
  3895. cluster
  3896. .get_contact_info(&context.heaviest_validator_key)
  3897. .unwrap()
  3898. .gossip()
  3899. .unwrap(),
  3900. &SocketAddrSpace::Unspecified,
  3901. )
  3902. .unwrap();
  3903. loop {
  3904. // Wait for the lighter validator to switch to the heavier fork
  3905. let (new_lighter_validator_latest_vote, _) = last_vote_in_tower(
  3906. &lighter_validator_ledger_path,
  3907. &context.lighter_validator_key,
  3908. )
  3909. .unwrap();
  3910. if new_lighter_validator_latest_vote != lighter_validator_latest_vote {
  3911. info!(
  3912. "Lighter validator switched forks at slot: {new_lighter_validator_latest_vote}"
  3913. );
  3914. let (heavier_validator_latest_vote, _) = last_vote_in_tower(
  3915. &heavier_validator_ledger_path,
  3916. &context.heaviest_validator_key,
  3917. )
  3918. .unwrap();
  3919. let (smaller, larger) =
  3920. if new_lighter_validator_latest_vote > heavier_validator_latest_vote {
  3921. (
  3922. heavier_validator_latest_vote,
  3923. new_lighter_validator_latest_vote,
  3924. )
  3925. } else {
  3926. (
  3927. new_lighter_validator_latest_vote,
  3928. heavier_validator_latest_vote,
  3929. )
  3930. };
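// Added note: by the time the lighter validator switches, its new vote may land on a
// slot either older or newer than the heavier validator's last recorded vote, so the
// two slots are ordered before walking ancestors below.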
  3931. // Check the new vote is on the same fork as the heaviest fork
  3932. let heavier_blockstore = open_blockstore(&heavier_validator_ledger_path);
  3933. let larger_slot_ancestors: HashSet<Slot> =
  3934. AncestorIterator::new(larger, &heavier_blockstore)
  3935. .chain(std::iter::once(larger))
  3936. .collect();
  3937. assert!(larger_slot_ancestors.contains(&smaller));
  3938. break;
  3939. } else {
  3940. sleep(Duration::from_millis(50));
  3941. }
  3942. }
  3943. };
  3944. let ticks_per_slot = 8;
  3945. run_kill_partition_switch_threshold(
  3946. &[(failures_stake as usize, 0)],
  3947. partitions,
  3948. Some(ticks_per_slot),
  3949. PartitionContext::default(),
  3950. on_partition_start,
  3951. on_before_partition_resolved,
  3952. on_partition_resolved,
  3953. );
  3954. }
  3955. #[test]
  3956. #[serial]
  3957. fn test_listener_startup() {
  3958. let mut config = ClusterConfig {
  3959. node_stakes: vec![DEFAULT_NODE_STAKE],
  3960. num_listeners: 3,
  3961. validator_configs: make_identical_validator_configs(
  3962. &ValidatorConfig::default_for_test(),
  3963. 1,
  3964. ),
  3965. ..ClusterConfig::default()
  3966. };
  3967. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  3968. let cluster_nodes = discover_validators(
  3969. &cluster.entry_point_info.gossip().unwrap(),
  3970. 4,
  3971. cluster.entry_point_info.shred_version(),
  3972. SocketAddrSpace::Unspecified,
  3973. )
  3974. .unwrap();
  3975. assert_eq!(cluster_nodes.len(), 4);
  3976. }
  3977. fn find_latest_replayed_slot_from_ledger(
  3978. ledger_path: &Path,
  3979. mut latest_slot: Slot,
  3980. ) -> (Slot, HashSet<Slot>) {
  3981. loop {
  3982. let mut blockstore = open_blockstore(ledger_path);
  3983. // This is kind of a hack because we can't query for new frozen blocks over RPC
  3984. // since the validator is not voting.
  3985. let new_latest_slots: Vec<Slot> = blockstore
  3986. .slot_meta_iterator(latest_slot)
  3987. .unwrap()
  3988. .filter_map(|(s, _)| if s > latest_slot { Some(s) } else { None })
  3989. .collect();
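// Added note: `slot_meta_iterator` yields slots in ascending order, so the first
// surviving entry (if any) is the next slot the validator produced after `latest_slot`.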
  3990. if let Some(new_latest_slot) = new_latest_slots.first() {
  3991. latest_slot = *new_latest_slot;
  3992. info!("Checking latest_slot {latest_slot}");
  3993. // Wait for the slot to be fully received by the validator
  3994. loop {
  3995. info!("Waiting for slot {latest_slot} to be full");
  3996. if blockstore.is_full(latest_slot) {
  3997. break;
  3998. } else {
  3999. sleep(Duration::from_millis(50));
  4000. blockstore = open_blockstore(ledger_path);
  4001. }
  4002. }
  4003. // Wait for the slot to be replayed
  4004. loop {
  4005. info!("Waiting for slot {latest_slot} to be replayed");
  4006. if blockstore.get_bank_hash(latest_slot).is_some() {
  4007. return (
  4008. latest_slot,
  4009. AncestorIterator::new(latest_slot, &blockstore).collect(),
  4010. );
  4011. } else {
  4012. sleep(Duration::from_millis(50));
  4013. blockstore = open_blockstore(ledger_path);
  4014. }
  4015. }
  4016. }
  4017. sleep(Duration::from_millis(50));
  4018. }
  4019. }
  4020. #[test]
  4021. #[serial]
  4022. fn test_cluster_partition_1_1() {
  4023. let empty = |_: &mut LocalCluster, _: &mut ()| {};
  4024. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  4025. cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  4026. };
  4027. run_cluster_partition(
  4028. &[1, 1],
  4029. None,
  4030. (),
  4031. empty,
  4032. empty,
  4033. on_partition_resolved,
  4034. None,
  4035. vec![],
  4036. )
  4037. }
  4038. #[test]
  4039. #[serial]
  4040. fn test_cluster_partition_1_1_1() {
  4041. let empty = |_: &mut LocalCluster, _: &mut ()| {};
  4042. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  4043. cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  4044. };
  4045. run_cluster_partition(
  4046. &[1, 1, 1],
  4047. None,
  4048. (),
  4049. empty,
  4050. empty,
  4051. on_partition_resolved,
  4052. None,
  4053. vec![],
  4054. )
  4055. }
  4056. #[test]
  4057. #[serial]
  4058. fn test_leader_failure_4() {
  4059. agave_logger::setup_with_default(RUST_LOG_FILTER);
  4060. error!("test_leader_failure_4");
  4061. // Cluster needs a supermajority to remain even after taking 1 node offline,
  4062. // so the minimum number of nodes for this test is 4.
  4063. let num_nodes = 4;
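// Quick sketch of the supermajority math (added commentary): with equal stakes, killing
// one of `num_nodes` nodes leaves (num_nodes - 1) / num_nodes of the stake online;
// 3/4 = 75% still clears a 2/3 supermajority, whereas with only three nodes the
// remaining 2/3 would sit exactly at the boundary.
debug_assert!((num_nodes - 1) * 3 > num_nodes * 2);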
  4064. let mut validator_config = ValidatorConfig::default_for_test();
  4065. validator_config.wait_for_supermajority = Some(0);
  4066. // Embed vote and stake account in genesis to avoid waiting for stake
  4067. // activation and race conditions around accepting gossip votes, repairing
  4068. // blocks, etc. before we advance through too many epochs.
  4069. let validator_keys: Option<Vec<(Arc<Keypair>, bool)>> = Some(
  4070. (0..num_nodes)
  4071. .map(|_| (Arc::new(Keypair::new()), true))
  4072. .collect(),
  4073. );
  4074. // Skip the warmup slots because these short epochs can cause problems when
  4075. // bringing multiple fresh validators online that are pre-staked in genesis.
  4076. // The problems arise because we skip their leader slots while they're still
  4077. // starting up, experience partitioning, and can fail to generate leader
  4078. // schedules in time because the short epochs have the same slots per epoch
  4079. // as the total tower height, so any skipped slots can lead to not rooting,
  4080. // not generating leader schedule, and stalling the cluster.
  4081. let skip_warmup_slots = true;
  4082. let mut config = ClusterConfig {
  4083. node_stakes: vec![DEFAULT_NODE_STAKE; num_nodes],
  4084. validator_configs: make_identical_validator_configs(&validator_config, num_nodes),
  4085. validator_keys,
  4086. skip_warmup_slots,
  4087. ..ClusterConfig::default()
  4088. };
  4089. let local = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  4090. cluster_tests::kill_entry_and_spend_and_verify_rest(
  4091. &local.entry_point_info,
  4092. &local
  4093. .validators
  4094. .get(local.entry_point_info.pubkey())
  4095. .unwrap()
  4096. .config
  4097. .validator_exit,
  4098. &local.funding_keypair,
  4099. &local.connection_cache,
  4100. num_nodes,
  4101. config.ticks_per_slot * config.poh_config.target_tick_duration.as_millis() as u64,
  4102. SocketAddrSpace::Unspecified,
  4103. );
  4104. }
  4105. // This test verifies that even if votes from a validator end up taking too long to land, and thus
4106. // some of the referenced slots are no longer present in the slot hashes sysvar,
  4107. // consensus can still be attained.
  4108. //
  4109. // Validator A (60%)
  4110. // Validator B (40%)
  4111. // / --- 10 --- [..] --- 16 (B is voting, due to network issues is initially not able to see the other fork at all)
  4112. // /
4113. // 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 (A votes on 1 - 9 and its votes land normally. B does the same; however, its votes do not land)
  4114. // \
  4115. // \--[..]-- 73 (majority fork)
  4116. // A is voting on the majority fork and B wants to switch to this fork however in this majority fork
  4117. // the earlier votes for B (1 - 9) never landed so when B eventually goes to vote on 73, slots in
  4118. // its local vote state are no longer present in slot hashes.
  4119. //
  4120. // 1. Wait for B's tower to see local vote state was updated to new fork
  4121. // 2. Wait X blocks, check B's vote state on chain has been properly updated
  4122. //
4123. // NOTE: it is not reliable for B's vote on slot 1 to organically reach 2^16 lockout, so we simulate the 6
  4124. // consecutive votes on the minor fork by manually incrementing the confirmation levels for the
  4125. // common ancestor votes in tower.
  4126. // To allow this test to run in a reasonable time we change the
  4127. // slot_hash expiry to 64 slots.
  4128. #[test]
  4129. #[serial]
  4130. fn test_slot_hash_expiry() {
  4131. agave_logger::setup_with_default(RUST_LOG_FILTER);
  4132. solana_slot_hashes::set_entries_for_tests_only(64);
  4133. let slots_per_epoch = 2048;
  4134. let node_stakes = vec![60 * DEFAULT_NODE_STAKE, 40 * DEFAULT_NODE_STAKE];
  4135. let validator_keys = [
  4136. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  4137. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  4138. ]
  4139. .iter()
  4140. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  4141. .collect::<Vec<_>>();
  4142. let node_vote_keys = [
  4143. "3NDQ3ud86RTVg8hTy2dDWnS4P8NfjhZ2gDgQAJbr3heaKaUVS1FW3sTLKA1GmDrY9aySzsa4QxpDkbLv47yHxzr3",
  4144. "46ZHpHE6PEvXYPu3hf9iQqjBk2ZNDaJ9ejqKWHEjxaQjpAGasKaWKbKHbP3646oZhfgDRzx95DH9PCBKKsoCVngk",
  4145. ]
  4146. .iter()
  4147. .map(|s| Arc::new(Keypair::from_base58_string(s)))
  4148. .collect::<Vec<_>>();
  4149. let vs = validator_keys
  4150. .iter()
  4151. .map(|(kp, _)| kp.pubkey())
  4152. .collect::<Vec<_>>();
  4153. let (a_pubkey, b_pubkey) = (vs[0], vs[1]);
  4154. // We want B to not vote (we are trying to simulate its votes not landing until it gets to the
  4155. // minority fork)
  4156. let mut validator_config = ValidatorConfig::default_for_test();
  4157. validator_config.wait_for_supermajority = Some(0);
  4158. let mut validator_configs =
  4159. make_identical_validator_configs(&validator_config, node_stakes.len());
  4160. validator_configs[1].voting_disabled = true;
  4161. let mut config = ClusterConfig {
  4162. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  4163. node_stakes,
  4164. validator_configs,
  4165. validator_keys: Some(validator_keys),
  4166. node_vote_keys: Some(node_vote_keys),
  4167. slots_per_epoch,
  4168. stakers_slot_offset: slots_per_epoch,
  4169. skip_warmup_slots: true,
  4170. ..ClusterConfig::default()
  4171. };
  4172. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  4173. let mut common_ancestor_slot = 8;
  4174. let a_ledger_path = cluster.ledger_path(&a_pubkey);
  4175. let b_ledger_path = cluster.ledger_path(&b_pubkey);
  4176. // Immediately kill B (we just needed it for the initial stake distribution)
  4177. info!("Killing B");
  4178. let mut b_info = cluster.exit_node(&b_pubkey);
  4179. // Let A run for a while until we get to the common ancestor
  4180. info!("Letting A run until common_ancestor_slot");
  4181. loop {
  4182. if let Some((last_vote, _)) = last_vote_in_tower(&a_ledger_path, &a_pubkey) {
  4183. if last_vote >= common_ancestor_slot {
  4184. break;
  4185. }
  4186. }
  4187. sleep(Duration::from_millis(100));
  4188. }
  4189. // Keep A running, but setup B so that it thinks it has voted up until common ancestor (but
  4190. // doesn't know anything past that)
  4191. {
  4192. info!("Copying A's ledger to B");
  4193. std::fs::remove_dir_all(&b_info.info.ledger_path).unwrap();
  4194. let mut opt = fs_extra::dir::CopyOptions::new();
  4195. opt.copy_inside = true;
  4196. fs_extra::dir::copy(&a_ledger_path, &b_ledger_path, &opt).unwrap();
  4197. // remove A's tower in B's new copied ledger
  4198. info!("Removing A's tower in B's ledger dir");
  4199. remove_tower(&b_ledger_path, &a_pubkey);
  4200. // load A's tower and save it as B's tower
  4201. info!("Loading A's tower");
  4202. if let Some(mut a_tower) = restore_tower(&a_ledger_path, &a_pubkey) {
  4203. a_tower.node_pubkey = b_pubkey;
  4204. // Update common_ancestor_slot because A is still running
  4205. if let Some(s) = a_tower.last_voted_slot() {
  4206. common_ancestor_slot = s;
  4207. info!("New common_ancestor_slot {common_ancestor_slot}");
  4208. } else {
  4209. panic!("A's tower has no votes");
  4210. }
  4211. info!("Increase lockout by 6 confirmation levels and save as B's tower");
  4212. a_tower.increase_lockout(6);
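// Added note (a sketch of the lockout math, not from the original): each additional
// confirmation level doubles a vote's lockout, so raising the confirmation levels by 6
// multiplies the affected lockouts by 2^6 = 64, which happens to equal the 64-slot
// slot-hashes expiry configured at the top of this test.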
  4213. save_tower(&b_ledger_path, &a_tower, &b_info.info.keypair);
  4214. info!("B's new tower: {:?}", a_tower.tower_slots());
  4215. } else {
  4216. panic!("A's tower is missing");
  4217. }
  4218. // Get rid of any slots past common_ancestor_slot
  4219. info!("Removing extra slots from B's blockstore");
  4220. let blockstore = open_blockstore(&b_ledger_path);
  4221. purge_slots_with_count(&blockstore, common_ancestor_slot + 1, 100);
  4222. }
  4223. info!(
  4224. "Run A on majority fork until it reaches slot hash expiry {}",
  4225. solana_slot_hashes::get_entries()
  4226. );
  4227. let mut last_vote_on_a;
  4228. // Keep A running for a while longer so the majority fork has some decent size
  4229. loop {
  4230. last_vote_on_a =
  4231. wait_for_last_vote_in_tower_to_land_in_ledger(&a_ledger_path, &a_pubkey).unwrap();
  4232. if last_vote_on_a >= common_ancestor_slot + 2 * (solana_slot_hashes::get_entries() as u64) {
  4233. let blockstore = open_blockstore(&a_ledger_path);
  4234. info!(
  4235. "A majority fork: {:?}",
  4236. AncestorIterator::new(last_vote_on_a, &blockstore).collect::<Vec<Slot>>()
  4237. );
  4238. break;
  4239. }
  4240. sleep(Duration::from_millis(100));
  4241. }
  4242. // Kill A and restart B with voting. B should now fork off
  4243. info!("Killing A");
  4244. let a_info = cluster.exit_node(&a_pubkey);
  4245. info!("Restarting B");
  4246. b_info.config.voting_disabled = false;
  4247. cluster.restart_node(&b_pubkey, b_info, SocketAddrSpace::Unspecified);
  4248. // B will fork off and accumulate enough lockout
  4249. info!("Allowing B to fork");
  4250. loop {
  4251. let blockstore = open_blockstore(&b_ledger_path);
  4252. let last_vote =
  4253. wait_for_last_vote_in_tower_to_land_in_ledger(&b_ledger_path, &b_pubkey).unwrap();
  4254. let mut ancestors = AncestorIterator::new(last_vote, &blockstore);
  4255. if let Some(index) = ancestors.position(|x| x == common_ancestor_slot) {
  4256. if index > 7 {
  4257. info!(
  4258. "B has forked for enough lockout: {:?}",
  4259. AncestorIterator::new(last_vote, &blockstore).collect::<Vec<Slot>>()
  4260. );
  4261. break;
  4262. }
  4263. }
  4264. sleep(Duration::from_millis(1000));
  4265. }
  4266. info!("Kill B");
  4267. b_info = cluster.exit_node(&b_pubkey);
  4268. info!("Resolve the partition");
  4269. {
  4270. // Here we let B know about the missing blocks that A had produced on its partition
  4271. let a_blockstore = open_blockstore(&a_ledger_path);
  4272. let b_blockstore = open_blockstore(&b_ledger_path);
  4273. copy_blocks(last_vote_on_a, &a_blockstore, &b_blockstore, false);
  4274. }
  4275. // Now restart A and B and see if B is able to eventually switch onto the majority fork
  4276. info!("Restarting A & B");
  4277. cluster.restart_node(&a_pubkey, a_info, SocketAddrSpace::Unspecified);
  4278. cluster.restart_node(&b_pubkey, b_info, SocketAddrSpace::Unspecified);
  4279. info!("Waiting for B to switch to majority fork and make a root");
  4280. cluster_tests::check_for_new_roots(
  4281. 16,
  4282. &[cluster.get_contact_info(&a_pubkey).unwrap().clone()],
  4283. &cluster.connection_cache,
  4284. "test_slot_hashes_expiry",
  4285. );
  4286. }
  4287. // This test simulates a case where a leader sends a duplicate block with different ancestry. One
4288. // version builds off of the rooted path, while the other version builds off of a pruned branch. The
  4289. // validators that receive the pruned version will need to repair in order to continue, which
  4290. // requires an ancestor hashes repair.
  4291. //
  4292. // We setup 3 validators:
  4293. // - majority, will produce the rooted path
  4294. // - minority, will produce the pruned path
  4295. // - our_node, will be fed the pruned version of the duplicate block and need to repair
  4296. //
  4297. // Additionally we setup 3 observer nodes to propagate votes and participate in the ancestor hashes
  4298. // sample.
  4299. //
  4300. // Fork structure:
  4301. //
  4302. // 0 - 1 - ... - 10 (fork slot) - 30 - ... - 61 (rooted path) - ...
  4303. // |
  4304. // |- 11 - ... - 29 (pruned path) - 81'
  4305. //
  4306. //
  4307. // Steps:
  4308. // 1) Different leader schedule, minority thinks it produces 0-29 and majority rest, majority
  4309. // thinks minority produces all blocks. This is to avoid majority accidentally producing blocks
  4310. // before it shuts down.
  4311. // 2) Start cluster, kill our_node.
  4312. // 3) Kill majority cluster after it votes for any slot > fork slot (guarantees that the fork slot is
  4313. // reached as minority cannot pass threshold otherwise).
  4314. // 4) Let minority produce forks on pruned forks until out of leader slots then kill.
  4315. // 5) Truncate majority ledger past fork slot so it starts building off of fork slot.
  4316. // 6) Restart majority and wait until it starts producing blocks on main fork and roots something
  4317. // past the fork slot.
  4318. // 7) Construct our ledger by copying majority ledger and copying blocks from minority for the pruned path.
  4319. // 8) In our node's ledger, change the parent of the latest slot in majority fork to be the latest
  4320. // slot in the minority fork (simulates duplicate built off of pruned block)
4321. // 9) Start our node, which will prune the minority fork on ledger replay, and verify that we can make roots.
  4322. //
  4323. #[test]
  4324. #[serial]
  4325. #[ignore]
  4326. fn test_duplicate_with_pruned_ancestor() {
  4327. agave_logger::setup_with("info,solana_metrics=off");
  4328. solana_core::repair::duplicate_repair_status::set_ancestor_hash_repair_sample_size_for_tests_only(3);
  4329. let majority_leader_stake = 10_000_000 * DEFAULT_NODE_STAKE;
  4330. let minority_leader_stake = 2_000_000 * DEFAULT_NODE_STAKE;
  4331. let our_node = DEFAULT_NODE_STAKE;
  4332. let observer_stake = DEFAULT_NODE_STAKE;
  4333. let slots_per_epoch = 2048;
  4334. let fork_slot: u64 = 10;
  4335. let fork_length: u64 = 20;
  4336. let majority_fork_buffer = 5;
  4337. let mut node_stakes = vec![majority_leader_stake, minority_leader_stake, our_node];
  4338. // We need enough observers to reach `ANCESTOR_HASH_REPAIR_SAMPLE_SIZE`
  4339. node_stakes.append(&mut vec![observer_stake; 3]);
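// Added check (sketch): three observers are appended so the cluster can reach the
// ancestor-hashes repair sample size of 3 configured above; the debug_assert just
// documents the resulting node count (3 staked roles + 3 observers).
debug_assert_eq!(node_stakes.len(), 6);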
  4340. let num_nodes = node_stakes.len();
  4341. let validator_keys = [
  4342. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  4343. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  4344. "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
  4345. ]
  4346. .iter()
  4347. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  4348. .chain(std::iter::repeat_with(|| (Arc::new(Keypair::new()), true)))
  4349. .take(node_stakes.len())
  4350. .collect::<Vec<_>>();
  4351. let validators = validator_keys
  4352. .iter()
  4353. .map(|(kp, _)| kp.pubkey())
  4354. .collect::<Vec<_>>();
  4355. let (majority_pubkey, minority_pubkey, our_node_pubkey) =
  4356. (validators[0], validators[1], validators[2]);
  4357. let mut default_config = ValidatorConfig::default_for_test();
  4358. // Minority fork is leader long enough to create pruned fork
  4359. let validator_to_slots = vec![
  4360. (minority_pubkey, (fork_slot + fork_length) as usize),
  4361. (majority_pubkey, slots_per_epoch as usize),
  4362. ];
  4363. let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
  4364. default_config.fixed_leader_schedule = Some(FixedSchedule {
  4365. leader_schedule: Arc::new(leader_schedule),
  4366. });
  4367. let mut validator_configs = make_identical_validator_configs(&default_config, num_nodes);
  4368. // Don't let majority produce anything past the fork by tricking its leader schedule
  4369. validator_configs[0].fixed_leader_schedule = Some(FixedSchedule {
  4370. leader_schedule: Arc::new(create_custom_leader_schedule(
  4371. [(minority_pubkey, slots_per_epoch as usize)].into_iter(),
  4372. )),
  4373. });
  4374. let mut config = ClusterConfig {
  4375. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  4376. node_stakes,
  4377. validator_configs,
  4378. validator_keys: Some(validator_keys),
  4379. slots_per_epoch,
  4380. stakers_slot_offset: slots_per_epoch,
  4381. skip_warmup_slots: true,
  4382. ..ClusterConfig::default()
  4383. };
  4384. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  4385. let majority_ledger_path = cluster.ledger_path(&majority_pubkey);
  4386. let minority_ledger_path = cluster.ledger_path(&minority_pubkey);
  4387. let our_node_ledger_path = cluster.ledger_path(&our_node_pubkey);
  4388. info!("majority {majority_pubkey} ledger path {majority_ledger_path:?}");
  4389. info!("minority {minority_pubkey} ledger path {minority_ledger_path:?}");
  4390. info!("our_node {our_node_pubkey} ledger path {our_node_ledger_path:?}");
  4391. info!("Killing our node");
  4392. let our_node_info = cluster.exit_node(&our_node_pubkey);
  4393. info!("Waiting on majority validator to vote on at least {fork_slot}");
  4394. let now = Instant::now();
  4395. let mut last_majority_vote = 0;
  4396. loop {
  4397. let elapsed = now.elapsed();
  4398. assert!(
  4399. elapsed <= Duration::from_secs(30),
  4400. "Majority validator failed to vote on a slot >= {fork_slot} in {} secs, majority \
  4401. validator last vote: {last_majority_vote}",
  4402. elapsed.as_secs(),
  4403. );
  4404. sleep(Duration::from_millis(100));
  4405. if let Some((last_vote, _)) = last_vote_in_tower(&majority_ledger_path, &majority_pubkey) {
  4406. last_majority_vote = last_vote;
  4407. if last_vote >= fork_slot {
  4408. break;
  4409. }
  4410. }
  4411. }
  4412. info!("Killing majority validator, waiting for minority fork to reach a depth of at least 15");
  4413. let mut majority_validator_info = cluster.exit_node(&majority_pubkey);
  4414. let now = Instant::now();
  4415. let mut last_minority_vote = 0;
  4416. while last_minority_vote < fork_slot + 15 {
  4417. let elapsed = now.elapsed();
  4418. assert!(
  4419. elapsed <= Duration::from_secs(30),
  4420. "Minority validator failed to create a fork of depth >= {} in 15 secs, \
  4421. last_minority_vote: {last_minority_vote}",
  4422. elapsed.as_secs(),
  4423. );
  4424. if let Some((last_vote, _)) = last_vote_in_tower(&minority_ledger_path, &minority_pubkey) {
  4425. last_minority_vote = last_vote;
  4426. }
  4427. }
  4428. info!("Killing minority validator, fork created successfully: {last_minority_vote:?}");
  4429. let last_minority_vote =
  4430. wait_for_last_vote_in_tower_to_land_in_ledger(&minority_ledger_path, &minority_pubkey)
  4431. .unwrap();
  4432. let minority_validator_info = cluster.exit_node(&minority_pubkey);
  4433. info!("Truncating majority validator ledger to {fork_slot}");
  4434. {
  4435. remove_tower(&majority_ledger_path, &majority_pubkey);
  4436. let blockstore = open_blockstore(&majority_ledger_path);
  4437. purge_slots_with_count(&blockstore, fork_slot + 1, 100);
  4438. }
  4439. info!("Restarting majority validator");
  4440. // Make sure we don't send duplicate votes
  4441. majority_validator_info.config.wait_to_vote_slot = Some(fork_slot + fork_length);
  4442. // Fix the leader schedule so we can produce blocks
  4443. majority_validator_info
  4444. .config
  4445. .fixed_leader_schedule
  4446. .clone_from(&minority_validator_info.config.fixed_leader_schedule);
  4447. cluster.restart_node(
  4448. &majority_pubkey,
  4449. majority_validator_info,
  4450. SocketAddrSpace::Unspecified,
  4451. );
  4452. let mut last_majority_root = 0;
  4453. let now = Instant::now();
  4454. info!(
  4455. "Waiting for majority validator to root something past {}",
  4456. fork_slot + fork_length + majority_fork_buffer
  4457. );
  4458. while last_majority_root <= fork_slot + fork_length + majority_fork_buffer {
  4459. let elapsed = now.elapsed();
  4460. assert!(
  4461. elapsed <= Duration::from_secs(60),
  4462. "Majority validator failed to root something > {} in {} secs, last majority validator \
  4463. vote: {last_majority_vote}",
  4464. fork_slot + fork_length + majority_fork_buffer,
  4465. elapsed.as_secs(),
  4466. );
  4467. sleep(Duration::from_millis(100));
  4468. if let Some(last_root) = last_root_in_tower(&majority_ledger_path, &majority_pubkey) {
  4469. last_majority_root = last_root;
  4470. }
  4471. }
  4472. let last_majority_vote =
  4473. wait_for_last_vote_in_tower_to_land_in_ledger(&majority_ledger_path, &majority_pubkey)
  4474. .unwrap();
  4475. info!(
  4476. "Creating duplicate block built off of pruned branch for our node. Last majority vote \
  4477. {last_majority_vote}, Last minority vote {last_minority_vote}"
  4478. );
  4479. {
  4480. {
  4481. // Copy majority fork
  4482. std::fs::remove_dir_all(&our_node_info.info.ledger_path).unwrap();
  4483. let mut opt = fs_extra::dir::CopyOptions::new();
  4484. opt.copy_inside = true;
  4485. fs_extra::dir::copy(&majority_ledger_path, &our_node_ledger_path, &opt).unwrap();
  4486. remove_tower(&our_node_ledger_path, &majority_pubkey);
  4487. }
  4488. // Copy minority fork to our blockstore
  4489. // Set trusted=true in blockstore copy to skip the parent slot >= latest root check;
  4490. // this check would otherwise prevent the pruned fork from being inserted
  4491. let minority_blockstore = open_blockstore(&minority_validator_info.info.ledger_path);
  4492. let our_blockstore = open_blockstore(&our_node_info.info.ledger_path);
  4493. copy_blocks(
  4494. last_minority_vote,
  4495. &minority_blockstore,
  4496. &our_blockstore,
  4497. true,
  4498. );
  4499. // Change last block parent to chain off of (purged) minority fork
  4500. info!("For our node, changing parent of {last_majority_vote} to {last_minority_vote}");
  4501. our_blockstore.clear_unconfirmed_slot(last_majority_vote);
  4502. let entries = create_ticks(
  4503. 64 * (std::cmp::max(1, last_majority_vote - last_minority_vote)),
  4504. 0,
  4505. Hash::default(),
  4506. );
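// Added note: the factor of 64 above is presumably one default slot's worth of ticks
// per slot of gap between the new parent and the re-parented block (an assumption of
// this comment, not enforced here), so the fabricated block carries a plausible tick
// count when inserted into blockstore.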
  4507. let shreds =
  4508. entries_to_test_shreds(&entries, last_majority_vote, last_minority_vote, true, 0);
  4509. our_blockstore.insert_shreds(shreds, None, false).unwrap();
  4510. }
  4511. // Actual test, `our_node` will replay the minority fork, then the majority fork which will
  4512. // prune the minority fork. Then finally the problematic block will be skipped (not replayed)
  4513. // because its parent has been pruned from bank forks. Meanwhile the majority validator has
  4514. // continued making blocks and voting, duplicate confirming everything. This will cause the
  4515. // pruned fork to become popular triggering an ancestor hashes repair, eventually allowing our
  4516. // node to dump & repair & continue making roots.
  4517. info!("Restarting our node, verifying that our node is making roots past the duplicate block");
  4518. cluster.restart_node(
  4519. &our_node_pubkey,
  4520. our_node_info,
  4521. SocketAddrSpace::Unspecified,
  4522. );
  4523. cluster_tests::check_for_new_roots(
  4524. 16,
  4525. &[cluster.get_contact_info(&our_node_pubkey).unwrap().clone()],
  4526. &cluster.connection_cache,
  4527. "test_duplicate_with_pruned_ancestor",
  4528. );
  4529. }
  4530. /// Test fastboot to ensure a node can boot from local state and still produce correct snapshots
  4531. ///
  4532. /// 1. Start node 1 and wait for it to take snapshots
  4533. /// 2. Start node 2 with the snapshots from (1)
  4534. /// 3. Wait for node 2 to take a bank snapshot
  4535. /// 4. Restart node 2 with the local state from (3)
  4536. /// 5. Wait for node 2 to take new snapshots
  4537. /// 6. Start node 3 with the snapshots from (5)
  4538. /// 7. Wait for node 3 to take new snapshots
4539. /// 8. Ensure the snapshots from (7) match those of nodes 1 and 2
  4540. #[test]
  4541. #[serial]
  4542. fn test_boot_from_local_state() {
  4543. agave_logger::setup_with_default("error,local_cluster=info");
  4544. const FULL_SNAPSHOT_INTERVAL: SnapshotInterval =
  4545. SnapshotInterval::Slots(NonZeroU64::new(100).unwrap());
  4546. const INCREMENTAL_SNAPSHOT_INTERVAL: SnapshotInterval =
  4547. SnapshotInterval::Slots(NonZeroU64::new(10).unwrap());
  4548. let validator1_config =
  4549. SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 2);
  4550. let validator2_config =
  4551. SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 4);
  4552. let validator3_config =
  4553. SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 3);
  4554. let mut cluster_config = ClusterConfig {
  4555. node_stakes: vec![100 * DEFAULT_NODE_STAKE],
  4556. validator_configs: make_identical_validator_configs(&validator1_config.validator_config, 1),
  4557. ..ClusterConfig::default()
  4558. };
  4559. let mut cluster = LocalCluster::new(&mut cluster_config, SocketAddrSpace::Unspecified);
  4560. // in order to boot from local state, need to first have snapshot archives
  4561. info!("Waiting for validator1 to create snapshots...");
  4562. let (incremental_snapshot_archive, full_snapshot_archive) =
  4563. LocalCluster::wait_for_next_incremental_snapshot(
  4564. &cluster,
  4565. &validator1_config.full_snapshot_archives_dir,
  4566. &validator1_config.incremental_snapshot_archives_dir,
  4567. Some(Duration::from_secs(5 * 60)),
  4568. );
  4569. debug!(
  4570. "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
  4571. {incremental_snapshot_archive:?}"
  4572. );
  4573. info!("Waiting for validator1 to create snapshots... DONE");
  4574. info!("Copying snapshots to validator2...");
  4575. std::fs::copy(
  4576. full_snapshot_archive.path(),
  4577. validator2_config
  4578. .full_snapshot_archives_dir
  4579. .path()
  4580. .join(full_snapshot_archive.path().file_name().unwrap()),
  4581. )
  4582. .unwrap();
  4583. std::fs::copy(
  4584. incremental_snapshot_archive.path(),
  4585. validator2_config
  4586. .incremental_snapshot_archives_dir
  4587. .path()
  4588. .join(incremental_snapshot_archive.path().file_name().unwrap()),
  4589. )
  4590. .unwrap();
  4591. info!("Copying snapshots to validator2... DONE");
  4592. info!("Starting validator2...");
  4593. let validator2_identity = Arc::new(Keypair::new());
  4594. cluster.add_validator(
  4595. &validator2_config.validator_config,
  4596. DEFAULT_NODE_STAKE,
  4597. validator2_identity.clone(),
  4598. None,
  4599. SocketAddrSpace::Unspecified,
  4600. );
  4601. info!("Starting validator2... DONE");
  4602. // wait for a new bank snapshot to fastboot from that is newer than its snapshot archives
  4603. info!("Waiting for validator2 to create a new bank snapshot...");
  4604. let timer = Instant::now();
  4605. let bank_snapshot = loop {
  4606. if let Some(bank_snapshot) =
  4607. snapshot_utils::get_highest_bank_snapshot(&validator2_config.bank_snapshots_dir)
  4608. {
  4609. if bank_snapshot.slot > incremental_snapshot_archive.slot() {
  4610. break bank_snapshot;
  4611. }
  4612. }
  4613. assert!(
  4614. timer.elapsed() < Duration::from_secs(30),
  4615. "It should not take longer than 30 seconds to create a new bank snapshot"
  4616. );
  4617. std::thread::yield_now();
  4618. };
  4619. debug!("bank snapshot: {bank_snapshot:?}");
  4620. info!("Waiting for validator2 to create a new bank snapshot... DONE");
  4621. // restart WITH fastboot
  4622. info!("Restarting validator2 from local state...");
  4623. let mut validator2_info = cluster.exit_node(&validator2_identity.pubkey());
  4624. validator2_info.config.use_snapshot_archives_at_startup = UseSnapshotArchivesAtStartup::Never;
  4625. cluster.restart_node(
  4626. &validator2_identity.pubkey(),
  4627. validator2_info,
  4628. SocketAddrSpace::Unspecified,
  4629. );
  4630. info!("Restarting validator2 from local state... DONE");
  4631. info!("Waiting for validator2 to create snapshots...");
  4632. let (incremental_snapshot_archive, full_snapshot_archive) =
  4633. LocalCluster::wait_for_next_incremental_snapshot(
  4634. &cluster,
  4635. &validator2_config.full_snapshot_archives_dir,
  4636. &validator2_config.incremental_snapshot_archives_dir,
  4637. Some(Duration::from_secs(5 * 60)),
  4638. );
  4639. debug!(
  4640. "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
  4641. {incremental_snapshot_archive:?}"
  4642. );
  4643. info!("Waiting for validator2 to create snapshots... DONE");
  4644. info!("Copying snapshots to validator3...");
  4645. std::fs::copy(
  4646. full_snapshot_archive.path(),
  4647. validator3_config
  4648. .full_snapshot_archives_dir
  4649. .path()
  4650. .join(full_snapshot_archive.path().file_name().unwrap()),
  4651. )
  4652. .unwrap();
  4653. std::fs::copy(
  4654. incremental_snapshot_archive.path(),
  4655. validator3_config
  4656. .incremental_snapshot_archives_dir
  4657. .path()
  4658. .join(incremental_snapshot_archive.path().file_name().unwrap()),
  4659. )
  4660. .unwrap();
  4661. info!("Copying snapshots to validator3... DONE");
  4662. info!("Starting validator3...");
  4663. let validator3_identity = Arc::new(Keypair::new());
  4664. cluster.add_validator(
  4665. &validator3_config.validator_config,
  4666. DEFAULT_NODE_STAKE,
  4667. validator3_identity,
  4668. None,
  4669. SocketAddrSpace::Unspecified,
  4670. );
  4671. info!("Starting validator3... DONE");
  4672. // wait for a new snapshot to ensure the validator is making roots
  4673. info!("Waiting for validator3 to create snapshots...");
  4674. let (incremental_snapshot_archive, full_snapshot_archive) =
  4675. LocalCluster::wait_for_next_incremental_snapshot(
  4676. &cluster,
  4677. &validator3_config.full_snapshot_archives_dir,
  4678. &validator3_config.incremental_snapshot_archives_dir,
  4679. Some(Duration::from_secs(5 * 60)),
  4680. );
  4681. debug!(
  4682. "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
  4683. {incremental_snapshot_archive:?}"
  4684. );
  4685. info!("Waiting for validator3 to create snapshots... DONE");
  4686. // Ensure that all validators have the correct state by comparing snapshots.
4687. // Since validator1 has been running the longest, it may be ahead of the others,
  4688. // so use it as the comparison for others.
  4689. // - wait for validator1 to take new snapshots
  4690. // - wait for the other validators to have high enough snapshots
  4691. // - ensure the other validators' snapshots match validator1's
  4692. //
  4693. // NOTE: There's a chance validator 2 or 3 has crossed the next full snapshot past what
  4694. // validator 1 has. If that happens, validator 2 or 3 may have purged the snapshots needed
  4695. // to compare with validator 1, and thus assert. If that happens, the full snapshot interval
  4696. // may need to be adjusted larger.
  4697. info!("Waiting for validator1 to create snapshots...");
  4698. let (incremental_snapshot_archive, full_snapshot_archive) =
  4699. LocalCluster::wait_for_next_incremental_snapshot(
  4700. &cluster,
  4701. &validator1_config.full_snapshot_archives_dir,
  4702. &validator1_config.incremental_snapshot_archives_dir,
  4703. Some(Duration::from_secs(5 * 60)),
  4704. );
  4705. debug!(
  4706. "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
  4707. {incremental_snapshot_archive:?}"
  4708. );
  4709. info!("Waiting for validator1 to create snapshots... DONE");
  4710. // These structs are used to provide better error logs if the asserts below are violated.
  4711. // The `allow(dead_code)` annotation is to appease clippy, which thinks the field is unused...
  4712. #[allow(dead_code)]
  4713. #[derive(Debug)]
  4714. struct SnapshotSlot(Slot);
  4715. #[allow(dead_code)]
  4716. #[derive(Debug)]
  4717. struct BaseSlot(Slot);
  4718. for (i, other_validator_config) in [(2, &validator2_config), (3, &validator3_config)] {
  4719. info!("Checking if validator{i} has the same snapshots as validator1...");
  4720. let timer = Instant::now();
  4721. loop {
  4722. if let Some(other_full_snapshot_slot) =
  4723. snapshot_utils::get_highest_full_snapshot_archive_slot(
  4724. &other_validator_config.full_snapshot_archives_dir,
  4725. )
  4726. {
  4727. let other_incremental_snapshot_slot =
  4728. snapshot_utils::get_highest_incremental_snapshot_archive_slot(
  4729. &other_validator_config.incremental_snapshot_archives_dir,
  4730. other_full_snapshot_slot,
  4731. );
  4732. if other_full_snapshot_slot >= full_snapshot_archive.slot()
  4733. && other_incremental_snapshot_slot >= Some(incremental_snapshot_archive.slot())
  4734. {
  4735. break;
  4736. }
  4737. }
  4738. assert!(
  4739. timer.elapsed() < Duration::from_secs(60),
  4740. "It should not take longer than 60 seconds to take snapshots",
  4741. );
  4742. std::thread::yield_now();
  4743. }
  4744. let other_full_snapshot_archives = snapshot_utils::get_full_snapshot_archives(
  4745. &other_validator_config.full_snapshot_archives_dir,
  4746. );
  4747. debug!("validator{i} full snapshot archives: {other_full_snapshot_archives:?}");
  4748. assert!(
  4749. other_full_snapshot_archives
  4750. .iter()
  4751. .any(
  4752. |other_full_snapshot_archive| other_full_snapshot_archive.slot()
  4753. == full_snapshot_archive.slot()
  4754. && other_full_snapshot_archive.hash() == full_snapshot_archive.hash()
  4755. ),
  4756. "full snapshot archive does not match!\n validator1: {:?}\n validator{i}: {:?}",
  4757. (
  4758. SnapshotSlot(full_snapshot_archive.slot()),
  4759. full_snapshot_archive.hash(),
  4760. ),
  4761. other_full_snapshot_archives
  4762. .iter()
  4763. .sorted_unstable()
  4764. .rev()
  4765. .map(|snap| (SnapshotSlot(snap.slot()), snap.hash()))
  4766. .collect::<Vec<_>>(),
  4767. );
  4768. let other_incremental_snapshot_archives = snapshot_utils::get_incremental_snapshot_archives(
  4769. &other_validator_config.incremental_snapshot_archives_dir,
  4770. );
  4771. debug!(
  4772. "validator{i} incremental snapshot archives: {other_incremental_snapshot_archives:?}"
  4773. );
  4774. assert!(
  4775. other_incremental_snapshot_archives
  4776. .iter()
  4777. .any(
  4778. |other_incremental_snapshot_archive| other_incremental_snapshot_archive
  4779. .base_slot()
  4780. == incremental_snapshot_archive.base_slot()
  4781. && other_incremental_snapshot_archive.slot()
  4782. == incremental_snapshot_archive.slot()
  4783. && other_incremental_snapshot_archive.hash()
  4784. == incremental_snapshot_archive.hash()
  4785. ),
  4786. "incremental snapshot archive does not match!\n validator1: {:?}\n validator{i}: \
  4787. {:?}",
  4788. (
  4789. BaseSlot(incremental_snapshot_archive.base_slot()),
  4790. SnapshotSlot(incremental_snapshot_archive.slot()),
  4791. incremental_snapshot_archive.hash(),
  4792. ),
  4793. other_incremental_snapshot_archives
  4794. .iter()
  4795. .sorted_unstable()
  4796. .rev()
  4797. .map(|snap| (
  4798. BaseSlot(snap.base_slot()),
  4799. SnapshotSlot(snap.slot()),
  4800. snap.hash(),
  4801. ))
  4802. .collect::<Vec<_>>(),
  4803. );
  4804. info!("Checking if validator{i} has the same snapshots as validator1... DONE");
  4805. }
  4806. }
  4807. /// Test fastboot to ensure a node can boot in case it crashed while archiving a full snapshot
  4808. ///
  4809. /// 1. Start a node and wait for it to take at least two full snapshots and one more
  4810. /// bank snapshot POST afterwards (for simplicity, wait for 2 full and 1 incremental).
  4811. /// 2. To simulate a node crashing while archiving a full snapshot, stop the node and
  4812. /// then delete the latest full snapshot archive.
  4813. /// 3. Restart the node. This should succeed, and boot from the older full snapshot archive,
  4814. /// *not* the latest bank snapshot POST.
  4815. /// 4. Take another incremental snapshot. This ensures the correct snapshot was loaded,
  4816. /// AND ensures the correct accounts hashes are present (which are needed when making
  4817. /// the bank snapshot POST for the new incremental snapshot).
#[test]
#[serial]
fn test_boot_from_local_state_missing_archive() {
    agave_logger::setup_with_default(RUST_LOG_FILTER);
    const FULL_SNAPSHOT_INTERVAL: SnapshotInterval =
        SnapshotInterval::Slots(NonZeroU64::new(20).unwrap());
    const INCREMENTAL_SNAPSHOT_INTERVAL: SnapshotInterval =
        SnapshotInterval::Slots(NonZeroU64::new(10).unwrap());

    let validator_config =
        SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 7);
    let mut cluster_config = ClusterConfig {
        node_stakes: vec![100 * DEFAULT_NODE_STAKE],
        validator_configs: make_identical_validator_configs(&validator_config.validator_config, 1),
        ..ClusterConfig::default()
    };
    let mut cluster = LocalCluster::new(&mut cluster_config, SocketAddrSpace::Unspecified);

    // we need two full snapshots and an incremental snapshot for this test
    info!("Waiting for validator to create snapshots...");
    LocalCluster::wait_for_next_full_snapshot(
        &cluster,
        &validator_config.full_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    LocalCluster::wait_for_next_full_snapshot(
        &cluster,
        &validator_config.full_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    LocalCluster::wait_for_next_incremental_snapshot(
        &cluster,
        &validator_config.full_snapshot_archives_dir,
        &validator_config.incremental_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    debug!(
        "snapshot archives:\n\tfull: {:?}\n\tincr: {:?}",
        snapshot_utils::get_full_snapshot_archives(
            validator_config.full_snapshot_archives_dir.path()
        ),
        snapshot_utils::get_incremental_snapshot_archives(
            validator_config.incremental_snapshot_archives_dir.path()
        ),
    );
    info!("Waiting for validator to create snapshots... DONE");

    // now delete the latest full snapshot archive and restart, to simulate a crash while archiving
    // a full snapshot package
    info!("Stopping validator...");
    let validator_pubkey = cluster.get_node_pubkeys()[0];
    let mut validator_info = cluster.exit_node(&validator_pubkey);
    info!("Stopping validator... DONE");

    info!("Deleting latest full snapshot archive...");
    let highest_full_snapshot = snapshot_utils::get_highest_full_snapshot_archive_info(
        validator_config.full_snapshot_archives_dir.path(),
    )
    .unwrap();
    fs::remove_file(highest_full_snapshot.path()).unwrap();
    info!("Deleting latest full snapshot archive... DONE");

    info!("Restarting validator...");
    // if we set this to `Never`, the validator should not boot
    validator_info.config.use_snapshot_archives_at_startup =
        UseSnapshotArchivesAtStartup::WhenNewest;
    cluster.restart_node(
        &validator_pubkey,
        validator_info,
        SocketAddrSpace::Unspecified,
    );
    info!("Restarting validator... DONE");

    // ensure we can create new incremental snapshots, since that is what used to fail
    info!("Waiting for validator to create snapshots...");
    LocalCluster::wait_for_next_incremental_snapshot(
        &cluster,
        &validator_config.full_snapshot_archives_dir,
        &validator_config.incremental_snapshot_archives_dir,
        Some(Duration::from_secs(5 * 60)),
    );
    info!("Waiting for validator to create snapshots... DONE");
}

// We want to simulate the following:
//      /--- 1 --- 3 (duplicate block)
//    0
//      \--- 2
//
// 1. > DUPLICATE_THRESHOLD of the nodes vote on some version of the duplicate block 3,
//    but don't immediately duplicate confirm so they remove 3 from fork choice and reset PoH back to 1.
// 2. All the votes on 3 don't land because there are no further blocks building off 3.
// 3. Some < SWITCHING_THRESHOLD of nodes vote on 2, making it the heaviest fork because no votes on 3 landed.
// 4. Nodes then see duplicate confirmation on 3.
// 5. Unless somebody builds off of 3 to include the duplicate confirmed votes, 2 will still be the heaviest.
//    However, because 2 has < SWITCHING_THRESHOLD of the votes, people who voted on 3 can't switch, leading to a
//    stall.
#[test]
#[serial]
#[allow(unused_attributes)]
#[ignore]
fn test_duplicate_shreds_switch_failure() {
    fn wait_for_duplicate_fork_frozen(ledger_path: &Path, dup_slot: Slot) -> Hash {
        // Ensure all the slots <= dup_slot are also full so we know we can replay up to dup_slot
        // on restart
        info!("Waiting to receive and replay entire duplicate fork with tip {dup_slot}");
        loop {
            let duplicate_fork_validator_blockstore = open_blockstore(ledger_path);
            if let Some(frozen_hash) = duplicate_fork_validator_blockstore.get_bank_hash(dup_slot) {
                return frozen_hash;
            }
            sleep(Duration::from_millis(1000));
        }
    }

    fn clear_ledger_and_tower(ledger_path: &Path, pubkey: &Pubkey, start_slot: Slot) {
        remove_tower_if_exists(ledger_path, pubkey);
        let blockstore = open_blockstore(ledger_path);
        purge_slots_with_count(&blockstore, start_slot, 1000);
        {
            // Remove all duplicate proofs so that this validator will vote on the `dup_slot`.
            while let Some((proof_slot, _)) = blockstore.get_first_duplicate_proof() {
                blockstore.remove_slot_duplicate_proof(proof_slot).unwrap();
            }
        }
    }

    fn restart_dup_validator(
        cluster: &mut LocalCluster,
        mut duplicate_fork_validator_info: ClusterValidatorInfo,
        pubkey: &Pubkey,
        dup_slot: Slot,
        dup_shred1: &Shred,
        dup_shred2: &Shred,
    ) {
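        // Restart with turbine disabled so the node stays partitioned from the rest of the
        // cluster until after it has voted on `dup_slot`; the partition is lifted further below.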
        let disable_turbine = Arc::new(AtomicBool::new(true));
        duplicate_fork_validator_info.config.voting_disabled = false;
        duplicate_fork_validator_info.config.turbine_disabled = disable_turbine.clone();
        info!("Restarting node: {pubkey}");
        cluster.restart_node(
            pubkey,
            duplicate_fork_validator_info,
            SocketAddrSpace::Unspecified,
        );
        let ledger_path = cluster.ledger_path(pubkey);

        // Lift the partition after `pubkey` votes on the `dup_slot`
        info!("Waiting on duplicate fork to vote on duplicate slot: {dup_slot}");
        loop {
            let last_vote = last_vote_in_tower(&ledger_path, pubkey);
            if let Some((latest_vote_slot, _hash)) = last_vote {
                info!("latest vote: {latest_vote_slot}");
                if latest_vote_slot == dup_slot {
                    break;
                }
            }
            sleep(Duration::from_millis(1000));
        }
        disable_turbine.store(false, Ordering::Relaxed);

        // Send the validator the other version of the shred so they realize it's duplicate
        info!("Resending duplicate shreds to duplicate fork validator");
        cluster.send_shreds_to_validator(vec![dup_shred1, dup_shred2], pubkey);

        // Check the validator detected a duplicate proof
        info!("Waiting on duplicate fork validator to see duplicate shreds and make a proof",);
        loop {
            let duplicate_fork_validator_blockstore = open_blockstore(&ledger_path);
            if let Some(dup_proof) = duplicate_fork_validator_blockstore.get_first_duplicate_proof()
            {
                assert_eq!(dup_proof.0, dup_slot);
                break;
            }
            sleep(Duration::from_millis(1000));
        }
    }

    agave_logger::setup_with_default(RUST_LOG_FILTER);
    let validator_keypairs = [
        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
        "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
        "2XFPyuzPuXMsPnkH98UNcQpfA7M4b2TUhRxcWEoWjy4M6ojQ7HGJSvotktEVbaq49Qxt16wUjdqvSJc6ecbFfZwj",
    ]
    .iter()
    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
    .collect::<Vec<_>>();
    let validators = validator_keypairs
        .iter()
        .map(|(kp, _)| kp.pubkey())
        .collect::<Vec<_>>();

    // Create 4 nodes:
    // 1) Two nodes that sum to > DUPLICATE_THRESHOLD but < 2/3+ supermajority. It's important both
    //    of them individually have <= DUPLICATE_THRESHOLD to avoid duplicate confirming their own blocks
    //    immediately upon voting
    // 2) One with <= SWITCHING_THRESHOLD so that validator from 1) can't switch to it
    // 3) One bad leader to make duplicate slots
    let total_stake = 100 * DEFAULT_NODE_STAKE;
    let target_switch_fork_stake = (total_stake as f64 * SWITCH_FORK_THRESHOLD) as u64;
    // duplicate_fork_node1_stake + duplicate_fork_node2_stake > DUPLICATE_THRESHOLD. Don't want
    // one node with > DUPLICATE_THRESHOLD, otherwise they will automatically duplicate confirm a
    // slot when they vote, which will prevent them from resetting to an earlier ancestor when they
    // later discover that slot as duplicate.
    let duplicate_fork_node1_stake = (total_stake as f64 * DUPLICATE_THRESHOLD) as u64;
    let duplicate_fork_node2_stake = 1;
    let duplicate_leader_stake = total_stake
        - target_switch_fork_stake
        - duplicate_fork_node1_stake
        - duplicate_fork_node2_stake;
    assert!(
        duplicate_fork_node1_stake + duplicate_fork_node2_stake
            > (total_stake as f64 * DUPLICATE_THRESHOLD) as u64
    );
    assert!(duplicate_fork_node1_stake <= (total_stake as f64 * DUPLICATE_THRESHOLD) as u64);
    assert!(duplicate_fork_node2_stake <= (total_stake as f64 * DUPLICATE_THRESHOLD) as u64);

    let node_stakes = vec![
        duplicate_leader_stake,
        target_switch_fork_stake,
        duplicate_fork_node1_stake,
        duplicate_fork_node2_stake,
    ];
    let (
        // Has to be first in order to be picked as the duplicate leader
        duplicate_leader_validator_pubkey,
        target_switch_fork_validator_pubkey,
        duplicate_fork_validator1_pubkey,
        duplicate_fork_validator2_pubkey,
    ) = (validators[0], validators[1], validators[2], validators[3]);

    info!(
        "duplicate_fork_validator1_pubkey: {duplicate_fork_validator1_pubkey}, \
         duplicate_fork_validator2_pubkey: {duplicate_fork_validator2_pubkey}, \
         target_switch_fork_validator_pubkey: {target_switch_fork_validator_pubkey}, \
         duplicate_leader_validator_pubkey: {duplicate_leader_validator_pubkey}",
    );

    let validator_to_slots = vec![
        (duplicate_leader_validator_pubkey, 50),
        (target_switch_fork_validator_pubkey, 5),
        // The ideal sequence of events for the `duplicate_fork_validator1_pubkey` validator would go:
        // 1. Vote for duplicate block `D`
        // 2. See `D` is duplicate, remove from fork choice and reset to ancestor `A`, potentially generating a fork off that ancestor
        // 3. See `D` is duplicate confirmed, but because of the bug fixed by https://github.com/solana-labs/solana/pull/28172
        //    where we disallow resetting to a slot which matches the last vote slot, we still don't build off `D`,
        //    and continue building on `A`.
        //
        // The `target_switch_fork_validator_pubkey` fork is necessary in 2. to force the validator to stall trying to switch
        // vote to that other fork, and to prevent the validator from making a freebie vote from `A` and allowing consensus to continue.
        // It's important we don't give the `duplicate_fork_validator1_pubkey` leader slots until a certain number
        // of slots have elapsed to ensure:
        // 1. We have ample time so that it doesn't have a chance to make a block until after 2., when it sees the block is duplicate.
        //    Otherwise, it will build its block on top of the duplicate block, which will possibly include a vote for the duplicate block.
        //    We want to avoid this because this will make fork choice pick the duplicate block.
        // 2. The `duplicate_fork_validator1_pubkey` sees the target switch fork before it can make another vote
        //    on any forks it generates from `A` itself. Otherwise, it will make a freebie vote on its own fork from `A` and
        //    consensus will continue on that fork.
        //
        // Give the duplicate fork validator plenty of leader slots after the initial delay in order to:
        // 1. Prevent the switch fork from getting locked out for too long
        // 2. Provide a lot of consecutive slots in which to build up lockout in tower and make new roots
        //    to resolve the partition
        (duplicate_fork_validator1_pubkey, 500),
    ];
    let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());

    // 1) Set up the cluster
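    // Channel over which the faulty leader's broadcast stage reports the slot it produced
    // duplicate versions for; the test blocks on the receiver further below.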
    let (duplicate_slot_sender, duplicate_slot_receiver) = unbounded();
    let validator_configs = validator_keypairs
        .into_iter()
        .map(|(validator_keypair, in_genesis)| {
            let pubkey = validator_keypair.pubkey();
            // Only allow the leader to vote so that no version gets duplicate confirmed.
            // This is to avoid the leader dumping his own block.
            let voting_disabled = { pubkey != duplicate_leader_validator_pubkey };
            ValidatorTestConfig {
                validator_keypair,
                validator_config: ValidatorConfig {
                    voting_disabled,
                    ..ValidatorConfig::default_for_test()
                },
                in_genesis,
            }
        })
        .collect();
    let (mut cluster, _validator_keypairs) = test_faulty_node(
        BroadcastStageType::BroadcastDuplicates(BroadcastDuplicatesConfig {
            partition: ClusterPartition::Pubkey(vec![
                // Don't include the other dup validator here, otherwise
                // this dup version will have enough to be duplicate confirmed and
                // will cause the dup leader to try and dump its own slot,
                // crashing before it can signal the duplicate slot via the
                // `duplicate_slot_receiver` below
                duplicate_fork_validator1_pubkey,
            ]),
            duplicate_slot_sender: Some(duplicate_slot_sender),
        }),
        node_stakes,
        Some(validator_configs),
        Some(FixedSchedule {
            leader_schedule: Arc::new(leader_schedule),
        }),
    );

    // Kill two validators that might duplicate confirm the duplicate block
    info!("Killing unnecessary validators");
    let duplicate_fork_validator2_ledger_path =
        cluster.ledger_path(&duplicate_fork_validator2_pubkey);
    let duplicate_fork_validator2_info = cluster.exit_node(&duplicate_fork_validator2_pubkey);
    let target_switch_fork_validator_ledger_path =
        cluster.ledger_path(&target_switch_fork_validator_pubkey);
    let mut target_switch_fork_validator_info =
        cluster.exit_node(&target_switch_fork_validator_pubkey);

    // 2) Wait for a duplicate slot to land on both validators and for the target switch
    //    fork validator to get another version of the slot. Also ensure all versions of
    //    the block are playable
    let dup_slot = duplicate_slot_receiver
        .recv_timeout(Duration::from_millis(30_000))
        .expect("Duplicate leader failed to make a duplicate slot in allotted time");

    // Make sure both validators received and replayed the complete blocks
    let dup_frozen_hash = wait_for_duplicate_fork_frozen(
        &cluster.ledger_path(&duplicate_fork_validator1_pubkey),
        dup_slot,
    );
    let original_frozen_hash = wait_for_duplicate_fork_frozen(
        &cluster.ledger_path(&duplicate_leader_validator_pubkey),
        dup_slot,
    );
    assert_ne!(
        original_frozen_hash, dup_frozen_hash,
        "Duplicate leader and partition target got same hash: {original_frozen_hash}",
    );

    // 3) Force `duplicate_fork_validator1_pubkey` to see a duplicate proof
    info!("Waiting for duplicate proof for slot: {dup_slot}");
    let duplicate_proof = {
        // Grab the other version of the slot from the `duplicate_leader_validator_pubkey`
        // which we confirmed to have a different version of the frozen hash in the loop
        // above
        let ledger_path = cluster.ledger_path(&duplicate_leader_validator_pubkey);
        let blockstore = open_blockstore(&ledger_path);
        let dup_shred = blockstore
            .get_data_shreds_for_slot(dup_slot, 0)
            .unwrap()
            .pop()
            .unwrap();
        info!(
            "Sending duplicate shred: {:?} to {:?}",
            dup_shred.signature(),
            duplicate_fork_validator1_pubkey
        );
        cluster.send_shreds_to_validator(vec![&dup_shred], &duplicate_fork_validator1_pubkey);
        wait_for_duplicate_proof(
            &cluster.ledger_path(&duplicate_fork_validator1_pubkey),
            dup_slot,
        )
        .unwrap_or_else(|| panic!("Duplicate proof for slot {dup_slot} not found"))
    };

    // 4) Kill all the validators
    info!("Killing remaining validators");
    let duplicate_fork_validator1_ledger_path =
        cluster.ledger_path(&duplicate_fork_validator1_pubkey);
    let duplicate_fork_validator1_info = cluster.exit_node(&duplicate_fork_validator1_pubkey);
    let duplicate_leader_ledger_path = cluster.ledger_path(&duplicate_leader_validator_pubkey);
    cluster.exit_node(&duplicate_leader_validator_pubkey);
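
    // Reconstruct both shred versions from the duplicate proof; they are replayed to the
    // duplicate fork validators after restart so each of them also detects the duplicate.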
    let dup_shred1 = Shred::new_from_serialized_shred(duplicate_proof.shred1.clone()).unwrap();
    let dup_shred2 = Shred::new_from_serialized_shred(duplicate_proof.shred2).unwrap();
    assert_eq!(dup_shred1.slot(), dup_shred2.slot());
    assert_eq!(dup_shred1.slot(), dup_slot);

    // Purge everything including the `dup_slot` from the `target_switch_fork_validator_pubkey`
    info!("Purging towers and ledgers for: {target_switch_fork_validator_pubkey:?}");
    Blockstore::destroy(&target_switch_fork_validator_ledger_path).unwrap();
    {
        let blockstore1 = open_blockstore(&duplicate_leader_ledger_path);
        let blockstore2 = open_blockstore(&target_switch_fork_validator_ledger_path);
        copy_blocks(dup_slot, &blockstore1, &blockstore2, false);
    }
    clear_ledger_and_tower(
        &target_switch_fork_validator_ledger_path,
        &target_switch_fork_validator_pubkey,
        dup_slot,
    );

    info!("Purging towers and ledgers for: {duplicate_fork_validator1_pubkey:?}");
    clear_ledger_and_tower(
        &duplicate_fork_validator1_ledger_path,
        &duplicate_fork_validator1_pubkey,
        dup_slot + 1,
    );

    info!("Purging towers and ledgers for: {duplicate_fork_validator2_pubkey:?}");
    // Copy validator 1's ledger to validator 2 so that they have the same version
    // of the duplicate slot
    clear_ledger_and_tower(
        &duplicate_fork_validator2_ledger_path,
        &duplicate_fork_validator2_pubkey,
        dup_slot,
    );
    Blockstore::destroy(&duplicate_fork_validator2_ledger_path).unwrap();
    {
        let blockstore1 = open_blockstore(&duplicate_fork_validator1_ledger_path);
        let blockstore2 = open_blockstore(&duplicate_fork_validator2_ledger_path);
        copy_blocks(dup_slot, &blockstore1, &blockstore2, false);
    }

    // Set entrypoint to `target_switch_fork_validator_pubkey` so we can run discovery in gossip even without the
    // bad leader
    cluster.set_entry_point(target_switch_fork_validator_info.info.contact_info.clone());

    // 5) Restart `target_switch_fork_validator_pubkey`, and ensure they vote on their own leader slot
    //    that's not descended from the duplicate slot
    info!("Restarting switch fork node");
    target_switch_fork_validator_info.config.voting_disabled = false;
    cluster.restart_node(
        &target_switch_fork_validator_pubkey,
        target_switch_fork_validator_info,
        SocketAddrSpace::Unspecified,
    );
    let target_switch_fork_validator_ledger_path =
        cluster.ledger_path(&target_switch_fork_validator_pubkey);

    info!("Waiting for switch fork to make block past duplicate fork");
    loop {
        let last_vote = wait_for_last_vote_in_tower_to_land_in_ledger(
            &target_switch_fork_validator_ledger_path,
            &target_switch_fork_validator_pubkey,
        );
        if let Some(latest_vote_slot) = last_vote {
            if latest_vote_slot > dup_slot {
                let blockstore = open_blockstore(&target_switch_fork_validator_ledger_path);
                let ancestor_slots: HashSet<Slot> =
                    AncestorIterator::new_inclusive(latest_vote_slot, &blockstore).collect();
                assert!(ancestor_slots.contains(&latest_vote_slot));
                assert!(ancestor_slots.contains(&0));
                assert!(!ancestor_slots.contains(&dup_slot));
                break;
            }
        }
        sleep(Duration::from_millis(1000));
    }

    // Now restart the duplicate validators
    // Start the node with partition enabled so they don't see the `target_switch_fork_validator_pubkey`
    // before voting on the duplicate block
    info!("Restarting duplicate fork node");
    // Ensure `duplicate_fork_validator1_pubkey` votes before starting up `duplicate_fork_validator2_pubkey`
    // to prevent them seeing `dup_slot` as duplicate confirmed before voting.
    restart_dup_validator(
        &mut cluster,
        duplicate_fork_validator1_info,
        &duplicate_fork_validator1_pubkey,
        dup_slot,
        &dup_shred1,
        &dup_shred2,
    );
    restart_dup_validator(
        &mut cluster,
        duplicate_fork_validator2_info,
        &duplicate_fork_validator2_pubkey,
        dup_slot,
        &dup_shred1,
        &dup_shred2,
    );

    // Wait for the `duplicate_fork_validator1_pubkey` to make another leader block on top
    // of the duplicate fork which includes their own vote for `dup_block`. This
    // should make the duplicate fork the heaviest
    info!("Waiting on duplicate fork validator to generate block on top of duplicate fork",);
    loop {
        let duplicate_fork_validator_blockstore =
            open_blockstore(&cluster.ledger_path(&duplicate_fork_validator1_pubkey));
        let meta = duplicate_fork_validator_blockstore
            .meta(dup_slot)
            .unwrap()
            .unwrap();
        if !meta.next_slots.is_empty() {
            info!(
                "duplicate fork validator saw new slots: {:?} on top of duplicate slot",
                meta.next_slots
            );
            break;
        }
        sleep(Duration::from_millis(1000));
    }

    // Check that the cluster is making progress
    cluster.check_for_new_roots(
        16,
        "test_duplicate_shreds_switch_failure",
        SocketAddrSpace::Unspecified,
    );
}

#[test]
#[serial]
fn test_randomly_mixed_block_verification_methods_between_bootstrap_and_not() {
    // tailored logging just to see that the two block verification methods are working correctly
    agave_logger::setup_with_default(
        "solana_metrics::metrics=warn,solana_core=warn,\
         solana_runtime::installed_scheduler_pool=trace,solana_ledger::blockstore_processor=debug,\
         info",
    );

    let num_nodes = BlockVerificationMethod::COUNT;
    let mut config =
        ClusterConfig::new_with_equal_stakes(num_nodes, DEFAULT_MINT_LAMPORTS, DEFAULT_NODE_STAKE);

    // Overwrite block_verification_method with shuffled variants
    let mut methods = BlockVerificationMethod::iter().collect::<Vec<_>>();
    methods.shuffle(&mut rand::thread_rng());
    for (validator_config, method) in config.validator_configs.iter_mut().zip_eq(methods) {
        validator_config.block_verification_method = method;
    }

    let local = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
    cluster_tests::spend_and_verify_all_nodes(
        &local.entry_point_info,
        &local.funding_keypair,
        num_nodes,
        HashSet::new(),
        SocketAddrSpace::Unspecified,
        &local.connection_cache,
    );
}

/// Forks previously marked invalid should be marked as such in fork choice on restart
#[test]
#[ignore]
#[serial]
fn test_invalid_forks_persisted_on_restart() {
    agave_logger::setup_with("info,solana_metrics=off,solana_ledger=off");
    let dup_slot = 10;
    let validator_keypairs = [
        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
    ]
    .iter()
    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
    .collect::<Vec<_>>();
    let majority_keypair = validator_keypairs[1].0.clone();

    let validators = validator_keypairs
        .iter()
        .map(|(kp, _)| kp.pubkey())
        .collect::<Vec<_>>();

    let node_stakes = vec![DEFAULT_NODE_STAKE, 100 * DEFAULT_NODE_STAKE];
    let (target_pubkey, majority_pubkey) = (validators[0], validators[1]);
    // Need majority validator to make the dup_slot
    let validator_to_slots = vec![
        (majority_pubkey, dup_slot as usize + 5),
        (target_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
    ];
    let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
    let mut default_config = ValidatorConfig::default_for_test();
    default_config.fixed_leader_schedule = Some(FixedSchedule {
        leader_schedule: Arc::new(leader_schedule),
    });
    let mut validator_configs = make_identical_validator_configs(&default_config, 2);
    // Majority shouldn't duplicate confirm anything
    validator_configs[1].voting_disabled = true;

    let mut cluster = LocalCluster::new(
        &mut ClusterConfig {
            mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
            validator_configs,
            node_stakes,
            validator_keys: Some(validator_keypairs),
            skip_warmup_slots: true,
            ..ClusterConfig::default()
        },
        SocketAddrSpace::Unspecified,
    );

    let target_ledger_path = cluster.ledger_path(&target_pubkey);

    // Wait for us to vote past duplicate slot
    let timer = Instant::now();
    loop {
        if let Some(slot) =
            wait_for_last_vote_in_tower_to_land_in_ledger(&target_ledger_path, &target_pubkey)
        {
            if slot > dup_slot {
                break;
            }
        }
        assert!(
            timer.elapsed() < Duration::from_secs(30),
            "Did not make more than 10 blocks in 30 seconds"
        );
        sleep(Duration::from_millis(100));
    }

    // Send duplicate
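    // Craft a second, tick-only version of `dup_slot`, signed by the majority validator (the
    // leader for that slot), so its contents differ from the version the target node already
    // replayed; sending these shreds yields a duplicate-slot proof in the target's blockstore.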
    let parent = {
        let blockstore = open_blockstore(&target_ledger_path);
        let parent = blockstore
            .meta(dup_slot)
            .unwrap()
            .unwrap()
            .parent_slot
            .unwrap();

        let entries = create_ticks(
            64 * (std::cmp::max(1, dup_slot - parent)),
            0,
            cluster.genesis_config.hash(),
        );
        let last_hash = entries.last().unwrap().hash;
        let version = solana_shred_version::version_from_hash(&last_hash);
        let dup_shreds = Shredder::new(dup_slot, parent, 0, version)
            .unwrap()
            .entries_to_merkle_shreds_for_tests(
                &majority_keypair,
                &entries,
                true,            // is_full_slot
                Hash::default(), // chained_merkle_root
                0,               // next_shred_index,
                0,               // next_code_index
                &ReedSolomonCache::default(),
                &mut ProcessShredsStats::default(),
            )
            .0;

        info!("Sending duplicate shreds for {dup_slot}");
        cluster.send_shreds_to_validator(dup_shreds.iter().collect(), &target_pubkey);
        wait_for_duplicate_proof(&target_ledger_path, dup_slot)
            .unwrap_or_else(|| panic!("Duplicate proof for {dup_slot} not found"));

        parent
    };

    info!("Duplicate proof for {dup_slot} has landed, restarting node");
    let info = cluster.exit_node(&target_pubkey);

    {
        let blockstore = open_blockstore(&target_ledger_path);
        purge_slots_with_count(&blockstore, dup_slot + 5, 100);
    }

    // Restart, should create an entirely new fork
    cluster.restart_node(&target_pubkey, info, SocketAddrSpace::Unspecified);

    info!("Waiting for fork built off {parent}");
    let timer = Instant::now();
    let mut checked_children: HashSet<Slot> = HashSet::default();
    let mut done = false;
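    // The restarted node should still treat `dup_slot` as invalid and build a brand new fork:
    // look for a full child of `parent` whose shreds were signed by `target_pubkey` itself.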
    while !done {
        let blockstore = open_blockstore(&target_ledger_path);
        let parent_meta = blockstore.meta(parent).unwrap().expect("Meta must exist");
        for child in parent_meta.next_slots {
            if checked_children.contains(&child) {
                continue;
            }
            if blockstore.is_full(child) {
                let shreds = blockstore
                    .get_data_shreds_for_slot(child, 0)
                    .expect("Child is full");
                let mut is_our_block = true;
                for shred in shreds {
                    is_our_block &= shred.verify(&target_pubkey);
                }
                if is_our_block {
                    done = true;
                }
                checked_children.insert(child);
            }
        }
        assert!(
            timer.elapsed() < Duration::from_secs(30),
            "Did not create a new fork off parent {parent} in 30 seconds after restart"
        );
        sleep(Duration::from_millis(100));
    }
}