local_cluster.rs 280 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342
  1. #![allow(clippy::arithmetic_side_effects)]
  2. use {
  3. assert_matches::assert_matches,
  4. crossbeam_channel::{unbounded, Receiver},
  5. gag::BufferRedirect,
  6. itertools::Itertools,
  7. log::*,
  8. rand::seq::SliceRandom,
  9. serial_test::serial,
  10. solana_account::AccountSharedData,
  11. solana_accounts_db::{
  12. hardened_unpack::open_genesis_config, utils::create_accounts_run_and_snapshot_dirs,
  13. },
  14. solana_bls_signatures::{keypair::Keypair as BLSKeypair, Signature as BLSSignature},
  15. solana_client::connection_cache::ConnectionCache,
  16. solana_client_traits::AsyncClient,
  17. solana_clock::{
  18. self as clock, Slot, DEFAULT_SLOTS_PER_EPOCH, DEFAULT_TICKS_PER_SLOT, MAX_PROCESSING_AGE,
  19. NUM_CONSECUTIVE_LEADER_SLOTS,
  20. },
  21. solana_commitment_config::CommitmentConfig,
  22. solana_connection_cache::client_connection::ClientConnection,
  23. solana_core::{
  24. consensus::{
  25. tower_storage::FileTowerStorage, Tower, SWITCH_FORK_THRESHOLD, VOTE_THRESHOLD_DEPTH,
  26. },
  27. optimistic_confirmation_verifier::OptimisticConfirmationVerifier,
  28. replay_stage::DUPLICATE_THRESHOLD,
  29. validator::{BlockVerificationMethod, ValidatorConfig},
  30. voting_service::{AlpenglowPortOverride, VotingServiceOverride},
  31. },
  32. solana_download_utils::download_snapshot_archive,
  33. solana_entry::entry::create_ticks,
  34. solana_epoch_schedule::{MAX_LEADER_SCHEDULE_EPOCH_OFFSET, MINIMUM_SLOTS_PER_EPOCH},
  35. solana_genesis_config::ClusterType,
  36. solana_gossip::{crds_data::MAX_VOTES, gossip_service::discover_validators},
  37. solana_hard_forks::HardForks,
  38. solana_hash::Hash,
  39. solana_keypair::{keypair_from_seed, Keypair},
  40. solana_ledger::{
  41. ancestor_iterator::AncestorIterator,
  42. bank_forks_utils,
  43. blockstore::{entries_to_test_shreds, Blockstore},
  44. blockstore_processor::ProcessOptions,
  45. leader_schedule::FixedSchedule,
  46. shred::{ProcessShredsStats, ReedSolomonCache, Shred, Shredder},
  47. use_snapshot_archives_at_startup::UseSnapshotArchivesAtStartup,
  48. },
  49. solana_local_cluster::{
  50. cluster::{Cluster, ClusterValidatorInfo, QuicTpuClient},
  51. cluster_tests,
  52. integration_tests::{
  53. copy_blocks, create_custom_leader_schedule,
  54. create_custom_leader_schedule_with_random_keys, farf_dir, generate_account_paths,
  55. last_root_in_tower, last_vote_in_tower, ms_for_n_slots, open_blockstore,
  56. purge_slots_with_count, remove_tower, remove_tower_if_exists, restore_tower,
  57. run_cluster_partition, run_kill_partition_switch_threshold, save_tower,
  58. setup_snapshot_validator_config, test_faulty_node, wait_for_duplicate_proof,
  59. wait_for_last_vote_in_tower_to_land_in_ledger, SnapshotValidatorConfig,
  60. ValidatorTestConfig, AG_DEBUG_LOG_FILTER, DEFAULT_NODE_STAKE, RUST_LOG_FILTER,
  61. },
  62. local_cluster::{ClusterConfig, LocalCluster, DEFAULT_MINT_LAMPORTS},
  63. validator_configs::*,
  64. },
  65. solana_poh_config::PohConfig,
  66. solana_pubkey::Pubkey,
  67. solana_pubsub_client::pubsub_client::PubsubClient,
  68. solana_rpc_client::rpc_client::RpcClient,
  69. solana_rpc_client_api::{
  70. config::{
  71. RpcBlockSubscribeConfig, RpcBlockSubscribeFilter, RpcProgramAccountsConfig,
  72. RpcSignatureSubscribeConfig,
  73. },
  74. response::RpcSignatureResult,
  75. },
  76. solana_runtime::{
  77. commitment::VOTE_THRESHOLD_SIZE,
  78. snapshot_archive_info::SnapshotArchiveInfoGetter,
  79. snapshot_bank_utils,
  80. snapshot_config::SnapshotConfig,
  81. snapshot_package::SnapshotKind,
  82. snapshot_utils::{self, SnapshotInterval},
  83. },
  84. solana_signer::Signer,
  85. solana_stake_interface::{self as stake, state::NEW_WARMUP_COOLDOWN_RATE},
  86. solana_streamer::socket::SocketAddrSpace,
  87. solana_system_interface::program as system_program,
  88. solana_system_transaction as system_transaction,
  89. solana_turbine::broadcast_stage::{
  90. broadcast_duplicates_run::{BroadcastDuplicatesConfig, ClusterPartition},
  91. BroadcastStageType,
  92. },
  93. solana_vote::{
  94. vote_parser::{self},
  95. vote_transaction,
  96. },
  97. solana_vote_interface::state::TowerSync,
  98. solana_vote_program::vote_state::MAX_LOCKOUT_HISTORY,
  99. solana_votor_messages::{
  100. bls_message::{BLSMessage, CertificateType, VoteMessage, BLS_KEYPAIR_DERIVE_SEED},
  101. vote::Vote,
  102. },
  103. std::{
  104. collections::{BTreeSet, HashMap, HashSet},
  105. fs,
  106. io::Read,
  107. iter,
  108. num::NonZeroU64,
  109. path::Path,
  110. sync::{
  111. atomic::{AtomicBool, AtomicUsize, Ordering},
  112. Arc, Mutex,
  113. },
  114. thread::{sleep, Builder, JoinHandle},
  115. time::{Duration, Instant},
  116. },
  117. strum::{EnumCount, IntoEnumIterator},
  118. };
  119. #[test]
  120. #[serial]
  121. fn test_local_cluster_start_and_exit() {
  122. solana_logger::setup();
  123. let num_nodes = 1;
  124. let cluster = LocalCluster::new_with_equal_stakes(
  125. num_nodes,
  126. DEFAULT_MINT_LAMPORTS,
  127. DEFAULT_NODE_STAKE,
  128. SocketAddrSpace::Unspecified,
  129. );
  130. assert_eq!(cluster.validators.len(), num_nodes);
  131. }
  132. #[test]
  133. #[serial]
  134. fn test_local_cluster_start_and_exit_with_config() {
  135. solana_logger::setup();
  136. const NUM_NODES: usize = 1;
  137. let mut config = ClusterConfig {
  138. validator_configs: make_identical_validator_configs(
  139. &ValidatorConfig::default_for_test(),
  140. NUM_NODES,
  141. ),
  142. node_stakes: vec![DEFAULT_NODE_STAKE; NUM_NODES],
  143. ticks_per_slot: 8,
  144. slots_per_epoch: MINIMUM_SLOTS_PER_EPOCH,
  145. stakers_slot_offset: MINIMUM_SLOTS_PER_EPOCH,
  146. ..ClusterConfig::default()
  147. };
  148. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  149. assert_eq!(cluster.validators.len(), NUM_NODES);
  150. }
  151. fn test_alpenglow_nodes_basic(num_nodes: usize, num_offline_nodes: usize) {
  152. solana_logger::setup_with_default(AG_DEBUG_LOG_FILTER);
  153. let validator_keys = (0..num_nodes)
  154. .map(|i| (Arc::new(keypair_from_seed(&[i as u8; 32]).unwrap()), true))
  155. .collect::<Vec<_>>();
  156. let mut config = ClusterConfig {
  157. validator_configs: make_identical_validator_configs(
  158. &ValidatorConfig::default_for_test(),
  159. num_nodes,
  160. ),
  161. validator_keys: Some(validator_keys.clone()),
  162. node_stakes: vec![DEFAULT_NODE_STAKE; num_nodes],
  163. ticks_per_slot: 8,
  164. slots_per_epoch: MINIMUM_SLOTS_PER_EPOCH * 2,
  165. stakers_slot_offset: MINIMUM_SLOTS_PER_EPOCH * 2,
  166. poh_config: PohConfig {
  167. target_tick_duration: PohConfig::default().target_tick_duration,
  168. hashes_per_tick: Some(clock::DEFAULT_HASHES_PER_TICK),
  169. target_tick_count: None,
  170. },
  171. ..ClusterConfig::default()
  172. };
  173. let mut cluster = LocalCluster::new_alpenglow(&mut config, SocketAddrSpace::Unspecified);
  174. assert_eq!(cluster.validators.len(), num_nodes);
  175. // Check transactions land
  176. cluster_tests::spend_and_verify_all_nodes(
  177. &cluster.entry_point_info,
  178. &cluster.funding_keypair,
  179. num_nodes,
  180. HashSet::new(),
  181. SocketAddrSpace::Unspecified,
  182. &cluster.connection_cache,
  183. );
  184. if num_offline_nodes > 0 {
  185. // Bring nodes offline
  186. info!("Shutting down {num_offline_nodes} nodes");
  187. for (key, _) in validator_keys.iter().take(num_offline_nodes) {
  188. cluster.exit_node(&key.pubkey());
  189. }
  190. }
  191. // Check for new roots
  192. cluster.check_for_new_roots(
  193. 16,
  194. &format!("test_{}_nodes_alpenglow", num_nodes),
  195. SocketAddrSpace::Unspecified,
  196. );
  197. }
  198. #[test]
  199. #[serial]
  200. fn test_1_node_alpenglow() {
  201. const NUM_NODES: usize = 1;
  202. test_alpenglow_nodes_basic(NUM_NODES, 0);
  203. }
  204. #[test]
  205. #[serial]
  206. fn test_2_nodes_alpenglow() {
  207. const NUM_NODES: usize = 2;
  208. test_alpenglow_nodes_basic(NUM_NODES, 0);
  209. }
  210. #[test]
  211. #[serial]
  212. fn test_4_nodes_alpenglow() {
  213. const NUM_NODES: usize = 4;
  214. test_alpenglow_nodes_basic(NUM_NODES, 0);
  215. }
  216. #[test]
  217. #[serial]
  218. fn test_4_nodes_with_1_offline_alpenglow() {
  219. const NUM_NODES: usize = 4;
  220. const NUM_OFFLINE: usize = 1;
  221. test_alpenglow_nodes_basic(NUM_NODES, NUM_OFFLINE);
  222. }
  223. #[test]
  224. #[serial]
  225. fn test_spend_and_verify_all_nodes_1() {
  226. solana_logger::setup_with_default(RUST_LOG_FILTER);
  227. error!("test_spend_and_verify_all_nodes_1");
  228. let num_nodes = 1;
  229. let local = LocalCluster::new_with_equal_stakes(
  230. num_nodes,
  231. DEFAULT_MINT_LAMPORTS,
  232. DEFAULT_NODE_STAKE,
  233. SocketAddrSpace::Unspecified,
  234. );
  235. cluster_tests::spend_and_verify_all_nodes(
  236. &local.entry_point_info,
  237. &local.funding_keypair,
  238. num_nodes,
  239. HashSet::new(),
  240. SocketAddrSpace::Unspecified,
  241. &local.connection_cache,
  242. );
  243. }
  244. #[test]
  245. #[serial]
  246. fn test_spend_and_verify_all_nodes_2() {
  247. solana_logger::setup_with_default(RUST_LOG_FILTER);
  248. error!("test_spend_and_verify_all_nodes_2");
  249. let num_nodes = 2;
  250. let local = LocalCluster::new_with_equal_stakes(
  251. num_nodes,
  252. DEFAULT_MINT_LAMPORTS,
  253. DEFAULT_NODE_STAKE,
  254. SocketAddrSpace::Unspecified,
  255. );
  256. cluster_tests::spend_and_verify_all_nodes(
  257. &local.entry_point_info,
  258. &local.funding_keypair,
  259. num_nodes,
  260. HashSet::new(),
  261. SocketAddrSpace::Unspecified,
  262. &local.connection_cache,
  263. );
  264. }
  265. #[test]
  266. #[serial]
  267. fn test_spend_and_verify_all_nodes_3() {
  268. solana_logger::setup_with_default(RUST_LOG_FILTER);
  269. error!("test_spend_and_verify_all_nodes_3");
  270. let num_nodes = 3;
  271. let local = LocalCluster::new_with_equal_stakes(
  272. num_nodes,
  273. DEFAULT_MINT_LAMPORTS,
  274. DEFAULT_NODE_STAKE,
  275. SocketAddrSpace::Unspecified,
  276. );
  277. cluster_tests::spend_and_verify_all_nodes(
  278. &local.entry_point_info,
  279. &local.funding_keypair,
  280. num_nodes,
  281. HashSet::new(),
  282. SocketAddrSpace::Unspecified,
  283. &local.connection_cache,
  284. );
  285. }
  286. #[test]
  287. #[serial]
  288. fn test_local_cluster_signature_subscribe() {
  289. solana_logger::setup_with_default(RUST_LOG_FILTER);
  290. let num_nodes = 2;
  291. let cluster = LocalCluster::new_with_equal_stakes(
  292. num_nodes,
  293. DEFAULT_MINT_LAMPORTS,
  294. DEFAULT_NODE_STAKE,
  295. SocketAddrSpace::Unspecified,
  296. );
  297. let nodes = cluster.get_node_pubkeys();
  298. // Get non leader
  299. let non_bootstrap_id = nodes
  300. .into_iter()
  301. .find(|id| id != cluster.entry_point_info.pubkey())
  302. .unwrap();
  303. let non_bootstrap_info = cluster.get_contact_info(&non_bootstrap_id).unwrap();
  304. let tx_client = cluster
  305. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  306. .unwrap();
  307. let (blockhash, _) = tx_client
  308. .rpc_client()
  309. .get_latest_blockhash_with_commitment(CommitmentConfig::processed())
  310. .unwrap();
  311. let mut transaction = system_transaction::transfer(
  312. &cluster.funding_keypair,
  313. &solana_pubkey::new_rand(),
  314. 10,
  315. blockhash,
  316. );
  317. let (mut sig_subscribe_client, receiver) = PubsubClient::signature_subscribe(
  318. &format!("ws://{}", non_bootstrap_info.rpc_pubsub().unwrap()),
  319. &transaction.signatures[0],
  320. Some(RpcSignatureSubscribeConfig {
  321. commitment: Some(CommitmentConfig::processed()),
  322. enable_received_notification: Some(true),
  323. }),
  324. )
  325. .unwrap();
  326. LocalCluster::send_transaction_with_retries(
  327. &tx_client,
  328. &[&cluster.funding_keypair],
  329. &mut transaction,
  330. 5,
  331. )
  332. .unwrap();
  333. let mut got_received_notification = false;
  334. loop {
  335. let responses: Vec<_> = receiver.try_iter().collect();
  336. let mut should_break = false;
  337. for response in responses {
  338. match response.value {
  339. RpcSignatureResult::ProcessedSignature(_) => {
  340. should_break = true;
  341. break;
  342. }
  343. RpcSignatureResult::ReceivedSignature(_) => {
  344. got_received_notification = true;
  345. }
  346. }
  347. }
  348. if should_break {
  349. break;
  350. }
  351. sleep(Duration::from_millis(100));
  352. }
  353. // If we don't drop the cluster, the blocking web socket service
  354. // won't return, and the `sig_subscribe_client` won't shut down
  355. drop(cluster);
  356. sig_subscribe_client.shutdown().unwrap();
  357. assert!(got_received_notification);
  358. }
  359. #[test]
  360. #[serial]
  361. fn test_two_unbalanced_stakes() {
  362. solana_logger::setup_with_default(RUST_LOG_FILTER);
  363. error!("test_two_unbalanced_stakes");
  364. let validator_config = ValidatorConfig::default_for_test();
  365. let num_ticks_per_second = 100;
  366. let num_ticks_per_slot = 10;
  367. let num_slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH;
  368. let mut cluster = LocalCluster::new(
  369. &mut ClusterConfig {
  370. node_stakes: vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
  371. mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
  372. validator_configs: make_identical_validator_configs(&validator_config, 2),
  373. ticks_per_slot: num_ticks_per_slot,
  374. slots_per_epoch: num_slots_per_epoch,
  375. stakers_slot_offset: num_slots_per_epoch,
  376. poh_config: PohConfig::new_sleep(Duration::from_millis(1000 / num_ticks_per_second)),
  377. ..ClusterConfig::default()
  378. },
  379. SocketAddrSpace::Unspecified,
  380. );
  381. cluster_tests::sleep_n_epochs(
  382. 10.0,
  383. &cluster.genesis_config.poh_config,
  384. num_ticks_per_slot,
  385. num_slots_per_epoch,
  386. );
  387. cluster.close_preserve_ledgers();
  388. let leader_pubkey = *cluster.entry_point_info.pubkey();
  389. let leader_ledger = cluster.validators[&leader_pubkey].info.ledger_path.clone();
  390. cluster_tests::verify_ledger_ticks(&leader_ledger, num_ticks_per_slot as usize);
  391. }
  392. #[test]
  393. #[serial]
  394. fn test_forwarding() {
  395. solana_logger::setup_with_default(RUST_LOG_FILTER);
  396. // Set up a cluster where one node is never the leader, so all txs sent to this node
  397. // will be have to be forwarded in order to be confirmed
  398. let mut config = ClusterConfig {
  399. node_stakes: vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
  400. mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
  401. validator_configs: make_identical_validator_configs(
  402. &ValidatorConfig::default_for_test(),
  403. 2,
  404. ),
  405. ..ClusterConfig::default()
  406. };
  407. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  408. let cluster_nodes = discover_validators(
  409. &cluster.entry_point_info.gossip().unwrap(),
  410. 2,
  411. cluster.entry_point_info.shred_version(),
  412. SocketAddrSpace::Unspecified,
  413. )
  414. .unwrap();
  415. assert!(cluster_nodes.len() >= 2);
  416. let leader_pubkey = *cluster.entry_point_info.pubkey();
  417. let validator_info = cluster_nodes
  418. .iter()
  419. .find(|c| c.pubkey() != &leader_pubkey)
  420. .unwrap();
  421. // Confirm that transactions were forwarded to and processed by the leader.
  422. cluster_tests::send_many_transactions(
  423. validator_info,
  424. &cluster.funding_keypair,
  425. &cluster.connection_cache,
  426. 10,
  427. 20,
  428. );
  429. }
  430. #[test]
  431. #[serial]
  432. fn test_restart_node() {
  433. solana_logger::setup_with_default(RUST_LOG_FILTER);
  434. error!("test_restart_node");
  435. let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH * 2;
  436. let ticks_per_slot = 16;
  437. let validator_config = ValidatorConfig::default_for_test();
  438. let mut cluster = LocalCluster::new(
  439. &mut ClusterConfig {
  440. node_stakes: vec![DEFAULT_NODE_STAKE],
  441. validator_configs: vec![safe_clone_config(&validator_config)],
  442. ticks_per_slot,
  443. slots_per_epoch,
  444. stakers_slot_offset: slots_per_epoch,
  445. skip_warmup_slots: true,
  446. ..ClusterConfig::default()
  447. },
  448. SocketAddrSpace::Unspecified,
  449. );
  450. let nodes = cluster.get_node_pubkeys();
  451. cluster_tests::sleep_n_epochs(
  452. 1.0,
  453. &cluster.genesis_config.poh_config,
  454. clock::DEFAULT_TICKS_PER_SLOT,
  455. slots_per_epoch,
  456. );
  457. cluster.exit_restart_node(&nodes[0], validator_config, SocketAddrSpace::Unspecified);
  458. cluster_tests::sleep_n_epochs(
  459. 0.5,
  460. &cluster.genesis_config.poh_config,
  461. clock::DEFAULT_TICKS_PER_SLOT,
  462. slots_per_epoch,
  463. );
  464. cluster_tests::send_many_transactions(
  465. &cluster.entry_point_info,
  466. &cluster.funding_keypair,
  467. &cluster.connection_cache,
  468. 10,
  469. 1,
  470. );
  471. }
  472. #[test]
  473. #[serial]
  474. fn test_mainnet_beta_cluster_type() {
  475. solana_logger::setup_with_default(RUST_LOG_FILTER);
  476. let mut config = ClusterConfig {
  477. cluster_type: ClusterType::MainnetBeta,
  478. node_stakes: vec![DEFAULT_NODE_STAKE],
  479. validator_configs: make_identical_validator_configs(
  480. &ValidatorConfig::default_for_test(),
  481. 1,
  482. ),
  483. ..ClusterConfig::default()
  484. };
  485. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  486. let cluster_nodes = discover_validators(
  487. &cluster.entry_point_info.gossip().unwrap(),
  488. 1,
  489. cluster.entry_point_info.shred_version(),
  490. SocketAddrSpace::Unspecified,
  491. )
  492. .unwrap();
  493. assert_eq!(cluster_nodes.len(), 1);
  494. let client = cluster
  495. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  496. .unwrap();
  497. // Programs that are available at epoch 0
  498. for program_id in [
  499. &solana_sdk_ids::system_program::id(),
  500. &stake::program::id(),
  501. &solana_vote_program::id(),
  502. &solana_sdk_ids::bpf_loader_deprecated::id(),
  503. &solana_sdk_ids::bpf_loader::id(),
  504. &solana_sdk_ids::bpf_loader_upgradeable::id(),
  505. ]
  506. .iter()
  507. {
  508. assert_matches!(
  509. (
  510. program_id,
  511. client
  512. .rpc_client()
  513. .get_account_with_commitment(program_id, CommitmentConfig::processed())
  514. .unwrap()
  515. .value
  516. ),
  517. (_program_id, Some(_))
  518. );
  519. }
  520. // Programs that are not available at epoch 0
  521. for program_id in [].iter() {
  522. assert_eq!(
  523. (
  524. program_id,
  525. client
  526. .rpc_client()
  527. .get_account_with_commitment(program_id, CommitmentConfig::processed())
  528. .unwrap()
  529. .value
  530. ),
  531. (program_id, None)
  532. );
  533. }
  534. }
  535. #[test]
  536. #[serial]
  537. fn test_snapshot_download() {
  538. solana_logger::setup_with_default(RUST_LOG_FILTER);
  539. // First set up the cluster with 1 node
  540. let snapshot_interval_slots = NonZeroU64::new(50).unwrap();
  541. let num_account_paths = 3;
  542. let leader_snapshot_test_config =
  543. setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
  544. let validator_snapshot_test_config =
  545. setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
  546. let stake = DEFAULT_NODE_STAKE;
  547. let mut config = ClusterConfig {
  548. node_stakes: vec![stake],
  549. validator_configs: make_identical_validator_configs(
  550. &leader_snapshot_test_config.validator_config,
  551. 1,
  552. ),
  553. ..ClusterConfig::default()
  554. };
  555. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  556. let full_snapshot_archives_dir = &leader_snapshot_test_config
  557. .validator_config
  558. .snapshot_config
  559. .full_snapshot_archives_dir;
  560. trace!("Waiting for snapshot");
  561. let full_snapshot_archive_info = cluster.wait_for_next_full_snapshot(
  562. full_snapshot_archives_dir,
  563. Some(Duration::from_secs(5 * 60)),
  564. );
  565. trace!("found: {}", full_snapshot_archive_info.path().display());
  566. // Download the snapshot, then boot a validator from it.
  567. download_snapshot_archive(
  568. &cluster.entry_point_info.rpc().unwrap(),
  569. &validator_snapshot_test_config
  570. .validator_config
  571. .snapshot_config
  572. .full_snapshot_archives_dir,
  573. &validator_snapshot_test_config
  574. .validator_config
  575. .snapshot_config
  576. .incremental_snapshot_archives_dir,
  577. (
  578. full_snapshot_archive_info.slot(),
  579. *full_snapshot_archive_info.hash(),
  580. ),
  581. SnapshotKind::FullSnapshot,
  582. validator_snapshot_test_config
  583. .validator_config
  584. .snapshot_config
  585. .maximum_full_snapshot_archives_to_retain,
  586. validator_snapshot_test_config
  587. .validator_config
  588. .snapshot_config
  589. .maximum_incremental_snapshot_archives_to_retain,
  590. false,
  591. &mut None,
  592. )
  593. .unwrap();
  594. cluster.add_validator(
  595. &validator_snapshot_test_config.validator_config,
  596. stake,
  597. Arc::new(Keypair::new()),
  598. None,
  599. SocketAddrSpace::Unspecified,
  600. );
  601. }
  602. #[test]
  603. #[serial]
  604. fn test_incremental_snapshot_download() {
  605. solana_logger::setup_with_default(RUST_LOG_FILTER);
  606. // First set up the cluster with 1 node
  607. let incremental_snapshot_interval = 9;
  608. let full_snapshot_interval = incremental_snapshot_interval * 3;
  609. let num_account_paths = 3;
  610. let leader_snapshot_test_config = SnapshotValidatorConfig::new(
  611. SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
  612. SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
  613. num_account_paths,
  614. );
  615. let validator_snapshot_test_config = SnapshotValidatorConfig::new(
  616. SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
  617. SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
  618. num_account_paths,
  619. );
  620. let stake = DEFAULT_NODE_STAKE;
  621. let mut config = ClusterConfig {
  622. node_stakes: vec![stake],
  623. validator_configs: make_identical_validator_configs(
  624. &leader_snapshot_test_config.validator_config,
  625. 1,
  626. ),
  627. ..ClusterConfig::default()
  628. };
  629. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  630. let full_snapshot_archives_dir = &leader_snapshot_test_config
  631. .validator_config
  632. .snapshot_config
  633. .full_snapshot_archives_dir;
  634. let incremental_snapshot_archives_dir = &leader_snapshot_test_config
  635. .validator_config
  636. .snapshot_config
  637. .incremental_snapshot_archives_dir;
  638. debug!(
  639. "snapshot config:\n\tfull snapshot interval: {full_snapshot_interval}\n\tincremental \
  640. snapshot interval: {incremental_snapshot_interval}",
  641. );
  642. debug!(
  643. "leader config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
  644. {}\n\tincremental snapshot archives dir: {}",
  645. leader_snapshot_test_config
  646. .bank_snapshots_dir
  647. .path()
  648. .display(),
  649. leader_snapshot_test_config
  650. .full_snapshot_archives_dir
  651. .path()
  652. .display(),
  653. leader_snapshot_test_config
  654. .incremental_snapshot_archives_dir
  655. .path()
  656. .display(),
  657. );
  658. debug!(
  659. "validator config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
  660. {}\n\tincremental snapshot archives dir: {}",
  661. validator_snapshot_test_config
  662. .bank_snapshots_dir
  663. .path()
  664. .display(),
  665. validator_snapshot_test_config
  666. .full_snapshot_archives_dir
  667. .path()
  668. .display(),
  669. validator_snapshot_test_config
  670. .incremental_snapshot_archives_dir
  671. .path()
  672. .display(),
  673. );
  674. trace!("Waiting for snapshots");
  675. let (incremental_snapshot_archive_info, full_snapshot_archive_info) = cluster
  676. .wait_for_next_incremental_snapshot(
  677. full_snapshot_archives_dir,
  678. incremental_snapshot_archives_dir,
  679. Some(Duration::from_secs(5 * 60)),
  680. );
  681. trace!(
  682. "found: {} and {}",
  683. full_snapshot_archive_info.path().display(),
  684. incremental_snapshot_archive_info.path().display()
  685. );
  686. // Download the snapshots, then boot a validator from them.
  687. download_snapshot_archive(
  688. &cluster.entry_point_info.rpc().unwrap(),
  689. &validator_snapshot_test_config
  690. .validator_config
  691. .snapshot_config
  692. .full_snapshot_archives_dir,
  693. &validator_snapshot_test_config
  694. .validator_config
  695. .snapshot_config
  696. .incremental_snapshot_archives_dir,
  697. (
  698. full_snapshot_archive_info.slot(),
  699. *full_snapshot_archive_info.hash(),
  700. ),
  701. SnapshotKind::FullSnapshot,
  702. validator_snapshot_test_config
  703. .validator_config
  704. .snapshot_config
  705. .maximum_full_snapshot_archives_to_retain,
  706. validator_snapshot_test_config
  707. .validator_config
  708. .snapshot_config
  709. .maximum_incremental_snapshot_archives_to_retain,
  710. false,
  711. &mut None,
  712. )
  713. .unwrap();
  714. download_snapshot_archive(
  715. &cluster.entry_point_info.rpc().unwrap(),
  716. &validator_snapshot_test_config
  717. .validator_config
  718. .snapshot_config
  719. .full_snapshot_archives_dir,
  720. &validator_snapshot_test_config
  721. .validator_config
  722. .snapshot_config
  723. .incremental_snapshot_archives_dir,
  724. (
  725. incremental_snapshot_archive_info.slot(),
  726. *incremental_snapshot_archive_info.hash(),
  727. ),
  728. SnapshotKind::IncrementalSnapshot(incremental_snapshot_archive_info.base_slot()),
  729. validator_snapshot_test_config
  730. .validator_config
  731. .snapshot_config
  732. .maximum_full_snapshot_archives_to_retain,
  733. validator_snapshot_test_config
  734. .validator_config
  735. .snapshot_config
  736. .maximum_incremental_snapshot_archives_to_retain,
  737. false,
  738. &mut None,
  739. )
  740. .unwrap();
  741. cluster.add_validator(
  742. &validator_snapshot_test_config.validator_config,
  743. stake,
  744. Arc::new(Keypair::new()),
  745. None,
  746. SocketAddrSpace::Unspecified,
  747. );
  748. }
  749. /// Test the scenario where a node starts up from a snapshot and its blockstore has enough new
  750. /// roots that cross the full snapshot interval. In this scenario, the node needs to take a full
  751. /// snapshot while processing the blockstore so that once the background services start up, there
  752. /// is the correct full snapshot available to take subsequent incremental snapshots.
  753. ///
  754. /// For this test...
  755. /// - Start a leader node and run it long enough to take a full and incremental snapshot
  756. /// - Download those snapshots to a validator node
  757. /// - Copy the validator snapshots to a back up directory
  758. /// - Start up the validator node
  759. /// - Wait for the validator node to see enough root slots to cross the full snapshot interval
  760. /// - Delete the snapshots on the validator node and restore the ones from the backup
  761. /// - Restart the validator node to trigger the scenario we're trying to test
  762. /// - Wait for the validator node to generate a new incremental snapshot
  763. /// - Copy the new incremental snapshot (and its associated full snapshot) to another new validator
  764. /// - Start up this new validator to ensure the snapshots from ^^^ are good
  765. #[test]
  766. #[serial]
  767. fn test_incremental_snapshot_download_with_crossing_full_snapshot_interval_at_startup() {
  768. solana_logger::setup_with_default(RUST_LOG_FILTER);
  769. // If these intervals change, also make sure to change the loop timers accordingly.
  770. let incremental_snapshot_interval = 9;
  771. let full_snapshot_interval = incremental_snapshot_interval * 5;
  772. let num_account_paths = 3;
  773. let leader_snapshot_test_config = SnapshotValidatorConfig::new(
  774. SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
  775. SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
  776. num_account_paths,
  777. );
  778. let mut validator_snapshot_test_config = SnapshotValidatorConfig::new(
  779. SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
  780. SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
  781. num_account_paths,
  782. );
  783. // The test has asserts that require the validator always boots from snapshot archives
  784. validator_snapshot_test_config
  785. .validator_config
  786. .use_snapshot_archives_at_startup = UseSnapshotArchivesAtStartup::Always;
  787. let stake = DEFAULT_NODE_STAKE;
  788. let mut config = ClusterConfig {
  789. node_stakes: vec![stake],
  790. validator_configs: make_identical_validator_configs(
  791. &leader_snapshot_test_config.validator_config,
  792. 1,
  793. ),
  794. ..ClusterConfig::default()
  795. };
  796. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  797. info!(
  798. "snapshot config:\n\tfull snapshot interval: {full_snapshot_interval:?}\n\tincremental \
  799. snapshot interval: {incremental_snapshot_interval:?}",
  800. );
  801. debug!(
  802. "leader config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
  803. {}\n\tincremental snapshot archives dir: {}",
  804. leader_snapshot_test_config
  805. .bank_snapshots_dir
  806. .path()
  807. .display(),
  808. leader_snapshot_test_config
  809. .full_snapshot_archives_dir
  810. .path()
  811. .display(),
  812. leader_snapshot_test_config
  813. .incremental_snapshot_archives_dir
  814. .path()
  815. .display(),
  816. );
  817. debug!(
  818. "validator config:\n\tbank snapshots dir: {}\n\tfull snapshot archives dir: \
  819. {}\n\tincremental snapshot archives dir: {}",
  820. validator_snapshot_test_config
  821. .bank_snapshots_dir
  822. .path()
  823. .display(),
  824. validator_snapshot_test_config
  825. .full_snapshot_archives_dir
  826. .path()
  827. .display(),
  828. validator_snapshot_test_config
  829. .incremental_snapshot_archives_dir
  830. .path()
  831. .display(),
  832. );
  833. info!("Waiting for leader to create the next incremental snapshot...");
  834. let (incremental_snapshot_archive, full_snapshot_archive) =
  835. LocalCluster::wait_for_next_incremental_snapshot(
  836. &cluster,
  837. leader_snapshot_test_config
  838. .full_snapshot_archives_dir
  839. .path(),
  840. leader_snapshot_test_config
  841. .incremental_snapshot_archives_dir
  842. .path(),
  843. Some(Duration::from_secs(5 * 60)),
  844. );
  845. info!(
  846. "Found snapshots:\n\tfull snapshot: {}\n\tincremental snapshot: {}",
  847. full_snapshot_archive.path().display(),
  848. incremental_snapshot_archive.path().display()
  849. );
  850. assert_eq!(
  851. full_snapshot_archive.slot(),
  852. incremental_snapshot_archive.base_slot()
  853. );
  854. info!("Waiting for leader to create snapshots... DONE");
  855. // Download the snapshots, then boot a validator from them.
  856. info!("Downloading full snapshot to validator...");
  857. download_snapshot_archive(
  858. &cluster.entry_point_info.rpc().unwrap(),
  859. validator_snapshot_test_config
  860. .full_snapshot_archives_dir
  861. .path(),
  862. validator_snapshot_test_config
  863. .incremental_snapshot_archives_dir
  864. .path(),
  865. (full_snapshot_archive.slot(), *full_snapshot_archive.hash()),
  866. SnapshotKind::FullSnapshot,
  867. validator_snapshot_test_config
  868. .validator_config
  869. .snapshot_config
  870. .maximum_full_snapshot_archives_to_retain,
  871. validator_snapshot_test_config
  872. .validator_config
  873. .snapshot_config
  874. .maximum_incremental_snapshot_archives_to_retain,
  875. false,
  876. &mut None,
  877. )
  878. .unwrap();
  879. let downloaded_full_snapshot_archive = snapshot_utils::get_highest_full_snapshot_archive_info(
  880. validator_snapshot_test_config
  881. .full_snapshot_archives_dir
  882. .path(),
  883. )
  884. .unwrap();
  885. info!(
  886. "Downloaded full snapshot, slot: {}",
  887. downloaded_full_snapshot_archive.slot()
  888. );
  889. info!("Downloading incremental snapshot to validator...");
  890. download_snapshot_archive(
  891. &cluster.entry_point_info.rpc().unwrap(),
  892. validator_snapshot_test_config
  893. .full_snapshot_archives_dir
  894. .path(),
  895. validator_snapshot_test_config
  896. .incremental_snapshot_archives_dir
  897. .path(),
  898. (
  899. incremental_snapshot_archive.slot(),
  900. *incremental_snapshot_archive.hash(),
  901. ),
  902. SnapshotKind::IncrementalSnapshot(incremental_snapshot_archive.base_slot()),
  903. validator_snapshot_test_config
  904. .validator_config
  905. .snapshot_config
  906. .maximum_full_snapshot_archives_to_retain,
  907. validator_snapshot_test_config
  908. .validator_config
  909. .snapshot_config
  910. .maximum_incremental_snapshot_archives_to_retain,
  911. false,
  912. &mut None,
  913. )
  914. .unwrap();
  915. let downloaded_incremental_snapshot_archive =
  916. snapshot_utils::get_highest_incremental_snapshot_archive_info(
  917. validator_snapshot_test_config
  918. .incremental_snapshot_archives_dir
  919. .path(),
  920. full_snapshot_archive.slot(),
  921. )
  922. .unwrap();
  923. info!(
  924. "Downloaded incremental snapshot, slot: {}, base slot: {}",
  925. downloaded_incremental_snapshot_archive.slot(),
  926. downloaded_incremental_snapshot_archive.base_slot(),
  927. );
  928. assert_eq!(
  929. downloaded_full_snapshot_archive.slot(),
  930. downloaded_incremental_snapshot_archive.base_slot()
  931. );
  932. // closure to copy files in a directory to another directory
  933. let copy_files = |from: &Path, to: &Path| {
  934. trace!(
  935. "copying files from dir {}, to dir {}",
  936. from.display(),
  937. to.display()
  938. );
  939. for entry in fs::read_dir(from).unwrap() {
  940. let entry = entry.unwrap();
  941. if entry.file_type().unwrap().is_dir() {
  942. continue;
  943. }
  944. let from_file_path = entry.path();
  945. let to_file_path = to.join(from_file_path.file_name().unwrap());
  946. trace!(
  947. "\t\tcopying file from {} to {}...",
  948. from_file_path.display(),
  949. to_file_path.display()
  950. );
  951. fs::copy(from_file_path, to_file_path).unwrap();
  952. }
  953. };
  954. // closure to delete files in a directory
  955. let delete_files = |dir: &Path| {
  956. trace!("deleting files in dir {}", dir.display());
  957. for entry in fs::read_dir(dir).unwrap() {
  958. let entry = entry.unwrap();
  959. if entry.file_type().unwrap().is_dir() {
  960. continue;
  961. }
  962. let file_path = entry.path();
  963. trace!("\t\tdeleting file {}...", file_path.display());
  964. fs::remove_file(file_path).unwrap();
  965. }
  966. };
  967. let copy_files_with_remote = |from: &Path, to: &Path| {
  968. copy_files(from, to);
  969. let remote_from = snapshot_utils::build_snapshot_archives_remote_dir(from);
  970. let remote_to = snapshot_utils::build_snapshot_archives_remote_dir(to);
  971. let _ = fs::create_dir_all(&remote_from);
  972. let _ = fs::create_dir_all(&remote_to);
  973. copy_files(&remote_from, &remote_to);
  974. };
  975. let delete_files_with_remote = |from: &Path| {
  976. delete_files(from);
  977. let remote_dir = snapshot_utils::build_snapshot_archives_remote_dir(from);
  978. let _ = fs::create_dir_all(&remote_dir);
  979. delete_files(&remote_dir);
  980. };
  981. // After downloading the snapshots, copy them over to a backup directory. Later we'll need to
  982. // restart the node and guarantee that the only snapshots present are these initial ones. So,
  983. // the easiest way to do that is create a backup now, delete the ones on the node before
  984. // restart, then copy the backup ones over again.
  985. let backup_validator_full_snapshot_archives_dir = tempfile::tempdir_in(farf_dir()).unwrap();
  986. trace!(
  987. "Backing up validator full snapshots to dir: {}...",
  988. backup_validator_full_snapshot_archives_dir.path().display()
  989. );
  990. copy_files_with_remote(
  991. validator_snapshot_test_config
  992. .full_snapshot_archives_dir
  993. .path(),
  994. backup_validator_full_snapshot_archives_dir.path(),
  995. );
  996. let backup_validator_incremental_snapshot_archives_dir =
  997. tempfile::tempdir_in(farf_dir()).unwrap();
  998. trace!(
  999. "Backing up validator incremental snapshots to dir: {}...",
  1000. backup_validator_incremental_snapshot_archives_dir
  1001. .path()
  1002. .display()
  1003. );
  1004. copy_files_with_remote(
  1005. validator_snapshot_test_config
  1006. .incremental_snapshot_archives_dir
  1007. .path(),
  1008. backup_validator_incremental_snapshot_archives_dir.path(),
  1009. );
  1010. info!("Starting the validator...");
  1011. let validator_identity = Arc::new(Keypair::new());
  1012. cluster.add_validator(
  1013. &validator_snapshot_test_config.validator_config,
  1014. stake,
  1015. validator_identity.clone(),
  1016. None,
  1017. SocketAddrSpace::Unspecified,
  1018. );
  1019. info!("Starting the validator... DONE");
  1020. // To ensure that a snapshot will be taken during startup, the blockstore needs to have roots
  1021. // that cross a full snapshot interval.
  1022. let starting_slot = incremental_snapshot_archive.slot();
  1023. let next_full_snapshot_slot = starting_slot + full_snapshot_interval;
  1024. info!(
  1025. "Waiting for the validator to see enough slots to cross a full snapshot interval \
  1026. ({next_full_snapshot_slot})..."
  1027. );
  1028. let timer = Instant::now();
  1029. loop {
  1030. let validator_current_slot = cluster
  1031. .build_validator_tpu_quic_client(&validator_identity.pubkey())
  1032. .unwrap()
  1033. .rpc_client()
  1034. .get_slot_with_commitment(CommitmentConfig::finalized())
  1035. .unwrap();
  1036. trace!("validator current slot: {validator_current_slot}");
  1037. if validator_current_slot > next_full_snapshot_slot {
  1038. break;
  1039. }
  1040. assert!(
  1041. timer.elapsed() < Duration::from_secs(30),
  1042. "It should not take longer than 30 seconds to cross the next full snapshot interval."
  1043. );
  1044. std::thread::yield_now();
  1045. }
  1046. info!(
  1047. "Waited {:?} for the validator to see enough slots to cross a full snapshot interval... \
  1048. DONE",
  1049. timer.elapsed()
  1050. );
  1051. // Get the highest full snapshot archive info for the validator, now that it has crossed the
  1052. // next full snapshot interval. We are going to use this to look up the same snapshot on the
  1053. // leader, which we'll then use to compare to the full snapshot the validator will create
  1054. // during startup. This ensures the snapshot creation process during startup is correct.
  1055. //
  1056. // Putting this all in its own block so it's clear we only intend to keep the leader's info
  1057. let leader_full_snapshot_archive_for_comparison = {
  1058. let validator_full_snapshot = snapshot_utils::get_highest_full_snapshot_archive_info(
  1059. validator_snapshot_test_config
  1060. .full_snapshot_archives_dir
  1061. .path(),
  1062. )
  1063. .unwrap();
  1064. // Now get the same full snapshot on the LEADER that we just got from the validator
  1065. let mut leader_full_snapshots = snapshot_utils::get_full_snapshot_archives(
  1066. leader_snapshot_test_config
  1067. .full_snapshot_archives_dir
  1068. .path(),
  1069. );
  1070. leader_full_snapshots.retain(|full_snapshot| {
  1071. full_snapshot.slot() == validator_full_snapshot.slot()
  1072. && full_snapshot.hash() == validator_full_snapshot.hash()
  1073. });
  1074. let leader_full_snapshot = leader_full_snapshots.first().unwrap();
  1075. // And for sanity, the full snapshot from the leader and the validator MUST be the same
  1076. assert_eq!(
  1077. (
  1078. validator_full_snapshot.slot(),
  1079. validator_full_snapshot.hash()
  1080. ),
  1081. (leader_full_snapshot.slot(), leader_full_snapshot.hash())
  1082. );
  1083. leader_full_snapshot.clone()
  1084. };
  1085. info!(
  1086. "leader full snapshot archive for comparison: \
  1087. {leader_full_snapshot_archive_for_comparison:#?}"
  1088. );
  1089. // Stop the validator before we reset its snapshots
  1090. info!("Stopping the validator...");
  1091. let validator_info = cluster.exit_node(&validator_identity.pubkey());
  1092. info!("Stopping the validator... DONE");
  1093. info!("Delete all the snapshots on the validator and restore the originals from the backup...");
  1094. delete_files_with_remote(
  1095. validator_snapshot_test_config
  1096. .full_snapshot_archives_dir
  1097. .path(),
  1098. );
  1099. delete_files_with_remote(
  1100. validator_snapshot_test_config
  1101. .incremental_snapshot_archives_dir
  1102. .path(),
  1103. );
  1104. copy_files_with_remote(
  1105. backup_validator_full_snapshot_archives_dir.path(),
  1106. validator_snapshot_test_config
  1107. .full_snapshot_archives_dir
  1108. .path(),
  1109. );
  1110. copy_files_with_remote(
  1111. backup_validator_incremental_snapshot_archives_dir.path(),
  1112. validator_snapshot_test_config
  1113. .incremental_snapshot_archives_dir
  1114. .path(),
  1115. );
  1116. info!(
  1117. "Delete all the snapshots on the validator and restore the originals from the backup... \
  1118. DONE"
  1119. );
  1120. // Get the highest full snapshot slot *before* restarting, as a comparison
  1121. let validator_full_snapshot_slot_at_startup =
  1122. snapshot_utils::get_highest_full_snapshot_archive_slot(
  1123. validator_snapshot_test_config
  1124. .full_snapshot_archives_dir
  1125. .path(),
  1126. )
  1127. .unwrap();
  1128. info!(
  1129. "Restarting the validator with full snapshot {validator_full_snapshot_slot_at_startup}..."
  1130. );
  1131. cluster.restart_node(
  1132. &validator_identity.pubkey(),
  1133. validator_info,
  1134. SocketAddrSpace::Unspecified,
  1135. );
  1136. info!("Restarting the validator... DONE");
  1137. // Now, we want to ensure that the validator can make a new incremental snapshot based on the
  1138. // new full snapshot that was created during the restart.
  1139. info!("Waiting for the validator to make new snapshots...");
  1140. let validator_next_full_snapshot_slot =
  1141. validator_full_snapshot_slot_at_startup + full_snapshot_interval;
  1142. let validator_next_incremental_snapshot_slot =
  1143. validator_next_full_snapshot_slot + incremental_snapshot_interval;
  1144. info!("Waiting for validator next full snapshot slot: {validator_next_full_snapshot_slot}");
  1145. info!(
  1146. "Waiting for validator next incremental snapshot slot: \
  1147. {validator_next_incremental_snapshot_slot}"
  1148. );
  1149. let timer = Instant::now();
  1150. loop {
  1151. if let Some(full_snapshot_slot) = snapshot_utils::get_highest_full_snapshot_archive_slot(
  1152. validator_snapshot_test_config
  1153. .full_snapshot_archives_dir
  1154. .path(),
  1155. ) {
  1156. if full_snapshot_slot >= validator_next_full_snapshot_slot {
  1157. if let Some(incremental_snapshot_slot) =
  1158. snapshot_utils::get_highest_incremental_snapshot_archive_slot(
  1159. validator_snapshot_test_config
  1160. .incremental_snapshot_archives_dir
  1161. .path(),
  1162. full_snapshot_slot,
  1163. )
  1164. {
  1165. if incremental_snapshot_slot >= validator_next_incremental_snapshot_slot {
  1166. // specific incremental snapshot is not important, just that one was created
  1167. info!(
  1168. "Validator made new snapshots, full snapshot slot: \
  1169. {full_snapshot_slot}, incremental snapshot slot: \
  1170. {incremental_snapshot_slot}",
  1171. );
  1172. break;
  1173. }
  1174. }
  1175. }
  1176. }
  1177. assert!(
  1178. timer.elapsed() < Duration::from_secs(30),
  1179. "It should not take longer than 30 seconds to cross the next incremental snapshot \
  1180. interval."
  1181. );
  1182. std::thread::yield_now();
  1183. }
  1184. info!(
  1185. "Waited {:?} for the validator to make new snapshots... DONE",
  1186. timer.elapsed()
  1187. );
  1188. // Check to make sure that the full snapshot the validator created during startup is the same
  1189. // or one greater than the snapshot the leader created.
  1190. let validator_full_snapshot_archives = snapshot_utils::get_full_snapshot_archives(
  1191. validator_snapshot_test_config
  1192. .full_snapshot_archives_dir
  1193. .path(),
  1194. );
  1195. info!("validator full snapshot archives: {validator_full_snapshot_archives:#?}");
  1196. let validator_full_snapshot_archive_for_comparison = validator_full_snapshot_archives
  1197. .into_iter()
  1198. .find(|validator_full_snapshot_archive| {
  1199. validator_full_snapshot_archive.slot()
  1200. == leader_full_snapshot_archive_for_comparison.slot()
  1201. })
  1202. .expect("validator created an unexpected full snapshot");
  1203. info!(
  1204. "Validator full snapshot archive for comparison: \
  1205. {validator_full_snapshot_archive_for_comparison:#?}"
  1206. );
  1207. assert_eq!(
  1208. validator_full_snapshot_archive_for_comparison.hash(),
  1209. leader_full_snapshot_archive_for_comparison.hash(),
  1210. );
  1211. // And lastly, startup another node with the new snapshots to ensure they work
  1212. let final_validator_snapshot_test_config = SnapshotValidatorConfig::new(
  1213. SnapshotInterval::Slots(NonZeroU64::new(full_snapshot_interval).unwrap()),
  1214. SnapshotInterval::Slots(NonZeroU64::new(incremental_snapshot_interval).unwrap()),
  1215. num_account_paths,
  1216. );
  1217. // Copy over the snapshots to the new node that it will boot from
  1218. copy_files(
  1219. validator_snapshot_test_config
  1220. .full_snapshot_archives_dir
  1221. .path(),
  1222. final_validator_snapshot_test_config
  1223. .full_snapshot_archives_dir
  1224. .path(),
  1225. );
  1226. copy_files(
  1227. validator_snapshot_test_config
  1228. .incremental_snapshot_archives_dir
  1229. .path(),
  1230. final_validator_snapshot_test_config
  1231. .incremental_snapshot_archives_dir
  1232. .path(),
  1233. );
  1234. info!("Starting final validator...");
  1235. let final_validator_identity = Arc::new(Keypair::new());
  1236. cluster.add_validator(
  1237. &final_validator_snapshot_test_config.validator_config,
  1238. stake,
  1239. final_validator_identity,
  1240. None,
  1241. SocketAddrSpace::Unspecified,
  1242. );
  1243. info!("Starting final validator... DONE");
  1244. }
  #[allow(unused_attributes)]
  #[test]
  #[serial]
  // Stops the non-entrypoint validator of a two-node cluster, hard-links the leader's next
  // full snapshot archive into the validator's snapshot directory, restarts the validator,
  // and then checks that transactions can still be spent and verified across the cluster.
  fn test_snapshot_restart_tower() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);

      // Both nodes get a config built by `setup_snapshot_validator_config` with the same
      // snapshot interval and number of account paths.
      let snapshot_interval_slots = NonZeroU64::new(10).unwrap();
      let num_account_paths = 2;
      let leader_snapshot_test_config =
          setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
      let validator_snapshot_test_config =
          setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);

      // The first node (the leader's config) carries 100x the stake of the second node.
      let leader_stake = DEFAULT_NODE_STAKE * 100;
      let mut config = ClusterConfig {
          node_stakes: vec![leader_stake, DEFAULT_NODE_STAKE],
          mint_lamports: DEFAULT_MINT_LAMPORTS + leader_stake,
          validator_configs: vec![
              safe_clone_config(&leader_snapshot_test_config.validator_config),
              safe_clone_config(&validator_snapshot_test_config.validator_config),
          ],
          skip_warmup_slots: true,
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      // Give the nodes some time to run before taking the validator down.
      sleep(Duration::from_secs(5));

      // The validator is whichever node is not the cluster entrypoint.
      let validator_id = cluster
          .get_node_pubkeys()
          .into_iter()
          .find(|pubkey| pubkey != cluster.entry_point_info.pubkey())
          .unwrap();
      let validator_info = cluster.exit_node(&validator_id);

      // Wait (up to five minutes) for the next full snapshot in the leader's archive dir.
      let snapshot_wait_timeout = Duration::from_secs(5 * 60);
      let leader_archives_dir = &leader_snapshot_test_config
          .validator_config
          .snapshot_config
          .full_snapshot_archives_dir;
      let leader_archive_info =
          cluster.wait_for_next_full_snapshot(leader_archives_dir, Some(snapshot_wait_timeout));

      // Make the leader's archive visible in the validator's snapshot output directory.
      let validator_archives_dir = validator_snapshot_test_config
          .full_snapshot_archives_dir
          .keep();
      let validator_archive_path = snapshot_utils::build_full_snapshot_archive_path(
          validator_archives_dir,
          leader_archive_info.slot(),
          leader_archive_info.hash(),
          leader_archive_info.archive_format(),
      );
      fs::hard_link(leader_archive_info.path(), validator_archive_path).unwrap();

      // Restart validator from snapshot, the validator's tower state in this snapshot
      // will contain slots < the root bank of the snapshot. Validator should not panic.
      cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);

      // Discover the cluster through the restarted node so its updated ContactInfo is
      // used, then confirm the cluster still makes progress.
      let restarted_node_info = cluster.get_contact_info(&validator_id).unwrap();
      let ignore_nodes = HashSet::new();
      cluster_tests::spend_and_verify_all_nodes(
          restarted_node_info,
          &cluster.funding_keypair,
          2,
          ignore_nodes,
          SocketAddrSpace::Unspecified,
          &cluster.connection_cache,
      );
  }
  #[test]
  #[serial]
  #[ignore]
  // Boots a second validator from the leader's full snapshot archive, lets it run until it
  // is 40 slots past the snapshot slot, and then asserts that the first non-zero slot in
  // the validator's blockstore equals the snapshot slot (the "floor").
  fn test_snapshots_blockstore_floor() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);
      // First set up the cluster with 1 snapshotting leader
      let snapshot_interval_slots = NonZeroU64::new(100).unwrap();
      let num_account_paths = 4;

      let leader_snapshot_test_config =
          setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
      let mut validator_snapshot_test_config =
          setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);

      let full_snapshot_archives_dir = &leader_snapshot_test_config
          .validator_config
          .snapshot_config
          .full_snapshot_archives_dir;

      let mut config = ClusterConfig {
          node_stakes: vec![DEFAULT_NODE_STAKE],
          validator_configs: make_identical_validator_configs(
              &leader_snapshot_test_config.validator_config,
              1,
          ),
          ..ClusterConfig::default()
      };

      let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      trace!("Waiting for snapshot tar to be generated with slot",);

      // Poll the leader's archive dir every five seconds until a full snapshot shows up.
      let archive_info = loop {
          if let Some(archive) =
              snapshot_utils::get_highest_full_snapshot_archive_info(full_snapshot_archives_dir)
          {
              trace!("snapshot exists");
              break archive;
          }
          sleep(Duration::from_millis(5000));
      };

      // Copy archive to validator's snapshot output directory
      let validator_archive_path = snapshot_utils::build_full_snapshot_archive_path(
          validator_snapshot_test_config
              .full_snapshot_archives_dir
              .keep(),
          archive_info.slot(),
          archive_info.hash(),
          validator_snapshot_test_config
              .validator_config
              .snapshot_config
              .archive_format,
      );
      fs::hard_link(archive_info.path(), validator_archive_path).unwrap();
      let slot_floor = archive_info.slot();

      // Start up a new node from a snapshot
      let cluster_nodes = discover_validators(
          &cluster.entry_point_info.gossip().unwrap(),
          1,
          cluster.entry_point_info.shred_version(),
          SocketAddrSpace::Unspecified,
      )
      .unwrap();
      // The only discovered node is registered as the new validator's known validator.
      let mut known_validators = HashSet::new();
      known_validators.insert(*cluster_nodes[0].pubkey());
      validator_snapshot_test_config
          .validator_config
          .known_validators = Some(known_validators);

      cluster.add_validator(
          &validator_snapshot_test_config.validator_config,
          DEFAULT_NODE_STAKE,
          Arc::new(Keypair::new()),
          None,
          SocketAddrSpace::Unspecified,
      );
      // The new validator is whichever node is not the cluster entrypoint.
      let all_pubkeys = cluster.get_node_pubkeys();
      let validator_id = all_pubkeys
          .into_iter()
          .find(|x| x != cluster.entry_point_info.pubkey())
          .unwrap();
      let validator_client = cluster
          .build_validator_tpu_quic_client(&validator_id)
          .unwrap();
      let mut current_slot = 0;

      // Let this validator run a while with repair
      let target_slot = slot_floor + 40;
      while current_slot <= target_slot {
          trace!("current_slot: {current_slot}");
          // A failed RPC request keeps the previous slot value. The sleep below runs on
          // every iteration so that a failing endpoint is retried once per second instead
          // of being hammered in a tight loop.
          if let Ok(slot) = validator_client
              .rpc_client()
              .get_slot_with_commitment(CommitmentConfig::processed())
          {
              current_slot = slot;
          }
          sleep(Duration::from_secs(1));
      }

      // Check the validator ledger doesn't contain any slots < slot_floor
      cluster.close_preserve_ledgers();
      let validator_ledger_path = &cluster.validators[&validator_id];
      let blockstore = Blockstore::open(&validator_ledger_path.info.ledger_path).unwrap();

      // Skip the zeroth slot in blockstore that the ledger is initialized with
      let (first_slot, _) = blockstore.slot_meta_iterator(1).unwrap().next().unwrap();

      assert_eq!(first_slot, slot_floor);
  }
  #[test]
  #[serial]
  // Runs a single-node cluster through repeated "send transactions, wait for a full
  // snapshot, restart the node" cycles, checking after each restart that all expected
  // balances are present and that new transactions can still be processed.
  fn test_snapshots_restart_validity() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);
      let num_account_paths = 1;
      let mut snapshot_test_config =
          setup_snapshot_validator_config(NonZeroU64::new(100).unwrap(), num_account_paths);
      let full_snapshot_archives_dir = &snapshot_test_config
          .validator_config
          .snapshot_config
          .full_snapshot_archives_dir;

      // Every set of account storage dirs ever used is collected here and held until the
      // end of the test (presumably to keep the underlying directories alive).
      let initial_account_storage_dirs =
          std::mem::take(&mut snapshot_test_config.account_storage_dirs);
      let mut all_account_storage_dirs = vec![initial_account_storage_dirs];

      // Set up the cluster with 1 snapshotting validator
      let mut config = ClusterConfig {
          node_stakes: vec![DEFAULT_NODE_STAKE],
          validator_configs: make_identical_validator_configs(
              &snapshot_test_config.validator_config,
              1,
          ),
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      // The range starts at 1, so the node is restarted `num_runs - 1` times.
      let num_runs = 3;
      let snapshot_wait_timeout = Duration::from_secs(5 * 60);
      let mut expected_balances = HashMap::new();
      for i in 1..num_runs {
          info!("run {i}");

          // Push transactions to one of the nodes and confirm that transactions were
          // forwarded to and processed.
          trace!("Sending transactions");
          expected_balances.extend(cluster_tests::send_many_transactions(
              &cluster.entry_point_info,
              &cluster.funding_keypair,
              &cluster.connection_cache,
              10,
              10,
          ));

          cluster
              .wait_for_next_full_snapshot(full_snapshot_archives_dir, Some(snapshot_wait_timeout));

          // Create new account paths since validator exit is not guaranteed to cleanup RPC
          // threads, which may delete the old accounts on exit at any point
          let (fresh_storage_dirs, fresh_storage_paths) =
              generate_account_paths(num_account_paths);
          all_account_storage_dirs.push(fresh_storage_dirs);
          snapshot_test_config.validator_config.account_paths = fresh_storage_paths;

          // Restart node
          trace!("Restarting cluster from snapshot");
          let nodes = cluster.get_node_pubkeys();
          cluster.exit_restart_node(
              &nodes[0],
              safe_clone_config(&snapshot_test_config.validator_config),
              SocketAddrSpace::Unspecified,
          );

          // Verify account balances on validator
          trace!("Verifying balances");
          let balances_to_check = expected_balances.clone();
          cluster_tests::verify_balances(
              balances_to_check,
              &cluster.entry_point_info,
              cluster.connection_cache.clone(),
          );

          // Check that we can still push transactions
          trace!("Spending and verifying");
          cluster_tests::spend_and_verify_all_nodes(
              &cluster.entry_point_info,
              &cluster.funding_keypair,
              1,
              HashSet::new(),
              SocketAddrSpace::Unspecified,
              &cluster.connection_cache,
          );
      }
  }
  #[test]
  #[serial]
  #[allow(unused_attributes)]
  #[ignore]
  // Starts a three-node cluster through `test_faulty_node` with the
  // `FailEntryVerification` broadcast stage and checks that new roots are still made.
  fn test_fail_entry_verification_leader() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);

      // Stakes sum to 100: the leader gets one more than `DUPLICATE_THRESHOLD` expressed
      // as a percentage, and the two validators split what is left.
      let leader_stake = (DUPLICATE_THRESHOLD * 100.0) as u64 + 1;
      let remaining_stake = 100 - leader_stake;
      let first_validator_stake = remaining_stake / 2;
      let second_validator_stake = remaining_stake - first_validator_stake;
      let node_stakes = vec![leader_stake, first_validator_stake, second_validator_stake];

      let (cluster, _) = test_faulty_node(
          BroadcastStageType::FailEntryVerification,
          node_stakes,
          None,
          None,
      );
      cluster.check_for_new_roots(
          16,
          "test_fail_entry_verification_leader",
          SocketAddrSpace::Unspecified,
      );
  }
  #[test]
  #[serial]
  #[ignore]
  #[allow(unused_attributes)]
  // Starts a two-node cluster (stakes 300 and 100) through `test_faulty_node` with the
  // `BroadcastFakeShreds` broadcast stage and checks that new roots are still made.
  fn test_fake_shreds_broadcast_leader() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);
      let (cluster, _) = test_faulty_node(
          BroadcastStageType::BroadcastFakeShreds,
          vec![300, 100],
          None,
          None,
      );
      let num_new_roots = 16;
      cluster.check_for_new_roots(
          num_new_roots,
          "test_fake_shreds_broadcast_leader",
          SocketAddrSpace::Unspecified,
      );
  }
  #[test]
  #[serial]
  #[ignore]
  // Starts a four-validator cluster and waits, with a timeout derived from the cluster's
  // tick/slot/epoch settings, for the RPC client's
  // `wait_for_max_stake_below_threshold_with_timeout` call to succeed.
  fn test_wait_for_max_stake() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);

      let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH;
      // Reduce this so that we can complete the test faster by advancing through
      // slots/epochs faster. But don't make it too small because it can cause the
      // test to fail in two important ways:
      // 1. Increase likelihood of skipped slots, which can prevent rooting and
      //    lead to not generating leader schedule in time and cluster getting
      //    stuck.
      // 2. Make the cluster advance through too many epochs before all the
      //    validators spin up, which can lead to not properly observing gossip
      //    votes, not repairing missing slots, and some subset of nodes getting
      //    stuck.
      let ticks_per_slot = 32;
      let num_validators = 4;

      let mut config = ClusterConfig {
          node_stakes: vec![DEFAULT_NODE_STAKE; num_validators],
          validator_configs: make_identical_validator_configs(
              &ValidatorConfig::default_for_test(),
              num_validators,
          ),
          slots_per_epoch,
          // Set this large enough to allow for skipped slots but still be able to
          // make a root and derive the new leader schedule in time.
          stakers_slot_offset: slots_per_epoch.saturating_mul(MAX_LEADER_SCHEDULE_EPOCH_OFFSET),
          ticks_per_slot,
          ..ClusterConfig::default()
      };
      let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
      let client = RpcClient::new_socket(cluster.entry_point_info.rpc().unwrap());

      // Number of epochs it is expected to take to completely activate the stake
      // for all the validators.
      let num_validators_activating_stake = num_validators - 1;
      let warmup_epochs = (num_validators_activating_stake as f64)
          .log(1. + NEW_WARMUP_COOLDOWN_RATE)
          .ceil() as u32;
      let num_expected_epochs = warmup_epochs + 1;

      let expected_test_duration = config.poh_config.target_tick_duration
          * ticks_per_slot as u32
          * slots_per_epoch as u32
          * num_expected_epochs;
      // Make the timeout double the expected duration to provide some margin.
      // Especially considering tests may be running in parallel.
      let timeout = expected_test_duration * 2;

      // Integer division happens before the cast to f32.
      let max_stake_threshold = (100 / num_validators_activating_stake) as f32;
      client
          .wait_for_max_stake_below_threshold_with_timeout(
              CommitmentConfig::default(),
              max_stake_threshold,
              timeout,
          )
          .unwrap_or_else(|err| panic!("wait_for_max_stake failed: {err:?}"));

      assert!(client.get_slot().unwrap() > 10);
  }
  #[test]
  #[serial]
  // Test that when a leader is leader for banks B_i..B_{i+n}, and B_i is not
  // votable, then B_{i+1} still chains to B_i.
  //
  // A single node runs with voting disabled; afterwards every slot in the checked range
  // of its ledger must have the immediately preceding slot as its parent.
  fn test_no_voting() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);
      let mut config = ClusterConfig {
          node_stakes: vec![DEFAULT_NODE_STAKE],
          validator_configs: vec![ValidatorConfig {
              voting_disabled: true,
              ..ValidatorConfig::default_for_test()
          }],
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
      let client = cluster
          .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
          .unwrap();

      // Poll once per second until the processed slot has passed the threshold.
      let slot_threshold = 4 * VOTE_THRESHOLD_DEPTH as u64;
      while client
          .rpc_client()
          .get_slot_with_commitment(CommitmentConfig::processed())
          .expect("Couldn't get slot")
          <= slot_threshold
      {
          sleep(Duration::from_secs(1));
      }

      // Shut the cluster down but keep the ledgers so the leader's blockstore can be read.
      cluster.close_preserve_ledgers();
      let leader_pubkey = *cluster.entry_point_info.pubkey();
      let ledger = Blockstore::open(&cluster.validators[&leader_pubkey].info.ledger_path).unwrap();

      // Slot 0 is expected to list itself as parent (saturating subtraction); every other
      // slot must chain to the slot right before it.
      for slot in (0..2 * VOTE_THRESHOLD_DEPTH).map(|i| i as u64) {
          let expected_parent = slot.saturating_sub(1);
          let meta = ledger.meta(slot).unwrap().unwrap();
          assert_eq!(meta.parent_slot, Some(expected_parent));
      }
  }
  1623. #[test]
  1624. #[serial]
  1625. fn test_optimistic_confirmation_violation_detection() {
  1626. solana_logger::setup_with_default(RUST_LOG_FILTER);
  1627. // First set up the cluster with 2 nodes
  1628. let slots_per_epoch = 2048;
  1629. let node_stakes = vec![50 * DEFAULT_NODE_STAKE, 51 * DEFAULT_NODE_STAKE];
  1630. let validator_keys: Vec<_> = [
  1631. "4qhhXNTbKD1a5vxDDLZcHKj7ELNeiivtUBxn3wUK1F5VRsQVP89VUhfXqSfgiFB14GfuBgtrQ96n9NvWQADVkcCg",
  1632. "3kHBzVwie5vTEaY6nFCPeFT8qDpoXzn7dCEioGRNBTnUDpvwnG85w8Wq63gVWpVTP8k2a8cgcWRjSXyUkEygpXWS",
  1633. ]
  1634. .iter()
  1635. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  1636. .take(node_stakes.len())
  1637. .collect();
  1638. // Do not restart the validator which is the cluster entrypoint because its gossip port
  1639. // might be changed after restart resulting in the two nodes not being able
  1640. // to form a cluster. The heavier validator is the second node.
  1641. let node_to_restart = validator_keys[1].0.pubkey();
  1642. let mut config = ClusterConfig {
  1643. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  1644. node_stakes: node_stakes.clone(),
  1645. validator_configs: make_identical_validator_configs(
  1646. &ValidatorConfig::default_for_test(),
  1647. node_stakes.len(),
  1648. ),
  1649. validator_keys: Some(validator_keys),
  1650. slots_per_epoch,
  1651. stakers_slot_offset: slots_per_epoch,
  1652. skip_warmup_slots: true,
  1653. ..ClusterConfig::default()
  1654. };
  1655. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  1656. // Let the nodes run for a while. Wait for validators to vote on slot `S`
  1657. // so that the vote on `S-1` is definitely in gossip and optimistic confirmation is
  1658. // detected on slot `S-1` for sure, then stop the heavier of the two
  1659. // validators
  1660. let client = cluster
  1661. .build_validator_tpu_quic_client(&node_to_restart)
  1662. .unwrap();
  1663. let start = Instant::now();
  1664. let target_slot = 50;
  1665. let max_wait_time_seconds = 100;
  1666. let mut optimistically_confirmed_slot;
  1667. loop {
  1668. optimistically_confirmed_slot = client
  1669. .rpc_client()
  1670. .get_slot_with_commitment(CommitmentConfig::confirmed())
  1671. .unwrap();
  1672. if optimistically_confirmed_slot > target_slot {
  1673. break;
  1674. }
  1675. if start.elapsed() > Duration::from_secs(max_wait_time_seconds) {
  1676. cluster.exit();
  1677. panic!(
  1678. "Didn't get optimistcally confirmed slot > {target_slot} within \
  1679. {max_wait_time_seconds} seconds"
  1680. );
  1681. }
  1682. sleep(Duration::from_millis(100));
  1683. }
  1684. info!("exiting node");
  1685. drop(client);
  1686. let mut exited_validator_info = cluster.exit_node(&node_to_restart);
  1687. info!("exiting node success");
  1688. // Mark fork as dead on the heavier validator, this should make the fork effectively
  1689. // dead, even though it was optimistically confirmed. The smaller validator should
  1690. // create and jump over to a new fork
  1691. // Also, remove saved tower to intentionally make the restarted validator to violate the
  1692. // optimistic confirmation
  1693. let optimistically_confirmed_slot_parent = {
  1694. let tower = restore_tower(
  1695. &exited_validator_info.info.ledger_path,
  1696. &exited_validator_info.info.keypair.pubkey(),
  1697. )
  1698. .unwrap();
  1699. // Vote must exist since we waited for OC and so this node must have voted
  1700. let last_voted_slot = tower.last_voted_slot().expect("vote must exist");
  1701. let blockstore = open_blockstore(&exited_validator_info.info.ledger_path);
  1702. // The last vote must be descended from the OC slot
  1703. assert!(
  1704. AncestorIterator::new_inclusive(last_voted_slot, &blockstore)
  1705. .contains(&optimistically_confirmed_slot)
  1706. );
  1707. info!(
  1708. "Setting slot: {optimistically_confirmed_slot} on main fork as dead, should cause fork"
  1709. );
  1710. // Necessary otherwise tower will inform this validator that its latest
  1711. // vote is on slot `optimistically_confirmed_slot`. This will then prevent this validator
  1712. // from resetting to the parent of `optimistically_confirmed_slot` to create an alternative fork because
  1713. // 1) Validator can't vote on earlier ancestor of last vote due to switch threshold (can't vote
  1714. // on ancestors of last vote)
  1715. // 2) Won't reset to this earlier ancestor because reset can only happen on same voted fork if
  1716. // it's for the last vote slot or later
  1717. remove_tower(&exited_validator_info.info.ledger_path, &node_to_restart);
  1718. blockstore
  1719. .set_dead_slot(optimistically_confirmed_slot)
  1720. .unwrap();
  1721. blockstore
  1722. .meta(optimistically_confirmed_slot)
  1723. .unwrap()
  1724. .unwrap()
  1725. .parent_slot
  1726. .unwrap()
  1727. };
  1728. {
  1729. // Buffer stderr to detect optimistic slot violation log
  1730. let buf = std::env::var("OPTIMISTIC_CONF_TEST_DUMP_LOG")
  1731. .err()
  1732. .map(|_| BufferRedirect::stderr().unwrap());
  1733. // In order to prevent the node from voting on a slot it's already voted on
  1734. // which can potentially cause a panic in gossip, start up the validator as a
  1735. // non voter and wait for it to make a new block
  1736. exited_validator_info.config.voting_disabled = true;
  1737. cluster.restart_node(
  1738. &node_to_restart,
  1739. exited_validator_info,
  1740. SocketAddrSpace::Unspecified,
  1741. );
  1742. // Wait for this node to make a fork that doesn't include the `optimistically_confirmed_slot`
  1743. info!(
  1744. "Looking for slot not equal to {optimistically_confirmed_slot} with parent \
  1745. {optimistically_confirmed_slot_parent}"
  1746. );
  1747. let start = Instant::now();
  1748. let new_fork_slot;
  1749. 'outer: loop {
  1750. sleep(Duration::from_millis(1000));
  1751. let ledger_path = cluster.ledger_path(&node_to_restart);
  1752. let blockstore = open_blockstore(&ledger_path);
  1753. let potential_new_forks = blockstore
  1754. .meta(optimistically_confirmed_slot_parent)
  1755. .unwrap()
  1756. .unwrap()
  1757. .next_slots;
  1758. for slot in potential_new_forks {
  1759. // Wait for a fork to be created that does not include the OC slot
  1760. // Now on restart the validator should only vote for this new `slot` which it has
  1761. // never voted on before and thus avoids the panic in gossip
  1762. if slot > optimistically_confirmed_slot && blockstore.is_full(slot) {
  1763. new_fork_slot = slot;
  1764. break 'outer;
  1765. }
  1766. }
  1767. if start.elapsed() > Duration::from_secs(max_wait_time_seconds) {
  1768. cluster.exit();
  1769. panic!("Didn't get new fork within {max_wait_time_seconds} seconds");
  1770. }
  1771. }
  1772. // Exit again, restart with voting enabled
  1773. let mut exited_validator_info = cluster.exit_node(&node_to_restart);
  1774. exited_validator_info.config.voting_disabled = false;
  1775. cluster.restart_node(
  1776. &node_to_restart,
  1777. exited_validator_info,
  1778. SocketAddrSpace::Unspecified,
  1779. );
  1780. // Wait for a root descended from `new_fork_slot` to be set.
  1781. let client = cluster
  1782. .build_validator_tpu_quic_client(&node_to_restart)
  1783. .unwrap();
  1784. info!("looking for root > {optimistically_confirmed_slot} on new fork {new_fork_slot}");
  1785. let start = Instant::now();
  1786. loop {
  1787. info!("Client connecting to: {}", client.rpc_client().url());
  1788. let last_root = client
  1789. .rpc_client()
  1790. .get_slot_with_commitment(CommitmentConfig::finalized())
  1791. .unwrap();
  1792. if last_root > new_fork_slot {
  1793. info!("Found root: {last_root} > {new_fork_slot}");
  1794. let ledger_path = cluster.ledger_path(&node_to_restart);
  1795. let blockstore = open_blockstore(&ledger_path);
  1796. if AncestorIterator::new_inclusive(last_root, &blockstore).contains(&new_fork_slot)
  1797. {
  1798. break;
  1799. }
  1800. }
  1801. if start.elapsed() > Duration::from_secs(max_wait_time_seconds) {
  1802. cluster.exit();
  1803. panic!("Didn't get root on new fork within {max_wait_time_seconds} seconds");
  1804. }
  1805. sleep(Duration::from_millis(100));
  1806. }
  1807. // Check to see that validator detected optimistic confirmation for
  1808. // `last_voted_slot` failed
  1809. let expected_log =
  1810. OptimisticConfirmationVerifier::format_optimistic_confirmed_slot_violation_log(
  1811. optimistically_confirmed_slot,
  1812. );
  1813. // Violation detection thread can be behind so poll logs up to 10 seconds
  1814. if let Some(mut buf) = buf {
  1815. let start = Instant::now();
  1816. let mut success = false;
  1817. let mut output = String::new();
  1818. while start.elapsed().as_secs() < 10 {
  1819. buf.read_to_string(&mut output).unwrap();
  1820. if output.contains(&expected_log) {
  1821. success = true;
  1822. break;
  1823. }
  1824. sleep(Duration::from_millis(10));
  1825. }
  1826. print!("{output}");
  1827. assert!(success);
  1828. } else {
  1829. panic!("dumped log and disabled testing");
  1830. }
  1831. }
  1832. // Make sure validator still makes progress
  1833. cluster_tests::check_for_new_roots(
  1834. 16,
  1835. &[cluster.get_contact_info(&node_to_restart).unwrap().clone()],
  1836. &cluster.connection_cache,
  1837. "test_optimistic_confirmation_violation",
  1838. );
  1839. }
  /// Exercises tower persistence across validator restarts on a single-node cluster.
  ///
  /// The validator is stopped four times and the tower file is inspected each time:
  /// 1. after the first votes, the saved tower has root 0 and a last voted slot;
  /// 2. after the first new root, the saved tower's root equals the finalized slot seen over RPC;
  /// 3. after rolling the file back to the first tower, the validator still roots new slots;
  /// 4. after deleting the file (with `require_tower` switched off), it still roots new slots.
  #[test]
  #[serial]
  fn test_validator_saves_tower() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);

      // One validator that insists on finding a saved tower at startup.
      let validator_config = ValidatorConfig {
          require_tower: true,
          ..ValidatorConfig::default_for_test()
      };
      let validator_identity_keypair = Arc::new(Keypair::new());
      let validator_id = validator_identity_keypair.pubkey();
      let mut config = ClusterConfig {
          node_stakes: vec![DEFAULT_NODE_STAKE],
          validator_configs: vec![validator_config],
          validator_keys: Some(vec![(Arc::clone(&validator_identity_keypair), true)]),
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      let validator_client = cluster
          .build_validator_tpu_quic_client(&validator_id)
          .unwrap();
      let ledger_path = cluster
          .validators
          .get(&validator_id)
          .unwrap()
          .info
          .ledger_path
          .clone();
      let file_tower_storage = FileTowerStorage::new(ledger_path.clone());

      // Let the validator run until it has processed a few slots, so some votes exist.
      loop {
          let processed = validator_client
              .rpc_client()
              .get_slot_with_commitment(CommitmentConfig::processed());
          match processed {
              Ok(slot) => {
                  trace!("current slot: {slot}");
                  if slot > 2 {
                      break;
                  }
              }
              // RPC errors are ignored; simply poll again.
              Err(_) => {}
          }
          sleep(Duration::from_millis(10));
      }

      // Phase 1: stop the validator and inspect the tower it saved.
      let validator_info = cluster.exit_node(&validator_id);
      let tower1 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
      trace!("tower1: {tower1:?}");
      assert_eq!(tower1.root(), 0);
      assert!(tower1.last_voted_slot().is_some());

      // Phase 2: restart and wait for the first root above slot 0.
      cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
      let validator_client = cluster
          .build_validator_tpu_quic_client(&validator_id)
          .unwrap();
      let last_replayed_root = loop {
          let finalized = validator_client
              .rpc_client()
              .get_slot_with_commitment(CommitmentConfig::finalized());
          match finalized {
              Ok(root) => {
                  trace!("current root: {root}");
                  if root > 0 {
                      break root;
                  }
              }
              Err(_) => {}
          }
          sleep(Duration::from_millis(50));
      };

      // Stop again; the saved tower must carry exactly the root we just observed.
      let validator_info = cluster.exit_node(&validator_id);
      let tower2 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
      trace!("tower2: {tower2:?}");
      assert_eq!(tower2.root(), last_replayed_root);

      // Phase 3: overwrite the saved tower with `tower1`. This simulates a validator starting
      // from a newer snapshot without having to wait for such a snapshot to be generated here.
      tower1
          .save(&file_tower_storage, &validator_identity_keypair)
          .unwrap();
      cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
      let validator_client = cluster
          .build_validator_tpu_quic_client(&validator_id)
          .unwrap();

      // A root past `last_replayed_root` shows the validator made progress from the older tower.
      let new_root = loop {
          let finalized = validator_client
              .rpc_client()
              .get_slot_with_commitment(CommitmentConfig::finalized());
          match finalized {
              Ok(root) => {
                  trace!("current root: {root}, last_replayed_root: {last_replayed_root}");
                  if root > last_replayed_root {
                      break root;
                  }
              }
              Err(_) => {}
          }
          sleep(Duration::from_millis(50));
      };

      // The tower on disk must have caught up with (or passed) that root.
      let mut validator_info = cluster.exit_node(&validator_id);
      let tower3 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
      trace!("tower3: {tower3:?}");
      let tower3_root = tower3.root();
      assert!(tower3_root >= new_root);

      // Phase 4: delete the tower file and allow startup without one. The validator will
      // rebuild its tower from its vote account contents.
      remove_tower(&ledger_path, &validator_id);
      validator_info.config.require_tower = false;
      cluster.restart_node(&validator_id, validator_info, SocketAddrSpace::Unspecified);
      let validator_client = cluster
          .build_validator_tpu_quic_client(&validator_id)
          .unwrap();

      // Wait for yet another root, this time past the root recorded in `tower3`.
      let new_root = loop {
          let finalized = validator_client
              .rpc_client()
              .get_slot_with_commitment(CommitmentConfig::finalized());
          match finalized {
              Ok(root) => {
                  trace!("current root: {root}, last tower root: {tower3_root}");
                  if root > tower3_root {
                      break root;
                  }
              }
              Err(_) => {}
          }
          sleep(Duration::from_millis(50));
      };

      // Shut the cluster down but keep the ledgers so the final tower can be read back.
      cluster.close_preserve_ledgers();
      let tower4 = Tower::restore(&file_tower_storage, &validator_id).unwrap();
      trace!("tower4: {tower4:?}");
      assert!(tower4.root() >= new_root);
  }
  1964. fn root_in_tower(tower_path: &Path, node_pubkey: &Pubkey) -> Option<Slot> {
  1965. restore_tower(tower_path, node_pubkey).map(|tower| tower.root())
  1966. }
  /// Cluster topology selector for `do_test_future_tower`.
  enum ClusterMode {
      /// A single-node cluster; that node is the validator under test.
      MasterOnly,
      /// A two-node cluster with a heavily staked first node; the lightly staked second node is
      /// the validator under test.
      MasterSlave,
  }
  1971. fn do_test_future_tower(cluster_mode: ClusterMode) {
  1972. solana_logger::setup_with_default(RUST_LOG_FILTER);
  1973. // First set up the cluster with 4 nodes
  1974. let slots_per_epoch = 2048;
  1975. let node_stakes = match cluster_mode {
  1976. ClusterMode::MasterOnly => vec![DEFAULT_NODE_STAKE],
  1977. ClusterMode::MasterSlave => vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE],
  1978. };
  1979. let validator_keys = [
  1980. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  1981. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  1982. ]
  1983. .iter()
  1984. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  1985. .take(node_stakes.len())
  1986. .collect::<Vec<_>>();
  1987. let validators = validator_keys
  1988. .iter()
  1989. .map(|(kp, _)| kp.pubkey())
  1990. .collect::<Vec<_>>();
  1991. let validator_a_pubkey = match cluster_mode {
  1992. ClusterMode::MasterOnly => validators[0],
  1993. ClusterMode::MasterSlave => validators[1],
  1994. };
  1995. let mut config = ClusterConfig {
  1996. mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
  1997. node_stakes: node_stakes.clone(),
  1998. validator_configs: make_identical_validator_configs(
  1999. &ValidatorConfig::default_for_test(),
  2000. node_stakes.len(),
  2001. ),
  2002. validator_keys: Some(validator_keys),
  2003. slots_per_epoch,
  2004. stakers_slot_offset: slots_per_epoch,
  2005. skip_warmup_slots: true,
  2006. ..ClusterConfig::default()
  2007. };
  2008. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  2009. let val_a_ledger_path = cluster.ledger_path(&validator_a_pubkey);
  2010. loop {
  2011. sleep(Duration::from_millis(100));
  2012. if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  2013. if root >= 15 {
  2014. break;
  2015. }
  2016. }
  2017. }
  2018. let purged_slot_before_restart = 10;
  2019. let validator_a_info = cluster.exit_node(&validator_a_pubkey);
  2020. {
  2021. // create a warped future tower without mangling the tower itself
  2022. info!(
  2023. "Revert blockstore before slot {purged_slot_before_restart} and effectively create a \
  2024. future tower",
  2025. );
  2026. let blockstore = open_blockstore(&val_a_ledger_path);
  2027. purge_slots_with_count(&blockstore, purged_slot_before_restart, 100);
  2028. }
  2029. cluster.restart_node(
  2030. &validator_a_pubkey,
  2031. validator_a_info,
  2032. SocketAddrSpace::Unspecified,
  2033. );
  2034. let mut newly_rooted = false;
  2035. let some_root_after_restart = purged_slot_before_restart + 25; // 25 is arbitrary; just wait a bit
  2036. for _ in 0..600 {
  2037. sleep(Duration::from_millis(100));
  2038. if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  2039. if root >= some_root_after_restart {
  2040. newly_rooted = true;
  2041. break;
  2042. }
  2043. }
  2044. }
  2045. let _validator_a_info = cluster.exit_node(&validator_a_pubkey);
  2046. if newly_rooted {
  2047. // there should be no forks; i.e. monotonically increasing ancestor chain
  2048. let (last_vote, _) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
  2049. let blockstore = open_blockstore(&val_a_ledger_path);
  2050. let actual_block_ancestors = AncestorIterator::new_inclusive(last_vote, &blockstore)
  2051. .take_while(|a| *a >= some_root_after_restart)
  2052. .collect::<Vec<_>>();
  2053. let expected_countinuous_no_fork_votes = (some_root_after_restart..=last_vote)
  2054. .rev()
  2055. .collect::<Vec<_>>();
  2056. assert_eq!(actual_block_ancestors, expected_countinuous_no_fork_votes);
  2057. assert!(actual_block_ancestors.len() > MAX_LOCKOUT_HISTORY);
  2058. info!("validator managed to handle future tower!");
  2059. } else {
  2060. panic!("no root detected");
  2061. }
  2062. }
  /// Runs the future-tower scenario on a single-node cluster.
  #[test]
  #[serial]
  fn test_future_tower_master_only() {
      let mode = ClusterMode::MasterOnly;
      do_test_future_tower(mode);
  }
  /// Runs the future-tower scenario on a two-node cluster.
  #[test]
  #[serial]
  fn test_future_tower_master_slave() {
      let mode = ClusterMode::MasterSlave;
      do_test_future_tower(mode);
  }
  2073. fn restart_whole_cluster_after_hard_fork(
  2074. cluster: &Arc<Mutex<LocalCluster>>,
  2075. validator_a_pubkey: Pubkey,
  2076. validator_b_pubkey: Pubkey,
  2077. mut validator_a_info: ClusterValidatorInfo,
  2078. validator_b_info: ClusterValidatorInfo,
  2079. ) {
  2080. // restart validator A first
  2081. let cluster_for_a = cluster.clone();
  2082. let val_a_ledger_path = validator_a_info.info.ledger_path.clone();
  2083. // Spawn a thread because wait_for_supermajority blocks in Validator::new()!
  2084. let thread = std::thread::spawn(move || {
  2085. let restart_context = cluster_for_a
  2086. .lock()
  2087. .unwrap()
  2088. .create_restart_context(&validator_a_pubkey, &mut validator_a_info);
  2089. let restarted_validator_info = LocalCluster::restart_node_with_context(
  2090. validator_a_info,
  2091. restart_context,
  2092. SocketAddrSpace::Unspecified,
  2093. );
  2094. cluster_for_a
  2095. .lock()
  2096. .unwrap()
  2097. .add_node(&validator_a_pubkey, restarted_validator_info);
  2098. });
  2099. // test validator A actually to wait for supermajority
  2100. let mut last_vote = None;
  2101. for _ in 0..10 {
  2102. sleep(Duration::from_millis(1000));
  2103. let (new_last_vote, _) =
  2104. last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
  2105. if let Some(last_vote) = last_vote {
  2106. assert_eq!(last_vote, new_last_vote);
  2107. } else {
  2108. last_vote = Some(new_last_vote);
  2109. }
  2110. }
  2111. // restart validator B normally
  2112. cluster.lock().unwrap().restart_node(
  2113. &validator_b_pubkey,
  2114. validator_b_info,
  2115. SocketAddrSpace::Unspecified,
  2116. );
  2117. // validator A should now start so join its thread here
  2118. thread.join().unwrap();
  2119. }
  /// Restarts a two-node (60/40 stake) cluster with a hard fork placed below an already rooted
  /// slot and checks that the cluster roots new slots afterwards.
  #[test]
  #[serial]
  fn test_hard_fork_invalidates_tower() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);

      // First set up the cluster with 2 nodes
      let slots_per_epoch = 2048;
      let node_stakes = vec![60 * DEFAULT_NODE_STAKE, 40 * DEFAULT_NODE_STAKE];
      let num_nodes = node_stakes.len();

      let validator_keys: Vec<_> = [
          "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
          "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
      ]
      .iter()
      .take(num_nodes)
      .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
      .collect();
      let validator_a_pubkey = validator_keys[0].0.pubkey();
      let validator_b_pubkey = validator_keys[1].0.pubkey();

      let mut config = ClusterConfig {
          mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
          node_stakes,
          validator_configs: make_identical_validator_configs(
              &ValidatorConfig::default_for_test(),
              num_nodes,
          ),
          validator_keys: Some(validator_keys),
          slots_per_epoch,
          stakers_slot_offset: slots_per_epoch,
          skip_warmup_slots: true,
          ..ClusterConfig::default()
      };
      let cluster = Arc::new(Mutex::new(LocalCluster::new(
          &mut config,
          SocketAddrSpace::Unspecified,
      )));
      let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey);

      // Wait (without a deadline) for validator A's saved tower to reach `min_root`.
      let min_root = 15;
      loop {
          sleep(Duration::from_millis(100));
          let rooted_far_enough = matches!(
              root_in_tower(&val_a_ledger_path, &validator_a_pubkey),
              Some(root) if root >= min_root
          );
          if rooted_far_enough {
              break;
          }
      }

      // Stop both validators; each lock is released at the end of its statement.
      let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey);
      let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);

      // setup hard fork at slot < a previously rooted slot!
      // hard fork earlier than root is very unrealistic in the wild, but it's handy for
      // persistent tower's lockout behavior...
      let hard_fork_slot = min_root - 5;
      let hard_fork_slots = Some(vec![hard_fork_slot]);
      let mut hard_forks = solana_hard_forks::HardForks::default();
      hard_forks.register(hard_fork_slot);

      let expected_shred_version = solana_shred_version::compute_shred_version(
          &cluster.lock().unwrap().genesis_config.hash(),
          Some(&hard_forks),
      );
      cluster
          .lock()
          .unwrap()
          .set_shred_version(expected_shred_version);

      // Both validators receive the same hard-fork related settings.
      for info in [&mut validator_a_info, &mut validator_b_info] {
          info.config.new_hard_forks.clone_from(&hard_fork_slots);
          info.config.wait_for_supermajority = Some(hard_fork_slot);
          info.config.expected_shred_version = Some(expected_shred_version);
      }

      // Clear ledger of all slots post hard fork
      {
          let first_purged_slot = hard_fork_slot + 1;
          let blockstore_a = open_blockstore(&validator_a_info.info.ledger_path);
          let blockstore_b = open_blockstore(&validator_b_info.info.ledger_path);
          purge_slots_with_count(&blockstore_a, first_purged_slot, 100);
          purge_slots_with_count(&blockstore_b, first_purged_slot, 100);
      }

      restart_whole_cluster_after_hard_fork(
          &cluster,
          validator_a_pubkey,
          validator_b_pubkey,
          validator_a_info,
          validator_b_info,
      );

      // new slots should be rooted after hard-fork cluster relaunch
      cluster
          .lock()
          .unwrap()
          .check_for_new_roots(16, "hard fork", SocketAddrSpace::Unspecified);
  }
  /// Runs `run_test_load_program_accounts` with the finalized commitment level.
  #[test]
  #[serial]
  fn test_run_test_load_program_accounts_root() {
      let scan_commitment = CommitmentConfig::finalized();
      run_test_load_program_accounts(scan_commitment);
  }
  2219. fn create_simple_snapshot_config(ledger_path: &Path) -> SnapshotConfig {
  2220. SnapshotConfig {
  2221. full_snapshot_archives_dir: ledger_path.to_path_buf(),
  2222. bank_snapshots_dir: ledger_path.join("snapshot"),
  2223. ..SnapshotConfig::default()
  2224. }
  2225. }
  2226. fn create_snapshot_to_hard_fork(
  2227. blockstore: &Blockstore,
  2228. snapshot_slot: Slot,
  2229. hard_forks: Vec<Slot>,
  2230. ) {
  2231. let process_options = ProcessOptions {
  2232. halt_at_slot: Some(snapshot_slot),
  2233. new_hard_forks: Some(hard_forks),
  2234. run_verification: false,
  2235. ..ProcessOptions::default()
  2236. };
  2237. let ledger_path = blockstore.ledger_path();
  2238. let genesis_config = open_genesis_config(ledger_path, u64::MAX).unwrap();
  2239. let snapshot_config = create_simple_snapshot_config(ledger_path);
  2240. let (bank_forks, ..) = bank_forks_utils::load(
  2241. &genesis_config,
  2242. blockstore,
  2243. vec![
  2244. create_accounts_run_and_snapshot_dirs(ledger_path.join("accounts"))
  2245. .unwrap()
  2246. .0,
  2247. ],
  2248. &snapshot_config,
  2249. process_options,
  2250. None,
  2251. None,
  2252. None,
  2253. Arc::default(),
  2254. )
  2255. .unwrap();
  2256. let bank = bank_forks.read().unwrap().get(snapshot_slot).unwrap();
  2257. let full_snapshot_archive_info = snapshot_bank_utils::bank_to_full_snapshot_archive(
  2258. ledger_path,
  2259. &bank,
  2260. Some(snapshot_config.snapshot_version),
  2261. ledger_path,
  2262. ledger_path,
  2263. snapshot_config.archive_format,
  2264. )
  2265. .unwrap();
  2266. info!(
  2267. "Successfully created snapshot for slot {}, hash {}: {}",
  2268. bank.slot(),
  2269. bank.hash(),
  2270. full_snapshot_archive_info.path().display(),
  2271. );
  2272. }
  /// Restarts a two-node cluster with a hard fork at an unrooted slot, where validator A boots
  /// from a hard-forked snapshot and validator B from the `new_hard_forks` setting, then checks
  /// that both ledgers agree on ancestors and roots and that the roots have no gap.
  #[test]
  #[ignore]
  #[serial]
  fn test_hard_fork_with_gap_in_roots() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);

      // First set up the cluster with 2 nodes
      let slots_per_epoch = 2048;
      let node_stakes = vec![60, 40];
      let num_nodes = node_stakes.len();

      let validator_keys: Vec<_> = [
          "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
          "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
      ]
      .iter()
      .take(num_nodes)
      .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
      .collect();
      let validator_a_pubkey = validator_keys[0].0.pubkey();
      let validator_b_pubkey = validator_keys[1].0.pubkey();

      let validator_config = ValidatorConfig {
          snapshot_config: LocalCluster::create_dummy_load_only_snapshot_config(),
          ..ValidatorConfig::default_for_test()
      };
      let mut config = ClusterConfig {
          mint_lamports: 100_000,
          node_stakes,
          validator_configs: make_identical_validator_configs(&validator_config, num_nodes),
          validator_keys: Some(validator_keys),
          slots_per_epoch,
          stakers_slot_offset: slots_per_epoch,
          skip_warmup_slots: true,
          ..ClusterConfig::default()
      };
      let cluster = Arc::new(Mutex::new(LocalCluster::new(
          &mut config,
          SocketAddrSpace::Unspecified,
      )));
      let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey);
      let val_b_ledger_path = cluster.lock().unwrap().ledger_path(&validator_b_pubkey);

      // Wait (without a deadline) until validator A has voted on slot 45 or later and its tower
      // root is strictly above slot 10. The root is only looked up once the vote condition holds.
      let min_last_vote = 45;
      let min_root = 10;
      loop {
          sleep(Duration::from_millis(100));
          let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
          else {
              continue;
          };
          if last_vote >= min_last_vote
              && matches!(
                  root_in_tower(&val_a_ledger_path, &validator_a_pubkey),
                  Some(root) if root > min_root
              )
          {
              break;
          }
      }

      // stop all nodes of the cluster
      let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey);
      let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);

      // hard fork slot is effectively a (possibly skipping) new root.
      // assert that the precondition of validator a to test gap between
      // blockstore and hard fork...
      let hard_fork_slot = min_last_vote - 5;
      assert!(hard_fork_slot > root_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap());

      let hard_fork_slots = Some(vec![hard_fork_slot]);
      let mut hard_forks = HardForks::default();
      hard_forks.register(hard_fork_slot);
      let expected_shred_version = solana_shred_version::compute_shred_version(
          &cluster.lock().unwrap().genesis_config.hash(),
          Some(&hard_forks),
      );

      // create hard-forked snapshot only for validator a, emulating the manual cluster restart
      // procedure with `agave-ledger-tool create-snapshot`
      let genesis_slot = 0;
      {
          let blockstore_a = Blockstore::open(&val_a_ledger_path).unwrap();
          create_snapshot_to_hard_fork(&blockstore_a, hard_fork_slot, vec![hard_fork_slot]);

          // Intentionally make agave-validator unbootable by replaying blocks from the genesis to
          // ensure the hard-forked snapshot is used always. Otherwise, we couldn't create a gap
          // in the ledger roots column family reliably.
          // There was a bug which caused the hard-forked snapshot at an unrooted slot to forget
          // to root some slots (thus, creating a gap in roots, which shouldn't happen).
          purge_slots_with_count(&blockstore_a, genesis_slot, 1);
          let next_slot = genesis_slot + 1;
          let mut orphaned_meta = blockstore_a.meta(next_slot).unwrap().unwrap();
          orphaned_meta.unset_parent();
          blockstore_a.put_meta(next_slot, &orphaned_meta).unwrap();
      }

      // Strictly speaking, `new_hard_forks` isn't needed for validator a, so it is left unset
      // there. But when snapshot loading isn't working, you might see:
      //   shred version mismatch: expected NNNN found: MMMM
      // effectively pass the --hard-fork parameter to validator b
      validator_b_info.config.new_hard_forks = hard_fork_slots;
      for info in [&mut validator_a_info, &mut validator_b_info] {
          info.config.wait_for_supermajority = Some(hard_fork_slot);
          info.config.expected_shred_version = Some(expected_shred_version);
      }

      restart_whole_cluster_after_hard_fork(
          &cluster,
          validator_a_pubkey,
          validator_b_pubkey,
          validator_a_info,
          validator_b_info,
      );

      // new slots should be rooted after hard-fork cluster relaunch
      cluster
          .lock()
          .unwrap()
          .check_for_new_roots(16, "hard fork", SocketAddrSpace::Unspecified);

      // drop everything to open blockstores below
      drop(cluster);

      // Compare the two ledgers from the lowest last vote and lowest root the towers share.
      let (common_last_vote, common_root) = {
          let (last_vote_a, _) =
              last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
          let (last_vote_b, _) =
              last_vote_in_tower(&val_b_ledger_path, &validator_b_pubkey).unwrap();
          let root_a = root_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
          let root_b = root_in_tower(&val_b_ledger_path, &validator_b_pubkey).unwrap();
          (last_vote_a.min(last_vote_b), root_a.min(root_b))
      };

      let blockstore_a = Blockstore::open(&val_a_ledger_path).unwrap();
      let blockstore_b = Blockstore::open(&val_b_ledger_path).unwrap();

      // collect all slot/root parents
      let mut slots_a: Vec<_> = AncestorIterator::new(common_last_vote, &blockstore_a).collect();
      let mut roots_a: Vec<_> = blockstore_a
          .reversed_rooted_slot_iterator(common_root)
          .unwrap()
          .collect();
      // artificially restore the forcibly purged genesis only for the validator A just for the
      // sake of the final assertions.
      slots_a.push(genesis_slot);
      roots_a.push(genesis_slot);

      let slots_b: Vec<_> = AncestorIterator::new(common_last_vote, &blockstore_b).collect();
      let roots_b: Vec<_> = blockstore_b
          .reversed_rooted_slot_iterator(common_root)
          .unwrap()
          .collect();

      // compare them all!
      assert_eq!((&slots_a, &roots_a), (&slots_b, &roots_b));
      // The roots must be exactly the tail of the ancestor chain, i.e. no gap in the roots.
      let roots_start_a = slots_a.len() - roots_a.len();
      assert_eq!(&slots_a[roots_start_a..], roots_a.as_slice());
      let roots_start_b = slots_b.len() - roots_b.len();
      assert_eq!(&slots_b[roots_start_b..], roots_b.as_slice());
  }
  /// Test node crashing and failing to save its tower before restart.
  ///
  /// The cluster continues to make progress, and the lightly staked node B must be able to
  /// rejoin with an outdated tower after its restart.
  #[test]
  #[serial]
  fn test_restart_tower_rollback() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);

      // First set up the cluster with 2 nodes
      let slots_per_epoch = 2048;
      let node_stakes = vec![DEFAULT_NODE_STAKE * 100, DEFAULT_NODE_STAKE];
      let num_nodes = node_stakes.len();

      let validator_strings = [
          "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
          "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
      ];
      let validator_keys: Vec<_> = validator_strings
          .iter()
          .take(num_nodes)
          .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
          .collect();
      let b_pubkey = validator_keys[1].0.pubkey();

      let mut config = ClusterConfig {
          mint_lamports: DEFAULT_MINT_LAMPORTS + DEFAULT_NODE_STAKE * 100,
          node_stakes,
          validator_configs: make_identical_validator_configs(
              &ValidatorConfig::default_for_test(),
              num_nodes,
          ),
          validator_keys: Some(validator_keys),
          slots_per_epoch,
          stakers_slot_offset: slots_per_epoch,
          skip_warmup_slots: true,
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
      let val_b_ledger_path = cluster.ledger_path(&b_pubkey);

      // Sample B's saved tower once per second and keep the first one whose last vote is
      // beyond slot 1.
      let earlier_tower = loop {
          sleep(Duration::from_millis(1000));
          let snapshot = restore_tower(&val_b_ledger_path, &b_pubkey).unwrap();
          if snapshot.last_voted_slot().unwrap_or(0) > 1 {
              break snapshot;
          }
      };
      let earlier_last_vote = earlier_tower
          .last_voted_slot()
          .expect("Earlier tower must have at least one vote");

      // Wait for second, lesser staked validator to make a root past the earlier_tower's
      // latest vote slot, then exit that validator
      let (mut exited_validator_info, last_voted_slot) = loop {
          sleep(Duration::from_millis(1000));
          let current_tower = restore_tower(&val_b_ledger_path, &b_pubkey).unwrap();
          if current_tower.root() > earlier_last_vote {
              // Exit the node first, then read the last vote from the tower sampled above.
              let exited = cluster.exit_node(&b_pubkey);
              break (exited, current_tower.last_voted_slot().unwrap());
          }
      };

      // Now rewrite the tower with the *earlier_tower*. We disable voting until we reach
      // a slot we did not previously vote for in order to avoid duplicate vote slashing
      // issues.
      save_tower(
          &val_b_ledger_path,
          &earlier_tower,
          &exited_validator_info.info.keypair,
      );
      exited_validator_info.config.wait_to_vote_slot = Some(last_voted_slot + 10);
      cluster.restart_node(
          &b_pubkey,
          exited_validator_info,
          SocketAddrSpace::Unspecified,
      );

      // Check this node is making new roots
      cluster.check_for_new_roots(
          20,
          "test_restart_tower_rollback",
          SocketAddrSpace::Unspecified,
      );
  }
  /// Runs the program-accounts partition scenario with scans at `finalized`
  /// commitment and `is_alpenglow` switched off.
  #[test]
  #[serial]
  fn test_run_test_load_program_accounts_partition_root() {
      let scan_commitment = CommitmentConfig::finalized();
      let is_alpenglow = false;
      run_test_load_program_accounts_partition(scan_commitment, is_alpenglow);
  }
  /// Runs the program-accounts partition scenario with scans at `finalized`
  /// commitment and `is_alpenglow` switched on.
  #[test]
  #[serial]
  fn test_alpenglow_run_test_load_program_accounts_partition_root() {
      let scan_commitment = CommitmentConfig::finalized();
      let is_alpenglow = true;
      run_test_load_program_accounts_partition(scan_commitment, is_alpenglow);
  }
  2504. fn run_test_load_program_accounts_partition(scan_commitment: CommitmentConfig, is_alpenglow: bool) {
  2505. let num_slots_per_validator = 8;
  2506. let partitions: [usize; 2] = [1, 1];
  2507. let (leader_schedule, validator_keys) = create_custom_leader_schedule_with_random_keys(&[
  2508. num_slots_per_validator,
  2509. num_slots_per_validator,
  2510. ]);
  2511. let (update_client_sender, update_client_receiver) = unbounded();
  2512. let (scan_client_sender, scan_client_receiver) = unbounded();
  2513. let exit = Arc::new(AtomicBool::new(false));
  2514. let (t_update, t_scan, additional_accounts) = setup_transfer_scan_threads(
  2515. 1000,
  2516. exit.clone(),
  2517. scan_commitment,
  2518. update_client_receiver,
  2519. scan_client_receiver,
  2520. );
  2521. let on_partition_start = |cluster: &mut LocalCluster, _: &mut ()| {
  2522. let update_client = cluster
  2523. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2524. .unwrap();
  2525. update_client_sender.send(update_client).unwrap();
  2526. let scan_client = cluster
  2527. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  2528. .unwrap();
  2529. scan_client_sender.send(scan_client).unwrap();
  2530. };
  2531. let on_partition_before_resolved = |_: &mut LocalCluster, _: &mut ()| {};
  2532. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  2533. cluster.check_for_new_roots(
  2534. 16,
  2535. "run_test_load_program_accounts_partition",
  2536. SocketAddrSpace::Unspecified,
  2537. );
  2538. exit.store(true, Ordering::Relaxed);
  2539. t_update.join().unwrap();
  2540. t_scan.join().unwrap();
  2541. };
  2542. run_cluster_partition(
  2543. &partitions,
  2544. Some((leader_schedule, validator_keys)),
  2545. (),
  2546. on_partition_start,
  2547. on_partition_before_resolved,
  2548. on_partition_resolved,
  2549. None,
  2550. additional_accounts,
  2551. is_alpenglow,
  2552. );
  2553. }
  /// Subscribes to block notifications on the non-leader node of a two-node
  /// cluster and checks that a confirmed block with slot > 1 is delivered
  /// within ten seconds.
  #[test]
  #[serial]
  fn test_rpc_block_subscribe() {
      let leader_stake = 100 * DEFAULT_NODE_STAKE;
      let rpc_stake = DEFAULT_NODE_STAKE;
      let node_stakes = vec![leader_stake, rpc_stake];
      let total_stake = leader_stake + rpc_stake;

      let mut validator_config = ValidatorConfig::default_for_test();
      validator_config.enable_default_rpc_block_subscribe();

      let validator_keys = [
          "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
          "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
      ]
      .iter()
      .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
      .take(node_stakes.len())
      .collect::<Vec<_>>();
      // The second key belongs to the lightly staked node we subscribe to.
      let rpc_node_pubkey = validator_keys[1].0.pubkey();

      let mut config = ClusterConfig {
          mint_lamports: total_stake,
          node_stakes,
          validator_configs: make_identical_validator_configs(&validator_config, 2),
          validator_keys: Some(validator_keys),
          skip_warmup_slots: true,
          ..ClusterConfig::default()
      };
      let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      // It is important that we subscribe to a non leader node as there
      // is a race condition which can cause leader nodes to not send
      // BlockUpdate notifications properly. See https://github.com/solana-labs/solana/pull/34421
      let rpc_node_contact_info = cluster.get_contact_info(&rpc_node_pubkey).unwrap();
      let pubsub_addr = rpc_node_contact_info.rpc_pubsub().unwrap().to_string();
      let pubsub_url = format!("ws://{}", pubsub_addr);
      let subscribe_config = RpcBlockSubscribeConfig {
          commitment: Some(CommitmentConfig::confirmed()),
          encoding: None,
          transaction_details: None,
          show_rewards: None,
          max_supported_transaction_version: None,
      };
      let (mut block_subscribe_client, receiver) = PubsubClient::block_subscribe(
          &pubsub_url,
          RpcBlockSubscribeFilter::All,
          Some(subscribe_config),
      )
      .unwrap();

      let max_wait = 10_000;
      let start = Instant::now();
      let mut received_block = false;
      while !received_block {
          assert!(
              start.elapsed() <= Duration::from_millis(max_wait),
              "Went too long {max_wait} ms without receiving a confirmed block",
          );
          // Drain everything currently queued; every notification must carry
          // a block and no error.
          let responses: Vec<_> = receiver.try_iter().collect();
          for response in responses {
              assert!(response.value.err.is_none());
              assert!(response.value.block.is_some());
              received_block |= response.value.slot > 1;
          }
          sleep(Duration::from_millis(100));
      }

      // If we don't drop the cluster, the blocking web socket service
      // won't return, and the `block_subscribe_client` won't shut down
      drop(cluster);
      block_subscribe_client.shutdown().unwrap();
  }
  /// Replaces our node with a gossip-driven vote simulator that signs votes
  /// with an unauthorized key, and asserts that the leader never emits a
  /// `confirmed` block notification while those votes are being sent.
  #[test]
  #[serial]
  #[allow(unused_attributes)]
  fn test_oc_bad_signatures() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);

      // Stake split: the leader holds `VOTE_THRESHOLD_SIZE` of the total,
      // our node holds the remainder.
      let total_stake = 100 * DEFAULT_NODE_STAKE;
      let leader_stake = (total_stake as f64 * VOTE_THRESHOLD_SIZE) as u64;
      let our_node_stake = total_stake - leader_stake;
      let node_stakes = vec![leader_stake, our_node_stake];

      let mut validator_config = ValidatorConfig {
          require_tower: true,
          ..ValidatorConfig::default_for_test()
      };
      validator_config.enable_default_rpc_block_subscribe();

      let validator_keys = [
          "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
          "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
      ]
      .iter()
      .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
      .take(node_stakes.len())
      .collect::<Vec<_>>();
      let our_id = validator_keys.last().unwrap().0.pubkey();

      let mut config = ClusterConfig {
          mint_lamports: total_stake,
          node_stakes,
          validator_configs: make_identical_validator_configs(&validator_config, 2),
          validator_keys: Some(validator_keys),
          skip_warmup_slots: true,
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      // Kill our node; from here on its votes are produced by the simulator
      // thread started below.
      let our_info = cluster.exit_node(&our_id);
      let node_keypair = our_info.info.keypair;
      let vote_keypair = our_info.info.voting_keypair;
      info!(
          "our node id: {}, vote id: {}",
          node_keypair.pubkey(),
          vote_keypair.pubkey()
      );

      // Client used by the simulator to push votes to the leader's TPU.
      let client = cluster
          .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
          .unwrap();
      let cluster_funding_keypair = cluster.funding_keypair.insecure_clone();
      let voter_thread_sleep_ms: usize = 100;
      let num_votes_simulated = Arc::new(AtomicUsize::new(0));
      let gossip_voter = cluster_tests::start_gossip_voter(
          &cluster.entry_point_info.gossip().unwrap(),
          &node_keypair,
          // Vote filter: keep only non-empty tower votes seen in gossip.
          |(_label, leader_vote_tx)| {
              let (_, parsed, ..) = vote_parser::parse_vote_transaction(&leader_vote_tx).unwrap();
              let vote = parsed.as_tower_transaction().unwrap();
              if vote.is_empty() {
                  None
              } else {
                  Some((vote.into(), leader_vote_tx))
              }
          },
          // Vote generator: mirror each observed leader vote with a vote from
          // our node whose authorized-signer slot holds a freshly generated key.
          {
              let node_keypair = node_keypair.insecure_clone();
              let vote_keypair = vote_keypair.insecure_clone();
              let num_votes_simulated = num_votes_simulated.clone();
              move |vote_slot, leader_vote_tx, parsed_vote, _cluster_info| {
                  info!("received vote for {}", vote_slot);
                  let parsed_vote = parsed_vote.as_tower_transaction_ref().unwrap();
                  let vote_hash = parsed_vote.hash();
                  info!("Simulating vote from our node on slot {vote_slot}, hash {vote_hash}");
                  // The simulated tower sync carries only the observed vote slot.
                  let tower_sync = TowerSync::new_from_slots(vec![vote_slot], vote_hash, None);
                  let bad_authorized_signer_keypair = Keypair::new();
                  let mut vote_tx = vote_transaction::new_tower_sync_transaction(
                      tower_sync,
                      leader_vote_tx.message.recent_blockhash,
                      &node_keypair,
                      &vote_keypair,
                      // Make a bad signer
                      &bad_authorized_signer_keypair,
                      None,
                  );
                  LocalCluster::send_transaction_with_retries(
                      &client,
                      &[&cluster_funding_keypair, &bad_authorized_signer_keypair],
                      &mut vote_tx,
                      5,
                  )
                  .unwrap();
                  num_votes_simulated.fetch_add(1, Ordering::Relaxed);
              }
          },
          voter_thread_sleep_ms as u64,
          cluster.validators.len().saturating_sub(1),
          0,
          0,
          cluster.entry_point_info.shred_version(),
      );

      let leader_pubsub_addr = cluster.entry_point_info.rpc_pubsub().unwrap().to_string();
      let (mut block_subscribe_client, receiver) = PubsubClient::block_subscribe(
          &format!("ws://{}", &leader_pubsub_addr),
          RpcBlockSubscribeFilter::All,
          Some(RpcBlockSubscribeConfig {
              commitment: Some(CommitmentConfig::confirmed()),
              encoding: None,
              transaction_details: None,
              show_rewards: None,
              max_supported_transaction_version: None,
          }),
      )
      .unwrap();

      const MAX_VOTES_TO_SIMULATE: usize = 10;
      // Make sure test doesn't take too long
      assert!(voter_thread_sleep_ms * MAX_VOTES_TO_SIMULATE <= 1000);
      loop {
          // Nothing should get optimistically confirmed or rooted
          let responses: Vec<_> = receiver.try_iter().collect();
          assert!(responses.is_empty());

          // Stop once the simulator has attempted more than the target number
          // of votes, i.e. the violation has had a fair chance to occur.
          let simulated_so_far = num_votes_simulated.load(Ordering::Relaxed);
          if simulated_so_far > MAX_VOTES_TO_SIMULATE {
              break;
          }
          sleep(Duration::from_millis(100));
      }

      // Clean up voter thread
      gossip_voter.close();

      // If we don't drop the cluster, the blocking web socket service
      // won't return, and the `block_subscribe_client` won't shut down
      drop(cluster);
      block_subscribe_client.shutdown().unwrap();
  }
  /// Partitions a heavier and a lighter validator onto separate forks (a
  /// third validator is killed), then checks that after the partition heals
  /// the lighter validator roots a slot from the heavier fork before
  /// `MAX_PROCESSING_AGE` slots' worth of time has elapsed.
  #[test]
  #[serial]
  #[ignore]
  fn test_votes_land_in_fork_during_long_partition() {
      /// State shared between the partition callbacks.
      #[derive(Default)]
      struct PartitionContext {
          heaviest_validator_key: Pubkey,
          lighter_validator_key: Pubkey,
          heavier_fork_slot: Slot,
      }

      let total_stake = 3 * DEFAULT_NODE_STAKE;
      // Make `lighter_stake` insufficient for switching threshold
      let lighter_stake = (SWITCH_FORK_THRESHOLD * total_stake as f64) as u64;
      let heavier_stake = lighter_stake + 1;
      let failures_stake = total_stake - lighter_stake - heavier_stake;

      // Give lighter stake 30 consecutive slots before
      // the heavier stake gets a single slot
      let partitions: &[(usize, usize)] =
          &[(heavier_stake as usize, 1), (lighter_stake as usize, 30)];

      let on_partition_start = |_cluster: &mut LocalCluster,
                                validator_keys: &[Pubkey],
                                _dead_validator_infos: Vec<ClusterValidatorInfo>,
                                context: &mut PartitionContext| {
          // validator_keys[0] is the validator that will be killed, i.e. the validator with
          // stake == `failures_stake`
          context.heaviest_validator_key = validator_keys[1];
          context.lighter_validator_key = validator_keys[2];
      };

      let on_before_partition_resolved =
          |cluster: &mut LocalCluster, context: &mut PartitionContext| {
              let lighter_validator_ledger_path =
                  cluster.ledger_path(&context.lighter_validator_key);
              let heavier_validator_ledger_path =
                  cluster.ledger_path(&context.heaviest_validator_key);

              // Poll until the heavier validator's latest vote is for a slot the
              // lighter validator's blockstore has no meta for, i.e. each node
              // has created and voted on its own partition.
              loop {
                  let (heavier_validator_latest_vote_slot, _) = last_vote_in_tower(
                      &heavier_validator_ledger_path,
                      &context.heaviest_validator_key,
                  )
                  .unwrap();
                  info!(
                      "Checking heavier validator's last vote {heavier_validator_latest_vote_slot} \
                       is on a separate fork"
                  );
                  let lighter_validator_blockstore =
                      open_blockstore(&lighter_validator_ledger_path);
                  let slot_meta = lighter_validator_blockstore
                      .meta(heavier_validator_latest_vote_slot)
                      .unwrap();
                  if slot_meta.is_none() {
                      context.heavier_fork_slot = heavier_validator_latest_vote_slot;
                      break;
                  }
                  sleep(Duration::from_millis(100));
              }
          };

      let on_partition_resolved = |cluster: &mut LocalCluster, context: &mut PartitionContext| {
          let lighter_validator_ledger_path = cluster.ledger_path(&context.lighter_validator_key);
          let max_wait = ms_for_n_slots(MAX_PROCESSING_AGE as u64, DEFAULT_TICKS_PER_SLOT);
          let start = Instant::now();

          // Wait for the lighter node to switch over and root the `context.heavier_fork_slot`
          loop {
              // Should finish faster than if the cluster were relying on replay vote
              // refreshing to refresh the vote on blockhash expiration for the vote
              // transaction.
              assert!(
                  start.elapsed() <= Duration::from_millis(max_wait),
                  "Went too long {max_wait} ms without a root",
              );
              let lighter_validator_blockstore = open_blockstore(&lighter_validator_ledger_path);
              let rooted = lighter_validator_blockstore.is_root(context.heavier_fork_slot);
              if rooted {
                  info!(
                      "Partition resolved, new root made in {}ms",
                      start.elapsed().as_millis()
                  );
                  break;
              }
              sleep(Duration::from_millis(100));
          }
      };

      run_kill_partition_switch_threshold(
          &[(failures_stake as usize, 0)],
          partitions,
          None,
          PartitionContext::default(),
          on_partition_start,
          on_before_partition_resolved,
          on_partition_resolved,
      );
  }
  2855. fn setup_transfer_scan_threads(
  2856. num_starting_accounts: usize,
  2857. exit: Arc<AtomicBool>,
  2858. scan_commitment: CommitmentConfig,
  2859. update_client_receiver: Receiver<QuicTpuClient>,
  2860. scan_client_receiver: Receiver<QuicTpuClient>,
  2861. ) -> (
  2862. JoinHandle<()>,
  2863. JoinHandle<()>,
  2864. Vec<(Pubkey, AccountSharedData)>,
  2865. ) {
  2866. let exit_ = exit.clone();
  2867. let starting_keypairs: Arc<Vec<Keypair>> = Arc::new(
  2868. iter::repeat_with(Keypair::new)
  2869. .take(num_starting_accounts)
  2870. .collect(),
  2871. );
  2872. let target_keypairs: Arc<Vec<Keypair>> = Arc::new(
  2873. iter::repeat_with(Keypair::new)
  2874. .take(num_starting_accounts)
  2875. .collect(),
  2876. );
  2877. let starting_accounts: Vec<(Pubkey, AccountSharedData)> = starting_keypairs
  2878. .iter()
  2879. .map(|k| {
  2880. (
  2881. k.pubkey(),
  2882. AccountSharedData::new(1, 0, &system_program::id()),
  2883. )
  2884. })
  2885. .collect();
  2886. let starting_keypairs_ = starting_keypairs.clone();
  2887. let target_keypairs_ = target_keypairs.clone();
  2888. let t_update = Builder::new()
  2889. .name("update".to_string())
  2890. .spawn(move || {
  2891. let client = update_client_receiver.recv().unwrap();
  2892. loop {
  2893. if exit_.load(Ordering::Relaxed) {
  2894. return;
  2895. }
  2896. let (blockhash, _) = client
  2897. .rpc_client()
  2898. .get_latest_blockhash_with_commitment(CommitmentConfig::processed())
  2899. .unwrap();
  2900. for i in 0..starting_keypairs_.len() {
  2901. let result = client.async_transfer(
  2902. 1,
  2903. &starting_keypairs_[i],
  2904. &target_keypairs_[i].pubkey(),
  2905. blockhash,
  2906. );
  2907. if result.is_err() {
  2908. debug!("Failed in transfer for starting keypair: {result:?}");
  2909. }
  2910. }
  2911. for i in 0..starting_keypairs_.len() {
  2912. let result = client.async_transfer(
  2913. 1,
  2914. &target_keypairs_[i],
  2915. &starting_keypairs_[i].pubkey(),
  2916. blockhash,
  2917. );
  2918. if result.is_err() {
  2919. debug!("Failed in transfer for starting keypair: {result:?}");
  2920. }
  2921. }
  2922. }
  2923. })
  2924. .unwrap();
  2925. // Scan, the total funds should add up to the original
  2926. let mut scan_commitment_config = RpcProgramAccountsConfig::default();
  2927. scan_commitment_config.account_config.commitment = Some(scan_commitment);
  2928. let tracked_pubkeys: HashSet<Pubkey> = starting_keypairs
  2929. .iter()
  2930. .chain(target_keypairs.iter())
  2931. .map(|k| k.pubkey())
  2932. .collect();
  2933. let expected_total_balance = num_starting_accounts as u64;
  2934. let t_scan = Builder::new()
  2935. .name("scan".to_string())
  2936. .spawn(move || {
  2937. let client = scan_client_receiver.recv().unwrap();
  2938. loop {
  2939. if exit.load(Ordering::Relaxed) {
  2940. return;
  2941. }
  2942. if let Some(total_scan_balance) = client
  2943. .rpc_client()
  2944. .get_program_accounts_with_config(
  2945. &system_program::id(),
  2946. scan_commitment_config.clone(),
  2947. )
  2948. .ok()
  2949. .map(|result| {
  2950. result
  2951. .into_iter()
  2952. .map(|(key, account)| {
  2953. if tracked_pubkeys.contains(&key) {
  2954. account.lamports
  2955. } else {
  2956. 0
  2957. }
  2958. })
  2959. .sum::<u64>()
  2960. })
  2961. {
  2962. assert_eq!(total_scan_balance, expected_total_balance);
  2963. }
  2964. }
  2965. })
  2966. .unwrap();
  2967. (t_update, t_scan, starting_accounts)
  2968. }
  2969. fn run_test_load_program_accounts(scan_commitment: CommitmentConfig) {
  2970. solana_logger::setup_with_default(RUST_LOG_FILTER);
  2971. // First set up the cluster with 2 nodes
  2972. let slots_per_epoch = 2048;
  2973. let node_stakes = vec![51 * DEFAULT_NODE_STAKE, 50 * DEFAULT_NODE_STAKE];
  2974. let validator_keys: Vec<_> = [
  2975. "4qhhXNTbKD1a5vxDDLZcHKj7ELNeiivtUBxn3wUK1F5VRsQVP89VUhfXqSfgiFB14GfuBgtrQ96n9NvWQADVkcCg",
  2976. "3kHBzVwie5vTEaY6nFCPeFT8qDpoXzn7dCEioGRNBTnUDpvwnG85w8Wq63gVWpVTP8k2a8cgcWRjSXyUkEygpXWS",
  2977. ]
  2978. .iter()
  2979. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  2980. .take(node_stakes.len())
  2981. .collect();
  2982. let num_starting_accounts = 1000;
  2983. let exit = Arc::new(AtomicBool::new(false));
  2984. let (update_client_sender, update_client_receiver) = unbounded();
  2985. let (scan_client_sender, scan_client_receiver) = unbounded();
  2986. // Setup the update/scan threads
  2987. let (t_update, t_scan, starting_accounts) = setup_transfer_scan_threads(
  2988. num_starting_accounts,
  2989. exit.clone(),
  2990. scan_commitment,
  2991. update_client_receiver,
  2992. scan_client_receiver,
  2993. );
  2994. let mut config = ClusterConfig {
  2995. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  2996. node_stakes: node_stakes.clone(),
  2997. validator_configs: make_identical_validator_configs(
  2998. &ValidatorConfig::default_for_test(),
  2999. node_stakes.len(),
  3000. ),
  3001. validator_keys: Some(validator_keys),
  3002. slots_per_epoch,
  3003. stakers_slot_offset: slots_per_epoch,
  3004. skip_warmup_slots: true,
  3005. additional_accounts: starting_accounts,
  3006. ..ClusterConfig::default()
  3007. };
  3008. let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  3009. // Give the threads a client to use for querying the cluster
  3010. let all_pubkeys = cluster.get_node_pubkeys();
  3011. let other_validator_id = all_pubkeys
  3012. .into_iter()
  3013. .find(|x| x != cluster.entry_point_info.pubkey())
  3014. .unwrap();
  3015. let client = cluster
  3016. .build_validator_tpu_quic_client(cluster.entry_point_info.pubkey())
  3017. .unwrap();
  3018. update_client_sender.send(client).unwrap();
  3019. let scan_client = cluster
  3020. .build_validator_tpu_quic_client(&other_validator_id)
  3021. .unwrap();
  3022. scan_client_sender.send(scan_client).unwrap();
  3023. // Wait for some roots to pass
  3024. cluster.check_for_new_roots(
  3025. 40,
  3026. "run_test_load_program_accounts",
  3027. SocketAddrSpace::Unspecified,
  3028. );
  3029. // Exit and ensure no violations of consistency were found
  3030. exit.store(true, Ordering::Relaxed);
  3031. t_update.join().unwrap();
  3032. t_scan.join().unwrap();
  3033. }
  /// Runs the lockout-violation scenario with validator A's persisted tower
  /// left in place; no violation is expected.
  #[test]
  #[serial]
  fn test_no_lockout_violation_with_tower() {
      let with_tower = true;
      do_test_lockout_violation_with_or_without_tower(with_tower);
  }
  /// Runs the lockout-violation scenario with validator A's persisted tower
  /// removed; a violation is expected.
  #[test]
  #[serial]
  fn test_lockout_violation_without_tower() {
      let with_tower = false;
      do_test_lockout_violation_with_or_without_tower(with_tower);
  }
  3044. // A bit convoluted test case; but this roughly follows this test theoretical scenario:
  3045. // Validator A, B, C have 31, 36, 33 % of stake respectively. Leader schedule is split, first half
  3046. // of the test B is always leader, second half C is.
  3047. // We don't give validator A any slots because it's going to be deleting its ledger,
  3048. // so it may create different blocks for slots it's already created blocks for on a different fork
  3049. //
  3050. // Step 1: Kill C, only A and B should be running
  3051. //
  3052. // base_slot -> next_slot_on_a (Wait for A to vote)
  3053. //
  3054. // Step 2:
  3055. // Kill A and B once we verify that A has voted voted on some `next_slot_on_a` >= 1.
  3056. // Copy B's ledger to A and C but only up to slot `next_slot_on_a`.
  3057. //
  3058. // Step 3:
  3059. // Restart validator C to make it produce blocks on a fork from `base_slot`
  3060. // that doesn't include `next_slot_on_a`. Wait for it to vote on its own fork.
  3061. //
  3062. // base_slot -> next_slot_on_c
  3063. //
  3064. // Step 4: Restart `A` which had 31% of the stake, it's missing `next_slot_on_a` in
  3065. // its ledger since we copied the ledger from B excluding this slot, so it sees
  3066. //
  3067. // base_slot -> next_slot_on_c
  3068. //
  3069. // Step 5:
  3070. // Without the persisted tower:
  3071. // `A` would choose to vote on the new fork from C on `next_slot_on_c`
  3072. //
  3073. // With the persisted tower:
  3074. // `A` should not be able to generate a switching proof.
  3075. //
  3076. fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
  3077. solana_logger::setup_with("info");
  3078. // First set up the cluster with 4 nodes
  3079. let slots_per_epoch = 2048;
  3080. let node_stakes = vec![
  3081. 31 * DEFAULT_NODE_STAKE,
  3082. 36 * DEFAULT_NODE_STAKE,
  3083. 33 * DEFAULT_NODE_STAKE,
  3084. ];
  3085. let validator_b_last_leader_slot: Slot = 8;
  3086. let truncated_slots: Slot = 100;
  3087. // Each pubkeys are prefixed with A, B, C
  3088. let validator_keys = [
  3089. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  3090. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  3091. "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
  3092. ]
  3093. .iter()
  3094. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  3095. .take(node_stakes.len())
  3096. .collect::<Vec<_>>();
  3097. let validators = validator_keys
  3098. .iter()
  3099. .map(|(kp, _)| kp.pubkey())
  3100. .collect::<Vec<_>>();
  3101. let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) =
  3102. (validators[0], validators[1], validators[2]);
  3103. // Disable voting on all validators other than validator B
  3104. let mut default_config = ValidatorConfig::default_for_test();
  3105. // Ensure B can make leader blocks up till the fork slot, and give the remaining slots to C. This is
  3106. // also important so `C` doesn't run into NoPropagatedConfirmation errors on making its first forked
  3107. // slot.
  3108. //
  3109. // Don't give validator A any slots because it's going to be deleting its ledger, so it may create
  3110. // versions of slots it's already created, but on a different fork.
  3111. let validator_to_slots = vec![
  3112. (
  3113. validator_b_pubkey,
  3114. (validator_b_last_leader_slot + NUM_CONSECUTIVE_LEADER_SLOTS) as usize,
  3115. ),
  3116. (validator_c_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
  3117. ];
  3118. // Trick C into not producing any blocks during this time, in case its leader slots come up before we can
  3119. // kill the validator. We don't want any forks during the time validator B is producing its initial blocks.
  3120. let c_validator_to_slots = vec![(validator_b_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize)];
  3121. let c_leader_schedule = create_custom_leader_schedule(c_validator_to_slots.into_iter());
  3122. let leader_schedule = Arc::new(create_custom_leader_schedule(
  3123. validator_to_slots.into_iter(),
  3124. ));
  3125. for slot in 0..=validator_b_last_leader_slot {
  3126. assert_eq!(leader_schedule[slot], validator_b_pubkey);
  3127. }
  3128. default_config.fixed_leader_schedule = Some(FixedSchedule {
  3129. leader_schedule: leader_schedule.clone(),
  3130. });
  3131. let mut validator_configs =
  3132. make_identical_validator_configs(&default_config, node_stakes.len());
  3133. // Disable voting on validator C
  3134. validator_configs[2].voting_disabled = true;
  3135. // C should not produce any blocks at this time
  3136. validator_configs[2].fixed_leader_schedule = Some(FixedSchedule {
  3137. leader_schedule: Arc::new(c_leader_schedule),
  3138. });
  3139. let mut config = ClusterConfig {
  3140. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  3141. node_stakes,
  3142. validator_configs,
  3143. validator_keys: Some(validator_keys),
  3144. slots_per_epoch,
  3145. stakers_slot_offset: slots_per_epoch,
  3146. skip_warmup_slots: true,
  3147. ..ClusterConfig::default()
  3148. };
  3149. let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  3150. let val_a_ledger_path = cluster.ledger_path(&validator_a_pubkey);
  3151. let val_b_ledger_path = cluster.ledger_path(&validator_b_pubkey);
  3152. let val_c_ledger_path = cluster.ledger_path(&validator_c_pubkey);
  3153. info!("val_a {validator_a_pubkey} ledger path {val_a_ledger_path:?}");
  3154. info!("val_b {validator_b_pubkey} ledger path {val_b_ledger_path:?}");
  3155. info!("val_c {validator_c_pubkey} ledger path {val_c_ledger_path:?}");
  3156. info!("Exiting validator C");
  3157. let mut validator_c_info = cluster.exit_node(&validator_c_pubkey);
  3158. info!("Waiting on validator A to vote");
  3159. // Step 1: Wait for validator A to vote so the tower file exists, and so we can determine the
  3160. // `base_slot` and `next_slot_on_a`
  3161. loop {
  3162. if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  3163. if last_vote >= 1 {
  3164. break;
  3165. }
  3166. }
  3167. sleep(Duration::from_millis(100));
  3168. }
  3169. // kill A and B
  3170. info!("Exiting validators A and B");
  3171. let _validator_b_info = cluster.exit_node(&validator_b_pubkey);
  3172. let validator_a_info = cluster.exit_node(&validator_a_pubkey);
  3173. let next_slot_on_a = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
  3174. .unwrap()
  3175. .0;
  3176. let base_slot = next_slot_on_a - 1;
  3177. info!("base slot: {base_slot}, next_slot_on_a: {next_slot_on_a}");
  3178. // Step 2:
  3179. // Truncate ledger, copy over B's ledger to C
  3180. info!("Create validator C's ledger");
  3181. {
  3182. // first copy from validator B's ledger
  3183. std::fs::remove_dir_all(&validator_c_info.info.ledger_path).unwrap();
  3184. let mut opt = fs_extra::dir::CopyOptions::new();
  3185. opt.copy_inside = true;
  3186. fs_extra::dir::copy(&val_b_ledger_path, &val_c_ledger_path, &opt).unwrap();
  3187. // Remove B's tower in C's new copied ledger
  3188. remove_tower(&val_c_ledger_path, &validator_b_pubkey);
  3189. let blockstore = open_blockstore(&val_c_ledger_path);
  3190. purge_slots_with_count(&blockstore, next_slot_on_a, truncated_slots);
  3191. }
  3192. info!("Create validator A's ledger");
  3193. {
  3194. // Now we copy these blocks to A
  3195. let b_blockstore = open_blockstore(&val_b_ledger_path);
  3196. let a_blockstore = open_blockstore(&val_a_ledger_path);
  3197. copy_blocks(next_slot_on_a, &b_blockstore, &a_blockstore, false);
  3198. // Purge unnecessary slots
  3199. purge_slots_with_count(&a_blockstore, next_slot_on_a + 1, truncated_slots);
  3200. }
  3201. {
  3202. let blockstore = open_blockstore(&val_a_ledger_path);
  3203. if !with_tower {
  3204. info!("Removing tower!");
  3205. remove_tower(&val_a_ledger_path, &validator_a_pubkey);
  3206. // Remove next_slot_on_a from ledger to force validator A to select
  3207. // votes_on_c_fork. Otherwise, in the test case without a tower,
  3208. // the validator A will immediately vote for 27 on restart, because it
  3209. // hasn't gotten the heavier fork from validator C yet.
  3210. // Then it will be stuck on 27 unable to switch because C doesn't
  3211. // have enough stake to generate a switching proof
  3212. purge_slots_with_count(&blockstore, next_slot_on_a, truncated_slots);
  3213. } else {
  3214. info!("Not removing tower!");
  3215. }
  3216. }
  3217. // Step 3:
  3218. // Run validator C only to make it produce and vote on its own fork.
  3219. info!("Restart validator C again!!!");
  3220. validator_c_info.config.voting_disabled = false;
  3221. // C should now produce blocks
  3222. validator_c_info.config.fixed_leader_schedule = Some(FixedSchedule { leader_schedule });
  3223. cluster.restart_node(
  3224. &validator_c_pubkey,
  3225. validator_c_info,
  3226. SocketAddrSpace::Unspecified,
  3227. );
  3228. let mut votes_on_c_fork = std::collections::BTreeSet::new();
  3229. let mut last_vote = 0;
  3230. let now = Instant::now();
  3231. loop {
  3232. let elapsed = now.elapsed();
  3233. assert!(
  3234. elapsed <= Duration::from_secs(30),
  3235. "C failed to create a fork past {base_slot} in {} seconds, last_vote {last_vote}, \
  3236. votes_on_c_fork: {votes_on_c_fork:?}",
  3237. elapsed.as_secs(),
  3238. );
  3239. sleep(Duration::from_millis(100));
  3240. if let Some((newest_vote, _)) = last_vote_in_tower(&val_c_ledger_path, &validator_c_pubkey)
  3241. {
  3242. last_vote = newest_vote;
  3243. if last_vote != base_slot {
  3244. votes_on_c_fork.insert(last_vote);
  3245. // Collect 4 votes
  3246. if votes_on_c_fork.len() >= 4 {
  3247. break;
  3248. }
  3249. }
  3250. }
  3251. }
  3252. assert!(!votes_on_c_fork.is_empty());
  3253. info!("Collected validator C's votes: {votes_on_c_fork:?}");
  3254. // Step 4:
  3255. // verify whether there was violation or not
  3256. info!("Restart validator A again!!!");
  3257. cluster.restart_node(
  3258. &validator_a_pubkey,
  3259. validator_a_info,
  3260. SocketAddrSpace::Unspecified,
  3261. );
  3262. // monitor for actual votes from validator A
  3263. let mut bad_vote_detected = false;
  3264. let mut a_votes = vec![];
  3265. for _ in 0..100 {
  3266. sleep(Duration::from_millis(100));
  3267. if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
  3268. a_votes.push(last_vote);
  3269. let blockstore = open_blockstore(&val_a_ledger_path);
  3270. let mut ancestors = AncestorIterator::new(last_vote, &blockstore);
  3271. if ancestors.any(|a| votes_on_c_fork.contains(&a)) {
  3272. bad_vote_detected = true;
  3273. break;
  3274. }
  3275. }
  3276. }
  3277. info!("Observed A's votes on: {a_votes:?}");
  3278. // an elaborate way of assert!(with_tower && !bad_vote_detected || ...)
  3279. let expects_optimistic_confirmation_violation = !with_tower;
  3280. if bad_vote_detected != expects_optimistic_confirmation_violation {
  3281. if bad_vote_detected {
  3282. panic!("No violation expected because of persisted tower!");
  3283. } else {
  3284. panic!("Violation expected because of removed persisted tower!");
  3285. }
  3286. } else if bad_vote_detected {
  3287. info!(
  3288. "THIS TEST expected violations. And indeed, there was some, because of removed \
  3289. persisted tower."
  3290. );
  3291. } else {
  3292. info!(
  3293. "THIS TEST expected no violation. And indeed, there was none, thanks to persisted \
  3294. tower."
  3295. );
  3296. }
  3297. }
  // Scenario this test constructs:
  //
  //       1 (2%, killed and restarted) --- 200 (37%, lighter fork)
  //     /
  //     0
  //       \-------- 4 (38%, heavier fork)
  //
  // The 2% validator voted on slot 1, but that vote never lands in a block because
  // its blockhash expired. Unless the vote is re-signed with a newer blockhash, the
  // validator deems slot 4 the heavier fork and tries to switch to it, which fails
  // the switch threshold and stalls the network.
  //
  // How the scenario is produced:
  // 1) Partition the alive validators so they don't see each other
  // 2) Kill the validator with 2%
  // 3) Wait for longer than blockhash expiration
  // 4) Copy blocks from the lighter and the heavier fork into the 2% validator's
  //    blockstore and write a tower vote for the first slot `L` of the lighter fork
  // 5) Restart the 2% validator, so that its vote on `L` cannot land due to
  //    blockhash expiration
  // 6) Resolve the partition so the 2% validator sees the other fork and tries to
  //    switch; the test then requires `L` to become a root anyway.
  #[test]
  #[serial]
  fn test_fork_choice_refresh_old_votes() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);

      // Stake layout. The killed validator is handed `failures_stake - 1` in the call
      // at the bottom; `smallest_stake` goes to a validator with zero leader slots,
      // which therefore cannot vote on a fork of its own.
      let total_stake = 100 * DEFAULT_NODE_STAKE;
      let max_switch_threshold_failure_pct = 1.0 - 2.0 * SWITCH_FORK_THRESHOLD;
      let max_failures_stake = (max_switch_threshold_failure_pct * total_stake as f64) as u64;
      let failures_stake = max_failures_stake;
      let total_alive_stake = total_stake - failures_stake;
      let lighter_stake = total_alive_stake / 2 - 1;
      let heavier_stake = total_alive_stake - lighter_stake - 1;

      // The heavier fork alone still lacks the stake to let anyone switch: whichever
      // fork receives the landed vote of the `smallest_stake` validator is the one the
      // other fork is allowed to switch to.
      let smallest_stake = 2 * DEFAULT_NODE_STAKE;
      assert!(lighter_stake < heavier_stake);
      assert!(lighter_stake + smallest_stake > heavier_stake);

      // Leader slots per rotation for the lighter partition, and the ratio of all
      // leader slots to those owned by the lighter partition.
      let lighter_slots_per_rotation = 8;
      let total_to_lighter_slots_ratio = 2;
      let heavier_slots_per_rotation = (total_to_lighter_slots_ratio - 1) * lighter_slots_per_rotation;
      let partitions: &[(usize, usize)] = &[
          (lighter_stake as usize, lighter_slots_per_rotation),
          (heavier_stake as usize, heavier_slots_per_rotation),
          (smallest_stake as usize, 0),
      ];

      // State shared between the three partition callbacks.
      #[derive(Default)]
      struct RefreshContext {
          smallest_validator_info: Option<ClusterValidatorInfo>,
          lighter_fork_validator_key: Pubkey,
          heaviest_validator_key: Pubkey,
          first_slot_in_lighter_partition: Slot,
      }

      // `validator_keys[0]` is the validator that gets killed up front (stake derived
      // from `failures_stake`); presumably the remaining keys follow the order of
      // `partitions`, i.e. [1] = lighter, [2] = heavier, [3] = smallest -- verify
      // against `run_kill_partition_switch_threshold`.
      let on_partition_start = |cluster: &mut LocalCluster,
                                validator_keys: &[Pubkey],
                                _: Vec<ClusterValidatorInfo>,
                                context: &mut RefreshContext| {
          // Take the smallest validator offline and keep its info for the later restart.
          context.smallest_validator_info = Some(cluster.exit_node(&validator_keys[3]));
          context.lighter_fork_validator_key = validator_keys[1];
          context.heaviest_validator_key = validator_keys[2];
      };

      let ticks_per_slot = 32;
      let on_before_partition_resolved =
          |cluster: &mut LocalCluster, context: &mut RefreshContext| {
              // Let blockhashes expire: MAX_PROCESSING_AGE slots, scaled by the
              // leader-slot ratio.
              let sleep_time_ms = ms_for_n_slots(
                  MAX_PROCESSING_AGE as u64 * total_to_lighter_slots_ratio as u64,
                  ticks_per_slot,
              );
              info!("Wait for blockhashes to expire, {sleep_time_ms} ms");
              sleep(Duration::from_millis(sleep_time_ms));

              let (smallest_validator_key, smallest_ledger_path) = {
                  let smallest = &context.smallest_validator_info.as_ref().unwrap().info;
                  (smallest.keypair.pubkey(), smallest.ledger_path.clone())
              };
              info!(
                  "smallest validator key: {smallest_validator_key}, path: {smallest_ledger_path:?}"
              );
              let lighter_fork_ledger_path = cluster.ledger_path(&context.lighter_fork_validator_key);
              let heaviest_ledger_path = cluster.ledger_path(&context.heaviest_validator_key);

              // Poll until the lighter fork is more than MAX_PROCESSING_AGE blocks long.
              loop {
                  // Latest tower votes, taken only once they are present in the ledger.
                  // A leader may vote on its own block before broadcast has inserted it
                  // into blockstore, hence the wait.
                  let lighter_fork_latest_vote = wait_for_last_vote_in_tower_to_land_in_ledger(
                      &lighter_fork_ledger_path,
                      &context.lighter_fork_validator_key,
                  )
                  .unwrap();
                  let heaviest_fork_latest_vote = wait_for_last_vote_in_tower_to_land_in_ledger(
                      &heaviest_ledger_path,
                      &context.heaviest_validator_key,
                  )
                  .unwrap();

                  // The blockstores live only inside this block, so they are closed
                  // again before the sleep at the bottom of the loop.
                  let distance_from_tip: usize = {
                      let smallest_blockstore = open_blockstore(&smallest_ledger_path);
                      let lighter_fork_blockstore = open_blockstore(&lighter_fork_ledger_path);
                      let heaviest_blockstore = open_blockstore(&heaviest_ledger_path);
                      info!("Opened blockstores");

                      // Each fork as an ordered set: the latest vote plus all its ancestors.
                      let mut lighter_fork_slots: BTreeSet<Slot> = BTreeSet::new();
                      lighter_fork_slots.insert(lighter_fork_latest_vote);
                      lighter_fork_slots.extend(AncestorIterator::new(
                          lighter_fork_latest_vote,
                          &lighter_fork_blockstore,
                      ));
                      let mut heavier_fork_slots: BTreeSet<Slot> = BTreeSet::new();
                      heavier_fork_slots.insert(heaviest_fork_latest_vote);
                      heavier_fork_slots.extend(AncestorIterator::new(
                          heaviest_fork_latest_vote,
                          &heaviest_blockstore,
                      ));

                      // Walk both forks in ascending slot order; the first position where
                      // they disagree is the first slot of the lighter fork.
                      let (first_divergent_index, first_divergent_slot) = lighter_fork_slots
                          .iter()
                          .zip(heavier_fork_slots.iter())
                          .enumerate()
                          .find_map(|(index, (lighter_slot, heavier_slot))| {
                              (lighter_slot != heavier_slot).then_some((index, *lighter_slot))
                          })
                          .unwrap();
                      let last_common_ancestor_index = first_divergent_index - 1;

                      // The heavier fork must carry at least one landed vote, otherwise the
                      // smallest validator would simply vote again on its own fork. There is
                      // no simple way to scan blockstore for votes, so fork length is used as
                      // a proxy: with last common ancestor N, the first vote is cast by N + 3
                      // at the latest (if the threshold check failed on N + 1) and lands by
                      // N + 4.
                      assert!(heavier_fork_slots.len() > last_common_ancestor_index + 4);

                      context.first_slot_in_lighter_partition = first_divergent_slot;
                      let distance_from_tip = lighter_fork_slots.len() - first_divergent_index - 1;
                      info!(
                          "Distance in number of blocks between earliest slot {} and latest slot {} \
                           on lighter partition is {}",
                          context.first_slot_in_lighter_partition,
                          lighter_fork_latest_vote,
                          distance_from_tip
                      );

                      if distance_from_tip > MAX_PROCESSING_AGE {
                          // Set a few lines above from the fork comparison.
                          assert!(context.first_slot_in_lighter_partition != 0);
                          info!(
                              "First slot in lighter partition is {}",
                              context.first_slot_in_lighter_partition
                          );
                          // Give the smallest validator the lighter fork's blocks (copy ends
                          // at the lighter validator's latest vote) so that it will attempt
                          // to refresh ...
                          copy_blocks(
                              lighter_fork_latest_vote,
                              &lighter_fork_blockstore,
                              &smallest_blockstore,
                              false,
                          );
                          // ... and the heavier fork's blocks, so that it will not vote
                          // again on its own fork.
                          copy_blocks(
                              heaviest_fork_latest_vote,
                              &heaviest_blockstore,
                              &smallest_blockstore,
                              false,
                          );

                          // Simulate a vote for `first_slot_in_lighter_partition` by writing
                          // it into the smallest validator's tower.
                          let bank_hash = lighter_fork_blockstore
                              .get_bank_hash(context.first_slot_in_lighter_partition)
                              .unwrap();
                          let smallest_keypair =
                              &context.smallest_validator_info.as_ref().unwrap().info.keypair;
                          cluster_tests::apply_votes_to_tower(
                              smallest_keypair,
                              vec![(context.first_slot_in_lighter_partition, bank_hash)],
                              smallest_ledger_path,
                          );
                          drop(smallest_blockstore);
                          break;
                      }
                      distance_from_tip
                  };

                  // Not long enough yet: wait for the missing slots, scaled by the
                  // leader-slot ratio, then re-check.
                  sleep(Duration::from_millis(ms_for_n_slots(
                      ((MAX_PROCESSING_AGE - distance_from_tip) * total_to_lighter_slots_ratio)
                          as u64,
                      ticks_per_slot,
                  )));
              }

              // Bring back the validator that `on_partition_start` took offline.
              cluster.restart_node(
                  &smallest_validator_key,
                  context.smallest_validator_info.take().unwrap(),
                  SocketAddrSpace::Unspecified,
              );
              // The partition is resolved next. The restarted validator will see the fork
              // of the heavier validator, although the fork it is on would be the heaviest
              // one if only its own vote had landed.
          };

      // After the partition resolves, lockouts built up during the partition expire and
      // validators get a chance to switch forks; the lighter fork must end up rooted.
      let on_partition_resolved = |cluster: &mut LocalCluster, context: &mut RefreshContext| {
          // Wait for a root past the first slot of the expected fork.
          cluster.check_min_slot_is_rooted(
              context.first_slot_in_lighter_partition,
              "test_fork_choice_refresh_old_votes",
              SocketAddrSpace::Unspecified,
          );

          // Confirm on the heavier validator's ledger that this very slot was rooted.
          let heaviest_ledger_path = cluster.ledger_path(&context.heaviest_validator_key);
          let heaviest_blockstore = open_blockstore(&heaviest_ledger_path);
          info!(
              "checking that {} was rooted in {:?}",
              context.first_slot_in_lighter_partition, heaviest_ledger_path
          );
          assert!(heaviest_blockstore.is_root(context.first_slot_in_lighter_partition));
      };

      run_kill_partition_switch_threshold(
          &[(failures_stake as usize - 1, 0)],
          partitions,
          Some(ticks_per_slot),
          RefreshContext::default(),
          on_partition_start,
          on_before_partition_resolved,
          on_partition_resolved,
      );
  }
  // Outline of this test:
  // 1) Spin up four partitions; the first one holds slightly more stake than the rest
  // 2) Give the validators enough leader slots that the lighter ones are still locked
  //    out of voting for the major partition when the partition resolves
  // 3) Kill the most staked validator. The others are locked out, but should all
  //    eventually choose the major partition
  // 4) Check for recovery (new roots)
  #[test]
  #[serial]
  fn test_kill_heaviest_partition() {
      let node_stake = DEFAULT_NODE_STAKE as usize;
      let partitions: [usize; 4] = [
          11 * node_stake,
          10 * node_stake,
          10 * node_stake,
          10 * node_stake,
      ];

      // The first validator gets as many leader slots as the other three combined.
      let num_slots_per_validator = 8;
      let heaviest_validator_slots = num_slots_per_validator * (partitions.len() - 1);
      let (leader_schedule, validator_keys) = create_custom_leader_schedule_with_random_keys(&[
          heaviest_validator_slots,
          num_slots_per_validator,
          num_slots_per_validator,
          num_slots_per_validator,
      ]);

      let validator_to_kill = validator_keys[0].pubkey();
      // Nothing to do at partition start or right before it resolves.
      let noop = |_: &mut LocalCluster, _: &mut ()| {};
      let kill_heaviest_and_check_roots = |cluster: &mut LocalCluster, _: &mut ()| {
          info!("Killing validator with id: {validator_to_kill}");
          cluster.exit_node(&validator_to_kill);
          cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
      };

      run_cluster_partition(
          &partitions,
          Some((leader_schedule, validator_keys)),
          (),
          noop,
          noop,
          kill_heaviest_and_check_roots,
          None,
          vec![],
          // TODO: make Alpenglow equivalent when skips are available
          false,
      )
  }
  // Kills exactly `max_failures_stake` worth of validators and splits the remaining
  // stake into two halves; the cluster is expected to make no progress afterwards.
  #[test]
  #[serial]
  #[ignore]
  fn test_kill_partition_switch_threshold_no_progress() {
      let total_stake = 10_000 * DEFAULT_NODE_STAKE;
      let max_switch_threshold_failure_pct = 1.0 - 2.0 * SWITCH_FORK_THRESHOLD;
      let max_failures_stake = (max_switch_threshold_failure_pct * total_stake as f64) as u64;
      let failures_stake = max_failures_stake;

      // Split whatever stays alive into two partitions (any odd remainder goes to the
      // second one).
      let total_alive_stake = total_stake - failures_stake;
      let first_alive_stake = total_alive_stake / 2;
      let second_alive_stake = total_alive_stake - first_alive_stake;

      // (stake, leader slots) pairs for the killed and the surviving validators.
      let killed: [(usize, usize); 1] = [(failures_stake as usize, 16)];
      let alive: [(usize, usize); 2] = [
          (first_alive_stake as usize, 8),
          (second_alive_stake as usize, 8),
      ];

      let on_partition_start =
          |_: &mut LocalCluster, _: &[Pubkey], _: Vec<ClusterValidatorInfo>, _: &mut ()| {};
      let on_before_partition_resolved = |_: &mut LocalCluster, _: &mut ()| {};
      // No new roots may appear for 400 slots after the partition resolves. That gives
      // lockouts built during the partition time to expire and gives validators an
      // opportunity to try and switch forks.
      let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
          cluster.check_no_new_roots(400, "PARTITION_TEST", SocketAddrSpace::Unspecified);
      };

      // This kills `max_failures_stake`, so no progress should be made
      run_kill_partition_switch_threshold(
          &killed,
          &alive,
          None,
          (),
          on_partition_start,
          on_before_partition_resolved,
          on_partition_resolved,
      );
  }
  // Kills strictly less than `max_failures_stake` worth of validators and partitions
  // the rest; the cluster should still make progress once the partition resolves.
  #[test]
  #[serial]
  #[ignore]
  fn test_kill_partition_switch_threshold_progress() {
      let total_stake = 10_000 * DEFAULT_NODE_STAKE;
      let max_switch_threshold_failure_pct = 1.0 - 2.0 * SWITCH_FORK_THRESHOLD;
      let max_failures_stake = (max_switch_threshold_failure_pct * total_stake as f64) as u64;
      // One unit below the maximum tolerated failure stake.
      let failures_stake = max_failures_stake - 1;

      // Split the surviving stake into two partitions.
      let total_alive_stake = total_stake - failures_stake;
      let first_alive_stake = total_alive_stake / 2;
      let second_alive_stake = total_alive_stake - first_alive_stake;
      let bigger = first_alive_stake.max(second_alive_stake);
      let smaller = first_alive_stake.min(second_alive_stake);

      // At least one fork must hold > SWITCH_FORK_THRESHOLD so that switching proofs
      // can be created, and the other fork must hold <= SWITCH_FORK_THRESHOLD so that
      // progress can be made. This catches bugs such as liveness issues with
      // bank-weighted fork choice, which may stall because the fork with less stake
      // could have more weight, while on the other fork:
      // 1) No switching proof can be generated
      // 2) The more staked fork stops voting, so it never catches up in bank weight.
      let stake_share = |stake: u64| stake as f64 / total_stake as f64;
      assert!(
          stake_share(bigger) > SWITCH_FORK_THRESHOLD
              && stake_share(smaller) <= SWITCH_FORK_THRESHOLD
      );

      // (stake, leader slots) pairs for the killed and the surviving validators.
      let killed: [(usize, usize); 1] = [(failures_stake as usize, 16)];
      let alive: [(usize, usize); 2] = [
          (first_alive_stake as usize, 8),
          (second_alive_stake as usize, 8),
      ];

      let on_partition_start =
          |_: &mut LocalCluster, _: &[Pubkey], _: Vec<ClusterValidatorInfo>, _: &mut ()| {};
      let on_before_partition_resolved = |_: &mut LocalCluster, _: &mut ()| {};
      // Progress check: new roots must show up after the partition resolves.
      let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
          cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
      };

      run_kill_partition_switch_threshold(
          &killed,
          &alive,
          None,
          (),
          on_partition_start,
          on_before_partition_resolved,
          on_partition_resolved,
      );
  }
  // Duplicate-shred broadcast scenario in which the simulated gossip voter also votes
  // on slots reported as duplicate (`vote_on_duplicate == true`).
  #[test]
  #[serial]
  #[allow(unused_attributes)]
  fn test_duplicate_shreds_broadcast_leader() {
      let vote_on_duplicate = true;
      run_duplicate_shreds_broadcast_leader(vote_on_duplicate);
  }
  // Duplicate-shred broadcast scenario in which the simulated gossip voter skips slots
  // reported as duplicate (`vote_on_duplicate == false`). Ignored by default.
  #[test]
  #[serial]
  #[ignore]
  #[allow(unused_attributes)]
  fn test_duplicate_shreds_broadcast_leader_ancestor_hashes() {
      let vote_on_duplicate = false;
      run_duplicate_shreds_broadcast_leader(vote_on_duplicate);
  }
  3685. fn run_duplicate_shreds_broadcast_leader(vote_on_duplicate: bool) {
  3686. solana_logger::setup_with_default(RUST_LOG_FILTER);
  3687. // Create 4 nodes:
  3688. // 1) Bad leader sending different versions of shreds to both of the other nodes
  3689. // 2) 1 node who's voting behavior in gossip
  3690. // 3) 1 validator gets the same version as the leader, will see duplicate confirmation
  3691. // 4) 1 validator will not get the same version as the leader. For each of these
  3692. // duplicate slots `S` either:
  3693. // a) The leader's version of `S` gets > DUPLICATE_THRESHOLD of votes in gossip and so this
  3694. // node will repair that correct version
  3695. // b) A descendant `D` of some version of `S` gets > DUPLICATE_THRESHOLD votes in gossip,
  3696. // but no version of `S` does. Then the node will not know to repair the right version
  3697. // by just looking at gossip, but will instead have to use EpochSlots repair after
  3698. // detecting that a descendant does not chain to its version of `S`, and marks that descendant
  3699. // dead.
  3700. // Scenarios a) or b) are triggered by our node in 2) who's voting behavior we control.
  3701. // Critical that bad_leader_stake + good_node_stake < DUPLICATE_THRESHOLD and that
  3702. // bad_leader_stake + good_node_stake + our_node_stake > DUPLICATE_THRESHOLD so that
  3703. // our vote is the determining factor.
  3704. //
  3705. // Also critical that bad_leader_stake > 1 - DUPLICATE_THRESHOLD, so that the leader
  3706. // doesn't try and dump his own block, which will happen if:
  3707. // 1. A version is duplicate confirmed
  3708. // 2. The version they played/stored into blockstore isn't the one that is duplicated
  3709. // confirmed.
  3710. let bad_leader_stake = 10_000_000 * DEFAULT_NODE_STAKE;
  3711. // Ensure that the good_node_stake is always on the critical path, and the partition node
  3712. // should never be on the critical path. This way, none of the bad shreds sent to the partition
  3713. // node corrupt the good node.
  3714. let good_node_stake = 500 * DEFAULT_NODE_STAKE;
  3715. let our_node_stake = 10_000_000 * DEFAULT_NODE_STAKE;
  3716. let partition_node_stake = DEFAULT_NODE_STAKE;
  3717. let node_stakes = vec![
  3718. bad_leader_stake,
  3719. partition_node_stake,
  3720. good_node_stake,
  3721. // Needs to be last in the vector, so that we can
  3722. // find the id of this node. See call to `test_faulty_node`
  3723. // below for more details.
  3724. our_node_stake,
  3725. ];
  3726. assert_eq!(*node_stakes.last().unwrap(), our_node_stake);
  3727. let total_stake: u64 = node_stakes.iter().sum();
  3728. assert!(
  3729. ((bad_leader_stake + good_node_stake) as f64 / total_stake as f64) < DUPLICATE_THRESHOLD
  3730. );
  3731. assert!(
  3732. (bad_leader_stake + good_node_stake + our_node_stake) as f64 / total_stake as f64
  3733. > DUPLICATE_THRESHOLD
  3734. );
  3735. assert!((bad_leader_stake as f64 / total_stake as f64) >= 1.0 - DUPLICATE_THRESHOLD);
  3736. // Important that the partition node stake is the smallest so that it gets selected
  3737. // for the partition.
  3738. assert!(partition_node_stake < our_node_stake && partition_node_stake < good_node_stake);
  3739. let (duplicate_slot_sender, duplicate_slot_receiver) = unbounded();
  3740. // 1) Set up the cluster
  3741. let (mut cluster, validator_keys) = test_faulty_node(
  3742. BroadcastStageType::BroadcastDuplicates(BroadcastDuplicatesConfig {
  3743. partition: ClusterPartition::Stake(partition_node_stake),
  3744. duplicate_slot_sender: Some(duplicate_slot_sender),
  3745. }),
  3746. node_stakes,
  3747. None,
  3748. None,
  3749. );
  3750. // This is why it's important our node was last in `node_stakes`
  3751. let our_id = validator_keys.last().unwrap().pubkey();
  3752. // 2) Kill our node and start up a thread to simulate votes to control our voting behavior
  3753. let our_info = cluster.exit_node(&our_id);
  3754. let node_keypair = our_info.info.keypair;
  3755. let vote_keypair = our_info.info.voting_keypair;
  3756. let bad_leader_id = *cluster.entry_point_info.pubkey();
  3757. let bad_leader_ledger_path = cluster.validators[&bad_leader_id].info.ledger_path.clone();
  3758. info!("our node id: {}", node_keypair.pubkey());
  3759. // 3) Start up a gossip instance to listen for and push votes
  3760. let voter_thread_sleep_ms = 100;
  3761. let gossip_voter = cluster_tests::start_gossip_voter(
  3762. &cluster.entry_point_info.gossip().unwrap(),
  3763. &node_keypair,
  3764. move |(label, leader_vote_tx)| {
  3765. // Filter out votes not from the bad leader
  3766. if label.pubkey() == bad_leader_id {
  3767. let vote = vote_parser::parse_vote_transaction(&leader_vote_tx)
  3768. .map(|(_, vote, ..)| vote)
  3769. .unwrap()
  3770. .as_tower_transaction()
  3771. .unwrap();
  3772. // Filter out empty votes
  3773. if !vote.is_empty() {
  3774. Some((vote.into(), leader_vote_tx))
  3775. } else {
  3776. None
  3777. }
  3778. } else {
  3779. None
  3780. }
  3781. },
  3782. {
  3783. let node_keypair = node_keypair.insecure_clone();
  3784. let vote_keypair = vote_keypair.insecure_clone();
  3785. let mut gossip_vote_index = 0;
  3786. let mut duplicate_slots = vec![];
  3787. move |latest_vote_slot, leader_vote_tx, parsed_vote, cluster_info| {
  3788. info!("received vote for {latest_vote_slot}");
  3789. // Add to EpochSlots. Mark all slots frozen between slot..=max_vote_slot.
  3790. let new_epoch_slots: Vec<Slot> = (0..latest_vote_slot + 1).collect();
  3791. info!("Simulating epoch slots from our node: {new_epoch_slots:?}");
  3792. cluster_info.push_epoch_slots(&new_epoch_slots);
  3793. for slot in duplicate_slot_receiver.try_iter() {
  3794. duplicate_slots.push(slot);
  3795. }
  3796. let parsed_vote = parsed_vote.as_tower_transaction_ref().unwrap();
  3797. let vote_hash = parsed_vote.hash();
  3798. if vote_on_duplicate || !duplicate_slots.contains(&latest_vote_slot) {
  3799. info!(
  3800. "Simulating vote from our node on slot {latest_vote_slot}, hash \
  3801. {vote_hash}"
  3802. );
  3803. // Add all recent vote slots on this fork to allow cluster to pass
  3804. // vote threshold checks in replay. Note this will instantly force a
  3805. // root by this validator, but we're not concerned with lockout violations
  3806. // by this validator so it's fine.
  3807. let leader_blockstore = open_blockstore(&bad_leader_ledger_path);
  3808. let mut vote_slots: Vec<(Slot, u32)> =
  3809. AncestorIterator::new_inclusive(latest_vote_slot, &leader_blockstore)
  3810. .take(MAX_LOCKOUT_HISTORY)
  3811. .zip(1..)
  3812. .collect();
  3813. vote_slots.reverse();
  3814. let mut vote = TowerSync::from(vote_slots);
  3815. let root =
  3816. AncestorIterator::new_inclusive(latest_vote_slot, &leader_blockstore)
  3817. .nth(MAX_LOCKOUT_HISTORY);
  3818. vote.root = root;
  3819. vote.hash = vote_hash;
  3820. let vote_tx = vote_transaction::new_tower_sync_transaction(
  3821. vote,
  3822. leader_vote_tx.message.recent_blockhash,
  3823. &node_keypair,
  3824. &vote_keypair,
  3825. &vote_keypair,
  3826. None,
  3827. );
  3828. gossip_vote_index += 1;
  3829. gossip_vote_index %= MAX_VOTES;
  3830. cluster_info.push_vote_at_index(vote_tx, gossip_vote_index);
  3831. }
  3832. }
  3833. },
  3834. voter_thread_sleep_ms as u64,
  3835. cluster.validators.len().saturating_sub(1),
  3836. 5000, // Refresh if 5 seconds of inactivity
  3837. 5, // Refresh the past 5 votes
  3838. cluster.entry_point_info.shred_version(),
  3839. );
  3840. // 4) Check that the cluster is making progress
  3841. cluster.check_for_new_roots(
  3842. 16,
  3843. "test_duplicate_shreds_broadcast_leader",
  3844. SocketAddrSpace::Unspecified,
  3845. );
  3846. // Clean up threads
  3847. gossip_voter.close();
  3848. }
  3849. #[test]
  3850. #[serial]
  3851. #[ignore]
  3852. fn test_switch_threshold_uses_gossip_votes() {
  3853. solana_logger::setup_with_default(RUST_LOG_FILTER);
  3854. let total_stake = 100 * DEFAULT_NODE_STAKE;
  3855. // Minimum stake needed to generate a switching proof
  3856. let minimum_switch_stake = (SWITCH_FORK_THRESHOLD * total_stake as f64) as u64;
  3857. // Make the heavier stake insufficient for switching so tha the lighter validator
  3858. // cannot switch without seeing a vote from the dead/failure_stake validator.
  3859. let heavier_stake = minimum_switch_stake;
  3860. let lighter_stake = heavier_stake - 1;
  3861. let failures_stake = total_stake - heavier_stake - lighter_stake;
  3862. let partitions: &[(usize, usize)] = &[(heavier_stake as usize, 8), (lighter_stake as usize, 8)];
  3863. #[derive(Default)]
  3864. struct PartitionContext {
  3865. heaviest_validator_key: Pubkey,
  3866. lighter_validator_key: Pubkey,
  3867. dead_validator_info: Option<ClusterValidatorInfo>,
  3868. }
  3869. let on_partition_start = |_cluster: &mut LocalCluster,
  3870. validator_keys: &[Pubkey],
  3871. mut dead_validator_infos: Vec<ClusterValidatorInfo>,
  3872. context: &mut PartitionContext| {
  3873. assert_eq!(dead_validator_infos.len(), 1);
  3874. context.dead_validator_info = Some(dead_validator_infos.pop().unwrap());
  3875. // validator_keys[0] is the validator that will be killed, i.e. the validator with
  3876. // stake == `failures_stake`
  3877. context.heaviest_validator_key = validator_keys[1];
  3878. context.lighter_validator_key = validator_keys[2];
  3879. };
  3880. let on_before_partition_resolved = |_: &mut LocalCluster, _: &mut PartitionContext| {};
  3881. // Check that new roots were set after the partition resolves (gives time
  3882. // for lockouts built during partition to resolve and gives validators an opportunity
  3883. // to try and switch forks)
  3884. let on_partition_resolved = |cluster: &mut LocalCluster, context: &mut PartitionContext| {
  3885. let lighter_validator_ledger_path = cluster.ledger_path(&context.lighter_validator_key);
  3886. let heavier_validator_ledger_path = cluster.ledger_path(&context.heaviest_validator_key);
  3887. let (lighter_validator_latest_vote, _) = last_vote_in_tower(
  3888. &lighter_validator_ledger_path,
  3889. &context.lighter_validator_key,
  3890. )
  3891. .unwrap();
  3892. info!("Lighter validator's latest vote is for slot {lighter_validator_latest_vote}");
  3893. // Lighter partition should stop voting after detecting the heavier partition and try
  3894. // to switch. Loop until we see a greater vote by the heavier validator than the last
  3895. // vote made by the lighter validator on the lighter fork.
  3896. let mut heavier_validator_latest_vote;
  3897. let mut heavier_validator_latest_vote_hash;
  3898. let heavier_blockstore = open_blockstore(&heavier_validator_ledger_path);
  3899. loop {
  3900. let (sanity_check_lighter_validator_latest_vote, _) = last_vote_in_tower(
  3901. &lighter_validator_ledger_path,
  3902. &context.lighter_validator_key,
  3903. )
  3904. .unwrap();
  3905. // Lighter validator should stop voting, because `on_partition_resolved` is only
  3906. // called after a propagation time where blocks from the other fork should have
  3907. // finished propagating
  3908. assert_eq!(
  3909. sanity_check_lighter_validator_latest_vote,
  3910. lighter_validator_latest_vote
  3911. );
  3912. let (new_heavier_validator_latest_vote, new_heavier_validator_latest_vote_hash) =
  3913. last_vote_in_tower(
  3914. &heavier_validator_ledger_path,
  3915. &context.heaviest_validator_key,
  3916. )
  3917. .unwrap();
  3918. heavier_validator_latest_vote = new_heavier_validator_latest_vote;
  3919. heavier_validator_latest_vote_hash = new_heavier_validator_latest_vote_hash;
  3920. // Latest vote for each validator should be on different forks
  3921. assert_ne!(lighter_validator_latest_vote, heavier_validator_latest_vote);
  3922. if heavier_validator_latest_vote > lighter_validator_latest_vote {
  3923. let heavier_ancestors: HashSet<Slot> =
  3924. AncestorIterator::new(heavier_validator_latest_vote, &heavier_blockstore)
  3925. .collect();
  3926. assert!(!heavier_ancestors.contains(&lighter_validator_latest_vote));
  3927. break;
  3928. }
  3929. }
  3930. info!("Checking to make sure lighter validator doesn't switch");
  3931. let mut latest_slot = lighter_validator_latest_vote;
  3932. // Number of chances the validator had to switch votes but didn't
  3933. let mut total_voting_opportunities = 0;
  3934. while total_voting_opportunities <= 5 {
  3935. let (new_latest_slot, latest_slot_ancestors) =
  3936. find_latest_replayed_slot_from_ledger(&lighter_validator_ledger_path, latest_slot);
  3937. latest_slot = new_latest_slot;
  3938. // Ensure `latest_slot` is on the other fork
  3939. if latest_slot_ancestors.contains(&heavier_validator_latest_vote) {
  3940. let tower = restore_tower(
  3941. &lighter_validator_ledger_path,
  3942. &context.lighter_validator_key,
  3943. )
  3944. .unwrap();
  3945. // Check that there was an opportunity to vote
  3946. if !tower.is_locked_out(latest_slot, &latest_slot_ancestors) {
  3947. // Ensure the lighter blockstore has not voted again
  3948. let new_lighter_validator_latest_vote = tower.last_voted_slot().unwrap();
  3949. assert_eq!(
  3950. new_lighter_validator_latest_vote,
  3951. lighter_validator_latest_vote
  3952. );
  3953. info!("Incrementing voting opportunities: {total_voting_opportunities}");
  3954. total_voting_opportunities += 1;
  3955. } else {
  3956. info!("Tower still locked out, can't vote for slot: {latest_slot}");
  3957. }
  3958. } else if latest_slot > heavier_validator_latest_vote {
  3959. warn!(
  3960. "validator is still generating blocks on its own fork, last processed slot: \
  3961. {latest_slot}"
  3962. );
  3963. }
  3964. sleep(Duration::from_millis(50));
  3965. }
  3966. // Make a vote from the killed validator for slot `heavier_validator_latest_vote` in gossip
  3967. info!("Simulate vote for slot: {heavier_validator_latest_vote} from dead validator");
  3968. let vote_keypair = &context
  3969. .dead_validator_info
  3970. .as_ref()
  3971. .unwrap()
  3972. .info
  3973. .voting_keypair
  3974. .clone();
  3975. let node_keypair = &context
  3976. .dead_validator_info
  3977. .as_ref()
  3978. .unwrap()
  3979. .info
  3980. .keypair
  3981. .clone();
  3982. cluster_tests::submit_vote_to_cluster_gossip(
  3983. node_keypair,
  3984. vote_keypair,
  3985. heavier_validator_latest_vote,
  3986. heavier_validator_latest_vote_hash,
  3987. // Make the vote transaction with a random blockhash. Thus, the vote only lives in gossip but
  3988. // never makes it into a block
  3989. Hash::new_unique(),
  3990. cluster
  3991. .get_contact_info(&context.heaviest_validator_key)
  3992. .unwrap()
  3993. .gossip()
  3994. .unwrap(),
  3995. &SocketAddrSpace::Unspecified,
  3996. )
  3997. .unwrap();
  3998. loop {
  3999. // Wait for the lighter validator to switch to the heavier fork
  4000. let (new_lighter_validator_latest_vote, _) = last_vote_in_tower(
  4001. &lighter_validator_ledger_path,
  4002. &context.lighter_validator_key,
  4003. )
  4004. .unwrap();
  4005. if new_lighter_validator_latest_vote != lighter_validator_latest_vote {
  4006. info!(
  4007. "Lighter validator switched forks at slot: {new_lighter_validator_latest_vote}"
  4008. );
  4009. let (heavier_validator_latest_vote, _) = last_vote_in_tower(
  4010. &heavier_validator_ledger_path,
  4011. &context.heaviest_validator_key,
  4012. )
  4013. .unwrap();
  4014. let (smaller, larger) =
  4015. if new_lighter_validator_latest_vote > heavier_validator_latest_vote {
  4016. (
  4017. heavier_validator_latest_vote,
  4018. new_lighter_validator_latest_vote,
  4019. )
  4020. } else {
  4021. (
  4022. new_lighter_validator_latest_vote,
  4023. heavier_validator_latest_vote,
  4024. )
  4025. };
  4026. // Check the new vote is on the same fork as the heaviest fork
  4027. let heavier_blockstore = open_blockstore(&heavier_validator_ledger_path);
  4028. let larger_slot_ancestors: HashSet<Slot> =
  4029. AncestorIterator::new(larger, &heavier_blockstore)
  4030. .chain(std::iter::once(larger))
  4031. .collect();
  4032. assert!(larger_slot_ancestors.contains(&smaller));
  4033. break;
  4034. } else {
  4035. sleep(Duration::from_millis(50));
  4036. }
  4037. }
  4038. };
  4039. let ticks_per_slot = 8;
  4040. run_kill_partition_switch_threshold(
  4041. &[(failures_stake as usize, 0)],
  4042. partitions,
  4043. Some(ticks_per_slot),
  4044. PartitionContext::default(),
  4045. on_partition_start,
  4046. on_before_partition_resolved,
  4047. on_partition_resolved,
  4048. );
  4049. }
  /// Starts a cluster with one staked validator and three listeners, then checks that
  /// discovery through the entry point's gossip address reports exactly four nodes.
  #[test]
  #[serial]
  fn test_listener_startup() {
      let validator_configs =
          make_identical_validator_configs(&ValidatorConfig::default_for_test(), 1);
      let mut config = ClusterConfig {
          node_stakes: vec![DEFAULT_NODE_STAKE],
          num_listeners: 3,
          validator_configs,
          ..ClusterConfig::default()
      };
      let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      // One validator plus three listeners are expected to show up.
      let entry_point = &cluster.entry_point_info;
      let discovered_nodes = discover_validators(
          &entry_point.gossip().unwrap(),
          4,
          entry_point.shred_version(),
          SocketAddrSpace::Unspecified,
      )
      .unwrap();
      assert_eq!(discovered_nodes.len(), 4);
  }
  4072. fn find_latest_replayed_slot_from_ledger(
  4073. ledger_path: &Path,
  4074. mut latest_slot: Slot,
  4075. ) -> (Slot, HashSet<Slot>) {
  4076. loop {
  4077. let mut blockstore = open_blockstore(ledger_path);
  4078. // This is kind of a hack because we can't query for new frozen blocks over RPC
  4079. // since the validator is not voting.
  4080. let new_latest_slots: Vec<Slot> = blockstore
  4081. .slot_meta_iterator(latest_slot)
  4082. .unwrap()
  4083. .filter_map(|(s, _)| if s > latest_slot { Some(s) } else { None })
  4084. .collect();
  4085. if let Some(new_latest_slot) = new_latest_slots.first() {
  4086. latest_slot = *new_latest_slot;
  4087. info!("Checking latest_slot {latest_slot}");
  4088. // Wait for the slot to be fully received by the validator
  4089. loop {
  4090. info!("Waiting for slot {latest_slot} to be full");
  4091. if blockstore.is_full(latest_slot) {
  4092. break;
  4093. } else {
  4094. sleep(Duration::from_millis(50));
  4095. blockstore = open_blockstore(ledger_path);
  4096. }
  4097. }
  4098. // Wait for the slot to be replayed
  4099. loop {
  4100. info!("Waiting for slot {latest_slot} to be replayed");
  4101. if blockstore.get_bank_hash(latest_slot).is_some() {
  4102. return (
  4103. latest_slot,
  4104. AncestorIterator::new(latest_slot, &blockstore).collect(),
  4105. );
  4106. } else {
  4107. sleep(Duration::from_millis(50));
  4108. blockstore = open_blockstore(ledger_path);
  4109. }
  4110. }
  4111. }
  4112. sleep(Duration::from_millis(50));
  4113. }
  4114. }
  /// Runs the shared partition test with two partitions of size 1 and Alpenglow disabled.
  #[test]
  #[serial]
  fn test_cluster_partition_1_1() {
      let (num_partitions, is_alpenglow) = (2, false);
      run_test_cluster_partition(num_partitions, is_alpenglow);
  }
  /// Runs the shared partition test with two partitions of size 1 and Alpenglow enabled.
  #[test]
  #[serial]
  fn test_alpenglow_cluster_partition_1_1() {
      let (num_partitions, is_alpenglow) = (2, true);
      run_test_cluster_partition(num_partitions, is_alpenglow);
  }
  /// Runs the shared partition test with three partitions of size 1 and Alpenglow disabled.
  #[test]
  #[serial]
  fn test_cluster_partition_1_1_1() {
      let (num_partitions, is_alpenglow) = (3, false);
      run_test_cluster_partition(num_partitions, is_alpenglow);
  }
  /// Runs the shared partition test with three partitions of size 1 and Alpenglow enabled.
  #[test]
  #[serial]
  fn test_alpenglow_cluster_partition_1_1_1() {
      let (num_partitions, is_alpenglow) = (3, true);
      run_test_cluster_partition(num_partitions, is_alpenglow);
  }
  4135. fn run_test_cluster_partition(num_partitions: usize, is_alpenglow: bool) {
  4136. let empty = |_: &mut LocalCluster, _: &mut ()| {};
  4137. let on_partition_resolved = |cluster: &mut LocalCluster, _: &mut ()| {
  4138. cluster.check_for_new_roots(16, "PARTITION_TEST", SocketAddrSpace::Unspecified);
  4139. };
  4140. let partition_sizes = vec![1; num_partitions];
  4141. run_cluster_partition(
  4142. &partition_sizes,
  4143. None,
  4144. (),
  4145. empty,
  4146. empty,
  4147. on_partition_resolved,
  4148. None,
  4149. vec![],
  4150. is_alpenglow,
  4151. )
  4152. }
  /// Starts a four-node cluster with genesis-embedded validator keys, then hands the entry
  /// point and the remaining cluster state to
  /// `cluster_tests::kill_entry_and_spend_and_verify_rest`.
  #[test]
  #[serial]
  fn test_leader_failure_4() {
      solana_logger::setup_with_default(RUST_LOG_FILTER);
      error!("test_leader_failure_4");

      // Four is the smallest cluster that still has a supermajority after one node is
      // taken offline.
      let num_nodes = 4;
      let validator_config = ValidatorConfig::default_for_test();

      // Vote and stake accounts are embedded in genesis so the test does not have to wait
      // for stake activation or race against gossip vote acceptance, block repair, etc.
      // before too many epochs have gone by.
      let validator_keys: Vec<(Arc<Keypair>, bool)> =
          std::iter::repeat_with(|| (Arc::new(Keypair::new()), true))
              .take(num_nodes)
              .collect();

      // Warmup slots are skipped: with several fresh, pre-staked validators coming online,
      // the short warmup epochs are a problem. Leader slots get skipped while the nodes
      // are still starting, partitions occur, and because those epochs have as many slots
      // as the total tower height, any skipped slot can prevent rooting, which prevents
      // leader schedule generation and stalls the cluster.
      let mut config = ClusterConfig {
          node_stakes: vec![DEFAULT_NODE_STAKE; num_nodes],
          validator_configs: make_identical_validator_configs(&validator_config, num_nodes),
          validator_keys: Some(validator_keys),
          skip_warmup_slots: true,
          ..ClusterConfig::default()
      };
      let cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      let entry_point = &cluster.entry_point_info;
      let entry_point_exit = &cluster
          .validators
          .get(entry_point.pubkey())
          .unwrap()
          .config
          .validator_exit;
      // Duration of one slot in milliseconds, read from the config after cluster creation.
      let slot_millis =
          config.ticks_per_slot * config.poh_config.target_tick_duration.as_millis() as u64;

      cluster_tests::kill_entry_and_spend_and_verify_rest(
          entry_point,
          entry_point_exit,
          &cluster.funding_keypair,
          &cluster.connection_cache,
          num_nodes,
          slot_millis,
          SocketAddrSpace::Unspecified,
      );
  }
  4201. // This test verifies that even if votes from a validator end up taking too long to land, and thus
  // some of the referenced slots are no longer present in the slot hashes sysvar,
  4203. // consensus can still be attained.
  4204. //
  4205. // Validator A (60%)
  4206. // Validator B (40%)
  4207. // / --- 10 --- [..] --- 16 (B is voting, due to network issues is initially not able to see the other fork at all)
  4208. // /
  4209. // 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 (A votes 1 - 9 votes are landing normally. B does the same however votes are not landing)
  4210. // \
  4211. // \--[..]-- 73 (majority fork)
  4212. // A is voting on the majority fork and B wants to switch to this fork however in this majority fork
  4213. // the earlier votes for B (1 - 9) never landed so when B eventually goes to vote on 73, slots in
  4214. // its local vote state are no longer present in slot hashes.
  4215. //
  4216. // 1. Wait for B's tower to see local vote state was updated to new fork
  4217. // 2. Wait X blocks, check B's vote state on chain has been properly updated
  4218. //
  4219. // NOTE: it is not reliable for B to organically have 1 to reach 2^16 lockout, so we simulate the 6
  4220. // consecutive votes on the minor fork by manually incrementing the confirmation levels for the
  4221. // common ancestor votes in tower.
  4222. // To allow this test to run in a reasonable time we change the
  4223. // slot_hash expiry to 64 slots.
  #[test]
  #[serial]
  fn test_slot_hash_expiry() {
      // Two validators, A (60 units of stake) and B (40 units). B is made to believe it
      // voted up to a common ancestor, forks off alone, and must later switch onto A's
      // fork. The test passes once new roots are observed after both nodes are restarted.
      solana_logger::setup_with_default(RUST_LOG_FILTER);
      // Set the slot hashes entry count used by this test to 64.
      solana_slot_hashes::set_entries_for_tests_only(64);

      let slots_per_epoch = 2048;
      let node_stakes = vec![60 * DEFAULT_NODE_STAKE, 40 * DEFAULT_NODE_STAKE];
      let validator_keys: Vec<_> = [
          "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
          "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
      ]
      .iter()
      .map(|encoded| (Arc::new(Keypair::from_base58_string(encoded)), true))
      .collect();
      let node_vote_keys: Vec<_> = [
          "3NDQ3ud86RTVg8hTy2dDWnS4P8NfjhZ2gDgQAJbr3heaKaUVS1FW3sTLKA1GmDrY9aySzsa4QxpDkbLv47yHxzr3",
          "46ZHpHE6PEvXYPu3hf9iQqjBk2ZNDaJ9ejqKWHEjxaQjpAGasKaWKbKHbP3646oZhfgDRzx95DH9PCBKKsoCVngk",
      ]
      .iter()
      .map(|encoded| Arc::new(Keypair::from_base58_string(encoded)))
      .collect();
      let a_pubkey = validator_keys[0].0.pubkey();
      let b_pubkey = validator_keys[1].0.pubkey();

      // B starts with voting disabled: this simulates its votes not landing until it gets
      // to the minority fork.
      let mut validator_configs = make_identical_validator_configs(
          &ValidatorConfig::default_for_test(),
          node_stakes.len(),
      );
      validator_configs[1].voting_disabled = true;

      let mint_lamports = DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>();
      let mut config = ClusterConfig {
          mint_lamports,
          node_stakes,
          validator_configs,
          validator_keys: Some(validator_keys),
          node_vote_keys: Some(node_vote_keys),
          slots_per_epoch,
          stakers_slot_offset: slots_per_epoch,
          skip_warmup_slots: true,
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      let mut common_ancestor_slot = 8;
      let a_ledger_path = cluster.ledger_path(&a_pubkey);
      let b_ledger_path = cluster.ledger_path(&b_pubkey);

      // B was only needed for the initial stake distribution, so stop it right away.
      info!("Killing B");
      let b_info = cluster.exit_node(&b_pubkey);

      // Poll A's tower until its last vote has reached the common ancestor.
      info!("Letting A run until common_ancestor_slot");
      while !matches!(
          last_vote_in_tower(&a_ledger_path, &a_pubkey),
          Some((slot, _)) if slot >= common_ancestor_slot
      ) {
          sleep(Duration::from_millis(100));
      }

      // A keeps running. B is set up so that it believes it voted up to the common
      // ancestor but knows nothing beyond it.
      {
          info!("Copying A's ledger to B");
          std::fs::remove_dir_all(&b_info.info.ledger_path).unwrap();
          let mut copy_options = fs_extra::dir::CopyOptions::new();
          copy_options.copy_inside = true;
          fs_extra::dir::copy(&a_ledger_path, &b_ledger_path, &copy_options).unwrap();

          // The copied directory still contains A's tower; drop it.
          info!("Removing A's tower in B's ledger dir");
          remove_tower(&b_ledger_path, &a_pubkey);

          // A's tower, re-keyed to B, becomes B's tower.
          info!("Loading A's tower");
          let Some(mut b_tower) = restore_tower(&a_ledger_path, &a_pubkey) else {
              panic!("A's tower is missing");
          };
          b_tower.node_pubkey = b_pubkey;

          // A has kept voting in the meantime, so refresh the common ancestor.
          let Some(last_voted_slot) = b_tower.last_voted_slot() else {
              panic!("A's tower has no votes");
          };
          common_ancestor_slot = last_voted_slot;
          info!("New common_ancestor_slot {common_ancestor_slot}");

          info!("Increase lockout by 6 confirmation levels and save as B's tower");
          b_tower.increase_lockout(6);
          save_tower(&b_ledger_path, &b_tower, &b_info.info.keypair);
          info!("B's new tower: {:?}", b_tower.tower_slots());

          // B must not know about anything after the common ancestor.
          info!("Removing extra slots from B's blockstore");
          let b_blockstore = open_blockstore(&b_ledger_path);
          purge_slots_with_count(&b_blockstore, common_ancestor_slot + 1, 100);
      }

      info!(
          "Run A on majority fork until it reaches slot hash expiry {}",
          solana_slot_hashes::get_entries()
      );
      // Let A extend the majority fork until its landed vote is at least two slot-hash
      // windows past the common ancestor.
      let last_vote_on_a = loop {
          let landed_vote =
              wait_for_last_vote_in_tower_to_land_in_ledger(&a_ledger_path, &a_pubkey).unwrap();
          let target_slot =
              common_ancestor_slot + 2 * (solana_slot_hashes::get_entries() as u64);
          if landed_vote >= target_slot {
              let a_blockstore = open_blockstore(&a_ledger_path);
              info!(
                  "A majority fork: {:?}",
                  AncestorIterator::new(landed_vote, &a_blockstore).collect::<Vec<Slot>>()
              );
              break landed_vote;
          }
          sleep(Duration::from_millis(100));
      };

      // With A down and B voting again, B should fork off on its own.
      info!("Killing A");
      let a_info = cluster.exit_node(&a_pubkey);
      info!("Restarting B");
      let mut b_info = b_info;
      b_info.config.voting_disabled = false;
      cluster.restart_node(&b_pubkey, b_info, SocketAddrSpace::Unspecified);

      // Wait until the common ancestor sits more than 7 positions back in the ancestor
      // chain of B's last landed vote.
      info!("Allowing B to fork");
      loop {
          let b_blockstore = open_blockstore(&b_ledger_path);
          let last_vote =
              wait_for_last_vote_in_tower_to_land_in_ledger(&b_ledger_path, &b_pubkey).unwrap();
          let ancestor_position = AncestorIterator::new(last_vote, &b_blockstore)
              .position(|slot| slot == common_ancestor_slot);
          if matches!(ancestor_position, Some(position) if position > 7) {
              info!(
                  "B has forked for enough lockout: {:?}",
                  AncestorIterator::new(last_vote, &b_blockstore).collect::<Vec<Slot>>()
              );
              break;
          }
          sleep(Duration::from_millis(1000));
      }

      info!("Kill B");
      let b_info = cluster.exit_node(&b_pubkey);

      info!("Resolve the partition");
      {
          // Give B the blocks that A produced on its side of the partition.
          let a_blockstore = open_blockstore(&a_ledger_path);
          let b_blockstore = open_blockstore(&b_ledger_path);
          copy_blocks(last_vote_on_a, &a_blockstore, &b_blockstore, false);
      }

      // Bring both nodes back and see whether B eventually switches to the majority fork.
      info!("Restarting A & B");
      cluster.restart_node(&a_pubkey, a_info, SocketAddrSpace::Unspecified);
      cluster.restart_node(&b_pubkey, b_info, SocketAddrSpace::Unspecified);

      info!("Waiting for B to switch to majority fork and make a root");
      // New roots are observed through A's contact info.
      let a_contact_info = cluster.get_contact_info(&a_pubkey).unwrap().clone();
      cluster_tests::check_for_new_roots(
          16,
          &[a_contact_info],
          &cluster.connection_cache,
          "test_slot_hashes_expiry",
      );
  }
  4381. // This test simulates a case where a leader sends a duplicate block with different ancestry. One
  4382. // version builds off of the rooted path, however the other version builds off a pruned branch. The
  4383. // validators that receive the pruned version will need to repair in order to continue, which
  4384. // requires an ancestor hashes repair.
  4385. //
  4386. // We setup 3 validators:
  4387. // - majority, will produce the rooted path
  4388. // - minority, will produce the pruned path
  4389. // - our_node, will be fed the pruned version of the duplicate block and need to repair
  4390. //
  4391. // Additionally we setup 3 observer nodes to propagate votes and participate in the ancestor hashes
  4392. // sample.
  4393. //
  4394. // Fork structure:
  4395. //
  4396. // 0 - 1 - ... - 10 (fork slot) - 30 - ... - 61 (rooted path) - ...
  4397. // |
  4398. // |- 11 - ... - 29 (pruned path) - 81'
  4399. //
  4400. //
  4401. // Steps:
  4402. // 1) Different leader schedule, minority thinks it produces 0-29 and majority rest, majority
  4403. // thinks minority produces all blocks. This is to avoid majority accidentally producing blocks
  4404. // before it shuts down.
  4405. // 2) Start cluster, kill our_node.
  4406. // 3) Kill majority cluster after it votes for any slot > fork slot (guarantees that the fork slot is
  4407. // reached as minority cannot pass threshold otherwise).
  4408. // 4) Let minority produce forks on pruned forks until out of leader slots then kill.
  4409. // 5) Truncate majority ledger past fork slot so it starts building off of fork slot.
  4410. // 6) Restart majority and wait until it starts producing blocks on main fork and roots something
  4411. // past the fork slot.
  4412. // 7) Construct our ledger by copying majority ledger and copying blocks from minority for the pruned path.
  4413. // 8) In our node's ledger, change the parent of the latest slot in majority fork to be the latest
  4414. // slot in the minority fork (simulates duplicate built off of pruned block)
  // 9) Start our node which will prune the minority fork on ledger replay and verify that we can make roots.
  4416. //
  #[test]
  #[serial]
  #[ignore]
  fn test_duplicate_with_pruned_ancestor() {
      // Builds a ledger for `our_node` in which the majority's latest voted block is
      // re-parented onto the minority's (pruned) fork, then verifies that `our_node` can
      // still make new roots after being restarted with that ledger.
      solana_logger::setup_with("info,solana_metrics=off");
      solana_core::repair::duplicate_repair_status::set_ancestor_hash_repair_sample_size_for_tests_only(3);

      let majority_leader_stake = 10_000_000 * DEFAULT_NODE_STAKE;
      let minority_leader_stake = 2_000_000 * DEFAULT_NODE_STAKE;
      let our_node_stake = DEFAULT_NODE_STAKE;
      let observer_stake = DEFAULT_NODE_STAKE;

      let slots_per_epoch = 2048;
      let fork_slot: u64 = 12;
      let fork_length: u64 = 20;
      let majority_fork_buffer = 5;
      // How far past `fork_slot` the minority must vote before it is killed
      let min_minority_fork_depth: u64 = 15;
      // Upper bound for each of the two vote-polling phases below
      let vote_wait_timeout = Duration::from_secs(30);
      let poll_interval = Duration::from_millis(100);

      let mut node_stakes = vec![majority_leader_stake, minority_leader_stake, our_node_stake];
      // We need enough observers to reach `ANCESTOR_HASH_REPAIR_SAMPLE_SIZE`
      node_stakes.extend([observer_stake; 3]);
      let num_nodes = node_stakes.len();

      // Fixed identities for majority, minority and our node; observers get fresh keys
      let validator_keys = [
          "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
          "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
          "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
      ]
      .iter()
      .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
      .chain(std::iter::repeat_with(|| (Arc::new(Keypair::new()), true)))
      .take(node_stakes.len())
      .collect::<Vec<_>>();
      let validators = validator_keys
          .iter()
          .map(|(kp, _)| kp.pubkey())
          .collect::<Vec<_>>();
      let (majority_pubkey, minority_pubkey, our_node_pubkey) =
          (validators[0], validators[1], validators[2]);

      let mut default_config = ValidatorConfig::default_for_test();
      // Minority fork is leader long enough to create pruned fork
      let validator_to_slots = vec![
          (minority_pubkey, (fork_slot + fork_length) as usize),
          (majority_pubkey, slots_per_epoch as usize),
      ];
      let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
      default_config.fixed_leader_schedule = Some(FixedSchedule {
          leader_schedule: Arc::new(leader_schedule),
      });

      let mut validator_configs = make_identical_validator_configs(&default_config, num_nodes);
      // Don't let majority produce anything past the fork by tricking its leader schedule
      validator_configs[0].fixed_leader_schedule = Some(FixedSchedule {
          leader_schedule: Arc::new(create_custom_leader_schedule(
              [(minority_pubkey, slots_per_epoch as usize)].into_iter(),
          )),
      });

      let mut config = ClusterConfig {
          mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
          node_stakes,
          validator_configs,
          validator_keys: Some(validator_keys),
          slots_per_epoch,
          stakers_slot_offset: slots_per_epoch,
          skip_warmup_slots: true,
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);

      let majority_ledger_path = cluster.ledger_path(&majority_pubkey);
      let minority_ledger_path = cluster.ledger_path(&minority_pubkey);
      let our_node_ledger_path = cluster.ledger_path(&our_node_pubkey);
      info!("majority {majority_pubkey} ledger path {majority_ledger_path:?}");
      info!("minority {minority_pubkey} ledger path {minority_ledger_path:?}");
      info!("our_node {our_node_pubkey} ledger path {our_node_ledger_path:?}");

      info!("Killing our node");
      let our_node_info = cluster.exit_node(&our_node_pubkey);

      info!("Waiting on majority validator to vote on at least {fork_slot}");
      let now = Instant::now();
      let mut last_majority_vote = 0;
      loop {
          let elapsed = now.elapsed();
          assert!(
              elapsed <= vote_wait_timeout,
              "Majority validator failed to vote on a slot >= {fork_slot} in {} secs, majority \
               validator last vote: {last_majority_vote}",
              elapsed.as_secs(),
          );
          sleep(poll_interval);
          if let Some((last_vote, _)) = last_vote_in_tower(&majority_ledger_path, &majority_pubkey) {
              last_majority_vote = last_vote;
              if last_vote >= fork_slot {
                  break;
              }
          }
      }

      info!(
          "Killing majority validator, waiting for minority fork to reach a depth of at least \
           {min_minority_fork_depth}"
      );
      let mut majority_validator_info = cluster.exit_node(&majority_pubkey);
      let now = Instant::now();
      let mut last_minority_vote = 0;
      while last_minority_vote < fork_slot + min_minority_fork_depth {
          let elapsed = now.elapsed();
          // Report the required depth and the time actually waited. The message used to
          // put the elapsed seconds in the depth position and hard-code "15 secs".
          assert!(
              elapsed <= vote_wait_timeout,
              "Minority validator failed to create a fork of depth >= {min_minority_fork_depth} \
               in {} secs, last_minority_vote: {last_minority_vote}",
              elapsed.as_secs(),
          );
          // Pace the polling like the other wait loops instead of spinning on tower reads
          sleep(poll_interval);
          if let Some((last_vote, _)) = last_vote_in_tower(&minority_ledger_path, &minority_pubkey) {
              last_minority_vote = last_vote;
          }
      }

      info!("Killing minority validator, fork created successfully: {last_minority_vote:?}");
      let last_minority_vote =
          wait_for_last_vote_in_tower_to_land_in_ledger(&minority_ledger_path, &minority_pubkey)
              .unwrap();
      let minority_validator_info = cluster.exit_node(&minority_pubkey);

      info!("Truncating majority validator ledger to {fork_slot}");
      {
          remove_tower(&majority_ledger_path, &majority_pubkey);
          let blockstore = open_blockstore(&majority_ledger_path);
          purge_slots_with_count(&blockstore, fork_slot + 1, 100);
      }

      info!("Restarting majority validator");
      // Make sure we don't send duplicate votes
      majority_validator_info.config.wait_to_vote_slot = Some(fork_slot + fork_length);
      // Fix the leader schedule so we can produce blocks
      majority_validator_info
          .config
          .fixed_leader_schedule
          .clone_from(&minority_validator_info.config.fixed_leader_schedule);
      cluster.restart_node(
          &majority_pubkey,
          majority_validator_info,
          SocketAddrSpace::Unspecified,
      );

      let root_target = fork_slot + fork_length + majority_fork_buffer;
      let mut last_majority_root = 0;
      let now = Instant::now();
      info!("Waiting for majority validator to root something past {root_target}");
      while last_majority_root <= root_target {
          let elapsed = now.elapsed();
          // Report the root being polled; the vote recorded before the restart is stale here
          assert!(
              elapsed <= Duration::from_secs(60),
              "Majority validator failed to root something > {root_target} in {} secs, last \
               majority validator root: {last_majority_root}",
              elapsed.as_secs(),
          );
          sleep(poll_interval);
          if let Some(last_root) = last_root_in_tower(&majority_ledger_path, &majority_pubkey) {
              last_majority_root = last_root;
          }
      }

      let last_majority_vote =
          wait_for_last_vote_in_tower_to_land_in_ledger(&majority_ledger_path, &majority_pubkey)
              .unwrap();
      info!(
          "Creating duplicate block built off of pruned branch for our node. Last majority vote \
           {last_majority_vote}, Last minority vote {last_minority_vote}"
      );
      {
          {
              // Copy majority fork
              std::fs::remove_dir_all(&our_node_info.info.ledger_path).unwrap();
              let mut opt = fs_extra::dir::CopyOptions::new();
              opt.copy_inside = true;
              fs_extra::dir::copy(&majority_ledger_path, &our_node_ledger_path, &opt).unwrap();
              remove_tower(&our_node_ledger_path, &majority_pubkey);
          }

          // Copy minority fork to our blockstore
          // Set trusted=true in blockstore copy to skip the parent slot >= latest root check;
          // this check would otherwise prevent the pruned fork from being inserted
          let minority_blockstore = open_blockstore(&minority_validator_info.info.ledger_path);
          let our_blockstore = open_blockstore(&our_node_info.info.ledger_path);
          copy_blocks(
              last_minority_vote,
              &minority_blockstore,
              &our_blockstore,
              true,
          );

          // Change last block parent to chain off of (purged) minority fork
          info!("For our node, changing parent of {last_majority_vote} to {last_minority_vote}");
          our_blockstore.clear_unconfirmed_slot(last_majority_vote);
          // `saturating_sub` keeps the `max(1)` floor meaningful: a plain subtraction would
          // overflow before the floor could apply if the votes were ever out of order
          let slot_gap = last_majority_vote
              .saturating_sub(last_minority_vote)
              .max(1);
          let entries = create_ticks(64 * slot_gap, 0, Hash::default());
          let shreds =
              entries_to_test_shreds(&entries, last_majority_vote, last_minority_vote, true, 0);
          our_blockstore.insert_shreds(shreds, None, false).unwrap();
      }

      // Actual test, `our_node` will replay the minority fork, then the majority fork which will
      // prune the minority fork. Then finally the problematic block will be skipped (not replayed)
      // because its parent has been pruned from bank forks. Meanwhile the majority validator has
      // continued making blocks and voting, duplicate confirming everything. This will cause the
      // pruned fork to become popular triggering an ancestor hashes repair, eventually allowing our
      // node to dump & repair & continue making roots.
      info!("Restarting our node, verifying that our node is making roots past the duplicate block");
      cluster.restart_node(
          &our_node_pubkey,
          our_node_info,
          SocketAddrSpace::Unspecified,
      );
      cluster_tests::check_for_new_roots(
          16,
          &[cluster.get_contact_info(&our_node_pubkey).unwrap().clone()],
          &cluster.connection_cache,
          "test_duplicate_with_pruned_ancestor",
      );
  }
  /// Test fastboot: a node must be able to boot from its local state and afterwards still
  /// produce snapshots that agree with the rest of the cluster.
  ///
  /// 1. Start node 1 and wait for it to take snapshots
  /// 2. Start node 2 from the snapshot archives produced in (1)
  /// 3. Wait for node 2 to take a bank snapshot newer than those archives
  /// 4. Restart node 2 from the local state created in (3)
  /// 5. Wait for node 2 to take new snapshots
  /// 6. Start node 3 from the snapshot archives produced in (5)
  /// 7. Wait for node 3 to take new snapshots
  /// 8. Ensure nodes 2 and 3 hold snapshot archives matching node 1's newest ones
  #[test]
  #[serial]
  fn test_boot_from_local_state() {
      /// Longest time any validator is given to produce its next snapshot archives.
      const SNAPSHOT_ARCHIVE_TIMEOUT: Duration = Duration::from_secs(5 * 60);
      const FULL_SNAPSHOT_INTERVAL: SnapshotInterval =
          SnapshotInterval::Slots(NonZeroU64::new(100).unwrap());
      const INCREMENTAL_SNAPSHOT_INTERVAL: SnapshotInterval =
          SnapshotInterval::Slots(NonZeroU64::new(10).unwrap());

      /// Copies the file `archive` into `destination_dir`, keeping its file name.
      fn copy_archive_into(archive: &std::path::Path, destination_dir: &std::path::Path) {
          let destination = destination_dir.join(archive.file_name().unwrap());
          std::fs::copy(archive, destination).unwrap();
      }

      // These structs are used to provide better error logs if the asserts below are violated.
      // The `allow(dead_code)` annotation is to appease clippy, which thinks the field is unused...
      #[allow(dead_code)]
      #[derive(Debug)]
      struct SnapshotSlot(Slot);
      #[allow(dead_code)]
      #[derive(Debug)]
      struct BaseSlot(Slot);

      solana_logger::setup_with_default("error,local_cluster=info");

      let validator1_config =
          SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 2);
      let validator2_config =
          SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 4);
      let validator3_config =
          SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 3);

      let mut cluster_config = ClusterConfig {
          node_stakes: vec![100 * DEFAULT_NODE_STAKE],
          validator_configs: make_identical_validator_configs(&validator1_config.validator_config, 1),
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut cluster_config, SocketAddrSpace::Unspecified);

      // Blocks until the validator owning `config` has written its next incremental snapshot
      // archive; yields `(incremental archive, full archive)`. The cluster is passed in rather
      // than captured so that it can still be mutated between calls.
      let wait_for_incremental_snapshot =
          |cluster: &LocalCluster, config: &SnapshotValidatorConfig| {
              LocalCluster::wait_for_next_incremental_snapshot(
                  cluster,
                  &config.full_snapshot_archives_dir,
                  &config.incremental_snapshot_archives_dir,
                  Some(SNAPSHOT_ARCHIVE_TIMEOUT),
              )
          };

      // Booting from local state first requires snapshot archives to exist.
      info!("Waiting for validator1 to create snapshots...");
      let (incremental_snapshot_archive, full_snapshot_archive) =
          wait_for_incremental_snapshot(&cluster, &validator1_config);
      debug!(
          "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
           {incremental_snapshot_archive:?}"
      );
      info!("Waiting for validator1 to create snapshots... DONE");

      info!("Copying snapshots to validator2...");
      copy_archive_into(
          full_snapshot_archive.path(),
          validator2_config.full_snapshot_archives_dir.path(),
      );
      copy_archive_into(
          incremental_snapshot_archive.path(),
          validator2_config.incremental_snapshot_archives_dir.path(),
      );
      info!("Copying snapshots to validator2... DONE");

      info!("Starting validator2...");
      let validator2_identity = Arc::new(Keypair::new());
      cluster.add_validator(
          &validator2_config.validator_config,
          DEFAULT_NODE_STAKE,
          Arc::clone(&validator2_identity),
          None,
          SocketAddrSpace::Unspecified,
      );
      info!("Starting validator2... DONE");

      // Fastboot needs a bank snapshot that is newer than the archives validator2 started from,
      // so poll until one shows up.
      info!("Waiting for validator2 to create a new bank snapshot...");
      let polling_started = Instant::now();
      let bank_snapshot = loop {
          let newer_bank_snapshot =
              snapshot_utils::get_highest_bank_snapshot_post(&validator2_config.bank_snapshots_dir)
                  .filter(|candidate| candidate.slot > incremental_snapshot_archive.slot());
          if let Some(found) = newer_bank_snapshot {
              break found;
          }
          assert!(
              polling_started.elapsed() < Duration::from_secs(30),
              "It should not take longer than 30 seconds to create a new bank snapshot"
          );
          std::thread::yield_now();
      };
      debug!("bank snapshot: {bank_snapshot:?}");
      info!("Waiting for validator2 to create a new bank snapshot... DONE");

      // Restart WITH fastboot: snapshot archives must not be used at startup.
      info!("Restarting validator2 from local state...");
      let validator2_pubkey = validator2_identity.pubkey();
      let mut validator2_info = cluster.exit_node(&validator2_pubkey);
      validator2_info.config.use_snapshot_archives_at_startup = UseSnapshotArchivesAtStartup::Never;
      cluster.restart_node(
          &validator2_pubkey,
          validator2_info,
          SocketAddrSpace::Unspecified,
      );
      info!("Restarting validator2 from local state... DONE");

      info!("Waiting for validator2 to create snapshots...");
      let (incremental_snapshot_archive, full_snapshot_archive) =
          wait_for_incremental_snapshot(&cluster, &validator2_config);
      debug!(
          "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
           {incremental_snapshot_archive:?}"
      );
      info!("Waiting for validator2 to create snapshots... DONE");

      info!("Copying snapshots to validator3...");
      copy_archive_into(
          full_snapshot_archive.path(),
          validator3_config.full_snapshot_archives_dir.path(),
      );
      copy_archive_into(
          incremental_snapshot_archive.path(),
          validator3_config.incremental_snapshot_archives_dir.path(),
      );
      info!("Copying snapshots to validator3... DONE");

      info!("Starting validator3...");
      let validator3_identity = Arc::new(Keypair::new());
      cluster.add_validator(
          &validator3_config.validator_config,
          DEFAULT_NODE_STAKE,
          validator3_identity,
          None,
          SocketAddrSpace::Unspecified,
      );
      info!("Starting validator3... DONE");

      // A fresh snapshot from validator3 shows that it is making roots.
      info!("Waiting for validator3 to create snapshots...");
      let (incremental_snapshot_archive, full_snapshot_archive) =
          wait_for_incremental_snapshot(&cluster, &validator3_config);
      debug!(
          "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
           {incremental_snapshot_archive:?}"
      );
      info!("Waiting for validator3 to create snapshots... DONE");

      // Ensure that all validators have the correct state by comparing snapshots.
      // Since validator1 has been running the longest, it may be ahead of the others,
      // so use it as the reference for the comparison:
      // - wait for validator1 to take new snapshots
      // - wait for the other validators to have high enough snapshots
      // - ensure the other validators' snapshots match validator1's
      //
      // NOTE: There's a chance validator 2 or 3 has crossed the next full snapshot past what
      // validator 1 has. If that happens, validator 2 or 3 may have purged the snapshots needed
      // to compare with validator 1, and thus assert. If that happens, the full snapshot interval
      // may need to be adjusted larger.
      info!("Waiting for validator1 to create snapshots...");
      let (incremental_snapshot_archive, full_snapshot_archive) =
          wait_for_incremental_snapshot(&cluster, &validator1_config);
      debug!(
          "snapshot archives:\n\tfull: {full_snapshot_archive:?}\n\tincr: \
           {incremental_snapshot_archive:?}"
      );
      info!("Waiting for validator1 to create snapshots... DONE");

      for (i, other_validator_config) in [(2, &validator2_config), (3, &validator3_config)] {
          info!("Checking if validator{i} has the same snapshots as validator1...");

          // True once the other validator's highest full and incremental archive slots have
          // reached validator1's reference archives.
          let has_caught_up = || {
              let Some(other_full_slot) = snapshot_utils::get_highest_full_snapshot_archive_slot(
                  &other_validator_config.full_snapshot_archives_dir,
              ) else {
                  return false;
              };
              let other_incremental_slot =
                  snapshot_utils::get_highest_incremental_snapshot_archive_slot(
                      &other_validator_config.incremental_snapshot_archives_dir,
                      other_full_slot,
                  );
              other_full_slot >= full_snapshot_archive.slot()
                  && other_incremental_slot >= Some(incremental_snapshot_archive.slot())
          };
          let polling_started = Instant::now();
          while !has_caught_up() {
              assert!(
                  polling_started.elapsed() < Duration::from_secs(60),
                  "It should not take longer than 60 seconds to take snapshots",
              );
              std::thread::yield_now();
          }

          // The other validator must hold a full archive with validator1's slot and hash.
          let other_full_snapshot_archives = snapshot_utils::get_full_snapshot_archives(
              &other_validator_config.full_snapshot_archives_dir,
          );
          debug!("validator{i} full snapshot archives: {other_full_snapshot_archives:?}");
          let full_snapshot_matches = other_full_snapshot_archives.iter().any(|candidate| {
              candidate.slot() == full_snapshot_archive.slot()
                  && candidate.hash() == full_snapshot_archive.hash()
          });
          assert!(
              full_snapshot_matches,
              "full snapshot archive does not match!\n validator1: {:?}\n validator{i}: {:?}",
              (
                  SnapshotSlot(full_snapshot_archive.slot()),
                  full_snapshot_archive.hash(),
              ),
              other_full_snapshot_archives
                  .iter()
                  .sorted_unstable()
                  .rev()
                  .map(|snap| (SnapshotSlot(snap.slot()), snap.hash()))
                  .collect::<Vec<_>>(),
          );

          // Likewise for the incremental archive: base slot, slot and hash must all agree.
          let other_incremental_snapshot_archives = snapshot_utils::get_incremental_snapshot_archives(
              &other_validator_config.incremental_snapshot_archives_dir,
          );
          debug!(
              "validator{i} incremental snapshot archives: {other_incremental_snapshot_archives:?}"
          );
          let incremental_snapshot_matches =
              other_incremental_snapshot_archives.iter().any(|candidate| {
                  candidate.base_slot() == incremental_snapshot_archive.base_slot()
                      && candidate.slot() == incremental_snapshot_archive.slot()
                      && candidate.hash() == incremental_snapshot_archive.hash()
              });
          assert!(
              incremental_snapshot_matches,
              "incremental snapshot archive does not match!\n validator1: {:?}\n validator{i}: \
               {:?}",
              (
                  BaseSlot(incremental_snapshot_archive.base_slot()),
                  SnapshotSlot(incremental_snapshot_archive.slot()),
                  incremental_snapshot_archive.hash(),
              ),
              other_incremental_snapshot_archives
                  .iter()
                  .sorted_unstable()
                  .rev()
                  .map(|snap| (
                      BaseSlot(snap.base_slot()),
                      SnapshotSlot(snap.slot()),
                      snap.hash(),
                  ))
                  .collect::<Vec<_>>(),
          );
          info!("Checking if validator{i} has the same snapshots as validator1... DONE");
      }
  }
  /// Test fastboot when the node crashed while it was archiving a full snapshot.
  ///
  /// 1. Start a node and wait until it has taken at least two full snapshots plus one later
  ///    bank snapshot POST (for simplicity: wait for 2 full and 1 incremental snapshot).
  /// 2. Simulate the crash: stop the node, then delete its latest full snapshot archive.
  /// 3. Restart the node. This should succeed, and boot from the older full snapshot archive,
  ///    *not* the latest bank snapshot POST.
  /// 4. Take another incremental snapshot. This ensures the correct snapshot was loaded,
  ///    AND ensures the correct accounts hashes are present (which are needed when making
  ///    the bank snapshot POST for the new incremental snapshot).
  #[test]
  #[serial]
  fn test_boot_from_local_state_missing_archive() {
      /// Longest time the validator is given to produce each snapshot archive.
      const SNAPSHOT_ARCHIVE_TIMEOUT: Duration = Duration::from_secs(5 * 60);
      const FULL_SNAPSHOT_INTERVAL: SnapshotInterval =
          SnapshotInterval::Slots(NonZeroU64::new(20).unwrap());
      const INCREMENTAL_SNAPSHOT_INTERVAL: SnapshotInterval =
          SnapshotInterval::Slots(NonZeroU64::new(10).unwrap());

      solana_logger::setup_with_default(RUST_LOG_FILTER);

      let snapshot_config =
          SnapshotValidatorConfig::new(FULL_SNAPSHOT_INTERVAL, INCREMENTAL_SNAPSHOT_INTERVAL, 7);
      let mut cluster_config = ClusterConfig {
          node_stakes: vec![100 * DEFAULT_NODE_STAKE],
          validator_configs: make_identical_validator_configs(&snapshot_config.validator_config, 1),
          ..ClusterConfig::default()
      };
      let mut cluster = LocalCluster::new(&mut cluster_config, SocketAddrSpace::Unspecified);

      // This test needs two full snapshots followed by an incremental snapshot.
      info!("Waiting for validator to create snapshots...");
      for _ in 0..2 {
          LocalCluster::wait_for_next_full_snapshot(
              &cluster,
              &snapshot_config.full_snapshot_archives_dir,
              Some(SNAPSHOT_ARCHIVE_TIMEOUT),
          );
      }
      LocalCluster::wait_for_next_incremental_snapshot(
          &cluster,
          &snapshot_config.full_snapshot_archives_dir,
          &snapshot_config.incremental_snapshot_archives_dir,
          Some(SNAPSHOT_ARCHIVE_TIMEOUT),
      );
      debug!(
          "snapshot archives:\n\tfull: {:?}\n\tincr: {:?}",
          snapshot_utils::get_full_snapshot_archives(
              snapshot_config.full_snapshot_archives_dir.path()
          ),
          snapshot_utils::get_incremental_snapshot_archives(
              snapshot_config.incremental_snapshot_archives_dir.path()
          ),
      );
      info!("Waiting for validator to create snapshots... DONE");

      // Stop the node, then remove its newest full snapshot archive; together this simulates a
      // crash that happened while a full snapshot package was being archived.
      info!("Stopping validator...");
      let node_pubkey = cluster.get_node_pubkeys()[0];
      let mut node_info = cluster.exit_node(&node_pubkey);
      info!("Stopping validator... DONE");

      info!("Deleting latest full snapshot archive...");
      let newest_full_archive = snapshot_utils::get_highest_full_snapshot_archive_info(
          snapshot_config.full_snapshot_archives_dir.path(),
      )
      .unwrap();
      fs::remove_file(newest_full_archive.path()).unwrap();
      info!("Deleting latest full snapshot archive... DONE");

      info!("Restarting validator...");
      // With `Never` instead of `WhenNewest`, the validator should not boot.
      node_info.config.use_snapshot_archives_at_startup = UseSnapshotArchivesAtStartup::WhenNewest;
      cluster.restart_node(&node_pubkey, node_info, SocketAddrSpace::Unspecified);
      info!("Restarting validator... DONE");

      // Creating a new incremental snapshot is the step that used to fail, so make sure it
      // works after the restart.
      info!("Waiting for validator to create snapshots...");
      LocalCluster::wait_for_next_incremental_snapshot(
          &cluster,
          &snapshot_config.full_snapshot_archives_dir,
          &snapshot_config.incremental_snapshot_archives_dir,
          Some(SNAPSHOT_ARCHIVE_TIMEOUT),
      );
      info!("Waiting for validator to create snapshots... DONE");
  }
  4989. // We want to simulate the following:
  4990. // /--- 1 --- 3 (duplicate block)
  4991. // 0
  4992. // \--- 2
  4993. //
  4994. // 1. > DUPLICATE_THRESHOLD of the nodes vote on some version of the duplicate block 3,
  4995. // but don't immediately duplicate confirm so they remove 3 from fork choice and reset PoH back to 1.
  4996. // 2. All the votes on 3 don't land because there are no further blocks building off 3.
  4997. // 3. Some < SWITCHING_THRESHOLD of nodes vote on 2, making it the heaviest fork because no votes on 3 landed
  4998. // 4. Nodes then see duplicate confirmation on 3.
  4999. // 5. Unless somebody builds off of 3 to include the duplicate confirmed votes, 2 will still be the heaviest.
  5000. // However, because 2 has < SWITCHING_THRESHOLD of the votes, people who voted on 3 can't switch, leading to a
  5001. // stall
  5002. #[test]
  5003. #[serial]
  5004. #[allow(unused_attributes)]
  5005. #[ignore]
  5006. fn test_duplicate_shreds_switch_failure() {
  5007. fn wait_for_duplicate_fork_frozen(ledger_path: &Path, dup_slot: Slot) -> Hash {
  5008. // Ensure all the slots <= dup_slot are also full so we know we can replay up to dup_slot
  5009. // on restart
  5010. info!("Waiting to receive and replay entire duplicate fork with tip {dup_slot}");
  5011. loop {
  5012. let duplicate_fork_validator_blockstore = open_blockstore(ledger_path);
  5013. if let Some(frozen_hash) = duplicate_fork_validator_blockstore.get_bank_hash(dup_slot) {
  5014. return frozen_hash;
  5015. }
  5016. sleep(Duration::from_millis(1000));
  5017. }
  5018. }
  5019. fn clear_ledger_and_tower(ledger_path: &Path, pubkey: &Pubkey, start_slot: Slot) {
  5020. remove_tower_if_exists(ledger_path, pubkey);
  5021. let blockstore = open_blockstore(ledger_path);
  5022. purge_slots_with_count(&blockstore, start_slot, 1000);
  5023. {
  5024. // Remove all duplicate proofs so that this dup_slot will vote on the `dup_slot`.
  5025. while let Some((proof_slot, _)) = blockstore.get_first_duplicate_proof() {
  5026. blockstore.remove_slot_duplicate_proof(proof_slot).unwrap();
  5027. }
  5028. }
  5029. }
  5030. fn restart_dup_validator(
  5031. cluster: &mut LocalCluster,
  5032. mut duplicate_fork_validator_info: ClusterValidatorInfo,
  5033. pubkey: &Pubkey,
  5034. dup_slot: Slot,
  5035. dup_shred1: &Shred,
  5036. dup_shred2: &Shred,
  5037. ) {
  5038. let disable_turbine = Arc::new(AtomicBool::new(true));
  5039. duplicate_fork_validator_info.config.voting_disabled = false;
  5040. duplicate_fork_validator_info.config.turbine_disabled = disable_turbine.clone();
  5041. info!("Restarting node: {pubkey}");
  5042. cluster.restart_node(
  5043. pubkey,
  5044. duplicate_fork_validator_info,
  5045. SocketAddrSpace::Unspecified,
  5046. );
  5047. let ledger_path = cluster.ledger_path(pubkey);
  5048. // Lift the partition after `pubkey` votes on the `dup_slot`
  5049. info!("Waiting on duplicate fork to vote on duplicate slot: {dup_slot}");
  5050. loop {
  5051. let last_vote = last_vote_in_tower(&ledger_path, pubkey);
  5052. if let Some((latest_vote_slot, _hash)) = last_vote {
  5053. info!("latest vote: {latest_vote_slot}");
  5054. if latest_vote_slot == dup_slot {
  5055. break;
  5056. }
  5057. }
  5058. sleep(Duration::from_millis(1000));
  5059. }
  5060. disable_turbine.store(false, Ordering::Relaxed);
  5061. // Send the validator the other version of the shred so they realize it's duplicate
  5062. info!("Resending duplicate shreds to duplicate fork validator");
  5063. cluster.send_shreds_to_validator(vec![dup_shred1, dup_shred2], pubkey);
  5064. // Check the validator detected a duplicate proof
  5065. info!("Waiting on duplicate fork validator to see duplicate shreds and make a proof",);
  5066. loop {
  5067. let duplicate_fork_validator_blockstore = open_blockstore(&ledger_path);
  5068. if let Some(dup_proof) = duplicate_fork_validator_blockstore.get_first_duplicate_proof()
  5069. {
  5070. assert_eq!(dup_proof.0, dup_slot);
  5071. break;
  5072. }
  5073. sleep(Duration::from_millis(1000));
  5074. }
  5075. }
  5076. solana_logger::setup_with_default(RUST_LOG_FILTER);
  5077. let validator_keypairs = [
  5078. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  5079. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  5080. "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
  5081. "2XFPyuzPuXMsPnkH98UNcQpfA7M4b2TUhRxcWEoWjy4M6ojQ7HGJSvotktEVbaq49Qxt16wUjdqvSJc6ecbFfZwj",
  5082. ]
  5083. .iter()
  5084. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  5085. .collect::<Vec<_>>();
  5086. let validators = validator_keypairs
  5087. .iter()
  5088. .map(|(kp, _)| kp.pubkey())
  5089. .collect::<Vec<_>>();
  5090. // Create 4 nodes:
  5091. // 1) Two nodes that sum to > DUPLICATE_THRESHOLD but < 2/3+ supermajority. It's important both
  5092. // of them individually have <= DUPLICATE_THRESHOLD to avoid duplicate confirming their own blocks
  5093. // immediately upon voting
  5094. // 2) One with <= SWITCHING_THRESHOLD so that validator from 1) can't switch to it
  5095. // 3) One bad leader to make duplicate slots
  5096. let total_stake = 100 * DEFAULT_NODE_STAKE;
  5097. let target_switch_fork_stake = (total_stake as f64 * SWITCH_FORK_THRESHOLD) as u64;
  5098. // duplicate_fork_node1_stake + duplicate_fork_node2_stake > DUPLICATE_THRESHOLD. Don't want
  5099. // one node with > DUPLICATE_THRESHOLD, otherwise they will automatically duplicate confirm a
  5100. // slot when they vote, which will prevent them from resetting to an earlier ancestor when they
  5101. // later discover that slot as duplicate.
  5102. let duplicate_fork_node1_stake = (total_stake as f64 * DUPLICATE_THRESHOLD) as u64;
  5103. let duplicate_fork_node2_stake = 1;
  5104. let duplicate_leader_stake = total_stake
  5105. - target_switch_fork_stake
  5106. - duplicate_fork_node1_stake
  5107. - duplicate_fork_node2_stake;
  5108. assert!(
  5109. duplicate_fork_node1_stake + duplicate_fork_node2_stake
  5110. > (total_stake as f64 * DUPLICATE_THRESHOLD) as u64
  5111. );
  5112. assert!(duplicate_fork_node1_stake <= (total_stake as f64 * DUPLICATE_THRESHOLD) as u64);
  5113. assert!(duplicate_fork_node2_stake <= (total_stake as f64 * DUPLICATE_THRESHOLD) as u64);
  5114. let node_stakes = vec![
  5115. duplicate_leader_stake,
  5116. target_switch_fork_stake,
  5117. duplicate_fork_node1_stake,
  5118. duplicate_fork_node2_stake,
  5119. ];
  5120. let (
  5121. // Has to be first in order to be picked as the duplicate leader
  5122. duplicate_leader_validator_pubkey,
  5123. target_switch_fork_validator_pubkey,
  5124. duplicate_fork_validator1_pubkey,
  5125. duplicate_fork_validator2_pubkey,
  5126. ) = (validators[0], validators[1], validators[2], validators[3]);
  5127. info!(
  5128. "duplicate_fork_validator1_pubkey: {duplicate_fork_validator1_pubkey}, \
  5129. duplicate_fork_validator2_pubkey: {duplicate_fork_validator2_pubkey}, \
  5130. target_switch_fork_validator_pubkey: {target_switch_fork_validator_pubkey}, \
  5131. duplicate_leader_validator_pubkey: {duplicate_leader_validator_pubkey}",
  5132. );
  5133. let validator_to_slots = vec![
  5134. (duplicate_leader_validator_pubkey, 52),
  5135. (target_switch_fork_validator_pubkey, 8),
  5136. // The ideal sequence of events for the `duplicate_fork_validator1_pubkey` validator would go:
  5137. // 1. Vote for duplicate block `D`
  5138. // 2. See `D` is duplicate, remove from fork choice and reset to ancestor `A`, potentially generating a fork off that ancestor
  5139. // 3. See `D` is duplicate confirmed, but because of the bug fixed by https://github.com/solana-labs/solana/pull/28172
  5140. // where we disallow resetting to a slot which matches the last vote slot, we still don't build off `D`,
  5141. // and continue building on `A`.
  5142. //
  5143. // The `target_switch_fork_validator_pubkey` fork is necessary in 2. to force the validator stall trying to switch
  5144. // vote on that other fork and prevent the validator from making a freebie vote from `A` and allowing consensus to continue.
  5145. // It's important we don't give the `duplicate_fork_validator1_pubkey` leader slots until a certain number
  5146. // of slots have elapsed to ensure:
  5147. // 1. We have ample time to ensure he doesn't have a chance to make a block until after 2 when they see the block is duplicate.
  5148. // Otherwise, they'll build the block on top of the duplicate block, which will possibly include a vote for the duplicate block.
  5149. // We want to avoid this because this will make fork choice pick the duplicate block.
  5150. // 2. Ensure the `duplicate_fork_validator1_pubkey` sees the target switch fork before it can make another vote
  5151. // on any forks he himself generates from A. Otherwise, he will make a freebie vote on his own fork from `A` and
  5152. // consensus will continue on that fork.
  5153. // Give the duplicate fork validator plenty of leader slots after the initial delay to prevent
  5154. // 1. Switch fork from getting locked out for too long
  5155. // 2. A lot of consecutive slots in which to build up lockout in tower and make new roots
  5156. // to resolve the partition
  5157. (duplicate_fork_validator1_pubkey, 500),
  5158. ];
  5159. let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
  5160. // 1) Set up the cluster
  5161. let (duplicate_slot_sender, duplicate_slot_receiver) = unbounded();
  5162. let validator_configs = validator_keypairs
  5163. .into_iter()
  5164. .map(|(validator_keypair, in_genesis)| {
  5165. let pubkey = validator_keypair.pubkey();
  5166. // Only allow the leader to vote so that no version gets duplicate confirmed.
  5167. // This is to avoid the leader dumping his own block.
  5168. let voting_disabled = { pubkey != duplicate_leader_validator_pubkey };
  5169. ValidatorTestConfig {
  5170. validator_keypair,
  5171. validator_config: ValidatorConfig {
  5172. voting_disabled,
  5173. ..ValidatorConfig::default_for_test()
  5174. },
  5175. in_genesis,
  5176. }
  5177. })
  5178. .collect();
  5179. let (mut cluster, _validator_keypairs) = test_faulty_node(
  5180. BroadcastStageType::BroadcastDuplicates(BroadcastDuplicatesConfig {
  5181. partition: ClusterPartition::Pubkey(vec![
  5182. // Don't include the other dup validator here, otherwise
  5183. // this dup version will have enough to be duplicate confirmed and
  5184. // will cause the dup leader to try and dump its own slot,
  5185. // crashing before it can signal the duplicate slot via the
  5186. // `duplicate_slot_receiver` below
  5187. duplicate_fork_validator1_pubkey,
  5188. ]),
  5189. duplicate_slot_sender: Some(duplicate_slot_sender),
  5190. }),
  5191. node_stakes,
  5192. Some(validator_configs),
  5193. Some(FixedSchedule {
  5194. leader_schedule: Arc::new(leader_schedule),
  5195. }),
  5196. );
  5197. // Kill two validators that might duplicate confirm the duplicate block
  5198. info!("Killing unnecessary validators");
  5199. let duplicate_fork_validator2_ledger_path =
  5200. cluster.ledger_path(&duplicate_fork_validator2_pubkey);
  5201. let duplicate_fork_validator2_info = cluster.exit_node(&duplicate_fork_validator2_pubkey);
  5202. let target_switch_fork_validator_ledger_path =
  5203. cluster.ledger_path(&target_switch_fork_validator_pubkey);
  5204. let mut target_switch_fork_validator_info =
  5205. cluster.exit_node(&target_switch_fork_validator_pubkey);
  5206. // 2) Wait for a duplicate slot to land on both validators and for the target switch
  5207. // fork validator to get another version of the slot. Also ensure all versions of
  5208. // the block are playable
  5209. let dup_slot = duplicate_slot_receiver
  5210. .recv_timeout(Duration::from_millis(30_000))
  5211. .expect("Duplicate leader failed to make a duplicate slot in allotted time");
  5212. // Make sure both validators received and replay the complete blocks
  5213. let dup_frozen_hash = wait_for_duplicate_fork_frozen(
  5214. &cluster.ledger_path(&duplicate_fork_validator1_pubkey),
  5215. dup_slot,
  5216. );
  5217. let original_frozen_hash = wait_for_duplicate_fork_frozen(
  5218. &cluster.ledger_path(&duplicate_leader_validator_pubkey),
  5219. dup_slot,
  5220. );
  5221. assert_ne!(
  5222. original_frozen_hash, dup_frozen_hash,
  5223. "Duplicate leader and partition target got same hash: {original_frozen_hash}",
  5224. );
  5225. // 3) Force `duplicate_fork_validator1_pubkey` to see a duplicate proof
  5226. info!("Waiting for duplicate proof for slot: {dup_slot}");
  5227. let duplicate_proof = {
  5228. // Grab the other version of the slot from the `duplicate_leader_validator_pubkey`
  5229. // which we confirmed to have a different version of the frozen hash in the loop
  5230. // above
  5231. let ledger_path = cluster.ledger_path(&duplicate_leader_validator_pubkey);
  5232. let blockstore = open_blockstore(&ledger_path);
  5233. let dup_shred = blockstore
  5234. .get_data_shreds_for_slot(dup_slot, 0)
  5235. .unwrap()
  5236. .pop()
  5237. .unwrap();
  5238. info!(
  5239. "Sending duplicate shred: {:?} to {:?}",
  5240. dup_shred.signature(),
  5241. duplicate_fork_validator1_pubkey
  5242. );
  5243. cluster.send_shreds_to_validator(vec![&dup_shred], &duplicate_fork_validator1_pubkey);
  5244. wait_for_duplicate_proof(
  5245. &cluster.ledger_path(&duplicate_fork_validator1_pubkey),
  5246. dup_slot,
  5247. )
  5248. .unwrap_or_else(|| panic!("Duplicate proof for slot {dup_slot} not found"))
  5249. };
  5250. // 3) Kill all the validators
  5251. info!("Killing remaining validators");
  5252. let duplicate_fork_validator1_ledger_path =
  5253. cluster.ledger_path(&duplicate_fork_validator1_pubkey);
  5254. let duplicate_fork_validator1_info = cluster.exit_node(&duplicate_fork_validator1_pubkey);
  5255. let duplicate_leader_ledger_path = cluster.ledger_path(&duplicate_leader_validator_pubkey);
  5256. cluster.exit_node(&duplicate_leader_validator_pubkey);
  5257. let dup_shred1 = Shred::new_from_serialized_shred(duplicate_proof.shred1.clone()).unwrap();
  5258. let dup_shred2 = Shred::new_from_serialized_shred(duplicate_proof.shred2).unwrap();
  5259. assert_eq!(dup_shred1.slot(), dup_shred2.slot());
  5260. assert_eq!(dup_shred1.slot(), dup_slot);
  5261. // Purge everything including the `dup_slot` from the `target_switch_fork_validator_pubkey`
  5262. info!("Purging towers and ledgers for: {duplicate_leader_validator_pubkey:?}");
  5263. Blockstore::destroy(&target_switch_fork_validator_ledger_path).unwrap();
  5264. {
  5265. let blockstore1 = open_blockstore(&duplicate_leader_ledger_path);
  5266. let blockstore2 = open_blockstore(&target_switch_fork_validator_ledger_path);
  5267. copy_blocks(dup_slot, &blockstore1, &blockstore2, false);
  5268. }
  5269. clear_ledger_and_tower(
  5270. &target_switch_fork_validator_ledger_path,
  5271. &target_switch_fork_validator_pubkey,
  5272. dup_slot,
  5273. );
  5274. info!("Purging towers and ledgers for: {duplicate_fork_validator1_pubkey:?}");
  5275. clear_ledger_and_tower(
  5276. &duplicate_fork_validator1_ledger_path,
  5277. &duplicate_fork_validator1_pubkey,
  5278. dup_slot + 1,
  5279. );
  5280. info!("Purging towers and ledgers for: {duplicate_fork_validator2_pubkey:?}");
  5281. // Copy validator 1's ledger to validator 2 so that they have the same version
  5282. // of the duplicate slot
  5283. clear_ledger_and_tower(
  5284. &duplicate_fork_validator2_ledger_path,
  5285. &duplicate_fork_validator2_pubkey,
  5286. dup_slot,
  5287. );
  5288. Blockstore::destroy(&duplicate_fork_validator2_ledger_path).unwrap();
  5289. {
  5290. let blockstore1 = open_blockstore(&duplicate_fork_validator1_ledger_path);
  5291. let blockstore2 = open_blockstore(&duplicate_fork_validator2_ledger_path);
  5292. copy_blocks(dup_slot, &blockstore1, &blockstore2, false);
  5293. }
  5294. // Set entrypoint to `target_switch_fork_validator_pubkey` so we can run discovery in gossip even without the
  5295. // bad leader
  5296. cluster.set_entry_point(target_switch_fork_validator_info.info.contact_info.clone());
  5297. // 4) Restart `target_switch_fork_validator_pubkey`, and ensure they vote on their own leader slot
  5298. // that's not descended from the duplicate slot
  5299. info!("Restarting switch fork node");
  5300. target_switch_fork_validator_info.config.voting_disabled = false;
  5301. cluster.restart_node(
  5302. &target_switch_fork_validator_pubkey,
  5303. target_switch_fork_validator_info,
  5304. SocketAddrSpace::Unspecified,
  5305. );
  5306. let target_switch_fork_validator_ledger_path =
  5307. cluster.ledger_path(&target_switch_fork_validator_pubkey);
  5308. info!("Waiting for switch fork to make block past duplicate fork");
  5309. loop {
  5310. let last_vote = wait_for_last_vote_in_tower_to_land_in_ledger(
  5311. &target_switch_fork_validator_ledger_path,
  5312. &target_switch_fork_validator_pubkey,
  5313. );
  5314. if let Some(latest_vote_slot) = last_vote {
  5315. if latest_vote_slot > dup_slot {
  5316. let blockstore = open_blockstore(&target_switch_fork_validator_ledger_path);
  5317. let ancestor_slots: HashSet<Slot> =
  5318. AncestorIterator::new_inclusive(latest_vote_slot, &blockstore).collect();
  5319. assert!(ancestor_slots.contains(&latest_vote_slot));
  5320. assert!(ancestor_slots.contains(&0));
  5321. assert!(!ancestor_slots.contains(&dup_slot));
  5322. break;
  5323. }
  5324. }
  5325. sleep(Duration::from_millis(1000));
  5326. }
  5327. // Now restart the duplicate validators
  5328. // Start the node with partition enabled so they don't see the `target_switch_fork_validator_pubkey`
  5329. // before voting on the duplicate block
  5330. info!("Restarting duplicate fork node");
  5331. // Ensure `duplicate_fork_validator1_pubkey` votes before starting up `duplicate_fork_validator2_pubkey`
  5332. // to prevent them seeing `dup_slot` as duplicate confirmed before voting.
  5333. restart_dup_validator(
  5334. &mut cluster,
  5335. duplicate_fork_validator1_info,
  5336. &duplicate_fork_validator1_pubkey,
  5337. dup_slot,
  5338. &dup_shred1,
  5339. &dup_shred2,
  5340. );
  5341. restart_dup_validator(
  5342. &mut cluster,
  5343. duplicate_fork_validator2_info,
  5344. &duplicate_fork_validator2_pubkey,
  5345. dup_slot,
  5346. &dup_shred1,
  5347. &dup_shred2,
  5348. );
  5349. // Wait for the `duplicate_fork_validator1_pubkey` to make another leader block on top
  5350. // of the duplicate fork which includes their own vote for `dup_block`. This
  5351. // should make the duplicate fork the heaviest
  5352. info!("Waiting on duplicate fork validator to generate block on top of duplicate fork",);
  5353. loop {
  5354. let duplicate_fork_validator_blockstore =
  5355. open_blockstore(&cluster.ledger_path(&duplicate_fork_validator1_pubkey));
  5356. let meta = duplicate_fork_validator_blockstore
  5357. .meta(dup_slot)
  5358. .unwrap()
  5359. .unwrap();
  5360. if !meta.next_slots.is_empty() {
  5361. info!(
  5362. "duplicate fork validator saw new slots: {:?} on top of duplicate slot",
  5363. meta.next_slots
  5364. );
  5365. break;
  5366. }
  5367. sleep(Duration::from_millis(1000));
  5368. }
  5369. // Check that the cluster is making progress
  5370. cluster.check_for_new_roots(
  5371. 16,
  5372. "test_duplicate_shreds_switch_failure",
  5373. SocketAddrSpace::Unspecified,
  5374. );
  5375. }
  5376. #[test]
  5377. #[serial]
  5378. fn test_randomly_mixed_block_verification_methods_between_bootstrap_and_not() {
  5379. // tailored logging just to see two block verification methods are working correctly
  5380. solana_logger::setup_with_default(
  5381. "solana_metrics::metrics=warn,solana_core=warn,\
  5382. solana_runtime::installed_scheduler_pool=trace,solana_ledger::blockstore_processor=debug,\
  5383. info",
  5384. );
  5385. let num_nodes = BlockVerificationMethod::COUNT;
  5386. let mut config =
  5387. ClusterConfig::new_with_equal_stakes(num_nodes, DEFAULT_MINT_LAMPORTS, DEFAULT_NODE_STAKE);
  5388. // Overwrite block_verification_method with shuffled variants
  5389. let mut methods = BlockVerificationMethod::iter().collect::<Vec<_>>();
  5390. methods.shuffle(&mut rand::thread_rng());
  5391. for (validator_config, method) in config.validator_configs.iter_mut().zip_eq(methods) {
  5392. validator_config.block_verification_method = method;
  5393. }
  5394. let local = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
  5395. cluster_tests::spend_and_verify_all_nodes(
  5396. &local.entry_point_info,
  5397. &local.funding_keypair,
  5398. num_nodes,
  5399. HashSet::new(),
  5400. SocketAddrSpace::Unspecified,
  5401. &local.connection_cache,
  5402. );
  5403. }
  5404. /// Forks previous marked invalid should be marked as such in fork choice on restart
  5405. #[test]
  5406. #[ignore]
  5407. #[serial]
  5408. fn test_invalid_forks_persisted_on_restart() {
  5409. solana_logger::setup_with("info,solana_metrics=off,solana_ledger=off");
  5410. let dup_slot = 10;
  5411. let validator_keypairs = [
  5412. "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
  5413. "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
  5414. ]
  5415. .iter()
  5416. .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
  5417. .collect::<Vec<_>>();
  5418. let majority_keypair = validator_keypairs[1].0.clone();
  5419. let validators = validator_keypairs
  5420. .iter()
  5421. .map(|(kp, _)| kp.pubkey())
  5422. .collect::<Vec<_>>();
  5423. let node_stakes = vec![DEFAULT_NODE_STAKE, 100 * DEFAULT_NODE_STAKE];
  5424. let (target_pubkey, majority_pubkey) = (validators[0], validators[1]);
  5425. // Need majority validator to make the dup_slot
  5426. let validator_to_slots = vec![
  5427. (majority_pubkey, dup_slot as usize + 6),
  5428. (target_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
  5429. ];
  5430. let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
  5431. let mut default_config = ValidatorConfig::default_for_test();
  5432. default_config.fixed_leader_schedule = Some(FixedSchedule {
  5433. leader_schedule: Arc::new(leader_schedule),
  5434. });
  5435. let mut validator_configs = make_identical_validator_configs(&default_config, 2);
  5436. // Majority shouldn't duplicate confirm anything
  5437. validator_configs[1].voting_disabled = true;
  5438. let mut cluster = LocalCluster::new(
  5439. &mut ClusterConfig {
  5440. mint_lamports: DEFAULT_MINT_LAMPORTS + node_stakes.iter().sum::<u64>(),
  5441. validator_configs,
  5442. node_stakes,
  5443. validator_keys: Some(validator_keypairs),
  5444. skip_warmup_slots: true,
  5445. ..ClusterConfig::default()
  5446. },
  5447. SocketAddrSpace::Unspecified,
  5448. );
  5449. let target_ledger_path = cluster.ledger_path(&target_pubkey);
  5450. // Wait for us to vote past duplicate slot
  5451. let timer = Instant::now();
  5452. loop {
  5453. if let Some(slot) =
  5454. wait_for_last_vote_in_tower_to_land_in_ledger(&target_ledger_path, &target_pubkey)
  5455. {
  5456. if slot > dup_slot {
  5457. break;
  5458. }
  5459. }
  5460. assert!(
  5461. timer.elapsed() < Duration::from_secs(30),
  5462. "Did not make more than 10 blocks in 30 seconds"
  5463. );
  5464. sleep(Duration::from_millis(100));
  5465. }
  5466. // Send duplicate
  5467. let parent = {
  5468. let blockstore = open_blockstore(&target_ledger_path);
  5469. let parent = blockstore
  5470. .meta(dup_slot)
  5471. .unwrap()
  5472. .unwrap()
  5473. .parent_slot
  5474. .unwrap();
  5475. let entries = create_ticks(
  5476. 64 * (std::cmp::max(1, dup_slot - parent)),
  5477. 0,
  5478. cluster.genesis_config.hash(),
  5479. );
  5480. let last_hash = entries.last().unwrap().hash;
  5481. let version = solana_shred_version::version_from_hash(&last_hash);
  5482. let dup_shreds = Shredder::new(dup_slot, parent, 0, version)
  5483. .unwrap()
  5484. .entries_to_merkle_shreds_for_tests(
  5485. &majority_keypair,
  5486. &entries,
  5487. true, // is_full_slot
  5488. None, // chained_merkle_root
  5489. 0, // next_shred_index,
  5490. 0, // next_code_index
  5491. &ReedSolomonCache::default(),
  5492. &mut ProcessShredsStats::default(),
  5493. )
  5494. .0;
  5495. info!("Sending duplicate shreds for {dup_slot}");
  5496. cluster.send_shreds_to_validator(dup_shreds.iter().collect(), &target_pubkey);
  5497. wait_for_duplicate_proof(&target_ledger_path, dup_slot)
  5498. .expect("Duplicate proof for {dup_slot} not found");
  5499. parent
  5500. };
  5501. info!("Duplicate proof for {dup_slot} has landed, restarting node");
  5502. let info = cluster.exit_node(&target_pubkey);
  5503. {
  5504. let blockstore = open_blockstore(&target_ledger_path);
  5505. purge_slots_with_count(&blockstore, dup_slot + 5, 100);
  5506. }
  5507. // Restart, should create an entirely new fork
  5508. cluster.restart_node(&target_pubkey, info, SocketAddrSpace::Unspecified);
  5509. info!("Waiting for fork built off {parent}");
  5510. let timer = Instant::now();
  5511. let mut checked_children: HashSet<Slot> = HashSet::default();
  5512. let mut done = false;
  5513. while !done {
  5514. let blockstore = open_blockstore(&target_ledger_path);
  5515. let parent_meta = blockstore.meta(parent).unwrap().expect("Meta must exist");
  5516. for child in parent_meta.next_slots {
  5517. if checked_children.contains(&child) {
  5518. continue;
  5519. }
  5520. if blockstore.is_full(child) {
  5521. let shreds = blockstore
  5522. .get_data_shreds_for_slot(child, 0)
  5523. .expect("Child is full");
  5524. let mut is_our_block = true;
  5525. for shred in shreds {
  5526. is_our_block &= shred.verify(&target_pubkey);
  5527. }
  5528. if is_our_block {
  5529. done = true;
  5530. }
  5531. checked_children.insert(child);
  5532. }
  5533. }
  5534. assert!(
  5535. timer.elapsed() < Duration::from_secs(30),
  5536. "Did not create a new fork off parent {parent} in 30 seconds after restart"
  5537. );
  5538. sleep(Duration::from_millis(100));
  5539. }
  5540. }
  5541. #[test]
  5542. #[serial]
  5543. fn test_restart_node_alpenglow() {
  5544. solana_logger::setup_with_default(AG_DEBUG_LOG_FILTER);
  5545. let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH * 2;
  5546. let ticks_per_slot = 16;
  5547. let validator_config = ValidatorConfig::default_for_test();
  5548. let mut cluster = LocalCluster::new_alpenglow(
  5549. &mut ClusterConfig {
  5550. node_stakes: vec![DEFAULT_NODE_STAKE],
  5551. validator_configs: vec![safe_clone_config(&validator_config)],
  5552. ticks_per_slot,
  5553. slots_per_epoch,
  5554. stakers_slot_offset: slots_per_epoch,
  5555. skip_warmup_slots: true,
  5556. ..ClusterConfig::default()
  5557. },
  5558. SocketAddrSpace::Unspecified,
  5559. );
  5560. let nodes = cluster.get_node_pubkeys();
  5561. cluster_tests::sleep_n_epochs(
  5562. 1.0,
  5563. &cluster.genesis_config.poh_config,
  5564. clock::DEFAULT_TICKS_PER_SLOT,
  5565. slots_per_epoch,
  5566. );
  5567. info!("Restarting node");
  5568. cluster.exit_restart_node(&nodes[0], validator_config, SocketAddrSpace::Unspecified);
  5569. cluster_tests::sleep_n_epochs(
  5570. 0.5,
  5571. &cluster.genesis_config.poh_config,
  5572. clock::DEFAULT_TICKS_PER_SLOT,
  5573. slots_per_epoch,
  5574. );
  5575. cluster_tests::send_many_transactions(
  5576. &cluster.entry_point_info,
  5577. &cluster.funding_keypair,
  5578. &cluster.connection_cache,
  5579. 10,
  5580. 1,
  5581. );
  5582. }
  5583. /// We start 2 nodes, where the first node A holds 90% of the stake
  5584. ///
  5585. /// We let A run by itself, and ensure that B can join and rejoin the network
  5586. /// through fast forwarding their slot on receiving A's finalization certificate
  5587. #[test]
  5588. #[serial]
  5589. fn test_alpenglow_imbalanced_stakes_catchup() {
  5590. solana_logger::setup_with_default(AG_DEBUG_LOG_FILTER);
  5591. // Create node stakes
  5592. let slots_per_epoch = 512;
  5593. let total_stake = 2 * DEFAULT_NODE_STAKE;
  5594. let tenth_stake = total_stake / 10;
  5595. let node_a_stake = 9 * tenth_stake;
  5596. let node_b_stake = total_stake - node_a_stake;
  5597. let node_stakes = vec![node_a_stake, node_b_stake];
  5598. let num_nodes = node_stakes.len();
  5599. // Create leader schedule with A and B as leader 72/28
  5600. let (leader_schedule, validator_keys) =
  5601. create_custom_leader_schedule_with_random_keys(&[72, 28]);
  5602. let leader_schedule = FixedSchedule {
  5603. leader_schedule: Arc::new(leader_schedule),
  5604. };
  5605. // Create our UDP socket to listen to votes
  5606. let vote_listener_addr = solana_net_utils::bind_to_localhost().unwrap();
  5607. let mut validator_config = ValidatorConfig::default_for_test();
  5608. validator_config.fixed_leader_schedule = Some(leader_schedule);
  5609. validator_config.voting_service_test_override = Some(VotingServiceOverride {
  5610. additional_listeners: vec![vote_listener_addr.local_addr().unwrap()],
  5611. alpenglow_port_override: AlpenglowPortOverride::default(),
  5612. });
  5613. // Collect node pubkeys
  5614. let node_pubkeys = validator_keys
  5615. .iter()
  5616. .map(|key| key.pubkey())
  5617. .collect::<Vec<_>>();
  5618. // Cluster config
  5619. let mut cluster_config = ClusterConfig {
  5620. mint_lamports: total_stake,
  5621. node_stakes,
  5622. validator_configs: make_identical_validator_configs(&validator_config, num_nodes),
  5623. validator_keys: Some(
  5624. validator_keys
  5625. .iter()
  5626. .cloned()
  5627. .zip(iter::repeat_with(|| true))
  5628. .collect(),
  5629. ),
  5630. slots_per_epoch,
  5631. stakers_slot_offset: slots_per_epoch,
  5632. ticks_per_slot: DEFAULT_TICKS_PER_SLOT,
  5633. skip_warmup_slots: true,
  5634. ..ClusterConfig::default()
  5635. };
  5636. // Create local cluster
  5637. let mut cluster =
  5638. LocalCluster::new_alpenglow(&mut cluster_config, SocketAddrSpace::Unspecified);
  5639. // Ensure all nodes are voting
  5640. cluster.check_for_new_processed(
  5641. 8,
  5642. "test_alpenglow_imbalanced_stakes_catchup",
  5643. SocketAddrSpace::Unspecified,
  5644. );
  5645. info!("exiting node B");
  5646. let b_info = cluster.exit_node(&node_pubkeys[1]);
  5647. // Let A make roots by itself
  5648. cluster.check_for_new_roots(
  5649. 8,
  5650. "test_alpenglow_imbalanced_stakes_catchup",
  5651. SocketAddrSpace::Unspecified,
  5652. );
  5653. info!("restarting node B");
  5654. cluster.restart_node(&node_pubkeys[1], b_info, SocketAddrSpace::Unspecified);
  5655. // Ensure all nodes are voting
  5656. cluster.check_for_new_notarized_votes(
  5657. 16,
  5658. "test_alpenglow_imbalanced_stakes_catchup",
  5659. SocketAddrSpace::Unspecified,
  5660. vote_listener_addr,
  5661. );
  5662. }
  5663. fn broadcast_vote(
  5664. bls_message: BLSMessage,
  5665. tpu_socket_addrs: &[std::net::SocketAddr],
  5666. additional_listeners: Option<&Vec<std::net::SocketAddr>>,
  5667. connection_cache: Arc<ConnectionCache>,
  5668. ) {
  5669. for tpu_socket_addr in tpu_socket_addrs
  5670. .iter()
  5671. .chain(additional_listeners.unwrap_or(&vec![]).iter())
  5672. {
  5673. let buf = bincode::serialize(&bls_message).unwrap();
  5674. let client = connection_cache.get_connection(tpu_socket_addr);
  5675. client.send_data_async(buf).unwrap_or_else(|_| {
  5676. panic!("Failed to broadcast vote to {}", tpu_socket_addr);
  5677. });
  5678. }
  5679. }
  5680. fn _vote_to_tuple(vote: &Vote) -> (u64, u8) {
  5681. let discriminant = if vote.is_notarization() {
  5682. 0
  5683. } else if vote.is_finalize() {
  5684. 1
  5685. } else if vote.is_skip() {
  5686. 2
  5687. } else if vote.is_notarize_fallback() {
  5688. 3
  5689. } else if vote.is_skip_fallback() {
  5690. 4
  5691. } else {
  5692. panic!("Invalid vote type: {:?}", vote)
  5693. };
  5694. let slot = vote.slot();
  5695. (slot, discriminant)
  5696. }
  5697. /// This test validates the Alpenglow consensus protocol's ability to maintain liveness when a node
  5698. /// needs to issue a NotarizeFallback vote. The test sets up a two-node cluster with a specific
  5699. /// stake distribution to create a scenario where:
  5700. ///
  5701. /// - Node A has 60% of stake minus a small amount (epsilon)
  5702. /// - Node B has 40% of stake plus a small amount (epsilon)
  5703. ///
  5704. /// The test simulates the following sequence:
  5705. /// 1. Node B (as leader) proposes a block for slot 32
  5706. /// 2. Node A is unable to receive the block (simulated via turbine disconnection)
  5707. /// 3. Node A sends Skip votes to both nodes for slot 32
  5708. /// 4. Node B sends Notarize votes to both nodes for slot 32
  5709. /// 5. Node A receives both votes and its certificate pool determines:
  5710. /// - Skip has (60% - epsilon) votes
  5711. /// - Notarize has (40% + epsilon) votes
  5712. /// - Protocol determines it's "SafeToNotar" and issues a NotarizeFallback vote
  5713. /// 6. Node B doesn't issue NotarizeFallback because it already submitted a Notarize
  5714. /// 7. Node B receives Node A's NotarizeFallback vote
  5715. /// 8. Network progresses and maintains liveness after this fallback scenario
  5716. #[test]
  5717. #[serial]
  5718. fn test_alpenglow_ensure_liveness_after_single_notar_fallback() {
  5719. solana_logger::setup_with_default(AG_DEBUG_LOG_FILTER);
  5720. // Configure total stake and stake distribution
  5721. let total_stake = 2 * DEFAULT_NODE_STAKE;
  5722. let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH;
  5723. let node_a_stake = total_stake * 6 / 10 - 1;
  5724. let node_b_stake = total_stake * 4 / 10 + 1;
  5725. let node_stakes = vec![node_a_stake, node_b_stake];
  5726. let num_nodes = node_stakes.len();
  5727. assert_eq!(total_stake, node_a_stake + node_b_stake);
  5728. // Control components
  5729. let node_a_turbine_disabled = Arc::new(AtomicBool::new(false));
  5730. // Create leader schedule
  5731. let (leader_schedule, validator_keys) = create_custom_leader_schedule_with_random_keys(&[0, 4]);
  5732. let leader_schedule = FixedSchedule {
  5733. leader_schedule: Arc::new(leader_schedule),
  5734. };
  5735. // Create our UDP socket to listen to votes
  5736. let vote_listener = solana_net_utils::bind_to_localhost().unwrap();
  5737. // Create validator configs
  5738. let mut validator_config = ValidatorConfig::default_for_test();
  5739. validator_config.fixed_leader_schedule = Some(leader_schedule);
  5740. validator_config.voting_service_test_override = Some(VotingServiceOverride {
  5741. additional_listeners: vec![vote_listener.local_addr().unwrap()],
  5742. alpenglow_port_override: AlpenglowPortOverride::default(),
  5743. });
  5744. let mut validator_configs = make_identical_validator_configs(&validator_config, num_nodes);
  5745. validator_configs[0].turbine_disabled = node_a_turbine_disabled.clone();
  5746. assert_eq!(num_nodes, validator_keys.len());
  5747. // Cluster config
  5748. let mut cluster_config = ClusterConfig {
  5749. mint_lamports: total_stake,
  5750. node_stakes,
  5751. validator_configs,
  5752. validator_keys: Some(
  5753. validator_keys
  5754. .iter()
  5755. .cloned()
  5756. .zip(iter::repeat_with(|| true))
  5757. .collect(),
  5758. ),
  5759. slots_per_epoch,
  5760. stakers_slot_offset: slots_per_epoch,
  5761. ticks_per_slot: DEFAULT_TICKS_PER_SLOT,
  5762. ..ClusterConfig::default()
  5763. };
  5764. // Create local cluster
  5765. let cluster = LocalCluster::new_alpenglow(&mut cluster_config, SocketAddrSpace::Unspecified);
  5766. assert_eq!(cluster.validators.len(), num_nodes);
  5767. // Track Node A's votes and when the test can conclude
  5768. let mut post_experiment_votes = HashMap::new();
  5769. let mut post_experiment_roots = HashSet::new();
  5770. // Start vote listener thread to monitor and control the experiment
  5771. let vote_listener = std::thread::spawn({
  5772. let mut buf = [0_u8; 65_535];
  5773. let mut check_for_roots = false;
  5774. let mut slots_with_skip = HashSet::new();
  5775. move || loop {
  5776. let n_bytes = vote_listener.recv(&mut buf).unwrap();
  5777. let bls_message = bincode::deserialize::<BLSMessage>(&buf[0..n_bytes]).unwrap();
  5778. let BLSMessage::Vote(vote_message) = bls_message else {
  5779. continue;
  5780. };
  5781. let vote = vote_message.vote;
  5782. // Since A has 60% of the stake, it will be node 0, and B will be node 1
  5783. let node_index = vote_message.rank;
  5784. // Once we've received a vote from node B at slot 31, we can start the experiment.
  5785. if vote.slot() == 31 && node_index == 1 {
  5786. node_a_turbine_disabled.store(true, Ordering::Relaxed);
  5787. }
  5788. if vote.slot() >= 32 && node_index == 0 {
  5789. if vote.is_skip() {
  5790. slots_with_skip.insert(vote.slot());
  5791. }
  5792. if !check_for_roots && vote.slot() == 32 && vote.is_notarize_fallback() {
  5793. check_for_roots = true;
  5794. assert!(slots_with_skip.contains(&32)); // skip on slot 32
  5795. }
  5796. }
  5797. // We should see a skip followed by a notar fallback. Once we do, the experiment is
  5798. // complete.
  5799. if check_for_roots {
  5800. node_a_turbine_disabled.store(false, Ordering::Relaxed);
  5801. if vote.is_finalize() {
  5802. let value = post_experiment_votes.entry(vote.slot()).or_insert(vec![]);
  5803. value.push(node_index);
  5804. if value.len() == 2 {
  5805. post_experiment_roots.insert(vote.slot());
  5806. if post_experiment_roots.len() >= 10 {
  5807. break;
  5808. }
  5809. }
  5810. }
  5811. }
  5812. }
  5813. });
  5814. vote_listener.join().unwrap();
  5815. }
  5816. /// Test to validate the Alpenglow consensus protocol's ability to maintain liveness when a node
  5817. /// needs to issue multiple NotarizeFallback votes due to Byzantine behavior and network partitioning.
  5818. ///
  5819. /// This test simulates a complex Byzantine scenario with four nodes having the following stake distribution:
  5820. /// - Node A (Leader): 20% - ε (small epsilon)
  5821. /// - Node B: 40%
  5822. /// - Node C: 20%
  5823. /// - Node D: 20% + ε
  5824. ///
  5825. /// The test validates the protocol's behavior through the following phases:
  5826. ///
  5827. /// ## Phase 1: Initial Network Partition
  5828. /// - Node C's turbine is disabled at slot 50, causing it to miss blocks and vote Skip
  5829. /// - Node A (leader) proposes blocks normally
  5830. /// - Node B initially copies Node A's votes
  5831. /// - Node D copies Node A's votes
  5832. /// - Node C accumulates 10 NotarizeFallback votes while in this steady state
  5833. ///
  5834. /// ## Phase 2: Byzantine Equivocation
  5835. /// After Node C has issued sufficient NotarizeFallback votes, Node A begins equivocating:
  5836. /// - Node A votes for block b1 (original block)
  5837. /// - Node B votes for block b2 (equivocated block with different block_id and bank_hash)
  5838. /// - Node C continues voting Skip but observes conflicting votes
  5839. /// - Node D votes for block b1 (same as Node A)
  5840. ///
  5841. /// This creates a voting distribution where:
  5842. /// - b1 has 40% stake (A: 20%-ε + D: 20%+ε)
  5843. /// - b2 has 40% stake (B: 40%)
  5844. /// - Skip has 20% stake (C: 20%)
  5845. ///
  5846. /// ## Phase 3: Double NotarizeFallback
  5847. /// Node C, observing the conflicting votes, triggers SafeToNotar for both blocks:
  5848. /// - Issues NotarizeFallback for b1 (A's block)
  5849. /// - Issues NotarizeFallback for b2 (B's equivocated block)
  5850. /// - Verifies the block IDs are different due to equivocation
  5851. /// - Continues this pattern until 3 slots have double NotarizeFallback votes
  5852. ///
  5853. /// ## Phase 4: Recovery and Liveness
  5854. /// After confirming the double NotarizeFallback behavior:
  5855. /// - Node A stops equivocating
  5856. /// - Node C's turbine is re-enabled
  5857. /// - Network returns to normal operation
  5858. /// - Test verifies 10+ new roots are created, ensuring liveness is maintained
  5859. ///
  5860. /// ## Key Validation Points
  5861. /// - SafeToNotar triggers correctly when conflicting blocks have sufficient stake
  5862. /// - NotarizeFallback votes are issued for both equivocated blocks
  5863. /// - Network maintains liveness despite Byzantine behavior and temporary partitions
  5864. /// - Protocol correctly handles the edge case where multiple blocks have equal stake
  5865. /// - Recovery is possible once Byzantine behavior stops
  5866. ///
  5867. /// NOTE: we could get away with just three nodes in this test, assigning A a total of 40% stake,
  5868. /// since node D *always* copy votes node A. But, doing so technically makes all nodes have >= 20%
  5869. /// stake, meaning that none of them is allowed to be Byzantine. We opt to be a bit more explicit in
  5870. /// this test.
  5871. #[test]
  5872. #[serial]
  5873. #[ignore]
  5874. fn test_alpenglow_ensure_liveness_after_double_notar_fallback() {
  5875. solana_logger::setup_with_default(AG_DEBUG_LOG_FILTER);
  5876. // Configure total stake and stake distribution
  5877. const TOTAL_STAKE: u64 = 10 * DEFAULT_NODE_STAKE;
  5878. const SLOTS_PER_EPOCH: u64 = MINIMUM_SLOTS_PER_EPOCH;
  5879. // Node stakes with slight imbalance to trigger fallback behavior
  5880. let node_stakes = [
  5881. TOTAL_STAKE * 2 / 10 - 1, // Node A (Leader): 20% - ε
  5882. TOTAL_STAKE * 4 / 10, // Node B: 40%
  5883. TOTAL_STAKE * 2 / 10, // Node C: 20%
  5884. TOTAL_STAKE * 2 / 10 + 1, // Node D: 20% + ε
  5885. ];
  5886. assert_eq!(TOTAL_STAKE, node_stakes.iter().sum::<u64>());
  5887. // Control components
  5888. let node_c_turbine_disabled = Arc::new(AtomicBool::new(false));
  5889. // Create leader schedule with Node A as primary leader
  5890. let (leader_schedule, validator_keys) =
  5891. create_custom_leader_schedule_with_random_keys(&[4, 0, 0, 0]);
  5892. let leader_schedule = FixedSchedule {
  5893. leader_schedule: Arc::new(leader_schedule),
  5894. };
  5895. // Create UDP socket to listen to votes
  5896. let vote_listener_socket = solana_net_utils::bind_to_localhost().unwrap();
  5897. // Create validator configs
  5898. let mut validator_config = ValidatorConfig::default_for_test();
  5899. validator_config.fixed_leader_schedule = Some(leader_schedule);
  5900. validator_config.voting_service_test_override = Some(VotingServiceOverride {
  5901. additional_listeners: vec![vote_listener_socket.local_addr().unwrap()],
  5902. alpenglow_port_override: AlpenglowPortOverride::default(),
  5903. });
  5904. let mut validator_configs =
  5905. make_identical_validator_configs(&validator_config, node_stakes.len());
  5906. validator_configs[2].turbine_disabled = node_c_turbine_disabled.clone();
  5907. // Cluster config
  5908. let mut cluster_config = ClusterConfig {
  5909. mint_lamports: TOTAL_STAKE,
  5910. node_stakes: node_stakes.to_vec(),
  5911. validator_configs,
  5912. validator_keys: Some(
  5913. validator_keys
  5914. .iter()
  5915. .cloned()
  5916. .zip(std::iter::repeat(true))
  5917. .collect(),
  5918. ),
  5919. slots_per_epoch: SLOTS_PER_EPOCH,
  5920. stakers_slot_offset: SLOTS_PER_EPOCH,
  5921. ticks_per_slot: DEFAULT_TICKS_PER_SLOT,
  5922. ..ClusterConfig::default()
  5923. };
  5924. // Create local cluster
  5925. let mut cluster =
  5926. LocalCluster::new_alpenglow(&mut cluster_config, SocketAddrSpace::Unspecified);
  5927. // Create mapping from vote pubkeys to node indices
  5928. let vote_pubkeys: HashMap<_, _> = validator_keys
  5929. .iter()
  5930. .enumerate()
  5931. .filter_map(|(index, keypair)| {
  5932. cluster
  5933. .validators
  5934. .get(&keypair.pubkey())
  5935. .map(|validator| (validator.info.voting_keypair.pubkey(), index))
  5936. })
  5937. .collect();
  5938. assert_eq!(vote_pubkeys.len(), node_stakes.len());
  5939. // Collect node pubkeys and TPU addresses
  5940. let node_pubkeys: Vec<_> = validator_keys.iter().map(|key| key.pubkey()).collect();
  5941. let tpu_socket_addrs: Vec<_> = node_pubkeys
  5942. .iter()
  5943. .map(|pubkey| {
  5944. cluster
  5945. .get_contact_info(pubkey)
  5946. .unwrap()
  5947. .tpu_vote(cluster.connection_cache.protocol())
  5948. .unwrap_or_else(|| panic!("Failed to get TPU address for {}", pubkey))
  5949. })
  5950. .collect();
  5951. // Exit nodes B and D to control their voting behavior
  5952. let node_b_info = cluster.exit_node(&validator_keys[1].pubkey());
  5953. let node_b_vote_keypair = node_b_info.info.voting_keypair.clone();
  5954. let node_d_info = cluster.exit_node(&validator_keys[3].pubkey());
  5955. let node_d_vote_keypair = node_d_info.info.voting_keypair.clone();
  5956. // Vote listener state
  5957. #[derive(Debug)]
  5958. struct VoteListenerState {
  5959. num_notar_fallback_votes: u32,
  5960. a_equivocates: bool,
  5961. notar_fallback_map: HashMap<Slot, Vec<Hash>>,
  5962. double_notar_fallback_slots: Vec<Slot>,
  5963. check_for_roots: bool,
  5964. post_experiment_votes: HashMap<Slot, Vec<u16>>,
  5965. post_experiment_roots: HashSet<Slot>,
  5966. }
  5967. impl VoteListenerState {
  5968. fn new() -> Self {
  5969. Self {
  5970. num_notar_fallback_votes: 0,
  5971. a_equivocates: false,
  5972. notar_fallback_map: HashMap::new(),
  5973. double_notar_fallback_slots: Vec::new(),
  5974. check_for_roots: false,
  5975. post_experiment_votes: HashMap::new(),
  5976. post_experiment_roots: HashSet::new(),
  5977. }
  5978. }
  5979. fn sign_and_construct_vote_message(
  5980. &self,
  5981. vote: Vote,
  5982. keypair: &Keypair,
  5983. rank: u16,
  5984. ) -> BLSMessage {
  5985. let bls_keypair =
  5986. BLSKeypair::derive_from_signer(keypair, BLS_KEYPAIR_DERIVE_SEED).unwrap();
  5987. let signature: BLSSignature = bls_keypair
  5988. .sign(bincode::serialize(&vote).unwrap().as_slice())
  5989. .into();
  5990. BLSMessage::new_vote(vote, signature, rank)
  5991. }
  5992. fn handle_node_a_vote(
  5993. &self,
  5994. vote_message: &VoteMessage,
  5995. node_b_keypair: &Keypair,
  5996. node_d_keypair: &Keypair,
  5997. tpu_socket_addrs: &[std::net::SocketAddr],
  5998. connection_cache: Arc<ConnectionCache>,
  5999. ) {
  6000. // Create vote for Node B (potentially equivocated)
  6001. let vote = &vote_message.vote;
  6002. let vote_b = if self.a_equivocates && vote.is_notarization() {
  6003. let new_block_id = Hash::new_unique();
  6004. Vote::new_notarization_vote(vote.slot(), new_block_id)
  6005. } else {
  6006. *vote
  6007. };
  6008. broadcast_vote(
  6009. self.sign_and_construct_vote_message(
  6010. vote_b,
  6011. node_b_keypair,
  6012. 1, // Node B's rank is 1
  6013. ),
  6014. tpu_socket_addrs,
  6015. None,
  6016. connection_cache.clone(),
  6017. );
  6018. // Create vote for Node D (always copies Node A)
  6019. broadcast_vote(
  6020. self.sign_and_construct_vote_message(
  6021. *vote,
  6022. node_d_keypair,
  6023. 3, // Node D's rank is 3
  6024. ),
  6025. tpu_socket_addrs,
  6026. None,
  6027. connection_cache,
  6028. );
  6029. }
  6030. fn handle_node_c_vote(
  6031. &mut self,
  6032. vote: &Vote,
  6033. node_c_turbine_disabled: &Arc<AtomicBool>,
  6034. ) -> bool {
  6035. let turbine_disabled = node_c_turbine_disabled.load(Ordering::Acquire);
  6036. // Count NotarizeFallback votes while turbine is disabled
  6037. if turbine_disabled && vote.is_notarize_fallback() {
  6038. self.num_notar_fallback_votes += 1;
  6039. }
  6040. // Handle double NotarizeFallback during equivocation
  6041. if self.a_equivocates && vote.is_notarize_fallback() {
  6042. let block_id = vote.block_id().copied().unwrap();
  6043. let entry = self.notar_fallback_map.entry(vote.slot()).or_default();
  6044. entry.push(block_id);
  6045. assert!(
  6046. entry.len() <= 2,
  6047. "More than 2 NotarizeFallback votes for slot {}",
  6048. vote.slot()
  6049. );
  6050. if entry.len() == 2 {
  6051. // Verify equivocation: different block IDs
  6052. assert_ne!(
  6053. entry[0], entry[1],
  6054. "Block IDs should differ due to equivocation"
  6055. );
  6056. self.double_notar_fallback_slots.push(vote.slot());
  6057. // End experiment after 3 double NotarizeFallback slots
  6058. if self.double_notar_fallback_slots.len() == 3 {
  6059. info!("Phase 4, checking for 10 roots");
  6060. self.a_equivocates = false;
  6061. node_c_turbine_disabled.store(false, Ordering::Release);
  6062. self.check_for_roots = true;
  6063. }
  6064. }
  6065. }
  6066. // Start equivocation after stable NotarizeFallback behavior
  6067. if turbine_disabled && self.num_notar_fallback_votes == 10 {
  6068. info!("Phase 2, checking for 3 double notarize fallback votes from C");
  6069. self.a_equivocates = true;
  6070. }
  6071. // Disable turbine at slot 50 to start the experiment
  6072. if vote.slot() == 50 {
  6073. info!("Phase 1, checking for 10 notarize fallback votes from C");
  6074. node_c_turbine_disabled.store(true, Ordering::Release);
  6075. }
  6076. false
  6077. }
  6078. fn handle_finalize_vote(&mut self, vote_message: &VoteMessage) -> bool {
  6079. if !self.check_for_roots {
  6080. return false;
  6081. }
  6082. let slot = vote_message.vote.slot();
  6083. let slot_votes = self.post_experiment_votes.entry(slot).or_default();
  6084. slot_votes.push(vote_message.rank);
  6085. // We expect votes from 2 nodes (A and C) since B and D are copy-voting
  6086. if slot_votes.len() == 2 {
  6087. self.post_experiment_roots.insert(slot);
  6088. // End test after 10 new roots
  6089. if self.post_experiment_roots.len() >= 10 {
  6090. return true;
  6091. }
  6092. }
  6093. false
  6094. }
  6095. }
  6096. // Start vote listener thread to monitor and control the experiment
  6097. let vote_listener_thread = std::thread::spawn({
  6098. let mut buf = [0u8; 65_535];
  6099. let mut state = VoteListenerState::new();
  6100. move || {
  6101. loop {
  6102. let n_bytes = vote_listener_socket.recv(&mut buf).unwrap();
  6103. let BLSMessage::Vote(vote_message) =
  6104. bincode::deserialize::<BLSMessage>(&buf[0..n_bytes]).unwrap()
  6105. else {
  6106. continue;
  6107. };
  6108. match vote_message.rank {
  6109. 0 => {
  6110. // Node A: Handle vote broadcasting to B and D
  6111. state.handle_node_a_vote(
  6112. &vote_message,
  6113. &node_b_vote_keypair,
  6114. &node_d_vote_keypair,
  6115. &tpu_socket_addrs,
  6116. cluster.connection_cache.clone(),
  6117. );
  6118. }
  6119. 2 => {
  6120. // Node C: Handle experiment state transitions
  6121. state.handle_node_c_vote(&vote_message.vote, &node_c_turbine_disabled);
  6122. }
  6123. _ => {}
  6124. }
  6125. // Check for finalization votes to determine test completion
  6126. if vote_message.vote.is_finalize() && state.handle_finalize_vote(&vote_message) {
  6127. break;
  6128. }
  6129. }
  6130. }
  6131. });
  6132. vote_listener_thread.join().unwrap();
  6133. }
  6134. /// Test to validate Alpenglow's ability to maintain liveness when nodes issue both NotarizeFallback
  6135. /// and SkipFallback votes in an intertwined manner.
  6136. ///
  6137. /// This test simulates a consensus scenario with four nodes having specific stake distributions:
  6138. /// - Node A: 40% + epsilon stake
  6139. /// - Node B: 40% - epsilon stake
  6140. /// - Node C: 20% - epsilon stake
  6141. /// - Node D: epsilon stake (minimal, acts as perpetual leader)
  6142. ///
  6143. /// The test proceeds through two main stages:
  6144. ///
  6145. /// ## Stage 1: Stable Network Operation
  6146. /// All nodes are voting normally for leader D's proposals, with notarization votes going through
  6147. /// successfully and the network maintaining consensus.
  6148. ///
  6149. /// ## Stage 2: Network Partition and Fallback Scenario
  6150. /// At slot 50, Node A's turbine is disabled, creating a network partition. This triggers the
  6151. /// following sequence:
  6152. /// 1. Node D (leader) proposes a block b1
  6153. /// 2. Nodes B, C, and D can communicate and vote to notarize b1
  6154. /// 3. Node A is partitioned and cannot receive b1, so it issues a skip vote
  6155. /// 4. The vote distribution creates a complex fallback scenario:
  6156. /// - Nodes B, C, D: Issue notarize votes initially, then skip fallback votes
  6157. /// - Node A: Issues skip vote initially, then notarize fallback vote
  6158. /// 5. This creates the specific vote pattern:
  6159. /// - B, C, D: notarize + skip_fallback
  6160. /// - A: skip + notarize_fallback
  6161. ///
  6162. /// The test validates that:
  6163. /// - The network can handle intertwined fallback scenarios
  6164. /// - Consensus is maintained despite complex vote patterns
  6165. /// - The network continues to make progress and create new roots after the partition is resolved
  6166. /// - At least 10 new roots are created post-experiment to ensure sustained liveness
  6167. #[test]
  6168. #[serial]
  6169. fn test_alpenglow_ensure_liveness_after_intertwined_notar_and_skip_fallbacks() {
  6170. solana_logger::setup_with_default(AG_DEBUG_LOG_FILTER);
  6171. // Configure stake distribution for the four-node cluster
  6172. const TOTAL_STAKE: u64 = 10 * DEFAULT_NODE_STAKE;
  6173. const EPSILON: u64 = 1;
  6174. const NUM_NODES: usize = 4;
  6175. // Ensure that node stakes are in decreasing order, so node_index can directly be set as
  6176. // vote_message.rank.
  6177. let node_stakes = [
  6178. TOTAL_STAKE * 4 / 10 + EPSILON, // Node A: 40% + epsilon
  6179. TOTAL_STAKE * 4 / 10 - EPSILON, // Node B: 40% - epsilon
  6180. TOTAL_STAKE * 2 / 10 - EPSILON, // Node C: 20% - epsilon
  6181. EPSILON, // Node D: epsilon
  6182. ];
  6183. assert_eq!(NUM_NODES, node_stakes.len());
  6184. // Verify stake distribution adds up correctly
  6185. assert_eq!(TOTAL_STAKE, node_stakes.iter().sum::<u64>());
  6186. // Control mechanism for network partition
  6187. let node_a_turbine_disabled = Arc::new(AtomicBool::new(false));
  6188. // Create leader schedule with A as perpetual leader
  6189. let (leader_schedule, validator_keys) =
  6190. create_custom_leader_schedule_with_random_keys(&[0, 0, 0, 4]);
  6191. let leader_schedule = FixedSchedule {
  6192. leader_schedule: Arc::new(leader_schedule),
  6193. };
  6194. // Set up vote monitoring
  6195. let vote_listener_socket =
  6196. solana_net_utils::bind_to_localhost().expect("Failed to bind vote listener socket");
  6197. // Configure validators
  6198. let mut validator_config = ValidatorConfig::default_for_test();
  6199. validator_config.fixed_leader_schedule = Some(leader_schedule);
  6200. validator_config.voting_service_test_override = Some(VotingServiceOverride {
  6201. additional_listeners: vec![vote_listener_socket.local_addr().unwrap()],
  6202. alpenglow_port_override: AlpenglowPortOverride::default(),
  6203. });
  6204. let mut validator_configs = make_identical_validator_configs(&validator_config, NUM_NODES);
  6205. // Node A (index 0) will have its turbine disabled during the experiment
  6206. validator_configs[0].turbine_disabled = node_a_turbine_disabled.clone();
  6207. assert_eq!(NUM_NODES, validator_keys.len());
  6208. // Set up cluster configuration
  6209. let mut cluster_config = ClusterConfig {
  6210. mint_lamports: TOTAL_STAKE,
  6211. node_stakes: node_stakes.to_vec(),
  6212. validator_configs,
  6213. validator_keys: Some(
  6214. validator_keys
  6215. .iter()
  6216. .cloned()
  6217. .zip(std::iter::repeat(true))
  6218. .collect(),
  6219. ),
  6220. ..ClusterConfig::default()
  6221. };
  6222. // Initialize the cluster
  6223. let cluster = LocalCluster::new_alpenglow(&mut cluster_config, SocketAddrSpace::Unspecified);
  6224. assert_eq!(NUM_NODES, cluster.validators.len());
  6225. /// Helper struct to manage experiment state and vote pattern tracking
  6226. #[derive(Debug, PartialEq, Eq)]
  6227. enum Stage {
  6228. Stability,
  6229. ObserveSkipFallbacks,
  6230. ObserveLiveness,
  6231. }
  6232. impl Stage {
  6233. fn timeout(&self) -> Duration {
  6234. match self {
  6235. Stage::Stability => Duration::from_secs(60),
  6236. Stage::ObserveSkipFallbacks => Duration::from_secs(120),
  6237. Stage::ObserveLiveness => Duration::from_secs(180),
  6238. }
  6239. }
  6240. fn all() -> Vec<Stage> {
  6241. vec![
  6242. Stage::Stability,
  6243. Stage::ObserveSkipFallbacks,
  6244. Stage::ObserveLiveness,
  6245. ]
  6246. }
  6247. }
  6248. #[derive(Debug)]
  6249. struct ExperimentState {
  6250. stage: Stage,
  6251. vote_type_bitmap: HashMap<u64, [u8; 4]>, // slot -> [node_vote_pattern; 4]
  6252. consecutive_pattern_matches: usize,
  6253. post_experiment_roots: HashSet<u64>,
  6254. }
  6255. impl ExperimentState {
  6256. fn new() -> Self {
  6257. Self {
  6258. stage: Stage::Stability,
  6259. vote_type_bitmap: HashMap::new(),
  6260. consecutive_pattern_matches: 0,
  6261. post_experiment_roots: HashSet::new(),
  6262. }
  6263. }
  6264. fn record_vote_bitmap(&mut self, slot: u64, node_index: usize, vote: &Vote) {
  6265. let (_, vote_type) = _vote_to_tuple(vote);
  6266. let slot_pattern = self.vote_type_bitmap.entry(slot).or_insert([0u8; 4]);
  6267. assert!(node_index < NUM_NODES, "Invalid node index: {}", node_index);
  6268. slot_pattern[node_index] |= 1 << vote_type;
  6269. }
  6270. fn matches_expected_pattern(&mut self) -> bool {
  6271. // Expected patterns:
  6272. // Nodes 1, 2, 3: notarize + skip_fallback = (1 << 0) | (1 << 4) = 17
  6273. // Node 0: skip + notarize_fallback = (1 << 2) | (1 << 3) = 12
  6274. const EXPECTED_PATTERN_MAJORITY: u8 = 17; // notarize + skip_fallback
  6275. const EXPECTED_PATTERN_MINORITY: u8 = 12; // skip + notarize_fallback
  6276. for pattern in self.vote_type_bitmap.values() {
  6277. if pattern[0] == EXPECTED_PATTERN_MINORITY
  6278. && pattern[1] == EXPECTED_PATTERN_MAJORITY
  6279. && pattern[2] == EXPECTED_PATTERN_MAJORITY
  6280. && pattern[3] == EXPECTED_PATTERN_MAJORITY
  6281. {
  6282. self.consecutive_pattern_matches += 1;
  6283. if self.consecutive_pattern_matches >= 3 {
  6284. return true;
  6285. }
  6286. }
  6287. }
  6288. false
  6289. }
  6290. fn record_certificate(&mut self, slot: u64) {
  6291. self.post_experiment_roots.insert(slot);
  6292. }
  6293. fn sufficient_roots_created(&self) -> bool {
  6294. self.post_experiment_roots.len() >= 8
  6295. }
  6296. }
  6297. // Start vote monitoring thread
  6298. let vote_listener_thread = std::thread::spawn({
  6299. let node_c_turbine_disabled = node_a_turbine_disabled.clone();
  6300. move || {
  6301. let mut buffer = [0u8; 65_535];
  6302. let mut experiment_state = ExperimentState::new();
  6303. let timer = std::time::Instant::now();
  6304. loop {
  6305. let bytes_received = vote_listener_socket
  6306. .recv(&mut buffer)
  6307. .expect("Failed to receive vote data");
  6308. let bls_message = bincode::deserialize::<BLSMessage>(&buffer[..bytes_received])
  6309. .expect("Failed to deserialize BLS message");
  6310. match bls_message {
  6311. BLSMessage::Vote(vote_message) => {
  6312. let vote = &vote_message.vote;
  6313. let node_index = vote_message.rank as usize;
  6314. // Stage timeouts
  6315. let elapsed_time = timer.elapsed();
  6316. for stage in Stage::all() {
  6317. if elapsed_time > stage.timeout() {
  6318. panic!(
  6319. "Timeout during {:?}. node_c_turbine_disabled: {:#?}. Latest vote: {:#?}. Experiment state: {:#?}",
  6320. stage,
  6321. node_c_turbine_disabled.load(Ordering::Acquire),
  6322. vote,
  6323. experiment_state
  6324. );
  6325. }
  6326. }
  6327. // Stage 1: Wait for stability, then introduce partition at slot 20
  6328. if vote.slot() == 20 && !node_c_turbine_disabled.load(Ordering::Acquire) {
  6329. node_c_turbine_disabled.store(true, Ordering::Release);
  6330. experiment_state.stage = Stage::ObserveSkipFallbacks;
  6331. }
  6332. // Stage 2: Monitor for expected fallback vote patterns
  6333. if experiment_state.stage == Stage::ObserveSkipFallbacks {
  6334. experiment_state.record_vote_bitmap(vote.slot(), node_index, vote);
  6335. // Check if we've observed the expected pattern for 3 consecutive slots
  6336. if experiment_state.matches_expected_pattern() {
  6337. node_c_turbine_disabled.store(false, Ordering::Release);
  6338. experiment_state.stage = Stage::ObserveLiveness;
  6339. }
  6340. }
  6341. }
  6342. BLSMessage::Certificate(cert_message) => {
  6343. // Stage 3: Verify continued liveness after partition resolution
  6344. if experiment_state.stage == Stage::ObserveLiveness
  6345. && [CertificateType::Finalize, CertificateType::FinalizeFast]
  6346. .contains(&cert_message.certificate.certificate_type())
  6347. {
  6348. experiment_state.record_certificate(cert_message.certificate.slot());
  6349. if experiment_state.sufficient_roots_created() {
  6350. break;
  6351. }
  6352. }
  6353. }
  6354. }
  6355. }
  6356. }
  6357. });
  6358. vote_listener_thread
  6359. .join()
  6360. .expect("Vote listener thread panicked");
  6361. }
  6362. /// Test to validate the Alpenglow consensus protocol's ability to maintain liveness when a node
  6363. /// needs to issue NotarizeFallback votes due to the second fallback condition.
  6364. ///
  6365. /// This test simulates a scenario with three nodes having the following stake distribution:
  6366. /// - Node A: 40% - ε (small epsilon)
  6367. /// - Node B (Leader): 30% + ε
  6368. /// - Node C: 30%
  6369. ///
  6370. /// The test validates the protocol's behavior through two main phases:
  6371. ///
  6372. /// ## Phase 1: Node A Goes Offline (Byzantine + Offline Stake)
  6373. /// - Node A (40% - ε stake) is taken offline, representing combined Byzantine and offline stake
  6374. /// - This leaves Node B (30% + ε) and Node C (30%) as the active validators
  6375. /// - Despite the significant offline stake, the remaining nodes can still achieve consensus
  6376. /// - Network continues to slow finalize blocks with the remaining 60% + ε stake
  6377. ///
  6378. /// ## Phase 2: Network Partition Triggers NotarizeFallback
  6379. /// - Node C's turbine is disabled at slot 20, causing it to miss incoming blocks
  6380. /// - Node B (as leader) proposes blocks and votes Notarize for them
  6381. /// - Node C, unable to receive blocks, votes Skip for the same slots
  6382. /// - This creates a voting scenario where:
  6383. /// - Notarize votes: 30% + ε (Node B only)
  6384. /// - Skip votes: 30% (Node C only)
  6385. /// - Offline: 40% - ε (Node A)
  6386. ///
  6387. /// ## NotarizeFallback Condition 2 Trigger
  6388. /// Node C observes that:
  6389. /// - There are insufficient notarization votes for the current block (30% + ε < 40%)
  6390. /// - But the combination of notarize + skip votes represents >= 60% participation while there is
  6391. /// sufficient notarize stake (>= 20%).
  6392. /// - Protocol determines it's "SafeToNotar" under condition 2 and issues NotarizeFallback
  6393. ///
  6394. /// ## Phase 3: Recovery and Liveness Verification
  6395. /// After observing 5 NotarizeFallback votes from Node C:
  6396. /// - Node C's turbine is re-enabled to restore normal block reception
  6397. /// - Network returns to normal operation with both active nodes
  6398. /// - Test verifies 10+ new roots are created, ensuring liveness is maintained
  6399. ///
  6400. /// ## Key Validation Points
  6401. /// - Protocol handles significant offline stake (40%) gracefully
  6402. /// - NotarizeFallback condition 2 triggers correctly with insufficient notarization
  6403. /// - Network maintains liveness despite temporary partitioning
  6404. /// - Recovery is seamless once partition is resolved
  6405. #[test]
  6406. #[serial]
  6407. fn test_alpenglow_ensure_liveness_after_second_notar_fallback_condition() {
  6408. solana_logger::setup_with_default(AG_DEBUG_LOG_FILTER);
  6409. // Configure total stake and stake distribution
  6410. const TOTAL_STAKE: u64 = 10 * DEFAULT_NODE_STAKE;
  6411. const SLOTS_PER_EPOCH: u64 = MINIMUM_SLOTS_PER_EPOCH;
  6412. // Node stakes designed to trigger NotarizeFallback condition 2
  6413. let node_stakes = [
  6414. TOTAL_STAKE * 4 / 10 - 1, // Node A: 40% - ε (will go offline)
  6415. TOTAL_STAKE * 3 / 10 + 1, // Node B: 30% + ε (leader, stays online)
  6416. TOTAL_STAKE * 3 / 10, // Node C: 30% (will be partitioned)
  6417. ];
  6418. assert_eq!(TOTAL_STAKE, node_stakes.iter().sum::<u64>());
  6419. // Control component for network partition simulation
  6420. let node_c_turbine_disabled = Arc::new(AtomicBool::new(false));
  6421. // Create leader schedule with Node B as primary leader (Node A will go offline)
  6422. let (leader_schedule, validator_keys) =
  6423. create_custom_leader_schedule_with_random_keys(&[0, 4, 0]);
  6424. let leader_schedule = FixedSchedule {
  6425. leader_schedule: Arc::new(leader_schedule),
  6426. };
  6427. // Create UDP socket to listen to votes for experiment control
  6428. let vote_listener_socket = solana_net_utils::bind_to_localhost().unwrap();
  6429. // Create validator configs
  6430. let mut validator_config = ValidatorConfig::default_for_test();
  6431. validator_config.fixed_leader_schedule = Some(leader_schedule);
  6432. validator_config.voting_service_test_override = Some(VotingServiceOverride {
  6433. additional_listeners: vec![vote_listener_socket.local_addr().unwrap()],
  6434. alpenglow_port_override: AlpenglowPortOverride::default(),
  6435. });
  6436. let mut validator_configs =
  6437. make_identical_validator_configs(&validator_config, node_stakes.len());
  6438. // Node C will have its turbine disabled during the experiment
  6439. validator_configs[2].turbine_disabled = node_c_turbine_disabled.clone();
  6440. // Cluster configuration
  6441. let mut cluster_config = ClusterConfig {
  6442. mint_lamports: TOTAL_STAKE,
  6443. node_stakes: node_stakes.to_vec(),
  6444. validator_configs,
  6445. validator_keys: Some(
  6446. validator_keys
  6447. .iter()
  6448. .cloned()
  6449. .zip(std::iter::repeat(true))
  6450. .collect(),
  6451. ),
  6452. slots_per_epoch: SLOTS_PER_EPOCH,
  6453. stakers_slot_offset: SLOTS_PER_EPOCH,
  6454. ticks_per_slot: DEFAULT_TICKS_PER_SLOT,
  6455. ..ClusterConfig::default()
  6456. };
  6457. // Create local cluster
  6458. let mut cluster =
  6459. LocalCluster::new_alpenglow(&mut cluster_config, SocketAddrSpace::Unspecified);
  6460. // Create mapping from vote pubkeys to node indices for vote identification
  6461. let vote_pubkeys: HashMap<_, _> = validator_keys
  6462. .iter()
  6463. .enumerate()
  6464. .filter_map(|(index, keypair)| {
  6465. cluster
  6466. .validators
  6467. .get(&keypair.pubkey())
  6468. .map(|validator| (validator.info.voting_keypair.pubkey(), index))
  6469. })
  6470. .collect();
  6471. assert_eq!(vote_pubkeys.len(), node_stakes.len());
  6472. // Vote listener state management
  6473. #[derive(Debug, PartialEq, Eq)]
  6474. enum Stage {
  6475. WaitForReady,
  6476. Stability,
  6477. ObserveNotarFallbacks,
  6478. ObserveLiveness,
  6479. }
  6480. impl Stage {
  6481. fn timeout(&self) -> Duration {
  6482. match self {
  6483. Stage::WaitForReady => Duration::from_secs(60),
  6484. Stage::Stability => Duration::from_secs(60),
  6485. Stage::ObserveNotarFallbacks => Duration::from_secs(120),
  6486. Stage::ObserveLiveness => Duration::from_secs(180),
  6487. }
  6488. }
  6489. fn all() -> Vec<Stage> {
  6490. vec![
  6491. Stage::WaitForReady,
  6492. Stage::Stability,
  6493. Stage::ObserveNotarFallbacks,
  6494. Stage::ObserveLiveness,
  6495. ]
  6496. }
  6497. }
  6498. #[derive(Debug)]
  6499. struct ExperimentState {
  6500. stage: Stage,
  6501. number_of_nodes: usize,
  6502. initial_notar_votes: HashSet<usize>,
  6503. notar_fallbacks: HashSet<Slot>,
  6504. post_experiment_roots: HashSet<Slot>,
  6505. }
  6506. impl ExperimentState {
  6507. fn new(number_of_nodes: usize) -> Self {
  6508. Self {
  6509. stage: Stage::WaitForReady,
  6510. number_of_nodes,
  6511. initial_notar_votes: HashSet::new(),
  6512. notar_fallbacks: HashSet::new(),
  6513. post_experiment_roots: HashSet::new(),
  6514. }
  6515. }
  6516. fn wait_for_nodes_ready(
  6517. &mut self,
  6518. vote: &Vote,
  6519. node_name: usize,
  6520. cluster: &mut LocalCluster,
  6521. node_a_pubkey: &Pubkey,
  6522. ) {
  6523. if self.stage != Stage::WaitForReady || !vote.is_notarization() {
  6524. return;
  6525. }
  6526. self.initial_notar_votes.insert(node_name);
  6527. // Wait until we have observed a notarization vote from all nodes.
  6528. if self.initial_notar_votes.len() >= self.number_of_nodes {
  6529. // Phase 1: Take Node A offline to simulate Byzantine + offline stake
  6530. // This represents 40% - ε of total stake going offline
  6531. info!("Phase 1: Exiting Node A. Transitioning to stability phase.");
  6532. cluster.exit_node(node_a_pubkey);
  6533. self.stage = Stage::Stability;
  6534. }
  6535. }
  6536. fn handle_experiment_start(
  6537. &mut self,
  6538. vote: &Vote,
  6539. node_c_turbine_disabled: &Arc<AtomicBool>,
  6540. ) {
  6541. // Phase 2: Start network partition experiment at slot 20
  6542. if vote.slot() >= 20 && self.stage == Stage::Stability {
  6543. info!(
  6544. "Starting network partition experiment at slot {}",
  6545. vote.slot()
  6546. );
  6547. node_c_turbine_disabled.store(true, Ordering::Relaxed);
  6548. self.stage = Stage::ObserveNotarFallbacks;
  6549. }
  6550. }
  6551. fn handle_notar_fallback(
  6552. &mut self,
  6553. vote: &Vote,
  6554. node_name: usize,
  6555. node_c_turbine_disabled: &Arc<AtomicBool>,
  6556. ) {
  6557. // Track NotarizeFallback votes from Node C
  6558. if self.stage == Stage::ObserveNotarFallbacks
  6559. && node_name == 2
  6560. && vote.is_notarize_fallback()
  6561. {
  6562. self.notar_fallbacks.insert(vote.slot());
  6563. info!(
  6564. "Node C issued NotarizeFallback for slot {}, total fallbacks: {}",
  6565. vote.slot(),
  6566. self.notar_fallbacks.len()
  6567. );
  6568. // Phase 3: End partition after observing sufficient NotarizeFallback votes
  6569. if self.notar_fallbacks.len() >= 5 {
  6570. info!("Sufficient NotarizeFallback votes observed, ending partition");
  6571. node_c_turbine_disabled.store(false, Ordering::Relaxed);
  6572. self.stage = Stage::ObserveLiveness;
  6573. }
  6574. }
  6575. }
  6576. fn record_certificate(&mut self, slot: u64) {
  6577. self.post_experiment_roots.insert(slot);
  6578. }
  6579. fn sufficient_roots_created(&self) -> bool {
  6580. self.post_experiment_roots.len() >= 8
  6581. }
  6582. }
  6583. // Start vote listener thread to monitor and control the experiment
  6584. let vote_listener_thread = std::thread::spawn({
  6585. let mut buf = [0u8; 65_535];
  6586. let node_c_turbine_disabled = node_c_turbine_disabled.clone();
  6587. let mut experiment_state = ExperimentState::new(vote_pubkeys.len());
  6588. let timer = std::time::Instant::now();
  6589. move || {
  6590. loop {
  6591. let n_bytes = vote_listener_socket.recv(&mut buf).unwrap();
  6592. let bls_message = bincode::deserialize::<BLSMessage>(&buf[0..n_bytes]).unwrap();
  6593. match bls_message {
  6594. BLSMessage::Vote(vote_message) => {
  6595. let vote = &vote_message.vote;
  6596. let node_name = vote_message.rank as usize;
  6597. // Stage timeouts
  6598. let elapsed_time = timer.elapsed();
  6599. for stage in Stage::all() {
  6600. if elapsed_time > stage.timeout() {
  6601. panic!(
  6602. "Timeout during {:?}. node_c_turbine_disabled: {:#?}. Latest vote: {:#?}. Experiment state: {:#?}",
  6603. stage,
  6604. node_c_turbine_disabled.load(Ordering::Acquire),
  6605. vote,
  6606. experiment_state
  6607. );
  6608. }
  6609. }
  6610. // Handle experiment phase transitions
  6611. experiment_state.wait_for_nodes_ready(
  6612. vote,
  6613. node_name,
  6614. &mut cluster,
  6615. &validator_keys[0].pubkey(),
  6616. );
  6617. experiment_state.handle_experiment_start(vote, &node_c_turbine_disabled);
  6618. experiment_state.handle_notar_fallback(
  6619. vote,
  6620. node_name,
  6621. &node_c_turbine_disabled,
  6622. );
  6623. }
  6624. BLSMessage::Certificate(cert_message) => {
  6625. // Wait until the final stage before looking for finalization certificates.
  6626. if experiment_state.stage != Stage::ObserveLiveness {
  6627. continue;
  6628. }
  6629. // Observing finalization certificates to ensure liveness.
  6630. if [CertificateType::Finalize, CertificateType::FinalizeFast]
  6631. .contains(&cert_message.certificate.certificate_type())
  6632. {
  6633. experiment_state.record_certificate(cert_message.certificate.slot());
  6634. if experiment_state.sufficient_roots_created() {
  6635. break;
  6636. }
  6637. }
  6638. }
  6639. }
  6640. }
  6641. }
  6642. });
  6643. vote_listener_thread.join().unwrap();
  6644. }