hwcontext_vulkan.c

/*
 * Copyright (c) Lynne
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define VK_NO_PROTOTYPES
#define VK_ENABLE_BETA_EXTENSIONS

#ifdef _WIN32
#include <windows.h> /* Included to prevent conflicts with CreateSemaphore */
#include <versionhelpers.h>
#include "compat/w32dlfcn.h"
#else
#include <dlfcn.h>
#include <unistd.h>
#endif

#include "thread.h"

#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"
#include "mem.h"

#include "vulkan.h"
#include "vulkan_loader.h"

#if CONFIG_VAAPI
#include "hwcontext_vaapi.h"
#endif

#if CONFIG_LIBDRM
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#endif
#ifdef __linux__
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#endif

#if HAVE_LINUX_DMA_BUF_H
#include <sys/ioctl.h>
#include <linux/dma-buf.h>
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif

typedef struct VulkanDeviceFeatures {
    VkPhysicalDeviceFeatures2 device;

    VkPhysicalDeviceVulkan11Features vulkan_1_1;
    VkPhysicalDeviceVulkan12Features vulkan_1_2;
    VkPhysicalDeviceVulkan13Features vulkan_1_3;
    VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore;

#ifdef VK_KHR_shader_expect_assume
    VkPhysicalDeviceShaderExpectAssumeFeaturesKHR expect_assume;
#endif

    VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maintenance_1;
#ifdef VK_KHR_video_maintenance2
    VkPhysicalDeviceVideoMaintenance2FeaturesKHR video_maintenance_2;
#endif

    VkPhysicalDeviceShaderObjectFeaturesEXT shader_object;
    VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperative_matrix;
    VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptor_buffer;
    VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float;
    VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow;

#ifdef VK_KHR_shader_relaxed_extended_instruction
    VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR relaxed_extended_instruction;
#endif
} VulkanDeviceFeatures;
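
/* The #ifdef guards above track Vulkan header versions: when building against
 * older headers that lack one of these extensions, its feature struct (and
 * the code referencing it) simply drops out of the build. */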

typedef struct VulkanDevicePriv {
    /**
     * The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it.
     */
    AVVulkanDeviceContext p;

    /* Vulkan library and loader functions */
    void *libvulkan;

    FFVulkanContext vkctx;
    AVVulkanDeviceQueueFamily *compute_qf;
    AVVulkanDeviceQueueFamily *transfer_qf;

    /* Properties */
    VkPhysicalDeviceProperties2 props;
    VkPhysicalDeviceMemoryProperties mprops;
    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;
    VkPhysicalDeviceDriverProperties dprops;

    /* Opaque FD external semaphore properties */
    VkExternalSemaphoreProperties ext_sem_props_opaque;

    /* Enabled features */
    VulkanDeviceFeatures feats;

    /* Queues */
    pthread_mutex_t **qf_mutex;
    uint32_t nb_tot_qfs;
    uint32_t img_qfs[64];
    uint32_t nb_img_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Settings */
    int use_linear_images;

    /* Option to allocate all image planes in a single allocation */
    int contiguous_planes;

    /* Disable multiplane images */
    int disable_multiplane;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    /**
     * The public AVVulkanFramesContext. See hwcontext_vulkan.h for it.
     */
    AVVulkanFramesContext p;

    /* Image conversions */
    FFVkExecPool compute_exec;

    /* Image transfers */
    FFVkExecPool upload_exec;
    FFVkExecPool download_exec;

    /* Temporary buffer pools */
    AVBufferPool *tmp;

    /* Modifier info list to free at uninit */
    VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
    pthread_mutex_t update_mutex;

#if CONFIG_CUDA
    /* Importing external memory into CUDA is really expensive, so we keep
     * the memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#ifdef _WIN32
    HANDLE ext_mem_handle[AV_NUM_DATA_POINTERS];
    HANDLE ext_sem_handle[AV_NUM_DATA_POINTERS];
#endif
#endif
} AVVkFrameInternal;

/* Initialize all structs in VulkanDeviceFeatures */
static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *feats)
{
    VulkanDevicePriv *p = ctx->hwctx;
    FFVulkanContext *s = &p->vkctx;

    feats->device = (VkPhysicalDeviceFeatures2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
    };

    FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_1, FF_VK_EXT_NO_FLAG,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_2, FF_VK_EXT_NO_FLAG,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_3, FF_VK_EXT_NO_FLAG,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->timeline_semaphore, FF_VK_EXT_PORTABILITY_SUBSET,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES);

#ifdef VK_KHR_shader_expect_assume
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->expect_assume, FF_VK_EXT_EXPECT_ASSUME,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_EXPECT_ASSUME_FEATURES_KHR);
#endif

    FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_1, FF_VK_EXT_VIDEO_MAINTENANCE_1,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR);
#ifdef VK_KHR_video_maintenance2
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_2, FF_VK_EXT_VIDEO_MAINTENANCE_2,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_2_FEATURES_KHR);
#endif

    FF_VK_STRUCT_EXT(s, &feats->device, &feats->shader_object, FF_VK_EXT_SHADER_OBJECT,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->cooperative_matrix, FF_VK_EXT_COOP_MATRIX,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->descriptor_buffer, FF_VK_EXT_DESCRIPTOR_BUFFER,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT);
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->atomic_float, FF_VK_EXT_ATOMIC_FLOAT,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT);

#ifdef VK_KHR_shader_relaxed_extended_instruction
    FF_VK_STRUCT_EXT(s, &feats->device, &feats->relaxed_extended_instruction, FF_VK_EXT_RELAXED_EXTENDED_INSTR,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR);
#endif

    FF_VK_STRUCT_EXT(s, &feats->device, &feats->optical_flow, FF_VK_EXT_OPTICAL_FLOW,
                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV);
}
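
/*
 * Usage sketch (illustrative): device_features_init() links the available
 * feature structs into a single pNext chain rooted at feats->device, so one
 * query fills every struct whose extension is present:
 *
 *     vk->GetPhysicalDeviceFeatures2(hwctx->phys_dev, &feats->device);
 *
 * The same chain can then be passed as VkDeviceCreateInfo.pNext so the
 * selected features are enabled on the created device.
 */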

/* Copy all needed device features */
static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceFeatures *src)
{
#define COPY_VAL(VAL)          \
    do {                       \
        dst->VAL = src->VAL;   \
    } while (0)

    COPY_VAL(device.features.shaderImageGatherExtended);
    COPY_VAL(device.features.shaderStorageImageReadWithoutFormat);
    COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat);
    COPY_VAL(device.features.fragmentStoresAndAtomics);
    COPY_VAL(device.features.vertexPipelineStoresAndAtomics);
    COPY_VAL(device.features.shaderInt64);
    COPY_VAL(device.features.shaderInt16);
    COPY_VAL(device.features.shaderFloat64);

    COPY_VAL(vulkan_1_1.samplerYcbcrConversion);
    COPY_VAL(vulkan_1_1.storagePushConstant16);
    COPY_VAL(vulkan_1_1.storageBuffer16BitAccess);
    COPY_VAL(vulkan_1_1.uniformAndStorageBuffer16BitAccess);

    COPY_VAL(vulkan_1_2.timelineSemaphore);
    COPY_VAL(vulkan_1_2.scalarBlockLayout);
    COPY_VAL(vulkan_1_2.bufferDeviceAddress);
    COPY_VAL(vulkan_1_2.hostQueryReset);
    COPY_VAL(vulkan_1_2.storagePushConstant8);
    COPY_VAL(vulkan_1_2.shaderInt8);
    COPY_VAL(vulkan_1_2.storageBuffer8BitAccess);
    COPY_VAL(vulkan_1_2.uniformAndStorageBuffer8BitAccess);
    COPY_VAL(vulkan_1_2.shaderFloat16);
    COPY_VAL(vulkan_1_2.shaderBufferInt64Atomics);
    COPY_VAL(vulkan_1_2.shaderSharedInt64Atomics);
    COPY_VAL(vulkan_1_2.vulkanMemoryModel);
    COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope);

    COPY_VAL(vulkan_1_3.dynamicRendering);
    COPY_VAL(vulkan_1_3.maintenance4);
    COPY_VAL(vulkan_1_3.synchronization2);
    COPY_VAL(vulkan_1_3.computeFullSubgroups);
    COPY_VAL(vulkan_1_3.subgroupSizeControl);
    COPY_VAL(vulkan_1_3.shaderZeroInitializeWorkgroupMemory);

    COPY_VAL(timeline_semaphore.timelineSemaphore);

    COPY_VAL(video_maintenance_1.videoMaintenance1);
#ifdef VK_KHR_video_maintenance2
    COPY_VAL(video_maintenance_2.videoMaintenance2);
#endif

    COPY_VAL(shader_object.shaderObject);
    COPY_VAL(cooperative_matrix.cooperativeMatrix);
    COPY_VAL(descriptor_buffer.descriptorBuffer);
    COPY_VAL(descriptor_buffer.descriptorBufferPushDescriptors);
    COPY_VAL(atomic_float.shaderBufferFloat32Atomics);
    COPY_VAL(atomic_float.shaderBufferFloat32AtomicAdd);

#ifdef VK_KHR_shader_relaxed_extended_instruction
    COPY_VAL(relaxed_extended_instruction.shaderRelaxedExtendedInstruction);
#endif
#ifdef VK_KHR_shader_expect_assume
    COPY_VAL(expect_assume.shaderExpectAssume);
#endif

    COPY_VAL(optical_flow.opticalFlow);

#undef COPY_VAL
}
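
/* Format support table. Each entry maps an AVPixelFormat to its native
 * (possibly multiplane) VkFormat, the image aspect mask covering its planes,
 * the native plane count, and a fallback representation that emulates the
 * format with nb_images_fallback single-plane images when the native format
 * is unusable on a given implementation. */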

#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT)
#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)

static const struct FFVkFormatEntry {
    VkFormat vkf;
    enum AVPixelFormat pixfmt;
    VkImageAspectFlags aspect;
    int vk_planes;
    int nb_images;
    int nb_images_fallback;
    const VkFormat fallback[5];
} vk_formats_list[] = {
    /* Gray formats */
    { VK_FORMAT_R8_UNORM,   AV_PIX_FMT_GRAY8,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM   } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY10,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY12,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY14,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY16,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
    { VK_FORMAT_R32_UINT,   AV_PIX_FMT_GRAY32,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_UINT   } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } },

    /* RGB formats */
    { VK_FORMAT_B8G8R8A8_UNORM,           AV_PIX_FMT_BGRA,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
    { VK_FORMAT_R8G8B8A8_UNORM,           AV_PIX_FMT_RGBA,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_R8G8B8_UNORM,             AV_PIX_FMT_RGB24,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } },
    { VK_FORMAT_B8G8R8_UNORM,             AV_PIX_FMT_BGR24,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } },
    { VK_FORMAT_R16G16B16_UNORM,          AV_PIX_FMT_RGB48,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } },
    { VK_FORMAT_R16G16B16A16_UNORM,       AV_PIX_FMT_RGBA64,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_R5G6B5_UNORM_PACK16,      AV_PIX_FMT_RGB565,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { VK_FORMAT_B5G6R5_UNORM_PACK16,      AV_PIX_FMT_BGR565,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { VK_FORMAT_B8G8R8A8_UNORM,           AV_PIX_FMT_BGR0,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
    { VK_FORMAT_R8G8B8A8_UNORM,           AV_PIX_FMT_RGB0,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
    { VK_FORMAT_A2B10G10R10_UNORM_PACK32, AV_PIX_FMT_X2BGR10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2B10G10R10_UNORM_PACK32 } },
    { VK_FORMAT_R32G32B32_SFLOAT,         AV_PIX_FMT_RGBF32,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_SFLOAT } },
    { VK_FORMAT_R32G32B32A32_SFLOAT,      AV_PIX_FMT_RGBAF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_SFLOAT } },
    { VK_FORMAT_R32G32B32_UINT,           AV_PIX_FMT_RGB96,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_UINT } },
    { VK_FORMAT_R32G32B32A32_UINT,        AV_PIX_FMT_RGBA128, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_UINT } },

    /* Planar RGB */
    { VK_FORMAT_R8_UNORM,   AV_PIX_FMT_GBRP,    VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP10,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP12,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP14,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP16,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },

    /* Planar RGB + Alpha */
    { VK_FORMAT_R8_UNORM,   AV_PIX_FMT_GBRAP,    VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP10,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP12,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP14,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP16,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_R32_UINT,   AV_PIX_FMT_GBRAP32,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT } },
    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },

    /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,                  AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,               AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Two-plane 422 YUV at 8, 10 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,                  AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,               AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Two-plane 444 YUV at 8, 10 and 16 bits */
    { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM,                  AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM,               AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,    AV_PIX_FMT_YUV420P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,    AV_PIX_FMT_YUV422P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,    AV_PIX_FMT_YUV444P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    /* Single plane 422 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_G8B8G8R8_422_UNORM,                     AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_B8G8R8G8_422_UNORM,                     AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
    { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_G16B16G16R16_422_UNORM,                 AV_PIX_FMT_Y216,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },

    /* Single plane 444 at 8, 10, 12 and 16 bits */
    { VK_FORMAT_B8G8R8A8_UNORM,                    AV_PIX_FMT_UYVA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
    { VK_FORMAT_A2R10G10B10_UNORM_PACK32,          AV_PIX_FMT_XV30, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
    { VK_FORMAT_R16G16B16A16_UNORM,                AV_PIX_FMT_XV48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
};
static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list);
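
/* Public helper: returns the per-image VkFormat list for a pixel format.
 * For single-image formats, entry 0 is the native VkFormat; unused array
 * slots are zero-initialized, i.e. VK_FORMAT_UNDEFINED. */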
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (int i = 0; i < nb_vk_formats_list; i++)
        if (vk_formats_list[i].pixfmt == p)
            return vk_formats_list[i].fallback;
    return NULL;
}

static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p)
{
    for (int i = 0; i < nb_vk_formats_list; i++)
        if (vk_formats_list[i].pixfmt == p)
            return &vk_formats_list[i];
    return NULL;
}
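
/* Resolves a pixel format to a Vulkan representation for the given tiling:
 * the native (possibly multiplane) VkFormat is preferred when it supports
 * sampling and transfers (plus storage, if requested); otherwise the
 * per-plane fallback list is used, one image per plane. Returns
 * AVERROR(ENOTSUP) when neither path works, AVERROR(EINVAL) for unknown
 * pixel formats. */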
static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
                              VkImageTiling tiling,
                              VkFormat fmts[AV_NUM_DATA_POINTERS], /* Output format list */
                              int *nb_images,                      /* Output number of images */
                              VkImageAspectFlags *aspect,          /* Output aspect */
                              VkImageUsageFlags *supported_usage,  /* Output supported usage */
                              int disable_multiplane, int need_storage)
{
    VulkanDevicePriv *priv = dev_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &priv->p;
    FFVulkanFunctions *vk = &priv->vkctx.vkfn;

    const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
                                                 VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
                                                 VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;

    for (int i = 0; i < nb_vk_formats_list; i++) {
        if (vk_formats_list[i].pixfmt == p) {
            VkFormatProperties3 fprops = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
            };
            VkFormatProperties2 prop = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
                .pNext = &fprops,
            };
            VkFormatFeatureFlagBits2 feats_primary, feats_secondary;
            int basics_primary = 0, basics_secondary = 0;
            int storage_primary = 0, storage_secondary = 0;

            vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
                                                   vk_formats_list[i].vkf,
                                                   &prop);

            feats_primary = tiling == VK_IMAGE_TILING_LINEAR ?
                            fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
            basics_primary = (feats_primary & basic_flags) == basic_flags;
            storage_primary = !!(feats_primary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);

            if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) {
                vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
                                                       vk_formats_list[i].fallback[0],
                                                       &prop);
                feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ?
                                  fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
                basics_secondary = (feats_secondary & basic_flags) == basic_flags;
                storage_secondary = !!(feats_secondary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
            } else {
                basics_secondary = basics_primary;
                storage_secondary = storage_primary;
            }

            if (basics_primary &&
                !(disable_multiplane && vk_formats_list[i].vk_planes > 1) &&
                (!need_storage || (need_storage && (storage_primary | storage_secondary)))) {
                if (fmts) {
                    if (vk_formats_list[i].nb_images > 1) {
                        for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++)
                            fmts[j] = vk_formats_list[i].fallback[j];
                    } else {
                        fmts[0] = vk_formats_list[i].vkf;
                    }
                }
                if (nb_images)
                    *nb_images = 1;
                if (aspect)
                    *aspect = vk_formats_list[i].aspect;
                if (supported_usage)
                    *supported_usage = ff_vk_map_feats_to_usage(feats_primary) |
                                       ((need_storage && (storage_primary | storage_secondary)) ?
                                        VK_IMAGE_USAGE_STORAGE_BIT : 0);
                return 0;
            } else if (basics_secondary &&
                       (!need_storage || (need_storage && storage_secondary))) {
                if (fmts) {
                    for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++)
                        fmts[j] = vk_formats_list[i].fallback[j];
                }
                if (nb_images)
                    *nb_images = vk_formats_list[i].nb_images_fallback;
                if (aspect)
                    *aspect = vk_formats_list[i].aspect;
                if (supported_usage)
                    *supported_usage = ff_vk_map_feats_to_usage(feats_secondary);
                return 0;
            } else {
                return AVERROR(ENOTSUP);
            }
        }
    }

    return AVERROR(EINVAL);
}

#if CONFIG_VULKAN_STATIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance,
                                                               const char *pName);
#endif

static int load_libvulkan(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;

#if CONFIG_VULKAN_STATIC
    hwctx->get_proc_addr = vkGetInstanceProcAddr;
#else
    static const char *lib_names[] = {
#if defined(_WIN32)
        "vulkan-1.dll",
#elif defined(__APPLE__)
        "libvulkan.dylib",
        "libvulkan.1.dylib",
        "libMoltenVK.dylib",
#else
        "libvulkan.so.1",
        "libvulkan.so",
#endif
    };

    for (int i = 0; i < FF_ARRAY_ELEMS(lib_names); i++) {
        p->libvulkan = dlopen(lib_names[i], RTLD_NOW | RTLD_LOCAL);
        if (p->libvulkan)
            break;
    }

    if (!p->libvulkan) {
        av_log(ctx, AV_LOG_ERROR, "Unable to open the libvulkan library!\n");
        return AVERROR_UNKNOWN;
    }

    hwctx->get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(p->libvulkan, "vkGetInstanceProcAddr");
#endif /* CONFIG_VULKAN_STATIC */

    return 0;
}

typedef struct VulkanOptExtension {
    const char *name;
    FFVulkanExtensions flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    { VK_EXT_LAYER_SETTINGS_EXTENSION_NAME,          FF_VK_EXT_NO_FLAG },
#ifdef __APPLE__
    { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
#endif
};

static const VulkanOptExtension optional_device_exts[] = {
    /* Misc or required by other extensions */
    { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME,        FF_VK_EXT_PORTABILITY_SUBSET },
    { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,           FF_VK_EXT_PUSH_DESCRIPTOR },
    { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,         FF_VK_EXT_DESCRIPTOR_BUFFER },
    { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME,       FF_VK_EXT_DEVICE_DRM },
    { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME,       FF_VK_EXT_ATOMIC_FLOAT },
    { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME,        FF_VK_EXT_COOP_MATRIX },
    { VK_NV_OPTICAL_FLOW_EXTENSION_NAME,               FF_VK_EXT_OPTICAL_FLOW },
    { VK_EXT_SHADER_OBJECT_EXTENSION_NAME,             FF_VK_EXT_SHADER_OBJECT },
#ifdef VK_KHR_shader_expect_assume
    { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME,      FF_VK_EXT_EXPECT_ASSUME },
#endif
    { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME,       FF_VK_EXT_VIDEO_MAINTENANCE_1 },
#ifdef VK_KHR_video_maintenance2
    { VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME,       FF_VK_EXT_VIDEO_MAINTENANCE_2 },
#endif

    /* Imports/exports */
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,        FF_VK_EXT_EXTERNAL_FD_MEMORY },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,   FF_VK_EXT_EXTERNAL_DMABUF_MEMORY },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,     FF_VK_EXT_EXTERNAL_FD_SEM },
    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,      FF_VK_EXT_EXTERNAL_HOST_MEMORY },
#ifdef _WIN32
    { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,     FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
    { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,  FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif

    /* Video encoding/decoding */
    { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,               FF_VK_EXT_VIDEO_QUEUE },
    { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME,        FF_VK_EXT_VIDEO_ENCODE_QUEUE },
    { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,        FF_VK_EXT_VIDEO_DECODE_QUEUE },
    { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME,         FF_VK_EXT_VIDEO_ENCODE_H264 },
    { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_H264 },
    { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME,         FF_VK_EXT_VIDEO_ENCODE_H265 },
    { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_H265 },
    { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME,          FF_VK_EXT_VIDEO_DECODE_AV1 },
};

static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                           VkDebugUtilsMessageTypeFlagsEXT messageType,
                                           const VkDebugUtilsMessengerCallbackDataEXT *data,
                                           void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    /* Ignore false positives */
    switch (data->messageIdNumber) {
    case 0x086974c1: /* BestPractices-vkCreateCommandPool-command-buffer-reset */
    case 0xfd92477a: /* BestPractices-vkAllocateMemory-small-allocation */
    case 0x618ab1e7: /* VUID-VkImageViewCreateInfo-usage-02275 */
    case 0x30f4ac70: /* VUID-VkImageCreateInfo-pNext-06811 */
        return VK_FALSE;
    default:
        break;
    }

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return VK_FALSE;
}
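
/* Helpers for building the string lists handed to instance/device creation:
 * ADD_VAL_TO_LIST grows the array and strdup()s the value, jumping to the
 * enclosing function's "fail" label on allocation failure; RELEASE_PROPS
 * frees each entry and then the array itself. */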
#define ADD_VAL_TO_LIST(list, count, val)                          \
    do {                                                           \
        list = av_realloc_array(list, sizeof(*list), ++count);     \
        if (!list) {                                               \
            err = AVERROR(ENOMEM);                                 \
            goto fail;                                             \
        }                                                          \
        list[count - 1] = av_strdup(val);                          \
        if (!list[count - 1]) {                                    \
            err = AVERROR(ENOMEM);                                 \
            goto fail;                                             \
        }                                                          \
    } while(0)

#define RELEASE_PROPS(props, count)                                \
    if (props) {                                                   \
        for (int i = 0; i < count; i++)                            \
            av_free((void *)((props)[i]));                         \
        av_free((void *)props);                                    \
    }

enum FFVulkanDebugMode {
    FF_VULKAN_DEBUG_NONE = 0,
    /* Standard GPU-assisted validation */
    FF_VULKAN_DEBUG_VALIDATE = 1,
    /* Passes printfs in shaders to the debug callback */
    FF_VULKAN_DEBUG_PRINTF = 2,
    /* Enables extra printouts */
    FF_VULKAN_DEBUG_PRACTICES = 3,
    /* Disables validation but keeps shader debug info and optimizations */
    FF_VULKAN_DEBUG_PROFILE = 4,
    FF_VULKAN_DEBUG_NB,
};
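
/* Picks the instance or device extensions to enable. User-requested
 * extensions come from the "instance_extensions"/"device_extensions"
 * dictionary entries as a '+'-separated string, e.g. (illustrative)
 * "VK_KHR_surface+VK_KHR_get_physical_device_properties2"; names the
 * implementation does not support are skipped with a warning rather than
 * failing initialization. */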
static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num,
                            enum FFVulkanDebugMode debug_mode)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;

        /* Intel has had a bad descriptor buffer implementation for a while */
        if (p->dprops.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA &&
            !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME))
            continue;

        if (dev &&
            ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
             (debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
             (debug_mode == FF_VULKAN_DEBUG_PRACTICES)) &&
            !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) {
            continue;
        }

        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
        p->vkctx.extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (!dev &&
        ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
         (debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
         (debug_mode == FF_VULKAN_DEBUG_PRACTICES))) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

#ifdef VK_KHR_shader_relaxed_extended_instruction
    if (((debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
         (debug_mode == FF_VULKAN_DEBUG_PROFILE)) && dev) {
        tstr = VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug_printf/profile enabled, but extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }
#endif

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                       mod, token);
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    RELEASE_PROPS(extension_names, extensions_found);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}
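
/* Selects instance layers and derives the debug mode. The "debug" option
 * accepts either a named mode ("validate", "printf", "practices", "profile")
 * or the numeric value of the corresponding FFVulkanDebugMode entry; the
 * "layers" option adds custom layers as a '+'-separated list. */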
static int check_layers(AVHWDeviceContext *ctx, AVDictionary *opts,
                        const char * const **dst, uint32_t *num,
                        enum FFVulkanDebugMode *debug_mode)
{
    int err = 0;
    VulkanDevicePriv *priv = ctx->hwctx;
    FFVulkanFunctions *vk = &priv->vkctx.vkfn;

    static const char layer_standard_validation[] = { "VK_LAYER_KHRONOS_validation" };
    int layer_standard_validation_found = 0;

    uint32_t sup_layer_count;
    VkLayerProperties *sup_layers;

    AVDictionaryEntry *user_layers = av_dict_get(opts, "layers", NULL, 0);
    char *user_layers_str = NULL;
    char *save, *token;

    const char **enabled_layers = NULL;
    uint32_t enabled_layers_count = 0;

    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    enum FFVulkanDebugMode mode;

    *debug_mode = mode = FF_VULKAN_DEBUG_NONE;

    /* Get a list of all layers */
    vk->EnumerateInstanceLayerProperties(&sup_layer_count, NULL);
    sup_layers = av_malloc_array(sup_layer_count, sizeof(VkLayerProperties));
    if (!sup_layers)
        return AVERROR(ENOMEM);
    vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers);

    av_log(ctx, AV_LOG_VERBOSE, "Supported layers:\n");
    for (int i = 0; i < sup_layer_count; i++)
        av_log(ctx, AV_LOG_VERBOSE, "\t%s\n", sup_layers[i].layerName);

    /* If no user layers or debug layers are given, return */
    if (!debug_opt && !user_layers)
        goto end;

    /* Check for any properly supported validation layer */
    if (debug_opt) {
        if (!strcmp(debug_opt->value, "profile")) {
            mode = FF_VULKAN_DEBUG_PROFILE;
        } else if (!strcmp(debug_opt->value, "printf")) {
            mode = FF_VULKAN_DEBUG_PRINTF;
        } else if (!strcmp(debug_opt->value, "validate")) {
            mode = FF_VULKAN_DEBUG_VALIDATE;
        } else if (!strcmp(debug_opt->value, "practices")) {
            mode = FF_VULKAN_DEBUG_PRACTICES;
        } else {
            char *end_ptr = NULL;
            int idx = strtol(debug_opt->value, &end_ptr, 10);
            if (end_ptr == debug_opt->value || end_ptr[0] != '\0' ||
                idx < 0 || idx >= FF_VULKAN_DEBUG_NB) {
                av_log(ctx, AV_LOG_ERROR, "Invalid debugging mode \"%s\"\n",
                       debug_opt->value);
                err = AVERROR(EINVAL);
                goto end;
            }
            mode = idx;
        }
    }

    /* If mode is VALIDATE, PRINTF or PRACTICES, try to find the standard validation layer */
    if ((mode == FF_VULKAN_DEBUG_VALIDATE) ||
        (mode == FF_VULKAN_DEBUG_PRINTF) ||
        (mode == FF_VULKAN_DEBUG_PRACTICES)) {
        for (int i = 0; i < sup_layer_count; i++) {
            if (!strcmp(layer_standard_validation, sup_layers[i].layerName)) {
                av_log(ctx, AV_LOG_VERBOSE, "Standard validation layer %s is enabled\n",
                       layer_standard_validation);
                ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, layer_standard_validation);
                *debug_mode = mode;
                layer_standard_validation_found = 1;
                break;
            }
        }
        if (!layer_standard_validation_found) {
            av_log(ctx, AV_LOG_ERROR,
                   "Validation Layer \"%s\" not supported\n", layer_standard_validation);
            err = AVERROR(ENOTSUP);
            goto end;
        }
    } else if (mode == FF_VULKAN_DEBUG_PROFILE) {
        *debug_mode = mode;
    }

    /* Process any custom layers enabled */
    if (user_layers) {
        int found;

        user_layers_str = av_strdup(user_layers->value);
        if (!user_layers_str) {
            err = AVERROR(ENOMEM);
            goto fail;
        }

        token = av_strtok(user_layers_str, "+", &save);
        while (token) {
            found = 0;

            /* If the standard validation layer was already enabled via the
             * debug option, skip this token rather than re-adding it */
            if (!strcmp(layer_standard_validation, token) && layer_standard_validation_found) {
                token = av_strtok(NULL, "+", &save);
                continue;
            }

            /* Try to find the layer in the list of supported layers */
            for (int j = 0; j < sup_layer_count; j++) {
                if (!strcmp(token, sup_layers[j].layerName)) {
                    found = 1;
                    break;
                }
            }

            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using layer: %s\n", token);
                ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token);

                /* If debug was not set as an option, force it */
                if (!strcmp(layer_standard_validation, token))
                    *debug_mode = FF_VULKAN_DEBUG_VALIDATE;
            } else {
                av_log(ctx, AV_LOG_ERROR,
                       "Layer \"%s\" not supported\n", token);
                err = AVERROR(EINVAL);
                goto end;
            }

            token = av_strtok(NULL, "+", &save);
        }
    }

fail:
end:
    av_free(sup_layers);
    av_free(user_layers_str);

    if (err < 0) {
        RELEASE_PROPS(enabled_layers, enabled_layers_count);
    } else {
        *dst = enabled_layers;
        *num = enabled_layers_count;
    }

    return err;
}
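
/* For reference, the "debug" option parsed above accepts either one of the
 * names matched in the code ("validate", "printf", "practices", "profile")
 * or the equivalent numeric enum index, e.g. (assuming the standard ffmpeg
 * CLI device syntax):
 *   -init_hw_device vulkan=vk:0,debug=validate
 * Every mode except "profile" requires VK_LAYER_KHRONOS_validation. */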
/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts,
                           enum FFVulkanDebugMode *debug_mode)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkApplicationInfo application_info = {
        .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pApplicationName   = "ffmpeg",
        .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
        .pEngineName        = "libavutil",
        .apiVersion         = VK_API_VERSION_1_3,
        .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
    };
    VkValidationFeaturesEXT validation_features = {
        .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    if (!hwctx->get_proc_addr) {
        err = load_libvulkan(ctx);
        if (err < 0)
            return err;
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 0, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n");
        return err;
    }

    err = check_layers(ctx, opts, &inst_props.ppEnabledLayerNames,
                       &inst_props.enabledLayerCount, debug_mode);
    if (err)
        goto fail;

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, *debug_mode);
    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;
    if (err < 0)
        goto fail;

    /* Enable debug features if needed */
    if (*debug_mode == FF_VULKAN_DEBUG_VALIDATE) {
        static const VkValidationFeatureEnableEXT feat_list_validate[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_validate;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_validate);
        inst_props.pNext = &validation_features;
    } else if (*debug_mode == FF_VULKAN_DEBUG_PRINTF) {
        static const VkValidationFeatureEnableEXT feat_list_debug[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
            VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_debug;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_debug);
        inst_props.pNext = &validation_features;
    } else if (*debug_mode == FF_VULKAN_DEBUG_PRACTICES) {
        static const VkValidationFeatureEnableEXT feat_list_practices[] = {
            VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
            VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT,
        };
        validation_features.pEnabledValidationFeatures = feat_list_practices;
        validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_practices);
        inst_props.pNext = &validation_features;
    }

#ifdef __APPLE__
    for (int i = 0; i < inst_props.enabledExtensionCount; i++) {
        if (!strcmp(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,
                    inst_props.ppEnabledExtensionNames[i])) {
            inst_props.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
            break;
        }
    }
#endif

    /* Try to create the instance */
    ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n");
        goto fail;
    }

    /* Setup debugging callback if needed */
    if ((*debug_mode == FF_VULKAN_DEBUG_VALIDATE) ||
        (*debug_mode == FF_VULKAN_DEBUG_PRINTF) ||
        (*debug_mode == FF_VULKAN_DEBUG_PRACTICES)) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType           = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType     = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData       = ctx,
        };

        vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                         hwctx->alloc, &p->debug_ctx);
    }

    err = 0;

fail:
    RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount);
    return err;
}
typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    uint32_t drm_major;         /* Will use this second unless !has_drm */
    uint32_t drm_minor;         /* Will use this second unless !has_drm */
    uint32_t has_drm;           /* has drm node info */
    const char *name;           /* Will use this third unless NULL */
    uint32_t pci_device;        /* Will use this fourth unless 0x0 */
    uint32_t vendor_id;         /* Last resort to find something deterministic */
    int index;                  /* Finally fall back to index */
} VulkanDeviceSelection;
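
/* Selection precedence, as implemented by find_device() below:
 *   1. device UUID          (exact byte match)
 *   2. DRM node major/minor (primary or render node)
 *   3. device name          (substring match)
 *   4. PCI device ID
 *   5. PCI vendor ID
 *   6. enumeration index
 * Only the first populated criterion is ever consulted. */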
static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}
/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    VkPhysicalDeviceDriverProperties *driver_prop = NULL;
    VkPhysicalDeviceDrmPropertiesEXT *drm_prop = NULL;

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", ff_vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_calloc(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_calloc(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    driver_prop = av_calloc(num, sizeof(*driver_prop));
    if (!driver_prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
        drm_prop = av_calloc(num, sizeof(*drm_prop));
        if (!drm_prop) {
            err = AVERROR(ENOMEM);
            goto end;
        }
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
            drm_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT;
            driver_prop[i].pNext = &drm_prop[i];
        }
        driver_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
        idp[i].pNext = &driver_prop[i];
        idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            /* UUIDs are raw bytes and may contain zeros, so compare with
             * memcmp rather than a string function */
            if (!memcmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) {
        for (int i = 0; i < num; i++) {
            if ((select->drm_major == drm_prop[i].primaryMajor &&
                 select->drm_minor == drm_prop[i].primaryMinor) ||
                (select->drm_major == drm_prop[i].renderMajor &&
                 select->drm_minor == drm_prop[i].renderMinor)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n",
               select->drm_major, select->drm_minor);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1) {
        av_log(ctx, AV_LOG_VERBOSE, "Device %d selected: %s (%s) (0x%x)\n",
               choice, prop[choice].properties.deviceName,
               vk_dev_type(prop[choice].properties.deviceType),
               prop[choice].properties.deviceID);
        hwctx->phys_dev = devices[choice];
        p->props = prop[choice];
        p->props.pNext = NULL;
        p->dprops = driver_prop[choice];
        p->dprops.pNext = NULL;
    }

    av_free(devices);
    av_free(prop);
    av_free(idp);
    av_free(drm_prop);
    av_free(driver_prop);
    return err;
}
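
/* Example device strings (assuming the standard ffmpeg CLI device syntax,
 * parsed by vulkan_device_create() below): "-init_hw_device vulkan=vk:1"
 * selects by enumeration index, while a non-numeric string such as
 * "-init_hw_device vulkan=vk:llvmpipe" is matched as a substring of the
 * reported device name via strstr() above. */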
/* Picks the least used qf with the fewest unneeded flags, or -1 if none found */
static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf,
                                    VkQueueFlagBits flags)
{
    int index = -1;
    uint32_t min_score = UINT32_MAX;

    for (int i = 0; i < num_qf; i++) {
        VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;

        /* Per the spec, reporting transfer caps is optional for these 2 types */
        if ((flags & VK_QUEUE_TRANSFER_BIT) &&
            (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
            qflags |= VK_QUEUE_TRANSFER_BIT;

        if (qflags & flags) {
            uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits;
            if (score < min_score) {
                index = i;
                min_score = score;
            }
        }
    }

    if (index > -1)
        qf[index].queueFamilyProperties.timestampValidBits++;

    return index;
}
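
/* Worked example of the scoring above: asking for TRANSFER on a device with
 * family A = GRAPHICS|COMPUTE|TRANSFER (popcount 3) and family B = TRANSFER
 * only (popcount 1) picks B, since fewer extraneous capabilities means a
 * lower score. timestampValidBits is repurposed as a per-family usage
 * counter (it is zeroed in setup_queue_families() before any picking), so
 * each successful pick raises that family's future score and spreads queues
 * across families. */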
static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf,
                                          VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf,
                                          VkVideoCodecOperationFlagBitsKHR flags)
{
    int index = -1;
    uint32_t min_score = UINT32_MAX;

    for (int i = 0; i < num_qf; i++) {
        const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
        const VkQueueFlagBits vflags = qf_vid[i].videoCodecOperations;

        if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR)))
            continue;

        if (vflags & flags) {
            uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits;
            if (score < min_score) {
                index = i;
                min_score = score;
            }
        }
    }

    if (index > -1)
        qf[index].queueFamilyProperties.timestampValidBits++;

    return index;
}
static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkQueueFamilyProperties2 *qf = NULL;
    VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL;

    /* First get the number of queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2));
    if (!qf)
        return AVERROR(ENOMEM);

    qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR));
    if (!qf_vid) {
        av_free(qf);
        return AVERROR(ENOMEM);
    }

    for (uint32_t i = 0; i < num; i++) {
        qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
        };
        qf[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? &qf_vid[i] : NULL,
        };
    }

    /* Finally retrieve the queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf);

    av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n");
    for (int i = 0; i < num; i++) {
        av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s%s (queues: %i)\n", i,
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_OPTICAL_FLOW_BIT_NV) ? " optical_flow" : "",
               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
               qf[i].queueFamilyProperties.queueCount);

        /* We use this field to keep a score of how many times we've used that
         * queue family in order to make better choices. */
        qf[i].queueFamilyProperties.timestampValidBits = 0;
    }

    hwctx->nb_qf = 0;

    /* Pick each queue family to use */
#define PICK_QF(type, vid_op)                                            \
    do {                                                                 \
        uint32_t i;                                                      \
        uint32_t idx;                                                    \
                                                                         \
        if (vid_op)                                                      \
            idx = pick_video_queue_family(qf, qf_vid, num, vid_op);      \
        else                                                             \
            idx = pick_queue_family(qf, num, type);                      \
                                                                         \
        if (idx == -1)                                                   \
            continue;                                                    \
                                                                         \
        for (i = 0; i < hwctx->nb_qf; i++) {                             \
            if (hwctx->qf[i].idx == idx) {                               \
                hwctx->qf[i].flags |= type;                              \
                hwctx->qf[i].video_caps |= vid_op;                       \
                break;                                                   \
            }                                                            \
        }                                                                \
        if (i == hwctx->nb_qf) {                                         \
            hwctx->qf[i].idx = idx;                                      \
            hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \
            hwctx->qf[i].flags = type;                                   \
            hwctx->qf[i].video_caps = vid_op;                            \
            hwctx->nb_qf++;                                              \
        }                                                                \
    } while (0)
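
/* Note on the dedup logic above: if the picker returns a family that already
 * has an entry in hwctx->qf[], the new capability and codec-op flags are
 * merged into that entry instead of appending a duplicate, so e.g. H264 and
 * H265 decode usually collapse into a single decode entry. */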
    PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_OPTICAL_FLOW_BIT_NV, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
    PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);
    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);

    av_free(qf);
    av_free(qf_vid);

#undef PICK_QF

    cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf,
                                            sizeof(VkDeviceQueueCreateInfo));
    if (!cd->pQueueCreateInfos)
        return AVERROR(ENOMEM);

    for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
        int dup = 0;
        float *weights = NULL;
        VkDeviceQueueCreateInfo *pc;
        for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) {
            if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) {
                dup = 1;
                break;
            }
        }
        if (dup)
            continue;

        weights = av_malloc_array(hwctx->qf[i].num, sizeof(float));
        if (!weights) {
            for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++)
                av_free((void *)cd->pQueueCreateInfos[j].pQueuePriorities);
            av_free((void *)cd->pQueueCreateInfos);
            return AVERROR(ENOMEM);
        }

        for (uint32_t j = 0; j < hwctx->qf[i].num; j++)
            weights[j] = 1.0;

        pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
        pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) {
            .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
            .queueFamilyIndex = hwctx->qf[i].idx,
            .queueCount       = hwctx->qf[i].num,
            .pQueuePriorities = weights,
        };
    }

#if FF_API_VULKAN_FIXED_QUEUES
FF_DISABLE_DEPRECATION_WARNINGS
    /* Setup deprecated fields */
    hwctx->queue_family_index        = -1;
    hwctx->queue_family_comp_index   = -1;
    hwctx->queue_family_tx_index     = -1;
    hwctx->queue_family_encode_index = -1;
    hwctx->queue_family_decode_index = -1;

#define SET_OLD_QF(field, nb_field, type)             \
    do {                                              \
        if (field < 0 && hwctx->qf[i].flags & type) { \
            field = hwctx->qf[i].idx;                 \
            nb_field = hwctx->qf[i].num;              \
        }                                             \
    } while (0)

    for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
        SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
        SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
        SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
        SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
        SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
    }

#undef SET_OLD_QF
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    return 0;
}
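
/* All queue priorities are set to 1.0 above: every queue in a family is
 * treated as equal, and relative scheduling is left to the driver. */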
/* Only resources created by vulkan_device_create should be released here,
 * resources created by vulkan_device_init should be released by
 * vulkan_device_uninit, to make sure we don't free user provided resources,
 * and there is no leak.
 */
static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    if (hwctx->act_dev)
        vk->DestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx)
        vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                          hwctx->alloc);

    if (hwctx->inst)
        vk->DestroyInstance(hwctx->inst, hwctx->alloc);

    if (p->libvulkan)
        dlclose(p->libvulkan);

    RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
    RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}
static void vulkan_device_uninit(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->hwctx;

    for (uint32_t i = 0; i < p->nb_tot_qfs; i++) {
        pthread_mutex_destroy(p->qf_mutex[i]);
        av_freep(&p->qf_mutex[i]);
    }
    av_freep(&p->qf_mutex);

    ff_vk_uninit(&p->vkctx);
}
static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         int disable_multiplane,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    enum FFVulkanDebugMode debug_mode = FF_VULKAN_DEBUG_NONE;
    VulkanDeviceFeatures supported_feats = { 0 };
    VkDeviceCreateInfo dev_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
    };

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts, &debug_mode)))
        goto end;

    /* Find a physical device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    /* Find and enable extensions for the physical device */
    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, debug_mode))) {
        for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
            av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
        av_free((void *)dev_info.pQueueCreateInfos);
        goto end;
    }

    /* Get all supported features for the physical device */
    device_features_init(ctx, &supported_feats);
    vk->GetPhysicalDeviceFeatures2(hwctx->phys_dev, &supported_feats.device);

    /* Copy all needed features from those supported and activate them */
    device_features_init(ctx, &p->feats);
    device_features_copy_needed(&p->feats, &supported_feats);
    dev_info.pNext = p->feats.device.pNext;
    dev_info.pEnabledFeatures = &p->feats.device.features;

    /* Setup enabled queue families */
    if ((err = setup_queue_families(ctx, &dev_info)))
        goto end;

    /* Finally create the device */
    ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                           &hwctx->act_dev);

    for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
        av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
    av_free((void *)dev_info.pQueueCreateInfos);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               ff_vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Tiled images setting, use them by default */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    /* The disable_multiplane argument takes precedence over the option */
    p->disable_multiplane = disable_multiplane;
    if (!p->disable_multiplane) {
        opt_d = av_dict_get(opts, "disable_multiplane", NULL, 0);
        if (opt_d)
            p->disable_multiplane = strtol(opt_d->value, NULL, 10);
    }

    /* Set the public device feature struct and its pNext chain */
    hwctx->device_features = p->feats.device;

    /* Set the list of all active extensions */
    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

    /* The extension lists need to be freed */
    ctx->free = vulkan_device_free;

end:
    return err;
}
static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
{
    VulkanDevicePriv *p = ctx->hwctx;
    pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
}

static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
{
    VulkanDevicePriv *p = ctx->hwctx;
    pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
}
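
/* These are only the defaults: vulkan_device_init() below installs them into
 * hwctx->lock_queue/unlock_queue unless the API user has already supplied
 * callbacks. Any submission to a queue owned by this context is expected to
 * be bracketed by the pair, along the lines of:
 *
 *     hwctx->lock_queue(ctx, qf_idx, q_idx);
 *     // ... vkQueueSubmit2() on queue (qf_idx, q_idx) ...
 *     hwctx->unlock_queue(ctx, qf_idx, q_idx);
 */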
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    int err = 0;
    uint32_t qf_num;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkQueueFamilyProperties2 *qf;
    VkQueueFamilyVideoPropertiesKHR *qf_vid;
    VkPhysicalDeviceExternalSemaphoreInfo ext_sem_props_info;
    int graph_index, comp_index, tx_index, enc_index, dec_index;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                p->vkctx.extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 1);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n");
        return err;
    }

    p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    p->props.pNext = &p->hprops;
    p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;

    vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n",
           p->props.properties.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n",
           p->props.properties.limits.minMemoryMapAlignment);
    av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n",
           p->props.properties.limits.nonCoherentAtomSize);
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
        av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n",
               p->hprops.minImportedHostPointerAlignment);

    p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);

    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
    if (!qf_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    ext_sem_props_info = (VkPhysicalDeviceExternalSemaphoreInfo) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO,
    };

    /* Opaque FD semaphore properties */
    ext_sem_props_info.handleType =
#ifdef _WIN32
        IsWindows8OrGreater()
            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT;
#else
        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
    p->ext_sem_props_opaque.sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES;
    vk->GetPhysicalDeviceExternalSemaphoreProperties(hwctx->phys_dev,
                                                     &ext_sem_props_info,
                                                     &p->ext_sem_props_opaque);

    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2));
    if (!qf)
        return AVERROR(ENOMEM);

    qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR));
    if (!qf_vid) {
        av_free(qf);
        return AVERROR(ENOMEM);
    }

    for (uint32_t i = 0; i < qf_num; i++) {
        qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
        };
        qf[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? &qf_vid[i] : NULL,
        };
    }

    vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf);

    p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
    if (!p->qf_mutex) {
        err = AVERROR(ENOMEM);
        goto end;
    }
    p->nb_tot_qfs = qf_num;

    for (uint32_t i = 0; i < qf_num; i++) {
        p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount,
                                   sizeof(**p->qf_mutex));
        if (!p->qf_mutex[i]) {
            err = AVERROR(ENOMEM);
            goto end;
        }
        for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) {
            err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
            if (err != 0) {
                av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n",
                       av_err2str(err));
                err = AVERROR(err);
                goto end;
            }
        }
    }

#if FF_API_VULKAN_FIXED_QUEUES
FF_DISABLE_DEPRECATION_WARNINGS
    graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1;
    comp_index  = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1;
    tx_index    = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1;
    dec_index   = hwctx->nb_decode_queues ? hwctx->queue_family_decode_index : -1;
    enc_index   = hwctx->nb_encode_queues ? hwctx->queue_family_encode_index : -1;

#define CHECK_QUEUE(type, required, fidx, ctx_qf, qc)                                           \
    do {                                                                                        \
        if (ctx_qf < 0 && required) {                                                           \
            av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing"      \
                   " in the context!\n", type);                                                 \
            err = AVERROR(EINVAL);                                                              \
            goto end;                                                                           \
        } else if (fidx < 0 || ctx_qf < 0) {                                                    \
            break;                                                                              \
        } else if (ctx_qf >= qf_num) {                                                          \
            av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
                   type, ctx_qf, qf_num);                                                       \
            err = AVERROR(EINVAL);                                                              \
            goto end;                                                                           \
        }                                                                                       \
                                                                                                \
        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)"                       \
               " for%s%s%s%s%s\n",                                                              \
               ctx_qf, qc,                                                                      \
               ctx_qf == graph_index ? " graphics" : "",                                        \
               ctx_qf == comp_index ? " compute" : "",                                          \
               ctx_qf == tx_index ? " transfers" : "",                                          \
               ctx_qf == enc_index ? " encode" : "",                                            \
               ctx_qf == dec_index ? " decode" : "");                                           \
        graph_index = (ctx_qf == graph_index) ? -1 : graph_index;                               \
        comp_index  = (ctx_qf == comp_index) ? -1 : comp_index;                                 \
        tx_index    = (ctx_qf == tx_index) ? -1 : tx_index;                                     \
        enc_index   = (ctx_qf == enc_index) ? -1 : enc_index;                                   \
        dec_index   = (ctx_qf == dec_index) ? -1 : dec_index;                                   \
    } while (0)

    CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
    CHECK_QUEUE("compute",  1, comp_index,  hwctx->queue_family_comp_index, hwctx->nb_comp_queues);
    CHECK_QUEUE("upload",   1, tx_index,    hwctx->queue_family_tx_index, hwctx->nb_tx_queues);
    CHECK_QUEUE("decode",   0, dec_index,   hwctx->queue_family_decode_index, hwctx->nb_decode_queues);
    CHECK_QUEUE("encode",   0, enc_index,   hwctx->queue_family_encode_index, hwctx->nb_encode_queues);

#undef CHECK_QUEUE

    /* Update the new queue family fields. If non-zero already,
     * it means API users have set it. */
    if (!hwctx->nb_qf) {
#define ADD_QUEUE(ctx_qf, qc, flag)                                   \
    do {                                                              \
        if (ctx_qf != -1) {                                           \
            hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \
                .idx = ctx_qf,                                        \
                .num = qc,                                            \
                .flags = flag,                                        \
            };                                                        \
        }                                                             \
    } while (0)

        ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
        ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
        ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
        ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
        ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
#undef ADD_QUEUE
    }
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    for (int i = 0; i < hwctx->nb_qf; i++) {
        if (!hwctx->qf[i].video_caps &&
            hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR |
                                  VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) {
            hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations;
        }
    }

    /* Setup array for pQueueFamilyIndices with used queue families */
    p->nb_img_qfs = 0;
    for (int i = 0; i < hwctx->nb_qf; i++) {
        int seen = 0;
        /* Make sure each entry is unique
         * (VUID-VkBufferCreateInfo-sharingMode-01419) */
        for (int j = (i - 1); j >= 0; j--) {
            if (hwctx->qf[i].idx == hwctx->qf[j].idx) {
                seen = 1;
                break;
            }
        }
        if (!seen)
            p->img_qfs[p->nb_img_qfs++] = hwctx->qf[i].idx;
    }

    if (!hwctx->lock_queue)
        hwctx->lock_queue = lock_queue;
    if (!hwctx->unlock_queue)
        hwctx->unlock_queue = unlock_queue;

    /* Get device capabilities */
    vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    p->vkctx.device = ctx;
    p->vkctx.hwctx = hwctx;

    ff_vk_load_props(&p->vkctx);
    p->compute_qf = ff_vk_qf_find(&p->vkctx, VK_QUEUE_COMPUTE_BIT, 0);
    p->transfer_qf = ff_vk_qf_find(&p->vkctx, VK_QUEUE_TRANSFER_BIT, 0);

end:
    av_free(qf_vid);
    av_free(qf);
    return err;
}
static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
}
static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx,
                                AVDictionary *opts, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch (src_ctx->type) {
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
        VADisplay dpy = src_hwctx->display;
#if VA_CHECK_VERSION(1, 15, 0)
        VAStatus vas;
        VADisplayAttribute attr = {
            .type = VADisplayPCIID,
        };
#endif
        const char *vendor;

#if VA_CHECK_VERSION(1, 15, 0)
        vas = vaGetDisplayAttributes(dpy, &attr, 1);
        if (vas == VA_STATUS_SUCCESS && attr.flags != VA_DISPLAY_ATTRIB_NOT_SUPPORTED)
            dev_select.pci_device = (attr.value & 0xFFFF);
#endif

        if (!dev_select.pci_device) {
            vendor = vaQueryVendorString(dpy);
            if (!vendor) {
                av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
                return AVERROR_EXTERNAL;
            }

            if (strstr(vendor, "AMD"))
                dev_select.vendor_id = 0x1002;
        }

        return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
    }
#endif
#if CONFIG_LIBDRM
    case AV_HWDEVICE_TYPE_DRM: {
        int err;
        struct stat drm_node_info;
        drmDevice *drm_dev_info;
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        err = fstat(src_hwctx->fd, &drm_node_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get node info from DRM fd: %s!\n",
                   av_err2str(AVERROR(errno)));
            return AVERROR_EXTERNAL;
        }

        dev_select.drm_major = major(drm_node_info.st_dev);
        dev_select.drm_minor = minor(drm_node_info.st_dev);
        dev_select.has_drm   = 1;

        err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd: %s!\n",
                   av_err2str(AVERROR(errno)));
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        /*
         * CUDA is not able to import multiplane images, so always derive a
         * Vulkan device with multiplane disabled.
         */
        return vulkan_device_create_internal(ctx, &dev_select, 1, opts, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}
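
/* Derivation summary: each source API contributes whatever identifier it can
 * recover (VAAPI: a PCI device ID, or just the vendor ID on older drivers;
 * DRM: the node's major/minor numbers plus a PCI device ID; CUDA: the device
 * UUID, with multiplane images disabled), and find_device() then maps that
 * onto a VkPhysicalDevice. */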
static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    VulkanDevicePriv *p = ctx->hwctx;

    for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
        count += vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
                                    p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
                                                           VK_IMAGE_TILING_OPTIMAL,
                                    NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0;
    }

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
        if (vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
                               p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
                                                      VK_IMAGE_TILING_OPTIMAL,
                               NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0) {
            constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt;
        }
    }
    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 1;
    constraints->min_height = 1;
    constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
    constraints->max_height = p->props.properties.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);
    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}
static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *dev_hwctx = &p->p;
    VkMemoryAllocateInfo alloc_info = {
        .sType          = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext          = alloc_extension,
        .allocationSize = req->size,
    };

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        const VkMemoryType *type = &p->mprops.memoryTypes[i];

        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((type->propertyFlags & req_flags) != req_flags)
            continue;

        /* The memory type must be large enough */
        if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info,
                             dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}
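
/* Worked example for the loop above: if an image reports
 * memoryTypeBits = 0b0101, only types 0 and 2 are candidates; of those, the
 * lowest-indexed type whose propertyFlags contain all of req_flags (e.g.
 * DEVICE_LOCAL for optimally tiled images) and whose backing heap is large
 * enough wins, which by the spec's ordering guarantee is also the
 * best-performing one. */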
static void vulkan_free_internal(AVVkFrame *f)
{
    av_unused AVVkFrameInternal *internal = f->internal;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
#ifdef _WIN32
            if (internal->ext_sem_handle[i])
                CloseHandle(internal->ext_sem_handle[i]);
            if (internal->ext_mem_handle[i])
                CloseHandle(internal->ext_mem_handle[i]);
#endif
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    pthread_mutex_destroy(&internal->update_mutex);
    av_freep(&f->internal);
}
static void vulkan_frame_free(AVHWFramesContext *hwfc, AVVkFrame *f)
{
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    int nb_images = ff_vk_count_images(f);
    int nb_sems = 0;

    while (nb_sems < FF_ARRAY_ELEMS(f->sem) && f->sem[nb_sems])
        nb_sems++;

    if (nb_sems) {
        VkSemaphoreWaitInfo sem_wait = {
            .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
            .flags          = 0x0,
            .pSemaphores    = f->sem,
            .pValues        = f->sem_value,
            .semaphoreCount = nb_sems,
        };

        vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
    }

    vulkan_free_internal(f);

    for (int i = 0; i < nb_images; i++) {
        vk->DestroyImage(hwctx->act_dev,     f->img[i], hwctx->alloc);
        vk->FreeMemory(hwctx->act_dev,       f->mem[i], hwctx->alloc);
        vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

static void vulkan_frame_free_cb(void *opaque, uint8_t *data)
{
    vulkan_frame_free(opaque, (AVVkFrame*)data);
}
static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int img_cnt = 0, err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    while (f->img[img_cnt]) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[img_cnt],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        if (f->tiling == VK_IMAGE_TILING_LINEAR)
            req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                                  p->props.properties.limits.minMemoryMapAlignment);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[img_cnt];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                                 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[img_cnt])))
            return err;

        f->size[img_cnt] = req.memoryRequirements.size;
        bind_info[img_cnt].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[img_cnt].image  = f->img[img_cnt];
        bind_info[img_cnt].memory = f->mem[img_cnt];

        img_cnt++;
    }

    /* Bind the allocated memory to the images */
    ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
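
/* Allocation strategy above in short: one VkDeviceMemory per image, host
 * visible when tiling is LINEAR (so frames can be mapped for CPU access),
 * device local otherwise, and a VkMemoryDedicatedAllocateInfo is chained in
 * whenever the driver prefers or requires a dedicated allocation; all
 * image/memory pairs are then bound in a single vkBindImageMemory2 call. */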
enum PrepMode {
    PREP_MODE_GENERAL,
    PREP_MODE_WRITE,
    PREP_MODE_EXTERNAL_EXPORT,
    PREP_MODE_EXTERNAL_IMPORT,
    PREP_MODE_DECODING_DST,
    PREP_MODE_DECODING_DPB,
    PREP_MODE_ENCODING_DPB,
};
static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    int err;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
    int nb_img_bar = 0;

    uint32_t dst_qf = VK_QUEUE_FAMILY_IGNORED;
    VkImageLayout new_layout;
    VkAccessFlags2 new_access;
    VkPipelineStageFlagBits2 src_stage = VK_PIPELINE_STAGE_2_NONE;

    /* This is dirty - but it works. The vulkan.c dependency system doesn't
     * free non-refcounted frames, and non-refcounted hardware frames cannot
     * happen anywhere outside of here. */
    AVBufferRef tmp_ref = {
        .data = (uint8_t *)hwfc,
    };
    AVFrame tmp_frame = {
        .data[0] = (uint8_t *)frame,
        .hw_frames_ctx = &tmp_ref,
    };

    VkCommandBuffer cmd_buf;
    FFVkExecContext *exec = ff_vk_exec_get(&p->vkctx, ectx);
    cmd_buf = exec->buf;
    ff_vk_exec_start(&p->vkctx, exec);

    err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, &tmp_frame,
                                   VK_PIPELINE_STAGE_2_NONE,
                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
    if (err < 0)
        return err;

    switch (pmode) {
    case PREP_MODE_GENERAL:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_WRITE:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_EXTERNAL_IMPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        break;
    case PREP_MODE_EXTERNAL_EXPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        src_stage  = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
        break;
    case PREP_MODE_DECODING_DST:
        new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_DECODING_DPB:
        new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
        new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_ENCODING_DPB:
        new_layout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR;
        new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    }

    ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar,
                        src_stage,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        new_access, new_layout, dst_qf);

    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
            .sType                   = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers    = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    err = ff_vk_exec_submit(&p->vkctx, exec);
    if (err < 0)
        return err;

    /* We can do this because there are no real dependencies */
    ff_vk_exec_discard_deps(&p->vkctx, exec);

    return 0;
}
static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format,
                                int frame_w, int frame_h, int plane)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);

    /* Currently always true unless gray + alpha support is added */
    if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
        !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
        *w = frame_w;
        *h = frame_h;
        return;
    }

    *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
    *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
}
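
/* Example: for NV12 (log2_chroma_w/h == 1) at 1920x1080, plane 0 stays
 * 1920x1080 while plane 1 becomes 960x540; AV_CEIL_RSHIFT rounds odd
 * dimensions up, so a width of 1921 maps to 961 rather than 960. */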
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,
                        VkImageCreateFlags flags, int nb_layers,
                        void *create_pnext)
{
    int err;
    VkResult ret;
    AVVulkanFramesContext *hwfc_vk = hwfc->hwctx;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVkFrame *f;

    VkSemaphoreTypeCreateInfo sem_type_info = {
        .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
        .initialValue  = 0,
    };
    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = &sem_type_info,
    };

    VkExportSemaphoreCreateInfo ext_sem_info_opaque = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
#ifdef _WIN32
        .handleTypes = IsWindows8OrGreater()
            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
#else
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
#endif
    };

    /* Check if exporting is supported before chaining any structs */
    if (p->ext_sem_props_opaque.externalSemaphoreFeatures & VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT) {
        if (p->vkctx.extensions & (FF_VK_EXT_EXTERNAL_WIN32_SEM | FF_VK_EXT_EXTERNAL_FD_SEM))
            ff_vk_link_struct(&sem_type_info, &ext_sem_info_opaque);
    }

    f = av_vk_frame_alloc();
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        return AVERROR(ENOMEM);
    }
    // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled)
    /* Create the images */
    for (int i = 0; (hwfc_vk->format[i] != VK_FORMAT_UNDEFINED); i++) {
        VkImageCreateInfo create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = create_pnext,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = hwfc_vk->format[i],
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = nb_layers,
            .flags                 = flags,
            .tiling                = tiling,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage                 = usage,
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->img_qfs,
            .queueFamilyIndexCount = p->nb_img_qfs,
            .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                         VK_SHARING_MODE_EXCLUSIVE,
        };

        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
                     hwfc->sw_format, hwfc->width, hwfc->height, i);

        ret = vk->CreateImage(hwctx->act_dev, &create_info,
                              hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Create semaphore */
        ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                  hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
        f->layout[i] = create_info.initialLayout;
        f->access[i] = 0x0;
        f->sem_value[i] = 0;
    }

    f->flags  = 0x0;
    f->tiling = tiling;

    *frame = f;
    return 0;

fail:
    vulkan_frame_free(hwfc, f);
    return err;
}
/* Checks if exporting with the given handle type is supported, and if it is, ORs it into *iexp */
static void try_export_flags(AVHWFramesContext *hwfc,
                             VkExternalMemoryHandleTypeFlags *comp_handle_types,
                             VkExternalMemoryHandleTypeFlagBits *iexp,
                             VkExternalMemoryHandleTypeFlagBits exp)
{
    VkResult ret;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *dev_hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
        ff_vk_find_struct(hwctx->create_pnext,
                          VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
    int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
    int nb_mods;

    VkExternalImageFormatProperties eprops = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
    };
    VkImageFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
        .pNext = &eprops,
    };
    VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
        .pNext = NULL,
        .pQueueFamilyIndices   = p->img_qfs,
        .queueFamilyIndexCount = p->nb_img_qfs,
        .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                     VK_SHARING_MODE_EXCLUSIVE,
    };
    VkPhysicalDeviceExternalImageFormatInfo enext = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
        .handleType = exp,
        .pNext = has_mods ? &phy_dev_mod_info : NULL,
    };
    VkPhysicalDeviceImageFormatInfo2 pinfo = {
        .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
        .pNext  = !exp ? NULL : &enext,
        .format = vk_find_format_entry(hwfc->sw_format)->vkf,
        .type   = VK_IMAGE_TYPE_2D,
        .tiling = hwctx->tiling,
        .usage  = hwctx->usage,
        .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
    };

    nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1;
    for (int i = 0; i < nb_mods; i++) {
        if (has_mods)
            phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[i];

        ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
                                                          &pinfo, &props);

        if (ret == VK_SUCCESS) {
            *iexp |= exp;
            *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
        }
    }
}
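/* AVBufferPool allocation callback: creates an AVVkFrame, negotiates which
 * external memory handle types can be exported, binds memory, and performs
 * the initial layout transition matching the frame's intended usage. */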
static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
{
    int err;
    AVVkFrame *f;
    AVBufferRef *avbuf = NULL;
    AVHWFramesContext *hwfc = opaque;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwctx = &fp->p;
    VkExternalMemoryHandleTypeFlags e = 0x0;
    VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];

    VkExternalMemoryImageCreateInfo eiinfo = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
        .pNext = hwctx->create_pnext,
    };

#ifdef _WIN32
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater()
                             ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                             : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
#else
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
#endif

    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_DMABUF_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

    for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
        eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
        eminfo[i].pNext       = hwctx->alloc_pnext[i];
        eminfo[i].handleTypes = e;
    }

    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
                       hwctx->nb_layers,
                       eiinfo.handleTypes ? &eiinfo : hwctx->create_pnext);
    if (err)
        return NULL;

    err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
    if (err)
        goto fail;

    if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
        !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB);
    else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST);
    else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_ENCODING_DPB);
    else if (hwctx->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE);
    else
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_GENERAL);
    if (err)
        goto fail;

    avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
                             vulkan_frame_free_cb, hwfc, 0);
    if (!avbuf)
        goto fail;

    return avbuf;

fail:
    vulkan_frame_free(hwfc, f);
    return NULL;
}
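/* Default lock/unlock callbacks, used unless the API user provides their own.
 * They guard updates to the frame's layout/access/semaphore metadata. */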
static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
    pthread_mutex_lock(&vkf->internal->update_mutex);
}

static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
    pthread_mutex_unlock(&vkf->internal->update_mutex);
}

static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;

    if (fp->modifier_info) {
        if (fp->modifier_info->pDrmFormatModifiers)
            av_freep(&fp->modifier_info->pDrmFormatModifiers);
        av_freep(&fp->modifier_info);
    }

    ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec);
    ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec);
    ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec);

    av_buffer_pool_uninit(&fp->tmp);
}
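/* Frames context initialization: validates/derives the Vulkan formats for the
 * sw_format, fills in default usage and image creation flags, sets up the
 * compute/upload/download execution pools, and test-allocates one frame. */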
static int vulkan_frames_init(AVHWFramesContext *hwfc)
{
    int err;
    AVVkFrame *f;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanFramesContext *hwctx = &fp->p;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    VkImageUsageFlagBits supported_usage;
    const struct FFVkFormatEntry *fmt;
    int disable_multiplane = p->disable_multiplane ||
                             (hwctx->flags & AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE);

    /* Defaults */
    if (!hwctx->nb_layers)
        hwctx->nb_layers = 1;

    /* VK_IMAGE_TILING_OPTIMAL == 0, can't check for it really */
    if (p->use_linear_images &&
        (hwctx->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT))
        hwctx->tiling = VK_IMAGE_TILING_LINEAR;

    fmt = vk_find_format_entry(hwfc->sw_format);
    if (!fmt) {
        av_log(hwfc, AV_LOG_ERROR, "Unsupported pixel format: %s!\n",
               av_get_pix_fmt_name(hwfc->sw_format));
        return AVERROR(EINVAL);
    }

    if (hwctx->format[0] != VK_FORMAT_UNDEFINED) {
        if (hwctx->format[0] != fmt->vkf) {
            for (int i = 0; i < fmt->nb_images_fallback; i++) {
                if (hwctx->format[i] != fmt->fallback[i]) {
                    av_log(hwfc, AV_LOG_ERROR, "Incompatible Vulkan format given "
                           "for the current sw_format %s!\n",
                           av_get_pix_fmt_name(hwfc->sw_format));
                    return AVERROR(EINVAL);
                }
            }
        }

        /* Check if the sw_format itself is supported */
        err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
                                 hwctx->tiling, NULL,
                                 NULL, NULL, &supported_usage, 0,
                                 !hwctx->usage ||
                                 (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT));
        if (err < 0) {
            av_log(hwfc, AV_LOG_ERROR, "Unsupported sw format: %s!\n",
                   av_get_pix_fmt_name(hwfc->sw_format));
            return AVERROR(EINVAL);
        }
    } else {
        err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
                                 hwctx->tiling, hwctx->format, NULL,
                                 NULL, &supported_usage,
                                 disable_multiplane,
                                 !hwctx->usage ||
                                 (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT));
        if (err < 0)
            return err;
    }
    /* Image usage flags */
    if (!hwctx->usage) {
        hwctx->usage = supported_usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                                          VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                          VK_IMAGE_USAGE_STORAGE_BIT      |
                                          VK_IMAGE_USAGE_SAMPLED_BIT);

        /* Enables encoding of images, if supported by format and extensions */
        if ((supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
            (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
                                    FF_VK_EXT_VIDEO_MAINTENANCE_1)))
            hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
    }
    /* Image creation flags.
     * Only fill them in automatically if the image is not going to be used as
     * a DPB-only image, and we have SAMPLED/STORAGE bits set. */
    if (!hwctx->img_flags) {
        int is_lone_dpb = ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) ||
                           ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
                            !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)));
        int sampleable = hwctx->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
                                         VK_IMAGE_USAGE_STORAGE_BIT);
        hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
        if (sampleable && !is_lone_dpb) {
            hwctx->img_flags |= VK_IMAGE_CREATE_ALIAS_BIT;
            if ((fmt->vk_planes > 1) && (hwctx->format[0] == fmt->vkf))
                hwctx->img_flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
        }
    }

    /* If the image has an ENCODE_SRC usage, and the maintenance1
     * extension is supported, check if it has a profile list.
     * If there's no profile list, or it has no encode operations,
     * then allow creating the image with no specific profile. */
    if ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
        (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
                                FF_VK_EXT_VIDEO_MAINTENANCE_1))) {
        const VkVideoProfileListInfoKHR *pl;
        pl = ff_vk_find_struct(hwctx->create_pnext, VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
        if (!pl) {
            hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
        } else {
            uint32_t i;
            for (i = 0; i < pl->profileCount; i++) {
                /* Video ops start at exactly 0x00010000 */
                if (pl->pProfiles[i].videoCodecOperation & 0xFFFF0000)
                    break;
            }
            if (i == pl->profileCount)
                hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
        }
    }

    if (!hwctx->lock_frame)
        hwctx->lock_frame = lock_frame;

    if (!hwctx->unlock_frame)
        hwctx->unlock_frame = unlock_frame;

    err = ff_vk_exec_pool_init(&p->vkctx, p->compute_qf, &fp->compute_exec,
                               p->compute_qf->num, 0, 0, 0, NULL);
    if (err)
        return err;

    err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, &fp->upload_exec,
                               p->transfer_qf->num*2, 0, 0, 0, NULL);
    if (err)
        return err;

    err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, &fp->download_exec,
                               p->transfer_qf->num, 0, 0, 0, NULL);
    if (err)
        return err;

    /* Test to see if allocation will fail */
    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
                       hwctx->nb_layers, hwctx->create_pnext);
    if (err)
        return err;

    vulkan_frame_free(hwfc, f);

    /* If user did not specify a pool, hwfc->pool will be set to the internal one
     * in hwcontext.c just after this gets called */
    if (!hwfc->pool) {
        ffhwframesctx(hwfc)->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
                                                                  hwfc, vulkan_pool_alloc,
                                                                  NULL);
        if (!ffhwframesctx(hwfc)->pool_internal)
            return AVERROR(ENOMEM);
    }

    return 0;
}
static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
{
    frame->buf[0] = av_buffer_pool_get(hwfc->pool);
    if (!frame->buf[0])
        return AVERROR(ENOMEM);

    frame->data[0] = frame->buf[0]->data;
    frame->format  = AV_PIX_FMT_VULKAN;
    frame->width   = hwfc->width;
    frame->height  = hwfc->height;

    return 0;
}

static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
                                       enum AVHWFrameTransferDirection dir,
                                       enum AVPixelFormat **formats)
{
    enum AVPixelFormat *fmts;
    int n = 2;

#if CONFIG_CUDA
    n++;
#endif
    fmts = av_malloc_array(n, sizeof(*fmts));
    if (!fmts)
        return AVERROR(ENOMEM);

    n = 0;
    fmts[n++] = hwfc->sw_format;
#if CONFIG_CUDA
    fmts[n++] = AV_PIX_FMT_CUDA;
#endif
    fmts[n++] = AV_PIX_FMT_NONE;

    *formats = fmts;

    return 0;
}
#if CONFIG_LIBDRM
static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    vulkan_frame_free(hwfc, hwmap->priv);
}
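/* DRM fourcc <-> VkFormat mapping for DMA-BUF import/export. Note these are
 * per-layer formats: a multi-plane frame such as NV12 typically arrives as
 * separate R8 (luma) and GR88 (chroma) layers rather than as one fourcc. */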
static const struct {
    uint32_t drm_fourcc;
    VkFormat vk_format;
} vulkan_drm_format_map[] = {
    { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
    { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
    { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_ARGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
    { DRM_FORMAT_ABGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },
    { DRM_FORMAT_XRGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
    { DRM_FORMAT_XBGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },

    // All these DRM_FORMATs were added in the same libdrm commit.
#ifdef DRM_FORMAT_XYUV8888
    { DRM_FORMAT_XYUV8888,        VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_XVYU2101010,     VK_FORMAT_A2R10G10B10_UNORM_PACK32 },
    { DRM_FORMAT_XVYU12_16161616, VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16 },
    { DRM_FORMAT_XVYU16161616,    VK_FORMAT_R16G16B16A16_UNORM },
#endif
};
static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
            return vulkan_drm_format_map[i].vk_format;
    return VK_FORMAT_UNDEFINED;
}
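/* Imports a DRM frame descriptor as an AVVkFrame: creates one VkImage per
 * layer with an explicit DRM format modifier layout, imports each underlying
 * DMA-BUF object as (possibly dedicated) device memory, and finally binds
 * the memory to the images plane by plane. */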
static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
                                          const AVFrame *src, int flags)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f;
    int bind_counts = 0;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
    VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
    VkBindImagePlaneMemoryInfo plane_info[AV_DRM_MAX_PLANES];

    for (int i = 0; i < desc->nb_layers; i++) {
        if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
            av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
                   desc->layers[i].format);
            return AVERROR(EINVAL);
        }
    }

    if (!(f = av_vk_frame_alloc())) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }

    f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;

    for (int i = 0; i < desc->nb_layers; i++) {
        const int planes = desc->layers[i].nb_planes;

        /* Semaphore */
        VkSemaphoreTypeCreateInfo sem_type_info = {
            .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue  = 0,
        };
        VkSemaphoreCreateInfo sem_spawn = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .pNext = &sem_type_info,
        };

        /* Image creation */
        VkSubresourceLayout ext_img_layouts[AV_DRM_MAX_PLANES];
        VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
            .drmFormatModifier = desc->objects[0].format_modifier,
            .drmFormatModifierPlaneCount = planes,
            .pPlaneLayouts = (const VkSubresourceLayout *)&ext_img_layouts,
        };
        VkExternalMemoryImageCreateInfo ext_img_spec = {
            .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
            .pNext = &ext_img_mod_spec,
            .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };
        VkImageCreateInfo create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = &ext_img_spec,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = drm_to_vulkan_fmt(desc->layers[i].format),
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = 1,
            .flags                 = 0x0,
            .tiling                = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
            .usage                 = 0x0, /* filled in below */
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->img_qfs,
            .queueFamilyIndexCount = p->nb_img_qfs,
            .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                         VK_SHARING_MODE_EXCLUSIVE,
        };

        /* Image format verification */
        VkExternalImageFormatProperties ext_props = {
            .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
        };
        VkImageFormatProperties2 props_ret = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
            .pNext = &ext_props,
        };
        VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
            .drmFormatModifier = ext_img_mod_spec.drmFormatModifier,
            .pQueueFamilyIndices   = create_info.pQueueFamilyIndices,
            .queueFamilyIndexCount = create_info.queueFamilyIndexCount,
            .sharingMode           = create_info.sharingMode,
        };
        VkPhysicalDeviceExternalImageFormatInfo props_ext = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
            .pNext = &props_drm_mod,
            .handleType = ext_img_spec.handleTypes,
        };
        VkPhysicalDeviceImageFormatInfo2 fmt_props;

        if (flags & AV_HWFRAME_MAP_READ)
            create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT |
                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
        if (flags & AV_HWFRAME_MAP_WRITE)
            create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT |
                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT;

        fmt_props = (VkPhysicalDeviceImageFormatInfo2) {
            .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
            .pNext  = &props_ext,
            .format = create_info.format,
            .type   = create_info.imageType,
            .tiling = create_info.tiling,
            .usage  = create_info.usage,
            .flags  = create_info.flags,
        };

        /* Check if importing is possible for this combination of parameters */
        ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->phys_dev,
                                                          &fmt_props, &props_ret);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        /* Set the image width/height */
        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
                     hwfc->sw_format, src->width, src->height, i);

        /* Set the subresource layout based on the layer properties */
        for (int j = 0; j < planes; j++) {
            ext_img_layouts[j].offset     = desc->layers[i].planes[j].offset;
            ext_img_layouts[j].rowPitch   = desc->layers[i].planes[j].pitch;
            ext_img_layouts[j].size       = 0; /* The specs say so for all 3 */
            ext_img_layouts[j].arrayPitch = 0;
            ext_img_layouts[j].depthPitch = 0;
        }

        /* Create image */
        ret = vk->CreateImage(hwctx->act_dev, &create_info,
                              hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                  hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        f->queue_family[i] = VK_QUEUE_FAMILY_EXTERNAL;
        f->layout[i] = create_info.initialLayout;
        f->access[i] = 0x0;
        f->sem_value[i] = 0;
    }

    for (int i = 0; i < desc->nb_layers; i++) {
        /* Memory requirements */
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req2 = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        /* Allocation/importing */
        VkMemoryFdPropertiesKHR fdmp = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
        };
        /* This assumes that a layer will never be constructed from multiple
         * objects. If that was to happen in the real world, this code would
         * need to import each plane separately.
         */
        VkImportMemoryFdInfoKHR idesc = {
            .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
            .fd         = dup(desc->objects[desc->layers[i].planes[0].object_index].fd),
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = &idesc,
            .image = req_desc.image,
        };

        /* Get object properties */
        ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev,
                                           VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
                                           idesc.fd, &fdmp);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            close(idesc.fd);
            goto fail;
        }

        vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);

        /* Only a single bit must be set, not a range, and it must match */
        req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits;

        err = alloc_mem(ctx, &req2.memoryRequirements,
                        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                        (ded_req.prefersDedicatedAllocation ||
                         ded_req.requiresDedicatedAllocation) ?
                            &ded_alloc : ded_alloc.pNext,
                        &f->flags, &f->mem[i]);
        if (err) {
            close(idesc.fd);
            return err;
        }

        f->size[i] = req2.memoryRequirements.size;
    }

    for (int i = 0; i < desc->nb_layers; i++) {
        const int planes = desc->layers[i].nb_planes;
        for (int j = 0; j < planes; j++) {
            VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
                                           j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
                                                    VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;

            plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
            plane_info[bind_counts].pNext = NULL;
            plane_info[bind_counts].planeAspect = aspect;

            bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
            bind_info[bind_counts].pNext  = planes > 1 ? &plane_info[bind_counts] : NULL;
            bind_info[bind_counts].image  = f->img[i];
            bind_info[bind_counts].memory = f->mem[i];

            /* Offset is already signalled via pPlaneLayouts above */
            bind_info[bind_counts].memoryOffset = 0;

            bind_counts++;
        }
    }

    /* Bind the allocated memory to the images */
    ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    *frame = f;

    return 0;

fail:
    vulkan_frame_free(hwfc, f);

    return err;
}
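/* Synchronizes a freshly imported DMA-BUF with Vulkan. When the kernel
 * supports DMA_BUF_IOCTL_EXPORT_SYNC_FILE, the implicit fences of each object
 * are exported as sync files, imported as temporary binary semaphores, and
 * waited on while the images are transitioned to the general layout.
 * Otherwise, an unsynchronized layout transition is all that can be done. */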
static int vulkan_map_from_drm_frame_sync(AVHWFramesContext *hwfc, AVFrame *dst,
                                          const AVFrame *src, int flags)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->hwctx;
    VulkanFramesPriv *fp = hwfc->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];

#ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM) {
        VkCommandBuffer cmd_buf;
        FFVkExecContext *exec;
        VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
        VkSemaphore drm_sync_sem[AV_DRM_MAX_PLANES] = { 0 };
        int nb_img_bar = 0;

        for (int i = 0; i < desc->nb_objects; i++) {
            VkSemaphoreTypeCreateInfo sem_type_info = {
                .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
                .semaphoreType = VK_SEMAPHORE_TYPE_BINARY,
            };
            VkSemaphoreCreateInfo sem_spawn = {
                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
                .pNext = &sem_type_info,
            };
            VkImportSemaphoreFdInfoKHR import_info;
            struct dma_buf_export_sync_file implicit_fd_info = {
                .flags = DMA_BUF_SYNC_READ,
                .fd = -1,
            };

            if (ioctl(desc->objects[i].fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
                      &implicit_fd_info)) {
                err = AVERROR(errno);
                av_log(hwctx, AV_LOG_ERROR, "Failed to retrieve implicit DRM sync file: %s\n",
                       av_err2str(err));
                for (; i >= 0; i--)
                    vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
                return err;
            }

            ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                      hwctx->alloc, &drm_sync_sem[i]);
            if (ret != VK_SUCCESS) {
                av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                for (; i >= 0; i--)
                    vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
                return err;
            }

            import_info = (VkImportSemaphoreFdInfoKHR) {
                .sType      = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
                .flags      = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
                .semaphore  = drm_sync_sem[i],
                .fd         = implicit_fd_info.fd,
            };

            ret = vk->ImportSemaphoreFdKHR(hwctx->act_dev, &import_info);
            if (ret != VK_SUCCESS) {
                av_log(hwctx, AV_LOG_ERROR, "Failed to import semaphore: %s\n",
                       ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                for (; i >= 0; i--)
                    vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc);
                return err;
            }
        }

        exec = ff_vk_exec_get(&p->vkctx, &fp->compute_exec);
        cmd_buf = exec->buf;

        ff_vk_exec_start(&p->vkctx, exec);

        /* Ownership of semaphores is passed */
        err = ff_vk_exec_add_dep_bool_sem(&p->vkctx, exec,
                                          drm_sync_sem, desc->nb_objects,
                                          VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, 1);
        if (err < 0)
            return err;

        err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, dst,
                                       VK_PIPELINE_STAGE_2_NONE,
                                       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
        if (err < 0)
            return err;

        ff_vk_frame_barrier(&p->vkctx, exec, dst, img_bar, &nb_img_bar,
                            VK_PIPELINE_STAGE_2_NONE,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            ((flags & AV_HWFRAME_MAP_READ) ?
                             VK_ACCESS_2_SHADER_SAMPLED_READ_BIT : 0x0) |
                            ((flags & AV_HWFRAME_MAP_WRITE) ?
                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT : 0x0),
                            VK_IMAGE_LAYOUT_GENERAL,
                            VK_QUEUE_FAMILY_IGNORED);

        vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

        err = ff_vk_exec_submit(&p->vkctx, exec);
        if (err < 0)
            return err;
    } else
#endif
    {
        AVVkFrame *f = (AVVkFrame *)dst->data[0];
        av_log(hwctx, AV_LOG_WARNING, "No support for synchronization when importing DMA-BUFs, "
                                      "image may be corrupted.\n");
        err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_IMPORT);
        if (err)
            return err;
    }

    return 0;
}
static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                               const AVFrame *src, int flags)
{
    int err = 0;
    AVVkFrame *f;

    if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src, flags)))
        return err;

    /* The unmapping function will free this */
    dst->data[0] = (uint8_t *)f;
    dst->width   = src->width;
    dst->height  = src->height;

    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
                                &vulkan_unmap_from_drm, f);
    if (err < 0)
        goto fail;

    err = vulkan_map_from_drm_frame_sync(hwfc, dst, src, flags);
    if (err < 0)
        return err;

    av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");

    return 0;

fail:
    vulkan_frame_free(hwfc, f);
    dst->data[0] = NULL;
    return err;
}
#if CONFIG_VAAPI
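/* Maps a VAAPI surface to Vulkan by mapping it to DRM_PRIME first, then
 * importing the resulting DMA-BUF. VAAPI has no semaphores to import, so
 * vaSyncSurface() is called to serialize against any pending work. */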
static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
                                 AVFrame *dst, const AVFrame *src,
                                 int flags)
{
    int err;
    AVFrame *tmp = av_frame_alloc();
    AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
    AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
    VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];

    if (!tmp)
        return AVERROR(ENOMEM);

    /* We have to sync since like the previous comment said, no semaphores */
    vaSyncSurface(vaapi_ctx->display, surface_id);

    tmp->format = AV_PIX_FMT_DRM_PRIME;

    err = av_hwframe_map(tmp, src, flags);
    if (err < 0)
        goto fail;

    err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
    if (err < 0)
        goto fail;

    err = ff_hwframe_map_replace(dst, src);

fail:
    av_frame_free(&tmp);
    return err;
}
#endif
#endif
#if CONFIG_CUDA
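/* Exports one VkDeviceMemory allocation to CUDA as an external memory object,
 * via an opaque Win32 handle on Windows or an opaque FD elsewhere. */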
static int export_mem_to_cuda(AVHWDeviceContext *ctx,
                              AVHWDeviceContext *cuda_cu, CudaFunctions *cu,
                              AVVkFrameInternal *dst_int, int idx,
                              VkDeviceMemory mem, size_t size)
{
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

#ifdef _WIN32
    CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
        .type = IsWindows8OrGreater()
                ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
                : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
        .size = size,
    };
    VkMemoryGetWin32HandleInfoKHR export_info = {
        .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
        .memory     = mem,
        .handleType = IsWindows8OrGreater()
                      ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                      : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
    };

    ret = vk->GetMemoryWin32HandleKHR(hwctx->act_dev, &export_info,
                                      &ext_desc.handle.win32.handle);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
    dst_int->ext_mem_handle[idx] = ext_desc.handle.win32.handle;
#else
    CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
        .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
        .size = size,
    };
    VkMemoryGetFdInfoKHR export_info = {
        .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
        .memory     = mem,
        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
    };

    ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                             &ext_desc.handle.fd);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
#endif

    ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[idx], &ext_desc));
    if (ret < 0) {
#ifndef _WIN32
        close(ext_desc.handle.fd);
#endif
        return AVERROR_EXTERNAL;
    }

    return 0;
}
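/* Exports one of the frame's timeline semaphores to CUDA, so Vulkan and CUDA
 * work can be ordered against each other without a CPU synchronization. */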
static int export_sem_to_cuda(AVHWDeviceContext *ctx,
                              AVHWDeviceContext *cuda_cu, CudaFunctions *cu,
                              AVVkFrameInternal *dst_int, int idx,
                              VkSemaphore sem)
{
    VkResult ret;
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

#ifdef _WIN32
    VkSemaphoreGetWin32HandleInfoKHR sem_export = {
        .sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
        .semaphore  = sem,
        .handleType = IsWindows8OrGreater()
                      ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
                      : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
    };
    CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
        .type = 10 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 */,
    };
#else
    VkSemaphoreGetFdInfoKHR sem_export = {
        .sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
        .semaphore  = sem,
        .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    };
    CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
        .type = 9 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD */,
    };
#endif

#ifdef _WIN32
    ret = vk->GetSemaphoreWin32HandleKHR(hwctx->act_dev, &sem_export,
                                         &ext_sem_desc.handle.win32.handle);
#else
    ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
                                &ext_sem_desc.handle.fd);
#endif
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
#ifdef _WIN32
    dst_int->ext_sem_handle[idx] = ext_sem_desc.handle.win32.handle;
#endif

    ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[idx],
                                                 &ext_sem_desc));
    if (ret < 0) {
#ifndef _WIN32
        close(ext_sem_desc.handle.fd);
#endif
        return AVERROR_EXTERNAL;
    }

    return 0;
}
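/* Lazily exports a Vulkan frame to CUDA: on first use, exports each image's
 * memory and semaphore, then maps every plane as a CUDA mipmapped array.
 * When fewer images than planes exist (multiplane formats), the per-plane
 * offsets are read back via vkGetImageSubresourceLayout(). */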
static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                                 AVBufferRef *cuda_hwfc,
                                 const AVFrame *frame)
{
    int err;
    VkResult ret;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
    VulkanDevicePriv *p = ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    int nb_images;

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
                                                     CU_AD_FORMAT_UNSIGNED_INT8;

    dst_f = (AVVkFrame *)frame->data[0];
    dst_int = dst_f->internal;

    if (!dst_int->cuda_fc_ref) {
        size_t offsets[3] = { 0 };

        dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
        if (!dst_int->cuda_fc_ref)
            return AVERROR(ENOMEM);

        nb_images = ff_vk_count_images(dst_f);
        for (int i = 0; i < nb_images; i++) {
            err = export_mem_to_cuda(ctx, cuda_cu, cu, dst_int, i,
                                     dst_f->mem[i], dst_f->size[i]);
            if (err < 0)
                goto fail;

            err = export_sem_to_cuda(ctx, cuda_cu, cu, dst_int, i,
                                     dst_f->sem[i]);
            if (err < 0)
                goto fail;
        }

        if (nb_images != planes) {
            for (int i = 0; i < planes; i++) {
                VkImageSubresource subres = {
                    .aspectMask = i == 2 ? VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT :
                                  i == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
                                           VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT
                };
                VkSubresourceLayout layout = { 0 };
                vk->GetImageSubresourceLayout(hwctx->act_dev, dst_f->img[FFMIN(i, nb_images - 1)],
                                              &subres, &layout);
                offsets[i] = layout.offset;
            }
        }
        for (int i = 0; i < planes; i++) {
            CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
                .offset = offsets[i],
                .arrayDesc = {
                    .Depth = 0,
                    .Format = cufmt,
                    .NumChannels = 1 + ((planes == 2) && i),
                    .Flags = 0,
                },
                .numLevels = 1,
            };
            uint32_t p_w, p_h;
            get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
            tex_desc.arrayDesc.Width  = p_w;
            tex_desc.arrayDesc.Height = p_h;

            ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
                                                                       dst_int->ext_mem[FFMIN(i, nb_images - 1)],
                                                                       &tex_desc));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }

            ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
                                                        dst_int->cu_mma[i], 0));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }
        }
    }

    return 0;

fail:
    vulkan_free_internal(dst_f);
    return err;
}
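/* CUDA -> Vulkan upload: transitions the frame for external access, waits on
 * the frame's timeline semaphores from the CUDA stream, copies each plane
 * with cuMemcpy2DAsync(), then signals the semaphores and bumps their values
 * so subsequent Vulkan users order correctly after the copy. */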
static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
                                          AVFrame *dst, const AVFrame *src)
{
    int err;
    CUcontext dummy;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    VulkanFramesPriv *fp = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };

    dst_f = (AVVkFrame *)dst->data[0];

    err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        return err;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (err < 0)
        return err;

    err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    dst_int = dst_f->internal;

    for (int i = 0; i < planes; i++) {
        s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
        s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
    }

    err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .srcPitch      = src->linesize[i],
            .srcY          = 0,

            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
            .dstArray      = dst_int->cu_array[i],
        };
        uint32_t p_w, p_h;
        get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
        cpy.WidthInBytes = p_w * desc->comp[i].step;
        cpy.Height = p_h;

        err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
        if (err < 0)
            goto fail;
    }

    err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++)
        dst_f->sem_value[i]++;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");

    return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f);
    av_buffer_unref(&dst->buf[0]);
    return err;
}
#endif

static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
                         const AVFrame *src, int flags)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (src->format) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_PIX_FMT_VAAPI:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_from_vaapi(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
    case AV_PIX_FMT_DRM_PRIME:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_from_drm(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
    default:
        return AVERROR(ENOSYS);
    }
}
#if CONFIG_LIBDRM
typedef struct VulkanDRMMapping {
    AVDRMFrameDescriptor drm_desc;
    AVVkFrame *source;
} VulkanDRMMapping;

static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    AVDRMFrameDescriptor *drm_desc = hwmap->priv;

    for (int i = 0; i < drm_desc->nb_objects; i++)
        close(drm_desc->objects[i].fd);

    av_free(drm_desc);
}

static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].vk_format == vkfmt)
            return vulkan_drm_format_map[i].drm_fourcc;
    return DRM_FORMAT_INVALID;
}
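/* Exports an AVVkFrame as a DRM frame descriptor: transitions the frame for
 * external use, waits for that to complete, queries the image's DRM format
 * modifier, exports each memory allocation as a DMA-BUF FD, and fills in the
 * per-layer offsets/pitches (only queryable for non-optimal tiling). */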
static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                             const AVFrame *src, int flags)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f = (AVVkFrame *)src->data[0];
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    AVVulkanDeviceContext *hwctx = &p->p;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    VulkanFramesPriv *fp = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkImageDrmFormatModifierPropertiesEXT drm_mod = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
    };
    VkSemaphoreWaitInfo wait_info = {
        .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
        .flags          = 0x0,
        .semaphoreCount = planes,
    };

    AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
    if (!drm_desc)
        return AVERROR(ENOMEM);

    err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        goto end;

    /* Wait for the operation to finish so we can cleanly export it. */
    wait_info.pSemaphores = f->sem;
    wait_info.pValues     = f->sem_value;

    vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);

    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
    if (err < 0)
        goto end;

    ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
                                                     &drm_mod);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
        err = AVERROR_EXTERNAL;
        goto end;
    }

    for (int i = 0; (i < planes) && (f->mem[i]); i++) {
        VkMemoryGetFdInfoKHR export_info = {
            .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
            .memory     = f->mem[i],
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };

        ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                                 &drm_desc->objects[i].fd);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
            err = AVERROR_EXTERNAL;
            goto end;
        }

        drm_desc->nb_objects++;
        drm_desc->objects[i].size = f->size[i];
        drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
    }

    drm_desc->nb_layers = planes;
    for (int i = 0; i < drm_desc->nb_layers; i++) {
        VkSubresourceLayout layout;
        VkImageSubresource sub = {
            .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
        };
        VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];

        drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
        drm_desc->layers[i].nb_planes = 1;

        if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
            av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
            err = AVERROR_PATCHWELCOME;
            goto end;
        }

        drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);

        if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
            continue;

        vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
        drm_desc->layers[i].planes[0].offset = layout.offset;
        drm_desc->layers[i].planes[0].pitch  = layout.rowPitch;
    }

    dst->width   = src->width;
    dst->height  = src->height;
    dst->data[0] = (uint8_t *)drm_desc;

    av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");

    return 0;

end:
    av_free(drm_desc);
    return err;
}
#if CONFIG_VAAPI
static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
                               const AVFrame *src, int flags)
{
    int err;
    AVFrame *tmp = av_frame_alloc();
    if (!tmp)
        return AVERROR(ENOMEM);

    tmp->format = AV_PIX_FMT_DRM_PRIME;

    err = vulkan_map_to_drm(hwfc, tmp, src, flags);
    if (err < 0)
        goto fail;

    err = av_hwframe_map(dst, tmp, flags);
    if (err < 0)
        goto fail;

    err = ff_hwframe_map_replace(dst, src);

fail:
    av_frame_free(&tmp);
    return err;
}
#endif
#endif
static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
                           const AVFrame *src, int flags)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (dst->format) {
#if CONFIG_LIBDRM
    case AV_PIX_FMT_DRM_PRIME:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_to_drm(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#if CONFIG_VAAPI
    case AV_PIX_FMT_VAAPI:
        if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
            return vulkan_map_to_vaapi(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
#endif
    default:
        break;
    }
    return AVERROR(ENOSYS);
}
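/* Copies plane data between a mapped staging buffer and the software frame,
 * invalidating/flushing the mapping as needed for non-coherent memory. */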
static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf,
                            AVFrame *swf, VkBufferImageCopy *region,
                            int planes, int upload)
{
    VkResult ret;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;
    AVVulkanDeviceContext *hwctx = &p->p;

    FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data;

    const VkMappedMemoryRange flush_info = {
        .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
        .memory = vkbuf->mem,
        .size   = VK_WHOLE_SIZE,
    };

    if (!upload && !(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
        ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1,
                                               &flush_info);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    if (upload) {
        for (int i = 0; i < planes; i++)
            av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset,
                                region[i].bufferRowLength,
                                swf->data[i],
                                swf->linesize[i],
                                swf->linesize[i],
                                region[i].imageExtent.height);
    } else {
        for (int i = 0; i < planes; i++)
            av_image_copy_plane(swf->data[i],
                                swf->linesize[i],
                                vkbuf->mapped_mem + region[i].bufferOffset,
                                region[i].bufferRowLength,
                                swf->linesize[i],
                                region[i].imageExtent.height);
    }

    if (upload && !(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
        ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1,
                                          &flush_info);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
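/* Computes per-plane copy regions with rows and plane offsets aligned to the
 * device's optimal copy pitch/offset, then grabs a pooled staging buffer
 * large enough to hold all planes. */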
static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst,
                         AVFrame *swf, VkBufferImageCopy *region, int upload)
{
    int err;
    uint32_t p_w, p_h;
    VulkanFramesPriv *fp = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(swf->format);
    VkBufferUsageFlags buf_usage = upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
                                            VK_BUFFER_USAGE_TRANSFER_DST_BIT;

    size_t buf_offset = 0;
    for (int i = 0; i < planes; i++) {
        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

        region[i] = (VkBufferImageCopy) {
            .bufferOffset = buf_offset,
            .bufferRowLength = FFALIGN(swf->linesize[i],
                                       p->props.properties.limits.optimalBufferCopyRowPitchAlignment),
            .bufferImageHeight = p_h,
            .imageSubresource.layerCount = 1,
            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
            /* Rest of the fields adjusted/filled in later */
        };

        buf_offset += FFALIGN(p_h*region[i].bufferRowLength,
                              p->props.properties.limits.optimalBufferCopyOffsetAlignment);
    }

    err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst, buf_usage,
                                  NULL, buf_offset,
                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
    if (err < 0)
        return err;

    return 0;
}
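/* Zero-copy path: wraps the software frame's buffers directly as Vulkan
 * host-mapped buffers. Handles the two common layouts (one buffer for all
 * planes, or one buffer per plane); anything else falls back to a copy. */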
static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs,
                          AVFrame *swf, VkBufferImageCopy *region, int upload)
{
    int err;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    int nb_src_bufs;
    const int planes = av_pix_fmt_count_planes(swf->format);
    VkBufferUsageFlags buf_usage = upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
                                            VK_BUFFER_USAGE_TRANSFER_DST_BIT;

    /* We can't host map images with negative strides */
    for (int i = 0; i < planes; i++)
        if (swf->linesize[i] < 0)
            return AVERROR(EINVAL);

    /* Count the number of buffers in the software frame */
    nb_src_bufs = 0;
    while (swf->buf[nb_src_bufs])
        nb_src_bufs++;

    /* Single buffer contains all planes */
    if (nb_src_bufs == 1) {
        err = ff_vk_host_map_buffer(&p->vkctx, &dst[0],
                                    swf->data[0], swf->buf[0],
                                    buf_usage);
        if (err < 0)
            return err;
        (*nb_bufs)++;

        for (int i = 0; i < planes; i++)
            region[i].bufferOffset = ((FFVkBuffer *)dst[0]->data)->virtual_offset +
                                     swf->data[i] - swf->data[0];
    } else if (nb_src_bufs == planes) { /* One buffer per plane */
        for (int i = 0; i < planes; i++) {
            err = ff_vk_host_map_buffer(&p->vkctx, &dst[i],
                                        swf->data[i], swf->buf[i],
                                        buf_usage);
            if (err < 0)
                goto fail;
            (*nb_bufs)++;

            region[i].bufferOffset = ((FFVkBuffer *)dst[i]->data)->virtual_offset;
        }
    } else {
        /* Weird layout (3 planes, 2 buffers), patch welcome, fallback to copy */
        return AVERROR_PATCHWELCOME;
    }

    return 0;

fail:
    for (int i = 0; i < (*nb_bufs); i++)
        av_buffer_unref(&dst[i]);
    return err;
}
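
/*
 * Common upload/download path. Tries to host map the software frame's own
 * buffers when FF_VK_EXT_EXTERNAL_HOST_MEMORY is available, otherwise stages
 * the data through a pooled buffer, then records a layout-transition barrier
 * plus one buffer<->image copy per plane. Uploads are submitted
 * asynchronously; downloads wait on the submission and copy the staging
 * buffer back into the software frame.
 */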
static int vulkan_transfer_frame(AVHWFramesContext *hwfc,
                                 AVFrame *swf, AVFrame *hwf,
                                 int upload)
{
    int err;
    VulkanFramesPriv *fp = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
    FFVulkanFunctions *vk = &p->vkctx.vkfn;

    int host_mapped = 0;

    AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0];
    VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane

    const int planes = av_pix_fmt_count_planes(swf->format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format);
    const int nb_images = ff_vk_count_images(hwf_vk);

    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
    int nb_img_bar = 0;

    AVBufferRef *bufs[AV_NUM_DATA_POINTERS];
    int nb_bufs = 0;

    VkCommandBuffer cmd_buf;
    FFVkExecContext *exec;

    /* Sanity checking */
    if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
        av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
        return AVERROR(EINVAL);
    }

    if (swf->width > hwfc->width || swf->height > hwfc->height)
        return AVERROR(EINVAL);

    for (int i = 0; i < av_pix_fmt_count_planes(swf->format); i++) {
        uint32_t p_w, p_h;
        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

        /* Buffer region for this plane */
        region[i] = (VkBufferImageCopy) {
            .bufferOffset = 0,
            .bufferRowLength = swf->linesize[i],
            .bufferImageHeight = p_h,
            .imageSubresource.layerCount = 1,
            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
            /* Rest of the fields adjusted/filled in later */
        };
    }

    /* Setup buffers first */
    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
        err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload);
        if (err >= 0)
            host_mapped = 1;
    }

    if (!host_mapped) {
        err = get_plane_buf(hwfc, &bufs[0], swf, region, upload);
        if (err < 0)
            goto end;
        nb_bufs = 1;

        if (upload) {
            err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1);
            if (err < 0)
                goto end;
        }
    }

    exec = ff_vk_exec_get(&p->vkctx, &fp->upload_exec);
    cmd_buf = exec->buf;

    ff_vk_exec_start(&p->vkctx, exec);

    /* Prep destination Vulkan frame */
    err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf,
                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                   VK_PIPELINE_STAGE_2_TRANSFER_BIT);
    if (err < 0)
        goto end;

    /* No need to declare buf deps for synchronous transfers (downloads) */
    if (upload) {
        /* Add the software frame backing the buffers if we're host mapping */
        if (host_mapped) {
            err = ff_vk_exec_add_dep_sw_frame(&p->vkctx, exec, swf);
            if (err < 0) {
                ff_vk_exec_discard_deps(&p->vkctx, exec);
                goto end;
            }
        }

        /* Add the buffers as a dependency */
        err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1);
        if (err < 0) {
            ff_vk_exec_discard_deps(&p->vkctx, exec);
            goto end;
        }
    }

    ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
                        upload ? VK_ACCESS_TRANSFER_WRITE_BIT :
                                 VK_ACCESS_TRANSFER_READ_BIT,
                        upload ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL :
                                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);

    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    for (int i = 0; i < planes; i++) {
        int buf_idx = FFMIN(i, (nb_bufs - 1));
        int img_idx = FFMIN(i, (nb_images - 1));
        FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data;

        uint32_t orig_stride = region[i].bufferRowLength;
        region[i].bufferRowLength /= desc->comp[i].step;
        region[i].imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i);

        if (upload)
            vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf,
                                     hwf_vk->img[img_idx],
                                     img_bar[img_idx].newLayout,
                                     1, &region[i]);
        else
            vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx],
                                     img_bar[img_idx].newLayout,
                                     vkbuf->buf,
                                     1, &region[i]);

        region[i].bufferRowLength = orig_stride;
    }

    err = ff_vk_exec_submit(&p->vkctx, exec);
    if (err < 0) {
        ff_vk_exec_discard_deps(&p->vkctx, exec);
    } else if (!upload) {
        ff_vk_exec_wait(&p->vkctx, exec);
        if (!host_mapped)
            err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0);
    }

end:
    for (int i = 0; i < nb_bufs; i++)
        av_buffer_unref(&bufs[i]);

    return err;
}
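
/*
 * Entry point for av_hwframe_transfer_data() uploads. CUDA frames take the
 * DMA path when the required external memory/semaphore extensions are
 * present; note the deliberate fallthrough into the default case when they
 * are not. All other software formats go through vulkan_transfer_frame().
 */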
static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
                                   const AVFrame *src)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (src->format) {
#if CONFIG_CUDA
    case AV_PIX_FMT_CUDA:
#ifdef _WIN32
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
            return vulkan_transfer_data_from_cuda(hwfc, dst, src);
#endif
    default:
        if (src->hw_frames_ctx)
            return AVERROR(ENOSYS);
        else
            return vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1);
    }
}
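
/*
 * Vulkan -> CUDA download: prepares the frame for external export, maps the
 * Vulkan images and semaphores into CUDA, waits on the current semaphore
 * values, performs one cuMemcpy2DAsync per plane out of the imported arrays,
 * then signals value+1 and bumps sem_value so both APIs stay ordered on the
 * same timeline. On success the frame is re-imported for Vulkan use.
 */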
#if CONFIG_CUDA
static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
                                        const AVFrame *src)
{
    int err;
    CUcontext dummy;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    VulkanFramesPriv *fp = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
    int nb_images;

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };

    dst_f = (AVVkFrame *)src->data[0];
    nb_images = ff_vk_count_images(dst_f);

    err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        return err;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (err < 0)
        return err;

    err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    dst_int = dst_f->internal;

    for (int i = 0; i < planes; i++) {
        s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
        s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
    }

    err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     nb_images, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++) {
        CUDA_MEMCPY2D cpy = {
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstDevice     = (CUdeviceptr)dst->data[i],
            .dstPitch      = dst->linesize[i],
            .dstY          = 0,

            .srcMemoryType = CU_MEMORYTYPE_ARRAY,
            .srcArray      = dst_int->cu_array[i],
        };

        /* Match the uint32_t signature of get_plane_wh() used elsewhere */
        uint32_t w, h;
        get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);

        cpy.WidthInBytes = w * desc->comp[i].step;
        cpy.Height = h;

        err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
        if (err < 0)
            goto fail;
    }

    err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       nb_images, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++)
        dst_f->sem_value[i]++;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");

    return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f);
    av_buffer_unref(&dst->buf[0]);
    return err;
}
#endif
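
/*
 * Entry point for av_hwframe_transfer_data() downloads; mirrors
 * vulkan_transfer_data_to() with the transfer direction reversed.
 */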
static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
                                     const AVFrame *src)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx;

    switch (dst->format) {
#if CONFIG_CUDA
    case AV_PIX_FMT_CUDA:
#ifdef _WIN32
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
        if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
            (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
            return vulkan_transfer_data_to_cuda(hwfc, dst, src);
#endif
    default:
        if (dst->hw_frames_ctx)
            return AVERROR(ENOSYS);
        else
            return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0);
    }
}
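
/* Deriving to Vulkan initializes the destination frames context from
 * scratch, exactly as if it had been freshly created; flags are ignored. */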
static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
                                   AVHWFramesContext *src_fc, int flags)
{
    return vulkan_frames_init(dst_fc);
}
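
/*
 * Public allocator for AVVkFrame: returns a zeroed frame with its internal
 * state and the mutex guarding layout/access updates already set up. A
 * minimal (hypothetical) caller sketch, e.g. for importing external images:
 *
 *     AVVkFrame *f = av_vk_frame_alloc();
 *     if (!f)
 *         return AVERROR(ENOMEM);
 *     // fill in f->img[], f->mem[], f->layout[] etc., then free with av_free()
 */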
AVVkFrame *av_vk_frame_alloc(void)
{
    int err;
    AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
    if (!f)
        return NULL;

    f->internal = av_mallocz(sizeof(*f->internal));
    if (!f->internal) {
        av_free(f);
        return NULL;
    }

    err = pthread_mutex_init(&f->internal->update_mutex, NULL);
    if (err != 0) {
        av_free(f->internal);
        av_free(f);
        return NULL;
    }

    return f;
}
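
/* Backend vtable for AV_HWDEVICE_TYPE_VULKAN: maps the generic
 * av_hwdevice_*/av_hwframe_* API onto the functions defined above. */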
const HWContextType ff_hwcontext_type_vulkan = {
    .type                   = AV_HWDEVICE_TYPE_VULKAN,
    .name                   = "Vulkan",

    .device_hwctx_size      = sizeof(VulkanDevicePriv),
    .frames_hwctx_size      = sizeof(VulkanFramesPriv),

    .device_init            = &vulkan_device_init,
    .device_uninit          = &vulkan_device_uninit,
    .device_create          = &vulkan_device_create,
    .device_derive          = &vulkan_device_derive,

    .frames_get_constraints = &vulkan_frames_get_constraints,
    .frames_init            = vulkan_frames_init,
    .frames_get_buffer      = vulkan_get_buffer,
    .frames_uninit          = vulkan_frames_uninit,

    .transfer_get_formats   = vulkan_transfer_get_formats,
    .transfer_data_to       = vulkan_transfer_data_to,
    .transfer_data_from     = vulkan_transfer_data_from,

    .map_to                 = vulkan_map_to,
    .map_from               = vulkan_map_from,
    .frames_derive_to       = &vulkan_frames_derive_to,

    .pix_fmts = (const enum AVPixelFormat []) {
        AV_PIX_FMT_VULKAN,
        AV_PIX_FMT_NONE
    },
};
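
/*
 * Usage sketch (hypothetical caller, not part of this file): the table above
 * is what makes the generic API work for Vulkan, e.g.:
 *
 *     AVBufferRef *dev = NULL;
 *     int err = av_hwdevice_ctx_create(&dev, AV_HWDEVICE_TYPE_VULKAN,
 *                                      NULL, NULL, 0);
 *     if (err < 0)
 *         return err;
 *     // ... allocate an AVHWFramesContext, then move frames with
 *     // av_hwframe_transfer_data(), which lands in the transfer_data_to/
 *     // transfer_data_from entries above ...
 *     av_buffer_unref(&dev);
 */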