libaomenc.c 67 KB


  1. /*
  2. * Copyright (c) 2010, Google, Inc.
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * AV1 encoder support via libaom
  23. */
  24. #include <limits.h>
  25. #define AOM_DISABLE_CTRL_TYPECHECKS 1
  26. #include <aom/aom_encoder.h>
  27. #include <aom/aomcx.h>
  28. #include "libavutil/avassert.h"
  29. #include "libavutil/base64.h"
  30. #include "libavutil/common.h"
  31. #include "libavutil/cpu.h"
  32. #include "libavutil/hdr_dynamic_metadata.h"
  33. #include "libavutil/imgutils.h"
  34. #include "libavutil/mathematics.h"
  35. #include "libavutil/mem.h"
  36. #include "libavutil/opt.h"
  37. #include "libavutil/pixdesc.h"
  38. #include "av1.h"
  39. #include "avcodec.h"
  40. #include "bsf.h"
  41. #include "bytestream.h"
  42. #include "codec_internal.h"
  43. #include "dovi_rpu.h"
  44. #include "encode.h"
  45. #include "internal.h"
  46. #include "itut35.h"
  47. #include "libaom.h"
  48. #include "packet_internal.h"
  49. #include "profiles.h"
  /*
   * Portion of struct aom_codec_cx_pkt from aom_encoder.h.
   *
   * Describes one encoded frame returned from the library. Entries are
   * chained into a singly linked list through the `next` member.
   */
  struct FrameListData {
      void                 *buf;          /**< compressed data buffer */
      size_t                sz;           /**< length of compressed data */
      int64_t               pts;          /**< time stamp to show frame
                                               (in timebase units) */
      unsigned long         duration;     /**< duration to show frame
                                               (in timebase units) */
      uint32_t              flags;        /**< flags for this frame */
      uint64_t              sse[4];       /**< squared-error values; only
                                               meaningful when have_sse is set */
      int                   have_sse;     /**< true if we have pending sse[] */
      uint64_t              frame_number; /**< frame counter value recorded
                                               for this entry */
      struct FrameListData *next;         /**< following list entry, or NULL */
  };
  67. typedef struct AOMEncoderContext {
  68. AVClass *class;
  69. AVBSFContext *bsf;
  70. DOVIContext dovi;
  71. struct aom_codec_ctx encoder;
  72. struct aom_image rawimg;
  73. struct aom_fixed_buf twopass_stats;
  74. unsigned twopass_stats_size;
  75. struct FrameListData *coded_frame_list;
  76. int cpu_used;
  77. int auto_alt_ref;
  78. int arnr_max_frames;
  79. int arnr_strength;
  80. int aq_mode;
  81. int lag_in_frames;
  82. int error_resilient;
  83. int crf;
  84. int static_thresh;
  85. int drop_threshold;
  86. int denoise_noise_level;
  87. int denoise_block_size;
  88. uint64_t sse[4];
  89. int have_sse; /**< true if we have pending sse[] */
  90. uint64_t frame_number;
  91. int rc_undershoot_pct;
  92. int rc_overshoot_pct;
  93. int minsection_pct;
  94. int maxsection_pct;
  95. int frame_parallel;
  96. int tile_cols, tile_rows;
  97. int tile_cols_log2, tile_rows_log2;
  98. aom_superblock_size_t superblock_size;
  99. int uniform_tiles;
  100. int row_mt;
  101. int enable_cdef;
  102. int enable_global_motion;
  103. int enable_intrabc;
  104. int enable_restoration;
  105. int usage;
  106. int tune;
  107. int still_picture;
  108. int enable_rect_partitions;
  109. int enable_1to4_partitions;
  110. int enable_ab_partitions;
  111. int enable_angle_delta;
  112. int enable_cfl_intra;
  113. int enable_paeth_intra;
  114. int enable_smooth_intra;
  115. int enable_intra_edge_filter;
  116. int enable_palette;
  117. int enable_filter_intra;
  118. int enable_flip_idtx;
  119. int enable_tx64;
  120. int reduced_tx_type_set;
  121. int use_intra_dct_only;
  122. int use_inter_dct_only;
  123. int use_intra_default_tx_only;
  124. int enable_ref_frame_mvs;
  125. int enable_interinter_wedge;
  126. int enable_interintra_wedge;
  127. int enable_interintra_comp;
  128. int enable_masked_comp;
  129. int enable_obmc;
  130. int enable_onesided_comp;
  131. int enable_reduced_reference_set;
  132. int enable_smooth_interintra;
  133. int enable_diff_wtd_comp;
  134. int enable_dist_wtd_comp;
  135. int enable_dual_filter;
  136. AVDictionary *aom_params;
  137. } AOMContext;
  138. #define OFFSET(x) offsetof(AOMContext, x)
  139. static const char *const ctlidstr[] = {
  140. [AOME_SET_CPUUSED] = "AOME_SET_CPUUSED",
  141. [AOME_SET_CQ_LEVEL] = "AOME_SET_CQ_LEVEL",
  142. [AOME_SET_ENABLEAUTOALTREF] = "AOME_SET_ENABLEAUTOALTREF",
  143. [AOME_SET_ARNR_MAXFRAMES] = "AOME_SET_ARNR_MAXFRAMES",
  144. [AOME_SET_ARNR_STRENGTH] = "AOME_SET_ARNR_STRENGTH",
  145. [AOME_SET_STATIC_THRESHOLD] = "AOME_SET_STATIC_THRESHOLD",
  146. [AV1E_SET_COLOR_RANGE] = "AV1E_SET_COLOR_RANGE",
  147. [AV1E_SET_COLOR_PRIMARIES] = "AV1E_SET_COLOR_PRIMARIES",
  148. [AV1E_SET_MATRIX_COEFFICIENTS] = "AV1E_SET_MATRIX_COEFFICIENTS",
  149. [AV1E_SET_TRANSFER_CHARACTERISTICS] = "AV1E_SET_TRANSFER_CHARACTERISTICS",
  150. [AV1E_SET_AQ_MODE] = "AV1E_SET_AQ_MODE",
  151. [AV1E_SET_FRAME_PARALLEL_DECODING] = "AV1E_SET_FRAME_PARALLEL_DECODING",
  152. [AV1E_SET_SUPERBLOCK_SIZE] = "AV1E_SET_SUPERBLOCK_SIZE",
  153. [AV1E_SET_TILE_COLUMNS] = "AV1E_SET_TILE_COLUMNS",
  154. [AV1E_SET_TILE_ROWS] = "AV1E_SET_TILE_ROWS",
  155. [AV1E_SET_ENABLE_RESTORATION] = "AV1E_SET_ENABLE_RESTORATION",
  156. [AV1E_SET_ROW_MT] = "AV1E_SET_ROW_MT",
  157. [AV1E_SET_DENOISE_NOISE_LEVEL] = "AV1E_SET_DENOISE_NOISE_LEVEL",
  158. [AV1E_SET_DENOISE_BLOCK_SIZE] = "AV1E_SET_DENOISE_BLOCK_SIZE",
  159. [AV1E_SET_MAX_REFERENCE_FRAMES] = "AV1E_SET_MAX_REFERENCE_FRAMES",
  160. [AV1E_SET_ENABLE_GLOBAL_MOTION] = "AV1E_SET_ENABLE_GLOBAL_MOTION",
  161. [AV1E_SET_ENABLE_INTRABC] = "AV1E_SET_ENABLE_INTRABC",
  162. [AV1E_SET_ENABLE_CDEF] = "AV1E_SET_ENABLE_CDEF",
  163. [AOME_SET_TUNING] = "AOME_SET_TUNING",
  164. [AV1E_SET_ENABLE_1TO4_PARTITIONS] = "AV1E_SET_ENABLE_1TO4_PARTITIONS",
  165. [AV1E_SET_ENABLE_AB_PARTITIONS] = "AV1E_SET_ENABLE_AB_PARTITIONS",
  166. [AV1E_SET_ENABLE_RECT_PARTITIONS] = "AV1E_SET_ENABLE_RECT_PARTITIONS",
  167. [AV1E_SET_ENABLE_ANGLE_DELTA] = "AV1E_SET_ENABLE_ANGLE_DELTA",
  168. [AV1E_SET_ENABLE_CFL_INTRA] = "AV1E_SET_ENABLE_CFL_INTRA",
  169. [AV1E_SET_ENABLE_FILTER_INTRA] = "AV1E_SET_ENABLE_FILTER_INTRA",
  170. [AV1E_SET_ENABLE_INTRA_EDGE_FILTER] = "AV1E_SET_ENABLE_INTRA_EDGE_FILTER",
  171. [AV1E_SET_ENABLE_PAETH_INTRA] = "AV1E_SET_ENABLE_PAETH_INTRA",
  172. [AV1E_SET_ENABLE_SMOOTH_INTRA] = "AV1E_SET_ENABLE_SMOOTH_INTRA",
  173. [AV1E_SET_ENABLE_PALETTE] = "AV1E_SET_ENABLE_PALETTE",
  174. [AV1E_SET_ENABLE_FLIP_IDTX] = "AV1E_SET_ENABLE_FLIP_IDTX",
  175. [AV1E_SET_ENABLE_TX64] = "AV1E_SET_ENABLE_TX64",
  176. [AV1E_SET_INTRA_DCT_ONLY] = "AV1E_SET_INTRA_DCT_ONLY",
  177. [AV1E_SET_INTER_DCT_ONLY] = "AV1E_SET_INTER_DCT_ONLY",
  178. [AV1E_SET_INTRA_DEFAULT_TX_ONLY] = "AV1E_SET_INTRA_DEFAULT_TX_ONLY",
  179. [AV1E_SET_REDUCED_TX_TYPE_SET] = "AV1E_SET_REDUCED_TX_TYPE_SET",
  180. [AV1E_SET_ENABLE_DIFF_WTD_COMP] = "AV1E_SET_ENABLE_DIFF_WTD_COMP",
  181. [AV1E_SET_ENABLE_DIST_WTD_COMP] = "AV1E_SET_ENABLE_DIST_WTD_COMP",
  182. [AV1E_SET_ENABLE_DUAL_FILTER] = "AV1E_SET_ENABLE_DUAL_FILTER",
  183. [AV1E_SET_ENABLE_INTERINTER_WEDGE] = "AV1E_SET_ENABLE_INTERINTER_WEDGE",
  184. [AV1E_SET_ENABLE_INTERINTRA_WEDGE] = "AV1E_SET_ENABLE_INTERINTRA_WEDGE",
  185. [AV1E_SET_ENABLE_MASKED_COMP] = "AV1E_SET_ENABLE_MASKED_COMP",
  186. [AV1E_SET_ENABLE_INTERINTRA_COMP] = "AV1E_SET_ENABLE_INTERINTRA_COMP",
  187. [AV1E_SET_ENABLE_OBMC] = "AV1E_SET_ENABLE_OBMC",
  188. [AV1E_SET_ENABLE_ONESIDED_COMP] = "AV1E_SET_ENABLE_ONESIDED_COMP",
  189. [AV1E_SET_REDUCED_REFERENCE_SET] = "AV1E_SET_REDUCED_REFERENCE_SET",
  190. [AV1E_SET_ENABLE_SMOOTH_INTERINTRA] = "AV1E_SET_ENABLE_SMOOTH_INTERINTRA",
  191. [AV1E_SET_ENABLE_REF_FRAME_MVS] = "AV1E_SET_ENABLE_REF_FRAME_MVS",
  192. #ifdef AOM_CTRL_AV1E_GET_NUM_OPERATING_POINTS
  193. [AV1E_GET_NUM_OPERATING_POINTS] = "AV1E_GET_NUM_OPERATING_POINTS",
  194. #endif
  195. [AV1E_GET_SEQ_LEVEL_IDX] = "AV1E_GET_SEQ_LEVEL_IDX",
  196. #ifdef AOM_CTRL_AV1E_GET_TARGET_SEQ_LEVEL_IDX
  197. [AV1E_GET_TARGET_SEQ_LEVEL_IDX] = "AV1E_GET_TARGET_SEQ_LEVEL_IDX",
  198. #endif
  199. [AV1_GET_NEW_FRAME_IMAGE] = "AV1_GET_NEW_FRAME_IMAGE",
  200. };
  201. static av_cold void log_encoder_error(AVCodecContext *avctx, const char *desc)
  202. {
  203. AOMContext *ctx = avctx->priv_data;
  204. const char *error = aom_codec_error(&ctx->encoder);
  205. const char *detail = aom_codec_error_detail(&ctx->encoder);
  206. av_log(avctx, AV_LOG_ERROR, "%s: %s\n", desc, error);
  207. if (detail)
  208. av_log(avctx, AV_LOG_ERROR, " Additional information: %s\n", detail);
  209. }
  210. static av_cold void dump_enc_cfg(AVCodecContext *avctx,
  211. const struct aom_codec_enc_cfg *cfg,
  212. int level)
  213. {
  214. int width = -30;
  215. av_log(avctx, level, "aom_codec_enc_cfg\n");
  216. av_log(avctx, level, "generic settings\n"
  217. " %*s%u\n %*s%u\n %*s%u\n %*s%u\n %*s%u\n"
  218. " %*s%u\n %*s%u\n"
  219. " %*s{%u/%u}\n %*s%u\n %*s%d\n %*s%u\n",
  220. width, "g_usage:", cfg->g_usage,
  221. width, "g_threads:", cfg->g_threads,
  222. width, "g_profile:", cfg->g_profile,
  223. width, "g_w:", cfg->g_w,
  224. width, "g_h:", cfg->g_h,
  225. width, "g_bit_depth:", cfg->g_bit_depth,
  226. width, "g_input_bit_depth:", cfg->g_input_bit_depth,
  227. width, "g_timebase:", cfg->g_timebase.num, cfg->g_timebase.den,
  228. width, "g_error_resilient:", cfg->g_error_resilient,
  229. width, "g_pass:", cfg->g_pass,
  230. width, "g_lag_in_frames:", cfg->g_lag_in_frames);
  231. av_log(avctx, level, "rate control settings\n"
  232. " %*s%u\n %*s%d\n %*s%p(%"SIZE_SPECIFIER")\n %*s%u\n",
  233. width, "rc_dropframe_thresh:", cfg->rc_dropframe_thresh,
  234. width, "rc_end_usage:", cfg->rc_end_usage,
  235. width, "rc_twopass_stats_in:", cfg->rc_twopass_stats_in.buf, cfg->rc_twopass_stats_in.sz,
  236. width, "rc_target_bitrate:", cfg->rc_target_bitrate);
  237. av_log(avctx, level, "quantizer settings\n"
  238. " %*s%u\n %*s%u\n",
  239. width, "rc_min_quantizer:", cfg->rc_min_quantizer,
  240. width, "rc_max_quantizer:", cfg->rc_max_quantizer);
  241. av_log(avctx, level, "bitrate tolerance\n"
  242. " %*s%u\n %*s%u\n",
  243. width, "rc_undershoot_pct:", cfg->rc_undershoot_pct,
  244. width, "rc_overshoot_pct:", cfg->rc_overshoot_pct);
  245. av_log(avctx, level, "decoder buffer model\n"
  246. " %*s%u\n %*s%u\n %*s%u\n",
  247. width, "rc_buf_sz:", cfg->rc_buf_sz,
  248. width, "rc_buf_initial_sz:", cfg->rc_buf_initial_sz,
  249. width, "rc_buf_optimal_sz:", cfg->rc_buf_optimal_sz);
  250. av_log(avctx, level, "2 pass rate control settings\n"
  251. " %*s%u\n %*s%u\n %*s%u\n",
  252. width, "rc_2pass_vbr_bias_pct:", cfg->rc_2pass_vbr_bias_pct,
  253. width, "rc_2pass_vbr_minsection_pct:", cfg->rc_2pass_vbr_minsection_pct,
  254. width, "rc_2pass_vbr_maxsection_pct:", cfg->rc_2pass_vbr_maxsection_pct);
  255. av_log(avctx, level, "keyframing settings\n"
  256. " %*s%d\n %*s%u\n %*s%u\n",
  257. width, "kf_mode:", cfg->kf_mode,
  258. width, "kf_min_dist:", cfg->kf_min_dist,
  259. width, "kf_max_dist:", cfg->kf_max_dist);
  260. av_log(avctx, level, "tile settings\n"
  261. " %*s%d\n %*s%d\n",
  262. width, "tile_width_count:", cfg->tile_width_count,
  263. width, "tile_height_count:", cfg->tile_height_count);
  264. av_log(avctx, level, "\n");
  265. }
  266. static void coded_frame_add(void *list, struct FrameListData *cx_frame)
  267. {
  268. struct FrameListData **p = list;
  269. while (*p)
  270. p = &(*p)->next;
  271. *p = cx_frame;
  272. cx_frame->next = NULL;
  273. }
  274. static av_cold void free_coded_frame(struct FrameListData *cx_frame)
  275. {
  276. av_freep(&cx_frame->buf);
  277. av_freep(&cx_frame);
  278. }
  279. static av_cold void free_frame_list(struct FrameListData *list)
  280. {
  281. struct FrameListData *p = list;
  282. while (p) {
  283. list = list->next;
  284. free_coded_frame(p);
  285. p = list;
  286. }
  287. }
  288. static av_cold int codecctl_int(AVCodecContext *avctx,
  289. #ifdef UENUM1BYTE
  290. aome_enc_control_id id,
  291. #else
  292. enum aome_enc_control_id id,
  293. #endif
  294. int val)
  295. {
  296. AOMContext *ctx = avctx->priv_data;
  297. char buf[80];
  298. int width = -30;
  299. int res;
  300. snprintf(buf, sizeof(buf), "%s:", ctlidstr[id]);
  301. av_log(avctx, AV_LOG_DEBUG, " %*s%d\n", width, buf, val);
  302. res = aom_codec_control(&ctx->encoder, id, val);
  303. if (res != AOM_CODEC_OK) {
  304. snprintf(buf, sizeof(buf), "Failed to set %s codec control",
  305. ctlidstr[id]);
  306. log_encoder_error(avctx, buf);
  307. return AVERROR(EINVAL);
  308. }
  309. return 0;
  310. }
  311. static int add_hdr_plus(AVCodecContext *avctx, struct aom_image *img, const AVFrame *frame)
  312. {
  313. // Check for HDR10+
  314. AVFrameSideData *side_data =
  315. av_frame_get_side_data(frame, AV_FRAME_DATA_DYNAMIC_HDR_PLUS);
  316. if (!side_data)
  317. return 0;
  318. size_t payload_size;
  319. AVDynamicHDRPlus *hdr_plus = (AVDynamicHDRPlus *)side_data->buf->data;
  320. int res = av_dynamic_hdr_plus_to_t35(hdr_plus, NULL, &payload_size);
  321. if (res < 0) {
  322. log_encoder_error(avctx, "Error finding the size of HDR10+");
  323. return res;
  324. }
  325. uint8_t *hdr_plus_buf;
  326. // Extra bytes for the country code, provider code, provider oriented code and app id.
  327. const size_t hdr_plus_buf_size = payload_size + 6;
  328. hdr_plus_buf = av_malloc(hdr_plus_buf_size);
  329. if (!hdr_plus_buf)
  330. return AVERROR(ENOMEM);
  331. uint8_t *payload = hdr_plus_buf;
  332. // See "HDR10+ AV1 Metadata Handling Specification" v1.0.1, Section 2.1.
  333. bytestream_put_byte(&payload, ITU_T_T35_COUNTRY_CODE_US);
  334. bytestream_put_be16(&payload, ITU_T_T35_PROVIDER_CODE_SAMSUNG);
  335. bytestream_put_be16(&payload, 0x0001); // provider_oriented_code
  336. bytestream_put_byte(&payload, 0x04); // application_identifier
  337. res = av_dynamic_hdr_plus_to_t35(hdr_plus, &payload, &payload_size);
  338. if (res < 0) {
  339. av_free(hdr_plus_buf);
  340. log_encoder_error(avctx, "Error encoding HDR10+ from side data");
  341. return res;
  342. }
  343. res = aom_img_add_metadata(img, OBU_METADATA_TYPE_ITUT_T35,
  344. hdr_plus_buf, hdr_plus_buf_size, AOM_MIF_ANY_FRAME);
  345. av_free(hdr_plus_buf);
  346. if (res < 0) {
  347. log_encoder_error(avctx, "Error adding HDR10+ to aom_img");
  348. return res;
  349. }
  350. return 0;
  351. }
  #if defined(AOM_CTRL_AV1E_GET_NUM_OPERATING_POINTS) && \
      defined(AOM_CTRL_AV1E_GET_SEQ_LEVEL_IDX) && \
      defined(AOM_CTRL_AV1E_GET_TARGET_SEQ_LEVEL_IDX)
  /**
   * Issue a libaom control whose argument is a pointer to int. The callers
   * visible in this file use it to read values back from the encoder.
   *
   * @param avctx codec context whose priv_data is the AOMContext
   * @param id    libaom control ID; also used as index into ctlidstr[]
   * @param ptr   storage handed to aom_codec_control(); its first element
   *              is logged at AV_LOG_DEBUG after a successful call
   * @return 0 on success, AVERROR(EINVAL) if libaom rejects the control
   *         (the libaom error is logged)
   */
  static av_cold int codecctl_intp(AVCodecContext *avctx,
  #ifdef UENUM1BYTE
                                   aome_enc_control_id id,
  #else
                                   enum aome_enc_control_id id,
  #endif
                                   int* ptr)
  {
      AOMContext *ctx = avctx->priv_data;
      char buf[80];
      int width = -30;
      int res;

      res = aom_codec_control(&ctx->encoder, id, ptr);
      if (res != AOM_CODEC_OK) {
          snprintf(buf, sizeof(buf), "Failed to get %s codec control",
                   ctlidstr[id]);
          log_encoder_error(avctx, buf);
          return AVERROR(EINVAL);
      }

      /*
       * Log only after the call: callers may pass storage that has not
       * been written yet, so *ptr must not be read before libaom has
       * filled it in.
       */
      snprintf(buf, sizeof(buf), "%s:", ctlidstr[id]);
      av_log(avctx, AV_LOG_DEBUG, " %*s%d\n", width, buf, *ptr);
      return 0;
  }
  #endif
  379. static av_cold int codecctl_imgp(AVCodecContext *avctx,
  380. #ifdef UENUM1BYTE
  381. aome_enc_control_id id,
  382. #else
  383. enum aome_enc_control_id id,
  384. #endif
  385. struct aom_image *img)
  386. {
  387. AOMContext *ctx = avctx->priv_data;
  388. char buf[80];
  389. int res;
  390. snprintf(buf, sizeof(buf), "%s:", ctlidstr[id]);
  391. res = aom_codec_control(&ctx->encoder, id, img);
  392. if (res != AOM_CODEC_OK) {
  393. snprintf(buf, sizeof(buf), "Failed to get %s codec control",
  394. ctlidstr[id]);
  395. log_encoder_error(avctx, buf);
  396. return AVERROR(EINVAL);
  397. }
  398. return 0;
  399. }
  400. static av_cold int aom_free(AVCodecContext *avctx)
  401. {
  402. AOMContext *ctx = avctx->priv_data;
  403. #if defined(AOM_CTRL_AV1E_GET_NUM_OPERATING_POINTS) && \
  404. defined(AOM_CTRL_AV1E_GET_SEQ_LEVEL_IDX) && \
  405. defined(AOM_CTRL_AV1E_GET_TARGET_SEQ_LEVEL_IDX)
  406. if (ctx->encoder.iface && !(avctx->flags & AV_CODEC_FLAG_PASS1)) {
  407. int num_operating_points;
  408. int levels[32];
  409. int target_levels[32];
  410. if (!codecctl_intp(avctx, AV1E_GET_NUM_OPERATING_POINTS,
  411. &num_operating_points) &&
  412. !codecctl_intp(avctx, AV1E_GET_SEQ_LEVEL_IDX, levels) &&
  413. !codecctl_intp(avctx, AV1E_GET_TARGET_SEQ_LEVEL_IDX,
  414. target_levels)) {
  415. for (int i = 0; i < num_operating_points; i++) {
  416. if (levels[i] > target_levels[i]) {
  417. // Warn when the target level was not met
  418. av_log(avctx, AV_LOG_WARNING,
  419. "Could not encode to target level %d.%d for "
  420. "operating point %d. The output level is %d.%d.\n",
  421. 2 + (target_levels[i] >> 2), target_levels[i] & 3,
  422. i, 2 + (levels[i] >> 2), levels[i] & 3);
  423. } else if (target_levels[i] < 31) {
  424. // Log the encoded level if a target level was given
  425. av_log(avctx, AV_LOG_INFO,
  426. "Output level for operating point %d is %d.%d.\n",
  427. i, 2 + (levels[i] >> 2), levels[i] & 3);
  428. }
  429. }
  430. }
  431. }
  432. #endif
  433. aom_codec_destroy(&ctx->encoder);
  434. aom_img_remove_metadata(&ctx->rawimg);
  435. av_freep(&ctx->twopass_stats.buf);
  436. av_freep(&avctx->stats_out);
  437. free_frame_list(ctx->coded_frame_list);
  438. av_bsf_free(&ctx->bsf);
  439. ff_dovi_ctx_unref(&ctx->dovi);
  440. return 0;
  441. }
  442. static int set_pix_fmt(AVCodecContext *avctx, aom_codec_caps_t codec_caps,
  443. struct aom_codec_enc_cfg *enccfg, aom_codec_flags_t *flags,
  444. aom_img_fmt_t *img_fmt)
  445. {
  446. av_unused AOMContext *ctx = avctx->priv_data;
  447. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
  448. enccfg->g_bit_depth = enccfg->g_input_bit_depth = desc->comp[0].depth;
  449. switch (avctx->pix_fmt) {
  450. case AV_PIX_FMT_GRAY8:
  451. enccfg->monochrome = 1;
  452. /* Fall-through */
  453. case AV_PIX_FMT_YUV420P:
  454. enccfg->g_profile = AV_PROFILE_AV1_MAIN;
  455. *img_fmt = AOM_IMG_FMT_I420;
  456. return 0;
  457. case AV_PIX_FMT_YUV422P:
  458. enccfg->g_profile = AV_PROFILE_AV1_PROFESSIONAL;
  459. *img_fmt = AOM_IMG_FMT_I422;
  460. return 0;
  461. case AV_PIX_FMT_YUV444P:
  462. case AV_PIX_FMT_GBRP:
  463. enccfg->g_profile = AV_PROFILE_AV1_HIGH;
  464. *img_fmt = AOM_IMG_FMT_I444;
  465. return 0;
  466. case AV_PIX_FMT_GRAY10:
  467. case AV_PIX_FMT_GRAY12:
  468. enccfg->monochrome = 1;
  469. /* Fall-through */
  470. case AV_PIX_FMT_YUV420P10:
  471. case AV_PIX_FMT_YUV420P12:
  472. if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH) {
  473. enccfg->g_profile =
  474. enccfg->g_bit_depth == 10 ? AV_PROFILE_AV1_MAIN : AV_PROFILE_AV1_PROFESSIONAL;
  475. *img_fmt = AOM_IMG_FMT_I42016;
  476. *flags |= AOM_CODEC_USE_HIGHBITDEPTH;
  477. return 0;
  478. }
  479. break;
  480. case AV_PIX_FMT_YUV422P10:
  481. case AV_PIX_FMT_YUV422P12:
  482. if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH) {
  483. enccfg->g_profile = AV_PROFILE_AV1_PROFESSIONAL;
  484. *img_fmt = AOM_IMG_FMT_I42216;
  485. *flags |= AOM_CODEC_USE_HIGHBITDEPTH;
  486. return 0;
  487. }
  488. break;
  489. case AV_PIX_FMT_YUV444P10:
  490. case AV_PIX_FMT_YUV444P12:
  491. case AV_PIX_FMT_GBRP10:
  492. case AV_PIX_FMT_GBRP12:
  493. if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH) {
  494. enccfg->g_profile =
  495. enccfg->g_bit_depth == 10 ? AV_PROFILE_AV1_HIGH : AV_PROFILE_AV1_PROFESSIONAL;
  496. *img_fmt = AOM_IMG_FMT_I44416;
  497. *flags |= AOM_CODEC_USE_HIGHBITDEPTH;
  498. return 0;
  499. }
  500. break;
  501. default:
  502. break;
  503. }
  504. av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format.\n");
  505. return AVERROR_INVALIDDATA;
  506. }
  507. static void set_color_range(AVCodecContext *avctx)
  508. {
  509. aom_color_range_t aom_cr;
  510. switch (avctx->color_range) {
  511. case AVCOL_RANGE_UNSPECIFIED:
  512. case AVCOL_RANGE_MPEG: aom_cr = AOM_CR_STUDIO_RANGE; break;
  513. case AVCOL_RANGE_JPEG: aom_cr = AOM_CR_FULL_RANGE; break;
  514. default:
  515. av_log(avctx, AV_LOG_WARNING, "Unsupported color range (%d)\n",
  516. avctx->color_range);
  517. return;
  518. }
  519. codecctl_int(avctx, AV1E_SET_COLOR_RANGE, aom_cr);
  520. }
  /**
   * Count the tiles that result along one dimension when it is split
   * uniformly.
   *
   * @param dim        frame size along the dimension, in pixels
   * @param sb_size    superblock size, in pixels
   * @param tiles_log2 log2 of the requested number of tiles
   * @return number of tiles obtained when each tile spans
   *         ceil(superblocks / 2^tiles_log2) superblocks
   */
  static int count_uniform_tiling(int dim, int sb_size, int tiles_log2)
  {
      const int sb_count    = (dim + sb_size - 1) / sb_size;
      const int max_tiles   = 1 << tiles_log2;
      const int sb_per_tile = (sb_count + max_tiles - 1) >> tiles_log2;

      av_assert0(sb_per_tile > 0);
      return (sb_count + sb_per_tile - 1) / sb_per_tile;
  }
  528. static int choose_tiling(AVCodecContext *avctx,
  529. struct aom_codec_enc_cfg *enccfg)
  530. {
  531. AOMContext *ctx = avctx->priv_data;
  532. int sb_128x128_possible, sb_size, sb_width, sb_height;
  533. int uniform_rows, uniform_cols;
  534. int uniform_64x64_possible, uniform_128x128_possible;
  535. int tile_size, rounding, i;
  536. if (ctx->tile_cols_log2 >= 0)
  537. ctx->tile_cols = 1 << ctx->tile_cols_log2;
  538. if (ctx->tile_rows_log2 >= 0)
  539. ctx->tile_rows = 1 << ctx->tile_rows_log2;
  540. if (ctx->tile_cols == 0) {
  541. ctx->tile_cols = (avctx->width + AV1_MAX_TILE_WIDTH - 1) /
  542. AV1_MAX_TILE_WIDTH;
  543. if (ctx->tile_cols > 1) {
  544. av_log(avctx, AV_LOG_DEBUG, "Automatically using %d tile "
  545. "columns to fill width.\n", ctx->tile_cols);
  546. }
  547. }
  548. av_assert0(ctx->tile_cols > 0);
  549. if (ctx->tile_rows == 0) {
  550. int max_tile_width =
  551. FFALIGN((FFALIGN(avctx->width, 128) +
  552. ctx->tile_cols - 1) / ctx->tile_cols, 128);
  553. ctx->tile_rows =
  554. (max_tile_width * FFALIGN(avctx->height, 128) +
  555. AV1_MAX_TILE_AREA - 1) / AV1_MAX_TILE_AREA;
  556. if (ctx->tile_rows > 1) {
  557. av_log(avctx, AV_LOG_DEBUG, "Automatically using %d tile "
  558. "rows to fill area.\n", ctx->tile_rows);
  559. }
  560. }
  561. av_assert0(ctx->tile_rows > 0);
  562. if ((avctx->width + 63) / 64 < ctx->tile_cols ||
  563. (avctx->height + 63) / 64 < ctx->tile_rows) {
  564. av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: frame not "
  565. "large enough to fit specified tile arrangement.\n");
  566. return AVERROR(EINVAL);
  567. }
  568. if (ctx->tile_cols > AV1_MAX_TILE_COLS ||
  569. ctx->tile_rows > AV1_MAX_TILE_ROWS) {
  570. av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: AV1 does "
  571. "not allow more than %dx%d tiles.\n",
  572. AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS);
  573. return AVERROR(EINVAL);
  574. }
  575. if (avctx->width / ctx->tile_cols > AV1_MAX_TILE_WIDTH) {
  576. av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: AV1 does "
  577. "not allow tiles of width greater than %d.\n",
  578. AV1_MAX_TILE_WIDTH);
  579. return AVERROR(EINVAL);
  580. }
  581. ctx->superblock_size = AOM_SUPERBLOCK_SIZE_DYNAMIC;
  582. if (ctx->tile_cols == 1 && ctx->tile_rows == 1) {
  583. av_log(avctx, AV_LOG_DEBUG, "Using a single tile.\n");
  584. return 0;
  585. }
  586. sb_128x128_possible =
  587. (avctx->width + 127) / 128 >= ctx->tile_cols &&
  588. (avctx->height + 127) / 128 >= ctx->tile_rows;
  589. ctx->tile_cols_log2 = ctx->tile_cols == 1 ? 0 :
  590. av_log2(ctx->tile_cols - 1) + 1;
  591. ctx->tile_rows_log2 = ctx->tile_rows == 1 ? 0 :
  592. av_log2(ctx->tile_rows - 1) + 1;
  593. uniform_cols = count_uniform_tiling(avctx->width,
  594. 64, ctx->tile_cols_log2);
  595. uniform_rows = count_uniform_tiling(avctx->height,
  596. 64, ctx->tile_rows_log2);
  597. av_log(avctx, AV_LOG_DEBUG, "Uniform with 64x64 superblocks "
  598. "-> %dx%d tiles.\n", uniform_cols, uniform_rows);
  599. uniform_64x64_possible = uniform_cols == ctx->tile_cols &&
  600. uniform_rows == ctx->tile_rows;
  601. if (sb_128x128_possible) {
  602. uniform_cols = count_uniform_tiling(avctx->width,
  603. 128, ctx->tile_cols_log2);
  604. uniform_rows = count_uniform_tiling(avctx->height,
  605. 128, ctx->tile_rows_log2);
  606. av_log(avctx, AV_LOG_DEBUG, "Uniform with 128x128 superblocks "
  607. "-> %dx%d tiles.\n", uniform_cols, uniform_rows);
  608. uniform_128x128_possible = uniform_cols == ctx->tile_cols &&
  609. uniform_rows == ctx->tile_rows;
  610. } else {
  611. av_log(avctx, AV_LOG_DEBUG, "128x128 superblocks not possible.\n");
  612. uniform_128x128_possible = 0;
  613. }
  614. ctx->uniform_tiles = 1;
  615. if (uniform_64x64_possible && uniform_128x128_possible) {
  616. av_log(avctx, AV_LOG_DEBUG, "Using uniform tiling with dynamic "
  617. "superblocks (tile_cols_log2 = %d, tile_rows_log2 = %d).\n",
  618. ctx->tile_cols_log2, ctx->tile_rows_log2);
  619. return 0;
  620. }
  621. if (uniform_64x64_possible && !sb_128x128_possible) {
  622. av_log(avctx, AV_LOG_DEBUG, "Using uniform tiling with 64x64 "
  623. "superblocks (tile_cols_log2 = %d, tile_rows_log2 = %d).\n",
  624. ctx->tile_cols_log2, ctx->tile_rows_log2);
  625. ctx->superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
  626. return 0;
  627. }
  628. if (uniform_128x128_possible) {
  629. av_log(avctx, AV_LOG_DEBUG, "Using uniform tiling with 128x128 "
  630. "superblocks (tile_cols_log2 = %d, tile_rows_log2 = %d).\n",
  631. ctx->tile_cols_log2, ctx->tile_rows_log2);
  632. ctx->superblock_size = AOM_SUPERBLOCK_SIZE_128X128;
  633. return 0;
  634. }
  635. ctx->uniform_tiles = 0;
  636. if (sb_128x128_possible) {
  637. sb_size = 128;
  638. ctx->superblock_size = AOM_SUPERBLOCK_SIZE_128X128;
  639. } else {
  640. sb_size = 64;
  641. ctx->superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
  642. }
  643. av_log(avctx, AV_LOG_DEBUG, "Using fixed tiling with %dx%d "
  644. "superblocks (tile_cols = %d, tile_rows = %d).\n",
  645. sb_size, sb_size, ctx->tile_cols, ctx->tile_rows);
  646. enccfg->tile_width_count = ctx->tile_cols;
  647. enccfg->tile_height_count = ctx->tile_rows;
  648. sb_width = (avctx->width + sb_size - 1) / sb_size;
  649. sb_height = (avctx->height + sb_size - 1) / sb_size;
  650. tile_size = sb_width / ctx->tile_cols;
  651. rounding = sb_width % ctx->tile_cols;
  652. for (i = 0; i < ctx->tile_cols; i++) {
  653. enccfg->tile_widths[i] = tile_size +
  654. (i < rounding / 2 ||
  655. i > ctx->tile_cols - 1 - (rounding + 1) / 2);
  656. }
  657. tile_size = sb_height / ctx->tile_rows;
  658. rounding = sb_height % ctx->tile_rows;
  659. for (i = 0; i < ctx->tile_rows; i++) {
  660. enccfg->tile_heights[i] = tile_size +
  661. (i < rounding / 2 ||
  662. i > ctx->tile_rows - 1 - (rounding + 1) / 2);
  663. }
  664. return 0;
  665. }
  666. static const struct {
  667. int aom_enum;
  668. unsigned offset;
  669. } option_map[] = {
  670. { AOME_SET_ENABLEAUTOALTREF, OFFSET(auto_alt_ref) },
  671. { AOME_SET_ARNR_MAXFRAMES, OFFSET(arnr_max_frames) },
  672. { AOME_SET_ARNR_STRENGTH, OFFSET(arnr_strength) },
  673. { AV1E_SET_ENABLE_CDEF, OFFSET(enable_cdef) },
  674. { AV1E_SET_ENABLE_RESTORATION, OFFSET(enable_restoration) },
  675. { AV1E_SET_ENABLE_RECT_PARTITIONS, OFFSET(enable_rect_partitions) },
  676. { AV1E_SET_ENABLE_1TO4_PARTITIONS, OFFSET(enable_1to4_partitions) },
  677. { AV1E_SET_ENABLE_AB_PARTITIONS, OFFSET(enable_ab_partitions) },
  678. { AV1E_SET_ENABLE_ANGLE_DELTA, OFFSET(enable_angle_delta) },
  679. { AV1E_SET_ENABLE_CFL_INTRA, OFFSET(enable_cfl_intra) },
  680. { AV1E_SET_ENABLE_FILTER_INTRA, OFFSET(enable_filter_intra) },
  681. { AV1E_SET_ENABLE_INTRA_EDGE_FILTER, OFFSET(enable_intra_edge_filter) },
  682. { AV1E_SET_ENABLE_PAETH_INTRA, OFFSET(enable_paeth_intra) },
  683. { AV1E_SET_ENABLE_SMOOTH_INTRA, OFFSET(enable_smooth_intra) },
  684. { AV1E_SET_ENABLE_PALETTE, OFFSET(enable_palette) },
  685. { AV1E_SET_ENABLE_TX64, OFFSET(enable_tx64) },
  686. { AV1E_SET_ENABLE_FLIP_IDTX, OFFSET(enable_flip_idtx) },
  687. { AV1E_SET_INTRA_DCT_ONLY, OFFSET(use_intra_dct_only) },
  688. { AV1E_SET_INTER_DCT_ONLY, OFFSET(use_inter_dct_only) },
  689. { AV1E_SET_INTRA_DEFAULT_TX_ONLY, OFFSET(use_intra_default_tx_only) },
  690. { AV1E_SET_REDUCED_TX_TYPE_SET, OFFSET(reduced_tx_type_set) },
  691. { AV1E_SET_ENABLE_REF_FRAME_MVS, OFFSET(enable_ref_frame_mvs) },
  692. { AV1E_SET_REDUCED_REFERENCE_SET, OFFSET(enable_reduced_reference_set) },
  693. { AV1E_SET_ENABLE_DIFF_WTD_COMP, OFFSET(enable_diff_wtd_comp) },
  694. { AV1E_SET_ENABLE_DIST_WTD_COMP, OFFSET(enable_dist_wtd_comp) },
  695. { AV1E_SET_ENABLE_DUAL_FILTER, OFFSET(enable_dual_filter) },
  696. { AV1E_SET_ENABLE_INTERINTER_WEDGE, OFFSET(enable_interinter_wedge) },
  697. { AV1E_SET_ENABLE_MASKED_COMP, OFFSET(enable_masked_comp) },
  698. { AV1E_SET_ENABLE_INTERINTRA_COMP, OFFSET(enable_interintra_comp) },
  699. { AV1E_SET_ENABLE_INTERINTRA_WEDGE, OFFSET(enable_interintra_wedge) },
  700. { AV1E_SET_ENABLE_OBMC, OFFSET(enable_obmc) },
  701. { AV1E_SET_ENABLE_ONESIDED_COMP, OFFSET(enable_onesided_comp) },
  702. { AV1E_SET_ENABLE_SMOOTH_INTERINTRA, OFFSET(enable_smooth_interintra) },
  703. };
  704. static av_cold int aom_init(AVCodecContext *avctx,
  705. const struct aom_codec_iface *iface)
  706. {
  707. AOMContext *ctx = avctx->priv_data;
  708. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
  709. struct aom_codec_enc_cfg enccfg = { 0 };
  710. aom_codec_flags_t flags =
  711. (avctx->flags & AV_CODEC_FLAG_PSNR) ? AOM_CODEC_USE_PSNR : 0;
  712. int res;
  713. aom_img_fmt_t img_fmt;
  714. aom_codec_caps_t codec_caps = aom_codec_get_caps(iface);
  715. av_log(avctx, AV_LOG_INFO, "%s\n", aom_codec_version_str());
  716. av_log(avctx, AV_LOG_VERBOSE, "%s\n", aom_codec_build_config());
  717. if ((res = aom_codec_enc_config_default(iface, &enccfg, ctx->usage)) != AOM_CODEC_OK) {
  718. av_log(avctx, AV_LOG_ERROR, "Failed to get config: %s\n",
  719. aom_codec_err_to_string(res));
  720. return AVERROR(EINVAL);
  721. }
  722. if (set_pix_fmt(avctx, codec_caps, &enccfg, &flags, &img_fmt))
  723. return AVERROR(EINVAL);
  724. if(!avctx->bit_rate)
  725. if(avctx->rc_max_rate || avctx->rc_buffer_size || avctx->rc_initial_buffer_occupancy) {
  726. av_log( avctx, AV_LOG_ERROR, "Rate control parameters set without a bitrate\n");
  727. return AVERROR(EINVAL);
  728. }
  729. dump_enc_cfg(avctx, &enccfg, AV_LOG_DEBUG);
  730. enccfg.g_w = avctx->width;
  731. enccfg.g_h = avctx->height;
  732. enccfg.g_timebase.num = avctx->time_base.num;
  733. enccfg.g_timebase.den = avctx->time_base.den;
  734. enccfg.g_threads =
  735. FFMIN(avctx->thread_count ? avctx->thread_count : av_cpu_count(), 64);
  736. if (ctx->lag_in_frames >= 0)
  737. enccfg.g_lag_in_frames = ctx->lag_in_frames;
  738. if (avctx->flags & AV_CODEC_FLAG_PASS1)
  739. enccfg.g_pass = AOM_RC_FIRST_PASS;
  740. else if (avctx->flags & AV_CODEC_FLAG_PASS2)
  741. enccfg.g_pass = AOM_RC_LAST_PASS;
  742. else
  743. enccfg.g_pass = AOM_RC_ONE_PASS;
  744. if (avctx->rc_min_rate == avctx->rc_max_rate &&
  745. avctx->rc_min_rate == avctx->bit_rate && avctx->bit_rate) {
  746. enccfg.rc_end_usage = AOM_CBR;
  747. } else if (ctx->crf >= 0) {
  748. enccfg.rc_end_usage = AOM_CQ;
  749. if (!avctx->bit_rate)
  750. enccfg.rc_end_usage = AOM_Q;
  751. }
  752. if (avctx->bit_rate) {
  753. enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1, 1000,
  754. AV_ROUND_NEAR_INF);
  755. } else if (enccfg.rc_end_usage != AOM_Q) {
  756. enccfg.rc_end_usage = AOM_Q;
  757. ctx->crf = 32;
  758. av_log(avctx, AV_LOG_WARNING,
  759. "Neither bitrate nor constrained quality specified, using default CRF of %d\n",
  760. ctx->crf);
  761. }
  762. if (avctx->qmin >= 0)
  763. enccfg.rc_min_quantizer = avctx->qmin;
  764. if (avctx->qmax >= 0) {
  765. enccfg.rc_max_quantizer = avctx->qmax;
  766. } else if (!ctx->crf) {
  767. enccfg.rc_max_quantizer = 0;
  768. }
  769. if (enccfg.rc_end_usage == AOM_CQ || enccfg.rc_end_usage == AOM_Q) {
  770. if (ctx->crf < enccfg.rc_min_quantizer || ctx->crf > enccfg.rc_max_quantizer) {
  771. av_log(avctx, AV_LOG_ERROR,
  772. "CQ level %d must be between minimum and maximum quantizer value (%d-%d)\n",
  773. ctx->crf, enccfg.rc_min_quantizer, enccfg.rc_max_quantizer);
  774. return AVERROR(EINVAL);
  775. }
  776. }
  777. enccfg.rc_dropframe_thresh = ctx->drop_threshold;
  778. // 0-100 (0 => CBR, 100 => VBR)
  779. enccfg.rc_2pass_vbr_bias_pct = round(avctx->qcompress * 100);
  780. if (ctx->minsection_pct >= 0)
  781. enccfg.rc_2pass_vbr_minsection_pct = ctx->minsection_pct;
  782. else if (avctx->bit_rate)
  783. enccfg.rc_2pass_vbr_minsection_pct =
  784. avctx->rc_min_rate * 100LL / avctx->bit_rate;
  785. if (ctx->maxsection_pct >= 0)
  786. enccfg.rc_2pass_vbr_maxsection_pct = ctx->maxsection_pct;
  787. else if (avctx->rc_max_rate)
  788. enccfg.rc_2pass_vbr_maxsection_pct =
  789. avctx->rc_max_rate * 100LL / avctx->bit_rate;
  790. if (avctx->rc_buffer_size)
  791. enccfg.rc_buf_sz =
  792. avctx->rc_buffer_size * 1000LL / avctx->bit_rate;
  793. if (avctx->rc_initial_buffer_occupancy)
  794. enccfg.rc_buf_initial_sz =
  795. avctx->rc_initial_buffer_occupancy * 1000LL / avctx->bit_rate;
  796. enccfg.rc_buf_optimal_sz = enccfg.rc_buf_sz * 5 / 6;
  797. if (ctx->rc_undershoot_pct >= 0)
  798. enccfg.rc_undershoot_pct = ctx->rc_undershoot_pct;
  799. if (ctx->rc_overshoot_pct >= 0)
  800. enccfg.rc_overshoot_pct = ctx->rc_overshoot_pct;
  801. // _enc_init() will balk if kf_min_dist differs from max w/AOM_KF_AUTO
  802. if (avctx->keyint_min >= 0 && avctx->keyint_min == avctx->gop_size)
  803. enccfg.kf_min_dist = avctx->keyint_min;
  804. if (avctx->gop_size >= 0)
  805. enccfg.kf_max_dist = avctx->gop_size;
  806. if (enccfg.g_pass == AOM_RC_FIRST_PASS)
  807. enccfg.g_lag_in_frames = 0;
  808. else if (enccfg.g_pass == AOM_RC_LAST_PASS) {
  809. int decode_size, ret;
  810. if (!avctx->stats_in) {
  811. av_log(avctx, AV_LOG_ERROR, "No stats file for second pass\n");
  812. return AVERROR_INVALIDDATA;
  813. }
  814. ctx->twopass_stats.sz = strlen(avctx->stats_in) * 3 / 4;
  815. ret = av_reallocp(&ctx->twopass_stats.buf, ctx->twopass_stats.sz);
  816. if (ret < 0) {
  817. av_log(avctx, AV_LOG_ERROR,
  818. "Stat buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
  819. ctx->twopass_stats.sz);
  820. ctx->twopass_stats.sz = 0;
  821. return ret;
  822. }
  823. decode_size = av_base64_decode(ctx->twopass_stats.buf, avctx->stats_in,
  824. ctx->twopass_stats.sz);
  825. if (decode_size < 0) {
  826. av_log(avctx, AV_LOG_ERROR, "Stat buffer decode failed\n");
  827. return AVERROR_INVALIDDATA;
  828. }
  829. ctx->twopass_stats.sz = decode_size;
  830. enccfg.rc_twopass_stats_in = ctx->twopass_stats;
  831. }
  832. /* 0-3: For non-zero values the encoder increasingly optimizes for reduced
  833. * complexity playback on low powered devices at the expense of encode
  834. * quality. */
  835. if (avctx->profile != AV_PROFILE_UNKNOWN)
  836. enccfg.g_profile = avctx->profile;
  837. enccfg.g_error_resilient = ctx->error_resilient;
  838. res = choose_tiling(avctx, &enccfg);
  839. if (res < 0)
  840. return res;
  841. if (ctx->still_picture) {
  842. // Set the maximum number of frames to 1. This will let libaom set
  843. // still_picture and reduced_still_picture_header to 1 in the Sequence
  844. // Header as required by AVIF still images.
  845. enccfg.g_limit = 1;
  846. // Reduce memory usage for still images.
  847. enccfg.g_lag_in_frames = 0;
  848. // All frames will be key frames.
  849. enccfg.kf_max_dist = 0;
  850. enccfg.kf_mode = AOM_KF_DISABLED;
  851. }
  852. /* Construct Encoder Context */
  853. res = aom_codec_enc_init(&ctx->encoder, iface, &enccfg, flags);
  854. if (res != AOM_CODEC_OK) {
  855. dump_enc_cfg(avctx, &enccfg, AV_LOG_WARNING);
  856. log_encoder_error(avctx, "Failed to initialize encoder");
  857. return AVERROR(EINVAL);
  858. }
  859. dump_enc_cfg(avctx, &enccfg, AV_LOG_DEBUG);
  860. // codec control failures are currently treated only as warnings
  861. av_log(avctx, AV_LOG_DEBUG, "aom_codec_control\n");
  862. codecctl_int(avctx, AOME_SET_CPUUSED, ctx->cpu_used);
  863. for (size_t i = 0; i < FF_ARRAY_ELEMS(option_map); ++i) {
  864. int val = *(int*)((char*)ctx + option_map[i].offset);
  865. if (val >= 0)
  866. codecctl_int(avctx, option_map[i].aom_enum, val);
  867. }
  868. codecctl_int(avctx, AOME_SET_STATIC_THRESHOLD, ctx->static_thresh);
  869. if (ctx->crf >= 0)
  870. codecctl_int(avctx, AOME_SET_CQ_LEVEL, ctx->crf);
  871. if (ctx->tune >= 0)
  872. codecctl_int(avctx, AOME_SET_TUNING, ctx->tune);
  873. if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
  874. codecctl_int(avctx, AV1E_SET_COLOR_PRIMARIES, AVCOL_PRI_BT709);
  875. codecctl_int(avctx, AV1E_SET_MATRIX_COEFFICIENTS, AVCOL_SPC_RGB);
  876. codecctl_int(avctx, AV1E_SET_TRANSFER_CHARACTERISTICS, AVCOL_TRC_IEC61966_2_1);
  877. } else {
  878. codecctl_int(avctx, AV1E_SET_COLOR_PRIMARIES, avctx->color_primaries);
  879. codecctl_int(avctx, AV1E_SET_MATRIX_COEFFICIENTS, avctx->colorspace);
  880. codecctl_int(avctx, AV1E_SET_TRANSFER_CHARACTERISTICS, avctx->color_trc);
  881. }
  882. if (ctx->aq_mode >= 0)
  883. codecctl_int(avctx, AV1E_SET_AQ_MODE, ctx->aq_mode);
  884. if (ctx->frame_parallel >= 0)
  885. codecctl_int(avctx, AV1E_SET_FRAME_PARALLEL_DECODING, ctx->frame_parallel);
  886. set_color_range(avctx);
  887. codecctl_int(avctx, AV1E_SET_SUPERBLOCK_SIZE, ctx->superblock_size);
  888. if (ctx->uniform_tiles) {
  889. codecctl_int(avctx, AV1E_SET_TILE_COLUMNS, ctx->tile_cols_log2);
  890. codecctl_int(avctx, AV1E_SET_TILE_ROWS, ctx->tile_rows_log2);
  891. }
  892. if (ctx->denoise_noise_level >= 0)
  893. codecctl_int(avctx, AV1E_SET_DENOISE_NOISE_LEVEL, ctx->denoise_noise_level);
  894. if (ctx->denoise_block_size >= 0)
  895. codecctl_int(avctx, AV1E_SET_DENOISE_BLOCK_SIZE, ctx->denoise_block_size);
  896. if (ctx->enable_global_motion >= 0)
  897. codecctl_int(avctx, AV1E_SET_ENABLE_GLOBAL_MOTION, ctx->enable_global_motion);
  898. if (avctx->refs >= 3) {
  899. codecctl_int(avctx, AV1E_SET_MAX_REFERENCE_FRAMES, avctx->refs);
  900. }
  901. if (ctx->row_mt >= 0)
  902. codecctl_int(avctx, AV1E_SET_ROW_MT, ctx->row_mt);
  903. if (ctx->enable_intrabc >= 0)
  904. codecctl_int(avctx, AV1E_SET_ENABLE_INTRABC, ctx->enable_intrabc);
  905. #if AOM_ENCODER_ABI_VERSION >= 23
  906. {
  907. const AVDictionaryEntry *en = NULL;
  908. while ((en = av_dict_iterate(ctx->aom_params, en))) {
  909. int ret = aom_codec_set_option(&ctx->encoder, en->key, en->value);
  910. if (ret != AOM_CODEC_OK) {
  911. log_encoder_error(avctx, en->key);
  912. return AVERROR_EXTERNAL;
  913. }
  914. }
  915. }
  916. #endif
  917. // provide dummy value to initialize wrapper, values will be updated each _encode()
  918. aom_img_wrap(&ctx->rawimg, img_fmt, avctx->width, avctx->height, 1,
  919. (unsigned char*)1);
  920. if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH)
  921. ctx->rawimg.bit_depth = enccfg.g_bit_depth;
  922. ctx->dovi.logctx = avctx;
  923. if ((res = ff_dovi_configure(&ctx->dovi, avctx)) < 0)
  924. return res;
  925. if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
  926. const AVBitStreamFilter *filter = av_bsf_get_by_name("extract_extradata");
  927. int ret;
  928. if (!filter) {
  929. av_log(avctx, AV_LOG_ERROR, "extract_extradata bitstream filter "
  930. "not found. This is a bug, please report it.\n");
  931. return AVERROR_BUG;
  932. }
  933. ret = av_bsf_alloc(filter, &ctx->bsf);
  934. if (ret < 0)
  935. return ret;
  936. ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx);
  937. if (ret < 0)
  938. return ret;
  939. ret = av_bsf_init(ctx->bsf);
  940. if (ret < 0)
  941. return ret;
  942. }
  943. AVCPBProperties *cpb_props = ff_encode_add_cpb_side_data(avctx);
  944. if (!cpb_props)
  945. return AVERROR(ENOMEM);
  946. if (enccfg.rc_end_usage == AOM_CBR ||
  947. enccfg.g_pass != AOM_RC_ONE_PASS) {
  948. cpb_props->max_bitrate = avctx->rc_max_rate;
  949. cpb_props->min_bitrate = avctx->rc_min_rate;
  950. cpb_props->avg_bitrate = avctx->bit_rate;
  951. }
  952. cpb_props->buffer_size = avctx->rc_buffer_size;
  953. return 0;
  954. }
  955. static inline void cx_pktcpy(AOMContext *ctx,
  956. struct FrameListData *dst,
  957. const struct aom_codec_cx_pkt *src)
  958. {
  959. dst->pts = src->data.frame.pts;
  960. dst->duration = src->data.frame.duration;
  961. dst->flags = src->data.frame.flags;
  962. dst->sz = src->data.frame.sz;
  963. dst->buf = src->data.frame.buf;
  964. dst->frame_number = ++ctx->frame_number;
  965. dst->have_sse = ctx->have_sse;
  966. if (ctx->have_sse) {
  967. /* associate last-seen SSE to the frame. */
  968. /* Transfers ownership from ctx to dst. */
  969. memcpy(dst->sse, ctx->sse, sizeof(dst->sse));
  970. ctx->have_sse = 0;
  971. }
  972. }
  973. /**
  974. * Store coded frame information in format suitable for return from encode2().
  975. *
  976. * Write information from @a cx_frame to @a pkt
  977. * @return packet data size on success
  978. * @return a negative AVERROR on error
  979. */
  980. static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
  981. AVPacket *pkt)
  982. {
  983. AOMContext *ctx = avctx->priv_data;
  984. av_unused int pict_type;
  985. int ret = ff_get_encode_buffer(avctx, pkt, cx_frame->sz, 0);
  986. if (ret < 0) {
  987. av_log(avctx, AV_LOG_ERROR,
  988. "Error getting output packet of size %"SIZE_SPECIFIER".\n", cx_frame->sz);
  989. return ret;
  990. }
  991. memcpy(pkt->data, cx_frame->buf, pkt->size);
  992. pkt->pts = pkt->dts = cx_frame->pts;
  993. pkt->duration = cx_frame->duration;
  994. if (!!(cx_frame->flags & AOM_FRAME_IS_KEY)) {
  995. pkt->flags |= AV_PKT_FLAG_KEY;
  996. pict_type = AV_PICTURE_TYPE_I;
  997. } else if (cx_frame->flags & AOM_FRAME_IS_INTRAONLY) {
  998. pict_type = AV_PICTURE_TYPE_I;
  999. } else {
  1000. pict_type = AV_PICTURE_TYPE_P;
  1001. }
  1002. ff_side_data_set_encoder_stats(pkt, 0, cx_frame->sse + 1,
  1003. cx_frame->have_sse ? 3 : 0, pict_type);
  1004. if (cx_frame->have_sse) {
  1005. int i;
  1006. for (i = 0; i < 3; ++i) {
  1007. avctx->error[i] += cx_frame->sse[i + 1];
  1008. }
  1009. cx_frame->have_sse = 0;
  1010. }
  1011. if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
  1012. ret = av_bsf_send_packet(ctx->bsf, pkt);
  1013. if (ret < 0) {
  1014. av_log(avctx, AV_LOG_ERROR, "extract_extradata filter "
  1015. "failed to send input packet\n");
  1016. return ret;
  1017. }
  1018. ret = av_bsf_receive_packet(ctx->bsf, pkt);
  1019. if (ret < 0) {
  1020. av_log(avctx, AV_LOG_ERROR, "extract_extradata filter "
  1021. "failed to receive output packet\n");
  1022. return ret;
  1023. }
  1024. }
  1025. return pkt->size;
  1026. }
  1027. /**
  1028. * Queue multiple output frames from the encoder, returning the front-most.
  1029. * In cases where aom_codec_get_cx_data() returns more than 1 frame append
  1030. * the frame queue. Return the head frame if available.
  1031. * @return Stored frame size
  1032. * @return AVERROR(EINVAL) on output size error
  1033. * @return AVERROR(ENOMEM) on coded frame queue data allocation error
  1034. */
  1035. static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out)
  1036. {
  1037. AOMContext *ctx = avctx->priv_data;
  1038. const struct aom_codec_cx_pkt *pkt;
  1039. const void *iter = NULL;
  1040. int size = 0;
  1041. if (ctx->coded_frame_list) {
  1042. struct FrameListData *cx_frame = ctx->coded_frame_list;
  1043. /* return the leading frame if we've already begun queueing */
  1044. size = storeframe(avctx, cx_frame, pkt_out);
  1045. if (size < 0)
  1046. return size;
  1047. ctx->coded_frame_list = cx_frame->next;
  1048. free_coded_frame(cx_frame);
  1049. }
  1050. /* consume all available output from the encoder before returning. buffers
  1051. * are only good through the next aom_codec call */
  1052. while ((pkt = aom_codec_get_cx_data(&ctx->encoder, &iter))) {
  1053. switch (pkt->kind) {
  1054. case AOM_CODEC_CX_FRAME_PKT:
  1055. if (!size) {
  1056. struct FrameListData cx_frame;
  1057. /* avoid storing the frame when the list is empty and we haven't yet
  1058. * provided a frame for output */
  1059. av_assert0(!ctx->coded_frame_list);
  1060. cx_pktcpy(ctx, &cx_frame, pkt);
  1061. size = storeframe(avctx, &cx_frame, pkt_out);
  1062. if (size < 0)
  1063. return size;
  1064. } else {
  1065. struct FrameListData *cx_frame =
  1066. av_malloc(sizeof(struct FrameListData));
  1067. if (!cx_frame) {
  1068. av_log(avctx, AV_LOG_ERROR,
  1069. "Frame queue element alloc failed\n");
  1070. return AVERROR(ENOMEM);
  1071. }
  1072. cx_pktcpy(ctx, cx_frame, pkt);
  1073. cx_frame->buf = av_malloc(cx_frame->sz);
  1074. if (!cx_frame->buf) {
  1075. av_log(avctx, AV_LOG_ERROR,
  1076. "Data buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
  1077. cx_frame->sz);
  1078. av_freep(&cx_frame);
  1079. return AVERROR(ENOMEM);
  1080. }
  1081. memcpy(cx_frame->buf, pkt->data.frame.buf, pkt->data.frame.sz);
  1082. coded_frame_add(&ctx->coded_frame_list, cx_frame);
  1083. }
  1084. break;
  1085. case AOM_CODEC_STATS_PKT:
  1086. {
  1087. struct aom_fixed_buf *stats = &ctx->twopass_stats;
  1088. uint8_t *tmp = av_fast_realloc(stats->buf,
  1089. &ctx->twopass_stats_size,
  1090. stats->sz +
  1091. pkt->data.twopass_stats.sz);
  1092. if (!tmp) {
  1093. av_freep(&stats->buf);
  1094. stats->sz = 0;
  1095. av_log(avctx, AV_LOG_ERROR, "Stat buffer realloc failed\n");
  1096. return AVERROR(ENOMEM);
  1097. }
  1098. stats->buf = tmp;
  1099. memcpy((uint8_t *)stats->buf + stats->sz,
  1100. pkt->data.twopass_stats.buf, pkt->data.twopass_stats.sz);
  1101. stats->sz += pkt->data.twopass_stats.sz;
  1102. break;
  1103. }
  1104. case AOM_CODEC_PSNR_PKT:
  1105. {
  1106. av_assert0(!ctx->have_sse);
  1107. ctx->sse[0] = pkt->data.psnr.sse[0];
  1108. ctx->sse[1] = pkt->data.psnr.sse[1];
  1109. ctx->sse[2] = pkt->data.psnr.sse[2];
  1110. ctx->sse[3] = pkt->data.psnr.sse[3];
  1111. ctx->have_sse = 1;
  1112. break;
  1113. }
  1114. case AOM_CODEC_CUSTOM_PKT:
  1115. // ignore unsupported/unrecognized packet types
  1116. break;
  1117. }
  1118. }
  1119. return size;
  1120. }
  1121. static enum AVPixelFormat aomfmt_to_pixfmt(struct aom_image *img)
  1122. {
  1123. switch (img->fmt) {
  1124. case AOM_IMG_FMT_I420:
  1125. case AOM_IMG_FMT_I42016:
  1126. if (img->bit_depth == 8)
  1127. return img->monochrome ? AV_PIX_FMT_GRAY8 : AV_PIX_FMT_YUV420P;
  1128. else if (img->bit_depth == 10)
  1129. return img->monochrome ? AV_PIX_FMT_GRAY10 : AV_PIX_FMT_YUV420P10;
  1130. else
  1131. return img->monochrome ? AV_PIX_FMT_GRAY12 : AV_PIX_FMT_YUV420P12;
  1132. case AOM_IMG_FMT_I422:
  1133. case AOM_IMG_FMT_I42216:
  1134. if (img->bit_depth == 8)
  1135. return AV_PIX_FMT_YUV422P;
  1136. else if (img->bit_depth == 10)
  1137. return AV_PIX_FMT_YUV422P10;
  1138. else
  1139. return AV_PIX_FMT_YUV422P12;
  1140. case AOM_IMG_FMT_I444:
  1141. case AOM_IMG_FMT_I44416:
  1142. if (img->bit_depth == 8)
  1143. return AV_PIX_FMT_YUV444P;
  1144. else if (img->bit_depth == 10)
  1145. return AV_PIX_FMT_YUV444P10;
  1146. else
  1147. return AV_PIX_FMT_YUV444P12;
  1148. };
  1149. return AV_PIX_FMT_NONE;
  1150. }
  1151. static int aom_encode(AVCodecContext *avctx, AVPacket *pkt,
  1152. const AVFrame *frame, int *got_packet)
  1153. {
  1154. AOMContext *ctx = avctx->priv_data;
  1155. struct aom_image *rawimg = NULL;
  1156. int64_t timestamp = 0;
  1157. unsigned long duration = 0;
  1158. int res, coded_size;
  1159. aom_enc_frame_flags_t flags = 0;
  1160. AVFrameSideData *sd;
  1161. if (frame) {
  1162. rawimg = &ctx->rawimg;
  1163. aom_img_remove_metadata(rawimg);
  1164. rawimg->planes[AOM_PLANE_Y] = frame->data[0];
  1165. rawimg->planes[AOM_PLANE_U] = frame->data[1];
  1166. rawimg->planes[AOM_PLANE_V] = frame->data[2];
  1167. rawimg->stride[AOM_PLANE_Y] = frame->linesize[0];
  1168. rawimg->stride[AOM_PLANE_U] = frame->linesize[1];
  1169. rawimg->stride[AOM_PLANE_V] = frame->linesize[2];
  1170. timestamp = frame->pts;
  1171. if (frame->duration > ULONG_MAX) {
  1172. av_log(avctx, AV_LOG_WARNING,
  1173. "Frame duration too large: %"PRId64"\n", frame->duration);
  1174. } else if (frame->duration)
  1175. duration = frame->duration;
  1176. else if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
  1177. duration = av_rescale_q(1, av_inv_q(avctx->framerate), avctx->time_base);
  1178. else {
  1179. duration = 1;
  1180. }
  1181. switch (frame->color_range) {
  1182. case AVCOL_RANGE_MPEG:
  1183. rawimg->range = AOM_CR_STUDIO_RANGE;
  1184. break;
  1185. case AVCOL_RANGE_JPEG:
  1186. rawimg->range = AOM_CR_FULL_RANGE;
  1187. break;
  1188. }
  1189. aom_img_remove_metadata(rawimg);
  1190. sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_METADATA);
  1191. if (ctx->dovi.cfg.dv_profile && sd) {
  1192. const AVDOVIMetadata *metadata = (const AVDOVIMetadata *)sd->data;
  1193. uint8_t *t35;
  1194. int size;
  1195. if ((res = ff_dovi_rpu_generate(&ctx->dovi, metadata, FF_DOVI_WRAP_T35,
  1196. &t35, &size)) < 0)
  1197. return res;
  1198. res = aom_img_add_metadata(rawimg, OBU_METADATA_TYPE_ITUT_T35,
  1199. t35, size, AOM_MIF_ANY_FRAME);
  1200. av_free(t35);
  1201. if (res != AOM_CODEC_OK)
  1202. return AVERROR(ENOMEM);
  1203. } else if (ctx->dovi.cfg.dv_profile) {
  1204. av_log(avctx, AV_LOG_ERROR, "Dolby Vision enabled, but received frame "
  1205. "without AV_FRAME_DATA_DOVI_METADATA\n");
  1206. return AVERROR_INVALIDDATA;
  1207. }
  1208. if (frame->pict_type == AV_PICTURE_TYPE_I)
  1209. flags |= AOM_EFLAG_FORCE_KF;
  1210. res = add_hdr_plus(avctx, rawimg, frame);
  1211. if (res < 0)
  1212. return res;
  1213. }
  1214. res = aom_codec_encode(&ctx->encoder, rawimg, timestamp, duration, flags);
  1215. if (res != AOM_CODEC_OK) {
  1216. log_encoder_error(avctx, "Error encoding frame");
  1217. return AVERROR_INVALIDDATA;
  1218. }
  1219. coded_size = queue_frames(avctx, pkt);
  1220. if (coded_size < 0)
  1221. return coded_size;
  1222. if (!frame && avctx->flags & AV_CODEC_FLAG_PASS1) {
  1223. size_t b64_size = AV_BASE64_SIZE(ctx->twopass_stats.sz);
  1224. avctx->stats_out = av_malloc(b64_size);
  1225. if (!avctx->stats_out) {
  1226. av_log(avctx, AV_LOG_ERROR, "Stat buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
  1227. b64_size);
  1228. return AVERROR(ENOMEM);
  1229. }
  1230. av_base64_encode(avctx->stats_out, b64_size, ctx->twopass_stats.buf,
  1231. ctx->twopass_stats.sz);
  1232. }
  1233. *got_packet = !!coded_size;
  1234. if (*got_packet && avctx->flags & AV_CODEC_FLAG_RECON_FRAME) {
  1235. AVCodecInternal *avci = avctx->internal;
  1236. struct aom_image img;
  1237. av_frame_unref(avci->recon_frame);
  1238. res = codecctl_imgp(avctx, AV1_GET_NEW_FRAME_IMAGE, &img);
  1239. if (res < 0)
  1240. return res;
  1241. avci->recon_frame->format = aomfmt_to_pixfmt(&img);
  1242. if (avci->recon_frame->format == AV_PIX_FMT_NONE) {
  1243. av_log(ctx, AV_LOG_ERROR,
  1244. "Unhandled reconstructed frame colorspace: %d\n",
  1245. img.fmt);
  1246. return AVERROR(ENOSYS);
  1247. }
  1248. avci->recon_frame->width = img.d_w;
  1249. avci->recon_frame->height = img.d_h;
  1250. res = av_frame_get_buffer(avci->recon_frame, 0);
  1251. if (res < 0)
  1252. return res;
  1253. if ((img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) && img.bit_depth == 8)
  1254. ff_aom_image_copy_16_to_8(avci->recon_frame, &img);
  1255. else {
  1256. const uint8_t *planes[4] = { img.planes[0], img.planes[1], img.planes[2] };
  1257. const int stride[4] = { img.stride[0], img.stride[1], img.stride[2] };
  1258. av_image_copy(avci->recon_frame->data, avci->recon_frame->linesize, planes,
  1259. stride, avci->recon_frame->format, img.d_w, img.d_h);
  1260. }
  1261. }
  1262. return 0;
  1263. }
  1264. static const enum AVPixelFormat av1_pix_fmts[] = {
  1265. AV_PIX_FMT_YUV420P,
  1266. AV_PIX_FMT_YUV422P,
  1267. AV_PIX_FMT_YUV444P,
  1268. AV_PIX_FMT_GBRP,
  1269. AV_PIX_FMT_NONE
  1270. };
  1271. static const enum AVPixelFormat av1_pix_fmts_with_gray[] = {
  1272. AV_PIX_FMT_YUV420P,
  1273. AV_PIX_FMT_YUV422P,
  1274. AV_PIX_FMT_YUV444P,
  1275. AV_PIX_FMT_GBRP,
  1276. AV_PIX_FMT_GRAY8,
  1277. AV_PIX_FMT_NONE
  1278. };
  1279. static const enum AVPixelFormat av1_pix_fmts_highbd[] = {
  1280. AV_PIX_FMT_YUV420P,
  1281. AV_PIX_FMT_YUV422P,
  1282. AV_PIX_FMT_YUV444P,
  1283. AV_PIX_FMT_GBRP,
  1284. AV_PIX_FMT_YUV420P10,
  1285. AV_PIX_FMT_YUV422P10,
  1286. AV_PIX_FMT_YUV444P10,
  1287. AV_PIX_FMT_YUV420P12,
  1288. AV_PIX_FMT_YUV422P12,
  1289. AV_PIX_FMT_YUV444P12,
  1290. AV_PIX_FMT_GBRP10,
  1291. AV_PIX_FMT_GBRP12,
  1292. AV_PIX_FMT_NONE
  1293. };
  1294. static const enum AVPixelFormat av1_pix_fmts_highbd_with_gray[] = {
  1295. AV_PIX_FMT_YUV420P,
  1296. AV_PIX_FMT_YUV422P,
  1297. AV_PIX_FMT_YUV444P,
  1298. AV_PIX_FMT_GBRP,
  1299. AV_PIX_FMT_YUV420P10,
  1300. AV_PIX_FMT_YUV422P10,
  1301. AV_PIX_FMT_YUV444P10,
  1302. AV_PIX_FMT_YUV420P12,
  1303. AV_PIX_FMT_YUV422P12,
  1304. AV_PIX_FMT_YUV444P12,
  1305. AV_PIX_FMT_GBRP10,
  1306. AV_PIX_FMT_GBRP12,
  1307. AV_PIX_FMT_GRAY8,
  1308. AV_PIX_FMT_GRAY10,
  1309. AV_PIX_FMT_GRAY12,
  1310. AV_PIX_FMT_NONE
  1311. };
  1312. static int av1_get_supported_config(const AVCodecContext *avctx,
  1313. const AVCodec *codec,
  1314. enum AVCodecConfig config,
  1315. unsigned flags, const void **out,
  1316. int *out_num)
  1317. {
  1318. if (config == AV_CODEC_CONFIG_PIX_FORMAT) {
  1319. int supports_monochrome = aom_codec_version() >= 20001;
  1320. aom_codec_caps_t codec_caps = aom_codec_get_caps(aom_codec_av1_cx());
  1321. if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH) {
  1322. if (supports_monochrome) {
  1323. *out = av1_pix_fmts_highbd_with_gray;
  1324. *out_num = FF_ARRAY_ELEMS(av1_pix_fmts_highbd_with_gray) - 1;
  1325. } else {
  1326. *out = av1_pix_fmts_highbd;
  1327. *out_num = FF_ARRAY_ELEMS(av1_pix_fmts_highbd) - 1;
  1328. }
  1329. } else {
  1330. if (supports_monochrome) {
  1331. *out = av1_pix_fmts_with_gray;
  1332. *out_num = FF_ARRAY_ELEMS(av1_pix_fmts_with_gray) - 1;
  1333. } else {
  1334. *out = av1_pix_fmts;
  1335. *out_num = FF_ARRAY_ELEMS(av1_pix_fmts) - 1;
  1336. }
  1337. }
  1338. return 0;
  1339. }
  1340. return ff_default_get_supported_config(avctx, codec, config, flags, out, out_num);
  1341. }
  1342. static av_cold int av1_init(AVCodecContext *avctx)
  1343. {
  1344. return aom_init(avctx, aom_codec_av1_cx());
  1345. }
  1346. #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
  1347. static const AVOption options[] = {
  1348. { "cpu-used", "Quality/Speed ratio modifier", OFFSET(cpu_used), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 8, VE},
  1349. { "auto-alt-ref", "Enable use of alternate reference "
  1350. "frames (2-pass only)", OFFSET(auto_alt_ref), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 2, VE},
  1351. { "lag-in-frames", "Number of frames to look ahead at for "
  1352. "alternate reference frame selection", OFFSET(lag_in_frames), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE},
  1353. { "arnr-max-frames", "altref noise reduction max frame count", OFFSET(arnr_max_frames), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE},
  1354. { "arnr-strength", "altref noise reduction filter strength", OFFSET(arnr_strength), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 6, VE},
  1355. { "aq-mode", "adaptive quantization mode", OFFSET(aq_mode), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 4, VE, .unit = "aq_mode"},
  1356. { "none", "Aq not used", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, VE, .unit = "aq_mode"},
  1357. { "variance", "Variance based Aq", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, VE, .unit = "aq_mode"},
  1358. { "complexity", "Complexity based Aq", 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 0, VE, .unit = "aq_mode"},
  1359. { "cyclic", "Cyclic Refresh Aq", 0, AV_OPT_TYPE_CONST, {.i64 = 3}, 0, 0, VE, .unit = "aq_mode"},
  1360. { "error-resilience", "Error resilience configuration", OFFSET(error_resilient), AV_OPT_TYPE_FLAGS, {.i64 = 0}, INT_MIN, INT_MAX, VE, .unit = "er"},
  1361. { "default", "Improve resiliency against losses of whole frames", 0, AV_OPT_TYPE_CONST, {.i64 = AOM_ERROR_RESILIENT_DEFAULT}, 0, 0, VE, .unit = "er"},
  1362. { "crf", "Select the quality for constant quality mode", offsetof(AOMContext, crf), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 63, VE },
  1363. { "static-thresh", "A change threshold on blocks below which they will be skipped by the encoder", OFFSET(static_thresh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
  1364. { "drop-threshold", "Frame drop threshold", offsetof(AOMContext, drop_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE },
  1365. { "denoise-noise-level", "Amount of noise to be removed", OFFSET(denoise_noise_level), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE},
  1366. { "denoise-block-size", "Denoise block size ", OFFSET(denoise_block_size), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE},
  1367. { "undershoot-pct", "Datarate undershoot (min) target (%)", OFFSET(rc_undershoot_pct), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 100, VE},
  1368. { "overshoot-pct", "Datarate overshoot (max) target (%)", OFFSET(rc_overshoot_pct), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1000, VE},
  1369. { "minsection-pct", "GOP min bitrate (% of target)", OFFSET(minsection_pct), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 100, VE},
  1370. { "maxsection-pct", "GOP max bitrate (% of target)", OFFSET(maxsection_pct), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 5000, VE},
  1371. { "frame-parallel", "Enable frame parallel decodability features", OFFSET(frame_parallel), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1372. { "tiles", "Tile columns x rows", OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, VE },
  1373. { "tile-columns", "Log2 of number of tile columns to use", OFFSET(tile_cols_log2), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 6, VE},
  1374. { "tile-rows", "Log2 of number of tile rows to use", OFFSET(tile_rows_log2), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 6, VE},
  1375. { "row-mt", "Enable row based multi-threading", OFFSET(row_mt), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1376. { "enable-cdef", "Enable CDEF filtering", OFFSET(enable_cdef), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1377. { "enable-global-motion", "Enable global motion", OFFSET(enable_global_motion), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1378. { "enable-intrabc", "Enable intra block copy prediction mode", OFFSET(enable_intrabc), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1379. { "enable-restoration", "Enable Loop Restoration filtering", OFFSET(enable_restoration), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1380. { "usage", "Quality and compression efficiency vs speed trade-off", OFFSET(usage), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, VE, .unit = "usage"},
  1381. { "good", "Good quality", 0, AV_OPT_TYPE_CONST, {.i64 = 0 /* AOM_USAGE_GOOD_QUALITY */}, 0, 0, VE, .unit = "usage"},
  1382. { "realtime", "Realtime encoding", 0, AV_OPT_TYPE_CONST, {.i64 = 1 /* AOM_USAGE_REALTIME */}, 0, 0, VE, .unit = "usage"},
  1383. { "allintra", "All Intra encoding", 0, AV_OPT_TYPE_CONST, {.i64 = 2 /* AOM_USAGE_ALL_INTRA */}, 0, 0, VE, .unit = "usage"},
  1384. { "tune", "The metric that the encoder tunes for. Automatically chosen by the encoder by default", OFFSET(tune), AV_OPT_TYPE_INT, {.i64 = -1}, -1, AOM_TUNE_SSIM, VE, .unit = "tune"},
  1385. { "psnr", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AOM_TUNE_PSNR}, 0, 0, VE, .unit = "tune"},
  1386. { "ssim", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AOM_TUNE_SSIM}, 0, 0, VE, .unit = "tune"},
  1387. FF_AV1_PROFILE_OPTS
  1388. { "still-picture", "Encode in single frame mode (typically used for still AVIF images).", OFFSET(still_picture), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, VE },
  1389. { "dolbyvision", "Enable Dolby Vision RPU coding", OFFSET(dovi.enable), AV_OPT_TYPE_BOOL, {.i64 = FF_DOVI_AUTOMATIC }, -1, 1, VE, .unit = "dovi" },
  1390. { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DOVI_AUTOMATIC}, .flags = VE, .unit = "dovi" },
  1391. { "enable-rect-partitions", "Enable rectangular partitions", OFFSET(enable_rect_partitions), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1392. { "enable-1to4-partitions", "Enable 1:4/4:1 partitions", OFFSET(enable_1to4_partitions), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1393. { "enable-ab-partitions", "Enable ab shape partitions", OFFSET(enable_ab_partitions), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1394. { "enable-angle-delta", "Enable angle delta intra prediction", OFFSET(enable_angle_delta), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1395. { "enable-cfl-intra", "Enable chroma predicted from luma intra prediction", OFFSET(enable_cfl_intra), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1396. { "enable-filter-intra", "Enable filter intra predictor", OFFSET(enable_filter_intra), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1397. { "enable-intra-edge-filter", "Enable intra edge filter", OFFSET(enable_intra_edge_filter), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1398. { "enable-smooth-intra", "Enable smooth intra prediction mode", OFFSET(enable_smooth_intra), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1399. { "enable-paeth-intra", "Enable paeth predictor in intra prediction", OFFSET(enable_paeth_intra), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1400. { "enable-palette", "Enable palette prediction mode", OFFSET(enable_palette), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1401. { "enable-flip-idtx", "Enable extended transform type", OFFSET(enable_flip_idtx), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1402. { "enable-tx64", "Enable 64-pt transform", OFFSET(enable_tx64), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1403. { "reduced-tx-type-set", "Use reduced set of transform types", OFFSET(reduced_tx_type_set), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1404. { "use-intra-dct-only", "Use DCT only for INTRA modes", OFFSET(use_intra_dct_only), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1405. { "use-inter-dct-only", "Use DCT only for INTER modes", OFFSET(use_inter_dct_only), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1406. { "use-intra-default-tx-only", "Use default-transform only for INTRA modes", OFFSET(use_intra_default_tx_only), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1407. { "enable-ref-frame-mvs", "Enable temporal mv prediction", OFFSET(enable_ref_frame_mvs), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1408. { "enable-reduced-reference-set", "Use reduced set of single and compound references", OFFSET(enable_reduced_reference_set), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1409. { "enable-obmc", "Enable obmc", OFFSET(enable_obmc), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1410. { "enable-dual-filter", "Enable dual filter", OFFSET(enable_dual_filter), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1411. { "enable-diff-wtd-comp", "Enable difference-weighted compound", OFFSET(enable_diff_wtd_comp), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1412. { "enable-dist-wtd-comp", "Enable distance-weighted compound", OFFSET(enable_dist_wtd_comp), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1413. { "enable-onesided-comp", "Enable one sided compound", OFFSET(enable_onesided_comp), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1414. { "enable-interinter-wedge", "Enable interinter wedge compound", OFFSET(enable_interinter_wedge), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1415. { "enable-interintra-wedge", "Enable interintra wedge compound", OFFSET(enable_interintra_wedge), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1416. { "enable-masked-comp", "Enable masked compound", OFFSET(enable_masked_comp), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1417. { "enable-interintra-comp", "Enable interintra compound", OFFSET(enable_interintra_comp), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1418. { "enable-smooth-interintra", "Enable smooth interintra mode", OFFSET(enable_smooth_interintra), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
  1419. #if AOM_ENCODER_ABI_VERSION >= 23
  1420. { "aom-params", "Set libaom options using a :-separated list of key=value pairs", OFFSET(aom_params), AV_OPT_TYPE_DICT, { 0 }, 0, 0, VE },
  1421. #endif
  1422. { NULL },
  1423. };
  1424. static const FFCodecDefault defaults[] = {
  1425. { "b", "0" },
  1426. { "qmin", "-1" },
  1427. { "qmax", "-1" },
  1428. { "g", "-1" },
  1429. { "keyint_min", "-1" },
  1430. { NULL },
  1431. };
  1432. static const AVClass class_aom = {
  1433. .class_name = "libaom-av1 encoder",
  1434. .item_name = av_default_item_name,
  1435. .option = options,
  1436. .version = LIBAVUTIL_VERSION_INT,
  1437. };
  1438. FFCodec ff_libaom_av1_encoder = {
  1439. .p.name = "libaom-av1",
  1440. CODEC_LONG_NAME("libaom AV1"),
  1441. .p.type = AVMEDIA_TYPE_VIDEO,
  1442. .p.id = AV_CODEC_ID_AV1,
  1443. .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
  1444. AV_CODEC_CAP_ENCODER_RECON_FRAME |
  1445. AV_CODEC_CAP_OTHER_THREADS,
  1446. .color_ranges = AVCOL_RANGE_MPEG | AVCOL_RANGE_JPEG,
  1447. .p.profiles = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
  1448. .p.priv_class = &class_aom,
  1449. .p.wrapper_name = "libaom",
  1450. .priv_data_size = sizeof(AOMContext),
  1451. .init = av1_init,
  1452. FF_CODEC_ENCODE_CB(aom_encode),
  1453. .close = aom_free,
  1454. .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE |
  1455. FF_CODEC_CAP_INIT_CLEANUP |
  1456. FF_CODEC_CAP_AUTO_THREADS,
  1457. .defaults = defaults,
  1458. .get_supported_config = av1_get_supported_config,
  1459. };