sw_ops.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776
  1. /**
  2. * Copyright (C) 2025 Niklas Haas
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License along
  17. * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  18. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  19. */
  20. #include <string.h>
  21. #include "libavutil/avassert.h"
  22. #include "libavutil/mem_internal.h"
  23. #include "libavutil/refstruct.h"
  24. #include "libswscale/ops.h"
  25. #include "libswscale/ops_internal.h"
  26. #include "checkasm.h"
  /* Geometry of the scratch buffers shared by every test in this file. */
  enum {
      LINES     = 2,  /* rows in each plane buffer */
      NB_PLANES = 4,  /* plane buffers per image */
      PIXELS    = 64, /* pixels processed per row */
  };
  32. enum {
  33. U8 = SWS_PIXEL_U8,
  34. U16 = SWS_PIXEL_U16,
  35. U32 = SWS_PIXEL_U32,
  36. F32 = SWS_PIXEL_F32,
  37. };
  /*
   * Format a string into a zeroed 256 byte compound literal and evaluate to a
   * pointer to it. At least one argument after the format string is required.
   */
  #define FMT(fmt, ...) tprintf((char[256]) {0}, 256, fmt, __VA_ARGS__)

  /*
   * printf-style formatting into `buf` (at most `size` bytes, as bounded by
   * vsnprintf). Returns `buf` so that the call can be used as an expression.
   */
  static const char *tprintf(char buf[], size_t size, const char *fmt, ...)
  {
      va_list args;

      va_start(args, fmt);
      vsnprintf(buf, size, fmt, args);
      va_end(args);

      return buf;
  }
  47. static int rw_pixel_bits(const SwsOp *op)
  48. {
  49. const int elems = op->rw.packed ? op->rw.elems : 1;
  50. const int size = ff_sws_pixel_type_size(op->type);
  51. const int bits = 8 >> op->rw.frac;
  52. av_assert1(bits >= 1);
  53. return elems * size * bits;
  54. }
  /*
   * Return a random float: reinterpret random 32 bit patterns until one of
   * them is a normal number according to isnormal().
   */
  static float rndf(void)
  {
      union { uint32_t u; float f; } bits;

      for (;;) {
          bits.u = rnd();
          if (isnormal(bits.f))
              return bits.f;
      }
  }
  /*
   * Fill `num` floats. With a non-zero `range` every value is rnd() scaled by
   * range / UINT32_MAX; with range == 0 every value comes from rndf().
   */
  static void fill32f(float *line, int num, unsigned range)
  {
      const float scale = (float) range / UINT32_MAX;

      if (range) {
          for (int i = 0; i < num; i++)
              line[i] = scale * rnd();
      } else {
          for (int i = 0; i < num; i++)
              line[i] = rndf();
      }
  }
  /*
   * Fill `num` 32 bit words with random values. A `range` strictly between 0
   * and UINT_MAX limits the values to [0, range]; 0 and UINT_MAX both mean
   * "any 32 bit value".
   */
  static void fill32(uint32_t *line, int num, unsigned range)
  {
      const int bounded = range && range < UINT_MAX;

      for (int i = 0; i < num; i++) {
          uint32_t value = rnd();
          if (bounded)
              value %= range + 1;
          line[i] = value;
      }
  }
  /*
   * Fill `num` 16 bit words with random values.
   *
   * range == 0 means "any value": whole pairs of elements are filled through
   * fill32() and an odd trailing element is written on its own, so the
   * function never writes past line[num - 1]. Any other range limits the
   * values to [0, range] before the implicit truncation to 16 bits.
   */
  static void fill16(uint16_t *line, int num, unsigned range)
  {
      if (!range) {
          const int pairs = num / 2;

          fill32((uint32_t *) line, pairs, 0);
          /* odd tail: rounding up to a whole 32 bit word would overrun */
          for (int i = 2 * pairs; i < num; i++)
              line[i] = rnd();
          return;
      }

      for (int i = 0; i < num; i++) {
          /* range + 1 wraps to 0 for UINT_MAX; avoid the modulo by zero */
          line[i] = range < UINT_MAX ? rnd() % (range + 1) : rnd();
      }
  }
  /*
   * Fill `num` bytes with random values.
   *
   * range == 0 means "any value": whole groups of four bytes are filled
   * through fill32() and the remaining 0..3 bytes are written one by one, so
   * the function never writes past line[num - 1]. Any other range limits the
   * values to [0, range] before the implicit truncation to 8 bits.
   */
  static void fill8(uint8_t *line, int num, unsigned range)
  {
      if (!range) {
          const int quads = num / 4;

          fill32((uint32_t *) line, quads, 0);
          /* tail bytes: rounding up to a whole 32 bit word would overrun */
          for (int i = 4 * quads; i < num; i++)
              line[i] = rnd();
          return;
      }

      for (int i = 0; i < num; i++) {
          /* range + 1 wraps to 0 for UINT_MAX; avoid the modulo by zero */
          line[i] = range < UINT_MAX ? rnd() % (range + 1) : rnd();
      }
  }
  92. static void check_ops(const char *report, const unsigned ranges[NB_PLANES],
  93. const SwsOp *ops)
  94. {
  95. SwsContext *ctx = sws_alloc_context();
  96. SwsCompiledOp comp_ref = {0}, comp_new = {0};
  97. const SwsOpBackend *backend_new = NULL;
  98. SwsOpList oplist = { .ops = (SwsOp *) ops };
  99. const SwsOp *read_op, *write_op;
  100. static const unsigned def_ranges[4] = {0};
  101. if (!ranges)
  102. ranges = def_ranges;
  103. declare_func(void, const SwsOpExec *, const void *, int bx, int y, int bx_end, int y_end);
  104. DECLARE_ALIGNED_64(char, src0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
  105. DECLARE_ALIGNED_64(char, src1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
  106. DECLARE_ALIGNED_64(char, dst0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
  107. DECLARE_ALIGNED_64(char, dst1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
  108. if (!ctx)
  109. return;
  110. ctx->flags = SWS_BITEXACT;
  111. read_op = &ops[0];
  112. for (oplist.num_ops = 0; ops[oplist.num_ops].op; oplist.num_ops++)
  113. write_op = &ops[oplist.num_ops];
  114. const int read_size = PIXELS * rw_pixel_bits(read_op) >> 3;
  115. const int write_size = PIXELS * rw_pixel_bits(write_op) >> 3;
  116. for (int p = 0; p < NB_PLANES; p++) {
  117. void *plane = src0[p];
  118. switch (read_op->type) {
  119. case U8: fill8(plane, sizeof(src0[p]) / sizeof(uint8_t), ranges[p]); break;
  120. case U16: fill16(plane, sizeof(src0[p]) / sizeof(uint16_t), ranges[p]); break;
  121. case U32: fill32(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break;
  122. case F32: fill32f(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break;
  123. }
  124. }
  125. memcpy(src1, src0, sizeof(src0));
  126. memset(dst0, 0, sizeof(dst0));
  127. memset(dst1, 0, sizeof(dst1));
  128. /* Compile `ops` using both the asm and c backends */
  129. for (int n = 0; ff_sws_op_backends[n]; n++) {
  130. const SwsOpBackend *backend = ff_sws_op_backends[n];
  131. const bool is_ref = !strcmp(backend->name, "c");
  132. if (is_ref || !comp_new.func) {
  133. SwsCompiledOp comp;
  134. int ret = ff_sws_ops_compile_backend(ctx, backend, &oplist, &comp);
  135. if (ret == AVERROR(ENOTSUP))
  136. continue;
  137. else if (ret < 0)
  138. fail();
  139. else if (PIXELS % comp.block_size != 0)
  140. fail();
  141. if (is_ref)
  142. comp_ref = comp;
  143. if (!comp_new.func) {
  144. comp_new = comp;
  145. backend_new = backend;
  146. }
  147. }
  148. }
  149. av_assert0(comp_ref.func && comp_new.func);
  150. SwsOpExec exec = {0};
  151. exec.width = PIXELS;
  152. exec.height = exec.slice_h = 1;
  153. for (int i = 0; i < NB_PLANES; i++) {
  154. exec.in_stride[i] = sizeof(src0[i][0]);
  155. exec.out_stride[i] = sizeof(dst0[i][0]);
  156. exec.in_bump[i] = exec.in_stride[i] - read_size;
  157. exec.out_bump[i] = exec.out_stride[i] - write_size;
  158. }
  159. /**
  160. * Don't use check_func() because the actual function pointer may be a
  161. * wrapper shared by multiple implementations. Instead, take a hash of both
  162. * the backend pointer and the active CPU flags.
  163. */
  164. uintptr_t id = (uintptr_t) backend_new;
  165. id ^= (id << 6) + (id >> 2) + 0x9e3779b97f4a7c15 + comp_new.cpu_flags;
  166. checkasm_save_context();
  167. if (checkasm_check_func((void *) id, "%s", report)) {
  168. func_new = comp_new.func;
  169. func_ref = comp_ref.func;
  170. exec.block_size_in = comp_ref.block_size * rw_pixel_bits(read_op) >> 3;
  171. exec.block_size_out = comp_ref.block_size * rw_pixel_bits(write_op) >> 3;
  172. for (int i = 0; i < NB_PLANES; i++) {
  173. exec.in[i] = (void *) src0[i];
  174. exec.out[i] = (void *) dst0[i];
  175. }
  176. call_ref(&exec, comp_ref.priv, 0, 0, PIXELS / comp_ref.block_size, LINES);
  177. exec.block_size_in = comp_new.block_size * rw_pixel_bits(read_op) >> 3;
  178. exec.block_size_out = comp_new.block_size * rw_pixel_bits(write_op) >> 3;
  179. for (int i = 0; i < NB_PLANES; i++) {
  180. exec.in[i] = (void *) src1[i];
  181. exec.out[i] = (void *) dst1[i];
  182. }
  183. call_new(&exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES);
  184. for (int i = 0; i < NB_PLANES; i++) {
  185. const char *name = FMT("%s[%d]", report, i);
  186. const int stride = sizeof(dst0[i][0]);
  187. switch (write_op->type) {
  188. case U8:
  189. checkasm_check(uint8_t, (void *) dst0[i], stride,
  190. (void *) dst1[i], stride,
  191. write_size, LINES, name);
  192. break;
  193. case U16:
  194. checkasm_check(uint16_t, (void *) dst0[i], stride,
  195. (void *) dst1[i], stride,
  196. write_size >> 1, LINES, name);
  197. break;
  198. case U32:
  199. checkasm_check(uint32_t, (void *) dst0[i], stride,
  200. (void *) dst1[i], stride,
  201. write_size >> 2, LINES, name);
  202. break;
  203. case F32:
  204. checkasm_check(float_ulp, (void *) dst0[i], stride,
  205. (void *) dst1[i], stride,
  206. write_size >> 2, LINES, name, 0);
  207. break;
  208. }
  209. if (write_op->rw.packed)
  210. break;
  211. }
  212. bench_new(&exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES);
  213. }
  214. if (comp_new.func != comp_ref.func && comp_new.free)
  215. comp_new.free(comp_new.priv);
  216. if (comp_ref.free)
  217. comp_ref.free(comp_ref.priv);
  218. sws_free_context(&ctx);
  219. }
  /*
   * Run check_ops() on the list READ(IN, N_IN elems) -> <ops passed as the
   * variadic arguments> -> WRITE(OUT, N_OUT elems), with RANGES as the
   * per-plane input ranges.
   */
  #define CHECK_RANGES(NAME, RANGES, N_IN, N_OUT, IN, OUT, ...) \
      do { \
          check_ops(NAME, RANGES, (SwsOp[]) { \
              { \
                  .op = SWS_OP_READ, \
                  .type = IN, \
                  .rw.elems = N_IN, \
              }, \
              __VA_ARGS__, \
              { \
                  .op = SWS_OP_WRITE, \
                  .type = OUT, \
                  .rw.elems = N_OUT, \
              }, {0} \
          }); \
      } while (0)

  /* Build a ranges array that uses the same range R for all four planes. */
  #define MK_RANGES(R) ((const unsigned[]) { R, R, R, R })

  /* CHECK_RANGES() with one RANGE shared by every plane. */
  #define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT, ...) \
      CHECK_RANGES(NAME, MK_RANGES(RANGE), N_IN, N_OUT, IN, OUT, __VA_ARGS__)

  /*
   * Run the given ops with 1, 3 and 4 elements, plus a 4 -> 2 element variant
   * that appends a (0, 3, 1, 2) swizzle. The four statements are wrapped in
   * do { } while (0) so that the macro behaves as a single statement; note
   * that NAME and the variadic ops are expanded once per variant.
   */
  #define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT, ...) \
      do { \
          CHECK_RANGE(FMT("%s_p1000", NAME), RANGE, 1, 1, IN, OUT, __VA_ARGS__); \
          CHECK_RANGE(FMT("%s_p1110", NAME), RANGE, 3, 3, IN, OUT, __VA_ARGS__); \
          CHECK_RANGE(FMT("%s_p1111", NAME), RANGE, 4, 4, IN, OUT, __VA_ARGS__); \
          CHECK_RANGE(FMT("%s_p1001", NAME), RANGE, 4, 2, IN, OUT, __VA_ARGS__, { \
              .op = SWS_OP_SWIZZLE, \
              .type = OUT, \
              .swizzle = SWS_SWIZZLE(0, 3, 1, 2), \
          }); \
      } while (0)

  /* CHECK_RANGE() with range 0. */
  #define CHECK(NAME, N_IN, N_OUT, IN, OUT, ...) \
      CHECK_RANGE(NAME, 0, N_IN, N_OUT, IN, OUT, __VA_ARGS__)

  /* CHECK_COMMON_RANGE() with range 0. */
  #define CHECK_COMMON(NAME, IN, OUT, ...) \
      CHECK_COMMON_RANGE(NAME, 0, IN, OUT, __VA_ARGS__)
  252. static void check_read_write(void)
  253. {
  254. for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
  255. const char *type = ff_sws_pixel_type_name(t);
  256. for (int i = 1; i <= 4; i++) {
  257. /* Test N->N planar read/write */
  258. for (int o = 1; o <= i; o++) {
  259. check_ops(FMT("rw_%d_%d_%s", i, o, type), NULL, (SwsOp[]) {
  260. {
  261. .op = SWS_OP_READ,
  262. .type = t,
  263. .rw.elems = i,
  264. }, {
  265. .op = SWS_OP_WRITE,
  266. .type = t,
  267. .rw.elems = o,
  268. }, {0}
  269. });
  270. }
  271. /* Test packed read/write */
  272. if (i == 1)
  273. continue;
  274. check_ops(FMT("read_packed%d_%s", i, type), NULL, (SwsOp[]) {
  275. {
  276. .op = SWS_OP_READ,
  277. .type = t,
  278. .rw.elems = i,
  279. .rw.packed = true,
  280. }, {
  281. .op = SWS_OP_WRITE,
  282. .type = t,
  283. .rw.elems = i,
  284. }, {0}
  285. });
  286. check_ops(FMT("write_packed%d_%s", i, type), NULL, (SwsOp[]) {
  287. {
  288. .op = SWS_OP_READ,
  289. .type = t,
  290. .rw.elems = i,
  291. }, {
  292. .op = SWS_OP_WRITE,
  293. .type = t,
  294. .rw.elems = i,
  295. .rw.packed = true,
  296. }, {0}
  297. });
  298. }
  299. }
  300. /* Test fractional reads/writes */
  301. for (int frac = 1; frac <= 3; frac++) {
  302. const int bits = 8 >> frac;
  303. const int range = (1 << bits) - 1;
  304. if (bits == 2)
  305. continue; /* no 2 bit packed formats currently exist */
  306. check_ops(FMT("read_frac%d", frac), NULL, (SwsOp[]) {
  307. {
  308. .op = SWS_OP_READ,
  309. .type = U8,
  310. .rw.elems = 1,
  311. .rw.frac = frac,
  312. }, {
  313. .op = SWS_OP_WRITE,
  314. .type = U8,
  315. .rw.elems = 1,
  316. }, {0}
  317. });
  318. check_ops(FMT("write_frac%d", frac), MK_RANGES(range), (SwsOp[]) {
  319. {
  320. .op = SWS_OP_READ,
  321. .type = U8,
  322. .rw.elems = 1,
  323. }, {
  324. .op = SWS_OP_WRITE,
  325. .type = U8,
  326. .rw.elems = 1,
  327. .rw.frac = frac,
  328. }, {0}
  329. });
  330. }
  331. }
  332. static void check_swap_bytes(void)
  333. {
  334. CHECK_COMMON("swap_bytes_16", U16, U16, {
  335. .op = SWS_OP_SWAP_BYTES,
  336. .type = U16,
  337. });
  338. CHECK_COMMON("swap_bytes_32", U32, U32, {
  339. .op = SWS_OP_SWAP_BYTES,
  340. .type = U32,
  341. });
  342. }
  343. static void check_pack_unpack(void)
  344. {
  345. const struct {
  346. SwsPixelType type;
  347. SwsPackOp op;
  348. } patterns[] = {
  349. { U8, {{ 3, 3, 2 }}},
  350. { U8, {{ 2, 3, 3 }}},
  351. { U8, {{ 1, 2, 1 }}},
  352. {U16, {{ 5, 6, 5 }}},
  353. {U16, {{ 5, 5, 5 }}},
  354. {U16, {{ 4, 4, 4 }}},
  355. {U32, {{ 2, 10, 10, 10 }}},
  356. {U32, {{10, 10, 10, 2 }}},
  357. };
  358. for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
  359. const SwsPixelType type = patterns[i].type;
  360. const SwsPackOp pack = patterns[i].op;
  361. const int num = pack.pattern[3] ? 4 : 3;
  362. const char *pat = FMT("%d%d%d%d", pack.pattern[0], pack.pattern[1],
  363. pack.pattern[2], pack.pattern[3]);
  364. const int total = pack.pattern[0] + pack.pattern[1] +
  365. pack.pattern[2] + pack.pattern[3];
  366. const unsigned ranges[4] = {
  367. (1 << pack.pattern[0]) - 1,
  368. (1 << pack.pattern[1]) - 1,
  369. (1 << pack.pattern[2]) - 1,
  370. (1 << pack.pattern[3]) - 1,
  371. };
  372. CHECK_RANGES(FMT("pack_%s", pat), ranges, num, 1, type, type, {
  373. .op = SWS_OP_PACK,
  374. .type = type,
  375. .pack = pack,
  376. });
  377. CHECK_RANGE(FMT("unpack_%s", pat), UINT32_MAX >> (32 - total), 1, num, type, type, {
  378. .op = SWS_OP_UNPACK,
  379. .type = type,
  380. .pack = pack,
  381. });
  382. }
  383. }
  384. static AVRational rndq(SwsPixelType t)
  385. {
  386. const unsigned num = rnd();
  387. if (ff_sws_pixel_type_is_int(t)) {
  388. const unsigned mask = UINT_MAX >> (32 - ff_sws_pixel_type_size(t) * 8);
  389. return (AVRational) { num & mask, 1 };
  390. } else {
  391. const unsigned den = rnd();
  392. return (AVRational) { num, den ? den : 1 };
  393. }
  394. }
  395. static void check_clear(void)
  396. {
  397. for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
  398. const char *type = ff_sws_pixel_type_name(t);
  399. const int bits = ff_sws_pixel_type_size(t) * 8;
  400. /* TODO: AVRational can't fit 32 bit constants */
  401. if (bits < 32) {
  402. const AVRational chroma = (AVRational) { 1 << (bits - 1), 1};
  403. const AVRational alpha = (AVRational) { (1 << bits) - 1, 1};
  404. const AVRational zero = (AVRational) { 0, 1};
  405. const AVRational none = {0};
  406. const SwsConst patterns[] = {
  407. /* Zero only */
  408. {.q4 = { none, none, none, zero }},
  409. {.q4 = { zero, none, none, none }},
  410. /* Alpha only */
  411. {.q4 = { none, none, none, alpha }},
  412. {.q4 = { alpha, none, none, none }},
  413. /* Chroma only */
  414. {.q4 = { chroma, chroma, none, none }},
  415. {.q4 = { none, chroma, chroma, none }},
  416. {.q4 = { none, none, chroma, chroma }},
  417. {.q4 = { chroma, none, chroma, none }},
  418. {.q4 = { none, chroma, none, chroma }},
  419. /* Alpha+chroma */
  420. {.q4 = { chroma, chroma, none, alpha }},
  421. {.q4 = { none, chroma, chroma, alpha }},
  422. {.q4 = { alpha, none, chroma, chroma }},
  423. {.q4 = { chroma, none, chroma, alpha }},
  424. {.q4 = { alpha, chroma, none, chroma }},
  425. /* Random values */
  426. {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
  427. {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
  428. {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
  429. {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
  430. };
  431. for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
  432. CHECK(FMT("clear_pattern_%s[%d]", type, i), 4, 4, t, t, {
  433. .op = SWS_OP_CLEAR,
  434. .type = t,
  435. .c = patterns[i],
  436. });
  437. }
  438. } else if (!ff_sws_pixel_type_is_int(t)) {
  439. /* Floating point YUV doesn't exist, only alpha needs to be cleared */
  440. CHECK(FMT("clear_alpha_%s", type), 4, 4, t, t, {
  441. .op = SWS_OP_CLEAR,
  442. .type = t,
  443. .c.q4[3] = { 0, 1 },
  444. });
  445. }
  446. }
  447. }
  448. static void check_shift(void)
  449. {
  450. for (SwsPixelType t = U16; t < SWS_PIXEL_TYPE_NB; t++) {
  451. const char *type = ff_sws_pixel_type_name(t);
  452. if (!ff_sws_pixel_type_is_int(t))
  453. continue;
  454. for (int shift = 1; shift <= 8; shift++) {
  455. CHECK_COMMON(FMT("lshift%d_%s", shift, type), t, t, {
  456. .op = SWS_OP_LSHIFT,
  457. .type = t,
  458. .c.u = shift,
  459. });
  460. CHECK_COMMON(FMT("rshift%d_%s", shift, type), t, t, {
  461. .op = SWS_OP_RSHIFT,
  462. .type = t,
  463. .c.u = shift,
  464. });
  465. }
  466. }
  467. }
  468. static void check_swizzle(void)
  469. {
  470. for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
  471. const char *type = ff_sws_pixel_type_name(t);
  472. static const int patterns[][4] = {
  473. /* Pure swizzle */
  474. {3, 0, 1, 2},
  475. {3, 0, 2, 1},
  476. {2, 1, 0, 3},
  477. {3, 2, 1, 0},
  478. {3, 1, 0, 2},
  479. {3, 2, 0, 1},
  480. {1, 2, 0, 3},
  481. {1, 0, 2, 3},
  482. {2, 0, 1, 3},
  483. {2, 3, 1, 0},
  484. {2, 1, 3, 0},
  485. {1, 2, 3, 0},
  486. {1, 3, 2, 0},
  487. {0, 2, 1, 3},
  488. {0, 2, 3, 1},
  489. {0, 3, 1, 2},
  490. {3, 1, 2, 0},
  491. {0, 3, 2, 1},
  492. /* Luma expansion */
  493. {0, 0, 0, 3},
  494. {3, 0, 0, 0},
  495. {0, 0, 0, 1},
  496. {1, 0, 0, 0},
  497. };
  498. for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
  499. const int x = patterns[i][0], y = patterns[i][1],
  500. z = patterns[i][2], w = patterns[i][3];
  501. CHECK(FMT("swizzle_%d%d%d%d_%s", x, y, z, w, type), 4, 4, t, t, {
  502. .op = SWS_OP_SWIZZLE,
  503. .type = t,
  504. .swizzle = SWS_SWIZZLE(x, y, z, w),
  505. });
  506. }
  507. }
  508. }
  509. static void check_convert(void)
  510. {
  511. for (SwsPixelType i = U8; i < SWS_PIXEL_TYPE_NB; i++) {
  512. const char *itype = ff_sws_pixel_type_name(i);
  513. const int isize = ff_sws_pixel_type_size(i);
  514. for (SwsPixelType o = U8; o < SWS_PIXEL_TYPE_NB; o++) {
  515. const char *otype = ff_sws_pixel_type_name(o);
  516. const int osize = ff_sws_pixel_type_size(o);
  517. const char *name = FMT("convert_%s_%s", itype, otype);
  518. if (i == o)
  519. continue;
  520. if (isize < osize || !ff_sws_pixel_type_is_int(o)) {
  521. CHECK_COMMON(name, i, o, {
  522. .op = SWS_OP_CONVERT,
  523. .type = i,
  524. .convert.to = o,
  525. });
  526. } else if (isize > osize || !ff_sws_pixel_type_is_int(i)) {
  527. uint32_t range = UINT32_MAX >> (32 - osize * 8);
  528. CHECK_COMMON_RANGE(name, range, i, o, {
  529. .op = SWS_OP_CONVERT,
  530. .type = i,
  531. .convert.to = o,
  532. });
  533. }
  534. }
  535. }
  536. /* Check expanding conversions */
  537. CHECK_COMMON("expand16", U8, U16, {
  538. .op = SWS_OP_CONVERT,
  539. .type = U8,
  540. .convert.to = U16,
  541. .convert.expand = true,
  542. });
  543. CHECK_COMMON("expand32", U8, U32, {
  544. .op = SWS_OP_CONVERT,
  545. .type = U8,
  546. .convert.to = U32,
  547. .convert.expand = true,
  548. });
  549. }
  550. static void check_dither(void)
  551. {
  552. for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
  553. const char *type = ff_sws_pixel_type_name(t);
  554. if (ff_sws_pixel_type_is_int(t))
  555. continue;
  556. /* Test all sizes up to 256x256 */
  557. for (int size_log2 = 0; size_log2 <= 8; size_log2++) {
  558. const int size = 1 << size_log2;
  559. AVRational *matrix = av_refstruct_allocz(size * size * sizeof(*matrix));
  560. if (!matrix) {
  561. fail();
  562. return;
  563. }
  564. if (size == 1) {
  565. matrix[0] = (AVRational) { 1, 2 };
  566. } else {
  567. for (int i = 0; i < size * size; i++)
  568. matrix[i] = rndq(t);
  569. }
  570. CHECK_COMMON(FMT("dither_%dx%d_%s", size, size, type), t, t, {
  571. .op = SWS_OP_DITHER,
  572. .type = t,
  573. .dither.size_log2 = size_log2,
  574. .dither.matrix = matrix,
  575. });
  576. av_refstruct_unref(&matrix);
  577. }
  578. }
  579. }
  580. static void check_min_max(void)
  581. {
  582. for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
  583. const char *type = ff_sws_pixel_type_name(t);
  584. CHECK_COMMON(FMT("min_%s", type), t, t, {
  585. .op = SWS_OP_MIN,
  586. .type = t,
  587. .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
  588. });
  589. CHECK_COMMON(FMT("max_%s", type), t, t, {
  590. .op = SWS_OP_MAX,
  591. .type = t,
  592. .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
  593. });
  594. }
  595. }
  596. static void check_linear(void)
  597. {
  598. static const struct {
  599. const char *name;
  600. uint32_t mask;
  601. } patterns[] = {
  602. { "noop", 0 },
  603. { "luma", SWS_MASK_LUMA },
  604. { "alpha", SWS_MASK_ALPHA },
  605. { "luma+alpha", SWS_MASK_LUMA | SWS_MASK_ALPHA },
  606. { "dot3", 0x7 },
  607. { "dot4", 0xF },
  608. { "row0", SWS_MASK_ROW(0) },
  609. { "row0+alpha", SWS_MASK_ROW(0) | SWS_MASK_ALPHA },
  610. { "off3", SWS_MASK_OFF3 },
  611. { "off3+alpha", SWS_MASK_OFF3 | SWS_MASK_ALPHA },
  612. { "diag3", SWS_MASK_DIAG3 },
  613. { "diag4", SWS_MASK_DIAG4 },
  614. { "diag3+alpha", SWS_MASK_DIAG3 | SWS_MASK_ALPHA },
  615. { "diag3+off3", SWS_MASK_DIAG3 | SWS_MASK_OFF3 },
  616. { "diag3+off3+alpha", SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
  617. { "diag4+off4", SWS_MASK_DIAG4 | SWS_MASK_OFF4 },
  618. { "matrix3", SWS_MASK_MAT3 },
  619. { "matrix3+off3", SWS_MASK_MAT3 | SWS_MASK_OFF3 },
  620. { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
  621. { "matrix4", SWS_MASK_MAT4 },
  622. { "matrix4+off4", SWS_MASK_MAT4 | SWS_MASK_OFF4 },
  623. };
  624. for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
  625. const char *type = ff_sws_pixel_type_name(t);
  626. if (ff_sws_pixel_type_is_int(t))
  627. continue;
  628. for (int p = 0; p < FF_ARRAY_ELEMS(patterns); p++) {
  629. const uint32_t mask = patterns[p].mask;
  630. SwsLinearOp lin = { .mask = mask };
  631. for (int i = 0; i < 4; i++) {
  632. for (int j = 0; j < 5; j++) {
  633. if (mask & SWS_MASK(i, j)) {
  634. lin.m[i][j] = rndq(t);
  635. } else {
  636. lin.m[i][j] = (AVRational) { i == j, 1 };
  637. }
  638. }
  639. }
  640. CHECK(FMT("linear_%s_%s", patterns[p].name, type), 4, 4, t, t, {
  641. .op = SWS_OP_LINEAR,
  642. .type = t,
  643. .lin = lin,
  644. });
  645. }
  646. }
  647. }
  648. static void check_scale(void)
  649. {
  650. for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
  651. const char *type = ff_sws_pixel_type_name(t);
  652. const int bits = ff_sws_pixel_type_size(t) * 8;
  653. if (ff_sws_pixel_type_is_int(t)) {
  654. /* Ensure the result won't exceed the value range */
  655. const unsigned max = (1 << bits) - 1;
  656. const unsigned scale = rnd() & max;
  657. const unsigned range = max / (scale ? scale : 1);
  658. CHECK_COMMON_RANGE(FMT("scale_%s", type), range, t, t, {
  659. .op = SWS_OP_SCALE,
  660. .type = t,
  661. .c.q = { scale, 1 },
  662. });
  663. } else {
  664. CHECK_COMMON(FMT("scale_%s", type), t, t, {
  665. .op = SWS_OP_SCALE,
  666. .type = t,
  667. .c.q = rndq(t),
  668. });
  669. }
  670. }
  671. }
  672. void checkasm_check_sw_ops(void)
  673. {
  674. check_read_write();
  675. report("read_write");
  676. check_swap_bytes();
  677. report("swap_bytes");
  678. check_pack_unpack();
  679. report("pack_unpack");
  680. check_clear();
  681. report("clear");
  682. check_shift();
  683. report("shift");
  684. check_swizzle();
  685. report("swizzle");
  686. check_convert();
  687. report("convert");
  688. check_dither();
  689. report("dither");
  690. check_min_max();
  691. report("min_max");
  692. check_linear();
  693. report("linear");
  694. check_scale();
  695. report("scale");
  696. }