aaccoder_twoloop.h 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759
  1. /*
  2. * AAC encoder twoloop coder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC encoder twoloop coder
  24. * @author Konstantin Shishkov, Claudio Freire
  25. */
  26. /**
  27. * This file contains a template for the twoloop coder function.
  28. * The following functions from aacenc_quantization/util.h must be provided
  29. * externally, as already included declarations. They're not included
  30. * explicitly here to make it possible to provide alternative implementations:
  31. * - quantize_band_cost
  32. * - abs_pow34_v
  33. * - find_max_val
  34. * - find_min_book
  35. * - find_form_factor
  36. */
  37. #ifndef AVCODEC_AACCODER_TWOLOOP_H
  38. #define AVCODEC_AACCODER_TWOLOOP_H
  39. #include <float.h>
  40. #include "libavutil/mathematics.h"
  41. #include "mathops.h"
  42. #include "avcodec.h"
  43. #include "put_bits.h"
  44. #include "aac.h"
  45. #include "aacenc.h"
  46. #include "aactab.h"
  47. #include "aacenctab.h"
  48. /** Frequency in Hz for lower limit of noise substitution **/
  49. #define NOISE_LOW_LIMIT 4000
  50. /* Reflects the cost to change codebooks */
  51. static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
  52. {
  53. return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
  54. }
  55. /**
  56. * two-loop quantizers search taken from ISO 13818-7 Appendix C
  57. */
  58. static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  59. AACEncContext *s,
  60. SingleChannelElement *sce,
  61. const float lambda)
  62. {
  63. int start = 0, i, w, w2, g, recomprd;
  64. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  65. / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->ch_layout.nb_channels)
  66. * (lambda / 120.f);
  67. int refbits = destbits;
  68. int toomanybits, toofewbits;
  69. char nzs[128];
  70. uint8_t nextband[128];
  71. int maxsf[128], minsf[128];
  72. float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
  73. float maxvals[128], spread_thr_r[128];
  74. float min_spread_thr_r, max_spread_thr_r;
  75. /**
  76. * rdlambda controls the maximum tolerated distortion. Twoloop
  77. * will keep iterating until it fails to lower it or it reaches
  78. * ulimit * rdlambda. Keeping it low increases quality on difficult
  79. * signals, but lower it too much, and bits will be taken from weak
  80. * signals, creating "holes". A balance is necessary.
  81. * rdmax and rdmin specify the relative deviation from rdlambda
  82. * allowed for tonality compensation
  83. */
  84. float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
  85. const float nzslope = 1.5f;
  86. float rdmin = 0.03125f;
  87. float rdmax = 1.0f;
  88. /**
  89. * sfoffs controls an offset of optmium allocation that will be
  90. * applied based on lambda. Keep it real and modest, the loop
  91. * will take care of the rest, this just accelerates convergence
  92. */
  93. float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
  94. int fflag, minscaler, nminscaler;
  95. int its = 0;
  96. int maxits = 30;
  97. int allz = 0;
  98. int tbits;
  99. int cutoff = 1024;
  100. int pns_start_pos;
  101. int prev;
  102. /**
  103. * zeroscale controls a multiplier of the threshold, if band energy
  104. * is below this, a zero is forced. Keep it lower than 1, unless
  105. * low lambda is used, because energy < threshold doesn't mean there's
  106. * no audible signal outright, it's just energy. Also make it rise
  107. * slower than rdlambda, as rdscale has due compensation with
  108. * noisy band depriorization below, whereas zeroing logic is rather dumb
  109. */
  110. float zeroscale;
  111. if (lambda > 120.f) {
  112. zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
  113. } else {
  114. zeroscale = 1.f;
  115. }
  116. if (s->psy.bitres.alloc >= 0) {
  117. /**
  118. * Psy granted us extra bits to use, from the reservoire
  119. * adjust for lambda except what psy already did
  120. */
  121. destbits = s->psy.bitres.alloc
  122. * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
  123. }
  124. if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
  125. /**
  126. * Constant Q-scale doesn't compensate MS coding on its own
  127. * No need to be overly precise, this only controls RD
  128. * adjustment CB limits when going overboard
  129. */
  130. if (s->options.mid_side && s->cur_type == TYPE_CPE)
  131. destbits *= 2;
  132. /**
  133. * When using a constant Q-scale, don't adjust bits, just use RD
  134. * Don't let it go overboard, though... 8x psy target is enough
  135. */
  136. toomanybits = 5800;
  137. toofewbits = destbits / 16;
  138. /** Don't offset scalers, just RD */
  139. sfoffs = sce->ics.num_windows - 1;
  140. rdlambda = sqrtf(rdlambda);
  141. /** search further */
  142. maxits *= 2;
  143. } else {
  144. /* When using ABR, be strict, but a reasonable leeway is
  145. * critical to allow RC to smoothly track desired bitrate
  146. * without sudden quality drops that cause audible artifacts.
  147. * Symmetry is also desirable, to avoid systematic bias.
  148. */
  149. toomanybits = destbits + destbits/8;
  150. toofewbits = destbits - destbits/8;
  151. sfoffs = 0;
  152. rdlambda = sqrtf(rdlambda);
  153. }
  154. /** and zero out above cutoff frequency */
  155. {
  156. int wlen = 1024 / sce->ics.num_windows;
  157. int bandwidth;
  158. /**
  159. * Scale, psy gives us constant quality, this LP only scales
  160. * bitrate by lambda, so we save bits on subjectively unimportant HF
  161. * rather than increase quantization noise. Adjust nominal bitrate
  162. * to effective bitrate according to encoding parameters,
  163. * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
  164. */
  165. float rate_bandwidth_multiplier = 1.5f;
  166. int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
  167. ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
  168. : (avctx->bit_rate / avctx->ch_layout.nb_channels);
  169. /** Compensate for extensions that increase efficiency */
  170. if (s->options.pns || s->options.intensity_stereo)
  171. frame_bit_rate *= 1.15f;
  172. if (avctx->cutoff > 0) {
  173. bandwidth = avctx->cutoff;
  174. } else {
  175. bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
  176. s->psy.cutoff = bandwidth;
  177. }
  178. cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
  179. pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
  180. }
  181. /**
  182. * for values above this the decoder might end up in an endless loop
  183. * due to always having more bits than what can be encoded.
  184. */
  185. destbits = FFMIN(destbits, 5800);
  186. toomanybits = FFMIN(toomanybits, 5800);
  187. toofewbits = FFMIN(toofewbits, 5800);
  188. /**
  189. * XXX: some heuristic to determine initial quantizers will reduce search time
  190. * determine zero bands and upper distortion limits
  191. */
  192. min_spread_thr_r = -1;
  193. max_spread_thr_r = -1;
  194. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  195. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  196. int nz = 0;
  197. float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
  198. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  199. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  200. if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
  201. sce->zeroes[(w+w2)*16+g] = 1;
  202. continue;
  203. }
  204. nz = 1;
  205. }
  206. if (!nz) {
  207. uplim = 0.0f;
  208. } else {
  209. nz = 0;
  210. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  211. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  212. if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
  213. continue;
  214. uplim += band->threshold;
  215. energy += band->energy;
  216. spread += band->spread;
  217. nz++;
  218. }
  219. }
  220. uplims[w*16+g] = uplim;
  221. energies[w*16+g] = energy;
  222. nzs[w*16+g] = nz;
  223. sce->zeroes[w*16+g] = !nz;
  224. allz |= nz;
  225. if (nz && sce->can_pns[w*16+g]) {
  226. spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
  227. if (min_spread_thr_r < 0) {
  228. min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
  229. } else {
  230. min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
  231. max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
  232. }
  233. }
  234. }
  235. }
  236. /** Compute initial scalers */
  237. minscaler = 65535;
  238. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  239. for (g = 0; g < sce->ics.num_swb; g++) {
  240. if (sce->zeroes[w*16+g]) {
  241. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  242. continue;
  243. }
  244. /**
  245. * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
  246. * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
  247. * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
  248. * more robust.
  249. */
  250. sce->sf_idx[w*16+g] = av_clip(
  251. SCALE_ONE_POS
  252. + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
  253. + sfoffs,
  254. 60, SCALE_MAX_POS);
  255. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  256. }
  257. }
  258. /** Clip */
  259. minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  260. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  261. for (g = 0; g < sce->ics.num_swb; g++)
  262. if (!sce->zeroes[w*16+g])
  263. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
  264. if (!allz)
  265. return;
  266. s->aacdsp.abs_pow34(s->scoefs, sce->coeffs, 1024);
  267. ff_quantize_band_cost_cache_init(s);
  268. for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i)
  269. minsf[i] = 0;
  270. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  271. start = w*128;
  272. for (g = 0; g < sce->ics.num_swb; g++) {
  273. const float *scaled = s->scoefs + start;
  274. int minsfidx;
  275. maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
  276. if (maxvals[w*16+g] > 0) {
  277. minsfidx = coef2minsf(maxvals[w*16+g]);
  278. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
  279. minsf[(w+w2)*16+g] = minsfidx;
  280. }
  281. start += sce->ics.swb_sizes[g];
  282. }
  283. }
  284. /**
  285. * Scale uplims to match rate distortion to quality
  286. * bu applying noisy band depriorization and tonal band prioritization.
  287. * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
  288. * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
  289. * rate distortion requirements.
  290. */
  291. memcpy(euplims, uplims, sizeof(euplims));
  292. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  293. /** psy already prioritizes transients to some extent */
  294. float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
  295. start = w*128;
  296. for (g = 0; g < sce->ics.num_swb; g++) {
  297. if (nzs[g] > 0) {
  298. float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
  299. float energy2uplim = find_form_factor(
  300. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  301. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  302. sce->coeffs + start,
  303. nzslope * cleanup_factor);
  304. energy2uplim *= de_psy_factor;
  305. if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
  306. /** In ABR, we need to prioritize less and let rate control do its thing */
  307. energy2uplim = sqrtf(energy2uplim);
  308. }
  309. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  310. uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
  311. * sce->ics.group_len[w];
  312. energy2uplim = find_form_factor(
  313. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  314. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  315. sce->coeffs + start,
  316. 2.0f);
  317. energy2uplim *= de_psy_factor;
  318. if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
  319. /** In ABR, we need to prioritize less and let rate control do its thing */
  320. energy2uplim = sqrtf(energy2uplim);
  321. }
  322. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  323. euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
  324. 0.5f, 1.0f);
  325. }
  326. start += sce->ics.swb_sizes[g];
  327. }
  328. }
  329. for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
  330. maxsf[i] = SCALE_MAX_POS;
  331. //perform two-loop search
  332. //outer loop - improve quality
  333. do {
  334. //inner loop - quantize spectrum to fit into given number of bits
  335. int overdist;
  336. int qstep = its ? 1 : 32;
  337. do {
  338. int changed = 0;
  339. prev = -1;
  340. recomprd = 0;
  341. tbits = 0;
  342. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  343. start = w*128;
  344. for (g = 0; g < sce->ics.num_swb; g++) {
  345. const float *coefs = &sce->coeffs[start];
  346. const float *scaled = &s->scoefs[start];
  347. int bits = 0;
  348. int cb;
  349. float dist = 0.0f;
  350. float qenergy = 0.0f;
  351. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  352. start += sce->ics.swb_sizes[g];
  353. if (sce->can_pns[w*16+g]) {
  354. /** PNS isn't free */
  355. tbits += ff_pns_bits(sce, w, g);
  356. }
  357. continue;
  358. }
  359. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  360. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  361. int b;
  362. float sqenergy;
  363. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  364. scaled + w2*128,
  365. sce->ics.swb_sizes[g],
  366. sce->sf_idx[w*16+g],
  367. cb,
  368. 1.0f,
  369. INFINITY,
  370. &b, &sqenergy,
  371. 0);
  372. bits += b;
  373. qenergy += sqenergy;
  374. }
  375. dists[w*16+g] = dist - bits;
  376. qenergies[w*16+g] = qenergy;
  377. if (prev != -1) {
  378. int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
  379. bits += ff_aac_scalefactor_bits[sfdiff];
  380. }
  381. tbits += bits;
  382. start += sce->ics.swb_sizes[g];
  383. prev = sce->sf_idx[w*16+g];
  384. }
  385. }
  386. if (tbits > toomanybits) {
  387. recomprd = 1;
  388. for (i = 0; i < 128; i++) {
  389. if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
  390. int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
  391. int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
  392. if (new_sf != sce->sf_idx[i]) {
  393. sce->sf_idx[i] = new_sf;
  394. changed = 1;
  395. }
  396. }
  397. }
  398. } else if (tbits < toofewbits) {
  399. recomprd = 1;
  400. for (i = 0; i < 128; i++) {
  401. if (sce->sf_idx[i] > SCALE_ONE_POS) {
  402. int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep);
  403. if (new_sf != sce->sf_idx[i]) {
  404. sce->sf_idx[i] = new_sf;
  405. changed = 1;
  406. }
  407. }
  408. }
  409. }
  410. qstep >>= 1;
  411. if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
  412. qstep = 1;
  413. } while (qstep);
  414. overdist = 1;
  415. fflag = tbits < toofewbits;
  416. for (i = 0; i < 2 && (overdist || recomprd); ++i) {
  417. if (recomprd) {
  418. /** Must recompute distortion */
  419. prev = -1;
  420. tbits = 0;
  421. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  422. start = w*128;
  423. for (g = 0; g < sce->ics.num_swb; g++) {
  424. const float *coefs = sce->coeffs + start;
  425. const float *scaled = s->scoefs + start;
  426. int bits = 0;
  427. int cb;
  428. float dist = 0.0f;
  429. float qenergy = 0.0f;
  430. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  431. start += sce->ics.swb_sizes[g];
  432. if (sce->can_pns[w*16+g]) {
  433. /** PNS isn't free */
  434. tbits += ff_pns_bits(sce, w, g);
  435. }
  436. continue;
  437. }
  438. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  439. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  440. int b;
  441. float sqenergy;
  442. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  443. scaled + w2*128,
  444. sce->ics.swb_sizes[g],
  445. sce->sf_idx[w*16+g],
  446. cb,
  447. 1.0f,
  448. INFINITY,
  449. &b, &sqenergy,
  450. 0);
  451. bits += b;
  452. qenergy += sqenergy;
  453. }
  454. dists[w*16+g] = dist - bits;
  455. qenergies[w*16+g] = qenergy;
  456. if (prev != -1) {
  457. int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
  458. bits += ff_aac_scalefactor_bits[sfdiff];
  459. }
  460. tbits += bits;
  461. start += sce->ics.swb_sizes[g];
  462. prev = sce->sf_idx[w*16+g];
  463. }
  464. }
  465. }
  466. if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
  467. float maxoverdist = 0.0f;
  468. float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
  469. overdist = recomprd = 0;
  470. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  471. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  472. if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
  473. float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
  474. maxoverdist = FFMAX(maxoverdist, ovrdist);
  475. overdist++;
  476. }
  477. }
  478. }
  479. if (overdist) {
  480. /* We have overdistorted bands, trade for zeroes (that can be noise)
  481. * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
  482. */
  483. float minspread = max_spread_thr_r;
  484. float maxspread = min_spread_thr_r;
  485. float zspread;
  486. int zeroable = 0;
  487. int zeroed = 0;
  488. int maxzeroed, zloop;
  489. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  490. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  491. if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
  492. minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
  493. maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
  494. zeroable++;
  495. }
  496. }
  497. }
  498. zspread = (maxspread-minspread) * 0.0125f + minspread;
  499. /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
  500. * and forced the hand of the later search_for_pns step.
  501. * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
  502. * and leave further PNSing to search_for_pns if worthwhile.
  503. */
  504. zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
  505. ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
  506. maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
  507. for (zloop = 0; zloop < 2; zloop++) {
  508. /* Two passes: first distorted stuff - two birds in one shot and all that,
  509. * then anything viable. Viable means not zero, but either CB=zero-able
  510. * (too high SF), not SF <= 1 (that means we'd be operating at very high
  511. * quality, we don't want PNS when doing VHQ), PNS allowed, and within
  512. * the lowest ranking percentile.
  513. */
  514. float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
  515. int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
  516. int mcb;
  517. for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
  518. if (sce->ics.swb_offset[g] < pns_start_pos)
  519. continue;
  520. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  521. if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
  522. && sce->sf_idx[w*16+g] > loopminsf
  523. && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
  524. || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
  525. sce->zeroes[w*16+g] = 1;
  526. sce->band_type[w*16+g] = 0;
  527. zeroed++;
  528. }
  529. }
  530. }
  531. }
  532. if (zeroed)
  533. recomprd = fflag = 1;
  534. } else {
  535. overdist = 0;
  536. }
  537. }
  538. }
  539. minscaler = SCALE_MAX_POS;
  540. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  541. for (g = 0; g < sce->ics.num_swb; g++) {
  542. if (!sce->zeroes[w*16+g]) {
  543. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  544. }
  545. }
  546. }
  547. minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  548. prev = -1;
  549. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  550. /** Start with big steps, end up fine-tunning */
  551. int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
  552. int edepth = depth+2;
  553. float uplmax = its / (maxits*0.25f) + 1.0f;
  554. uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
  555. start = w * 128;
  556. for (g = 0; g < sce->ics.num_swb; g++) {
  557. int prevsc = sce->sf_idx[w*16+g];
  558. if (prev < 0 && !sce->zeroes[w*16+g])
  559. prev = sce->sf_idx[0];
  560. if (!sce->zeroes[w*16+g]) {
  561. const float *coefs = sce->coeffs + start;
  562. const float *scaled = s->scoefs + start;
  563. int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  564. int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
  565. int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
  566. if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) {
  567. /* Try to make sure there is some energy in every nonzero band
  568. * NOTE: This algorithm must be forcibly imbalanced, pushing harder
  569. * on holes or more distorted bands at first, otherwise there's
  570. * no net gain (since the next iteration will offset all bands
  571. * on the opposite direction to compensate for extra bits)
  572. */
  573. for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
  574. int cb, bits;
  575. float dist, qenergy;
  576. int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
  577. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  578. dist = qenergy = 0.f;
  579. bits = 0;
  580. if (!cb) {
  581. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
  582. } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
  583. break;
  584. }
  585. /* !g is the DC band, it's important, since quantization error here
  586. * applies to less than a cycle, it creates horrible intermodulation
  587. * distortion if it doesn't stick to what psy requests
  588. */
  589. if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
  590. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
  591. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  592. int b;
  593. float sqenergy;
  594. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  595. scaled + w2*128,
  596. sce->ics.swb_sizes[g],
  597. sce->sf_idx[w*16+g]-1,
  598. cb,
  599. 1.0f,
  600. INFINITY,
  601. &b, &sqenergy,
  602. 0);
  603. bits += b;
  604. qenergy += sqenergy;
  605. }
  606. sce->sf_idx[w*16+g]--;
  607. dists[w*16+g] = dist - bits;
  608. qenergies[w*16+g] = qenergy;
  609. if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
  610. (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
  611. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  612. ) )) {
  613. break;
  614. }
  615. }
  616. } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
  617. && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
  618. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  619. ) {
  620. /** Um... over target. Save bits for more important stuff. */
  621. for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
  622. int cb, bits;
  623. float dist, qenergy;
  624. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
  625. if (cb > 0) {
  626. dist = qenergy = 0.f;
  627. bits = 0;
  628. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  629. int b;
  630. float sqenergy;
  631. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  632. scaled + w2*128,
  633. sce->ics.swb_sizes[g],
  634. sce->sf_idx[w*16+g]+1,
  635. cb,
  636. 1.0f,
  637. INFINITY,
  638. &b, &sqenergy,
  639. 0);
  640. bits += b;
  641. qenergy += sqenergy;
  642. }
  643. dist -= bits;
  644. if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
  645. sce->sf_idx[w*16+g]++;
  646. dists[w*16+g] = dist;
  647. qenergies[w*16+g] = qenergy;
  648. } else {
  649. break;
  650. }
  651. } else {
  652. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
  653. break;
  654. }
  655. }
  656. }
  657. prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
  658. if (sce->sf_idx[w*16+g] != prevsc)
  659. fflag = 1;
  660. nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
  661. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  662. }
  663. start += sce->ics.swb_sizes[g];
  664. }
  665. }
  666. /** SF difference limit violation risk. Must re-clamp. */
  667. prev = -1;
  668. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  669. for (g = 0; g < sce->ics.num_swb; g++) {
  670. if (!sce->zeroes[w*16+g]) {
  671. int prevsf = sce->sf_idx[w*16+g];
  672. if (prev < 0)
  673. prev = prevsf;
  674. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
  675. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  676. prev = sce->sf_idx[w*16+g];
  677. if (!fflag && prevsf != sce->sf_idx[w*16+g])
  678. fflag = 1;
  679. }
  680. }
  681. }
  682. its++;
  683. } while (fflag && its < maxits);
  684. /** Scout out next nonzero bands */
  685. ff_init_nextband_map(sce, nextband);
  686. prev = -1;
  687. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  688. /** Make sure proper codebooks are set */
  689. for (g = 0; g < sce->ics.num_swb; g++) {
  690. if (!sce->zeroes[w*16+g]) {
  691. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  692. if (sce->band_type[w*16+g] <= 0) {
  693. if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
  694. /** Cannot zero out, make sure it's not attempted */
  695. sce->band_type[w*16+g] = 1;
  696. } else {
  697. sce->zeroes[w*16+g] = 1;
  698. sce->band_type[w*16+g] = 0;
  699. }
  700. }
  701. } else {
  702. sce->band_type[w*16+g] = 0;
  703. }
  704. /** Check that there's no SF delta range violations */
  705. if (!sce->zeroes[w*16+g]) {
  706. if (prev != -1) {
  707. av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
  708. av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
  709. } else if (sce->zeroes[0]) {
  710. /** Set global gain to something useful */
  711. sce->sf_idx[0] = sce->sf_idx[w*16+g];
  712. }
  713. prev = sce->sf_idx[w*16+g];
  714. }
  715. }
  716. }
  717. }
  718. #endif /* AVCODEC_AACCODER_TWOLOOP_H */