00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00027 #include "avcodec.h"
00028 #include "aactab.h"
00029 #include "psymodel.h"
00030
00031
00032
00033
00034
00035
00036
00037
/*
 * Constants used by the 3GPP-style threshold computation.
 */

/* exponent scale for low-to-high threshold spreading (15 dB/Bark) */
#define PSY_3GPP_THR_SPREAD_HI   1.5f
/* exponent scale for high-to-low threshold spreading (30 dB/Bark) */
#define PSY_3GPP_THR_SPREAD_LOW  3.0f

/* the limited threshold never drops below this fraction of the unlimited one */
#define PSY_3GPP_RPEMIN          0.01f
/* the limited threshold never exceeds this multiple of the previous
 * call's threshold-in-quiet for the same band */
#define PSY_3GPP_RPELEV          2.0f

/*
 * Constants used by the LAME-derived window decision code.
 */

/* span, in samples, of the symmetric FIR filter applied to the lookahead */
#define PSY_LAME_FIR_LEN         21
/* number of samples filtered and inspected per window decision */
#define AAC_BLOCK_SIZE_LONG      1024
/* short block size in samples */
#define AAC_BLOCK_SIZE_SHORT     128
/* number of short blocks examined per window decision */
#define AAC_NUM_BLOCKS_SHORT     8
/* number of sub-blocks each short block is split into for attack detection */
#define PSY_LAME_NUM_SUBBLOCKS   3
00054
00055
/**
 * Values the analysis keeps for one scalefactor band of one window.
 */
typedef struct AacPsyBand {
    float energy;    /* sum of the squared coefficients of the band */
    float thr;       /* threshold after spreading, ATH floor and limiting */
    float thr_quiet; /* threshold right after the ATH floor, before limiting */
} AacPsyBand;
00067
00071 typedef struct AacPsyChannel{
00072 AacPsyBand band[128];
00073 AacPsyBand prev_band[128];
00074
00075 float win_energy;
00076 float iir_state[2];
00077 uint8_t next_grouping;
00078 enum WindowSequence next_window_seq;
00079
00080 float attack_threshold;
00081 float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS];
00082 int prev_attack;
00083 }AacPsyChannel;
00084
/**
 * Per-band constants precomputed by the init function.
 */
typedef struct AacPsyCoeffs {
    float ath;           /* lowest absolute-threshold value over the band's lines, relative to ath(3410) */
    float barks;         /* mean of the Bark values at this band's and the previous band's upper edge */
    float spread_low[2]; /* high-to-low spreading factor; only element 0 is written in this file */
    float spread_hi [2]; /* low-to-high spreading factor; only element 0 is written in this file */
    float min_snr;       /* not referenced by the code in this file */
} AacPsyCoeffs;
00095
00099 typedef struct AacPsyContext{
00100 AacPsyCoeffs psy_coef[2][64];
00101 AacPsyChannel *ch;
00102 }AacPsyContext;
00103
/**
 * One row of an attack-threshold lookup table.
 */
typedef struct {
    int   quality; /* lookup key: kbps per channel in psy_abr_map,
                    * quality index in psy_vbr_map */
    float st_lrm;  /* attack threshold stored for that key */
} PsyLamePreset;
00114
00118 static const PsyLamePreset psy_abr_map[] = {
00119
00120
00121 { 8, 6.60},
00122 { 16, 6.60},
00123 { 24, 6.60},
00124 { 32, 6.60},
00125 { 40, 6.60},
00126 { 48, 6.60},
00127 { 56, 6.60},
00128 { 64, 6.40},
00129 { 80, 6.00},
00130 { 96, 5.60},
00131 {112, 5.20},
00132 {128, 5.20},
00133 {160, 5.20}
00134 };
00135
00139 static const PsyLamePreset psy_vbr_map[] = {
00140
00141 { 0, 4.20},
00142 { 1, 4.20},
00143 { 2, 4.20},
00144 { 3, 4.20},
00145 { 4, 4.20},
00146 { 5, 4.20},
00147 { 6, 4.20},
00148 { 7, 4.20},
00149 { 8, 4.20},
00150 { 9, 4.20},
00151 {10, 4.20}
00152 };
00153
/**
 * First half of the taps of the symmetric FIR filter applied to the
 * lookahead samples in psy_lame_window(). Each tap multiplies the sum of a
 * mirrored sample pair; the centre sample is added with weight one.
 */
static const float psy_fir_coeffs[] = {
    -8.65163e-18 * 2,
    -0.00851586  * 2,
    -6.74764e-18 * 2,
     0.0209036   * 2,
    -3.36639e-17 * 2,
    -0.0438162   * 2,
    -1.54175e-17 * 2,
     0.0931738   * 2,
    -5.52212e-17 * 2,
    -0.313819    * 2
};
00162
00166 static float lame_calc_attack_threshold(int bitrate)
00167 {
00168
00169 int lower_range = 12, upper_range = 12;
00170 int lower_range_kbps = psy_abr_map[12].quality;
00171 int upper_range_kbps = psy_abr_map[12].quality;
00172 int i;
00173
00174
00175
00176
00177 for (i = 1; i < 13; i++) {
00178 if (FFMAX(bitrate, psy_abr_map[i].quality) != bitrate) {
00179 upper_range = i;
00180 upper_range_kbps = psy_abr_map[i ].quality;
00181 lower_range = i - 1;
00182 lower_range_kbps = psy_abr_map[i - 1].quality;
00183 break;
00184 }
00185 }
00186
00187
00188 if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
00189 return psy_abr_map[lower_range].st_lrm;
00190 return psy_abr_map[upper_range].st_lrm;
00191 }
00192
00196 static void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx) {
00197 int i, j;
00198
00199 for (i = 0; i < avctx->channels; i++) {
00200 AacPsyChannel *pch = &ctx->ch[i];
00201
00202 if (avctx->flags & CODEC_FLAG_QSCALE)
00203 pch->attack_threshold = psy_vbr_map[avctx->global_quality / FF_QP2LAMBDA].st_lrm;
00204 else
00205 pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate / avctx->channels / 1000);
00206
00207 for (j = 0; j < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; j++)
00208 pch->prev_energy_subshort[j] = 10.0f;
00209 }
00210 }
00211
00215 static av_cold float calc_bark(float f)
00216 {
00217 return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
00218 }
00219
00220 #define ATH_ADD 4
00221
00225 static av_cold float ath(float f, float add)
00226 {
00227 f /= 1000.0f;
00228 return 3.64 * pow(f, -0.8)
00229 - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))
00230 + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))
00231 + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
00232 }
00233
00234 static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
00235 AacPsyContext *pctx;
00236 float bark;
00237 int i, j, g, start;
00238 float prev, minscale, minath;
00239
00240 ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
00241 pctx = (AacPsyContext*) ctx->model_priv_data;
00242
00243 minath = ath(3410, ATH_ADD);
00244 for (j = 0; j < 2; j++) {
00245 AacPsyCoeffs *coeffs = pctx->psy_coef[j];
00246 const uint8_t *band_sizes = ctx->bands[j];
00247 float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
00248 i = 0;
00249 prev = 0.0;
00250 for (g = 0; g < ctx->num_bands[j]; g++) {
00251 i += band_sizes[g];
00252 bark = calc_bark((i-1) * line_to_frequency);
00253 coeffs[g].barks = (bark + prev) / 2.0;
00254 prev = bark;
00255 }
00256 for (g = 0; g < ctx->num_bands[j] - 1; g++) {
00257 AacPsyCoeffs *coeff = &coeffs[g];
00258 float bark_width = coeffs[g+1].barks - coeffs->barks;
00259 coeff->spread_low[0] = pow(10.0, -bark_width * PSY_3GPP_THR_SPREAD_LOW);
00260 coeff->spread_hi [0] = pow(10.0, -bark_width * PSY_3GPP_THR_SPREAD_HI);
00261 }
00262 start = 0;
00263 for (g = 0; g < ctx->num_bands[j]; g++) {
00264 minscale = ath(start * line_to_frequency, ATH_ADD);
00265 for (i = 1; i < band_sizes[g]; i++)
00266 minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));
00267 coeffs[g].ath = minscale - minath;
00268 start += band_sizes[g];
00269 }
00270 }
00271
00272 pctx->ch = av_mallocz(sizeof(AacPsyChannel) * ctx->avctx->channels);
00273
00274 lame_window_init(pctx, ctx->avctx);
00275
00276 return 0;
00277 }
00278
/**
 * Run one sample through a first-order IIR filter.
 *
 * @param in    input sample
 * @param state filter memory: state[0] holds the previous input and
 *              state[1] the previous output; both are updated
 * @return filtered sample
 */
static float iir_filter(int in, float state[2])
{
    const float out = 0.7548f * (in - state[0]) + 0.5095f * state[1];

    state[0] = in;
    state[1] = out;
    return out;
}
00291
/**
 * Grouping bitmasks for a frame of eight short windows, selected by the
 * position of the first detected attack (0 = no attack).
 * A cleared bit i makes window i the start of a new group; the resulting
 * group lengths are listed next to each entry.
 */
static const uint8_t window_grouping[9] = {
    0xB6, /* 3 3 2   */
    0x6C, /* 1 3 3 1 */
    0xD8, /* 1 1 3 3 */
    0xB2, /* 2 1 3 2 */
    0x66, /* 3 1 3 1 */
    0xC6, /* 3 1 1 3 */
    0x96, /* 3 2 1 2 */
    0x36, /* 3 3 1 1 */
    0x36  /* 3 3 1 1 */
};
00298
00303 static FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx,
00304 const int16_t *audio, const int16_t *la,
00305 int channel, int prev_type)
00306 {
00307 int i, j;
00308 int br = ctx->avctx->bit_rate / ctx->avctx->channels;
00309 int attack_ratio = br <= 16000 ? 18 : 10;
00310 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
00311 AacPsyChannel *pch = &pctx->ch[channel];
00312 uint8_t grouping = 0;
00313 int next_type = pch->next_window_seq;
00314 FFPsyWindowInfo wi;
00315
00316 memset(&wi, 0, sizeof(wi));
00317 if (la) {
00318 float s[8], v;
00319 int switch_to_eight = 0;
00320 float sum = 0.0, sum2 = 0.0;
00321 int attack_n = 0;
00322 int stay_short = 0;
00323 for (i = 0; i < 8; i++) {
00324 for (j = 0; j < 128; j++) {
00325 v = iir_filter(la[(i*128+j)*ctx->avctx->channels], pch->iir_state);
00326 sum += v*v;
00327 }
00328 s[i] = sum;
00329 sum2 += sum;
00330 }
00331 for (i = 0; i < 8; i++) {
00332 if (s[i] > pch->win_energy * attack_ratio) {
00333 attack_n = i + 1;
00334 switch_to_eight = 1;
00335 break;
00336 }
00337 }
00338 pch->win_energy = pch->win_energy*7/8 + sum2/64;
00339
00340 wi.window_type[1] = prev_type;
00341 switch (prev_type) {
00342 case ONLY_LONG_SEQUENCE:
00343 wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
00344 next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
00345 break;
00346 case LONG_START_SEQUENCE:
00347 wi.window_type[0] = EIGHT_SHORT_SEQUENCE;
00348 grouping = pch->next_grouping;
00349 next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
00350 break;
00351 case LONG_STOP_SEQUENCE:
00352 wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
00353 next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
00354 break;
00355 case EIGHT_SHORT_SEQUENCE:
00356 stay_short = next_type == EIGHT_SHORT_SEQUENCE || switch_to_eight;
00357 wi.window_type[0] = stay_short ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
00358 grouping = next_type == EIGHT_SHORT_SEQUENCE ? pch->next_grouping : 0;
00359 next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
00360 break;
00361 }
00362
00363 pch->next_grouping = window_grouping[attack_n];
00364 pch->next_window_seq = next_type;
00365 } else {
00366 for (i = 0; i < 3; i++)
00367 wi.window_type[i] = prev_type;
00368 grouping = (prev_type == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
00369 }
00370
00371 wi.window_shape = 1;
00372 if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
00373 wi.num_windows = 1;
00374 wi.grouping[0] = 1;
00375 } else {
00376 int lastgrp = 0;
00377 wi.num_windows = 8;
00378 for (i = 0; i < 8; i++) {
00379 if (!((grouping >> i) & 1))
00380 lastgrp = i;
00381 wi.grouping[lastgrp]++;
00382 }
00383 }
00384
00385 return wi;
00386 }
00387
00391 static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
00392 const float *coefs, const FFPsyWindowInfo *wi)
00393 {
00394 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
00395 AacPsyChannel *pch = &pctx->ch[channel];
00396 int start = 0;
00397 int i, w, g;
00398 const int num_bands = ctx->num_bands[wi->num_windows == 8];
00399 const uint8_t *band_sizes = ctx->bands[wi->num_windows == 8];
00400 AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8];
00401
00402
00403 for (w = 0; w < wi->num_windows*16; w += 16) {
00404 for (g = 0; g < num_bands; g++) {
00405 AacPsyBand *band = &pch->band[w+g];
00406 band->energy = 0.0f;
00407 for (i = 0; i < band_sizes[g]; i++)
00408 band->energy += coefs[start+i] * coefs[start+i];
00409 band->thr = band->energy * 0.001258925f;
00410 start += band_sizes[g];
00411 }
00412 }
00413
00414 for (w = 0; w < wi->num_windows*16; w += 16) {
00415 AacPsyBand *bands = &pch->band[w];
00416
00417 for (g = 1; g < num_bands; g++)
00418 bands[g].thr = FFMAX(bands[g].thr, bands[g-1].thr * coeffs[g].spread_hi[0]);
00419 for (g = num_bands - 2; g >= 0; g--)
00420 bands[g].thr = FFMAX(bands[g].thr, bands[g+1].thr * coeffs[g].spread_low[0]);
00421
00422 for (g = 0; g < num_bands; g++) {
00423 AacPsyBand *band = &bands[g];
00424 band->thr_quiet = band->thr = FFMAX(band->thr, coeffs[g].ath);
00425
00426 if (!(wi->window_type[0] == LONG_STOP_SEQUENCE || (wi->window_type[1] == LONG_START_SEQUENCE && !w)))
00427 band->thr = FFMAX(PSY_3GPP_RPEMIN*band->thr, FFMIN(band->thr,
00428 PSY_3GPP_RPELEV*pch->prev_band[w+g].thr_quiet));
00429 }
00430 }
00431
00432 for (w = 0; w < wi->num_windows*16; w += 16) {
00433 for (g = 0; g < num_bands; g++) {
00434 AacPsyBand *band = &pch->band[w+g];
00435 FFPsyBand *psy_band = &ctx->psy_bands[channel*PSY_MAX_BANDS+w+g];
00436
00437 psy_band->threshold = band->thr;
00438 psy_band->energy = band->energy;
00439 }
00440 }
00441
00442 memcpy(pch->prev_band, pch->band, sizeof(pch->band));
00443 }
00444
00445 static av_cold void psy_3gpp_end(FFPsyContext *apc)
00446 {
00447 AacPsyContext *pctx = (AacPsyContext*) apc->model_priv_data;
00448 av_freep(&pctx->ch);
00449 av_freep(&apc->model_priv_data);
00450 }
00451
00452 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
00453 {
00454 int blocktype = ONLY_LONG_SEQUENCE;
00455 if (uselongblock) {
00456 if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE)
00457 blocktype = LONG_STOP_SEQUENCE;
00458 } else {
00459 blocktype = EIGHT_SHORT_SEQUENCE;
00460 if (ctx->next_window_seq == ONLY_LONG_SEQUENCE)
00461 ctx->next_window_seq = LONG_START_SEQUENCE;
00462 if (ctx->next_window_seq == LONG_STOP_SEQUENCE)
00463 ctx->next_window_seq = EIGHT_SHORT_SEQUENCE;
00464 }
00465
00466 wi->window_type[0] = ctx->next_window_seq;
00467 ctx->next_window_seq = blocktype;
00468 }
00469
00470 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx,
00471 const int16_t *audio, const int16_t *la,
00472 int channel, int prev_type)
00473 {
00474 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
00475 AacPsyChannel *pch = &pctx->ch[channel];
00476 int grouping = 0;
00477 int uselongblock = 1;
00478 int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
00479 int i;
00480 FFPsyWindowInfo wi;
00481
00482 memset(&wi, 0, sizeof(wi));
00483 if (la) {
00484 float hpfsmpl[AAC_BLOCK_SIZE_LONG];
00485 float const *pf = hpfsmpl;
00486 float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
00487 float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
00488 float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
00489 int chans = ctx->avctx->channels;
00490 const int16_t *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) * chans;
00491 int j, att_sum = 0;
00492
00493
00494 for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
00495 float sum1, sum2;
00496 sum1 = firbuf[(i + ((PSY_LAME_FIR_LEN - 1) / 2)) * chans];
00497 sum2 = 0.0;
00498 for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
00499 sum1 += psy_fir_coeffs[j] * (firbuf[(i + j) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j) * chans]);
00500 sum2 += psy_fir_coeffs[j + 1] * (firbuf[(i + j + 1) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j - 1) * chans]);
00501 }
00502 hpfsmpl[i] = sum1 + sum2;
00503 }
00504
00505
00506 for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
00507 energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
00508 assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
00509 attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
00510 energy_short[0] += energy_subshort[i];
00511 }
00512
00513 for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
00514 float const *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS);
00515 float p = 1.0f;
00516 for (; pf < pfe; pf++)
00517 if (p < fabsf(*pf))
00518 p = fabsf(*pf);
00519 pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
00520 energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
00521
00522
00523
00524
00525
00526
00527
00528 if (p > energy_subshort[i + 1])
00529 p = p / energy_subshort[i + 1];
00530 else if (energy_subshort[i + 1] > p * 10.0f)
00531 p = energy_subshort[i + 1] / (p * 10.0f);
00532 else
00533 p = 0.0;
00534 attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
00535 }
00536
00537
00538 for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
00539 if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
00540 if (attack_intensity[i] > pch->attack_threshold)
00541 attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1;
00542
00543
00544
00545
00546
00547 for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
00548 float const u = energy_short[i - 1];
00549 float const v = energy_short[i];
00550 float const m = FFMAX(u, v);
00551 if (m < 40000) {
00552 if (u < 1.7f * v && v < 1.7f * u) {
00553 if (i == 1 && attacks[0] < attacks[i])
00554 attacks[0] = 0;
00555 attacks[i] = 0;
00556 }
00557 }
00558 att_sum += attacks[i];
00559 }
00560
00561 if (attacks[0] <= pch->prev_attack)
00562 attacks[0] = 0;
00563
00564 att_sum += attacks[0];
00565
00566 if (pch->prev_attack == 3 || att_sum) {
00567 uselongblock = 0;
00568
00569 for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
00570 if (attacks[i] && attacks[i-1])
00571 attacks[i] = 0;
00572 }
00573 } else {
00574
00575 uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
00576 }
00577
00578 lame_apply_block_type(pch, &wi, uselongblock);
00579
00580 wi.window_type[1] = prev_type;
00581 if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
00582 wi.num_windows = 1;
00583 wi.grouping[0] = 1;
00584 if (wi.window_type[0] == LONG_START_SEQUENCE)
00585 wi.window_shape = 0;
00586 else
00587 wi.window_shape = 1;
00588 } else {
00589 int lastgrp = 0;
00590
00591 wi.num_windows = 8;
00592 wi.window_shape = 0;
00593 for (i = 0; i < 8; i++) {
00594 if (!((pch->next_grouping >> i) & 1))
00595 lastgrp = i;
00596 wi.grouping[lastgrp]++;
00597 }
00598 }
00599
00600
00601
00602
00603
00604
00605
00606 for (i = 0; i < 9; i++) {
00607 if (attacks[i]) {
00608 grouping = i;
00609 break;
00610 }
00611 }
00612 pch->next_grouping = window_grouping[grouping];
00613
00614 pch->prev_attack = attacks[8];
00615
00616 return wi;
00617 }
00618
00619 const FFPsyModel ff_aac_psy_model =
00620 {
00621 .name = "3GPP TS 26.403-inspired model",
00622 .init = psy_3gpp_init,
00623 .window = psy_lame_window,
00624 .analyze = psy_3gpp_analyze,
00625 .end = psy_3gpp_end,
00626 };