00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include <math.h>
00029 #include "avcodec.h"
00030 #include "get_bits.h"
00031 #include "put_bits.h"
00032 #include "wmavoice_data.h"
00033 #include "celp_math.h"
00034 #include "celp_filters.h"
00035 #include "acelp_vectors.h"
00036 #include "acelp_filters.h"
00037 #include "lsp.h"
00038 #include "libavutil/lzo.h"
00039 #include "dct.h"
00040 #include "rdft.h"
00041 #include "sinewin.h"
00042
/* Compile-time limits of the decoder; most of them size buffers in
 * WMAVoiceContext or bound loops in the synthesis code. */
00043 #define MAX_BLOCKS 8 ///< largest n_blocks value found in frame_descs[]
00044 #define MAX_LSPS 16 ///< largest LSP count selected by wmavoice_decode_init() (10 or 16)
00045 #define MAX_LSPS_ALIGN16 16 ///< headroom kept in front of the postfilter output in synth_filter_out_buf
00046
00047 #define MAX_FRAMES 3 ///< multiplier used to derive MAX_SFRAMESIZE from MAX_FRAMESIZE
00048 #define MAX_FRAMESIZE 160 ///< samples per frame; a block holds MAX_FRAMESIZE / n_blocks samples
00049 #define MAX_SIGNAL_HISTORY 416 ///< upper bound for history_nsamples, checked in wmavoice_decode_init()
00050 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
00052 #define SFRAME_CACHE_MAXSIZE 256 ///< payload size of WMAVoiceContext.sframe_cache (padding is added on top)
00053
00054 #define VLC_NBITS 6 ///< bits-per-lookup value handed to INIT_VLC_STATIC() for frame_type_vlc
00055
00056
/* VLC table for the frame type code; built in decode_vbmtree(). */
00059 static VLC frame_type_vlc;
00060
/**
 * Adaptive codebook (ACB) types, stored in frame_type_desc.acb_type.
 */
00064 enum {
00065 ACB_TYPE_NONE = 0, ///< no adaptive codebook; synth_block() calls synth_block_hardcoded() for these frames
00066 ACB_TYPE_ASYMMETRIC = 1, ///< synth_block_fcb_acb() recomputes the pitch per run of samples
00067 ///< from last_pitch_val and pitch_diff_sh16 and resamples the
00068 ///< excitation history with wmavoice_ipol1_coeffs
00069
00070
00071 ACB_TYPE_HAMMING = 2 ///< one pitch value per block in quarter-sample units (block_pitch_sh2);
00072 ///< fractional lags are resampled with wmavoice_ipol2_coeffs, integer
00073 ///< lags are a plain back-reference copy
00074 };
00075
/**
 * Fixed codebook (FCB) types, stored in frame_type_desc.fcb_type.
 */
00079 enum {
00080 FCB_TYPE_SILENCE = 0, ///< codebook offset comes from pRNG() and the gain from
00081 ///< WMAVoiceContext.silence_gain; nothing is read from the
00082 ///< bitstream in synth_block_hardcoded()
00083 FCB_TYPE_HARDCODED = 1, ///< 8-bit codebook offset and 6-bit gain index are read from the bitstream
00084
00085 FCB_TYPE_AW_PULSES = 2, ///< pulses are decoded by aw_pulse_set1() and aw_pulse_set2()
00086
00087 FCB_TYPE_EXC_PULSES = 3, ///< five pulse tracks, optionally with a second pulse per track
00088 ///< (see frame_type_desc.dbl_pulses)
00089
00090 };
00091
/**
 * Description of the 17 frame types. The table index is the value stored in
 * WMAVoiceContext.vbm_tree[] for a decoded frame-type VLC symbol.
 */
00095 static const struct frame_type_desc {
00096 uint8_t n_blocks; ///< number of blocks per frame; each block covers
00097 ///< MAX_FRAMESIZE / n_blocks samples
00098 uint8_t log_n_blocks; ///< log2(n_blocks), used as a shift count
00099 uint8_t acb_type; ///< one of the ACB_TYPE_* values
00100 uint8_t fcb_type; ///< one of the FCB_TYPE_* values
00101 uint8_t dbl_pulses; ///< number of pulse tracks (out of 5) that carry a second
00102 ///< pulse; only read for FCB_TYPE_EXC_PULSES
00103
00104 uint16_t frame_size; ///< not read in the visible part of this file
00105 ///< NOTE(review): presumably the bit size of the frame's block data - confirm
00106 } frame_descs[17] = {
00107 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00108 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00109 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00110 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00111 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00112 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00113 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00114 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00115 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00116 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00117 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00118 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00119 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00120 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00121 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00122 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00123 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00124 };
00125
/**
 * WMA Voice decoder state; wmavoice_decode_init() obtains it from
 * AVCodecContext.priv_data. Fields marked "not referenced here" are not used
 * in the part of the file this comment was written against.
 */
00129 typedef struct {
00136 GetBitContext gb; ///< bit reader; wmavoice_decode_init() uses it to parse the extradata
00137
00138
00139
00140 int8_t vbm_tree[25]; ///< filled by decode_vbmtree(): maps a frame-type VLC symbol to
00141 ///< an index into frame_descs[]; negative means unassigned
00142 int spillover_bitsize; ///< set to 3 + ceil(log2(block_align)) at init
00143
00144
00145 int history_nsamples; ///< max_pitch_val + 8; must not exceed MAX_SIGNAL_HISTORY
00146
00147
00148
00149 int do_apf; ///< extradata flag bit 0; when set, init creates the RDFT/DCT contexts
00150 ///< and the sin/cos tables used by the postfilter
00151 int denoise_strength; ///< extradata flag bits 2-5 (must be < 12); row index into
00152 ///< wmavoice_denoise_power_table
00153 int denoise_tilt_corr; ///< extradata flag 0x40; enables tilt compensation in calc_input_response()
00154
00155 int dc_level; ///< extradata flag bits 7-10; postfilter() applies its order-2
00156 ///< filter only when this is > 8
00157
00158 int lsps; ///< number of LSPs, 16 if extradata flag 0x1000 is set, else 10
00159 int lsp_q_mode; ///< extradata flag 0x2000; not referenced here
00160 int lsp_def_mode; ///< extradata flag 0x4000; not referenced here
00161
00162 int frame_lsp_bitsize; ///< 34 (16 LSPs) or 24 (10 LSPs)
00163
00164 int sframe_lsp_bitsize; ///< 60 (16 LSPs) or 48 (10 LSPs)
00165
00166
00167 int min_pitch_val; ///< smallest pitch lag, derived from sample_rate / 400
00168 int max_pitch_val; ///< largest pitch lag, derived from sample_rate * 37 / 2000
00169 int pitch_nbits; ///< ceil(log2(max_pitch_val - min_pitch_val)); bits read per frame pitch
00170
00171 int block_pitch_nbits; ///< ceil(log2(block_pitch_range))
00172
00173 int block_pitch_range; ///< computed at init from block_conv_table[] and min_pitch_val
00174 int block_delta_pitch_nbits; ///< 1 + ceil(log2(block_delta_pitch_hrange))
00175
00176
00177
00178 int block_delta_pitch_hrange; ///< (pitch range / 8) rounded down to a multiple of 16
00179
00180 uint16_t block_conv_table[4]; ///< { min_pitch_val, range*25/64, range*44/64, max_pitch_val-1 };
00181 ///< consumers are not referenced here
00182
00191 int spillover_nbits; ///< not referenced here
00192
00193
00194
00195 int has_residual_lsps; ///< not referenced here
00196
00197
00198
00199
00200 int skip_bits_next; ///< not referenced here
00201
00202
00203
00204 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00207 int sframe_cache_size; ///< not referenced here; NOTE(review): presumably fill level of sframe_cache - confirm
00208
00209
00210
00211
00212 PutBitContext pb; ///< not referenced here
00213
00222 double prev_lsps[MAX_LSPS]; ///< LSPs of the previous frame; initialised to
00223 ///< M_PI * (n + 1) / (lsps + 1)
00224 int last_pitch_val; ///< pitch value carried between frames; starts at 40
00225 int last_acb_type; ///< ACB type of the previous frame; starts as ACB_TYPE_NONE
00226 int pitch_diff_sh16; ///< per-sample pitch change, scaled by 1 << 16
00227
00228 float silence_gain; ///< gain applied to FCB_TYPE_SILENCE excitation
00229
00230 int aw_idx_is_ext; ///< set by aw_parse_coords() when the 6-bit index needed 2 extra bits
00231
00232 int aw_pulse_range; ///< 24 if both block pitches are > 32, else 16
00233
00234
00235
00236
00237
00238 int aw_n_pulses[2]; ///< per-block count of repetitions of the pitch-periodic pulse
00239
00240
00241 int aw_first_pulse_off[2]; ///< per-block offset of the first pulse window
00242
00243 int aw_next_pulse_off_cache; ///< written at the end of aw_pulse_set2() and used as
00244 ///< pulse_off for the second block
00245
00246
00247
00248
00249 int frame_cntr; ///< seed input for pRNG()
00250
00251 float gain_pred_err[6]; ///< history of clipped FCB gain values used to predict the next gain
00252 float excitation_history[MAX_SIGNAL_HISTORY];
00256 float synth_history[MAX_LSPS];
00257
00264 RDFTContext rdft, irdft; ///< 128-point real FFT and its inverse (created with nbits = 7)
00265
00266 DCTContext dct, dst; ///< DCT-I and DST-I contexts (created with nbits = 6)
00267
00268 float sin[511], cos[511]; ///< lookup tables built at init; index 255 is the centre entry
00269
00270 float postfilter_agc; ///< gain memory handed to adaptive_gain_control()
00271
00272 float dcf_mem[2]; ///< state of the order-2 filter applied in postfilter()
00273 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00276 float denoise_filter_cache[MAX_FRAMESIZE]; ///< filter tail carried over between wiener_denoise() calls
00277 int denoise_filter_cache_size; ///< number of valid samples in denoise_filter_cache
00278 DECLARE_ALIGNED(16, float, tilted_lpcs_pf)[0x80];
00280 DECLARE_ALIGNED(16, float, denoise_coeffs_pf)[0x80];
00282 DECLARE_ALIGNED(16, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
00285
00288 } WMAVoiceContext;
00289
00299 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00300 {
00301 static const uint8_t bits[] = {
00302 2, 2, 2, 4, 4, 4,
00303 6, 6, 6, 8, 8, 8,
00304 10, 10, 10, 12, 12, 12,
00305 14, 14, 14, 14
00306 };
00307 static const uint16_t codes[] = {
00308 0x0000, 0x0001, 0x0002,
00309 0x000c, 0x000d, 0x000e,
00310 0x003c, 0x003d, 0x003e,
00311 0x00fc, 0x00fd, 0x00fe,
00312 0x03fc, 0x03fd, 0x03fe,
00313 0x0ffc, 0x0ffd, 0x0ffe,
00314 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00315 };
00316 int cntr[8], n, res;
00317
00318 memset(vbm_tree, 0xff, sizeof(vbm_tree));
00319 memset(cntr, 0, sizeof(cntr));
00320 for (n = 0; n < 17; n++) {
00321 res = get_bits(gb, 3);
00322 if (cntr[res] > 3)
00323 return -1;
00324 vbm_tree[res * 3 + cntr[res]++] = n;
00325 }
00326 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00327 bits, 1, 1, codes, 2, 2, 132);
00328 return 0;
00329 }
00330
/**
 * Initialise the decoder from the 46-byte extradata block.
 *
 * Parses the feature flags (little-endian 32-bit word at extradata offset
 * 18), sets up the postfilter transforms and lookup tables when enabled,
 * selects the LSP configuration, decodes the VBM tree (starting at
 * extradata offset 22) and derives all pitch-related bit sizes from the
 * sample rate.
 *
 * @param ctx codec context; reads extradata, block_align and sample_rate,
 *            sets sample_fmt
 * @return 0 on success, -1 on invalid extradata or unsupported sample rate
 */
00334 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00335 {
00336 int n, flags, pitch_range, lsp16_flag;
00337 WMAVoiceContext *s = ctx->priv_data;
00338
00347 if (ctx->extradata_size != 46) {
00348 av_log(ctx, AV_LOG_ERROR,
00349 "Invalid extradata size %d (should be 46)\n",
00350 ctx->extradata_size);
00351 return -1;
00352 }
00353 flags = AV_RL32(ctx->extradata + 18);
00354 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00355 s->do_apf = flags & 0x1;
00356 if (s->do_apf) {
00357 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00358 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00359 ff_dct_init(&s->dct, 6, DCT_I);
00360 ff_dct_init(&s->dst, 6, DST_I);
00361 /* Build the 511-entry tables: cos[0..255] comes from the sine window, sin[255..510] is a copy of it, and the loop below mirrors both around index 255 (sin with a sign flip). */
00362 ff_sine_window_init(s->cos, 256);
00363 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00364 for (n = 0; n < 255; n++) {
00365 s->sin[n] = -s->sin[510 - n];
00366 s->cos[510 - n] = s->cos[n];
00367 }
00368 }
00369 s->denoise_strength = (flags >> 2) & 0xF;
00370 if (s->denoise_strength >= 12) {
00371 av_log(ctx, AV_LOG_ERROR,
00372 "Invalid denoise filter strength %d (max=11)\n",
00373 s->denoise_strength);
00374 return -1;
00375 }
00376 s->denoise_tilt_corr = !!(flags & 0x40);
00377 s->dc_level = (flags >> 7) & 0xF;
00378 s->lsp_q_mode = !!(flags & 0x2000);
00379 s->lsp_def_mode = !!(flags & 0x4000);
00380 lsp16_flag = flags & 0x1000;
00381 if (lsp16_flag) {
00382 s->lsps = 16;
00383 s->frame_lsp_bitsize = 34;
00384 s->sframe_lsp_bitsize = 60;
00385 } else {
00386 s->lsps = 10;
00387 s->frame_lsp_bitsize = 24;
00388 s->sframe_lsp_bitsize = 48;
00389 }
/* start from LSPs that are equally spaced in (0, pi) */
00390 for (n = 0; n < s->lsps; n++)
00391 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00392
00393 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00394 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00395 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00396 return -1;
00397 }
00398 /* pitch limits are computed in 24.8 fixed point, with 50/256 added before truncation */
00399 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00400 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00401 pitch_range = s->max_pitch_val - s->min_pitch_val;
00402 s->pitch_nbits = av_ceil_log2(pitch_range);
00403 s->last_pitch_val = 40;
00404 s->last_acb_type = ACB_TYPE_NONE;
00405 s->history_nsamples = s->max_pitch_val + 8;
00406 /* reject sample rates whose pitch range does not fit the history buffer; min_sr/max_sr are only computed for the log message */
00407 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00408 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00409 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00410
00411 av_log(ctx, AV_LOG_ERROR,
00412 "Unsupported samplerate %d (min=%d, max=%d)\n",
00413 ctx->sample_rate, min_sr, max_sr);
00414
00415 return -1;
00416 }
00417 /* per-block pitch coding parameters, all derived from the pitch range */
00418 s->block_conv_table[0] = s->min_pitch_val;
00419 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00420 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00421 s->block_conv_table[3] = s->max_pitch_val - 1;
00422 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00423 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00424 s->block_pitch_range = s->block_conv_table[2] +
00425 s->block_conv_table[3] + 1 +
00426 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00427 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00428
00429 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00430
00431 return 0;
00432 }
00433
/**
 * Adaptive gain control, as used in the postfilter.
 *
 * Scales the postfiltered signal so that its level tracks that of the
 * unfiltered speech. The target gain is the ratio of the summed absolute
 * values of both signals; it is approached with a one-pole smoother whose
 * state persists across calls.
 *
 * @param out          output buffer for the gain-corrected samples (may be
 *                     the same buffer as in)
 * @param in           postfiltered input samples
 * @param speech_synth unfiltered reference samples
 * @param size         number of samples in each of the three buffers
 * @param alpha        smoothing factor of the gain tracker
 * @param gain_mem     smoother state; read on entry and updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }

    /* An all-zero postfilter input would make the ratio 0/0 or x/0; the
     * resulting NaN/Inf would be written to *gain_mem and corrupt every
     * later call. Treat the target gain as zero in that case. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00476
00495 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00496 const float *in, float *out, int size)
00497 {
00498 int n;
00499 float optimal_gain = 0, dot;
00500 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00501 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00502 *best_hist_ptr;
00503
00504
00505 do {
00506 dot = ff_dot_productf(in, ptr, size);
00507 if (dot > optimal_gain) {
00508 optimal_gain = dot;
00509 best_hist_ptr = ptr;
00510 }
00511 } while (--ptr >= end);
00512
00513 if (optimal_gain <= 0)
00514 return -1;
00515 dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00516 if (dot <= 0)
00517 return -1;
00518
00519 if (optimal_gain <= dot) {
00520 dot = dot / (dot + 0.6 * optimal_gain);
00521 } else
00522 dot = 0.625;
00523
00524
00525 for (n = 0; n < size; n++)
00526 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00527
00528 return 0;
00529 }
00530
/**
 * Compute the ratio between the lag-1 and lag-0 autocorrelation of an LPC
 * filter whose leading coefficient (1.0) is implicit.
 *
 * @param lpcs   LPC coefficients, without the leading 1.0
 * @param n_lpcs number of coefficients in lpcs
 * @return lag-1 autocorrelation divided by lag-0 autocorrelation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    /* lag 0: the implicit 1.0 contributes 1.0 * 1.0 */
    float autocorr0 = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
    /* lag 1: the implicit 1.0 pairs up with lpcs[0] */
    float autocorr1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return autocorr1 / autocorr0;
}
00550
/**
 * Derive the denoise filter coefficients from the (tilted) LPCs.
 *
 * @param s         decoder context (transforms, lookup tables, denoise settings)
 * @param lpcs      128 floats, LPCs on input; overwritten and used as
 *                  scratch space (entries 0..64 are written)
 * @param fcb_type  FCB_TYPE_* of the frame; selects the gain multiplier
 * @param coeffs    128 floats, receives the filter; only the first
 *                  remainder entries are non-zero on return
 * @param remainder number of filter taps to keep
 */
00554 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00555 int fcb_type, float *coeffs, int remainder)
00556 {
00557 float last_coeff, min = 15.0, max = -15.0;
00558 float irange, angle_mul, gain_mul, range, sq;
00559 int n, idx;
00560
00561 /* Transform the LPCs to the frequency domain, then replace each bin by the log10 of its squared magnitude while tracking min/max. Bins 0 and 1 of the packed RDFT output are handled separately; the value for lpcs[1] is parked in last_coeff and stored at index 64. */
00562 s->rdft.rdft_calc(&s->rdft, lpcs);
00563 #define log_range(var, assign) do { \
00564 float tmp = log10f(assign); var = tmp; \
00565 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00566 } while (0)
00567 log_range(last_coeff, lpcs[1] * lpcs[1]);
00568 for (n = 1; n < 64; n++)
00569 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00570 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00571 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00572 #undef log_range
00573 range = max - min;
00574 lpcs[64] = last_coeff;
00575
00576 /* Map each log-power value to a row entry of wmavoice_denoise_power_table
00577  * (irange scales max - value to 0..64, minus one, floored at 0), then turn
00578  * the table value into an angle term (written back to lpcs[]) and into a
00579  * gain looked up in wmavoice_energy_table (written to coeffs[]). */
00580
00581 irange = 64.0 / range;
00582 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00583 (5.0 / 14.7));
00584 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00585 for (n = 0; n <= 64; n++) {
00586 float pwr;
00587
00588 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00589 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00590 lpcs[n] = angle_mul * pwr;
00591
00592 /* 70.57 is approximately 1 / log10(1.0331663); indices beyond the table are extrapolated with powf() */
00593 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00594 if (idx > 127) {
00595 coeffs[n] = wmavoice_energy_table[127] *
00596 powf(1.0331663, idx - 127);
00597 } else
00598 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00599 }
00600
00601 /* Run DCT-I followed by DST-I over the angle terms.
00602  * NOTE(review): this pair looks like a Hilbert-transform style phase
00603  * derivation from the log magnitudes - confirm against the codec
00604  * documentation. */
00605 s->dct.dct_calc(&s->dct, lpcs);
00606 s->dst.dct_calc(&s->dst, lpcs);
00607
00608 /* Combine gains and angles into packed complex bins: the clipped angle value offsets into the sin/cos tables around their centre entry 255. The loop handles two bins per iteration with alternating sign of lpcs[64]. */
00609 idx = 255 + av_clip(lpcs[64], -255, 255);
00610 coeffs[0] = coeffs[0] * s->cos[idx];
00611 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00612 last_coeff = coeffs[64] * s->cos[idx];
00613 for (n = 63;; n--) {
00614 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00615 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00616 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00617
00618 if (!--n) break;
00619
00620 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00621 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00622 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00623 }
00624 coeffs[1] = last_coeff;
00625
00626 /* back to the time domain */
00627 s->irdft.rdft_calc(&s->irdft, coeffs);
00628
00629 /* truncate to remainder taps, optionally tilt-compensate, then normalise by 1 / (64 * sqrt(energy)) */
00630 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00631 if (s->denoise_tilt_corr) {
00632 float tilt_mem = 0;
00633
00634 coeffs[remainder - 1] = 0;
00635 ff_tilt_compensation(&tilt_mem,
00636 -1.8 * tilt_factor(coeffs, remainder - 1),
00637 coeffs, remainder);
00638 }
00639 sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00640 for (n = 0; n < remainder; n++)
00641 coeffs[n] *= sq;
00642 }
00643
/**
 * Denoise a block of synthesized speech by filtering it in the frequency
 * domain with coefficients derived from the block's LPCs.
 *
 * The filter output is longer than the block; the tail (remainder samples)
 * is kept in s->denoise_filter_cache and added to the start of following
 * blocks (overlap-add). For FCB_TYPE_SILENCE blocks no filtering is done,
 * only the cached tail of earlier blocks is added.
 *
 * @param s        decoder context
 * @param fcb_type FCB_TYPE_* of the frame
 * @param synth_pf samples to filter in place; the buffer must hold 128
 *                 floats because it is zero-padded and transformed as a whole
 * @param size     number of valid samples in synth_pf
 * @param lpcs     LPCs of the block (s->lsps values)
 */
00670 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00671 float *synth_pf, int size,
00672 const float *lpcs)
00673 {
00674 int remainder, lim, n;
00675
00676 if (fcb_type != FCB_TYPE_SILENCE) {
00677 float *tilted_lpcs = s->tilted_lpcs_pf,
00678 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00679 /* build { 1.0, lpcs..., 0... } as a 128-entry vector and apply tilt compensation to its first lsps + 2 entries */
00680 tilted_lpcs[0] = 1.0;
00681 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00682 memset(&tilted_lpcs[s->lsps + 1], 0,
00683 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00684 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00685 tilted_lpcs, s->lsps + 2);
00686
00687 /* The filter is limited to remainder taps, so that size + remainder
00688  * never exceeds the 128-point transform and never exceeds 2 * size - 1.
00689  * remainder stays uninitialised for silence frames; it is only read
00690  * again under the same fcb_type condition below. */
00691 remainder = FFMIN(127 - size, size - 1);
00692 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00693
00694 /* zero-pad the input, transform input and filter, multiply them bin by bin
00695  * (bins 0 and 1 are real-only in the packed format) and transform back */
00696 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00697 s->rdft.rdft_calc(&s->rdft, synth_pf);
00698 s->rdft.rdft_calc(&s->rdft, coeffs);
00699 synth_pf[0] *= coeffs[0];
00700 synth_pf[1] *= coeffs[1];
00701 for (n = 1; n < 64; n++) {
00702 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00703 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00704 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00705 }
00706 s->irdft.rdft_calc(&s->irdft, synth_pf);
00707 }
00708
00709 /* add the cached tail of previous blocks to the start of this block and drop the consumed part of the cache */
00710 if (s->denoise_filter_cache_size) {
00711 lim = FFMIN(s->denoise_filter_cache_size, size);
00712 for (n = 0; n < lim; n++)
00713 synth_pf[n] += s->denoise_filter_cache[n];
00714 s->denoise_filter_cache_size -= lim;
00715 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00716 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00717 }
00718
00719 /* store this block's tail: accumulate onto what is still cached, append the rest */
00720 if (fcb_type != FCB_TYPE_SILENCE) {
00721 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00722 for (n = 0; n < lim; n++)
00723 s->denoise_filter_cache[n] += synth_pf[size + n];
00724 if (lim < remainder) {
00725 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00726 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00727 s->denoise_filter_cache_size = remainder;
00728 }
00729 }
00730 }
00731
/**
 * Postfilter one block of synthesized speech.
 *
 * Steps: derive the excitation from the synthesized signal, optionally
 * smooth it against its pitch history, re-synthesize, denoise, apply
 * adaptive gain control and (for dc_level > 8) an order-2 filter.
 *
 * @param s           decoder context
 * @param synth       synthesized input samples
 * @param samples     receives size output samples
 * @param size        number of samples; at most MAX_FRAMESIZE / 2
 * @param lpcs        LPCs of the block (s->lsps values)
 * @param zero_exc_pf receives the derived excitation; kalman_smoothen()
 *                    reads history in front of this pointer
 * @param fcb_type    FCB_TYPE_* of the frame
 * @param pitch       pitch lag used for the smoothing search
 */
00752 static void postfilter(WMAVoiceContext *s, const float *synth,
00753 float *samples, int size,
00754 const float *lpcs, float *zero_exc_pf,
00755 int fcb_type, int pitch)
00756 {
00757 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00758 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00759 *synth_filter_in = zero_exc_pf;
00760
00761 assert(size <= MAX_FRAMESIZE / 2);
00762
00763 /* derive the excitation from the synthesized signal */
00764 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00765 /* smoothing is only attempted for AW_PULSES / EXC_PULSES frames; if it fails the unsmoothed excitation is used */
00766 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00767 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00768 synth_filter_in = synth_filter_in_buf;
00769
00770 /* re-synthesize and copy the last lsps output samples in front of the buffer as filter history for the next call */
00771 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00772 synth_filter_in, size, s->lsps);
00773 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00774 sizeof(synth_pf[0]) * s->lsps);
00775
00776 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00777
00778 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00779 &s->postfilter_agc);
00780
00781 if (s->dc_level > 8) {
00782 /* NOTE(review): the coefficients suggest a highpass that removes
00783  * DC / very low frequencies - confirm. The filter runs in place
00784  * and keeps its state in s->dcf_mem. */
00785 ff_acelp_apply_order_2_transfer_function(samples, samples,
00786 (const float[2]) { -1.99997, 1.0 },
00787 (const float[2]) { -1.9330735188, 0.93589198496 },
00788 0.93980580475, s->dcf_mem, size);
00789 }
00790 }
/**
 * Dequantize LSPs from a multi-stage codebook.
 *
 * Each stage contributes base_q[stage] + mul_q[stage] * entry to every
 * output value, where entry comes from the codebook row selected by
 * values[stage]. The stage codebooks are stored back to back in table.
 *
 * @param lsps     output, num values (overwritten)
 * @param num      number of values per codebook row
 * @param values   row index per stage
 * @param sizes    number of rows in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated stage codebooks, num bytes per row
 * @param mul_q    per-stage multiplier
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *stage_tab = table;
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *row   = stage_tab + values[stage] * num;
        const double  offset = base_q[stage];
        const double  scale  = mul_q[stage];
        int k;

        for (k = 0; k < num; k++)
            lsps[k] += offset + scale * row[k];

        /* skip over this stage's codebook */
        stage_tab += sizes[stage] * num;
    }
}
00826
00838 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00839 {
00840 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00841 static const double mul_lsf[4] = {
00842 5.2187144800e-3, 1.4626986422e-3,
00843 9.6179549166e-4, 1.1325736225e-3
00844 };
00845 static const double base_lsf[4] = {
00846 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00847 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00848 };
00849 uint16_t v[4];
00850
00851 v[0] = get_bits(gb, 8);
00852 v[1] = get_bits(gb, 6);
00853 v[2] = get_bits(gb, 5);
00854 v[3] = get_bits(gb, 5);
00855
00856 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00857 mul_lsf, base_lsf);
00858 }
00859
00864 static void dequant_lsp10r(GetBitContext *gb,
00865 double *i_lsps, const double *old,
00866 double *a1, double *a2, int q_mode)
00867 {
00868 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00869 static const double mul_lsf[3] = {
00870 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00871 };
00872 static const double base_lsf[3] = {
00873 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00874 };
00875 const float (*ipol_tab)[2][10] = q_mode ?
00876 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00877 uint16_t interpol, v[3];
00878 int n;
00879
00880 dequant_lsp10i(gb, i_lsps);
00881
00882 interpol = get_bits(gb, 5);
00883 v[0] = get_bits(gb, 7);
00884 v[1] = get_bits(gb, 6);
00885 v[2] = get_bits(gb, 6);
00886
00887 for (n = 0; n < 10; n++) {
00888 double delta = old[n] - i_lsps[n];
00889 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00890 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00891 }
00892
00893 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00894 mul_lsf, base_lsf);
00895 }
00896
00900 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00901 {
00902 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00903 static const double mul_lsf[5] = {
00904 3.3439586280e-3, 6.9908173703e-4,
00905 3.3216608306e-3, 1.0334960326e-3,
00906 3.1899104283e-3
00907 };
00908 static const double base_lsf[5] = {
00909 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00910 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00911 M_PI * -1.29816e-1
00912 };
00913 uint16_t v[5];
00914
00915 v[0] = get_bits(gb, 8);
00916 v[1] = get_bits(gb, 6);
00917 v[2] = get_bits(gb, 7);
00918 v[3] = get_bits(gb, 6);
00919 v[4] = get_bits(gb, 7);
00920
00921 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00922 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00923 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00924 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00925 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00926 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00927 }
00928
00933 static void dequant_lsp16r(GetBitContext *gb,
00934 double *i_lsps, const double *old,
00935 double *a1, double *a2, int q_mode)
00936 {
00937 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00938 static const double mul_lsf[3] = {
00939 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00940 };
00941 static const double base_lsf[3] = {
00942 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00943 };
00944 const float (*ipol_tab)[2][16] = q_mode ?
00945 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00946 uint16_t interpol, v[3];
00947 int n;
00948
00949 dequant_lsp16i(gb, i_lsps);
00950
00951 interpol = get_bits(gb, 5);
00952 v[0] = get_bits(gb, 7);
00953 v[1] = get_bits(gb, 7);
00954 v[2] = get_bits(gb, 7);
00955
00956 for (n = 0; n < 16; n++) {
00957 double delta = old[n] - i_lsps[n];
00958 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00959 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00960 }
00961
00962 dequant_lsps( a2, 10, v, vec_sizes, 1,
00963 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00964 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00965 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00966 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00967 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00968 }
00969
/**
 * Parse the start position of the pitch-periodic pulse for an AW_PULSES
 * frame and derive, for both blocks of the frame, how many pulse
 * repetitions fall into the block and where the first one sits.
 *
 * Results are stored in s->aw_idx_is_ext, s->aw_pulse_range,
 * s->aw_n_pulses[] and s->aw_first_pulse_off[].
 *
 * @param s     decoder context
 * @param gb    bit reader; 6 bits are consumed, plus 2 more if the index is >= 54
 * @param pitch pitch lag of block 0 and block 1
 */
00983 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
00984 const int *pitch)
00985 {
00986 static const int16_t start_offset[94] = {
00987 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
00988 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
00989 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
00990 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
00991 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
00992 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
00993 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
00994 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
00995 };
00996 int bits, offset;
00997
00998 /* table index: 0..53 directly; 54..63 are extended with 2 extra bits to 54..93 */
00999 s->aw_idx_is_ext = 0;
01000 if ((bits = get_bits(gb, 6)) >= 54) {
01001 s->aw_idx_is_ext = 1;
01002 bits += (bits - 54) * 3 + get_bits(gb, 2);
01003 }
01004
01005 /* Step a negative start offset forward by pitch[0] until it is >= 0, then
01006  * count the repetitions that land in block 0 and in block 1. */
01007 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01008 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01009 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01010 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01011 offset += s->aw_n_pulses[0] * pitch[0];
01012 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01013 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01014
01015 /* If the pulse train started before the respective block, move the first
01016  * window back by whole pitch periods as long as its end
01017  * (offset + aw_pulse_range) would still be past the block start. */
01018 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01019 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01020 s->aw_first_pulse_off[1] -= pitch[1];
01021 if (start_offset[bits] < 0)
01022 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01023 s->aw_first_pulse_off[0] -= pitch[0];
01024 }
01025 }
01026
/**
 * Decode the single extra pulse of an AW_PULSES block: it is placed at the
 * aidx-th sample position that is not covered by one of the pulse windows
 * handled in aw_pulse_set1().
 *
 * Appends one pulse to fcb and updates s->aw_next_pulse_off_cache. If no
 * free position is left, the function returns without adding a pulse.
 *
 * @param s         decoder context
 * @param gb        bit reader
 * @param block_idx block index within the frame (0 or 1)
 * @param fcb       pulse list to append to; fcb->pitch_lag must be set
 */
01034 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01035 int block_idx, AMRFixed *fcb)
01036 {
01037 uint16_t use_mask_mem[9];
01038 uint16_t *use_mask = use_mask_mem + 2;
01039 /* use_mask has one bit per sample position (MSB first, 16 positions per
01040  * word); a set bit means the position is still free. Words 0..4 cover the
01041  * 80 samples of the block; two words of padding on either side absorb
01042  * writes for window positions that start before or run past the block.
01043  */
01044
01045
01046 int pulse_off = s->aw_first_pulse_off[block_idx],
01047 pulse_start, n, idx, range, aidx, start_off = 0;
01048
01049 /* move the first window forward until it at least touches the block */
01050 if (s->aw_n_pulses[block_idx] > 0)
01051 while (pulse_off + s->aw_pulse_range < 1)
01052 pulse_off += fcb->pitch_lag;
01053
01054 /* range determines where the search for free positions starts (pulse_off - range / 2); block 1 continues from the offset cached by block 0 */
01055 if (s->aw_n_pulses[0] > 0) {
01056 if (block_idx == 0) {
01057 range = 32;
01058 } else {
01059 range = 8;
01060 if (s->aw_n_pulses[block_idx] > 0)
01061 pulse_off = s->aw_next_pulse_off_cache;
01062 }
01063 } else
01064 range = 16;
01065 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01066
01067 /* Mark all 80 positions free, then clear aw_pulse_range bits starting at
01068  * each repetition of the pulse window. A window spans two or three mask
01069  * words depending on its alignment. */
01070 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01071 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01072 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01073 if (s->aw_n_pulses[block_idx] > 0)
01074 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01075 int excl_range = s->aw_pulse_range;
01076 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01077 int first_sh = 16 - (idx & 15);
01078 *use_mask_ptr++ &= 0xFFFF << first_sh;
01079 excl_range -= first_sh;
01080 if (excl_range >= 16) {
01081 *use_mask_ptr++ = 0;
01082 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01083 } else
01084 *use_mask_ptr &= 0xFFFF >> excl_range;
01085 }
01086
01087 /* walk forward from pulse_start and count free positions until aidx + 1 of them were seen; once the walk leaves the block it restarts from the first free position in the mask */
01088 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01089 for (n = 0; n <= aidx; pulse_start++) {
01090 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01091 if (idx >= MAX_FRAMESIZE / 2) {
01092 if (use_mask[0]) idx = 0x0F;
01093 else if (use_mask[1]) idx = 0x1F;
01094 else if (use_mask[2]) idx = 0x2F;
01095 else if (use_mask[3]) idx = 0x3F;
01096 else if (use_mask[4]) idx = 0x4F;
01097 else return;
01098 idx -= av_log2_16bit(use_mask[idx >> 4]);
01099 }
01100 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01101 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01102 n++;
01103 start_off = idx;
01104 }
01105 }
01106 /* the last position found carries the pulse; its sign is one more bit */
01107 fcb->x[fcb->n] = start_off;
01108 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01109 fcb->n++;
01110
01111 /* offset of the next repetition relative to the start of the following block */
01112 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01113 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01114 }
01115
/**
 * Decode the first pulse set of an AW_PULSES block and append it to fcb.
 *
 * If the block contains repetitions of the pitch-periodic pulse window, 3
 * or 4 pulses are placed inside that window; otherwise a pair of
 * non-repeating pulses is decoded.
 *
 * @param s         decoder context
 * @param gb        bit reader; 12 bits are consumed, or 10 for block 0 when
 *                  s->aw_idx_is_ext is set
 * @param block_idx block index within the frame (0 or 1)
 * @param fcb       pulse list to append to; fcb->pitch_lag must be set
 */
01123 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01124 int block_idx, AMRFixed *fcb)
01125 {
01126 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01127 float v;
01128
01129 if (s->aw_n_pulses[block_idx] > 0) {
01130 int n, v_mask, i_mask, sh, n_pulses;
01131 /* window of 24: 3 pulses of 1 sign + 3 index bits; window of 16: 4 pulses of 1 sign + 2 index bits */
01132 if (s->aw_pulse_range == 24) {
01133 n_pulses = 3;
01134 v_mask = 8;
01135 i_mask = 7;
01136 sh = 4;
01137 } else {
01138 n_pulses = 4;
01139 v_mask = 4;
01140 i_mask = 3;
01141 sh = 3;
01142 }
01143 /* pulses are interleaved inside the window (index * n_pulses + n); positions before the block are moved forward by whole pitch periods, positions past the block are dropped */
01144 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01145 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01146 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01147 s->aw_first_pulse_off[block_idx];
01148 while (fcb->x[fcb->n] < 0)
01149 fcb->x[fcb->n] += fcb->pitch_lag;
01150 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01151 fcb->n++;
01152 }
01153 } else {
/* bits 1-8 select position and spacing (1, 3, 5 or 7 samples) of a pulse pair, bit 9 the sign of the first pulse, bit 0 whether the second pulse has the opposite sign */
01154 int num2 = (val & 0x1FF) >> 1, delta, idx;
01155
01156 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01157 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01158 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01159 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01160 v = (val & 0x200) ? -1.0 : 1.0;
01161 /* flag both pulses in no_repeat_mask */
01162 fcb->no_repeat_mask |= 3 << fcb->n;
01163 fcb->x[fcb->n] = idx - delta;
01164 fcb->y[fcb->n] = v;
01165 fcb->x[fcb->n + 1] = idx;
01166 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01167 fcb->n += 2;
01168 }
01169 }
01170
/**
 * Deterministic pseudo-random offset generator, used to pick the codebook
 * start position for silence frames.
 *
 * @param frame_cntr frame counter (seed)
 * @param block_num  block index within the frame
 * @param block_size number of samples in the block
 * @return a value in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* Per-residue multiplier pair, indexed by seed % 9.
     * NOTE(review): the pairs look like a fixed-point replacement for a
     * division by (residue + 1) - confirm against the reference decoder. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U },
        { 4545,  0 * 390451573U },
        { 3124, 11 * 268435456U },
        { 2380, 15 * 204522253U },
        { 1922, 23 * 165191050U },
        { 1612, 23 * 138547333U },
        { 1388, 27 * 119304648U },
        { 1219, 16 * 104755300U },
        { 1086, 39 *  93368855U }
    };
    unsigned int seed = MUL16(block_num, 1877) + frame_cntr;
    unsigned int residue, mixed;

    /* single conditional subtraction instead of a modulo by 0xFFFF */
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    /* 477218589 is about 2^32 / 9, so this presumably yields seed % 9 */
    residue = seed - 9 * MULH(477218589, seed);
    mixed   = (uint16_t) (seed * div_tbl[residue][0] +
                          UMULH(seed, div_tbl[residue][1]));

    return mixed % (1000 - block_size);
}
01215
01220 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01221 int block_idx, int size,
01222 const struct frame_type_desc *frame_desc,
01223 float *excitation)
01224 {
01225 float gain;
01226 int n, r_idx;
01227
01228 assert(size <= MAX_FRAMESIZE);
01229
01230
01231 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01232 r_idx = pRNG(s->frame_cntr, block_idx, size);
01233 gain = s->silence_gain;
01234 } else {
01235 r_idx = get_bits(gb, 8);
01236 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01237 }
01238
01239
01240 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01241
01242
01243 for (n = 0; n < size; n++)
01244 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01245 }
01246
/**
 * Generate the excitation of a block from the fixed (pulse) codebook and
 * the adaptive (pitch history) codebook.
 *
 * @param s               decoder context
 * @param gb              bit reader
 * @param block_idx       block index within the frame
 * @param size            number of samples in the block; at most MAX_FRAMESIZE / 2
 * @param block_pitch_sh2 block pitch in quarter-sample units
 * @param frame_desc      description of the current frame type
 * @param excitation      receives size samples; the samples in front of
 *                        this pointer are read as pitch history
 */
01251 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01252 int block_idx, int size,
01253 int block_pitch_sh2,
01254 const struct frame_type_desc *frame_desc,
01255 float *excitation)
01256 {
01257 static const float gain_coeff[6] = {
01258 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01259 };
01260 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01261 int n, idx, gain_weight;
01262 AMRFixed fcb;
01263
01264 assert(size <= MAX_FRAMESIZE / 2);
01265 memset(pulses, 0, sizeof(*pulses) * size);
01266
01267 fcb.pitch_lag = block_pitch_sh2 >> 2;
01268 fcb.pitch_fac = 1.0;
01269 fcb.no_repeat_mask = 0;
01270 fcb.n = 0;
01271
01272 /* Collect the fixed codebook pulses: either the two AW pulse sets, or
01273  * five pulse tracks read directly from the bitstream. */
01274 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01275 aw_pulse_set1(s, gb, block_idx, &fcb);
01276 aw_pulse_set2(s, gb, block_idx, &fcb);
01277 } else {
01278 int offset_nbits = 5 - frame_desc->log_n_blocks;
01279
01280 fcb.no_repeat_mask = -1;
01281 /* per track n: 1 sign bit and offset_nbits position bits (position = n + 5 * pos);
01282  * the first dbl_pulses tracks carry a second position, whose sign is flipped when it lies after the first */
01283 for (n = 0; n < 5; n++) {
01284 float sign;
01285 int pos1, pos2;
01286
01287 sign = get_bits1(gb) ? 1.0 : -1.0;
01288 pos1 = get_bits(gb, offset_nbits);
01289 fcb.x[fcb.n] = n + 5 * pos1;
01290 fcb.y[fcb.n++] = sign;
01291 if (n < frame_desc->dbl_pulses) {
01292 pos2 = get_bits(gb, offset_nbits);
01293 fcb.x[fcb.n] = n + 5 * pos2;
01294 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01295 }
01296 }
01297 }
01298 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01299
01300 /* One 7-bit index selects both gains. The FCB gain is predicted from the
01301  * history of previous (clipped) codebook values plus the new one. */
01302 idx = get_bits(gb, 7);
01303 fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01304 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01305 acb_gain = wmavoice_gain_codebook_acb[idx];
01306 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01307 -2.9957322736 ,
01308 1.6094379124 );
01309 /* shift the history and insert the new value 8 >> log_n_blocks times, so blocks of frames with fewer blocks weigh more */
01310 gain_weight = 8 >> frame_desc->log_n_blocks;
01311 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01312 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01313 for (n = 0; n < gain_weight; n++)
01314 s->gain_pred_err[n] = pred_err;
01315
01316 /* adaptive codebook: copy the pitch-delayed excitation history */
01317 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01318 int len;
/* The pitch changes linearly over the frame (16.16 fixed point). Each pass
 * handles the run of len samples for which the interpolation filter index
 * stays the same. */
01319 for (n = 0; n < size; n += len) {
01320 int next_idx_sh16;
01321 int abs_idx = block_idx * size + n;
01322 int pitch_sh16 = (s->last_pitch_val << 16) +
01323 s->pitch_diff_sh16 * abs_idx;
01324 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01325 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01326 idx = idx_sh16 >> 16;
01327 if (s->pitch_diff_sh16) {
01328 if (s->pitch_diff_sh16 > 0) {
01329 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01330 } else
01331 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01332 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01333 1, size - n);
01334 } else
01335 len = size;
01336
01337 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01338 wmavoice_ipol1_coeffs, 17,
01339 idx, 9, len);
01340 }
01341 } else {
/* constant pitch per block: low 2 bits of block_pitch_sh2 are the fractional part; with no fraction a plain (possibly overlapping) back-reference copy is enough */
01342 int block_pitch = block_pitch_sh2 >> 2;
01343 idx = block_pitch_sh2 & 3;
01344 if (idx) {
01345 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01346 wmavoice_ipol2_coeffs, 4,
01347 idx, 8, size);
01348 } else
01349 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01350 sizeof(float) * size);
01351 }
01352
01353 /* excitation = acb_gain * adaptive part + fcb_gain * pulses */
01354 ff_weighted_vector_sumf(excitation, excitation, pulses,
01355 acb_gain, fcb_gain, size);
01356 }
01357
01374 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01375 int block_idx, int size,
01376 int block_pitch_sh2,
01377 const double *lsps, const double *prev_lsps,
01378 const struct frame_type_desc *frame_desc,
01379 float *excitation, float *synth)
01380 {
01381 double i_lsps[MAX_LSPS];
01382 float lpcs[MAX_LSPS];
01383 float fac;
01384 int n;
01385
01386 if (frame_desc->acb_type == ACB_TYPE_NONE)
01387 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01388 else
01389 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01390 frame_desc, excitation);
01391
01392
01393 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01394 for (n = 0; n < s->lsps; n++)
01395 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01396 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01397
01398
01399 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01400 }
01401
01417 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01418 float *samples,
01419 const double *lsps, const double *prev_lsps,
01420 float *excitation, float *synth)
01421 {
01422 WMAVoiceContext *s = ctx->priv_data;
01423 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01424 int pitch[MAX_BLOCKS], last_block_pitch;
01425
01426
01427 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01428 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01429
01430 if (bd_idx < 0) {
01431 av_log(ctx, AV_LOG_ERROR,
01432 "Invalid frame type VLC code, skipping\n");
01433 return -1;
01434 }
01435
01436
01437 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01438
01439
01440
01441
01442 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01443 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01444 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01445 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01446 if (s->last_acb_type == ACB_TYPE_NONE ||
01447 20 * abs(cur_pitch_val - s->last_pitch_val) >
01448 (cur_pitch_val + s->last_pitch_val))
01449 s->last_pitch_val = cur_pitch_val;
01450
01451
01452 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01453 int fac = n * 2 + 1;
01454
01455 pitch[n] = (MUL16(fac, cur_pitch_val) +
01456 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01457 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01458 }
01459
01460
01461 s->pitch_diff_sh16 =
01462 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01463 }
01464
01465
01466 switch (frame_descs[bd_idx].fcb_type) {
01467 case FCB_TYPE_SILENCE:
01468 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01469 break;
01470 case FCB_TYPE_AW_PULSES:
01471 aw_parse_coords(s, gb, pitch);
01472 break;
01473 }
01474
01475 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01476 int bl_pitch_sh2;
01477
01478
01479 switch (frame_descs[bd_idx].acb_type) {
01480 case ACB_TYPE_HAMMING: {
01481
01482
01483
01484
01485
01486 int block_pitch,
01487 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01488 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01489 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01490
01491 if (n == 0) {
01492 block_pitch = get_bits(gb, s->block_pitch_nbits);
01493 } else
01494 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01495 get_bits(gb, s->block_delta_pitch_nbits);
01496
01497 last_block_pitch = av_clip(block_pitch,
01498 s->block_delta_pitch_hrange,
01499 s->block_pitch_range -
01500 s->block_delta_pitch_hrange);
01501
01502
01503 if (block_pitch < t1) {
01504 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01505 } else {
01506 block_pitch -= t1;
01507 if (block_pitch < t2) {
01508 bl_pitch_sh2 =
01509 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01510 } else {
01511 block_pitch -= t2;
01512 if (block_pitch < t3) {
01513 bl_pitch_sh2 =
01514 (s->block_conv_table[2] + block_pitch) << 2;
01515 } else
01516 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01517 }
01518 }
01519 pitch[n] = bl_pitch_sh2 >> 2;
01520 break;
01521 }
01522
01523 case ACB_TYPE_ASYMMETRIC: {
01524 bl_pitch_sh2 = pitch[n] << 2;
01525 break;
01526 }
01527
01528 default:
01529 bl_pitch_sh2 = 0;
01530 break;
01531 }
01532
01533 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01534 lsps, prev_lsps, &frame_descs[bd_idx],
01535 &excitation[n * block_nsamples],
01536 &synth[n * block_nsamples]);
01537 }
01538
01539
01540
01541 if (s->do_apf) {
01542 double i_lsps[MAX_LSPS];
01543 float lpcs[MAX_LSPS];
01544
01545 for (n = 0; n < s->lsps; n++)
01546 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01547 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01548 postfilter(s, synth, samples, 80, lpcs,
01549 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01550 frame_descs[bd_idx].fcb_type, pitch[0]);
01551
01552 for (n = 0; n < s->lsps; n++)
01553 i_lsps[n] = cos(lsps[n]);
01554 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01555 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01556 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01557 frame_descs[bd_idx].fcb_type, pitch[0]);
01558 } else
01559 memcpy(samples, synth, 160 * sizeof(synth[0]));
01560
01561
01562 s->frame_cntr++;
01563 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01564 s->last_acb_type = frame_descs[bd_idx].acb_type;
01565 switch (frame_descs[bd_idx].acb_type) {
01566 case ACB_TYPE_NONE:
01567 s->last_pitch_val = 0;
01568 break;
01569 case ACB_TYPE_ASYMMETRIC:
01570 s->last_pitch_val = cur_pitch_val;
01571 break;
01572 case ACB_TYPE_HAMMING:
01573 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01574 break;
01575 }
01576
01577 return 0;
01578 }
01579
/**
 * Make a set of LSPs usable for LPC conversion: enforce a lower bound on
 * the first value, a minimum spacing between neighbours and an upper bound
 * on the last value, then restore ascending order if the clamping broke it.
 *
 * @param lsps array of LSPs, modified in place
 * @param num  number of entries in lsps; must be at least 1
 */
static void stabilize_lsps(double *lsps, int num)
{
    int i, pos;
    int out_of_order = 0;

    /* Lower bound for the first value, minimum distance between
     * successive values, upper bound for the last value. */
    lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
    for (i = 1; i < num; i++)
        lsps[i] = FFMAX(lsps[i], lsps[i - 1] + 0.0125 * M_PI);
    lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);

    /* Capping the last value can push it below its predecessor; only pay
     * for the sort when an inversion is actually present. */
    for (i = 1; i < num && !out_of_order; i++)
        out_of_order = lsps[i] < lsps[i - 1];
    if (!out_of_order)
        return;

    /* Insertion sort into ascending order. */
    for (i = 1; i < num; i++) {
        const double val = lsps[i];

        pos = i;
        while (pos > 0 && !(lsps[pos - 1] <= val)) {
            lsps[pos] = lsps[pos - 1];
            pos--;
        }
        lsps[pos] = val;
    }
}
01620
01630 static int check_bits_for_superframe(GetBitContext *orig_gb,
01631 WMAVoiceContext *s)
01632 {
01633 GetBitContext s_gb, *gb = &s_gb;
01634 int n, need_bits, bd_idx;
01635 const struct frame_type_desc *frame_desc;
01636
01637
01638 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01639 skip_bits_long(gb, get_bits_count(orig_gb));
01640 assert(get_bits_left(gb) == get_bits_left(orig_gb));
01641
01642
01643 if (get_bits_left(gb) < 14)
01644 return 1;
01645 if (!get_bits1(gb))
01646 return -1;
01647 if (get_bits1(gb)) skip_bits(gb, 12);
01648 if (s->has_residual_lsps) {
01649 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01650 return 1;
01651 skip_bits_long(gb, s->sframe_lsp_bitsize);
01652 }
01653
01654
01655 for (n = 0; n < MAX_FRAMES; n++) {
01656 int aw_idx_is_ext = 0;
01657
01658 if (!s->has_residual_lsps) {
01659 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01660 skip_bits_long(gb, s->frame_lsp_bitsize);
01661 }
01662 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01663 if (bd_idx < 0)
01664 return -1;
01665 frame_desc = &frame_descs[bd_idx];
01666 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01667 if (get_bits_left(gb) < s->pitch_nbits)
01668 return 1;
01669 skip_bits_long(gb, s->pitch_nbits);
01670 }
01671 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01672 skip_bits(gb, 8);
01673 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01674 int tmp = get_bits(gb, 6);
01675 if (tmp >= 0x36) {
01676 skip_bits(gb, 2);
01677 aw_idx_is_ext = 1;
01678 }
01679 }
01680
01681
01682 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01683 need_bits = s->block_pitch_nbits +
01684 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01685 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01686 need_bits = 2 * !aw_idx_is_ext;
01687 } else
01688 need_bits = 0;
01689 need_bits += frame_desc->frame_size;
01690 if (get_bits_left(gb) < need_bits)
01691 return 1;
01692 skip_bits_long(gb, need_bits);
01693 }
01694
01695 return 0;
01696 }
01697
01718 static int synth_superframe(AVCodecContext *ctx,
01719 float *samples, int *data_size)
01720 {
01721 WMAVoiceContext *s = ctx->priv_data;
01722 GetBitContext *gb = &s->gb, s_gb;
01723 int n, res, n_samples = 480;
01724 double lsps[MAX_FRAMES][MAX_LSPS];
01725 const double *mean_lsf = s->lsps == 16 ?
01726 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01727 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01728 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01729
01730 memcpy(synth, s->synth_history,
01731 s->lsps * sizeof(*synth));
01732 memcpy(excitation, s->excitation_history,
01733 s->history_nsamples * sizeof(*excitation));
01734
01735 if (s->sframe_cache_size > 0) {
01736 gb = &s_gb;
01737 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01738 s->sframe_cache_size = 0;
01739 }
01740
01741 if ((res = check_bits_for_superframe(gb, s)) == 1) return 1;
01742
01743
01744
01745
01746
01747 if (!get_bits1(gb)) {
01748 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01749 return -1;
01750 }
01751
01752
01753 if (get_bits1(gb)) {
01754 if ((n_samples = get_bits(gb, 12)) > 480) {
01755 av_log(ctx, AV_LOG_ERROR,
01756 "Superframe encodes >480 samples (%d), not allowed\n",
01757 n_samples);
01758 return -1;
01759 }
01760 }
01761
01762 if (s->has_residual_lsps) {
01763 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01764
01765 for (n = 0; n < s->lsps; n++)
01766 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01767
01768 if (s->lsps == 10) {
01769 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01770 } else
01771 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01772
01773 for (n = 0; n < s->lsps; n++) {
01774 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01775 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01776 lsps[2][n] += mean_lsf[n];
01777 }
01778 for (n = 0; n < 3; n++)
01779 stabilize_lsps(lsps[n], s->lsps);
01780 }
01781
01782
01783 for (n = 0; n < 3; n++) {
01784 if (!s->has_residual_lsps) {
01785 int m;
01786
01787 if (s->lsps == 10) {
01788 dequant_lsp10i(gb, lsps[n]);
01789 } else
01790 dequant_lsp16i(gb, lsps[n]);
01791
01792 for (m = 0; m < s->lsps; m++)
01793 lsps[n][m] += mean_lsf[m];
01794 stabilize_lsps(lsps[n], s->lsps);
01795 }
01796
01797 if ((res = synth_frame(ctx, gb, n,
01798 &samples[n * MAX_FRAMESIZE],
01799 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01800 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01801 &synth[s->lsps + n * MAX_FRAMESIZE])))
01802 return res;
01803 }
01804
01805
01806
01807
01808 if (get_bits1(gb)) {
01809 res = get_bits(gb, 4);
01810 skip_bits(gb, 10 * (res + 1));
01811 }
01812
01813
01814 *data_size = n_samples * sizeof(float);
01815
01816
01817 memcpy(s->prev_lsps, lsps[2],
01818 s->lsps * sizeof(*s->prev_lsps));
01819 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01820 s->lsps * sizeof(*synth));
01821 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01822 s->history_nsamples * sizeof(*excitation));
01823 if (s->do_apf)
01824 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01825 s->history_nsamples * sizeof(*s->zero_exc_pf));
01826
01827 return 0;
01828 }
01829
01837 static int parse_packet_header(WMAVoiceContext *s)
01838 {
01839 GetBitContext *gb = &s->gb;
01840 unsigned int res;
01841
01842 if (get_bits_left(gb) < 11)
01843 return 1;
01844 skip_bits(gb, 4);
01845 s->has_residual_lsps = get_bits1(gb);
01846 do {
01847 res = get_bits(gb, 6);
01848
01849 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01850 return 1;
01851 } while (res == 0x3F);
01852 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01853
01854 return 0;
01855 }
01856
01872 static void copy_bits(PutBitContext *pb,
01873 const uint8_t *data, int size,
01874 GetBitContext *gb, int nbits)
01875 {
01876 int rmn_bytes, rmn_bits;
01877
01878 rmn_bits = rmn_bytes = get_bits_left(gb);
01879 if (rmn_bits < nbits)
01880 return;
01881 rmn_bits &= 7; rmn_bytes >>= 3;
01882 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01883 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01884 ff_copy_bits(pb, data + size - rmn_bytes,
01885 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01886 }
01887
01899 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01900 int *data_size, AVPacket *avpkt)
01901 {
01902 WMAVoiceContext *s = ctx->priv_data;
01903 GetBitContext *gb = &s->gb;
01904 int size, res, pos;
01905
01906 if (*data_size < 480 * sizeof(float)) {
01907 av_log(ctx, AV_LOG_ERROR,
01908 "Output buffer too small (%d given - %zu needed)\n",
01909 *data_size, 480 * sizeof(float));
01910 return -1;
01911 }
01912 *data_size = 0;
01913
01914
01915
01916
01917
01918
01919 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01920 if (!size)
01921 return 0;
01922 init_get_bits(&s->gb, avpkt->data, size << 3);
01923
01924
01925
01926
01927 if (size == ctx->block_align) {
01928 if ((res = parse_packet_header(s)) < 0)
01929 return res;
01930
01931
01932
01933
01934 if (s->spillover_nbits > 0) {
01935 if (s->sframe_cache_size > 0) {
01936 int cnt = get_bits_count(gb);
01937 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01938 flush_put_bits(&s->pb);
01939 s->sframe_cache_size += s->spillover_nbits;
01940 if ((res = synth_superframe(ctx, data, data_size)) == 0 &&
01941 *data_size > 0) {
01942 cnt += s->spillover_nbits;
01943 s->skip_bits_next = cnt & 7;
01944 return cnt >> 3;
01945 } else
01946 skip_bits_long (gb, s->spillover_nbits - cnt +
01947 get_bits_count(gb));
01948 } else
01949 skip_bits_long(gb, s->spillover_nbits);
01950 }
01951 } else if (s->skip_bits_next)
01952 skip_bits(gb, s->skip_bits_next);
01953
01954
01955 s->sframe_cache_size = 0;
01956 s->skip_bits_next = 0;
01957 pos = get_bits_left(gb);
01958 if ((res = synth_superframe(ctx, data, data_size)) < 0) {
01959 return res;
01960 } else if (*data_size > 0) {
01961 int cnt = get_bits_count(gb);
01962 s->skip_bits_next = cnt & 7;
01963 return cnt >> 3;
01964 } else if ((s->sframe_cache_size = pos) > 0) {
01965
01966 init_get_bits(gb, avpkt->data, size << 3);
01967 skip_bits_long(gb, (size << 3) - pos);
01968 assert(get_bits_left(gb) == pos);
01969
01970
01971 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01972 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01973
01974
01975 }
01976
01977 return size;
01978 }
01979
01980 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
01981 {
01982 WMAVoiceContext *s = ctx->priv_data;
01983
01984 if (s->do_apf) {
01985 ff_rdft_end(&s->rdft);
01986 ff_rdft_end(&s->irdft);
01987 ff_dct_end(&s->dct);
01988 ff_dct_end(&s->dst);
01989 }
01990
01991 return 0;
01992 }
01993
01994 static av_cold void wmavoice_flush(AVCodecContext *ctx)
01995 {
01996 WMAVoiceContext *s = ctx->priv_data;
01997 int n;
01998
01999 s->postfilter_agc = 0;
02000 s->sframe_cache_size = 0;
02001 s->skip_bits_next = 0;
02002 for (n = 0; n < s->lsps; n++)
02003 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02004 memset(s->excitation_history, 0,
02005 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02006 memset(s->synth_history, 0,
02007 sizeof(*s->synth_history) * MAX_LSPS);
02008 memset(s->gain_pred_err, 0,
02009 sizeof(s->gain_pred_err));
02010
02011 if (s->do_apf) {
02012 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02013 sizeof(*s->synth_filter_out_buf) * s->lsps);
02014 memset(s->dcf_mem, 0,
02015 sizeof(*s->dcf_mem) * 2);
02016 memset(s->zero_exc_pf, 0,
02017 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02018 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02019 }
02020 }
02021
02022 AVCodec ff_wmavoice_decoder = {
02023 "wmavoice",
02024 AVMEDIA_TYPE_AUDIO,
02025 CODEC_ID_WMAVOICE,
02026 sizeof(WMAVoiceContext),
02027 wmavoice_decode_init,
02028 NULL,
02029 wmavoice_decode_end,
02030 wmavoice_decode_packet,
02031 CODEC_CAP_SUBFRAMES,
02032 .flush = wmavoice_flush,
02033 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02034 };