Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 : #include <stdint.h>
5 : #include "options.h" /* Compilation switches */
6 : #include "cnst.h" /* Common constants */
7 : //#include "prot_fx.h" /* Function prototypes */
8 : #include "rom_com_fx.h" /* Static table prototypes */
9 : #include "rom_com.h" /* Static table prototypes */
10 : #include "prot_fx.h" /* Function prototypes */
11 : #include "prot_fx_enc.h" /* Function prototypes */
12 : #include "ivas_prot_fx.h" /* Function prototypes */
13 :
14 :
15 : /*============================================================================*/
16 : /* FUNCTION : void FEC_encode_fx() */
17 : /*----------------------------------------------------------------------------*/
18 : /* PURPOSE : Encoder supplementary information for FEC */
19 : /*----------------------------------------------------------------------------*/
20 : /* INPUT ARGUMENTS : */
21 : /* Word16 *synth i : pointer to synthesized speech for E computation */
22 : /* Word16 coder_type i : type of coder */
23 : /* Word16 clas i : signal clas for current frame */
24 : /* Word16 *fpit i : close loop fractional pitch buffer */
25 : /* Word16 *res i : LP residual signal frame */
26 : /* Word16 L_frame i : Frame length */
27 : /* Word32 total_brate i : total codec bitrate */
28 : /*----------------------------------------------------------------------------*/
29 : /* OUTPUT ARGUMENTS : */
30 : /*----------------------------------------------------------------------------*/
31 : /* INPUT/OUTPUT ARGUMENTS : */
32 : /* Word16 *last_pulse_pos i/o: Position of the last pulse */
33 : /* Encoder_State *st_fx i/o: state structure */
34 : /*----------------------------------------------------------------------------*/
35 : /* RETURN ARGUMENTS : */
36 : /* _ None */
37 : /*----------------------------------------------------------------------------*/
38 : /* */
39 : /*============================================================================*/
40 1427 : void FEC_encode_fx(
41 : BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */
42 : const ACELP_config acelp_cfg, /* i/o: configuration of the ACELP */
43 : const Word16 *synth, /* i : pointer to synthesized speech for E computation Q_synth*/
44 : const Word16 coder_type, /* i : type of coder Q0*/
45 : Word16 clas, /* i : signal clas for current frame Q0*/
46 : const Word16 *fpit, /* i : close loop fractional pitch buffer Q6*/
47 : const Word16 *res, /* i : LP residual signal frame Qx*/
48 : Word16 *last_pulse_pos, /* i/o: Position of the last pulse Q0*/
49 : const Word16 L_frame, /* i : Frame length Q0*/
50 : const Word32 total_brate, /* i : total codec bitrate Q0*/
51 : const Word16 Q_new, /* i : input scaling */
52 : const Word16 shift /* i : scaling to get 12bits */
53 : )
54 : {
55 : Word16 tmpS, index;
56 : Word16 maxi, sign, tmp_FER_pitch;
57 : Word32 enr_q, Ltmp;
58 : Word16 enr_lg_ent, enr_lg_frac, exp_enrq;
59 :
60 1427 : tmpS = 0;
61 1427 : move16();
62 1427 : enr_q = 1;
63 1427 : move16();
64 1427 : sign = 0;
65 1427 : move16();
66 1427 : test();
67 1427 : test();
68 1427 : IF( GT_16( coder_type, UNVOICED ) && LT_16( coder_type, AUDIO ) && acelp_cfg.FEC_mode > 0 )
69 : {
70 : /*-----------------------------------------------------------------*
71 : * encode signal class (not needed for VC mode since it is clearly voiced) (2 bits)
72 : *-----------------------------------------------------------------*/
73 1420 : IF( NE_16( coder_type, VOICED ) )
74 : {
75 : /* encode signal clas with 2 bits */
76 1208 : test();
77 1208 : IF( EQ_16( clas, UNVOICED_CLAS ) )
78 : {
79 553 : index = 0;
80 553 : move16();
81 : }
82 655 : ELSE IF( EQ_16( clas, VOICED_TRANSITION ) || EQ_16( clas, UNVOICED_TRANSITION ) )
83 : {
84 83 : index = 1;
85 83 : move16();
86 : }
87 572 : ELSE IF( EQ_16( clas, VOICED_CLAS ) )
88 : {
89 496 : index = 2;
90 496 : move16();
91 : }
92 : ELSE
93 : {
94 76 : index = 3;
95 76 : move16();
96 : }
97 1208 : push_indice( hBstr, IND_FEC_CLAS, index, FEC_BITS_CLS );
98 : }
99 :
100 : /*-----------------------------------------------------------------*
101 : * encode frame energy (5 bits)
102 : *-----------------------------------------------------------------*/
103 1420 : IF( GT_16( acelp_cfg.FEC_mode, 1 ) ) /* GENERIC and VOICED frames */
104 : {
105 : /* frame energy (maximum energy per pitch period for voiced frames or mean energy per sample over 2nd halframe for unvoiced frames) */
106 : /*frame_ener( L_frame, clas, synth, fpit[(L_frame>>6)-1], &enr_q, 0 );*/
107 624 : exp_enrq = frame_ener_fx( L_frame, clas, synth, shr_r( fpit[( L_frame >> 6 ) - 1], 6 ), &enr_q, L_frame, Q_new, shift, 1 );
108 :
109 : /* linearly quantize the energy in the range 0 : FEC_ENR_STEP : 96 dB */
110 : /*tmpS = (short)( 10.0 * log10( enr_q + 0.001f ) / FEC_ENR_STEP )*/ /*To be converted fl_2_fx*/
111 :
112 624 : enr_lg_frac = Log2_norm_lc( enr_q );
113 624 : enr_lg_ent = sub( 30, exp_enrq ); /* Q15 */
114 624 : Ltmp = Mpy_32_16( enr_lg_ent, enr_lg_frac, LG10_s3_0 ); /* Q14 */
115 624 : tmpS = extract_h( L_shl( Ltmp, 1 ) ); /* Q15 + 1 -> Q0*/
116 :
117 624 : tmpS = s_min( tmpS, 31 );
118 624 : tmpS = s_max( tmpS, 0 );
119 :
120 624 : push_indice( hBstr, IND_FEC_ENR, tmpS, FEC_BITS_ENR );
121 : }
122 : /*-----------------------------------------------------------------*
123 : * Encode last glottal pulse position (8 bits)
124 : *-----------------------------------------------------------------*/
125 1420 : IF( GT_16( acelp_cfg.FEC_mode, 2 ) ) /* GENERIC frames */
126 : {
127 : /* retrieve the last glottal pulse position of the previous frame */
128 : /* use the current pitch information to scale or not the quantization */
129 624 : tmp_FER_pitch = shr( fpit[0], 6 ); /* take the 1st subframe pit, since it is easier to get on decoder side */
130 624 : sign = 0;
131 624 : move16();
132 624 : maxi = *last_pulse_pos; /* Q0 */
133 624 : move16();
134 624 : IF( maxi < 0 )
135 : {
136 127 : sign = 1;
137 127 : move16();
138 : /*maxi = -maxi; */
139 127 : maxi = negate( maxi ); /* Q0 */
140 : }
141 :
142 624 : if ( GE_16( tmp_FER_pitch, 128 ) )
143 : {
144 187 : maxi = shr( maxi, 1 );
145 : }
146 :
147 624 : if ( GT_16( maxi, 127 ) )
148 : {
149 : /* better not use the glottal pulse position at all instead of using a wrong pulse */
150 : /* can happen only with pitch > 254 and max pit = 289 and should happen very rarely */
151 0 : maxi = 0;
152 0 : move16();
153 : }
154 :
155 624 : if ( sign == 1 )
156 : {
157 127 : maxi = add( maxi, 128 ); /* use 8 bits (MSB represent the sign of the pulse) Q0*/
158 : }
159 :
160 624 : push_indice( hBstr, IND_FEC_POS, maxi, FEC_BITS_POS );
161 : }
162 1420 : maxi = 0;
163 1420 : move16();
164 :
165 : /* If bitrate < 24k4, then the pitch
166 : is not represented in the same domain (12.k instead of 16k) */
167 1420 : test();
168 1420 : IF( GE_16( clas, VOICED_CLAS ) && GE_32( total_brate, ACELP_24k40 ) )
169 : {
170 : /*maxi = findpulse( L_frame, res, (short)(fpit[(L_frame>>6)-1]), 0, &sign ); */
171 376 : maxi = findpulse_fx( L_frame, res, shr_r( fpit[( L_frame >> 6 ) - 1], 6 ), 0, &sign ); /* Q0 */
172 376 : if ( sign == 1 )
173 : {
174 : /*maxi = -maxi;*/
175 141 : maxi = negate( maxi ); /* Q0 */
176 : }
177 : }
178 :
179 1420 : *last_pulse_pos = maxi; /* Q0 */
180 1420 : move16();
181 : }
182 : ELSE
183 : {
184 7 : *last_pulse_pos = 0;
185 7 : move16();
186 : }
187 :
188 1427 : return;
189 : }
190 :
191 :
192 : /*-------------------------------------------------------------------*
193 : * FEC_lsf_estim_enc_fx()
194 : *
195 : * Simulates LSF estimation in case of FEC in the encoder ( only one frame erasure is considered )
196 : * The estimated LSF vector is then used to check LSF stability and may invoke safety-net usage in the next frame
197 : *-------------------------------------------------------------------*/
198 :
199 269889 : void FEC_lsf_estim_enc_fx(
200 : Encoder_State *st_fx, /* i : Encoder static memory */
201 : Word16 *lsf /* o : estimated LSF vector Qlog2(2.56)*/
202 : )
203 : {
204 : Word16 i;
205 : Word16 alpha, lsf_mean[M];
206 : Word16 tmp;
207 :
208 269889 : IF( EQ_16( st_fx->L_frame, L_FRAME ) )
209 : {
210 120708 : Copy( UVWB_Ave_fx, lsf_mean, M );
211 : }
212 : ELSE
213 : {
214 149181 : Copy( GEWB2_Ave_fx, lsf_mean, M );
215 : }
216 :
217 : /*----------------------------------------------------------------------*
218 : * Initialize the alpha factor
219 : *----------------------------------------------------------------------*/
220 :
221 269889 : IF( EQ_16( st_fx->last_coder_type, UNVOICED ) )
222 : {
223 : /* clearly unvoiced */
224 27517 : alpha = _ALPHA_UU_FX; /* Q15 */
225 27517 : move16();
226 : }
227 : ELSE
228 : {
229 242372 : test();
230 242372 : test();
231 242372 : IF( EQ_16( st_fx->last_coder_type, AUDIO ) || EQ_16( st_fx->clas, INACTIVE_CLAS ) )
232 : {
233 11345 : alpha = 32604; /* 0.995 in Q15 */
234 11345 : move16();
235 : }
236 231027 : ELSE IF( EQ_16( st_fx->clas, UNVOICED_CLAS ) )
237 : {
238 : /* if stable, do not flatten the spectrum in the first erased frame */
239 : /* alpha = st->stab_fac * (1.0f - 2.0f*ALPHA_U) + 2.0f*ALPHA_U; */
240 112391 : alpha = add( mult( st_fx->stab_fac_fx, 32768 - _ALPHA_U_FX_X_2 ), _ALPHA_U_FX_X_2 ); /* Q15 */
241 : }
242 118636 : ELSE IF( EQ_16( st_fx->clas, UNVOICED_TRANSITION ) )
243 : {
244 2772 : alpha = _ALPHA_UT_FX; /* Q15 */
245 2772 : move16();
246 : }
247 115864 : ELSE IF( EQ_16( st_fx->clas, VOICED_CLAS ) || EQ_16( st_fx->clas, ONSET ) )
248 : {
249 : /* clearly voiced - mild convergence to the CNG spectrum for the first three erased frames */
250 101625 : alpha = _ALPHA_V_FX; /* Q15 */
251 101625 : move16();
252 : }
253 14239 : ELSE IF( EQ_16( st_fx->clas, SIN_ONSET ) )
254 : {
255 0 : alpha = _ALPHA_S_FX; /* Q15 */
256 0 : move16();
257 : }
258 : ELSE
259 : {
260 : /* long erasures and onsets - rapid convergence to the CNG spectrum */
261 14239 : alpha = _ALPHA_VT_FX; /* Q15 */
262 14239 : move16();
263 : }
264 : }
265 : /*----------------------------------------------------------------------*
266 : * Extrapolate LSF vector
267 : *----------------------------------------------------------------------*/
268 269889 : tmp = sub( 32767, alpha ); /* Q15 */
269 : /* extrapolate the old LSF vector */
270 4588113 : FOR( i = 0; i < M; i++ )
271 : {
272 : /* calculate mean LSF vector */
273 : /*lsf_mean[i] = BETA_FEC * lsf_mean[i] + (1-BETA_FEC) * st->lsf_adaptive_mean[i]; */
274 4318224 : lsf_mean[i] = mac_r( L_mult( BETA_FEC_FX, lsf_mean[i] ), 32768 - BETA_FEC_FX, st_fx->lsf_adaptive_mean_fx[i] ); /* Qlog2(2.56) */
275 4318224 : move16();
276 :
277 : /* move old LSF vector towards the mean LSF vector */
278 : /* lsf[i] = alpha * st->lsf_old[i] + (1.0f - alpha) * lsf_mean[i]; */
279 4318224 : lsf[i] = mac_r( L_mult( alpha, st_fx->lsf_old_fx[i] ), tmp, lsf_mean[i] ); /* Qlog2(2.56) */
280 4318224 : move16();
281 : }
282 :
283 : /* check LSF stability through LSF ordering */
284 269889 : IF( EQ_16( st_fx->L_frame, L_FRAME ) )
285 : {
286 120708 : reorder_lsf_fx( lsf, MODE1_LSF_GAP_FX, M, INT_FS_FX );
287 : }
288 : ELSE /* L_frame == L_FRAME16k */
289 : {
290 149181 : reorder_lsf_fx( lsf, MODE1_LSF_GAP_FX, M, INT_FS_16k_FX );
291 : }
292 :
293 269889 : return;
294 : }
295 :
296 : /*-------------------------------------------------------------------*
297 : * FEC_encode()
298 : *
299 : * Encoder supplementary information for FEC
300 : *-------------------------------------------------------------------*/
301 :
302 155916 : void FEC_encode_ivas_fx(
303 : BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */
304 : const ACELP_config acelp_cfg, /* i/o: configuration of the ACELP */
305 : const Word16 *synth, /* i : pointer to synthesized speech for E computation Q_Synth*/
306 : const Word16 coder_type, /* i : type of coder Q0*/
307 : Word16 clas, /* i : signal clas for current frame Q0*/
308 : const Word16 *fpit, /* i : close loop fractional pitch buffer Q6*/
309 : const Word16 *res, /* i : LP residual signal frame Qx*/
310 : Word16 *last_pulse_pos, /* i/o: Position of the last pulse Q0*/
311 : const Word16 L_frame, /* i : Frame length Q0*/
312 : const Word32 total_brate, /* i : total codec bitrate Q0*/
313 : const Word16 Q_synth /* i : input scaling */
314 : )
315 : {
316 : Word16 tmpS, index;
317 : Word16 maxi, sign, tmp_FER_pitch;
318 : Word32 enr_q, Ltmp;
319 : Word16 exp_enrq;
320 :
321 155916 : tmpS = 0;
322 155916 : move16();
323 155916 : enr_q = 1;
324 155916 : move16();
325 155916 : sign = 0;
326 155916 : move16();
327 155916 : test();
328 155916 : test();
329 155916 : IF( GT_16( coder_type, UNVOICED ) && LT_16( coder_type, AUDIO ) && acelp_cfg.FEC_mode > 0 )
330 : {
331 : /*-----------------------------------------------------------------*
332 : * encode signal class (not needed for VC mode since it is clearly voiced) (2 bits)
333 : *-----------------------------------------------------------------*/
334 110011 : IF( NE_16( coder_type, VOICED ) )
335 : {
336 : /* encode signal clas with 2 bits */
337 105260 : test();
338 105260 : IF( clas == UNVOICED_CLAS )
339 : {
340 44956 : index = 0;
341 44956 : move16();
342 : }
343 60304 : ELSE IF( EQ_16( clas, VOICED_TRANSITION ) || EQ_16( clas, UNVOICED_TRANSITION ) )
344 : {
345 7143 : index = 1;
346 7143 : move16();
347 : }
348 53161 : ELSE IF( EQ_16( clas, VOICED_CLAS ) )
349 : {
350 45743 : index = 2;
351 45743 : move16();
352 : }
353 : ELSE
354 : {
355 7418 : index = 3;
356 7418 : move16();
357 : }
358 105260 : push_indice( hBstr, IND_FEC_CLAS, index, FEC_BITS_CLS );
359 : }
360 :
361 : /*-----------------------------------------------------------------*
362 : * encode frame energy (5 bits)
363 : *-----------------------------------------------------------------*/
364 110011 : IF( GT_16( acelp_cfg.FEC_mode, 1 ) ) /* GENERIC and VOICED frames */
365 : {
366 : /* frame energy (maximum energy per pitch period for voiced frames or mean energy per sample over 2nd halframe for unvoiced frames) */
367 : /*frame_ener( L_frame, clas, synth, fpit[(L_frame>>6)-1], &enr_q, 0 );*/
368 : Word32 synth32[L_FRAME16k];
369 62529 : Copy_Scale_sig_16_32_DEPREC( synth, synth32, L_FRAME16k, 0 ); /* Qsynth */
370 62529 : fer_energy_fx( L_frame, clas, synth32, Q_synth, shr_r( fpit[( L_frame >> 6 ) - 1], 6 ), &enr_q, L_frame );
371 62529 : exp_enrq = sub( 31, shl( Q_synth, 1 ) );
372 :
373 62529 : test();
374 62529 : test();
375 62529 : if ( EQ_16( clas, VOICED_CLAS ) || EQ_16( clas, ONSET ) || EQ_16( clas, SIN_ONSET ) ) /* Voiced or Onset current frame */
376 : {
377 33858 : exp_enrq = 31;
378 33858 : move16();
379 : }
380 : /* linearly quantize the energy in the range 0 : FEC_ENR_STEP : 96 dB */
381 : /*tmpS = (short)( 10.0 * log10( enr_q + 0.001f ) / FEC_ENR_STEP )*/ /*To be converted fl_2_fx*/
382 :
383 62529 : Ltmp = Mpy_32_32( BASOP_Util_Log10( enr_q, exp_enrq ), 894784853 /* 10 / FEC_ENR_STEP Q28 */ ); // Q 25 + 28 - 31 = Q22
384 62529 : IF( Ltmp < 0 )
385 : {
386 26 : tmpS = extract_l( L_negate( L_shr( L_negate( Ltmp ), Q22 ) ) ); /* Q0 */
387 : }
388 : ELSE
389 : {
390 62503 : tmpS = extract_l( L_shr( Ltmp, Q22 ) ); /* Q0 */
391 : }
392 :
393 62529 : tmpS = s_min( tmpS, FEC_ENR_QLIMIT );
394 62529 : tmpS = s_max( tmpS, 0 );
395 :
396 62529 : push_indice( hBstr, IND_FEC_ENR, tmpS, FEC_BITS_ENR );
397 : }
398 : /*-----------------------------------------------------------------*
399 : * Encode last glottal pulse position (8 bits)
400 : *-----------------------------------------------------------------*/
401 :
402 110011 : IF( GT_16( acelp_cfg.FEC_mode, 2 ) ) /* GENERIC frames */
403 : {
404 : /* retrieve the last glottal pulse position of the previous frame */
405 : /* use the current pitch information to scale or not the quantization */
406 10103 : tmp_FER_pitch = shr( fpit[0], 6 ); /* take the 1st subframe pit, since it is easier to get on decoder side */
407 10103 : sign = 0;
408 10103 : move16();
409 10103 : maxi = *last_pulse_pos; /* Q0 */
410 10103 : move16();
411 10103 : IF( maxi < 0 )
412 : {
413 1645 : sign = 1;
414 1645 : move16();
415 : /*maxi = -maxi; */
416 1645 : maxi = negate( maxi );
417 : }
418 :
419 10103 : if ( GE_16( tmp_FER_pitch, 128 ) )
420 : {
421 3236 : maxi = shr( maxi, 1 );
422 : }
423 :
424 10103 : if ( GT_16( maxi, 127 ) )
425 : {
426 : /* better not use the glottal pulse position at all instead of using a wrong pulse */
427 : /* can happen only with pitch > 254 and max pit = 289 and should happen very rarely */
428 26 : maxi = 0;
429 26 : move16();
430 : }
431 :
432 10103 : if ( EQ_16( sign, 1 ) )
433 : {
434 1645 : maxi = add( maxi, 128 ); /* use 8 bits (MSB represent the sign of the pulse) Q0*/
435 : }
436 :
437 10103 : push_indice( hBstr, IND_FEC_POS, maxi, FEC_BITS_POS );
438 : }
439 110011 : maxi = 0;
440 110011 : move16();
441 :
442 : /* If bitrate < 24k4, then the pitch
443 : is not represented in the same domain (12.k instead of 16k) */
444 110011 : test();
445 110011 : IF( GE_16( clas, VOICED_CLAS ) && GE_32( total_brate, ACELP_24k40 ) )
446 : {
447 : /*maxi = findpulse( L_frame, res, (short)(fpit[(L_frame>>6)-1]), 0, &sign ); */
448 14068 : maxi = findpulse_fx( L_frame, res, shr_r( fpit[( L_frame >> 6 ) - 1], 6 ), 0, &sign );
449 14068 : if ( EQ_16( sign, 1 ) )
450 : {
451 : /*maxi = -maxi;*/
452 5459 : maxi = negate( maxi );
453 : }
454 : }
455 :
456 110011 : *last_pulse_pos = maxi; /* Q0 */
457 110011 : move16();
458 : }
459 : ELSE
460 : {
461 45905 : *last_pulse_pos = 0;
462 45905 : move16();
463 : }
464 :
465 155916 : return;
466 : }
|