Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 : #include <stdint.h>
5 : #include "options.h" /* Compilation switches */
6 : #include "cnst.h" /* Common constants */
7 : //#include "prot_fx.h" /* Function prototypes */
8 : #include "rom_com.h" /* Function prototypes */
9 : #include "prot_fx.h" /* Function prototypes */
10 : #include "prot_fx_enc.h" /* Function prototypes */
11 :
12 : /*-------------------------------------------------------------------*
13 : * Local constants
14 : *-------------------------------------------------------------------*/
15 :
16 : #define L_ENR ( NB_SSF + 2 )
17 :
18 :
19 : /*-------------------------------------------------------------------*
20 : * find_ener_decrease_fx()
21 : *
22 : * Find maximum energy ratio between short subblocks in case
23 : * energy is trailing off after a spike
24 : *-------------------------------------------------------------------*/
25 :
26 19848 : static Word16 find_ener_decrease_fx( /* o : maximum energy ratio Q10*/
27 : const Word16 ind_deltaMax, /* i : index of the beginning of maximum energy search Q0*/
28 : const Word32 *pt_enr_ssf /* i : Pointer to the energy buffer Qx*/
29 : )
30 : { /* Returns (peak energy) / (smallest energy after the peak + 100000) over L_ENR entries starting at ind_deltaMax + 2 */
31 : Word16 i, j, end, flag;
32 : Word16 wtmp0, wtmp1;
33 : Word32 maxEnr, minEnr;
34 : Word16 dE2, exp0, exp1;
35 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
36 19848 : Flag Overflow = 0;
37 19848 : move32();
38 : #endif
39 : /* Overflow is only declared locally under BASOP_NOGLOB_DECLARE_LOCAL; otherwise it must come from outside this function */
40 19848 : dE2 = 0;
41 19848 : move16();
42 :
43 19848 : j = ind_deltaMax + 2; /* first entry examined */
44 19848 : move16();
45 19848 : end = j + L_ENR; /* one past the last entry examined */
46 19848 : move16();
47 19848 : maxEnr = L_add( pt_enr_ssf[j], 0 );
48 19848 : j = add( j, 1 );
49 19848 : flag = 0;
50 19848 : move16();
51 198480 : FOR( i = j; i < end; i++ ) /* follow the energy while it keeps strictly rising; j advances in step with i until then */
52 : {
53 178632 : test();
54 178632 : IF( ( GT_32( pt_enr_ssf[i], maxEnr ) ) && ( flag == 0 ) )
55 : {
56 7845 : maxEnr = L_add( pt_enr_ssf[i], 0 ); /*Qx*/
57 7845 : j = add( j, 1 );
58 : }
59 : ELSE
60 : {
61 170787 : flag = 1; /* first non-increase freezes maxEnr and j for the rest of the loop */
62 170787 : move16();
63 : }
64 : }
65 : /* j now indexes the first entry that did not exceed the running maximum (or equals end) */
66 19848 : minEnr = L_add( maxEnr, 0 );
67 190635 : FOR( i = j; i < end; i++ ) /* smallest energy from the end of the rise up to the end of the window */
68 : {
69 170787 : minEnr = L_min( minEnr, pt_enr_ssf[i] ); /* Qx */
70 : }
71 :
72 : /* ratio maxEnr / minEnr computed on normalised 16-bit mantissas, then rescaled to Q10 */
73 19848 : minEnr = L_add_sat( minEnr, 100000 ); /* saturating bias on the denominator */
74 19848 : exp0 = norm_l( minEnr );
75 19848 : wtmp0 = extract_h( L_shl( minEnr, exp0 ) );
76 19848 : exp1 = sub( norm_l( maxEnr ), 1 ); /* one bit less than full normalisation; presumably keeps wtmp1 below wtmp0 for div_s - TODO confirm */
77 19848 : wtmp1 = extract_h( L_shl( maxEnr, exp1 ) );
78 19848 : wtmp1 = div_s( wtmp1, wtmp0 );
79 19848 : dE2 = shr_ro( wtmp1, add( sub( exp1, exp0 ), 15 - 10 ), &Overflow ); /*Q10*/
80 :
81 19848 : return dE2;
82 : }
83 :
84 : /*-------------------------------------------------------------------*
85 : * find_uv_fx()
86 : *
87 : * Decision about coder type
88 : *-------------------------------------------------------------------*/
89 :
90 3100 : Word16 find_uv_fx( /* o : coding type */
91 : Encoder_State *st_fx, /* i/o: encoder state structure */
92 : const Word16 *T_op_fr, /* i : pointer to adjusted fractional pitch (4 val.) Q6*/
93 : const Word16 *voicing_fr, /* i : refined correlation for each subframes Q15*/
94 : const Word16 *speech, /* i : pointer to speech signal for E computation Q_new*/
95 : const Word32 *ee, /* i : lf/hf Energy ratio for present frame Q6*/
96 : const Word16 corr_shift, /* i : normalized correlation correction in noise Q15*/
97 : const Word16 relE, /* i : relative frame energy Q8*/
98 : const Word16 Etot, /* i : total energy Q8*/
99 : const Word32 hp_E[], /* i : energy in HF Q_new + Q_SCALE*/
100 : const Word16 Q_new, /* i : scaling referred to by speech and hp_E above */
101 : Word16 *flag_spitch, /* i/o: flag to indicate very short stable pitch and high correlation Q0*/
102 : const Word16 shift, /* i : only forwarded to core_coder_mode_switch_fx() */
103 : const Word16 last_core_orig /* i : original last core Q0*/
104 : )
105 : { /* Picks GENERIC, UNVOICED or VOICED for the frame and updates the spike, tilt, RF-mode and raw-coder-type fields of st_fx */
106 : Word16 coder_type, i;
107 : Word32 mean_ee, dE1, fac_32;
108 : const Word16 *pt_speech;
109 : Word32 L_tmp, enr_ssf[2 * NB_SSF + 2 * NB_SSF + 2], E_min_th; /* enr_ssf: [0, 2*NB_SSF) receives saved energies from st_fx, current-frame energies start at 2*NB_SSF */
110 : Word16 dE2;
111 : Word16 ind_deltaMax, tmp_offset_flag;
112 : Word32 Ltmp0, *pt_enr_ssf, *pt_enr_ssf1, dE2_th;
113 : Word16 exp0, exp1, Q_in;
114 : Word16 wtmp0, wtmp1;
115 : Word16 fac, mean_voi3, dE3;
116 : Word16 relE_thres;
117 : Word16 mean_voi3_offset;
118 : Word16 voicing_m, dpit1, dpit2, dpit3;
119 : Word16 ee0_th, ee1_th, voi_th, nb_cond, flag_low_relE;
120 3100 : NOISE_EST_HANDLE hNoiseEst = st_fx->hNoiseEst;
121 3100 : SC_VBR_ENC_HANDLE hSC_VBR = st_fx->hSC_VBR;
122 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
123 3100 : Flag Overflow = 0;
124 3100 : move32();
125 : #endif
126 :
127 : Word16 Last_Resort;
128 : Word16 vadnoise;
129 : /* take the SC-VBR values from the handle when present, otherwise use zeros */
130 3100 : IF( hSC_VBR != NULL )
131 : {
132 3100 : Last_Resort = hSC_VBR->Last_Resort; /* Q0 */
133 3100 : vadnoise = hSC_VBR->vadnoise_fx;
134 3100 : move16();
135 3100 : move16();
136 : }
137 : ELSE
138 : {
139 0 : Last_Resort = 0;
140 0 : vadnoise = 0;
141 0 : move16();
142 0 : move16();
143 : }
144 :
145 3100 : Q_in = sub( Q_new, 1 ); /* Q value handed to emaximum_fx() below */
146 :
147 : /*-----------------------------------------------------------------*
148 : * Detect sudden energy increases to catch voice and music
149 : * temporal events (dE1)
150 : *
151 : * - Find maximum energy per short subblocks.
152 : * Two subblock sets are used shifted by half the subblock length
153 : * - Find maximum energy ratio between adjacent subblocks
154 : *-----------------------------------------------------------------*/
155 :
156 : /* Find maximum energy per short subblocks */
157 3100 : pt_speech = speech - SSF; /* Q_new */
158 3100 : pt_enr_ssf = enr_ssf + 2 * NB_SSF;
159 58900 : FOR( i = 0; i < 2 * ( NB_SSF + 1 ); i++ )
160 : {
161 55800 : emaximum_fx( Q_in, pt_speech, SSF, pt_enr_ssf );
162 55800 : pt_speech += ( SSF / 2 );
163 55800 : pt_enr_ssf++;
164 : }
165 : /* 2*(NB_SSF+1) values were written, one per SSF-long window advanced by SSF/2 samples */
166 3100 : dE1 = 0;
167 3100 : move16(); /* NOTE(review): dE1 is Word32, so move32() may have been intended - TODO confirm */
168 3100 : ind_deltaMax = 0;
169 3100 : move16();
170 3100 : pt_enr_ssf = enr_ssf + 2 * NB_SSF;
171 3100 : pt_enr_ssf1 = pt_enr_ssf + 2; /* two entries ahead, i.e. the window starting SSF samples later */
172 :
173 : /* Test on energy increase between adjacent sub-subframes */
174 3100 : exp1 = 0;
175 3100 : move16();
176 52700 : FOR( i = 0; i < 2 * NB_SSF; i++ )
177 : {
178 : /*fac = *pt_enr_ssf1 / (*pt_enr_ssf + 1);*/
179 49600 : Ltmp0 = L_max( *pt_enr_ssf, 1 );
180 49600 : exp0 = norm_l( Ltmp0 );
181 49600 : wtmp0 = extract_h( L_shl( Ltmp0, exp0 ) );
182 49600 : exp1 = sub( norm_l( *pt_enr_ssf1 ), 1 );
183 49600 : wtmp1 = extract_h( L_shl( *pt_enr_ssf1, exp1 ) );
184 49600 : fac = div_s( wtmp1, wtmp0 );
185 49600 : fac_32 = L_shr_o( L_deposit_l( fac ), add( sub( exp1, exp0 ), 15 - 13 ), &Overflow ); /* fac32 in Q13*/
186 :
187 49600 : if ( GT_32( fac_32, dE1 ) ) /* remember the index at which the largest ratio so far was found */
188 : {
189 10462 : ind_deltaMax = i;
190 10462 : move16();
191 : }
192 :
193 49600 : dE1 = L_max( dE1, fac_32 ); /* Q13 */
194 :
195 49600 : pt_enr_ssf++;
196 49600 : pt_enr_ssf1++;
197 : }
198 :
199 : /*-----------------------------------------------------------------*
200 : * Average spectral tilt
201 : * Average voicing (normalized correlation)
202 : *-----------------------------------------------------------------*/
203 :
204 : /*mean_ee = 1.0f/3.0f * (st->ee_old + ee[0] + ee[1]); */ /* coefficients take into account the position of the window */
205 3100 : mean_ee = L_add_o( L_add_o( st_fx->ee_old_fx, ee[0], &Overflow ), ee[1], &Overflow ); /* Q6 */
206 3100 : mean_ee = Mult_32_16( mean_ee, 10923 /* 1/3 in Q15 */ ); /*Q6*/
207 :
208 : /* mean_voi3 = 1.0f/3.0f * (voicing[0] + voicing[1] + voicing[2]);*/
209 3100 : Ltmp0 = L_mult( st_fx->voicing_fx[0], 10923 /* 1/3 in Q15 */ ); /* Q31 */
210 3100 : Ltmp0 = L_mac( Ltmp0, st_fx->voicing_fx[1], 10923 /* 1/3 in Q15 */ ); /* Q31 */
211 3100 : mean_voi3 = mac_r_sat( Ltmp0, st_fx->voicing_fx[2], 10923 /* 1/3 in Q15 */ ); /*Q15*/
212 : /*-----------------------------------------------------------------*
213 : * Total frame energy difference (dE3)
214 : *-----------------------------------------------------------------*/
215 :
216 3100 : dE3 = sub( Etot, hNoiseEst->Etot_last_fx ); /*Q8*/
217 :
218 : /*-----------------------------------------------------------------*
219 : * Energy decrease after spike (dE2)
220 : *-----------------------------------------------------------------*/
221 :
222 : /* set different thresholds and conditions for NB and WB input */
223 3100 : dE2_th = 30 << 10; /* 30.0 in Q10 */
224 3100 : move32();
225 3100 : nb_cond = 1;
226 3100 : move16(); /* no additional condition for WB input */
227 3100 : IF( EQ_16( st_fx->input_bwidth, NB ) )
228 : {
229 0 : dE2_th = 21 << 10; /* 21.0 in Q10 */
230 0 : move32();
231 0 : if ( GE_16( add_o( mean_voi3, corr_shift, &Overflow ), 22282 /* 0.68 in Q15 */ ) ) /*( mean_voi3 + corr_shift ) >= 0.68f*/
232 : {
233 0 : nb_cond = 0;
234 0 : move16();
235 : }
236 : }
237 :
238 : /* calculate maximum energy decrease */
239 3100 : dE2 = 0;
240 3100 : move16(); /* Test on energy decrease after an energy spike */
241 3100 : pt_enr_ssf = enr_ssf + 2 * NB_SSF;
242 : /* spike in this frame: evaluate the decay now if L_ENR entries are available, otherwise save index and energies in st_fx */
243 3100 : test();
244 3100 : IF( GT_32( dE1, 30 << 13 ) && nb_cond ) /*>30 Q13*/
245 : {
246 83 : IF( LT_16( sub( shl( NB_SSF, 1 ), ind_deltaMax ), L_ENR ) ) /* 2*NB_SSF - ind_deltaMax < L_ENR */
247 : {
248 60 : st_fx->old_ind_deltaMax = ind_deltaMax; /* Q0 */
249 60 : move16();
250 60 : Copy32( pt_enr_ssf, st_fx->old_enr_ssf_fx, 2 * NB_SSF ); /* Qx */
251 : }
252 : ELSE
253 : {
254 23 : st_fx->old_ind_deltaMax = -1;
255 23 : move16();
256 23 : dE2 = find_ener_decrease_fx( ind_deltaMax, pt_enr_ssf ); /*Q10*/
257 :
258 23 : if ( GT_32( dE2, dE2_th ) )
259 : {
260 1 : st_fx->spike_hyst = 0;
261 1 : move16();
262 : }
263 : }
264 : }
265 : ELSE
266 : {
267 3017 : IF( st_fx->old_ind_deltaMax >= 0 ) /* an index was saved by an earlier call */
268 : {
269 58 : Copy32( st_fx->old_enr_ssf_fx, enr_ssf, 2 * NB_SSF ); /* Qx */ /* saved energies go in front of the current-frame ones */
270 58 : dE2 = find_ener_decrease_fx( st_fx->old_ind_deltaMax, enr_ssf ); /* Q10 */
271 :
272 58 : if ( GT_32( dE2, dE2_th ) )
273 : {
274 3 : st_fx->spike_hyst = 1;
275 3 : move16();
276 : }
277 : }
278 :
279 3017 : st_fx->old_ind_deltaMax = -1;
280 3017 : move16();
281 : }
282 :
283 : /*-----------------------------------------------------------------*
284 : * Detection of voiced offsets (tmp_offset_flag)
285 : *-----------------------------------------------------------------*/
286 :
287 3100 : tmp_offset_flag = 1;
288 3100 : move16();
289 :
290 3100 : IF( NE_16( st_fx->input_bwidth, NB ) )
291 : {
292 3100 : ee0_th = 154; /*2.4 in Q6 */
293 3100 : move16();
294 3100 : voi_th = 24248; /*0.74f Q15 */
295 3100 : move16();
296 : }
297 : ELSE
298 : {
299 0 : ee0_th = 627; /*9.8f Q6 */
300 0 : move16();
301 0 : voi_th = 24904; /*0.76f Q15*/
302 0 : move16();
303 : }
304 :
305 3100 : test();
306 3100 : test();
307 3100 : test();
308 6062 : if ( ( EQ_16( st_fx->last_coder_type_raw, UNVOICED ) ) || /* previous frame was unvoiced */
309 3978 : ( ( LT_32( ee[0], ee0_th ) ) && ( GT_32( hp_E[0], L_shl( E_MIN_FX, Q_new ) ) ) && /* energy is concentrated in high frequencies provided that some energy is present in HF */
310 483 : ( LT_16( add_o( st_fx->voicing_fx[0], corr_shift, &Overflow ), voi_th ) ) ) ) /* normalized correlation is low */
311 : {
312 317 : tmp_offset_flag = 0;
313 317 : move16();
314 : }
315 :
316 : /*-----------------------------------------------------------------*
317 : * Decision about UC
318 : *-----------------------------------------------------------------*/
319 :
320 : /* SC-VBR - set additional parameters and thresholds for SC-VBR */
321 3100 : mean_voi3_offset = 0;
322 3100 : move16();
323 3100 : flag_low_relE = 0;
324 3100 : move16();
325 3100 : ee1_th = 608; /*9.5 Q6*/
326 3100 : move16();
327 3100 : IF( st_fx->Opt_SC_VBR || ( EQ_16( st_fx->idchan, 1 ) && EQ_16( st_fx->element_mode, IVAS_CPE_TD ) ) ) /* Allow the low energy flag for the secondary channel */
328 : {
329 0 : ee1_th = 544; /*8.5f Q6*/
330 0 : move16();
331 :
332 : /* SC-VBR - determine the threshold on relative energy as a function of lp_noise */
333 0 : IF( NE_16( st_fx->input_bwidth, NB ) )
334 : {
335 : /*relE_thres = 0.700f * st->lp_noise - 33.5f; (lp_noise in Q8, constant Q8<<16) */
336 0 : L_tmp = L_mac( -562036736 /* 33.5f in Q24 */, 22938 /* 0.7 in Q15 */, st_fx->lp_noise_fx ); // Q24
337 0 : if ( Last_Resort == 0 )
338 : {
339 : /*relE_thres = 0.650f * st->lp_noise - 33.5f; (lp_noise in Q8, constant Q8<<16)*/
340 0 : L_tmp = L_mac( -562036736 /* 33.5f in Q24 */, 21299 /* 0.650f in Q15 */, st_fx->lp_noise_fx ); // Q24
341 : }
342 0 : relE_thres = round_fx( L_tmp );
343 : }
344 : ELSE
345 : {
346 :
347 : /*relE_thres = 0.60f * st->lp_noise - 28.2f; (lp_noise in Q8, constant Q8<<16)*/
348 0 : L_tmp = L_mac( -473117491 /* 28.2f in Q24 */, 19661 /* 0.6f in Q15 */, st_fx->lp_noise_fx ); // Q24
349 0 : relE_thres = round_fx( L_tmp );
350 : }
351 0 : relE_thres = s_max( relE_thres, -6400 /* -25.0f in Q8 */ ); /* Q8 */
352 :
353 : /* SC-VBR = set flag on low relative energy */
354 0 : if ( LT_16( relE, relE_thres ) )
355 : {
356 0 : flag_low_relE = 1;
357 0 : move16();
358 : }
359 :
360 : /* SC-VBR - correction of voicing threshold for NB inputs (important only in noisy conditions) */
361 0 : test();
362 0 : if ( EQ_16( st_fx->input_bwidth, NB ) && LT_16( vadnoise, 20 << 8 ) ) /* vadnoise in Q8, constant Q0<<8 */
363 : {
364 0 : mean_voi3_offset = 1638; /*0.05f Q15*/
365 0 : move16();
366 : }
367 : }
368 :
369 : /* make decision whether frame is unvoiced */
370 3100 : E_min_th = L_shl( E_MIN_FX, Q_new );
371 3100 : coder_type = GENERIC;
372 3100 : move16();
373 3100 : IF( EQ_16( st_fx->input_bwidth, NB ) )
374 : {
375 0 : test();
376 0 : test();
377 0 : test();
378 0 : test();
379 0 : test();
380 0 : test();
381 0 : test();
382 0 : test();
383 0 : test();
384 0 : test();
385 0 : test();
386 0 : if ( ( ( LT_16( add_o( mean_voi3, corr_shift, &Overflow ), add( 22282 /* 0.68 in Q15 */, mean_voi3_offset ) ) ) && /* normalized correlation low */
387 0 : ( LT_16( add_o( st_fx->voicing_fx[2], corr_shift, &Overflow ), 25887 /* 0.79 in Q15 */ ) ) && /* normalized correlation low on look-ahead - onset detection */
388 0 : ( LT_32( ee[0], 640 /* 10.0f in Q6 */ ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy concentrated in high frequencies provided that some energy is present in HF... */
389 0 : ( LT_32( ee[1], ee1_th ) ) && ( GT_32( hp_E[1], E_min_th ) ) && /* ... biased towards look-ahead to detect onsets */
390 0 : ( tmp_offset_flag == 0 ) && /* Take care of voiced offsets */
391 : /*( st_fx->music_hysteresis_fx == 0 ) &&*/ /* ... and in segment after AUDIO frames */
392 0 : ( LE_32( dE1, 237568 /* 29.0f in Q13 */ ) ) && /* Avoid on sharp energy spikes */
393 0 : ( LE_32( st_fx->old_dE1_fx, 237568 /* 29.0f in Q13 */ ) ) && /* + one frame hysteresis */
394 0 : ( st_fx->spike_hyst < 0 ) ) || /* Avoid after sharp energy spikes followed by decay (e.g. castanets) */
395 : flag_low_relE ) /* low relative frame energy (only for SC-VBR) */
396 : {
397 0 : coder_type = UNVOICED;
398 0 : move16();
399 : }
400 : }
401 : ELSE
402 : {
403 3100 : test();
404 3100 : test();
405 3100 : test();
406 3100 : test();
407 3100 : test();
408 3100 : test();
409 3100 : test();
410 3100 : test();
411 3100 : test();
412 3100 : test();
413 3100 : test();
414 3100 : test();
415 4048 : if ( ( ( LT_16( add_o( mean_voi3, corr_shift, &Overflow ), add( 22774 /* 0.695f in Q15 */, mean_voi3_offset ) ) ) && /* normalized correlation low */
416 1792 : ( LT_16( add_sat( st_fx->voicing_fx[2], corr_shift ), 25887 /* 0.79 in Q15; NOTE(review): 0.8 would be 26214 - confirm intended threshold */ ) ) && /* normalized correlation low on look-ahead - onset detection */
417 1376 : ( LT_32( ee[0], 397 /* 6.2f in Q6 */ ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy concentrated in high frequencies provided that some energy is present in HF... */
418 480 : ( LT_32( ee[1], 397 /* 6.2f in Q6 */ ) ) && ( GT_32( hp_E[1], E_min_th ) ) && /* ... biased towards look-ahead to detect onsets */
419 158 : ( tmp_offset_flag == 0 ) && /* Take care of voiced offsets */
420 : /*( st_fx->music_hysteresis_fx == 0 ) && */ /* ... and in segment after AUDIO frames */
421 307 : ( LE_32( dE1, 245760 /* 30.0f in Q13 */ ) ) && /* Avoid on sharp energy spikes */
422 149 : ( LE_32( st_fx->old_dE1_fx, 245760 /* 30.0f in Q13 */ ) ) && /* + one frame hysteresis */
423 139 : ( st_fx->spike_hyst < 0 ) ) /* Avoid after sharp energy spikes followed by decay (e.g. castanets) */
424 2962 : || ( flag_low_relE && ( LE_32( st_fx->old_dE1_fx, 245760 /* 30.0f in Q13 */ ) ) ) ) /* low relative frame energy (only for SC-VBR) */
425 : {
426 138 : coder_type = UNVOICED;
427 138 : move16();
428 : }
429 : }
430 :
431 : /*-----------------------------------------------------------------*
432 : * Decision about VC
433 : *-----------------------------------------------------------------*/
434 3100 : if ( st_fx->Opt_SC_VBR )
435 : {
436 0 : hSC_VBR->set_ppp_generic = 0;
437 : }
438 3100 : move16();
439 :
440 3100 : test();
441 3100 : test();
442 3100 : IF( EQ_16( st_fx->localVAD, 1 ) && EQ_16( coder_type, GENERIC ) && NE_16( last_core_orig, AMR_WB_CORE ) )
443 : {
444 2856 : dpit1 = abs_s( sub( T_op_fr[1], T_op_fr[0] ) ); // Q6
445 2856 : dpit2 = abs_s( sub( T_op_fr[2], T_op_fr[1] ) ); // Q6
446 2856 : dpit3 = abs_s( sub( T_op_fr[3], T_op_fr[2] ) ); // Q6
447 :
448 2856 : test();
449 2856 : test();
450 2856 : test();
451 2856 : test();
452 2856 : test();
453 2856 : test();
454 2856 : test();
455 2856 : test();
456 2856 : test();
457 2856 : IF( ( GT_16( voicing_fr[0], 19825 /* 0.605 in Q15 */ ) ) && /* normalized correlation high in 1st sf. */
458 : ( GT_16( voicing_fr[1], 19825 /* 0.605 in Q15 */ ) ) && /* normalized correlation high in 2nd sf. */
459 : ( GT_16( voicing_fr[2], 19825 /* 0.605 in Q15 */ ) ) && /* normalized correlation high in 3rd sf. */
460 : ( GT_16( voicing_fr[3], 19825 /* 0.605 in Q15 */ ) ) && /* normalized correlation high in 4th sf. */
461 : ( GT_32( mean_ee, 256 /* 4.0f in Q6 */ ) ) && /* energy concentrated in low frequencies */
462 : ( LT_16( dpit1, 3 << 6 ) ) && /* pitch change between consecutive subframes below 3.0 (Q6) */
463 : ( LT_16( dpit2, 3 << 6 ) ) &&
464 : ( LT_16( dpit3, 3 << 6 ) ) )
465 : {
466 1380 : coder_type = VOICED;
467 1380 : move16();
468 : }
469 1476 : ELSE IF( st_fx->Opt_SC_VBR && EQ_16( st_fx->input_bwidth, NB ) && LT_16( vadnoise, 20 << 8 ) )
470 : {
471 0 : test();
472 0 : test();
473 0 : test();
474 0 : test();
475 0 : test();
476 0 : test();
477 0 : test();
478 0 : IF( GT_16( voicing_fr[0], 8192 /* 0.25 in Q15 */ ) && /* normalized correlation high in 1st sf. */
479 : ( GT_16( voicing_fr[1], 8192 /* 0.25 in Q15 */ ) ) && /* normalized correlation high in 2nd sf. */
480 : ( GT_16( voicing_fr[2], 8192 /* 0.25 in Q15 */ ) ) && /* normalized correlation high in 3rd sf. */
481 : ( GT_16( voicing_fr[3], 8192 /* 0.25 in Q15 */ ) ) && /* normalized correlation high in 4th sf. */
482 : ( GT_32( mean_ee, 64 ) ) && /* energy concentrated in low frequencies (64 = 1.0f in Q6) */
483 : ( LT_16( dpit1, 5 << 6 ) ) && /* pitch change between consecutive subframes below 5.0 (Q6) */
484 : ( LT_16( dpit2, 5 << 6 ) ) &&
485 : ( LT_16( dpit3, 5 << 6 ) ) )
486 : {
487 0 : hSC_VBR->set_ppp_generic = 1;
488 0 : move16();
489 0 : coder_type = VOICED;
490 0 : move16();
491 : }
492 : }
493 :
494 : /* set VOICED mode for frames with very stable pitch and high correlation
495 : and avoid to switch to AUDIO/MUSIC later */
496 2856 : voicing_m = mac_r( L_mac( L_mac( L_mult( voicing_fr[3], 8192 /* 0.25 in Q15 */ ), voicing_fr[2], 8192 /* 0.25 in Q15 */ ), voicing_fr[1], 8192 /* 0.25 in Q15 */ ), voicing_fr[0], 8192 /* 0.25 in Q15 */ ); /* average of the four subframe correlations */
497 2856 : test();
498 2856 : test();
499 2856 : test();
500 2856 : test();
501 2856 : test();
502 2856 : IF( *flag_spitch || ( LE_16( dpit1, 3 << 6 ) && LE_16( dpit2, 3 << 6 ) && LE_16( dpit3, 3 << 6 ) &&
503 : GT_16( voicing_m, 31130 /* 0.95f in Q15 */ ) && GT_16( st_fx->voicing_sm_fx, 31785 /* 0.97f in Q15 */ ) ) )
504 : {
505 46 : coder_type = VOICED;
506 46 : move16();
507 46 : *flag_spitch = 1;
508 46 : move16(); /*to avoid switch to AUDIO/MUSIC later*/
509 : }
510 : }
511 :
512 : /*-----------------------------------------------------------------*
513 : * Channel-aware mode - set RF mode and total bitrate
514 : *-----------------------------------------------------------------*/
515 :
516 3100 : st_fx->rf_mode = st_fx->Opt_RF_ON; /* Q0 */
517 3100 : move16();
518 :
519 3100 : IF( EQ_16( coder_type, GENERIC ) )
520 : {
521 1582 : test();
522 1582 : test();
523 1582 : test();
524 1582 : test();
525 1582 : IF( ( LT_16( voicing_fr[0], 6554 /* 0.2f in Q15 */ ) ) && /* normalized correlation low in 1st sf. */
526 : ( LT_16( voicing_fr[1], 6554 /* 0.2f in Q15 */ ) ) && /* normalized correlation low in 2nd sf. */
527 : ( LT_16( voicing_fr[2], 6554 /* 0.2f in Q15 */ ) ) && /* normalized correlation low in 3rd sf. */
528 : ( LT_16( voicing_fr[3], 6554 /* 0.2f in Q15 */ ) ) && /* normalized correlation low in 4th sf. */
529 : ( GT_16( vadnoise, 25 << 8 ) ) ) /* when speech is clean */
530 :
531 : {
532 0 : st_fx->rf_mode = 0;
533 0 : move16();
534 : /* Current frame cannot be compressed to pack the partial redundancy;*/
535 : /* rf_mode was just cleared, so the test below is true exactly when Opt_RF_ON is non-zero */
536 0 : if ( st_fx->rf_mode != st_fx->Opt_RF_ON )
537 : {
538 0 : core_coder_mode_switch_fx( st_fx, st_fx->last_total_brate, 0, shift );
539 : }
540 : }
541 : }
542 :
543 : /*-----------------------------------------------------------------*
544 : * Updates
545 : *-----------------------------------------------------------------*/
546 :
547 : /* update spike hysteresis parameters */
548 3100 : test();
549 3100 : if ( st_fx->spike_hyst >= 0 && LT_16( st_fx->spike_hyst, 2 ) ) /* count up while in 0..1; negative means inactive (see the spike_hyst < 0 tests above) */
550 : {
551 5 : st_fx->spike_hyst = add( st_fx->spike_hyst, 1 ); /* Q0 */
552 : }
553 :
554 : /* reset spike hysteresis */
555 3100 : test();
556 3100 : test();
557 3100 : test();
558 3140 : if ( ( GT_16( st_fx->spike_hyst, 1 ) ) &&
559 77 : ( GT_16( dE3, 5 << 8 ) || /* energy increases by more than 5.0 (Q8) */
560 49 : ( GT_16( relE, -3328 ) && ( GT_16( add_sat( mean_voi3, corr_shift ), 22774 ) ) ) ) ) /* relE above -13.0 (Q8) and normalized correlation above 0.695 (Q15) */
561 : {
562 4 : st_fx->spike_hyst = -1;
563 4 : move16();
564 : }
565 :
566 : /* update tilt parameters */
567 3100 : st_fx->ee_old_fx = ee[1];
568 3100 : move32(); /*Q6*/
569 3100 : st_fx->old_dE1_fx = dE1;
570 3100 : move32(); /*Q13*/
571 :
572 : /* save the raw coder_type for various modules later in the codec (the reason is that e.g. UNVOICED is lost at higher rates) */
573 3100 : st_fx->coder_type_raw = coder_type;
574 3100 : move16();
575 :
576 3100 : return coder_type;
577 : }
578 :
579 : /*-------------------------------------------------------------------*
580 : * find_uv_ivas_fx()
581 : *
582 : * Decision about coder type
583 : *-------------------------------------------------------------------*/
584 1111210 : Word16 find_uv_ivas_fx( /* o : coding type */
585 : Encoder_State *st_fx, /* i/o: encoder state structure */
586 : const Word16 *T_op_fr, /* i : pointer to adjusted fractional pitch (4 val.) Q6*/
587 : const Word16 *voicing_fr, /* i : refined correlation for each subframes Q15*/
588 : const Word16 *speech, /* i : pointer to speech signal for E computation Q_new*/
589 : const Word32 *ee, /* i : lf/hf Energy ratio for present frame Q6*/
590 : Word32 *dE1X, /* o : sudden energy increase for S/M classifier Q13*/
591 : const Word16 corr_shift, /* i : normalized correlation correction in noise Q15*/
592 : const Word16 relE, /* i : relative frame energy Q8*/
593 : const Word16 Etot, /* i : total energy Q8*/
594 : const Word32 hp_E[], /* i : energy in HF q_hp_E*/
595 : Word16 *flag_spitch, /* i/o: flag to indicate very short stable pitch and high correlation Q0*/
596 : const Word16 last_core_orig, /* i : original last core Q0*/
597 : STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */
598 : const Word16 Q_new,
599 : const Word16 q_hp_E )
600 : {
601 : Word16 coder_type, i;
602 : Word32 mean_ee, dE1, fac_32;
603 : const Word16 *pt_speech;
604 : Word32 L_tmp, enr_ssf[2 * NB_SSF + 2 * NB_SSF + 2], E_min_th;
605 : Word16 dE2;
606 : Word16 ind_deltaMax, tmp_offset_flag;
607 : Word32 Ltmp0, *pt_enr_ssf, *pt_enr_ssf1, dE2_th;
608 : Word16 exp0, exp1;
609 : Word16 wtmp0, wtmp1;
610 : Word16 fac, mean_voi3, dE3;
611 : Word16 relE_thres;
612 : Word16 mean_voi3_offset;
613 : Word16 voicing_m, dpit1, dpit2, dpit3;
614 : Word16 ee0_th, ee1_th, voi_th, nb_cond, flag_low_relE;
615 1111210 : NOISE_EST_HANDLE hNoiseEst = st_fx->hNoiseEst;
616 1111210 : SC_VBR_ENC_HANDLE hSC_VBR = st_fx->hSC_VBR;
617 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
618 1111210 : Flag Overflow = 0;
619 1111210 : move32();
620 : #endif
621 : Word16 Last_Resort;
622 : Word16 vadnoise;
623 :
624 1111210 : IF( hSC_VBR != NULL )
625 : {
626 0 : Last_Resort = hSC_VBR->Last_Resort; /* Q0 */
627 0 : move16();
628 0 : vadnoise = hSC_VBR->vadnoise_fx; /* Q8 */
629 0 : move16();
630 : }
631 : ELSE
632 : {
633 1111210 : Last_Resort = 0;
634 1111210 : move16();
635 1111210 : vadnoise = 0;
636 1111210 : move16();
637 : }
638 :
639 : /*-----------------------------------------------------------------*
640 : * Detect sudden energy increases to catch voice and music
641 : * temporal events (dE1)
642 : *
643 : * - Find maximum energy per short subblocks.
644 : * Two subblock sets are used shifted by half the subblock length
645 : * - Find maximum energy ratio between adjacent subblocks
646 : *-----------------------------------------------------------------*/
647 :
648 : /* Find maximum energy per short subblocks */
649 1111210 : pt_speech = speech - SSF;
650 1111210 : pt_enr_ssf = enr_ssf + 2 * NB_SSF;
651 21112990 : FOR( i = 0; i < 2 * ( NB_SSF + 1 ); i++ )
652 : {
653 20001780 : emaximum_fx( Q_new, pt_speech, SSF, pt_enr_ssf );
654 20001780 : pt_speech += ( SSF / 2 );
655 20001780 : pt_enr_ssf++;
656 : }
657 :
658 1111210 : dE1 = 0;
659 1111210 : move16();
660 1111210 : ind_deltaMax = 0;
661 1111210 : move16();
662 1111210 : pt_enr_ssf = enr_ssf + 2 * NB_SSF;
663 1111210 : pt_enr_ssf1 = pt_enr_ssf + 2;
664 :
665 : /* Test on energy increase between adjacent sub-subframes */
666 1111210 : exp1 = 0;
667 1111210 : move16();
668 18890570 : FOR( i = 0; i < 2 * NB_SSF; i++ )
669 : {
670 : /*fac = *pt_enr_ssf1 / (*pt_enr_ssf + 1);*/
671 17779360 : Ltmp0 = L_max( *pt_enr_ssf, 1 );
672 17779360 : exp0 = norm_l( Ltmp0 );
673 17779360 : wtmp0 = extract_h( L_shl( Ltmp0, exp0 ) );
674 17779360 : exp1 = sub( norm_l( *pt_enr_ssf1 ), 1 );
675 17779360 : wtmp1 = extract_h( L_shl( *pt_enr_ssf1, exp1 ) );
676 17779360 : fac = div_s( wtmp1, wtmp0 );
677 17779360 : fac_32 = L_shr_o( L_deposit_l( fac ), add( sub( exp1, exp0 ), 15 - 13 ), &Overflow ); /* fac32 in Q13*/
678 :
679 17779360 : if ( GT_32( fac_32, dE1 ) )
680 : {
681 3433389 : ind_deltaMax = i;
682 3433389 : move16();
683 : }
684 :
685 17779360 : dE1 = L_max( dE1, fac_32 ); /* Q13 */
686 :
687 17779360 : pt_enr_ssf++;
688 17779360 : pt_enr_ssf1++;
689 : }
690 :
691 1111210 : IF( hStereoClassif != NULL )
692 : {
693 760867 : IF( st_fx->idchan == 0 )
694 : {
695 410255 : hStereoClassif->dE1_ch1_fx = dE1; /* Q13 */
696 410255 : move32();
697 410255 : hStereoClassif->dE1_ch1_e = 31 - Q13;
698 410255 : move16();
699 : }
700 : ELSE
701 : {
702 350612 : hStereoClassif->dE1_ch2_fx = dE1; /* Q13 */
703 350612 : move32();
704 350612 : hStereoClassif->dE1_ch2_e = 31 - Q13;
705 350612 : move16();
706 : }
707 : }
708 :
709 1111210 : if ( dE1X != NULL )
710 : {
711 1111210 : *dE1X = dE1; /* Q13 */
712 1111210 : move32();
713 : }
714 :
715 : /*-----------------------------------------------------------------*
716 : * Average spectral tilt
717 : * Average voicing (normalized correlation)
718 : *-----------------------------------------------------------------*/
719 :
720 : /*mean_ee = 1.0f/3.0f * (st->ee_old + ee[0] + ee[1]); */ /* coefficients take into account the position of the window */
721 1111210 : mean_ee = L_add_o( L_add_o( st_fx->ee_old_fx, ee[0], &Overflow ), ee[1], &Overflow ); /* Q6 */
722 1111210 : mean_ee = Mult_32_16( mean_ee, 10923 ); /*Q6*/
723 :
724 : /* mean_voi3 = 1.0f/3.0f * (voicing[0] + voicing[1] + voicing[2]);*/
725 1111210 : Ltmp0 = L_mult( st_fx->voicing_fx[0], 10923 /* 1/3 in Q15 */ ); /* Q31 */
726 1111210 : Ltmp0 = L_mac( Ltmp0, st_fx->voicing_fx[1], 10923 /* 1/3 in Q15 */ ); /* Q31 */
727 1111210 : mean_voi3 = mac_r_sat( Ltmp0, st_fx->voicing_fx[2], 10923 /* 1/3 in Q15 */ ); /*Q15*/
728 : /*-----------------------------------------------------------------*
729 : * Total frame energy difference (dE3)
730 : *-----------------------------------------------------------------*/
731 :
732 1111210 : dE3 = sub( Etot, extract_h( hNoiseEst->Etot_last_32fx ) ); /*Q8*/
733 :
734 : /*-----------------------------------------------------------------*
735 : * Energy decrease after spike (dE2)
736 : *-----------------------------------------------------------------*/
737 :
738 : /* set different thresholds and conditions for NB and WB input */
739 1111210 : dE2_th = 30 << 10;
740 1111210 : move32();
741 1111210 : nb_cond = 1;
742 1111210 : move16(); /* no additional condition for WB input */
743 1111210 : IF( st_fx->input_bwidth == NB )
744 : {
745 3835 : dE2_th = 21 << 10;
746 3835 : move32();
747 3835 : if ( GE_16( add_o( mean_voi3, corr_shift, &Overflow ), 22282 ) ) /*( mean_voi3 + corr_shift ) >= 0.68f*/
748 : {
749 2624 : nb_cond = 0;
750 2624 : move16();
751 : }
752 : }
753 :
754 : /* calcualte maximum energy decrease */
755 1111210 : dE2 = 0;
756 1111210 : move16(); /* Test on energy decrease after an energy spike */
757 1111210 : pt_enr_ssf = enr_ssf + 2 * NB_SSF;
758 :
759 1111210 : test();
760 1111210 : IF( GT_32( dE1, 30 << 13 ) && nb_cond ) /*>30 Q13*/
761 : {
762 18749 : IF( LT_16( sub( shl( NB_SSF, 1 ), ind_deltaMax ), L_ENR ) )
763 : {
764 7912 : st_fx->old_ind_deltaMax = ind_deltaMax; /* Q0 */
765 7912 : move16();
766 7912 : Copy32( pt_enr_ssf, st_fx->old_enr_ssf_fx, 2 * NB_SSF ); /* Qx */
767 : }
768 : ELSE
769 : {
770 10837 : st_fx->old_ind_deltaMax = -1;
771 10837 : move16();
772 10837 : dE2 = find_ener_decrease_fx( ind_deltaMax, pt_enr_ssf ); /*Q10*/
773 :
774 10837 : if ( GT_32( dE2, dE2_th ) )
775 : {
776 149 : st_fx->spike_hyst = 0;
777 149 : move16();
778 : }
779 : }
780 : }
781 : ELSE
782 : {
783 1092461 : IF( st_fx->old_ind_deltaMax >= 0 )
784 : {
785 8930 : Copy32( st_fx->old_enr_ssf_fx, enr_ssf, 2 * NB_SSF ); /* Qx */
786 8930 : dE2 = find_ener_decrease_fx( st_fx->old_ind_deltaMax, enr_ssf ); /* Q10 */
787 :
788 8930 : if ( GT_32( dE2, dE2_th ) )
789 : {
790 107 : st_fx->spike_hyst = 1;
791 107 : move16();
792 : }
793 : }
794 :
795 1092461 : st_fx->old_ind_deltaMax = -1;
796 1092461 : move16();
797 : }
798 :
799 : /*-----------------------------------------------------------------*
800 : * Detection of voiced offsets (tmp_offset_flag)
801 : *-----------------------------------------------------------------*/
802 :
803 1111210 : tmp_offset_flag = 1;
804 1111210 : move16();
805 :
806 1111210 : IF( st_fx->input_bwidth != NB )
807 : {
808 1107375 : ee0_th = 154; /*2.4 in Q6 */
809 1107375 : move16();
810 1107375 : voi_th = 24248; /*0.74f Q15 */
811 1107375 : move16();
812 : }
813 : ELSE
814 : {
815 3835 : ee0_th = 627; /*9.8f Q6 */
816 3835 : move16();
817 3835 : voi_th = 24904; /*0.76f Q15*/
818 3835 : move16();
819 : }
820 :
821 1111210 : E_min_th = L_shl( E_MIN_IVAS_FX_Q31, sub( q_hp_E, Q31 ) );
822 :
823 1111210 : test();
824 1111210 : test();
825 1111210 : test();
826 1111210 : IF( ( EQ_16( st_fx->last_coder_type_raw, UNVOICED ) ) || /* previous frame was unvoiced */
827 : ( ( LT_32( ee[0], ee0_th ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy is concentrated in high frequencies provided that some energy is present in HF */
828 : ( LT_16( add_o( st_fx->voicing_fx[0], corr_shift, &Overflow ), voi_th ) ) ) ) /* normalized correlation is low */
829 : {
830 129102 : tmp_offset_flag = 0;
831 129102 : move16();
832 : }
833 :
834 : /*-----------------------------------------------------------------*
835 : * Decision about UC
836 : *-----------------------------------------------------------------*/
837 :
838 : /* SC-VBR - set additional parameters and thresholds for SC-VBR */
839 1111210 : mean_voi3_offset = 0;
840 1111210 : move16();
841 1111210 : flag_low_relE = 0;
842 1111210 : move16();
843 1111210 : ee1_th = 608; /*9.5 Q6*/
844 1111210 : move16();
845 1111210 : test();
846 1111210 : test();
847 1111210 : IF( st_fx->Opt_SC_VBR || ( EQ_16( st_fx->idchan, 1 ) && EQ_16( st_fx->element_mode, IVAS_CPE_TD ) ) ) /* Allow the low energy flag for the secondary channel */
848 : {
849 3827 : ee1_th = 544; /*8.5f Q6*/
850 3827 : move16();
851 :
852 : /* SC-VBR - determine the threshold on relative energy as a function of lp_noise */
853 3827 : IF( st_fx->input_bwidth != NB )
854 : {
855 : /*relE_thres = 0.700f * st->lp_noise - 33.5f; (lp_noise in Q8, constant Q8<<16) */
856 3827 : L_tmp = L_mac( -562036736 /* -33.5f in Q24 */, 22938 /* 0.7 in Q15 */, st_fx->lp_noise_fx ); // Q24
857 3827 : IF( Last_Resort == 0 )
858 : {
859 : /*relE_thres = 0.650f * st->lp_noise - 33.5f; (lp_noise in Q8, constant Q8<<16)*/
860 3827 : L_tmp = L_mac( -562036736 /* -33.5f in Q24 */, 21299 /* 0.650f in Q15 */, st_fx->lp_noise_fx ); // Q24
861 : }
862 3827 : relE_thres = round_fx( L_tmp );
863 : }
864 : ELSE
865 : {
866 :
867 : /*relE_thres = 0.60f * st->lp_noise - 28.2f; (lp_noise in Q8, constant Q8<<16)*/
868 0 : L_tmp = L_mac( -473117491 /* -28.2f in Q24 */, 19661 /* 0.6f in Q15 */, st_fx->lp_noise_fx ); // Q24
869 0 : relE_thres = round_fx( L_tmp );
870 : }
871 3827 : relE_thres = s_max( relE_thres, -6400 /* -25.0f in Q8 */ ); /* Q8 */
872 :
873 : /* SC-VBR = set flag on low relative energy */
874 3827 : if ( LT_16( relE, relE_thres ) )
875 : {
876 333 : flag_low_relE = 1;
877 333 : move16();
878 : }
879 :
880 : /* SC-VBR - correction of voicing threshold for NB inputs (important only in noisy conditions) */
881 3827 : test();
882 3827 : if ( st_fx->input_bwidth == NB && LT_16( vadnoise, 20 << 8 ) ) /* vadnoise in Q8, constant Q0<<8 */
883 : {
884 0 : mean_voi3_offset = 1638; /*0.05f Q15*/
885 0 : move16();
886 : }
887 : }
888 :
889 : /* make decision whether frame is unvoiced */
890 1111210 : coder_type = GENERIC;
891 1111210 : move16();
892 1111210 : IF( st_fx->input_bwidth == NB )
893 : {
894 3835 : test();
895 3835 : test();
896 3835 : test();
897 3835 : test();
898 3835 : test();
899 3835 : test();
900 3835 : test();
901 3835 : test();
902 3835 : test();
903 3835 : test();
904 3835 : IF( ( ( LT_16( add_o( mean_voi3, corr_shift, &Overflow ), add( 22282 /* 0.68 in Q15 */, mean_voi3_offset ) ) ) && /* normalized correlation low */
905 : ( LT_16( add_o( st_fx->voicing_fx[2], corr_shift, &Overflow ), 25887 /* 0.79 in Q15 */ ) ) && /* normalized correlation low on look-ahead - onset detection */
906 : ( LT_32( ee[0], 640 /* 10.0f in Q6 */ ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy concentrated in high frequencies provided that some energy is present in HF... */
907 : ( LT_32( ee[1], ee1_th ) ) && ( GT_32( hp_E[1], E_min_th ) ) && /* ... biased towards look-ahead to detect onsets */
908 : ( tmp_offset_flag == 0 ) && /* Take care of voiced offsets */
909 : /*( st_fx->music_hysteresis_fx == 0 ) &&*/ /* ... and in segment after AUDIO frames */
910 : ( LE_32( dE1, 237568 /* 29.0f in Q13 */ ) ) && /* Avoid on sharp energy spikes */
911 : ( LE_32( st_fx->old_dE1_fx, 237568 /* 29.0f in Q13 */ ) ) && /* + one frame hysteresis */
912 : ( st_fx->spike_hyst < 0 ) ) || /* Avoid after sharp energy spikes followed by decay (e.g. castanets) */
913 : flag_low_relE ) /* low relative frame energy (only for SC-VBR) */
914 : {
915 117 : coder_type = UNVOICED;
916 117 : move16();
917 : }
918 : }
919 : ELSE
920 : {
921 1107375 : test();
922 1107375 : test();
923 1107375 : test();
924 1107375 : test();
925 1107375 : test();
926 1107375 : test();
927 1107375 : test();
928 1107375 : test();
929 1107375 : test();
930 1695047 : if ( ( ( LT_16( add_o( mean_voi3, corr_shift, &Overflow ), add( 22774 /* 0.695f in Q15 */, mean_voi3_offset ) ) ) && /* normalized correlation low */
931 : /*( LT_16( add_o( st_fx->voicing_fx[2], corr_shift, &Overflow ), 25887 ) ) && */ /* normalized correlation low on look-ahead - onset detection */
932 963801 : ( LT_32( ee[0], 397 /* 6.2f in Q6 */ ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy concentrated in high frequencies provided that some energy is present in HF... */
933 233437 : ( LT_32( ee[1], 397 /* 6.2f in Q6 */ ) ) && ( GT_32( hp_E[1], E_min_th ) ) && /* ... biased towards look-ahead to detect onsets */
934 83573 : ( tmp_offset_flag == 0 ) && /* Take care of voiced offsets */
935 : /*( st_fx->music_hysteresis_fx == 0 ) && */ /* ... and in segment after AUDIO frames */
936 163837 : ( LE_32( dE1, 245760 /* 30.0f in Q13 */ ) ) && /* Avoid on sharp energy spikes */
937 80264 : ( LE_32( st_fx->old_dE1_fx, 245760 /* 30.0f in Q13 */ ) ) && /* + one frame hysteresis */
938 77897 : ( st_fx->spike_hyst < 0 ) ) /* Avoid after sharp energy spikes followed by decay (e.g. castanets) */
939 1029774 : || ( flag_low_relE && ( LE_32( st_fx->old_dE1_fx, 245760 /* 30.0f in Q13 */ ) ) ) ) /* low relative frame energy (only for SC-VBR) */
940 : {
941 77871 : coder_type = UNVOICED;
942 77871 : move16();
943 : }
944 : }
945 :
946 : /*-----------------------------------------------------------------*
947 : * Decision about VC
948 : *-----------------------------------------------------------------*/
949 1111210 : if ( st_fx->Opt_SC_VBR )
950 : {
951 0 : hSC_VBR->set_ppp_generic = 0;
952 : }
953 1111210 : move16();
954 :
955 1111210 : test();
956 1111210 : test();
957 1111210 : IF( EQ_16( st_fx->localVAD, 1 ) && EQ_16( coder_type, GENERIC ) && NE_16( last_core_orig, AMR_WB_CORE ) )
958 : {
959 871859 : dpit1 = abs_s( sub( T_op_fr[1], T_op_fr[0] ) ); /* Q6 */
960 871859 : dpit2 = abs_s( sub( T_op_fr[2], T_op_fr[1] ) ); /* Q6 */
961 871859 : dpit3 = abs_s( sub( T_op_fr[3], T_op_fr[2] ) ); /* Q6 */
962 :
963 871859 : test();
964 871859 : test();
965 871859 : test();
966 871859 : test();
967 871859 : test();
968 871859 : test();
969 871859 : test();
970 871859 : test();
971 871859 : test();
972 871859 : IF( ( GT_16( voicing_fr[0], 19825 /* 0.605 in Q15 */ ) ) && /* normalized correlation high in 1st sf. */
973 : ( GT_16( voicing_fr[1], 19825 /* 0.605 in Q15 */ ) ) && /* normalized correlation high in 2nd sf. */
974 : ( GT_16( voicing_fr[2], 19825 /* 0.605 in Q15 */ ) ) && /* normalized correlation high in 3rd sf. */
975 : ( GT_16( voicing_fr[3], 19825 /* 0.605 in Q15 */ ) ) && /* normalized correlation high in 4th sf. */
976 : ( GT_32( mean_ee, 256 /* 4.0f in Q6 */ ) ) && /* energy concentrated in low frequencies */
977 : ( LT_16( dpit1, 3 << 6 ) ) &&
978 : ( LT_16( dpit2, 3 << 6 ) ) &&
979 : ( LT_16( dpit3, 3 << 6 ) ) )
980 : {
981 362502 : coder_type = VOICED;
982 362502 : move16();
983 : }
984 509357 : ELSE IF( st_fx->Opt_SC_VBR && st_fx->input_bwidth == NB && LT_16( vadnoise, 20 << 8 ) )
985 : {
986 0 : test();
987 0 : test();
988 0 : test();
989 0 : test();
990 0 : test();
991 0 : test();
992 0 : test();
993 0 : IF( GT_16( voicing_fr[0], 8192 /* 0.25 in Q15 */ ) && /* normalized correlation high in 1st sf. */
994 : ( GT_16( voicing_fr[1], 8192 /* 0.25 in Q15 */ ) ) && /* normalized correlation high in 2nd sf. */
995 : ( GT_16( voicing_fr[2], 8192 /* 0.25 in Q15 */ ) ) && /* normalized correlation high in 3rd sf. */
996 : ( GT_16( voicing_fr[3], 8192 /* 0.25 in Q15 */ ) ) && /* normalized correlation high in 4th sf. */
997 : ( GT_32( mean_ee, 64 /* 1.0f in Q6 */ ) ) && /* energy concentrated in low frequencies */
998 : ( LT_16( dpit1, 5 << 6 ) ) &&
999 : ( LT_16( dpit2, 5 << 6 ) ) &&
1000 : ( LT_16( dpit3, 5 << 6 ) ) )
1001 : {
1002 0 : hSC_VBR->set_ppp_generic = 1;
1003 0 : move16();
1004 0 : coder_type = VOICED;
1005 0 : move16();
1006 : }
1007 : }
1008 :
1009 : /* set VOICED mode for frames with very stable pitch and high correlation
1010 : and avoid switching to AUDIO/MUSIC later */
1011 871859 : voicing_m = mac_r( L_mac( L_mac( L_mult( voicing_fr[3], 8192 /* 0.25 in Q15 */ ), voicing_fr[2], 8192 /* 0.25 in Q15 */ ), voicing_fr[1], 8192 /* 0.25 in Q15 */ ), voicing_fr[0], 8192 /* 0.25 in Q15 */ ); /* Q15 */
1012 871859 : test();
1013 871859 : test();
1014 871859 : test();
1015 871859 : test();
1016 871859 : test();
1017 871859 : IF( *flag_spitch || ( LE_16( dpit1, 3 << 6 ) && LE_16( dpit2, 3 << 6 ) && LE_16( dpit3, 3 << 6 ) &&
1018 : GT_16( voicing_m, 31130 /* 0.95f in Q15 */ ) && GT_16( st_fx->voicing_sm_fx, 31785 /* 0.97f in Q15 */ ) ) )
1019 : {
1020 37440 : coder_type = VOICED;
1021 37440 : move16();
1022 37440 : *flag_spitch = 1;
1023 37440 : move16(); /*to avoid switch to AUDIO/MUSIC later*/
1024 : }
1025 : }
1026 :
1027 : /*-----------------------------------------------------------------*
1028 : * Channel-aware mode - set RF mode and total bitrate
1029 : *-----------------------------------------------------------------*/
1030 :
1031 1111210 : st_fx->rf_mode = st_fx->Opt_RF_ON; /* Q0 */
1032 1111210 : move16();
1033 :
1034 1111210 : IF( EQ_16( coder_type, GENERIC ) )
1035 : {
1036 665642 : test();
1037 665642 : test();
1038 665642 : test();
1039 665642 : test();
1040 665642 : IF( ( LT_16( voicing_fr[0], 6554 /* 0.2f in Q15 */ ) ) && /* normalized correlation low in 1st sf. */
1041 : ( LT_16( voicing_fr[1], 6554 /* 0.2f in Q15 */ ) ) && /* normalized correlation low in 2nd sf. */
1042 : ( LT_16( voicing_fr[2], 6554 /* 0.2f in Q15 */ ) ) && /* normalized correlation low in 3rd sf. */
1043 : ( LT_16( voicing_fr[3], 6554 /* 0.2f in Q15 */ ) ) && /* normalized correlation low in 4th sf. */
1044 : ( GT_16( vadnoise, 25 << 8 ) ) ) /* when speech is clean */
1045 :
1046 : {
1047 0 : st_fx->rf_mode = 0;
1048 0 : move16();
1049 : /* Current frame cannot be compressed to pack the partial redundancy;*/
1050 :
1051 0 : IF( NE_16( st_fx->rf_mode, st_fx->Opt_RF_ON ) )
1052 : {
1053 0 : core_coder_mode_switch_ivas_fx( st_fx, st_fx->last_total_brate, 0 );
1054 : }
1055 : }
1056 : }
1057 :
1058 : /*-----------------------------------------------------------------*
1059 : * UNCLR classifier
1060 : *-----------------------------------------------------------------*/
1061 :
1062 1111210 : IF( hStereoClassif != NULL )
1063 : {
1064 760867 : test();
1065 760867 : test();
1066 760867 : test();
1067 760867 : test();
1068 760867 : test();
1069 760867 : IF( st_fx->element_mode > EVS_MONO && ( EQ_16( coder_type, GENERIC ) || EQ_16( coder_type, UNVOICED ) || coder_type == INACTIVE || st_fx->localVAD == 0 ) && LT_16( hStereoClassif->unclr_sw_enable_cnt[st_fx->idchan], MAX_UV_CNT ) )
1070 : {
1071 484253 : hStereoClassif->unclr_sw_enable_cnt[st_fx->idchan] = add( hStereoClassif->unclr_sw_enable_cnt[st_fx->idchan], 1 );
1072 484253 : move16();
1073 : }
1074 : ELSE
1075 : {
1076 276614 : hStereoClassif->unclr_sw_enable_cnt[st_fx->idchan] = 0;
1077 276614 : move16();
1078 : }
1079 : }
1080 :
1081 : /*-----------------------------------------------------------------*
1082 : * Updates
1083 : *-----------------------------------------------------------------*/
1084 :
1085 : /* update spike hysteresis parameters */
1086 1111210 : test();
1087 1111210 : if ( st_fx->spike_hyst >= 0 && LT_16( st_fx->spike_hyst, 2 ) )
1088 : {
1089 405 : st_fx->spike_hyst = add( st_fx->spike_hyst, 1 ); /* Q0 */
1090 405 : move16();
1091 : }
1092 :
1093 : /* reset spike hysteresis */
1094 1111210 : test();
1095 1111210 : test();
1096 1111210 : test();
1097 1114417 : if ( ( GT_16( st_fx->spike_hyst, 1 ) ) &&
1098 6272 : ( GT_16( dE3, 5 << 8 ) || /* energy increases */
1099 5211 : ( GT_16( relE, -3328 /* -13 in Q8 */ ) && ( GT_16( add_sat( mean_voi3, corr_shift ), 22774 /* 0.695 in Q15 */ ) ) ) ) ) /* normalized correlation is high */
1100 : {
1101 243 : st_fx->spike_hyst = -1;
1102 243 : move16();
1103 : }
1104 :
1105 : /* update tilt parameters */
1106 1111210 : st_fx->ee_old_fx = ee[1];
1107 1111210 : move32(); /*Q6*/
1108 1111210 : st_fx->old_dE1_fx = dE1;
1109 1111210 : move32(); /*Q13*/
1110 :
1111 : /* save the raw coder_type for various modules later in the codec (the reason is that e.g. UNVOICED is lost at higher rates) */
1112 1111210 : st_fx->coder_type_raw = coder_type; /* Q0 */
1113 1111210 : move16();
1114 :
1115 1111210 : return coder_type;
1116 : }
|