Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include <stdint.h>
6 : #include "options.h"
7 : #include "cnst.h"
8 : //#include "prot_fx.h"
9 : #include "rom_com.h"
10 : #include "prot_fx.h" /* Function prototypes */
11 : #include "prot_fx_enc.h" /* Function prototypes */
12 :
13 : /*-----------------------------------------------------------------*
14 : * Local constants: fixed-point decision thresholds of the MDCT selector (Q format noted per entry)
15 : *-----------------------------------------------------------------*/
16 :
17 : #define MDCT_SW_SIG_LINE_THR 840 /* 2.85f*LOG_10 in Q7 */ /* Significant spectral line threshold above Etot (dB) */
18 : #define MDCT_SW_SIG_PEAK_THR 9216 /* 36.0f in Q8 */ /* Significant peak threshold below Etot (dB) */
19 : #define MDCT_SW_HI_SPARSE_THR 8192 /* 0.25f in Q15*/ /* Max. ratio of significant spectral lines for the spectrum to be considered sparse */
20 : #define MDCT_SW_HI_ENER_LO_THR 1920 /* 7.5f in Q8 */ /* Hi band low energy threshold (dB) */
21 : #define MDCT_SW_SPARSE_THR 6554 /* 0.25f*0.8f in Q15*/ /* Stricter (0.8x) line ratio; its result feeds the hi band sparseness hysteresis */
22 : #define MDCT_SW_1_VOICING_THR 29491 /* 0.9f in Q15*/ /* Voicing threshold */
23 : #define MDCT_SW_1_VOICING_THR2 23593 /* 0.9f*0.8f in Q15*/ /* 0.8x voicing threshold used by the HQ_CORE preference */
24 : #define MDCT_SW_1_HI_ENER_LO_THR 3200 /* 12.5f in Q8 */ /* Hi band low energy threshold (dB); hi_ener at or below it counts as low energy */
25 : #define MDCT_SW_1_SIG_HI_LEVEL_THR 7168 /* 28.0f in Q8 */ /* High signal level threshold above noise floor (dB) */
26 : #define MDCT_SW_1_SIG_LO_LEVEL_THR 5760 /* 22.5f in Q8 */ /* Low signal level threshold above noise floor (dB) */
27 : #define MDCT_SW_1_COR_THR 20480 /* 80.0f in Q8 */ /* Threshold on cor_map_sum to indicate strongly tonal signal */
28 : #define MDCT_SW_1_COR_THR2 16384 /* 80.0f*0.8f in Q8 */ /* 0.8x cor_map_sum threshold used by the HQ_CORE preference */
29 : #define MDCT_SW_1_SPARSENESS_THR 21299 /* 0.65f in Q15*/ /* Threshold on spectrum sparseness */
30 : #define MDCT_SW_1_SPARSENESS_THR2 17039 /* 0.65f*0.8f in Q15*/ /* 0.8x sparseness threshold used by the HQ_CORE preference */
31 :
32 : #define MDCT_SW_2_VOICING_THR 19661 /* 0.6f in Q15*/ /* Voicing threshold */
33 : #define MDCT_SW_2_VOICING_THR2 15729 /* 0.6f*0.8f in Q15*/ /* 0.8x voicing threshold used by the HQ_CORE preference */
34 : #define MDCT_SW_2_HI_ENER_LO_THR 2432 /* 9.5f in Q8 */ /* Hi band low energy threshold (dB) */
35 : #define MDCT_SW_2_SIG_HI_LEVEL_THR 4864 /* 19.0f in Q8 */ /* High signal level threshold above noise floor (dB) */
36 : #define MDCT_SW_2_SIG_LO_LEVEL_THR 6016 /* 23.5f in Q8 */ /* Low signal level threshold above noise floor (dB) */ /* NOTE(review): larger than MDCT_SW_2_SIG_HI_LEVEL_THR, so both preferences can hold at once - confirm intended */
37 : #define MDCT_SW_2_COR_THR 16000 /* 62.5f in Q8 */ /* Threshold on cor_map_sum to indicate strongly tonal signal */
38 : #define MDCT_SW_2_COR_THR2 12800 /* 62.5f*0.8f in Q8 */ /* 0.8x cor_map_sum threshold used by the HQ_CORE preference */
39 : #define MDCT_SW_2_SPARSENESS_THR 13107 /* 0.4f in Q15*/ /* Threshold on spectrum sparseness */
40 : #define MDCT_SW_2_SPARSENESS_THR2 10486 /* 0.4f*0.8f in Q15*/ /* 0.8x sparseness threshold used by the HQ_CORE preference */
41 :
42 : /*--------------------------------------------------------------------------*
43 : * get_sparseness()
44 : * Scores how peaky a log-energy spectrum is: counts the bins in Bin_E[1..n-2] that lie strictly above both neighbours and above the (converted) threshold.
45 : * Returns (n2 - num_max) / n2 in Q15 with n2 = (n - 2) / 2, so fewer peaks give a larger value.
46 : *--------------------------------------------------------------------------*/
47 :
48 297 : static Word16 get_sparseness( /* Returns sparseness measure (Q15) */
49 : const Word16 Bin_E[], /* i : per bin energy dB Q7 */
50 : Word16 n, /* i : number of bins Q0 */
51 : Word16 thr /* i : peak threshold Q8 */
52 : )
53 : {
54 : Word16 num_max, i;
55 :
56 297 : thr = add( thr, mult( thr, 4958 ) ); /* Convert to 10*log() domain from 10*log10() domain, and also to Q7 (4958 is about 0.1513 in Q15, i.e. an overall factor of about ln(10)/2) */
57 :
58 297 : thr = s_max( thr, 384 ); /* 3.0 in Q7 */ /* Set an absolute minimum for close to silent signals */
59 :
60 297 : num_max = 0;
61 297 : move16();
62 :
63 37719 : FOR( i = 1; i < n - 1; ++i ) /* first and last bin are skipped: each candidate needs two neighbours */
64 : {
65 37422 : IF( GT_16( Bin_E[i], s_max( s_max( Bin_E[i - 1], Bin_E[i + 1] ), thr ) ) ) /* strict local maximum that also exceeds thr */
66 : {
67 4697 : num_max = add( num_max, 1 );
68 : }
69 : }
70 :
71 297 : n = shr( sub( n, 2 ), 1 ); /* (n - 2) / 2: normalisation denominator (half of the inspected bins) */
72 297 : return div_s( sub( n, num_max ), n ); // Q15; NOTE(review): needs n >= 4 on entry so this denominator is non-zero - confirm div_s preconditions
73 : }
74 : /*--------------------------------------------------------------------------*
75 : * get_mean_ener()
76 : * Averages enerBuffer[0..n-1] (each entry pre-shifted right for headroom) and converts the mean to the log domain.
77 : * The log exponent is corrected by the headroom shift and by enerBuffer_exp before scaling; the result is returned in Q8.
78 : *--------------------------------------------------------------------------*/
79 :
80 297 : static Word16 get_mean_ener( /* Returns mean energy in dB (Q8) */
81 : const Word32 enerBuffer[], /* i : CLDFB buffers enerBuffer_exp*/
82 : Word16 enerBuffer_exp, /* i : exponent of enerBuffer */
83 : Word16 n /* i : number of bins */
84 : )
85 : {
86 : Word32 L_tmp;
87 : Word16 i, shift, frac_nrg, exp_nrg;
88 :
89 297 : shift = sub( 14, norm_s( n ) ); /* headroom shift for summing n values; presumably floor(log2(n)) - depends on norm_s */
90 297 : IF( LT_16( shl( 1, shift ), n ) ) /* bump the shift when 2^shift is still smaller than n */
91 : {
92 297 : shift = add( shift, 1 );
93 : }
94 :
95 297 : L_tmp = L_deposit_l( 0 );
96 7425 : FOR( i = 0; i < n; ++i )
97 : {
98 7128 : L_tmp = L_add( L_tmp, L_shr( enerBuffer[i], shift ) ); /* accumulate the pre-shifted energies */
99 : }
100 297 : L_tmp = Mult_32_16( L_tmp, div_s( 1, n ) ); /* scale the sum by div_s( 1, n ), presumably 1/n in Q15, to get the mean; requires n > 0 */
101 :
102 : /* Log energy */
103 297 : exp_nrg = norm_l( L_tmp );
104 297 : frac_nrg = Log2_norm_lc( L_shl( L_tmp, exp_nrg ) );
105 297 : exp_nrg = sub( 30, exp_nrg );
106 297 : exp_nrg = sub( add( exp_nrg, shift ), sub( 31, enerBuffer_exp ) ); /* add back the headroom shift and apply the buffer exponent */
107 297 : L_tmp = Mpy_32_16( exp_nrg, frac_nrg, 9864 ); /* log10(2) in Q15 */ /* NOTE(review): confirm this scaling yields dB (10*log10) as the header states - depends on Mpy_32_16, not visible here */
108 :
109 297 : return round_fx( L_shl( L_tmp, 8 ) ); // Q8
110 : }
111 : /*--------------------------------------------------------------------------*
112 : * MDCT_selector_fx()
113 : * Decides between TCX_20_CORE and HQ_CORE for the current frame and may write the choice to st->core; does nothing unless st->mdct_sw_enable is MODE1 or MODE2.
114 : * Also updates hTcxEnc->prev_hi_sparse and hTcxEnc->prev_hi_ener. The test()/move16() calls are presumably complexity-counting instrumentation (not defined in this file).
115 : *--------------------------------------------------------------------------*/
116 :
117 297 : void MDCT_selector_fx(
118 : Encoder_State *st, /* i/o: Encoder State */
119 : Word16 sp_floor, /* i : Noise floor estimate Q7 */
120 : const Word16 Etot, /* i : Total energy Q8 */
121 : const Word16 cor_map_sum, /* i : harmonicity factor Q8 */
122 : const Word32 enerBuffer[], /* i : CLDFB buffers enerBuffer_exp*/
123 : const Word16 enerBuffer_exp /* i : exponent of enerBuffer */
124 : )
125 : {
126 297 : TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc; /* the selector memories (prev_hi_sparse, prev_hi_ener) are accessed through this handle */
127 297 : test();
128 297 : IF( EQ_16( st->mdct_sw_enable, MODE1 ) || EQ_16( st->mdct_sw_enable, MODE2 ) )
129 : {
130 : Word16 hi_ener, frame_voicing, sparseness;
131 : Word16 peak_count;
132 : Word16 prefer_tcx, prefer_hq_core, switching_point, hi_sparse, sparse;
133 : Word16 lob_cldfb, hib_cldfb, lob_fft, hib_fft;
134 : Word16 i, tmp;
135 : Word16 sig_lo_level_thr, sig_hi_level_thr, cor_thr, cor_thr2, voicing_thr, voicing_thr2, sparseness_thr, sparseness_thr2, hi_ener_lo_thr;
136 : Word16 last_core;
137 :
138 297 : sp_floor = shl( sp_floor, 1 ); /* convert to Q8 */
139 :
140 297 : IF( ( st->bwidth == NB ) ) /* NB: hi band is 3.2-4.0 kHz; CLDFB indices are frequency / 400 */
141 : {
142 0 : lob_cldfb = 3200 / 400;
143 0 : move16();
144 0 : hib_cldfb = 4000 / 400;
145 0 : move16();
146 0 : lob_fft = ( L_FFT / 2 ) / 2; /* 3.2 KHz */
147 0 : move16();
148 0 : hib_fft = ( 40 * ( L_FFT / 2 ) ) / 64; /* 4.0 KHz */
149 0 : move16();
150 : }
151 297 : ELSE IF( EQ_16( st->bwidth, WB ) ) /* WB: hi band is 4.8-8.0 kHz in the CLDFB domain */
152 : {
153 0 : lob_cldfb = 4800 / 400;
154 0 : move16();
155 0 : hib_cldfb = 8000 / 400;
156 0 : move16();
157 0 : lob_fft = 3 * L_FFT / 2 / 4; /* 4.8 KHz */
158 0 : move16();
159 0 : hib_fft = L_FFT / 2; /* 6.4 KHz (should be 8 KHz) */
160 0 : move16();
161 : }
162 : ELSE /* any other bandwidth: hi band starts at 6.4 kHz and ends at 16 kHz, or 24 kHz for FB */
163 : {
164 297 : lob_cldfb = 6400 / 400;
165 297 : move16();
166 297 : hib_cldfb = 16000 / 400;
167 297 : move16();
168 297 : if ( EQ_16( st->bwidth, FB ) )
169 : {
170 0 : hib_cldfb = 24000 / 400;
171 0 : move16();
172 : }
173 297 : lob_fft = L_FFT / 2; /* 6.4 KHz */
174 297 : move16();
175 297 : hib_fft = L_FFT / 2; /* 6.4 KHz (should be 8 KHz) */
176 297 : move16();
177 : }
178 :
179 : /* st->last_core is reset to TCX_20_CORE in init_acelp() => fix it here */
180 297 : last_core = st->last_core;
181 297 : move16();
182 297 : test();
183 297 : if ( EQ_16( st->last_codec_mode, MODE1 ) && EQ_16( last_core, TCX_20_CORE ) ) /* a TCX_20_CORE last core after a MODE1 frame is treated as HQ_CORE */
184 : {
185 0 : last_core = HQ_CORE;
186 0 : move16();
187 : }
188 :
189 : /* Voicing */
190 297 : frame_voicing = add( shr( st->voicing_fx[0], 1 ), shr( st->voicing_fx[1], 1 ) ); /* mean of the first two voicing values, each halved before the add */
191 :
192 : /* Spectral sparseness */
193 297 : sparseness = get_sparseness( st->lgBin_E_fx, lob_fft, sub( Etot, MDCT_SW_SIG_PEAK_THR ) ); // Q15, measured on bins below lob_fft with a peak threshold of Etot - 36 dB
194 :
195 : /* Hi band energy */
196 297 : hi_ener = get_mean_ener( &enerBuffer[lob_cldfb], enerBuffer_exp, sub( hib_cldfb, lob_cldfb ) ); /* mean over CLDFB bands lob_cldfb .. hib_cldfb - 1 */
197 :
198 : /* Hi band sparseness */
199 297 : IF( GE_16( st->bwidth, SWB ) )
200 : {
201 : /* For SWB, assume hi band sparseness based on 4.8 KHz-6.4 KHz band */
202 297 : lob_fft = 3 * L_FFT / 2 / 4; /* 4.8 KHz */
203 297 : move16();
204 : }
205 297 : peak_count = 0;
206 297 : move16();
207 297 : tmp = add( MDCT_SW_SIG_LINE_THR, shr( Etot, 1 ) ); /* Q7 */ /* Etot is halved from Q8 to Q7 before the line threshold is added */
208 9801 : FOR( i = lob_fft; i < hib_fft; ++i ) /* count bins in [lob_fft, hib_fft) at or above the line threshold */
209 : {
210 9504 : if ( GE_16( st->lgBin_E_fx[i], tmp ) )
211 : {
212 1512 : peak_count = add( peak_count, 1 );
213 : }
214 : }
215 :
216 297 : hi_sparse = 0;
217 297 : move16();
218 297 : if ( LE_16( peak_count, mult_r( sub( hib_fft, lob_fft ), MDCT_SW_HI_SPARSE_THR ) ) ) /* at most 25% of the inspected bins are significant */
219 : {
220 231 : hi_sparse = 1;
221 231 : move16();
222 : }
223 :
224 297 : sparse = 0;
225 297 : move16();
226 297 : if ( LE_16( peak_count, mult_r( sub( hib_fft, lob_fft ), MDCT_SW_SPARSE_THR ) ) ) /* stricter 20% ratio, only consumed by the hysteresis below */
227 : {
228 218 : sparse = 1;
229 218 : move16();
230 : }
231 :
232 : /* Hysteresis */
233 297 : test();
234 297 : test();
235 297 : if ( hTcxEnc->prev_hi_sparse > 0 && sparse > 0 && GE_16( s_min( s_min( st->voicing_fx[0], st->voicing_fx[1] ), st->voicing_fx[2] ), MDCT_SW_1_VOICING_THR ) ) /* previous memory positive, strict ratio met, and all three voicing values high; the MODE1 voicing threshold is used in both modes */
236 : {
237 96 : hi_sparse = 1;
238 96 : move16();
239 : }
240 :
241 : /* Allowed switching point? */
242 297 : test();
243 297 : test();
244 297 : test();
245 297 : test();
246 297 : test();
247 297 : test();
248 297 : test();
249 297 : test();
250 297 : test();
251 297 : test();
252 297 : test();
253 840 : switching_point = ( NE_16( last_core, HQ_CORE ) && NE_16( last_core, TCX_20_CORE ) ) || /* previous core was non-MDCT */
254 400 : ( LE_16( hTcxEnc->prev_hi_ener, MDCT_SW_HI_ENER_LO_THR ) || LE_16( hi_ener, MDCT_SW_HI_ENER_LO_THR ) ) || /* hi band is close to silent */
255 679 : ( EQ_16( last_core, HQ_CORE ) && ( EQ_16( st->mdct_sw_enable, MODE1 ) || ( hi_sparse > 0 && hTcxEnc->prev_hi_sparse >= 0 && LE_16( hTcxEnc->prev_hi_sparse, 1 ) ) ) ) || /* HQ_CORE and hi band became sparse */
256 68 : ( EQ_16( last_core, TCX_20_CORE ) && ( hi_sparse == 0 && hTcxEnc->prev_hi_sparse > 0 ) ); /* TCX and hi band became dense */
257 :
258 297 : IF( EQ_16( st->mdct_sw_enable, MODE1 ) ) /* load the mode-specific threshold set */
259 : {
260 297 : sig_lo_level_thr = MDCT_SW_1_SIG_LO_LEVEL_THR; // Q8
261 297 : move16();
262 297 : sig_hi_level_thr = MDCT_SW_1_SIG_HI_LEVEL_THR; // Q8
263 297 : move16();
264 297 : cor_thr = MDCT_SW_1_COR_THR; // Q8
265 297 : move16();
266 297 : cor_thr2 = MDCT_SW_1_COR_THR2; // Q8
267 297 : move16();
268 297 : voicing_thr = MDCT_SW_1_VOICING_THR; // Q15
269 297 : move16();
270 297 : voicing_thr2 = MDCT_SW_1_VOICING_THR2; // Q15
271 297 : move16();
272 297 : sparseness_thr = MDCT_SW_1_SPARSENESS_THR; // Q15
273 297 : move16();
274 297 : sparseness_thr2 = MDCT_SW_1_SPARSENESS_THR2; // Q15
275 297 : move16();
276 297 : hi_ener_lo_thr = MDCT_SW_1_HI_ENER_LO_THR; // Q8
277 297 : move16();
278 : }
279 : ELSE /* st->mdct_sw_enable == MODE2 */
280 : {
281 0 : sig_lo_level_thr = MDCT_SW_2_SIG_LO_LEVEL_THR; // Q8
282 0 : move16();
283 0 : sig_hi_level_thr = MDCT_SW_2_SIG_HI_LEVEL_THR; // Q8
284 0 : move16();
285 0 : cor_thr = MDCT_SW_2_COR_THR; // Q8
286 0 : move16();
287 0 : cor_thr2 = MDCT_SW_2_COR_THR2; // Q8
288 0 : move16();
289 0 : voicing_thr = MDCT_SW_2_VOICING_THR; // Q15
290 0 : move16();
291 0 : voicing_thr2 = MDCT_SW_2_VOICING_THR2; // Q15
292 0 : move16();
293 0 : sparseness_thr = MDCT_SW_2_SPARSENESS_THR; // Q15
294 0 : move16();
295 0 : sparseness_thr2 = MDCT_SW_2_SPARSENESS_THR2; // Q15
296 0 : move16();
297 0 : hi_ener_lo_thr = MDCT_SW_2_HI_ENER_LO_THR; // Q8
298 0 : move16();
299 : }
300 :
301 297 : test();
302 297 : test();
303 297 : test();
304 297 : test();
305 297 : test();
306 536 : prefer_tcx = ( GE_16( sub( Etot, sp_floor ), sig_hi_level_thr ) ) && /* noise floor is low */
307 679 : ( GE_16( cor_map_sum, cor_thr ) || GE_16( frame_voicing, voicing_thr ) || GE_16( sparseness, sparseness_thr ) ) && /* strong tonal components */
308 226 : ( LE_16( hi_ener, hi_ener_lo_thr ) || hi_sparse > 0 ); /* high freqs have low energy or are sparse */
309 :
310 297 : test();
311 297 : test();
312 297 : test();
313 297 : test();
314 297 : test();
315 297 : test();
316 571 : prefer_hq_core = ( LT_16( sub( Etot, sp_floor ), sig_lo_level_thr ) ) || /* noise floor is very high */
317 954 : ( LT_16( cor_map_sum, cor_thr2 ) && LT_16( frame_voicing, voicing_thr2 ) && LT_16( sparseness, sparseness_thr2 ) ) || /* too weak tonal components */
318 540 : ( EQ_16( st->mdct_sw_enable, MODE1 ) && prefer_tcx == 0 && EQ_16( st->transientDetection.transientDetector.bIsAttackPresent, 1 ) ); /* MODE1 only: attack present while TCX is not preferred */
319 :
320 : /* Prefer HQ_CORE on transients */
321 297 : test();
322 297 : IF( EQ_16( st->mdct_sw_enable, MODE2 ) && EQ_16( st->transientDetection.transientDetector.bIsAttackPresent, 1 ) ) /* MODE2: an attack overrides both preferences */
323 : {
324 0 : prefer_tcx = 0;
325 0 : move16();
326 0 : prefer_hq_core = 1;
327 0 : move16();
328 : }
329 :
330 297 : test();
331 297 : test();
332 297 : test();
333 297 : IF( switching_point && ( prefer_tcx || prefer_hq_core ) ) /* pick a core only at an allowed switching point and when at least one preference is set */
334 : {
335 230 : IF( prefer_tcx ) /* TCX takes precedence when both preferences are set */
336 : {
337 212 : st->core = TCX_20_CORE;
338 212 : move16();
339 : }
340 : ELSE /* prefer_hq_core */
341 : {
342 18 : st->core = HQ_CORE;
343 18 : move16();
344 : }
345 : }
346 67 : ELSE IF( EQ_16( last_core, HQ_CORE ) || EQ_16( last_core, TCX_20_CORE ) ) /* otherwise keep the previous MDCT core; st->core is left untouched if the last core was neither */
347 : {
348 65 : st->core = last_core;
349 65 : move16();
350 : }
351 :
352 297 : test();
353 297 : test();
354 297 : test();
355 : /* Prevent the usage of HQ_CORE on noisy-speech or inactive */
356 297 : IF( EQ_16( st->mdct_sw_enable, MODE2 ) && EQ_16( st->core, HQ_CORE ) && ( EQ_16( st->flag_noisy_speech_snr, 1 ) || st->vad_flag == 0 ) )
357 : {
358 0 : st->core = TCX_20_CORE;
359 0 : move16();
360 : }
361 :
362 : /* Update memories */
363 297 : hTcxEnc->prev_hi_sparse = add( hTcxEnc->prev_hi_sparse, hi_sparse ); /* grows by one for each sparse frame */
364 297 : move16();
365 297 : if ( hi_sparse <= 0 ) /* a non-sparse frame overwrites the counter with hi_sparse */
366 : {
367 66 : hTcxEnc->prev_hi_sparse = hi_sparse;
368 66 : move16();
369 : }
370 297 : hTcxEnc->prev_hi_sparse = s_min( hTcxEnc->prev_hi_sparse, 2 ); /* saturate the counter at 2 */
371 297 : hTcxEnc->prev_hi_ener = hi_ener; /* kept for the next frame's "hi band is close to silent" test */
372 297 : move16();
373 : }
374 297 : }
375 : /*--------------------------------------------------------------------------*
376 : * MDCT_selector_reset_fx()
377 : *
378 : * reset MDCT selector memories: prev_hi_ener is set to 0 and prev_hi_sparse to -1
379 : *--------------------------------------------------------------------------*/
380 10118 : void MDCT_selector_reset_fx(
381 : TCX_ENC_HANDLE hTcxEnc ) /* i/o: TCX Encoder Handle; only the two selector memories are written */
382 : {
383 10118 : hTcxEnc->prev_hi_ener = 0;
384 10118 : move16();
385 10118 : hTcxEnc->prev_hi_sparse = -1; /* negative start value: fails the prev_hi_sparse >= 0 check in MDCT_selector_fx until one frame has updated it */
386 10118 : move16();
387 10118 : }
388 : /*--------------------------------------------------------------------------*
389 : * MDCT_classifier_reset_fx()
390 : *
391 : * reset MDCT classifier memories: clas_sec_old_fx, clas_final_old, last_gain1 and last_gain2
392 : *--------------------------------------------------------------------------*/
393 8087 : void MDCT_classifier_reset_fx(
394 : TCX_ENC_HANDLE hTcxEnc /* i/o: TCX Encoder Handle */
395 : )
396 : {
397 8087 : hTcxEnc->clas_sec_old_fx = 8192; /* 1.0f in Q13 */
398 8087 : move16();
399 8087 : hTcxEnc->clas_final_old = 1; /* Q0 */
400 8087 : move16();
401 8087 : hTcxEnc->last_gain1 = 0; /* gain memories cleared; move32() suggests 32-bit fields - TODO confirm in the handle definition */
402 8087 : move32();
403 8087 : hTcxEnc->last_gain2 = 0;
404 8087 : move32();
405 :
406 8087 : return;
407 : }
|