Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include <stdlib.h>
6 : #include <assert.h>
7 : #include "options.h"
8 : #include "cnst.h"
9 : #include "rom_enc.h"
10 : #include "rom_com.h"
11 : #include "stl.h"
12 : #include "prot_fx.h" /* Function prototypes */
13 : #include "prot_fx_enc.h" /* Function prototypes */
14 : #ifdef DEBUGGING
15 : #include "debug.h"
16 : #endif
17 : #include <math.h>
18 : #include "ivas_prot_fx.h"
19 :
20 :
21 : /*---------------------------------------------------------------------*
22 : * Local constants
23 : *---------------------------------------------------------------------*/
24 :
25 : #define ATT_NSEG 32 /* number of equal segments L_FRAME is divided into (see ATT_SEG_LEN) */
26 : #define ATT_SEG_LEN ( L_FRAME / ATT_NSEG ) /* length of one segment, in the units of L_FRAME */
27 : #define ATT_3LSUB_POS ( 3 * ATT_NSEG / NB_SUBFR ) /* segment index; presumably the start of the subframe with index 3 - confirm against attack_det_fx() */
28 : #define ATT_3LSUB_POS_16k 26 /* (short)((4.0f * ATT_NSEG / (float)NB_SUBFR16k) + 0.5f) */
29 :
30 : #define LOG_PROB_CONST 11292 /*0.5f * N_FEATURES * LOG_PI2 in Q10 */
31 : #define DLP_BIAS 0.138121f /* floating-point reference value of DLP_BIAS_FX */
32 : #define DLP_BIAS_FX 36208 /* DLP_BIAS (0.138121f) in Q18 */
33 :
34 : #define TON_ALPHA_FX 31130 /* 0.95f in Q15 */
35 : #define THR_MASS_MAX_FX 3565158 /* 0.85f in Q22 */
36 : #define THR_MASS_MIN_FX 3145728 /* 0.75f in Q22 */
37 : #define THR_MASS_STEP_UP_FX 41943 /* 0.01f in Q22 */
38 : #define THR_MASS_STEP_DN_FX 83886 /* 0.02f in Q22 */
39 :
40 : /*---------------------------------------------------------------------*
41 : * Local functions
42 : *---------------------------------------------------------------------*/
43 :
/* 1st stage: speech/music classification based on the GMM model; returns the decision flag (1-music, 0-speech or noise) */
44 : static Word16 sp_mus_classif_gmm_fx( Encoder_State *st_fx, const Word16 localVAD_HE_SAD, const Word16 lsp_new[M], const Word16 cor_map_sum, const Word32 epsP[M + 1], const Word32 PS[], Word16 non_sta, Word16 relE, Word16 *voi_fv, Word16 *cor_map_sum_fv, Word16 *LPCErr, Word16 Q_esp, Word16 *high_lpn_flag_ptr );
45 :
/* 2nd stage speech/music classifier (rewrite music to speech in onsets) */
46 : static void sp_mus_classif_2nd_fx( Encoder_State *st, const Word16 Etot, Word16 *attack_flag, const Word16 *inp, const Word16 Qx );
47 :
/* improvement of the 1st stage decision on mixed/music content */
48 : static void music_mixed_classif_improv_fx( Encoder_State *st, const Word16 *new_inp, const Word32 *epsP, Word16 Q_epsP, Word16 etot, Word16 old_cor, Word16 cor_map_sum );
49 :
/* context-based improvement of the 1st and 2nd stage decision on stable tonal signals */
50 : static void tonal_context_improv_fx( Encoder_State *st_fx, const Word32 PS[], const Word16 voi_fv, const Word16 cor_map_sum_fv, const Word16 LPCErr, const Word16 Qx );
51 :
/* NOTE(review): definitions and call sites of the next three helpers are outside this excerpt; purposes are not documented here */
52 : static void var_cor_calc_fx( const Word16 old_corr, Word16 *mold_corr, Word16 var_cor_t[], Word16 *high_stable_cor );
53 :
54 : static Word16 attack_det_fx( const Word16 *inp, const Word16 Qx, const Word16 last_clas, const Word16 localVAD, const Word16 coder_type, const Word32 total_brate );
55 :
56 : static void order_spectrum_fx( Word16 *vec, Word16 len );
57 :
/* sparse-spectrum detection, called at 13.2 kbps (WB/SWB) so that GSC can be used instead of LR-MDCT */
58 : static void detect_sparseness_fx( Encoder_State *st_fx, const Word16 localVAD_HE_SAD, const Word16 voi_fv );
59 : // Q18
/* Three tables of N_SMC_MIXTURES Word32 entries each, all stored in Q18.
 * Presumably the log mixture weights of the speech, music and noise models;
 * the code that reads them is outside this excerpt - confirm there.
 * NOTE(review): the tables have external linkage and are not const; check
 * whether anything writes to them before tightening the declaration. */
60 : Word32 log_weights_speech_compute[N_SMC_MIXTURES] = {
61 : -578045, -483403, -473370, -468152, -379470, -473234
62 : };
63 : Word32 log_weights_music_compute[N_SMC_MIXTURES] = {
64 : -486797, -522830, -315523, -429999, -775981, -477255
65 : };
66 : Word32 log_weights_noise_compute[N_SMC_MIXTURES] = {
67 : -439941, -576743, -269243, -645452, -529228, -542196
68 : };
69 : /*---------------------------------------------------------------------*
70 : * speech_music_clas_init_fx()
71 : *
72 : * Initialization of speech/music classifier
 *
 * Writes fixed start-up values into the fields of *hSpMusClas that are
 * listed in the body: counters, flags and most history buffers are
 * cleared to 0, while a few fields get non-zero defaults whose
 * fixed-point meaning is given next to each literal.
 *
 * hSpMusClas is dereferenced without a NULL check, so the caller must
 * pass a valid handle. Nothing is returned.
 *
 * Each scalar store is followed by a move16()/move32() call; these are
 * presumably the operation counters from stl.h - confirm there.
 *
 * speech_music_clas_init_ivas_fx() in this file initializes a larger
 * set of fields; this function instead sets wdrop_fx and does not touch
 * the additional fields handled there.
73 : *---------------------------------------------------------------------*/
74 :
75 3 : void speech_music_clas_init_fx(
76 : SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle */
77 : )
78 : {
79 : Word16 i;
80 :
81 :
82 3 : hSpMusClas->inact_cnt = 0;
83 3 : move16();
84 3 : set16_fx( hSpMusClas->past_dec, 0, HANG_LEN - 1 );
85 3 : set16_fx( hSpMusClas->past_dlp_fx, 0, HANG_LEN - 1 );
86 3 : set16_fx( hSpMusClas->past_log_enr_fx, -1448, NB_BANDS_SPMUS ); /* log(E_MIN) in Q8 */
87 :
88 3 : hSpMusClas->sp_mus_state = -8;
89 3 : move16();
90 3 : hSpMusClas->wdrop_fx = 0;
91 3 : move16();
92 3 : hSpMusClas->wdlp_0_95_sp_fx = 0;
93 3 : move16();
94 3 : set16_fx( hSpMusClas->last_lsp_fx, 0, M_LSP_SPMUS );
95 3 : hSpMusClas->last_cor_map_sum_fx = 0;
96 3 : move16();
97 3 : hSpMusClas->last_non_sta_fx = 0;
98 3 : move16();
99 3 : set32_fx( hSpMusClas->past_PS_fx, 0, HIGHEST_FBIN - LOWEST_FBIN );
100 3 : hSpMusClas->past_ps_diff_fx = 0;
101 3 : move16();
102 3 : hSpMusClas->past_epsP2_fx = 1024; /* 1.0f in Q10 */
103 3 : move16();
104 :
105 :
 /* the four gsc_thres_fx entries start at their TH_x_MIN_FX constants */
106 3 : hSpMusClas->gsc_thres_fx[0] = TH_0_MIN_FX;
107 3 : move16();
108 3 : hSpMusClas->gsc_thres_fx[1] = TH_1_MIN_FX;
109 3 : move16();
110 3 : hSpMusClas->gsc_thres_fx[2] = TH_2_MIN_FX;
111 3 : move16();
112 3 : hSpMusClas->gsc_thres_fx[3] = TH_3_MIN_FX;
113 3 : move16();
114 3 : set16_fx( hSpMusClas->gsc_lt_diff_etot_fx, 0, 40 );
115 3 : hSpMusClas->gsc_mem_etot_fx = 0;
116 3 : move16();
117 3 : hSpMusClas->gsc_last_music_flag = 0;
118 3 : move16();
119 3 : hSpMusClas->gsc_nb_thr_1 = 0;
120 3 : move16();
121 3 : hSpMusClas->gsc_nb_thr_3 = 0;
122 3 : move16();
123 3 : hSpMusClas->mold_corr_fx = 29491; /* 0.9f in Q15 */
124 3 : move16();
125 3 : hSpMusClas->mean_avr_dyn_fx = 64;
126 3 : move16(); /* 0.5f in Q7 */
127 3 : hSpMusClas->last_sw_dyn_fx = 2560; /* 10.0f in Q7 */
128 3 : move16();
129 : /* speech/music classifier improvement */
 /* all seven BUF_LEN-long buffers are filled element by element; only buf_flux_fx gets a non-zero fill value */
130 183 : FOR( i = 0; i < BUF_LEN; i++ )
131 : {
132 180 : hSpMusClas->buf_flux_fx[i] = -12800;
133 180 : move16(); /*-100.0 in Q7 */
134 180 : hSpMusClas->buf_pkh_fx[i] = 0;
135 180 : move16();
136 180 : hSpMusClas->buf_epsP_tilt_fx[i] = 0;
137 180 : move16();
138 180 : hSpMusClas->buf_cor_map_sum_fx[i] = 0;
139 180 : move16();
140 180 : hSpMusClas->buf_Ntonal_fx[i] = 0;
141 180 : move16();
142 180 : hSpMusClas->buf_Ntonal2_fx[i] = 0;
143 180 : move16();
144 180 : hSpMusClas->buf_Ntonal_lf_fx[i] = 0;
145 180 : move16();
146 : }
147 :
148 3 : set16_fx( hSpMusClas->lpe_buf_fx, 0, HANG_LEN_INIT );
149 3 : set16_fx( hSpMusClas->voicing_buf_fx, 0, HANG_LEN_INIT );
150 3 : hSpMusClas->gsc_hangover = 0;
151 3 : move16();
152 3 : set16_fx( hSpMusClas->sparse_buf_fx, 0, HANG_LEN_INIT );
153 3 : set16_fx( hSpMusClas->hf_spar_buf_fx, 0, HANG_LEN_INIT );
154 3 : hSpMusClas->LT_sparse_fx = 0;
155 3 : move16();
156 3 : hSpMusClas->gsc_cnt = 0;
157 3 : move16();
158 3 : set16_fx( hSpMusClas->old_Bin_E_fx, 0, 3 * N_OLD_BIN_E );
159 3 : set16_fx( hSpMusClas->buf_etot_fx, 0, 4 );
160 3 : set16_fx( hSpMusClas->buf_dlp_fx, 0, 10 );
161 :
162 3 : hSpMusClas->UV_cnt1 = 300;
163 3 : move16();
164 3 : hSpMusClas->LT_UV_cnt1_fx = 16000;
165 3 : move16(); /*250.0f in Q6 */
166 3 : hSpMusClas->onset_cnt = 0;
167 3 : move16();
168 3 : hSpMusClas->attack_hangover = 0;
169 3 : move16();
170 3 : hSpMusClas->dec_mov_fx = 0;
171 3 : move16();
172 3 : hSpMusClas->dec_mov1_fx = 0;
173 3 : move16();
174 3 : hSpMusClas->mov_log_max_spl_fx = 25600;
175 3 : move16(); /*200.0 in Q7 */
176 3 : hSpMusClas->old_lt_diff_fx[0] = 0;
177 3 : move16();
178 3 : hSpMusClas->old_lt_diff_fx[1] = 0;
179 3 : move16();
180 :
181 : /* GSC - pitch excitation parameters */
182 3 : hSpMusClas->high_stable_cor = 0;
183 3 : move16();
184 3 : set16_fx( hSpMusClas->var_cor_t_fx, 0, VAR_COR_LEN );
185 :
186 3 : hSpMusClas->lps_fx = 0;
187 3 : move16();
188 3 : hSpMusClas->lpm_fx = 0;
189 3 : move16();
190 3 : hSpMusClas->lt_dec_thres_fx = 5120;
191 3 : move16(); /*10 in Q9 */
192 3 : hSpMusClas->ener_RAT_fx = 0;
193 3 : move16();
194 :
195 : /* speech/music classification */
196 3 : set16_fx( hSpMusClas->lt_old_mode, 1, 3 );
197 3 : hSpMusClas->lt_voicing = 16384 /*0.5f Q15*/;
198 3 : move16();
199 3 : hSpMusClas->lt_corr = 16384 /*0.5f Q15*/;
200 3 : move16();
201 3 : hSpMusClas->lt_tonality = 0;
202 3 : move32();
203 3 : set16_fx( hSpMusClas->lt_corr_pitch, 0, 3 );
204 3 : hSpMusClas->lt_hangover = 0;
205 3 : move16();
206 3 : hSpMusClas->lowrate_pitchGain = 0;
207 3 : move16();
208 :
209 :
210 3 : hSpMusClas->lt_music_hangover = 0;
211 3 : move16();
212 3 : set16_fx( hSpMusClas->tonality2_buf_fx, 0, HANG_LEN_INIT );
213 3 : set16_fx( hSpMusClas->tonality3_buf_fx, 0, HANG_LEN_INIT );
214 3 : set16_fx( hSpMusClas->LPCErr_buf_fx, 0, HANG_LEN_INIT );
215 3 : hSpMusClas->lt_music_state = 0;
216 3 : move16();
217 3 : hSpMusClas->lt_speech_state = 0;
218 3 : move16();
219 3 : hSpMusClas->lt_speech_hangover = 0;
220 3 : move16();
221 :
222 :
223 3 : return;
224 : }
225 :
/*---------------------------------------------------------------------*
 * speech_music_clas_init_ivas_fx()
 *
 * Initialization of the speech/music classifier state ("ivas" variant,
 * per its name).
 *
 * Writes fixed start-up values into the fields of *hSpMusClas that are
 * listed in the body. Most fields are cleared to 0; non-zero defaults
 * carry their fixed-point meaning next to the literal. prev_FV_fx is
 * the one field derived from a table (hout_intervals_fx).
 *
 * hSpMusClas is dereferenced without a NULL check, so the caller must
 * pass a valid handle. Nothing is returned.
 *
 * Each scalar store is followed by a move16()/move32() call; these are
 * presumably the operation counters from stl.h - confirm there.
 *---------------------------------------------------------------------*/
226 9536 : void speech_music_clas_init_ivas_fx(
227 : SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle */
228 : )
229 : {
230 : Word16 i;
231 :
232 9536 : set32_fx( hSpMusClas->FV_st_fx, 0, N_SMC_FEATURES );
233 :
234 9536 : hSpMusClas->inact_cnt = 0;
235 9536 : move16();
236 9536 : set16_fx( hSpMusClas->past_dec, 0, HANG_LEN - 1 );
237 9536 : set16_fx( hSpMusClas->past_dlp_fx, 0, HANG_LEN - 1 );
238 :
239 9536 : set32_fx( hSpMusClas->past_dlp_mean_ST_fx, 0, HANG_LEN - 1 );
240 9536 : hSpMusClas->dlp_mean_ST_fx = 0;
241 9536 : move32();
242 9536 : hSpMusClas->dlp_mean_LT_fx = 0;
243 9536 : move32();
244 9536 : hSpMusClas->dlp_var_LT_fx = 0;
245 9536 : move32();
246 :
 /* prev_FV_fx[i] = half of hout_intervals_fx[2*i] plus half of hout_intervals_fx[2*i+1],
    i.e. the mean of the pair of table entries for feature i; each term is halved
    before the addition (presumably to keep the sum inside 32 bits) */
247 152576 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
248 : {
249 143040 : hSpMusClas->prev_FV_fx[i] = L_add( L_shr( hout_intervals_fx[2 * i], 1 ), L_shr( hout_intervals_fx[2 * i + 1], 1 ) );
250 143040 : move32();
251 : }
252 :
253 152576 : FOR( i = 0; i < NB_BANDS_SPMUS; i++ )
254 : {
255 143040 : hSpMusClas->past_log_enr_fx[i] = -1448; /* log(E_MIN) in Q8 */
256 143040 : move16();
257 : }
258 :
259 9536 : hSpMusClas->sp_mus_state = -8;
260 9536 : move16();
261 9536 : hSpMusClas->wdrop_32fx = 0;
262 9536 : move32();
263 9536 : hSpMusClas->wrise_fx = 0;
264 9536 : move16();
265 9536 : hSpMusClas->wdlp_0_95_sp_fx = 0;
266 9536 : move16();
267 9536 : hSpMusClas->wdlp_0_95_sp_32fx = 0;
268 9536 : move32();
269 9536 : hSpMusClas->wdlp_xtalk_fx = 0;
270 9536 : move16();
271 9536 : set16_fx( hSpMusClas->last_lsp_fx, 0, M_LSP_SPMUS );
272 9536 : hSpMusClas->last_cor_map_sum_fx = 0;
273 9536 : move16();
274 9536 : hSpMusClas->last_non_sta_fx = 0;
275 9536 : move16();
276 9536 : set32_fx( hSpMusClas->past_PS_fx, 0, HIGHEST_FBIN - LOWEST_FBIN );
277 9536 : hSpMusClas->past_PS_Q = Q31; /* presumably the Q-format tag of the zeroed past_PS_fx buffer - confirm */
278 9536 : move16();
279 9536 : hSpMusClas->past_ps_diff_fx = 0;
280 9536 : move16();
281 9536 : hSpMusClas->past_epsP2_fx = 1024; /* 1.0f in Q10 */
282 9536 : move16();
283 9536 : hSpMusClas->past_epsP_fx = 0;
284 9536 : move16();
285 9536 : hSpMusClas->flag_spitch_cnt = 0;
286 9536 : move16();
287 :
288 :
 /* the four gsc_thres_fx entries start at their TH_x_MIN_FX constants */
289 9536 : hSpMusClas->gsc_thres_fx[0] = TH_0_MIN_FX;
290 9536 : move16();
291 9536 : hSpMusClas->gsc_thres_fx[1] = TH_1_MIN_FX;
292 9536 : move16();
293 9536 : hSpMusClas->gsc_thres_fx[2] = TH_2_MIN_FX;
294 9536 : move16();
295 9536 : hSpMusClas->gsc_thres_fx[3] = TH_3_MIN_FX;
296 9536 : move16();
297 9536 : set16_fx( hSpMusClas->gsc_lt_diff_etot_fx, 0, 40 );
298 9536 : hSpMusClas->gsc_mem_etot_fx = 0;
299 9536 : move16();
300 9536 : hSpMusClas->gsc_last_music_flag = 0;
301 9536 : move16();
302 9536 : hSpMusClas->gsc_nb_thr_1 = 0;
303 9536 : move16();
304 9536 : hSpMusClas->gsc_nb_thr_3 = 0;
305 9536 : move16();
306 9536 : hSpMusClas->mold_corr_fx = 29491; /* 0.9f in Q15 */
307 9536 : move16();
308 9536 : hSpMusClas->mean_avr_dyn_fx = 64; /* 0.5f in Q7 */
309 9536 : move16();
310 9536 : hSpMusClas->last_sw_dyn_fx = 2560; /* 10.0f in Q7 */
311 9536 : move16();
312 :
313 9536 : hSpMusClas->relE_attack_cnt = 0;
314 9536 : move16();
315 9536 : hSpMusClas->prev_relE_fx = 0;
316 9536 : move16();
317 9536 : hSpMusClas->prev_Etot_fx = 0;
318 9536 : move16();
319 9536 : hSpMusClas->prev_vad = 0;
320 9536 : move16();
321 9536 : hSpMusClas->vad_0_1_cnt = 0;
322 9536 : move16();
323 9536 : hSpMusClas->relE_attack_sum_fx = 0;
324 9536 : move16();
325 :
326 : /* speech/music classifier improvement */
 /* all seven BUF_LEN-long buffers are filled element by element; only buf_flux_fx gets a non-zero fill value */
327 581696 : FOR( i = 0; i < BUF_LEN; i++ )
328 : {
329 572160 : hSpMusClas->buf_flux_fx[i] = -12800; /*-100.0f in Q7 */
330 572160 : move16();
331 572160 : hSpMusClas->buf_pkh_fx[i] = 0;
332 572160 : move16();
333 572160 : hSpMusClas->buf_epsP_tilt_fx[i] = 0;
334 572160 : move16();
335 572160 : hSpMusClas->buf_cor_map_sum_fx[i] = 0;
336 572160 : move16();
337 572160 : hSpMusClas->buf_Ntonal_fx[i] = 0;
338 572160 : move16();
339 572160 : hSpMusClas->buf_Ntonal2_fx[i] = 0;
340 572160 : move16();
341 572160 : hSpMusClas->buf_Ntonal_lf_fx[i] = 0;
342 572160 : move16();
343 : }
344 :
345 9536 : set16_fx( hSpMusClas->lpe_buf_fx, 0, HANG_LEN_INIT );
346 9536 : set16_fx( hSpMusClas->voicing_buf_fx, 0, HANG_LEN_INIT );
347 9536 : hSpMusClas->gsc_hangover = 0;
348 9536 : move16();
349 9536 : set16_fx( hSpMusClas->sparse_buf_fx, 0, HANG_LEN_INIT );
350 9536 : set16_fx( hSpMusClas->hf_spar_buf_fx, 0, HANG_LEN_INIT );
351 9536 : hSpMusClas->LT_sparse_fx = 0;
352 9536 : move16();
353 9536 : hSpMusClas->gsc_cnt = 0;
354 9536 : move16();
355 9536 : hSpMusClas->last_vad_spa = 0;
356 9536 : move16();
357 :
358 9536 : set16_fx( hSpMusClas->old_Bin_E_fx, 0, 3 * N_OLD_BIN_E );
359 9536 : set16_fx( hSpMusClas->buf_etot_fx, 0, 4 );
360 9536 : set16_fx( hSpMusClas->buf_dlp_fx, 0, 10 );
361 :
362 9536 : hSpMusClas->UV_cnt1 = 300;
363 9536 : move16();
364 9536 : hSpMusClas->LT_UV_cnt1_fx = 16000; /* 250.0f in Q6 */
365 9536 : move16();
366 9536 : hSpMusClas->onset_cnt = 0;
367 9536 : move16();
368 9536 : hSpMusClas->attack_hangover = 0;
369 9536 : move16();
370 9536 : hSpMusClas->dec_mov_fx = 0;
371 9536 : move16();
372 9536 : hSpMusClas->dec_mov1_fx = 0;
373 9536 : move16();
374 9536 : hSpMusClas->mov_log_max_spl_fx = 25600; /* 200.0 in Q7 */
375 9536 : move16();
376 9536 : hSpMusClas->old_lt_diff_fx[0] = 0;
377 9536 : move16();
378 9536 : hSpMusClas->old_lt_diff_fx[1] = 0;
379 9536 : move16();
380 :
 /* finc_prev_fx holds ATT_NSEG entries; the fields set to Q31 here are presumably
    the Q-format tags of the zeroed values they accompany - confirm in the struct definition */
381 9536 : set32_fx( hSpMusClas->finc_prev_fx, 0, ATT_NSEG );
382 9536 : hSpMusClas->q_finc_prev = Q31;
383 9536 : move16();
384 9536 : hSpMusClas->lt_finc_fx = 0;
385 9536 : move32();
386 9536 : hSpMusClas->Q_lt_finc = Q31;
387 9536 : move16();
388 :
389 9536 : hSpMusClas->last_strong_attack = 0;
390 9536 : move16();
391 9536 : hSpMusClas->tdm_lt_Etot_fx = 3; /* 0.01f in Q8 */
392 9536 : move16();
393 9536 : set32_fx( hSpMusClas->tod_lt_Bin_E_fx, 0, TOD_NSPEC );
394 9536 : hSpMusClas->Q_tod_lt_Bin_E = Q31;
395 9536 : move16();
396 9536 : set32_fx( hSpMusClas->tod_S_map_lt_fx, 0, TOD_NSPEC );
397 9536 : hSpMusClas->tod_thr_lt_fx = TOD_THR_MASS_FX_Q22;
398 9536 : move32();
399 9536 : hSpMusClas->tod_weight_fx = 0;
400 9536 : move16();
401 9536 : hSpMusClas->tod_S_mass_prev_fx = 0;
402 9536 : move32();
403 9536 : hSpMusClas->tod_S_mass_lt_fx = 0;
404 9536 : move32();
405 :
406 : /* speech/music classification */
407 9536 : set16_fx( hSpMusClas->lt_old_mode, 1, 3 );
408 9536 : hSpMusClas->lt_voicing = 16384; /* 0.5f in Q15 */
409 9536 : move16();
410 9536 : hSpMusClas->lt_corr = 16384; /* 0.5f in Q15 */
411 9536 : move16();
412 9536 : hSpMusClas->lt_tonality = 0;
413 9536 : move32();
414 9536 : set16_fx( hSpMusClas->lt_corr_pitch, 0, 3 );
415 9536 : hSpMusClas->lt_hangover = 0;
416 9536 : move16();
417 9536 : hSpMusClas->lowrate_pitchGain = 0;
418 9536 : move16();
419 :
420 9536 : hSpMusClas->lt_music_hangover = 0;
421 9536 : move16();
422 9536 : set16_fx( hSpMusClas->tonality2_buf_fx, 0, HANG_LEN_INIT );
423 9536 : set16_fx( hSpMusClas->tonality3_buf_fx, 0, HANG_LEN_INIT );
424 9536 : set16_fx( hSpMusClas->LPCErr_buf_fx, 0, HANG_LEN_INIT );
425 9536 : hSpMusClas->lt_music_state = 0;
426 9536 : move16();
427 9536 : hSpMusClas->lt_speech_state = 0;
428 9536 : move16();
429 9536 : hSpMusClas->lt_speech_hangover = 0;
430 9536 : move16();
431 :
432 9536 : hSpMusClas->lt_dec_thres_fx = 5120; /* 10.0f in Q9 */
433 9536 : move16();
434 9536 : hSpMusClas->ener_RAT_fx = 0;
435 9536 : move16();
436 :
437 9536 : hSpMusClas->high_stable_cor = 0;
438 9536 : move16();
439 9536 : set16_fx( hSpMusClas->var_cor_t_fx, 0, VAR_COR_LEN );
440 :
441 9536 : hSpMusClas->lps_fx = 0;
442 9536 : move16();
443 9536 : hSpMusClas->lpm_fx = 0;
444 9536 : move16();
445 9536 : hSpMusClas->lpn_fx = 0;
446 9536 : move16();
447 :
448 9536 : return;
449 : }
450 :
451 : /*---------------------------------------------------------------------*
452 : * speech_music_classif_fx()
453 : *
454 : * Speech/music classification
455 : *
456 : * The following technologies are used based on the outcome of the sp/mus classifier
457 : * sp_aud_decision1 sp_aud_decision2
458 : * 0 0 use ACELP (+TD BWE)
459 : * 1 0 use ACELP (+FD BWE) or HQ/LR-MDCT depending on bitrate
460 : * 1 1 use GSC (+FD BWE) or HQ/LR-MDCT depending on bitrate
461 : *
462 : * 0 1 exceptionally use GSC (+FD BWE) instead of LR-MDCT at 13.2 kbps (WB/SWB) for sparse spectra
 *
 * Processing order:
 *  1. sp_mus_classif_gmm_fx() always runs and sets st->sp_aud_decision1.
 *  2. If codec_mode is MODE1 or sr_core is INT_FS_12k8, the refinement
 *     stages below run in sequence and may rewrite sp_aud_decision1 and
 *     sp_aud_decision2, set GSC_noisy_speech, and (MODE1 only) force
 *     coder_type to AUDIO.
 *  3. Otherwise only sp_aud_decision0 is set, as a copy of sp_aud_decision1;
 *     sp_aud_decision2, GSC_noisy_speech and coder_type are not written.
 *
 * Results are returned through *st, *attack_flag and *high_lpn_flag_ptr.
463 : *---------------------------------------------------------------------*/
464 :
465 3100 : void speech_music_classif_fx(
466 : Encoder_State *st, /* i/o: state structure */
467 : const Word16 *new_inp, /* i : new input signal */
468 : const Word16 *inp, /* i : input signal to locate attack position */
469 : const Word16 localVAD_HE_SAD, /* i : HE-SAD flag without hangover */
470 : const Word16 lsp_new[M], /* i : LSPs in current frame Q15 */
471 : const Word16 cor_map_sum, /* i : correlation map sum (from multi-harmonic anal.)Q8*/
472 : const Word32 epsP[M + 1], /* i : LP prediction error Q_esp*/
473 : const Word32 PS[], /* i : energy spectrum Q_new+QSCALE*/
474 : const Word16 Etot, /* i : total frame energy Q8 */
475 : const Word16 old_cor, /* i : max correlation from previous frame Q15 */
476 : Word16 *attack_flag, /* o : flag to indicate if attack is to be treated by TC or GSC */
477 : Word16 non_sta, /* i : unbound non-stationarity for sp/mus classifier */
478 : Word16 relE, /* i : relative frame energy */
479 : Word16 Q_esp, /* i : scaling of epsP */
480 : Word16 Q_inp, /* i : scaling of input */
481 : Word16 *high_lpn_flag_ptr, /* o : noise log prob flag for NOISE_EST */
482 : Word16 flag_spitch /* i : flag to indicate very short stable pitch */
483 : )
484 : {
 /* scaled features produced by the 1st stage and consumed by the later stages */
485 : Word16 voi_fv, cor_map_sum_fv, LPCErr;
 /* only dereferenced in the "Select AUDIO frames" branch below */
486 3100 : GSC_ENC_HANDLE hGSCEnc = st->hGSCEnc;
487 :
488 : /* 1st stage speech/music classifier based on the GMM model */
489 3100 : st->sp_aud_decision1 = sp_mus_classif_gmm_fx( st, localVAD_HE_SAD, lsp_new, cor_map_sum,
490 : epsP, PS, non_sta, relE, &voi_fv, &cor_map_sum_fv, &LPCErr, Q_esp, high_lpn_flag_ptr );
491 :
 /* the refinement stages run only for MODE1 or a 12.8 kHz core sampling rate */
492 3100 : test();
493 3100 : IF( EQ_16( st->codec_mode, MODE1 ) || EQ_32( st->sr_core, INT_FS_12k8 ) )
494 : {
495 :
496 :
497 : /* Improvement of the 1st stage decision on mixed/music content */
498 2050 : test();
499 2050 : IF( st->Opt_SC_VBR == 0 && NE_32( st->total_brate, ACELP_24k40 ) )
500 : {
501 2050 : music_mixed_classif_improv_fx( st, new_inp, epsP, Q_esp, Etot, old_cor, cor_map_sum );
502 : }
503 :
 /* sp_aud_decision0 and sp_aud_decision2 both start from the (possibly improved) 1st stage decision */
504 2050 : st->sp_aud_decision0 = st->sp_aud_decision1;
505 2050 : move16();
506 :
507 : /* 2nd stage speech/music classifier (rewrite music to speech in onsets) */
508 2050 : st->sp_aud_decision2 = st->sp_aud_decision1;
509 2050 : move16();
510 :
 /* the 2nd stage is skipped for NB input */
511 2050 : IF( st->bwidth > NB )
512 : {
 /* NOTE(review): the reason for passing Q_inp - 1 as Qx is not visible in this excerpt */
513 2050 : sp_mus_classif_2nd_fx( st, Etot, attack_flag, inp, Q_inp - 1 );
514 :
515 : /* avoid switch to AUDIO/MUSIC class for very short stable high st->pitch
516 : and/or stable pitch with high correlation at low bitrates*/
517 2050 : test();
518 2050 : test();
519 2050 : IF( flag_spitch && EQ_16( st->bwidth, WB ) && LT_32( st->total_brate, ACELP_13k20 ) )
520 : {
521 0 : st->sp_aud_decision2 = 0;
522 0 : move16();
523 : }
524 : }
525 :
526 :
527 : /* Context-based improvement of 1st and 2nd stage decision on stable tonal signals */
528 2050 : test();
529 2050 : IF( st->Opt_SC_VBR == 0 && NE_32( st->total_brate, ACELP_24k40 ) )
530 : {
 /* NOTE(review): the reason for passing Q_inp + QSCALE - 2 as the scaling of PS is not visible in this excerpt */
531 2050 : tonal_context_improv_fx( st, PS, voi_fv, cor_map_sum_fv, LPCErr, Q_inp + QSCALE - 2 );
532 : }
533 :
534 : /* Avoid using LR-MDCT on sparse spectra, use GSC instead at 13.2 kbps (WB/SWB) */
535 2050 : test();
536 2050 : test();
537 2050 : test();
538 2050 : test();
539 2050 : IF( !st->Opt_SC_VBR && EQ_32( st->total_brate, ACELP_13k20 ) && EQ_16( st->vad_flag, 1 ) &&
540 : ( EQ_16( st->bwidth, WB ) || EQ_16( st->bwidth, SWB ) ) )
541 : {
542 1041 : detect_sparseness_fx( st, localVAD_HE_SAD, voi_fv );
543 : }
544 :
545 : /* override speech/music classification to ACELP when background noise level reaches certain level */
546 : /* this is a patch against mis-classifications during active noisy speech segments */
 /* NOTE(review): 3072 would be 12.0 if lp_noise_fx is stored in Q8 - confirm the Q-format */
547 2050 : IF( GT_16( st->lp_noise_fx, 3072 ) )
548 : {
549 0 : st->sp_aud_decision1 = 0;
550 0 : move16();
551 0 : st->sp_aud_decision2 = 0;
552 0 : move16();
553 : }
554 :
555 :
556 : /* select GSC on SWB noisy speech (only on active unvoiced SWB noisy speech segments) */
557 2050 : st->GSC_noisy_speech = 0;
558 2050 : move16();
559 :
 /* uses the same lp_noise_fx > 3072 threshold as the override above, so sp_aud_decision1 is already 0 whenever that term holds */
560 2050 : test();
561 2050 : test();
562 2050 : test();
563 2050 : test();
564 2050 : test();
565 2050 : test();
566 2050 : IF( EQ_16( st->vad_flag, 1 ) && GE_32( st->total_brate, ACELP_13k20 ) && LT_32( st->total_brate, ACELP_24k40 ) &&
567 : GT_16( st->lp_noise_fx, 3072 ) && st->sp_aud_decision1 == 0 && GE_16( st->bwidth, SWB ) &&
568 : EQ_16( st->coder_type_raw, UNVOICED ) )
569 : {
570 0 : st->GSC_noisy_speech = 1;
571 0 : move16();
572 : }
573 :
574 : /* Select AUDIO frames */
 /* with DEBUGGING defined, st->force == 1 forces AUDIO and st->force == -1 keeps the normal rule; any other value disables the switch */
575 2050 : test();
576 2050 : test();
577 : #ifdef DEBUGGING
578 : if ( st->codec_mode == MODE1 && ( st->force == 1 || ( st->force == -1 && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) ) ) )
579 : #else
580 2050 : IF( EQ_16( st->codec_mode, MODE1 ) && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) )
581 : #endif
582 : {
583 622 : st->coder_type = AUDIO;
584 622 : move16();
585 622 : hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
586 622 : move16();
587 : }
588 : }
589 : ELSE
590 : {
 /* no refinement: only mirror the 1st stage decision into sp_aud_decision0 */
591 1050 : st->sp_aud_decision0 = st->sp_aud_decision1;
592 1050 : move16();
593 : }
594 :
595 :
596 3100 : return;
597 : }
598 :
599 : /*---------------------------------------------------------------------*
600 : * sp_mus_classif_gmm_fx()
601 : *
602 : * Speech/music classification based on GMM model
603 : *---------------------------------------------------------------------*/
604 :
605 3100 : static Word16 sp_mus_classif_gmm_fx( /* o : decision flag (1-music, 0-speech or noise) */
606 : Encoder_State *st_fx, /* i/o: state structure */
607 : const Word16 localVAD_HE_SAD, /* i : local VAD HE flag */
608 : const Word16 lsp_new[M], /* i : LSPs in current frame Q15 */
609 : const Word16 cor_map_sum, /* i : correlation map sum (from multi-harmonic anal.)Q8 */
610 : const Word32 epsP[M + 1], /* i : LP prediciton error Q_esp */
611 : const Word32 PS[], /* i : energy spectrum Q_new+Qscale-2 */
612 : Word16 non_sta, /* i : unbound non-stationarity for sp/mus classifier */
613 : Word16 relE, /* i : relative frame energy */
614 : Word16 *voi_fv, /* o : scaled voicing feature */
615 : Word16 *cor_map_sum_fv, /* o : scaled correlation map feature */
616 : Word16 *LPCErr, /* o : scaled LP prediction error feature */
617 : Word16 Q_esp, /* i : scaling of epsP */
618 : Word16 *high_lpn_flag_ptr /* o : noise log prob flag for NOISE_EST */
619 : )
620 : {
621 : Word16 i, k, p, dec, vad;
622 :
623 3100 : Word16 lsp[M], FV[N_FEATURES], *pFV = FV;
624 : const Word32 *pSF_a;
625 : const Word16 *pSF_m;
626 : Word16 lsf2acos_fact, wrelE, dlp, wdrop, wght;
627 :
628 : Word32 mx;
629 : Word32 sum_PS;
630 : Word16 ftmp, tmp16;
631 : Word16 xm[N_FEATURES];
632 : Word16 lps, lpm;
633 : Word16 lpn;
634 : Word16 e_tmp, f_tmp;
635 : Word32 L_tmp;
636 : Word16 exp1;
637 : Word32 ps_sta;
638 : Word32 ps_diff;
639 : Word16 ps_diff_16;
640 : Word32 dPS[128], PS_norm[128];
641 : Word32 lepsP1;
642 3100 : Word32 max_s = 0, max_m = 0, py_s, py_m;
643 3100 : move32();
644 3100 : move32();
645 : Word32 max_n, py_n; /* pyn */
646 3100 : Word16 ishift[12] = { 8, 0, 2, 2, 2, 2, 2, 1, 0, 2, 2, 1 };
647 3100 : move16();
648 3100 : move16();
649 3100 : move16();
650 3100 : move16();
651 3100 : move16();
652 3100 : move16();
653 3100 : move16();
654 3100 : move16();
655 3100 : move16();
656 3100 : move16();
657 3100 : move16();
658 3100 : move16();
659 : Word16 tmp;
660 : Word16 tmp1, tmp2, exp2, scale, exp3;
661 3100 : SP_MUS_CLAS_HANDLE hSpMusClas = st_fx->hSpMusClas;
662 3100 : HQ_ENC_HANDLE hHQ_core = st_fx->hHQ_core;
663 :
664 : /*------------------------------------------------------------------*
665 : * Initialization
666 : *------------------------------------------------------------------*/
667 :
668 3100 : vad = localVAD_HE_SAD;
669 3100 : move16();
670 :
671 : /*------------------------------------------------------------------*
672 : * Preparation of the feature vector
673 : *------------------------------------------------------------------*/
674 :
675 : /* [0] OL pitch Q0 */
676 : /*(float)(pitch[0] + pitch[1] + pitch[2]) / 3.0f;*/
677 3100 : L_tmp = L_mult( st_fx->pitch[0], 10923 );
678 3100 : L_tmp = L_mac( L_tmp, st_fx->pitch[1], 10923 );
679 3100 : L_tmp = L_mac( L_tmp, st_fx->pitch[2], 10923 );
680 :
681 3100 : test();
682 3100 : IF( EQ_16( st_fx->tc_cnt, 1 ) || EQ_16( st_fx->tc_cnt, 2 ) )
683 : {
684 260 : *pFV++ = st_fx->pitch[2];
685 260 : move16();
686 : }
687 : ELSE
688 : {
689 2840 : *pFV++ = round_fx( L_tmp );
690 2840 : move16();
691 : }
692 :
693 : /* [1] voicing Q15 */
694 : /*(float)(voicing[0] + voicing[1] + voicing[2]) / 3.0f*/
695 3100 : test();
696 3100 : IF( EQ_16( st_fx->tc_cnt, 1 ) || EQ_16( st_fx->tc_cnt, 2 ) )
697 : {
698 260 : *pFV++ = st_fx->voicing_fx[2];
699 260 : move16();
700 : }
701 : ELSE
702 : {
703 2840 : L_tmp = L_mult( st_fx->voicing_fx[0], 10923 );
704 2840 : L_tmp = L_mac( L_tmp, st_fx->voicing_fx[1], 10923 );
705 2840 : L_tmp = L_mac( L_tmp, st_fx->voicing_fx[2], 10923 );
706 2840 : *pFV++ = round_fx_sat( L_tmp );
707 2840 : move16();
708 : }
709 :
710 : /* [2,3,4,5,6] LSFs Q15*/
711 3100 : Copy( lsp_new, lsp, M );
712 3100 : lsf2acos_fact = 25735;
713 3100 : move16(); /* PI/6400 -> Q27 */
714 :
715 : /*ftmp = (float)acos(lsp[1...5]);*/
716 : /**pFV++ = ftmp + st->last_lsp[1...5];*/
717 : /*st->last_lsp[1...5] = ftmp;*/
718 18600 : FOR( i = 1; i < M_LSP_SPMUS; i++ )
719 : {
720 15500 : L_tmp = sub_lsp2lsf_fx( lsp[i] );
721 15500 : tmp16 = round_fx( L_shl( L_mult0( extract_l( L_tmp ), lsf2acos_fact ), 2 ) );
722 15500 : *pFV++ = add( tmp16, hSpMusClas->last_lsp_fx[i] );
723 15500 : move16(); /*Q13*/
724 15500 : hSpMusClas->last_lsp_fx[i] = tmp16;
725 15500 : move16();
726 : }
727 :
728 : /* [7] cor_map_sum Q8 */
729 3100 : *pFV++ = round_fx( L_mac( L_mult( cor_map_sum, 16384 ), hSpMusClas->last_cor_map_sum_fx, 16384 ) ); /* Q8 ->Q7*/
730 3100 : move16();
731 3100 : hSpMusClas->last_cor_map_sum_fx = cor_map_sum;
732 3100 : move16();
733 :
734 : /* [8] non_sta Q8*/
735 3100 : *pFV++ = round_fx( L_mac( L_mult( non_sta, 16384 ), hSpMusClas->last_non_sta_fx, 16384 ) ); /* Q8 -> Q7 */
736 3100 : move16();
737 3100 : hSpMusClas->last_non_sta_fx = non_sta;
738 3100 : move16();
739 :
740 : /* [9] epsP Q10 */
741 3100 : IF( EQ_16( st_fx->bwidth, NB ) )
742 : {
743 0 : *pFV++ = -1687;
744 0 : move16(); /*Q10*/
745 : }
746 : ELSE
747 : {
748 : /*lepsP1 = (float)log(epsP[1] + 1e-5f);*/
749 3100 : IF( epsP[1] != 0 )
750 : {
751 3100 : e_tmp = norm_l( epsP[1] );
752 3100 : f_tmp = Log2_norm_lc( L_shl( epsP[1], e_tmp ) );
753 3100 : e_tmp = sub( 30, add( e_tmp, Q_esp ) );
754 3100 : lepsP1 = Mpy_32_16( e_tmp, f_tmp, 22713 ); /* Q16 */ /* 22713 = ln(2) in Q15 */
755 : }
756 : ELSE
757 : {
758 0 : lepsP1 = L_deposit_l( 0 );
759 : }
760 :
761 : /*ftmp = (float)log(epsP[13]);*/
762 3100 : IF( epsP[13] != 0 )
763 : {
764 3100 : e_tmp = norm_l( epsP[13] );
765 3100 : f_tmp = Log2_norm_lc( L_shl( epsP[13], e_tmp ) );
766 3100 : e_tmp = sub( 30, add( e_tmp, Q_esp ) );
767 3100 : L_tmp = Mpy_32_16( e_tmp, f_tmp, 22713 ); /* Q16 */ /* 22713 = ln(2) in Q15 */
768 : }
769 : ELSE
770 : {
771 0 : L_tmp = L_deposit_l( 0 );
772 : }
773 :
774 : /*ftmp = (float)log(epsP[13]) - lepsP1;*/
775 3100 : L_tmp = L_sub( L_tmp, lepsP1 ); /*Q16 */
776 3100 : ftmp = round_fx( L_shl( L_tmp, 10 ) ); /*Q10 */
777 :
778 : /**pFV++ = ftmp + st->past_epsP2;*/
779 3100 : *pFV++ = add( ftmp, hSpMusClas->past_epsP2_fx );
780 3100 : move16(); /*Q10 */
781 :
782 : /*st->past_epsP2 = ftmp;*/
783 3100 : hSpMusClas->past_epsP2_fx = ftmp;
784 3100 : move16(); /*Q10 */
785 : }
786 :
787 : /* calculation of differential normalized power spectrum */
788 3100 : sum_PS = L_deposit_l( 0 );
789 210800 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
790 : {
791 207700 : sum_PS = L_add_sat( sum_PS, PS[i] );
792 : }
793 3100 : exp1 = norm_l( sum_PS );
794 3100 : tmp1 = round_fx_sat( L_shl( sum_PS, exp1 ) );
795 3100 : exp1 = sub( 30, exp1 );
796 :
797 210800 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
798 : {
799 : /*PS_norm[i] = PS[i] / sum_PS;*/
800 : /*dPS[i] = (float)fabs(PS_norm[i] - st->past_PS[i]);*/
801 207700 : exp2 = norm_l( PS[i] );
802 207700 : tmp2 = round_fx_sat( L_shl( PS[i], exp2 ) );
803 207700 : exp2 = sub( 30, exp2 );
804 :
805 207700 : scale = shr( sub( tmp1, tmp2 ), 15 );
806 207700 : tmp2 = shl( tmp2, scale );
807 207700 : exp2 = sub( exp2, scale );
808 :
809 207700 : exp3 = sub( exp1, exp2 );
810 :
811 207700 : tmp = div_s( tmp2, tmp1 ); /*Q(15+exp3) */
812 207700 : PS_norm[i] = L_shl( tmp, sub( 10, exp3 ) );
813 207700 : move32(); /*Q25 */
814 207700 : dPS[i] = L_abs( L_sub( PS_norm[i], hSpMusClas->past_PS_fx[i - LOWEST_FBIN] ) );
815 207700 : move32(); /*Q25 */
816 : }
817 :
818 : /* [10] ps_diff (spectral difference) Q10*/
819 3100 : ps_diff = 0;
820 3100 : move16();
821 210800 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
822 : {
823 : /*ps_diff += dPS[i];*/
824 207700 : ps_diff = L_add( ps_diff, dPS[i] ); /*Q25*/
825 : }
826 :
827 : /*ps_diff = (float)log(ps_diff + 1e-5f);*/
828 3100 : IF( ps_diff != 0 )
829 : {
830 3100 : e_tmp = norm_l( ps_diff );
831 3100 : f_tmp = Log2_norm_lc( L_shl( ps_diff, e_tmp ) );
832 3100 : e_tmp = sub( 30 - 25, e_tmp );
833 3100 : ps_diff = Mpy_32_16( e_tmp, f_tmp, 22713 ); /* Q16 */ /* 22713 = ln(2) in Q15 */
834 3100 : ps_diff_16 = round_fx( L_shl( ps_diff, 10 ) ); /*Q10 */
835 : }
836 : ELSE
837 : {
838 0 : ps_diff_16 = -11789;
839 0 : move16(); /*Q10 */
840 : }
841 :
842 3100 : *pFV++ = add( ps_diff_16, hSpMusClas->past_ps_diff_fx );
843 3100 : move16(); /*Q10 */
844 3100 : hSpMusClas->past_ps_diff_fx = ps_diff_16;
845 3100 : move16(); /*Q10 */
846 :
847 : /* [11] ps_sta (spectral stationarity) Q11 */
848 3100 : ps_sta = 0;
849 3100 : move16();
850 210800 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
851 : {
852 : /*mx = PS_norm[i] > st->past_PS[i] ? PS_norm[i] : st->past_PS[i];*/
853 207700 : mx = L_max( PS_norm[i], hSpMusClas->past_PS_fx[i - LOWEST_FBIN] ); /*Q25 */
854 :
855 : /*ps_sta += mx / (dPS[i] + 1e-5f);*/
856 207700 : IF( !dPS[i] )
857 : {
858 95 : ps_sta = L_add( ps_sta, L_shr( mx, 9 ) ); /*Q16 */
859 : }
860 : ELSE
861 : {
862 207605 : exp1 = norm_l( L_add( dPS[i], 336 ) );
863 207605 : tmp1 = round_fx_sat( L_shl_sat( L_add( dPS[i], 336 ), exp1 ) );
864 207605 : exp1 = sub( 30, exp1 );
865 :
866 207605 : exp2 = norm_l( mx );
867 207605 : tmp2 = round_fx( L_shl( mx, exp2 ) );
868 207605 : exp2 = sub( 30, exp2 );
869 :
870 207605 : scale = shr( sub( tmp1, tmp2 ), 15 );
871 207605 : tmp2 = shl( tmp2, scale );
872 207605 : exp2 = sub( exp2, scale );
873 :
874 207605 : exp3 = sub( exp1, exp2 );
875 :
876 207605 : tmp = div_s( tmp2, tmp1 ); /*Q(15+exp3) */
877 207605 : L_tmp = L_shl( tmp, sub( 1, exp3 ) ); /*Q16 */
878 207605 : ps_sta = L_add_sat( ps_sta, L_tmp ); /*Q16 */
879 : }
880 : }
881 :
882 : /**pFV++ = (float)log(ps_sta + 1e-5f);*/
883 3100 : ps_sta = L_add_sat( ps_sta, 336 );
884 3100 : e_tmp = norm_l( ps_sta );
885 3100 : f_tmp = Log2_norm_lc( L_shl( ps_sta, e_tmp ) );
886 3100 : e_tmp = sub( 30 - 16, e_tmp );
887 3100 : L_tmp = Mpy_32_16( e_tmp, f_tmp, 22713 ); /* Q16 */ /* 22713 = ln(2) in Q15 */
888 3100 : *pFV++ = round_fx( L_shl( L_tmp, 11 ) ); /*Q11 */
889 3100 : move16();
890 :
891 : /* update PS vector */
892 3100 : Copy32( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS_fx, HIGHEST_FBIN - LOWEST_FBIN );
893 :
894 : /*------------------------------------------------------------------*
895 : * Scaling of the feature vector
896 : *------------------------------------------------------------------*/
897 :
898 : /* FV[0] -> Q0 */
899 : /* FV[1...6] -> Q13*/
900 : /* FV[7,8] -> Q7 */
901 : /* FV[9,10] -> Q10 */
902 : /* FV[11] -> Q11 */
903 :
904 :
905 3100 : pFV = FV;
906 3100 : IF( EQ_16( st_fx->bwidth, NB ) )
907 : {
908 0 : pSF_m = SF_8k_mult_fx;
909 0 : pSF_a = SF_8k_add_fx;
910 : }
911 : ELSE
912 : {
913 3100 : pSF_m = SF_mult_fx;
914 3100 : pSF_a = SF_add_fx;
915 : }
916 :
917 40300 : FOR( i = 0; i < N_FEATURES; i++ )
918 : {
919 : /**pFV = pSF[0] * *pFV + pSF[1];*/
920 37200 : *pFV = round_fx_sat( L_shl_sat( L_mac( pSF_a[i], *pFV, pSF_m[i] ), ishift[i] ) );
921 37200 : move16();
922 37200 : pFV++;
923 : }
924 :
925 3100 : *voi_fv = FV[1];
926 3100 : move16();
927 3100 : *cor_map_sum_fv = FV[7];
928 3100 : move16();
929 3100 : *LPCErr = FV[9];
930 3100 : move16();
931 :
932 :
933 : /*------------------------------------------------------------------*
934 : * Calculation of posterior probability
935 : * Log-probability
936 : *------------------------------------------------------------------*/
937 :
938 3100 : max_s = L_add( MIN_32, 0 );
939 3100 : max_m = L_add( MIN_32, 0 );
940 : /* pyn = 1e-5f;*/
941 3100 : max_n = L_add( MIN_32, 0 );
942 :
943 :
944 21700 : FOR( k = 0; k < N_MIXTURES; k++ )
945 : {
946 : /* for each mixture, calculate the probability of speech or noise and the probability of music */
947 : /* active frames - calculate the probability of speech */
948 241800 : FOR( p = 0; p < N_FEATURES; p++ )
949 : {
950 : /* xm[p] = FV[p] - m_speech[k*N_FEATURES+p];*/
951 223200 : xm[p] = sub_sat( FV[p], m_speech_fx[k * N_FEATURES + p] );
952 223200 : move16(); /*Q15 */
953 : }
954 :
955 : /*py = lvm_speech[k] + dot_product_mat(xm, &invV_speech[k*N_FEATURES*N_FEATURES], N_FEATURES );*/
956 18600 : L_tmp = dot_product_mat_fx( xm, &invV_speech_fx[k * N_FEATURES * N_FEATURES], N_FEATURES ); /*Q10 */
957 18600 : py_s = L_add( lvm_speech_fx[k], L_tmp ); /*Q10 */
958 18600 : max_s = L_max( py_s, max_s );
959 :
960 :
961 : /* pys += (float)exp(py); */
962 :
963 : /* inactive frames - calculate the probability of noise */
964 241800 : FOR( p = 0; p < N_FEATURES; p++ )
965 : {
966 : /*xm[p] = FV[p] - m_noise[k*N_FEATURES+p];*/
967 223200 : xm[p] = sub_sat( FV[p], m_noise_fx[k * N_FEATURES + p] );
968 223200 : move16(); /*Q15 */
969 : }
970 :
971 : /*py = lvm_noise[k] + dot_product_mat(xm, &invV_noise[k*N_FEATURES*N_FEATURES], N_FEATURES );*/
972 18600 : L_tmp = dot_product_mat_fx( xm, &invV_noise_fx[k * N_FEATURES * N_FEATURES], N_FEATURES ); /*Q10 */
973 : /* pyn += (float)exp(py); */
974 18600 : py_n = L_add( lvm_noise_fx[k], L_tmp ); /*Q10 */
975 18600 : max_n = L_max( py_n, max_n );
976 :
977 :
978 : /* either active or inactive frames - calculate the probability of music */
979 241800 : FOR( p = 0; p < N_FEATURES; p++ )
980 : {
981 : /*xm[p] = FV[p] - m_music[k*N_FEATURES+p];*/
982 223200 : xm[p] = sub_sat( FV[p], m_music_fx[k * N_FEATURES + p] );
983 223200 : move16(); /*Q15 */
984 : }
985 :
986 : /*py = lvm_music[k] + dot_product_mat(xm, &invV_music[k*N_FEATURES*N_FEATURES], N_FEATURES );*/
987 18600 : L_tmp = dot_product_mat_fx( xm, &invV_music_fx[k * N_FEATURES * N_FEATURES], N_FEATURES ); /*Q10 */
988 18600 : py_m = L_add( lvm_music_fx[k], L_tmp ); /*Q10 */
989 18600 : max_m = L_max( py_m, max_m );
990 :
991 : /*pym += (float)exp(py);#######*/
992 : }
993 :
994 : /* calculate log-probability */
995 : /*log(0.0001)-0.5f * N_FEATURES * LOG_PI2 in Q9 */
996 3100 : lps = extract_h( L_shl_sat( L_sub( max_s, LOG_PROB_CONST ), 16 - 1 ) ); /*Q9 */
997 3100 : lps = s_max( lps, -10832 );
998 :
999 3100 : lpm = extract_h( L_shl( L_sub( max_m, LOG_PROB_CONST ), 16 - 1 ) ); /*Q9 */
1000 3100 : lpm = s_max( lpm, -10832 );
1001 : /*
1002 : lpn = (float)log(pyn) - 0.5f * N_FEATURES * (float)log(2*PI);
1003 : */
1004 3100 : lpn = extract_h( L_shl_sat( L_sub( max_n, LOG_PROB_CONST ), 16 - 1 ) ); /*Q9 */
1005 3100 : lpn = s_max( lpn, -10832 );
1006 :
1007 3100 : *high_lpn_flag_ptr = 0;
1008 3100 : move16();
1009 3100 : test();
1010 3100 : if ( GT_16( lpn, lps ) && GT_16( lpn, lpm ) )
1011 : {
1012 53 : *high_lpn_flag_ptr = 1;
1013 53 : move16();
1014 : }
1015 :
1016 :
1017 3100 : IF( !vad )
1018 : {
1019 : /* increase log-probability of noise */
1020 : /* lps = lpn * 1.2f; */
1021 108 : lps = add( lpn, mult_r( 6554, lpn ) ); /* Q9 */
1022 : }
1023 :
1024 3100 : hSpMusClas->lpm_fx = lpm;
1025 3100 : move16();
1026 3100 : hSpMusClas->lps_fx = lps;
1027 3100 : move16();
1028 :
1029 : /* determine HQ GENERIC speech class */
1030 3100 : IF( hHQ_core != NULL )
1031 : {
1032 3100 : hHQ_core->hq_generic_speech_class = 0;
1033 3100 : move16();
1034 3100 : if ( GT_16( lps, add( lpm, 256 ) ) )
1035 : {
1036 1433 : hHQ_core->hq_generic_speech_class = 1;
1037 1433 : move16();
1038 : }
1039 : }
1040 :
1041 : /*------------------------------------------------------------------*
1042 : * State machine (sp_mus_state < 0 .. inactive, > 0 .. entry, = 0 .. active )
1043 : *------------------------------------------------------------------*/
1044 :
1045 3100 : IF( vad )
1046 : {
1047 2992 : test();
1048 2992 : test();
1049 2992 : test();
1050 2992 : IF( LT_16( relE, -20 * 256 ) || ( LE_16( lps, -5 * 512 ) && LE_16( lpm, -5 * 512 ) ) )
1051 : {
1052 370 : IF( hSpMusClas->sp_mus_state > 0 )
1053 : {
1054 70 : if ( LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1055 : {
1056 : /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
1057 9 : hSpMusClas->inact_cnt = 0;
1058 9 : move16();
1059 : }
1060 :
1061 : /* energy is too low -> we are going to instable state */
1062 70 : hSpMusClas->sp_mus_state = 0;
1063 70 : move16();
1064 : }
1065 300 : ELSE IF( GT_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1066 : {
1067 : /* energy is still too low -> we are still in instable state */
1068 136 : hSpMusClas->sp_mus_state = sub( hSpMusClas->sp_mus_state, 1 );
1069 : }
1070 : }
1071 2622 : ELSE IF( hSpMusClas->sp_mus_state <= 0 )
1072 : {
1073 70 : IF( hSpMusClas->inact_cnt == 0 )
1074 : {
1075 :
1076 24 : hSpMusClas->sp_mus_state = 1;
1077 24 : move16();
1078 : }
1079 : ELSE
1080 : {
1081 :
1082 46 : hSpMusClas->sp_mus_state = HANG_LEN;
1083 46 : move16();
1084 : }
1085 :
1086 70 : hSpMusClas->inact_cnt = 12;
1087 70 : move16();
1088 : }
1089 2552 : ELSE IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1090 : {
1091 : /* we are inside an entry period -> increment the counter of entry frames */
1092 129 : hSpMusClas->sp_mus_state = add( hSpMusClas->sp_mus_state, 1 );
1093 : }
1094 :
1095 2992 : test();
1096 2992 : if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
1097 : {
1098 182 : hSpMusClas->inact_cnt = sub( hSpMusClas->inact_cnt, 1 );
1099 : }
1100 : }
1101 : ELSE
1102 : {
1103 108 : test();
1104 108 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1105 : {
1106 0 : hSpMusClas->inact_cnt = 0;
1107 0 : move16();
1108 : }
1109 108 : ELSE IF( hSpMusClas->inact_cnt > 0 )
1110 : {
1111 40 : hSpMusClas->inact_cnt = sub( hSpMusClas->inact_cnt, 1 );
1112 : }
1113 :
1114 108 : test();
1115 108 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1116 : {
1117 :
1118 0 : hSpMusClas->sp_mus_state = -HANG_LEN;
1119 0 : move16();
1120 : }
1121 108 : ELSE IF( hSpMusClas->sp_mus_state > 0 )
1122 : {
1123 :
1124 0 : hSpMusClas->sp_mus_state = -1;
1125 0 : move16();
1126 : }
1127 108 : ELSE IF( GT_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1128 : {
1129 : /* we are in inactive state */
1130 63 : hSpMusClas->sp_mus_state = sub( hSpMusClas->sp_mus_state, 1 );
1131 : }
1132 : }
1133 :
1134 : /*------------------------------------------------------------------*
1135 : * Decision without hangover
1136 : * Weighted decision
1137 : *------------------------------------------------------------------*/
1138 :
1139 : /* decision without hangover (0 - speech/noise, 1 - music) */
1140 3100 : logic16();
1141 3100 : dec = sub( lpm, lps ) > 0;
1142 3100 : move16();
1143 3100 : dlp = sub( lpm, lps ); /*Q9*/
1144 :
1145 3100 : IF( !vad )
1146 : {
1147 108 : dec = 0;
1148 108 : move16();
1149 108 : dlp = 0;
1150 108 : move16();
1151 : }
1152 :
1153 : /* calculate weight based on relE (close to 0.01 in low-E regions, close to 1 in high-E regions) */
1154 : /*wrelE = 1.0f + relE/15;*/
1155 3100 : wrelE = add( 2048, mult_r( relE, 17476 ) ); /* 1/15 in Q18 -> 17476 result in Q11 */
1156 :
1157 :
1158 3100 : wrelE = s_min( wrelE, 2048 );
1159 3100 : wrelE = s_max( wrelE, 20 );
1160 :
1161 : /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
1162 3100 : test();
1163 3100 : IF( dlp < 0 && LT_16( dlp, hSpMusClas->past_dlp_fx[0] ) )
1164 : {
1165 899 : IF( hSpMusClas->past_dlp_fx[0] > 0 )
1166 : {
1167 279 : hSpMusClas->wdrop_fx = negate( dlp ); /*Q9*/
1168 : }
1169 : ELSE
1170 : {
1171 620 : hSpMusClas->wdrop_fx = add( hSpMusClas->wdrop_fx, sub( hSpMusClas->past_dlp_fx[0], dlp ) ); /*Q9*/
1172 : }
1173 : }
1174 : ELSE
1175 : {
1176 2201 : hSpMusClas->wdrop_fx = 0;
1177 2201 : move16();
1178 : }
1179 :
1180 : /*wdrop = st->wdrop/20;*/
1181 3100 : wdrop = mult_r( hSpMusClas->wdrop_fx, 26214 ); /*Q9*Q19->Q13*/
1182 3100 : wdrop = s_min( wdrop, 8192 ); /* limitation [0.1,1] Q13 */
1183 3100 : wdrop = s_max( wdrop, 819 );
1184 :
1185 : /* combine weights into one */
1186 : /*wght = wrelE * wdrop;*/
1187 3100 : wght = mult_r( wrelE, wdrop ); /* Q11*Q13 -> Q9*/
1188 3100 : wght = s_max( wght, 5 );
1189 :
1190 : /* calculate weighted decision */
1191 : /*st->wdlp_0_95_sp = wght * dlp + (1 - wght) * st->wdlp_0_95_sp;*/
1192 : /* = Q9 * Q9 + (Q9-Q9)*Q9 */
1193 3100 : L_tmp = L_mac( L_mult( wght, dlp ), sub( 512, wght ), hSpMusClas->wdlp_0_95_sp_fx );
1194 3100 : hSpMusClas->wdlp_0_95_sp_fx = round_fx( L_shl( L_tmp, 6 ) );
1195 :
1196 3100 : if ( EQ_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1197 : {
1198 228 : hSpMusClas->wdlp_0_95_sp_fx = 0;
1199 228 : move16();
1200 : }
1201 :
1202 : /*------------------------------------------------------------------*
1203 : * Final speech/music decision
1204 : *------------------------------------------------------------------*/
1205 :
1206 3100 : test();
1207 3100 : test();
1208 3100 : IF( !vad && EQ_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1209 : {
1210 : /* inactive state */
1211 49 : dec = 0;
1212 49 : move16();
1213 : }
1214 3051 : ELSE IF( hSpMusClas->sp_mus_state <= 0 )
1215 : {
1216 : /* transition from active to inactive state or instable state */
1217 429 : dec = hSpMusClas->past_dec[0];
1218 429 : move16();
1219 : }
1220 2622 : ELSE IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1221 : {
1222 : /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
1223 138 : L_tmp = L_mult( w_spmus_fx[hSpMusClas->sp_mus_state - 1][0], dlp ); /*Q15*Q9 */
1224 :
1225 : /*ftmp += dotp( &w[st_fx->sp_mus_state-1][1], st_fx->past_dlp_fx, HANG_LEN-1 );*/
1226 138 : L_tmp = L_add( L_tmp, Dot_product( &w_spmus_fx[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp_fx, HANG_LEN - 1 ) );
1227 138 : logic16();
1228 138 : move16();
1229 :
1230 : /*dec = ftmp > 2.0f;*/
1231 138 : dec = L_sub( L_tmp, 2 * ( 1 << 25 ) ) > 0;
1232 : }
1233 : ELSE
1234 : {
1235 : /* stable active state */
1236 2484 : test();
1237 2484 : test();
1238 2484 : test();
1239 2484 : test();
1240 2484 : IF( hSpMusClas->wdlp_0_95_sp_fx > 0 && hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 )
1241 : {
1242 : /* switching from speech to music */
1243 18 : dec = 1;
1244 18 : move16();
1245 : }
1246 2466 : ELSE IF( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp_fx < 0 )
1247 : {
1248 : /* switching from music to speech */
1249 18 : dec = 0;
1250 18 : move16();
1251 : }
1252 : ELSE
1253 : {
1254 2448 : dec = hSpMusClas->past_dec[0];
1255 2448 : move16();
1256 : }
1257 : }
1258 :
1259 :
1260 : /*------------------------------------------------------------------*
1261 : * Updates
1262 : *------------------------------------------------------------------*/
1263 :
1264 : /* update the buffer of past non-binary decisions */
1265 3100 : Copy( &hSpMusClas->past_dlp_fx[0], &hSpMusClas->past_dlp_fx[1], HANG_LEN - 2 );
1266 3100 : hSpMusClas->past_dlp_fx[0] = dlp;
1267 3100 : move16();
1268 :
1269 : /* update the buffer of past binary decisions */
1270 3100 : Copy( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
1271 3100 : hSpMusClas->past_dec[0] = dec;
1272 3100 : move16();
1273 :
1274 3100 : return dec;
1275 : }
1276 :
1277 :
1278 : /*---------------------------------------------------------------------*
1279 : * sp_mus_classif_2nd_fx()
1280 : *
1281 : * 2nd stage speech/music classifier (convert music to speech for onsets)
 *
 * On every call the function updates the signal stability estimate
 * (stab_est_fx), the correlation statistics (var_cor_calc_fx) and runs the
 * attack detector (attack_det_fx). Depending on the outcome it may then
 *   - write st->sp_aud_decision2 (only inside the branches taken when
 *     st->sp_aud_decision1 == 1; the value is not initialised here, so the
 *     caller's value is kept on all other paths),
 *   - change st->coder_type (only when st->codec_mode == MODE1),
 *   - set *attack_flag to attack + 1 (TC coding) or to 31 (GSC coding);
 *     *attack_flag stays 0 when no branch reports an attack.
1282 : *---------------------------------------------------------------------*/
1283 :
1284 2050 : static void sp_mus_classif_2nd_fx(
1285 : Encoder_State *st, /* i/o: Encoder state structure */
1286 : const Word16 Etot, /* i : total frame energy */
1287 : Word16 *attack_flag, /* i/o: attack flag (GSC or TC) */
1288 : const Word16 *inp, /* i : input signal */
1289 : const Word16 Qx ) /* i : scaling (Q) of inp, forwarded to attack_det_fx() */
1290 : {
1291 : Word16 attack;
1292 2050 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
1293 :
1294 : /* initialization: no attack is reported unless a branch below sets the flag */
1295 2050 : *attack_flag = 0;
1296 2050 : move16();
1297 :
1298 : /* signal stability estimation (updates the gsc_* members of hSpMusClas passed below) */
1299 2050 : stab_est_fx( Etot, hSpMusClas->gsc_lt_diff_etot_fx, &hSpMusClas->gsc_mem_etot_fx, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres_fx, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
1300 :
1301 : /* calculate variance of correlation (also refreshes hSpMusClas->high_stable_cor used below) */
1302 2050 : var_cor_calc_fx( st->old_corr_fx, &hSpMusClas->mold_corr_fx, hSpMusClas->var_cor_t_fx, &hSpMusClas->high_stable_cor );
1303 :
1304 : /* attack detection: attack is 0 or the index of the segment holding the attack */
1305 2050 : attack = attack_det_fx( inp, Qx, st->clas, st->localVAD, st->coder_type, st->total_brate );
1306 :
1307 2050 : test();
1308 2050 : test();
1309 2050 : test();
1310 2050 : test();
1311 2050 : test();
1312 2050 : test();
1313 2050 : IF( EQ_16( st->sp_aud_decision1, 1 ) ) /* 1st stage decision is 1 (presumably "music" - confirm against caller) */
1314 : {
1315 665 : test();
1316 665 : test();
1317 665 : test();
1318 665 : IF( LT_16( hSpMusClas->ener_RAT_fx, 5898 ) && GT_16( hSpMusClas->lt_dec_thres_fx, 7680 ) ) /* NOTE(review): 5898 / 7680 would be 0.18 in Q15 / 15.0 in Q9 - Q-formats are not visible here, confirm */
1319 : {
1320 0 : st->sp_aud_decision2 = 0;
1321 0 : move16();
1322 : }
1323 665 : ELSE IF( EQ_16( hSpMusClas->high_stable_cor, 1 ) && GE_16( st->pitch[0], 130 ) )
1324 : {
1325 : /* prevent GSC in highly correlated signal with low energy variation */
1326 : /* this is basically a patch against bassoon-type of music */
1327 0 : st->sp_aud_decision2 = 0;
1328 0 : move16();
1329 :
1330 0 : test();
1331 0 : if ( EQ_16( st->codec_mode, MODE1 ) && EQ_16( st->coder_type, TRANSITION ) ) /* in MODE1 a TRANSITION frame is turned back into GENERIC */
1332 : {
1333 0 : st->coder_type = GENERIC;
1334 0 : move16();
1335 : }
1336 : }
1337 665 : ELSE IF( GT_16( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], 1152 ) && /* 1152 = 4.5 if in Q8 like the threshold below (assumption) */
1338 : GT_16( sub( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 2] ), 2560 ) ) /* 10.0f in Q8 */
1339 : {
1340 23 : IF( EQ_16( st->tc_cnt, 1 ) ) /* tc_cnt == 1: clear the 2nd-stage decision and, in MODE1, select TRANSITION */
1341 : {
1342 0 : st->sp_aud_decision2 = 0;
1343 0 : move16();
1344 :
1345 0 : if ( EQ_16( st->codec_mode, MODE1 ) )
1346 : {
1347 0 : st->coder_type = TRANSITION;
1348 0 : move16();
1349 : }
1350 : }
1351 : ELSE
1352 : {
1353 23 : IF( GE_16( attack, ATT_3LSUB_POS ) )
1354 : {
1355 : /* do TC coding if attack is located in the last subframe */
1356 6 : st->sp_aud_decision2 = 0;
1357 6 : move16();
1358 6 : *attack_flag = add( attack, 1 ); /* segment index + 1, so that 0 keeps meaning "no attack" */
1359 6 : move16();
1360 6 : if ( EQ_16( st->codec_mode, MODE1 ) )
1361 : {
1362 6 : st->coder_type = TRANSITION;
1363 6 : move16();
1364 : }
1365 : }
1366 17 : ELSE IF( GE_16( attack, ATT_SEG_LEN >> 1 ) ) /* NOTE(review): attack is a segment index but is compared with half the segment length in samples - confirm this matches the intent stated below */
1367 : {
1368 : /* do GSC coding if attack is located after the first quarter of the first subframe */
1369 : /* (pre-echo will be treated at the decoder side) */
1370 0 : st->sp_aud_decision2 = 1;
1371 0 : move16();
1372 0 : *attack_flag = 31; /* fixed value for the GSC case (not attack + 1) */
1373 0 : move16();
1374 : }
1375 : }
1376 : }
1377 : }
1378 1385 : ELSE IF( EQ_16( st->localVAD, 1 ) && EQ_16( st->coder_type, GENERIC ) && /* 1st stage decision is not 1: only active GENERIC frames are examined */
1379 : ( ( GE_16( attack, ATT_3LSUB_POS ) && LT_32( st->total_brate, ACELP_24k40 ) ) || /* total_brate < ACELP_24k40: position threshold ATT_3LSUB_POS */
1380 : ( GE_16( attack, ATT_3LSUB_POS_16k ) && GE_32( st->total_brate, ACELP_24k40 ) && LT_32( st->total_brate, ACELP_48k ) ) ) ) /* ACELP_24k40 <= total_brate < ACELP_48k: position threshold ATT_3LSUB_POS_16k */
1381 : {
1382 : /* do TC coding if attack is located in the last subframe */
1383 19 : *attack_flag = add( attack, 1 );
1384 19 : move16();
1385 19 : if ( EQ_16( st->codec_mode, MODE1 ) )
1386 : {
1387 19 : st->coder_type = TRANSITION;
1388 19 : move16();
1389 : }
1390 : }
1391 :
1392 2050 : return;
1393 : }
1394 :
1395 :
1396 : /*---------------------------------------------------------------------*
1397 : * var_cor_calc_fx()
1398 : *
1399 : * Calculate variance of correlation
 *
 * Pushes old_corr into the history buffer var_cor_t[] (newest value at
 * index 0, oldest value dropped), computes the variance of the buffer with
 * var_fx() and sets *high_stable_cor to 1 when the running average
 * *mold_corr is above 26214 and the variance is below 2, otherwise to 0.
 * The flag is evaluated with the average from BEFORE this call's update;
 * *mold_corr is refreshed only afterwards.
 *
 * NOTE(review): the buffer is annotated Q11 while the averaging of the same
 * old_corr value is annotated Q15 - confirm the actual scaling of old_corr.
1400 : *---------------------------------------------------------------------*/
1401 :
1402 2050 : static void var_cor_calc_fx(
1403 : const Word16 old_corr, /* i : newest correlation value, stored in var_cor_t[0] */
1404 : Word16 *mold_corr, /* i/o: running average of the correlation */
1405 : Word16 var_cor_t[], /* i/o: history of VAR_COR_LEN correlation values, newest first */
1406 : Word16 *high_stable_cor ) /* o : 1 = high average correlation with low variance, 0 otherwise */
1407 : {
1408 : Word16 i, var_cor;
1409 :
1410 : /* update buffer of old correlation values (shift by one towards the end) */
1411 20500 : FOR( i = VAR_COR_LEN - 1; i > 0; i-- )
1412 : {
1413 18450 : var_cor_t[i] = var_cor_t[i - 1]; /*Q11*/
1414 18450 : move16();
1415 : }
1416 2050 : var_cor_t[i] = old_corr; /* i == 0 after the loop: newest value goes to the front */
1417 2050 : move16();
1418 :
1419 : /* calculate variance of correlation */
1420 2050 : var_cor = var_fx( var_cor_t, 11, VAR_COR_LEN ); /* second argument 11: presumably the Q-format handed to var_fx - confirm against its prototype */
1421 :
1422 2050 : *high_stable_cor = 0;
1423 2050 : move16();
1424 2050 : test();
1425 2050 : if ( GT_16( *mold_corr, 26214 ) && LT_16( var_cor, 2 ) ) /* 26214 = 0.8 if *mold_corr is Q15 as annotated below */
1426 : {
1427 0 : *high_stable_cor = 1;
1428 0 : move16();
1429 : }
1430 :
1431 : /* update average correlation (3277 and 29491 are 0.1 and 0.9 in Q15) */
1432 : /*st->mold_corr = 0.1f * st->old_corr + 0.9f * st->mold_corr;*/
1433 2050 : *mold_corr = mac_r( L_mult( 3277, old_corr ), 29491, *mold_corr ); /*Q15 */
1434 :
1435 2050 : return;
1436 : }
1437 :
1438 : /*---------------------------------------------------------------------*
1439 : * attack_det_fx()
1440 : *
1441 : * Attack detection
 *
 * Splits the input into ATT_NSEG segments of ATT_SEG_LEN samples, computes
 * the energy of every segment and takes the value returned by
 * maximum_32_fx() as the candidate attack position (it is used below as an
 * index into the segment-energy array finc[]). The candidate is reset to 0
 * whenever one of the checks below finds the attack not strong enough, so
 * the return value is either 0 or a segment index.
 *
 * Two paths exist:
 *   - localVAD == 1 and coder_type == GENERIC: the mean energy of the first
 *     att_3lsub_pos segments is compared with the mean energy from the
 *     attack to the end of the frame, then the attack segment is compared
 *     with the individual earlier segments;
 *   - otherwise: only the comparison with individual earlier segments is
 *     done (with a different ratio and skipping the attack segment itself).
1442 : *---------------------------------------------------------------------*/
1443 :
1444 2050 : static Word16 attack_det_fx( /* o : attack flag */
1445 : const Word16 *inp, /* i : input signal */
1446 : const Word16 Qx, /* i : scaling (Q) of inp; segment energies are in Q(2*Qx) */
1447 : const Word16 last_clas, /* i : last signal clas */
1448 : const Word16 localVAD, /* i : local VAD flag */
1449 : const Word16 coder_type, /* i : coder type */
1450 : const Word32 total_brate /* i : total bitrate */
1451 : )
1452 : {
1453 : Word16 i, j, tmp, tmp1, attack, exp1;
1454 : Word32 L_tmp, etmp, etmp2, finc[ATT_NSEG]; /* finc[]: energy of each of the ATT_NSEG segments */
1455 : Word16 att_3lsub_pos;
1456 :
1457 2050 : att_3lsub_pos = ATT_3LSUB_POS; /* segment position threshold; replaced below for total_brate >= ACELP_24k40 */
1458 2050 : move16();
1459 2050 : if ( GE_32( total_brate, ACELP_24k40 ) )
1460 : {
1461 1000 : att_3lsub_pos = ATT_3LSUB_POS_16k;
1462 1000 : move16();
1463 : }
1464 :
1465 : /* compute energy per section (sum of squared samples, saturating accumulation) */
1466 67650 : FOR( i = 0; i < ATT_NSEG; i++ )
1467 : {
1468 65600 : L_tmp = L_mult0( inp[i * ATT_SEG_LEN], inp[i * ATT_SEG_LEN] ); /*2*Qx */
1469 :
1470 524800 : FOR( j = 1; j < ATT_SEG_LEN; j++ )
1471 : {
1472 459200 : L_tmp = L_mac0_sat( L_tmp, inp[i * ATT_SEG_LEN + j], inp[i * ATT_SEG_LEN + j] ); /*2*Qx */
1473 : }
1474 :
1475 65600 : finc[i] = L_tmp;
1476 65600 : move32();
1477 : }
1478 :
1479 2050 : attack = maximum_32_fx( finc, ATT_NSEG, &etmp ); /* candidate position (presumably index of the largest segment energy); the value written to etmp is recomputed below before it is read */
1480 2050 : move16();
1481 2050 : test();
1482 2050 : IF( EQ_16( localVAD, 1 ) && EQ_16( coder_type, GENERIC ) )
1483 : {
1484 : /*----------------------------------------------------------------------*
1485 : * Detect if there is a strong onset in the last subframe
1486 : * - if detected, TC is used to better code the onset
1487 : *----------------------------------------------------------------------*/
1488 :
1489 : /* compute mean energy in the first three subframes */
1490 1548 : exp1 = norm_s( att_3lsub_pos );
1491 1548 : tmp = div_s( shl( 1, sub( 14, exp1 ) ), att_3lsub_pos ); /*Q(29-exp1) */ /* reciprocal of the number of averaged segments */
1492 :
1493 1548 : L_tmp = L_shr_sat( finc[0], Qx ); /*Qx */
1494 :
1495 39046 : FOR( i = 1; i < att_3lsub_pos; i++ )
1496 : {
1497 37498 : L_tmp = L_add_sat( L_tmp, L_shr_sat( finc[i], Qx ) ); /*Qx */
1498 : }
1499 1548 : L_tmp = Mult_32_16( L_tmp, tmp ); /*Q(14-exp1+Qx) */
1500 1548 : etmp = L_shl( L_tmp, sub( exp1, 14 ) ); /*Qx */
1501 : /* compute mean energy from the attack position to the end of the frame */
1502 1548 : tmp1 = sub( ATT_NSEG, attack ); /* number of segments from the attack to the last segment */
1503 1548 : exp1 = norm_s( tmp1 );
1504 1548 : tmp = div_s( shl( 1, sub( 14, exp1 ) ), tmp1 ); /*Q(29-exp1) */
1505 :
1506 1548 : L_tmp = L_shr_sat( finc[attack], Qx ); /*Qx */
1507 27151 : FOR( i = 1; i < tmp1; i++ )
1508 : {
1509 25603 : L_tmp = L_add_sat( L_tmp, L_shr_sat( finc[i + attack], Qx ) ); /*Qx */
1510 : }
1511 1548 : L_tmp = Mult_32_16( L_tmp, tmp ); /*Q(14-exp1+Qx) */
1512 1548 : etmp2 = L_shl( L_tmp, sub( exp1, 14 ) ); /*Qx */
1513 :
1514 : /* and compare them */
1515 1548 : if ( GT_32( etmp, L_shr( etmp2, 3 ) ) ) /* mean before > (mean after) / 8 */
1516 : {
1517 : /* stop, if the attack is not sufficiently strong */
1518 1496 : attack = 0;
1519 1496 : move16();
1520 : }
1521 :
1522 1548 : test();
1523 1548 : if ( EQ_16( last_clas, VOICED_CLAS ) && GT_32( L_add( L_shl( etmp, 4 ), L_shl( etmp, 2 ) ), etmp2 ) ) /* 16*etmp + 4*etmp = 20 * mean before > mean after */
1524 : {
1525 : /* stop, if the signal was voiced and the attack is not sufficiently strong */
1526 553 : attack = 0;
1527 553 : move16();
1528 : }
1529 :
1530 : /* compare also wrt. other sections (reduces a misclassification) */
1531 1548 : IF( attack > 0 )
1532 : {
1533 50 : etmp2 = L_add( finc[attack], 0 );
1534 50 : etmp = Mult_32_16( etmp2, 16384 ); /* etmp2 / 2.0 = (etmp2*0.5) */
1535 1045 : FOR( i = 2; i < ATT_3LSUB_POS - 2; i++ ) /* NOTE(review): uses the constant ATT_3LSUB_POS, whereas the other branch below uses the bitrate-dependent att_3lsub_pos - confirm this is intended */
1536 : {
1537 998 : IF( GT_32( finc[i], etmp ) ) /* an earlier segment has more than half the energy of the attack segment */
1538 : {
1539 3 : attack = 0;
1540 3 : move16();
1541 3 : BREAK;
1542 : }
1543 : }
1544 : }
1545 : }
1546 502 : ELSE IF( attack > 0 ) /* not (localVAD == 1 and GENERIC): only the comparison with other segments is done */
1547 : {
1548 477 : etmp2 = L_add( finc[attack], 0 );
1549 477 : etmp = Mult_32_16( etmp2, 25206 ); /* etmp2 / 1.3 = (etmp2*0.76923) */
1550 5688 : FOR( i = 2; i < att_3lsub_pos - 2; i++ )
1551 : {
1552 : /*if( i != attack && finc[i] * 1.3f > etmp2 ) -> finc[i] > (etmp2*0.76923) */
1553 5539 : test();
1554 5539 : IF( NE_16( i, attack ) && GT_32( finc[i], etmp ) )
1555 : {
1556 328 : attack = 0;
1557 328 : move16();
1558 328 : BREAK;
1559 : }
1560 : }
1561 : }
1562 :
1563 2050 : return attack; /* 0 when the attack was rejected, otherwise the candidate segment index */
1564 : }
1565 :
1566 : /*---------------------------------------------------------------------*
1567 : * ivas_smc_gmm_fx()
1568 : *
1569 : * 1st stage of the speech/music classification (based on the GMM model)
1570 : *---------------------------------------------------------------------*/
1571 : /*! r: S/M decision (0=speech or noise,1=unclear,2=music) */
1572 1154754 : Word16 ivas_smc_gmm_fx(
1573 : Encoder_State *st, /* i/o: state structure */
1574 : STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */
1575 : const Word16 localVAD_HE_SAD, /* i : HE-SAD flag without hangover */
1576 : const Word16 Etot_fx, /* i : total frame energy */
1577 : const Word16 lsp_new_fx[M], /* i : LSPs in current frame Q15 */
1578 : const Word16 cor_map_sum_fx, /* i : correlation map sum (from multi-harmonic anal.) Q8 */
1579 : const Word32 epsP_fx[M + 1], /* i : LP prediciton error */
1580 : const Word32 PS_fx[], /* i : energy spectrum */
1581 : const Word32 non_sta_fx, /* i : unbound non-stationarity Q20 */
1582 : const Word16 relE_fx, /* i : relative frame energy Q8 */
1583 : Word16 *high_lpn_flag, /* i/o: sp/mus LPN flag */
1584 : const Word16 flag_spitch, /* i : flag to indicate very short stable pitch */
1585 : Word16 Qfact_PS,
1586 : Word16 Q_esp,
1587 : Word16 Qfact_PS_past )
1588 : {
1589 : Word16 i, m, dec;
1590 : Word16 flag_odv;
1591 : Word32 lps_fx, lpm_fx, lpn_fx;
1592 : Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES];
1593 : Word64 wprob_fx;
1594 : Word32 fvm_fx[N_PCA_COEF];
1595 : Word32 sum_PS_fx, ps_diff_fx;
1596 : Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx;
1597 : Word32 wrise_fx;
1598 : Word16 dlp_mean2var_fx;
1599 : Word16 dlp_mean2var_q;
1600 : Word32 FV_fx[N_SMC_FEATURES], *pFV_fx;
1601 : Word32 dPS_fx[128];
1602 : Word32 PS_norm_fx[128];
1603 : const Word32 *pODV_fx;
1604 : Word32 *pFV_st_fx;
1605 : Word16 relE_attack_flag, smc_st_mean_fact_fx;
1606 : Word16 j, len;
1607 : const Word32 *pt_mel_fb_fx;
1608 : Word32 melS_fx[NB_MEL_BANDS], mfcc_fx[NB_MEL_BANDS];
1609 : Word16 odv_cnt;
1610 : Word16 i_out[N_SMC_FEATURES], *p_out;
1611 : Word16 temp_exp;
1612 : Word16 Qfact_FV;
1613 : Word32 temp32, temp32_log;
1614 : Word32 temp32_log1, temp32_log2;
1615 : Word16 temp16;
1616 1154754 : Word16 dotp_exp = 0;
1617 1154754 : move16();
1618 : /*------------------------------------------------------------------*
1619 : * Initialization
1620 : *------------------------------------------------------------------*/
1621 :
1622 1154754 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
1623 : Word32 temp_sqrt, temp_acos;
1624 : /*------------------------------------------------------------------*
1625 : * State machine (sp_mus_state: -8 = INACTIVE, -7:-1 = UNSTABLE, 0:7 = ENTRY, 8 = STABLE )
1626 : *------------------------------------------------------------------*/
1627 :
1628 1154754 : IF( localVAD_HE_SAD )
1629 : {
1630 978483 : test();
1631 978483 : IF( LT_16( relE_fx, -5120 /*20 q8*/ ) )
1632 : {
1633 101830 : IF( hSpMusClas->sp_mus_state > 0 )
1634 : {
1635 10687 : if ( LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1636 : {
1637 : /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
1638 2344 : hSpMusClas->inact_cnt = 0;
1639 2344 : move16();
1640 : }
1641 :
1642 : /* energy is too low -> we are going to instable state */
1643 10687 : hSpMusClas->sp_mus_state = 0;
1644 10687 : move16();
1645 : }
1646 91143 : ELSE IF( GT_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1647 : {
1648 : /* energy is still too low -> we are still in instable state */
1649 29615 : hSpMusClas->sp_mus_state = sub( hSpMusClas->sp_mus_state, 1 );
1650 : }
1651 : }
1652 876653 : ELSE IF( hSpMusClas->sp_mus_state <= 0 )
1653 : {
1654 21943 : IF( hSpMusClas->inact_cnt == 0 )
1655 : {
1656 :
1657 13736 : hSpMusClas->sp_mus_state = 1;
1658 13736 : move16();
1659 : }
1660 : ELSE
1661 : {
1662 :
1663 8207 : hSpMusClas->sp_mus_state = HANG_LEN;
1664 8207 : move16();
1665 : }
1666 :
1667 21943 : hSpMusClas->inact_cnt = 12;
1668 21943 : move16();
1669 : }
1670 854710 : ELSE IF( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
1671 : {
1672 : /* we are inside an entry period -> increment the counter of entry frames */
1673 65557 : hSpMusClas->sp_mus_state = add( hSpMusClas->sp_mus_state, 1 );
1674 : }
1675 :
1676 978483 : test();
1677 978483 : IF( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
1678 : {
1679 30540 : hSpMusClas->inact_cnt = sub( hSpMusClas->inact_cnt, 1 );
1680 30540 : move16();
1681 : }
1682 : }
1683 : ELSE
1684 : {
1685 176271 : test();
1686 176271 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1687 : {
1688 961 : hSpMusClas->inact_cnt = 0;
1689 961 : move16();
1690 : }
1691 175310 : ELSE IF( hSpMusClas->inact_cnt > 0 )
1692 : {
1693 23594 : hSpMusClas->inact_cnt = sub( hSpMusClas->inact_cnt, 1 );
1694 : }
1695 :
1696 176271 : test();
1697 176271 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1698 : {
1699 961 : hSpMusClas->sp_mus_state = -HANG_LEN;
1700 961 : move16();
1701 : }
1702 175310 : ELSE IF( hSpMusClas->sp_mus_state > 0 )
1703 : {
1704 3413 : hSpMusClas->sp_mus_state = -1;
1705 3413 : move16();
1706 : }
1707 171897 : ELSE IF( GT_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1708 : {
1709 : /* we are in inactive state */
1710 15712 : hSpMusClas->sp_mus_state = sub( hSpMusClas->sp_mus_state, 1 );
1711 : }
1712 : }
1713 :
1714 : /* detect attacks based on relE */
1715 1154754 : IF( GT_16( relE_fx, hSpMusClas->prev_relE_fx ) )
1716 : {
1717 488961 : hSpMusClas->relE_attack_sum_fx = add_sat( sub_sat( relE_fx, hSpMusClas->prev_relE_fx ), hSpMusClas->relE_attack_sum_fx ); /*q8*/
1718 488961 : move16();
1719 : }
1720 : ELSE
1721 : {
1722 665793 : hSpMusClas->relE_attack_sum_fx = 0; /*q8*/
1723 665793 : move16();
1724 : }
1725 1154754 : hSpMusClas->prev_relE_fx = relE_fx;
1726 1154754 : move16();
1727 1154754 : test();
1728 1154754 : test();
1729 1154754 : test();
1730 : /* update counter from last VAD 0->1 change */
1731 1154754 : IF( hSpMusClas->prev_vad == 0 && EQ_16( localVAD_HE_SAD, 1 ) )
1732 : {
1733 15644 : hSpMusClas->vad_0_1_cnt = 1;
1734 15644 : move16();
1735 : }
1736 1139110 : ELSE IF( EQ_16( localVAD_HE_SAD, 1 ) && hSpMusClas->vad_0_1_cnt > 0 && LT_16( hSpMusClas->vad_0_1_cnt, 50 ) )
1737 : {
1738 249319 : hSpMusClas->vad_0_1_cnt = add( hSpMusClas->vad_0_1_cnt, 1 );
1739 : }
1740 : ELSE
1741 : {
1742 889791 : hSpMusClas->vad_0_1_cnt = 0;
1743 889791 : move16();
1744 : }
1745 1154754 : hSpMusClas->prev_vad = localVAD_HE_SAD;
1746 1154754 : move16();
1747 1154754 : test();
1748 1154754 : test();
1749 1154754 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) && GT_16( hSpMusClas->relE_attack_sum_fx, 1280 /*q8*/ ) )
1750 : {
1751 23207 : hSpMusClas->relE_attack_cnt = add( hSpMusClas->relE_attack_cnt, 1 );
1752 :
1753 : /* set flag only in the first X frames in a series */
1754 23207 : IF( hSpMusClas->relE_attack_cnt > 0 && LT_16( hSpMusClas->relE_attack_cnt, 3 ) )
1755 : {
1756 16672 : relE_attack_flag = 1;
1757 : }
1758 : ELSE
1759 : {
1760 6535 : relE_attack_flag = 0;
1761 : }
1762 23207 : move16();
1763 : }
1764 : ELSE
1765 : {
1766 1131547 : hSpMusClas->relE_attack_cnt = 0;
1767 1131547 : move16();
1768 1131547 : relE_attack_flag = 0;
1769 1131547 : move16();
1770 : }
1771 :
1772 1154754 : hSpMusClas->prev_Etot_fx = Etot_fx;
1773 1154754 : move16();
1774 :
1775 : /*------------------------------------------------------------------*
1776 : * Preparation of the feature vector
1777 : *------------------------------------------------------------------*/
1778 :
1779 1154754 : pFV_fx = FV_fx;
1780 1154754 : test();
1781 1154754 : test();
1782 : /* [0] OL pitch */
1783 1154754 : IF( relE_attack_flag || EQ_16( st->tc_cnt, 1 ) || EQ_16( st->tc_cnt, 2 ) )
1784 : {
1785 115846 : *pFV_fx++ = L_shl( st->pitch[2], Q20 );
1786 : }
1787 : ELSE
1788 : {
1789 : // *pFV_fx++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f;
1790 1038908 : *pFV_fx++ = Mpy_32_32( L_shl( add( add( st->pitch[0], st->pitch[1] ), st->pitch[2] ), Q20 ), 715827883 );
1791 : }
1792 1154754 : move32();
1793 :
1794 1154754 : test();
1795 1154754 : test();
1796 : /* [1] voicing */
1797 1154754 : IF( relE_attack_flag || EQ_16( st->tc_cnt, 1 ) || EQ_16( st->tc_cnt, 2 ) )
1798 : {
1799 115846 : *pFV_fx++ = L_shl( st->voicing_fx[2], 5 ); /*q20*/
1800 : }
1801 : ELSE
1802 : {
1803 : // *pFV++ = ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f;
1804 1038908 : *pFV_fx++ = Mpy_32_32( L_shl( L_add( L_add( st->voicing_fx[0], st->voicing_fx[1] ), st->voicing_fx[2] ), Q5 ), 715827883 ); /*q20*/
1805 : }
1806 1154754 : move32();
1807 :
1808 1154754 : temp_exp = 1;
1809 1154754 : move16();
1810 1154754 : temp16 = lsp_new_fx[2];
1811 1154754 : move16();
1812 :
1813 1154754 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1814 1154754 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1815 1154754 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1816 1154754 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1817 1154754 : move32();
1818 1154754 : temp_exp = 1;
1819 1154754 : move16();
1820 1154754 : temp16 = lsp_new_fx[3];
1821 1154754 : move16();
1822 :
1823 1154754 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1824 1154754 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1825 1154754 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1826 1154754 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1827 1154754 : move32();
1828 1154754 : temp_exp = 1;
1829 1154754 : move16();
1830 1154754 : temp16 = lsp_new_fx[4];
1831 1154754 : move16();
1832 :
1833 1154754 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1834 1154754 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1835 1154754 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1836 1154754 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1837 1154754 : move32();
1838 1154754 : temp_exp = 1;
1839 1154754 : move16();
1840 1154754 : temp16 = lsp_new_fx[5];
1841 1154754 : move16();
1842 :
1843 1154754 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1844 1154754 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1845 1154754 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1846 1154754 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1847 1154754 : move32();
1848 1154754 : temp_exp = 1;
1849 1154754 : move16();
1850 1154754 : temp16 = lsp_new_fx[6];
1851 1154754 : move16();
1852 :
1853 :
1854 1154754 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1855 1154754 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1856 1154754 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1857 1154754 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1858 1154754 : move32();
1859 : // temf = acosf( lsp_new[2] );
1860 : /* [2,3,4,5,6] LSFs */
1861 : /* *pFV++ = acosf( lsp_new[2] );
1862 : *pFV++ = acosf( lsp_new[3] );
1863 : *pFV++ = acosf( lsp_new[4] );
1864 : *pFV++ = acosf( lsp_new[5] );
1865 : *pFV++ = acosf( lsp_new[6] );*/
1866 :
1867 : /* [7] cor_map_sum */
1868 1154754 : *pFV_fx++ = L_shl( cor_map_sum_fx, Q12 ); /*scaling from Q8 to Q20*/
1869 1154754 : move32();
1870 :
1871 : /* [8] non_sta */
1872 1154754 : *pFV_fx++ = non_sta_fx; /*Q20*/
1873 1154754 : move32();
1874 :
1875 : /* [9] epsP */
1876 1154754 : temp32 = L_add( epsP_fx[14], L_shr( 21475, sub( 31, Q_esp ) ) );
1877 1154754 : temp32_log = L_add( BASOP_Util_Log2( temp32 ), L_shl( sub( Q31, Q_esp ), Q25 ) );
1878 1154754 : temp32_log1 = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/
1879 :
1880 1154754 : temp32 = L_add( epsP_fx[0], L_shr( 21475, sub( 31, Q_esp ) ) );
1881 1154754 : temp32_log = L_add( BASOP_Util_Log2( temp32 ), L_shl( sub( Q31, Q_esp ), Q25 ) );
1882 1154754 : temp32_log2 = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/
1883 :
1884 1154754 : *pFV_fx++ = L_shr( L_sub( temp32_log1, temp32_log2 ), Q5 );
1885 1154754 : move32();
1886 : //*pFV++ = logf( epsP[14] + 1e-5f ) - logf( epsP[0] + 1e-5f );
1887 :
1888 : /* [10,11,12] MFCCs */
1889 1154754 : set_zero_fx( melS_fx, NB_MEL_BANDS );
1890 :
1891 1154754 : pt_mel_fb_fx = mel_fb_fx;
1892 :
1893 47344914 : FOR( i = 0; i < NB_MEL_BANDS; i++ )
1894 : {
1895 46190160 : j = mel_fb_start[i];
1896 46190160 : move16();
1897 46190160 : len = mel_fb_len[i];
1898 46190160 : move16();
1899 46190160 : temp32 = dotp_me_fx( &PS_fx[j], pt_mel_fb_fx, len, 31 - Qfact_PS, Q1, &dotp_exp );
1900 46190160 : IF( LT_16( dotp_exp, -17 ) ) /*-18 is exponent of 10737:to avoid overflow when left shifting 10737*/
1901 : {
1902 2382 : temp32 = L_shr( temp32, sub( -17, dotp_exp ) );
1903 2382 : dotp_exp = -17;
1904 2382 : move16();
1905 : }
1906 46190160 : temp32_log = L_add_sat( BASOP_Util_Log2( L_add_sat( L_shr( temp32, 1 ), L_shr( 10737 /*1e-5f q30*/, dotp_exp ) ) ), L_shl( add( dotp_exp, 1 ), Q25 ) );
1907 46190160 : temp32_log = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/
1908 46190160 : melS_fx[i] = temp32_log;
1909 46190160 : move32();
1910 : // melS[i] = logf( dotp( &PS[j], pt_mel_fb, len ) + 1e-5f );
1911 46190160 : pt_mel_fb_fx += len;
1912 : }
1913 :
1914 1154754 : Word16 guard_bits = find_guarded_bits_fx( NB_MEL_BANDS );
1915 1154754 : move16();
1916 1154754 : v_mult_mat_fx( mfcc_fx, melS_fx, dct_mtx_fx, NB_MEL_BANDS, NB_MEL_COEF, guard_bits ); // Q19
1917 1154754 : *pFV_fx++ = L_shl( mfcc_fx[2], 1 ); // Q20
1918 1154754 : move32();
1919 1154754 : *pFV_fx++ = L_shl( mfcc_fx[6], 1 );
1920 1154754 : move32();
1921 1154754 : *pFV_fx++ = L_shl( mfcc_fx[12], 1 );
1922 1154754 : move32();
1923 : /* *pFV++ = mfcc[2];
1924 : *pFV++ = mfcc[6];
1925 : *pFV++ = mfcc[12];*/
1926 :
1927 : /* calculation of differential normalized power spectrum */
1928 1154754 : sum_PS_fx = 0;
1929 1154754 : move32();
1930 : Word16 q_temp32;
1931 1154754 : Word16 sum_PS_e = 0;
1932 1154754 : move16();
1933 1154754 : Word64 sum = W_shl( 21475 /* 1e-5 in Q31 */, sub( Qfact_PS, 30 ) ); // Qfact_PS+1
1934 1154754 : move64();
1935 78523272 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
1936 : {
1937 77368518 : sum = W_mac_32_32( sum, PS_fx[i], 1 ); // Qfact_PS+1
1938 : }
1939 1154754 : IF( sum == 0 )
1940 : {
1941 0 : sum_PS_fx = 1407374884; // 1e-5 in Q47
1942 0 : move32();
1943 0 : sum_PS_e = -16;
1944 0 : move16();
1945 : }
1946 : ELSE
1947 : {
1948 1154754 : sum_PS_e = W_norm( sum );
1949 1154754 : sum_PS_fx = W_extract_h( W_shl( sum, sum_PS_e ) ); // Qfact_PS+1+sum_PS_e-32
1950 1154754 : sum_PS_e = sub( 62, add( Qfact_PS, sum_PS_e ) ); // 31-(Qfact_PS+1+sum_PS_e-32)
1951 : }
1952 :
1953 78523272 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
1954 : {
1955 77368518 : temp32 = BASOP_Util_Divide3232_Scale_newton( PS_fx[i], sum_PS_fx, &temp_exp ); // 31-temp_exp
1956 77368518 : q_temp32 = add( sub( 31, temp_exp ), sub( Qfact_PS, sub( 31, sum_PS_e ) ) );
1957 77368518 : test();
1958 77368518 : if ( temp32 == 0 )
1959 : {
1960 283 : q_temp32 = 31;
1961 283 : move16();
1962 : }
1963 77368518 : IF( LT_16( q_temp32, 31 ) && EQ_32( temp32, L_shl( 1, q_temp32 ) ) )
1964 : {
1965 0 : temp32 = ONE_IN_Q31;
1966 0 : move32();
1967 0 : q_temp32 = Q31;
1968 0 : move16();
1969 : }
1970 77368518 : PS_norm_fx[i] = L_shl( temp32, sub( Qfact_PS_past, q_temp32 ) ); // Qfact_PS_past
1971 77368518 : move32();
1972 77368518 : dPS_fx[i] = L_abs( L_sub( PS_norm_fx[i], hSpMusClas->past_PS_fx[i - LOWEST_FBIN] ) );
1973 77368518 : move32();
1974 : }
1975 :
1976 : /* [13] ps_diff (spectral difference) */
1977 1154754 : ps_diff_fx = 0;
1978 1154754 : move32();
1979 78523272 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
1980 : {
1981 77368518 : ps_diff_fx = L_add( L_shr( dPS_fx[i], Q7 ), ps_diff_fx ); // Qfact_PS_past-7
1982 : }
1983 :
1984 1154754 : *pFV_fx++ = L_shr( ps_diff_fx, sub( sub( Qfact_PS_past, Q7 ), Q20 ) ); /// ps_diff;
1985 1154754 : move32();
1986 :
1987 : /* [14] ps_sta (spectral stationarity) */
1988 1154754 : Word32 ps_sta_fx = 0;
1989 1154754 : move32();
1990 1154754 : Word16 ps_sta_exp = 0;
1991 1154754 : move16();
1992 : Word32 avoid_divide_by_zero;
1993 1154754 : avoid_divide_by_zero = L_shr( 21475, sub( 31, Qfact_PS_past ) ); // 21475 = 1e-5 in Q31
1994 :
1995 78523272 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
1996 : {
1997 : Word32 tmp_max;
1998 77368518 : tmp_max = L_max( PS_norm_fx[i], hSpMusClas->past_PS_fx[i - LOWEST_FBIN] );
1999 : /* Saturation doesn't have a significant impact here, as a value of 1e-5 in Q31 format is added to prevent division by zero */
2000 77368518 : temp32 = BASOP_Util_Divide3232_Scale_newton( tmp_max, L_add_sat( dPS_fx[i], avoid_divide_by_zero ), &temp_exp ); // 31-temp_exp
2001 77368518 : ps_sta_fx = BASOP_Util_Add_Mant32Exp( temp32, temp_exp, ps_sta_fx, ps_sta_exp, &ps_sta_exp );
2002 : }
2003 1154754 : temp32_log = L_add( BASOP_Util_Log2( L_add_sat( ps_sta_fx, L_shr( 21475, ps_sta_exp ) ) ), L_shl( ps_sta_exp, Q25 ) );
2004 1154754 : temp32_log = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/
2005 1154754 : *pFV_fx++ = L_shr( temp32_log, Q5 ); // logf( ps_sta + 1e-5f );
2006 1154754 : move32();
2007 1154754 : MVR2R_WORD32( &PS_norm_fx[LOWEST_FBIN], hSpMusClas->past_PS_fx, HIGHEST_FBIN - LOWEST_FBIN );
2008 :
2009 : /* save ps_diff and ps_sta features for XTALK and UNCLR classifier */
2010 1154754 : IF( hStereoClassif != NULL )
2011 : {
2012 786051 : IF( st->idchan == 0 )
2013 : {
2014 422855 : hStereoClassif->ps_diff_ch1_fx = ps_diff_fx; // Qfact_PS_past - 7
2015 422855 : hStereoClassif->ps_diff_ch1_e = sub( 38, Qfact_PS_past ); // Qfact_PS_past - 7
2016 422855 : hStereoClassif->ps_sta_ch1_fx = temp32_log; // logf( ps_sta + 1e-5f );Q25
2017 422855 : hStereoClassif->ps_sta_ch1_e = 6; // logf( ps_sta + 1e-5f );Q25
2018 : }
2019 : ELSE
2020 : {
2021 363196 : hStereoClassif->ps_diff_ch2_fx = ps_diff_fx;
2022 363196 : hStereoClassif->ps_diff_ch2_e = sub( 38, Qfact_PS_past );
2023 363196 : hStereoClassif->ps_sta_ch2_fx = temp32_log; // logf( ps_sta + 1e-5f );Q25
2024 363196 : hStereoClassif->ps_sta_ch2_e = 6; // logf( ps_sta + 1e-5f );Q25
2025 : }
2026 786051 : move32();
2027 786051 : move16();
2028 786051 : move32();
2029 786051 : move16();
2030 : }
2031 :
2032 : /*------------------------------------------------------------------*
2033 : * Outlier detection based on feature histograms
2034 : *------------------------------------------------------------------*/
2035 1154754 : flag_odv = 0;
2036 1154754 : move16();
2037 1154754 : IF( localVAD_HE_SAD )
2038 : {
2039 978483 : pFV_fx = FV_fx;
2040 978483 : pODV_fx = hout_intervals_fx;
2041 978483 : p_out = i_out;
2042 978483 : odv_cnt = 0;
2043 978483 : move16();
2044 15655728 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
2045 : {
2046 14677245 : test();
2047 14677245 : IF( LT_32( *pFV_fx, pODV_fx[0] ) || GT_32( *pFV_fx, pODV_fx[1] ) )
2048 : {
2049 2544 : *p_out++ = i;
2050 2544 : odv_cnt = add( odv_cnt, 1 );
2051 : }
2052 :
2053 14677245 : pFV_fx++;
2054 14677245 : pODV_fx += 2;
2055 : }
2056 :
2057 : /* set outlier flag */
2058 978483 : IF( GE_16( odv_cnt, 2 ) )
2059 : {
2060 549 : flag_odv = 1;
2061 549 : move16();
2062 : /* replace outlying features with values from the previous frame */
2063 1954 : FOR( i = 0; i < odv_cnt; i++ )
2064 : {
2065 1405 : FV_fx[i_out[i]] = hSpMusClas->prev_FV_fx[i_out[i]];
2066 1405 : move32();
2067 : }
2068 : }
2069 : }
2070 :
2071 : /*------------------------------------------------------------------*
2072 : * Adaptive short-term mean filter on feature vector
2073 : *------------------------------------------------------------------*/
2074 1154754 : Qfact_FV = 20;
2075 1154754 : move16();
2076 1154754 : pFV_fx = FV_fx;
2077 1154754 : pFV_st_fx = hSpMusClas->FV_st_fx;
2078 1154754 : smc_st_mean_fact_fx = SMC_ST_MEAN_RSHIFT_FACT_FX;
2079 1154754 : move16();
2080 18476064 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
2081 : {
2082 : //*pFV_st = smc_st_mean_fact * ( *pFV_st ) + ( 1 - smc_st_mean_fact ) * ( *pFV );
2083 17321310 : *pFV_st_fx = L_add( L_shr( *pFV_st_fx, smc_st_mean_fact_fx ), L_shr( *pFV_fx, 1 ) );
2084 17321310 : move32();
2085 :
2086 17321310 : test();
2087 17321310 : test();
2088 17321310 : test();
2089 17321310 : test();
2090 17321310 : test();
2091 17321310 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) && ( relE_attack_flag || flag_odv ) )
2092 : {
2093 : /* strong attack or outlier frame during entry state -> features cannot be trusted but there is also no useful past info -> */
2094 : /* -> do whatever you want because dlp will be reset to 0 anyway */
2095 250080 : pFV_fx++;
2096 250080 : pFV_st_fx++;
2097 : }
2098 17071230 : ELSE IF( hSpMusClas->sp_mus_state == HANG_LEN && ( EQ_16( st->tc_cnt, 1 ) || EQ_16( st->tc_cnt, 2 ) ) )
2099 : {
2100 : /* energy attack in stable state -> use current features instead of the long-term average */
2101 1357470 : pFV_fx++;
2102 1357470 : pFV_st_fx++;
2103 : }
2104 : ELSE
2105 : {
2106 15713760 : *pFV_fx++ = *pFV_st_fx++;
2107 15713760 : move32();
2108 : }
2109 : }
2110 :
2111 : /* update */
2112 1154754 : MVR2R_WORD32( FV_fx, hSpMusClas->prev_FV_fx, N_SMC_FEATURES );
2113 : /*------------------------------------------------------------------*
2114 : * Non-linear power transformation (boxcox) on certain features
2115 : *------------------------------------------------------------------*/
2116 1154754 : pFV_fx = FV_fx;
2117 18476064 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
2118 : {
2119 17321310 : IF( bcox_lmbd_fx[i] != 0 )
2120 : {
2121 3464262 : *pFV_fx = L_sub( *pFV_fx, L_shr( bcox_add_cnst_fx[i], sub( 31, Qfact_FV ) ) );
2122 3464262 : move32();
2123 3464262 : IF( LT_32( *pFV_fx, L_shl( 1, Qfact_FV ) ) )
2124 : {
2125 113374 : *pFV_fx = L_shl( 1, Qfact_FV );
2126 113374 : move32();
2127 : }
2128 3464262 : Word16 pow_e = 0;
2129 3464262 : move32();
2130 3464262 : temp32_log = L_add( BASOP_Util_Log2( *pFV_fx ), L_shl( sub( 31, Qfact_FV ), Q25 ) ); // Q25
2131 3464262 : temp32 = Mpy_32_32( temp32_log, bcox_lmbd_fx[i] ); // Q25
2132 3464262 : Word32 pow_temp = BASOP_util_Pow2( temp32, 31 - Q25, &pow_e );
2133 3464262 : IF( pow_e <= 0 )
2134 : {
2135 1038110 : pow_temp = L_shr( pow_temp, sub( 1, pow_e ) );
2136 1038110 : pow_e = add( pow_e, sub( 1, pow_e ) );
2137 : }
2138 3464262 : temp32 = L_sub( pow_temp, L_shl( 1, 31 - pow_e ) );
2139 3464262 : temp_exp = 0;
2140 3464262 : move32();
2141 3464262 : temp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( temp32, bcox_lmbd_fx[i], &temp_exp ) );
2142 3464262 : *pFV_fx = L_shl( temp32, sub( Qfact_FV, sub( 31, add( temp_exp, pow_e ) ) ) );
2143 3464262 : move32();
2144 : // float temp = powf( *pFV, bcox_lmbd[i] );
2145 : // *pFV = ( powf( *pFV, bcox_lmbd[i] ) - 1 ) / bcox_lmbd[i];
2146 : }
2147 :
2148 17321310 : pFV_fx++;
2149 : }
2150 :
2151 : /*------------------------------------------------------------------*
2152 : * Scaling of the feature vector
2153 : * PCA
2154 : *------------------------------------------------------------------*/
2155 :
2156 1154754 : pFV_fx = FV_fx;
2157 18476064 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
2158 : {
2159 : /* Standard scaler - mean and variance normalization */
2160 : // *pFV = ( *pFV - sm_means[i] ) / sm_scale[i];
2161 17321310 : temp32 = L_sub( *pFV_fx, sm_means_fx[i] );
2162 17321310 : temp_exp = 0;
2163 17321310 : move16();
2164 17321310 : temp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( temp32, sm_scale_fx[i], &temp_exp ) );
2165 : // *pFV_fx = L_shl( temp32, Qfact_FV - ( 31 - temp_exp ) );
2166 17321310 : *pFV_fx = L_shl( temp32, sub( Qfact_FV, sub( 31, temp_exp ) ) );
2167 17321310 : move32();
2168 17321310 : pFV_fx++;
2169 : /* MinMax scaler - mean and variance normalization */
2170 : /**pFV = *pFV * sm_scale[i] + sm_min[i];*/
2171 : }
2172 :
2173 : /* PCA */
2174 1154754 : v_sub_fx_no_hdrm( FV_fx, pca_mean_fx, FV_fx, N_SMC_FEATURES );
2175 1154754 : v_mult_mat_fx( FV_fx, FV_fx, pca_components_fx, N_SMC_FEATURES, N_PCA_COEF, 0 );
2176 :
2177 : /*------------------------------------------------------------------*
2178 : * Calculation of posterior probability
2179 : * Log-probability
2180 : *------------------------------------------------------------------*/
2181 :
2182 : /* run loop for all mixtures (for each mixture, calculate the probability of speech, music and noise) */
2183 1154754 : lps_fx = lpm_fx = lpn_fx = 0;
2184 1154754 : move32();
2185 1154754 : move32();
2186 1154754 : move32();
2187 :
2188 8083278 : FOR( m = 0; m < N_SMC_MIXTURES; m++ )
2189 : {
2190 6928524 : v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
2191 6928524 : wprob_fx = dot_product_cholesky_fx( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10
2192 6928524 : ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_extract_l( W_shr( wprob_fx, Q10 ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
2193 6928524 : move32();
2194 6928524 : v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
2195 6928524 : wprob_fx = dot_product_cholesky_fx( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10
2196 6928524 : pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_extract_l( W_shr( wprob_fx, Q10 ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
2197 6928524 : move32();
2198 6928524 : v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
2199 6928524 : wprob_fx = dot_product_cholesky_fx( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10
2200 6928524 : pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_extract_l( W_shr( wprob_fx, Q10 ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
2201 6928524 : move32();
2202 : }
2203 :
2204 1154754 : lps_fx = logsumexp_fx( ps_fx, 31 - Q18, N_SMC_MIXTURES );
2205 1154754 : lpm_fx = logsumexp_fx( pm_fx, 31 - Q18, N_SMC_MIXTURES );
2206 1154754 : lpn_fx = logsumexp_fx( pn_fx, 31 - Q18, N_SMC_MIXTURES );
2207 1154754 : *high_lpn_flag = 0;
2208 1154754 : move16();
2209 1154754 : if ( GT_32( lpn_fx, lps_fx ) && GT_32( lpn_fx, lpm_fx ) )
2210 : {
2211 143495 : *high_lpn_flag = 1;
2212 143495 : move32();
2213 : }
2214 1154754 : hSpMusClas->lpm_fx = extract_h( L_shl_sat( lpm_fx, 16 - 11 ) ); // Q7
2215 1154754 : move16();
2216 1154754 : hSpMusClas->lps_fx = extract_h( L_shl_sat( lps_fx, 16 - 11 ) ); // Q7
2217 1154754 : move16();
2218 1154754 : hSpMusClas->lpn_fx = extract_h( L_shl_sat( lpn_fx, 16 - 11 ) ); // Q7
2219 1154754 : move16();
2220 : /* determine HQ Generic speech class */
2221 1154754 : IF( st->hHQ_core != NULL )
2222 : {
2223 421725 : IF( GT_32( lps_fx, L_add( lpm_fx, ONE_IN_Q17 ) ) )
2224 : {
2225 159830 : st->hHQ_core->hq_generic_speech_class = 1;
2226 : }
2227 : ELSE
2228 : {
2229 261895 : st->hHQ_core->hq_generic_speech_class = 0;
2230 : }
2231 421725 : move16();
2232 : }
2233 :
2234 : /*------------------------------------------------------------------*
2235 : * Decision without hangover
2236 : * Weighted decision
2237 : *------------------------------------------------------------------*/
2238 1154754 : test();
2239 1154754 : test();
2240 1154754 : test();
2241 1154754 : test();
2242 1154754 : test();
2243 : /* decision without hangover (0 - speech/noise, 1 - music) */
2244 1154754 : IF( !localVAD_HE_SAD || LT_16( Etot_fx, 2560 ) || ( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) && ( relE_attack_flag || flag_odv ) ) )
2245 : {
2246 217195 : dlp_fx = 0;
2247 217195 : move32();
2248 : }
2249 : ELSE
2250 : {
2251 937559 : dlp_fx = L_add( L_sub( lpm_fx, lps_fx ), DLP_BIAS_FX );
2252 937559 : dlp_fx = L_shl( dlp_fx, 1 ); // Q19
2253 :
2254 937559 : IF( GT_32( dlp_fx, 15728640 ) ) /*30.0f in Q19*/
2255 : {
2256 33291 : dlp_fx = 15728640;
2257 : }
2258 904268 : ELSE IF( LT_32( dlp_fx, -15728640 ) )
2259 : {
2260 0 : dlp_fx = -15728640;
2261 : }
2262 937559 : move32();
2263 : }
2264 :
2265 1154754 : dec = (Word16) GT_32( dlp_fx, 0 );
2266 1154754 : move16();
2267 : /* calculate weight based on relE (higher relE -> lower weight, lower relE -> higher weight) */
2268 :
2269 1154754 : wrelE_fx = lin_interp32_fx( L_deposit_h( relE_fx ), 15 << 24, 1932735283 /*0.9 in Q31*/, -( 15 << 24 ), 2126008812 /*0.99 in Q31*/, 1 ); // Q31
2270 : /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
2271 : // hSpMusClas->dlp_mean_ST = 0.8f * hSpMusClas->dlp_mean_ST + 0.2f * dlp;
2272 1154754 : hSpMusClas->dlp_mean_ST_fx = L_add( Mpy_32_32( 1717986918, hSpMusClas->dlp_mean_ST_fx ), Mpy_32_32( 429496729, dlp_fx ) );
2273 1154754 : hSpMusClas->lt_dec_thres_fx = extract_l( L_shr( hSpMusClas->dlp_mean_ST_fx, 10 ) );
2274 1154754 : test();
2275 1154754 : IF( dlp_fx < 0 && LT_32( dlp_fx, hSpMusClas->dlp_mean_ST_fx ) )
2276 : {
2277 260083 : IF( hSpMusClas->dlp_mean_ST_fx > 0 )
2278 : {
2279 77698 : hSpMusClas->wdrop_32fx = L_negate( dlp_fx ); // Q19
2280 77698 : move32();
2281 : }
2282 182385 : ELSE IF( hSpMusClas->wdrop_32fx > 0 )
2283 : {
2284 42381 : hSpMusClas->wdrop_32fx = L_add( hSpMusClas->wdrop_32fx, L_sub( hSpMusClas->dlp_mean_ST_fx, dlp_fx ) );
2285 42381 : move32();
2286 : }
2287 260083 : move16();
2288 : }
2289 : ELSE
2290 : {
2291 894671 : hSpMusClas->wdrop_32fx = 0;
2292 894671 : move32();
2293 : }
2294 1154754 : wdrop_fx = lin_interp32_fx( hSpMusClas->wdrop_32fx, 7864320, 1503238554 /* 0.7 in Q31 */, 0, ONE_IN_Q31 /* 1.0f in Q31 */, 1 ); /* Q31 */
2295 :
2296 1154754 : test();
2297 1154754 : test();
2298 : /* calculate weight based on rises of dlp (close to 1 during sudden rise of dlp, close to 0 otherwise) */
2299 1154754 : IF( EQ_16( hSpMusClas->sp_mus_state, HANG_LEN ) && hSpMusClas->dlp_mean_ST_fx > 0 && GT_32( hSpMusClas->dlp_mean_ST_fx, hSpMusClas->past_dlp_mean_ST_fx[0] ) )
2300 : {
2301 252701 : IF( hSpMusClas->past_dlp_mean_ST_fx[0] < 0 )
2302 : {
2303 14119 : hSpMusClas->wrise_fx = extract_l( L_shr( hSpMusClas->dlp_mean_ST_fx, 10 ) );
2304 : }
2305 238582 : ELSE IF( hSpMusClas->wrise_fx > 0 )
2306 : {
2307 35055 : hSpMusClas->wrise_fx = add( hSpMusClas->wrise_fx, extract_l( L_shr( L_sub( hSpMusClas->dlp_mean_ST_fx, hSpMusClas->past_dlp_mean_ST_fx[0] ), 10 ) ) );
2308 : }
2309 252701 : move16();
2310 : }
2311 : ELSE
2312 : {
2313 902053 : hSpMusClas->wrise_fx = 0;
2314 902053 : move16();
2315 : }
2316 :
2317 :
2318 1154754 : wrise_fx = lin_interp32_fx( L_deposit_h( hSpMusClas->wrise_fx ), 167772160, 2040109466 /* 0.95 in Q31 */, 0, ONE_IN_Q31 /* 1.0f in Q31 */, 1 ); /* Q31 */
2319 : /* combine weights into one */
2320 : // wght = wrelE * wdrop * wrise;
2321 1154754 : wght_fx = Mpy_32_32( Mpy_32_32( wrelE_fx, wdrop_fx ), wrise_fx ); /* Q31 */
2322 1154754 : test();
2323 : /* ratio of delta means vs. delta variances */
2324 1154754 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
2325 : {
2326 :
2327 71439 : hSpMusClas->dlp_mean_LT_fx = dlp_fx;
2328 71439 : move32();
2329 71439 : hSpMusClas->dlp_var_LT_fx = 0;
2330 71439 : move32();
2331 : }
2332 :
2333 1154754 : hSpMusClas->dlp_mean_LT_fx = L_add( Mpy_32_32( 1932735283, hSpMusClas->dlp_mean_LT_fx ), Mpy_32_32( 214748365, dlp_fx ) ); // Q19
2334 :
2335 1154754 : temp32 = L_sub( dlp_fx, hSpMusClas->dlp_mean_LT_fx );
2336 1154754 : temp32 = W_extract_l( W_shr( W_mult0_32_32( temp32, temp32 ), 19 ) ); /*q19*/
2337 1154754 : hSpMusClas->dlp_var_LT_fx = L_add( Mpy_32_32( 1932735283, hSpMusClas->dlp_var_LT_fx ), Mpy_32_32( 214748365, temp32 ) );
2338 :
2339 1154754 : test();
2340 1154754 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
2341 : {
2342 71439 : dlp_mean2var_fx = 0;
2343 71439 : move16();
2344 71439 : dlp_mean2var_q = 0;
2345 71439 : move16();
2346 : }
2347 : ELSE
2348 : {
2349 1083315 : temp_exp = sub( Q31, Q19 );
2350 1083315 : Word16 div_e = 0;
2351 1083315 : move16();
2352 1083315 : temp_sqrt = Sqrt32( L_abs( hSpMusClas->dlp_var_LT_fx ), &temp_exp );
2353 1083315 : IF( temp_exp < 0 )
2354 : {
2355 61795 : temp_sqrt = L_shl( temp_sqrt, temp_exp );
2356 61795 : temp_exp = 0;
2357 61795 : move16();
2358 : }
2359 1083315 : temp_sqrt = L_shr( temp_sqrt, 1 ); /*adding 1 as guard bit to avoid overflow in addition*/
2360 1083315 : temp_exp = add( temp_exp, 1 );
2361 1083315 : temp_sqrt = L_add( temp_sqrt, L_shl( 1, sub( 31, temp_exp ) ) );
2362 1083315 : dlp_mean2var_fx = BASOP_Util_Divide3232_Scale( L_abs( hSpMusClas->dlp_mean_LT_fx ), temp_sqrt, &div_e );
2363 1083315 : dlp_mean2var_q = sub( add( Q3, temp_exp ), div_e ); // 15-div_e+Q19 -(31-temp_exp)
2364 1083315 : IF( GT_16( dlp_mean2var_q, 26 ) )
2365 : {
2366 17297 : dlp_mean2var_fx = shl( dlp_mean2var_fx, sub( 26, dlp_mean2var_q ) );
2367 17297 : dlp_mean2var_q = 26;
2368 17297 : move16();
2369 : }
2370 : }
2371 :
2372 1154754 : IF( GT_32( L_deposit_l( dlp_mean2var_fx ), L_shl( 15, dlp_mean2var_q ) ) )
2373 : {
2374 : /* decrease the weight little bit when the classifier indicates "strong speech" or "strong music" */
2375 3147 : wght_fx = Mpy_32_32( wght_fx, 1932735283 /* 0.9f in Q31 */ ); /* Q31 */
2376 : }
2377 :
2378 1154754 : IF( GT_32( wght_fx, ONE_IN_Q31 ) )
2379 : {
2380 0 : wght_fx = ONE_IN_Q31; /* 1.0f in Q31 */
2381 : }
2382 1154754 : ELSE IF( LT_32( wght_fx, 21474836 /* 0.01f in Q31 */ ) )
2383 : {
2384 0 : wght_fx = 21474836; /* 0.01f in Q31 */
2385 : }
2386 1154754 : move32();
2387 1154754 : if ( LT_16( Etot_fx, 2560 /* 10f in Q8 */ ) )
2388 : {
2389 : /* silence */
2390 136710 : wght_fx = 1975684956; /* 0.92f in Q31 */
2391 136710 : move32();
2392 : }
2393 :
2394 : /* calculate weighted decision */
2395 : // hSpMusClas->wdlp_0_95_sp = wght * hSpMusClas->wdlp_0_95_sp + ( 1 - wght ) * dlp;
2396 1154754 : hSpMusClas->wdlp_0_95_sp_32fx = L_add( Mpy_32_32( wght_fx, hSpMusClas->wdlp_0_95_sp_32fx /*q24*/ ), Mpy_32_32( L_sub( ONE_IN_Q31, wght_fx ), L_shl( dlp_fx /*q19*/, 5 ) ) ); // Q24
2397 1154754 : move32();
2398 :
2399 : /* xtalk classifier: apply long hysteresis to prevent LRTD on music */
2400 :
2401 1154754 : hSpMusClas->wdlp_xtalk_fx = Madd_32_32( Mpy_32_32( 2136746230 /* 0.995f in Q31*/, hSpMusClas->wdlp_xtalk_fx /* Q25*/ ), 687194767 /* 0.005f in Q37 */, dlp_fx /* Q19*/ ); // Q25
2402 1154754 : move32();
2403 :
2404 : /*------------------------------------------------------------------*
2405 : * Final speech/music decision
2406 : *------------------------------------------------------------------*/
2407 :
2408 1154754 : IF( flag_spitch )
2409 : {
2410 39937 : hSpMusClas->flag_spitch_cnt = 5;
2411 39937 : move16();
2412 : }
2413 1114817 : ELSE IF( hSpMusClas->flag_spitch_cnt > 0 )
2414 : {
2415 5855 : hSpMusClas->flag_spitch_cnt = sub( hSpMusClas->flag_spitch_cnt, 1 );
2416 5855 : move16();
2417 : }
2418 1154754 : test();
2419 1154754 : IF( Etot_fx < 2560 )
2420 : {
2421 : /* silence */
2422 136710 : dec = 0;
2423 136710 : move16();
2424 : }
2425 1018044 : ELSE IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
2426 : {
2427 71439 : temp32 = L_mult( w_spmus_fx[hSpMusClas->sp_mus_state - 1][0], (Word16) L_shr( dlp_fx, 10 ) ); /*Q25 */
2428 71439 : temp32 = L_add( temp32, Dot_product( &w_spmus_fx[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp_fx, sub( HANG_LEN, 1 ) ) );
2429 : /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
2430 71439 : IF( GT_32( temp32, 2 << 25 ) )
2431 : {
2432 35229 : IF( GT_32( dlp_fx, 2 << 19 ) )
2433 : {
2434 24388 : dec = 2;
2435 : }
2436 : ELSE
2437 : {
2438 10841 : dec = 1;
2439 : }
2440 : }
2441 : ELSE
2442 : {
2443 36210 : dec = 0;
2444 : }
2445 71439 : move16();
2446 : }
2447 : ELSE
2448 : {
2449 946605 : test();
2450 946605 : test();
2451 946605 : test();
2452 946605 : test();
2453 946605 : test();
2454 946605 : test();
2455 946605 : test();
2456 946605 : test();
2457 946605 : test();
2458 946605 : test();
2459 946605 : test();
2460 946605 : test();
2461 946605 : test();
2462 946605 : test();
2463 : /* stable active state */
2464 946605 : IF( hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 &&
2465 : ( ( hSpMusClas->flag_spitch_cnt > 0 && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 57042534 /*3.4*(2^24)*/ ) ) || ( hSpMusClas->flag_spitch_cnt == 0 && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 35232154 /*2.1*(2^24)*/ ) ) ) )
2466 : {
2467 : /* switching from speech to unclear */
2468 1950 : dec = 1;
2469 : }
2470 944655 : ELSE IF( hSpMusClas->past_dec[0] == 0 && LT_16( hSpMusClas->vad_0_1_cnt, 50 ) && hSpMusClas->relE_attack_sum_fx == 0 && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 1 << 24 ) )
2471 : {
2472 : /* switch from speech to unclear also during slowly rising weak music onsets */
2473 3371 : dec = 1;
2474 : }
2475 941284 : ELSE IF( EQ_16( hSpMusClas->past_dec[0], 1 ) && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 41943040 /*2.5*2^24*/ ) )
2476 : {
2477 : /* switching from unclear to music */
2478 4228 : dec = 2;
2479 : }
2480 937056 : ELSE IF( EQ_16( hSpMusClas->past_dec[0], 2 ) && EQ_16( hSpMusClas->past_dec[1], 2 ) && EQ_16( hSpMusClas->past_dec[2], 2 ) && LT_32( hSpMusClas->wdlp_0_95_sp_32fx, -( 1 << 24 ) ) )
2481 : {
2482 : /* switching from music to unclear */
2483 2449 : dec = 1;
2484 : }
2485 934607 : ELSE IF( EQ_16( hSpMusClas->past_dec[0], 1 ) && LT_32( hSpMusClas->wdlp_0_95_sp_32fx, -( 41943040 /*2.5*2^24*/ ) ) )
2486 : {
2487 : /* switching from unclear to speech */
2488 2534 : dec = 0;
2489 : }
2490 : ELSE
2491 : {
2492 932073 : dec = hSpMusClas->past_dec[0];
2493 : }
2494 946605 : move16();
2495 : }
2496 :
2497 : /*------------------------------------------------------------------*
2498 : * raw S/M decision based on smoothed GMM score
2499 : *------------------------------------------------------------------*/
2500 1154754 : test();
2501 1154754 : IF( dec == 0 || st->hSpMusClas->wdlp_0_95_sp_32fx <= 0 )
2502 : {
2503 701442 : st->sp_aud_decision0 = 0;
2504 701442 : st->sp_aud_decision1 = 0;
2505 : }
2506 : ELSE
2507 : {
2508 453312 : st->sp_aud_decision0 = 1;
2509 453312 : st->sp_aud_decision1 = 1;
2510 : }
2511 1154754 : move16();
2512 1154754 : move16();
2513 : /*------------------------------------------------------------------*
2514 : * Updates
2515 : *------------------------------------------------------------------*/
2516 :
2517 : /* update buffer of past non-binary decisions */
2518 1154754 : Copy( &hSpMusClas->past_dlp_fx[0], &hSpMusClas->past_dlp_fx[1], HANG_LEN - 2 );
2519 1154754 : hSpMusClas->past_dlp_fx[0] = extract_l( L_shr( dlp_fx, 10 ) );
2520 1154754 : move16();
2521 :
2522 1154754 : Copy32( &hSpMusClas->past_dlp_mean_ST_fx[0], &hSpMusClas->past_dlp_mean_ST_fx[1], HANG_LEN - 2 );
2523 1154754 : hSpMusClas->past_dlp_mean_ST_fx[0] = hSpMusClas->dlp_mean_ST_fx;
2524 1154754 : move32();
2525 :
2526 : /* update buffer of past binary decisions */
2527 1154754 : mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
2528 1154754 : hSpMusClas->past_dec[0] = dec;
2529 1154754 : move16();
2530 : #ifdef DEBUG_MODE_INFO
2531 : dbgwrite( &st->hSpMusClas->wdlp_0_95_sp_32fx, sizeof( Word32 ), 1, 1, "res/wdlp_0_95_sp.x" );
2532 : #endif
2533 :
2534 1154754 : return dec;
2535 : }
2536 :
2537 : /*---------------------------------------------------------------------*
2538 : * var_cor_calc_ivas_fx()
2539 : *
2540 : * Calculate variance of correlation
2541 : *---------------------------------------------------------------------*/
2542 :
2543 414383 : static void var_cor_calc_ivas_fx(
2544 : const Word16 old_corr, /* Q15 */
2545 : Word16 *mold_corr, /* Q15 */
2546 : Word16 var_cor_t[], /* Q11 */
2547 : Word16 *high_stable_cor )
2548 : {
2549 : Word16 i, var_cor;
2550 :
2551 : /* update buffer of old correlation values */
2552 4143830 : FOR( i = VAR_COR_LEN - 1; i > 0; i-- )
2553 : {
2554 3729447 : var_cor_t[i] = var_cor_t[i - 1]; /*Q11*/
2555 3729447 : move16();
2556 : }
2557 414383 : var_cor_t[i] = shr( old_corr, 4 ); /* Q11 */
2558 414383 : move16();
2559 :
2560 : /* calculate variance of correlation */
2561 414383 : var_cor = var_fx( var_cor_t, 11, VAR_COR_LEN );
2562 :
2563 414383 : *high_stable_cor = 0;
2564 414383 : move16();
2565 414383 : test();
2566 414383 : if ( GT_16( *mold_corr, 26214 ) && LT_16( var_cor, 1 ) )
2567 : {
2568 5610 : *high_stable_cor = 1;
2569 5610 : move16();
2570 : }
2571 :
2572 : /* update average correlation */
2573 : /*st->mold_corr = 0.1f * st->old_corr + 0.9f * st->mold_corr;*/
2574 414383 : *mold_corr = mac_r( L_mult( 3277, old_corr ), 29491, *mold_corr ); /*Q15 */
2575 414383 : move16();
2576 :
2577 414383 : return;
2578 : }
2579 :
2580 : /*---------------------------------------------------------------------*
2581 : * attack_det_ivas_fx()
2582 : *
2583 : * Attack detection
2584 : *---------------------------------------------------------------------*/
2585 :
2586 414383 : static Word16 attack_det_ivas_fx( /* o : attack flag */
2587 : const Word16 *inp, /* i : input signal */
2588 : const Word16 Qx,
2589 : const Word16 last_clas, /* i : last signal clas */
2590 : const Word16 localVAD, /* i : local VAD flag */
2591 : const Word16 coder_type, /* i : coder type */
2592 : const Word32 total_brate, /* i : total bitrate */
2593 : const Word16 element_mode, /* i : IVAS element mode */
2594 : const Word16 clas, /* i : signal class */
2595 : Word32 finc_prev[], /* i/o: previous finc, (q_finc_prev) */
2596 : Word16 *q_finc_prev, /* i/o: Q of previous finc */
2597 : Word32 *lt_finc, /* i/o: long-term mean finc, (q_lt_finc) */
2598 : Word16 *q_lt_finc, /* i/o: Q of lt_finc */
2599 : Word16 *last_strong_attack /* i/o: last strong attack flag */
2600 : )
2601 : {
2602 : Word16 i, j, tmp, tmp1, attack, exp1, etmp_e, etmp2_e, s;
2603 : Word32 L_tmp, etmp, etmp2, finc[ATT_NSEG], mean_finc;
2604 : Word16 att_3lsub_pos;
2605 : Word16 attack1;
2606 : Word64 W_tmp;
2607 : Word16 q_diff;
2608 :
2609 414383 : att_3lsub_pos = ATT_3LSUB_POS;
2610 414383 : move16();
2611 414383 : if ( GE_32( total_brate, ACELP_24k40 ) )
2612 : {
2613 0 : att_3lsub_pos = ATT_3LSUB_POS_16k;
2614 0 : move16();
2615 : }
2616 :
2617 : /* compute energy per section */
2618 13674639 : FOR( i = 0; i < ATT_NSEG; i++ )
2619 : {
2620 13260256 : L_tmp = L_mult0( inp[i * ATT_SEG_LEN], inp[i * ATT_SEG_LEN] ); /*2*Qx */
2621 :
2622 106082048 : FOR( j = 1; j < ATT_SEG_LEN; j++ )
2623 : {
2624 92821792 : L_tmp = L_mac0_sat( L_tmp, inp[i * ATT_SEG_LEN + j], inp[i * ATT_SEG_LEN + j] ); /*2*Qx */
2625 : }
2626 :
2627 13260256 : finc[i] = L_tmp;
2628 13260256 : move32();
2629 : }
2630 :
2631 414383 : attack = maximum_32_fx( finc, ATT_NSEG, &etmp );
2632 414383 : attack1 = attack;
2633 414383 : move16();
2634 :
2635 414383 : *q_finc_prev = shl( Qx, 1 ); // Q of finc
2636 414383 : move16();
2637 414383 : q_diff = sub( *q_finc_prev, *q_lt_finc );
2638 414383 : test();
2639 414383 : IF( EQ_16( localVAD, 1 ) && EQ_16( coder_type, GENERIC ) )
2640 : {
2641 : /*----------------------------------------------------------------------*
2642 : * Detect if there is a strong onset in the last subframe
2643 : * - if detected, TC is used to better code the onset
2644 : *----------------------------------------------------------------------*/
2645 :
2646 : /* compute mean energy in the first three subframes */
2647 209064 : exp1 = norm_s( att_3lsub_pos );
2648 209064 : tmp = div_s( shl( 1, sub( 14, exp1 ) ), att_3lsub_pos ); /*Q(29-exp1) */
2649 :
2650 209064 : W_tmp = 0;
2651 209064 : move64();
2652 5226600 : FOR( i = 0; i < att_3lsub_pos; i++ )
2653 : {
2654 5017536 : W_tmp = W_add( W_tmp, finc[i] ); /* *q_ finc_prev */
2655 : }
2656 209064 : s = W_norm( W_tmp );
2657 209064 : L_tmp = W_extract_h( W_shl( W_tmp, s ) ); // *q_finc_prev + s - 32
2658 :
2659 209064 : L_tmp = Mpy_32_16_1( L_tmp, tmp ); /* *q_finc_prev + s - 32 + Q29 - exp1 - 15 => *q_finc_prev + s - exp1 - 18 */
2660 209064 : etmp = L_tmp;
2661 209064 : move32();
2662 209064 : etmp_e = sub( 31, add( *q_finc_prev, sub( s, add( exp1, 18 ) ) ) );
2663 :
2664 :
2665 209064 : tmp1 = sub( ATT_NSEG, attack );
2666 209064 : exp1 = norm_s( tmp1 );
2667 209064 : tmp = div_s( shl( 1, sub( 14, exp1 ) ), tmp1 ); /*Q(29-exp1) */
2668 :
2669 209064 : W_tmp = 0;
2670 209064 : move64();
2671 3634642 : FOR( i = 0; i < tmp1; i++ )
2672 : {
2673 3425578 : W_tmp = W_add( W_tmp, finc[i + attack] ); /* *q_finc_prev */
2674 : }
2675 209064 : s = W_norm( W_tmp );
2676 209064 : L_tmp = W_extract_h( W_shl( W_tmp, s ) ); // *q_finc_prev + s - 32
2677 :
2678 209064 : L_tmp = Mpy_32_16_1( L_tmp, tmp ); /* *q_finc_prev + s - 32 + Q29 - exp1 - 15 => *q_finc_prev + s - exp1 - 18 */
2679 209064 : etmp2 = L_tmp;
2680 209064 : move32();
2681 209064 : etmp2_e = sub( 31, add( *q_finc_prev, sub( s, add( exp1, 18 ) ) ) );
2682 :
2683 : /* and compare them */
2684 : /* if ( etmp * 8 > etmp2 ) */
2685 209064 : if ( BASOP_Util_Cmp_Mant32Exp( etmp, add( etmp_e, 3 ), etmp2, etmp2_e ) > 0 )
2686 : {
2687 : /* stop, if the attack is not sufficiently strong */
2688 202707 : attack = 0;
2689 202707 : move16();
2690 : }
2691 :
2692 209064 : test();
2693 : /* if ( last_clas == VOICED_CLAS && etmp * 20 > etmp2 ) */
2694 209064 : if ( EQ_16( last_clas, VOICED_CLAS ) && BASOP_Util_Cmp_Mant32Exp( etmp, etmp_e, Mpy_32_16_1( etmp2, 1638 /* 1/20 in Q15 */ ), etmp2_e ) > 0 )
2695 : {
2696 : /* stop, if the signal was voiced and the attack is not sufficiently strong */
2697 49363 : attack = 0;
2698 49363 : move16();
2699 : }
2700 :
2701 : /* compare also wrt. other sections (reduces a misclassification) */
2702 209064 : IF( attack > 0 )
2703 : {
2704 5751 : etmp2 = L_add( finc[attack], 0 );
2705 5751 : etmp = Mult_32_16( etmp2, 16384 ); /* etmp2 / 2.0 = (etmp2*0.5) */
2706 117837 : FOR( i = 2; i < ATT_3LSUB_POS - 2; i++ )
2707 : {
2708 112520 : IF( GT_32( finc[i], etmp ) )
2709 : {
2710 434 : attack = 0;
2711 434 : move16();
2712 434 : BREAK;
2713 : }
2714 : }
2715 : }
2716 :
2717 209064 : test();
2718 209064 : test();
2719 209064 : test();
2720 209064 : IF( attack == 0 && GT_16( element_mode, EVS_MONO ) && ( LT_16( clas, VOICED_TRANSITION ) || EQ_16( clas, ONSET ) ) )
2721 : {
2722 136845 : Copy32( finc, finc_prev, attack1 );
2723 :
2724 : /* compute mean energy before the attack */
2725 136845 : Word64 W_etmp = W_deposit32_l( finc_prev[0] );
2726 4379040 : FOR( Word16 idx = 1; idx < ATT_NSEG; idx++ )
2727 : {
2728 4242195 : W_etmp = W_add( W_etmp, W_deposit32_l( finc_prev[idx] ) );
2729 : }
2730 136845 : W_etmp = W_shr( W_etmp, 5 ); /*ATT_NSEG == 32*/
2731 :
2732 136845 : etmp2 = finc[attack1];
2733 136845 : move32();
2734 136845 : test();
2735 136845 : test();
2736 136845 : if ( ( LT_64( W_shl( W_etmp, 4 ), W_deposit32_l( etmp2 ) ) ) || ( LT_64( W_add( W_shl( W_etmp, 3 ), W_shl( W_etmp, 2 ) ), W_deposit32_l( etmp2 ) ) && EQ_16( last_clas, UNVOICED_CLAS ) ) )
2737 : {
2738 5007 : attack = attack1;
2739 5007 : move16();
2740 : }
2741 136845 : test();
2742 136845 : if ( GT_32( L_shl_sat( *lt_finc, q_diff ), Mpy_32_32( etmp2, 107374182 /* 1.f/20 in Q31 */ ) ) || *last_strong_attack )
2743 : {
2744 128966 : attack = 0;
2745 128966 : move16();
2746 : }
2747 : }
2748 209064 : *last_strong_attack = attack;
2749 209064 : move16();
2750 : }
2751 205319 : ELSE IF( attack > 0 )
2752 : {
2753 194644 : etmp2 = L_add( finc[attack], 0 );
2754 194644 : etmp = Mult_32_16( etmp2, 25206 ); /* etmp2 / 1.3 = (etmp2*0.76923) */
2755 2523604 : FOR( i = 2; i < att_3lsub_pos - 2; i++ )
2756 : {
2757 : /*if( i != attack && finc[i] * 1.3f > etmp2 ) -> finc[i] > (etmp2*0.76923) */
2758 2449303 : test();
2759 2449303 : IF( NE_16( i, attack ) && GT_32( finc[i], etmp ) )
2760 : {
2761 120343 : attack = 0;
2762 120343 : move16();
2763 120343 : BREAK;
2764 : }
2765 : }
2766 194644 : *last_strong_attack = 0;
2767 194644 : move16();
2768 : }
2769 :
2770 : /* updates */
2771 414383 : Copy32( finc, finc_prev, ATT_NSEG );
2772 :
2773 : /* Calculating mean of finc */
2774 414383 : W_tmp = W_mult_32_16( finc[0], 1 ); // q_finc_prev+1
2775 13260256 : FOR( i = 1; i < ATT_NSEG; i++ )
2776 : {
2777 12845873 : W_tmp = W_mac_32_16( W_tmp, finc[i], 1 ); // q_finc_prev+1
2778 : }
2779 : /* mean = W_tmp / 32 and change the Q from q_finc_prev+1 to q_finc_prev
2780 : Mean value doesn't saturate, W_shl_sat_l is used only considering complexity */
2781 414383 : mean_finc = W_shl_sat_l( W_tmp, -Q6 ); // q_finc_prev+1 -> q_finc_prev
2782 :
2783 : //*lt_finc = 0.95f * *lt_finc + 0.05f * mean( finc, ATT_NSEG );
2784 414383 : IF( q_diff > 0 ) /* q_finc_prev > q_lt_finc */
2785 : {
2786 289889 : mean_finc = L_shr( mean_finc, q_diff ); // q_lt_finc
2787 289889 : *lt_finc = Madd_32_32( Mpy_32_32( *lt_finc, 2040109466 /* 0.95 in Q31 */ ), mean_finc, 107374182 /* 0.05f in Q31 */ ); // q_lt_finc
2788 289889 : move32();
2789 : }
2790 : ELSE
2791 : {
2792 124494 : *lt_finc = Madd_32_32( Mpy_32_32( L_shl( *lt_finc, q_diff ), 2040109466 /* 0.95 in Q31 */ ), mean_finc, 107374182 /* 0.05f in Q31 */ ); // q_finc_prev
2793 124494 : move32();
2794 124494 : *q_lt_finc = *q_finc_prev;
2795 124494 : move16();
2796 : }
2797 :
2798 414383 : return attack;
2799 : }
2800 :
2801 : /*---------------------------------------------------------------------*
2802 : * tonal_det_fx()
2803 : *
2804 : * Tonal detector based on spectral stability and harmonicity
2805 : *---------------------------------------------------------------------*/
2806 :
2807 414383 : static Word32 tonal_det_fx(
2808 : const Word16 S[], // Q7
2809 : Word16 vad_flag,
2810 : Word32 tod_S_map_lt[], // Q22
2811 : Word32 *tod_thr_lt, // Q22
2812 : Word16 *tod_weight, // Q15
2813 : Word32 *tod_S_mass_prev, // Q22
2814 : Word32 *tod_S_mass_lt // Q22
2815 : )
2816 : {
2817 : Word16 i;
2818 : Word32 S_mass, alpha;
2819 : Word32 L_tmp;
2820 : Word64 W_tmp;
2821 :
2822 : /* update the adaptive weight */
2823 414383 : *tod_weight = add( mult( TON_ALPHA_FX, *tod_weight ), imult1616( ( 32767 - TON_ALPHA_FX ), vad_flag ) );
2824 414383 : move16();
2825 414383 : IF( GT_16( *tod_weight, TON_ALPHA_FX ) )
2826 : {
2827 269193 : *tod_weight = TON_ALPHA_FX;
2828 269193 : move16();
2829 : }
2830 145190 : ELSE IF( LT_16( *tod_weight, ( 32767 - TON_ALPHA_FX ) ) )
2831 : {
2832 28982 : *tod_weight = 32767 - TON_ALPHA_FX;
2833 28982 : move16();
2834 : }
2835 :
2836 : /* calculate LT spectral correlation in each band up to 4KHz */
2837 414383 : W_tmp = 0;
2838 414383 : move64();
2839 33565023 : FOR( i = 0; i < TOD_NSPEC; i++ )
2840 : {
2841 33150640 : tod_S_map_lt[i] = L_add( Mpy_32_16_1( tod_S_map_lt[i], *tod_weight ), L_mult0( sub( 32767, *tod_weight ), S[i] ) ); // Q22
2842 33150640 : move16();
2843 :
2844 33150640 : W_tmp = W_add( W_tmp, (Word64) ( tod_S_map_lt[i] ) ); // Q22
2845 : }
2846 : // S_mass /= TOD_NSPEC;
2847 414383 : L_tmp = W_extract_l( W_tmp ); // Q22
2848 414383 : S_mass = ( Mpy_32_32( L_tmp, TOD_NSPEC_INV_Q31 ) ); // Q22
2849 :
2850 414383 : IF( GT_32( S_mass, *tod_S_mass_prev ) )
2851 : {
2852 201106 : alpha = 1503238554; /* 0.7f in Q31 */
2853 : }
2854 : ELSE
2855 : {
2856 213277 : alpha = 644245094; /* 0.3f in Q31 */
2857 : }
2858 414383 : move16();
2859 :
2860 414383 : *tod_S_mass_prev = S_mass;
2861 414383 : move32();
2862 414383 : *tod_S_mass_lt = L_add( Mpy_32_32( alpha, *tod_S_mass_lt ), Mpy_32_32( L_sub( ONE_IN_Q31, alpha ), S_mass ) ); // Q22
2863 414383 : move32();
2864 414383 : S_mass = *tod_S_mass_lt;
2865 414383 : move32();
2866 :
2867 : /* updating adaptive decision threshold */
2868 414383 : IF( GT_32( S_mass, *tod_thr_lt ) )
2869 : {
2870 2005 : *tod_thr_lt = L_sub( *tod_thr_lt, THR_MASS_STEP_DN_FX );
2871 : }
2872 : ELSE
2873 : {
2874 412378 : *tod_thr_lt = L_add( *tod_thr_lt, THR_MASS_STEP_UP_FX );
2875 : }
2876 414383 : move16();
2877 :
2878 414383 : if ( GT_32( *tod_thr_lt, THR_MASS_MAX_FX ) )
2879 : {
2880 411807 : *tod_thr_lt = THR_MASS_MAX_FX;
2881 : }
2882 :
2883 414383 : if ( LT_32( *tod_thr_lt, THR_MASS_MIN_FX ) )
2884 : {
2885 1667 : *tod_thr_lt = THR_MASS_MIN_FX;
2886 : }
2887 414383 : move16();
2888 :
2889 414383 : return S_mass; /* Q22 */
2890 : }
2891 :
2892 :
2893 : /*---------------------------------------------------------------------*
2894 : * ivas_smc_mode_selection_fx()
2895 : *
2896 : * 2nd stage speech/music classifier (select coding mode (ACELP, GSC and TCX) based on S/M classification)
2897 : * output (sp_aud_decision1 - sp_aud_decision2 -> coding mode):
2898 : * 0 - 0 -> ACELP
2899 : * 1 - 0 -> GSC
2900 : * 1 - 1 -> TCX
2901 : *---------------------------------------------------------------------*/
2902 :
2903 414383 : void ivas_smc_mode_selection_fx(
2904 : Encoder_State *st, /* i/o: encoder state structure */
2905 : const Word32 element_brate, /* i : element bitrate */
2906 : Word16 smc_dec, /* i : raw decision of the 1st stage classifier*/
2907 : const Word16 relE, /* i : relative frame energy, Q8 */
2908 : const Word16 Etot, /* i : total frame energy, Q8 */
2909 : Word16 *attack_flag, /* i/o: attack flag (GSC or TC) */
2910 : const Word16 *inp, /* i : input signal */
2911 : const Word16 Q_new, /* i : Q of input signal */
2912 : const Word16 S_map[], /* i : short-term correlation map, Q7 */
2913 : const Word16 flag_spitch /* i : flag to indicate very short stable pitch*/
2914 : )
2915 : {
2916 : Word16 attack;
2917 : Word32 ton;
2918 : Word16 i;
2919 414383 : Word32 S_p2a, S_max, S_ave = 0;
2920 414383 : move32();
2921 : Word32 thr_sp2a;
2922 :
2923 414383 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
2924 :
2925 : /* initialization */
2926 414383 : *attack_flag = 0;
2927 414383 : move16();
2928 414383 : st->sp_aud_decision2 = 0;
2929 414383 : move16();
2930 :
2931 : /* signal stability estimation */
2932 414383 : stab_est_fx( Etot, hSpMusClas->gsc_lt_diff_etot_fx, &hSpMusClas->gsc_mem_etot_fx, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres_fx, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
2933 :
2934 : /* calculate variance of correlation */
2935 414383 : var_cor_calc_ivas_fx( st->old_corr_fx, &hSpMusClas->mold_corr_fx, hSpMusClas->var_cor_t_fx, &hSpMusClas->high_stable_cor );
2936 :
2937 : /* attack detection */
2938 414383 : IF( NE_16( shl( Q_new, 1 ), hSpMusClas->q_finc_prev ) )
2939 : {
2940 54384 : Scale_sig32( hSpMusClas->finc_prev_fx, ATT_NSEG, sub( shl( Q_new, 1 ), hSpMusClas->q_finc_prev ) );
2941 54384 : hSpMusClas->q_finc_prev = shl( Q_new, 1 );
2942 54384 : move16();
2943 : }
2944 414383 : attack = attack_det_ivas_fx( inp, Q_new, st->clas, st->localVAD, st->coder_type, 0, st->element_mode, st->clas, hSpMusClas->finc_prev_fx,
2945 : &hSpMusClas->q_finc_prev, &hSpMusClas->lt_finc_fx, &hSpMusClas->Q_lt_finc, &hSpMusClas->last_strong_attack );
2946 :
2947 : /* tonal detector */
2948 414383 : ton = tonal_det_fx( S_map, st->vad_flag, hSpMusClas->tod_S_map_lt_fx, &hSpMusClas->tod_thr_lt_fx, &hSpMusClas->tod_weight_fx, &hSpMusClas->tod_S_mass_prev_fx, &hSpMusClas->tod_S_mass_lt_fx ); // Q22
2949 :
2950 : /* calculate spectral peak-to-average ratio */
2951 414383 : Word16 shift = sub( st->q_Bin_E, st->hSpMusClas->Q_tod_lt_Bin_E );
2952 33565023 : FOR( i = 0; i < TOD_NSPEC; i++ )
2953 : {
2954 : // st->hSpMusClas->tod_lt_Bin_E[i] = P2A_FACT * st->hSpMusClas->tod_lt_Bin_E[i] + ( 1 - P2A_FACT ) * st->Bin_E[i];
2955 33150640 : st->hSpMusClas->tod_lt_Bin_E_fx[i] = Madd_32_16( L_shl( Mpy_32_16_1( st->hSpMusClas->tod_lt_Bin_E_fx[i], P2A_FACT_FX_Q15 ), shift ), st->Bin_E_fx[i], ( 32767 - P2A_FACT_FX_Q15 ) ); // Q = st->q_Bin_E + Q_SCALE - 2
2956 33150640 : move32();
2957 : }
2958 414383 : st->hSpMusClas->Q_tod_lt_Bin_E = add( st->hSpMusClas->Q_tod_lt_Bin_E, shift );
2959 414383 : move16();
2960 414383 : maximum_32_fx( st->hSpMusClas->tod_lt_Bin_E_fx, TOD_NSPEC, &S_max );
2961 : // S_ave = sum_f( st->hSpMusClas->tod_lt_Bin_E_fx, TOD_NSPEC ) / TOD_NSPEC;
2962 33565023 : FOR( i = 0; i < TOD_NSPEC; i++ )
2963 : {
2964 33150640 : S_ave = L_add( S_ave, st->hSpMusClas->tod_lt_Bin_E_fx[i] );
2965 : }
2966 414383 : S_ave = Mpy_32_32( S_ave, TOD_NSPEC_INV_Q31 );
2967 :
2968 414383 : S_p2a = L_sub( S_max, S_ave );
2969 :
2970 414383 : IF( LE_32( element_brate, IVAS_16k4 ) )
2971 : {
2972 135734 : thr_sp2a = L_shl( THR_P2A_HIGH_FX, st->q_Bin_E ); // Q = st->q_Bin_E
2973 : }
2974 : ELSE
2975 : {
2976 278649 : thr_sp2a = L_shl( THR_P2A_FX, st->q_Bin_E ); // Q = st->q_Bin_E
2977 : }
2978 :
2979 : /* initial 3-way selection of coding modes (ACELP/GSC/TCX) */
2980 414383 : test();
2981 414383 : test();
2982 414383 : IF( GT_16( relE, -2560 /* -10.0f in Q8 */ ) && ( GT_32( S_p2a, thr_sp2a ) || GT_32( ton, hSpMusClas->tod_thr_lt_fx ) ) )
2983 : {
2984 : /* select TCX to encode extremely peaky signals or strongly tonal signals */
2985 18225 : st->sp_aud_decision1 = 1;
2986 18225 : st->sp_aud_decision2 = 1;
2987 : }
2988 396158 : ELSE IF( smc_dec == SPEECH )
2989 : {
2990 : /* select ACELP to encode speech */
2991 153381 : st->sp_aud_decision1 = 0;
2992 153381 : st->sp_aud_decision2 = 0;
2993 : }
2994 242777 : ELSE IF( EQ_16( smc_dec, SPEECH_OR_MUSIC ) )
2995 : {
2996 : /* select GSC to encode "unclear" segments (classifier's score on the borderline) */
2997 6204 : st->sp_aud_decision1 = 1;
2998 6204 : st->sp_aud_decision2 = 0;
2999 : }
3000 : ELSE
3001 : {
3002 : /* select TCX to encode music */
3003 236573 : st->sp_aud_decision1 = 1;
3004 236573 : st->sp_aud_decision2 = 1;
3005 : }
3006 414383 : move16();
3007 414383 : move16();
3008 :
3009 : /* change decision from GSC to ACELP/TCX in some special cases */
3010 414383 : test();
3011 414383 : IF( EQ_16( st->sp_aud_decision1, 1 ) && st->sp_aud_decision2 == 0 )
3012 : {
3013 6204 : test();
3014 6204 : test();
3015 6204 : IF( LT_16( hSpMusClas->ener_RAT_fx, 5898 /* 0.18f in Q15 */ ) && GT_16( hSpMusClas->lt_dec_thres_fx, 7680 /* 15.0f in Q9 */ ) )
3016 : {
3017 : /* prevent GSC on strong music with almost no content below 1kHz */
3018 4 : st->sp_aud_decision2 = 1;
3019 4 : move16();
3020 : }
3021 6200 : ELSE IF( flag_spitch )
3022 : {
3023 : /* prevent GSC on signals with very short and stable high pitch period */
3024 114 : IF( LT_32( hSpMusClas->wdlp_0_95_sp_32fx, 41943040 /* 2.5f in Q24 */ ) )
3025 : {
3026 : /* select ACELP instead */
3027 110 : st->sp_aud_decision1 = 0;
3028 110 : move16();
3029 : }
3030 : ELSE
3031 : {
3032 : /* select TCX instead */
3033 4 : st->sp_aud_decision2 = 1;
3034 4 : move16();
3035 : }
3036 : }
3037 6086 : ELSE IF( hSpMusClas->high_stable_cor && GE_16( st->pitch[0], 130 ) )
3038 : {
3039 : /* prevent GSC in highly correlated signal with low energy variation */
3040 : /* this is basically a patch against bassoon-type of music */
3041 0 : st->sp_aud_decision2 = 1;
3042 0 : move16();
3043 : }
3044 : }
3045 :
3046 : /* change decision from GSC to ACELP TC during attacks/onsets */
3047 414383 : test();
3048 414383 : IF( EQ_16( st->sp_aud_decision1, 1 ) && st->sp_aud_decision2 == 0 )
3049 : {
3050 6086 : test();
3051 6086 : IF( GT_16( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], 1152 /*4.5f in Q8*/ ) &&
3052 : ( GT_16( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], add( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 2], 2560 /* 10.0f in Q8 */ ) ) ) )
3053 : {
3054 119 : IF( EQ_16( st->tc_cnt, 1 ) )
3055 : {
3056 : /* do ACELP TC coding instead of GC/VC if onset has been already declared before */
3057 49 : st->sp_aud_decision1 = 0;
3058 49 : move16();
3059 49 : st->coder_type = TRANSITION;
3060 49 : move16();
3061 : }
3062 : ELSE
3063 : {
3064 70 : IF( GE_16( attack, ATT_3LSUB_POS ) )
3065 : {
3066 : /* do ACELP TC coding also if attack is located in the last subframe */
3067 16 : st->sp_aud_decision1 = 0;
3068 16 : move16();
3069 16 : *attack_flag = add( attack, 1 );
3070 16 : move16();
3071 16 : st->coder_type = TRANSITION;
3072 16 : move16();
3073 : }
3074 54 : ELSE IF( GE_16( attack, ATT_SEG_LEN / 2 ) )
3075 : {
3076 : /* do GSC coding if attack is located after the first quarter of the first subframe */
3077 : /* (pre-echo will be treated at the decoder side) */
3078 1 : *attack_flag = 31;
3079 1 : move16();
3080 1 : *attack_flag = add( attack, 1 );
3081 1 : move16();
3082 : }
3083 : }
3084 : }
3085 : }
3086 :
3087 414383 : test();
3088 414383 : test();
3089 414383 : test();
3090 414383 : test();
3091 414383 : IF( EQ_16( st->localVAD, 1 ) && EQ_16( st->coder_type, GENERIC ) && attack > 0 /*&& *attack_flag < 32*/ /*&& st->tc_cnt != 2*/ && !( EQ_16( st->sp_aud_decision2, 1 ) && GT_32( ton, 2726298 /* 0.65f in Q22 */ ) ) )
3092 : {
3093 : /* change ACELP coder_type to TC if attack has been detected */
3094 6786 : st->sp_aud_decision1 = 0;
3095 6786 : move16();
3096 6786 : st->sp_aud_decision2 = 0;
3097 6786 : move16();
3098 :
3099 6786 : st->coder_type = TRANSITION;
3100 6786 : move16();
3101 6786 : *attack_flag = add( attack, 1 );
3102 6786 : move16();
3103 : }
3104 :
3105 : #ifdef DEBUGGING
3106 : if ( st->idchan == 0 && st->coder_type != INACTIVE )
3107 : {
3108 : if ( st->force == FORCE_GSC && element_brate < IVAS_24k4 )
3109 : {
3110 : /* enforce GSC */
3111 : st->sp_aud_decision1 = 1;
3112 : st->sp_aud_decision2 = 0;
3113 : }
3114 : else if ( st->force == FORCE_SPEECH && ( st->sp_aud_decision1 == 1 || st->sp_aud_decision2 == 1 ) )
3115 : {
3116 : if ( element_brate < IVAS_24k4 )
3117 : {
3118 : /* convert TCX to GSC */
3119 : st->sp_aud_decision1 = 1;
3120 : st->sp_aud_decision2 = 0;
3121 : }
3122 : else
3123 : {
3124 : /* convert TCX to ACELP */
3125 : st->sp_aud_decision1 = 0;
3126 : st->sp_aud_decision2 = 0;
3127 : }
3128 : }
3129 : else if ( st->force == FORCE_MUSIC )
3130 : {
3131 : /* enforce TCX */
3132 : st->sp_aud_decision1 = 1;
3133 : st->sp_aud_decision2 = 1;
3134 : }
3135 : }
3136 : #endif
3137 :
3138 : /* set GSC noisy speech flag on unvoiced SWB segments */
3139 414383 : st->GSC_noisy_speech = 0;
3140 414383 : move16();
3141 414383 : test();
3142 414383 : test();
3143 414383 : test();
3144 414383 : test();
3145 414383 : test();
3146 414383 : if ( EQ_16( st->vad_flag, 1 ) && LE_32( element_brate, IVAS_16k4 ) && GT_32( st->lp_noise_32fx, 503316480 /* 30.0f in Q24 */ ) && st->sp_aud_decision1 == 0 && GE_16( st->bwidth, SWB ) && EQ_16( st->coder_type_raw, UNVOICED ) )
3147 : {
3148 1232 : st->GSC_noisy_speech = 1;
3149 1232 : move16();
3150 : }
3151 :
3152 : /* set GSC submode */
3153 414383 : test();
3154 414383 : test();
3155 414383 : test();
3156 414383 : IF( st->element_mode > EVS_MONO && ( EQ_16( st->sp_aud_decision1, 1 ) && st->sp_aud_decision2 == 0 ) && GT_32( st->total_brate, STEREO_GSC_BIT_RATE_ALLOC ) ) /* below STEREO_GSC_BIT_RATE_ALLOC, fall back on normal GSC */
3157 : {
3158 5012 : st->GSC_IVAS_mode = 1;
3159 5012 : move16();
3160 5012 : IF( st->hSpMusClas->wdlp_0_95_sp_32fx > 0 )
3161 : {
3162 : /* music-like content */
3163 2979 : st->GSC_IVAS_mode = 3;
3164 : }
3165 2033 : ELSE IF( st->tc_cnt > 0 )
3166 : {
3167 : /* likely presence of an onset, GSC bit allocation will be more focused on LF */
3168 241 : st->GSC_IVAS_mode = 2;
3169 : }
3170 5012 : move16();
3171 :
3172 5012 : test();
3173 5012 : IF( EQ_16( st->coder_type_raw, UNVOICED ) && st->sp_aud_decision0 == 0 /*&& st->GSC_IVAS_mode < 3*/ )
3174 : {
3175 104 : st->GSC_noisy_speech = 1;
3176 : }
3177 : ELSE
3178 : {
3179 4908 : st->GSC_noisy_speech = 0;
3180 : }
3181 5012 : move16();
3182 : }
3183 :
3184 : /* set coder_type to AUDIO when GSC is selected (st->core will be set later in the decision matrix) */
3185 414383 : test();
3186 414383 : test();
3187 414383 : IF( ( EQ_16( st->sp_aud_decision1, 1 ) && st->sp_aud_decision2 == 0 ) || st->GSC_noisy_speech )
3188 : {
3189 7165 : st->coder_type = AUDIO;
3190 7165 : move16();
3191 7165 : test();
3192 7165 : if ( st->hGSCEnc != NULL && st->GSC_noisy_speech == 0 ) /* In case of GSC_noisy_speech, NOISE_LEVEL should remain at NOISE_LEVEL_SP3 */
3193 : {
3194 5829 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
3195 5829 : move16();
3196 : }
3197 : }
3198 :
3199 414383 : return;
3200 : }
3201 :
3202 : /*---------------------------------------------------------------------*
3203 : * mode_decision_fx()
3204 : *
3205 : *
3206 : *---------------------------------------------------------------------*/
3207 :
3208 2041 : static Word16 mode_decision_fx(
3209 : Encoder_State *st, /* i : endoer state structure */
3210 : Word16 len, /* i : buffering status */
3211 : Word16 *dec_mov, /* i/o: moving average of classifier decision Q15*/
3212 : Word16 *buf_flux, /* i : buffer storing spectral energy fluctuation Q7*/
3213 : Word16 *buf_epsP_tilt, /* i : buffer storing LP prediciton error tilt Q15*/
3214 : Word16 *buf_pkh, /* i : buffer storing highband spectral peakiness Q1*/
3215 : Word16 *buf_cor_map_sum, /* i : buffer storing correlation map sum Q8*/
3216 : Word16 *buf_Ntonal, /* i : buffer storing No.of 1st spectral tone Q0*/
3217 : Word16 *buf_Ntonal2, /* i : buffer storing No.of 2nd spectral tone Q0*/
3218 : Word16 *buf_Ntonal_lf, /* i : buffer storing low band spectral tone ratio Q0*/
3219 : Word16 *buf_dlp /* i : buffer storing log probability diff between speech and music Q9*/
3220 : )
3221 : {
3222 : Word16 mode;
3223 : Word16 i;
3224 : Word16 voiced_cnt;
3225 : Word16 M_pkh;
3226 : Word16 M_cor_map_sum;
3227 : Word16 M_Ntonal;
3228 : Word16 M_flux;
3229 : Word32 V_epsP_tilt;
3230 : Word16 lf_Ntonal_ratio;
3231 : Word16 tmp, tmp1;
3232 : Word32 L_tmp;
3233 : Word16 inv_len;
3234 : Word16 j;
3235 : Word16 M_flux10;
3236 2041 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
3237 :
3238 :
3239 2041 : mode = *dec_mov > 16384;
3240 2041 : logic16();
3241 2041 : move16();
3242 :
3243 2041 : IF( LE_16( len, 5 ) )
3244 : {
3245 25 : return ( mode );
3246 : }
3247 : ELSE
3248 : {
3249 2016 : IF( LT_16( len, 10 ) )
3250 : {
3251 20 : inv_len = div_s( 1, len ); /*Q15 */
3252 :
3253 20 : L_tmp = L_deposit_l( 0 );
3254 170 : FOR( i = 0; i < len; i++ )
3255 : {
3256 150 : L_tmp = L_add( L_tmp, buf_pkh[BUF_LEN - len + i] ); /*Q1 */
3257 : }
3258 20 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q1 */
3259 20 : M_pkh = extract_l( L_tmp ); /*Q1 */
3260 :
3261 20 : L_tmp = L_deposit_l( 0 );
3262 170 : FOR( i = 0; i < len; i++ )
3263 : {
3264 150 : L_tmp = L_add( L_tmp, buf_cor_map_sum[BUF_LEN - len + i] ); /*Q8 */
3265 : }
3266 20 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q8 */
3267 20 : M_cor_map_sum = extract_l( L_tmp ); /*Q8 */
3268 :
3269 20 : tmp = 0;
3270 20 : move16();
3271 170 : FOR( i = 0; i < len; i++ )
3272 : {
3273 150 : tmp = add( tmp, shl( buf_Ntonal[BUF_LEN - len + i], 2 ) ); /*Q2 */
3274 : }
3275 20 : M_Ntonal = mult_r( tmp, inv_len ); /*Q2 */
3276 :
3277 20 : V_epsP_tilt = var_fx_32( buf_epsP_tilt + BUF_LEN - len, 15, len ); /*Q31 */
3278 :
3279 20 : voiced_cnt = 0;
3280 20 : move16();
3281 140 : FOR( i = 9; i > 3; i-- )
3282 : {
3283 120 : if ( buf_dlp[i] > 0 )
3284 : {
3285 10 : voiced_cnt = add( voiced_cnt, 1 );
3286 : }
3287 : }
3288 :
3289 20 : test();
3290 20 : test();
3291 20 : test();
3292 20 : test();
3293 20 : IF( ( GT_16( M_pkh, 2200 ) || LT_32( V_epsP_tilt, 171799 ) || GT_16( M_cor_map_sum, 25600 ) ) && LT_16( voiced_cnt, 4 ) )
3294 : {
3295 4 : mode = 1;
3296 4 : move16();
3297 : }
3298 16 : ELSE IF( GT_16( M_Ntonal, 108 ) && LT_16( voiced_cnt, 4 ) ) /*27 in Q2 */
3299 : {
3300 0 : mode = 1;
3301 0 : move16();
3302 : }
3303 : }
3304 : ELSE
3305 : {
3306 1996 : voiced_cnt = 0;
3307 1996 : move16();
3308 21956 : FOR( i = 0; i < 10; i++ )
3309 : {
3310 19960 : if ( buf_dlp[i] > 0 )
3311 : {
3312 10234 : voiced_cnt = add( voiced_cnt, 1 );
3313 : }
3314 : }
3315 :
3316 1996 : inv_len = 3277; /*Q15 */
3317 1996 : move16();
3318 :
3319 1996 : L_tmp = L_deposit_l( 0 );
3320 21956 : FOR( i = 0; i < 10; i++ )
3321 : {
3322 19960 : L_tmp = L_add( L_tmp, L_shl( buf_flux[BUF_LEN - 10 + i], 2 ) ); /*Q9 */
3323 : }
3324 1996 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q9 */
3325 1996 : M_flux10 = extract_l( L_tmp ); /*Q9 */
3326 :
3327 1996 : L_tmp = L_deposit_l( 0 );
3328 21956 : FOR( i = 0; i < 10; i++ )
3329 : {
3330 19960 : L_tmp = L_add( L_tmp, buf_pkh[BUF_LEN - 10 + i] ); /*Q1 */
3331 : }
3332 1996 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q1 */
3333 1996 : M_pkh = extract_l( L_tmp ); /*Q1 */
3334 :
3335 1996 : L_tmp = L_deposit_l( 0 );
3336 21956 : FOR( i = 0; i < 10; i++ )
3337 : {
3338 19960 : L_tmp = L_add( L_tmp, buf_cor_map_sum[BUF_LEN - 10 + i] ); /*Q8 */
3339 : }
3340 1996 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q8 */
3341 1996 : M_cor_map_sum = extract_l( L_tmp ); /*Q8 */
3342 :
3343 1996 : V_epsP_tilt = var_fx_32( buf_epsP_tilt + BUF_LEN - 10, 15, 10 ); /*Q31 */
3344 :
3345 1996 : L_tmp = L_deposit_l( 0 );
3346 11976 : FOR( i = 0; i < 5; i++ )
3347 : {
3348 9980 : L_tmp = L_add( L_tmp, L_shl( buf_flux[BUF_LEN - 5 + i], 2 ) ); /*Q9 */
3349 : }
3350 1996 : L_tmp = Mult_32_16( L_tmp, 6554 ); /*Q9 */
3351 1996 : tmp = extract_l( L_tmp ); /*Q9 */
3352 :
3353 1996 : test();
3354 1996 : test();
3355 1996 : test();
3356 1996 : test();
3357 1996 : test();
3358 1996 : test();
3359 1996 : IF( ( LT_16( M_flux10, 4352 ) || ( LT_32( V_epsP_tilt, 2147484 ) && LT_16( M_flux10, 6144 ) ) || GT_16( M_pkh, 2100 ) ||
3360 : GT_16( M_cor_map_sum, 25600 ) ) &&
3361 : LT_16( voiced_cnt, 3 ) && LT_16( tmp, 7680 ) )
3362 : {
3363 238 : mode = 1;
3364 238 : move16();
3365 238 : *dec_mov = 32767;
3366 238 : move16();
3367 238 : return ( mode );
3368 : }
3369 :
3370 1758 : test();
3371 1758 : test();
3372 1758 : test();
3373 1758 : test();
3374 1758 : test();
3375 1758 : IF( GT_16( M_flux10, 8192 ) || ( GT_16( M_flux10, 7680 ) && GT_16( voiced_cnt, 2 ) ) || GT_16( tmp, 9728 ) ||
3376 : ( GE_16( buf_flux[59], 2560 ) && GT_16( hSpMusClas->lps_fx, hSpMusClas->lpm_fx ) ) )
3377 : {
3378 1520 : mode = 0;
3379 1520 : move16();
3380 1520 : *dec_mov = 0;
3381 1520 : move16();
3382 1520 : return ( mode );
3383 : }
3384 :
3385 5442 : FOR( i = 10; i < len; i++ )
3386 : {
3387 5335 : inv_len = div_s( 1, i ); /*Q15 */
3388 :
3389 5335 : L_tmp = L_deposit_l( 0 );
3390 185500 : FOR( j = 0; j < i; j++ )
3391 : {
3392 180165 : L_tmp = L_add( L_tmp, L_shl( buf_flux[BUF_LEN - i + j], 2 ) ); /*Q9 */
3393 : }
3394 5335 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q9 */
3395 5335 : M_flux = extract_l( L_tmp ); /*Q9 */
3396 :
3397 5335 : L_tmp = L_deposit_l( 0 );
3398 185500 : FOR( j = 0; j < i; j++ )
3399 : {
3400 180165 : L_tmp = L_add( L_tmp, buf_pkh[BUF_LEN - i + j] ); /*Q1 */
3401 : }
3402 5335 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q1 */
3403 5335 : M_pkh = extract_l( L_tmp ); /*Q1 */
3404 :
3405 5335 : L_tmp = L_deposit_l( 0 );
3406 185500 : FOR( j = 0; j < i; j++ )
3407 : {
3408 180165 : L_tmp = L_add( L_tmp, buf_cor_map_sum[BUF_LEN - i + j] ); /*Q8 */
3409 : }
3410 5335 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q8 */
3411 5335 : M_cor_map_sum = extract_l( L_tmp ); /*Q8 */
3412 :
3413 5335 : V_epsP_tilt = var_fx_32( buf_epsP_tilt + BUF_LEN - i, 15, i ); /*Q31 */
3414 :
3415 5335 : test();
3416 5335 : test();
3417 5335 : test();
3418 5335 : test();
3419 5335 : test();
3420 5335 : IF( ( ( LT_16( M_flux, add( 6144, mult_r( 1638, shl( sub( len, 10 ), 9 ) ) ) ) && LT_16( M_flux10, 7680 ) ) ||
3421 : LT_32( V_epsP_tilt, L_add( 214748, L_shl( L_mult0( 19327, ( len - 10 ) ), 1 ) ) ) ||
3422 : GT_16( M_pkh, sub( 2100, extract_l( L_mult0( 10, sub( len, 10 ) ) ) ) ) ||
3423 : GT_16( M_cor_map_sum, sub( 24320, extract_l( L_mult0( 77, sub( len, 10 ) ) ) ) ) ) &&
3424 : LT_16( voiced_cnt, 3 ) )
3425 : {
3426 131 : mode = 1;
3427 131 : move16();
3428 131 : return ( mode );
3429 : }
3430 : }
3431 :
3432 107 : IF( EQ_16( len, BUF_LEN ) )
3433 : {
3434 103 : tmp = 0;
3435 103 : move16();
3436 6283 : FOR( i = 0; i < len; i++ )
3437 : {
3438 6180 : tmp = add( tmp, shl( buf_Ntonal[i], 2 ) ); /*Q2 */
3439 : }
3440 103 : M_Ntonal = mult_r( tmp, 546 ); /*Q2 */
3441 :
3442 103 : tmp = 0;
3443 103 : move16();
3444 6283 : FOR( i = 0; i < len; i++ )
3445 : {
3446 6180 : tmp = add( tmp, buf_Ntonal_lf[i] ); /*Q0 */
3447 : }
3448 103 : tmp1 = 0;
3449 103 : move16();
3450 6283 : FOR( i = 0; i < len; i++ )
3451 : {
3452 6180 : tmp1 = add( tmp1, buf_Ntonal2[i] ); /*Q0 */
3453 : }
3454 103 : lf_Ntonal_ratio = 0;
3455 103 : move16(); /*Q15 */
3456 103 : if ( tmp1 != 0 )
3457 : {
3458 103 : lf_Ntonal_ratio = div_s( tmp, tmp1 ); /*Q15 */
3459 : }
3460 :
3461 103 : test();
3462 103 : IF( GT_16( M_Ntonal, 72 ) || LT_16( lf_Ntonal_ratio, 6554 ) )
3463 : {
3464 0 : mode = 1;
3465 0 : move16();
3466 : }
3467 103 : ELSE IF( LT_16( M_Ntonal, 4 ) )
3468 : {
3469 0 : mode = 0;
3470 0 : move16();
3471 : }
3472 : }
3473 : }
3474 : }
3475 :
3476 127 : return ( mode );
3477 : }
3478 :
3479 : /*---------------------------------------------------------------------*
3480 : * tonal_dist_fx()
3481 : *
3482 : * Derives per-frame tonality statistics from the spectral peakiness map
* and appends them to the history buffers:
*   - Ntonal    : number of bins in 0..126 whose peakiness exceeds 7040
*   - Ntonal2   : number of bins in 0..126 whose peakiness exceeds 10240
*   - Ntonal_lf : number of bins in 0..63  whose peakiness exceeds 10240
*   - pk        : sum of the peakiness values of bins 64..126
* Each buffer is shifted down by one entry (oldest entry dropped) and the
* new value is written to the last slot (index BUF_LEN - 1).
3483 : *---------------------------------------------------------------------*/
3484 :
3485 2041 : static void tonal_dist_fx(
3486 : Word16 *p2v_map, /* i : spectral peakiness map (bins 0..126 are read) Q7*/
3487 : Word16 *buf_pkh, /* i/o: buffer storing highband spectral peakiness Q1*/
3488 : Word16 *buf_Ntonal, /* i/o: buffer storing No.of 1st spectral tone Q0*/
3489 : Word16 *buf_Ntonal2, /* i/o: buffer storing No.of 2nd spectral tone Q0*/
3490 : Word16 *buf_Ntonal_lf /* i/o: buffer storing No.of 2nd spectral tone in the low band (bins 0..63) Q0*/
3491 : )
3492 : {
3493 : Word16 i;
3494 : Word32 pk;
3495 : Word16 Ntonal;
3496 : Word16 Ntonal2;
3497 : Word16 Ntonal_lf;
3498 :
3499 :
3500 : /* find number of tonals, number of tonals at low-band,
3501 : spectral peakiness at high-band */
3502 2041 : pk = L_deposit_l( 0 );
3503 2041 : Ntonal = 0;
3504 2041 : move16();
3505 2041 : Ntonal2 = 0;
3506 2041 : move16();
3507 2041 : Ntonal_lf = 0;
3508 2041 : move16();
3509 132665 : FOR( i = 0; i < 64; i++ ) /* bins 0..63: these also feed the low-band counter */
3510 : {
3511 130624 : if ( GT_16( p2v_map[i], 7040 ) ) /* 7040 = 55.0 in Q7 */
3512 : {
3513 9820 : Ntonal = add( Ntonal, 1 );
3514 : }
3515 :
3516 130624 : IF( GT_16( p2v_map[i], 10240 ) ) /* 10240 = 80.0 in Q7 */
3517 : {
3518 5699 : Ntonal2 = add( Ntonal2, 1 );
3519 5699 : Ntonal_lf = add( Ntonal_lf, 1 );
3520 : }
3521 : }
3522 :
3523 130624 : FOR( i = 64; i < 127; i++ ) /* bins 64..126: same two counters plus the peakiness sum */
3524 : {
3525 128583 : if ( p2v_map[i] != 0 )
3526 : {
3527 33844 : pk = L_add( pk, p2v_map[i] ); /*Q7 */
3528 : }
3529 128583 : if ( GT_16( p2v_map[i], 7040 ) )
3530 : {
3531 4215 : Ntonal = add( Ntonal, 1 );
3532 : }
3533 128583 : if ( GT_16( p2v_map[i], 10240 ) )
3534 : {
3535 1519 : Ntonal2 = add( Ntonal2, 1 );
3536 : }
3537 : }
3538 :
3539 : /* update buffers: drop the oldest entry and shift the remaining ones down by one */
3540 122460 : FOR( i = 0; i < BUF_LEN - 1; i++ )
3541 : {
3542 120419 : buf_pkh[i] = buf_pkh[i + 1];
3543 120419 : move16();
3544 120419 : buf_Ntonal[i] = buf_Ntonal[i + 1];
3545 120419 : move16();
3546 120419 : buf_Ntonal2[i] = buf_Ntonal2[i + 1];
3547 120419 : move16();
3548 120419 : buf_Ntonal_lf[i] = buf_Ntonal_lf[i + 1];
3549 120419 : move16();
3550 : }
3551 :
3552 2041 : buf_pkh[i] = extract_l( L_shr_r( pk, 6 ) ); /* i == BUF_LEN - 1 after the loop (newest slot); Q7 -> Q1 */
3553 2041 : buf_Ntonal[i] = Ntonal;
3554 2041 : move16(); /*Q0 */
3555 2041 : buf_Ntonal2[i] = Ntonal2;
3556 2041 : move16(); /*Q0 */
3557 2041 : buf_Ntonal_lf[i] = Ntonal_lf;
3558 2041 : move16(); /*Q0 */
3559 :
3560 2041 : return;
3561 : }
3562 :
3563 : /*---------------------------------------------------------------------*
3564 : * flux_fx()
3565 : *
3566 : * Computes the spectral energy fluctuation of the current frame and
* updates the associated history:
*   1. flux = mean of |Bin_E[i] - old_Bin_E[i]| over the first N_OLD_BIN_E
*      bins whose p2v_map entry is non-zero (640, i.e. 5 in Q7, when no
*      such bin exists); capped to 2560 (20 in Q7) when dec_mov > 26214.
*   2. old_Bin_E is treated as three consecutive segments of N_OLD_BIN_E
*      values; the segments are shifted by one and the current Bin_E is
*      stored in the last segment.
*   3. buf_flux is shifted and the new flux appended, but only when
*      attack_hangover <= 0.
3567 : *---------------------------------------------------------------------*/
3568 :
3569 2041 : static void flux_fx(
3570 : Word16 *Bin_E, /* i : log energy spectrum of the current frame Q7*/
3571 : Word16 *p2v_map, /* i : spectral peakiness map Q7*/
3572 : Word16 *old_Bin_E, /* i/o: log energy spectrum of the frame 60ms ago Q7*/
3573 : Word16 *buf_flux, /* i/o: buffer storing spectral energy fluctuation Q7*/
3574 : Word16 attack_hangover, /* i : hangover preventing flux buffering (passed by value) Q0*/
3575 : Word16 dec_mov /* i : moving average of classifier decision (passed by value) Q15*/
3576 : )
3577 : {
3578 : Word16 i;
3579 : Word16 *pt1, *pt2, *pt3, *pt4, *pt5, *pt6;
3580 : Word16 flux;
3581 : Word32 L_flux;
3582 : Word16 cnt;
3583 : Word16 tmp;
3584 :
3585 : /* calculate flux: accumulate absolute differences and count contributing bins */
3586 2041 : L_flux = L_deposit_l( 0 );
3587 2041 : cnt = 0;
3588 2041 : move16();
3589 87763 : FOR( i = 0; i < N_OLD_BIN_E; i++ )
3590 : {
3591 85722 : IF( p2v_map[i] != 0 ) /* only bins with a non-zero peakiness entry contribute */
3592 : {
3593 20995 : L_flux = L_add_sat( L_flux, abs_s( sub_sat( Bin_E[i], old_Bin_E[i] ) ) ); /*Q7 */
3594 : }
3595 85722 : if ( p2v_map[i] != 0 ) /* same condition as above, tested again for the counter */
3596 : {
3597 20995 : cnt = add( cnt, 1 );
3598 : }
3599 : }
3600 :
3601 2041 : flux = 640; /* default used when no bin contributed */
3602 2041 : move16(); /*5 in Q7 */
3603 2041 : IF( cnt != 0 )
3604 : {
3605 2035 : tmp = div_s( 1, cnt ); /* 1/cnt in Q15 */
3606 2035 : flux = extract_l( Mult_32_16( L_flux, tmp ) ); /* L_flux / cnt, Q7 */
3607 : }
3608 :
3609 2041 : test();
3610 2041 : if ( GT_16( flux, 2560 ) && GT_16( dec_mov, 26214 ) ) /* cap the flux only while dec_mov is above 26214 */
3611 : {
3612 54 : flux = 2560;
3613 54 : move16(); /*20 in Q7 */
3614 : }
3615 :
3616 : /* update old Bin_E buffer: segment 0 <- segment 1, segment 1 <- segment 2, segment 2 <- Bin_E */
3617 2041 : pt1 = old_Bin_E; /* write pointer, segment 0 */
3618 2041 : pt2 = old_Bin_E + N_OLD_BIN_E; /* read pointer, segment 1 */
3619 2041 : pt3 = Bin_E; /* read pointer, current frame */
3620 2041 : pt4 = old_Bin_E + N_OLD_BIN_E; /* write pointer, segment 1 */
3621 2041 : pt5 = old_Bin_E + 2 * N_OLD_BIN_E; /* read pointer, segment 2 */
3622 2041 : pt6 = old_Bin_E + 2 * N_OLD_BIN_E; /* write pointer, segment 2 */
3623 :
3624 87763 : FOR( i = 0; i < N_OLD_BIN_E; i++ )
3625 : {
3626 85722 : *pt1++ = *pt2++;
3627 85722 : move16();
3628 85722 : *pt4++ = *pt5++;
3629 85722 : move16();
3630 85722 : *pt6++ = *pt3++;
3631 85722 : move16();
3632 : }
3633 : /* update flux buffer (skipped while the attack hangover is running) */
3634 2041 : IF( attack_hangover <= 0 )
3635 : {
3636 122460 : FOR( i = 0; i < BUF_LEN - 1; i++ )
3637 : {
3638 120419 : buf_flux[i] = buf_flux[i + 1];
3639 120419 : move16();
3640 : }
3641 2041 : buf_flux[i] = flux; /* i == BUF_LEN - 1 after the loop (newest slot) */
3642 2041 : move16();
3643 : }
3644 :
3645 2041 : return;
3646 : }
3647 :
3648 : /*---------------------------------------------------------------------*
3649 : * spec_analysis_fx()
3650 : *
3651 : * Builds the spectral peakiness (peak-to-valley) map of one frame:
*   1. collect the strict local maxima of Bin_E (peaks),
*   2. collect the strict local minima (valleys), plus bin 0 when the
*      spectrum starts rising and a closing valley at the start of the
*      rising run that ends at bin 126,
*   3. for every peak lying between two consecutive valleys store
*      2*peak - (left valley + right valley) at the peak's bin index.
* All other entries of p2v_map[0..126] are set to 0. When no peak is
* found the map is zeroed and the function returns early.
3652 : *---------------------------------------------------------------------*/
3653 :
3654 2041 : static void spec_analysis_fx(
3655 : Word16 *Bin_E, /* i : log energy spectrum of the current frame Q7*/
3656 : Word16 *p2v_map /* o : spectral peakiness map (entries 0..126 are written) Q7*/
3657 : )
3658 : {
3659 : Word16 i, k, m;
3660 : Word16 peak[65];
3661 : Word16 valley[65];
3662 : Word16 peak_idx[65];
3663 : Word16 valey_idx[65];
3664 : Word16 p2v[65];
3665 :
3666 : /* find spectral peaks (bins strictly greater than both neighbours) */
3667 2041 : k = 0;
3668 2041 : move16();
3669 257166 : FOR( i = 1; i < L_FFT / 2 - 2; i++ )
3670 : {
3671 255125 : test();
3672 255125 : IF( GT_16( Bin_E[i], Bin_E[i - 1] ) && GT_16( Bin_E[i], Bin_E[i + 1] ) )
3673 : {
3674 68073 : peak[k] = Bin_E[i];
3675 68073 : move16();
3676 68073 : peak_idx[k] = i;
3677 68073 : move16();
3678 68073 : k = add( k, 1 );
3679 : }
3680 : }
3681 2041 : assert( k + 1 < 65 ); /* NOTE(review): checked only after the loop has already written peak[]/peak_idx[] */
3682 2041 : peak_idx[k] = -1; /* two -1 sentinels: no valley index is negative, so the matching loop below never accepts them */
3683 2041 : move16();
3684 2041 : peak_idx[k + 1] = -1;
3685 2041 : move16();
3686 2041 : IF( k == 0 )
3687 : {
3688 0 : FOR( i = 0; i < 127; i++ )
3689 : {
3690 0 : p2v_map[i] = 0;
3691 0 : move16();
3692 : }
3693 :
3694 0 : return;
3695 : }
3696 :
3697 : /* find spectral valleys */
3698 2041 : m = 0;
3699 2041 : move16();
3700 :
3701 2041 : IF( LT_16( Bin_E[0], Bin_E[1] ) ) /* spectrum starts rising: bin 0 is the first valley */
3702 : {
3703 1107 : valley[0] = Bin_E[0];
3704 1107 : move16();
3705 1107 : valey_idx[0] = 0;
3706 1107 : move16();
3707 1107 : m = add( m, 1 );
3708 : }
3709 :
3710 2041 : k = 126; /* k is reused: it becomes the index of the closing valley */
3711 2041 : move16();
3712 3611 : FOR( i = 125; i >= 0; i-- ) /* walk back over the strictly rising run that ends at bin 126 */
3713 : {
3714 3611 : IF( LE_16( Bin_E[i + 1], Bin_E[i] ) )
3715 : {
3716 2041 : BREAK;
3717 : }
3718 1570 : k = i;
3719 1570 : move16();
3720 : }
3721 :
3722 255596 : FOR( i = 1; i < k; i++ ) /* interior valleys: bins strictly lower than both neighbours */
3723 : {
3724 253555 : test();
3725 253555 : IF( LT_16( Bin_E[i], Bin_E[i - 1] ) && LT_16( Bin_E[i], Bin_E[i + 1] ) )
3726 : {
3727 66856 : valley[m] = Bin_E[i];
3728 66856 : move16();
3729 66856 : valey_idx[m] = i;
3730 66856 : move16();
3731 66856 : m = add( m, 1 );
3732 : }
3733 : }
3734 2041 : valley[m] = Bin_E[k]; /* closing valley; m is not incremented, so entry m is the last valid one */
3735 2041 : move16();
3736 2041 : valey_idx[m] = k;
3737 2041 : move16();
3738 :
3739 : /* find spectral peak to valley distances */
3740 2041 : k = 0; /* k is reused again: index of the next peak to be matched */
3741 2041 : move16();
3742 70004 : FOR( i = 0; i < m; i++ ) /* valley pair (i, i + 1); i + 1 <= m is always a valid entry */
3743 : {
3744 67963 : test();
3745 67963 : IF( GT_16( peak_idx[k], valey_idx[i] ) && LT_16( peak_idx[k], valey_idx[i + 1] ) )
3746 : {
3747 66509 : p2v[k] = sub_sat( shl_sat( peak[k], 1 ), add_sat( valley[i], valley[i + 1] ) ); /* 2*peak - (left + right valley) */
3748 66509 : move16();
3749 66509 : k = add( k, 1 );
3750 : }
3751 : }
3752 :
3753 261248 : FOR( i = 0; i < 127; i++ )
3754 : {
3755 259207 : p2v_map[i] = 0;
3756 259207 : move16();
3757 : }
3758 :
3759 68550 : FOR( i = 0; i < k; i++ ) /* scatter the k matched distances to their peak positions */
3760 : {
3761 66509 : p2v_map[peak_idx[i]] = p2v[i];
3762 66509 : move16();
3763 : }
3764 : }
3765 :
/*---------------------------------------------------------------------*
 * music_mixed_classif_improv_fx()
 *
 * Refinement of the 1st stage speech/music decision for music and mixed
 * content. The function
 *   - returns early (after resetting the decision moving averages to 0.5)
 *     when the VAD flag is 0 or lp_speech - lp_noise is below 25 (Q8),
 *   - runs the spectral analysis and updates the flux, LP-residual tilt,
 *     tonality, correlation-map and dlp history buffers,
 *   - detects percussive music and sound attacks,
 *   - calls mode_decision_fx() and updates the long-term moving averages
 *     and the long-term unvoiced counter,
 *   - sets st->sp_aud_decision1 to 1 when the final decision is 1.
 * The decision is never changed to 0 by this function.
 *---------------------------------------------------------------------*/
3766 2050 : static void music_mixed_classif_improv_fx(
3767 : Encoder_State *st, /* i : encoder state structure */
3768 : const Word16 *new_inp, /* i : new input signal */
3769 : const Word32 *epsP, /* i : LP prediction error Q_epsP*/
3770 : Word16 Q_epsP, /* i : Q factor of epsP */
3771 : Word16 etot, /* i : total frame energy Q8*/
3772 : Word16 old_cor, /* i : normalized correlation Q15*/
3773 : Word16 cor_map_sum /* i : correlation map sum Q8*/
3774 : )
3775 : {
3776 : Word16 i, max_spl, dec, len, percus_flag, lt_diff, log_max_spl, epsP_tilt, p2v_map[128];
3777 : Word16 exp, frac, expn, fracn, expd, fracd, scale;
3778 : Word16 tmp;
3779 2050 : Word32 L_tmp, ftmp, ftmp1, epsP_max = MIN_32;
3780 2050 : move32();
3781 2050 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
3782 :
3783 : /* find sample with maximum absolute amplitude */
3784 2050 : max_spl = 0;
3785 2050 : move16();
3786 526850 : FOR( i = 0; i < L_FRAME; i++ )
3787 : {
3788 524800 : max_spl = s_max( abs_s( new_inp[i] ), max_spl );
3789 : }
3790 :
3791 : /* music is considered only appearing in high SNR condition and active signal */
3792 2050 : test();
3793 2050 : IF( st->vad_flag == 0 || LT_16( sub( st->lp_speech_fx, st->lp_noise_fx ), 6400 ) ) /* 25 in Q8 */
3794 : {
3795 : /* st->dec_mov = 0.5f; */
3796 : /* st->dec_mov1 = 0.5f; */
3797 9 : hSpMusClas->dec_mov_fx = 16384; /* 0.5 in Q15 */
3798 9 : move16();
3799 9 : hSpMusClas->dec_mov1_fx = 16384; /* 0.5 in Q15 */
3800 9 : move16();
3801 :
3802 9 : if ( st->vad_flag == 0 ) /* the onset counter restarts only on inactive frames, not on low-SNR active ones */
3803 : {
3804 9 : hSpMusClas->onset_cnt = 0;
3805 9 : move16();
3806 : }
3807 :
3808 9 : return;
3809 : }
3810 :
3811 2041 : hSpMusClas->onset_cnt = add( hSpMusClas->onset_cnt, 1 );
3812 2041 : hSpMusClas->onset_cnt = s_min( hSpMusClas->onset_cnt, 9 ); /* counter saturates at 9 */
3813 :
3814 2041 : IF( EQ_16( hSpMusClas->onset_cnt, 1 ) ) /* first frame after the counter was reset: mark the whole flux buffer as empty */
3815 : {
3816 5 : set16_fx( hSpMusClas->buf_flux_fx, -12800, BUF_LEN ); /*-100.0 in Q7 */
3817 : }
3818 :
3819 : /* spectral analysis */
3820 2041 : spec_analysis_fx( st->lgBin_E_fx, p2v_map );
3821 :
3822 : /* percussive music detection */
3823 2041 : log_max_spl = 0;
3824 2041 : move16();
3825 2041 : IF( max_spl )
3826 : {
3827 2041 : L_tmp = L_deposit_h( max_spl ); /*Q16 */
3828 2041 : exp = norm_l( L_tmp );
3829 2041 : frac = Log2_norm_lc( L_shl( L_tmp, exp ) );
3830 2041 : exp = sub( sub( 30, exp ), 16 );
3831 2041 : L_tmp = Mpy_32_16( exp, frac, 28391 ); /*Q12 */
3832 2041 : log_max_spl = round_fx( L_shl( L_tmp, 11 ) ); /*Q7 */
3833 : }
3834 :
3835 2041 : lt_diff = sub( log_max_spl, hSpMusClas->mov_log_max_spl_fx ); /*Q7 */
3836 :
3837 8164 : FOR( i = 0; i < 3; i++ ) /* shift the 4-entry frame energy history; the newest value goes to index 3 */
3838 : {
3839 6123 : hSpMusClas->buf_etot_fx[i] = hSpMusClas->buf_etot_fx[i + 1];
3840 6123 : move16(); /*Q8 */
3841 : }
3842 2041 : hSpMusClas->buf_etot_fx[i] = etot;
3843 2041 : move16(); /*Q8 */
3844 :
3845 2041 : percus_flag = 0;
3846 2041 : move16();
3847 2041 : test();
3848 2041 : test();
3849 2041 : IF( GT_16( sub( hSpMusClas->buf_etot_fx[1], hSpMusClas->buf_etot_fx[0] ), 1536 ) && /* 6 in Q8 */
3850 : LT_16( hSpMusClas->buf_etot_fx[2], hSpMusClas->buf_etot_fx[1] ) &&
3851 : GT_16( sub( hSpMusClas->buf_etot_fx[1], st->lp_speech_fx ), 768 ) ) /* 3 in Q8 */
3852 : {
3853 : /*tmp = add(shr(voicing[0],2),shr(voicing[1],2)); //Q15 */
3854 : /*tmp = add(tmp,shr(old_cor,1)); //Q15 */
3855 15 : tmp = mac_r( L_mac( L_mult( st->voicing_fx[0], 8192 ), st->voicing_fx[1], 8192 ), old_cor, 16384 ); /* 0.25*voicing[0] + 0.25*voicing[1] + 0.5*old_cor */
3856 15 : test();
3857 15 : test();
3858 15 : IF( GT_16( sub( hSpMusClas->buf_etot_fx[1], hSpMusClas->buf_etot_fx[3] ), 768 ) &&
3859 : LT_16( hSpMusClas->buf_etot_fx[3], hSpMusClas->buf_etot_fx[2] ) &&
3860 : LT_16( tmp, 24576 ) ) /* 0.75 in Q15 */
3861 : {
3862 4 : IF( GT_16( hSpMusClas->dec_mov_fx, 26214 ) ) /* 0.8 in Q15 */
3863 : {
3864 2 : percus_flag = 1;
3865 2 : move16();
3866 : }
3867 : ELSE
3868 : {
3869 2 : test();
3870 2 : test();
3871 2 : test();
3872 2 : if ( LT_16( old_cor, 24576 ) && LT_16( st->voicing_fx[0], 24576 ) && LT_16( st->voicing_fx[1], 24576 ) && GT_16( hSpMusClas->old_lt_diff_fx[0], 1280 ) )
3873 : {
3874 0 : percus_flag = 1;
3875 0 : move16();
3876 : }
3877 : }
3878 : }
3879 : }
3880 :
3881 : /* sound attack detection */
3882 2041 : test();
3883 2041 : test();
3884 2041 : test();
3885 2041 : if ( GT_16( sub( hSpMusClas->buf_etot_fx[3], hSpMusClas->buf_etot_fx[2] ), 1536 ) && GT_16( hSpMusClas->dec_mov_fx, 29491 ) && GT_16( sub( etot, st->lp_speech_fx ), 1280 ) && GT_16( hSpMusClas->old_lt_diff_fx[0], 640 ) )
3886 : {
3887 0 : hSpMusClas->attack_hangover = 3; /* flux_fx() skips its buffer update while this is > 0 */
3888 0 : move16();
3889 : }
3890 :
3891 2041 : test();
3892 2041 : IF( GT_16( st->voicing_fx[0], 29491 ) && GT_16( st->voicing_fx[1], 29491 ) ) /* moving log-max is updated only when both voicing values exceed 29491 */
3893 : {
3894 555 : IF( GT_16( log_max_spl, hSpMusClas->mov_log_max_spl_fx ) )
3895 : {
3896 : /**mov_log_max_spl = add(mult_r(31130,(*mov_log_max_spl)),mult_r(1638,log_max_spl)); //Q7 */
3897 28 : hSpMusClas->mov_log_max_spl_fx = round_fx( L_mac( L_mult( 31130, hSpMusClas->mov_log_max_spl_fx ), 1638, log_max_spl ) ); /*Q7 */
3898 : }
3899 : ELSE
3900 : {
3901 : /**mov_log_max_spl = add(mult_r(32604,(*mov_log_max_spl)),mult_r(164,log_max_spl)); //Q7 */
3902 527 : hSpMusClas->mov_log_max_spl_fx = round_fx( L_mac( L_mult( 32604, hSpMusClas->mov_log_max_spl_fx ), 164, log_max_spl ) ); /*Q7 */
3903 : }
3904 : }
3905 :
3906 2041 : hSpMusClas->old_lt_diff_fx[0] = hSpMusClas->old_lt_diff_fx[1];
3907 2041 : move16(); /*Q7 */
3908 2041 : hSpMusClas->old_lt_diff_fx[1] = lt_diff;
3909 2041 : move16(); /*Q7 */
3910 :
3911 : /* calculate and buffer spectral energy fluctuation */
3912 2041 : flux_fx( st->lgBin_E_fx, p2v_map, hSpMusClas->old_Bin_E_fx, hSpMusClas->buf_flux_fx, hSpMusClas->attack_hangover, hSpMusClas->dec_mov_fx );
3913 :
3914 2041 : hSpMusClas->attack_hangover = sub( hSpMusClas->attack_hangover, 1 ); /* count the hangover down, floored at 0 below */
3915 2041 : move16();
3916 2041 : hSpMusClas->attack_hangover = s_max( hSpMusClas->attack_hangover, 0 );
3917 2041 : move16();
3918 :
3919 : /* identify flux buffer buffering status: len = number of non-negative entries at the end of the buffer */
3920 2041 : len = 0;
3921 2041 : move16();
3922 115822 : FOR( i = BUF_LEN - 1; i >= 0; i-- )
3923 : {
3924 114058 : IF( hSpMusClas->buf_flux_fx[i] < 0 ) /* negative entries are the "empty" markers written at onset */
3925 : {
3926 277 : BREAK;
3927 : }
3928 :
3929 113781 : len = add( len, 1 );
3930 : }
3931 :
3932 : /* reset flux buffer if percussive music is detected */
3933 2041 : IF( EQ_16( percus_flag, 1 ) )
3934 : {
3935 2 : set16_fx( &hSpMusClas->buf_flux_fx[BUF_LEN - len], 640, len ); /* 5 in Q7 */
3936 : }
3937 :
3938 : /* calculate and buffer the tilt of residual LP energies.
 * Each epsP[i] is weighted by the ratio epsP[i]/epsP_max (Q15) before it is
 * accumulated, so ftmp collects epsP[i]*epsP[i]/epsP_max and ftmp1 collects
 * epsP[i]*epsP[i+1]/epsP_max, both shifted right by 4 bits. When the ratio
 * is exactly 1 it is applied as a multiplication by -32768 (-1.0 in Q15)
 * followed by a subtraction. */
3939 2041 : ftmp = 0;
3940 2041 : move16(); /* NOTE(review): ftmp is a Word32 but is followed by move16() - confirm whether move32() was intended */
3941 2041 : ftmp1 = 0;
3942 2041 : move16(); /* NOTE(review): same remark for ftmp1 */
3943 34697 : FOR( i = 1; i <= 16; i++ ) /* maximum over epsP[1..16] */
3944 : {
3945 32656 : epsP_max = L_max( epsP_max, epsP[i] );
3946 : }
3947 :
3948 32656 : FOR( i = 1; i < 16; i++ ) /* ftmp: accumulate epsP[i] * ( epsP[i] / epsP_max ) */
3949 : {
3950 30615 : IF( EQ_32( epsP[i], epsP_max ) )
3951 : {
3952 2041 : tmp = -32768;
3953 2041 : move16();
3954 2041 : L_tmp = Mult_32_16( epsP[i], tmp ); /* Q_epsP */
3955 2041 : ftmp = L_sub( ftmp, L_shr( L_tmp, 4 ) ); /* Q(Q_epsP-4) */
3956 : }
3957 : ELSE
3958 : {
3959 28574 : expn = norm_l( epsP[i] );
3960 28574 : fracn = extract_h( L_shl( epsP[i], expn ) );
3961 28574 : expn = sub( sub( 30, expn ), Q_epsP );
3962 :
3963 28574 : expd = norm_l( epsP_max );
3964 28574 : fracd = extract_h( L_shl( epsP_max, expd ) );
3965 28574 : expd = sub( sub( 30, expd ), Q_epsP );
3966 :
3967 28574 : scale = shr( sub( fracd, fracn ), 15 ); /* -1 when fracn > fracd, otherwise 0 */
3968 28574 : fracn = shl( fracn, scale ); /* halve the numerator in that case so that fracn <= fracd for div_s */
3969 28574 : expn = sub( expn, scale );
3970 :
3971 28574 : tmp = div_s( fracn, fracd ); /*Q(15+expd-expn) */
3972 28574 : tmp = shl( tmp, sub( expn, expd ) ); /*Q15 */
3973 :
3974 28574 : L_tmp = Mult_32_16( epsP[i], tmp ); /*Q_epsP */
3975 28574 : ftmp = L_add( ftmp, L_shr( L_tmp, 4 ) ); /*Q(Q_epsP-4) */
3976 : }
3977 : }
3978 :
3979 32656 : FOR( i = 1; i < 16; i++ ) /* ftmp1: accumulate epsP[i + 1] * ( epsP[i] / epsP_max ) */
3980 : {
3981 30615 : IF( EQ_32( epsP[i], epsP_max ) )
3982 : {
3983 2041 : tmp = -32768;
3984 2041 : move16();
3985 2041 : L_tmp = Mult_32_16( epsP[i + 1], tmp ); /*Q_epsP */
3986 2041 : ftmp1 = L_sub( ftmp1, L_shr( L_tmp, 4 ) ); /*Q(Q_epsP-4) */
3987 : }
3988 28574 : ELSE IF( EQ_32( epsP[i + 1], epsP_max ) )
3989 : {
3990 0 : tmp = -32768;
3991 0 : move16();
3992 0 : L_tmp = Mult_32_16( epsP[i], tmp ); /*Q_epsP */
3993 0 : ftmp1 = L_sub( ftmp1, L_shr( L_tmp, 4 ) ); /*Q(Q_epsP-4) */
3994 : }
3995 : ELSE
3996 : {
3997 28574 : expn = norm_l( epsP[i] );
3998 28574 : fracn = extract_h( L_shl( epsP[i], expn ) );
3999 28574 : expn = sub( sub( 30, expn ), Q_epsP );
4000 :
4001 28574 : expd = norm_l( epsP_max );
4002 28574 : fracd = extract_h( L_shl( epsP_max, expd ) );
4003 28574 : expd = sub( sub( 30, expd ), Q_epsP );
4004 :
4005 28574 : scale = shr( sub( fracd, fracn ), 15 );
4006 28574 : fracn = shl( fracn, scale );
4007 28574 : expn = sub( expn, scale );
4008 :
4009 28574 : tmp = div_s( fracn, fracd ); /*Q(15+expd-expn) */
4010 28574 : tmp = shl( tmp, sub( expn, expd ) ); /*Q15 */
4011 :
4012 28574 : L_tmp = Mult_32_16( epsP[i + 1], tmp ); /*Q_epsP */
4013 28574 : ftmp1 = L_add( ftmp1, L_shr( L_tmp, 4 ) ); /*Q(Q_epsP-4) */
4014 : }
4015 : }
4016 :
4017 : /* epsP_tilt = ftmp1/ftmp; */
4018 2041 : expn = norm_l( ftmp1 );
4019 2041 : fracn = extract_h( L_shl( ftmp1, expn ) );
4020 2041 : expn = sub( sub( 30, expn ), Q_epsP - 4 ); /* NOTE(review): raw C subtraction here, whereas the expd line below uses sub( Q_epsP, 4 ) */
4021 :
4022 2041 : expd = norm_l( ftmp );
4023 2041 : fracd = round_fx_sat( L_shl( ftmp, expd ) );
4024 2041 : expd = sub( sub( 30, expd ), sub( Q_epsP, 4 ) );
4025 :
4026 2041 : scale = shr( sub( fracd, fracn ), 15 );
4027 2041 : fracn = shl( fracn, scale );
4028 2041 : expn = sub( expn, scale );
4029 :
4030 2041 : tmp = div_s( fracn, fracd ); /*Q(15+expd-expn) */ /* NOTE(review): there is no guard for ftmp == 0 (fracd == 0) - confirm epsP[1..15] cannot all be zero */
4031 :
4032 2041 : epsP_tilt = shl( tmp, sub( expn, expd ) ); /*Q15 */
4033 :
4034 122460 : FOR( i = 0; i < BUF_LEN - 1; i++ )
4035 : {
4036 120419 : hSpMusClas->buf_epsP_tilt_fx[i] = hSpMusClas->buf_epsP_tilt_fx[i + 1];
4037 120419 : move16(); /*Q15 */
4038 : }
4039 2041 : hSpMusClas->buf_epsP_tilt_fx[i] = epsP_tilt;
4040 2041 : move16(); /*Q15 */
4041 :
4042 : /* calculate and buffer highband spectral peakiness */
4043 2041 : tonal_dist_fx( p2v_map, hSpMusClas->buf_pkh_fx, hSpMusClas->buf_Ntonal_fx, hSpMusClas->buf_Ntonal2_fx, hSpMusClas->buf_Ntonal_lf_fx );
4044 :
4045 : /* buffer sum of correlation map */
4046 122460 : FOR( i = 0; i < BUF_LEN - 1; i++ )
4047 : {
4048 120419 : hSpMusClas->buf_cor_map_sum_fx[i] = hSpMusClas->buf_cor_map_sum_fx[i + 1];
4049 120419 : move16(); /*Q8 */
4050 : }
4051 2041 : hSpMusClas->buf_cor_map_sum_fx[i] = cor_map_sum;
4052 2041 : move16(); /*Q8 */
4053 :
4054 : /* buffer voicing metric (10-entry history of lps - lpm) */
4055 20410 : FOR( i = 0; i < 9; i++ )
4056 : {
4057 18369 : hSpMusClas->buf_dlp_fx[i] = hSpMusClas->buf_dlp_fx[i + 1];
4058 18369 : move16();
4059 : }
4060 2041 : hSpMusClas->buf_dlp_fx[i] = sub( hSpMusClas->lps_fx, hSpMusClas->lpm_fx );
4061 2041 : move16(); /*Q9 */
4062 :
4063 : /* classification */
4064 2041 : dec = mode_decision_fx( st, len, &hSpMusClas->dec_mov_fx, hSpMusClas->buf_flux_fx, hSpMusClas->buf_epsP_tilt_fx, hSpMusClas->buf_pkh_fx,
4065 2041 : hSpMusClas->buf_cor_map_sum_fx, hSpMusClas->buf_Ntonal_fx, hSpMusClas->buf_Ntonal2_fx, hSpMusClas->buf_Ntonal_lf_fx,
4066 2041 : hSpMusClas->buf_dlp_fx );
4067 2041 : move16();
4068 :
4069 : /* update long term moving average of the classification decisions (only once more than 30 flux values are buffered) */
4070 2041 : IF( GT_16( len, 30 ) )
4071 : {
4072 1891 : IF( dec == 0 )
4073 : {
4074 1501 : hSpMusClas->dec_mov_fx = mult_r( 31785, hSpMusClas->dec_mov_fx ); /*Q15 */ /* 31785 ~ 0.97 in Q15 */
4075 1501 : hSpMusClas->dec_mov1_fx = mult_r( 31785, hSpMusClas->dec_mov1_fx ); /*Q15 */
4076 : }
4077 : ELSE
4078 : {
4079 390 : hSpMusClas->dec_mov_fx = add( mult_r( 31785, hSpMusClas->dec_mov_fx ), 983 ); /*Q15 */ /* 983 ~ 0.03 in Q15 */
4080 390 : hSpMusClas->dec_mov1_fx = add( mult_r( 31785, hSpMusClas->dec_mov1_fx ), 983 ); /*Q15 */
4081 : }
4082 1891 : move16();
4083 1891 : move16();
4084 : }
4085 :
4086 : /* update long term unvoiced counter: -8 on unvoiced/inactive frames with few strong tones, +1 otherwise */
4087 2041 : test();
4088 2041 : test();
4089 2041 : test();
4090 2041 : IF( ( EQ_16( st->coder_type_raw, UNVOICED ) || EQ_16( st->coder_type_raw, INACTIVE ) ) &&
4091 : GT_16( etot, 384 ) && LT_16( hSpMusClas->buf_Ntonal2_fx[59], 2 ) ) /* 384 = 1.5 in Q8; NOTE(review): index 59 is presumably BUF_LEN - 1 (newest entry) - confirm */
4092 : {
4093 73 : hSpMusClas->UV_cnt1 = sub( hSpMusClas->UV_cnt1, 8 );
4094 : }
4095 : ELSE
4096 : {
4097 1968 : hSpMusClas->UV_cnt1 = add( hSpMusClas->UV_cnt1, 1 );
4098 : }
4099 2041 : move16();
4100 :
4101 2041 : hSpMusClas->UV_cnt1 = s_min( hSpMusClas->UV_cnt1, 300 ); /* clamp the counter to [0, 300] */
4102 2041 : move16();
4103 2041 : hSpMusClas->UV_cnt1 = s_max( hSpMusClas->UV_cnt1, 0 );
4104 2041 : move16();
4105 :
4106 : /**LT_UV_cnt1 = add(mult_r(29491,*LT_UV_cnt1),mult_r(3277,shl(*UV_cnt1,6)));*/ /* Q6 */
4107 2041 : hSpMusClas->LT_UV_cnt1_fx = round_fx( L_mac( L_mult( 29491, hSpMusClas->LT_UV_cnt1_fx ), 3277, shl( hSpMusClas->UV_cnt1, 6 ) ) ); /*Q6 */
4108 2041 : move16();
4109 : /* revert classification decision due to long-term unvoiced counter */
4110 2041 : test();
4111 2041 : test();
4112 2041 : if ( EQ_16( dec, 1 ) && LT_16( hSpMusClas->dec_mov1_fx, 6554 ) && LT_16( hSpMusClas->LT_UV_cnt1_fx, 12800 ) ) /* 6554 ~ 0.2 in Q15, 12800 = 200 in Q6 */
4113 : {
4114 0 : dec = 0;
4115 0 : move16();
4116 : }
4117 :
4118 : /* overwrite 1st stage speech/music decision to music */
4119 2041 : if ( EQ_16( dec, 1 ) )
4120 : {
4121 420 : st->sp_aud_decision1 = 1;
4122 420 : move16();
4123 : }
4124 :
4125 2041 : return;
4126 : }
4127 :
4128 :
4129 : /*----------------------------------------------------------------------------------*
4130 : * tonal_context_improv_fx()
4131 : *
4132 : * Context-based improvement of 1st/2nd stage speech/music decision on stable tonal signals
4133 : *----------------------------------------------------------------------------------*/
4134 :
4135 2050 : static void tonal_context_improv_fx(
4136 : Encoder_State *st_fx, /* i/o: Encoder state structure */
4137 : const Word32 PS[], /* i : energy spectrum */
4138 : const Word16 voi_fv, /* i : scaled voicing feature */
4139 : const Word16 cor_map_sum_fv, /* i : scaled correlation map feature */
4140 : const Word16 LPCErr, /* i : scaled LP prediction error feature */
4141 : const Word16 Qx )
4142 : {
4143 : Word16 t2_fx, t3_fx, tL_fx, err_fx, cor_fx, dft_fx;
4144 : Word16 exp, expa, expb, fraca, fracb, scale, exp1, exp2, exp3, tmp;
4145 : Word16 voi_mean, lt_pitch_diff;
4146 : Word32 L_tmp, tonality, tonality1, tonality2, tonality3, sort_max, sort_avg, sort_val[80];
4147 2050 : VAD_HANDLE hVAD = st_fx->hVAD;
4148 2050 : SP_MUS_CLAS_HANDLE hSpMusClas = st_fx->hSpMusClas;
4149 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
4150 2050 : Flag Overflow = 0;
4151 2050 : move16();
4152 : #endif
4153 :
4154 2050 : IF( EQ_16( st_fx->last_codec_mode, MODE2 ) )
4155 : {
4156 262 : set16_fx( hSpMusClas->tonality2_buf_fx, 0, HANG_LEN_INIT );
4157 262 : set16_fx( hSpMusClas->tonality3_buf_fx, 0, HANG_LEN_INIT );
4158 262 : set16_fx( hSpMusClas->LPCErr_buf_fx, 0, HANG_LEN_INIT );
4159 262 : hSpMusClas->lt_music_hangover = 0;
4160 262 : move16();
4161 262 : hSpMusClas->lt_music_state = 0;
4162 262 : move16();
4163 262 : hSpMusClas->lt_speech_state = 0;
4164 262 : move16();
4165 262 : hSpMusClas->lt_speech_hangover = 0;
4166 262 : move16();
4167 : }
4168 :
4169 : /* estimate maximum tonality in bands [0-1 kHz], [1-2kHz] and [2-4kHz] */
4170 2050 : Copy32( PS, sort_val, 80 );
4171 :
4172 : /* tonality in band [0-1 kHz] */
4173 2050 : sort_32_fx( sort_val, 0, 19 );
4174 2050 : sort_max = L_add( sort_val[19], 0 );
4175 2050 : sort_avg = sum32_fx( &sort_val[0], 10 );
4176 :
4177 : /* tonality1 = sort_max / sort_avg; */
4178 2050 : IF( sort_avg )
4179 : {
4180 2050 : expa = norm_l( sort_max );
4181 2050 : fraca = extract_h( L_shl( sort_max, expa ) );
4182 2050 : expa = sub( 30, add( expa, Qx ) );
4183 :
4184 2050 : expb = norm_l( sort_avg );
4185 2050 : fracb = extract_h( L_shl( sort_avg, expb ) );
4186 2050 : expb = sub( 30, add( expb, Qx ) );
4187 :
4188 2050 : scale = shr( sub( fracb, fraca ), 15 );
4189 2050 : fraca = shl( fraca, scale );
4190 2050 : expa = sub( expa, scale );
4191 :
4192 2050 : tmp = div_s( fraca, fracb );
4193 2050 : exp1 = sub( expa, expb );
4194 :
4195 2050 : tonality1 = L_shl_o( tmp, exp1, &Overflow );
4196 : }
4197 : ELSE
4198 : {
4199 0 : tonality1 = L_shl( sort_max, sub( 15, Qx ) ); /*Q15 */
4200 : }
4201 :
4202 : /* tonality in band [1-2 kHz] */
4203 2050 : sort_32_fx( sort_val, 20, 39 );
4204 2050 : sort_max = sort_val[39];
4205 2050 : sort_avg = sum32_fx( &sort_val[20], 10 );
4206 :
4207 2050 : IF( sort_avg )
4208 : {
4209 : /* tonality2 = sort_max / sort_avg; */
4210 2050 : expa = norm_l( sort_max );
4211 2050 : fraca = extract_h( L_shl( sort_max, expa ) );
4212 2050 : expa = sub( 30, add( expa, Qx ) );
4213 :
4214 :
4215 2050 : expb = norm_l( sort_avg );
4216 2050 : fracb = extract_h( L_shl( sort_avg, expb ) );
4217 2050 : expb = sub( 30, add( expb, Qx ) );
4218 :
4219 2050 : scale = shr( sub( fracb, fraca ), 15 );
4220 2050 : fraca = shl( fraca, scale );
4221 2050 : expa = sub( expa, scale );
4222 :
4223 2050 : tmp = div_s( fraca, fracb );
4224 2050 : exp2 = sub( expa, expb );
4225 :
4226 2050 : tonality2 = L_shl_o( tmp, exp2, &Overflow );
4227 : }
4228 : ELSE
4229 : {
4230 0 : tonality2 = L_shl( sort_max, sub( 15, Qx ) ); /*Q15 */
4231 : }
4232 :
4233 : /* tonality in band [2-4 kHz] */
4234 2050 : sort_32_fx( sort_val, 40, 79 );
4235 2050 : sort_max = sort_val[79];
4236 2050 : sort_avg = sum32_fx( &sort_val[40], 20 );
4237 :
4238 2050 : IF( sort_avg )
4239 : {
4240 : /* tonality3 = sort_max / sort_avg; */
4241 2050 : expa = norm_l( sort_max );
4242 2050 : fraca = extract_h( L_shl( sort_max, expa ) );
4243 2050 : expa = sub( 30, add( expa, Qx ) );
4244 :
4245 2050 : expb = norm_l( sort_avg );
4246 2050 : fracb = extract_h( L_shl( sort_avg, expb ) );
4247 2050 : expb = sub( 30, add( expb, Qx ) );
4248 :
4249 2050 : scale = shr( sub( fracb, fraca ), 15 );
4250 2050 : fraca = shl( fraca, scale );
4251 2050 : expa = sub( expa, scale );
4252 :
4253 2050 : tmp = div_s( fraca, fracb );
4254 2050 : exp3 = sub( expa, expb );
4255 :
4256 2050 : tonality3 = L_shl_o( tmp, exp3, &Overflow );
4257 : }
4258 : ELSE
4259 : {
4260 0 : tonality3 = L_shl( sort_max, sub( 15, Qx ) ); /*Q15 */
4261 : }
4262 :
4263 2050 : tonality = L_max( L_max( tonality1, tonality2 ), tonality3 );
4264 :
4265 : /* voi_mean = 0.33f * (st->voicing_fx[0] + voicing[1] + voicing[2]); */
4266 2050 : L_tmp = L_mult( st_fx->voicing_fx[0], 10923 );
4267 2050 : L_tmp = L_mac( L_tmp, st_fx->voicing_fx[1], 10923 );
4268 2050 : voi_mean = mac_r_sat( L_tmp, st_fx->voicing_fx[2], 10923 ); /* Q15 */
4269 2050 : test();
4270 2050 : IF( EQ_16( hVAD->hangover_cnt, 10 ) && EQ_16( st_fx->vad_flag, 1 ) )
4271 : {
4272 : /* long-term voicing parameter */
4273 10 : hSpMusClas->lt_voicing = round_fx( L_mac( L_mult( 3277, hSpMusClas->lt_voicing ), 29491, voi_mean ) );
4274 :
4275 : /* long-term correlation value */
4276 10 : hSpMusClas->lt_corr = round_fx( L_mac( L_mult( 3277, hSpMusClas->lt_corr ), 29491, st_fx->old_corr_fx ) );
4277 :
4278 : /* long-term tonality measure */
4279 10 : hSpMusClas->lt_tonality = L_add( Mult_32_16( hSpMusClas->lt_tonality, 3277 ), Mult_32_16( tonality, 29491 ) );
4280 : }
4281 : ELSE
4282 : {
4283 : /* long-term voicing parameter */
4284 2040 : hSpMusClas->lt_voicing = round_fx( L_mac( L_mult( 22938, hSpMusClas->lt_voicing ), 9830, voi_mean ) );
4285 :
4286 : /* long-term correlation value */
4287 2040 : hSpMusClas->lt_corr = round_fx( L_mac( L_mult( 22938, hSpMusClas->lt_corr ), 9830, st_fx->old_corr_fx ) );
4288 :
4289 : /* long-term tonality measure */
4290 2040 : hSpMusClas->lt_tonality = L_add( Mult_32_16( hSpMusClas->lt_tonality, 16384 ), Mult_32_16( tonality, 16384 ) );
4291 : }
4292 2050 : move16();
4293 2050 : move16();
4294 2050 : move16();
4295 :
4296 : /* Pitch difference w.r.t to past 3 frames */
4297 2050 : lt_pitch_diff = abs_s( sub( hSpMusClas->lt_corr_pitch[0], st_fx->pitch[0] ) );
4298 2050 : lt_pitch_diff = add( lt_pitch_diff, abs_s( sub( hSpMusClas->lt_corr_pitch[1], st_fx->pitch[0] ) ) );
4299 2050 : lt_pitch_diff = add( lt_pitch_diff, abs_s( sub( hSpMusClas->lt_corr_pitch[2], st_fx->pitch[0] ) ) );
4300 :
4301 2050 : hSpMusClas->lt_corr_pitch[0] = hSpMusClas->lt_corr_pitch[1];
4302 2050 : move16();
4303 2050 : hSpMusClas->lt_corr_pitch[1] = hSpMusClas->lt_corr_pitch[2];
4304 2050 : move16();
4305 2050 : hSpMusClas->lt_corr_pitch[2] = st_fx->pitch[0];
4306 2050 : move16();
4307 :
4308 2050 : hSpMusClas->lt_old_mode[0] = hSpMusClas->lt_old_mode[1];
4309 2050 : move16();
4310 2050 : hSpMusClas->lt_old_mode[1] = hSpMusClas->lt_old_mode[2];
4311 2050 : move16();
4312 :
4313 2050 : test();
4314 2050 : test();
4315 2050 : test();
4316 2050 : test();
4317 2050 : test();
4318 2050 : test();
4319 2050 : test();
4320 2050 : test();
4321 2050 : test();
4322 2050 : test();
4323 2050 : test();
4324 2050 : test();
4325 2050 : test();
4326 2050 : IF( st_fx->sp_aud_decision1 == 1 &&
4327 : ( GT_32( L_min( L_min( tonality1, tonality2 ), tonality3 ), 1638400 ) ) &&
4328 : ( GT_32( L_add_sat( tonality1, tonality2 ), 6553600 ) && GT_32( L_add_sat( tonality2, tonality3 ), 6553600 ) && GT_32( L_add_sat( tonality1, tonality3 ), 6553600 ) ) &&
4329 : ( LT_32( hSpMusClas->lt_tonality, 655360000 ) ) &&
4330 : ( ( GT_32( hSpMusClas->lt_tonality, 32768000 ) && GT_16( s_max( hSpMusClas->lt_voicing, voi_mean ), 32440 ) ) ||
4331 : ( GT_32( hSpMusClas->lt_tonality, 49152000 ) && GT_16( hSpMusClas->lt_corr, 32440 ) ) ||
4332 : ( GT_32( hSpMusClas->lt_tonality, 98304000 ) && GT_16( hSpMusClas->lowrate_pitchGain, 15729 ) ) ||
4333 : ( lt_pitch_diff == 0 && GT_16( hSpMusClas->lowrate_pitchGain, 14582 ) ) ) )
4334 : {
4335 19 : IF( LT_16( sum16_fx( hSpMusClas->lt_old_mode, 2 ), 2 ) )
4336 : {
4337 : /* probably speech - change the decision to speech */
4338 0 : st_fx->sp_aud_decision1 = 0;
4339 0 : move16();
4340 0 : st_fx->sp_aud_decision2 = 0;
4341 0 : move16();
4342 :
4343 0 : if ( hSpMusClas->lt_hangover == 0 )
4344 : {
4345 0 : hSpMusClas->lt_hangover = 6;
4346 0 : move16();
4347 : }
4348 : }
4349 : }
4350 : ELSE
4351 : {
4352 : /* not speech, but still in the hangover period - change the decision to speech */
4353 2031 : IF( hSpMusClas->lt_hangover > 0 )
4354 : {
4355 0 : st_fx->sp_aud_decision1 = 0;
4356 0 : move16();
4357 0 : st_fx->sp_aud_decision2 = 0;
4358 0 : move16();
4359 :
4360 0 : hSpMusClas->lt_hangover = sub( hSpMusClas->lt_hangover, 1 );
4361 : }
4362 : }
4363 :
4364 : /* calculate standard deviation of log-tonality */
4365 2050 : Copy( hSpMusClas->tonality2_buf_fx + 1, hSpMusClas->tonality2_buf_fx, HANG_LEN_INIT - 1 );
4366 : /* st->tonality2_buf[HANG_LEN_INIT - 1] = 0.2f*(float)log10(tonality2); */
4367 2050 : exp = norm_l( tonality2 );
4368 2050 : tmp = Log2_norm_lc( L_shl( tonality2, exp ) ); /*15 */
4369 2050 : exp = sub( 30, add( exp, 16 ) );
4370 2050 : L_tmp = Mpy_32_16( exp, tmp, 15783 ); /*19 //3945, 0.2*log10(2), Q18 */
4371 2050 : hSpMusClas->tonality2_buf_fx[HANG_LEN_INIT - 1] = round_fx( L_shl( L_tmp, 11 ) ); /*14 */
4372 2050 : move16();
4373 : /* t2 = std( st->tonality2_buf, HANG_LEN_INIT ); */
4374 2050 : t2_fx = std_fx( hSpMusClas->tonality2_buf_fx, HANG_LEN_INIT ); /*14 */
4375 :
4376 2050 : Copy( hSpMusClas->tonality3_buf_fx + 1, hSpMusClas->tonality3_buf_fx, HANG_LEN_INIT - 1 );
4377 : /* st->tonality3_buf[HANG_LEN_INIT - 1] = 0.2f*(float)log10(tonality3); */
4378 2050 : exp = norm_l( tonality3 );
4379 2050 : tmp = Log2_norm_lc( L_shl( tonality3, exp ) ); /*15 */
4380 2050 : exp = sub( 30, add( exp, 16 ) );
4381 2050 : L_tmp = Mpy_32_16( exp, tmp, 15783 ); /*19 //3945, 0.2*log10(2), Q18 */
4382 2050 : hSpMusClas->tonality3_buf_fx[HANG_LEN_INIT - 1] = round_fx( L_shl( L_tmp, 11 ) ); /*14 */
4383 2050 : t3_fx = std_fx( hSpMusClas->tonality3_buf_fx, HANG_LEN_INIT ); /*14 */
4384 2050 : move16();
4385 :
4386 : /* tL = 0.2f*(float)log10(st->lt_tonality); */
4387 2050 : exp = norm_l( hSpMusClas->lt_tonality );
4388 2050 : tmp = Log2_norm_lc( L_shl( hSpMusClas->lt_tonality, exp ) ); /*15 */
4389 2050 : exp = sub( 30, add( exp, 16 ) );
4390 2050 : L_tmp = Mpy_32_16( exp, tmp, 15783 ); /*19 //3945, 0.2*log10(2), Q18 */
4391 2050 : tL_fx = round_fx( L_shl( L_tmp, 11 ) ); /*14 */
4392 :
4393 : /* calculate standard deviation of residual LP energy */
4394 2050 : Copy( hSpMusClas->LPCErr_buf_fx + 1, hSpMusClas->LPCErr_buf_fx, HANG_LEN_INIT - 1 );
4395 2050 : hSpMusClas->LPCErr_buf_fx[HANG_LEN_INIT - 1] = LPCErr;
4396 2050 : move16();
4397 : /* err = std( st->LPCErr_buf, HANG_LEN_INIT ); */
4398 2050 : err_fx = std_fx( hSpMusClas->LPCErr_buf_fx, HANG_LEN_INIT );
4399 :
4400 2050 : cor_fx = s_max( sub( voi_fv, cor_map_sum_fv ), 0 ); /*15 */
4401 2050 : dft_fx = abs_s( sub( hSpMusClas->tonality2_buf_fx[HANG_LEN_INIT - 1], hSpMusClas->tonality3_buf_fx[HANG_LEN_INIT - 1] ) ); /*14 */
4402 :
4403 :
4404 : /* state machine for strong music */
4405 2050 : test();
4406 2050 : test();
4407 2050 : test();
4408 2050 : test();
4409 2050 : test();
4410 2050 : test();
4411 2050 : test();
4412 2050 : test();
4413 2050 : test();
4414 2050 : test();
4415 2050 : test();
4416 2050 : test();
4417 2050 : IF( ( EQ_16( st_fx->sp_aud_decision1, 1 ) ) && hSpMusClas->lt_music_state == 0 && hSpMusClas->lt_music_hangover == 0 &&
4418 : ( LT_16( t2_fx, 8847 ) ) && ( GT_16( t2_fx, 4260 ) ) && ( GT_16( t3_fx, 3604 ) ) && ( LT_16( tL_fx, 8847 ) ) && ( GT_16( tL_fx, 4260 ) ) && ( GT_16( err_fx, 8192 ) ) )
4419 : {
4420 7 : hSpMusClas->lt_music_state = 1;
4421 7 : move16();
4422 7 : hSpMusClas->lt_music_hangover = 6;
4423 7 : move16();
4424 : }
4425 2043 : ELSE IF( EQ_16( hSpMusClas->lt_music_state, 1 ) && hSpMusClas->lt_music_hangover == 0 &&
4426 : ( LT_16( t2_fx, 5571 ) ) && ( LT_16( t3_fx, 4260 ) ) && ( LT_16( tL_fx, 7373 ) ) )
4427 : {
4428 6 : hSpMusClas->lt_music_state = 0;
4429 6 : move16();
4430 6 : hSpMusClas->lt_music_hangover = 6;
4431 6 : move16();
4432 : }
4433 :
4434 2050 : IF( hSpMusClas->lt_music_hangover > 0 )
4435 : {
4436 73 : hSpMusClas->lt_music_hangover = sub( hSpMusClas->lt_music_hangover, 1 );
4437 73 : move16();
4438 : }
4439 :
4440 : /* state machine for strong speech */
4441 2050 : test();
4442 2050 : test();
4443 2050 : test();
4444 2050 : test();
4445 2050 : test();
4446 2050 : test();
4447 2050 : test();
4448 2050 : test();
4449 2050 : test();
4450 2050 : test();
4451 2050 : test();
4452 2050 : test();
4453 2050 : test();
4454 2050 : IF( ( EQ_16( st_fx->sp_aud_decision1, 1 ) ) && hSpMusClas->lt_speech_state == 0 && hSpMusClas->lt_speech_hangover == 0 &&
4455 : ( GT_16( cor_fx, 13107 ) ) && ( LT_16( dft_fx, 1638 ) ) && GT_16( shr( voi_fv, 1 ), add( cor_map_sum_fv, 1966 ) ) &&
4456 : ( LT_16( t2_fx, shr( cor_fx, 1 ) ) ) && ( LT_16( t3_fx, shr( cor_fx, 1 ) ) ) && ( LT_16( tL_fx, shr( cor_fx, 1 ) ) ) &&
4457 : ( LT_16( cor_map_sum_fv, cor_fx ) ) && ( GT_16( voi_fv, cor_fx ) ) && ( GT_16( voi_fv, 24903 ) ) )
4458 : {
4459 7 : hSpMusClas->lt_speech_state = 1;
4460 7 : move16();
4461 7 : hSpMusClas->lt_speech_hangover = 6;
4462 7 : move16();
4463 : }
4464 2043 : ELSE IF( ( EQ_16( hSpMusClas->lt_speech_state, 1 ) ) && hSpMusClas->lt_speech_hangover == 0 && ( LT_16( cor_fx, 13107 ) ) )
4465 : {
4466 7 : hSpMusClas->lt_speech_state = 0;
4467 7 : move16();
4468 7 : hSpMusClas->lt_speech_hangover = 6;
4469 7 : move16();
4470 : }
4471 :
4472 2050 : IF( hSpMusClas->lt_speech_hangover > 0 )
4473 : {
4474 70 : hSpMusClas->lt_speech_hangover = sub( hSpMusClas->lt_speech_hangover, 1 );
4475 70 : move16();
4476 : }
4477 :
4478 : /* final decision */
4479 2050 : test();
4480 2050 : test();
4481 2050 : IF( EQ_16( st_fx->sp_aud_decision1, 1 ) && EQ_16( hSpMusClas->lt_speech_state, 1 ) )
4482 : {
4483 : /* strong speech - probably error in speech/music classification */
4484 38 : st_fx->sp_aud_decision1 = 0;
4485 38 : move16();
4486 38 : st_fx->sp_aud_decision2 = 0;
4487 38 : move16();
4488 : }
4489 2012 : ELSE IF( st_fx->sp_aud_decision1 == 0 && EQ_16( hSpMusClas->lt_speech_state, 1 ) )
4490 : {
4491 : /* strong music - probably error in speech/music classification */
4492 8 : st_fx->sp_aud_decision1 = 0;
4493 8 : move16();
4494 8 : st_fx->sp_aud_decision2 = 0;
4495 8 : move16();
4496 : }
4497 :
4498 : /* update the buffer of past decisions */
4499 2050 : hSpMusClas->lt_old_mode[2] = st_fx->sp_aud_decision1;
4500 2050 : move16();
4501 :
4502 2050 : return;
4503 : }
4504 :
4505 : /*----------------------------------------------------------------------------------*
4506 : * detect_sparseness_fx()
4507 : * Measures spectral sparseness from the 128 bins of st_fx->lgBin_E_fx, updates the sparseness history in st_fx->hSpMusClas and, when sp_aud_decision1 == 1 and the spectrum is judged sparse, overrides ( sp_aud_decision1, sp_aud_decision2 ) to ( 0, 1 ).
4508 : * Also maintains gsc_cnt / gsc_hangover and stores localVAD_HE_SAD in st_fx->last_vad_spa_fx. LT_sparse_fx, hf_spar_buf_fx, lpe_buf_fx and voicing_buf_fx are updated only when st_fx->bwidth == WB.
4509 : *----------------------------------------------------------------------------------*/
4510 1041 : static void detect_sparseness_fx(
4511 : Encoder_State *st_fx, /* i/o: encoder state structure */
4512 : const Word16 localVAD_HE_SAD, /* i : HE-SAD flag without hangover */
4513 : const Word16 voi_fv /* i : scaled voicing feature */
4514 : )
4515 : {
4516 : Word16 sum, sumh;
4517 : Word32 L_tmp, L_tmp1;
4518 : Word16 tmp, tmp1;
4519 : Word16 S1[128];
4520 : Word16 i, j;
4521 1041 : Word16 hb_sp_high_flag = 0;
4522 1041 : move16();
4523 1041 : Word16 lb_sp_high_flag = 0;
4524 1041 : move16();
4525 : Word16 sparse;
4526 : Word16 tmp_buf[4];
4527 1041 : Word16 Mlpe = 0, Mv = 0, Msp;
4528 1041 : move16();
4529 1041 : move16();
4530 1041 : SP_MUS_CLAS_HANDLE hSpMusClas = st_fx->hSpMusClas;
4531 : /* work on a local copy of st_fx->lgBin_E_fx; S1 is clamped and reordered below */
4532 1041 : Copy( st_fx->lgBin_E_fx, S1, 128 );
4533 : /* clamp negative bins to zero and accumulate bins 0..79 into L_tmp */
4534 1041 : L_tmp = L_deposit_l( 0 );
4535 84321 : FOR( i = 0; i < 80; i++ )
4536 : {
4537 83280 : if ( S1[i] < 0 )
4538 : {
4539 17471 : S1[i] = 0;
4540 17471 : move16(); /* Q7 */
4541 : }
4542 83280 : L_tmp = L_add( L_tmp, L_deposit_l( S1[i] ) );
4543 : }
4544 : /* same clamping for bins 80..127, accumulated into L_tmp1 */
4545 1041 : L_tmp1 = L_deposit_l( 0 );
4546 51009 : FOR( i = 80; i < 128; i++ )
4547 : {
4548 49968 : if ( S1[i] < 0 )
4549 : {
4550 13117 : S1[i] = 0;
4551 13117 : move16();
4552 : }
4553 49968 : L_tmp1 = L_add( L_tmp1, L_deposit_l( S1[i] ) );
4554 : }
4555 : /* sumh covers bins 80..127, sum covers all 128 bins; each partial sum is shifted right by 7 first */
4556 1041 : sumh = extract_l( L_shr( L_tmp1, 7 ) ); /* Q0 */
4557 1041 : sum = add( extract_l( L_shr( L_tmp, 7 ) ), sumh ); /* Q0 */
4558 :
4559 : /* order spectral from max to min */
4560 1041 : order_spectrum_fx( S1, 128 );
4561 :
4562 : /* calculate spectral sparseness in the range 0 - 6.4 kHz */
4563 1041 : j = 0;
4564 1041 : move16();
4565 1041 : L_tmp = 0;
4566 1041 : move16();
4567 1041 : L_tmp1 = L_deposit_l( mult( sum, 24576 ) ); /* threshold: 24576 / 32768 = 0.75 of sum ( assuming mult() is a Q15 multiply ) */
4568 55622 : FOR( i = 0; i < 128; i++ )
4569 : {
4570 55616 : L_tmp = L_add( L_tmp, L_deposit_l( S1[i] ) );
4571 55616 : IF( GT_32( L_shr( L_tmp, 7 ), L_tmp1 ) )
4572 : {
4573 1035 : j = i;
4574 1035 : move16();
4575 1035 : BREAK;
4576 : }
4577 : }
4578 : /* shift the sparseness history by one entry; i is left pointing at the last slot */
4579 8328 : FOR( i = 0; i < HANG_LEN_INIT - 1; i++ )
4580 : {
4581 7287 : hSpMusClas->sparse_buf_fx[i] = hSpMusClas->sparse_buf_fx[i + 1];
4582 7287 : move16();
4583 : }
4584 : /* sparse = index of the first sorted bin at which the running sum exceeds the threshold ( stays 0 if it never does ) */
4585 1041 : sparse = j;
4586 1041 : move16();
4587 1041 : hSpMusClas->sparse_buf_fx[i] = sparse;
4588 1041 : move16();
4589 : /* WB only: update long-term sparseness, band sparseness flags, smoothed LP efficiency and smoothed voicing */
4590 1041 : IF( EQ_16( st_fx->bwidth, WB ) )
4591 : {
4592 0 : Msp = 0;
4593 0 : move16();
4594 0 : FOR( i = 0; i < 8; i++ )
4595 : {
4596 0 : Msp = add( Msp, hSpMusClas->sparse_buf_fx[i] );
4597 : }
4598 0 : Msp = shl( Msp, 5 ); /* Q8 */
4599 :
4600 : /* find long-term smoothed sparseness */
4601 0 : IF( st_fx->last_vad_spa_fx == 0 )
4602 : {
4603 0 : set16_fx( &hSpMusClas->sparse_buf_fx[0], sparse, HANG_LEN_INIT - 1 );
4604 0 : hSpMusClas->LT_sparse_fx = sparse; /* NOTE(review): sparse is a bin index ( Q0 ) while the ELSE branch keeps LT_sparse_fx in Q8 - confirm the intended scaling */
4605 0 : move16();
4606 : }
4607 : ELSE
4608 : {
4609 0 : set16_fx( tmp_buf, 0, 4 );
4610 : /* collect the four largest history values in tmp_buf, kept in descending order */
4611 0 : FOR( i = 0; i < HANG_LEN_INIT; i++ )
4612 : {
4613 0 : FOR( j = 0; j < 4; j++ )
4614 : {
4615 0 : IF( GT_16( hSpMusClas->sparse_buf_fx[i], tmp_buf[j] ) )
4616 : {
4617 0 : Copy( &tmp_buf[j], &tmp_buf[j + 1], sub( 3, j ) ); /* NOTE(review): source and destination overlap - assumes Copy() is overlap-safe, confirm */
4618 0 : tmp_buf[j] = hSpMusClas->sparse_buf_fx[i];
4619 0 : move16();
4620 0 : BREAK;
4621 : }
4622 : }
4623 : }
4624 :
4625 : /* ftmp = 0.25f*(HANG_LEN_INIT*Msp - sum_f(tmp_buf, 4)) - st->LT_sparse; */
4626 0 : tmp = shl( sum16_fx( tmp_buf, 4 ), 5 );
4627 0 : tmp = shl( sub( Msp, tmp ), 1 );
4628 0 : tmp = sub( tmp, hSpMusClas->LT_sparse_fx );
4629 : /* LT_sparse_fx moves a quarter of the way towards the target computed above */
4630 0 : hSpMusClas->LT_sparse_fx = add( hSpMusClas->LT_sparse_fx, shr( tmp, 2 ) ); /* Q8 */
4631 : }
4632 :
4633 : /* find high-band sparseness */
4634 0 : Copy( st_fx->lgBin_E_fx + 80, S1, 48 );
4635 : /* sort bins 80..127 from max to min */
4636 0 : order_spectrum_fx( S1, 48 );
4637 :
4638 0 : FOR( i = 0; i < HANG_LEN_INIT - 1; i++ )
4639 : {
4640 0 : hSpMusClas->hf_spar_buf_fx[i] = hSpMusClas->hf_spar_buf_fx[i + 1];
4641 0 : move16();
4642 : }
4643 :
4644 : /* st_fx->hf_spar_buf_fx[i] = sum_f(S1, 5)/sumh; */
4645 0 : L_tmp = L_deposit_l( 0 );
4646 0 : FOR( i = 0; i < 5; i++ )
4647 : {
4648 0 : if ( S1[i] < 0 )
4649 : {
4650 0 : S1[i] = 0;
4651 0 : move16();
4652 : }
4653 :
4654 0 : L_tmp = L_add( L_tmp, S1[i] );
4655 : }
4656 : /* a zero numerator stores 0 directly and skips the division by sumh */
4657 0 : tmp = extract_l( L_shr( L_tmp, 7 ) );
4658 0 : IF( tmp == 0 )
4659 : {
4660 0 : hSpMusClas->hf_spar_buf_fx[HANG_LEN_INIT - 1] = 0;
4661 : }
4662 : ELSE
4663 : {
4664 0 : hSpMusClas->hf_spar_buf_fx[HANG_LEN_INIT - 1] = div_s( tmp, sumh );
4665 : }
4666 0 : move16();
4667 : /* mean of the eight stored ratios ( each pre-shifted by 3 ); threshold 6554 / 32768 ~= 0.2 */
4668 0 : tmp = 0;
4669 0 : move16();
4670 0 : FOR( i = 0; i < 8; i++ )
4671 : {
4672 0 : tmp = add( tmp, shr( hSpMusClas->hf_spar_buf_fx[i], 3 ) );
4673 : }
4674 0 : IF( GT_16( tmp, 6554 ) )
4675 : {
4676 0 : hb_sp_high_flag = 1;
4677 0 : move16();
4678 : }
4679 :
4680 : /* find low-band sparseness */
4681 0 : Copy( st_fx->lgBin_E_fx, S1, 60 );
4682 : /* sort bins 0..59 from max to min; L_tmp takes the five largest, L_tmp1 the remaining 55 */
4683 0 : order_spectrum_fx( S1, 60 );
4684 0 : L_tmp = L_deposit_l( 0 );
4685 0 : L_tmp1 = L_deposit_l( 0 );
4686 0 : FOR( i = 0; i < 5; i++ )
4687 : {
4688 0 : if ( S1[i] < 0 )
4689 : {
4690 0 : S1[i] = 0;
4691 0 : move16();
4692 : }
4693 :
4694 0 : L_tmp = L_add( L_tmp, S1[i] );
4695 : }
4696 : /* loop continues from i = 5 */
4697 0 : FOR( ; i < 60; i++ )
4698 : {
4699 0 : if ( S1[i] < 0 )
4700 : {
4701 0 : S1[i] = 0;
4702 0 : move16();
4703 : }
4704 :
4705 0 : L_tmp1 = L_add( L_tmp1, S1[i] );
4706 : }
4707 :
4708 : /* if ( sum_f(S1, 5)/sum_f(S1,60) > 0.18f ) */
4709 0 : tmp = extract_l( L_shr( L_tmp, 7 ) );
4710 0 : IF( tmp != 0 )
4711 : {
4712 0 : tmp = div_s( tmp, add( tmp, extract_l( L_shr( L_tmp1, 7 ) ) ) );
4713 0 : if ( GT_16( tmp, 5898 ) )
4714 : {
4715 0 : lb_sp_high_flag = 1;
4716 0 : move16();
4717 : }
4718 : }
4719 :
4720 : /* find smoothed linear prediction efficiency */
4721 0 : FOR( i = 0; i < 7; i++ )
4722 : {
4723 0 : hSpMusClas->lpe_buf_fx[i] = hSpMusClas->lpe_buf_fx[i + 1];
4724 0 : move16();
4725 : }
4726 : /* newest entry is past_epsP2_fx; Mlpe sums the eight entries, each pre-shifted by 3 */
4727 0 : hSpMusClas->lpe_buf_fx[i] = hSpMusClas->past_epsP2_fx;
4728 0 : move16();
4729 0 : Mlpe = 0;
4730 0 : move16();
4731 0 : FOR( i = 0; i < 8; i++ )
4732 : {
4733 0 : Mlpe = add( Mlpe, shr( hSpMusClas->lpe_buf_fx[i], 3 ) );
4734 : }
4735 :
4736 : /* find smoothed voicing */
4737 0 : FOR( i = 0; i < HANG_LEN_INIT - 1; i++ )
4738 : {
4739 0 : hSpMusClas->voicing_buf_fx[i] = hSpMusClas->voicing_buf_fx[i + 1];
4740 0 : move16();
4741 : }
4742 : /* newest entry is voi_fv; Mv sums the eight entries, each pre-shifted by 3 */
4743 0 : hSpMusClas->voicing_buf_fx[i] = voi_fv;
4744 0 : move16();
4745 0 : Mv = 0;
4746 0 : move16();
4747 0 : FOR( i = 0; i < 8; i++ )
4748 : {
4749 0 : Mv = add( Mv, shr( hSpMusClas->voicing_buf_fx[i], 3 ) );
4750 : }
4751 : }
4752 :
4753 : /* avoid using LR-MDCT on sparse spectra */
4754 1041 : IF( EQ_16( st_fx->sp_aud_decision1, 1 ) )
4755 : {
4756 301 : tmp = 91;
4757 301 : move16();
4758 301 : if ( EQ_16( st_fx->bwidth, WB ) )
4759 : {
4760 0 : tmp = 90;
4761 0 : move16();
4762 : }
4763 : /* sparse above the threshold ( 91, or 90 for WB ) switches the decision to ( 0, 1 ) and starts the hangover */
4764 301 : IF( GT_16( sparse, tmp ) )
4765 : {
4766 0 : st_fx->sp_aud_decision1 = 0;
4767 0 : move16();
4768 0 : st_fx->sp_aud_decision2 = 1;
4769 0 : move16();
4770 0 : hSpMusClas->gsc_hangover = 1;
4771 0 : move16();
4772 : }
4773 301 : ELSE IF( EQ_16( hSpMusClas->gsc_hangover, 1 ) )
4774 : {
4775 0 : IF( GT_16( sparse, 85 ) )
4776 : {
4777 0 : st_fx->sp_aud_decision1 = 0;
4778 0 : move16();
4779 0 : st_fx->sp_aud_decision2 = 1;
4780 0 : move16();
4781 : }
4782 : ELSE
4783 : {
4784 0 : tmp = 0;
4785 0 : move16();
4786 0 : FOR( i = 0; i < hSpMusClas->gsc_cnt; i++ )
4787 : {
4788 0 : tmp = add( tmp, hSpMusClas->sparse_buf_fx[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt + i] );
4789 : }
4790 0 : tmp1 = div_s( 1, hSpMusClas->gsc_cnt ); /* NOTE(review): needs gsc_cnt > 0; within this function gsc_hangover == 1 is only left set together with a non-zero gsc_cnt - confirm nothing else resets gsc_cnt */
4791 0 : tmp = mult( tmp, tmp1 );
4792 : /* keep the ( 0, 1 ) decision while sparse stays within 7 of the average over the last gsc_cnt history entries */
4793 0 : IF( LT_16( abs_s( sub( sparse, tmp ) ), 7 ) )
4794 : {
4795 0 : st_fx->sp_aud_decision1 = 0;
4796 0 : move16();
4797 0 : st_fx->sp_aud_decision2 = 1;
4798 0 : move16();
4799 : }
4800 : }
4801 : }
4802 : /* additional WB rule using the long-term features computed in the WB branch above */
4803 301 : IF( EQ_16( st_fx->bwidth, WB ) )
4804 : {
4805 0 : test();
4806 0 : test();
4807 0 : test();
4808 0 : test();
4809 0 : test();
4810 0 : test();
4811 0 : test();
4812 0 : test();
4813 0 : test();
4814 0 : IF( GT_16( hSpMusClas->LT_sparse_fx, 15360 ) && GT_16( sparse, 50 ) && LT_16( Mlpe, -1331 ) && GT_16( Mv, 27853 ) &&
4815 : lb_sp_high_flag == 0 && ( ( hb_sp_high_flag == 0 && GT_16( sumh, mult_r( 4915, sum ) ) ) || LE_16( sumh, mult_r( 4915, sum ) ) ) )
4816 : {
4817 0 : st_fx->sp_aud_decision1 = 0;
4818 0 : move16();
4819 0 : st_fx->sp_aud_decision2 = 1;
4820 0 : move16();
4821 0 : hSpMusClas->gsc_hangover = 1;
4822 0 : move16();
4823 : }
4824 0 : ELSE IF( EQ_16( hSpMusClas->gsc_hangover, 1 ) && !( st_fx->sp_aud_decision1 == 0 && EQ_16( st_fx->sp_aud_decision2, 1 ) ) )
4825 : {
4826 0 : IF( LT_16( abs_s( sub( sparse, mean_fx( &hSpMusClas->sparse_buf_fx[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt], hSpMusClas->gsc_cnt ) ) ), 7 ) )
4827 : {
4828 0 : st_fx->sp_aud_decision1 = 0;
4829 0 : move16();
4830 0 : st_fx->sp_aud_decision2 = 1;
4831 0 : move16();
4832 : }
4833 : }
4834 : }
4835 : }
4836 :
4837 : /* update the counter of consecutive GSC frames with sparse spectrum */
4838 1041 : test();
4839 1041 : IF( st_fx->sp_aud_decision1 == 0 && EQ_16( st_fx->sp_aud_decision2, 1 ) )
4840 : {
4841 0 : hSpMusClas->gsc_cnt = add( hSpMusClas->gsc_cnt, 1 );
4842 0 : IF( GT_16( hSpMusClas->gsc_cnt, 7 ) )
4843 : {
4844 0 : hSpMusClas->gsc_cnt = 7;
4845 0 : move16();
4846 : }
4847 : }
4848 : ELSE
4849 : {
4850 1041 : hSpMusClas->gsc_cnt = 0;
4851 1041 : move16();
4852 1041 : hSpMusClas->gsc_hangover = 0;
4853 1041 : move16();
4854 : }
4855 : /* remember the VAD flag; it is read back as last_vad_spa_fx in the WB branch above */
4856 1041 : st_fx->last_vad_spa_fx = localVAD_HE_SAD;
4857 1041 : move16();
4858 :
4859 1041 : return;
4860 : }
4861 :
4862 : /*---------------------------------------------------------------------*
4863 : * order_spectrum_fx()
4864 : * Sorts vec[0..len-1] in place from max to min with a two-ended selection sort:
4865 : * pass i scans vec[i..len-1-i], puts its maximum at vec[i] and its minimum at vec[len-1-i].
4866 : *---------------------------------------------------------------------*/
4867 1041 : static void order_spectrum_fx(
4868 : Word16 *vec, /* i/o: vector to be sorted in place */
4869 : Word16 len ) /* i : number of elements in vec */
4870 : {
4871 : Word16 i, j, end, end_1, len_2, tmp;
4872 : Word16 smax, smin;
4873 : Word16 imax, imin;
4874 : /* each pass fixes one element at both ends, so len / 2 passes are enough */
4875 1041 : len_2 = shr( len, 1 );
4876 67665 : FOR( i = 0; i < len_2; i++ )
4877 : {
4878 66624 : imax = i;
4879 66624 : move16();
4880 66624 : imin = i;
4881 66624 : move16();
4882 66624 : smax = vec[i];
4883 66624 : move16();
4884 66624 : smin = vec[i];
4885 66624 : move16();
4886 66624 : end = sub( len, i ); /* unsorted range is vec[i] .. vec[end - 1] */
4887 4397184 : FOR( j = i; j < end; j++ )
4888 : {
4889 4330560 : IF( GT_16( vec[j], smax ) )
4890 : {
4891 178505 : smax = vec[j];
4892 178505 : move16();
4893 178505 : imax = j;
4894 178505 : move16();
4895 : }
4896 : ELSE /* a value above smax cannot also be below smin, so the minimum test is only needed here */
4897 : {
4898 4152055 : IF( LT_16( vec[j], smin ) )
4899 : {
4900 250056 : smin = vec[j];
4901 250056 : move16();
4902 250056 : imin = j;
4903 250056 : move16();
4904 : }
4905 : }
4906 : }
4907 : /* swap the maximum to the front of the unsorted range */
4908 66624 : tmp = vec[i];
4909 66624 : move16();
4910 66624 : vec[i] = smax;
4911 66624 : move16();
4912 66624 : vec[imax] = tmp;
4913 66624 : move16();
4914 : /* if the minimum was at position i, the swap above has just moved it to imax */
4915 66624 : IF( EQ_16( imin, i ) )
4916 : {
4917 11757 : imin = imax;
4918 11757 : move16();
4919 : }
4920 : /* swap the minimum to the back of the unsorted range */
4921 66624 : end_1 = sub( end, 1 );
4922 66624 : tmp = vec[end_1];
4923 66624 : move16();
4924 66624 : vec[end_1] = smin;
4925 66624 : move16();
4926 66624 : vec[imin] = tmp;
4927 66624 : move16();
4928 : }
4929 1041 : }
|