Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include <stdlib.h>
6 : #include <assert.h>
7 : #include "options.h"
8 : #include "cnst.h"
9 : // #include "prot_fx.h"
10 : #include "rom_enc.h"
11 : #include "rom_com_fx.h"
12 : #include "rom_com.h"
13 : #include "stl.h"
14 : #include "prot_fx.h" /* Function prototypes */
15 : #include "prot_fx_enc.h" /* Function prototypes */
16 : #ifdef DEBUGGING
17 : #include "debug.h"
18 : #endif
19 : #include <math.h>
20 : #include "ivas_prot_fx.h"
21 :
22 :
23 : /*---------------------------------------------------------------------*
24 : * Local constants
25 : *---------------------------------------------------------------------*/
26 : #define ATT_NSEG 32 /* number of segments a frame of L_FRAME samples is divided into (see ATT_SEG_LEN) */
27 : #define ATT_SEG_LEN ( L_FRAME / ATT_NSEG ) /* length of one segment in samples */
28 : #define ATT_3LSUB_POS ( 3 * ATT_NSEG / NB_SUBFR ) /* NOTE(review): presumably the segment index where the last subframe starts (NB_SUBFR subframes per frame) - confirm */
29 : #define ATT_3LSUB_POS_16k 26 /* (short)((4.0f * ATT_NSEG / (float)NB_SUBFR16k) + 0.5f) */
30 :
31 : #define LOG_PROB_CONST 11292 /*0.5f * N_FEATURES * LOG_PI2 in Q10 */
32 : #define DLP_BIAS 0.138121f /* floating-point value; DLP_BIAS_FX below is the same value rounded to Q18 */
33 : #define DLP_BIAS_FX 36208 /*Q18*/
34 :
35 : #define TON_ALPHA_FX 31130 /* 0.95f in Q15 */
36 : #define THR_MASS_MAX_FX 3565158 /* 0.85f in Q22 */
37 : #define THR_MASS_MIN_FX 3145728 /* 0.75f in Q22 */
38 : #define THR_MASS_STEP_UP_FX 41943 /* 0.01f in Q22 */
39 : #define THR_MASS_STEP_DN_FX 83886 /* 0.02f in Q22 */
40 :
41 : /*---------------------------------------------------------------------*
42 : * Local functions
43 : *---------------------------------------------------------------------*/
44 :
45 : static Word16 sp_mus_classif_gmm_fx( Encoder_State *st_fx, const Word16 localVAD_HE_SAD, const Word16 lsp_new[M], const Word16 cor_map_sum, const Word32 epsP[M + 1], const Word32 PS[], Word16 non_sta, Word16 relE, Word16 *voi_fv, Word16 *cor_map_sum_fv, Word16 *LPCErr, Word16 Q_esp, Word16 *high_lpn_flag_ptr ); /* 1st stage classifier based on the GMM model; returns the decision flag (1-music, 0-speech or noise) */
46 :
47 :
48 : static void sp_mus_classif_2nd_fx( Encoder_State *st, const Word16 Etot, Word16 *attack_flag, const Word16 *inp, const Word16 Qx ); /* 2nd stage classifier (rewrite music to speech in onsets) */
49 :
50 : static void music_mixed_classif_improv_fx( Encoder_State *st, const Word16 *new_inp, const Word32 *epsP, Word16 Q_epsP, Word16 etot, Word16 old_cor, Word16 cor_map_sum ); /* improvement of the 1st stage decision on mixed/music content */
51 :
52 : static void tonal_context_improv_fx( Encoder_State *st_fx, const Word32 PS[], const Word16 voi_fv, const Word16 cor_map_sum_fv, const Word16 LPCErr, const Word16 Qx ); /* context-based improvement of the 1st and 2nd stage decisions on stable tonal signals */
53 :
54 : static void var_cor_calc_fx( const Word16 old_corr, Word16 *mold_corr, Word16 var_cor_t[], Word16 *high_stable_cor ); /* NOTE(review): no call site in the part of the file reviewed here; see the definition for the contract */
55 :
56 : static Word16 attack_det_fx( const Word16 *inp, const Word16 Qx, const Word16 last_clas, const Word16 localVAD, const Word16 coder_type, const Word32 total_brate ); /* NOTE(review): no call site in the part of the file reviewed here; presumably the attack detector - confirm at the definition */
57 :
58 : static void order_spectrum_fx( Word16 *vec, Word16 len ); /* NOTE(review): no call site in the part of the file reviewed here; see the definition for the contract */
59 :
60 : static void detect_sparseness_fx( Encoder_State *st_fx, const Word16 localVAD_HE_SAD, const Word16 voi_fv ); /* sparseness detection, called to avoid LR-MDCT on sparse spectra at 13.2 kbps (WB/SWB) */
61 : // Q18: natural-log mixture weights, one table per class (speech, music, noise), N_SMC_MIXTURES entries each; exp(value / 2^18) sums to ~1.0 within every table. NOTE(review): non-const globals with external linkage and not referenced in the part of the file reviewed here - confirm whether they are written at run time before adding const/static.
62 : Word32 log_weights_speech_compute[N_SMC_MIXTURES] = {
63 : -578045, -483403, -473370, -468152, -379470, -473234
64 : };
65 : Word32 log_weights_music_compute[N_SMC_MIXTURES] = {
66 : -486797, -522830, -315523, -429999, -775981, -477255
67 : };
68 : Word32 log_weights_noise_compute[N_SMC_MIXTURES] = {
69 : -439941, -576743, -269243, -645452, -529228, -542196
70 : };
71 : /*---------------------------------------------------------------------*
72 : * speech_music_clas_init_fx()
73 : *
74 : * Initialization of speech/music classifier: writes start-up values to the counters, history buffers, thresholds and long-term statistics of the handle. Returns nothing. speech_music_clas_init_ivas_fx() is the sibling initializer that sets additional fields.
75 : *---------------------------------------------------------------------*/
76 :
77 3 : void speech_music_clas_init_fx(
78 : SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle; dereferenced without a check, so it must be valid */
79 : )
80 : {
81 : Word16 i;
82 :
83 :
84 3 : hSpMusClas->inact_cnt = 0;
85 3 : move16();
86 3 : set16_fx( hSpMusClas->past_dec, 0, HANG_LEN - 1 );
87 3 : set16_fx( hSpMusClas->past_dlp_fx, 0, HANG_LEN - 1 );
88 3 : set16_fx( hSpMusClas->past_log_enr_fx, -1448, NB_BANDS_SPMUS ); /* log(E_MIN) in Q8 */
89 :
90 3 : hSpMusClas->sp_mus_state = -8;
91 3 : move16();
92 3 : hSpMusClas->wdrop_fx = 0;
93 3 : move16();
94 3 : hSpMusClas->wdlp_0_95_sp_fx = 0;
95 3 : move16();
96 3 : set16_fx( hSpMusClas->last_lsp_fx, 0, M_LSP_SPMUS );
97 3 : hSpMusClas->last_cor_map_sum_fx = 0;
98 3 : move16();
99 3 : hSpMusClas->last_non_sta_fx = 0;
100 3 : move16();
101 3 : set32_fx( hSpMusClas->past_PS_fx, 0, HIGHEST_FBIN - LOWEST_FBIN ); /* one entry per frequency bin in [LOWEST_FBIN, HIGHEST_FBIN) */
102 3 : hSpMusClas->past_ps_diff_fx = 0;
103 3 : move16();
104 3 : hSpMusClas->past_epsP2_fx = 1024; /* 1.0f in Q10 */
105 3 : move16();
106 :
107 :
108 3 : hSpMusClas->gsc_thres_fx[0] = TH_0_MIN_FX; /* the four GSC thresholds start at their *_MIN_FX constants */
109 3 : move16();
110 3 : hSpMusClas->gsc_thres_fx[1] = TH_1_MIN_FX;
111 3 : move16();
112 3 : hSpMusClas->gsc_thres_fx[2] = TH_2_MIN_FX;
113 3 : move16();
114 3 : hSpMusClas->gsc_thres_fx[3] = TH_3_MIN_FX;
115 3 : move16();
116 3 : set16_fx( hSpMusClas->gsc_lt_diff_etot_fx, 0, 40 ); /* NOTE(review): literal length 40 - presumably the array size; confirm against the struct declaration */
117 3 : hSpMusClas->gsc_mem_etot_fx = 0;
118 3 : move16();
119 3 : hSpMusClas->gsc_last_music_flag = 0;
120 3 : move16();
121 3 : hSpMusClas->gsc_nb_thr_1 = 0;
122 3 : move16();
123 3 : hSpMusClas->gsc_nb_thr_3 = 0;
124 3 : move16();
125 3 : hSpMusClas->mold_corr_fx = 29491; /* 0.9f in Q15 */
126 3 : move16();
127 3 : hSpMusClas->mean_avr_dyn_fx = 64; /* 0.5f in Q7 */
128 3 : move16(); /*Q7 */
129 3 : hSpMusClas->last_sw_dyn_fx = 2560; /* 10.0f in Q7 */
130 3 : move16();
131 : /* speech/music classifier improvement: the seven history buffers below are reset over their first BUF_LEN entries */
132 183 : FOR( i = 0; i < BUF_LEN; i++ )
133 : {
134 180 : hSpMusClas->buf_flux_fx[i] = -12800;
135 180 : move16(); /*-100.0 in Q7 */
136 180 : hSpMusClas->buf_pkh_fx[i] = 0;
137 180 : move16();
138 180 : hSpMusClas->buf_epsP_tilt_fx[i] = 0;
139 180 : move16();
140 180 : hSpMusClas->buf_cor_map_sum_fx[i] = 0;
141 180 : move16();
142 180 : hSpMusClas->buf_Ntonal_fx[i] = 0;
143 180 : move16();
144 180 : hSpMusClas->buf_Ntonal2_fx[i] = 0;
145 180 : move16();
146 180 : hSpMusClas->buf_Ntonal_lf_fx[i] = 0;
147 180 : move16();
148 : }
149 :
150 3 : set16_fx( hSpMusClas->lpe_buf_fx, 0, HANG_LEN_INIT );
151 3 : set16_fx( hSpMusClas->voicing_buf_fx, 0, HANG_LEN_INIT );
152 3 : hSpMusClas->gsc_hangover = 0;
153 3 : move16();
154 3 : set16_fx( hSpMusClas->sparse_buf_fx, 0, HANG_LEN_INIT );
155 3 : set16_fx( hSpMusClas->hf_spar_buf_fx, 0, HANG_LEN_INIT );
156 3 : hSpMusClas->LT_sparse_fx = 0;
157 3 : move16();
158 3 : hSpMusClas->gsc_cnt = 0;
159 3 : move16();
160 3 : set16_fx( hSpMusClas->old_Bin_E_fx, 0, 3 * N_OLD_BIN_E );
161 3 : set16_fx( hSpMusClas->buf_etot_fx, 0, 4 ); /* NOTE(review): literal length 4 - confirm it matches the array size */
162 3 : set16_fx( hSpMusClas->buf_dlp_fx, 0, 10 ); /* NOTE(review): literal length 10 - confirm it matches the array size */
163 :
164 3 : hSpMusClas->UV_cnt1 = 300;
165 3 : move16();
166 3 : hSpMusClas->LT_UV_cnt1_fx = 16000;
167 3 : move16(); /*250.0f in Q6 */
168 3 : hSpMusClas->onset_cnt = 0;
169 3 : move16();
170 3 : hSpMusClas->attack_hangover = 0;
171 3 : move16();
172 3 : hSpMusClas->dec_mov_fx = 0;
173 3 : move16();
174 3 : hSpMusClas->dec_mov1_fx = 0;
175 3 : move16();
176 3 : hSpMusClas->mov_log_max_spl_fx = 25600;
177 3 : move16(); /*200.0 in Q7 */
178 3 : hSpMusClas->old_lt_diff_fx[0] = 0;
179 3 : move16();
180 3 : hSpMusClas->old_lt_diff_fx[1] = 0;
181 3 : move16();
182 :
183 : /* GSC - pitch excitation parameters */
184 3 : hSpMusClas->high_stable_cor = 0;
185 3 : move16();
186 3 : set16_fx( hSpMusClas->var_cor_t_fx, 0, VAR_COR_LEN );
187 :
188 3 : hSpMusClas->lps_fx = 0;
189 3 : move16();
190 3 : hSpMusClas->lpm_fx = 0;
191 3 : move16();
192 3 : hSpMusClas->lt_dec_thres_fx = 5120;
193 3 : move16(); /*10 in Q9 */
194 3 : hSpMusClas->ener_RAT_fx = 0;
195 3 : move16();
196 :
197 : /* speech/music classification */
198 3 : set16_fx( hSpMusClas->lt_old_mode, 1, 3 ); /* the three history entries start at 1, not 0 */
199 3 : hSpMusClas->lt_voicing = 16384 /*0.5f Q15*/;
200 3 : move16();
201 3 : hSpMusClas->lt_corr = 16384 /*0.5f Q15*/;
202 3 : move16();
203 3 : hSpMusClas->lt_tonality = 0;
204 3 : move32(); /* counted as a 32-bit move, unlike the neighbouring 16-bit fields */
205 3 : set16_fx( hSpMusClas->lt_corr_pitch, 0, 3 );
206 3 : hSpMusClas->lt_hangover = 0;
207 3 : move16();
208 3 : hSpMusClas->lowrate_pitchGain = 0;
209 3 : move16();
210 :
211 :
212 3 : hSpMusClas->lt_music_hangover = 0;
213 3 : move16();
214 3 : set16_fx( hSpMusClas->tonality2_buf_fx, 0, HANG_LEN_INIT );
215 3 : set16_fx( hSpMusClas->tonality3_buf_fx, 0, HANG_LEN_INIT );
216 3 : set16_fx( hSpMusClas->LPCErr_buf_fx, 0, HANG_LEN_INIT );
217 3 : hSpMusClas->lt_music_state = 0;
218 3 : move16();
219 3 : hSpMusClas->lt_speech_state = 0;
220 3 : move16();
221 3 : hSpMusClas->lt_speech_hangover = 0;
222 3 : move16();
223 :
224 :
225 3 : return;
226 : }
227 :
228 8953 : void speech_music_clas_init_ivas_fx( /* Initialization of the speech/music classifier handle; compared with speech_music_clas_init_fx() it also sets additional fields (e.g. FV_st_fx, prev_FV_fx, dlp_mean/var, finc_prev_fx, tod_*, lpn_fx). Returns nothing. */
229 : SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle; dereferenced without a check, so it must be valid */
230 : )
231 : {
232 : Word16 i;
233 :
234 8953 : set32_fx( hSpMusClas->FV_st_fx, 0, N_SMC_FEATURES );
235 :
236 8953 : hSpMusClas->inact_cnt = 0;
237 8953 : move16();
238 8953 : set16_fx( hSpMusClas->past_dec, 0, HANG_LEN - 1 );
239 8953 : set16_fx( hSpMusClas->past_dlp_fx, 0, HANG_LEN - 1 );
240 :
241 8953 : set32_fx( hSpMusClas->past_dlp_mean_ST_fx, 0, HANG_LEN - 1 );
242 8953 : hSpMusClas->dlp_mean_ST_fx = 0;
243 8953 : move32();
244 8953 : hSpMusClas->dlp_mean_LT_fx = 0;
245 8953 : move32();
246 8953 : hSpMusClas->dlp_var_LT_fx = 0;
247 8953 : move32();
248 :
249 143248 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
250 : {
251 134295 : hSpMusClas->prev_FV_fx[i] = L_add( L_shr( hout_intervals_fx[2 * i], 1 ), L_shr( hout_intervals_fx[2 * i + 1], 1 ) ); /* average of entries 2i and 2i+1 of hout_intervals_fx; each term is halved before the add (presumably to keep the 32-bit sum from overflowing) */
252 134295 : move32();
253 : }
254 :
255 143248 : FOR( i = 0; i < NB_BANDS_SPMUS; i++ )
256 : {
257 134295 : hSpMusClas->past_log_enr_fx[i] = -1448; /* log(E_MIN) in Q8 */
258 134295 : move16();
259 : }
260 :
261 8953 : hSpMusClas->sp_mus_state = -8;
262 8953 : move16();
263 8953 : hSpMusClas->wdrop_32fx = 0;
264 8953 : move32();
265 8953 : hSpMusClas->wrise_fx = 0;
266 8953 : move16();
267 8953 : hSpMusClas->wdlp_0_95_sp_fx = 0;
268 8953 : move16();
269 8953 : hSpMusClas->wdlp_0_95_sp_32fx = 0;
270 8953 : move32();
271 8953 : hSpMusClas->wdlp_xtalk_fx = 0;
272 8953 : move16();
273 8953 : set16_fx( hSpMusClas->last_lsp_fx, 0, M_LSP_SPMUS );
274 8953 : hSpMusClas->last_cor_map_sum_fx = 0;
275 8953 : move16();
276 8953 : hSpMusClas->last_non_sta_fx = 0;
277 8953 : move16();
278 8953 : set32_fx( hSpMusClas->past_PS_fx, 0, HIGHEST_FBIN - LOWEST_FBIN ); /* one entry per frequency bin in [LOWEST_FBIN, HIGHEST_FBIN) */
279 8953 : hSpMusClas->past_PS_Q = Q31; /* Q-format stored next to the zeroed past_PS_fx buffer */
280 8953 : move16();
281 8953 : hSpMusClas->past_ps_diff_fx = 0;
282 8953 : move16();
283 8953 : hSpMusClas->past_epsP2_fx = 1024; /* 1.0f in Q10 */
284 8953 : move16();
285 8953 : hSpMusClas->past_epsP_fx = 0;
286 8953 : move16();
287 8953 : hSpMusClas->flag_spitch_cnt = 0;
288 8953 : move16();
289 :
290 :
291 8953 : hSpMusClas->gsc_thres_fx[0] = TH_0_MIN_FX; /* the four GSC thresholds start at their *_MIN_FX constants */
292 8953 : move16();
293 8953 : hSpMusClas->gsc_thres_fx[1] = TH_1_MIN_FX;
294 8953 : move16();
295 8953 : hSpMusClas->gsc_thres_fx[2] = TH_2_MIN_FX;
296 8953 : move16();
297 8953 : hSpMusClas->gsc_thres_fx[3] = TH_3_MIN_FX;
298 8953 : move16();
299 8953 : set16_fx( hSpMusClas->gsc_lt_diff_etot_fx, 0, 40 ); /* NOTE(review): literal length 40 - presumably the array size; confirm against the struct declaration */
300 8953 : hSpMusClas->gsc_mem_etot_fx = 0;
301 8953 : move16();
302 8953 : hSpMusClas->gsc_last_music_flag = 0;
303 8953 : move16();
304 8953 : hSpMusClas->gsc_nb_thr_1 = 0;
305 8953 : move16();
306 8953 : hSpMusClas->gsc_nb_thr_3 = 0;
307 8953 : move16();
308 8953 : hSpMusClas->mold_corr_fx = 29491; /* 0.9f in Q15 */
309 8953 : move16();
310 8953 : hSpMusClas->mean_avr_dyn_fx = 64; /* 0.5f in Q7 */
311 8953 : move16();
312 8953 : hSpMusClas->last_sw_dyn_fx = 2560; /* 10.0f in Q7 */
313 8953 : move16();
314 :
315 8953 : hSpMusClas->relE_attack_cnt = 0;
316 8953 : move16();
317 8953 : hSpMusClas->prev_relE_fx = 0;
318 8953 : move16();
319 8953 : hSpMusClas->prev_Etot_fx = 0;
320 8953 : move16();
321 8953 : hSpMusClas->prev_vad = 0;
322 8953 : move16();
323 8953 : hSpMusClas->vad_0_1_cnt = 0;
324 8953 : move16();
325 8953 : hSpMusClas->relE_attack_sum_fx = 0;
326 8953 : move16();
327 :
328 : /* speech/music classifier improvement: the seven history buffers below are reset over their first BUF_LEN entries */
329 546133 : FOR( i = 0; i < BUF_LEN; i++ )
330 : {
331 537180 : hSpMusClas->buf_flux_fx[i] = -12800; /*-100.0f in Q7 */
332 537180 : move16();
333 537180 : hSpMusClas->buf_pkh_fx[i] = 0;
334 537180 : move16();
335 537180 : hSpMusClas->buf_epsP_tilt_fx[i] = 0;
336 537180 : move16();
337 537180 : hSpMusClas->buf_cor_map_sum_fx[i] = 0;
338 537180 : move16();
339 537180 : hSpMusClas->buf_Ntonal_fx[i] = 0;
340 537180 : move16();
341 537180 : hSpMusClas->buf_Ntonal2_fx[i] = 0;
342 537180 : move16();
343 537180 : hSpMusClas->buf_Ntonal_lf_fx[i] = 0;
344 537180 : move16();
345 : }
346 :
347 8953 : set16_fx( hSpMusClas->lpe_buf_fx, 0, HANG_LEN_INIT );
348 8953 : set16_fx( hSpMusClas->voicing_buf_fx, 0, HANG_LEN_INIT );
349 8953 : hSpMusClas->gsc_hangover = 0;
350 8953 : move16();
351 8953 : set16_fx( hSpMusClas->sparse_buf_fx, 0, HANG_LEN_INIT );
352 8953 : set16_fx( hSpMusClas->hf_spar_buf_fx, 0, HANG_LEN_INIT );
353 8953 : hSpMusClas->LT_sparse_fx = 0;
354 8953 : move16();
355 8953 : hSpMusClas->gsc_cnt = 0;
356 8953 : move16();
357 8953 : hSpMusClas->last_vad_spa = 0;
358 8953 : move16();
359 :
360 8953 : set16_fx( hSpMusClas->old_Bin_E_fx, 0, 3 * N_OLD_BIN_E );
361 8953 : set16_fx( hSpMusClas->buf_etot_fx, 0, 4 ); /* NOTE(review): literal length 4 - confirm it matches the array size */
362 8953 : set16_fx( hSpMusClas->buf_dlp_fx, 0, 10 ); /* NOTE(review): literal length 10 - confirm it matches the array size */
363 :
364 8953 : hSpMusClas->UV_cnt1 = 300;
365 8953 : move16();
366 8953 : hSpMusClas->LT_UV_cnt1_fx = 16000; /* 250.0f in Q6 */
367 8953 : move16();
368 8953 : hSpMusClas->onset_cnt = 0;
369 8953 : move16();
370 8953 : hSpMusClas->attack_hangover = 0;
371 8953 : move16();
372 8953 : hSpMusClas->dec_mov_fx = 0;
373 8953 : move16();
374 8953 : hSpMusClas->dec_mov1_fx = 0;
375 8953 : move16();
376 8953 : hSpMusClas->mov_log_max_spl_fx = 25600; /* 200.0 in Q7 */
377 8953 : move16();
378 8953 : hSpMusClas->old_lt_diff_fx[0] = 0;
379 8953 : move16();
380 8953 : hSpMusClas->old_lt_diff_fx[1] = 0;
381 8953 : move16();
382 :
383 8953 : set32_fx( hSpMusClas->finc_prev_fx, 0, ATT_NSEG ); /* one entry per ATT_NSEG segment */
384 8953 : hSpMusClas->q_finc_prev = Q31; /* Q-format stored next to the zeroed finc_prev_fx buffer */
385 8953 : move16();
386 8953 : hSpMusClas->lt_finc_fx = 0;
387 8953 : move32();
388 8953 : hSpMusClas->Q_lt_finc = Q31;
389 8953 : move16();
390 :
391 8953 : hSpMusClas->last_strong_attack = 0;
392 8953 : move16();
393 8953 : hSpMusClas->tdm_lt_Etot_fx = 3; /* 0.01f in Q8 */
394 8953 : move16();
395 8953 : set32_fx( hSpMusClas->tod_lt_Bin_E_fx, 0, TOD_NSPEC );
396 8953 : hSpMusClas->Q_tod_lt_Bin_E = Q31; /* Q-format stored next to the zeroed tod_lt_Bin_E_fx buffer */
397 8953 : move16();
398 8953 : set32_fx( hSpMusClas->tod_S_map_lt_fx, 0, TOD_NSPEC );
399 8953 : hSpMusClas->tod_thr_lt_fx = TOD_THR_MASS_FX_Q22; /* Q22, per the constant's name */
400 8953 : move32();
401 8953 : hSpMusClas->tod_weight_fx = 0;
402 8953 : move16();
403 8953 : hSpMusClas->tod_S_mass_prev_fx = 0;
404 8953 : move32();
405 8953 : hSpMusClas->tod_S_mass_lt_fx = 0;
406 8953 : move32();
407 :
408 : /* speech/music classification */
409 8953 : set16_fx( hSpMusClas->lt_old_mode, 1, 3 ); /* the three history entries start at 1, not 0 */
410 8953 : hSpMusClas->lt_voicing = 16384; /* 0.5f in Q15 */
411 8953 : move16();
412 8953 : hSpMusClas->lt_corr = 16384; /* 0.5f in Q15 */
413 8953 : move16();
414 8953 : hSpMusClas->lt_tonality = 0;
415 8953 : move32(); /* counted as a 32-bit move, unlike the neighbouring 16-bit fields */
416 8953 : set16_fx( hSpMusClas->lt_corr_pitch, 0, 3 );
417 8953 : hSpMusClas->lt_hangover = 0;
418 8953 : move16();
419 8953 : hSpMusClas->lowrate_pitchGain = 0;
420 8953 : move16();
421 :
422 8953 : hSpMusClas->lt_music_hangover = 0;
423 8953 : move16();
424 8953 : set16_fx( hSpMusClas->tonality2_buf_fx, 0, HANG_LEN_INIT );
425 8953 : set16_fx( hSpMusClas->tonality3_buf_fx, 0, HANG_LEN_INIT );
426 8953 : set16_fx( hSpMusClas->LPCErr_buf_fx, 0, HANG_LEN_INIT );
427 8953 : hSpMusClas->lt_music_state = 0;
428 8953 : move16();
429 8953 : hSpMusClas->lt_speech_state = 0;
430 8953 : move16();
431 8953 : hSpMusClas->lt_speech_hangover = 0;
432 8953 : move16();
433 :
434 8953 : hSpMusClas->lt_dec_thres_fx = 5120; /* 10.0f in Q9 */
435 8953 : move16();
436 8953 : hSpMusClas->ener_RAT_fx = 0;
437 8953 : move16();
438 :
439 8953 : hSpMusClas->high_stable_cor = 0;
440 8953 : move16();
441 8953 : set16_fx( hSpMusClas->var_cor_t_fx, 0, VAR_COR_LEN );
442 :
443 8953 : hSpMusClas->lps_fx = 0;
444 8953 : move16();
445 8953 : hSpMusClas->lpm_fx = 0;
446 8953 : move16();
447 8953 : hSpMusClas->lpn_fx = 0;
448 8953 : move16();
449 :
450 8953 : return;
451 : }
452 :
453 : /*---------------------------------------------------------------------*
454 : * speech_music_classif_fx()
455 : *
456 : * Speech/music classification: runs the 1st stage GMM classifier and, for MODE1 or a 12.8 kHz core, the refinement stages; results are written to st->sp_aud_decision0/1/2, st->GSC_noisy_speech and st->coder_type
457 : *
458 : * The following technologies are used based on the outcome of the sp/mus classifier
459 : * sp_aud_decision1 sp_aud_decision2
460 : * 0 0 use ACELP (+TD BWE)
461 : * 1 0 use ACELP (+FD BWE) or HQ/LR-MDCT depending on bitrate
462 : * 1 1 use GSC (+FD BWE) or HQ/LR-MDCT depending on bitrate
463 : *
464 : * 0 1 exceptionally use GSC (+FD BWE) instead of LR-MDCT at 13.2 kbps (WB/SWB) for sparse spectra
465 : *---------------------------------------------------------------------*/
466 :
467 3100 : void speech_music_classif_fx(
468 : Encoder_State *st, /* i/o: state structure */
469 : const Word16 *new_inp, /* i : new input signal */
470 : const Word16 *inp, /* i : input signal to locate attack position */
471 : const Word16 localVAD_HE_SAD, /* i : HE-SAD flag without hangover */
472 : const Word16 lsp_new[M], /* i : LSPs in current frame Q15 */
473 : const Word16 cor_map_sum, /* i : correlation map sum (from multi-harmonic anal.)Q8*/
474 : const Word32 epsP[M + 1], /* i : LP prediction error Q_esp*/
475 : const Word32 PS[], /* i : energy spectrum Q_new+QSCALE*/
476 : const Word16 Etot, /* i : total frame energy Q8 */
477 : const Word16 old_cor, /* i : max correlation from previous frame Q15 */
478 : Word16 *attack_flag, /* o : flag to indicate if attack is to be treated by TC or GSC; only forwarded to sp_mus_classif_2nd_fx(), so it is not touched here when that call is skipped */
479 : Word16 non_sta, /* i : unbound non-stationarity for sp/mus classifier */
480 : Word16 relE, /* i : relative frame energy */
481 : Word16 Q_esp, /* i : scaling of epsP */
482 : Word16 Q_inp, /* i : scaling of input */
483 : Word16 *high_lpn_flag_ptr, /* o : noise log prob flag for NOISE_EST */
484 : Word16 flag_spitch /* i : flag to indicate very short stable pitch */
485 : )
486 : {
487 : Word16 voi_fv, cor_map_sum_fv, LPCErr; /* scaled features returned by the 1st stage and reused by the later stages */
488 3100 : GSC_ENC_HANDLE hGSCEnc = st->hGSCEnc; /* only dereferenced when an AUDIO frame is selected below */
489 :
490 : /* 1st stage speech/music classifier based on the GMM model */
491 3100 : st->sp_aud_decision1 = sp_mus_classif_gmm_fx( st, localVAD_HE_SAD, lsp_new, cor_map_sum,
492 : epsP, PS, non_sta, relE, &voi_fv, &cor_map_sum_fv, &LPCErr, Q_esp, high_lpn_flag_ptr );
493 :
494 3100 : test();
495 3100 : IF( EQ_16( st->codec_mode, MODE1 ) || EQ_32( st->sr_core, INT_FS_12k8 ) ) /* refinement stages run only for MODE1 or a 12.8 kHz core */
496 : {
497 :
498 :
499 : /* Improvement of the 1st stage decision on mixed/music content */
500 2050 : test();
501 2050 : IF( st->Opt_SC_VBR == 0 && NE_32( st->total_brate, ACELP_24k40 ) )
502 : {
503 2050 : music_mixed_classif_improv_fx( st, new_inp, epsP, Q_esp, Etot, old_cor, cor_map_sum );
504 : }
505 :
506 2050 : st->sp_aud_decision0 = st->sp_aud_decision1; /* snapshot of decision1 taken before the 2nd stage and the overrides below */
507 2050 : move16();
508 :
509 : /* 2nd stage speech/music classifier (rewrite music to speech in onsets) */
510 2050 : st->sp_aud_decision2 = st->sp_aud_decision1; /* default when the 2nd stage is skipped (bwidth not above NB) */
511 2050 : move16();
512 :
513 2050 : IF( st->bwidth > NB )
514 : {
515 2050 : sp_mus_classif_2nd_fx( st, Etot, attack_flag, inp, Q_inp - 1 ); /* Qx argument is Q_inp - 1 */
516 :
517 : /* avoid switch to AUDIO/MUSIC class for very short stable high st->pitch
518 : and/or stable pitch with high correlation at low bitrates*/
519 2050 : test();
520 2050 : test();
521 2050 : IF( flag_spitch && EQ_16( st->bwidth, WB ) && LT_32( st->total_brate, ACELP_13k20 ) )
522 : {
523 0 : st->sp_aud_decision2 = 0;
524 0 : move16();
525 : }
526 : }
527 :
528 :
529 : /* Context-based improvement of 1st and 2nd stage decision on stable tonal signals */
530 2050 : test();
531 2050 : IF( st->Opt_SC_VBR == 0 && NE_32( st->total_brate, ACELP_24k40 ) )
532 : {
533 2050 : tonal_context_improv_fx( st, PS, voi_fv, cor_map_sum_fv, LPCErr, Q_inp + QSCALE - 2 ); /* Qx argument is Q_inp + QSCALE - 2 */
534 : }
535 :
536 : /* Avoid using LR-MDCT on sparse spectra, use GSC instead at 13.2 kbps (WB/SWB) */
537 2050 : test();
538 2050 : test();
539 2050 : test();
540 2050 : test();
541 2050 : IF( !st->Opt_SC_VBR && EQ_32( st->total_brate, ACELP_13k20 ) && EQ_16( st->vad_flag, 1 ) &&
542 : ( EQ_16( st->bwidth, WB ) || EQ_16( st->bwidth, SWB ) ) )
543 : {
544 1039 : detect_sparseness_fx( st, localVAD_HE_SAD, voi_fv );
545 : }
546 :
547 : /* override speech/music classification to ACELP when background noise level reaches certain level */
548 : /* this is a patch against mis-classifications during active noisy speech segments */
549 2050 : IF( GT_16( st->lp_noise_fx, 3072 ) ) /* NOTE(review): 3072 is presumably 12.0 in Q8 - confirm the Q-format of lp_noise_fx */
550 : {
551 0 : st->sp_aud_decision1 = 0;
552 0 : move16();
553 0 : st->sp_aud_decision2 = 0;
554 0 : move16();
555 : }
556 :
557 :
558 : /* select GSC on SWB noisy speech (only on active unvoiced SWB noisy speech segments) */
559 2050 : st->GSC_noisy_speech = 0;
560 2050 : move16();
561 :
562 2050 : test();
563 2050 : test();
564 2050 : test();
565 2050 : test();
566 2050 : test();
567 2050 : test();
568 2050 : IF( EQ_16( st->vad_flag, 1 ) && GE_32( st->total_brate, ACELP_13k20 ) && LT_32( st->total_brate, ACELP_24k40 ) &&
569 : GT_16( st->lp_noise_fx, 3072 ) && st->sp_aud_decision1 == 0 && GE_16( st->bwidth, SWB ) && /* the noise override above already forced sp_aud_decision1 to 0 whenever lp_noise_fx > 3072, so that term always holds once the noise test passes */
570 : EQ_16( st->coder_type_raw, UNVOICED ) )
571 : {
572 0 : st->GSC_noisy_speech = 1;
573 0 : move16();
574 : }
575 :
576 : /* Select AUDIO frames */
577 2050 : test();
578 2050 : test();
579 : #ifdef DEBUGGING
580 : if ( st->codec_mode == MODE1 && ( st->force == 1 || ( st->force == -1 && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) ) ) ) /* debug builds: st->force == 1 selects AUDIO unconditionally, st->force == -1 uses the classifier outcome */
581 : #else
582 2050 : IF( EQ_16( st->codec_mode, MODE1 ) && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) )
583 : #endif
584 : {
585 618 : st->coder_type = AUDIO;
586 618 : move16();
587 618 : hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
588 618 : move16();
589 : }
590 : }
591 : ELSE
592 : {
593 1050 : st->sp_aud_decision0 = st->sp_aud_decision1; /* this path writes neither sp_aud_decision2, GSC_noisy_speech nor *attack_flag */
594 1050 : move16();
595 : }
596 :
597 :
598 3100 : return;
599 : }
600 :
601 : /*---------------------------------------------------------------------*
602 : * sp_mus_classif_gmm_fx()
603 : *
604 : * Speech/music classification based on GMM model
605 : *---------------------------------------------------------------------*/
606 :
607 3100 : static Word16 sp_mus_classif_gmm_fx( /* o : decision flag (1-music, 0-speech or noise) */
608 : Encoder_State *st_fx, /* i/o: state structure */
609 : const Word16 localVAD_HE_SAD, /* i : local VAD HE flag */
610 : const Word16 lsp_new[M], /* i : LSPs in current frame Q15 */
611 : const Word16 cor_map_sum, /* i : correlation map sum (from multi-harmonic anal.)Q8 */
612 : const Word32 epsP[M + 1], /* i : LP prediciton error Q_esp */
613 : const Word32 PS[], /* i : energy spectrum Q_new+Qscale-2 */
614 : Word16 non_sta, /* i : unbound non-stationarity for sp/mus classifier */
615 : Word16 relE, /* i : relative frame energy */
616 : Word16 *voi_fv, /* o : scaled voicing feature */
617 : Word16 *cor_map_sum_fv, /* o : scaled correlation map feature */
618 : Word16 *LPCErr, /* o : scaled LP prediction error feature */
619 : Word16 Q_esp, /* i : scaling of epsP */
620 : Word16 *high_lpn_flag_ptr /* o : noise log prob flag for NOISE_EST */
621 : )
622 : {
623 : Word16 i, k, p, dec, vad;
624 :
625 3100 : Word16 lsp[M], FV[N_FEATURES], *pFV = FV;
626 : const Word32 *pSF_a;
627 : const Word16 *pSF_m;
628 : Word16 lsf2acos_fact, wrelE, dlp, wdrop, wght;
629 :
630 : Word32 mx;
631 : Word32 sum_PS;
632 : Word16 ftmp, tmp16;
633 : Word16 xm[N_FEATURES];
634 : Word16 lps, lpm;
635 : Word16 lpn;
636 : Word16 e_tmp, f_tmp;
637 : Word32 L_tmp;
638 : Word16 exp1;
639 : Word32 ps_sta;
640 : Word32 ps_diff;
641 : Word16 ps_diff_16;
642 : Word32 dPS[128], PS_norm[128];
643 : Word32 lepsP1;
644 3100 : Word32 max_s = 0, max_m = 0, py_s, py_m;
645 3100 : move32();
646 3100 : move32();
647 : Word32 max_n, py_n; /* pyn */
648 3100 : Word16 ishift[12] = { 8, 0, 2, 2, 2, 2, 2, 1, 0, 2, 2, 1 };
649 3100 : move16();
650 3100 : move16();
651 3100 : move16();
652 3100 : move16();
653 3100 : move16();
654 3100 : move16();
655 3100 : move16();
656 3100 : move16();
657 3100 : move16();
658 3100 : move16();
659 3100 : move16();
660 3100 : move16();
661 : Word16 tmp;
662 : Word16 tmp1, tmp2, exp2, scale, exp3;
663 3100 : SP_MUS_CLAS_HANDLE hSpMusClas = st_fx->hSpMusClas;
664 3100 : HQ_ENC_HANDLE hHQ_core = st_fx->hHQ_core;
665 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
666 3100 : Flag Overflow = 0;
667 3100 : move16();
668 : #endif
669 :
670 : /*------------------------------------------------------------------*
671 : * Initialization
672 : *------------------------------------------------------------------*/
673 :
674 3100 : vad = localVAD_HE_SAD;
675 3100 : move16();
676 :
677 : /*------------------------------------------------------------------*
678 : * Preparation of the feature vector
679 : *------------------------------------------------------------------*/
680 :
681 : /* [0] OL pitch Q0 */
682 : /*(float)(pitch[0] + pitch[1] + pitch[2]) / 3.0f;*/
683 3100 : L_tmp = L_mult( st_fx->pitch[0], 10923 );
684 3100 : L_tmp = L_mac( L_tmp, st_fx->pitch[1], 10923 );
685 3100 : L_tmp = L_mac( L_tmp, st_fx->pitch[2], 10923 );
686 :
687 3100 : test();
688 3100 : IF( EQ_16( st_fx->tc_cnt, 1 ) || EQ_16( st_fx->tc_cnt, 2 ) )
689 : {
690 253 : *pFV++ = st_fx->pitch[2];
691 253 : move16();
692 : }
693 : ELSE
694 : {
695 2847 : *pFV++ = round_fx( L_tmp );
696 2847 : move16();
697 : }
698 :
699 : /* [1] voicing Q15 */
700 : /*(float)(voicing[0] + voicing[1] + voicing[2]) / 3.0f*/
701 3100 : test();
702 3100 : IF( EQ_16( st_fx->tc_cnt, 1 ) || EQ_16( st_fx->tc_cnt, 2 ) )
703 : {
704 253 : *pFV++ = st_fx->voicing_fx[2];
705 253 : move16();
706 : }
707 : ELSE
708 : {
709 2847 : L_tmp = L_mult( st_fx->voicing_fx[0], 10923 );
710 2847 : L_tmp = L_mac( L_tmp, st_fx->voicing_fx[1], 10923 );
711 2847 : L_tmp = L_mac( L_tmp, st_fx->voicing_fx[2], 10923 );
712 2847 : *pFV++ = round_fx_sat( L_tmp );
713 2847 : move16();
714 : }
715 :
716 : /* [2,3,4,5,6] LSFs Q15*/
717 3100 : Copy( lsp_new, lsp, M );
718 3100 : lsf2acos_fact = 25735;
719 3100 : move16(); /* PI/6400 -> Q27 */
720 :
721 : /*ftmp = (float)acos(lsp[1...5]);*/
722 : /**pFV++ = ftmp + st->last_lsp[1...5];*/
723 : /*st->last_lsp[1...5] = ftmp;*/
724 18600 : FOR( i = 1; i < M_LSP_SPMUS; i++ )
725 : {
726 15500 : L_tmp = sub_lsp2lsf_fx( lsp[i] );
727 15500 : tmp16 = round_fx( L_shl( L_mult0( extract_l( L_tmp ), lsf2acos_fact ), 2 ) );
728 15500 : *pFV++ = add( tmp16, hSpMusClas->last_lsp_fx[i] );
729 15500 : move16(); /*Q13*/
730 15500 : hSpMusClas->last_lsp_fx[i] = tmp16;
731 15500 : move16();
732 : }
733 :
734 : /* [7] cor_map_sum Q8 */
735 3100 : *pFV++ = round_fx( L_mac( L_mult( cor_map_sum, 16384 ), hSpMusClas->last_cor_map_sum_fx, 16384 ) ); /* Q8 ->Q7*/
736 3100 : move16();
737 3100 : hSpMusClas->last_cor_map_sum_fx = cor_map_sum;
738 3100 : move16();
739 :
740 : /* [8] non_sta Q8*/
741 3100 : *pFV++ = round_fx( L_mac( L_mult( non_sta, 16384 ), hSpMusClas->last_non_sta_fx, 16384 ) ); /* Q8 -> Q7 */
742 3100 : move16();
743 3100 : hSpMusClas->last_non_sta_fx = non_sta;
744 3100 : move16();
745 :
746 : /* [9] epsP Q10 */
747 3100 : IF( EQ_16( st_fx->bwidth, NB ) )
748 : {
749 0 : *pFV++ = -1687;
750 0 : move16(); /*Q10*/
751 : }
752 : ELSE
753 : {
754 : /*lepsP1 = (float)log(epsP[1] + 1e-5f);*/
755 3100 : IF( epsP[1] != 0 )
756 : {
757 3100 : e_tmp = norm_l( epsP[1] );
758 3100 : f_tmp = Log2_norm_lc( L_shl( epsP[1], e_tmp ) );
759 3100 : e_tmp = sub( 30, add( e_tmp, Q_esp ) );
760 3100 : lepsP1 = Mpy_32_16( e_tmp, f_tmp, 22713 ); /* Q16 */ /* 22713 = ln(2) in Q15 */
761 : }
762 : ELSE
763 : {
764 0 : lepsP1 = L_deposit_l( 0 );
765 : }
766 :
767 : /*ftmp = (float)log(epsP[13]);*/
768 3100 : IF( epsP[13] != 0 )
769 : {
770 3100 : e_tmp = norm_l( epsP[13] );
771 3100 : f_tmp = Log2_norm_lc( L_shl( epsP[13], e_tmp ) );
772 3100 : e_tmp = sub( 30, add( e_tmp, Q_esp ) );
773 3100 : L_tmp = Mpy_32_16( e_tmp, f_tmp, 22713 ); /* Q16 */ /* 22713 = ln(2) in Q15 */
774 : }
775 : ELSE
776 : {
777 0 : L_tmp = L_deposit_l( 0 );
778 : }
779 :
780 : /*ftmp = (float)log(epsP[13]) - lepsP1;*/
781 3100 : L_tmp = L_sub( L_tmp, lepsP1 ); /*Q16 */
782 3100 : ftmp = round_fx( L_shl( L_tmp, 10 ) ); /*Q10 */
783 :
784 : /**pFV++ = ftmp + st->past_epsP2;*/
785 3100 : *pFV++ = add( ftmp, hSpMusClas->past_epsP2_fx );
786 3100 : move16(); /*Q10 */
787 :
788 : /*st->past_epsP2 = ftmp;*/
789 3100 : hSpMusClas->past_epsP2_fx = ftmp;
790 3100 : move16(); /*Q10 */
791 : }
792 :
793 : /* calculation of differential normalized power spectrum */
794 3100 : sum_PS = L_deposit_l( 0 );
795 210800 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
796 : {
797 207700 : sum_PS = L_add_o( sum_PS, PS[i], &Overflow );
798 : }
799 3100 : exp1 = norm_l( sum_PS );
800 3100 : tmp1 = round_fx_o( L_shl( sum_PS, exp1 ), &Overflow );
801 3100 : exp1 = sub( 30, exp1 );
802 :
803 210800 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
804 : {
805 : /*PS_norm[i] = PS[i] / sum_PS;*/
806 : /*dPS[i] = (float)fabs(PS_norm[i] - st->past_PS[i]);*/
807 207700 : exp2 = norm_l( PS[i] );
808 207700 : tmp2 = round_fx_o( L_shl( PS[i], exp2 ), &Overflow );
809 207700 : exp2 = sub( 30, exp2 );
810 :
811 207700 : scale = shr( sub( tmp1, tmp2 ), 15 );
812 207700 : tmp2 = shl( tmp2, scale );
813 207700 : exp2 = sub( exp2, scale );
814 :
815 207700 : exp3 = sub( exp1, exp2 );
816 :
817 207700 : tmp = div_s( tmp2, tmp1 ); /*Q(15+exp3) */
818 207700 : PS_norm[i] = L_shl( tmp, sub( 10, exp3 ) );
819 207700 : move32(); /*Q25 */
820 207700 : dPS[i] = L_abs( L_sub( PS_norm[i], hSpMusClas->past_PS_fx[i - LOWEST_FBIN] ) );
821 207700 : move32(); /*Q25 */
822 : }
823 :
824 : /* [10] ps_diff (spectral difference) Q10*/
825 3100 : ps_diff = 0;
826 3100 : move16();
827 210800 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
828 : {
829 : /*ps_diff += dPS[i];*/
830 207700 : ps_diff = L_add( ps_diff, dPS[i] ); /*Q25*/
831 : }
832 :
833 : /*ps_diff = (float)log(ps_diff + 1e-5f);*/
834 3100 : IF( ps_diff != 0 )
835 : {
836 3100 : e_tmp = norm_l( ps_diff );
837 3100 : f_tmp = Log2_norm_lc( L_shl( ps_diff, e_tmp ) );
838 3100 : e_tmp = sub( 30 - 25, e_tmp );
839 3100 : ps_diff = Mpy_32_16( e_tmp, f_tmp, 22713 ); /* Q16 */ /* 22713 = ln(2) in Q15 */
840 3100 : ps_diff_16 = round_fx( L_shl( ps_diff, 10 ) ); /*Q10 */
841 : }
842 : ELSE
843 : {
844 0 : ps_diff_16 = -11789;
845 0 : move16(); /*Q10 */
846 : }
847 :
848 3100 : *pFV++ = add( ps_diff_16, hSpMusClas->past_ps_diff_fx );
849 3100 : move16(); /*Q10 */
850 3100 : hSpMusClas->past_ps_diff_fx = ps_diff_16;
851 3100 : move16(); /*Q10 */
852 :
853 : /* [11] ps_sta (spectral stationarity) Q11 */
854 3100 : ps_sta = 0;
855 3100 : move16();
856 210800 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
857 : {
858 : /*mx = PS_norm[i] > st->past_PS[i] ? PS_norm[i] : st->past_PS[i];*/
859 207700 : mx = L_max( PS_norm[i], hSpMusClas->past_PS_fx[i - LOWEST_FBIN] ); /*Q25 */
860 :
861 : /*ps_sta += mx / (dPS[i] + 1e-5f);*/
862 207700 : IF( !dPS[i] )
863 : {
864 97 : ps_sta = L_add( ps_sta, L_shr( mx, 9 ) ); /*Q16 */
865 : }
866 : ELSE
867 : {
868 207603 : exp1 = norm_l( L_add( dPS[i], 336 ) );
869 207603 : tmp1 = round_fx_o( L_shl_o( L_add( dPS[i], 336 ), exp1, &Overflow ), &Overflow );
870 207603 : exp1 = sub( 30, exp1 );
871 :
872 207603 : exp2 = norm_l( mx );
873 207603 : tmp2 = round_fx( L_shl( mx, exp2 ) );
874 207603 : exp2 = sub( 30, exp2 );
875 :
876 207603 : scale = shr( sub( tmp1, tmp2 ), 15 );
877 207603 : tmp2 = shl( tmp2, scale );
878 207603 : exp2 = sub( exp2, scale );
879 :
880 207603 : exp3 = sub( exp1, exp2 );
881 :
882 207603 : tmp = div_s( tmp2, tmp1 ); /*Q(15+exp3) */
883 207603 : L_tmp = L_shl( tmp, sub( 1, exp3 ) ); /*Q16 */
884 207603 : ps_sta = L_add_sat( ps_sta, L_tmp ); /*Q16 */
885 : }
886 : }
887 :
888 : /**pFV++ = (float)log(ps_sta + 1e-5f);*/
889 3100 : ps_sta = L_add_sat( ps_sta, 336 );
890 3100 : e_tmp = norm_l( ps_sta );
891 3100 : f_tmp = Log2_norm_lc( L_shl( ps_sta, e_tmp ) );
892 3100 : e_tmp = sub( 30 - 16, e_tmp );
893 3100 : L_tmp = Mpy_32_16( e_tmp, f_tmp, 22713 ); /* Q16 */ /* 22713 = ln(2) in Q15 */
894 3100 : *pFV++ = round_fx( L_shl( L_tmp, 11 ) ); /*Q11 */
895 3100 : move16();
896 :
897 : /* update PS vector */
898 3100 : Copy32( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS_fx, HIGHEST_FBIN - LOWEST_FBIN );
899 :
900 : /*------------------------------------------------------------------*
901 : * Scaling of the feature vector
902 : *------------------------------------------------------------------*/
903 :
904 : /* FV[0] -> Q0 */
905 : /* FV[1...6] -> Q13*/
906 : /* FV[7,8] -> Q7 */
907 : /* FV[9,10] -> Q10 */
908 : /* FV[11] -> Q11 */
909 :
910 :
911 3100 : pFV = FV;
912 3100 : IF( EQ_16( st_fx->bwidth, NB ) )
913 : {
914 0 : pSF_m = SF_8k_mult_fx;
915 0 : pSF_a = SF_8k_add_fx;
916 : }
917 : ELSE
918 : {
919 3100 : pSF_m = SF_mult_fx;
920 3100 : pSF_a = SF_add_fx;
921 : }
922 :
923 40300 : FOR( i = 0; i < N_FEATURES; i++ )
924 : {
925 : /**pFV = pSF[0] * *pFV + pSF[1];*/
926 37200 : *pFV = round_fx_o( L_shl_o( L_mac( pSF_a[i], *pFV, pSF_m[i] ), ishift[i], &Overflow ), &Overflow );
927 37200 : move16();
928 37200 : pFV++;
929 : }
930 :
931 3100 : *voi_fv = FV[1];
932 3100 : move16();
933 3100 : *cor_map_sum_fv = FV[7];
934 3100 : move16();
935 3100 : *LPCErr = FV[9];
936 3100 : move16();
937 :
938 :
939 : /*------------------------------------------------------------------*
940 : * Calculation of posterior probability
941 : * Log-probability
942 : *------------------------------------------------------------------*/
943 :
944 3100 : max_s = L_add( MIN_32, 0 );
945 3100 : max_m = L_add( MIN_32, 0 );
946 : /* pyn = 1e-5f;*/
947 3100 : max_n = L_add( MIN_32, 0 );
948 :
949 :
950 21700 : FOR( k = 0; k < N_MIXTURES; k++ )
951 : {
952 : /* for each mixture, calculate the probability of speech or noise and the probability of music */
953 : /* active frames - calculate the probability of speech */
954 241800 : FOR( p = 0; p < N_FEATURES; p++ )
955 : {
956 : /* xm[p] = FV[p] - m_speech[k*N_FEATURES+p];*/
957 223200 : xm[p] = sub_o( FV[p], m_speech_fx[k * N_FEATURES + p], &Overflow );
958 223200 : move16(); /*Q15 */
959 : }
960 :
961 : /*py = lvm_speech[k] + dot_product_mat(xm, &invV_speech[k*N_FEATURES*N_FEATURES], N_FEATURES );*/
962 18600 : L_tmp = dot_product_mat_fx( xm, &invV_speech_fx[k * N_FEATURES * N_FEATURES], N_FEATURES ); /*Q10 */
963 18600 : py_s = L_add( lvm_speech_fx[k], L_tmp ); /*Q10 */
964 18600 : max_s = L_max( py_s, max_s );
965 :
966 :
967 : /* pys += (float)exp(py); */
968 :
969 : /* inactive frames - calculate the probability of noise */
970 241800 : FOR( p = 0; p < N_FEATURES; p++ )
971 : {
972 : /*xm[p] = FV[p] - m_noise[k*N_FEATURES+p];*/
973 223200 : xm[p] = sub_o( FV[p], m_noise_fx[k * N_FEATURES + p], &Overflow );
974 223200 : move16(); /*Q15 */
975 : }
976 :
977 : /*py = lvm_noise[k] + dot_product_mat(xm, &invV_noise[k*N_FEATURES*N_FEATURES], N_FEATURES );*/
978 18600 : L_tmp = dot_product_mat_fx( xm, &invV_noise_fx[k * N_FEATURES * N_FEATURES], N_FEATURES ); /*Q10 */
979 : /* pyn += (float)exp(py); */
980 18600 : py_n = L_add( lvm_noise_fx[k], L_tmp ); /*Q10 */
981 18600 : max_n = L_max( py_n, max_n );
982 :
983 :
984 : /* either active or inactive frames - calculate the probability of music */
985 241800 : FOR( p = 0; p < N_FEATURES; p++ )
986 : {
987 : /*xm[p] = FV[p] - m_music[k*N_FEATURES+p];*/
988 223200 : xm[p] = sub_o( FV[p], m_music_fx[k * N_FEATURES + p], &Overflow );
989 223200 : move16(); /*Q15 */
990 : }
991 :
992 : /*py = lvm_music[k] + dot_product_mat(xm, &invV_music[k*N_FEATURES*N_FEATURES], N_FEATURES );*/
993 18600 : L_tmp = dot_product_mat_fx( xm, &invV_music_fx[k * N_FEATURES * N_FEATURES], N_FEATURES ); /*Q10 */
994 18600 : py_m = L_add( lvm_music_fx[k], L_tmp ); /*Q10 */
995 18600 : max_m = L_max( py_m, max_m );
996 :
997 : /*pym += (float)exp(py);#######*/
998 : }
999 :
1000 : /* calculate log-probability */
1001 : /*log(0.0001)-0.5f * N_FEATURES * LOG_PI2 in Q9 */
1002 3100 : lps = extract_h( L_shl_o( L_sub( max_s, LOG_PROB_CONST ), 16 - 1, &Overflow ) ); /*Q9 */
1003 3100 : lps = s_max( lps, -10832 );
1004 :
1005 3100 : lpm = extract_h( L_shl( L_sub( max_m, LOG_PROB_CONST ), 16 - 1 ) ); /*Q9 */
1006 3100 : lpm = s_max( lpm, -10832 );
1007 : /*
1008 : lpn = (float)log(pyn) - 0.5f * N_FEATURES * (float)log(2*PI);
1009 : */
1010 3100 : lpn = extract_h( L_shl_o( L_sub( max_n, LOG_PROB_CONST ), 16 - 1, &Overflow ) ); /*Q9 */
1011 3100 : lpn = s_max( lpn, -10832 );
1012 :
1013 3100 : *high_lpn_flag_ptr = 0;
1014 3100 : move16();
1015 3100 : test();
1016 3100 : if ( GT_16( lpn, lps ) && GT_16( lpn, lpm ) )
1017 : {
1018 47 : *high_lpn_flag_ptr = 1;
1019 47 : move16();
1020 : }
1021 :
1022 :
1023 3100 : IF( !vad )
1024 : {
1025 : /* increase log-probability of noise */
1026 : /* lps = lpn * 1.2f; */
1027 110 : lps = add( lpn, mult_r( 6554, lpn ) ); /* Q9 */
1028 : }
1029 :
1030 3100 : hSpMusClas->lpm_fx = lpm;
1031 3100 : move16();
1032 3100 : hSpMusClas->lps_fx = lps;
1033 3100 : move16();
1034 :
1035 : /* determine HQ GENERIC speech class */
1036 3100 : IF( hHQ_core != NULL )
1037 : {
1038 3100 : hHQ_core->hq_generic_speech_class = 0;
1039 3100 : move16();
1040 3100 : if ( GT_16( lps, add( lpm, 256 ) ) )
1041 : {
1042 1416 : hHQ_core->hq_generic_speech_class = 1;
1043 1416 : move16();
1044 : }
1045 : }
1046 :
1047 : /*------------------------------------------------------------------*
1048 : * State machine (sp_mus_state < 0 .. inactive, > 0 .. entry, = 0 .. active )
1049 : *------------------------------------------------------------------*/
1050 :
1051 3100 : IF( vad )
1052 : {
1053 2990 : test();
1054 2990 : test();
1055 2990 : test();
1056 2990 : IF( LT_16( relE, -20 * 256 ) || ( LE_16( lps, -5 * 512 ) && LE_16( lpm, -5 * 512 ) ) )
1057 : {
1058 369 : IF( hSpMusClas->sp_mus_state > 0 )
1059 : {
1060 68 : if ( LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1061 : {
1062 : /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
1063 7 : hSpMusClas->inact_cnt = 0;
1064 7 : move16();
1065 : }
1066 :
1067 : /* energy is too low -> we are going to instable state */
1068 68 : hSpMusClas->sp_mus_state = 0;
1069 68 : move16();
1070 : }
1071 301 : ELSE IF( GT_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1072 : {
1073 : /* energy is still too low -> we are still in instable state */
1074 136 : hSpMusClas->sp_mus_state = sub( hSpMusClas->sp_mus_state, 1 );
1075 : }
1076 : }
1077 2621 : ELSE IF( hSpMusClas->sp_mus_state <= 0 )
1078 : {
1079 68 : IF( hSpMusClas->inact_cnt == 0 )
1080 : {
1081 :
1082 22 : hSpMusClas->sp_mus_state = 1;
1083 22 : move16();
1084 : }
1085 : ELSE
1086 : {
1087 :
1088 46 : hSpMusClas->sp_mus_state = HANG_LEN;
1089 46 : move16();
1090 : }
1091 :
1092 68 : hSpMusClas->inact_cnt = 12;
1093 68 : move16();
1094 : }
1095 2553 : ELSE IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1096 : {
1097 : /* we are inside an entry period -> increment the counter of entry frames */
1098 123 : hSpMusClas->sp_mus_state = add( hSpMusClas->sp_mus_state, 1 );
1099 : }
1100 :
1101 2990 : test();
1102 2990 : if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
1103 : {
1104 183 : hSpMusClas->inact_cnt = sub( hSpMusClas->inact_cnt, 1 );
1105 : }
1106 : }
1107 : ELSE
1108 : {
1109 110 : test();
1110 110 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1111 : {
1112 0 : hSpMusClas->inact_cnt = 0;
1113 0 : move16();
1114 : }
1115 110 : ELSE IF( hSpMusClas->inact_cnt > 0 )
1116 : {
1117 41 : hSpMusClas->inact_cnt = sub( hSpMusClas->inact_cnt, 1 );
1118 : }
1119 :
1120 110 : test();
1121 110 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1122 : {
1123 :
1124 0 : hSpMusClas->sp_mus_state = -HANG_LEN;
1125 0 : move16();
1126 : }
1127 110 : ELSE IF( hSpMusClas->sp_mus_state > 0 )
1128 : {
1129 :
1130 0 : hSpMusClas->sp_mus_state = -1;
1131 0 : move16();
1132 : }
1133 110 : ELSE IF( GT_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1134 : {
1135 : /* we are in inactive state */
1136 65 : hSpMusClas->sp_mus_state = sub( hSpMusClas->sp_mus_state, 1 );
1137 : }
1138 : }
1139 :
1140 : /*------------------------------------------------------------------*
1141 : * Decision without hangover
1142 : * Weighted decision
1143 : *------------------------------------------------------------------*/
1144 :
1145 : /* decision without hangover (0 - speech/noise, 1 - music) */
1146 3100 : logic16();
1147 3100 : dec = sub( lpm, lps ) > 0;
1148 3100 : move16();
1149 3100 : dlp = sub( lpm, lps ); /*Q9*/
1150 :
1151 3100 : IF( !vad )
1152 : {
1153 110 : dec = 0;
1154 110 : move16();
1155 110 : dlp = 0;
1156 110 : move16();
1157 : }
1158 :
1159 : /* calculate weight based on relE (close to 0.01 in low-E regions, close to 1 in high-E regions) */
1160 : /*wrelE = 1.0f + relE/15;*/
1161 3100 : wrelE = add( 2048, mult_r( relE, 17476 ) ); /* 1/15 in Q18 -> 17476 result in Q11 */
1162 :
1163 :
1164 3100 : wrelE = s_min( wrelE, 2048 );
1165 3100 : wrelE = s_max( wrelE, 20 );
1166 :
1167 : /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
1168 3100 : test();
1169 3100 : IF( dlp < 0 && LT_16( dlp, hSpMusClas->past_dlp_fx[0] ) )
1170 : {
1171 902 : IF( hSpMusClas->past_dlp_fx[0] > 0 )
1172 : {
1173 286 : hSpMusClas->wdrop_fx = negate( dlp ); /*Q9*/
1174 : }
1175 : ELSE
1176 : {
1177 616 : hSpMusClas->wdrop_fx = add( hSpMusClas->wdrop_fx, sub( hSpMusClas->past_dlp_fx[0], dlp ) ); /*Q9*/
1178 : }
1179 : }
1180 : ELSE
1181 : {
1182 2198 : hSpMusClas->wdrop_fx = 0;
1183 2198 : move16();
1184 : }
1185 :
1186 : /*wdrop = st->wdrop/20;*/
1187 3100 : wdrop = mult_r( hSpMusClas->wdrop_fx, 26214 ); /*Q9*Q19->Q13*/
1188 3100 : wdrop = s_min( wdrop, 8192 ); /* limitation [0.1,1] Q13 */
1189 3100 : wdrop = s_max( wdrop, 819 );
1190 :
1191 : /* combine weights into one */
1192 : /*wght = wrelE * wdrop;*/
1193 3100 : wght = mult_r( wrelE, wdrop ); /* Q11*Q13 -> Q9*/
1194 3100 : wght = s_max( wght, 5 );
1195 :
1196 : /* calculate weighted decision */
1197 : /*st->wdlp_0_95_sp = wght * dlp + (1 - wght) * st->wdlp_0_95_sp;*/
1198 : /* = Q9 * Q9 + (Q9-Q9)*Q9 */
1199 3100 : L_tmp = L_mac( L_mult( wght, dlp ), sub( 512, wght ), hSpMusClas->wdlp_0_95_sp_fx );
1200 3100 : hSpMusClas->wdlp_0_95_sp_fx = round_fx( L_shl( L_tmp, 6 ) );
1201 :
1202 3100 : if ( EQ_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1203 : {
1204 229 : hSpMusClas->wdlp_0_95_sp_fx = 0;
1205 229 : move16();
1206 : }
1207 :
1208 : /*------------------------------------------------------------------*
1209 : * Final speech/music decision
1210 : *------------------------------------------------------------------*/
1211 :
1212 3100 : test();
1213 3100 : test();
1214 3100 : IF( !vad && EQ_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1215 : {
1216 : /* inactive state */
1217 49 : dec = 0;
1218 49 : move16();
1219 : }
1220 3051 : ELSE IF( hSpMusClas->sp_mus_state <= 0 )
1221 : {
1222 : /* transition from active to inactive state or instable state */
1223 430 : dec = hSpMusClas->past_dec[0];
1224 430 : move16();
1225 : }
1226 2621 : ELSE IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1227 : {
1228 : /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
1229 130 : L_tmp = L_mult( w_spmus_fx[hSpMusClas->sp_mus_state - 1][0], dlp ); /*Q15*Q9 */
1230 :
1231 : /*ftmp += dotp( &w[st_fx->sp_mus_state-1][1], st_fx->past_dlp_fx, HANG_LEN-1 );*/
1232 130 : L_tmp = L_add( L_tmp, Dot_product( &w_spmus_fx[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp_fx, HANG_LEN - 1 ) );
1233 130 : logic16();
1234 130 : move16();
1235 :
1236 : /*dec = ftmp > 2.0f;*/
1237 130 : dec = L_sub( L_tmp, 2 * ( 1 << 25 ) ) > 0;
1238 : }
1239 : ELSE
1240 : {
1241 : /* stable active state */
1242 2491 : test();
1243 2491 : test();
1244 2491 : test();
1245 2491 : test();
1246 2491 : IF( hSpMusClas->wdlp_0_95_sp_fx > 0 && hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 )
1247 : {
1248 : /* switching from speech to music */
1249 16 : dec = 1;
1250 16 : move16();
1251 : }
1252 2475 : ELSE IF( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp_fx < 0 )
1253 : {
1254 : /* switching from music to speech */
1255 16 : dec = 0;
1256 16 : move16();
1257 : }
1258 : ELSE
1259 : {
1260 2459 : dec = hSpMusClas->past_dec[0];
1261 2459 : move16();
1262 : }
1263 : }
1264 :
1265 :
1266 : /*------------------------------------------------------------------*
1267 : * Updates
1268 : *------------------------------------------------------------------*/
1269 :
1270 : /* update the buffer of past non-binary decisions */
1271 3100 : Copy( &hSpMusClas->past_dlp_fx[0], &hSpMusClas->past_dlp_fx[1], HANG_LEN - 2 );
1272 3100 : hSpMusClas->past_dlp_fx[0] = dlp;
1273 3100 : move16();
1274 :
1275 : /* update the buffer of past binary decisions */
1276 3100 : Copy( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
1277 3100 : hSpMusClas->past_dec[0] = dec;
1278 3100 : move16();
1279 :
1280 3100 : return dec;
1281 : }
1282 :
1283 :
1284 : /*---------------------------------------------------------------------*
1285 : * sp_mus_classif_2nd_fx()
1286 : *
1287 : * 2nd stage speech/music classifier (convert music to speech for onsets)
1288 : *---------------------------------------------------------------------*/
1289 :
1290 2050 : static void sp_mus_classif_2nd_fx(
1291 : Encoder_State *st, /* i/o: Encoder state structure */
1292 : const Word16 Etot, /* i : total frame energy */
1293 : Word16 *attack_flag, /* i/o: attack flag (GSC or TC) */
1294 : const Word16 *inp, /* i : input signal */
1295 : const Word16 Qx )
1296 : {
1297 : Word16 attack;
1298 2050 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
1299 :
1300 : /* initialization */
1301 2050 : *attack_flag = 0;
1302 2050 : move16();
1303 :
1304 : /* signal stability estimation */
1305 2050 : stab_est_fx( Etot, hSpMusClas->gsc_lt_diff_etot_fx, &hSpMusClas->gsc_mem_etot_fx, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres_fx, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
1306 :
1307 : /* calculate variance of correlation */
1308 2050 : var_cor_calc_fx( st->old_corr_fx, &hSpMusClas->mold_corr_fx, hSpMusClas->var_cor_t_fx, &hSpMusClas->high_stable_cor );
1309 :
1310 : /* attack detection */
1311 2050 : attack = attack_det_fx( inp, Qx, st->clas, st->localVAD, st->coder_type, st->total_brate );
1312 :
1313 2050 : test();
1314 2050 : test();
1315 2050 : test();
1316 2050 : test();
1317 2050 : test();
1318 2050 : test();
1319 2050 : IF( EQ_16( st->sp_aud_decision1, 1 ) )
1320 : {
1321 662 : test();
1322 662 : test();
1323 662 : test();
1324 662 : IF( LT_16( hSpMusClas->ener_RAT_fx, 5898 ) && GT_16( hSpMusClas->lt_dec_thres_fx, 7680 ) )
1325 : {
1326 0 : st->sp_aud_decision2 = 0;
1327 0 : move16();
1328 : }
1329 662 : ELSE IF( EQ_16( hSpMusClas->high_stable_cor, 1 ) && GE_16( st->pitch[0], 130 ) )
1330 : {
1331 : /* prevent GSC in highly correlated signal with low energy variation */
1332 : /* this is basically a patch against bassoon-type of music */
1333 0 : st->sp_aud_decision2 = 0;
1334 0 : move16();
1335 :
1336 0 : test();
1337 0 : if ( EQ_16( st->codec_mode, MODE1 ) && EQ_16( st->coder_type, TRANSITION ) )
1338 : {
1339 0 : st->coder_type = GENERIC;
1340 0 : move16();
1341 : }
1342 : }
1343 662 : ELSE IF( GT_16( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], 1152 ) &&
1344 : GT_16( sub( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 2] ), 2560 ) ) /* 10.0f in Q8 */
1345 : {
1346 21 : IF( EQ_16( st->tc_cnt, 1 ) )
1347 : {
1348 0 : st->sp_aud_decision2 = 0;
1349 0 : move16();
1350 :
1351 0 : if ( EQ_16( st->codec_mode, MODE1 ) )
1352 : {
1353 0 : st->coder_type = TRANSITION;
1354 0 : move16();
1355 : }
1356 : }
1357 : ELSE
1358 : {
1359 21 : IF( GE_16( attack, ATT_3LSUB_POS ) )
1360 : {
1361 : /* do TC coding if attack is located in the last subframe */
1362 6 : st->sp_aud_decision2 = 0;
1363 6 : move16();
1364 6 : *attack_flag = add( attack, 1 );
1365 6 : move16();
1366 6 : if ( EQ_16( st->codec_mode, MODE1 ) )
1367 : {
1368 6 : st->coder_type = TRANSITION;
1369 6 : move16();
1370 : }
1371 : }
1372 15 : ELSE IF( GE_16( attack, ATT_SEG_LEN >> 1 ) )
1373 : {
1374 : /* do GSC coding if attack is located after the first quarter of the first subframe */
1375 : /* (pre-echo will be treated at the decoder side) */
1376 0 : st->sp_aud_decision2 = 1;
1377 0 : move16();
1378 0 : *attack_flag = 31;
1379 0 : move16();
1380 : }
1381 : }
1382 : }
1383 : }
1384 1388 : ELSE IF( EQ_16( st->localVAD, 1 ) && EQ_16( st->coder_type, GENERIC ) &&
1385 : ( ( GE_16( attack, ATT_3LSUB_POS ) && LT_32( st->total_brate, ACELP_24k40 ) ) ||
1386 : ( GE_16( attack, ATT_3LSUB_POS_16k ) && GE_32( st->total_brate, ACELP_24k40 ) && LT_32( st->total_brate, ACELP_48k ) ) ) )
1387 : {
1388 : /* do TC coding if attack is located in the last subframe */
1389 22 : *attack_flag = add( attack, 1 );
1390 22 : move16();
1391 22 : if ( EQ_16( st->codec_mode, MODE1 ) )
1392 : {
1393 22 : st->coder_type = TRANSITION;
1394 22 : move16();
1395 : }
1396 : }
1397 :
1398 2050 : return;
1399 : }
1400 :
1401 :
1402 : /*---------------------------------------------------------------------*
1403 : * var_cor_calc_fx()
1404 : *
1405 : * Calculate variance of correlation
1406 : *---------------------------------------------------------------------*/
1407 :
1408 2050 : static void var_cor_calc_fx(
1409 : const Word16 old_corr,
1410 : Word16 *mold_corr,
1411 : Word16 var_cor_t[],
1412 : Word16 *high_stable_cor )
1413 : {
1414 : Word16 i, var_cor;
1415 :
1416 : /* update buffer of old correlation values */
1417 20500 : FOR( i = VAR_COR_LEN - 1; i > 0; i-- )
1418 : {
1419 18450 : var_cor_t[i] = var_cor_t[i - 1]; /*Q11*/
1420 18450 : move16();
1421 : }
1422 2050 : var_cor_t[i] = old_corr;
1423 2050 : move16();
1424 :
1425 : /* calculate variance of correlation */
1426 2050 : var_cor = var_fx( var_cor_t, 11, VAR_COR_LEN );
1427 :
1428 2050 : *high_stable_cor = 0;
1429 2050 : move16();
1430 2050 : test();
1431 2050 : if ( GT_16( *mold_corr, 26214 ) && LT_16( var_cor, 2 ) )
1432 : {
1433 0 : *high_stable_cor = 1;
1434 0 : move16();
1435 : }
1436 :
1437 : /* update average correlation */
1438 : /*st->mold_corr = 0.1f * st->old_corr + 0.9f * st->mold_corr;*/
1439 2050 : *mold_corr = mac_r( L_mult( 3277, old_corr ), 29491, *mold_corr ); /*Q15 */
1440 :
1441 2050 : return;
1442 : }
1443 :
1444 : /*---------------------------------------------------------------------*
1445 : * attack_det_fx()
1446 : *
1447 : * Attack detection
1448 : *---------------------------------------------------------------------*/
1449 :
1450 2050 : static Word16 attack_det_fx( /* o : attack flag */
1451 : const Word16 *inp, /* i : input signal */
1452 : const Word16 Qx,
1453 : const Word16 last_clas, /* i : last signal clas */
1454 : const Word16 localVAD, /* i : local VAD flag */
1455 : const Word16 coder_type, /* i : coder type */
1456 : const Word32 total_brate /* i : total bitrate */
1457 : )
1458 : {
1459 : Word16 i, j, tmp, tmp1, attack, exp1;
1460 : Word32 L_tmp, etmp, etmp2, finc[ATT_NSEG];
1461 : Word16 att_3lsub_pos;
1462 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1463 2050 : Flag Overflow = 0;
1464 2050 : move16();
1465 : #endif
1466 :
1467 2050 : att_3lsub_pos = ATT_3LSUB_POS;
1468 2050 : move16();
1469 2050 : if ( GE_32( total_brate, ACELP_24k40 ) )
1470 : {
1471 1000 : att_3lsub_pos = ATT_3LSUB_POS_16k;
1472 1000 : move16();
1473 : }
1474 :
1475 : /* compute energy per section */
1476 67650 : FOR( i = 0; i < ATT_NSEG; i++ )
1477 : {
1478 65600 : L_tmp = L_mult0( inp[i * ATT_SEG_LEN], inp[i * ATT_SEG_LEN] ); /*2*Qx */
1479 :
1480 524800 : FOR( j = 1; j < ATT_SEG_LEN; j++ )
1481 : {
1482 459200 : L_tmp = L_mac0_o( L_tmp, inp[i * ATT_SEG_LEN + j], inp[i * ATT_SEG_LEN + j], &Overflow ); /*2*Qx */
1483 : }
1484 :
1485 65600 : finc[i] = L_tmp;
1486 65600 : move32();
1487 : }
1488 :
1489 2050 : attack = maximum_32_fx( finc, ATT_NSEG, &etmp );
1490 2050 : move16();
1491 2050 : test();
1492 2050 : IF( EQ_16( localVAD, 1 ) && EQ_16( coder_type, GENERIC ) )
1493 : {
1494 : /*----------------------------------------------------------------------*
1495 : * Detect if there is a strong onset in the last subframe
1496 : * - if detected, TC is used to better code the onset
1497 : *----------------------------------------------------------------------*/
1498 :
1499 : /* compute mean energy in the first three subframes */
1500 1547 : exp1 = norm_s( att_3lsub_pos );
1501 1547 : tmp = div_s( shl( 1, sub( 14, exp1 ) ), att_3lsub_pos ); /*Q(29-exp1) */
1502 :
1503 1547 : L_tmp = L_shr_o( finc[0], Qx, &Overflow ); /*Qx */
1504 :
1505 39022 : FOR( i = 1; i < att_3lsub_pos; i++ )
1506 : {
1507 37475 : L_tmp = L_add_o( L_tmp, L_shr_o( finc[i], Qx, &Overflow ), &Overflow ); /*Qx */
1508 : }
1509 1547 : L_tmp = Mult_32_16( L_tmp, tmp ); /*Q(14-exp1+Qx) */
1510 1547 : etmp = L_shl( L_tmp, sub( exp1, 14 ) ); /*Qx */
1511 :
1512 1547 : tmp1 = sub( ATT_NSEG, attack );
1513 1547 : exp1 = norm_s( tmp1 );
1514 1547 : tmp = div_s( shl( 1, sub( 14, exp1 ) ), tmp1 ); /*Q(29-exp1) */
1515 :
1516 1547 : L_tmp = L_shr_o( finc[attack], Qx, &Overflow ); /*Qx */
1517 27240 : FOR( i = 1; i < tmp1; i++ )
1518 : {
1519 25693 : L_tmp = L_add_o( L_tmp, L_shr_o( finc[i + attack], Qx, &Overflow ), &Overflow ); /*Qx */
1520 : }
1521 1547 : L_tmp = Mult_32_16( L_tmp, tmp ); /*Q(14-exp1+Qx) */
1522 1547 : etmp2 = L_shl( L_tmp, sub( exp1, 14 ) ); /*Qx */
1523 :
1524 : /* and compare them */
1525 1547 : if ( GT_32( etmp, L_shr( etmp2, 3 ) ) )
1526 : {
1527 : /* stop, if the attack is not sufficiently strong */
1528 1492 : attack = 0;
1529 1492 : move16();
1530 : }
1531 :
1532 1547 : test();
1533 1547 : if ( EQ_16( last_clas, VOICED_CLAS ) && GT_32( L_add( L_shl( etmp, 4 ), L_shl( etmp, 2 ) ), etmp2 ) )
1534 : {
1535 : /* stop, if the signal was voiced and the attack is not sufficiently strong */
1536 553 : attack = 0;
1537 553 : move16();
1538 : }
1539 :
1540 : /* compare also wrt. other sections (reduces a misclassification) */
1541 1547 : IF( attack > 0 )
1542 : {
1543 53 : etmp2 = L_add( finc[attack], 0 );
1544 53 : etmp = Mult_32_16( etmp2, 16384 ); /* etmp2 / 2.0 = (etmp2*0.5) */
1545 1108 : FOR( i = 2; i < ATT_3LSUB_POS - 2; i++ )
1546 : {
1547 1058 : IF( GT_32( finc[i], etmp ) )
1548 : {
1549 3 : attack = 0;
1550 3 : move16();
1551 3 : BREAK;
1552 : }
1553 : }
1554 : }
1555 : }
1556 503 : ELSE IF( attack > 0 )
1557 : {
1558 479 : etmp2 = L_add( finc[attack], 0 );
1559 479 : etmp = Mult_32_16( etmp2, 25206 ); /* etmp2 / 1.3 = (etmp2*0.76923) */
1560 5650 : FOR( i = 2; i < att_3lsub_pos - 2; i++ )
1561 : {
1562 : /*if( i != attack && finc[i] * 1.3f > etmp2 ) -> finc[i] > (etmp2*0.76923) */
1563 5507 : test();
1564 5507 : IF( NE_16( i, attack ) && GT_32( finc[i], etmp ) )
1565 : {
1566 336 : attack = 0;
1567 336 : move16();
1568 336 : BREAK;
1569 : }
1570 : }
1571 : }
1572 :
1573 2050 : return attack;
1574 : }
1575 :
/*---------------------------------------------------------------------*
 * ivas_smc_gmm_fx()
 *
 * 1st stage of the speech/music classification (based on the GMM model)
 *---------------------------------------------------------------------*/
1581 : /*! r: S/M decision (0=speech or noise,1=unclear,2=music) */
1582 1129000 : Word16 ivas_smc_gmm_fx(
1583 : Encoder_State *st, /* i/o: state structure */
1584 : STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */
1585 : const Word16 localVAD_HE_SAD, /* i : HE-SAD flag without hangover */
1586 : const Word16 Etot_fx, /* i : total frame energy */
1587 : const Word16 lsp_new_fx[M], /* i : LSPs in current frame Q15 */
1588 : const Word16 cor_map_sum_fx, /* i : correlation map sum (from multi-harmonic anal.) Q8 */
1589 : const Word32 epsP_fx[M + 1], /* i : LP prediciton error */
1590 : const Word32 PS_fx[], /* i : energy spectrum */
1591 : const Word16 non_sta_fx, /* i : unbound non-stationarity Q8 */
1592 : const Word16 relE_fx, /* i : relative frame energy Q8 */
1593 : Word16 *high_lpn_flag, /* i/o: sp/mus LPN flag */
1594 : const Word16 flag_spitch, /* i : flag to indicate very short stable pitch */
1595 : Word16 Qfact_PS,
1596 : Word16 Q_esp,
1597 : Word16 Qfact_PS_past )
1598 : {
1599 : Word16 i, m, dec;
1600 : Word16 flag_odv;
1601 : Word32 lps_fx, lpm_fx, lpn_fx;
1602 : Word32 ps_fx[N_SMC_MIXTURES], pm_fx[N_SMC_MIXTURES], pn_fx[N_SMC_MIXTURES];
1603 : Word64 wprob_fx;
1604 : Word32 fvm_fx[N_PCA_COEF];
1605 : Word32 sum_PS_fx, ps_diff_fx;
1606 : Word32 dlp_fx, wrelE_fx, wdrop_fx, wght_fx;
1607 : Word32 wrise_fx;
1608 : Word16 dlp_mean2var_fx;
1609 : Word16 dlp_mean2var_q;
1610 : Word32 FV_fx[N_SMC_FEATURES], *pFV_fx;
1611 : Word32 dPS_fx[128];
1612 : Word32 PS_norm_fx[128];
1613 : const Word32 *pODV_fx;
1614 : Word32 *pFV_st_fx;
1615 : Word16 relE_attack_flag, smc_st_mean_fact_fx;
1616 : Word16 j, len;
1617 : const Word32 *pt_mel_fb_fx;
1618 : Word32 melS_fx[NB_MEL_BANDS], mfcc_fx[NB_MEL_BANDS];
1619 : Word16 odv_cnt;
1620 : Word16 i_out[N_SMC_FEATURES], *p_out;
1621 : Word16 temp_exp;
1622 : Word16 Qfact_FV;
1623 : Word32 temp32, temp32_log;
1624 : Word32 temp32_log1, temp32_log2;
1625 : Word16 temp16;
1626 1129000 : Word16 dotp_exp = 0;
1627 1129000 : move16();
1628 : /*------------------------------------------------------------------*
1629 : * Initialization
1630 : *------------------------------------------------------------------*/
1631 :
1632 1129000 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
1633 : Word32 temp_sqrt, temp_acos;
1634 : /*------------------------------------------------------------------*
1635 : * State machine (sp_mus_state: -8 = INACTIVE, -7:-1 = UNSTABLE, 0:7 = ENTRY, 8 = STABLE )
1636 : *------------------------------------------------------------------*/
1637 :
1638 1129000 : IF( localVAD_HE_SAD )
1639 : {
1640 946842 : test();
1641 946842 : IF( LT_16( relE_fx, -5120 /*20 q8*/ ) )
1642 : {
1643 96526 : IF( hSpMusClas->sp_mus_state > 0 )
1644 : {
1645 10156 : if ( LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1646 : {
1647 : /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
1648 2146 : hSpMusClas->inact_cnt = 0;
1649 2146 : move16();
1650 : }
1651 :
1652 : /* energy is too low -> we are going to instable state */
1653 10156 : hSpMusClas->sp_mus_state = 0;
1654 10156 : move16();
1655 : }
1656 86370 : ELSE IF( GT_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1657 : {
1658 : /* energy is still too low -> we are still in instable state */
1659 28316 : hSpMusClas->sp_mus_state = sub( hSpMusClas->sp_mus_state, 1 );
1660 : }
1661 : }
1662 850316 : ELSE IF( hSpMusClas->sp_mus_state <= 0 )
1663 : {
1664 20722 : IF( hSpMusClas->inact_cnt == 0 )
1665 : {
1666 :
1667 12848 : hSpMusClas->sp_mus_state = 1;
1668 12848 : move16();
1669 : }
1670 : ELSE
1671 : {
1672 :
1673 7874 : hSpMusClas->sp_mus_state = HANG_LEN;
1674 7874 : move16();
1675 : }
1676 :
1677 20722 : hSpMusClas->inact_cnt = 12;
1678 20722 : move16();
1679 : }
1680 829594 : ELSE IF( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
1681 : {
1682 : /* we are inside an entry period -> increment the counter of entry frames */
1683 60844 : hSpMusClas->sp_mus_state = add( hSpMusClas->sp_mus_state, 1 );
1684 : }
1685 :
1686 946842 : test();
1687 946842 : IF( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
1688 : {
1689 29327 : hSpMusClas->inact_cnt = sub( hSpMusClas->inact_cnt, 1 );
1690 29327 : move16();
1691 : }
1692 : }
1693 : ELSE
1694 : {
1695 182158 : test();
1696 182158 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1697 : {
1698 924 : hSpMusClas->inact_cnt = 0;
1699 924 : move16();
1700 : }
1701 181234 : ELSE IF( hSpMusClas->inact_cnt > 0 )
1702 : {
1703 22593 : hSpMusClas->inact_cnt = sub( hSpMusClas->inact_cnt, 1 );
1704 : }
1705 :
1706 182158 : test();
1707 182158 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
1708 : {
1709 924 : hSpMusClas->sp_mus_state = -HANG_LEN;
1710 924 : move16();
1711 : }
1712 181234 : ELSE IF( hSpMusClas->sp_mus_state > 0 )
1713 : {
1714 3250 : hSpMusClas->sp_mus_state = -1;
1715 3250 : move16();
1716 : }
1717 177984 : ELSE IF( GT_16( hSpMusClas->sp_mus_state, -HANG_LEN ) )
1718 : {
1719 : /* we are in inactive state */
1720 15062 : hSpMusClas->sp_mus_state = sub( hSpMusClas->sp_mus_state, 1 );
1721 : }
1722 : }
1723 :
1724 : /* detect attacks based on relE */
1725 1129000 : IF( GT_16( relE_fx, hSpMusClas->prev_relE_fx ) )
1726 : {
1727 473932 : hSpMusClas->relE_attack_sum_fx = add_sat( sub_sat( relE_fx, hSpMusClas->prev_relE_fx ), hSpMusClas->relE_attack_sum_fx ); /*q8*/
1728 473932 : move16();
1729 : }
1730 : ELSE
1731 : {
1732 655068 : hSpMusClas->relE_attack_sum_fx = 0; /*q8*/
1733 655068 : move16();
1734 : }
1735 1129000 : hSpMusClas->prev_relE_fx = relE_fx;
1736 1129000 : move16();
1737 1129000 : test();
1738 1129000 : test();
1739 1129000 : test();
1740 : /* update counter from last VAD 0->1 change */
1741 1129000 : IF( hSpMusClas->prev_vad == 0 && EQ_16( localVAD_HE_SAD, 1 ) )
1742 : {
1743 14768 : hSpMusClas->vad_0_1_cnt = 1;
1744 14768 : move16();
1745 : }
1746 1114232 : ELSE IF( EQ_16( localVAD_HE_SAD, 1 ) && hSpMusClas->vad_0_1_cnt > 0 && LT_16( hSpMusClas->vad_0_1_cnt, 50 ) )
1747 : {
1748 233381 : hSpMusClas->vad_0_1_cnt = add( hSpMusClas->vad_0_1_cnt, 1 );
1749 : }
1750 : ELSE
1751 : {
1752 880851 : hSpMusClas->vad_0_1_cnt = 0;
1753 880851 : move16();
1754 : }
1755 1129000 : hSpMusClas->prev_vad = localVAD_HE_SAD;
1756 1129000 : move16();
1757 1129000 : test();
1758 1129000 : test();
1759 1129000 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) && GT_16( hSpMusClas->relE_attack_sum_fx, 1280 /*q8*/ ) )
1760 : {
1761 21842 : hSpMusClas->relE_attack_cnt = add( hSpMusClas->relE_attack_cnt, 1 );
1762 :
1763 : /* set flag only in the first X frames in a series */
1764 21842 : IF( hSpMusClas->relE_attack_cnt > 0 && LT_16( hSpMusClas->relE_attack_cnt, 3 ) )
1765 : {
1766 15598 : relE_attack_flag = 1;
1767 : }
1768 : ELSE
1769 : {
1770 6244 : relE_attack_flag = 0;
1771 : }
1772 21842 : move16();
1773 : }
1774 : ELSE
1775 : {
1776 1107158 : hSpMusClas->relE_attack_cnt = 0;
1777 1107158 : move16();
1778 1107158 : relE_attack_flag = 0;
1779 1107158 : move16();
1780 : }
1781 :
1782 1129000 : hSpMusClas->prev_Etot_fx = Etot_fx;
1783 1129000 : move16();
1784 :
1785 : /*------------------------------------------------------------------*
1786 : * Preparation of the feature vector
1787 : *------------------------------------------------------------------*/
1788 :
1789 1129000 : pFV_fx = FV_fx;
1790 1129000 : test();
1791 1129000 : test();
1792 : /* [0] OL pitch */
1793 1129000 : IF( relE_attack_flag || EQ_16( st->tc_cnt, 1 ) || EQ_16( st->tc_cnt, 2 ) )
1794 : {
1795 112243 : *pFV_fx++ = L_shl( st->pitch[2], Q20 );
1796 : }
1797 : ELSE
1798 : {
1799 : // *pFV_fx++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f;
1800 1016757 : *pFV_fx++ = Mpy_32_32( L_shl( add( add( st->pitch[0], st->pitch[1] ), st->pitch[2] ), Q20 ), 715827883 );
1801 : }
1802 1129000 : move32();
1803 :
1804 1129000 : test();
1805 1129000 : test();
1806 : /* [1] voicing */
1807 1129000 : IF( relE_attack_flag || EQ_16( st->tc_cnt, 1 ) || EQ_16( st->tc_cnt, 2 ) )
1808 : {
1809 112243 : *pFV_fx++ = L_shl( st->voicing_fx[2], 5 ); /*q20*/
1810 : }
1811 : ELSE
1812 : {
1813 : // *pFV++ = ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f;
1814 1016757 : *pFV_fx++ = Mpy_32_32( L_shl( L_add( L_add( st->voicing_fx[0], st->voicing_fx[1] ), st->voicing_fx[2] ), Q5 ), 715827883 ); /*q20*/
1815 : }
1816 1129000 : move32();
1817 :
1818 1129000 : temp_exp = 1;
1819 1129000 : move16();
1820 1129000 : temp16 = lsp_new_fx[2];
1821 1129000 : move16();
1822 :
1823 1129000 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1824 1129000 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1825 1129000 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1826 1129000 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1827 1129000 : move32();
1828 1129000 : temp_exp = 1;
1829 1129000 : move16();
1830 1129000 : temp16 = lsp_new_fx[3];
1831 1129000 : move16();
1832 :
1833 1129000 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1834 1129000 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1835 1129000 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1836 1129000 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1837 1129000 : move32();
1838 1129000 : temp_exp = 1;
1839 1129000 : move16();
1840 1129000 : temp16 = lsp_new_fx[4];
1841 1129000 : move16();
1842 :
1843 1129000 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1844 1129000 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1845 1129000 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1846 1129000 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1847 1129000 : move32();
1848 1129000 : temp_exp = 1;
1849 1129000 : move16();
1850 1129000 : temp16 = lsp_new_fx[5];
1851 1129000 : move16();
1852 :
1853 1129000 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1854 1129000 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1855 1129000 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1856 1129000 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1857 1129000 : move32();
1858 1129000 : temp_exp = 1;
1859 1129000 : move16();
1860 1129000 : temp16 = lsp_new_fx[6];
1861 1129000 : move16();
1862 :
1863 :
1864 1129000 : temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30
1865 1129000 : temp_sqrt = Sqrt32( temp32, &temp_exp );
1866 1129000 : temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp );
1867 1129000 : *pFV_fx++ = L_shl( temp_acos, Q7 ); // Q20
1868 1129000 : move32();
1869 : // temf = acosf( lsp_new[2] );
1870 : /* [2,3,4,5,6] LSFs */
1871 : /* *pFV++ = acosf( lsp_new[2] );
1872 : *pFV++ = acosf( lsp_new[3] );
1873 : *pFV++ = acosf( lsp_new[4] );
1874 : *pFV++ = acosf( lsp_new[5] );
1875 : *pFV++ = acosf( lsp_new[6] );*/
1876 :
1877 : /* [7] cor_map_sum */
1878 1129000 : *pFV_fx++ = L_shl( cor_map_sum_fx, Q12 ); /*scaling from Q8 to Q20*/
1879 1129000 : move32();
1880 :
1881 : /* [8] non_sta */
1882 1129000 : *pFV_fx++ = L_shl( non_sta_fx, Q12 ); /*scaling from Q8 to Q20*/
1883 1129000 : move32();
1884 :
1885 : /* [9] epsP */
1886 1129000 : temp32 = L_add( epsP_fx[14], L_shr( 21475, sub( 31, Q_esp ) ) );
1887 1129000 : temp32_log = L_add( BASOP_Util_Log2( temp32 ), L_shl( sub( Q31, Q_esp ), Q25 ) );
1888 1129000 : temp32_log1 = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/
1889 :
1890 1129000 : temp32 = L_add( epsP_fx[0], L_shr( 21475, sub( 31, Q_esp ) ) );
1891 1129000 : temp32_log = L_add( BASOP_Util_Log2( temp32 ), L_shl( sub( Q31, Q_esp ), Q25 ) );
1892 1129000 : temp32_log2 = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/
1893 :
1894 1129000 : *pFV_fx++ = L_shr( L_sub( temp32_log1, temp32_log2 ), Q5 );
1895 1129000 : move32();
1896 : //*pFV++ = logf( epsP[14] + 1e-5f ) - logf( epsP[0] + 1e-5f );
1897 :
1898 : /* [10,11,12] MFCCs */
1899 1129000 : set_zero_fx( melS_fx, NB_MEL_BANDS );
1900 :
1901 1129000 : pt_mel_fb_fx = mel_fb_fx;
1902 :
1903 46289000 : FOR( i = 0; i < NB_MEL_BANDS; i++ )
1904 : {
1905 45160000 : j = mel_fb_start[i];
1906 45160000 : move16();
1907 45160000 : len = mel_fb_len[i];
1908 45160000 : move16();
1909 45160000 : temp32 = dotp_me_fx( &PS_fx[j], pt_mel_fb_fx, len, 31 - Qfact_PS, Q1, &dotp_exp );
1910 45160000 : IF( LT_16( dotp_exp, -17 ) ) /*-18 is exponent of 10737:to avoid overflow when left shifting 10737*/
1911 : {
1912 2745 : temp32 = L_shr( temp32, sub( -17, dotp_exp ) );
1913 2745 : dotp_exp = -17;
1914 2745 : move16();
1915 : }
1916 45160000 : temp32_log = L_add_sat( BASOP_Util_Log2( L_add_sat( L_shr( temp32, 1 ), L_shr( 10737 /*1e-5f q30*/, dotp_exp ) ) ), L_shl( add( dotp_exp, 1 ), Q25 ) );
1917 45160000 : temp32_log = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/
1918 45160000 : melS_fx[i] = temp32_log;
1919 45160000 : move32();
1920 : // melS[i] = logf( dotp( &PS[j], pt_mel_fb, len ) + 1e-5f );
1921 45160000 : pt_mel_fb_fx += len;
1922 : }
1923 :
1924 1129000 : Word16 guard_bits = find_guarded_bits_fx( NB_MEL_BANDS );
1925 1129000 : move16();
1926 1129000 : v_mult_mat_fixed( mfcc_fx, melS_fx, dct_mtx_fx, NB_MEL_BANDS, NB_MEL_COEF, guard_bits ); // Q19
1927 1129000 : *pFV_fx++ = L_shl( mfcc_fx[2], 1 ); // Q20
1928 1129000 : move32();
1929 1129000 : *pFV_fx++ = L_shl( mfcc_fx[6], 1 );
1930 1129000 : move32();
1931 1129000 : *pFV_fx++ = L_shl( mfcc_fx[12], 1 );
1932 1129000 : move32();
1933 : /* *pFV++ = mfcc[2];
1934 : *pFV++ = mfcc[6];
1935 : *pFV++ = mfcc[12];*/
1936 :
1937 : /* calculation of differential normalized power spectrum */
1938 1129000 : sum_PS_fx = 0;
1939 1129000 : move32();
1940 : Word16 q_temp32;
1941 1129000 : Word16 sum_PS_e = 0;
1942 1129000 : move16();
1943 1129000 : Word64 sum = W_shl( 21475 /* 1e-5 in Q31 */, sub( Qfact_PS, 30 ) ); // Qfact_PS+1
1944 1129000 : move64();
1945 76772000 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
1946 : {
1947 75643000 : sum = W_mac_32_32( sum, PS_fx[i], 1 ); // Qfact_PS+1
1948 : }
1949 1129000 : IF( sum == 0 )
1950 : {
1951 0 : sum_PS_fx = 1407374884; // 1e-5 in Q47
1952 0 : move32();
1953 0 : sum_PS_e = -16;
1954 0 : move16();
1955 : }
1956 : ELSE
1957 : {
1958 1129000 : sum_PS_e = W_norm( sum );
1959 1129000 : sum_PS_fx = W_extract_h( W_shl( sum, sum_PS_e ) ); // Qfact_PS+1+sum_PS_e-32
1960 1129000 : sum_PS_e = sub( 62, add( Qfact_PS, sum_PS_e ) ); // 31-(Qfact_PS+1+sum_PS_e-32)
1961 : }
1962 :
1963 76772000 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
1964 : {
1965 75643000 : temp32 = BASOP_Util_Divide3232_Scale_newton( PS_fx[i], sum_PS_fx, &temp_exp ); // 31-temp_exp
1966 75643000 : q_temp32 = add( sub( 31, temp_exp ), sub( Qfact_PS, sub( 31, sum_PS_e ) ) );
1967 75643000 : test();
1968 75643000 : if ( temp32 == 0 )
1969 : {
1970 281 : q_temp32 = 31;
1971 281 : move16();
1972 : }
1973 75643000 : IF( LT_16( q_temp32, 31 ) && EQ_32( temp32, L_shl( 1, q_temp32 ) ) )
1974 : {
1975 0 : temp32 = ONE_IN_Q31;
1976 0 : move32();
1977 0 : q_temp32 = Q31;
1978 0 : move16();
1979 : }
1980 75643000 : PS_norm_fx[i] = L_shl( temp32, sub( Qfact_PS_past, q_temp32 ) ); // Qfact_PS_past
1981 75643000 : move32();
1982 75643000 : dPS_fx[i] = L_abs( L_sub( PS_norm_fx[i], hSpMusClas->past_PS_fx[i - LOWEST_FBIN] ) );
1983 75643000 : move32();
1984 : }
1985 :
1986 : /* [13] ps_diff (spectral difference) */
1987 1129000 : ps_diff_fx = 0;
1988 1129000 : move32();
1989 76772000 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
1990 : {
1991 75643000 : ps_diff_fx = L_add( L_shr( dPS_fx[i], Q7 ), ps_diff_fx ); // Qfact_PS_past-7
1992 : }
1993 :
1994 1129000 : *pFV_fx++ = L_shr( ps_diff_fx, sub( sub( Qfact_PS_past, Q7 ), Q20 ) ); /// ps_diff;
1995 1129000 : move32();
1996 :
1997 : /* [14] ps_sta (spectral stationarity) */
1998 1129000 : Word32 ps_sta_fx = 0;
1999 1129000 : move32();
2000 1129000 : Word16 ps_sta_exp = 0;
2001 1129000 : move16();
2002 : Word32 avoid_divide_by_zero;
2003 1129000 : avoid_divide_by_zero = L_shr( 21475, sub( 31, Qfact_PS_past ) ); // 21475 = 1e-5 in Q31
2004 :
2005 76772000 : FOR( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
2006 : {
2007 : Word32 tmp_max;
2008 75643000 : tmp_max = L_max( PS_norm_fx[i], hSpMusClas->past_PS_fx[i - LOWEST_FBIN] );
2009 : /* Saturation doesn't have a significant impact here, as a value of 1e-5 in Q31 format is added to prevent division by zero */
2010 75643000 : temp32 = BASOP_Util_Divide3232_Scale_newton( tmp_max, L_add_sat( dPS_fx[i], avoid_divide_by_zero ), &temp_exp ); // 31-temp_exp
2011 75643000 : ps_sta_fx = BASOP_Util_Add_Mant32Exp( temp32, temp_exp, ps_sta_fx, ps_sta_exp, &ps_sta_exp );
2012 : }
2013 1129000 : temp32_log = L_add( BASOP_Util_Log2( L_add_sat( ps_sta_fx, L_shr( 21475, ps_sta_exp ) ) ), L_shl( ps_sta_exp, Q25 ) );
2014 1129000 : temp32_log = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/
2015 1129000 : *pFV_fx++ = L_shr( temp32_log, Q5 ); // logf( ps_sta + 1e-5f );
2016 1129000 : move32();
2017 1129000 : MVR2R_WORD32( &PS_norm_fx[LOWEST_FBIN], hSpMusClas->past_PS_fx, HIGHEST_FBIN - LOWEST_FBIN );
2018 :
2019 : /* save ps_diff and ps_sta features for XTALK and UNCLR classifier */
2020 1129000 : IF( hStereoClassif != NULL )
2021 : {
2022 760867 : IF( st->idchan == 0 )
2023 : {
2024 410255 : hStereoClassif->ps_diff_ch1_fx = ps_diff_fx; // Qfact_PS_past - 7
2025 410255 : hStereoClassif->ps_diff_ch1_e = sub( 38, Qfact_PS_past ); // Qfact_PS_past - 7
2026 410255 : hStereoClassif->ps_sta_ch1_fx = temp32_log; // logf( ps_sta + 1e-5f );Q25
2027 410255 : hStereoClassif->ps_sta_ch1_e = 6; // logf( ps_sta + 1e-5f );Q25
2028 : }
2029 : ELSE
2030 : {
2031 350612 : hStereoClassif->ps_diff_ch2_fx = ps_diff_fx;
2032 350612 : hStereoClassif->ps_diff_ch2_e = sub( 38, Qfact_PS_past );
2033 350612 : hStereoClassif->ps_sta_ch2_fx = temp32_log; // logf( ps_sta + 1e-5f );Q25
2034 350612 : hStereoClassif->ps_sta_ch2_e = 6; // logf( ps_sta + 1e-5f );Q25
2035 : }
2036 760867 : move32();
2037 760867 : move16();
2038 760867 : move32();
2039 760867 : move16();
2040 : }
2041 :
2042 : /*------------------------------------------------------------------*
2043 : * Outlier detection based on feature histograms
2044 : *------------------------------------------------------------------*/
2045 1129000 : flag_odv = 0;
2046 1129000 : move16();
2047 1129000 : IF( localVAD_HE_SAD )
2048 : {
2049 946842 : pFV_fx = FV_fx;
2050 946842 : pODV_fx = hout_intervals_fx;
2051 946842 : p_out = i_out;
2052 946842 : odv_cnt = 0;
2053 946842 : move16();
2054 15149472 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
2055 : {
2056 14202630 : test();
2057 14202630 : IF( LT_32( *pFV_fx, pODV_fx[0] ) || GT_32( *pFV_fx, pODV_fx[1] ) )
2058 : {
2059 1969 : *p_out++ = i;
2060 1969 : odv_cnt = add( odv_cnt, 1 );
2061 : }
2062 :
2063 14202630 : pFV_fx++;
2064 14202630 : pODV_fx += 2;
2065 : }
2066 :
2067 : /* set outlier flag */
2068 946842 : IF( GE_16( odv_cnt, 2 ) )
2069 : {
2070 380 : flag_odv = 1;
2071 380 : move16();
2072 : /* replace outlying features with values from the previous frame */
2073 1343 : FOR( i = 0; i < odv_cnt; i++ )
2074 : {
2075 963 : FV_fx[i_out[i]] = hSpMusClas->prev_FV_fx[i_out[i]];
2076 963 : move32();
2077 : }
2078 : }
2079 : }
2080 :
2081 : /*------------------------------------------------------------------*
2082 : * Adaptive short-term mean filter on feature vector
2083 : *------------------------------------------------------------------*/
2084 1129000 : Qfact_FV = 20;
2085 1129000 : move16();
2086 1129000 : pFV_fx = FV_fx;
2087 1129000 : pFV_st_fx = hSpMusClas->FV_st_fx;
2088 1129000 : smc_st_mean_fact_fx = SMC_ST_MEAN_RSHIFT_FACT_FX;
2089 1129000 : move16();
2090 18064000 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
2091 : {
2092 : //*pFV_st = smc_st_mean_fact * ( *pFV_st ) + ( 1 - smc_st_mean_fact ) * ( *pFV );
2093 16935000 : *pFV_st_fx = L_add( L_shr( *pFV_st_fx, smc_st_mean_fact_fx ), L_shr( *pFV_fx, 1 ) );
2094 16935000 : move32();
2095 :
2096 16935000 : test();
2097 16935000 : test();
2098 16935000 : test();
2099 16935000 : test();
2100 16935000 : test();
2101 16935000 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) && ( relE_attack_flag || flag_odv ) )
2102 : {
2103 : /* strong attack or outlier frame during entry state -> features cannot be trusted but there is also no useful past info -> */
2104 : /* -> do whatever you want because dlp will be reset to 0 anyway */
2105 234015 : pFV_fx++;
2106 234015 : pFV_st_fx++;
2107 : }
2108 16700985 : ELSE IF( hSpMusClas->sp_mus_state == HANG_LEN && ( EQ_16( st->tc_cnt, 1 ) || EQ_16( st->tc_cnt, 2 ) ) )
2109 : {
2110 : /* energy attack in stable state -> use current features instead of the long-term average */
2111 1328460 : pFV_fx++;
2112 1328460 : pFV_st_fx++;
2113 : }
2114 : ELSE
2115 : {
2116 15372525 : *pFV_fx++ = *pFV_st_fx++;
2117 15372525 : move32();
2118 : }
2119 : }
2120 :
2121 : /* update */
2122 1129000 : MVR2R_WORD32( FV_fx, hSpMusClas->prev_FV_fx, N_SMC_FEATURES );
2123 : /*------------------------------------------------------------------*
2124 : * Non-linear power transformation (boxcox) on certain features
2125 : *------------------------------------------------------------------*/
2126 1129000 : pFV_fx = FV_fx;
2127 18064000 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
2128 : {
2129 16935000 : IF( bcox_lmbd_fx[i] != 0 )
2130 : {
2131 3387000 : *pFV_fx = L_sub( *pFV_fx, L_shr( bcox_add_cnst_fx[i], sub( 31, Qfact_FV ) ) );
2132 3387000 : move32();
2133 3387000 : IF( LT_32( *pFV_fx, L_shl( 1, Qfact_FV ) ) )
2134 : {
2135 119593 : *pFV_fx = L_shl( 1, Qfact_FV );
2136 119593 : move32();
2137 : }
2138 3387000 : Word16 pow_e = 0;
2139 3387000 : move32();
2140 3387000 : temp32_log = L_add( BASOP_Util_Log2( *pFV_fx ), L_shl( sub( 31, Qfact_FV ), Q25 ) ); // Q25
2141 3387000 : temp32 = Mpy_32_32( temp32_log, bcox_lmbd_fx[i] ); // Q25
2142 3387000 : Word32 pow_temp = BASOP_util_Pow2( temp32, 31 - Q25, &pow_e );
2143 3387000 : IF( pow_e <= 0 )
2144 : {
2145 1006031 : pow_temp = L_shr( pow_temp, sub( 1, pow_e ) );
2146 1006031 : pow_e = add( pow_e, sub( 1, pow_e ) );
2147 : }
2148 3387000 : temp32 = L_sub( pow_temp, L_shl( 1, 31 - pow_e ) );
2149 3387000 : temp_exp = 0;
2150 3387000 : move32();
2151 3387000 : temp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( temp32, bcox_lmbd_fx[i], &temp_exp ) );
2152 3387000 : *pFV_fx = L_shl( temp32, sub( Qfact_FV, sub( 31, add( temp_exp, pow_e ) ) ) );
2153 3387000 : move32();
2154 : // float temp = powf( *pFV, bcox_lmbd[i] );
2155 : // *pFV = ( powf( *pFV, bcox_lmbd[i] ) - 1 ) / bcox_lmbd[i];
2156 : }
2157 :
2158 16935000 : pFV_fx++;
2159 : }
2160 :
2161 : /*------------------------------------------------------------------*
2162 : * Scaling of the feature vector
2163 : * PCA
2164 : *------------------------------------------------------------------*/
2165 :
2166 1129000 : pFV_fx = FV_fx;
2167 18064000 : FOR( i = 0; i < N_SMC_FEATURES; i++ )
2168 : {
2169 : /* Standard scaler - mean and variance normalization */
2170 : // *pFV = ( *pFV - sm_means[i] ) / sm_scale[i];
2171 16935000 : temp32 = L_sub( *pFV_fx, sm_means_fx[i] );
2172 16935000 : temp_exp = 0;
2173 16935000 : move16();
2174 16935000 : temp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( temp32, sm_scale_fx[i], &temp_exp ) );
2175 : // *pFV_fx = L_shl( temp32, Qfact_FV - ( 31 - temp_exp ) );
2176 16935000 : *pFV_fx = L_shl( temp32, sub( Qfact_FV, sub( 31, temp_exp ) ) );
2177 16935000 : move32();
2178 16935000 : pFV_fx++;
2179 : /* MinMax scaler - mean and variance normalization */
2180 : /**pFV = *pFV * sm_scale[i] + sm_min[i];*/
2181 : }
2182 :
2183 : /* PCA */
2184 : #ifdef VEC_ARITH_OPT_v1
2185 1129000 : v_sub_fixed_no_hdrm( FV_fx, pca_mean_fx, FV_fx, N_SMC_FEATURES );
2186 : #else /* VEC_ARITH_OPT_v1 */
2187 : v_sub_fixed( FV_fx, pca_mean_fx, FV_fx, N_SMC_FEATURES, 0 );
2188 : #endif /* VEC_ARITH_OPT_v1 */
2189 1129000 : v_mult_mat_fixed( FV_fx, FV_fx, pca_components_fx, N_SMC_FEATURES, N_PCA_COEF, 0 );
2190 : /*------------------------------------------------------------------*
2191 : * Calculation of posterior probability
2192 : * Log-probability
2193 : *------------------------------------------------------------------*/
2194 :
2195 : /* run loop for all mixtures (for each mixture, calculate the probability of speech, music and noise) */
2196 1129000 : lps_fx = lpm_fx = lpn_fx = 0;
2197 1129000 : move32();
2198 1129000 : move32();
2199 1129000 : move32();
2200 :
2201 7903000 : FOR( m = 0; m < N_SMC_MIXTURES; m++ )
2202 : {
2203 6774000 : v_sub32_fx( FV_fx, &means_speech_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
2204 6774000 : wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10
2205 6774000 : ps_fx[m] = L_sub( L_sub( L_add( log_weights_speech_compute[m], log_det_chol_speech_fx[m] ), W_extract_l( W_shr( wprob_fx, Q10 ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
2206 6774000 : move32();
2207 6774000 : v_sub32_fx( FV_fx, &means_music_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
2208 6774000 : wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10
2209 6774000 : pm_fx[m] = L_sub( L_sub( L_add( log_weights_music_compute[m], log_det_chol_music_fx[m] ), W_extract_l( W_shr( wprob_fx, Q10 ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
2210 6774000 : move32();
2211 6774000 : v_sub32_fx( FV_fx, &means_noise_fx[m * N_PCA_COEF], fvm_fx, N_PCA_COEF );
2212 6774000 : wprob_fx = dot_product_cholesky_fixed( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); // Q10
2213 6774000 : pn_fx[m] = L_sub( L_sub( L_add( log_weights_noise_compute[m], log_det_chol_noise_fx[m] ), W_extract_l( W_shr( wprob_fx, Q10 ) ) ), HALF_N_PCA_COEF_LOG_P12_Q18 ); // Q18
2214 6774000 : move32();
2215 : }
2216 :
2217 1129000 : lps_fx = logsumexp_fx( ps_fx, 31 - Q18, N_SMC_MIXTURES );
2218 1129000 : lpm_fx = logsumexp_fx( pm_fx, 31 - Q18, N_SMC_MIXTURES );
2219 1129000 : lpn_fx = logsumexp_fx( pn_fx, 31 - Q18, N_SMC_MIXTURES );
2220 1129000 : *high_lpn_flag = 0;
2221 1129000 : move16();
2222 1129000 : if ( GT_32( lpn_fx, lps_fx ) && GT_32( lpn_fx, lpm_fx ) )
2223 : {
2224 140556 : *high_lpn_flag = 1;
2225 140556 : move32();
2226 : }
2227 1129000 : hSpMusClas->lpm_fx = extract_h( L_shl_sat( lpm_fx, 16 - 11 ) ); // Q7
2228 1129000 : move16();
2229 1129000 : hSpMusClas->lps_fx = extract_h( L_shl_sat( lps_fx, 16 - 11 ) ); // Q7
2230 1129000 : move16();
2231 1129000 : hSpMusClas->lpn_fx = extract_h( L_shl_sat( lpn_fx, 16 - 11 ) ); // Q7
2232 1129000 : move16();
2233 : /* determine HQ Generic speech class */
2234 1129000 : IF( st->hHQ_core != NULL )
2235 : {
2236 421155 : IF( GT_32( lps_fx, L_add( lpm_fx, ONE_IN_Q17 ) ) )
2237 : {
2238 159278 : st->hHQ_core->hq_generic_speech_class = 1;
2239 : }
2240 : ELSE
2241 : {
2242 261877 : st->hHQ_core->hq_generic_speech_class = 0;
2243 : }
2244 421155 : move16();
2245 : }
2246 :
2247 : /*------------------------------------------------------------------*
2248 : * Decision without hangover
2249 : * Weighted decision
2250 : *------------------------------------------------------------------*/
2251 1129000 : test();
2252 1129000 : test();
2253 1129000 : test();
2254 1129000 : test();
2255 1129000 : test();
2256 : /* decision without hangover (0 - speech/noise, 1 - music) */
2257 1129000 : IF( !localVAD_HE_SAD || LT_16( Etot_fx, 2560 ) || ( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) && ( relE_attack_flag || flag_odv ) ) )
2258 : {
2259 220438 : dlp_fx = 0;
2260 220438 : move32();
2261 : }
2262 : ELSE
2263 : {
2264 908562 : dlp_fx = L_add( L_sub( lpm_fx, lps_fx ), DLP_BIAS_FX );
2265 908562 : dlp_fx = L_shl( dlp_fx, 1 ); // Q19
2266 :
2267 908562 : IF( GT_32( dlp_fx, 15728640 ) ) /*30.0f in Q19*/
2268 : {
2269 31507 : dlp_fx = 15728640;
2270 : }
2271 877055 : ELSE IF( LT_32( dlp_fx, -15728640 ) )
2272 : {
2273 0 : dlp_fx = -15728640;
2274 : }
2275 908562 : move32();
2276 : }
2277 :
2278 1129000 : dec = (Word16) GT_32( dlp_fx, 0 );
2279 1129000 : move16();
2280 : /* calculate weight based on relE (higher relE -> lower weight, lower relE -> higher weight) */
2281 :
2282 1129000 : wrelE_fx = lin_interp32_fx( L_deposit_h( relE_fx ), 15 << 24, 1932735283 /*0.9 in Q31*/, -( 15 << 24 ), 2126008812 /*0.99 in Q31*/, 1 ); // Q31
2283 : /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
2284 : // hSpMusClas->dlp_mean_ST = 0.8f * hSpMusClas->dlp_mean_ST + 0.2f * dlp;
2285 1129000 : hSpMusClas->dlp_mean_ST_fx = L_add( Mpy_32_32( 1717986918, hSpMusClas->dlp_mean_ST_fx ), Mpy_32_32( 429496729, dlp_fx ) );
2286 1129000 : hSpMusClas->lt_dec_thres_fx = extract_l( L_shr( hSpMusClas->dlp_mean_ST_fx, 10 ) );
2287 1129000 : test();
2288 1129000 : IF( dlp_fx < 0 && LT_32( dlp_fx, hSpMusClas->dlp_mean_ST_fx ) )
2289 : {
2290 252785 : IF( hSpMusClas->dlp_mean_ST_fx > 0 )
2291 : {
2292 74743 : hSpMusClas->wdrop_32fx = L_negate( dlp_fx ); // Q19
2293 74743 : move32();
2294 : }
2295 178042 : ELSE IF( hSpMusClas->wdrop_32fx > 0 )
2296 : {
2297 41885 : hSpMusClas->wdrop_32fx = L_add( hSpMusClas->wdrop_32fx, L_sub( hSpMusClas->dlp_mean_ST_fx, dlp_fx ) );
2298 41885 : move32();
2299 : }
2300 252785 : move16();
2301 : }
2302 : ELSE
2303 : {
2304 876215 : hSpMusClas->wdrop_32fx = 0;
2305 876215 : move32();
2306 : }
2307 1129000 : wdrop_fx = lin_interp32_fx( hSpMusClas->wdrop_32fx, 7864320, 1503238554 /* 0.7 in Q31 */, 0, ONE_IN_Q31 /* 1.0f in Q31 */, 1 ); /* Q31 */
2308 :
2309 1129000 : test();
2310 1129000 : test();
2311 : /* calculate weight based on rises of dlp (close to 1 during sudden rise of dlp, close to 0 otherwise) */
2312 1129000 : IF( EQ_16( hSpMusClas->sp_mus_state, HANG_LEN ) && hSpMusClas->dlp_mean_ST_fx > 0 && GT_32( hSpMusClas->dlp_mean_ST_fx, hSpMusClas->past_dlp_mean_ST_fx[0] ) )
2313 : {
2314 244772 : IF( hSpMusClas->past_dlp_mean_ST_fx[0] < 0 )
2315 : {
2316 13883 : hSpMusClas->wrise_fx = extract_l( L_shr( hSpMusClas->dlp_mean_ST_fx, 10 ) );
2317 : }
2318 230889 : ELSE IF( hSpMusClas->wrise_fx > 0 )
2319 : {
2320 34765 : hSpMusClas->wrise_fx = add( hSpMusClas->wrise_fx, extract_l( L_shr( L_sub( hSpMusClas->dlp_mean_ST_fx, hSpMusClas->past_dlp_mean_ST_fx[0] ), 10 ) ) );
2321 : }
2322 244772 : move16();
2323 : }
2324 : ELSE
2325 : {
2326 884228 : hSpMusClas->wrise_fx = 0;
2327 884228 : move16();
2328 : }
2329 :
2330 :
2331 1129000 : wrise_fx = lin_interp32_fx( L_deposit_h( hSpMusClas->wrise_fx ), 167772160, 2040109466 /* 0.95 in Q31 */, 0, ONE_IN_Q31 /* 1.0f in Q31 */, 1 ); /* Q31 */
2332 : /* combine weights into one */
2333 : // wght = wrelE * wdrop * wrise;
2334 1129000 : wght_fx = Mpy_32_32( Mpy_32_32( wrelE_fx, wdrop_fx ), wrise_fx ); /* Q31 */
2335 1129000 : test();
2336 : /* ratio of delta means vs. delta variances */
2337 1129000 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
2338 : {
2339 :
2340 66434 : hSpMusClas->dlp_mean_LT_fx = dlp_fx;
2341 66434 : move32();
2342 66434 : hSpMusClas->dlp_var_LT_fx = 0;
2343 66434 : move32();
2344 : }
2345 :
2346 1129000 : hSpMusClas->dlp_mean_LT_fx = L_add( Mpy_32_32( 1932735283, hSpMusClas->dlp_mean_LT_fx ), Mpy_32_32( 214748365, dlp_fx ) ); // Q19
2347 :
2348 1129000 : temp32 = L_sub( dlp_fx, hSpMusClas->dlp_mean_LT_fx );
2349 1129000 : temp32 = W_extract_l( W_shr( W_mult0_32_32( temp32, temp32 ), 19 ) ); /*q19*/
2350 1129000 : hSpMusClas->dlp_var_LT_fx = L_add( Mpy_32_32( 1932735283, hSpMusClas->dlp_var_LT_fx ), Mpy_32_32( 214748365, temp32 ) );
2351 :
2352 1129000 : test();
2353 1129000 : IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
2354 : {
2355 66434 : dlp_mean2var_fx = 0;
2356 66434 : move16();
2357 66434 : dlp_mean2var_q = 0;
2358 66434 : move16();
2359 : }
2360 : ELSE
2361 : {
2362 1062566 : temp_exp = sub( Q31, Q19 );
2363 1062566 : Word16 div_e = 0;
2364 1062566 : move16();
2365 1062566 : temp_sqrt = Sqrt32( L_abs( hSpMusClas->dlp_var_LT_fx ), &temp_exp );
2366 1062566 : IF( temp_exp < 0 )
2367 : {
2368 59903 : temp_sqrt = L_shl( temp_sqrt, temp_exp );
2369 59903 : temp_exp = 0;
2370 59903 : move16();
2371 : }
2372 1062566 : temp_sqrt = L_shr( temp_sqrt, 1 ); /*adding 1 as guard bit to avoid overflow in addition*/
2373 1062566 : temp_exp = add( temp_exp, 1 );
2374 1062566 : temp_sqrt = L_add( temp_sqrt, L_shl( 1, sub( 31, temp_exp ) ) );
2375 1062566 : dlp_mean2var_fx = BASOP_Util_Divide3232_Scale( L_abs( hSpMusClas->dlp_mean_LT_fx ), temp_sqrt, &div_e );
2376 1062566 : dlp_mean2var_q = sub( add( Q3, temp_exp ), div_e ); // 15-div_e+Q19 -(31-temp_exp)
2377 1062566 : IF( GT_16( dlp_mean2var_q, 26 ) )
2378 : {
2379 17100 : dlp_mean2var_fx = shl( dlp_mean2var_fx, sub( 26, dlp_mean2var_q ) );
2380 17100 : dlp_mean2var_q = 26;
2381 17100 : move16();
2382 : }
2383 : }
2384 :
2385 1129000 : IF( GT_32( L_deposit_l( dlp_mean2var_fx ), L_shl( 15, dlp_mean2var_q ) ) )
2386 : {
2387 : /* decrease the weight little bit when the classifier indicates "strong speech" or "strong music" */
2388 2856 : wght_fx = Mpy_32_32( wght_fx, 1932735283 /* 0.9f in Q31 */ ); /* Q31 */
2389 : }
2390 :
2391 1129000 : IF( GT_32( wght_fx, ONE_IN_Q31 ) )
2392 : {
2393 0 : wght_fx = ONE_IN_Q31; /* 1.0f in Q31 */
2394 : }
2395 1129000 : ELSE IF( LT_32( wght_fx, 21474836 /* 0.01f in Q31 */ ) )
2396 : {
2397 0 : wght_fx = 21474836; /* 0.01f in Q31 */
2398 : }
2399 1129000 : move32();
2400 1129000 : if ( LT_16( Etot_fx, 2560 /* 10f in Q8 */ ) )
2401 : {
2402 : /* silence */
2403 142469 : wght_fx = 1975684956; /* 0.92f in Q31 */
2404 142469 : move32();
2405 : }
2406 :
2407 : /* calculate weighted decision */
2408 : // hSpMusClas->wdlp_0_95_sp = wght * hSpMusClas->wdlp_0_95_sp + ( 1 - wght ) * dlp;
2409 1129000 : hSpMusClas->wdlp_0_95_sp_32fx = L_add( Mpy_32_32( wght_fx, hSpMusClas->wdlp_0_95_sp_32fx /*q24*/ ), Mpy_32_32( L_sub( ONE_IN_Q31, wght_fx ), L_shl( dlp_fx /*q19*/, 5 ) ) ); // Q24
2410 1129000 : move32();
2411 :
2412 : /* xtalk classifier: apply long hysteresis to prevent LRTD on music */
2413 :
2414 1129000 : hSpMusClas->wdlp_xtalk_fx = Madd_32_32( Mpy_32_32( 2136746230 /* 0.995f in Q31*/, hSpMusClas->wdlp_xtalk_fx /* Q25*/ ), 687194767 /* 0.005f in Q37 */, dlp_fx /* Q19*/ ); // Q25
2415 1129000 : move32();
2416 :
2417 : /*------------------------------------------------------------------*
2418 : * Final speech/music decision
2419 : *------------------------------------------------------------------*/
2420 :
2421 1129000 : IF( flag_spitch )
2422 : {
2423 37898 : hSpMusClas->flag_spitch_cnt = 5;
2424 37898 : move16();
2425 : }
2426 1091102 : ELSE IF( hSpMusClas->flag_spitch_cnt > 0 )
2427 : {
2428 5664 : hSpMusClas->flag_spitch_cnt = sub( hSpMusClas->flag_spitch_cnt, 1 );
2429 5664 : move16();
2430 : }
2431 1129000 : test();
2432 1129000 : IF( Etot_fx < 2560 )
2433 : {
2434 : /* silence */
2435 142469 : dec = 0;
2436 142469 : move16();
2437 : }
2438 986531 : ELSE IF( hSpMusClas->sp_mus_state > 0 && LT_16( hSpMusClas->sp_mus_state, HANG_LEN ) )
2439 : {
2440 66434 : temp32 = L_mult( w_spmus_fx[hSpMusClas->sp_mus_state - 1][0], (Word16) L_shr( dlp_fx, 10 ) ); /*Q25 */
2441 66434 : temp32 = L_add( temp32, Dot_product( &w_spmus_fx[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp_fx, sub( HANG_LEN, 1 ) ) );
2442 : /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
2443 66434 : IF( GT_32( temp32, 2 << 25 ) )
2444 : {
2445 32664 : IF( GT_32( dlp_fx, 2 << 19 ) )
2446 : {
2447 22769 : dec = 2;
2448 : }
2449 : ELSE
2450 : {
2451 9895 : dec = 1;
2452 : }
2453 : }
2454 : ELSE
2455 : {
2456 33770 : dec = 0;
2457 : }
2458 66434 : move16();
2459 : }
2460 : ELSE
2461 : {
2462 920097 : test();
2463 920097 : test();
2464 920097 : test();
2465 920097 : test();
2466 920097 : test();
2467 920097 : test();
2468 920097 : test();
2469 920097 : test();
2470 920097 : test();
2471 920097 : test();
2472 920097 : test();
2473 920097 : test();
2474 920097 : test();
2475 920097 : test();
2476 : /* stable active state */
2477 920097 : IF( hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 &&
2478 : ( ( hSpMusClas->flag_spitch_cnt > 0 && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 57042534 /*3.4*(2^24)*/ ) ) || ( hSpMusClas->flag_spitch_cnt == 0 && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 35232154 /*2.1*(2^24)*/ ) ) ) )
2479 : {
2480 : /* switching from speech to unclear */
2481 1784 : dec = 1;
2482 : }
2483 918313 : ELSE IF( hSpMusClas->past_dec[0] == 0 && LT_16( hSpMusClas->vad_0_1_cnt, 50 ) && hSpMusClas->relE_attack_sum_fx == 0 && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 1 << 24 ) )
2484 : {
2485 : /* switch from speech to unclear also during slowly rising weak music onsets */
2486 3407 : dec = 1;
2487 : }
2488 914906 : ELSE IF( EQ_16( hSpMusClas->past_dec[0], 1 ) && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 41943040 /*2.5*2^24*/ ) )
2489 : {
2490 : /* switching from unclear to music */
2491 4081 : dec = 2;
2492 : }
2493 910825 : ELSE IF( EQ_16( hSpMusClas->past_dec[0], 2 ) && EQ_16( hSpMusClas->past_dec[1], 2 ) && EQ_16( hSpMusClas->past_dec[2], 2 ) && LT_32( hSpMusClas->wdlp_0_95_sp_32fx, -( 1 << 24 ) ) )
2494 : {
2495 : /* switching from music to unclear */
2496 2411 : dec = 1;
2497 : }
2498 908414 : ELSE IF( EQ_16( hSpMusClas->past_dec[0], 1 ) && LT_32( hSpMusClas->wdlp_0_95_sp_32fx, -( 41943040 /*2.5*2^24*/ ) ) )
2499 : {
2500 : /* switching from unclear to speech */
2501 2494 : dec = 0;
2502 : }
2503 : ELSE
2504 : {
2505 905920 : dec = hSpMusClas->past_dec[0];
2506 : }
2507 920097 : move16();
2508 : }
2509 :
2510 : /*------------------------------------------------------------------*
2511 : * raw S/M decision based on smoothed GMM score
2512 : *------------------------------------------------------------------*/
2513 1129000 : test();
2514 1129000 : IF( dec == 0 || st->hSpMusClas->wdlp_0_95_sp_32fx <= 0 )
2515 : {
2516 691636 : st->sp_aud_decision0 = 0;
2517 691636 : st->sp_aud_decision1 = 0;
2518 : }
2519 : ELSE
2520 : {
2521 437364 : st->sp_aud_decision0 = 1;
2522 437364 : st->sp_aud_decision1 = 1;
2523 : }
2524 1129000 : move16();
2525 1129000 : move16();
2526 : /*------------------------------------------------------------------*
2527 : * Updates
2528 : *------------------------------------------------------------------*/
2529 :
2530 : /* update buffer of past non-binary decisions */
2531 1129000 : Copy( &hSpMusClas->past_dlp_fx[0], &hSpMusClas->past_dlp_fx[1], HANG_LEN - 2 );
2532 1129000 : hSpMusClas->past_dlp_fx[0] = extract_l( L_shr( dlp_fx, 10 ) );
2533 1129000 : move16();
2534 :
2535 1129000 : Copy32( &hSpMusClas->past_dlp_mean_ST_fx[0], &hSpMusClas->past_dlp_mean_ST_fx[1], HANG_LEN - 2 );
2536 1129000 : hSpMusClas->past_dlp_mean_ST_fx[0] = hSpMusClas->dlp_mean_ST_fx;
2537 1129000 : move32();
2538 :
2539 : /* update buffer of past binary decisions */
2540 1129000 : mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
2541 1129000 : hSpMusClas->past_dec[0] = dec;
2542 1129000 : move16();
2543 : #ifdef DEBUG_MODE_INFO
2544 : dbgwrite( &st->hSpMusClas->wdlp_0_95_sp_32fx, sizeof( Word32 ), 1, 1, "res/wdlp_0_95_sp.x" );
2545 : #endif
2546 :
2547 1129000 : return dec;
2548 : }
2549 :
2550 : /*---------------------------------------------------------------------*
2551 : * var_cor_calc_ivas_fx()
2552 : *
2553 : * Calculate variance of correlation
     : *
     : * Pushes the newest correlation value (converted from Q15 to Q11) into the
     : * history buffer var_cor_t[], takes the variance of that buffer with
     : * var_fx(), and raises *high_stable_cor when the long-term mean correlation
     : * is high while the variance is below 1. The long-term mean *mold_corr is
     : * refreshed last, so the flag is decided with the mean from before this call.
2554 : *---------------------------------------------------------------------*/
2555 :
2556 413813 : static void var_cor_calc_ivas_fx(
2557 : const Word16 old_corr, /* i : newest correlation value, Q15 */
2558 : Word16 *mold_corr, /* i/o: long-term mean of the correlation, Q15 */
2559 : Word16 var_cor_t[], /* i/o: history of VAR_COR_LEN correlation values, newest at index 0, Q11 */
2560 : Word16 *high_stable_cor ) /* o : 1 when the correlation is high and stable, 0 otherwise */
2561 : {
2562 : Word16 i, var_cor;
2563 :
2564 : /* update buffer of old correlation values (shift by one towards the end, oldest entry is dropped) */
2565 4138130 : FOR( i = VAR_COR_LEN - 1; i > 0; i-- )
2566 : {
2567 3724317 : var_cor_t[i] = var_cor_t[i - 1]; /*Q11*/
2568 3724317 : move16();
2569 : }
2570 413813 : var_cor_t[i] = shr( old_corr, 4 ); /* i == 0 here; Q15 -> Q11 */
2571 413813 : move16();
2572 :
2573 : /* calculate variance of correlation */
2574 413813 : var_cor = var_fx( var_cor_t, 11, VAR_COR_LEN ); /* second argument is the Q of the input buffer */
2575 :
2576 413813 : *high_stable_cor = 0;
2577 413813 : move16();
2578 413813 : test();
2579 413813 : if ( GT_16( *mold_corr, 26214 ) && LT_16( var_cor, 1 ) ) /* 26214 ~= 0.8 in Q15; variance must be <= 0 in the output format of var_fx() */
2580 : {
2581 5625 : *high_stable_cor = 1;
2582 5625 : move16();
2583 : }
2584 :
2585 : /* update average correlation (done after the decision above on purpose of ordering: the flag sees the previous mean) */
2586 : /*st->mold_corr = 0.1f * st->old_corr + 0.9f * st->mold_corr;*/
2587 413813 : *mold_corr = mac_r( L_mult( 3277, old_corr ), 29491, *mold_corr ); /* 3277 ~= 0.1 and 29491 ~= 0.9 in Q15; rounded result in Q15 */
2588 413813 : move16();
2589 :
2590 413813 : return;
2591 : }
2592 :
2593 : /*---------------------------------------------------------------------*
2594 : * attack_det_ivas_fx()
2595 : *
2596 : * Attack detection
     : *
     : * Splits the input into ATT_NSEG segments of ATT_SEG_LEN samples, computes
     : * the energy of every segment and takes the segment with the largest energy
     : * as the attack candidate. The candidate is then validated by one of two
     : * rule sets:
     : *   - localVAD == 1 and coder_type == GENERIC: strong-onset rules, followed
     : *     by an additional re-check that is only active for element modes above
     : *     EVS_MONO; *last_strong_attack is set to the outcome.
     : *   - otherwise, if a candidate exists: the candidate must dominate the
     : *     earlier segments; *last_strong_attack is cleared.
     : * Before returning, finc_prev[] is overwritten with the current segment
     : * energies and the long-term mean *lt_finc is smoothed with 0.95/0.05.
     : *
     : * Returns the index of the attack segment, or 0 when no attack is declared
     : * (a maximum located in segment 0 is indistinguishable from "no attack").
2597 : *---------------------------------------------------------------------*/
2598 :
2599 413813 : static Word16 attack_det_ivas_fx( /* o : attack flag */
2600 : const Word16 *inp, /* i : input signal, ATT_NSEG * ATT_SEG_LEN samples are read, Qx */
2601 : const Word16 Qx, /* i : Q of the input signal */
2602 : const Word16 last_clas, /* i : last signal clas */
2603 : const Word16 localVAD, /* i : local VAD flag */
2604 : const Word16 coder_type, /* i : coder type */
2605 : const Word32 total_brate, /* i : total bitrate (selects the 16k subframe position at ACELP_24k40 and above) */
2606 : const Word16 element_mode, /* i : IVAS element mode */
2607 : const Word16 clas, /* i : signal class */
2608 : Word32 finc_prev[], /* i/o: previous finc, (q_finc_prev) */
2609 : Word16 *q_finc_prev, /* i/o: Q of previous finc */
2610 : Word32 *lt_finc, /* i/o: long-term mean finc, (q_lt_finc) */
2611 : Word16 *q_lt_finc, /* i/o: Q of lt_finc */
2612 : Word16 *last_strong_attack /* i/o: last strong attack flag */
2613 : )
2614 : {
2615 : Word16 i, j, tmp, tmp1, attack, exp1;
2616 : Word32 L_tmp, etmp, etmp2, finc[ATT_NSEG], mean_finc;
2617 : Word16 att_3lsub_pos;
2618 : Word16 attack1;
2619 : Word64 W_tmp;
2620 : Word16 q_diff;
2621 :
2622 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
2623 413813 : Flag Overflow = 0; /* local overflow flag handed to the *_o operators below */
2624 413813 : move32();
2625 : #endif
2626 :
2627 413813 : att_3lsub_pos = ATT_3LSUB_POS;
2628 413813 : move16();
2629 413813 : if ( GE_32( total_brate, ACELP_24k40 ) )
2630 : {
2631 0 : att_3lsub_pos = ATT_3LSUB_POS_16k;
2632 0 : move16();
2633 : }
2634 :
2635 : /* compute energy per section */
2636 13655829 : FOR( i = 0; i < ATT_NSEG; i++ )
2637 : {
2638 13242016 : L_tmp = L_mult0( inp[i * ATT_SEG_LEN], inp[i * ATT_SEG_LEN] ); /*2*Qx */
2639 :
2640 105936128 : FOR( j = 1; j < ATT_SEG_LEN; j++ )
2641 : {
2642 92694112 : L_tmp = L_mac0_o( L_tmp, inp[i * ATT_SEG_LEN + j], inp[i * ATT_SEG_LEN + j], &Overflow ); /*2*Qx */
2643 : }
2644 :
2645 13242016 : finc[i] = L_tmp;
2646 13242016 : move32();
2647 : }
2648 :
2649 413813 : attack = maximum_32_fx( finc, ATT_NSEG, &etmp ); /* return value is used below as the index of the strongest segment */
2650 413813 : attack1 = attack; /* keep the raw candidate; 'attack' may be cleared by the rules below */
2651 413813 : move16();
2652 :
2653 413813 : *q_finc_prev = shl( Qx, 1 ); // Q of finc
2654 413813 : move16();
2655 413813 : q_diff = sub( *q_finc_prev, *q_lt_finc ); // taken before *q_lt_finc is possibly updated at the end of this function
2656 413813 : test();
2657 413813 : IF( EQ_16( localVAD, 1 ) && EQ_16( coder_type, GENERIC ) )
2658 : {
2659 : /*----------------------------------------------------------------------*
2660 : * Detect if there is a strong onset in the last subframe
2661 : * - if detected, TC is used to better code the onset
2662 : *----------------------------------------------------------------------*/
2663 :
2664 : /* compute mean energy in the first three subframes */
2665 208268 : exp1 = norm_s( att_3lsub_pos );
2666 208268 : tmp = div_s( shl( 1, sub( 14, exp1 ) ), att_3lsub_pos ); /*Q(29-exp1) */
2667 :
2668 208268 : L_tmp = L_shr_o( finc[0], Qx, &Overflow ); /*Qx */
2669 :
2670 4998432 : FOR( i = 1; i < att_3lsub_pos; i++ )
2671 : {
2672 4790164 : L_tmp = L_add_o( L_tmp, L_shr_o( finc[i], Qx, &Overflow ), &Overflow ); /*Qx */
2673 : }
2674 208268 : L_tmp = Mult_32_16( L_tmp, tmp ); /*Q(14-exp1+Qx) */
2675 208268 : etmp = L_shl( L_tmp, sub( exp1, 14 ) ); /*Qx */
2676 :
     : /* mean energy from the candidate segment up to the end of the frame */
2677 208268 : tmp1 = sub( ATT_NSEG, attack );
2678 208268 : exp1 = norm_s( tmp1 );
2679 208268 : tmp = div_s( shl( 1, sub( 14, exp1 ) ), tmp1 ); /*Q(29-exp1) */
2680 :
2681 208268 : L_tmp = L_shr_o( finc[attack], Qx, &Overflow ); /*Qx */
2682 3413526 : FOR( i = 1; i < tmp1; i++ )
2683 : {
2684 3205258 : L_tmp = L_add_o( L_tmp, L_shr_o( finc[i + attack], Qx, &Overflow ), &Overflow ); /*Qx */
2685 : }
2686 208268 : L_tmp = Mult_32_16( L_tmp, tmp ); /*Q(14-exp1+Qx) */
2687 208268 : etmp2 = L_shl( L_tmp, sub( exp1, 14 ) ); /*Qx */
2688 :
2689 : /* and compare them */
2690 208268 : if ( GT_32( etmp, L_shr( etmp2, 3 ) ) ) /* leading mean exceeds 1/8 of the mean from the candidate onwards */
2691 : {
2692 : /* stop, if the attack is not sufficiently strong */
2693 201920 : attack = 0;
2694 201920 : move16();
2695 : }
2696 :
2697 208268 : test();
2698 208268 : if ( EQ_16( last_clas, VOICED_CLAS ) && GT_32( L_add( L_shl( etmp, 4 ), L_shl( etmp, 2 ) ), etmp2 ) ) /* 16*etmp + 4*etmp = 20*etmp */
2699 : {
2700 : /* stop, if the signal was voiced and the attack is not sufficiently strong */
2701 49213 : attack = 0;
2702 49213 : move16();
2703 : }
2704 :
2705 : /* compare also wrt. other sections (reduces a misclassification) */
2706 208268 : IF( attack > 0 )
2707 : {
2708 5729 : etmp2 = L_add( finc[attack], 0 );
2709 5729 : etmp = Mult_32_16( etmp2, 16384 ); /* etmp2 / 2.0 = (etmp2*0.5) */
2710 117268 : FOR( i = 2; i < ATT_3LSUB_POS - 2; i++ ) /* NOTE(review): bound uses the ATT_3LSUB_POS macro while the comparable loop in the ELSE IF branch of this function uses att_3lsub_pos - confirm this is intended */
2711 : {
2712 111979 : IF( GT_32( finc[i], etmp ) )
2713 : {
2714 440 : attack = 0;
2715 440 : move16();
2716 440 : BREAK;
2717 : }
2718 : }
2719 : }
2720 :
     : /* second chance for a rejected candidate: only for element modes above EVS_MONO and classes below VOICED_TRANSITION or equal to ONSET */
2721 208268 : test();
2722 208268 : test();
2723 208268 : test();
2724 208268 : IF( attack == 0 && GT_16( element_mode, EVS_MONO ) && ( LT_16( clas, VOICED_TRANSITION ) || EQ_16( clas, ONSET ) ) )
2725 : {
2726 135939 : Copy32( finc, finc_prev, attack1 ); /* only the first attack1 entries are replaced; the remaining entries still hold the previous call's energies */
2727 :
2728 : /* compute mean energy before the attack */
2729 135939 : Word64 W_etmp = W_deposit32_l( finc_prev[0] );
2730 4350048 : FOR( Word16 idx = 1; idx < ATT_NSEG; idx++ )
2731 : {
2732 4214109 : W_etmp = W_add( W_etmp, W_deposit32_l( finc_prev[idx] ) );
2733 : }
2734 135939 : W_etmp = W_shr( W_etmp, 5 ); /*ATT_NSEG == 32*/
2735 :
2736 135939 : etmp2 = finc[attack1];
2737 135939 : move32();
2738 135939 : test();
2739 135939 : test();
2740 135939 : if ( ( LT_64( W_shl( W_etmp, 4 ), W_deposit32_l( etmp2 ) ) ) || ( LT_64( W_add( W_shl( W_etmp, 3 ), W_shl( W_etmp, 2 ) ), W_deposit32_l( etmp2 ) ) && EQ_16( last_clas, UNVOICED_CLAS ) ) ) /* candidate above 16x the mean, or above 12x the mean after an unvoiced frame */
2741 : {
2742 4978 : attack = attack1;
2743 4978 : move16();
2744 : }
2745 135939 : test();
2746 135939 : if ( GT_32( L_shl_sat( *lt_finc, q_diff ), Mpy_32_32( etmp2, 107374182 /* 1.f/20 in Q31 */ ) ) || *last_strong_attack ) /* lt_finc is shifted to the Q of finc before the comparison */
2747 : {
2748 128143 : attack = 0;
2749 128143 : move16();
2750 : }
2751 : }
2752 208268 : *last_strong_attack = attack;
2753 208268 : move16();
2754 : }
2755 205545 : ELSE IF( attack > 0 )
2756 : {
2757 194959 : etmp2 = L_add( finc[attack], 0 );
2758 194959 : etmp = Mult_32_16( etmp2, 25206 ); /* etmp2 / 1.3 = (etmp2*0.76923) */
2759 2528224 : FOR( i = 2; i < att_3lsub_pos - 2; i++ )
2760 : {
2761 : /*if( i != attack && finc[i] * 1.3f > etmp2 ) -> finc[i] > (etmp2*0.76923) */
2762 2453591 : test();
2763 2453591 : IF( NE_16( i, attack ) && GT_32( finc[i], etmp ) )
2764 : {
2765 120326 : attack = 0;
2766 120326 : move16();
2767 120326 : BREAK;
2768 : }
2769 : }
2770 194959 : *last_strong_attack = 0; /* note: left untouched when this branch is skipped because attack == 0 */
2771 194959 : move16();
2772 : }
2773 :
2774 : /* updates */
2775 413813 : Copy32( finc, finc_prev, ATT_NSEG );
2776 :
2777 : /* Calculating mean of finc */
2778 413813 : W_tmp = W_mult_32_16( finc[0], 1 ); // q_finc_prev+1
2779 13242016 : FOR( i = 1; i < ATT_NSEG; i++ )
2780 : {
2781 12828203 : W_tmp = W_mac_32_16( W_tmp, finc[i], 1 ); // q_finc_prev+1
2782 : }
2783 : /* mean = W_tmp / 32 and change the Q from q_finc_prev+1 to q_finc_prev
2784 : Mean value doesn't saturate, W_shl_sat_l is used only considering complexity */
2785 413813 : mean_finc = W_shl_sat_l( W_tmp, -Q6 ); // q_finc_prev+1 -> q_finc_prev
2786 :
2787 : //*lt_finc = 0.95f * *lt_finc + 0.05f * mean( finc, ATT_NSEG );
2788 413813 : IF( q_diff > 0 ) /* q_finc_prev > q_lt_finc */
2789 : {
2790 289742 : mean_finc = L_shr( mean_finc, q_diff ); // q_lt_finc
2791 289742 : *lt_finc = Madd_32_32( Mpy_32_32( *lt_finc, 2040109466 /* 0.95 in Q31 */ ), mean_finc, 107374182 /* 0.05f in Q31 */ ); // q_lt_finc
2792 289742 : move32();
2793 : }
2794 : ELSE
2795 : {
2796 124071 : *lt_finc = Madd_32_32( Mpy_32_32( L_shl( *lt_finc, q_diff ), 2040109466 /* 0.95 in Q31 */ ), mean_finc, 107374182 /* 0.05f in Q31 */ ); // q_finc_prev (q_diff <= 0, so L_shl acts as a right shift)
2797 124071 : move32();
2798 124071 : *q_lt_finc = *q_finc_prev;
2799 124071 : move16();
2800 : }
2801 :
2802 413813 : return attack;
2803 : }
2804 :
2805 : /*---------------------------------------------------------------------*
2806 : * tonal_det_fx()
2807 : *
2808 : * Tonal detector based on spectral stability and harmonicity
     : *
     : * Per call:
     : *   1. the adaptive weight is updated as 0.95 * weight + 0.05 * vad_flag and
     : *      limited to [32767 - TON_ALPHA_FX, TON_ALPHA_FX];
     : *   2. every bin of the long-term map is smoothed with that weight,
     : *      tod_S_map_lt[i] = w * tod_S_map_lt[i] + (1 - w) * S[i];
     : *   3. the mean of the map over TOD_NSPEC bins (S_mass) is smoothed with
     : *      alpha = 0.7 when it increased since the previous call, 0.3 otherwise;
     : *   4. the adaptive threshold *tod_thr_lt moves down by THR_MASS_STEP_DN_FX
     : *      when the smoothed mass is above it, up by THR_MASS_STEP_UP_FX
     : *      otherwise, and is limited to [THR_MASS_MIN_FX, THR_MASS_MAX_FX].
     : *
     : * Returns the smoothed spectral mass (*tod_S_mass_lt) in Q22.
     : *
     : * Stores into Word32 objects are counted with move32().
2809 : *---------------------------------------------------------------------*/
2810 :
2811 413813 : static Word32 tonal_det_fx(
2812 : const Word16 S[], // i  : short-term map, TOD_NSPEC entries are read, Q7
2813 : Word16 vad_flag, // i  : VAD flag, used as a 0/1 multiplier
2814 : Word32 tod_S_map_lt[], // i/o: long-term map, Q22
2815 : Word32 *tod_thr_lt, // i/o: adaptive decision threshold, Q22
2816 : Word16 *tod_weight, // i/o: adaptive smoothing weight, Q15
2817 : Word32 *tod_S_mass_prev, // i/o: unsmoothed mass of the previous call, Q22
2818 : Word32 *tod_S_mass_lt // i/o: smoothed mass, Q22
2819 : )
2820 : {
2821 : Word16 i;
2822 : Word32 S_mass, alpha;
2823 : Word32 L_tmp;
2824 : Word64 W_tmp;
2825 :
2826 : /* update the adaptive weight */
2827 413813 : *tod_weight = add( mult( TON_ALPHA_FX, *tod_weight ), imult1616( ( 32767 - TON_ALPHA_FX ), vad_flag ) );
2828 413813 : move16();
2829 413813 : IF( GT_16( *tod_weight, TON_ALPHA_FX ) )
2830 : {
2831 270400 : *tod_weight = TON_ALPHA_FX;
2832 270400 : move16();
2833 : }
2834 143413 : ELSE IF( LT_16( *tod_weight, ( 32767 - TON_ALPHA_FX ) ) )
2835 : {
2836 29443 : *tod_weight = 32767 - TON_ALPHA_FX;
2837 29443 : move16();
2838 : }
2839 :
2840 : /* calculate LT spectral correlation in each band up to 4KHz */
2841 413813 : W_tmp = 0;
2842 413813 : move64();
2843 33518853 : FOR( i = 0; i < TOD_NSPEC; i++ )
2844 : {
2845 33105040 : tod_S_map_lt[i] = L_add( Mpy_32_16_1( tod_S_map_lt[i], *tod_weight ), L_mult0( sub( 32767, *tod_weight ), S[i] ) ); // Q22 (Q15 weight * Q7 input)
2846 33105040 : move32(); /* 32-bit store */
2847 :
2848 33105040 : W_tmp = W_add( W_tmp, (Word64) ( tod_S_map_lt[i] ) ); // Q22, accumulated in 64 bits
2849 : }
2850 : // S_mass /= TOD_NSPEC;
2851 413813 : L_tmp = W_extract_l( W_tmp ); // Q22
2852 413813 : S_mass = ( Mpy_32_32( L_tmp, TOD_NSPEC_INV_Q31 ) ); // Q22
2853 :
     : /* faster tracking of the previous long-term value when the mass is rising */
2854 413813 : IF( GT_32( S_mass, *tod_S_mass_prev ) )
2855 : {
2856 200638 : alpha = 1503238554; /* 0.7f in Q31 */
2857 : }
2858 : ELSE
2859 : {
2860 213175 : alpha = 644245094; /* 0.3f in Q31 */
2861 : }
2862 413813 : move32(); /* alpha is a Word32 */
2863 :
2864 413813 : *tod_S_mass_prev = S_mass;
2865 413813 : move32();
2866 413813 : *tod_S_mass_lt = L_add( Mpy_32_32( alpha, *tod_S_mass_lt ), Mpy_32_32( L_sub( ONE_IN_Q31, alpha ), S_mass ) ); // Q22
2867 413813 : move32();
2868 413813 : S_mass = *tod_S_mass_lt; /* from here on S_mass is the smoothed value */
2869 413813 : move32();
2870 :
2871 : /* updating adaptive decision threshold */
2872 413813 : IF( GT_32( S_mass, *tod_thr_lt ) )
2873 : {
2874 1984 : *tod_thr_lt = L_sub( *tod_thr_lt, THR_MASS_STEP_DN_FX );
2875 : }
2876 : ELSE
2877 : {
2878 411829 : *tod_thr_lt = L_add( *tod_thr_lt, THR_MASS_STEP_UP_FX );
2879 : }
2880 413813 : move32(); /* *tod_thr_lt is a Word32 */
2881 :
2882 413813 : if ( GT_32( *tod_thr_lt, THR_MASS_MAX_FX ) )
2883 : {
2884 411263 : *tod_thr_lt = THR_MASS_MAX_FX;
2885 : }
2886 :
2887 413813 : if ( LT_32( *tod_thr_lt, THR_MASS_MIN_FX ) )
2888 : {
2889 1652 : *tod_thr_lt = THR_MASS_MIN_FX;
2890 : }
2891 413813 : move32(); /* *tod_thr_lt is a Word32 */
2892 :
2893 413813 : return S_mass; /* Q22 */
2894 : }
2895 :
2896 :
2897 : /*---------------------------------------------------------------------*
2898 : * ivas_smc_mode_selection_fx()
2899 : *
2900 : * 2nd stage speech/music classifier (select coding mode (ACELP, GSC and TCX) based on S/M classification)
2901 : * output (sp_aud_decision1 - sp_aud_decision2 -> coding mode):
2902 : * 0 - 0 -> ACELP
2903 : * 1 - 0 -> GSC
2904 : * 1 - 1 -> TCX
     : *
     : * Besides the two decision flags the function writes *attack_flag,
     : * st->coder_type (TRANSITION on attacks, AUDIO when GSC is selected),
     : * st->GSC_noisy_speech, st->GSC_IVAS_mode and, when st->hGSCEnc exists,
     : * st->hGSCEnc->noise_lev. It also updates the classifier memories in
     : * st->hSpMusClas (stability estimator, correlation variance, attack
     : * detector, tonal detector and long-term bin energies).
     : *
     : * *attack_flag is either 0 or (attack segment index + 1).
2905 : *---------------------------------------------------------------------*/
2906 :
2907 413813 : void ivas_smc_mode_selection_fx(
2908 : Encoder_State *st, /* i/o: encoder state structure */
2909 : const Word32 element_brate, /* i : element bitrate */
2910 : Word16 smc_dec, /* i : raw decision of the 1st stage classifier*/
2911 : const Word16 relE, /* i : relative frame energy, Q8 */
2912 : const Word16 Etot, /* i : total frame energy, Q8 */
2913 : Word16 *attack_flag, /* i/o: attack flag (GSC or TC) */
2914 : const Word16 *inp, /* i : input signal */
2915 : const Word16 Q_new, /* i : Q of input signal */
2916 : const Word16 S_map[], /* i : short-term correlation map, Q7 */
2917 : const Word16 flag_spitch /* i : flag to indicate very short stable pitch*/
2918 : )
2919 : {
2920 : Word16 attack;
2921 : Word32 ton;
2922 : Word16 i;
2923 413813 : Word32 S_p2a, S_max, S_ave = 0;
2924 413813 : move32();
2925 : Word32 thr_sp2a;
2926 :
2927 413813 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
2928 :
2929 : /* initialization */
2930 413813 : *attack_flag = 0;
2931 413813 : move16();
2932 413813 : st->sp_aud_decision2 = 0;
2933 413813 : move16();
2934 :
2935 : /* signal stability estimation */
2936 413813 : stab_est_fx( Etot, hSpMusClas->gsc_lt_diff_etot_fx, &hSpMusClas->gsc_mem_etot_fx, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres_fx, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
2937 :
2938 : /* calculate variance of correlation */
2939 413813 : var_cor_calc_ivas_fx( st->old_corr_fx, &hSpMusClas->mold_corr_fx, hSpMusClas->var_cor_t_fx, &hSpMusClas->high_stable_cor );
2940 :
2941 : /* attack detection (the stored segment energies are first rescaled if the Q of the input changed, since energies are in 2*Q_new) */
2942 413813 : IF( NE_16( shl( Q_new, 1 ), hSpMusClas->q_finc_prev ) )
2943 : {
2944 54303 : Scale_sig32( hSpMusClas->finc_prev_fx, ATT_NSEG, sub( shl( Q_new, 1 ), hSpMusClas->q_finc_prev ) );
2945 54303 : hSpMusClas->q_finc_prev = shl( Q_new, 1 );
2946 54303 : move16();
2947 : }
2948 413813 : attack = attack_det_ivas_fx( inp, Q_new, st->clas, st->localVAD, st->coder_type, 0, st->element_mode, st->clas, hSpMusClas->finc_prev_fx, /* total_brate is passed as 0; st->clas serves as both last_clas and clas */
2949 : &hSpMusClas->q_finc_prev, &hSpMusClas->lt_finc_fx, &hSpMusClas->Q_lt_finc, &hSpMusClas->last_strong_attack );
2950 :
2951 : /* tonal detector */
2952 413813 : ton = tonal_det_fx( S_map, st->vad_flag, hSpMusClas->tod_S_map_lt_fx, &hSpMusClas->tod_thr_lt_fx, &hSpMusClas->tod_weight_fx, &hSpMusClas->tod_S_mass_prev_fx, &hSpMusClas->tod_S_mass_lt_fx ); // Q22
2953 :
2954 : /* calculate spectral peak-to-average ratio */
2955 413813 : Word16 shift = sub( st->q_Bin_E, st->hSpMusClas->Q_tod_lt_Bin_E ); /* aligns the stored long-term energies with the Q of the current Bin_E */
2956 33518853 : FOR( i = 0; i < TOD_NSPEC; i++ )
2957 : {
2958 : // st->hSpMusClas->tod_lt_Bin_E[i] = P2A_FACT * st->hSpMusClas->tod_lt_Bin_E[i] + ( 1 - P2A_FACT ) * st->Bin_E[i];
2959 33105040 : st->hSpMusClas->tod_lt_Bin_E_fx[i] = Madd_32_16( L_shl( Mpy_32_16_1( st->hSpMusClas->tod_lt_Bin_E_fx[i], P2A_FACT_FX_Q15 ), shift ), st->Bin_E_fx[i], ( 32767 - P2A_FACT_FX_Q15 ) ); // Q = st->q_Bin_E + Q_SCALE - 2
2960 33105040 : move32();
2961 : }
2962 413813 : st->hSpMusClas->Q_tod_lt_Bin_E = add( st->hSpMusClas->Q_tod_lt_Bin_E, shift ); /* i.e. becomes st->q_Bin_E */
2963 413813 : move16();
2964 413813 : maximum_32_fx( st->hSpMusClas->tod_lt_Bin_E_fx, TOD_NSPEC, &S_max );
2965 : // S_ave = sum_f( st->hSpMusClas->tod_lt_Bin_E_fx, TOD_NSPEC ) / TOD_NSPEC;
2966 33518853 : FOR( i = 0; i < TOD_NSPEC; i++ )
2967 : {
2968 33105040 : S_ave = L_add( S_ave, st->hSpMusClas->tod_lt_Bin_E_fx[i] );
2969 : }
2970 413813 : S_ave = Mpy_32_32( S_ave, TOD_NSPEC_INV_Q31 );
2971 :
2972 413813 : S_p2a = L_sub( S_max, S_ave ); /* peak minus average of the long-term bin energies */
2973 :
2974 413813 : IF( LE_32( element_brate, IVAS_16k4 ) )
2975 : {
2976 135504 : thr_sp2a = L_shl( THR_P2A_HIGH_FX, st->q_Bin_E ); // Q = st->q_Bin_E
2977 : }
2978 : ELSE
2979 : {
2980 278309 : thr_sp2a = L_shl( THR_P2A_FX, st->q_Bin_E ); // Q = st->q_Bin_E
2981 : }
2982 :
2983 : /* initial 3-way selection of coding modes (ACELP/GSC/TCX) */
2984 413813 : test();
2985 413813 : test();
2986 413813 : IF( GT_16( relE, -2560 /* -10.0f in Q8 */ ) && ( GT_32( S_p2a, thr_sp2a ) || GT_32( ton, hSpMusClas->tod_thr_lt_fx ) ) )
2987 : {
2988 : /* select TCX to encode extremely peaky signals or strongly tonal signals */
2989 18304 : st->sp_aud_decision1 = 1;
2990 18304 : st->sp_aud_decision2 = 1;
2991 : }
2992 395509 : ELSE IF( smc_dec == SPEECH )
2993 : {
2994 : /* select ACELP to encode speech */
2995 152546 : st->sp_aud_decision1 = 0;
2996 152546 : st->sp_aud_decision2 = 0;
2997 : }
2998 242963 : ELSE IF( EQ_16( smc_dec, SPEECH_OR_MUSIC ) )
2999 : {
3000 : /* select GSC to encode "unclear" segments (classifier's score on the borderline) */
3001 6430 : st->sp_aud_decision1 = 1;
3002 6430 : st->sp_aud_decision2 = 0;
3003 : }
3004 : ELSE
3005 : {
3006 : /* select TCX to encode music */
3007 236533 : st->sp_aud_decision1 = 1;
3008 236533 : st->sp_aud_decision2 = 1;
3009 : }
3010 413813 : move16();
3011 413813 : move16();
3012 :
3013 : /* change decision from GSC to ACELP/TCX in some special cases */
3014 413813 : test();
3015 413813 : IF( EQ_16( st->sp_aud_decision1, 1 ) && st->sp_aud_decision2 == 0 )
3016 : {
3017 6430 : test();
3018 6430 : test();
3019 6430 : IF( LT_16( hSpMusClas->ener_RAT_fx, 5898 /* 0.18f in Q15 */ ) && GT_16( hSpMusClas->lt_dec_thres_fx, 7680 /* 15.0f in Q9 */ ) )
3020 : {
3021 : /* prevent GSC on strong music with almost no content below 1kHz */
3022 4 : st->sp_aud_decision2 = 1;
3023 4 : move16();
3024 : }
3025 6426 : ELSE IF( flag_spitch )
3026 : {
3027 : /* prevent GSC on signals with very short and stable high pitch period */
3028 113 : IF( LT_32( hSpMusClas->wdlp_0_95_sp_32fx, 41943040 /* 2.5f in Q24 */ ) )
3029 : {
3030 : /* select ACELP instead */
3031 109 : st->sp_aud_decision1 = 0;
3032 109 : move16();
3033 : }
3034 : ELSE
3035 : {
3036 : /* select TCX instead */
3037 4 : st->sp_aud_decision2 = 1;
3038 4 : move16();
3039 : }
3040 : }
3041 6313 : ELSE IF( hSpMusClas->high_stable_cor && GE_16( st->pitch[0], 130 ) )
3042 : {
3043 : /* prevent GSC in highly correlated signal with low energy variation */
3044 : /* this is basically a patch against bassoon-type of music */
3045 0 : st->sp_aud_decision2 = 1;
3046 0 : move16();
3047 : }
3048 : }
3049 :
3050 : /* change decision from GSC to ACELP TC during attacks/onsets */
3051 413813 : test();
3052 413813 : IF( EQ_16( st->sp_aud_decision1, 1 ) && st->sp_aud_decision2 == 0 )
3053 : {
3054 6313 : test();
3055 6313 : IF( GT_16( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], 1152 /*4.5f in Q8*/ ) &&
3056 : ( GT_16( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 1], add( hSpMusClas->gsc_lt_diff_etot_fx[MAX_LT - 2], 2560 /* 10.0f in Q8 */ ) ) ) )
3057 : {
3058 106 : IF( EQ_16( st->tc_cnt, 1 ) )
3059 : {
3060 : /* do ACELP TC coding instead of GC/VC if onset has been already declared before */
3061 39 : st->sp_aud_decision1 = 0;
3062 39 : move16();
3063 39 : st->coder_type = TRANSITION;
3064 39 : move16();
3065 : }
3066 : ELSE
3067 : {
3068 67 : IF( GE_16( attack, ATT_3LSUB_POS ) )
3069 : {
3070 : /* do ACELP TC coding also if attack is located in the last subframe */
3071 16 : st->sp_aud_decision1 = 0;
3072 16 : move16();
3073 16 : *attack_flag = add( attack, 1 );
3074 16 : move16();
3075 16 : st->coder_type = TRANSITION;
3076 16 : move16();
3077 : }
3078 51 : ELSE IF( GE_16( attack, ATT_SEG_LEN / 2 ) )
3079 : {
3080 : /* do GSC coding if attack is located after the first quarter of the first subframe */
3081 : /* (pre-echo will be treated at the decoder side) */
3084 1 : *attack_flag = add( attack, 1 ); /* the former preceding store of the constant 31 was dead (overwritten here) and has been dropped */
3085 1 : move16();
3086 : }
3087 : }
3088 : }
3089 : }
3090 :
3091 413813 : test();
3092 413813 : test();
3093 413813 : test();
3094 413813 : test();
3095 413813 : IF( EQ_16( st->localVAD, 1 ) && EQ_16( st->coder_type, GENERIC ) && attack > 0 /*&& *attack_flag < 32*/ /*&& st->tc_cnt != 2*/ && !( EQ_16( st->sp_aud_decision2, 1 ) && GT_32( ton, 2726298 /* 0.65f in Q22 */ ) ) )
3096 : {
3097 : /* change ACELP coder_type to TC if attack has been detected */
3098 6727 : st->sp_aud_decision1 = 0;
3099 6727 : move16();
3100 6727 : st->sp_aud_decision2 = 0;
3101 6727 : move16();
3102 :
3103 6727 : st->coder_type = TRANSITION;
3104 6727 : move16();
3105 6727 : *attack_flag = add( attack, 1 );
3106 6727 : move16();
3107 : }
3108 :
3109 : #ifdef DEBUGGING
3110 : if ( st->idchan == 0 && st->coder_type != INACTIVE )
3111 : {
3112 : if ( st->force == FORCE_GSC && element_brate < IVAS_24k4 )
3113 : {
3114 : /* enforce GSC */
3115 : st->sp_aud_decision1 = 1;
3116 : st->sp_aud_decision2 = 0;
3117 : }
3118 : else if ( st->force == FORCE_SPEECH && ( st->sp_aud_decision1 == 1 || st->sp_aud_decision2 == 1 ) )
3119 : {
3120 : if ( element_brate < IVAS_24k4 )
3121 : {
3122 : /* convert TCX to GSC */
3123 : st->sp_aud_decision1 = 1;
3124 : st->sp_aud_decision2 = 0;
3125 : }
3126 : else
3127 : {
3128 : /* convert TCX to ACELP */
3129 : st->sp_aud_decision1 = 0;
3130 : st->sp_aud_decision2 = 0;
3131 : }
3132 : }
3133 : else if ( st->force == FORCE_MUSIC )
3134 : {
3135 : /* enforce TCX */
3136 : st->sp_aud_decision1 = 1;
3137 : st->sp_aud_decision2 = 1;
3138 : }
3139 : }
3140 : #endif
3141 :
3142 : /* set GSC noisy speech flag on unvoiced SWB segments */
3143 413813 : st->GSC_noisy_speech = 0;
3144 413813 : move16();
3145 413813 : test();
3146 413813 : test();
3147 413813 : test();
3148 413813 : test();
3149 413813 : test();
3150 413813 : if ( EQ_16( st->vad_flag, 1 ) && LE_32( element_brate, IVAS_16k4 ) && GT_16( st->lp_noise_fx, 7680 /* 30.0f in Q8 */ ) && st->sp_aud_decision1 == 0 && GE_16( st->bwidth, SWB ) && EQ_16( st->coder_type_raw, UNVOICED ) )
3151 : {
3152 1245 : st->GSC_noisy_speech = 1;
3153 1245 : move16();
3154 : }
3155 :
3156 : /* set GSC submode */
3157 413813 : test();
3158 413813 : test();
3159 413813 : test();
3160 413813 : IF( st->element_mode > EVS_MONO && ( EQ_16( st->sp_aud_decision1, 1 ) && st->sp_aud_decision2 == 0 ) && GT_32( st->total_brate, STEREO_GSC_BIT_RATE_ALLOC ) ) /* below STEREO_GSC_BIT_RATE_ALLOC, fall back on normal GSC */
3161 : {
3162 5185 : st->GSC_IVAS_mode = 1;
3163 5185 : move16();
3164 5185 : IF( st->hSpMusClas->wdlp_0_95_sp_32fx > 0 )
3165 : {
3166 : /* music-like content */
3167 2982 : st->GSC_IVAS_mode = 3;
3168 : }
3169 2203 : ELSE IF( st->tc_cnt > 0 )
3170 : {
3171 : /* likely presence of an onset, GSC bit allocation will be more focused on LF */
3172 248 : st->GSC_IVAS_mode = 2;
3173 : }
3174 5185 : move16();
3175 :
     : /* in this branch the noisy-speech flag set above is recomputed from scratch */
3176 5185 : test();
3177 5185 : IF( EQ_16( st->coder_type_raw, UNVOICED ) && st->sp_aud_decision0 == 0 /*&& st->GSC_IVAS_mode < 3*/ )
3178 : {
3179 131 : st->GSC_noisy_speech = 1;
3180 : }
3181 : ELSE
3182 : {
3183 5054 : st->GSC_noisy_speech = 0;
3184 : }
3185 5185 : move16();
3186 : }
3187 :
3188 : /* set coder_type to AUDIO when GSC is selected (st->core will be set later in the decision matrix) */
3189 413813 : test();
3190 413813 : test();
3191 413813 : IF( ( EQ_16( st->sp_aud_decision1, 1 ) && st->sp_aud_decision2 == 0 ) || st->GSC_noisy_speech )
3192 : {
3193 7409 : st->coder_type = AUDIO;
3194 7409 : move16();
3195 7409 : test();
3196 7409 : if ( st->hGSCEnc != NULL && st->GSC_noisy_speech == 0 ) /* In case of GSC_noisy_speech, NOISE_LEVEL should remain at NOISE_LEVEL_SP3 */
3197 : {
3198 6033 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
3199 6033 : move16();
3200 : }
3201 : }
3202 :
3203 413813 : return;
3204 : }
3205 :
3206 : /*---------------------------------------------------------------------*
3207 : * mode_decision_fx()
3208 : *
3209 : * Derives a binary mode (0 or 1) from the feature history buffers.
     : *
     : * The starting point is the moving average of past decisions
     : * (mode = *dec_mov > 0.5 in Q15). Depending on how many frames are
     : * buffered (len) the decision is refined:
     : *   - len <= 5       : the starting value is returned unchanged;
     : *   - 5 < len < 10   : means/variance over the last len frames may force
     : *                      mode 1;
     : *   - len >= 10      : statistics over the last 10 and last 5 frames may
     : *                      force mode 1 (and set *dec_mov to 32767) or mode 0
     : *                      (and set *dec_mov to 0) with an immediate return;
     : *                      otherwise windows of growing length 10..len-1 are
     : *                      tested, and for a full buffer (len == BUF_LEN) the
     : *                      tonal counts get the last word.
     : * Mode 1 is chosen for low spectral flux, low tilt variance, high peakiness,
     : * high correlation-map sum or many tonal bins - presumably "music", with
     : * mode 0 being "speech"; confirm against the caller.
     : *
     : * All buffers are read with the newest frame at the highest index
     : * (BUF_LEN - 1), except buf_dlp which is read at indices 0..9.
3210 : *---------------------------------------------------------------------*/
3211 :
3212 2039 : static Word16 mode_decision_fx(
3213 : Encoder_State *st, /* i : encoder state structure */
3214 : Word16 len, /* i : buffering status */
3215 : Word16 *dec_mov, /* i/o: moving average of classifier decision Q15*/
3216 : Word16 *buf_flux, /* i : buffer storing spectral energy fluctuation Q7*/
3217 : Word16 *buf_epsP_tilt, /* i : buffer storing LP prediction error tilt Q15*/
3218 : Word16 *buf_pkh, /* i : buffer storing highband spectral peakiness Q1*/
3219 : Word16 *buf_cor_map_sum, /* i : buffer storing correlation map sum Q8*/
3220 : Word16 *buf_Ntonal, /* i : buffer storing No.of 1st spectral tone Q0*/
3221 : Word16 *buf_Ntonal2, /* i : buffer storing No.of 2nd spectral tone Q0*/
3222 : Word16 *buf_Ntonal_lf, /* i : buffer storing low band spectral tone ratio Q0*/
3223 : Word16 *buf_dlp /* i : buffer storing log probability diff between speech and music Q9*/
3224 : )
3225 : {
3226 : Word16 mode;
3227 : Word16 i;
3228 : Word16 voiced_cnt;
3229 : Word16 M_pkh;
3230 : Word16 M_cor_map_sum;
3231 : Word16 M_Ntonal;
3232 : Word16 M_flux;
3233 : Word32 V_epsP_tilt;
3234 : Word16 lf_Ntonal_ratio;
3235 : Word16 tmp, tmp1;
3236 : Word32 L_tmp;
3237 : Word16 inv_len;
3238 : Word16 j;
3239 : Word16 M_flux10;
3240 2039 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
3241 :
3242 :
3243 2039 : mode = *dec_mov > 16384; /* 16384 = 0.5 in Q15 */
3244 2039 : logic16();
3245 2039 : move16();
3246 :
3247 2039 : IF( LE_16( len, 5 ) )
3248 : {
3249 25 : return ( mode ); /* too little history: keep the moving-average decision */
3250 : }
3251 : ELSE
3252 : {
3253 2014 : IF( LT_16( len, 10 ) )
3254 : {
3255 20 : inv_len = div_s( 1, len ); /*Q15 */
3256 :
     : /* means over the newest len frames */
3257 20 : L_tmp = L_deposit_l( 0 );
3258 170 : FOR( i = 0; i < len; i++ )
3259 : {
3260 150 : L_tmp = L_add( L_tmp, buf_pkh[BUF_LEN - len + i] ); /*Q1 */
3261 : }
3262 20 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q1 */
3263 20 : M_pkh = extract_l( L_tmp ); /*Q1 */
3264 :
3265 20 : L_tmp = L_deposit_l( 0 );
3266 170 : FOR( i = 0; i < len; i++ )
3267 : {
3268 150 : L_tmp = L_add( L_tmp, buf_cor_map_sum[BUF_LEN - len + i] ); /*Q8 */
3269 : }
3270 20 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q8 */
3271 20 : M_cor_map_sum = extract_l( L_tmp ); /*Q8 */
3272 :
3273 20 : tmp = 0;
3274 20 : move16();
3275 170 : FOR( i = 0; i < len; i++ )
3276 : {
3277 150 : tmp = add( tmp, shl( buf_Ntonal[BUF_LEN - len + i], 2 ) ); /*Q2 */
3278 : }
3279 20 : M_Ntonal = mult_r( tmp, inv_len ); /*Q2 */
3280 :
3281 20 : V_epsP_tilt = var_fx_32( buf_epsP_tilt + BUF_LEN - len, 15, len ); /*Q31 */
3282 :
     : /* number of positive entries among buf_dlp[4..9] */
3283 20 : voiced_cnt = 0;
3284 20 : move16();
3285 140 : FOR( i = 9; i > 3; i-- )
3286 : {
3287 120 : if ( buf_dlp[i] > 0 )
3288 : {
3289 14 : voiced_cnt = add( voiced_cnt, 1 );
3290 : }
3291 : }
3292 :
3293 20 : test();
3294 20 : test();
3295 20 : test();
3296 20 : test();
3297 20 : IF( ( GT_16( M_pkh, 2200 ) || LT_32( V_epsP_tilt, 171799 ) || GT_16( M_cor_map_sum, 25600 ) ) && LT_16( voiced_cnt, 4 ) ) /* 2200 = 1100 in Q1, 171799 ~= 0.00008 in Q31, 25600 = 100 in Q8 */
3298 : {
3299 4 : mode = 1;
3300 4 : move16();
3301 : }
3302 16 : ELSE IF( GT_16( M_Ntonal, 108 ) && LT_16( voiced_cnt, 4 ) ) /*27 in Q2 */
3303 : {
3304 0 : mode = 1;
3305 0 : move16();
3306 : }
3307 : }
3308 : ELSE
3309 : {
     : /* number of positive entries among buf_dlp[0..9] */
3310 1994 : voiced_cnt = 0;
3311 1994 : move16();
3312 21934 : FOR( i = 0; i < 10; i++ )
3313 : {
3314 19940 : if ( buf_dlp[i] > 0 )
3315 : {
3316 10229 : voiced_cnt = add( voiced_cnt, 1 );
3317 : }
3318 : }
3319 :
3320 1994 : inv_len = 3277; /*Q15 */ /* ~= 1/10 */
3321 1994 : move16();
3322 :
     : /* means over the newest 10 frames */
3323 1994 : L_tmp = L_deposit_l( 0 );
3324 21934 : FOR( i = 0; i < 10; i++ )
3325 : {
3326 19940 : L_tmp = L_add( L_tmp, L_shl( buf_flux[BUF_LEN - 10 + i], 2 ) ); /*Q9 */
3327 : }
3328 1994 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q9 */
3329 1994 : M_flux10 = extract_l( L_tmp ); /*Q9 */
3330 :
3331 1994 : L_tmp = L_deposit_l( 0 );
3332 21934 : FOR( i = 0; i < 10; i++ )
3333 : {
3334 19940 : L_tmp = L_add( L_tmp, buf_pkh[BUF_LEN - 10 + i] ); /*Q1 */
3335 : }
3336 1994 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q1 */
3337 1994 : M_pkh = extract_l( L_tmp ); /*Q1 */
3338 :
3339 1994 : L_tmp = L_deposit_l( 0 );
3340 21934 : FOR( i = 0; i < 10; i++ )
3341 : {
3342 19940 : L_tmp = L_add( L_tmp, buf_cor_map_sum[BUF_LEN - 10 + i] ); /*Q8 */
3343 : }
3344 1994 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q8 */
3345 1994 : M_cor_map_sum = extract_l( L_tmp ); /*Q8 */
3346 :
3347 1994 : V_epsP_tilt = var_fx_32( buf_epsP_tilt + BUF_LEN - 10, 15, 10 ); /*Q31 */
3348 :
     : /* mean flux over the newest 5 frames, kept in tmp */
3349 1994 : L_tmp = L_deposit_l( 0 );
3350 11964 : FOR( i = 0; i < 5; i++ )
3351 : {
3352 9970 : L_tmp = L_add( L_tmp, L_shl( buf_flux[BUF_LEN - 5 + i], 2 ) ); /*Q9 */
3353 : }
3354 1994 : L_tmp = Mult_32_16( L_tmp, 6554 ); /*Q9 */ /* 6554 ~= 1/5 in Q15 */
3355 1994 : tmp = extract_l( L_tmp ); /*Q9 */
3356 :
     : /* strong mode-1 evidence: decide, saturate the moving average and leave */
     : /* (Q9 flux: 4352 = 8.5, 6144 = 12, 7680 = 15; 2147484 ~= 0.001 in Q31; 2100 = 1050 in Q1; 25600 = 100 in Q8) */
3357 1994 : test();
3358 1994 : test();
3359 1994 : test();
3360 1994 : test();
3361 1994 : test();
3362 1994 : test();
3363 1994 : IF( ( LT_16( M_flux10, 4352 ) || ( LT_32( V_epsP_tilt, 2147484 ) && LT_16( M_flux10, 6144 ) ) || GT_16( M_pkh, 2100 ) ||
3364 : GT_16( M_cor_map_sum, 25600 ) ) &&
3365 : LT_16( voiced_cnt, 3 ) && LT_16( tmp, 7680 ) )
3366 : {
3367 233 : mode = 1;
3368 233 : move16();
3369 233 : *dec_mov = 32767;
3370 233 : move16();
3371 233 : return ( mode );
3372 : }
3373 :
     : /* strong mode-0 evidence: decide, reset the moving average and leave */
     : /* (Q9 flux: 8192 = 16, 7680 = 15, 9728 = 19; 2560 = 20 in Q7; index 59 is presumably BUF_LEN - 1, i.e. the newest frame - confirm) */
3374 1761 : test();
3375 1761 : test();
3376 1761 : test();
3377 1761 : test();
3378 1761 : test();
3379 1761 : IF( GT_16( M_flux10, 8192 ) || ( GT_16( M_flux10, 7680 ) && GT_16( voiced_cnt, 2 ) ) || GT_16( tmp, 9728 ) ||
3380 : ( GE_16( buf_flux[59], 2560 ) && GT_16( hSpMusClas->lps_fx, hSpMusClas->lpm_fx ) ) )
3381 : {
3382 1515 : mode = 0;
3383 1515 : move16();
3384 1515 : *dec_mov = 0;
3385 1515 : move16();
3386 1515 : return ( mode );
3387 : }
3388 :
     : /* windows of growing length i; note that the thresholds below depend on len, not on i */
3389 5883 : FOR( i = 10; i < len; i++ )
3390 : {
3391 5762 : inv_len = div_s( 1, i ); /*Q15 */
3392 :
3393 5762 : L_tmp = L_deposit_l( 0 );
3394 197909 : FOR( j = 0; j < i; j++ )
3395 : {
3396 192147 : L_tmp = L_add( L_tmp, L_shl( buf_flux[BUF_LEN - i + j], 2 ) ); /*Q9 */
3397 : }
3398 5762 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q9 */
3399 5762 : M_flux = extract_l( L_tmp ); /*Q9 */
3400 :
3401 5762 : L_tmp = L_deposit_l( 0 );
3402 197909 : FOR( j = 0; j < i; j++ )
3403 : {
3404 192147 : L_tmp = L_add( L_tmp, buf_pkh[BUF_LEN - i + j] ); /*Q1 */
3405 : }
3406 5762 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q1 */
3407 5762 : M_pkh = extract_l( L_tmp ); /*Q1 */
3408 :
3409 5762 : L_tmp = L_deposit_l( 0 );
3410 197909 : FOR( j = 0; j < i; j++ )
3411 : {
3412 192147 : L_tmp = L_add( L_tmp, buf_cor_map_sum[BUF_LEN - i + j] ); /*Q8 */
3413 : }
3414 5762 : L_tmp = Mult_32_16( L_tmp, inv_len ); /*Q8 */
3415 5762 : M_cor_map_sum = extract_l( L_tmp ); /*Q8 */
3416 :
3417 5762 : V_epsP_tilt = var_fx_32( buf_epsP_tilt + BUF_LEN - i, 15, i ); /*Q31 */
3418 :
3419 5762 : test();
3420 5762 : test();
3421 5762 : test();
3422 5762 : test();
3423 5762 : test();
3424 5762 : IF( ( ( LT_16( M_flux, add( 6144, mult_r( 1638, shl( sub( len, 10 ), 9 ) ) ) ) && LT_16( M_flux10, 7680 ) ) ||
3425 : LT_32( V_epsP_tilt, L_add( 214748, L_shl( L_mult0( 19327, ( len - 10 ) ), 1 ) ) ) ||
3426 : GT_16( M_pkh, sub( 2100, extract_l( L_mult0( 10, sub( len, 10 ) ) ) ) ) ||
3427 : GT_16( M_cor_map_sum, sub( 24320, extract_l( L_mult0( 77, sub( len, 10 ) ) ) ) ) ) &&
3428 : LT_16( voiced_cnt, 3 ) )
3429 : {
3430 125 : mode = 1;
3431 125 : move16();
3432 125 : return ( mode ); /* *dec_mov is left untouched on this path */
3433 : }
3434 : }
3435 :
     : /* full buffer: let the tonal statistics over all BUF_LEN frames decide */
3436 121 : IF( EQ_16( len, BUF_LEN ) )
3437 : {
3438 107 : tmp = 0;
3439 107 : move16();
3440 6527 : FOR( i = 0; i < len; i++ )
3441 : {
3442 6420 : tmp = add( tmp, shl( buf_Ntonal[i], 2 ) ); /*Q2 */
3443 : }
3444 107 : M_Ntonal = mult_r( tmp, 546 ); /*Q2 */ /* 546 ~= 1/60 in Q15, presumably 1/BUF_LEN - confirm */
3445 :
3446 107 : tmp = 0;
3447 107 : move16();
3448 6527 : FOR( i = 0; i < len; i++ )
3449 : {
3450 6420 : tmp = add( tmp, buf_Ntonal_lf[i] ); /*Q0 */
3451 : }
3452 107 : tmp1 = 0;
3453 107 : move16();
3454 6527 : FOR( i = 0; i < len; i++ )
3455 : {
3456 6420 : tmp1 = add( tmp1, buf_Ntonal2[i] ); /*Q0 */
3457 : }
3458 107 : lf_Ntonal_ratio = 0;
3459 107 : move16(); /*Q15 */
3460 107 : if ( tmp1 != 0 ) /* guards the division; the ratio stays 0 when no tonal bins were counted */
3461 : {
3462 107 : lf_Ntonal_ratio = div_s( tmp, tmp1 ); /*Q15 */
3463 : }
3464 :
3465 107 : test();
3466 107 : IF( GT_16( M_Ntonal, 72 ) || LT_16( lf_Ntonal_ratio, 6554 ) ) /* 72 = 18 in Q2, 6554 ~= 0.2 in Q15 */
3467 : {
3468 0 : mode = 1;
3469 0 : move16();
3470 : }
3471 107 : ELSE IF( LT_16( M_Ntonal, 4 ) ) /* 4 = 1 in Q2 */
3472 : {
3473 0 : mode = 0;
3474 0 : move16();
3475 : }
3476 : }
3477 : }
3478 : }
3479 :
3480 141 : return ( mode );
3481 : }
3482 :
3483 : /*---------------------------------------------------------------------*
3484 : * tonal_dist_fx()
3485 : *
3486 : * Counts tonal bins in the peakiness map and appends the results to the
     : * history buffers.
     : *
     : * From p2v_map[0..126] the function derives
     : *   - Ntonal    : number of bins above 7040 (55 in Q7), bins 0..126;
     : *   - Ntonal2   : number of bins above 10240 (80 in Q7), bins 0..126;
     : *   - Ntonal_lf : number of bins above 10240 within bins 0..63 only;
     : *   - pk        : sum of the map over bins 64..126.
     : * Each of the four buffers is then shifted by one position towards index 0
     : * and the new value is written at index BUF_LEN - 1.
3487 : *---------------------------------------------------------------------*/
3488 :
3489 2039 : static void tonal_dist_fx(
3490 : Word16 *p2v_map, /* i : spectral peakiness map Q7*/
3491 : Word16 *buf_pkh, /* i/o: buffer storing highband spectral peakiness Q1*/
3492 : Word16 *buf_Ntonal, /* i/o: buffer storing No.of 1st spectral tone Q0*/
3493 : Word16 *buf_Ntonal2, /* i/o: buffer storing No.of 2nd spectral tone Q0*/
3494 : Word16 *buf_Ntonal_lf /* i/o: buffer storing No.of 2nd spectral tone in the low band (bins 0..63) Q0*/
3495 : )
3496 : {
3497 : Word16 i;
3498 : Word32 pk;
3499 : Word16 Ntonal;
3500 : Word16 Ntonal2;
3501 : Word16 Ntonal_lf;
3502 :
3503 :
3504 : /* find number of tonals, number of tonals at low-band,
3505 : spectral peakiness at high-band */
3506 2039 : pk = L_deposit_l( 0 );
3507 2039 : Ntonal = 0;
3508 2039 : move16();
3509 2039 : Ntonal2 = 0;
3510 2039 : move16();
3511 2039 : Ntonal_lf = 0;
3512 2039 : move16();
3513 132535 : FOR( i = 0; i < 64; i++ ) /* low band: every strong tonal bin also counts as a low-band tonal */
3514 : {
3515 130496 : if ( GT_16( p2v_map[i], 7040 ) ) /* 55 in Q7 */
3516 : {
3517 9823 : Ntonal = add( Ntonal, 1 );
3518 : }
3519 :
3520 130496 : IF( GT_16( p2v_map[i], 10240 ) ) /* 80 in Q7 */
3521 : {
3522 5685 : Ntonal2 = add( Ntonal2, 1 );
3523 5685 : Ntonal_lf = add( Ntonal_lf, 1 );
3524 : }
3525 : }
3526 :
3527 130496 : FOR( i = 64; i < 127; i++ ) /* high band: 63 bins, peakiness is accumulated here only */
3528 : {
3529 128457 : if ( p2v_map[i] != 0 )
3530 : {
3531 33780 : pk = L_add( pk, p2v_map[i] ); /*Q7 */
3532 : }
3533 128457 : if ( GT_16( p2v_map[i], 7040 ) )
3534 : {
3535 4269 : Ntonal = add( Ntonal, 1 );
3536 : }
3537 128457 : if ( GT_16( p2v_map[i], 10240 ) )
3538 : {
3539 1529 : Ntonal2 = add( Ntonal2, 1 );
3540 : }
3541 : }
3542 :
3543 : /* update buffers (drop the oldest entry at index 0) */
3544 122340 : FOR( i = 0; i < BUF_LEN - 1; i++ )
3545 : {
3546 120301 : buf_pkh[i] = buf_pkh[i + 1];
3547 120301 : move16();
3548 120301 : buf_Ntonal[i] = buf_Ntonal[i + 1];
3549 120301 : move16();
3550 120301 : buf_Ntonal2[i] = buf_Ntonal2[i + 1];
3551 120301 : move16();
3552 120301 : buf_Ntonal_lf[i] = buf_Ntonal_lf[i + 1];
3553 120301 : move16();
3554 : }
3555 :
     : /* i == BUF_LEN - 1 here: append the values of the current frame */
3556 2039 : buf_pkh[i] = extract_l( L_shr_r( pk, 6 ) ); /*Q1 */ /* rounded shift Q7 -> Q1; NOTE(review): unlike the three stores below, this array store has no move16() - confirm whether the complexity count is intended */
3557 2039 : buf_Ntonal[i] = Ntonal;
3558 2039 : move16(); /*Q0 */
3559 2039 : buf_Ntonal2[i] = Ntonal2;
3560 2039 : move16(); /*Q0 */
3561 2039 : buf_Ntonal_lf[i] = Ntonal_lf;
3562 2039 : move16(); /*Q0 */
3563 :
3564 2039 : return;
3565 : }
3566 :
3567 : /*---------------------------------------------------------------------*
3568 : * flux_fx()
3569 : *
3570 : *
3571 : *---------------------------------------------------------------------*/
3572 :
3573 2039 : static void flux_fx(
3574 : Word16 *Bin_E, /* i : log energy spectrum of the current frame Q7*/
3575 : Word16 *p2v_map, /* i : spectral peakiness map Q7*/
3576 : Word16 *old_Bin_E, /* i/o: log energy spectrum of the frame 60ms ago Q7*/
3577 : Word16 *buf_flux, /* i/o: buffer storing spectral energy fluctuation Q7*/
3578 : Word16 attack_hangover, /* i/o: hangover preventing flux buffering Q0*/
3579 : Word16 dec_mov /* i/o: moving average of classifier decision Q15*/
3580 : )
3581 : {
3582 : Word16 i;
3583 : Word16 *pt1, *pt2, *pt3, *pt4, *pt5, *pt6;
3584 : Word16 flux;
3585 : Word32 L_flux;
3586 : Word16 cnt;
3587 : Word16 tmp;
3588 :
3589 : /* calculate flux */
3590 2039 : L_flux = L_deposit_l( 0 );
3591 2039 : cnt = 0;
3592 2039 : move16();
3593 87677 : FOR( i = 0; i < N_OLD_BIN_E; i++ )
3594 : {
3595 85638 : IF( p2v_map[i] != 0 )
3596 : {
3597 20945 : L_flux = L_add_sat( L_flux, abs_s( sub_sat( Bin_E[i], old_Bin_E[i] ) ) ); /*Q7 */
3598 : }
3599 85638 : if ( p2v_map[i] != 0 )
3600 : {
3601 20945 : cnt = add( cnt, 1 );
3602 : }
3603 : }
3604 :
3605 2039 : flux = 640;
3606 2039 : move16(); /*5 in Q7 */
3607 2039 : IF( cnt != 0 )
3608 : {
3609 2032 : tmp = div_s( 1, cnt ); /*Q15 */
3610 2032 : flux = extract_l( Mult_32_16( L_flux, tmp ) ); /*Q7 */
3611 : }
3612 :
3613 2039 : test();
3614 2039 : if ( GT_16( flux, 2560 ) && GT_16( dec_mov, 26214 ) )
3615 : {
3616 55 : flux = 2560;
3617 55 : move16(); /*20 in Q7 */
3618 : }
3619 :
3620 : /* update old Bin_E buffer */
3621 2039 : pt1 = old_Bin_E;
3622 2039 : pt2 = old_Bin_E + N_OLD_BIN_E;
3623 2039 : pt3 = Bin_E;
3624 2039 : pt4 = old_Bin_E + N_OLD_BIN_E;
3625 2039 : pt5 = old_Bin_E + 2 * N_OLD_BIN_E;
3626 2039 : pt6 = old_Bin_E + 2 * N_OLD_BIN_E;
3627 :
3628 87677 : FOR( i = 0; i < N_OLD_BIN_E; i++ )
3629 : {
3630 85638 : *pt1++ = *pt2++;
3631 85638 : move16();
3632 85638 : *pt4++ = *pt5++;
3633 85638 : move16();
3634 85638 : *pt6++ = *pt3++;
3635 85638 : move16();
3636 : }
3637 : /* update flux buffer */
3638 2039 : IF( attack_hangover <= 0 )
3639 : {
3640 122340 : FOR( i = 0; i < BUF_LEN - 1; i++ )
3641 : {
3642 120301 : buf_flux[i] = buf_flux[i + 1];
3643 120301 : move16();
3644 : }
3645 2039 : buf_flux[i] = flux;
3646 2039 : move16();
3647 : }
3648 :
3649 2039 : return;
3650 : }
3651 :
3652 : /*---------------------------------------------------------------------*
3653 : * spec_analysis_fx()
3654 : *
3655 : *
3656 : *---------------------------------------------------------------------*/
3657 :
3658 2039 : static void spec_analysis_fx(
3659 : Word16 *Bin_E, /* i : log energy spectrum of the current frame Q7*/
3660 : Word16 *p2v_map /* o : spectral peakiness map Q7*/
3661 : )
3662 : {
3663 : Word16 i, k, m;
3664 : Word16 peak[65];
3665 : Word16 valley[65];
3666 : Word16 peak_idx[65];
3667 : Word16 valey_idx[65];
3668 : Word16 p2v[65];
3669 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
3670 2039 : Flag Overflow = 0;
3671 2039 : move32();
3672 : #endif
3673 :
3674 : /* find spectral peaks */
3675 2039 : k = 0;
3676 2039 : move16();
3677 256914 : FOR( i = 1; i < L_FFT / 2 - 2; i++ )
3678 : {
3679 254875 : test();
3680 254875 : IF( GT_16( Bin_E[i], Bin_E[i - 1] ) && GT_16( Bin_E[i], Bin_E[i + 1] ) )
3681 : {
3682 67953 : peak[k] = Bin_E[i];
3683 67953 : move16();
3684 67953 : peak_idx[k] = i;
3685 67953 : move16();
3686 67953 : k = add( k, 1 );
3687 : }
3688 : }
3689 2039 : assert( k + 1 < 65 );
3690 2039 : peak_idx[k] = -1;
3691 2039 : move16();
3692 2039 : peak_idx[k + 1] = -1;
3693 2039 : move16();
3694 2039 : IF( k == 0 )
3695 : {
3696 0 : FOR( i = 0; i < 127; i++ )
3697 : {
3698 0 : p2v_map[i] = 0;
3699 0 : move16();
3700 : }
3701 :
3702 0 : return;
3703 : }
3704 :
3705 : /* find spectral valleys */
3706 2039 : m = 0;
3707 2039 : move16();
3708 :
3709 2039 : IF( LT_16( Bin_E[0], Bin_E[1] ) )
3710 : {
3711 1106 : valley[0] = Bin_E[0];
3712 1106 : move16();
3713 1106 : valey_idx[0] = 0;
3714 1106 : move16();
3715 1106 : m = add( m, 1 );
3716 : }
3717 :
3718 2039 : k = 126;
3719 2039 : move16();
3720 3597 : FOR( i = 125; i >= 0; i-- )
3721 : {
3722 3597 : IF( LE_16( Bin_E[i + 1], Bin_E[i] ) )
3723 : {
3724 2039 : BREAK;
3725 : }
3726 1558 : k = i;
3727 1558 : move16();
3728 : }
3729 :
3730 255356 : FOR( i = 1; i < k; i++ )
3731 : {
3732 253317 : test();
3733 253317 : IF( LT_16( Bin_E[i], Bin_E[i - 1] ) && LT_16( Bin_E[i], Bin_E[i + 1] ) )
3734 : {
3735 66725 : valley[m] = Bin_E[i];
3736 66725 : move16();
3737 66725 : valey_idx[m] = i;
3738 66725 : move16();
3739 66725 : m = add( m, 1 );
3740 : }
3741 : }
3742 2039 : valley[m] = Bin_E[k];
3743 2039 : move16();
3744 2039 : valey_idx[m] = k;
3745 2039 : move16();
3746 :
3747 : /* find spectral peak to valley distances */
3748 2039 : k = 0;
3749 2039 : move16();
3750 69870 : FOR( i = 0; i < m; i++ )
3751 : {
3752 67831 : test();
3753 67831 : IF( GT_16( peak_idx[k], valey_idx[i] ) && LT_16( peak_idx[k], valey_idx[i + 1] ) )
3754 : {
3755 66367 : p2v[k] = sub_o( shl_o( peak[k], 1, &Overflow ), add_o( valley[i], valley[i + 1], &Overflow ), &Overflow );
3756 66367 : move16();
3757 66367 : k = add( k, 1 );
3758 : }
3759 : }
3760 :
3761 260992 : FOR( i = 0; i < 127; i++ )
3762 : {
3763 258953 : p2v_map[i] = 0;
3764 258953 : move16();
3765 : }
3766 :
3767 68406 : FOR( i = 0; i < k; i++ )
3768 : {
3769 66367 : p2v_map[peak_idx[i]] = p2v[i];
3770 66367 : move16();
3771 : }
3772 : }
3773 :
3774 2050 : static void music_mixed_classif_improv_fx(
3775 : Encoder_State *st, /* i : encoder state structure */
3776 : const Word16 *new_inp, /* i : new input signal */
3777 : const Word32 *epsP, /* i : LP prediciton error Q_epsP*/
3778 : Word16 Q_epsP,
3779 : Word16 etot, /* i : total frame energy Q8*/
3780 : Word16 old_cor, /* i : normalized correlation Q15*/
3781 : Word16 cor_map_sum /* i : correlation map sum Q8*/
3782 : )
3783 : {
3784 : Word16 i, max_spl, dec, len, percus_flag, lt_diff, log_max_spl, epsP_tilt, p2v_map[128];
3785 : Word16 exp, frac, expn, fracn, expd, fracd, scale;
3786 : Word16 tmp;
3787 2050 : Word32 L_tmp, ftmp, ftmp1, epsP_max = MIN_32;
3788 2050 : move32();
3789 2050 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
3790 :
3791 : /* find sample with maximum absolute amplitude */
3792 2050 : max_spl = 0;
3793 2050 : move16();
3794 526850 : FOR( i = 0; i < L_FRAME; i++ )
3795 : {
3796 524800 : max_spl = s_max( abs_s( new_inp[i] ), max_spl );
3797 : }
3798 :
3799 : /* music is considered only appearing in high SNR condition and active signal */
3800 2050 : test();
3801 2050 : IF( st->vad_flag == 0 || LT_16( sub( st->lp_speech_fx, st->lp_noise_fx ), 6400 ) ) /* 25 in Q8 */
3802 : {
3803 : /* st->dec_mov = 0.5f; */
3804 : /* st->dec_mov1 = 0.5f; */
3805 11 : hSpMusClas->dec_mov_fx = 16384;
3806 11 : move16();
3807 11 : hSpMusClas->dec_mov1_fx = 16384;
3808 11 : move16();
3809 :
3810 11 : if ( st->vad_flag == 0 )
3811 : {
3812 11 : hSpMusClas->onset_cnt = 0;
3813 11 : move16();
3814 : }
3815 :
3816 11 : return;
3817 : }
3818 :
3819 2039 : hSpMusClas->onset_cnt = add( hSpMusClas->onset_cnt, 1 );
3820 2039 : hSpMusClas->onset_cnt = s_min( hSpMusClas->onset_cnt, 9 );
3821 :
3822 2039 : IF( EQ_16( hSpMusClas->onset_cnt, 1 ) )
3823 : {
3824 5 : set16_fx( hSpMusClas->buf_flux_fx, -12800, BUF_LEN ); /*-100.0 in Q7 */
3825 : }
3826 :
3827 : /* spectral analysis */
3828 2039 : spec_analysis_fx( st->lgBin_E_fx, p2v_map );
3829 :
3830 : /* percussive music detection */
3831 2039 : log_max_spl = 0;
3832 2039 : move16();
3833 2039 : IF( max_spl )
3834 : {
3835 2039 : L_tmp = L_deposit_h( max_spl ); /*Q16 */
3836 2039 : exp = norm_l( L_tmp );
3837 2039 : frac = Log2_norm_lc( L_shl( L_tmp, exp ) );
3838 2039 : exp = sub( sub( 30, exp ), 16 );
3839 2039 : L_tmp = Mpy_32_16( exp, frac, 28391 ); /*Q12 */
3840 2039 : log_max_spl = round_fx( L_shl( L_tmp, 11 ) ); /*Q7 */
3841 : }
3842 :
3843 2039 : lt_diff = sub( log_max_spl, hSpMusClas->mov_log_max_spl_fx ); /*Q7 */
3844 :
3845 8156 : FOR( i = 0; i < 3; i++ )
3846 : {
3847 6117 : hSpMusClas->buf_etot_fx[i] = hSpMusClas->buf_etot_fx[i + 1];
3848 6117 : move16(); /*Q8 */
3849 : }
3850 2039 : hSpMusClas->buf_etot_fx[i] = etot;
3851 2039 : move16(); /*Q8 */
3852 :
3853 2039 : percus_flag = 0;
3854 2039 : move16();
3855 2039 : test();
3856 2039 : test();
3857 2039 : IF( GT_16( sub( hSpMusClas->buf_etot_fx[1], hSpMusClas->buf_etot_fx[0] ), 1536 ) &&
3858 : LT_16( hSpMusClas->buf_etot_fx[2], hSpMusClas->buf_etot_fx[1] ) &&
3859 : GT_16( sub( hSpMusClas->buf_etot_fx[1], st->lp_speech_fx ), 768 ) ) /* 3 in Q8 */
3860 : {
3861 : /*tmp = add(shr(voicing[0],2),shr(voicing[1],2)); //Q15 */
3862 : /*tmp = add(tmp,shr(old_cor,1)); //Q15 */
3863 15 : tmp = mac_r( L_mac( L_mult( st->voicing_fx[0], 8192 ), st->voicing_fx[1], 8192 ), old_cor, 16384 );
3864 15 : test();
3865 15 : test();
3866 15 : IF( GT_16( sub( hSpMusClas->buf_etot_fx[1], hSpMusClas->buf_etot_fx[3] ), 768 ) &&
3867 : LT_16( hSpMusClas->buf_etot_fx[3], hSpMusClas->buf_etot_fx[2] ) &&
3868 : LT_16( tmp, 24576 ) ) /* 0.75 in Q15 */
3869 : {
3870 4 : IF( GT_16( hSpMusClas->dec_mov_fx, 26214 ) ) /* 0.8 in Q15 */
3871 : {
3872 2 : percus_flag = 1;
3873 2 : move16();
3874 : }
3875 : ELSE
3876 : {
3877 2 : test();
3878 2 : test();
3879 2 : test();
3880 2 : if ( LT_16( old_cor, 24576 ) && LT_16( st->voicing_fx[0], 24576 ) && LT_16( st->voicing_fx[1], 24576 ) && GT_16( hSpMusClas->old_lt_diff_fx[0], 1280 ) )
3881 : {
3882 0 : percus_flag = 1;
3883 0 : move16();
3884 : }
3885 : }
3886 : }
3887 : }
3888 :
3889 : /* sound attack detection */
3890 2039 : test();
3891 2039 : test();
3892 2039 : test();
3893 2039 : if ( GT_16( sub( hSpMusClas->buf_etot_fx[3], hSpMusClas->buf_etot_fx[2] ), 1536 ) && GT_16( hSpMusClas->dec_mov_fx, 29491 ) && GT_16( sub( etot, st->lp_speech_fx ), 1280 ) && GT_16( hSpMusClas->old_lt_diff_fx[0], 640 ) )
3894 : {
3895 0 : hSpMusClas->attack_hangover = 3;
3896 0 : move16();
3897 : }
3898 :
3899 2039 : test();
3900 2039 : IF( GT_16( st->voicing_fx[0], 29491 ) && GT_16( st->voicing_fx[1], 29491 ) )
3901 : {
3902 550 : IF( GT_16( log_max_spl, hSpMusClas->mov_log_max_spl_fx ) )
3903 : {
3904 : /**mov_log_max_spl = add(mult_r(31130,(*mov_log_max_spl)),mult_r(1638,log_max_spl)); //Q7 */
3905 34 : hSpMusClas->mov_log_max_spl_fx = round_fx( L_mac( L_mult( 31130, hSpMusClas->mov_log_max_spl_fx ), 1638, log_max_spl ) ); /*Q7 */
3906 : }
3907 : ELSE
3908 : {
3909 : /**mov_log_max_spl = add(mult_r(32604,(*mov_log_max_spl)),mult_r(164,log_max_spl)); //Q7 */
3910 516 : hSpMusClas->mov_log_max_spl_fx = round_fx( L_mac( L_mult( 32604, hSpMusClas->mov_log_max_spl_fx ), 164, log_max_spl ) ); /*Q7 */
3911 : }
3912 : }
3913 :
3914 2039 : hSpMusClas->old_lt_diff_fx[0] = hSpMusClas->old_lt_diff_fx[1];
3915 2039 : move16(); /*Q7 */
3916 2039 : hSpMusClas->old_lt_diff_fx[1] = lt_diff;
3917 2039 : move16(); /*Q7 */
3918 :
3919 : /* calculate and buffer spectral energy fluctuation */
3920 2039 : flux_fx( st->lgBin_E_fx, p2v_map, hSpMusClas->old_Bin_E_fx, hSpMusClas->buf_flux_fx, hSpMusClas->attack_hangover, hSpMusClas->dec_mov_fx );
3921 :
3922 2039 : hSpMusClas->attack_hangover = sub( hSpMusClas->attack_hangover, 1 );
3923 2039 : move16();
3924 2039 : hSpMusClas->attack_hangover = s_max( hSpMusClas->attack_hangover, 0 );
3925 2039 : move16();
3926 :
3927 : /* identify flux buffer buffering status */
3928 2039 : len = 0;
3929 2039 : move16();
3930 115700 : FOR( i = BUF_LEN - 1; i >= 0; i-- )
3931 : {
3932 113938 : IF( hSpMusClas->buf_flux_fx[i] < 0 )
3933 : {
3934 277 : BREAK;
3935 : }
3936 :
3937 113661 : len = add( len, 1 );
3938 : }
3939 :
3940 : /* reset flux buffer if percussive music is detected */
3941 2039 : IF( EQ_16( percus_flag, 1 ) )
3942 : {
3943 2 : set16_fx( &hSpMusClas->buf_flux_fx[BUF_LEN - len], 640, len ); /* 5 in Q7 */
3944 : }
3945 :
3946 : /* calculate and buffer the tilt of residual LP energies */
3947 2039 : ftmp = 0;
3948 2039 : move16();
3949 2039 : ftmp1 = 0;
3950 2039 : move16();
3951 34663 : FOR( i = 1; i <= 16; i++ )
3952 : {
3953 32624 : epsP_max = L_max( epsP_max, epsP[i] );
3954 : }
3955 :
3956 32624 : FOR( i = 1; i < 16; i++ )
3957 : {
3958 30585 : IF( EQ_32( epsP[i], epsP_max ) )
3959 : {
3960 2039 : tmp = -32768;
3961 2039 : move16();
3962 2039 : L_tmp = Mult_32_16( epsP[i], tmp ); /* Q_epsP */
3963 2039 : ftmp = L_sub( ftmp, L_shr( L_tmp, 4 ) ); /* Q(Q_epsP-4) */
3964 : }
3965 : ELSE
3966 : {
3967 28546 : expn = norm_l( epsP[i] );
3968 28546 : fracn = extract_h( L_shl( epsP[i], expn ) );
3969 28546 : expn = sub( sub( 30, expn ), Q_epsP );
3970 :
3971 28546 : expd = norm_l( epsP_max );
3972 28546 : fracd = extract_h( L_shl( epsP_max, expd ) );
3973 28546 : expd = sub( sub( 30, expd ), Q_epsP );
3974 :
3975 28546 : scale = shr( sub( fracd, fracn ), 15 );
3976 28546 : fracn = shl( fracn, scale );
3977 28546 : expn = sub( expn, scale );
3978 :
3979 28546 : tmp = div_s( fracn, fracd ); /*Q(15+expd-expn) */
3980 28546 : tmp = shl( tmp, sub( expn, expd ) ); /*Q15 */
3981 :
3982 28546 : L_tmp = Mult_32_16( epsP[i], tmp ); /*Q_epsP */
3983 28546 : ftmp = L_add( ftmp, L_shr( L_tmp, 4 ) ); /*Q(Q_epsP-4) */
3984 : }
3985 : }
3986 :
3987 32624 : FOR( i = 1; i < 16; i++ )
3988 : {
3989 30585 : IF( EQ_32( epsP[i], epsP_max ) )
3990 : {
3991 2039 : tmp = -32768;
3992 2039 : move16();
3993 2039 : L_tmp = Mult_32_16( epsP[i + 1], tmp ); /*Q_epsP */
3994 2039 : ftmp1 = L_sub( ftmp1, L_shr( L_tmp, 4 ) ); /*Q(Q_epsP-4) */
3995 : }
3996 28546 : ELSE IF( EQ_32( epsP[i + 1], epsP_max ) )
3997 : {
3998 0 : tmp = -32768;
3999 0 : move16();
4000 0 : L_tmp = Mult_32_16( epsP[i], tmp ); /*Q_epsP */
4001 0 : ftmp1 = L_sub( ftmp1, L_shr( L_tmp, 4 ) ); /*Q(Q_epsP-4) */
4002 : }
4003 : ELSE
4004 : {
4005 28546 : expn = norm_l( epsP[i] );
4006 28546 : fracn = extract_h( L_shl( epsP[i], expn ) );
4007 28546 : expn = sub( sub( 30, expn ), Q_epsP );
4008 :
4009 28546 : expd = norm_l( epsP_max );
4010 28546 : fracd = extract_h( L_shl( epsP_max, expd ) );
4011 28546 : expd = sub( sub( 30, expd ), Q_epsP );
4012 :
4013 28546 : scale = shr( sub( fracd, fracn ), 15 );
4014 28546 : fracn = shl( fracn, scale );
4015 28546 : expn = sub( expn, scale );
4016 :
4017 28546 : tmp = div_s( fracn, fracd ); /*Q(15+expd-expn) */
4018 28546 : tmp = shl( tmp, sub( expn, expd ) ); /*Q15 */
4019 :
4020 28546 : L_tmp = Mult_32_16( epsP[i + 1], tmp ); /*Q_epsP */
4021 28546 : ftmp1 = L_add( ftmp1, L_shr( L_tmp, 4 ) ); /*Q(Q_epsP-4) */
4022 : }
4023 : }
4024 :
4025 : /* epsP_tilt = ftmp1/ftmp; */
4026 2039 : expn = norm_l( ftmp1 );
4027 2039 : fracn = extract_h( L_shl( ftmp1, expn ) );
4028 2039 : expn = sub( sub( 30, expn ), Q_epsP - 4 );
4029 :
4030 2039 : expd = norm_l( ftmp );
4031 2039 : fracd = round_fx_sat( L_shl( ftmp, expd ) );
4032 2039 : expd = sub( sub( 30, expd ), sub( Q_epsP, 4 ) );
4033 :
4034 2039 : scale = shr( sub( fracd, fracn ), 15 );
4035 2039 : fracn = shl( fracn, scale );
4036 2039 : expn = sub( expn, scale );
4037 :
4038 2039 : tmp = div_s( fracn, fracd ); /*Q(15+expd-expn) */
4039 :
4040 2039 : epsP_tilt = shl( tmp, sub( expn, expd ) ); /*Q15 */
4041 :
4042 122340 : FOR( i = 0; i < BUF_LEN - 1; i++ )
4043 : {
4044 120301 : hSpMusClas->buf_epsP_tilt_fx[i] = hSpMusClas->buf_epsP_tilt_fx[i + 1];
4045 120301 : move16(); /*Q15 */
4046 : }
4047 2039 : hSpMusClas->buf_epsP_tilt_fx[i] = epsP_tilt;
4048 2039 : move16(); /*Q15 */
4049 :
4050 : /* calculate and buffer highband spectral peakness */
4051 2039 : tonal_dist_fx( p2v_map, hSpMusClas->buf_pkh_fx, hSpMusClas->buf_Ntonal_fx, hSpMusClas->buf_Ntonal2_fx, hSpMusClas->buf_Ntonal_lf_fx );
4052 :
4053 : /* buffer sum of correlation map */
4054 122340 : FOR( i = 0; i < BUF_LEN - 1; i++ )
4055 : {
4056 120301 : hSpMusClas->buf_cor_map_sum_fx[i] = hSpMusClas->buf_cor_map_sum_fx[i + 1];
4057 120301 : move16(); /*Q8 */
4058 : }
4059 2039 : hSpMusClas->buf_cor_map_sum_fx[i] = cor_map_sum;
4060 2039 : move16(); /*Q8 */
4061 :
4062 : /* buffer voicing metric */
4063 20390 : FOR( i = 0; i < 9; i++ )
4064 : {
4065 18351 : hSpMusClas->buf_dlp_fx[i] = hSpMusClas->buf_dlp_fx[i + 1];
4066 18351 : move16();
4067 : }
4068 2039 : hSpMusClas->buf_dlp_fx[i] = sub( hSpMusClas->lps_fx, hSpMusClas->lpm_fx );
4069 2039 : move16(); /*Q9 */
4070 :
4071 : /* classification */
4072 2039 : dec = mode_decision_fx( st, len, &hSpMusClas->dec_mov_fx, hSpMusClas->buf_flux_fx, hSpMusClas->buf_epsP_tilt_fx, hSpMusClas->buf_pkh_fx,
4073 2039 : hSpMusClas->buf_cor_map_sum_fx, hSpMusClas->buf_Ntonal_fx, hSpMusClas->buf_Ntonal2_fx, hSpMusClas->buf_Ntonal_lf_fx,
4074 2039 : hSpMusClas->buf_dlp_fx );
4075 2039 : move16();
4076 :
4077 : /* update long term moving average of the classification decisions */
4078 2039 : IF( GT_16( len, 30 ) )
4079 : {
4080 1889 : IF( dec == 0 )
4081 : {
4082 1509 : hSpMusClas->dec_mov_fx = mult_r( 31785, hSpMusClas->dec_mov_fx ); /*Q15 */
4083 1509 : hSpMusClas->dec_mov1_fx = mult_r( 31785, hSpMusClas->dec_mov1_fx ); /*Q15 */
4084 : }
4085 : ELSE
4086 : {
4087 380 : hSpMusClas->dec_mov_fx = add( mult_r( 31785, hSpMusClas->dec_mov_fx ), 983 ); /*Q15 */
4088 380 : hSpMusClas->dec_mov1_fx = add( mult_r( 31785, hSpMusClas->dec_mov1_fx ), 983 ); /*Q15 */
4089 : }
4090 1889 : move16();
4091 1889 : move16();
4092 : }
4093 :
4094 : /* update long term unvoiced counter */
4095 2039 : test();
4096 2039 : test();
4097 2039 : test();
4098 2039 : IF( ( EQ_16( st->coder_type_raw, UNVOICED ) || EQ_16( st->coder_type_raw, INACTIVE ) ) &&
4099 : GT_16( etot, 384 ) && LT_16( hSpMusClas->buf_Ntonal2_fx[59], 2 ) )
4100 : {
4101 70 : hSpMusClas->UV_cnt1 = sub( hSpMusClas->UV_cnt1, 8 );
4102 : }
4103 : ELSE
4104 : {
4105 1969 : hSpMusClas->UV_cnt1 = add( hSpMusClas->UV_cnt1, 1 );
4106 : }
4107 2039 : move16();
4108 :
4109 2039 : hSpMusClas->UV_cnt1 = s_min( hSpMusClas->UV_cnt1, 300 );
4110 2039 : move16();
4111 2039 : hSpMusClas->UV_cnt1 = s_max( hSpMusClas->UV_cnt1, 0 );
4112 2039 : move16();
4113 :
4114 : /**LT_UV_cnt1 = add(mult_r(29491,*LT_UV_cnt1),mult_r(3277,shl(*UV_cnt1,6)));*/ /* Q6 */
4115 2039 : hSpMusClas->LT_UV_cnt1_fx = round_fx( L_mac( L_mult( 29491, hSpMusClas->LT_UV_cnt1_fx ), 3277, shl( hSpMusClas->UV_cnt1, 6 ) ) ); /*Q6 */
4116 2039 : move16();
4117 : /* revert classification decision due to long-term unvoiced counter */
4118 2039 : test();
4119 2039 : test();
4120 2039 : if ( EQ_16( dec, 1 ) && LT_16( hSpMusClas->dec_mov1_fx, 6554 ) && LT_16( hSpMusClas->LT_UV_cnt1_fx, 12800 ) )
4121 : {
4122 0 : dec = 0;
4123 0 : move16();
4124 : }
4125 :
4126 : /* overwrite 1st stage speech/music decision to music */
4127 2039 : if ( EQ_16( dec, 1 ) )
4128 : {
4129 408 : st->sp_aud_decision1 = 1;
4130 408 : move16();
4131 : }
4132 :
4133 2039 : return;
4134 : }
4135 :
4136 :
4137 : /*----------------------------------------------------------------------------------*
4138 : * tonal_context_improv_fx()
4139 : *
4140 : * Context-based improvement of 1st/2nd stage speech/music decision on stable tonal signals
4141 : *----------------------------------------------------------------------------------*/
4142 :
4143 2050 : static void tonal_context_improv_fx(
4144 : Encoder_State *st_fx, /* i/o: Encoder state structure */
4145 : const Word32 PS[], /* i : energy spectrum */
4146 : const Word16 voi_fv, /* i : scaled voicing feature */
4147 : const Word16 cor_map_sum_fv, /* i : scaled correlation map feature */
4148 : const Word16 LPCErr, /* i : scaled LP prediction error feature */
4149 : const Word16 Qx )
4150 : {
4151 : Word16 t2_fx, t3_fx, tL_fx, err_fx, cor_fx, dft_fx;
4152 : Word16 exp, expa, expb, fraca, fracb, scale, exp1, exp2, exp3, tmp;
4153 : Word16 voi_mean, lt_pitch_diff;
4154 : Word32 L_tmp, tonality, tonality1, tonality2, tonality3, sort_max, sort_avg, sort_val[80];
4155 2050 : VAD_HANDLE hVAD = st_fx->hVAD;
4156 2050 : SP_MUS_CLAS_HANDLE hSpMusClas = st_fx->hSpMusClas;
4157 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
4158 2050 : Flag Overflow = 0;
4159 2050 : move16();
4160 : #endif
4161 :
4162 2050 : IF( EQ_16( st_fx->last_codec_mode, MODE2 ) )
4163 : {
4164 260 : set16_fx( hSpMusClas->tonality2_buf_fx, 0, HANG_LEN_INIT );
4165 260 : set16_fx( hSpMusClas->tonality3_buf_fx, 0, HANG_LEN_INIT );
4166 260 : set16_fx( hSpMusClas->LPCErr_buf_fx, 0, HANG_LEN_INIT );
4167 260 : hSpMusClas->lt_music_hangover = 0;
4168 260 : move16();
4169 260 : hSpMusClas->lt_music_state = 0;
4170 260 : move16();
4171 260 : hSpMusClas->lt_speech_state = 0;
4172 260 : move16();
4173 260 : hSpMusClas->lt_speech_hangover = 0;
4174 260 : move16();
4175 : }
4176 :
4177 : /* estimate maximum tonality in bands [0-1 kHz], [1-2kHz] and [2-4kHz] */
4178 2050 : Copy32( PS, sort_val, 80 );
4179 :
4180 : /* tonality in band [0-1 kHz] */
4181 2050 : sort_32_fx( sort_val, 0, 19 );
4182 2050 : sort_max = L_add( sort_val[19], 0 );
4183 2050 : sort_avg = sum32_fx( &sort_val[0], 10 );
4184 :
4185 : /* tonality1 = sort_max / sort_avg; */
4186 2050 : IF( sort_avg )
4187 : {
4188 2050 : expa = norm_l( sort_max );
4189 2050 : fraca = extract_h( L_shl( sort_max, expa ) );
4190 2050 : expa = sub( 30, add( expa, Qx ) );
4191 :
4192 2050 : expb = norm_l( sort_avg );
4193 2050 : fracb = extract_h( L_shl( sort_avg, expb ) );
4194 2050 : expb = sub( 30, add( expb, Qx ) );
4195 :
4196 2050 : scale = shr( sub( fracb, fraca ), 15 );
4197 2050 : fraca = shl( fraca, scale );
4198 2050 : expa = sub( expa, scale );
4199 :
4200 2050 : tmp = div_s( fraca, fracb );
4201 2050 : exp1 = sub( expa, expb );
4202 :
4203 2050 : tonality1 = L_shl_o( tmp, exp1, &Overflow );
4204 : }
4205 : ELSE
4206 : {
4207 0 : tonality1 = L_shl( sort_max, sub( 15, Qx ) ); /*Q15 */
4208 : }
4209 :
4210 : /* tonality in band [1-2 kHz] */
4211 2050 : sort_32_fx( sort_val, 20, 39 );
4212 2050 : sort_max = sort_val[39];
4213 2050 : sort_avg = sum32_fx( &sort_val[20], 10 );
4214 :
4215 2050 : IF( sort_avg )
4216 : {
4217 : /* tonality2 = sort_max / sort_avg; */
4218 2050 : expa = norm_l( sort_max );
4219 2050 : fraca = extract_h( L_shl( sort_max, expa ) );
4220 2050 : expa = sub( 30, add( expa, Qx ) );
4221 :
4222 :
4223 2050 : expb = norm_l( sort_avg );
4224 2050 : fracb = extract_h( L_shl( sort_avg, expb ) );
4225 2050 : expb = sub( 30, add( expb, Qx ) );
4226 :
4227 2050 : scale = shr( sub( fracb, fraca ), 15 );
4228 2050 : fraca = shl( fraca, scale );
4229 2050 : expa = sub( expa, scale );
4230 :
4231 2050 : tmp = div_s( fraca, fracb );
4232 2050 : exp2 = sub( expa, expb );
4233 :
4234 2050 : tonality2 = L_shl_o( tmp, exp2, &Overflow );
4235 : }
4236 : ELSE
4237 : {
4238 0 : tonality2 = L_shl( sort_max, sub( 15, Qx ) ); /*Q15 */
4239 : }
4240 :
4241 : /* tonality in band [2-4 kHz] */
4242 2050 : sort_32_fx( sort_val, 40, 79 );
4243 2050 : sort_max = sort_val[79];
4244 2050 : sort_avg = sum32_fx( &sort_val[40], 20 );
4245 :
4246 2050 : IF( sort_avg )
4247 : {
4248 : /* tonality3 = sort_max / sort_avg; */
4249 2050 : expa = norm_l( sort_max );
4250 2050 : fraca = extract_h( L_shl( sort_max, expa ) );
4251 2050 : expa = sub( 30, add( expa, Qx ) );
4252 :
4253 2050 : expb = norm_l( sort_avg );
4254 2050 : fracb = extract_h( L_shl( sort_avg, expb ) );
4255 2050 : expb = sub( 30, add( expb, Qx ) );
4256 :
4257 2050 : scale = shr( sub( fracb, fraca ), 15 );
4258 2050 : fraca = shl( fraca, scale );
4259 2050 : expa = sub( expa, scale );
4260 :
4261 2050 : tmp = div_s( fraca, fracb );
4262 2050 : exp3 = sub( expa, expb );
4263 :
4264 2050 : tonality3 = L_shl_o( tmp, exp3, &Overflow );
4265 : }
4266 : ELSE
4267 : {
4268 0 : tonality3 = L_shl( sort_max, sub( 15, Qx ) ); /*Q15 */
4269 : }
4270 :
4271 2050 : tonality = L_max( L_max( tonality1, tonality2 ), tonality3 );
4272 :
4273 : /* voi_mean = 0.33f * (st->voicing_fx[0] + voicing[1] + voicing[2]); */
4274 2050 : L_tmp = L_mult( st_fx->voicing_fx[0], 10923 );
4275 2050 : L_tmp = L_mac( L_tmp, st_fx->voicing_fx[1], 10923 );
4276 2050 : voi_mean = mac_r_sat( L_tmp, st_fx->voicing_fx[2], 10923 ); /* Q15 */
4277 2050 : test();
4278 2050 : IF( EQ_16( hVAD->hangover_cnt, 10 ) && EQ_16( st_fx->vad_flag, 1 ) )
4279 : {
4280 : /* long-term voicing parameter */
4281 10 : hSpMusClas->lt_voicing = round_fx( L_mac( L_mult( 3277, hSpMusClas->lt_voicing ), 29491, voi_mean ) );
4282 :
4283 : /* long-term correlation value */
4284 10 : hSpMusClas->lt_corr = round_fx( L_mac( L_mult( 3277, hSpMusClas->lt_corr ), 29491, st_fx->old_corr_fx ) );
4285 :
4286 : /* long-term tonality measure */
4287 10 : hSpMusClas->lt_tonality = L_add( Mult_32_16( hSpMusClas->lt_tonality, 3277 ), Mult_32_16( tonality, 29491 ) );
4288 : }
4289 : ELSE
4290 : {
4291 : /* long-term voicing parameter */
4292 2040 : hSpMusClas->lt_voicing = round_fx( L_mac( L_mult( 22938, hSpMusClas->lt_voicing ), 9830, voi_mean ) );
4293 :
4294 : /* long-term correlation value */
4295 2040 : hSpMusClas->lt_corr = round_fx( L_mac( L_mult( 22938, hSpMusClas->lt_corr ), 9830, st_fx->old_corr_fx ) );
4296 :
4297 : /* long-term tonality measure */
4298 2040 : hSpMusClas->lt_tonality = L_add( Mult_32_16( hSpMusClas->lt_tonality, 16384 ), Mult_32_16( tonality, 16384 ) );
4299 : }
4300 2050 : move16();
4301 2050 : move16();
4302 2050 : move16();
4303 :
4304 : /* Pitch difference w.r.t to past 3 frames */
4305 2050 : lt_pitch_diff = abs_s( sub( hSpMusClas->lt_corr_pitch[0], st_fx->pitch[0] ) );
4306 2050 : lt_pitch_diff = add( lt_pitch_diff, abs_s( sub( hSpMusClas->lt_corr_pitch[1], st_fx->pitch[0] ) ) );
4307 2050 : lt_pitch_diff = add( lt_pitch_diff, abs_s( sub( hSpMusClas->lt_corr_pitch[2], st_fx->pitch[0] ) ) );
4308 :
4309 2050 : hSpMusClas->lt_corr_pitch[0] = hSpMusClas->lt_corr_pitch[1];
4310 2050 : move16();
4311 2050 : hSpMusClas->lt_corr_pitch[1] = hSpMusClas->lt_corr_pitch[2];
4312 2050 : move16();
4313 2050 : hSpMusClas->lt_corr_pitch[2] = st_fx->pitch[0];
4314 2050 : move16();
4315 :
4316 2050 : hSpMusClas->lt_old_mode[0] = hSpMusClas->lt_old_mode[1];
4317 2050 : move16();
4318 2050 : hSpMusClas->lt_old_mode[1] = hSpMusClas->lt_old_mode[2];
4319 2050 : move16();
4320 :
4321 2050 : test();
4322 2050 : test();
4323 2050 : test();
4324 2050 : test();
4325 2050 : test();
4326 2050 : test();
4327 2050 : test();
4328 2050 : test();
4329 2050 : test();
4330 2050 : test();
4331 2050 : test();
4332 2050 : test();
4333 2050 : test();
4334 2050 : IF( st_fx->sp_aud_decision1 == 1 &&
4335 : ( GT_32( L_min( L_min( tonality1, tonality2 ), tonality3 ), 1638400 ) ) &&
4336 : ( GT_32( L_add_sat( tonality1, tonality2 ), 6553600 ) && GT_32( L_add_sat( tonality2, tonality3 ), 6553600 ) && GT_32( L_add_sat( tonality1, tonality3 ), 6553600 ) ) &&
4337 : ( LT_32( hSpMusClas->lt_tonality, 655360000 ) ) &&
4338 : ( ( GT_32( hSpMusClas->lt_tonality, 32768000 ) && GT_16( s_max( hSpMusClas->lt_voicing, voi_mean ), 32440 ) ) ||
4339 : ( GT_32( hSpMusClas->lt_tonality, 49152000 ) && GT_16( hSpMusClas->lt_corr, 32440 ) ) ||
4340 : ( GT_32( hSpMusClas->lt_tonality, 98304000 ) && GT_16( hSpMusClas->lowrate_pitchGain, 15729 ) ) ||
4341 : ( lt_pitch_diff == 0 && GT_16( hSpMusClas->lowrate_pitchGain, 14582 ) ) ) )
4342 : {
4343 18 : IF( LT_16( sum16_fx( hSpMusClas->lt_old_mode, 2 ), 2 ) )
4344 : {
4345 : /* probably speech - change the decision to speech */
4346 0 : st_fx->sp_aud_decision1 = 0;
4347 0 : move16();
4348 0 : st_fx->sp_aud_decision2 = 0;
4349 0 : move16();
4350 :
4351 0 : if ( hSpMusClas->lt_hangover == 0 )
4352 : {
4353 0 : hSpMusClas->lt_hangover = 6;
4354 0 : move16();
4355 : }
4356 : }
4357 : }
4358 : ELSE
4359 : {
4360 : /* not speech, but still in the hangover period - change the decision to speech */
4361 2032 : IF( hSpMusClas->lt_hangover > 0 )
4362 : {
4363 0 : st_fx->sp_aud_decision1 = 0;
4364 0 : move16();
4365 0 : st_fx->sp_aud_decision2 = 0;
4366 0 : move16();
4367 :
4368 0 : hSpMusClas->lt_hangover = sub( hSpMusClas->lt_hangover, 1 );
4369 : }
4370 : }
4371 :
4372 : /* calculate standard deviation of log-tonality */
4373 2050 : Copy( hSpMusClas->tonality2_buf_fx + 1, hSpMusClas->tonality2_buf_fx, HANG_LEN_INIT - 1 );
4374 : /* st->tonality2_buf[HANG_LEN_INIT - 1] = 0.2f*(float)log10(tonality2); */
4375 2050 : exp = norm_l( tonality2 );
4376 2050 : tmp = Log2_norm_lc( L_shl( tonality2, exp ) ); /*15 */
4377 2050 : exp = sub( 30, add( exp, 16 ) );
4378 2050 : L_tmp = Mpy_32_16( exp, tmp, 15783 ); /*19 //3945, 0.2*log10(2), Q18 */
4379 2050 : hSpMusClas->tonality2_buf_fx[HANG_LEN_INIT - 1] = round_fx( L_shl( L_tmp, 11 ) ); /*14 */
4380 2050 : move16();
4381 : /* t2 = std( st->tonality2_buf, HANG_LEN_INIT ); */
4382 2050 : t2_fx = std_fx( hSpMusClas->tonality2_buf_fx, HANG_LEN_INIT ); /*14 */
4383 :
4384 2050 : Copy( hSpMusClas->tonality3_buf_fx + 1, hSpMusClas->tonality3_buf_fx, HANG_LEN_INIT - 1 );
4385 : /* st->tonality3_buf[HANG_LEN_INIT - 1] = 0.2f*(float)log10(tonality3); */
4386 2050 : exp = norm_l( tonality3 );
4387 2050 : tmp = Log2_norm_lc( L_shl( tonality3, exp ) ); /*15 */
4388 2050 : exp = sub( 30, add( exp, 16 ) );
4389 2050 : L_tmp = Mpy_32_16( exp, tmp, 15783 ); /*19 //3945, 0.2*log10(2), Q18 */
4390 2050 : hSpMusClas->tonality3_buf_fx[HANG_LEN_INIT - 1] = round_fx( L_shl( L_tmp, 11 ) ); /*14 */
4391 2050 : t3_fx = std_fx( hSpMusClas->tonality3_buf_fx, HANG_LEN_INIT ); /*14 */
4392 2050 : move16();
4393 :
4394 : /* tL = 0.2f*(float)log10(st->lt_tonality); */
4395 2050 : exp = norm_l( hSpMusClas->lt_tonality );
4396 2050 : tmp = Log2_norm_lc( L_shl( hSpMusClas->lt_tonality, exp ) ); /*15 */
4397 2050 : exp = sub( 30, add( exp, 16 ) );
4398 2050 : L_tmp = Mpy_32_16( exp, tmp, 15783 ); /*19 //3945, 0.2*log10(2), Q18 */
4399 2050 : tL_fx = round_fx( L_shl( L_tmp, 11 ) ); /*14 */
4400 :
4401 : /* calculate standard deviation of residual LP energy */
4402 2050 : Copy( hSpMusClas->LPCErr_buf_fx + 1, hSpMusClas->LPCErr_buf_fx, HANG_LEN_INIT - 1 );
4403 2050 : hSpMusClas->LPCErr_buf_fx[HANG_LEN_INIT - 1] = LPCErr;
4404 2050 : move16();
4405 : /* err = std( st->LPCErr_buf, HANG_LEN_INIT ); */
4406 2050 : err_fx = std_fx( hSpMusClas->LPCErr_buf_fx, HANG_LEN_INIT );
4407 :
4408 2050 : cor_fx = s_max( sub( voi_fv, cor_map_sum_fv ), 0 ); /*15 */
4409 2050 : dft_fx = abs_s( sub( hSpMusClas->tonality2_buf_fx[HANG_LEN_INIT - 1], hSpMusClas->tonality3_buf_fx[HANG_LEN_INIT - 1] ) ); /*14 */
4410 :
4411 :
4412 : /* state machine for strong music */
4413 2050 : test();
4414 2050 : test();
4415 2050 : test();
4416 2050 : test();
4417 2050 : test();
4418 2050 : test();
4419 2050 : test();
4420 2050 : test();
4421 2050 : test();
4422 2050 : test();
4423 2050 : test();
4424 2050 : test();
4425 2050 : IF( ( EQ_16( st_fx->sp_aud_decision1, 1 ) ) && hSpMusClas->lt_music_state == 0 && hSpMusClas->lt_music_hangover == 0 &&
4426 : ( LT_16( t2_fx, 8847 ) ) && ( GT_16( t2_fx, 4260 ) ) && ( GT_16( t3_fx, 3604 ) ) && ( LT_16( tL_fx, 8847 ) ) && ( GT_16( tL_fx, 4260 ) ) && ( GT_16( err_fx, 8192 ) ) )
4427 : {
4428 6 : hSpMusClas->lt_music_state = 1;
4429 6 : move16();
4430 6 : hSpMusClas->lt_music_hangover = 6;
4431 6 : move16();
4432 : }
4433 2044 : ELSE IF( EQ_16( hSpMusClas->lt_music_state, 1 ) && hSpMusClas->lt_music_hangover == 0 &&
4434 : ( LT_16( t2_fx, 5571 ) ) && ( LT_16( t3_fx, 4260 ) ) && ( LT_16( tL_fx, 7373 ) ) )
4435 : {
4436 6 : hSpMusClas->lt_music_state = 0;
4437 6 : move16();
4438 6 : hSpMusClas->lt_music_hangover = 6;
4439 6 : move16();
4440 : }
4441 :
4442 2050 : IF( hSpMusClas->lt_music_hangover > 0 )
4443 : {
4444 72 : hSpMusClas->lt_music_hangover = sub( hSpMusClas->lt_music_hangover, 1 );
4445 72 : move16();
4446 : }
4447 :
4448 : /* state machine for strong speech */
4449 2050 : test();
4450 2050 : test();
4451 2050 : test();
4452 2050 : test();
4453 2050 : test();
4454 2050 : test();
4455 2050 : test();
4456 2050 : test();
4457 2050 : test();
4458 2050 : test();
4459 2050 : test();
4460 2050 : test();
4461 2050 : test();
4462 2050 : IF( ( EQ_16( st_fx->sp_aud_decision1, 1 ) ) && hSpMusClas->lt_speech_state == 0 && hSpMusClas->lt_speech_hangover == 0 &&
4463 : ( GT_16( cor_fx, 13107 ) ) && ( LT_16( dft_fx, 1638 ) ) && GT_16( shr( voi_fv, 1 ), add( cor_map_sum_fv, 1966 ) ) &&
4464 : ( LT_16( t2_fx, shr( cor_fx, 1 ) ) ) && ( LT_16( t3_fx, shr( cor_fx, 1 ) ) ) && ( LT_16( tL_fx, shr( cor_fx, 1 ) ) ) &&
4465 : ( LT_16( cor_map_sum_fv, cor_fx ) ) && ( GT_16( voi_fv, cor_fx ) ) && ( GT_16( voi_fv, 24903 ) ) )
4466 : {
4467 6 : hSpMusClas->lt_speech_state = 1;
4468 6 : move16();
4469 6 : hSpMusClas->lt_speech_hangover = 6;
4470 6 : move16();
4471 : }
4472 2044 : ELSE IF( ( EQ_16( hSpMusClas->lt_speech_state, 1 ) ) && hSpMusClas->lt_speech_hangover == 0 && ( LT_16( cor_fx, 13107 ) ) )
4473 : {
4474 6 : hSpMusClas->lt_speech_state = 0;
4475 6 : move16();
4476 6 : hSpMusClas->lt_speech_hangover = 6;
4477 6 : move16();
4478 : }
4479 :
4480 2050 : IF( hSpMusClas->lt_speech_hangover > 0 )
4481 : {
4482 60 : hSpMusClas->lt_speech_hangover = sub( hSpMusClas->lt_speech_hangover, 1 );
4483 60 : move16();
4484 : }
4485 :
4486 : /* final decision */
4487 2050 : test();
4488 2050 : test();
4489 2050 : IF( EQ_16( st_fx->sp_aud_decision1, 1 ) && EQ_16( hSpMusClas->lt_speech_state, 1 ) )
4490 : {
4491 : /* strong speech - probably error in speech/music classification */
4492 39 : st_fx->sp_aud_decision1 = 0;
4493 39 : move16();
4494 39 : st_fx->sp_aud_decision2 = 0;
4495 39 : move16();
4496 : }
4497 2011 : ELSE IF( st_fx->sp_aud_decision1 == 0 && EQ_16( hSpMusClas->lt_speech_state, 1 ) )
4498 : {
4499 : /* strong music - probably error in speech/music classification */
4500 2 : st_fx->sp_aud_decision1 = 0;
4501 2 : move16();
4502 2 : st_fx->sp_aud_decision2 = 0;
4503 2 : move16();
4504 : }
4505 :
4506 : /* update the buffer of past decisions */
4507 2050 : hSpMusClas->lt_old_mode[2] = st_fx->sp_aud_decision1;
4508 2050 : move16();
4509 :
4510 2050 : return;
4511 : }
4512 :
4513 : /*----------------------------------------------------------------------------------*
4514 : * detect_sparseness_fx()
4515 : *
4516 : *
4517 : *----------------------------------------------------------------------------------*/
4518 1039 : static void detect_sparseness_fx(
4519 : Encoder_State *st_fx, /* i/o: encoder state structure */
4520 : const Word16 localVAD_HE_SAD, /* i : HE-SAD flag without hangover */
4521 : const Word16 voi_fv /* i : scaled voicing feature */
4522 : )
4523 : {
4524 : Word16 sum, sumh;
4525 : Word32 L_tmp, L_tmp1;
4526 : Word16 tmp, tmp1;
4527 : Word16 S1[128];
4528 : Word16 i, j;
4529 1039 : Word16 hb_sp_high_flag = 0;
4530 1039 : move16();
4531 1039 : Word16 lb_sp_high_flag = 0;
4532 1039 : move16();
4533 : Word16 sparse;
4534 : Word16 tmp_buf[4];
4535 1039 : Word16 Mlpe = 0, Mv = 0, Msp;
4536 1039 : move16();
4537 1039 : move16();
4538 1039 : SP_MUS_CLAS_HANDLE hSpMusClas = st_fx->hSpMusClas;
4539 :
4540 1039 : Copy( st_fx->lgBin_E_fx, S1, 128 );
4541 :
4542 1039 : L_tmp = L_deposit_l( 0 );
4543 84159 : FOR( i = 0; i < 80; i++ )
4544 : {
4545 83120 : if ( S1[i] < 0 )
4546 : {
4547 17210 : S1[i] = 0;
4548 17210 : move16(); /* Q7 */
4549 : }
4550 83120 : L_tmp = L_add( L_tmp, L_deposit_l( S1[i] ) );
4551 : }
4552 :
4553 1039 : L_tmp1 = L_deposit_l( 0 );
4554 50911 : FOR( i = 80; i < 128; i++ )
4555 : {
4556 49872 : if ( S1[i] < 0 )
4557 : {
4558 12880 : S1[i] = 0;
4559 12880 : move16();
4560 : }
4561 49872 : L_tmp1 = L_add( L_tmp1, L_deposit_l( S1[i] ) );
4562 : }
4563 :
4564 1039 : sumh = extract_l( L_shr( L_tmp1, 7 ) ); /* Q0 */
4565 1039 : sum = add( extract_l( L_shr( L_tmp, 7 ) ), sumh ); /* Q0 */
4566 :
4567 : /* order spectral from max to min */
4568 1039 : order_spectrum_fx( S1, 128 );
4569 :
4570 : /* calculate spectral sparseness in the range 0 - 6.4 kHz */
4571 1039 : j = 0;
4572 1039 : move16();
4573 1039 : L_tmp = 0;
4574 1039 : move16();
4575 1039 : L_tmp1 = L_deposit_l( mult( sum, 24576 ) );
4576 55749 : FOR( i = 0; i < 128; i++ )
4577 : {
4578 55743 : L_tmp = L_add( L_tmp, L_deposit_l( S1[i] ) );
4579 55743 : IF( GT_32( L_shr( L_tmp, 7 ), L_tmp1 ) )
4580 : {
4581 1033 : j = i;
4582 1033 : move16();
4583 1033 : BREAK;
4584 : }
4585 : }
4586 :
4587 8312 : FOR( i = 0; i < HANG_LEN_INIT - 1; i++ )
4588 : {
4589 7273 : hSpMusClas->sparse_buf_fx[i] = hSpMusClas->sparse_buf_fx[i + 1];
4590 7273 : move16();
4591 : }
4592 :
4593 1039 : sparse = j;
4594 1039 : move16();
4595 1039 : hSpMusClas->sparse_buf_fx[i] = sparse;
4596 1039 : move16();
4597 :
4598 1039 : IF( EQ_16( st_fx->bwidth, WB ) )
4599 : {
4600 0 : Msp = 0;
4601 0 : move16();
4602 0 : FOR( i = 0; i < 8; i++ )
4603 : {
4604 0 : Msp = add( Msp, hSpMusClas->sparse_buf_fx[i] );
4605 : }
4606 0 : Msp = shl( Msp, 5 ); /* Q8 */
4607 :
4608 : /* find long-term smoothed sparseness */
4609 0 : IF( st_fx->last_vad_spa_fx == 0 )
4610 : {
4611 0 : set16_fx( &hSpMusClas->sparse_buf_fx[0], sparse, HANG_LEN_INIT - 1 );
4612 0 : hSpMusClas->LT_sparse_fx = sparse;
4613 0 : move16();
4614 : }
4615 : ELSE
4616 : {
4617 0 : set16_fx( tmp_buf, 0, 4 );
4618 :
4619 0 : FOR( i = 0; i < HANG_LEN_INIT; i++ )
4620 : {
4621 0 : FOR( j = 0; j < 4; j++ )
4622 : {
4623 0 : IF( GT_16( hSpMusClas->sparse_buf_fx[i], tmp_buf[j] ) )
4624 : {
4625 0 : Copy( &tmp_buf[j], &tmp_buf[j + 1], sub( 3, j ) );
4626 0 : tmp_buf[j] = hSpMusClas->sparse_buf_fx[i];
4627 0 : move16();
4628 0 : BREAK;
4629 : }
4630 : }
4631 : }
4632 :
4633 : /* ftmp = 0.25f*(HANG_LEN_INIT*Msp - sum_f(tmp_buf, 4)) - st->LT_sparse; */
4634 0 : tmp = shl( sum16_fx( tmp_buf, 4 ), 5 );
4635 0 : tmp = shl( sub( Msp, tmp ), 1 );
4636 0 : tmp = sub( tmp, hSpMusClas->LT_sparse_fx );
4637 :
4638 0 : hSpMusClas->LT_sparse_fx = add( hSpMusClas->LT_sparse_fx, shr( tmp, 2 ) ); /* Q8 */
4639 : }
4640 :
4641 : /* find high-band sparseness */
4642 0 : Copy( st_fx->lgBin_E_fx + 80, S1, 48 );
4643 :
4644 0 : order_spectrum_fx( S1, 48 );
4645 :
4646 0 : FOR( i = 0; i < HANG_LEN_INIT - 1; i++ )
4647 : {
4648 0 : hSpMusClas->hf_spar_buf_fx[i] = hSpMusClas->hf_spar_buf_fx[i + 1];
4649 0 : move16();
4650 : }
4651 :
4652 : /* st_fx->hf_spar_buf_fx[i] = sum_f(S1, 5)/sumh; */
4653 0 : L_tmp = L_deposit_l( 0 );
4654 0 : FOR( i = 0; i < 5; i++ )
4655 : {
4656 0 : if ( S1[i] < 0 )
4657 : {
4658 0 : S1[i] = 0;
4659 0 : move16();
4660 : }
4661 :
4662 0 : L_tmp = L_add( L_tmp, S1[i] );
4663 : }
4664 :
4665 0 : tmp = extract_l( L_shr( L_tmp, 7 ) );
4666 0 : IF( tmp == 0 )
4667 : {
4668 0 : hSpMusClas->hf_spar_buf_fx[HANG_LEN_INIT - 1] = 0;
4669 : }
4670 : ELSE
4671 : {
4672 0 : hSpMusClas->hf_spar_buf_fx[HANG_LEN_INIT - 1] = div_s( tmp, sumh );
4673 : }
4674 0 : move16();
4675 :
4676 0 : tmp = 0;
4677 0 : move16();
4678 0 : FOR( i = 0; i < 8; i++ )
4679 : {
4680 0 : tmp = add( tmp, shr( hSpMusClas->hf_spar_buf_fx[i], 3 ) );
4681 : }
4682 0 : IF( GT_16( tmp, 6554 ) )
4683 : {
4684 0 : hb_sp_high_flag = 1;
4685 0 : move16();
4686 : }
4687 :
4688 : /* find low-band sparseness */
4689 0 : Copy( st_fx->lgBin_E_fx, S1, 60 );
4690 :
4691 0 : order_spectrum_fx( S1, 60 );
4692 0 : L_tmp = L_deposit_l( 0 );
4693 0 : L_tmp1 = L_deposit_l( 0 );
4694 0 : FOR( i = 0; i < 5; i++ )
4695 : {
4696 0 : if ( S1[i] < 0 )
4697 : {
4698 0 : S1[i] = 0;
4699 0 : move16();
4700 : }
4701 :
4702 0 : L_tmp = L_add( L_tmp, S1[i] );
4703 : }
4704 :
4705 0 : FOR( ; i < 60; i++ )
4706 : {
4707 0 : if ( S1[i] < 0 )
4708 : {
4709 0 : S1[i] = 0;
4710 0 : move16();
4711 : }
4712 :
4713 0 : L_tmp1 = L_add( L_tmp1, S1[i] );
4714 : }
4715 :
4716 : /* if ( sum_f(S1, 5)/sum_f(S1,60) > 0.18f ) */
4717 0 : tmp = extract_l( L_shr( L_tmp, 7 ) );
4718 0 : IF( tmp != 0 )
4719 : {
4720 0 : tmp = div_s( tmp, add( tmp, extract_l( L_shr( L_tmp1, 7 ) ) ) );
4721 0 : if ( GT_16( tmp, 5898 ) )
4722 : {
4723 0 : lb_sp_high_flag = 1;
4724 0 : move16();
4725 : }
4726 : }
4727 :
4728 : /* find smoothed linear prediction efficiency */
4729 0 : FOR( i = 0; i < 7; i++ )
4730 : {
4731 0 : hSpMusClas->lpe_buf_fx[i] = hSpMusClas->lpe_buf_fx[i + 1];
4732 0 : move16();
4733 : }
4734 :
4735 0 : hSpMusClas->lpe_buf_fx[i] = hSpMusClas->past_epsP2_fx;
4736 0 : move16();
4737 0 : Mlpe = 0;
4738 0 : move16();
4739 0 : FOR( i = 0; i < 8; i++ )
4740 : {
4741 0 : Mlpe = add( Mlpe, shr( hSpMusClas->lpe_buf_fx[i], 3 ) );
4742 : }
4743 :
4744 : /* find smoothed voicing */
4745 0 : FOR( i = 0; i < HANG_LEN_INIT - 1; i++ )
4746 : {
4747 0 : hSpMusClas->voicing_buf_fx[i] = hSpMusClas->voicing_buf_fx[i + 1];
4748 0 : move16();
4749 : }
4750 :
4751 0 : hSpMusClas->voicing_buf_fx[i] = voi_fv;
4752 0 : move16();
4753 0 : Mv = 0;
4754 0 : move16();
4755 0 : FOR( i = 0; i < 8; i++ )
4756 : {
4757 0 : Mv = add( Mv, shr( hSpMusClas->voicing_buf_fx[i], 3 ) );
4758 : }
4759 : }
4760 :
4761 : /* avoid using LR-MDCT on sparse spectra */
4762 1039 : IF( EQ_16( st_fx->sp_aud_decision1, 1 ) )
4763 : {
4764 295 : tmp = 91;
4765 295 : move16();
4766 295 : if ( EQ_16( st_fx->bwidth, WB ) )
4767 : {
4768 0 : tmp = 90;
4769 0 : move16();
4770 : }
4771 :
4772 295 : IF( GT_16( sparse, tmp ) )
4773 : {
4774 0 : st_fx->sp_aud_decision1 = 0;
4775 0 : move16();
4776 0 : st_fx->sp_aud_decision2 = 1;
4777 0 : move16();
4778 0 : hSpMusClas->gsc_hangover = 1;
4779 0 : move16();
4780 : }
4781 295 : ELSE IF( EQ_16( hSpMusClas->gsc_hangover, 1 ) )
4782 : {
4783 0 : IF( GT_16( sparse, 85 ) )
4784 : {
4785 0 : st_fx->sp_aud_decision1 = 0;
4786 0 : move16();
4787 0 : st_fx->sp_aud_decision2 = 1;
4788 0 : move16();
4789 : }
4790 : ELSE
4791 : {
4792 0 : tmp = 0;
4793 0 : move16();
4794 0 : FOR( i = 0; i < hSpMusClas->gsc_cnt; i++ )
4795 : {
4796 0 : tmp = add( tmp, hSpMusClas->sparse_buf_fx[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt + i] );
4797 : }
4798 0 : tmp1 = div_s( 1, hSpMusClas->gsc_cnt );
4799 0 : tmp = mult( tmp, tmp1 );
4800 :
4801 0 : IF( LT_16( abs_s( sub( sparse, tmp ) ), 7 ) )
4802 : {
4803 0 : st_fx->sp_aud_decision1 = 0;
4804 0 : move16();
4805 0 : st_fx->sp_aud_decision2 = 1;
4806 0 : move16();
4807 : }
4808 : }
4809 : }
4810 :
4811 295 : IF( EQ_16( st_fx->bwidth, WB ) )
4812 : {
4813 0 : test();
4814 0 : test();
4815 0 : test();
4816 0 : test();
4817 0 : test();
4818 0 : test();
4819 0 : test();
4820 0 : test();
4821 0 : test();
4822 0 : IF( GT_16( hSpMusClas->LT_sparse_fx, 15360 ) && GT_16( sparse, 50 ) && LT_16( Mlpe, -1331 ) && GT_16( Mv, 27853 ) &&
4823 : lb_sp_high_flag == 0 && ( ( hb_sp_high_flag == 0 && GT_16( sumh, mult_r( 4915, sum ) ) ) || LE_16( sumh, mult_r( 4915, sum ) ) ) )
4824 : {
4825 0 : st_fx->sp_aud_decision1 = 0;
4826 0 : move16();
4827 0 : st_fx->sp_aud_decision2 = 1;
4828 0 : move16();
4829 0 : hSpMusClas->gsc_hangover = 1;
4830 0 : move16();
4831 : }
4832 0 : ELSE IF( EQ_16( hSpMusClas->gsc_hangover, 1 ) && !( st_fx->sp_aud_decision1 == 0 && EQ_16( st_fx->sp_aud_decision2, 1 ) ) )
4833 : {
4834 0 : IF( LT_16( abs_s( sub( sparse, mean_fx( &hSpMusClas->sparse_buf_fx[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt], hSpMusClas->gsc_cnt ) ) ), 7 ) )
4835 : {
4836 0 : st_fx->sp_aud_decision1 = 0;
4837 0 : move16();
4838 0 : st_fx->sp_aud_decision2 = 1;
4839 0 : move16();
4840 : }
4841 : }
4842 : }
4843 : }
4844 :
4845 : /* update the counter of consecutive GSC frames with sparse spectrum */
4846 1039 : test();
4847 1039 : IF( st_fx->sp_aud_decision1 == 0 && EQ_16( st_fx->sp_aud_decision2, 1 ) )
4848 : {
4849 0 : hSpMusClas->gsc_cnt = add( hSpMusClas->gsc_cnt, 1 );
4850 0 : IF( GT_16( hSpMusClas->gsc_cnt, 7 ) )
4851 : {
4852 0 : hSpMusClas->gsc_cnt = 7;
4853 0 : move16();
4854 : }
4855 : }
4856 : ELSE
4857 : {
4858 1039 : hSpMusClas->gsc_cnt = 0;
4859 1039 : move16();
4860 1039 : hSpMusClas->gsc_hangover = 0;
4861 1039 : move16();
4862 : }
4863 :
4864 1039 : st_fx->last_vad_spa_fx = localVAD_HE_SAD;
4865 1039 : move16();
4866 :
4867 1039 : return;
4868 : }
4869 :
4870 : /*---------------------------------------------------------------------*
4871 : * order_spectrum()
4872 : *
4873 : *
4874 : *---------------------------------------------------------------------*/
4875 1039 : static void order_spectrum_fx(
4876 : Word16 *vec,
4877 : Word16 len )
4878 : {
4879 : Word16 i, j, end, end_1, len_2, tmp;
4880 : Word16 smax, smin;
4881 : Word16 imax, imin;
4882 :
4883 1039 : len_2 = shr( len, 1 );
4884 67535 : FOR( i = 0; i < len_2; i++ )
4885 : {
4886 66496 : imax = i;
4887 66496 : move16();
4888 66496 : imin = i;
4889 66496 : move16();
4890 66496 : smax = vec[i];
4891 66496 : move16();
4892 66496 : smin = vec[i];
4893 66496 : move16();
4894 66496 : end = sub( len, i );
4895 4388736 : FOR( j = i; j < end; j++ )
4896 : {
4897 4322240 : IF( GT_16( vec[j], smax ) )
4898 : {
4899 178543 : smax = vec[j];
4900 178543 : move16();
4901 178543 : imax = j;
4902 178543 : move16();
4903 : }
4904 : ELSE
4905 : {
4906 4143697 : IF( LT_16( vec[j], smin ) )
4907 : {
4908 249971 : smin = vec[j];
4909 249971 : move16();
4910 249971 : imin = j;
4911 249971 : move16();
4912 : }
4913 : }
4914 : }
4915 :
4916 66496 : tmp = vec[i];
4917 66496 : move16();
4918 66496 : vec[i] = smax;
4919 66496 : move16();
4920 66496 : vec[imax] = tmp;
4921 66496 : move16();
4922 :
4923 66496 : IF( EQ_16( imin, i ) )
4924 : {
4925 11566 : imin = imax;
4926 11566 : move16();
4927 : }
4928 :
4929 66496 : end_1 = sub( end, 1 );
4930 66496 : tmp = vec[end_1];
4931 66496 : move16();
4932 66496 : vec[end_1] = smin;
4933 66496 : move16();
4934 66496 : vec[imin] = tmp;
4935 66496 : move16();
4936 : }
4937 1039 : }
|