Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #include "cnst.h"
36 : #include "ivas_cnst.h"
37 : #include "rom_enc.h"
38 : #include "rom_com.h"
39 : #include "prot_fx.h"
40 : #include "prot_fx_enc.h"
41 : #include <assert.h>
42 : #include "wmc_auto.h"
43 : #include <math.h>
44 : #include "prot_fx_enc.h"
45 : #include "ivas_prot_fx.h"
46 :
47 : /*-----------------------------------------------------------------------------------------*
48 : * Function front_vad()
49 : *
50 : * Standalone front-VAD module
51 : *-----------------------------------------------------------------------------------------*/
52 428045 : ivas_error front_vad_fx(
53 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure, nullable */
54 : Encoder_State *st, /* i/o: encoder state structure */
55 : const ENCODER_CONFIG_HANDLE hEncoderConfig, /* i : configuration structure */
56 : FRONT_VAD_ENC_HANDLE *hFrontVads, /* i/o: FrontVad handles */
57 : const Word16 MCT_flag, /* i : hMCT handle allocated (1) or not (0) Q0*/
58 : const Word16 input_frame, /* i : frame length Q0*/
59 : Word16 vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO Q0*/
60 : Word32 fr_bands_fx[][2 * NB_BANDS], /* o : energy in frequency bands q_fr_bands_fx*/
61 : Word16 q_fr_bands[], /* o : Q of fr_bands_fx */
62 : Word16 Etot_LR_fx[], /* o : total energy Left & Right channel Q8*/
63 : Word32 lf_E_fx[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels q_lf_E*/
64 : Word16 q_lf_E[], /* o : Q of lf_E_fx */
65 : Word16 localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels Q0*/
66 : Word16 vad_hover_flag[], /* o : VAD hangover flag Q0*/
67 : Word32 band_energies_LR_fx[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN q_band_energies_LR */
68 : Word16 *q_band_energies_LR, /* o : Q of band_energies_LR_fx */
69 : Word32 *PS_out_fx, /* o : energy spectrum q_PS_out*/
70 : Word16 *q_PS_out, /* o : Q of PS_out_fx Q0*/
71 : Word16 *Bin_E_out_fx, /* o : log-energy spectrum of the current frame Q7*/
72 : Word16 Q_inp,
73 : Word16 *Q_buffer,
74 : Word16 Q_add,
75 : Word16 *front_create_flag )
76 : {
77 : ENC_CORE_HANDLE *sts;
78 : Word32 band_energies_fx[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */
79 : Word32 PS_fx[128];
80 : Word16 *lgBin_E_fx;
81 : Word16 snr_sum_he_fx;
82 : // Word32 fr_bands_cp_fx[2 * NB_BANDS] = { 0 };
83 428045 : Word32 Bin_E_fx[L_FFT] = { 0 }; /* per bin log energy spectrum for mid-frame */
84 428045 : Word32 Bin_E_old_fx[L_FFT / 2] = { 0 }; /* old per bin log energy spectrum for mid-frame */
85 : Word16 fft_buffLR_fx[2 * L_FFT]; /* fft buffer */
86 : // Word16 front_create_flag = 0;
87 : Word16 n, n_chan, dummy;
88 : Word16 dummy_short;
89 : Word16 element_mode, last_element_mode;
90 : ivas_error error;
91 : Word16 Q_new;
92 : Word16 Qband, mem_decim_size;
93 : Word32 Etot_LR_32fx;
94 428045 : error = IVAS_ERR_OK;
95 428045 : push_wmops( "front_vad" );
96 428045 : move16();
97 428045 : move16();
98 :
99 428045 : Q_new = 0;
100 428045 : move16();
101 :
102 428045 : lgBin_E_fx = NULL;
103 428045 : if ( st != NULL )
104 : {
105 17790 : lgBin_E_fx = &st->lgBin_E_fx[0]; /* Q8 */
106 17790 : move16();
107 : }
108 428045 : IF( hCPE != NULL )
109 : {
110 410255 : n_chan = CPE_CHANNELS;
111 410255 : move16();
112 410255 : sts = &hCPE->hCoreCoder[0];
113 410255 : element_mode = hCPE->element_mode; /* Q0 */
114 410255 : move16();
115 410255 : last_element_mode = hCPE->last_element_mode; /* Q0 */
116 410255 : move16();
117 : }
118 17790 : ELSE IF( st != NULL )
119 : {
120 17790 : n_chan = 1;
121 17790 : move16();
122 17790 : sts = &st;
123 17790 : element_mode = IVAS_SCE;
124 17790 : move16();
125 17790 : last_element_mode = IVAS_SCE;
126 17790 : move16();
127 : }
128 : ELSE
129 : {
130 0 : assert( 0 && "Either CPE or SCE must be given!" );
131 : return IVAS_ERR_INTERNAL_FATAL;
132 : }
133 :
134 1266345 : FOR( n = 0; n < n_chan; n++ )
135 : {
136 838300 : localVAD_HE_SAD[n] = 0;
137 838300 : move16();
138 838300 : vad_hover_flag[n] = 0;
139 838300 : move16();
140 838300 : vad_flag_dtx[n] = 1;
141 838300 : move16();
142 : }
143 :
144 : /*------------------------------------------------------------------*
145 : * Allocate/deallocate hFrontVad handles in case of element_mode change
146 : *-----------------------------------------------------------------*/
147 428045 : test();
148 428045 : test();
149 428045 : IF( sts[0]->ini_frame > 0 && MCT_flag == 0 && NE_16( last_element_mode, element_mode ) )
150 : {
151 1739 : IF( EQ_16( element_mode, IVAS_CPE_MDCT ) )
152 : {
153 749 : IF( hFrontVads[0] != NULL )
154 : {
155 189 : FOR( n = 0; n < n_chan; n++ )
156 : {
157 126 : front_vad_destroy_fx( &hFrontVads[n] );
158 126 : hFrontVads[n] = NULL;
159 : }
160 : }
161 : }
162 : ELSE
163 : {
164 990 : test();
165 990 : IF( sts[0]->Opt_DTX_ON && hFrontVads[0] == NULL )
166 : {
167 180 : for ( n = 0; n < n_chan; n++ )
168 : {
169 120 : *front_create_flag = 1;
170 120 : move16();
171 120 : IF( NE_32( ( error = front_vad_create_fx( &hFrontVads[n], hEncoderConfig ) ), IVAS_ERR_OK ) )
172 : {
173 0 : return error;
174 : }
175 : }
176 : }
177 : }
178 : }
179 428045 : test();
180 : /* Only run VAD if DTX is on and TD stereo or unified stereo is selected */
181 428045 : IF( hFrontVads[0] != NULL && NE_16( element_mode, IVAS_CPE_MDCT ) )
182 : {
183 :
184 : #ifdef DEBUGGING
185 : /* If stereo switching is not enabled and TD is selected restore element_mode to TD every frame before the VAD */
186 : if ( hCPE != NULL && hCPE->stereo_mode_cmdl == IVAS_CPE_TD )
187 : {
188 : hCPE->element_mode = IVAS_CPE_TD;
189 : }
190 : #endif
191 :
192 : /*------------------------------------------------------------------*
193 : * VAD
194 : *-----------------------------------------------------------------*/
195 49758 : set_val_Word32( band_energies_LR_fx, 0, 2 * NB_BANDS );
196 49758 : Qband = -1;
197 49758 : move16();
198 :
199 49758 : IF( *front_create_flag )
200 : {
201 60 : Word16 scale_inp = Q15;
202 60 : Q_add = 0;
203 60 : move16();
204 180 : FOR( n = 0; n < n_chan; n++ )
205 : {
206 :
207 120 : scale_inp = s_min( sts[n]->q_inp, hCPE->hFrontVad[n]->q_mem_decim );
208 120 : Scale_sig( sts[n]->input_fx, input_frame, sub( scale_inp, sts[n]->q_inp ) );
209 120 : Scale_sig( hCPE->hFrontVad[n]->mem_decim_fx, 2 * L_FILT_MAX, sub( scale_inp, hCPE->hFrontVad[n]->q_mem_decim ) );
210 120 : sts[n]->q_inp = scale_inp;
211 120 : hCPE->hFrontVad[n]->q_mem_decim = scale_inp;
212 120 : Q_inp = scale_inp;
213 120 : move16();
214 120 : Q_buffer[n] = hCPE->hFrontVad[n]->q_buffer_12k8;
215 120 : move16();
216 : }
217 : }
218 :
219 : Word16 q_Bin_E, q_Bin_E_old;
220 : Word16 q_band_energies;
221 : Word16 q_fft_buffLR;
222 131484 : FOR( n = 0; n < n_chan; n++ )
223 : {
224 : FRONT_VAD_ENC_HANDLE hFrontVad;
225 81726 : hFrontVad = hFrontVads[n];
226 :
227 : /* Move previous frame 12k8 signal */
228 :
229 81726 : MVR2R_WORD16( hFrontVad->buffer_12k8_fx + L_FFT, hFrontVad->buffer_12k8_fx, L_FFT / 2 );
230 :
231 : /* Resample to 12k8 */
232 81726 : modify_Fs_ivas_fx( sts[n]->input_fx, input_frame, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2, INT_FS_12k8, hFrontVad->mem_decim_fx, ( sts[0]->max_bwidth == NB ), &Qband, &mem_decim_size );
233 :
234 : /* Preemphasis */
235 81726 : hFrontVad->mem_preemph_fx = shl( hFrontVad->mem_preemph_fx, sub( add( Q_inp, Qband ), hFrontVad->q_mem_preemph_fx ) ); /* Q_inp + Qband */
236 81726 : move16();
237 81726 : hFrontVad->q_mem_preemph_fx = add( Q_inp, Qband );
238 81726 : move16();
239 :
240 81726 : PREEMPH_FX( hFrontVad->buffer_12k8_fx + L_FFT / 2, PREEMPH_FAC, L_FRAME, &hFrontVad->mem_preemph_fx );
241 :
242 81726 : Q_new = s_min( add( add( Q_inp, Qband ), Q_add ), Q_buffer[n] );
243 81726 : scale_sig( hFrontVad->buffer_12k8_fx, L_FFT / 2, sub( Q_new, Q_buffer[n] ) ); /* Q_new */
244 81726 : scale_sig( hFrontVad->buffer_12k8_fx + L_FFT / 2, 3 * L_FRAME / 2 - L_FFT / 2, sub( Q_new, add( Q_inp, Qband ) ) ); /* Q_new */
245 :
246 81726 : Q_buffer[n] = Q_new;
247 81726 : move16();
248 :
249 81726 : ivas_analy_sp_fx( IVAS_CPE_TD, hCPE, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2 - 3 * ( L_SUBFR / 2 ), Q_new, fr_bands_fx[n],
250 81726 : &q_fr_bands[n], lf_E_fx[n], &q_lf_E[n], &Etot_LR_32fx, sts[0]->min_band, sts[0]->max_band, Bin_E_fx, &q_Bin_E, Bin_E_old_fx,
251 : &q_Bin_E_old, PS_fx, q_PS_out, lgBin_E_fx, band_energies_fx, &q_band_energies, fft_buffLR_fx, &q_fft_buffLR );
252 81726 : if ( n == 0 )
253 : {
254 49758 : *q_band_energies_LR = q_band_energies;
255 49758 : move16();
256 : }
257 :
258 : /* v_add( &band_energies[0], &band_energies_LR[0], &band_energies_LR[0], 2 * NB_BANDS ); */
259 81726 : IF( EQ_16( n, 1 ) )
260 : {
261 31968 : IF( LT_16( *q_band_energies_LR, q_band_energies ) )
262 : {
263 7718 : scale_sig32( band_energies_fx, 2 * NB_BANDS, sub( *q_band_energies_LR, q_band_energies ) ); // q_band_energies_LR
264 : }
265 : ELSE
266 : {
267 24250 : scale_sig32( band_energies_LR_fx, 2 * NB_BANDS, sub( q_band_energies, *q_band_energies_LR ) ); // q_band_energies_LR
268 24250 : *q_band_energies_LR = q_band_energies;
269 24250 : move16();
270 : }
271 : }
272 :
273 81726 : v_add_fx( &band_energies_fx[0], &band_energies_LR_fx[0], &band_energies_LR_fx[0], 2 * NB_BANDS ); // q_band_energies_LR
274 :
275 : /* add up energies for later calculating average of channel energies */
276 :
277 81726 : noise_est_pre_32fx( Etot_LR_32fx, hFrontVads[0]->ini_frame, hFrontVad->hNoiseEst, 0, 0, 0 );
278 :
279 81726 : Etot_LR_fx[n] = extract_h( Etot_LR_32fx );
280 81726 : move16();
281 :
282 : /* wb_vad */
283 163452 : hFrontVad->hVAD->vad_flag = wb_vad_ivas_fx( sts[n], fr_bands_fx[n], q_fr_bands[n], &dummy, &dummy, &dummy, &snr_sum_he_fx,
284 81726 : &localVAD_HE_SAD[n], &dummy_short, hFrontVad->hVAD, hFrontVad->hNoiseEst,
285 81726 : hFrontVad->lp_speech_fx, hFrontVad->lp_noise_fx ); // Q0
286 :
287 81726 : test();
288 81726 : test();
289 81726 : if ( n == 0 && GT_16( n_chan, 1 ) && EQ_16( last_element_mode, IVAS_CPE_DFT ) )
290 : {
291 30242 : sts[1]->last_coder_type = sts[0]->last_coder_type; /* Q0 */
292 30242 : move16();
293 : }
294 81726 : vad_flag_dtx[n] = ivas_dtx_hangover_addition_fx( sts[n], hFrontVad->hVAD->vad_flag, sub( hFrontVad->lp_speech_fx, hFrontVad->lp_noise_fx ), 0 /* <- no cldfb addition */, &vad_hover_flag[n], hFrontVad->hVAD, hFrontVad->hNoiseEst, &hFrontVads[n]->rem_dtx_ho ); /* Q0 */
295 81726 : move16();
296 :
297 81726 : if ( EQ_16( n_chan, 1 ) )
298 : {
299 17790 : sts[n]->vad_flag = hFrontVad->hVAD->vad_flag; /* Q0 */
300 17790 : move16();
301 : }
302 : }
303 :
304 49758 : IF( EQ_16( n_chan, CPE_CHANNELS ) )
305 : {
306 : /* get average channel energies, adding up was already done, so only need to scale by number of channels */
307 31968 : v_multc_fixed( &band_energies_LR_fx[0], ONE_IN_Q30 /* 0.5f in Q31 */, &band_energies_LR_fx[0], 2 * NB_BANDS );
308 :
309 : /* Logical OR between L and R decisions */
310 31968 : test();
311 31968 : vad_flag_dtx[0] = vad_flag_dtx[0] || vad_flag_dtx[1];
312 31968 : move16();
313 : }
314 49758 : IF( sts[0]->hFdCngEnc != NULL )
315 : {
316 31968 : resetFdCngEnc_fx( sts[0] );
317 : }
318 49758 : test();
319 : /* Part of DTX to decide if SID/NO_DATA */
320 49758 : IF( vad_flag_dtx[0] == 0 && GT_16( sts[0]->ini_frame, 2 ) ) /* CNG coding starts after 3 frames */
321 : {
322 11974 : IF( sts[0]->fd_cng_reset_flag == 0 )
323 : {
324 11928 : if ( hCPE != NULL )
325 : {
326 10482 : hCPE->element_mode = IVAS_CPE_DFT;
327 10482 : move16();
328 10482 : sts[1]->active_cnt = 0;
329 10482 : move16();
330 : }
331 : }
332 : ELSE
333 : {
334 46 : vad_flag_dtx[0] = 1;
335 46 : move16();
336 : }
337 : }
338 : ELSE
339 : {
340 37784 : vad_flag_dtx[0] = 1;
341 37784 : move16();
342 : }
343 : }
344 :
345 428045 : IF( PS_out_fx != NULL )
346 : {
347 17790 : MVR2R_WORD32( PS_fx, PS_out_fx, L_FRAME / 2 );
348 : }
349 :
350 428045 : IF( Bin_E_out_fx != NULL )
351 : {
352 :
353 17790 : MVR2R_WORD16( lgBin_E_fx, Bin_E_out_fx, L_FRAME / 2 );
354 : }
355 :
356 428045 : pop_wmops();
357 428045 : return error;
358 : }
359 :
360 :
361 : /*-----------------------------------------------------------------------------------------*
362 : * Function front_vad_create()
363 : *
364 : * Allocate and initialize Standalone front-VAD module
365 : *-----------------------------------------------------------------------------------------*/
366 251 : ivas_error front_vad_create_fx(
367 : FRONT_VAD_ENC_HANDLE *hFrontVad_out, /* i/o: front-VAD handle */
368 : const ENCODER_CONFIG_HANDLE hEncoderConfig /* i : configuration structure */
369 : )
370 : {
371 : FRONT_VAD_ENC_HANDLE hFrontVad;
372 :
373 251 : IF( ( hFrontVad = (FRONT_VAD_ENC_HANDLE) malloc( sizeof( FRONT_VAD_ENC ) ) ) == NULL )
374 : {
375 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for front-VAD structure \n" ) );
376 : }
377 :
378 251 : IF( ( hFrontVad->hNoiseEst = (NOISE_EST_HANDLE) malloc( sizeof( NOISE_EST_DATA ) ) ) == NULL )
379 : {
380 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Noise estimation\n" ) );
381 : }
382 251 : noise_est_init_ivas_fx( hFrontVad->hNoiseEst );
383 :
384 251 : IF( ( hFrontVad->hVAD = (VAD_HANDLE) malloc( sizeof( VAD_DATA ) ) ) == NULL )
385 : {
386 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for VAD\n" ) );
387 : }
388 251 : wb_vad_init_ivas_fx( hFrontVad->hVAD );
389 :
390 251 : hFrontVad->lp_speech_fx = 11520; // 45.0f in Q8 /* Initialize the long-term active speech level in dB */
391 251 : move16();
392 251 : hFrontVad->lp_noise_fx = 0; /* Initialize the long-term noise level in dB */
393 251 : move16();
394 251 : set16_fx( hFrontVad->mem_decim_fx, 0, shl( L_FILT_MAX, 1 ) );
395 251 : set16_fx( hFrontVad->buffer_12k8_fx, 0, i_mult( 3, shr( L_FRAME, 1 ) ) );
396 251 : hFrontVad->mem_preemph_fx = 0;
397 251 : move16();
398 251 : hFrontVad->q_mem_preemph_fx = Q15;
399 251 : move16();
400 251 : hFrontVad->q_buffer_12k8 = Q15;
401 251 : hFrontVad->q_mem_decim = Q15;
402 251 : hFrontVad->ini_frame = 0;
403 251 : move16();
404 251 : hFrontVad->hVAD->vad_flag = 1;
405 251 : move16();
406 :
407 : /* allocate delay buffer to compensate for filterbank delay */
408 251 : hFrontVad->delay_samples = NS2SA_FX2( hEncoderConfig->input_Fs, IVAS_FB_ENC_DELAY_NS );
409 251 : move16();
410 251 : hFrontVad->delay_buf_fx = NULL;
411 251 : IF( GT_16( hFrontVad->delay_samples, 0 ) )
412 : {
413 251 : IF( ( hFrontVad->delay_buf_fx = (Word16 *) malloc( hFrontVad->delay_samples * sizeof( Word16 ) ) ) == NULL )
414 : {
415 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for VAD delay buffer\n" ) );
416 : }
417 251 : set16_fx( hFrontVad->delay_buf_fx, 0, hFrontVad->delay_samples );
418 : }
419 :
420 251 : *hFrontVad_out = hFrontVad;
421 :
422 251 : return IVAS_ERR_OK;
423 : }
424 :
425 251 : void front_vad_destroy_fx(
426 : FRONT_VAD_ENC_HANDLE *hFrontVad /* i/o: front-VAD handle */
427 : )
428 : {
429 251 : IF( *hFrontVad != NULL )
430 : {
431 251 : free( ( *hFrontVad )->hNoiseEst );
432 251 : ( *hFrontVad )->hNoiseEst = NULL;
433 :
434 251 : free( ( *hFrontVad )->hVAD );
435 251 : ( *hFrontVad )->hVAD = NULL;
436 :
437 251 : IF( ( *hFrontVad )->delay_buf_fx != NULL )
438 : {
439 251 : free( ( *hFrontVad )->delay_buf_fx );
440 251 : ( *hFrontVad )->delay_buf_fx = NULL;
441 : }
442 :
443 251 : free( *hFrontVad );
444 251 : *hFrontVad = NULL;
445 : }
446 :
447 251 : return;
448 : }
449 : /*-----------------------------------------------------------------------------------------*
450 : * Function front_vad_spar()
451 : *
452 : * Standalone front-VAD module for SPAR
453 : *-----------------------------------------------------------------------------------------*/
454 157500 : ivas_error front_vad_spar_fx(
455 : SPAR_ENC_HANDLE hSpar, /* i/o: SPAR encoder structure */
456 : const Word32 *omni_in, /* i : omnidirectional input signal Q11*/
457 : ENCODER_CONFIG_HANDLE hEncoderConfig, /* i : encoder configuration handle */
458 : const Word16 input_frame /* i : input frame length Q0*/
459 : )
460 : {
461 : FRONT_VAD_ENC_HANDLE hFrontVad;
462 : Word16 input_fx[L_FRAME48k];
463 : Word16 vad_flag_dtx[1];
464 157500 : Word32 fr_bands_fx[1][2 * NB_BANDS] = { { 0 } };
465 : Word16 Etot_fx[1]; /* Q8 */
466 :
467 : Word16 localVAD_HE_SAD[1];
468 : Word16 vad_hover_flag[1];
469 : Word32 band_energies_fx[2 * NB_BANDS];
470 : Word16 high_lpn_flag;
471 : Encoder_State *st;
472 157500 : Word32 tmpN_fx[NB_BANDS] = { 0 };
473 157500 : Word32 tmpE_fx[NB_BANDS] = { 0 };
474 : Word16 corr_shift_fx;
475 :
476 : Word32 res_energy_fx;
477 : Word16 A_fx[NB_SUBFR16k * ( M + 1 )], Aw_fx[NB_SUBFR16k * ( M + 1 )];
478 : Word16 Q_esp;
479 : Word32 epsP_fx[M + 1];
480 :
481 : Word16 alw_voicing_fx[2]; /* Q15 */
482 : Word16 lsp_new_fx[M];
483 : Word16 lsp_mid_fx[M];
484 :
485 : Word16 alw_pitch_lag_12k8[2];
486 : Word16 loc_harm;
487 : Word16 epsP_h[M + 1];
488 : Word16 epsP_l[M + 1];
489 : Word32 lf_E_fx[1][2 * VOIC_BINS];
490 :
491 : Word16 sp_div_fx;
492 : Word16 Q_sp_div;
493 : Word32 non_staX_fx;
494 : Word16 sp_floor;
495 :
496 : Word16 cor_map_sum_fx;
497 : Word16 dummy_fx;
498 : Word16 S_map_fx[L_FFT / 2];
499 : Word16 relE_fx; /* Q8 */
500 : Word16 *wsp_fx;
501 : Word16 *inp_12k8_fx;
502 :
503 : Word16 old_wsp_fx[L_WSP];
504 : Word16 flag_spitch;
505 : Word32 PS_fx[L_FRAME / 2];
506 : Word16 s, Q_inp;
507 : Word16 tmp;
508 : Word16 old_pitch;
509 : ivas_error error;
510 : Word16 Qfact_PS;
511 :
512 157500 : push_wmops( "front_vad_SPAR" );
513 157500 : error = IVAS_ERR_OK;
514 157500 : move32();
515 157500 : hFrontVad = hSpar->hFrontVad;
516 157500 : st = hSpar->hCoreCoderVAD;
517 :
518 157500 : test();
519 157500 : IF( hEncoderConfig->Opt_DTX_ON && LE_32( hEncoderConfig->ivas_total_brate, SBA_DTX_BITRATE_THRESHOLD ) )
520 17790 : {
521 : /*------------------------------------------------------------------*
522 : * Initialization
523 : *-----------------------------------------------------------------*/
524 :
525 17790 : Word16 Q_add = 0;
526 17790 : move16();
527 17790 : Word16 front_create_flag = 0;
528 17790 : move16();
529 : Word16 tmp1;
530 17790 : Word16 Q_inp_12k8 = hFrontVad->q_buffer_12k8;
531 17790 : move16();
532 :
533 17790 : inp_12k8_fx = hFrontVad->buffer_12k8_fx;
534 :
535 :
536 17790 : MVR2R_WORD16( st->old_wsp_fx, old_wsp_fx, L_WSP_MEM );
537 17790 : wsp_fx = old_wsp_fx + L_WSP_MEM;
538 :
539 17790 : st->core_brate = -1; /* updated in dtx() */
540 17790 : st->input_bwidth = st->last_input_bwidth; /* Q0 */
541 17790 : move32();
542 17790 : move16();
543 :
544 : /*------------------------------------------------------------------*
545 : * compensate for SPAR filterbank delay
546 : *-----------------------------------------------------------------*/
547 17790 : st->input_fx = input_fx;
548 :
549 17790 : Copy_Scale_sig32_16( omni_in, st->input_fx, input_frame, Q16 - Q11 ); /* Q16 */
550 17790 : delay_signal_fx( st->input_fx, input_frame, hFrontVad->delay_buf_fx, hFrontVad->delay_samples );
551 :
552 : /* Scaling only if the omni_in buffer contains non-zero values */
553 17790 : maximum_abs_16_fx( st->input_fx, input_frame, &tmp );
554 17790 : IF( tmp != 0 )
555 : {
556 17549 : s = norm_s( tmp );
557 : }
558 : ELSE
559 : {
560 241 : s = 15;
561 241 : move16();
562 : }
563 :
564 17790 : maximum_abs_16_fx( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, &tmp1 );
565 17790 : IF( tmp1 != 0 )
566 : {
567 17317 : tmp1 = norm_s( tmp1 );
568 17317 : tmp1 = add( tmp1, hFrontVad->q_mem_decim );
569 17317 : s = s_min( s, tmp1 );
570 : }
571 17790 : IF( tmp != 0 )
572 : {
573 17549 : Scale_sig( st->input_fx, input_frame, s ); /* s */
574 : }
575 17790 : Q_inp = s;
576 17790 : move16();
577 : /*------------------------------------------------------------------*
578 : * Front-VAD
579 : *-----------------------------------------------------------------*/
580 : Word16 scale;
581 :
582 17790 : Word16 Q_buffer = hFrontVad->q_buffer_12k8;
583 17790 : move16();
584 17790 : Scale_sig( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, sub( Q_inp, hFrontVad->q_mem_decim ) ); /* Q_inp */
585 17790 : hFrontVad->q_mem_decim = Q_inp;
586 17790 : move16();
587 : Word16 q_band_energies;
588 : Word16 q_fr_bands[2], q_lf_E[2];
589 17790 : IF( NE_32( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands_fx,
590 : q_fr_bands, Etot_fx, lf_E_fx, q_lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies_fx,
591 : &q_band_energies, PS_fx, &Qfact_PS, &st->lgBin_E_fx[0], Q_inp, &Q_buffer, Q_add, &front_create_flag ) ),
592 : IVAS_ERR_OK ) )
593 : {
594 0 : return error;
595 : }
596 : // Scale_sig(hFrontVad->buffer_12k8_fx + 384, 3 * L_FRAME / 2 - 384, sub(Q_buffer, hFrontVad->q_buffer_12k8));
597 17790 : hFrontVad->q_buffer_12k8 = Q_buffer;
598 17790 : move16();
599 17790 : Q_inp_12k8 = hFrontVad->q_buffer_12k8;
600 17790 : move16();
601 :
602 : Word16 q_tmpN, q_tmpE;
603 35580 : noise_est_down_ivas_fx( fr_bands_fx[0], q_fr_bands[0], hFrontVad->hNoiseEst->bckr_fx, &hFrontVad->hNoiseEst->q_bckr, tmpN_fx, &q_tmpN, tmpE_fx, &q_tmpE, st->min_band, st->max_band,
604 17790 : &hFrontVad->hNoiseEst->totalNoise_32fx, L_deposit_h( Etot_fx[0] ) /*q8->q24*/, &hFrontVad->hNoiseEst->Etot_last_32fx, &hFrontVad->hNoiseEst->Etot_v_h2_32fx );
605 :
606 17790 : corr_shift_fx = correlation_shift_fx( extract_h( hFrontVad->hNoiseEst->totalNoise_32fx ) ); /* Q15 */
607 :
608 17790 : dtx_ivas_fx( st, hEncoderConfig->last_ivas_total_brate, hEncoderConfig->ivas_total_brate, vad_flag_dtx[0], inp_12k8_fx, Q_inp_12k8 );
609 :
610 : /* linear prediction analysis */
611 17790 : alw_pitch_lag_12k8[0] = st->old_pitch_la; /* Q0 */
612 17790 : alw_pitch_lag_12k8[1] = st->old_pitch_la; /* Q0 */
613 17790 : alw_voicing_fx[0] = st->voicing_fx[2];
614 17790 : alw_voicing_fx[1] = st->voicing_fx[2];
615 17790 : move16();
616 17790 : move16();
617 17790 : move16();
618 17790 : move16();
619 17790 : Word16 Q_r[2] = { 0 };
620 17790 : move16();
621 17790 : move16();
622 17790 : Scale_sig( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, sub( s_min( Q_inp_12k8, hFrontVad->q_mem_decim ), hFrontVad->q_mem_decim ) ); /* min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 */
623 17790 : Scale_sig( inp_12k8_fx, 3 * L_FRAME / 2, sub( s_min( Q_inp_12k8, hFrontVad->q_mem_decim ), Q_inp_12k8 ) ); /* min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 */
624 17790 : Q_inp_12k8 = s_min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 );
625 17790 : hFrontVad->q_mem_decim = Q_inp_12k8;
626 17790 : move16();
627 17790 : hFrontVad->q_buffer_12k8 = Q_inp_12k8;
628 17790 : move16();
629 :
630 17790 : analy_lp_fx( inp_12k8_fx, L_FRAME, L_LOOK_12k8, &res_energy_fx, A_fx, epsP_h, epsP_l, lsp_new_fx, lsp_mid_fx, st->lsp_old1_fx, alw_pitch_lag_12k8, alw_voicing_fx, INT_FS_12k8, st->element_mode, 0, Q_inp_12k8, Q_r );
631 :
632 320220 : FOR( Word16 i = 0; i <= M; i++ )
633 : {
634 302430 : epsP_fx[i] = L_Comp( epsP_h[i], epsP_l[i] ); // Q_r[0]
635 302430 : move32();
636 : }
637 :
638 17790 : relE_fx = sub( Etot_fx[0], extract_h( st->lp_speech_32fx ) );
639 17790 : Scale_sig( A_fx, ( L_FRAME / L_SUBFR ) * ( M + 1 ), -2 ); // Q12
640 :
641 17790 : st->mem_wsp_fx = (Word16) shl_sat( st->mem_wsp_fx, Q_inp_12k8 - st->mem_wsp_q ); /* Q_inp_12k8 */
642 17790 : st->mem_wsp_q = Q_inp_12k8;
643 17790 : move16();
644 17790 : ivas_find_wsp_fx( L_FRAME, L_SUBFR, NB_SUBFR, A_fx, Aw_fx, inp_12k8_fx, TILT_FAC_FX, wsp_fx, &st->mem_wsp_fx, GAMMA1, L_LOOK_12k8 );
645 :
646 17790 : IF( st->vad_flag == 0 )
647 : {
648 : /* reset the OL pitch tracker memories during inactive frames */
649 1605 : pitch_ol_init_fx( &st->old_thres_fx, &st->old_pitch, &st->delta_pit, &st->old_corr_fx );
650 : }
651 :
652 17790 : old_pitch = st->pitch[1];
653 17790 : move16();
654 : Word16 shift, Q_wsp;
655 17790 : Word16 shift1 = norm_arr( old_wsp_fx, L_WSP_MEM );
656 17790 : Word16 shift2 = norm_arr( wsp_fx, L_WSP - L_WSP_MEM );
657 17790 : maximum_abs_16_fx( old_wsp_fx, L_WSP_MEM, &shift );
658 17790 : if ( !shift )
659 : {
660 17790 : shift1 = Q15;
661 17790 : move16();
662 : }
663 17790 : maximum_abs_16_fx( wsp_fx, L_WSP - L_WSP_MEM, &shift );
664 17790 : if ( !shift )
665 : {
666 74 : shift2 = Q15;
667 74 : move16();
668 : }
669 :
670 17790 : shift = s_min( Q15, s_min( add( Q_inp_12k8, shift1 ), add( Q_inp_12k8, shift2 ) ) );
671 17790 : shift = s_min( shift, add( norm_arr( st->mem_decim2_fx, 3 ), st->Q_old_wsp2 ) );
672 17790 : shift = s_min( shift, add( norm_arr( st->old_wsp2_fx, ( L_WSP_MEM - L_INTERPOL ) / OPL_DECIM ), st->Q_old_wsp2 ) );
673 :
674 17790 : scale_sig( old_wsp_fx, L_WSP_MEM, sub( shift, Q_inp_12k8 ) );
675 17790 : scale_sig( wsp_fx, L_WSP - L_WSP_MEM, sub( shift, Q_inp_12k8 ) );
676 :
677 17790 : Q_wsp = shift;
678 17790 : move16();
679 :
680 17790 : scale_sig( st->mem_decim2_fx, 3, sub( Q_wsp, st->Q_old_wsp2 ) ); // Q( mem_decim ) = Q( old_wsp2 )
681 17790 : scale_sig( st->old_wsp2_fx, ( L_WSP_MEM - L_INTERPOL ) / OPL_DECIM, sub( Q_wsp, st->Q_old_wsp2 ) ); // Q_wsp
682 :
683 17790 : st->Q_old_wsp2 = Q_wsp;
684 17790 : move16();
685 :
686 17790 : pitch_ol_ivas_fx( st->pitch, st->voicing_fx, &st->old_pitch, &st->old_corr_fx, corr_shift_fx, &st->old_thres_fx, &st->delta_pit, st->old_wsp2_fx, wsp_fx, st->mem_decim2_fx, relE_fx, st->clas, st->input_bwidth, st->Opt_SC_VBR, Q_wsp );
687 :
688 : /* Updates for adaptive lag window memory */
689 17790 : st->old_pitch_la = st->pitch[2]; /* Q0 */
690 17790 : move16();
691 :
692 17790 : StableHighPitchDetect_ivas_fx( &flag_spitch, st->pitch, st->voicing_fx, wsp_fx, st->localVAD, &st->voicing_sm_fx, &st->voicing0_sm_fx, &st->LF_EnergyRatio_sm_fx, &st->predecision_flag, &st->diff_sm_fx, &st->energy_sm_fx, Q_wsp, st->lgBin_E_fx );
693 17790 : IF( st->hSpMusClas != NULL )
694 : {
695 : Word16 dummy_int;
696 : Word16 *cor_strong_limit;
697 17790 : dummy_int = 0;
698 17790 : move16();
699 17790 : if ( st->hGSCEnc != NULL )
700 : {
701 0 : cor_strong_limit = &st->hGSCEnc->cor_strong_limit;
702 : }
703 : else
704 : {
705 17790 : cor_strong_limit = &dummy_int;
706 : }
707 17790 : loc_harm = multi_harm_fx( st->lgBin_E_fx, hFrontVad->hNoiseEst->old_S_fx, hFrontVad->hNoiseEst->cor_map_fx, &hFrontVad->hNoiseEst->multi_harm_limit_fx, st->total_brate, st->bwidth, cor_strong_limit, &st->hSpMusClas->mean_avr_dyn_fx, &st->hSpMusClas->last_sw_dyn_fx, &cor_map_sum_fx, &dummy_fx, S_map_fx ); /* Q0 */
708 : }
709 :
710 17790 : scale = getScaleFactor32( epsP_fx, M + 1 );
711 17790 : Q_esp = add( Q_r[0], scale );
712 17790 : Scale_sig32( epsP_fx, M + 1, scale ); // Q_esp
713 :
714 17790 : scale = add( hFrontVad->hNoiseEst->ave_enr_q, s_min( L_norm_arr( hFrontVad->hNoiseEst->ave_enr_fx, NB_BANDS ), L_norm_arr( hFrontVad->hNoiseEst->ave_enr2_fx, NB_BANDS ) ) );
715 17790 : scale = s_min( scale, add( q_tmpE, L_norm_arr( tmpE_fx, NB_BANDS ) ) );
716 17790 : scale = sub( s_min( scale, Q31 ), 1 );
717 17790 : scale_sig32( hFrontVad->hNoiseEst->ave_enr_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->ave_enr_q ) );
718 17790 : scale_sig32( hFrontVad->hNoiseEst->ave_enr2_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->ave_enr_q ) );
719 17790 : scale_sig32( tmpE_fx, NB_BANDS, sub( scale, q_tmpE ) );
720 17790 : hFrontVad->hNoiseEst->ave_enr_q = scale;
721 17790 : move16();
722 :
723 17790 : scale = s_min( add( hFrontVad->hNoiseEst->q_bckr, L_norm_arr( hFrontVad->hNoiseEst->bckr_fx, NB_BANDS ) ), add( q_tmpN, L_norm_arr( tmpN_fx, NB_BANDS ) ) );
724 17790 : scale = sub( s_min( Q31, scale ), 1 ); // guard bits
725 17790 : scale_sig32( hFrontVad->hNoiseEst->bckr_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->q_bckr ) );
726 17790 : scale_sig32( tmpN_fx, NB_BANDS, sub( scale, q_tmpN ) );
727 17790 : hFrontVad->hNoiseEst->q_bckr = scale;
728 17790 : move16();
729 :
730 17790 : scale = add( hFrontVad->hNoiseEst->fr_bands_fx_q, s_min( L_norm_arr( hFrontVad->hNoiseEst->fr_bands1_fx, NB_BANDS ), L_norm_arr( hFrontVad->hNoiseEst->fr_bands2_fx, NB_BANDS ) ) );
731 17790 : scale = s_min( scale, add( q_fr_bands[0], L_norm_arr( fr_bands_fx[0], 2 * NB_BANDS ) ) );
732 17790 : scale = s_min( Q31, scale );
733 17790 : scale_sig32( hFrontVad->hNoiseEst->fr_bands1_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->fr_bands_fx_q ) );
734 17790 : scale_sig32( hFrontVad->hNoiseEst->fr_bands2_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->fr_bands_fx_q ) );
735 17790 : scale_sig32( fr_bands_fx[0], 2 * NB_BANDS, sub( scale, q_fr_bands[0] ) );
736 17790 : hFrontVad->hNoiseEst->fr_bands_fx_q = q_fr_bands[0] = scale;
737 17790 : move16();
738 17790 : move16();
739 :
740 17790 : noise_est_ivas_fx( st, old_pitch, tmpN_fx, epsP_fx, Etot_fx[0], sub( Etot_fx[0], hFrontVad->lp_speech_fx ), corr_shift_fx, tmpE_fx,
741 17790 : hFrontVad->hNoiseEst->ave_enr_q, fr_bands_fx[0], q_fr_bands[0], &cor_map_sum_fx, NULL, &sp_div_fx, &Q_sp_div, &non_staX_fx, &loc_harm,
742 17790 : lf_E_fx[0], q_lf_E[0], &hFrontVad->hNoiseEst->harm_cor_cnt, extract_h( hFrontVad->hNoiseEst->Etot_l_lp_32fx ), hFrontVad->hNoiseEst->Etot_v_h2_32fx,
743 17790 : &hFrontVad->hNoiseEst->bg_cnt, st->lgBin_E_fx, &sp_floor, S_map_fx, NULL, hFrontVad, hFrontVad->ini_frame );
744 :
745 17790 : MVR2R_WORD16( st->pitch, st->pitch, 3 );
746 17790 : vad_param_updt_fx( st, st->pitch[1], corr_shift_fx, corr_shift_fx, A_fx, &hFrontVad, 1 );
747 : /* 1st stage speech/music classification (GMM model) */
748 : /* run only to get 'high_lpn_flag' parameter */
749 17790 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
750 17790 : Word16 Etot_fx_0 = Etot_fx[0];
751 17790 : move16();
752 17790 : scale = getScaleFactor32( PS_fx, 128 );
753 17790 : Qfact_PS = add( Qfact_PS, scale );
754 17790 : Scale_sig32( PS_fx, 128, scale );
755 17790 : ivas_smc_gmm_fx( st, NULL, localVAD_HE_SAD[0], Etot_fx_0, lsp_new_fx, cor_map_sum_fx, epsP_fx, PS_fx, non_staX_fx, relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, Q_esp, hSpMusClas->past_PS_Q );
756 :
757 : /* long-term energy update */
758 17790 : ivas_long_enr_fx( st, -256 /*-1 q8*/, localVAD_HE_SAD[0], high_lpn_flag, &hFrontVad, 1, localVAD_HE_SAD, Etot_fx );
759 :
760 : /* increase ini_frame counter */
761 17790 : hFrontVad->ini_frame = s_min( add( hFrontVad->ini_frame, 1 ), MAX_FRAME_COUNTER ); /* Q0 */
762 17790 : st->ini_frame = hFrontVad->ini_frame; /* Q0 */
763 17790 : move16();
764 17790 : hSpar->front_vad_flag = st->vad_flag; /* Q0 */
765 17790 : move16();
766 17790 : hSpar->front_vad_dtx_flag = 1;
767 17790 : move16();
768 17790 : if ( st->core_brate == SID_2k40 || st->core_brate == FRAME_NO_DATA )
769 : {
770 1446 : hSpar->front_vad_dtx_flag = 0;
771 1446 : move16();
772 : }
773 17790 : hSpar->force_front_vad = 1;
774 17790 : move16();
775 17790 : st->last_core = 0;
776 17790 : move16();
777 : }
778 : ELSE
779 : {
780 139710 : hSpar->front_vad_flag = 1;
781 139710 : move16();
782 139710 : hSpar->front_vad_dtx_flag = 0;
783 139710 : move16();
784 139710 : hSpar->force_front_vad = 0;
785 139710 : move16();
786 : }
787 :
788 157500 : pop_wmops();
789 157500 : return error;
790 : }
|