Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #include "cnst.h"
36 : #include "ivas_cnst.h"
37 : #include "rom_enc.h"
38 : #include "rom_com.h"
39 : #include "prot_fx.h"
40 : #include "prot_fx_enc.h"
41 : #include <assert.h>
42 : #include "wmc_auto.h"
43 : #include <math.h>
44 : #include "prot_fx_enc.h"
45 : #include "ivas_prot_fx.h"
46 :
47 : /*-----------------------------------------------------------------------------------------*
48 : * Function front_vad_fx()
49 : *
50 : * Standalone front-VAD module
 *
 * Channel selection:
 *   - hCPE != NULL : CPE_CHANNELS channels, core states taken from hCPE->hCoreCoder[]
 *   - hCPE == NULL and st != NULL : one channel, element mode treated as IVAS_SCE
 *   - both NULL : asserts and returns IVAS_ERR_INTERNAL_FATAL
 *
 * Processing visible in this function:
 *   1. per channel defaults: localVAD_HE_SAD = 0, vad_hover_flag = 0, vad_flag_dtx = 1
 *   2. when the element mode changed (ini_frame > 0, MCT_flag == 0): destroy the handles on a
 *      switch to IVAS_CPE_MDCT, otherwise create them if DTX is on and none exist yet
 *      (this also sets *front_create_flag to 1)
 *   3. only if hFrontVads[0] != NULL and element_mode != IVAS_CPE_MDCT: per channel resampling
 *      to 12.8 kHz, pre-emphasis, spectral analysis, noise pre-estimation, wb VAD and DTX
 *      hangover; for two channels the band energies are averaged and the DTX flags OR-ed
 *   4. optional copies of PS_fx and lgBin_E_fx to PS_out_fx / Bin_E_out_fx
 *
 * Returns IVAS_ERR_OK, or the error reported by front_vad_create_fx().
51 : *-----------------------------------------------------------------------------------------*/
52 428045 : ivas_error front_vad_fx(
53 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure, nullable */
54 : Encoder_State *st, /* i/o: encoder state structure */
55 : const ENCODER_CONFIG_HANDLE hEncoderConfig, /* i : configuration structure */
56 : FRONT_VAD_ENC_HANDLE *hFrontVads, /* i/o: FrontVad handles */
57 : const Word16 MCT_flag, /* i : hMCT handle allocated (1) or not (0) Q0*/
58 : const Word16 input_frame, /* i : frame length Q0*/
59 : Word16 vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO Q0*/
60 : Word32 fr_bands_fx[][2 * NB_BANDS], /* o : energy in frequency bands q_fr_bands_fx*/
61 : Word16 q_fr_bands[], /* o : Q of fr_bands_fx */
62 : Word16 Etot_LR_fx[], /* o : total energy Left & Right channel Q8*/
63 : Word32 lf_E_fx[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels q_lf_E*/ /* NOTE(review): tagged 'i' but only handed to ivas_analy_sp_fx() here; looks like an output - confirm */
64 : Word16 q_lf_E[], /* o : Q of lf_E_fx */
65 : Word16 localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels Q0*/
66 : Word16 vad_hover_flag[], /* o : VAD hangover flag Q0*/
67 : Word32 band_energies_LR_fx[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN q_band_energies_LR */
68 : Word16 *q_band_energies_LR, /* o : Q of band_energies_LR_fx */
69 : Word32 *PS_out_fx, /* o : energy spectrum q_PS_out*/
70 : Word16 *q_PS_out, /* o : Q of PS_out_fx Q0*/
71 : Word16 *Bin_E_out_fx, /* o : log-energy spectrum of the current frame Q7*/
72 : Word16 Q_inp, /* i : Q used for the input signal when deriving the pre-emphasis memory Q and Q_new; overwritten locally when *front_create_flag is set */
73 : Word16 *Q_buffer, /* i/o: per-channel Q of hFrontVad->buffer_12k8_fx, updated to Q_new */
74 : Word16 Q_add, /* i : extra Q offset added when forming Q_new; forced to 0 when *front_create_flag is set */
75 : Word16 *front_create_flag ) /* i/o: set to 1 when handles are created here; non-zero triggers the rescaling block before the VAD */
76 : {
77 : ENC_CORE_HANDLE *sts;
78 : Word32 band_energies_fx[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */
79 : Word32 PS_fx[128];
80 : Word16 *lgBin_E_fx;
81 : Word16 snr_sum_he_fx;
82 : // Word32 fr_bands_cp_fx[2 * NB_BANDS] = { 0 };
83 428045 : Word32 Bin_E_fx[L_FFT] = { 0 }; /* per bin log energy spectrum for mid-frame */
84 428045 : Word32 Bin_E_old_fx[L_FFT / 2] = { 0 }; /* old per bin log energy spectrum for mid-frame */
85 : Word16 fft_buffLR_fx[2 * L_FFT]; /* fft buffer */
86 : // Word16 front_create_flag = 0;
87 : Word16 n, n_chan, dummy;
88 : Word16 dummy_short;
89 : Word16 element_mode, last_element_mode;
90 : ivas_error error;
91 : Word16 Q_new;
92 : Word16 Qband, mem_decim_size;
93 : Word32 Etot_LR_32fx;
94 428045 : error = IVAS_ERR_OK;
95 428045 : push_wmops( "front_vad" );
96 428045 : move16();
97 428045 : move16();
98 :
99 428045 : Q_new = 0;
100 428045 : move16();
101 :
102 428045 : lgBin_E_fx = NULL; /* stays NULL when no single-channel state is supplied */
103 428045 : if ( st != NULL )
104 : {
105 17790 : lgBin_E_fx = &st->lgBin_E_fx[0]; /* Q8 */
106 17790 : move16();
107 : }
108 428045 : IF( hCPE != NULL )
109 : {
110 410255 : n_chan = CPE_CHANNELS;
111 410255 : move16();
112 410255 : sts = &hCPE->hCoreCoder[0];
113 410255 : element_mode = hCPE->element_mode; /* Q0 */
114 410255 : move16();
115 410255 : last_element_mode = hCPE->last_element_mode; /* Q0 */
116 410255 : move16();
117 : }
118 17790 : ELSE IF( st != NULL )
119 : {
120 17790 : n_chan = 1;
121 17790 : move16();
122 17790 : sts = &st;
123 17790 : element_mode = IVAS_SCE;
124 17790 : move16();
125 17790 : last_element_mode = IVAS_SCE;
126 17790 : move16();
127 : }
128 : ELSE
129 : {
130 0 : assert( 0 && "Either CPE or SCE must be given!" );
131 : return IVAS_ERR_INTERNAL_FATAL; /* NOTE(review): returns after push_wmops() without a matching pop_wmops() */
132 : }
133 :
134 1266345 : FOR( n = 0; n < n_chan; n++ ) /* default outputs; they are only overwritten inside the VAD branch below */
135 : {
136 838300 : localVAD_HE_SAD[n] = 0;
137 838300 : move16();
138 838300 : vad_hover_flag[n] = 0;
139 838300 : move16();
140 838300 : vad_flag_dtx[n] = 1;
141 838300 : move16();
142 : }
143 :
144 : /*------------------------------------------------------------------*
145 : * Allocate/deallocate hFrontVad handles in case of element_mode change
146 : *-----------------------------------------------------------------*/
147 428045 : test();
148 428045 : test();
149 428045 : IF( sts[0]->ini_frame > 0 && MCT_flag == 0 && NE_16( last_element_mode, element_mode ) )
150 : {
151 1742 : IF( EQ_16( element_mode, IVAS_CPE_MDCT ) ) /* switching to MDCT stereo: release the handles */
152 : {
153 749 : IF( hFrontVads[0] != NULL )
154 : {
155 189 : FOR( n = 0; n < n_chan; n++ )
156 : {
157 126 : front_vad_destroy_fx( &hFrontVads[n] );
158 126 : hFrontVads[n] = NULL;
159 : }
160 : }
161 : }
162 : ELSE
163 : {
164 993 : test();
165 993 : IF( sts[0]->Opt_DTX_ON && hFrontVads[0] == NULL ) /* leaving MDCT stereo with DTX on: create the handles */
166 : {
167 180 : for ( n = 0; n < n_chan; n++ ) /* NOTE(review): plain 'for' whereas the other loops in this function use the FOR macro */
168 : {
169 120 : *front_create_flag = 1;
170 120 : move16();
171 120 : IF( NE_32( ( error = front_vad_create_fx( &hFrontVads[n], hEncoderConfig ) ), IVAS_ERR_OK ) )
172 : {
173 0 : return error; /* NOTE(review): returns without pop_wmops(); handles created in earlier iterations stay in hFrontVads[] */
174 : }
175 : }
176 : }
177 : }
178 : }
179 428045 : test();
180 : /* Only run VAD if DTX is on and TD stereo or unified stereo is selected */
181 428045 : IF( hFrontVads[0] != NULL && NE_16( element_mode, IVAS_CPE_MDCT ) )
182 : {
183 :
184 : #ifdef DEBUGGING
185 : /* If stereo switching is not enabled and TD is selected restore element_mode to TD every frame before the VAD */
186 : if ( hCPE != NULL && hCPE->stereo_mode_cmdl == IVAS_CPE_TD )
187 : {
188 : hCPE->element_mode = IVAS_CPE_TD;
189 : }
190 : #endif
191 :
192 : /*------------------------------------------------------------------*
193 : * VAD
194 : *-----------------------------------------------------------------*/
195 49758 : set_val_Word32( band_energies_LR_fx, 0, 2 * NB_BANDS );
196 49758 : Qband = -1;
197 49758 : move16();
198 :
199 49758 : IF( *front_create_flag ) /* handles were just created: bring input and decimation memory to a common Q */
200 : {
201 60 : Word16 scale_inp = Q15;
202 60 : Q_add = 0;
203 60 : move16();
204 180 : FOR( n = 0; n < n_chan; n++ )
205 : {
206 120 : hFrontVads[n]->lp_speech_fx = shr( hFrontVads[n]->lp_speech_fx, 1 );
207 120 : move16();
208 :
209 120 : scale_inp = s_min( sts[n]->q_inp, hCPE->hFrontVad[n]->q_mem_decim ); /* NOTE(review): hCPE is dereferenced without a NULL check; safe only if the flag is never set on the st-only path - confirm against callers */
210 120 : Scale_sig( sts[n]->input_fx, input_frame, sub( scale_inp, sts[n]->q_inp ) );
211 120 : Scale_sig( hCPE->hFrontVad[n]->mem_decim_fx, 2 * L_FILT_MAX, sub( scale_inp, hCPE->hFrontVad[n]->q_mem_decim ) );
212 120 : sts[n]->q_inp = scale_inp;
213 120 : hCPE->hFrontVad[n]->q_mem_decim = scale_inp;
214 120 : Q_inp = scale_inp; /* the value from the last channel is the one used by the processing loop below */
215 120 : move16();
216 120 : Q_buffer[n] = hCPE->hFrontVad[n]->q_buffer_12k8;
217 120 : move16();
218 : }
219 : }
220 :
221 : Word16 q_Bin_E, q_Bin_E_old;
222 : Word16 q_band_energies;
223 : Word16 q_fft_buffLR;
224 131484 : FOR( n = 0; n < n_chan; n++ )
225 : {
226 : FRONT_VAD_ENC_HANDLE hFrontVad;
227 81726 : hFrontVad = hFrontVads[n];
228 :
229 : /* Move previous frame 12k8 signal */
230 :
231 81726 : MVR2R_WORD16( hFrontVad->buffer_12k8_fx + L_FFT, hFrontVad->buffer_12k8_fx, L_FFT / 2 );
232 :
233 : /* Resample to 12k8 */
234 81726 : modify_Fs_ivas_fx( sts[n]->input_fx, input_frame, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2, INT_FS_12k8, hFrontVad->mem_decim_fx, ( sts[0]->max_bwidth == NB ), &Qband, &mem_decim_size );
235 :
236 : /* Preemphasis */
237 81726 : hFrontVad->mem_preemph_fx = shl( hFrontVad->mem_preemph_fx, sub( add( Q_inp, Qband ), hFrontVad->q_mem_preemph_fx ) ); /* Q_inp + Qband */
238 81726 : move16();
239 81726 : hFrontVad->q_mem_preemph_fx = add( Q_inp, Qband );
240 81726 : move16();
241 :
242 81726 : PREEMPH_FX( hFrontVad->buffer_12k8_fx + L_FFT / 2, PREEMPH_FAC, L_FRAME, &hFrontVad->mem_preemph_fx );
243 :
244 81726 : Q_new = add( sub( Q_inp, Qband ), Q_add ); /* target Q for the whole 12k8 buffer: old part comes from Q_buffer[n], new part from Q_inp + Qband */
245 81726 : Scale_sig( hFrontVad->buffer_12k8_fx, L_FFT / 2, sub( Q_new, Q_buffer[n] ) ); /* Q_new */
246 81726 : Scale_sig( hFrontVad->buffer_12k8_fx + L_FFT / 2, 384 - L_FFT / 2, sub( Q_new, add( Q_inp, Qband ) ) ); /* Q_new */
247 :
248 81726 : Q_buffer[n] = Q_new;
249 81726 : move16();
250 :
251 81726 : ivas_analy_sp_fx( IVAS_CPE_TD, hCPE, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2 - 3 * ( L_SUBFR / 2 ), Q_new, fr_bands_fx[n],
252 81726 : &q_fr_bands[n], lf_E_fx[n], &q_lf_E[n], &Etot_LR_32fx, sts[0]->min_band, sts[0]->max_band, Bin_E_fx, &q_Bin_E, Bin_E_old_fx,
253 : &q_Bin_E_old, PS_fx, q_PS_out, lgBin_E_fx, band_energies_fx, &q_band_energies, fft_buffLR_fx, &q_fft_buffLR );
254 81726 : if ( n == 0 )
255 : {
256 49758 : *q_band_energies_LR = q_band_energies;
257 49758 : move16();
258 : }
259 :
260 : /* v_add( &band_energies[0], &band_energies_LR[0], &band_energies_LR[0], 2 * NB_BANDS ); */
261 81726 : IF( EQ_16( n, 1 ) ) /* second channel: bring both energy vectors to the smaller Q before accumulating */
262 : {
263 31968 : IF( LT_16( *q_band_energies_LR, q_band_energies ) )
264 : {
265 7756 : scale_sig32( band_energies_fx, 2 * NB_BANDS, sub( *q_band_energies_LR, q_band_energies ) ); // q_band_energies_LR
266 : }
267 : ELSE
268 : {
269 24212 : scale_sig32( band_energies_LR_fx, 2 * NB_BANDS, sub( q_band_energies, *q_band_energies_LR ) ); // q_band_energies_LR
270 24212 : *q_band_energies_LR = q_band_energies;
271 24212 : move16();
272 : }
273 : }
274 :
275 81726 : v_add_fx( &band_energies_fx[0], &band_energies_LR_fx[0], &band_energies_LR_fx[0], 2 * NB_BANDS ); // q_band_energies_LR
276 :
277 : /* add up energies for later calculating average of channel energies */
278 :
279 81726 : noise_est_pre_32fx( Etot_LR_32fx, hFrontVads[0]->ini_frame, hFrontVad->hNoiseEst, 0, 0, 0 );
280 :
281 81726 : Etot_LR_fx[n] = extract_h( Etot_LR_32fx );
282 81726 : move16();
283 :
284 : /* wb_vad */
285 81726 : Word16 scale = s_min( q_fr_bands[n], add( hFrontVads[n]->hNoiseEst->q_enrO, L_norm_arr( hFrontVads[n]->hNoiseEst->enrO_fx, NB_BANDS ) ) ); /* common Q for enrO_fx and fr_bands_fx[n] */
286 :
287 81726 : scale_sig32( hFrontVads[n]->hNoiseEst->enrO_fx, NB_BANDS, sub( scale, hFrontVads[n]->hNoiseEst->q_enrO ) ); // scale
288 81726 : hFrontVads[n]->hNoiseEst->q_enrO = scale;
289 81726 : move16();
290 :
291 81726 : scale_sig32( fr_bands_fx[n], 2 * NB_BANDS, sub( scale, q_fr_bands[n] ) ); // scale
292 81726 : q_fr_bands[n] = scale;
293 81726 : move16();
294 :
295 163452 : hFrontVad->hVAD->vad_flag = wb_vad_ivas_fx( sts[n], fr_bands_fx[n], q_fr_bands[n], &dummy, &dummy, &dummy, &snr_sum_he_fx, /* the three &dummy results and dummy_short are never read in this function */
296 81726 : &localVAD_HE_SAD[n], &dummy_short, hFrontVad->hVAD, hFrontVad->hNoiseEst,
297 81726 : hFrontVad->lp_speech_fx, hFrontVad->lp_noise_fx ); // Q0
298 :
299 81726 : test();
300 81726 : test();
301 81726 : if ( n == 0 && GT_16( n_chan, 1 ) && EQ_16( last_element_mode, IVAS_CPE_DFT ) )
302 : {
303 30241 : sts[1]->last_coder_type = sts[0]->last_coder_type; /* Q0 */
304 30241 : move16();
305 : }
306 81726 : vad_flag_dtx[n] = ivas_dtx_hangover_addition_fx( sts[n], hFrontVad->hVAD->vad_flag, sub( hFrontVad->lp_speech_fx, hFrontVad->lp_noise_fx ), 0 /* <- no cldfb addition */, &vad_hover_flag[n], hFrontVad->hVAD, hFrontVad->hNoiseEst, &hFrontVads[n]->rem_dtx_ho ); /* Q0 */
307 81726 : move16();
308 :
309 81726 : if ( EQ_16( n_chan, 1 ) ) /* single-channel case: publish the decision in the core state */
310 : {
311 17790 : sts[n]->vad_flag = hFrontVad->hVAD->vad_flag; /* Q0 */
312 17790 : move16();
313 : }
314 : }
315 :
316 49758 : IF( EQ_16( n_chan, CPE_CHANNELS ) )
317 : {
318 : /* get average channel energies, adding up was already done, so only need to scale by number of channels */
319 31968 : v_multc_fixed( &band_energies_LR_fx[0], ONE_IN_Q30 /* 0.5f in Q31 */, &band_energies_LR_fx[0], 2 * NB_BANDS );
320 :
321 : /* Logical OR between L and R decisions */
322 31968 : test();
323 31968 : vad_flag_dtx[0] = vad_flag_dtx[0] || vad_flag_dtx[1];
324 31968 : move16();
325 : }
326 49758 : IF( sts[0]->hFdCngEnc != NULL )
327 : {
328 31968 : resetFdCngEnc_fx( sts[0] );
329 : }
330 49758 : test();
331 : /* Part of DTX to decide if SID/NO_DATA */
332 49758 : IF( vad_flag_dtx[0] == 0 && GT_16( sts[0]->ini_frame, 2 ) ) /* CNG coding starts after 3 frames */
333 : {
334 13579 : IF( sts[0]->fd_cng_reset_flag == 0 )
335 : {
336 13534 : if ( hCPE != NULL ) /* inactive CPE frame: switch the element to DFT stereo and clear the second channel's active counter */
337 : {
338 10395 : hCPE->element_mode = IVAS_CPE_DFT;
339 10395 : move16();
340 10395 : sts[1]->active_cnt = 0;
341 10395 : move16();
342 : }
343 : }
344 : ELSE
345 : {
346 45 : vad_flag_dtx[0] = 1; /* a pending FD-CNG reset forces the frame to be treated as active */
347 45 : move16();
348 : }
349 : }
350 : ELSE
351 : {
352 36179 : vad_flag_dtx[0] = 1;
353 36179 : move16();
354 : }
355 : }
356 :
357 428045 : IF( PS_out_fx != NULL )
358 : {
359 17790 : MVR2R_WORD32( PS_fx, PS_out_fx, L_FRAME / 2 ); /* NOTE(review): PS_fx has no initialiser and is only passed to ivas_analy_sp_fx() inside the VAD branch; if that branch was skipped this copies indeterminate data - confirm callers */
360 : }
361 :
362 428045 : IF( Bin_E_out_fx != NULL )
363 : {
364 :
365 17790 : MVR2R_WORD16( lgBin_E_fx, Bin_E_out_fx, L_FRAME / 2 ); /* NOTE(review): lgBin_E_fx is NULL when st == NULL; presumably Bin_E_out_fx is only passed together with st - confirm */
366 : }
367 :
368 428045 : pop_wmops();
369 428045 : return error;
370 : }
371 :
372 :
373 : /*-----------------------------------------------------------------------------------------*
374 : * Function front_vad_create()
375 : *
376 : * Allocate and initialize Standalone front-VAD module
377 : *-----------------------------------------------------------------------------------------*/
378 251 : ivas_error front_vad_create_fx(
379 : FRONT_VAD_ENC_HANDLE *hFrontVad_out, /* i/o: front-VAD handle */
380 : const ENCODER_CONFIG_HANDLE hEncoderConfig /* i : configuration structure */
381 : )
382 : {
383 : FRONT_VAD_ENC_HANDLE hFrontVad;
384 :
385 251 : IF( ( hFrontVad = (FRONT_VAD_ENC_HANDLE) malloc( sizeof( FRONT_VAD_ENC ) ) ) == NULL )
386 : {
387 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for front-VAD structure \n" ) );
388 : }
389 :
390 251 : IF( ( hFrontVad->hNoiseEst = (NOISE_EST_HANDLE) malloc( sizeof( NOISE_EST_DATA ) ) ) == NULL )
391 : {
392 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Noise estimation\n" ) );
393 : }
394 251 : noise_est_init_ivas_fx( hFrontVad->hNoiseEst );
395 :
396 251 : IF( ( hFrontVad->hVAD = (VAD_HANDLE) malloc( sizeof( VAD_DATA ) ) ) == NULL )
397 : {
398 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for VAD\n" ) );
399 : }
400 251 : wb_vad_init_ivas_fx( hFrontVad->hVAD );
401 :
402 251 : hFrontVad->lp_speech_fx = 11520; // Q9/* Initialize the long-term active speech level in dB */
403 251 : move16();
404 251 : hFrontVad->lp_noise_fx = 0; /* Initialize the long-term noise level in dB */
405 251 : move16();
406 251 : set16_fx( hFrontVad->mem_decim_fx, 0, shl( L_FILT_MAX, 1 ) );
407 251 : set16_fx( hFrontVad->buffer_12k8_fx, 0, i_mult( 3, shr( L_FRAME, 1 ) ) );
408 251 : hFrontVad->mem_preemph_fx = 0;
409 251 : move16();
410 251 : hFrontVad->q_mem_preemph_fx = Q15;
411 251 : move16();
412 251 : hFrontVad->q_buffer_12k8 = Q15;
413 251 : hFrontVad->q_mem_decim = Q15;
414 251 : hFrontVad->ini_frame = 0;
415 251 : move16();
416 251 : hFrontVad->hVAD->vad_flag = 1;
417 251 : move16();
418 :
419 : /* allocate delay buffer to compensate for filterbank delay */
420 251 : hFrontVad->delay_samples = NS2SA_FX2( hEncoderConfig->input_Fs, IVAS_FB_ENC_DELAY_NS );
421 251 : move16();
422 251 : hFrontVad->delay_buf_fx = NULL;
423 251 : IF( GT_16( hFrontVad->delay_samples, 0 ) )
424 : {
425 251 : IF( ( hFrontVad->delay_buf_fx = (Word16 *) malloc( hFrontVad->delay_samples * sizeof( Word16 ) ) ) == NULL )
426 : {
427 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for VAD delay buffer\n" ) );
428 : }
429 251 : set16_fx( hFrontVad->delay_buf_fx, 0, hFrontVad->delay_samples );
430 : }
431 :
432 251 : *hFrontVad_out = hFrontVad;
433 :
434 251 : return IVAS_ERR_OK;
435 : }
436 :
437 251 : void front_vad_destroy_fx(
438 : FRONT_VAD_ENC_HANDLE *hFrontVad /* i/o: front-VAD handle */
439 : )
440 : {
441 251 : IF( *hFrontVad != NULL )
442 : {
443 251 : free( ( *hFrontVad )->hNoiseEst );
444 251 : ( *hFrontVad )->hNoiseEst = NULL;
445 :
446 251 : free( ( *hFrontVad )->hVAD );
447 251 : ( *hFrontVad )->hVAD = NULL;
448 :
449 251 : IF( ( *hFrontVad )->delay_buf_fx != NULL )
450 : {
451 251 : free( ( *hFrontVad )->delay_buf_fx );
452 251 : ( *hFrontVad )->delay_buf_fx = NULL;
453 : }
454 :
455 251 : free( *hFrontVad );
456 251 : *hFrontVad = NULL;
457 : }
458 :
459 251 : return;
460 : }
461 : /*-----------------------------------------------------------------------------------------*
462 : * Function front_vad_spar_fx()
463 : *
464 : * Standalone front-VAD module for SPAR
 *
 * The VAD path is only taken when hEncoderConfig->Opt_DTX_ON is set and ivas_total_brate is
 * not above SBA_DTX_BITRATE_THRESHOLD. Otherwise only the three hSpar flags are set
 * (front_vad_flag = 1, front_vad_dtx_flag = 0, force_front_vad = 0).
 *
 * Steps of the VAD path, in the order they appear below:
 *   1. convert the omni input to 16 bit, apply the filterbank delay, normalise it and align
 *      the decimation memory to the same Q
 *   2. front_vad_fx() on the single state hSpar->hCoreCoderVAD
 *   3. noise_est_down, correlation shift, dtx
 *   4. LP analysis, weighted speech, open-loop pitch, stable-high-pitch detection
 *   5. multi-harmonic analysis, noise estimation, VAD parameter update
 *   6. first-stage speech/music classifier and long-term energy update
 *   7. update of ini_frame and of the hSpar front-VAD flags
 *
 * Returns IVAS_ERR_OK, or the error reported by front_vad_fx().
465 : *-----------------------------------------------------------------------------------------*/
466 157500 : ivas_error front_vad_spar_fx(
467 : SPAR_ENC_HANDLE hSpar, /* i/o: SPAR encoder structure */
468 : const Word32 *omni_in, /* i : omnidirectional input signal Q11*/
469 : ENCODER_CONFIG_HANDLE hEncoderConfig, /* i : encoder configuration handle */
470 : const Word16 input_frame /* i : input frame length Q0*/
471 : )
472 : {
473 : FRONT_VAD_ENC_HANDLE hFrontVad;
474 : Word16 input_fx[L_FRAME48k];
475 : Word16 vad_flag_dtx[1];
476 157500 : Word32 fr_bands_fx[1][2 * NB_BANDS] = { { 0 } };
477 : Word16 Etot_fx[1]; /* Q8 */
478 :
479 : Word16 localVAD_HE_SAD[1];
480 : Word16 vad_hover_flag[1];
481 : Word32 band_energies_fx[2 * NB_BANDS];
482 : Word16 high_lpn_flag;
483 : Encoder_State *st;
484 157500 : Word32 tmpN_fx[NB_BANDS] = { 0 };
485 157500 : Word32 tmpE_fx[NB_BANDS] = { 0 };
486 : Word16 corr_shift_fx;
487 :
488 : Word32 res_energy_fx;
489 : Word16 A_fx[NB_SUBFR16k * ( M + 1 )], Aw_fx[NB_SUBFR16k * ( M + 1 )];
490 : Word16 Q_esp;
491 : Word32 epsP_fx[M + 1];
492 :
493 : Word16 alw_voicing_fx[2]; /* Q15 */
494 : Word16 lsp_new_fx[M];
495 : Word16 lsp_mid_fx[M];
496 :
497 : Word16 alw_pitch_lag_12k8[2];
498 : Word16 loc_harm;
499 : Word16 epsP_h[M + 1];
500 : Word16 epsP_l[M + 1];
501 : Word32 lf_E_fx[1][2 * VOIC_BINS];
502 :
503 : Word16 sp_div_fx;
504 : Word16 Q_sp_div;
505 : Word16 non_staX_fx;
506 : Word16 sp_floor;
507 :
508 : Word16 cor_map_sum_fx;
509 : Word16 dummy_fx;
510 : Word16 S_map_fx[L_FFT / 2];
511 : Word16 relE_fx; /* Q8 */
512 : Word16 *wsp_fx;
513 : Word16 *inp_12k8_fx;
514 :
515 : Word16 old_wsp_fx[L_WSP];
516 : Word16 flag_spitch;
517 : Word32 PS_fx[L_FRAME / 2];
518 : Word16 s, Q_inp;
519 : Word16 tmp;
520 : Word16 old_pitch;
521 : ivas_error error;
522 : Word16 Qfact_PS;
523 :
524 157500 : push_wmops( "front_vad_SPAR" );
525 157500 : error = IVAS_ERR_OK;
526 157500 : move32();
527 157500 : hFrontVad = hSpar->hFrontVad;
528 157500 : st = hSpar->hCoreCoderVAD;
529 :
530 157500 : test();
531 157500 : IF( hEncoderConfig->Opt_DTX_ON && LE_32( hEncoderConfig->ivas_total_brate, SBA_DTX_BITRATE_THRESHOLD ) )
532 17790 : {
533 : /*------------------------------------------------------------------*
534 : * Initialization
535 : *-----------------------------------------------------------------*/
536 :
537 17790 : Word16 Q_add = 0;
538 17790 : move16();
539 17790 : Word16 front_create_flag = 0;
540 17790 : move16();
541 : Word16 tmp1;
542 17790 : Word16 Q_inp_12k8 = hFrontVad->q_buffer_12k8;
543 17790 : move16();
544 :
545 17790 : inp_12k8_fx = hFrontVad->buffer_12k8_fx;
546 :
547 :
548 17790 : MVR2R_WORD16( st->old_wsp_fx, old_wsp_fx, L_WSP_MEM );
549 17790 : wsp_fx = old_wsp_fx + L_WSP_MEM;
550 :
551 17790 : st->core_brate = -1; /* updated in dtx() */
552 17790 : st->input_bwidth = st->last_input_bwidth; /* Q0 */
553 17790 : move32();
554 17790 : move16();
555 :
556 : /*------------------------------------------------------------------*
557 : * compensate for SPAR filterbank delay
558 : *-----------------------------------------------------------------*/
559 17790 : st->input_fx = input_fx; /* NOTE(review): input_fx is a local array, so st->input_fx is left pointing at dead stack storage after return - confirm it is re-assigned before any later use */
560 :
561 17790 : Copy_Scale_sig32_16( omni_in, st->input_fx, input_frame, Q16 - Q11 ); /* Q16 */
562 17790 : delay_signal_fx( st->input_fx, input_frame, hFrontVad->delay_buf_fx, hFrontVad->delay_samples );
563 :
564 : /* Scaling only if the omni_in buffer contains non-zero values */
565 17790 : maximum_abs_16_fx( st->input_fx, input_frame, &tmp );
566 17790 : IF( tmp != 0 )
567 : {
568 17549 : s = norm_s( tmp );
569 : }
570 : ELSE
571 : {
572 241 : s = 15;
573 241 : move16();
574 : }
575 :
576 17790 : maximum_abs_16_fx( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, &tmp1 );
577 17790 : IF( tmp1 != 0 ) /* limit the shift so that the decimation memory can be brought to the same Q */
578 : {
579 17317 : tmp1 = norm_s( tmp1 );
580 17317 : tmp1 = add( tmp1, hFrontVad->q_mem_decim );
581 17317 : s = s_min( s, tmp1 );
582 : }
583 17790 : IF( tmp != 0 )
584 : {
585 17549 : Scale_sig( st->input_fx, input_frame, s ); /* s */
586 : }
587 17790 : Q_inp = s;
588 17790 : move16();
589 : /*------------------------------------------------------------------*
590 : * Front-VAD
591 : *-----------------------------------------------------------------*/
592 : Word16 scale;
593 :
594 17790 : Word16 Q_buffer = hFrontVad->q_buffer_12k8;
595 17790 : move16();
596 17790 : Scale_sig( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, sub( Q_inp, hFrontVad->q_mem_decim ) ); /* Q_inp */
597 17790 : hFrontVad->q_mem_decim = Q_inp;
598 17790 : move16();
599 : Word16 q_band_energies;
600 : Word16 q_fr_bands[2], q_lf_E[2]; /* only index 0 is used in this function */
601 17790 : IF( NE_32( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands_fx,
602 : q_fr_bands, Etot_fx, lf_E_fx, q_lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies_fx,
603 : &q_band_energies, PS_fx, &Qfact_PS, &st->lgBin_E_fx[0], Q_inp, &Q_buffer, Q_add, &front_create_flag ) ),
604 : IVAS_ERR_OK ) )
605 : {
606 0 : return error; /* NOTE(review): returns after push_wmops() without a matching pop_wmops() */
607 : }
608 : // Scale_sig(hFrontVad->buffer_12k8_fx + 384, 3 * L_FRAME / 2 - 384, sub(Q_buffer, hFrontVad->q_buffer_12k8));
609 17790 : hFrontVad->q_buffer_12k8 = Q_buffer;
610 17790 : move16();
611 17790 : Q_inp_12k8 = hFrontVad->q_buffer_12k8;
612 17790 : move16();
613 :
614 : Word16 q_tmpN, q_tmpE;
615 17790 : noise_est_down_ivas_fx( fr_bands_fx[0], q_fr_bands[0], hFrontVad->hNoiseEst->bckr_fx, &hFrontVad->hNoiseEst->q_bckr, tmpN_fx, &q_tmpN, tmpE_fx, &q_tmpE, st->min_band, st->max_band,
616 17790 : &hFrontVad->hNoiseEst->totalNoise_fx, Etot_fx[0], &hFrontVad->hNoiseEst->Etot_last_32fx, &hFrontVad->hNoiseEst->Etot_v_h2_fx );
617 :
618 17790 : corr_shift_fx = correlation_shift_fx( hFrontVad->hNoiseEst->totalNoise_fx ); /* Q15 */
619 :
620 17790 : dtx_ivas_fx( st, hEncoderConfig->last_ivas_total_brate, hEncoderConfig->ivas_total_brate, vad_flag_dtx[0], inp_12k8_fx, Q_inp_12k8 );
621 :
622 : /* linear prediction analysis */
623 17790 : alw_pitch_lag_12k8[0] = st->old_pitch_la; /* Q0 */
624 17790 : alw_pitch_lag_12k8[1] = st->old_pitch_la; /* Q0 */
625 17790 : alw_voicing_fx[0] = st->voicing_fx[2];
626 17790 : alw_voicing_fx[1] = st->voicing_fx[2];
627 17790 : move16();
628 17790 : move16();
629 17790 : move16();
630 17790 : move16();
631 17790 : Word16 Q_r[2] = { 0 };
632 17790 : move16();
633 17790 : move16();
634 17790 : Scale_sig( hFrontVad->mem_decim_fx, 2 * L_FILT_MAX, sub( s_min( Q_inp_12k8, hFrontVad->q_mem_decim ), hFrontVad->q_mem_decim ) ); /* min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 */
635 17790 : Scale_sig( inp_12k8_fx, 3 * L_FRAME / 2, sub( s_min( Q_inp_12k8, hFrontVad->q_mem_decim ), Q_inp_12k8 ) ); /* min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 */
636 17790 : Q_inp_12k8 = s_min( hFrontVad->q_mem_decim, hFrontVad->q_buffer_12k8 );
637 17790 : hFrontVad->q_mem_decim = Q_inp_12k8;
638 17790 : move16();
639 17790 : hFrontVad->q_buffer_12k8 = Q_inp_12k8;
640 17790 : move16();
641 :
642 : #ifdef REMOVE_EVS_DUPLICATES
643 17790 : analy_lp_ivas_fx( inp_12k8_fx, L_FRAME, L_LOOK_12k8, &res_energy_fx, A_fx, epsP_h, epsP_l, lsp_new_fx, lsp_mid_fx, st->lsp_old1_fx, alw_pitch_lag_12k8, alw_voicing_fx, INT_FS_12k8, st->element_mode, 0, Q_inp_12k8, Q_r );
644 : #else
645 : analy_lp_ivas_fx( inp_12k8_fx, L_FRAME, L_LOOK_12k8, &res_energy_fx, A_fx, epsP_h, epsP_l, lsp_new_fx, lsp_mid_fx, st->lsp_old1_fx, alw_pitch_lag_12k8, alw_voicing_fx, INT_FS_12k8, 0 /* <-- sec_chan_low_rate */, Q_inp_12k8, Q_r );
646 : #endif
647 :
648 320220 : FOR( Word16 i = 0; i <= M; i++ ) /* rebuild the 32-bit prediction-error energies from their high/low halves */
649 : {
650 302430 : epsP_fx[i] = L_Comp( epsP_h[i], epsP_l[i] ); // Q_r[0]
651 302430 : move32();
652 : }
653 :
654 17790 : relE_fx = sub( Etot_fx[0], st->lp_speech_fx );
655 17790 : Scale_sig( A_fx, ( L_FRAME / L_SUBFR ) * ( M + 1 ), -2 ); // Q12
656 :
657 17790 : st->mem_wsp_fx = (Word16) shl_sat( st->mem_wsp_fx, Q_inp_12k8 - st->mem_wsp_q ); /* Q_inp_12k8 */
658 17790 : st->mem_wsp_q = Q_inp_12k8;
659 17790 : move16();
660 17790 : ivas_find_wsp_fx( L_FRAME, L_SUBFR, NB_SUBFR, A_fx, Aw_fx, inp_12k8_fx, TILT_FAC_FX, wsp_fx, &st->mem_wsp_fx, GAMMA1, L_LOOK_12k8 );
661 :
662 17790 : IF( st->vad_flag == 0 )
663 : {
664 : /* reset the OL pitch tracker memories during inactive frames */
665 3363 : pitch_ol_init_fx( &st->old_thres_fx, &st->old_pitch, &st->delta_pit, &st->old_corr_fx );
666 : }
667 :
668 17790 : old_pitch = st->pitch[1];
669 17790 : move16();
670 : Word16 shift, Q_wsp;
671 17790 : Word16 shift1 = norm_arr( old_wsp_fx, L_WSP_MEM );
672 17790 : Word16 shift2 = norm_arr( wsp_fx, L_WSP - L_WSP_MEM );
673 17790 : maximum_abs_16_fx( old_wsp_fx, L_WSP_MEM, &shift ); /* 'shift' temporarily holds the value returned by maximum_abs_16_fx(), presumably the peak magnitude */
674 17790 : if ( !shift )
675 : {
676 17790 : shift1 = Q15;
677 17790 : move16();
678 : }
679 17790 : maximum_abs_16_fx( wsp_fx, L_WSP - L_WSP_MEM, &shift );
680 17790 : if ( !shift )
681 : {
682 167 : shift2 = Q15;
683 167 : move16();
684 : }
685 :
686 17790 : shift = s_min( Q15, s_min( add( Q_inp_12k8, shift1 ), add( Q_inp_12k8, shift2 ) ) ); /* common Q for wsp buffers and the decimated-wsp memories */
687 17790 : shift = s_min( shift, add( norm_arr( st->mem_decim2_fx, 3 ), st->Q_old_wsp2 ) );
688 17790 : shift = s_min( shift, add( norm_arr( st->old_wsp2_fx, ( L_WSP_MEM - L_INTERPOL ) / OPL_DECIM ), st->Q_old_wsp2 ) );
689 :
690 17790 : scale_sig( old_wsp_fx, L_WSP_MEM, sub( shift, Q_inp_12k8 ) );
691 17790 : scale_sig( wsp_fx, L_WSP - L_WSP_MEM, sub( shift, Q_inp_12k8 ) );
692 :
693 17790 : Q_wsp = shift;
694 17790 : move16();
695 :
696 17790 : scale_sig( st->mem_decim2_fx, 3, sub( Q_wsp, st->Q_old_wsp2 ) ); // Q( mem_decim ) = Q( old_wsp2 )
697 17790 : scale_sig( st->old_wsp2_fx, ( L_WSP_MEM - L_INTERPOL ) / OPL_DECIM, sub( Q_wsp, st->Q_old_wsp2 ) ); // Q_wsp
698 :
699 17790 : st->Q_old_wsp2 = Q_wsp;
700 17790 : move16();
701 :
702 17790 : pitch_ol_ivas_fx( st->pitch, st->voicing_fx, &st->old_pitch, &st->old_corr_fx, corr_shift_fx, &st->old_thres_fx, &st->delta_pit, st->old_wsp2_fx, wsp_fx, st->mem_decim2_fx, relE_fx, st->clas, st->input_bwidth, st->Opt_SC_VBR, Q_wsp );
703 :
704 : /* Updates for adaptive lag window memory */
705 17790 : st->old_pitch_la = st->pitch[2]; /* Q0 */
706 17790 : move16();
707 :
708 17790 : StableHighPitchDetect_ivas_fx( &flag_spitch, st->pitch, st->voicing_fx, wsp_fx, st->localVAD, &st->voicing_sm_fx, &st->voicing0_sm_fx, &st->LF_EnergyRatio_sm_fx, &st->predecision_flag, &st->diff_sm_fx, &st->energy_sm_fx, Q_wsp, st->lgBin_E_fx );
709 17790 : IF( st->hSpMusClas != NULL ) /* NOTE(review): loc_harm, cor_map_sum_fx and S_map_fx are only passed to multi_harm_fx() inside this branch but are used unconditionally below, and hSpMusClas->past_PS_Q is dereferenced without this check - confirm hSpMusClas can never be NULL here */
710 : {
711 : Word16 dummy_int;
712 : Word16 *cor_strong_limit;
713 17790 : dummy_int = 0;
714 17790 : move16();
715 17790 : if ( st->hGSCEnc != NULL )
716 : {
717 0 : cor_strong_limit = &st->hGSCEnc->cor_strong_limit;
718 : }
719 : else
720 : {
721 17790 : cor_strong_limit = &dummy_int; /* no GSC encoder: hand multi_harm_fx() a local placeholder */
722 : }
723 17790 : loc_harm = multi_harm_fx( st->lgBin_E_fx, hFrontVad->hNoiseEst->old_S_fx, hFrontVad->hNoiseEst->cor_map_fx, &hFrontVad->hNoiseEst->multi_harm_limit_fx, st->total_brate, st->bwidth, cor_strong_limit, &st->hSpMusClas->mean_avr_dyn_fx, &st->hSpMusClas->last_sw_dyn_fx, &cor_map_sum_fx, &dummy_fx, S_map_fx ); /* Q0 */
724 : }
725 :
726 17790 : scale = getScaleFactor32( epsP_fx, M + 1 );
727 17790 : Q_esp = add( Q_r[0], scale );
728 17790 : Scale_sig32( epsP_fx, M + 1, scale ); // Q_esp
729 :
730 17790 : scale = add( hFrontVad->hNoiseEst->ave_enr_q, s_min( L_norm_arr( hFrontVad->hNoiseEst->ave_enr_fx, NB_BANDS ), L_norm_arr( hFrontVad->hNoiseEst->ave_enr2_fx, NB_BANDS ) ) ); /* common Q for ave_enr_fx, ave_enr2_fx and tmpE_fx */
731 17790 : scale = s_min( scale, add( q_tmpE, L_norm_arr( tmpE_fx, NB_BANDS ) ) );
732 17790 : scale = sub( s_min( scale, Q31 ), 1 );
733 17790 : scale_sig32( hFrontVad->hNoiseEst->ave_enr_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->ave_enr_q ) );
734 17790 : scale_sig32( hFrontVad->hNoiseEst->ave_enr2_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->ave_enr_q ) );
735 17790 : scale_sig32( tmpE_fx, NB_BANDS, sub( scale, q_tmpE ) );
736 17790 : hFrontVad->hNoiseEst->ave_enr_q = scale;
737 17790 : move16();
738 :
739 17790 : scale = s_min( add( hFrontVad->hNoiseEst->q_bckr, L_norm_arr( hFrontVad->hNoiseEst->bckr_fx, NB_BANDS ) ), add( q_tmpN, L_norm_arr( tmpN_fx, NB_BANDS ) ) ); /* common Q for bckr_fx and tmpN_fx */
740 17790 : scale = sub( s_min( Q31, scale ), 1 ); // guard bits
741 17790 : scale_sig32( hFrontVad->hNoiseEst->bckr_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->q_bckr ) );
742 17790 : scale_sig32( tmpN_fx, NB_BANDS, sub( scale, q_tmpN ) );
743 17790 : hFrontVad->hNoiseEst->q_bckr = scale;
744 17790 : move16();
745 :
746 17790 : scale = add( hFrontVad->hNoiseEst->fr_bands_fx_q, s_min( L_norm_arr( hFrontVad->hNoiseEst->fr_bands1_fx, NB_BANDS ), L_norm_arr( hFrontVad->hNoiseEst->fr_bands2_fx, NB_BANDS ) ) ); /* common Q for fr_bands1_fx, fr_bands2_fx and fr_bands_fx[0] */
747 17790 : scale = s_min( scale, add( q_fr_bands[0], L_norm_arr( fr_bands_fx[0], 2 * NB_BANDS ) ) );
748 17790 : scale = s_min( Q31, scale );
749 17790 : scale_sig32( hFrontVad->hNoiseEst->fr_bands1_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->fr_bands_fx_q ) );
750 17790 : scale_sig32( hFrontVad->hNoiseEst->fr_bands2_fx, NB_BANDS, sub( scale, hFrontVad->hNoiseEst->fr_bands_fx_q ) );
751 17790 : scale_sig32( fr_bands_fx[0], 2 * NB_BANDS, sub( scale, q_fr_bands[0] ) );
752 17790 : hFrontVad->hNoiseEst->fr_bands_fx_q = q_fr_bands[0] = scale;
753 17790 : move16();
754 17790 : move16();
755 :
756 17790 : noise_est_ivas_fx( st, old_pitch, tmpN_fx, epsP_fx, Etot_fx[0], sub( Etot_fx[0], hFrontVad->lp_speech_fx ), corr_shift_fx, tmpE_fx,
757 17790 : hFrontVad->hNoiseEst->ave_enr_q, fr_bands_fx[0], q_fr_bands[0], &cor_map_sum_fx, NULL, &sp_div_fx, &Q_sp_div, &non_staX_fx, &loc_harm,
758 17790 : lf_E_fx[0], q_lf_E[0], &hFrontVad->hNoiseEst->harm_cor_cnt, extract_h( hFrontVad->hNoiseEst->Etot_l_lp_32fx ), hFrontVad->hNoiseEst->Etot_v_h2_fx,
759 17790 : &hFrontVad->hNoiseEst->bg_cnt, st->lgBin_E_fx, &sp_floor, S_map_fx, NULL, hFrontVad, hFrontVad->ini_frame );
760 :
761 17790 : MVR2R_WORD16( st->pitch, st->pitch, 3 ); /* NOTE(review): source and destination are the same buffer, so this looks like a no-op - confirm intent */
762 17790 : vad_param_updt_fx( st, st->pitch[1], corr_shift_fx, corr_shift_fx, A_fx, &hFrontVad, 1 );
763 : /* 1st stage speech/music classification (GMM model) */
764 : /* run only to get 'high_lpn_flag' parameter */
765 17790 : SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
766 17790 : Word16 non_sta_fx = shr( non_staX_fx, Q2 ); // Q8->Q6
767 17790 : Word16 Etot_fx_0 = Etot_fx[0];
768 17790 : move16();
769 17790 : scale = getScaleFactor32( PS_fx, 128 );
770 17790 : Qfact_PS = add( Qfact_PS, scale );
771 17790 : Scale_sig32( PS_fx, 128, scale );
772 17790 : ivas_smc_gmm_fx( st, NULL, localVAD_HE_SAD[0], Etot_fx_0, lsp_new_fx, cor_map_sum_fx, epsP_fx, PS_fx, non_sta_fx, relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, Q_esp, hSpMusClas->past_PS_Q );
773 :
774 : /* long-term energy update */
775 17790 : ivas_long_enr_fx( st, -1, localVAD_HE_SAD[0], high_lpn_flag, &hFrontVad, 1, localVAD_HE_SAD, Etot_fx );
776 :
777 : /* increase ini_frame counter */
778 17790 : hFrontVad->ini_frame = s_min( add( hFrontVad->ini_frame, 1 ), MAX_FRAME_COUNTER ); /* Q0 */
779 17790 : st->ini_frame = hFrontVad->ini_frame; /* Q0 */
780 17790 : move16();
781 17790 : hSpar->front_vad_flag = st->vad_flag; /* Q0 */
782 17790 : move16();
783 17790 : hSpar->front_vad_dtx_flag = 1;
784 17790 : move16();
785 17790 : if ( st->core_brate == SID_2k40 || st->core_brate == FRAME_NO_DATA ) /* SID or NO_DATA core rate: clear the DTX flag */
786 : {
787 3139 : hSpar->front_vad_dtx_flag = 0;
788 3139 : move16();
789 : }
790 17790 : hSpar->force_front_vad = 1;
791 17790 : move16();
792 17790 : st->last_core = 0;
793 17790 : move16();
794 : }
795 : ELSE
796 : {
797 139710 : hSpar->front_vad_flag = 1; /* front VAD not run: report the frame as active */
798 139710 : move16();
799 139710 : hSpar->front_vad_dtx_flag = 0;
800 139710 : move16();
801 139710 : hSpar->force_front_vad = 0;
802 139710 : move16();
803 : }
804 :
805 157500 : pop_wmops();
806 157500 : return error;
807 : }
|