Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 :
34 : #include "basop_util.h"
35 : #include "enh32.h"
36 : #include <stdint.h>
37 : #include <string.h>
38 : #include <stdio.h>
39 : #include <stdlib.h>
40 : #include <assert.h>
41 : #include <math.h>
42 : #include "options.h"
43 : #include "cnst.h"
44 : #include "rom_enc.h"
45 : #include "rom_com.h"
46 : #include "prot_fx.h"
47 : #include "ivas_stat_dec.h"
48 : #include "ivas_cnst.h"
49 : #include "ivas_rom_com.h"
50 : #include "ivas_rom_dec.h"
51 : #include "wmc_auto.h"
52 : #include "rom_dec.h"
53 : #include "ivas_prot_fx.h"
54 :
55 : #ifdef NONBE_FIX_1091_PMC_LOW_SIGNAL_BURSTS
56 : /*-----------------------------------------------------------------------*
57 : * Local constants
58 : *-----------------------------------------------------------------------*/
59 :
60 : #define SQRT_EPSILON_FX 68 /* Q31 square root of EPSILON */
61 : #endif
62 :
63 : /*-------------------------------------------------------------------*
64 : * ivas_dirac_dec_output_synthesis_cov_open()
65 : *
66 : * Sets up the state and parameters for the Covariance Synthesis
67 : *-------------------------------------------------------------------*/
68 :
69 341 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
70 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
71 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state */
72 : const Word16 max_band_decorr, /* i : uppermost frequency band where decorrelation is applied */
73 : const Word16 interp_length, /* i : length for interpolating the mixing matrices in time slots */
74 : const Word16 num_param_bands, /* i : number of parameter bands */
75 : const Word16 num_param_bands_residual, /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
76 : const Word16 nchan_in, /* i : number of input (transport) channels */
77 : const Word16 nchan_out, /* i : number of output channels */
78 : const Word32 *proto_matrix /* i : the prototype (upmix) matrix (only used if mode == 1) Q(15-proto_matrix_e) */
79 : )
80 : {
81 : Word16 idx;
82 :
83 341 : h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
84 341 : move16();
85 :
86 : /*-----------------------------------------------------------------*
87 : * memory allocation
88 : *-----------------------------------------------------------------*/
89 :
90 : /* buffer length and interpolator */
91 341 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
92 341 : IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
93 : {
94 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
95 : }
96 341 : h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
97 341 : move16();
98 : /* cov buffers */
99 4445 : FOR( idx = 0; idx < num_param_bands; idx++ )
100 : {
101 4104 : IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
102 : {
103 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
104 : }
105 4104 : h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
106 4104 : IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
107 : {
108 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
109 : }
110 4104 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
111 : {
112 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
113 : }
114 4104 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
115 4104 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
116 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
117 :
118 4104 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
119 : {
120 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
121 : }
122 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
123 4104 : h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
124 4104 : move16();
125 : }
126 16697 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
127 : {
128 16356 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
129 16356 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
130 16356 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
131 16356 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
132 : }
133 :
134 3563 : FOR( idx = 0; idx < num_param_bands_residual; idx++ )
135 : {
136 3222 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
137 : {
138 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
139 : }
140 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
141 :
142 3222 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
143 : {
144 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
145 : }
146 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
147 3222 : h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
148 3222 : move16();
149 : }
150 17579 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
151 : {
152 17238 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
153 17238 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
154 : }
155 :
156 341 : IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
157 : {
158 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
159 : }
160 341 : IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
161 : {
162 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
163 : }
164 :
165 341 : set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
166 341 : set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
167 :
168 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
169 : {
170 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
171 : }
172 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
173 :
174 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
175 : {
176 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
177 : }
178 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
179 :
180 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
181 : {
182 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
183 : }
184 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
185 :
186 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
187 : {
188 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
189 : }
190 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
191 :
192 : /*-----------------------------------------------------------------*
193 : * prepare processing parameters
194 : *-----------------------------------------------------------------*/
195 :
196 : /* compute interpolator */
197 341 : IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
198 : {
199 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
200 : }
201 11253 : FOR( idx = 1; idx <= interp_length; ++idx )
202 : {
203 10912 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
204 10912 : move16();
205 : }
206 341 : Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
207 341 : h_dirac_output_synthesis_params->proto_matrix_e = 5;
208 341 : move16();
209 341 : return IVAS_ERR_OK;
210 : }
211 :
212 :
213 : /*-------------------------------------------------------------------*
214 : * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
215 : *
216 : *
217 : *-------------------------------------------------------------------*/
218 :
219 328331 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
220 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
221 : const UWord16 interp_length /* i : interpolator length */
222 : )
223 : {
224 : Word16 idx;
225 328331 : Word16 tmp, exp_diff = 0;
226 328331 : move16();
227 :
228 1632986 : FOR( idx = 1; idx <= interp_length; ++idx )
229 : {
230 1304655 : tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
231 1304655 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff ); // Q15
232 1304655 : move16();
233 : }
234 :
235 328331 : return;
236 : }
237 :
238 :
239 : /*-------------------------------------------------------------------*
240 : * ivas_dirac_dec_output_synthesis_cov_init()
241 : *
242 : * initialize the states for the covariance synthesis
243 : *-------------------------------------------------------------------*/
244 341 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
245 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis */
246 : const Word16 nchan_in, /* i : number of input (tranport) channels */
247 : const Word16 nchan_out, /* i : number of output channels */
248 : const Word16 n_param_bands, /* i : number of total parameter bands */
249 : const Word16 n_param_bands_res /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
250 : )
251 : {
252 :
253 : Word16 idx;
254 :
255 : /* initialize buffers */
256 4445 : FOR( idx = 0; idx < n_param_bands; idx++ )
257 : {
258 4104 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
259 4104 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
260 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
261 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
262 : }
263 :
264 3563 : FOR( idx = 0; idx < n_param_bands_res; idx++ )
265 : {
266 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
267 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
268 : }
269 :
270 :
271 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
272 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
273 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
274 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
275 :
276 341 : return;
277 : }
278 :
279 :
280 : /*-------------------------------------------------------------------*
281 : * ivas_dirac_dec_output_synthesis_cov_close()
282 : *
283 : * deallocate dynamic memory in the covariance synthesis state
284 : *-------------------------------------------------------------------*/
285 :
286 341 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
287 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i : handle for the covariance synthesis parameters */
288 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state */
289 : )
290 : {
291 : Word16 idx;
292 :
293 : /*-----------------------------------------------------------------*
294 : * memory deallocation
295 : *-----------------------------------------------------------------*/
296 :
297 : /* free interpolator */
298 341 : IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
299 : {
300 341 : free( h_dirac_output_synthesis_params->interpolator_fx );
301 341 : h_dirac_output_synthesis_params->interpolator_fx = NULL;
302 : }
303 :
304 : /* free alpha */
305 341 : IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
306 : {
307 0 : free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
308 0 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
309 : }
310 :
311 : /* free proto_matrix */
312 341 : IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
313 : {
314 341 : free( h_dirac_output_synthesis_params->proto_matrix_fx );
315 341 : h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
316 : }
317 :
318 :
319 341 : IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
320 : {
321 341 : free( h_dirac_output_synthesis_state->cx_old_e );
322 341 : h_dirac_output_synthesis_state->cx_old_e = NULL;
323 : }
324 341 : IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
325 : {
326 341 : free( h_dirac_output_synthesis_state->cy_old_e );
327 341 : h_dirac_output_synthesis_state->cy_old_e = NULL;
328 : }
329 :
330 : /* free cov buffers */
331 20801 : FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
332 : {
333 20460 : IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
334 : {
335 4104 : free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
336 4104 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
337 : }
338 :
339 20460 : IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
340 : {
341 4104 : free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
342 4104 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
343 : }
344 :
345 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
346 : {
347 4104 : free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
348 4104 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
349 : }
350 :
351 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
352 : {
353 3222 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
354 3222 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
355 : }
356 :
357 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
358 : {
359 4104 : free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
360 4104 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
361 : }
362 :
363 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
364 : {
365 3222 : free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
366 3222 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
367 : }
368 : }
369 :
370 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
371 : {
372 341 : free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
373 341 : h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
374 : }
375 :
376 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
377 : {
378 341 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
379 341 : h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
380 : }
381 :
382 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
383 : {
384 341 : free( h_dirac_output_synthesis_state->mixing_matrix_exp );
385 341 : h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
386 : }
387 :
388 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
389 : {
390 341 : free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
391 341 : h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
392 : }
393 341 : return;
394 : }
395 :
396 : /*-------------------------------------------------------------------*
397 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
398 : *
399 : * collect the multi channel input covariance for one filter bank time slot
400 : *-------------------------------------------------------------------*/
401 :
402 : /*-------------------------------------------------------------------*
403 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
404 : *
405 : * collect the multi channel input covariance for one filter bank time slot
406 : *-------------------------------------------------------------------*/
407 2084111 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
408 : Word32 *RealBuffer_fx, /* i : input channel filter bank samples (real part) Q(31- RealBuffer_e)*/
409 : Word16 RealBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
410 : Word32 *ImagBuffer_fx, /* i : input channel filter bank samples (imaginary part Q(ImagBuffer_e)*/
411 : Word16 ImagBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
412 : Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (real part) Q(31- cx_e)*/
413 : Word16 *cx_e, /* i : exponent for accumulated input covariance (real part) */
414 : Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (imaginary part) Q(31- cx_imag_e)*/
415 : Word16 *cx_imag_e, /* i : exponent accumulated input covariance (imag part) */
416 : PARAM_MC_DEC_HANDLE hParamMC, /* i : handle to Parametric MC state */
417 : const Word16 param_band, /* i : parameter band */
418 : const Word16 nchan_in /* i : number of input channels */
419 : )
420 : {
421 : Word16 band_idx, ch_idx;
422 : Word16 brange[2];
423 : Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
424 : Word16 real_in_e;
425 : Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
426 : Word16 imag_in_e;
427 : Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
428 : Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
429 : Word16 output_e;
430 : Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
431 : Word16 band, num_bands;
432 : Word16 cx_fx_norm, cx_imag_fx_norm;
433 : /* estimate input covariance */
434 : /* Already stack here instead of in the process_subframe */
435 :
436 : /* collect input frame */
437 2084111 : brange[0] = hParamMC->band_grouping[param_band];
438 2084111 : move16();
439 2084111 : brange[1] = hParamMC->band_grouping[param_band + 1];
440 2084111 : move16();
441 2084111 : num_bands = sub( brange[1], brange[0] );
442 :
443 10913031 : FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
444 : {
445 8828920 : band = add( brange[0], band_idx );
446 26555520 : FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
447 : {
448 17726600 : real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
449 17726600 : move32();
450 17726600 : imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
451 17726600 : move32();
452 : }
453 : }
454 :
455 2084111 : real_in_e = RealBuffer_e;
456 2084111 : move16();
457 2084111 : imag_in_e = ImagBuffer_e;
458 2084111 : move16();
459 :
460 2084111 : Word16 buf_len = imult1616( num_bands, nchan_in );
461 :
462 2084111 : shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
463 2084111 : shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
464 :
465 2084111 : real_in_e = sub( real_in_e, shift_real );
466 2084111 : imag_in_e = sub( imag_in_e, shift_imag );
467 :
468 :
469 2084111 : output_e = s_max( real_in_e, imag_in_e );
470 :
471 2084111 : scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
472 2084111 : scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
473 :
474 2084111 : cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
475 2084111 : v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
476 :
477 2084111 : v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
478 :
479 2084111 : cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
480 2084111 : cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
481 :
482 2084111 : scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
483 2084111 : scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
484 :
485 2084111 : *cx_e = sub( tmp1_e, cx_fx_norm );
486 2084111 : move16();
487 2084111 : *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
488 2084111 : move16();
489 :
490 2084111 : return;
491 : }
492 :
493 : /*-------------------------------------------------------------------*
494 : * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
495 : *
496 : * synthesize one filter bank slot of multi channel output filter bank
497 : * samples with the covariance synthesis
498 : *-------------------------------------------------------------------*/
499 :
500 163667 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
501 : Word32 *Cldfb_RealBuffer_in_fx, /*Q6*/
502 : Word32 *Cldfb_ImagBuffer_in_fx, /*Q6*/
503 : Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (real part) Q6*/
504 : Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (imaginary part) Q6*/
505 : Word32 *mixing_matrix_fx[], /* i : parameter band wise mixing matrices (direct part) Q(31-mixing_matrix_e)*/
506 : Word16 *mixing_matrix_e, /* i : parameter band wise mixing matrices (direct part) */
507 : Word32 *mixing_matrix_res_fx[], /* i : parameter band wise mixing matrices (residual part) Q(31-mixing_matrix_res_e)*/
508 : Word16 *mixing_matrix_res_e, /* i : parameter band wise mixing matrices (residual part) */
509 : const UWord16 slot_idx_sfr, /* i : time slot index for the current slot within the current subframe */
510 : const UWord16 slot_idx_tot, /* i : time slot index for the current slot within the frame */
511 : const Word16 nX, /* i : number of input channels */
512 : const Word16 nY, /* i : number of output channels */
513 : PARAM_MC_DEC_HANDLE hParamMC /* i : handle to the Parametric MC decoder state */
514 : )
515 : {
516 : Word16 param_band_idx, band, ch_idx;
517 : Word16 have_residual;
518 : Word16 brange[2];
519 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
520 : Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
521 : Word16 mixing_matrix_smooth_e;
522 : Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
523 163667 : Word16 mixing_matrix_res_smooth_e = 0;
524 163667 : move16();
525 : Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
526 : Word16 mixing_matrix_buffer_e;
527 : Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
528 : Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
529 :
530 : Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
531 : Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
532 :
533 163667 : h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
534 :
535 163667 : set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
536 163667 : set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
537 :
538 163667 : set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
539 163667 : set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
540 :
541 2288286 : FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
542 : {
543 : /* final mixing */
544 2124619 : have_residual = 0;
545 2124619 : move16();
546 2124619 : brange[0] = hParamMC->band_grouping[param_band_idx];
547 2124619 : move16();
548 2124619 : brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
549 2124619 : move16();
550 :
551 2124619 : if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
552 : {
553 1709608 : have_residual = 1;
554 1709608 : move16();
555 : }
556 :
557 2124619 : v_multc_fixed( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
558 2124619 : mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
559 2124619 : move16();
560 :
561 2124619 : v_multc_fixed( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
562 2124619 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
563 2124619 : move16();
564 :
565 2124619 : v_add_fixed_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
566 :
567 2124619 : IF( have_residual )
568 : {
569 : /* residual mixing matrix interpolation*/
570 :
571 1709608 : v_multc_fixed( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
572 1709608 : mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
573 1709608 : move16();
574 :
575 1709608 : set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
576 1709608 : v_multc_fixed( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
577 1709608 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
578 1709608 : move16();
579 :
580 1709608 : v_add_fixed_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
581 : }
582 :
583 :
584 11119299 : FOR( band = brange[0]; band < brange[1]; band++ )
585 : {
586 8994680 : assert( band >= 0 );
587 :
588 8994680 : IF( have_residual )
589 : {
590 : /* collect diffuse prototypes */
591 3273340 : assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
592 23265500 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
593 : {
594 19992160 : diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
595 19992160 : move32();
596 19992160 : diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
597 19992160 : move32();
598 : }
599 :
600 : /* apply residual mixing */
601 : {
602 : Word16 shifter;
603 3273340 : shifter = sub( mixing_matrix_res_smooth_e, 32 );
604 23265500 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
605 : {
606 : int i;
607 : Word16 idx;
608 : Word64 temp_real, temp_imag;
609 :
610 :
611 19992160 : idx = ch_idx;
612 19992160 : temp_real = 0;
613 19992160 : temp_imag = 0;
614 19992160 : move64();
615 19992160 : move64();
616 143222880 : for ( i = 0; i < nY; i++ )
617 : {
618 123230720 : temp_real = W_mac_32_32( temp_real, mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] );
619 123230720 : temp_imag = W_mac_32_32( temp_imag, mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] );
620 123230720 : idx += nY;
621 : }
622 19992160 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
623 19992160 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
624 : }
625 : }
626 : }
627 : ELSE
628 : {
629 40612700 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
630 : {
631 34891360 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
632 34891360 : move32();
633 34891360 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
634 34891360 : move32();
635 : }
636 : }
637 :
638 : /* collect input signals, still in cldfb buffers */
639 27060840 : FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
640 : {
641 :
642 18066160 : input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
643 18066160 : move32();
644 18066160 : input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
645 18066160 : move32();
646 : }
647 :
648 : /* apply mixing matrix */
649 : {
650 : Word16 shifter;
651 8994680 : shifter = 31 - mixing_matrix_smooth_e;
652 :
653 63878200 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
654 : {
655 : int i;
656 : Word16 idx;
657 : Word64 temp_real, temp_imag;
658 :
659 :
660 54883520 : idx = ch_idx;
661 54883520 : temp_real = 0;
662 54883520 : temp_imag = 0;
663 54883520 : move64();
664 54883520 : move64();
665 165495360 : for ( i = 0; i < nX; i++ )
666 : {
667 110611840 : temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
668 110611840 : temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
669 110611840 : idx += nY;
670 : }
671 54883520 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
672 54883520 : move32();
673 54883520 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
674 54883520 : move32();
675 : }
676 : }
677 : }
678 : }
679 :
680 163667 : return;
681 : }
682 :
683 : /*-------------------------------------------------------------------*
684 : * computeMixingMatrices()
685 : *
686 : * compute a mixing matrix using the convariance synthesis approach
687 : *-------------------------------------------------------------------*/
688 :
689 132787 : Word16 computeMixingMatrices_fx(
690 : const Word16 num_inputs, /* i : number of input channels */
691 : const Word16 num_outputs, /* i : number of output channels */
692 : const Word32 *Cx, /* i : input channel covariance matrix Q(31-Cx_e) */
693 : Word16 Cx_e,
694 : const Word32 *Cy, /* i : target covariance matrix Q(31-Cy_e) */
695 : Word16 Cy_e,
696 : const Word32 *Q, /* i : prototype matrix (usually a upmix matrix) Q_fx_e */
697 : Word16 Q_fx_e,
698 : const Word16 energy_compensation_flag, /* i : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
699 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values */
700 : Word16 reg_Sx_e,
701 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
702 : Word16 reg_ghat_e,
703 : Word32 *mixing_matrix_fx, /* o : resulting mixing matrix Q(31-mixing_matrix_out_e) */
704 : Word16 *mixing_matrix_out_e,
705 : Word32 *Cr_fx, /* o : residual covariance matrix Q(31-Cr_e) */
706 : Word16 *Cr_e )
707 : {
708 : Word16 i, j;
709 : Word16 out;
710 : Word16 nL, nC;
711 132787 : Word16 lengthCx = num_inputs;
712 132787 : Word16 lengthCy = num_outputs;
713 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
714 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
715 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
716 : Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
717 : Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
718 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
719 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
720 : Word16 Cx_fx_e;
721 : Word16 Cy_fx_e;
722 :
723 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
724 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
725 :
726 :
727 : Word32 limit_fx;
728 : Word16 limit_e;
729 :
730 : Word32 L_tmp;
731 : Word16 tmp_e, tmp, exp;
732 :
733 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
734 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
735 :
736 : Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
737 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
738 :
739 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
740 : Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
741 :
742 : Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
743 : Word16 Q_e;
744 :
745 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
746 : Word16 Q_Cx_e;
747 :
748 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
749 : Word16 Cy_hat_diag_e;
750 : Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
751 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
752 : Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
753 :
754 : Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
755 :
756 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
757 :
758 : Word16 mixing_matrix_e;
759 :
760 : Word16 Cr_fx_e;
761 :
762 :
763 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
764 : Word16 adj_e[MAX_OUTPUT_CHANNELS];
765 : Word32 *adj_fx_p;
766 : Word16 adj_fx_e;
767 :
768 : Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
769 132787 : push_wmops( "dirac_cov_mix_mat" );
770 :
771 132787 : out = EXIT_SUCCESS;
772 132787 : move16();
773 :
774 132787 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
775 2257379 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
776 : {
777 2124592 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
778 2124592 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
779 2124592 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
780 : }
781 :
782 :
783 132787 : Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
784 132787 : Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
785 132787 : Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
786 :
787 132787 : Q_e = Q_fx_e;
788 132787 : move16();
789 132787 : Cx_fx_e = Cx_e;
790 132787 : move16();
791 132787 : Cy_fx_e = Cy_e;
792 132787 : move16();
793 :
794 :
795 : /*-----------------------------------------------------------------*
796 : * Decomposition of Cy
797 : *-----------------------------------------------------------------*/
798 :
799 : /* Processing the SVD */
800 :
801 132787 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
802 :
803 132787 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
804 132787 : Word16 max_e = -32;
805 : /* Computing Ky */
806 812472 : FOR( i = 0; i < lengthCy; ++i )
807 : {
808 4209670 : FOR( j = 0; j < lengthCy; ++j )
809 : {
810 3529985 : tmp_e = svd_s_buffer_e[j];
811 3529985 : move16();
812 3529985 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
813 3529985 : Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
814 3529985 : move32();
815 3529985 : Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
816 3529985 : move16();
817 3529985 : max_e = s_max( max_e, tmp_e );
818 : }
819 : }
820 3662772 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
821 : {
822 3529985 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
823 3529985 : move32();
824 3529985 : Ky_fx_e[i] = max_e;
825 3529985 : move16();
826 : }
827 :
828 : /*-----------------------------------------------------------------*
829 : * Decomposition of Cx
830 : *-----------------------------------------------------------------*/
831 :
832 : /* Processing the SVD */
833 :
834 132787 : mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
835 :
836 132787 : svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
837 132787 : max_e = -32;
838 399721 : FOR( i = 0; i < lengthCx; ++i )
839 : {
840 804882 : FOR( j = 0; j < lengthCx; ++j )
841 : {
842 537948 : tmp_e = svd_s_buffer_e[j];
843 537948 : move16();
844 537948 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
845 537948 : Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
846 537948 : move32();
847 537948 : Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
848 537948 : move16();
849 537948 : max_e = s_max( max_e, tmp_e );
850 : }
851 : }
852 670735 : FOR( i = 0; i < lengthCx * lengthCx; ++i )
853 : {
854 537948 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
855 537948 : move32();
856 537948 : Kx_fx_e[i] = max_e;
857 537948 : move16();
858 : }
859 :
860 399721 : FOR( i = 0; i < lengthCx; ++i )
861 : {
862 266934 : tmp_e = svd_s_buffer_e[i];
863 266934 : move16();
864 266934 : svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
865 266934 : move32();
866 266934 : svd_s_buffer_e[i] = tmp_e;
867 266934 : move16();
868 : }
869 :
870 : /*-----------------------------------------------------------------*
871 : * Regularization of Sx
872 : *-----------------------------------------------------------------*/
873 :
874 132787 : limit_fx = svd_s_buffer_fx[0];
875 132787 : move32();
876 132787 : limit_e = svd_s_buffer_e[0];
877 132787 : move16();
878 266934 : FOR( i = 1; i < lengthCx; i++ )
879 : {
880 134147 : IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) )
881 : {
882 0 : limit_fx = svd_s_buffer_fx[i];
883 0 : move32();
884 0 : limit_e = svd_s_buffer_e[i];
885 0 : move16();
886 : }
887 : }
888 :
889 132787 : limit_e = add( limit_e, reg_Sx_e );
890 : #ifdef NONBE_FIX_1091_PMC_LOW_SIGNAL_BURSTS
891 132787 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx );
892 132787 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, SQRT_EPSILON_FX /* Q31 */, 0 ) < 0 )
893 : {
894 0 : limit_fx = SQRT_EPSILON_FX;
895 0 : move32();
896 0 : limit_e = 0;
897 0 : move16();
898 : }
899 : #else /* NONBE_FIX_1091_PMC_LOW_SIGNAL_BURSTS */
900 : limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_Sx_fx );
901 : #endif
902 :
903 399721 : FOR( i = 0; i < lengthCx; ++i )
904 : {
905 266934 : IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) )
906 : {
907 59978 : svd_s_buffer_fx[i] = limit_fx;
908 59978 : move32();
909 59978 : svd_s_buffer_e[i] = limit_e;
910 59978 : move16();
911 : }
912 : }
913 :
914 :
915 132787 : limit_fx = 0;
916 132787 : move32();
917 132787 : limit_e = 0;
918 132787 : move16();
919 :
920 :
921 : /*-----------------------------------------------------------------*
922 : * regularized Kx-1
923 : *-----------------------------------------------------------------*/
924 :
925 399721 : FOR( i = 0; i < lengthCx; ++i )
926 : {
927 : Word16 scale, reg_fac_fx;
928 266934 : reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
929 266934 : scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
930 804882 : FOR( j = 0; j < lengthCx; ++j )
931 : {
932 537948 : Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
933 537948 : move32();
934 537948 : Kx_reg_inv_e[i + j * lengthCx] = scale;
935 537948 : move16();
936 : }
937 : }
938 :
939 : /*-----------------------------------------------------------------*
940 : * normalization matrix G hat
941 : *-----------------------------------------------------------------*/
942 :
943 : /* Computing Q*Cx*Q' */
944 :
945 132787 : matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
946 :
947 132787 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
948 :
949 :
950 132787 : Word16 com_e = sub( limit_e, Cy_hat_diag_e );
951 812472 : FOR( i = 0; i < lengthCy; ++i )
952 : {
953 679685 : IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
954 : {
955 679685 : limit_fx = Cy_hat_diag_fx[i];
956 679685 : move32();
957 679685 : limit_e = Cy_hat_diag_e;
958 679685 : move16();
959 : }
960 : }
961 132787 : limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
962 132787 : limit_e = add( limit_e, reg_ghat_e );
963 :
964 132787 : com_e = sub( Cy_hat_diag_e, limit_e );
965 812472 : FOR( i = 0; i < lengthCy; ++i )
966 : {
967 679685 : Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
968 679685 : move16();
969 :
970 679685 : IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) )
971 : {
972 15 : Cy_hat_diag_fx[i] = limit_fx;
973 15 : move32();
974 15 : Cy_hat_diag_buff_e[i] = limit_e;
975 15 : move16();
976 : }
977 :
978 679685 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
979 679685 : exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
980 679685 : L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
981 679685 : G_hat_fx[i] = L_tmp;
982 679685 : move32();
983 679685 : G_hat_buff_e[i] = exp;
984 679685 : move16();
985 : }
986 :
987 :
988 : /*-----------------------------------------------------------------*
989 : * Formulate optimal P
990 : *-----------------------------------------------------------------*/
991 :
992 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
993 :
994 : Word16 mat_mult_buffer1_fx_e;
995 :
996 : Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
997 :
998 132787 : matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
999 :
1000 132787 : matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
1001 :
1002 132787 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1003 :
1004 132787 : IF( LT_16( lengthCx, lengthCy ) )
1005 : {
1006 132787 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
1007 132787 : nL = lengthCy;
1008 132787 : move16();
1009 132787 : nC = lengthCx;
1010 132787 : move16();
1011 132787 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1012 : }
1013 : ELSE
1014 : {
1015 0 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1016 0 : nL = lengthCx;
1017 0 : move16();
1018 0 : nC = lengthCy;
1019 0 : move16();
1020 0 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1021 : }
1022 :
1023 : /* Actually Processing P */
1024 :
1025 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
1026 : truncates V to num_input_channel columns */
1027 :
1028 132787 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1029 132787 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1030 :
1031 132787 : mat_mult_buffer1_fx_e = 0;
1032 132787 : move16();
1033 132787 : mat_mult_buffer2_e = 0;
1034 132787 : move16();
1035 :
1036 132787 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
1037 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1038 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1039 :
1040 : /************************ Formulate M **********************/
1041 :
1042 132787 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1043 :
1044 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
1045 :
1046 : Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1047 132787 : set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1048 :
1049 132787 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
1050 :
1051 : /*-----------------------------------------------------------------*
1052 : * Formulate Cr
1053 : *-----------------------------------------------------------------*/
1054 :
1055 : /* Compute Cy_tilde = M*Cx*M' */
1056 :
1057 :
1058 : Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
1059 132787 : set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
1060 132787 : matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
1061 :
1062 132787 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
1063 :
1064 132787 : exp = mixing_matrix_fx_e[0];
1065 132787 : move16();
1066 1372340 : FOR( i = 1; i < lengthCy * lengthCx; i++ )
1067 : {
1068 1239553 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1069 : {
1070 97612 : exp = mixing_matrix_fx_e[i];
1071 97612 : move16();
1072 : }
1073 : }
1074 :
1075 1505127 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1076 : {
1077 1372340 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1078 1372340 : move32();
1079 : }
1080 :
1081 132787 : mixing_matrix_e = exp;
1082 132787 : move16();
1083 :
1084 132787 : Cr_p_fx = Cr_fx;
1085 132787 : Cy_p_fx = Cy_fx;
1086 132787 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1087 : Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1088 812472 : FOR( i = 0; i < lengthCy; ++i )
1089 : {
1090 4209670 : FOR( j = 0; j < lengthCy; ++j )
1091 : {
1092 :
1093 3529985 : *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
1094 3529985 : move32();
1095 3529985 : Cr_p_fx++;
1096 3529985 : Cy_p_fx++;
1097 3529985 : Cy_tilde_p_fx++;
1098 : }
1099 :
1100 : /* Avoid Meaningless negative main diagonal elements */
1101 679685 : IF( Cr_fx[i + ( i * lengthCy )] < 0 )
1102 : {
1103 75958 : Cr_fx[i + ( i * lengthCy )] = 0;
1104 75958 : move32();
1105 75958 : Cr_e_arr[i + ( i * lengthCy )] = 0;
1106 75958 : move16();
1107 : }
1108 : }
1109 :
1110 132787 : exp = Cr_e_arr[0];
1111 132787 : move16();
1112 3529985 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1113 : {
1114 3397198 : if ( LT_16( exp, Cr_e_arr[i] ) )
1115 : {
1116 127602 : exp = Cr_e_arr[i];
1117 127602 : move16();
1118 : }
1119 : }
1120 :
1121 3662772 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1122 : {
1123 3529985 : Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
1124 3529985 : move32();
1125 : }
1126 :
1127 132787 : Cr_fx_e = exp;
1128 132787 : move16();
1129 :
1130 132787 : exp = mat_mult_buffer2_fx_e[0];
1131 132787 : move16();
1132 3529985 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1133 : {
1134 3397198 : if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
1135 : {
1136 184436 : exp = mat_mult_buffer2_fx_e[i];
1137 184436 : move16();
1138 : }
1139 : }
1140 :
1141 3662772 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1142 : {
1143 3529985 : mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
1144 3529985 : move32();
1145 : }
1146 :
1147 132787 : mat_mult_buffer2_e = exp;
1148 132787 : move16();
1149 :
1150 : /*-----------------------------------------------------------------*
1151 : * Energy Compensation
1152 : *-----------------------------------------------------------------*/
1153 :
1154 132787 : IF( EQ_16( energy_compensation_flag, 1 ) )
1155 : {
1156 25938 : adj_fx_p = svd_s_buffer_fx;
1157 25938 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1158 :
1159 158348 : FOR( i = 0; i < lengthCy; ++i )
1160 : {
1161 : /* Avoid correction for very small energies,
1162 : main diagonal elements of Cy_tilde_p may be negative */
1163 132410 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
1164 : {
1165 0 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1166 0 : move32();
1167 0 : adj_e[i] = 1;
1168 0 : move16();
1169 : }
1170 : ELSE
1171 : {
1172 132410 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
1173 132410 : exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
1174 132410 : L_tmp = L_deposit_h( tmp );
1175 132410 : L_tmp = Sqrt32( L_tmp, &exp );
1176 132410 : adj_fx_p[i] = L_tmp;
1177 132410 : move32();
1178 132410 : adj_e[i] = exp;
1179 132410 : move16();
1180 : }
1181 :
1182 132410 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
1183 132410 : IF( GT_32( adj_fx_p[i], temp ) )
1184 : {
1185 5 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1186 5 : move32();
1187 5 : adj_e[i] = 3;
1188 5 : move16();
1189 : }
1190 : }
1191 :
1192 25938 : exp = adj_e[0];
1193 25938 : move16();
1194 132410 : FOR( i = 1; i < lengthCy; i++ )
1195 : {
1196 106472 : if ( LT_16( exp, adj_e[i] ) )
1197 : {
1198 1428 : exp = adj_e[i];
1199 1428 : move16();
1200 : }
1201 : }
1202 :
1203 158348 : FOR( i = 0; i < lengthCy; i++ )
1204 : {
1205 132410 : adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
1206 132410 : move32();
1207 : }
1208 25938 : adj_fx_e = exp;
1209 25938 : move16();
1210 :
1211 25938 : diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1212 :
1213 25938 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
1214 25938 : mixing_matrix_e = mat_mult_buffer3_e;
1215 25938 : move16();
1216 : }
1217 :
1218 132787 : *mixing_matrix_out_e = mixing_matrix_e;
1219 132787 : move16();
1220 132787 : *Cr_e = Cr_fx_e;
1221 132787 : move16();
1222 132787 : pop_wmops();
1223 :
1224 132787 : return out;
1225 : }
1226 :
1227 : /*-------------------------------------------------------------------*
1228 : * computeMixingMatricesResidual()
1229 : *
1230 : * compute a residual mixing matrix using the covariance synthesis approach
1231 : *-------------------------------------------------------------------*/
1232 :
1233 106849 : Word16 computeMixingMatricesResidual_fx(
1234 : const Word32 num_outputs, /* i : number of output channels */
1235 : const Word32 *Cx_fx, /* i : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e) */
1236 : const Word16 Cx_e,
1237 : const Word32 *Cy_fx, /* i : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
1238 : const Word16 Cy_fx_e,
1239 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values Q(31-reg_Sx_e) */
1240 : const Word16 reg_Sx_e,
1241 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
1242 : const Word16 reg_ghat_e,
1243 : Word32 *mixing_matrix_fx, /* o : resulting residual mixing matrix Q(31-mixing_matrix_ret_e) */
1244 : Word16 *mixing_matrix_ret_e )
1245 : {
1246 : Word16 i, j;
1247 : Word16 out, lengthCx, lengthCy;
1248 106849 : out = EXIT_SUCCESS;
1249 106849 : move16();
1250 106849 : lengthCx = extract_l( num_outputs );
1251 106849 : lengthCy = extract_l( num_outputs );
1252 :
1253 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1254 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1255 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1256 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1257 :
1258 : Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1259 :
1260 106849 : push_wmops( "dirac_cov_mix_mat_r" );
1261 :
1262 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1263 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
1264 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1265 106849 : Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
1266 106849 : move16();
1267 :
1268 106849 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
1269 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1270 : Word32 L_tmp;
1271 : Word16 tmp_e;
1272 : Word16 tmp, scale;
1273 : Word32 div_tmp;
1274 : Word16 exp;
1275 :
1276 : Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
1277 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1278 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1279 : Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
1280 :
1281 : Word32 limit_fx;
1282 : Word16 limit_e;
1283 :
1284 : Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
1285 : Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
1286 :
1287 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1288 : Word16 Cy_hat_diag_e;
1289 :
1290 : Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
1291 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1292 : Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
1293 :
1294 : Word32 *adj_fx_p;
1295 : Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
1296 :
1297 : Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
1298 : Word16 Cy_tilde_e;
1299 :
1300 : /*-----------------------------------------------------------------*
1301 : * Decomposition of Cy
1302 : *-----------------------------------------------------------------*/
1303 :
1304 : /* Processing the SVD */
1305 :
1306 : /* linear array to svd buffer */
1307 106849 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
1308 :
1309 106849 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
1310 :
1311 : /* Computing Ky */
1312 106849 : Word16 max_e = -32;
1313 654124 : FOR( i = 0; i < lengthCy; ++i )
1314 : {
1315 3391850 : FOR( j = 0; j < lengthCy; ++j )
1316 : {
1317 2844575 : tmp_e = svd_s_buffer_e[j];
1318 2844575 : move16();
1319 2844575 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
1320 2844575 : Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
1321 2844575 : move32();
1322 2844575 : Ky_fx_e[i + j * lengthCy] = tmp_e;
1323 2844575 : move16();
1324 2844575 : max_e = s_max( max_e, tmp_e );
1325 : }
1326 : }
1327 :
1328 2951424 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
1329 : {
1330 2844575 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
1331 2844575 : move32();
1332 2844575 : Ky_fx_e[i] = max_e;
1333 2844575 : move16();
1334 : }
1335 :
1336 : /*-----------------------------------------------------------------*
1337 : * Decomposition of Cx
1338 : *-----------------------------------------------------------------*/
1339 :
1340 : /* Processing the SVD of Cx*/
1341 : /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
1342 : * would be just indicating the sorting index, so go straight to Kx as the
1343 : * square root of the diagonal of Cx */
1344 :
1345 : /* Computing Kx */
1346 106849 : max_e = -32;
1347 654124 : FOR( i = 0; i < lengthCx; ++i )
1348 : {
1349 547275 : exp = Cx_e;
1350 547275 : move16();
1351 547275 : Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
1352 547275 : move32();
1353 547275 : Kx_fx_e[i] = exp;
1354 547275 : move16();
1355 547275 : max_e = s_max( max_e, exp );
1356 : }
1357 :
1358 654124 : FOR( i = 0; i < lengthCx; ++i )
1359 : {
1360 547275 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
1361 547275 : move32();
1362 547275 : Kx_fx_e[i] = max_e;
1363 547275 : move16();
1364 : }
1365 :
1366 : /*-----------------------------------------------------------------*
1367 : * Regularization of Sx
1368 : *-----------------------------------------------------------------*/
1369 :
1370 106849 : limit_fx = Kx_fx[0];
1371 106849 : move32();
1372 :
1373 547275 : FOR( i = 1; i < lengthCx; i++ )
1374 : {
1375 440426 : IF( GT_32( Kx_fx[i], limit_fx ) )
1376 : {
1377 166158 : limit_fx = Kx_fx[i];
1378 166158 : move32();
1379 : }
1380 : }
1381 :
1382 106849 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
1383 106849 : L_tmp = L_add( L_tmp, EPSILLON_FX );
1384 106849 : limit_fx = L_tmp;
1385 106849 : move16();
1386 106849 : limit_e = add( Kx_fx_e[0], reg_Sx_e );
1387 :
1388 654124 : FOR( i = 0; i < lengthCx; ++i )
1389 : {
1390 547275 : IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) )
1391 : {
1392 545970 : div_tmp = Kx_fx[i];
1393 545970 : move32();
1394 545970 : exp = Kx_fx_e[i];
1395 545970 : move16();
1396 : }
1397 : ELSE
1398 : {
1399 1305 : div_tmp = limit_fx;
1400 1305 : move32();
1401 1305 : exp = limit_e;
1402 1305 : move16();
1403 : }
1404 547275 : tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
1405 547275 : scale = add( scale, sub( Q1, exp ) );
1406 :
1407 547275 : Kx_reg_inv_fx[i] = L_deposit_h( tmp );
1408 547275 : move32();
1409 547275 : Kx_reg_inv_e[i] = scale;
1410 547275 : move16();
1411 : }
1412 :
1413 106849 : limit_fx = 0;
1414 106849 : move32();
1415 106849 : limit_e = 0;
1416 106849 : move16();
1417 :
1418 : /*-----------------------------------------------------------------*
1419 : * regularized Kx-1
1420 : *-----------------------------------------------------------------*/
1421 :
1422 : /*-----------------------------------------------------------------*
1423 : * normalization matrix G hat
1424 : *-----------------------------------------------------------------*/
1425 :
1426 : /* Computing Cy_hat_diag */
1427 106849 : Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
1428 106849 : Cy_hat_diag_e = Cx_e;
1429 106849 : move16();
1430 :
1431 106849 : Word16 com_e = sub( limit_e, Cy_hat_diag_e );
1432 654124 : FOR( i = 0; i < lengthCy; ++i )
1433 : {
1434 547275 : IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
1435 : {
1436 547275 : limit_fx = Cy_hat_diag_fx[i];
1437 547275 : move32();
1438 547275 : limit_e = Cy_hat_diag_e;
1439 547275 : move16();
1440 : }
1441 : }
1442 :
1443 106849 : limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
1444 106849 : limit_e = add( limit_e, reg_ghat_e );
1445 :
1446 : /* Computing G_hat */
1447 :
1448 106849 : com_e = sub( Cy_hat_diag_e, limit_e );
1449 654124 : FOR( i = 0; i < lengthCy; ++i )
1450 : {
1451 547275 : Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
1452 547275 : move16();
1453 547275 : IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1454 : {
1455 15 : Cy_hat_diag_fx[i] = limit_fx;
1456 15 : move32();
1457 15 : Cy_hat_diag_fx_e[i] = limit_e;
1458 15 : move16();
1459 : }
1460 547275 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
1461 547275 : scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
1462 547275 : L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
1463 :
1464 547275 : G_hat_fx[i] = L_tmp;
1465 547275 : move32();
1466 547275 : G_hat_e[i] = scale;
1467 547275 : move16();
1468 : }
1469 :
1470 : /*-----------------------------------------------------------------*
1471 : * Formulate optimal P
1472 : *-----------------------------------------------------------------*/
1473 :
1474 654124 : FOR( i = 0; i < num_outputs; i++ )
1475 : {
1476 547275 : Kx_fx[i] = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
1477 547275 : move32();
1478 547275 : Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
1479 547275 : move16();
1480 : }
1481 :
1482 654124 : FOR( i = 0; i < num_outputs; i++ )
1483 : {
1484 : Word32 fac_fx;
1485 547275 : fac_fx = Kx_fx[i];
1486 547275 : move32();
1487 :
1488 3391850 : FOR( j = 0; j < num_outputs; j++ )
1489 : {
1490 2844575 : mat_mult_buffer1_fx[i + j * num_outputs] = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e));
1491 2844575 : move32();
1492 2844575 : mat_mult_buffer1_buff_e[i + j * num_outputs] = add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] );
1493 2844575 : move16();
1494 : }
1495 : }
1496 :
1497 106849 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
1498 106849 : move16();
1499 2844575 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1500 : {
1501 2737726 : if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
1502 : {
1503 29532 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
1504 29532 : move16();
1505 : }
1506 : }
1507 :
1508 2951424 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1509 : {
1510 2844575 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
1511 2844575 : move32();
1512 : }
1513 :
1514 106849 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1515 :
1516 106849 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
1517 :
1518 : /* Actually Processing P */
1519 :
1520 106849 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1521 106849 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1522 106849 : mat_mult_buffer1_e = 0;
1523 106849 : move16();
1524 106849 : mat_mult_buffer2_e = 0;
1525 106849 : move16();
1526 :
1527 106849 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
1528 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1529 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1530 :
1531 : /*-----------------------------------------------------------------*
1532 : * Formulate M
1533 : *-----------------------------------------------------------------*/
1534 :
1535 :
1536 106849 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1537 106849 : set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1538 :
1539 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1540 :
1541 654124 : FOR( i = 0; i < num_outputs; i++ )
1542 : {
1543 : Word32 fac_fx;
1544 547275 : fac_fx = Kx_reg_inv_fx[i];
1545 547275 : move32();
1546 :
1547 3391850 : FOR( j = 0; j < num_outputs; j++ )
1548 : {
1549 2844575 : mixing_matrix_fx[j + i * num_outputs] = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e);
1550 2844575 : move32();
1551 2844575 : mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
1552 2844575 : move16();
1553 : }
1554 : }
1555 :
1556 : /*-----------------------------------------------------------------*
1557 : * Formulate Cr
1558 : *-----------------------------------------------------------------*/
1559 :
1560 : /* Compute Cy_tilde = M*Cx*M' */
1561 :
1562 : Word16 Cx_e_arr[MAX_CICP_CHANNELS];
1563 106849 : set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
1564 :
1565 106849 : matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1566 :
1567 106849 : exp = mixing_matrix_fx_e[0];
1568 106849 : move16();
1569 2844575 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1570 : {
1571 2737726 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1572 : {
1573 11848 : exp = mixing_matrix_fx_e[i];
1574 11848 : move16();
1575 : }
1576 : }
1577 :
1578 2951424 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1579 : {
1580 2844575 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1581 2844575 : move32();
1582 : }
1583 106849 : mixing_matrix_e = exp;
1584 106849 : move16();
1585 :
1586 106849 : exp = mat_mult_buffer1_buff_e[0];
1587 106849 : move16();
1588 2844575 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1589 : {
1590 2737726 : if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
1591 : {
1592 11848 : exp = mat_mult_buffer1_buff_e[i];
1593 11848 : move16();
1594 : }
1595 : }
1596 :
1597 2951424 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1598 : {
1599 2844575 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
1600 2844575 : move32();
1601 : }
1602 106849 : mat_mult_buffer1_e = exp;
1603 106849 : move16();
1604 :
1605 106849 : matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
1606 :
1607 : /*-----------------------------------------------------------------*
1608 : * Energy Compensation
1609 : *-----------------------------------------------------------------*/
1610 :
1611 106849 : adj_fx_p = svd_s_buffer_fx;
1612 :
1613 654124 : FOR( i = 0; i < lengthCy; ++i )
1614 : {
1615 547275 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
1616 547275 : scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
1617 547275 : adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
1618 547275 : move32();
1619 547275 : adj_buff_e[i] = scale;
1620 547275 : move16();
1621 547275 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
1622 547275 : IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
1623 : {
1624 3 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1625 3 : move32();
1626 3 : adj_buff_e[i] = 3;
1627 3 : move16();
1628 : }
1629 : }
1630 :
1631 106849 : adj_e = adj_buff_e[0];
1632 106849 : move16();
1633 :
1634 547275 : FOR( i = 1; i < lengthCy; i++ )
1635 : {
1636 440426 : if ( LT_16( adj_e, adj_buff_e[i] ) )
1637 : {
1638 43271 : adj_e = adj_buff_e[i];
1639 43271 : move16();
1640 : }
1641 : }
1642 :
1643 :
1644 654124 : FOR( i = 0; i < lengthCy; i++ )
1645 : {
1646 547275 : adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
1647 547275 : move32();
1648 : }
1649 :
1650 106849 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1651 106849 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
1652 106849 : *mixing_matrix_ret_e = mat_mult_buffer3_e;
1653 106849 : move16();
1654 :
1655 106849 : pop_wmops();
1656 :
1657 106849 : return out;
1658 : }
1659 :
1660 :
1661 : /*-------------------------------------------------------------------*
1662 : * computeMixingMatricesISM()
1663 : *
1664 : *
1665 : *-------------------------------------------------------------------*/
1666 315180 : Word16 computeMixingMatricesISM_fx(
1667 : const Word16 num_inputs,
1668 : const Word16 num_responses,
1669 : const Word16 num_outputs,
1670 : const Word32 *responses_fx, /*Q(31-responses_e) */
1671 : const Word16 responses_e,
1672 : const Word32 *ener_fx, /*Q(31-ener_e) */
1673 : const Word16 ener_e,
1674 : const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
1675 : const Word16 Cx_diag_e,
1676 : const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
1677 : const Word16 Cy_diag_e,
1678 : const Word16 *Q_16fx, // Q15
1679 : const Word16 energy_compensation_flag,
1680 : const Word32 reg_Sx_fx, /*Q0*/
1681 : const Word32 reg_ghat_fx, /*Q0*/
1682 : Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
1683 : Word16 *mixing_matrix_e )
1684 : {
1685 : Word16 i, out;
1686 : Word16 lengthCx, lengthCy;
1687 : Word16 nL, nC;
1688 :
1689 : Word32 *Cy_tilde_p_fx;
1690 : Word32 *adj_fx;
1691 : Word32 limit_fx;
1692 :
1693 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1694 : Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1695 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1696 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1697 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1698 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1699 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
1700 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1701 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
1702 : Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
1703 :
1704 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1705 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1706 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
1707 : Word16 svd_s_buffer_fx_e;
1708 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1709 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1710 :
1711 : Word16 temp_e[MAX_OUTPUT_CHANNELS];
1712 315180 : push_wmops( "dirac_cov_mix_mat" );
1713 :
1714 315180 : out = EXIT_SUCCESS;
1715 315180 : move16();
1716 315180 : lengthCx = num_inputs;
1717 315180 : move16();
1718 315180 : lengthCy = num_outputs;
1719 315180 : move16();
1720 :
1721 6073020 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1722 : {
1723 5757840 : IF( EQ_16( Q_16fx[i], MAX_16 ) )
1724 : {
1725 2575200 : Q_fx[i] = MAX_32;
1726 2575200 : move32();
1727 : }
1728 : ELSE
1729 : {
1730 3182640 : Q_fx[i] = L_deposit_h( Q_16fx[i] );
1731 3182640 : move32();
1732 : }
1733 : }
1734 315180 : Q_e = 0;
1735 315180 : move16();
1736 315180 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
1737 5358060 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
1738 : {
1739 5042880 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1740 5042880 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1741 5042880 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1742 : }
1743 :
1744 : /* Decomposition of Cy = Ky*Ky' */
1745 : /* Ky = responses*diag(ener) */
1746 315180 : matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
1747 :
1748 : /* Decomposition of Cx -> Computing Kx */
1749 315180 : set16_fx( temp_e, Cx_diag_e, lengthCx );
1750 315180 : v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
1751 315180 : Kx_e = temp_e[0];
1752 315180 : move16();
1753 630360 : FOR( i = 1; i < lengthCx; i++ )
1754 : {
1755 315180 : Kx_e = s_max( Kx_e, temp_e[i] );
1756 : }
1757 945540 : FOR( i = 0; i < lengthCx; i++ )
1758 : {
1759 630360 : Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
1760 630360 : move32();
1761 : }
1762 :
1763 : /* Regularization of Sx */
1764 315180 : maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
1765 315180 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
1766 :
1767 945540 : FOR( i = 0; i < lengthCx; ++i )
1768 : {
1769 630360 : IF( GT_32( Kx_fx[i], limit_fx ) )
1770 : {
1771 594761 : svd_s_buffer_fx[i] = Kx_fx[i];
1772 594761 : move32();
1773 : }
1774 : ELSE
1775 : {
1776 35599 : svd_s_buffer_fx[i] = limit_fx;
1777 35599 : move32();
1778 : }
1779 : }
1780 315180 : svd_s_buffer_fx_e = Kx_e;
1781 315180 : move16();
1782 :
1783 315180 : limit_fx = 0;
1784 315180 : move32();
1785 :
1786 : /* regularized Kx-1 */
1787 :
1788 945540 : FOR( i = 0; i < lengthCx; ++i )
1789 : {
1790 630360 : IF( svd_s_buffer_fx[i] )
1791 : {
1792 : Word32 reg_fac;
1793 630356 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
1794 630356 : Kx_reg_inv_fx[i] = reg_fac;
1795 630356 : move32();
1796 630356 : temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
1797 630356 : move16();
1798 : }
1799 : ELSE
1800 : {
1801 : Word32 reg_fac;
1802 4 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
1803 4 : Kx_reg_inv_fx[i] = reg_fac;
1804 4 : move32();
1805 4 : temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
1806 4 : move16();
1807 : }
1808 : }
1809 315180 : Kx_reg_inv_e = temp_e[0];
1810 315180 : move16();
1811 630360 : FOR( i = 1; i < lengthCx; i++ )
1812 : {
1813 315180 : Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
1814 : }
1815 945540 : FOR( i = 0; i < lengthCx; i++ )
1816 : {
1817 630360 : Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
1818 630360 : move32();
1819 : }
1820 :
1821 : /************************ normalization matrix G hat **********************/
1822 :
1823 : /* Computing Q*Cx*Q' */
1824 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1825 : Word16 Cy_hat_diag_e;
1826 :
1827 315180 : matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
1828 :
1829 315180 : Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
1830 :
1831 6073020 : FOR( i = 0; i < lengthCy * lengthCx; ++i )
1832 : {
1833 5757840 : IF( GT_16( Q_Cx_e, Q_e ) )
1834 : {
1835 5757804 : Q_fx[i] = L_shr( Q_fx[i], guard_bits );
1836 5757804 : move32();
1837 : }
1838 : ELSE
1839 : {
1840 36 : Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
1841 36 : move32();
1842 : }
1843 : }
1844 :
1845 315180 : IF( GT_16( Q_Cx_e, Q_e ) )
1846 : {
1847 315178 : Q_e = add( Q_e, guard_bits );
1848 : }
1849 : ELSE
1850 : {
1851 2 : Q_Cx_e = add( Q_Cx_e, guard_bits );
1852 : }
1853 315180 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
1854 :
1855 : /* Computing Cy_hat_diag */
1856 3194100 : FOR( i = 0; i < lengthCy; ++i )
1857 : {
1858 2878920 : if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
1859 : {
1860 469580 : limit_fx = Cy_hat_diag_fx[i];
1861 469580 : move32();
1862 : }
1863 : }
1864 :
1865 315180 : limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
1866 :
1867 : /* Computing G_hat */
1868 3194100 : FOR( i = 0; i < lengthCy; ++i )
1869 : {
1870 2878920 : if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1871 : {
1872 12268 : Cy_hat_diag_fx[i] = limit_fx;
1873 12268 : move32();
1874 : }
1875 2878920 : IF( Cy_diag_fx[i] )
1876 : {
1877 1875170 : IF( Cy_hat_diag_fx[i] )
1878 : {
1879 1875170 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
1880 1875170 : move32();
1881 1875170 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
1882 1875170 : move16();
1883 1875170 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1884 1875170 : move32();
1885 : }
1886 : ELSE
1887 : {
1888 0 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1889 0 : move32();
1890 0 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1891 0 : move16();
1892 0 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1893 0 : move32();
1894 : }
1895 : }
1896 : ELSE
1897 : {
1898 1003750 : G_hat_fx[i] = 0;
1899 1003750 : move32();
1900 1003750 : temp_e[i] = 0;
1901 1003750 : move16();
1902 : }
1903 : }
1904 315180 : G_hat_e = temp_e[0];
1905 315180 : move16();
1906 2878920 : FOR( i = 1; i < lengthCy; i++ )
1907 : {
1908 2563740 : G_hat_e = s_max( G_hat_e, temp_e[i] );
1909 : }
1910 3194100 : FOR( i = 0; i < lengthCy; i++ )
1911 : {
1912 2878920 : G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
1913 2878920 : move32();
1914 : }
1915 :
1916 : /************************ Formulate optimal P **********************/
1917 :
1918 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
1919 315180 : diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1920 :
1921 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1922 :
1923 315180 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1924 :
1925 315180 : IF( LT_16( lengthCx, num_responses ) )
1926 : {
1927 3600 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
1928 :
1929 3600 : nL = num_responses;
1930 3600 : move16();
1931 3600 : nC = lengthCx;
1932 3600 : move16();
1933 3600 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1934 : }
1935 : ELSE
1936 : {
1937 311580 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
1938 :
1939 311580 : nL = lengthCx;
1940 311580 : move16();
1941 311580 : nC = num_responses;
1942 311580 : move16();
1943 311580 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1944 : }
1945 :
1946 : /* Actually Processing P */
1947 :
1948 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
1949 315180 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
1950 315180 : mat_mult_buffer1_e = 0;
1951 315180 : move16();
1952 315180 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1953 315180 : mat_mult_buffer2_e = 0;
1954 315180 : move16();
1955 :
1956 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1957 :
1958 : /************************ Formulate M **********************/
1959 :
1960 315180 : matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1961 :
1962 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
1963 :
1964 : /*********************** Energy Compensation ****************/
1965 :
1966 : /* Compute Cy_tilde = M*Cx*M' */
1967 315180 : matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1968 :
1969 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1970 :
1971 315180 : IF( EQ_16( energy_compensation_flag, 1 ) )
1972 : {
1973 315180 : adj_fx = svd_s_buffer_fx;
1974 315180 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1975 3194100 : FOR( i = 0; i < lengthCy; ++i )
1976 : {
1977 : /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
1978 2878920 : IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
1979 : {
1980 0 : adj_fx[i] = MAX_32;
1981 0 : move32();
1982 0 : temp_e[i] = 0;
1983 0 : move16();
1984 : }
1985 : ELSE
1986 : {
1987 2878920 : IF( Cy_diag_fx[i] )
1988 : {
1989 1875170 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
1990 : {
1991 1875131 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
1992 1875131 : move32();
1993 1875131 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
1994 1875131 : move16();
1995 1875131 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1996 1875131 : move32();
1997 : }
1998 : ELSE
1999 : {
2000 39 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
2001 39 : move32();
2002 39 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
2003 39 : move16();
2004 39 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
2005 39 : move32();
2006 : }
2007 : }
2008 : ELSE
2009 : {
2010 1003750 : adj_fx[i] = 0;
2011 1003750 : move32();
2012 1003750 : temp_e[i] = 0;
2013 1003750 : move16();
2014 : }
2015 : }
2016 :
2017 2878920 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
2018 2878920 : IF( GT_32( adj_fx[i], temp ) )
2019 : {
2020 1680 : adj_fx[i] = MAX_32;
2021 1680 : move32();
2022 1680 : temp_e[i] = 2;
2023 1680 : move16();
2024 : }
2025 : }
2026 315180 : adj_e = temp_e[0];
2027 315180 : move16();
2028 2878920 : FOR( i = 1; i < lengthCy; i++ )
2029 : {
2030 2563740 : adj_e = s_max( adj_e, temp_e[i] );
2031 : }
2032 3194100 : FOR( i = 0; i < lengthCy; i++ )
2033 : {
2034 2878920 : adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
2035 2878920 : move32();
2036 : }
2037 :
2038 315180 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
2039 :
2040 315180 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
2041 315180 : *mixing_matrix_e = mat_mult_buffer3_e;
2042 315180 : move16();
2043 : }
2044 :
2045 315180 : pop_wmops();
2046 :
2047 315180 : return out;
2048 : }
|