Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include "basop_util.h"
34 : #include "enh32.h"
35 : #include <stdint.h>
36 : #include <string.h>
37 : #include <stdio.h>
38 : #include <stdlib.h>
39 : #include <assert.h>
40 : #include <math.h>
41 : #include "options.h"
42 : #include "cnst.h"
43 : #include "rom_enc.h"
44 : #include "rom_com.h"
45 : #include "prot_fx.h"
46 : #include "ivas_stat_dec.h"
47 : #include "ivas_cnst.h"
48 : #include "ivas_rom_com.h"
49 : #include "ivas_rom_dec.h"
50 : #include "wmc_auto.h"
51 : #include "rom_dec.h"
52 : #include "ivas_prot_fx.h"
53 :
54 : /*-----------------------------------------------------------------------*
55 : * Local constants
56 : *-----------------------------------------------------------------------*/
57 :
58 : #define SQRT_EPSILON_FX 68 /* Q31 square root of EPSILON */
59 :
60 : /*-------------------------------------------------------------------*
61 : * ivas_dirac_dec_output_synthesis_cov_open()
62 : *
63 : * Sets up the state and parameters for the Covariance Synthesis
64 : *-------------------------------------------------------------------*/
65 :
66 409 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
67 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
68 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state */
69 : const Word16 max_band_decorr, /* i : uppermost frequency band where decorrelation is applied */
70 : const Word16 interp_length, /* i : length for interpolating the mixing matrices in time slots */
71 : const Word16 num_param_bands, /* i : number of parameter bands */
72 : const Word16 num_param_bands_residual, /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
73 : const Word16 nchan_in, /* i : number of input (transport) channels */
74 : const Word16 nchan_out, /* i : number of output channels */
75 : const Word32 *proto_matrix /* i : the prototype (upmix) matrix (only used if mode == 1) Q(15-proto_matrix_e) */
76 : )
77 : {
78 : Word16 idx;
79 :
80 409 : h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
81 409 : move16();
82 :
83 : /*-----------------------------------------------------------------*
84 : * memory allocation
85 : *-----------------------------------------------------------------*/
86 :
87 : /* buffer length and interpolator */
88 409 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
89 409 : IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
90 : {
91 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
92 : }
93 409 : h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
94 409 : move16();
95 : /* cov buffers */
96 5305 : FOR( idx = 0; idx < num_param_bands; idx++ )
97 : {
98 4896 : IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
99 : {
100 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
101 : }
102 4896 : h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
103 4896 : IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
104 : {
105 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
106 : }
107 4896 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
108 : {
109 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
110 : }
111 4896 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
112 4896 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
113 4896 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
114 :
115 4896 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
116 : {
117 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
118 : }
119 4896 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
120 4896 : h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
121 4896 : move16();
122 : }
123 20053 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
124 : {
125 19644 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
126 19644 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
127 19644 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
128 19644 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
129 : }
130 :
131 4259 : FOR( idx = 0; idx < num_param_bands_residual; idx++ )
132 : {
133 3850 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
134 : {
135 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
136 : }
137 3850 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
138 :
139 3850 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
140 : {
141 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
142 : }
143 3850 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
144 3850 : h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
145 3850 : move16();
146 : }
147 21099 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
148 : {
149 20690 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
150 20690 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
151 : }
152 :
153 409 : IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
154 : {
155 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
156 : }
157 409 : IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
158 : {
159 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
160 : }
161 :
162 409 : set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
163 409 : set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
164 :
165 409 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
166 : {
167 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
168 : }
169 409 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
170 :
171 409 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
172 : {
173 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
174 : }
175 409 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
176 :
177 409 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
178 : {
179 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
180 : }
181 409 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
182 :
183 409 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
184 : {
185 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
186 : }
187 409 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
188 :
189 : /*-----------------------------------------------------------------*
190 : * prepare processing parameters
191 : *-----------------------------------------------------------------*/
192 :
193 : /* compute interpolator */
194 409 : IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
195 : {
196 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
197 : }
198 13497 : FOR( idx = 1; idx <= interp_length; ++idx )
199 : {
200 13088 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
201 13088 : move16();
202 : }
203 409 : Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
204 409 : h_dirac_output_synthesis_params->proto_matrix_e = 5;
205 409 : move16();
206 409 : return IVAS_ERR_OK;
207 : }
208 :
209 :
210 : /*-------------------------------------------------------------------*
211 : * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
212 : *
213 : *
214 : *-------------------------------------------------------------------*/
215 :
216 408313 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
217 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
218 : const UWord16 interp_length /* i : interpolator length */
219 : )
220 : {
221 : Word16 idx;
222 408313 : Word16 tmp, exp_diff = 0;
223 408313 : move16();
224 :
225 2029274 : FOR( idx = 1; idx <= interp_length; ++idx )
226 : {
227 1620961 : tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
228 1620961 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff ); // Q15
229 1620961 : move16();
230 : }
231 :
232 408313 : return;
233 : }
234 :
235 :
236 : /*-------------------------------------------------------------------*
237 : * ivas_dirac_dec_output_synthesis_cov_init()
238 : *
239 : * initialize the states for the covariance synthesis
240 : *-------------------------------------------------------------------*/
241 409 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
242 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis */
243 : const Word16 nchan_in, /* i : number of input (tranport) channels */
244 : const Word16 nchan_out, /* i : number of output channels */
245 : const Word16 n_param_bands, /* i : number of total parameter bands */
246 : const Word16 n_param_bands_res /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
247 : )
248 : {
249 :
250 : Word16 idx;
251 :
252 : /* initialize buffers */
253 5305 : FOR( idx = 0; idx < n_param_bands; idx++ )
254 : {
255 4896 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
256 4896 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
257 4896 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
258 4896 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
259 : }
260 :
261 4259 : FOR( idx = 0; idx < n_param_bands_res; idx++ )
262 : {
263 3850 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
264 3850 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
265 : }
266 :
267 :
268 409 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
269 409 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
270 409 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
271 409 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
272 :
273 409 : return;
274 : }
275 :
276 :
277 : /*-------------------------------------------------------------------*
278 : * ivas_dirac_dec_output_synthesis_cov_close()
279 : *
280 : * deallocate dynamic memory in the covariance synthesis state
281 : *-------------------------------------------------------------------*/
282 :
283 409 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
284 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i : handle for the covariance synthesis parameters */
285 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state */
286 : )
287 : {
288 : Word16 idx;
289 :
290 : /*-----------------------------------------------------------------*
291 : * memory deallocation
292 : *-----------------------------------------------------------------*/
293 :
294 : /* free interpolator */
295 409 : IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
296 : {
297 409 : free( h_dirac_output_synthesis_params->interpolator_fx );
298 409 : h_dirac_output_synthesis_params->interpolator_fx = NULL;
299 : }
300 :
301 : /* free alpha */
302 409 : IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
303 : {
304 0 : free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
305 0 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
306 : }
307 :
308 : /* free proto_matrix */
309 409 : IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
310 : {
311 409 : free( h_dirac_output_synthesis_params->proto_matrix_fx );
312 409 : h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
313 : }
314 :
315 :
316 409 : IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
317 : {
318 409 : free( h_dirac_output_synthesis_state->cx_old_e );
319 409 : h_dirac_output_synthesis_state->cx_old_e = NULL;
320 : }
321 409 : IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
322 : {
323 409 : free( h_dirac_output_synthesis_state->cy_old_e );
324 409 : h_dirac_output_synthesis_state->cy_old_e = NULL;
325 : }
326 :
327 : /* free cov buffers */
328 24949 : FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
329 : {
330 24540 : IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
331 : {
332 4896 : free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
333 4896 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
334 : }
335 :
336 24540 : IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
337 : {
338 4896 : free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
339 4896 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
340 : }
341 :
342 24540 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
343 : {
344 4896 : free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
345 4896 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
346 : }
347 :
348 24540 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
349 : {
350 3850 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
351 3850 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
352 : }
353 :
354 24540 : IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
355 : {
356 4896 : free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
357 4896 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
358 : }
359 :
360 24540 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
361 : {
362 3850 : free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
363 3850 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
364 : }
365 : }
366 :
367 409 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
368 : {
369 409 : free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
370 409 : h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
371 : }
372 :
373 409 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
374 : {
375 409 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
376 409 : h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
377 : }
378 :
379 409 : IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
380 : {
381 409 : free( h_dirac_output_synthesis_state->mixing_matrix_exp );
382 409 : h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
383 : }
384 :
385 409 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
386 : {
387 409 : free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
388 409 : h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
389 : }
390 409 : return;
391 : }
392 :
393 : /*-------------------------------------------------------------------*
394 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
395 : *
396 : * collect the multi channel input covariance for one filter bank time slot
397 : *-------------------------------------------------------------------*/
398 :
399 : /*-------------------------------------------------------------------*
400 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
401 : *
402 : * collect the multi channel input covariance for one filter bank time slot
403 : *-------------------------------------------------------------------*/
404 2145291 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
405 : Word32 *RealBuffer_fx, /* i : input channel filter bank samples (real part) Q(31- RealBuffer_e)*/
406 : Word16 RealBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
407 : Word32 *ImagBuffer_fx, /* i : input channel filter bank samples (imaginary part Q(ImagBuffer_e)*/
408 : Word16 ImagBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
409 : Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (real part) Q(31- cx_e)*/
410 : Word16 *cx_e, /* i : exponent for accumulated input covariance (real part) */
411 : Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (imaginary part) Q(31- cx_imag_e)*/
412 : Word16 *cx_imag_e, /* i : exponent accumulated input covariance (imag part) */
413 : PARAM_MC_DEC_HANDLE hParamMC, /* i : handle to Parametric MC state */
414 : const Word16 param_band, /* i : parameter band */
415 : const Word16 nchan_in /* i : number of input channels */
416 : )
417 : {
418 : Word16 band_idx, ch_idx;
419 : Word16 brange[2];
420 : Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
421 : Word16 real_in_e;
422 : Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
423 : Word16 imag_in_e;
424 : Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
425 : Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
426 : Word16 output_e;
427 : Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
428 : Word16 band, num_bands;
429 : Word16 cx_fx_norm, cx_imag_fx_norm;
430 : /* estimate input covariance */
431 : /* Already stack here instead of in the process_subframe */
432 :
433 : /* collect input frame */
434 2145291 : brange[0] = hParamMC->band_grouping[param_band];
435 2145291 : move16();
436 2145291 : brange[1] = hParamMC->band_grouping[param_band + 1];
437 2145291 : move16();
438 2145291 : num_bands = sub( brange[1], brange[0] );
439 :
440 11288251 : FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
441 : {
442 9142960 : band = add( brange[0], band_idx );
443 27497640 : FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
444 : {
445 18354680 : real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
446 18354680 : move32();
447 18354680 : imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
448 18354680 : move32();
449 : }
450 : }
451 :
452 2145291 : real_in_e = RealBuffer_e;
453 2145291 : move16();
454 2145291 : imag_in_e = ImagBuffer_e;
455 2145291 : move16();
456 :
457 2145291 : Word16 buf_len = imult1616( num_bands, nchan_in );
458 :
459 2145291 : shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
460 2145291 : shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
461 :
462 2145291 : real_in_e = sub( real_in_e, shift_real );
463 2145291 : imag_in_e = sub( imag_in_e, shift_imag );
464 :
465 :
466 2145291 : output_e = s_max( real_in_e, imag_in_e );
467 :
468 2145291 : scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
469 2145291 : scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
470 :
471 2145291 : cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
472 2145291 : v_add_fx_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
473 :
474 2145291 : v_add_fx_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
475 :
476 2145291 : cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
477 2145291 : cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
478 :
479 2145291 : scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
480 2145291 : scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
481 :
482 2145291 : *cx_e = sub( tmp1_e, cx_fx_norm );
483 2145291 : move16();
484 2145291 : *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
485 2145291 : move16();
486 :
487 2145291 : return;
488 : }
489 :
490 : /*-------------------------------------------------------------------*
491 : * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
492 : *
493 : * synthesize one filter bank slot of multi channel output filter bank
494 : * samples with the covariance synthesis
495 : *-------------------------------------------------------------------*/
496 :
497 169171 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
498 : Word32 *Cldfb_RealBuffer_in_fx, /*Q6*/
499 : Word32 *Cldfb_ImagBuffer_in_fx, /*Q6*/
500 : Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (real part) Q6*/
501 : Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (imaginary part) Q6*/
502 : Word32 *mixing_matrix_fx[], /* i : parameter band wise mixing matrices (direct part) Q(31-mixing_matrix_e)*/
503 : Word16 *mixing_matrix_e, /* i : parameter band wise mixing matrices (direct part) */
504 : Word32 *mixing_matrix_res_fx[], /* i : parameter band wise mixing matrices (residual part) Q(31-mixing_matrix_res_e)*/
505 : Word16 *mixing_matrix_res_e, /* i : parameter band wise mixing matrices (residual part) */
506 : const UWord16 slot_idx_sfr, /* i : time slot index for the current slot within the current subframe */
507 : const UWord16 slot_idx_tot, /* i : time slot index for the current slot within the frame */
508 : const Word16 nX, /* i : number of input channels */
509 : const Word16 nY, /* i : number of output channels */
510 : PARAM_MC_DEC_HANDLE hParamMC /* i : handle to the Parametric MC decoder state */
511 : )
512 : {
513 : Word16 param_band_idx, band, ch_idx;
514 : Word16 have_residual;
515 : Word16 brange[2];
516 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
517 : Word32 mixing_matrix_smooth_fx[MAX_LS_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
518 : Word16 mixing_matrix_smooth_e;
519 : Word32 mixing_matrix_res_smooth_fx[MAX_LS_CHANNELS * MAX_LS_CHANNELS];
520 169171 : Word16 mixing_matrix_res_smooth_e = 0;
521 169171 : move16();
522 : Word32 mixing_matrix_buffer_fx[MAX_LS_CHANNELS * MAX_LS_CHANNELS];
523 : Word16 mixing_matrix_buffer_e;
524 : Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
525 : Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
526 :
527 : Word32 diff_f_real_fx[MAX_LS_CHANNELS];
528 : Word32 diff_f_imag_fx[MAX_LS_CHANNELS];
529 :
530 169171 : h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
531 :
532 169171 : set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
533 169171 : set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
534 :
535 169171 : set_zero_fx( diff_f_real_fx, MAX_LS_CHANNELS );
536 169171 : set_zero_fx( diff_f_imag_fx, MAX_LS_CHANNELS );
537 :
538 2357870 : FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
539 : {
540 : /* final mixing */
541 2188699 : have_residual = 0;
542 2188699 : move16();
543 2188699 : brange[0] = hParamMC->band_grouping[param_band_idx];
544 2188699 : move16();
545 2188699 : brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
546 2188699 : move16();
547 :
548 2188699 : if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
549 : {
550 1760504 : have_residual = 1;
551 1760504 : move16();
552 : }
553 :
554 2188699 : v_multc_fx( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
555 2188699 : mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
556 2188699 : move16();
557 :
558 2188699 : v_multc_fx( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
559 2188699 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
560 2188699 : move16();
561 :
562 2188699 : v_add_fx_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
563 :
564 2188699 : IF( have_residual )
565 : {
566 : /* residual mixing matrix interpolation*/
567 :
568 1760504 : v_multc_fx( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
569 1760504 : mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
570 1760504 : move16();
571 :
572 1760504 : set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
573 1760504 : v_multc_fx( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
574 1760504 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
575 1760504 : move16();
576 :
577 1760504 : v_add_fx_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
578 : }
579 :
580 11512019 : FOR( band = brange[0]; band < brange[1]; band++ )
581 : {
582 9323320 : assert( band >= 0 );
583 :
584 9323320 : IF( have_residual )
585 : {
586 : /* collect diffuse prototypes */
587 3383420 : assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
588 24036060 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
589 : {
590 20652640 : diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
591 20652640 : move32();
592 20652640 : diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
593 20652640 : move32();
594 : }
595 :
596 : /* apply residual mixing */
597 : {
598 : Word16 shifter;
599 3383420 : shifter = sub( mixing_matrix_res_smooth_e, 32 );
600 24036060 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
601 : {
602 : Word16 i;
603 : Word16 idx;
604 : Word64 temp_real, temp_imag;
605 :
606 20652640 : idx = ch_idx;
607 20652640 : temp_real = 0;
608 20652640 : temp_imag = 0;
609 20652640 : move64();
610 20652640 : move64();
611 147846240 : FOR( i = 0; i < nY; i++ )
612 : {
613 127193600 : temp_real = W_mac_32_32( temp_real, mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] );
614 127193600 : temp_imag = W_mac_32_32( temp_imag, mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] );
615 127193600 : idx += nY;
616 : }
617 20652640 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
618 20652640 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
619 : }
620 : }
621 : }
622 : ELSE
623 : {
624 42142620 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
625 : {
626 36202720 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
627 36202720 : move32();
628 36202720 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
629 36202720 : move32();
630 : }
631 : }
632 :
633 : /* collect input signals, still in cldfb buffers */
634 28046760 : FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
635 : {
636 :
637 18723440 : input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
638 18723440 : move32();
639 18723440 : input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
640 18723440 : move32();
641 : }
642 :
643 : /* apply mixing matrix */
644 : {
645 : Word16 shifter;
646 9323320 : shifter = 31 - mixing_matrix_smooth_e;
647 :
648 66178680 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
649 : {
650 : Word16 i;
651 : Word16 idx;
652 : Word64 temp_real, temp_imag;
653 :
654 56855360 : idx = ch_idx;
655 56855360 : temp_real = 0;
656 56855360 : temp_imag = 0;
657 56855360 : move64();
658 56855360 : move64();
659 171410880 : for ( i = 0; i < nX; i++ )
660 : {
661 114555520 : temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
662 114555520 : temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
663 114555520 : idx += nY;
664 : }
665 56855360 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
666 56855360 : move32();
667 56855360 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
668 56855360 : move32();
669 : }
670 : }
671 : }
672 : }
673 :
674 169171 : return;
675 : }
676 :
677 : /*-------------------------------------------------------------------*
678 : * computeMixingMatrices()
679 : *
680 : * compute a mixing matrix using the convariance synthesis approach
681 : *-------------------------------------------------------------------*/
682 :
683 136792 : Word16 computeMixingMatrices_fx(
684 : const Word16 num_inputs, /* i : number of input channels */
685 : const Word16 num_outputs, /* i : number of output channels */
686 : const Word32 *Cx, /* i : input channel covariance matrix Q(31-Cx_e) */
687 : Word16 Cx_e,
688 : const Word32 *Cy, /* i : target covariance matrix Q(31-Cy_e) */
689 : Word16 Cy_e,
690 : const Word32 *Q, /* i : prototype matrix (usually a upmix matrix) Q_fx_e */
691 : Word16 Q_fx_e,
692 : const Word16 energy_compensation_flag, /* i : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
693 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values */
694 : Word16 reg_Sx_e,
695 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
696 : Word16 reg_ghat_e,
697 : Word32 *mixing_matrix_fx, /* o : resulting mixing matrix Q(31-mixing_matrix_out_e) */
698 : Word16 *mixing_matrix_out_e,
699 : Word32 *Cr_fx, /* o : residual covariance matrix Q(31-Cr_e) */
700 : Word16 *Cr_e )
701 : {
702 : Word16 i, j;
703 : Word16 out;
704 : Word16 nL, nC;
705 136792 : Word16 lengthCx = num_inputs;
706 136792 : Word16 lengthCy = num_outputs;
707 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
708 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
709 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
710 : Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
711 : Word32 Cy_fx[MAX_LS_CHANNELS * MAX_LS_CHANNELS];
712 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
713 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
714 : Word16 Cx_fx_e;
715 : Word16 Cy_fx_e;
716 :
717 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
718 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
719 :
720 : Word32 limit_fx;
721 : Word16 limit_e;
722 :
723 : Word32 L_tmp;
724 : Word16 tmp_e, tmp, exp;
725 :
726 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
727 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
728 :
729 : Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
730 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
731 :
732 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
733 : Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
734 :
735 : Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_LS_CHANNELS];
736 : Word16 Q_e;
737 :
738 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
739 : Word16 Q_Cx_e;
740 :
741 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
742 : Word16 Cy_hat_diag_e;
743 : Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
744 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
745 : Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
746 :
747 : Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
748 :
749 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
750 :
751 : Word16 mixing_matrix_e;
752 :
753 : Word16 Cr_fx_e;
754 :
755 :
756 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
757 : Word16 adj_e[MAX_OUTPUT_CHANNELS];
758 : Word32 *adj_fx_p;
759 : Word16 adj_fx_e;
760 :
761 : Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
762 136792 : push_wmops( "dirac_cov_mix_mat" );
763 :
764 136792 : out = EXIT_SUCCESS;
765 136792 : move16();
766 :
767 136792 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
768 2325464 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
769 : {
770 2188672 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
771 2188672 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
772 2188672 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
773 : }
774 :
775 :
776 136792 : Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
777 136792 : Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
778 136792 : Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
779 :
780 136792 : Q_e = Q_fx_e;
781 136792 : move16();
782 136792 : Cx_fx_e = Cx_e;
783 136792 : move16();
784 136792 : Cy_fx_e = Cy_e;
785 136792 : move16();
786 :
787 : /*-----------------------------------------------------------------*
788 : * Decomposition of Cy
789 : *-----------------------------------------------------------------*/
790 :
791 : /* Processing the SVD */
792 :
793 136792 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
794 :
795 136792 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
796 136792 : Word16 max_e = -32;
797 : /* Computing Ky */
798 836511 : FOR( i = 0; i < lengthCy; ++i )
799 : {
800 4329928 : FOR( j = 0; j < lengthCy; ++j )
801 : {
802 3630209 : tmp_e = svd_s_buffer_e[j];
803 3630209 : move16();
804 3630209 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
805 3630209 : Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
806 3630209 : move32();
807 3630209 : Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
808 3630209 : move16();
809 3630209 : max_e = s_max( max_e, tmp_e );
810 : }
811 : }
812 3767001 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
813 : {
814 3630209 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
815 3630209 : move32();
816 3630209 : Ky_fx_e[i] = max_e;
817 3630209 : move16();
818 : }
819 :
820 : /*-----------------------------------------------------------------*
821 : * Decomposition of Cx
822 : *-----------------------------------------------------------------*/
823 :
824 : /* Processing the SVD */
825 :
826 136792 : mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
827 :
828 136792 : svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
829 136792 : max_e = -32;
830 411736 : FOR( i = 0; i < lengthCx; ++i )
831 : {
832 828912 : FOR( j = 0; j < lengthCx; ++j )
833 : {
834 553968 : tmp_e = svd_s_buffer_e[j];
835 553968 : move16();
836 553968 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
837 553968 : Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
838 553968 : move32();
839 553968 : Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
840 553968 : move16();
841 553968 : max_e = s_max( max_e, tmp_e );
842 : }
843 : }
844 690760 : FOR( i = 0; i < lengthCx * lengthCx; ++i )
845 : {
846 553968 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
847 553968 : move32();
848 553968 : Kx_fx_e[i] = max_e;
849 553968 : move16();
850 : }
851 :
852 411736 : FOR( i = 0; i < lengthCx; ++i )
853 : {
854 274944 : tmp_e = svd_s_buffer_e[i];
855 274944 : move16();
856 274944 : svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
857 274944 : move32();
858 274944 : svd_s_buffer_e[i] = tmp_e;
859 274944 : move16();
860 : }
861 :
862 : /*-----------------------------------------------------------------*
863 : * Regularization of Sx
864 : *-----------------------------------------------------------------*/
865 :
866 136792 : limit_fx = svd_s_buffer_fx[0];
867 136792 : move32();
868 136792 : limit_e = svd_s_buffer_e[0];
869 136792 : move16();
870 274944 : FOR( i = 1; i < lengthCx; i++ )
871 : {
872 138152 : IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) )
873 : {
874 0 : limit_fx = svd_s_buffer_fx[i];
875 0 : move32();
876 0 : limit_e = svd_s_buffer_e[i];
877 0 : move16();
878 : }
879 : }
880 :
881 136792 : limit_e = add( limit_e, reg_Sx_e );
882 136792 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx );
883 136792 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, SQRT_EPSILON_FX /* Q31 */, 0 ) < 0 )
884 : {
885 0 : limit_fx = SQRT_EPSILON_FX;
886 0 : move32();
887 0 : limit_e = 0;
888 0 : move16();
889 : }
890 :
891 411736 : FOR( i = 0; i < lengthCx; ++i )
892 : {
893 274944 : IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) )
894 : {
895 60051 : svd_s_buffer_fx[i] = limit_fx;
896 60051 : move32();
897 60051 : svd_s_buffer_e[i] = limit_e;
898 60051 : move16();
899 : }
900 : }
901 :
902 136792 : limit_fx = 0;
903 136792 : move32();
904 136792 : limit_e = 0;
905 136792 : move16();
906 :
907 : /*-----------------------------------------------------------------*
908 : * regularized Kx-1
909 : *-----------------------------------------------------------------*/
910 :
911 411736 : FOR( i = 0; i < lengthCx; ++i )
912 : {
913 : Word16 scale, reg_fac_fx;
914 274944 : reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
915 274944 : scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
916 828912 : FOR( j = 0; j < lengthCx; ++j )
917 : {
918 553968 : Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
919 553968 : move32();
920 553968 : Kx_reg_inv_e[i + j * lengthCx] = scale;
921 553968 : move16();
922 : }
923 : }
924 :
925 : /*-----------------------------------------------------------------*
926 : * normalization matrix G hat
927 : *-----------------------------------------------------------------*/
928 :
929 : /* Computing Q*Cx*Q' */
930 136792 : matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
931 :
932 136792 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
933 :
934 136792 : Word16 com_e = sub( limit_e, Cy_hat_diag_e );
935 836511 : FOR( i = 0; i < lengthCy; ++i )
936 : {
937 699719 : IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
938 : {
939 699698 : limit_fx = Cy_hat_diag_fx[i];
940 699698 : move32();
941 699698 : limit_e = Cy_hat_diag_e;
942 699698 : move16();
943 : }
944 : }
945 136792 : limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
946 136792 : limit_e = add( limit_e, reg_ghat_e );
947 :
948 136792 : com_e = sub( Cy_hat_diag_e, limit_e );
949 836511 : FOR( i = 0; i < lengthCy; ++i )
950 : {
951 699719 : Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
952 699719 : move16();
953 :
954 699719 : IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) )
955 : {
956 125 : Cy_hat_diag_fx[i] = limit_fx;
957 125 : move32();
958 125 : Cy_hat_diag_buff_e[i] = limit_e;
959 125 : move16();
960 : }
961 :
962 699719 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
963 699719 : exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
964 699719 : L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
965 699719 : G_hat_fx[i] = L_tmp;
966 699719 : move32();
967 699719 : G_hat_buff_e[i] = exp;
968 699719 : move16();
969 : }
970 :
971 : /*-----------------------------------------------------------------*
972 : * Formulate optimal P
973 : *-----------------------------------------------------------------*/
974 :
975 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
976 : Word16 mat_mult_buffer1_fx_e;
977 :
978 : Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
979 :
980 136792 : matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
981 :
982 136792 : matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
983 :
984 136792 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
985 :
986 136792 : IF( LT_16( lengthCx, lengthCy ) )
987 : {
988 136792 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
989 136792 : nL = lengthCy;
990 136792 : move16();
991 136792 : nC = lengthCx;
992 136792 : move16();
993 136792 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
994 : }
995 : ELSE
996 : {
997 0 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
998 0 : nL = lengthCx;
999 0 : move16();
1000 0 : nC = lengthCy;
1001 0 : move16();
1002 0 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1003 : }
1004 :
1005 : /* Actually Processing P */
1006 :
1007 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
1008 : truncates V to num_input_channel columns */
1009 :
1010 136792 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1011 136792 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1012 :
1013 136792 : mat_mult_buffer1_fx_e = 0;
1014 136792 : move16();
1015 136792 : mat_mult_buffer2_e = 0;
1016 136792 : move16();
1017 :
1018 136792 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
1019 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1020 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1021 :
1022 : /************************ Formulate M **********************/
1023 :
1024 136792 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1025 :
1026 : Word16 mixing_matrix_fx_e[MAX_LS_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
1027 :
1028 : Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1029 136792 : set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1030 :
1031 136792 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
1032 :
1033 : /*-----------------------------------------------------------------*
1034 : * Formulate Cr
1035 : *-----------------------------------------------------------------*/
1036 :
1037 : /* Compute Cy_tilde = M*Cx*M' */
1038 : Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
1039 136792 : set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
1040 136792 : matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
1041 :
1042 136792 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
1043 :
1044 136792 : exp = mixing_matrix_fx_e[0];
1045 136792 : move16();
1046 1412408 : FOR( i = 1; i < lengthCy * lengthCx; i++ )
1047 : {
1048 1275616 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1049 : {
1050 100170 : exp = mixing_matrix_fx_e[i];
1051 100170 : move16();
1052 : }
1053 : }
1054 :
1055 1549200 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1056 : {
1057 1412408 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1058 1412408 : move32();
1059 : }
1060 :
1061 136792 : mixing_matrix_e = exp;
1062 136792 : move16();
1063 :
1064 136792 : Cr_p_fx = Cr_fx;
1065 136792 : Cy_p_fx = Cy_fx;
1066 136792 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1067 : Word16 Cr_e_arr[MAX_LS_CHANNELS * MAX_LS_CHANNELS];
1068 836511 : FOR( i = 0; i < lengthCy; ++i )
1069 : {
1070 4329928 : FOR( j = 0; j < lengthCy; ++j )
1071 : {
1072 :
1073 3630209 : *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
1074 3630209 : move32();
1075 3630209 : Cr_p_fx++;
1076 3630209 : Cy_p_fx++;
1077 3630209 : Cy_tilde_p_fx++;
1078 : }
1079 :
1080 : /* Avoid Meaningless negative main diagonal elements */
1081 699719 : IF( Cr_fx[i + ( i * lengthCy )] < 0 )
1082 : {
1083 78733 : Cr_fx[i + ( i * lengthCy )] = 0;
1084 78733 : move32();
1085 78733 : Cr_e_arr[i + ( i * lengthCy )] = 0;
1086 78733 : move16();
1087 : }
1088 : }
1089 :
1090 136792 : exp = Cr_e_arr[0];
1091 136792 : move16();
1092 3630209 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1093 : {
1094 3493417 : if ( LT_16( exp, Cr_e_arr[i] ) )
1095 : {
1096 130480 : exp = Cr_e_arr[i];
1097 130480 : move16();
1098 : }
1099 : }
1100 :
1101 3767001 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1102 : {
1103 3630209 : Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
1104 3630209 : move32();
1105 : }
1106 :
1107 136792 : Cr_fx_e = exp;
1108 136792 : move16();
1109 :
1110 136792 : exp = mat_mult_buffer2_fx_e[0];
1111 136792 : move16();
1112 3630209 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1113 : {
1114 3493417 : if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
1115 : {
1116 190275 : exp = mat_mult_buffer2_fx_e[i];
1117 190275 : move16();
1118 : }
1119 : }
1120 :
1121 3767001 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1122 : {
1123 3630209 : mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
1124 3630209 : move32();
1125 : }
1126 :
1127 136792 : mat_mult_buffer2_e = exp;
1128 136792 : move16();
1129 :
1130 : /*-----------------------------------------------------------------*
1131 : * Energy Compensation
1132 : *-----------------------------------------------------------------*/
1133 :
1134 136792 : IF( EQ_16( energy_compensation_flag, 1 ) )
1135 : {
1136 26762 : adj_fx_p = svd_s_buffer_fx;
1137 26762 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1138 :
1139 163292 : FOR( i = 0; i < lengthCy; ++i )
1140 : {
1141 : /* Avoid correction for very small energies,
1142 : main diagonal elements of Cy_tilde_p may be negative */
1143 136530 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
1144 : {
1145 0 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1146 0 : move32();
1147 0 : adj_e[i] = 1;
1148 0 : move16();
1149 : }
1150 : ELSE
1151 : {
1152 136530 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
1153 136530 : exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
1154 136530 : L_tmp = L_deposit_h( tmp );
1155 136530 : L_tmp = Sqrt32( L_tmp, &exp );
1156 136530 : adj_fx_p[i] = L_tmp;
1157 136530 : move32();
1158 136530 : adj_e[i] = exp;
1159 136530 : move16();
1160 : }
1161 :
1162 136530 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
1163 136530 : IF( GT_32( adj_fx_p[i], temp ) )
1164 : {
1165 18 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1166 18 : move32();
1167 18 : adj_e[i] = 3;
1168 18 : move16();
1169 : }
1170 : }
1171 :
1172 26762 : exp = adj_e[0];
1173 26762 : move16();
1174 136530 : FOR( i = 1; i < lengthCy; i++ )
1175 : {
1176 109768 : if ( LT_16( exp, adj_e[i] ) )
1177 : {
1178 1427 : exp = adj_e[i];
1179 1427 : move16();
1180 : }
1181 : }
1182 :
1183 163292 : FOR( i = 0; i < lengthCy; i++ )
1184 : {
1185 136530 : adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
1186 136530 : move32();
1187 : }
1188 26762 : adj_fx_e = exp;
1189 26762 : move16();
1190 :
1191 26762 : diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1192 :
1193 26762 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
1194 26762 : mixing_matrix_e = mat_mult_buffer3_e;
1195 26762 : move16();
1196 : }
1197 :
1198 136792 : *mixing_matrix_out_e = mixing_matrix_e;
1199 136792 : move16();
1200 136792 : *Cr_e = Cr_fx_e;
1201 136792 : move16();
1202 136792 : pop_wmops();
1203 :
1204 136792 : return out;
1205 : }
1206 :
1207 :
1208 : /*-------------------------------------------------------------------*
1209 : * computeMixingMatricesResidual()
1210 : *
1211 : * compute a residual mixing matrix using the covariance synthesis approach
1212 : *-------------------------------------------------------------------*/
1213 :
1214 110030 : Word16 computeMixingMatricesResidual_fx(
1215 : const Word32 num_outputs, /* i : number of output channels */
1216 : const Word32 *Cx_fx, /* i : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e) */
1217 : const Word16 Cx_e,
1218 : const Word32 *Cy_fx, /* i : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
1219 : const Word16 Cy_fx_e,
1220 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values Q(31-reg_Sx_e) */
1221 : const Word16 reg_Sx_e,
1222 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
1223 : const Word16 reg_ghat_e,
1224 : Word32 *mixing_matrix_fx, /* o : resulting residual mixing matrix Q(31-mixing_matrix_ret_e) */
1225 : Word16 *mixing_matrix_ret_e )
1226 : {
1227 : Word16 i, j;
1228 : Word16 out, lengthCx, lengthCy;
1229 110030 : out = EXIT_SUCCESS;
1230 110030 : move16();
1231 110030 : lengthCx = extract_l( num_outputs );
1232 110030 : lengthCy = extract_l( num_outputs );
1233 :
1234 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1235 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1236 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1237 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1238 :
1239 : Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1240 :
1241 110030 : push_wmops( "dirac_cov_mix_mat_r" );
1242 :
1243 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1244 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
1245 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1246 110030 : Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
1247 110030 : move16();
1248 :
1249 110030 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
1250 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1251 : Word32 L_tmp;
1252 : Word16 tmp_e;
1253 : Word16 tmp, scale;
1254 : Word32 div_tmp;
1255 : Word16 exp;
1256 :
1257 : Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
1258 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1259 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1260 : Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
1261 :
1262 : Word32 limit_fx;
1263 : Word16 limit_e;
1264 :
1265 : Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
1266 : Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
1267 :
1268 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1269 : Word16 Cy_hat_diag_e;
1270 :
1271 : Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
1272 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1273 : Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
1274 :
1275 : Word32 *adj_fx_p;
1276 : Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
1277 :
1278 : Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
1279 : Word16 Cy_tilde_e;
1280 :
1281 : /*-----------------------------------------------------------------*
1282 : * Decomposition of Cy
1283 : *-----------------------------------------------------------------*/
1284 :
1285 : /* Processing the SVD */
1286 :
1287 : /* linear array to svd buffer */
1288 110030 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
1289 :
1290 110030 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
1291 :
1292 : /* Computing Ky */
1293 110030 : Word16 max_e = -32;
1294 673219 : FOR( i = 0; i < lengthCy; ++i )
1295 : {
1296 3487388 : FOR( j = 0; j < lengthCy; ++j )
1297 : {
1298 2924199 : tmp_e = svd_s_buffer_e[j];
1299 2924199 : move16();
1300 2924199 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
1301 2924199 : Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
1302 2924199 : move32();
1303 2924199 : Ky_fx_e[i + j * lengthCy] = tmp_e;
1304 2924199 : move16();
1305 2924199 : max_e = s_max( max_e, tmp_e );
1306 : }
1307 : }
1308 :
1309 3034229 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
1310 : {
1311 2924199 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
1312 2924199 : move32();
1313 2924199 : Ky_fx_e[i] = max_e;
1314 2924199 : move16();
1315 : }
1316 :
1317 : /*-----------------------------------------------------------------*
1318 : * Decomposition of Cx
1319 : *-----------------------------------------------------------------*/
1320 :
1321 : /* Processing the SVD of Cx*/
1322 : /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
1323 : * would be just indicating the sorting index, so go straight to Kx as the
1324 : * square root of the diagonal of Cx */
1325 :
1326 : /* Computing Kx */
1327 110030 : max_e = -32;
1328 673219 : FOR( i = 0; i < lengthCx; ++i )
1329 : {
1330 563189 : exp = Cx_e;
1331 563189 : move16();
1332 563189 : Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
1333 563189 : move32();
1334 563189 : Kx_fx_e[i] = exp;
1335 563189 : move16();
1336 563189 : max_e = s_max( max_e, exp );
1337 : }
1338 :
1339 673219 : FOR( i = 0; i < lengthCx; ++i )
1340 : {
1341 563189 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
1342 563189 : move32();
1343 563189 : Kx_fx_e[i] = max_e;
1344 563189 : move16();
1345 : }
1346 :
1347 : /*-----------------------------------------------------------------*
1348 : * Regularization of Sx
1349 : *-----------------------------------------------------------------*/
1350 :
1351 110030 : limit_fx = Kx_fx[0];
1352 110030 : move32();
1353 :
1354 563189 : FOR( i = 1; i < lengthCx; i++ )
1355 : {
1356 453159 : IF( GT_32( Kx_fx[i], limit_fx ) )
1357 : {
1358 170765 : limit_fx = Kx_fx[i];
1359 170765 : move32();
1360 : }
1361 : }
1362 :
1363 110030 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
1364 110030 : L_tmp = L_add( L_tmp, EPSILLON_FX );
1365 110030 : limit_fx = L_tmp;
1366 110030 : move16();
1367 110030 : limit_e = add( Kx_fx_e[0], reg_Sx_e );
1368 :
1369 673219 : FOR( i = 0; i < lengthCx; ++i )
1370 : {
1371 563189 : IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) )
1372 : {
1373 561703 : div_tmp = Kx_fx[i];
1374 561703 : move32();
1375 561703 : exp = Kx_fx_e[i];
1376 561703 : move16();
1377 : }
1378 : ELSE
1379 : {
1380 1486 : div_tmp = limit_fx;
1381 1486 : move32();
1382 1486 : exp = limit_e;
1383 1486 : move16();
1384 : }
1385 563189 : tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
1386 563189 : scale = add( scale, sub( Q1, exp ) );
1387 :
1388 563189 : Kx_reg_inv_fx[i] = L_deposit_h( tmp );
1389 563189 : move32();
1390 563189 : Kx_reg_inv_e[i] = scale;
1391 563189 : move16();
1392 : }
1393 :
1394 110030 : limit_fx = 0;
1395 110030 : move32();
1396 110030 : limit_e = 0;
1397 110030 : move16();
1398 :
1399 : /*-----------------------------------------------------------------*
1400 : * regularized Kx-1
1401 : *-----------------------------------------------------------------*/
1402 :
1403 : /*-----------------------------------------------------------------*
1404 : * normalization matrix G hat
1405 : *-----------------------------------------------------------------*/
1406 :
1407 : /* Computing Cy_hat_diag */
1408 110030 : Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
1409 110030 : Cy_hat_diag_e = Cx_e;
1410 110030 : move16();
1411 :
1412 110030 : Word16 com_e = sub( limit_e, Cy_hat_diag_e );
1413 673219 : FOR( i = 0; i < lengthCy; ++i )
1414 : {
1415 563189 : IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
1416 : {
1417 563173 : limit_fx = Cy_hat_diag_fx[i];
1418 563173 : move32();
1419 563173 : limit_e = Cy_hat_diag_e;
1420 563173 : move16();
1421 : }
1422 : }
1423 :
1424 110030 : limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
1425 110030 : limit_e = add( limit_e, reg_ghat_e );
1426 :
1427 : /* Computing G_hat */
1428 :
1429 110030 : com_e = sub( Cy_hat_diag_e, limit_e );
1430 673219 : FOR( i = 0; i < lengthCy; ++i )
1431 : {
1432 563189 : Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
1433 563189 : move16();
1434 563189 : IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1435 : {
1436 101 : Cy_hat_diag_fx[i] = limit_fx;
1437 101 : move32();
1438 101 : Cy_hat_diag_fx_e[i] = limit_e;
1439 101 : move16();
1440 : }
1441 563189 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
1442 563189 : scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
1443 563189 : L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
1444 :
1445 563189 : G_hat_fx[i] = L_tmp;
1446 563189 : move32();
1447 563189 : G_hat_e[i] = scale;
1448 563189 : move16();
1449 : }
1450 :
1451 : /*-----------------------------------------------------------------*
1452 : * Formulate optimal P
1453 : *-----------------------------------------------------------------*/
1454 :
1455 673219 : FOR( i = 0; i < num_outputs; i++ )
1456 : {
1457 563189 : Kx_fx[i] = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
1458 563189 : move32();
1459 563189 : Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
1460 563189 : move16();
1461 : }
1462 :
1463 673219 : FOR( i = 0; i < num_outputs; i++ )
1464 : {
1465 : Word32 fac_fx;
1466 563189 : fac_fx = Kx_fx[i];
1467 563189 : move32();
1468 :
1469 3487388 : FOR( j = 0; j < num_outputs; j++ )
1470 : {
1471 2924199 : mat_mult_buffer1_fx[i + j * num_outputs] = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e));
1472 2924199 : move32();
1473 2924199 : mat_mult_buffer1_buff_e[i + j * num_outputs] = add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] );
1474 2924199 : move16();
1475 : }
1476 : }
1477 :
1478 110030 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
1479 110030 : move16();
1480 2924199 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1481 : {
1482 2814169 : if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
1483 : {
1484 30132 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
1485 30132 : move16();
1486 : }
1487 : }
1488 :
1489 3034229 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1490 : {
1491 2924199 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
1492 2924199 : move32();
1493 : }
1494 :
1495 110030 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1496 :
1497 110030 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
1498 :
1499 : /* Actually Processing P */
1500 :
1501 110030 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1502 110030 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1503 110030 : mat_mult_buffer1_e = 0;
1504 110030 : move16();
1505 110030 : mat_mult_buffer2_e = 0;
1506 110030 : move16();
1507 :
1508 110030 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
1509 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1510 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1511 :
1512 : /*-----------------------------------------------------------------*
1513 : * Formulate M
1514 : *-----------------------------------------------------------------*/
1515 :
1516 :
1517 110030 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1518 110030 : set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1519 :
1520 : Word16 mixing_matrix_fx_e[MAX_LS_CHANNELS * MAX_LS_CHANNELS];
1521 :
1522 673219 : FOR( i = 0; i < num_outputs; i++ )
1523 : {
1524 : Word32 fac_fx;
1525 563189 : fac_fx = Kx_reg_inv_fx[i];
1526 563189 : move32();
1527 :
1528 3487388 : FOR( j = 0; j < num_outputs; j++ )
1529 : {
1530 2924199 : mixing_matrix_fx[j + i * num_outputs] = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e);
1531 2924199 : move32();
1532 2924199 : mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
1533 2924199 : move16();
1534 : }
1535 : }
1536 :
1537 : /*-----------------------------------------------------------------*
1538 : * Formulate Cr
1539 : *-----------------------------------------------------------------*/
1540 :
1541 : /* Compute Cy_tilde = M*Cx*M' */
1542 :
1543 : Word16 Cx_e_arr[MAX_LS_CHANNELS];
1544 110030 : set16_fx( Cx_e_arr, Cx_e, MAX_LS_CHANNELS );
1545 :
1546 110030 : matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1547 :
1548 110030 : exp = mixing_matrix_fx_e[0];
1549 110030 : move16();
1550 2924199 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1551 : {
1552 2814169 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1553 : {
1554 12173 : exp = mixing_matrix_fx_e[i];
1555 12173 : move16();
1556 : }
1557 : }
1558 :
1559 3034229 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1560 : {
1561 2924199 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1562 2924199 : move32();
1563 : }
1564 110030 : mixing_matrix_e = exp;
1565 110030 : move16();
1566 :
1567 110030 : exp = mat_mult_buffer1_buff_e[0];
1568 110030 : move16();
1569 2924199 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1570 : {
1571 2814169 : if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
1572 : {
1573 12173 : exp = mat_mult_buffer1_buff_e[i];
1574 12173 : move16();
1575 : }
1576 : }
1577 :
1578 3034229 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1579 : {
1580 2924199 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
1581 2924199 : move32();
1582 : }
1583 110030 : mat_mult_buffer1_e = exp;
1584 110030 : move16();
1585 :
1586 110030 : matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
1587 :
1588 : /*-----------------------------------------------------------------*
1589 : * Energy Compensation
1590 : *-----------------------------------------------------------------*/
1591 :
1592 110030 : adj_fx_p = svd_s_buffer_fx;
1593 :
1594 673219 : FOR( i = 0; i < lengthCy; ++i )
1595 : {
1596 563189 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
1597 563189 : scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
1598 563189 : adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
1599 563189 : move32();
1600 563189 : adj_buff_e[i] = scale;
1601 563189 : move16();
1602 563189 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
1603 563189 : IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
1604 : {
1605 2 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1606 2 : move32();
1607 2 : adj_buff_e[i] = 3;
1608 2 : move16();
1609 : }
1610 : }
1611 :
1612 110030 : adj_e = adj_buff_e[0];
1613 110030 : move16();
1614 :
1615 563189 : FOR( i = 1; i < lengthCy; i++ )
1616 : {
1617 453159 : if ( LT_16( adj_e, adj_buff_e[i] ) )
1618 : {
1619 44297 : adj_e = adj_buff_e[i];
1620 44297 : move16();
1621 : }
1622 : }
1623 :
1624 :
1625 673219 : FOR( i = 0; i < lengthCy; i++ )
1626 : {
1627 563189 : adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
1628 563189 : move32();
1629 : }
1630 :
1631 110030 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1632 110030 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
1633 110030 : *mixing_matrix_ret_e = mat_mult_buffer3_e;
1634 110030 : move16();
1635 :
1636 110030 : pop_wmops();
1637 :
1638 110030 : return out;
1639 : }
1640 :
1641 :
1642 : /*-------------------------------------------------------------------*
1643 : * computeMixingMatricesISM()
1644 : *
1645 : *
1646 : *-------------------------------------------------------------------*/
1647 405540 : Word16 computeMixingMatricesISM_fx(
1648 : const Word16 num_inputs,
1649 : const Word16 num_responses,
1650 : const Word16 num_outputs,
1651 : const Word32 *responses_fx, /*Q(31-responses_e) */
1652 : const Word16 responses_e,
1653 : const Word32 *ener_fx, /*Q(31-ener_e) */
1654 : const Word16 ener_e,
1655 : const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
1656 : const Word16 Cx_diag_e,
1657 : const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
1658 : const Word16 Cy_diag_e,
1659 : const Word16 *Q_16fx, // Q15
1660 : const Word16 energy_compensation_flag,
1661 : const Word32 reg_Sx_fx, /*Q0*/
1662 : const Word32 reg_ghat_fx, /*Q0*/
1663 : Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
1664 : Word16 *mixing_matrix_e )
1665 : {
1666 : Word16 i, out;
1667 : Word16 lengthCx, lengthCy;
1668 : Word16 nL, nC;
1669 :
1670 : Word32 *Cy_tilde_p_fx;
1671 : Word32 *adj_fx;
1672 : Word32 limit_fx;
1673 :
1674 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1675 : Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1676 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1677 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1678 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1679 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1680 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
1681 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1682 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
1683 : Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
1684 :
1685 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1686 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1687 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
1688 : Word16 svd_s_buffer_fx_e;
1689 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1690 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1691 :
1692 : Word16 temp_e[MAX_OUTPUT_CHANNELS];
1693 405540 : push_wmops( "dirac_cov_mix_mat" );
1694 :
1695 405540 : out = EXIT_SUCCESS;
1696 405540 : move16();
1697 405540 : lengthCx = num_inputs;
1698 405540 : move16();
1699 405540 : lengthCy = num_outputs;
1700 405540 : move16();
1701 :
1702 8151300 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1703 : {
1704 7745760 : IF( EQ_16( Q_16fx[i], MAX_16 ) )
1705 : {
1706 3478800 : Q_fx[i] = MAX_32;
1707 3478800 : move32();
1708 : }
1709 : ELSE
1710 : {
1711 4266960 : Q_fx[i] = L_deposit_h( Q_16fx[i] );
1712 4266960 : move32();
1713 : }
1714 : }
1715 405540 : Q_e = 0;
1716 405540 : move16();
1717 405540 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
1718 6894180 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
1719 : {
1720 6488640 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1721 6488640 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1722 6488640 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1723 : }
1724 :
1725 : /* Decomposition of Cy = Ky*Ky' */
1726 : /* Ky = responses*diag(ener) */
1727 405540 : matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
1728 :
1729 : /* Decomposition of Cx -> Computing Kx */
1730 405540 : set16_fx( temp_e, Cx_diag_e, lengthCx );
1731 405540 : v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
1732 405540 : Kx_e = temp_e[0];
1733 405540 : move16();
1734 811080 : FOR( i = 1; i < lengthCx; i++ )
1735 : {
1736 405540 : Kx_e = s_max( Kx_e, temp_e[i] );
1737 : }
1738 1216620 : FOR( i = 0; i < lengthCx; i++ )
1739 : {
1740 811080 : Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
1741 811080 : move32();
1742 : }
1743 :
1744 : /* Regularization of Sx */
1745 405540 : maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
1746 405540 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
1747 :
1748 1216620 : FOR( i = 0; i < lengthCx; ++i )
1749 : {
1750 811080 : IF( GT_32( Kx_fx[i], limit_fx ) )
1751 : {
1752 763345 : svd_s_buffer_fx[i] = Kx_fx[i];
1753 763345 : move32();
1754 : }
1755 : ELSE
1756 : {
1757 47735 : svd_s_buffer_fx[i] = limit_fx;
1758 47735 : move32();
1759 : }
1760 : }
1761 405540 : svd_s_buffer_fx_e = Kx_e;
1762 405540 : move16();
1763 :
1764 405540 : limit_fx = 0;
1765 405540 : move32();
1766 :
1767 : /* regularized Kx-1 */
1768 :
1769 1216620 : FOR( i = 0; i < lengthCx; ++i )
1770 : {
1771 811080 : IF( svd_s_buffer_fx[i] )
1772 : {
1773 : Word32 reg_fac;
1774 811072 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
1775 811072 : Kx_reg_inv_fx[i] = reg_fac;
1776 811072 : move32();
1777 811072 : temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
1778 811072 : move16();
1779 : }
1780 : ELSE
1781 : {
1782 : Word32 reg_fac;
1783 8 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
1784 8 : Kx_reg_inv_fx[i] = reg_fac;
1785 8 : move32();
1786 8 : temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
1787 8 : move16();
1788 : }
1789 : }
1790 405540 : Kx_reg_inv_e = temp_e[0];
1791 405540 : move16();
1792 811080 : FOR( i = 1; i < lengthCx; i++ )
1793 : {
1794 405540 : Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
1795 : }
1796 1216620 : FOR( i = 0; i < lengthCx; i++ )
1797 : {
1798 811080 : Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
1799 811080 : move32();
1800 : }
1801 :
1802 : /************************ normalization matrix G hat **********************/
1803 :
1804 : /* Computing Q*Cx*Q' */
1805 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1806 : Word16 Cy_hat_diag_e;
1807 :
1808 405540 : matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
1809 :
1810 405540 : Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
1811 :
1812 8151300 : FOR( i = 0; i < lengthCy * lengthCx; ++i )
1813 : {
1814 7745760 : IF( GT_16( Q_Cx_e, Q_e ) )
1815 : {
1816 7745688 : Q_fx[i] = L_shr( Q_fx[i], guard_bits );
1817 7745688 : move32();
1818 : }
1819 : ELSE
1820 : {
1821 72 : Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
1822 72 : move32();
1823 : }
1824 : }
1825 :
1826 405540 : IF( GT_16( Q_Cx_e, Q_e ) )
1827 : {
1828 405536 : Q_e = add( Q_e, guard_bits );
1829 : }
1830 : ELSE
1831 : {
1832 4 : Q_Cx_e = add( Q_Cx_e, guard_bits );
1833 : }
1834 405540 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
1835 :
1836 : /* Computing Cy_hat_diag */
1837 4278420 : FOR( i = 0; i < lengthCy; ++i )
1838 : {
1839 3872880 : if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
1840 : {
1841 601680 : limit_fx = Cy_hat_diag_fx[i];
1842 601680 : move32();
1843 : }
1844 : }
1845 :
1846 405540 : limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
1847 :
1848 : /* Computing G_hat */
1849 4278420 : FOR( i = 0; i < lengthCy; ++i )
1850 : {
1851 3872880 : if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1852 : {
1853 19412 : Cy_hat_diag_fx[i] = limit_fx;
1854 19412 : move32();
1855 : }
1856 3872880 : IF( Cy_diag_fx[i] )
1857 : {
1858 2450608 : IF( Cy_hat_diag_fx[i] )
1859 : {
1860 2450608 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
1861 2450608 : move32();
1862 2450608 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
1863 2450608 : move16();
1864 2450608 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1865 2450608 : move32();
1866 : }
1867 : ELSE
1868 : {
1869 0 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1870 0 : move32();
1871 0 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1872 0 : move16();
1873 0 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1874 0 : move32();
1875 : }
1876 : }
1877 : ELSE
1878 : {
1879 1422272 : G_hat_fx[i] = 0;
1880 1422272 : move32();
1881 1422272 : temp_e[i] = 0;
1882 1422272 : move16();
1883 : }
1884 : }
1885 405540 : G_hat_e = temp_e[0];
1886 405540 : move16();
1887 3872880 : FOR( i = 1; i < lengthCy; i++ )
1888 : {
1889 3467340 : G_hat_e = s_max( G_hat_e, temp_e[i] );
1890 : }
1891 4278420 : FOR( i = 0; i < lengthCy; i++ )
1892 : {
1893 3872880 : G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
1894 3872880 : move32();
1895 : }
1896 :
1897 : /************************ Formulate optimal P **********************/
1898 :
1899 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
1900 405540 : diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1901 :
1902 405540 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1903 :
1904 405540 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1905 :
1906 405540 : IF( LT_16( lengthCx, num_responses ) )
1907 : {
1908 3600 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
1909 :
1910 3600 : nL = num_responses;
1911 3600 : move16();
1912 3600 : nC = lengthCx;
1913 3600 : move16();
1914 3600 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1915 : }
1916 : ELSE
1917 : {
1918 401940 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
1919 :
1920 401940 : nL = lengthCx;
1921 401940 : move16();
1922 401940 : nC = num_responses;
1923 401940 : move16();
1924 401940 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1925 : }
1926 :
1927 : /* Actually Processing P */
1928 :
1929 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
1930 405540 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
1931 405540 : mat_mult_buffer1_e = 0;
1932 405540 : move16();
1933 405540 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1934 405540 : mat_mult_buffer2_e = 0;
1935 405540 : move16();
1936 :
1937 405540 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1938 :
1939 : /************************ Formulate M **********************/
1940 :
1941 405540 : matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1942 :
1943 405540 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
1944 :
1945 : /*********************** Energy Compensation ****************/
1946 :
1947 : /* Compute Cy_tilde = M*Cx*M' */
1948 405540 : matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1949 :
1950 405540 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1951 :
1952 405540 : IF( EQ_16( energy_compensation_flag, 1 ) )
1953 : {
1954 405540 : adj_fx = svd_s_buffer_fx;
1955 405540 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1956 4278420 : FOR( i = 0; i < lengthCy; ++i )
1957 : {
1958 : /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
1959 3872880 : IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
1960 : {
1961 0 : adj_fx[i] = MAX_32;
1962 0 : move32();
1963 0 : temp_e[i] = 0;
1964 0 : move16();
1965 : }
1966 : ELSE
1967 : {
1968 3872880 : IF( Cy_diag_fx[i] )
1969 : {
1970 2450608 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
1971 : {
1972 2450566 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
1973 2450566 : move32();
1974 2450566 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
1975 2450566 : move16();
1976 2450566 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1977 2450566 : move32();
1978 : }
1979 : ELSE
1980 : {
1981 42 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1982 42 : move32();
1983 42 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1984 42 : move16();
1985 42 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1986 42 : move32();
1987 : }
1988 : }
1989 : ELSE
1990 : {
1991 1422272 : adj_fx[i] = 0;
1992 1422272 : move32();
1993 1422272 : temp_e[i] = 0;
1994 1422272 : move16();
1995 : }
1996 : }
1997 :
1998 3872880 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
1999 3872880 : IF( GT_32( adj_fx[i], temp ) )
2000 : {
2001 2222 : adj_fx[i] = MAX_32;
2002 2222 : move32();
2003 2222 : temp_e[i] = 2;
2004 2222 : move16();
2005 : }
2006 : }
2007 405540 : adj_e = temp_e[0];
2008 405540 : move16();
2009 3872880 : FOR( i = 1; i < lengthCy; i++ )
2010 : {
2011 3467340 : adj_e = s_max( adj_e, temp_e[i] );
2012 : }
2013 4278420 : FOR( i = 0; i < lengthCy; i++ )
2014 : {
2015 3872880 : adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
2016 3872880 : move32();
2017 : }
2018 :
2019 405540 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
2020 :
2021 405540 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
2022 405540 : *mixing_matrix_e = mat_mult_buffer3_e;
2023 405540 : move16();
2024 : }
2025 :
2026 405540 : pop_wmops();
2027 :
2028 405540 : return out;
2029 : }
|