Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 :
34 : #include <stdint.h>
35 : #include <string.h>
36 : #include <stdio.h>
37 : #include <stdlib.h>
38 : #include <assert.h>
39 : #include <math.h>
40 : #include "options.h"
41 : #include "cnst.h"
42 : #include "rom_enc.h"
43 : #include "rom_com.h"
44 : #include "prot_fx.h"
45 : #include "ivas_stat_dec.h"
46 : #include "ivas_cnst.h"
47 : #include "ivas_rom_com.h"
48 : #include "ivas_rom_dec.h"
49 : #include "wmc_auto.h"
50 : #include "rom_dec.h"
51 : #include "ivas_prot_fx.h"
52 :
53 :
54 : /*-------------------------------------------------------------------*
55 : * ivas_dirac_dec_output_synthesis_cov_open()
56 : *
57 : * Sets up the state and parameters for the Covariance Synthesis
58 : *-------------------------------------------------------------------*/
59 :
60 297 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
61 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
62 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state */
63 : const Word16 max_band_decorr, /* i : uppermost frequency band where decorrelation is applied */
64 : const Word16 interp_length, /* i : length for interpolating the mixing matrices in time slots */
65 : const Word16 num_param_bands, /* i : number of parameter bands */
66 : const Word16 num_param_bands_residual, /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
67 : const Word16 nchan_in, /* i : number of input (transport) channels */
68 : const Word16 nchan_out, /* i : number of output channels */
69 : const Word32 *proto_matrix /* i : the prototype (upmix) matrix (only used if mode == 1) Q(15-proto_matrix_e) */
70 : )
71 : {
72 : Word16 idx;
73 :
74 297 : h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
75 297 : move16();
76 :
77 : /*-----------------------------------------------------------------*
78 : * memory allocation
79 : *-----------------------------------------------------------------*/
80 :
81 : /* buffer length and interpolator */
82 297 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
83 297 : IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
84 : {
85 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
86 : }
87 297 : h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
88 297 : move16();
89 : /* cov buffers */
90 3803 : FOR( idx = 0; idx < num_param_bands; idx++ )
91 : {
92 3506 : IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
93 : {
94 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
95 : }
96 3506 : h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
97 3506 : IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
98 : {
99 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
100 : }
101 3506 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
102 : {
103 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
104 : }
105 3506 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
106 3506 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
107 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
108 :
109 3506 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
110 : {
111 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
112 : }
113 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
114 3506 : h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
115 3506 : move16();
116 : }
117 14611 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
118 : {
119 14314 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
120 14314 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
121 14314 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
122 14314 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
123 : }
124 :
125 3053 : FOR( idx = 0; idx < num_param_bands_residual; idx++ )
126 : {
127 2756 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
128 : {
129 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
130 : }
131 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
132 :
133 2756 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
134 : {
135 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
136 : }
137 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
138 2756 : h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
139 2756 : move16();
140 : }
141 15361 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
142 : {
143 15064 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
144 15064 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
145 : }
146 :
147 297 : IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
148 : {
149 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
150 : }
151 297 : IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
152 : {
153 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
154 : }
155 :
156 297 : set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
157 297 : set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
158 :
159 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
160 : {
161 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
162 : }
163 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
164 :
165 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
166 : {
167 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
168 : }
169 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
170 :
171 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
172 : {
173 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
174 : }
175 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
176 :
177 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
178 : {
179 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
180 : }
181 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
182 :
183 : /*-----------------------------------------------------------------*
184 : * prepare processing parameters
185 : *-----------------------------------------------------------------*/
186 :
187 : /* compute interpolator */
188 297 : IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
189 : {
190 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
191 : }
192 9801 : FOR( idx = 1; idx <= interp_length; ++idx )
193 : {
194 9504 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
195 9504 : move16();
196 : }
197 297 : Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
198 297 : h_dirac_output_synthesis_params->proto_matrix_e = 5;
199 297 : move16();
200 297 : return IVAS_ERR_OK;
201 : }
202 :
203 :
204 : /*-------------------------------------------------------------------*
205 : * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
206 : *
207 : *
208 : *-------------------------------------------------------------------*/
209 :
210 318405 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
211 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
212 : const UWord16 interp_length /* i : interpolator length */
213 : )
214 : {
215 : Word16 idx;
216 318405 : Word16 tmp, exp_diff = 0;
217 318405 : move16();
218 :
219 1583754 : FOR( idx = 1; idx <= interp_length; ++idx )
220 : {
221 1265349 : tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
222 1265349 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff ); // Q15
223 1265349 : move16();
224 : }
225 :
226 318405 : return;
227 : }
228 :
229 :
230 : /*-------------------------------------------------------------------*
231 : * ivas_dirac_dec_output_synthesis_cov_init()
232 : *
233 : * initialize the states for the covariance synthesis
234 : *-------------------------------------------------------------------*/
235 297 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
236 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis */
237 : const Word16 nchan_in, /* i : number of input (tranport) channels */
238 : const Word16 nchan_out, /* i : number of output channels */
239 : const Word16 n_param_bands, /* i : number of total parameter bands */
240 : const Word16 n_param_bands_res /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
241 : )
242 : {
243 :
244 : Word16 idx;
245 :
246 : /* initialize buffers */
247 3803 : FOR( idx = 0; idx < n_param_bands; idx++ )
248 : {
249 3506 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
250 3506 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
251 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
252 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
253 : }
254 :
255 3053 : FOR( idx = 0; idx < n_param_bands_res; idx++ )
256 : {
257 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
258 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
259 : }
260 :
261 :
262 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
263 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
264 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
265 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
266 :
267 297 : return;
268 : }
269 :
270 :
271 : /*-------------------------------------------------------------------*
272 : * ivas_dirac_dec_output_synthesis_cov_close()
273 : *
274 : * deallocate dynamic memory in the covariance synthesis state
275 : *-------------------------------------------------------------------*/
276 :
277 297 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
278 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i : handle for the covariance synthesis parameters */
279 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state */
280 : )
281 : {
282 : Word16 idx;
283 :
284 : /*-----------------------------------------------------------------*
285 : * memory deallocation
286 : *-----------------------------------------------------------------*/
287 :
288 : /* free interpolator */
289 297 : IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
290 : {
291 297 : free( h_dirac_output_synthesis_params->interpolator_fx );
292 297 : h_dirac_output_synthesis_params->interpolator_fx = NULL;
293 : }
294 :
295 : /* free alpha */
296 297 : IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
297 : {
298 0 : free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
299 0 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
300 : }
301 :
302 : /* free proto_matrix */
303 297 : IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
304 : {
305 297 : free( h_dirac_output_synthesis_params->proto_matrix_fx );
306 297 : h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
307 : }
308 :
309 :
310 297 : IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
311 : {
312 297 : free( h_dirac_output_synthesis_state->cx_old_e );
313 297 : h_dirac_output_synthesis_state->cx_old_e = NULL;
314 : }
315 297 : IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
316 : {
317 297 : free( h_dirac_output_synthesis_state->cy_old_e );
318 297 : h_dirac_output_synthesis_state->cy_old_e = NULL;
319 : }
320 :
321 : /* free cov buffers */
322 18117 : FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
323 : {
324 17820 : IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
325 : {
326 3506 : free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
327 3506 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
328 : }
329 :
330 17820 : IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
331 : {
332 3506 : free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
333 3506 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
334 : }
335 :
336 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
337 : {
338 3506 : free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
339 3506 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
340 : }
341 :
342 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
343 : {
344 2756 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
345 2756 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
346 : }
347 :
348 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
349 : {
350 3506 : free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
351 3506 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
352 : }
353 :
354 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
355 : {
356 2756 : free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
357 2756 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
358 : }
359 : }
360 :
361 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
362 : {
363 297 : free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
364 297 : h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
365 : }
366 :
367 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
368 : {
369 297 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
370 297 : h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
371 : }
372 :
373 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
374 : {
375 297 : free( h_dirac_output_synthesis_state->mixing_matrix_exp );
376 297 : h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
377 : }
378 :
379 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
380 : {
381 297 : free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
382 297 : h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
383 : }
384 297 : return;
385 : }
386 :
387 : /*-------------------------------------------------------------------*
388 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
389 : *
390 : * collect the multi channel input covariance for one filter bank time slot
391 : *-------------------------------------------------------------------*/
392 :
393 : /*-------------------------------------------------------------------*
394 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
395 : *
396 : * collect the multi channel input covariance for one filter bank time slot
397 : *-------------------------------------------------------------------*/
398 1776450 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
399 : Word32 *RealBuffer_fx, /* i : input channel filter bank samples (real part) Q(31- RealBuffer_e)*/
400 : Word16 RealBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
401 : Word32 *ImagBuffer_fx, /* i : input channel filter bank samples (imaginary part Q(ImagBuffer_e)*/
402 : Word16 ImagBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
403 : Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (real part) Q(31- cx_e)*/
404 : Word16 *cx_e, /* i : exponent for accumulated input covariance (real part) */
405 : Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (imaginary part) Q(31- cx_imag_e)*/
406 : Word16 *cx_imag_e, /* i : exponent accumulated input covariance (imag part) */
407 : PARAM_MC_DEC_HANDLE hParamMC, /* i : handle to Parametric MC state */
408 : const Word16 param_band, /* i : parameter band */
409 : const Word16 nchan_in /* i : number of input channels */
410 : )
411 : {
412 : Word16 band_idx, ch_idx;
413 : Word16 brange[2];
414 : Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
415 : Word16 real_in_e;
416 : Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
417 : Word16 imag_in_e;
418 : Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
419 : Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
420 : Word16 output_e;
421 : Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
422 : Word16 band, num_bands;
423 : Word16 cx_fx_norm, cx_imag_fx_norm;
424 : /* estimate input covariance */
425 : /* Already stack here instead of in the process_subframe */
426 :
427 : /* collect input frame */
428 1776450 : brange[0] = hParamMC->band_grouping[param_band];
429 1776450 : move16();
430 1776450 : brange[1] = hParamMC->band_grouping[param_band + 1];
431 1776450 : move16();
432 1776450 : num_bands = sub( brange[1], brange[0] );
433 :
434 9379450 : FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
435 : {
436 7603000 : band = add( brange[0], band_idx );
437 22843320 : FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
438 : {
439 15240320 : real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
440 15240320 : move32();
441 15240320 : imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
442 15240320 : move32();
443 : }
444 : }
445 :
446 1776450 : real_in_e = RealBuffer_e;
447 1776450 : move16();
448 1776450 : imag_in_e = ImagBuffer_e;
449 1776450 : move16();
450 :
451 1776450 : Word16 buf_len = imult1616( num_bands, nchan_in );
452 :
453 1776450 : shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
454 1776450 : shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
455 :
456 1776450 : real_in_e = sub( real_in_e, shift_real );
457 1776450 : imag_in_e = sub( imag_in_e, shift_imag );
458 :
459 :
460 1776450 : output_e = s_max( real_in_e, imag_in_e );
461 :
462 1776450 : scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
463 1776450 : scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
464 :
465 1776450 : cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
466 1776450 : v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
467 :
468 1776450 : v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
469 :
470 1776450 : cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
471 1776450 : cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
472 :
473 1776450 : scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
474 1776450 : scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
475 :
476 1776450 : *cx_e = sub( tmp1_e, cx_fx_norm );
477 1776450 : move16();
478 1776450 : *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
479 1776450 : move16();
480 :
481 1776450 : return;
482 : }
483 :
484 : /*-------------------------------------------------------------------*
485 : * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
486 : *
487 : * synthesize one filter bank slot of multi channel output filter bank
488 : * samples with the covariance synthesis
489 : *-------------------------------------------------------------------*/
490 :
491 140160 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
492 : Word32 *Cldfb_RealBuffer_in_fx, /*Q6*/
493 : Word32 *Cldfb_ImagBuffer_in_fx, /*Q6*/
494 : Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (real part) Q6*/
495 : Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (imaginary part) Q6*/
496 : Word32 *mixing_matrix_fx[], /* i : parameter band wise mixing matrices (direct part) Q(31-mixing_matrix_e)*/
497 : Word16 *mixing_matrix_e, /* i : parameter band wise mixing matrices (direct part) */
498 : Word32 *mixing_matrix_res_fx[], /* i : parameter band wise mixing matrices (residual part) Q(31-mixing_matrix_res_e)*/
499 : Word16 *mixing_matrix_res_e, /* i : parameter band wise mixing matrices (residual part) */
500 : const UWord16 slot_idx_sfr, /* i : time slot index for the current slot within the current subframe */
501 : const UWord16 slot_idx_tot, /* i : time slot index for the current slot within the frame */
502 : const Word16 nX, /* i : number of input channels */
503 : const Word16 nY, /* i : number of output channels */
504 : PARAM_MC_DEC_HANDLE hParamMC /* i : handle to the Parametric MC decoder state */
505 : )
506 : {
507 : Word16 param_band_idx, band, ch_idx;
508 : Word16 have_residual;
509 : Word16 brange[2];
510 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
511 : Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
512 : Word16 mixing_matrix_smooth_e;
513 : Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
514 140160 : Word16 mixing_matrix_res_smooth_e = 0;
515 140160 : move16();
516 : Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
517 : Word16 mixing_matrix_buffer_e;
518 : Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
519 : Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
520 :
521 : Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
522 : Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
523 :
524 140160 : h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
525 :
526 140160 : set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
527 140160 : set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
528 :
529 140160 : set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
530 140160 : set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
531 :
532 1950080 : FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
533 : {
534 : /* final mixing */
535 1809920 : have_residual = 0;
536 1809920 : move16();
537 1809920 : brange[0] = hParamMC->band_grouping[param_band_idx];
538 1809920 : move16();
539 1809920 : brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
540 1809920 : move16();
541 :
542 1809920 : if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
543 : {
544 1454400 : have_residual = 1;
545 1454400 : move16();
546 : }
547 :
548 1809920 : v_multc_fixed( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
549 1809920 : mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
550 1809920 : move16();
551 :
552 1809920 : v_multc_fixed( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
553 1809920 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
554 1809920 : move16();
555 :
556 1809920 : v_add_fixed_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
557 :
558 1809920 : IF( have_residual )
559 : {
560 : /* residual mixing matrix interpolation*/
561 :
562 1454400 : v_multc_fixed( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
563 1454400 : mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
564 1454400 : move16();
565 :
566 1454400 : set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
567 1454400 : v_multc_fixed( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
568 1454400 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
569 1454400 : move16();
570 :
571 1454400 : v_add_fixed_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
572 : }
573 :
574 :
575 9553920 : FOR( band = brange[0]; band < brange[1]; band++ )
576 : {
577 7744000 : assert( band >= 0 );
578 :
579 7744000 : IF( have_residual )
580 : {
581 : /* collect diffuse prototypes */
582 2803200 : assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
583 19833600 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
584 : {
585 17030400 : diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
586 17030400 : move32();
587 17030400 : diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
588 17030400 : move32();
589 : }
590 :
591 : /* apply residual mixing */
592 : {
593 : Word16 shifter;
594 : #ifdef OPT_SBA_DEC_V2_BE
595 2803200 : shifter = sub( mixing_matrix_res_smooth_e, 31 );
596 : #else /* OPT_SBA_DEC_V2_BE */
597 : shifter = 31 - mixing_matrix_res_smooth_e;
598 : #endif /* OPT_SBA_DEC_V2_BE */
599 19833600 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
600 : {
601 : int i;
602 : Word16 idx;
603 : Word64 temp_real, temp_imag;
604 :
605 :
606 17030400 : idx = ch_idx;
607 17030400 : temp_real = 0;
608 17030400 : temp_imag = 0;
609 17030400 : move64();
610 17030400 : move64();
611 121209600 : for ( i = 0; i < nY; i++ )
612 : {
613 104179200 : temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] ) );
614 104179200 : temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) );
615 104179200 : idx += nY;
616 : }
617 : #ifdef OPT_SBA_DEC_V2_BE
618 17030400 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
619 17030400 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
620 : #else /* OPT_SBA_DEC_V2_BE */
621 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) );
622 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) );
623 : #endif /* OPT_SBA_DEC_V2_BE */
624 : }
625 : }
626 : }
627 : ELSE
628 : {
629 34969600 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
630 : {
631 30028800 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
632 30028800 : move32();
633 30028800 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
634 30028800 : move32();
635 : }
636 : }
637 :
638 : /* collect input signals, still in cldfb buffers */
639 23270400 : FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
640 : {
641 :
642 15526400 : input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
643 15526400 : move32();
644 15526400 : input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
645 15526400 : move32();
646 : }
647 :
648 : /* apply mixing matrix */
649 : {
650 : Word16 shifter;
651 7744000 : shifter = 31 - mixing_matrix_smooth_e;
652 :
653 54803200 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
654 : {
655 : int i;
656 : Word16 idx;
657 : Word64 temp_real, temp_imag;
658 :
659 :
660 47059200 : idx = ch_idx;
661 47059200 : temp_real = 0;
662 47059200 : temp_imag = 0;
663 47059200 : move64();
664 47059200 : move64();
665 141638400 : for ( i = 0; i < nX; i++ )
666 : {
667 94579200 : temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
668 94579200 : temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
669 94579200 : idx += nY;
670 : }
671 47059200 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
672 47059200 : move32();
673 47059200 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
674 47059200 : move32();
675 : }
676 : }
677 : }
678 : }
679 :
680 140160 : return;
681 : }
682 :
683 : /*-------------------------------------------------------------------*
684 : * computeMixingMatrices()
685 : *
686 : * compute a mixing matrix using the convariance synthesis approach
687 : *-------------------------------------------------------------------*/
688 :
689 113120 : Word16 computeMixingMatrices_fx(
690 : const Word16 num_inputs, /* i : number of input channels */
691 : const Word16 num_outputs, /* i : number of output channels */
692 : const Word32 *Cx, /* i : input channel covariance matrix Q(31-Cx_e) */
693 : Word16 Cx_e,
694 : const Word32 *Cy, /* i : target covariance matrix Q(31-Cy_e) */
695 : Word16 Cy_e,
696 : const Word32 *Q, /* i : prototype matrix (usually a upmix matrix) Q_fx_e */
697 : Word16 Q_fx_e,
698 : const Word16 energy_compensation_flag, /* i : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
699 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values */
700 : Word16 reg_Sx_e,
701 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
702 : Word16 reg_ghat_e,
703 : Word32 *mixing_matrix_fx, /* o : resulting mixing matrix Q(31-mixing_matrix_out_e) */
704 : Word16 *mixing_matrix_out_e,
705 : Word32 *Cr_fx, /* o : residual covariance matrix Q(31-Cr_e) */
706 : Word16 *Cr_e )
707 : {
708 : Word16 i, j;
709 : Word16 out;
710 : Word16 nL, nC;
711 113120 : Word16 lengthCx = num_inputs;
712 113120 : Word16 lengthCy = num_outputs;
713 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
714 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
715 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
716 : Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
717 : Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
718 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
719 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
720 : Word16 Cx_fx_e;
721 : Word16 Cy_fx_e;
722 :
723 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
724 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
725 :
726 :
727 : Word32 limit_fx;
728 : Word16 limit_e;
729 :
730 : Word32 L_tmp;
731 : Word16 tmp_e, tmp, exp;
732 :
733 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
734 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
735 :
736 : Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
737 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
738 :
739 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
740 : Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
741 :
742 : Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
743 : Word16 Q_e;
744 :
745 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
746 : Word16 Q_Cx_e;
747 :
748 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
749 : Word16 Cy_hat_diag_e;
750 : Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
751 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
752 : Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
753 :
754 : Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
755 :
756 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
757 :
758 : Word16 mixing_matrix_e;
759 :
760 : Word16 Cr_fx_e;
761 :
762 :
763 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
764 : Word16 adj_e[MAX_OUTPUT_CHANNELS];
765 : Word32 *adj_fx_p;
766 : Word16 adj_fx_e;
767 :
768 : Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
769 113120 : push_wmops( "dirac_cov_mix_mat" );
770 :
771 113120 : out = EXIT_SUCCESS;
772 113120 : move16();
773 :
774 113120 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
775 1923040 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
776 : {
777 1809920 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
778 1809920 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
779 1809920 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
780 : }
781 :
782 :
783 113120 : Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
784 113120 : Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
785 113120 : Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
786 :
787 113120 : Q_e = Q_fx_e;
788 113120 : move16();
789 113120 : Cx_fx_e = Cx_e;
790 113120 : move16();
791 113120 : Cy_fx_e = Cy_e;
792 113120 : move16();
793 :
794 :
795 : /*-----------------------------------------------------------------*
796 : * Decomposition of Cy
797 : *-----------------------------------------------------------------*/
798 :
799 : /* Processing the SVD */
800 :
801 113120 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
802 :
803 113120 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
804 113120 : Word16 max_e = -32;
805 : /* Computing Ky */
806 688444 : FOR( i = 0; i < lengthCy; ++i )
807 : {
808 3533968 : FOR( j = 0; j < lengthCy; ++j )
809 : {
810 2958644 : tmp_e = svd_s_buffer_e[j];
811 2958644 : move16();
812 2958644 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
813 2958644 : Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
814 2958644 : move32();
815 2958644 : Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
816 2958644 : move16();
817 2958644 : max_e = s_max( max_e, tmp_e );
818 : }
819 : }
820 3071764 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
821 : {
822 2958644 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
823 2958644 : move32();
824 2958644 : Ky_fx_e[i] = max_e;
825 2958644 : move16();
826 : }
827 :
828 : /*-----------------------------------------------------------------*
829 : * Decomposition of Cx
830 : *-----------------------------------------------------------------*/
831 :
832 : /* Processing the SVD */
833 :
834 113120 : mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
835 :
836 113120 : svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
837 113120 : max_e = -32;
838 339920 : FOR( i = 0; i < lengthCx; ++i )
839 : {
840 682080 : FOR( j = 0; j < lengthCx; ++j )
841 : {
842 455280 : tmp_e = svd_s_buffer_e[j];
843 455280 : move16();
844 455280 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
845 455280 : Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
846 455280 : move32();
847 455280 : Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
848 455280 : move16();
849 455280 : max_e = s_max( max_e, tmp_e );
850 : }
851 : }
852 568400 : FOR( i = 0; i < lengthCx * lengthCx; ++i )
853 : {
854 455280 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
855 455280 : move32();
856 455280 : Kx_fx_e[i] = max_e;
857 455280 : move16();
858 : }
859 :
860 339920 : FOR( i = 0; i < lengthCx; ++i )
861 : {
862 226800 : tmp_e = svd_s_buffer_e[i];
863 226800 : move16();
864 226800 : svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
865 226800 : move32();
866 226800 : svd_s_buffer_e[i] = tmp_e;
867 226800 : move16();
868 : }
869 :
870 : /*-----------------------------------------------------------------*
871 : * Regularization of Sx
872 : *-----------------------------------------------------------------*/
873 :
874 113120 : limit_fx = svd_s_buffer_fx[0];
875 113120 : move32();
876 113120 : limit_e = svd_s_buffer_e[0];
877 113120 : move16();
878 226800 : FOR( i = 1; i < lengthCx; i++ )
879 : {
880 113680 : IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) > 0 )
881 : {
882 0 : limit_fx = svd_s_buffer_fx[i];
883 0 : move32();
884 0 : limit_e = svd_s_buffer_e[i];
885 0 : move16();
886 : }
887 : }
888 :
889 113120 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx );
890 113120 : limit_e = add( limit_e, reg_Sx_e );
891 113120 : limit_fx = L_add( L_tmp, EPSILON_FX );
892 :
893 339920 : FOR( i = 0; i < lengthCx; ++i )
894 : {
895 226800 : IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) < 0 )
896 : {
897 49326 : svd_s_buffer_fx[i] = limit_fx;
898 49326 : move32();
899 49326 : svd_s_buffer_e[i] = limit_e;
900 49326 : move16();
901 : }
902 : }
903 :
904 :
905 113120 : limit_fx = 0;
906 113120 : move32();
907 113120 : limit_e = 0;
908 113120 : move16();
909 :
910 :
911 : /*-----------------------------------------------------------------*
912 : * regularized Kx-1
913 : *-----------------------------------------------------------------*/
914 :
915 339920 : FOR( i = 0; i < lengthCx; ++i )
916 : {
917 : Word16 scale, reg_fac_fx;
918 226800 : reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
919 226800 : scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
920 682080 : FOR( j = 0; j < lengthCx; ++j )
921 : {
922 455280 : Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
923 455280 : move32();
924 455280 : Kx_reg_inv_e[i + j * lengthCx] = scale;
925 455280 : move16();
926 : }
927 : }
928 :
929 : /*-----------------------------------------------------------------*
930 : * normalization matrix G hat
931 : *-----------------------------------------------------------------*/
932 :
933 : /* Computing Q*Cx*Q' */
934 :
935 113120 : matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
936 :
937 113120 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
938 :
939 :
940 688444 : FOR( i = 0; i < lengthCy; ++i )
941 : {
942 575324 : IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
943 : {
944 289247 : limit_fx = Cy_hat_diag_fx[i];
945 289247 : move32();
946 289247 : limit_e = Cy_hat_diag_e;
947 289247 : move16();
948 : }
949 : }
950 113120 : L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
951 113120 : limit_fx = L_add( L_tmp, EPSILON_FX );
952 113120 : limit_e = add( limit_e, reg_ghat_e );
953 :
954 688444 : FOR( i = 0; i < lengthCy; ++i )
955 : {
956 575324 : Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
957 575324 : move16();
958 575324 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_buff_e[i] ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
959 : {
960 210 : Cy_hat_diag_fx[i] = limit_fx;
961 210 : move32();
962 210 : Cy_hat_diag_buff_e[i] = limit_e;
963 210 : move16();
964 : }
965 :
966 575324 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
967 575324 : exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
968 575324 : L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
969 575324 : G_hat_fx[i] = L_tmp;
970 575324 : move32();
971 575324 : G_hat_buff_e[i] = exp;
972 575324 : move16();
973 : }
974 :
975 :
976 : /*-----------------------------------------------------------------*
977 : * Formulate optimal P
978 : *-----------------------------------------------------------------*/
979 :
980 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
981 :
982 : Word16 mat_mult_buffer1_fx_e;
983 :
984 : Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
985 :
986 113120 : matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
987 :
988 113120 : matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
989 :
990 113120 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
991 :
992 113120 : IF( LT_16( lengthCx, lengthCy ) )
993 : {
994 113120 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
995 113120 : nL = lengthCy;
996 113120 : move16();
997 113120 : nC = lengthCx;
998 113120 : move16();
999 113120 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1000 : }
1001 : ELSE
1002 : {
1003 0 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1004 0 : nL = lengthCx;
1005 0 : move16();
1006 0 : nC = lengthCy;
1007 0 : move16();
1008 0 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1009 : }
1010 :
1011 : /* Actually Processing P */
1012 :
1013 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
1014 : truncates V to num_input_channel columns */
1015 :
1016 113120 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1017 113120 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1018 :
1019 113120 : mat_mult_buffer1_fx_e = 0;
1020 113120 : move16();
1021 113120 : mat_mult_buffer2_e = 0;
1022 113120 : move16();
1023 :
1024 113120 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
1025 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1026 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1027 :
1028 : /************************ Formulate M **********************/
1029 :
1030 113120 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1031 :
1032 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
1033 :
1034 : Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1035 113120 : set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1036 :
1037 113120 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
1038 :
1039 : /*-----------------------------------------------------------------*
1040 : * Formulate Cr
1041 : *-----------------------------------------------------------------*/
1042 :
1043 : /* Compute Cy_tilde = M*Cx*M' */
1044 :
1045 :
1046 : Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
1047 113120 : set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
1048 113120 : matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
1049 :
1050 113120 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
1051 :
1052 113120 : exp = mixing_matrix_fx_e[0];
1053 113120 : move16();
1054 1156848 : FOR( i = 1; i < lengthCy * lengthCx; i++ )
1055 : {
1056 1043728 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1057 : {
1058 81665 : exp = mixing_matrix_fx_e[i];
1059 81665 : move16();
1060 : }
1061 : }
1062 :
1063 1269968 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1064 : {
1065 1156848 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1066 1156848 : move32();
1067 : }
1068 :
1069 113120 : mixing_matrix_e = exp;
1070 113120 : move16();
1071 :
1072 113120 : Cr_p_fx = Cr_fx;
1073 113120 : Cy_p_fx = Cy_fx;
1074 113120 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1075 : Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1076 688444 : FOR( i = 0; i < lengthCy; ++i )
1077 : {
1078 3533968 : FOR( j = 0; j < lengthCy; ++j )
1079 : {
1080 :
1081 2958644 : *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
1082 2958644 : move32();
1083 2958644 : Cr_p_fx++;
1084 2958644 : Cy_p_fx++;
1085 2958644 : Cy_tilde_p_fx++;
1086 : }
1087 :
1088 : /* Avoid Meaningless negative main diagonal elements */
1089 575324 : IF( Cr_fx[i + ( i * lengthCy )] < 0 )
1090 : {
1091 62157 : Cr_fx[i + ( i * lengthCy )] = 0;
1092 62157 : move32();
1093 62157 : Cr_e_arr[i + ( i * lengthCy )] = 0;
1094 62157 : move16();
1095 : }
1096 : }
1097 :
1098 113120 : exp = Cr_e_arr[0];
1099 113120 : move16();
1100 2958644 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1101 : {
1102 2845524 : if ( LT_16( exp, Cr_e_arr[i] ) )
1103 : {
1104 103521 : exp = Cr_e_arr[i];
1105 103521 : move16();
1106 : }
1107 : }
1108 :
1109 3071764 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1110 : {
1111 2958644 : Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
1112 2958644 : move32();
1113 : }
1114 :
1115 113120 : Cr_fx_e = exp;
1116 113120 : move16();
1117 :
1118 113120 : exp = mat_mult_buffer2_fx_e[0];
1119 113120 : move16();
1120 2958644 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1121 : {
1122 2845524 : if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
1123 : {
1124 156914 : exp = mat_mult_buffer2_fx_e[i];
1125 156914 : move16();
1126 : }
1127 : }
1128 :
1129 3071764 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1130 : {
1131 2958644 : mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
1132 2958644 : move32();
1133 : }
1134 :
1135 113120 : mat_mult_buffer2_e = exp;
1136 113120 : move16();
1137 :
1138 : /*-----------------------------------------------------------------*
1139 : * Energy Compensation
1140 : *-----------------------------------------------------------------*/
1141 :
1142 113120 : IF( EQ_16( energy_compensation_flag, 1 ) )
1143 : {
1144 22220 : adj_fx_p = svd_s_buffer_fx;
1145 22220 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1146 :
1147 135100 : FOR( i = 0; i < lengthCy; ++i )
1148 : {
1149 : /* Avoid correction for very small energies,
1150 : main diagonal elements of Cy_tilde_p may be negative */
1151 112880 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
1152 : {
1153 0 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1154 0 : move32();
1155 0 : adj_e[i] = 1;
1156 0 : move16();
1157 : }
1158 : ELSE
1159 : {
1160 112880 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
1161 112880 : exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
1162 112880 : L_tmp = L_deposit_h( tmp );
1163 112880 : L_tmp = Sqrt32( L_tmp, &exp );
1164 112880 : adj_fx_p[i] = L_tmp;
1165 112880 : move32();
1166 112880 : adj_e[i] = exp;
1167 112880 : move16();
1168 : }
1169 :
1170 112880 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
1171 112880 : IF( GT_32( adj_fx_p[i], temp ) )
1172 : {
1173 4 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1174 4 : move32();
1175 4 : adj_e[i] = 3;
1176 4 : move16();
1177 : }
1178 : }
1179 :
1180 22220 : exp = adj_e[0];
1181 22220 : move16();
1182 112880 : FOR( i = 1; i < lengthCy; i++ )
1183 : {
1184 90660 : if ( LT_16( exp, adj_e[i] ) )
1185 : {
1186 1040 : exp = adj_e[i];
1187 1040 : move16();
1188 : }
1189 : }
1190 :
1191 135100 : FOR( i = 0; i < lengthCy; i++ )
1192 : {
1193 112880 : adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
1194 112880 : move32();
1195 : }
1196 22220 : adj_fx_e = exp;
1197 22220 : move16();
1198 :
1199 22220 : diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1200 :
1201 22220 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
1202 22220 : mixing_matrix_e = mat_mult_buffer3_e;
1203 22220 : move16();
1204 : }
1205 :
1206 113120 : *mixing_matrix_out_e = mixing_matrix_e;
1207 113120 : move16();
1208 113120 : *Cr_e = Cr_fx_e;
1209 113120 : move16();
1210 113120 : pop_wmops();
1211 :
1212 113120 : return out;
1213 : }
1214 :
1215 : /*-------------------------------------------------------------------*
1216 : * computeMixingMatricesResidual()
1217 : *
1218 : * compute a residual mixing matrix using the covariance synthesis approach
1219 : *-------------------------------------------------------------------*/
1220 :
1221 90900 : Word16 computeMixingMatricesResidual_fx(
1222 : const Word32 num_outputs, /* i : number of output channels */
1223 : const Word32 *Cx_fx, /* i : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e) */
1224 : const Word16 Cx_e,
1225 : const Word32 *Cy_fx, /* i : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
1226 : const Word16 Cy_fx_e,
1227 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values Q(31-reg_Sx_e) */
1228 : const Word16 reg_Sx_e,
1229 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
1230 : const Word16 reg_ghat_e,
1231 : Word32 *mixing_matrix_fx, /* o : resulting residual mixing matrix Q(31-mixing_matrix_ret_e) */
1232 : Word16 *mixing_matrix_ret_e )
1233 : {
1234 : Word16 i, j;
1235 : Word16 out, lengthCx, lengthCy;
1236 90900 : out = EXIT_SUCCESS;
1237 90900 : move16();
1238 90900 : lengthCx = extract_l( num_outputs );
1239 90900 : lengthCy = extract_l( num_outputs );
1240 :
1241 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1242 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1243 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1244 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1245 :
1246 : Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1247 :
1248 90900 : push_wmops( "dirac_cov_mix_mat_r" );
1249 :
1250 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1251 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
1252 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1253 90900 : Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
1254 90900 : move16();
1255 :
1256 90900 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
1257 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1258 : Word32 L_tmp;
1259 : Word16 tmp_e;
1260 : Word16 tmp, scale;
1261 : Word32 div_tmp;
1262 : Word16 exp;
1263 :
1264 : Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
1265 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1266 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1267 : Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
1268 :
1269 : Word32 limit_fx;
1270 : Word16 limit_e;
1271 :
1272 : Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
1273 : Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
1274 :
1275 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1276 : Word16 Cy_hat_diag_e;
1277 :
1278 : Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
1279 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1280 : Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
1281 :
1282 : Word32 *adj_fx_p;
1283 : Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
1284 :
1285 : Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
1286 : Word16 Cy_tilde_e;
1287 :
1288 : /*-----------------------------------------------------------------*
1289 : * Decomposition of Cy
1290 : *-----------------------------------------------------------------*/
1291 :
1292 : /* Processing the SVD */
1293 :
1294 : /* linear array to svd buffer */
1295 90900 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
1296 :
1297 90900 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
1298 :
1299 : /* Computing Ky */
1300 90900 : Word16 max_e = -32;
1301 553344 : FOR( i = 0; i < lengthCy; ++i )
1302 : {
1303 2841348 : FOR( j = 0; j < lengthCy; ++j )
1304 : {
1305 2378904 : tmp_e = svd_s_buffer_e[j];
1306 2378904 : move16();
1307 2378904 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
1308 2378904 : Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
1309 2378904 : move32();
1310 2378904 : Ky_fx_e[i + j * lengthCy] = tmp_e;
1311 2378904 : move16();
1312 2378904 : max_e = s_max( max_e, tmp_e );
1313 : }
1314 : }
1315 :
1316 2469804 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
1317 : {
1318 2378904 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
1319 2378904 : move32();
1320 2378904 : Ky_fx_e[i] = max_e;
1321 2378904 : move16();
1322 : }
1323 :
1324 : /*-----------------------------------------------------------------*
1325 : * Decomposition of Cx
1326 : *-----------------------------------------------------------------*/
1327 :
1328 : /* Processing the SVD of Cx*/
1329 : /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
1330 : * would be just indicating the sorting index, so go straight to Kx as the
1331 : * square root of the diagonal of Cx */
1332 :
1333 : /* Computing Kx */
1334 90900 : max_e = -32;
1335 553344 : FOR( i = 0; i < lengthCx; ++i )
1336 : {
1337 462444 : exp = Cx_e;
1338 462444 : move16();
1339 462444 : Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
1340 462444 : move32();
1341 462444 : Kx_fx_e[i] = exp;
1342 462444 : move16();
1343 462444 : max_e = s_max( max_e, exp );
1344 : }
1345 :
1346 553344 : FOR( i = 0; i < lengthCx; ++i )
1347 : {
1348 462444 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
1349 462444 : move32();
1350 462444 : Kx_fx_e[i] = max_e;
1351 462444 : move16();
1352 : }
1353 :
1354 : /*-----------------------------------------------------------------*
1355 : * Regularization of Sx
1356 : *-----------------------------------------------------------------*/
1357 :
1358 90900 : limit_fx = Kx_fx[0];
1359 90900 : move32();
1360 :
1361 462444 : FOR( i = 1; i < lengthCx; i++ )
1362 : {
1363 371544 : IF( GT_32( Kx_fx[i], limit_fx ) )
1364 : {
1365 141641 : limit_fx = Kx_fx[i];
1366 141641 : move32();
1367 : }
1368 : }
1369 :
1370 90900 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
1371 90900 : L_tmp = L_add( L_tmp, EPSILLON_FX );
1372 90900 : limit_fx = L_tmp;
1373 90900 : move16();
1374 90900 : limit_e = add( Kx_fx_e[0], reg_Sx_e );
1375 :
1376 553344 : FOR( i = 0; i < lengthCx; ++i )
1377 : {
1378 462444 : IF( BASOP_Util_Cmp_Mant32Exp( Kx_fx[i], Kx_fx_e[i], limit_fx, limit_e ) > 0 )
1379 : {
1380 461368 : div_tmp = Kx_fx[i];
1381 461368 : move32();
1382 461368 : exp = Kx_fx_e[i];
1383 461368 : move16();
1384 : }
1385 : ELSE
1386 : {
1387 1076 : div_tmp = limit_fx;
1388 1076 : move32();
1389 1076 : exp = limit_e;
1390 1076 : move16();
1391 : }
1392 462444 : tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
1393 462444 : scale = add( scale, sub( Q1, exp ) );
1394 :
1395 462444 : Kx_reg_inv_fx[i] = L_deposit_h( tmp );
1396 462444 : move32();
1397 462444 : Kx_reg_inv_e[i] = scale;
1398 462444 : move16();
1399 : }
1400 :
1401 90900 : limit_fx = 0;
1402 90900 : move32();
1403 90900 : limit_e = 0;
1404 90900 : move16();
1405 :
1406 : /*-----------------------------------------------------------------*
1407 : * regularized Kx-1
1408 : *-----------------------------------------------------------------*/
1409 :
1410 : /*-----------------------------------------------------------------*
1411 : * normalization matrix G hat
1412 : *-----------------------------------------------------------------*/
1413 :
1414 : /* Computing Cy_hat_diag */
1415 90900 : Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
1416 90900 : Cy_hat_diag_e = Cx_e;
1417 90900 : move16();
1418 :
1419 553344 : FOR( i = 0; i < lengthCy; ++i )
1420 : {
1421 462444 : IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
1422 : {
1423 232507 : limit_fx = Cy_hat_diag_fx[i];
1424 232507 : move32();
1425 232507 : limit_e = Cy_hat_diag_e;
1426 232507 : move16();
1427 : }
1428 : }
1429 :
1430 90900 : L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
1431 90900 : limit_fx = L_add( L_tmp, EPSILON_FX );
1432 90900 : limit_e = add( limit_e, reg_ghat_e );
1433 :
1434 : /* Computing G_hat */
1435 553344 : FOR( i = 0; i < lengthCy; ++i )
1436 : {
1437 462444 : Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
1438 462444 : move16();
1439 462444 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_e ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1440 : {
1441 210 : Cy_hat_diag_fx[i] = limit_fx;
1442 210 : move32();
1443 210 : Cy_hat_diag_fx_e[i] = limit_e;
1444 210 : move16();
1445 : }
1446 462444 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
1447 462444 : scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
1448 462444 : L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
1449 :
1450 462444 : G_hat_fx[i] = L_tmp;
1451 462444 : move32();
1452 462444 : G_hat_e[i] = scale;
1453 462444 : move16();
1454 : }
1455 :
1456 : /*-----------------------------------------------------------------*
1457 : * Formulate optimal P
1458 : *-----------------------------------------------------------------*/
1459 :
1460 553344 : FOR( i = 0; i < num_outputs; i++ )
1461 : {
1462 462444 : L_tmp = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
1463 462444 : Kx_fx[i] = L_tmp;
1464 462444 : move32();
1465 462444 : Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
1466 462444 : move16();
1467 : }
1468 :
1469 553344 : FOR( i = 0; i < num_outputs; i++ )
1470 : {
1471 : Word32 fac_fx;
1472 462444 : fac_fx = Kx_fx[i];
1473 462444 : move32();
1474 :
1475 2841348 : FOR( j = 0; j < num_outputs; j++ )
1476 : {
1477 2378904 : L_tmp = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e))
1478 2378904 : mat_mult_buffer1_fx[i + j * num_outputs] = L_tmp;
1479 2378904 : move32();
1480 2378904 : mat_mult_buffer1_buff_e[i + j * num_outputs] = extract_l( L_add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] ) );
1481 2378904 : move16();
1482 : }
1483 : }
1484 :
1485 90900 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
1486 90900 : move16();
1487 2378904 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1488 : {
1489 2288004 : if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
1490 : {
1491 24333 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
1492 24333 : move16();
1493 : }
1494 : }
1495 :
1496 2469804 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1497 : {
1498 2378904 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
1499 2378904 : move32();
1500 : }
1501 :
1502 90900 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1503 :
1504 90900 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
1505 :
1506 : /* Actually Processing P */
1507 :
1508 90900 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1509 90900 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1510 90900 : mat_mult_buffer1_e = 0;
1511 90900 : move16();
1512 90900 : mat_mult_buffer2_e = 0;
1513 90900 : move16();
1514 :
1515 90900 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
1516 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1517 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1518 :
1519 : /*-----------------------------------------------------------------*
1520 : * Formulate M
1521 : *-----------------------------------------------------------------*/
1522 :
1523 :
1524 90900 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1525 90900 : set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1526 :
1527 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1528 :
1529 553344 : FOR( i = 0; i < num_outputs; i++ )
1530 : {
1531 : Word32 fac_fx;
1532 462444 : fac_fx = Kx_reg_inv_fx[i];
1533 462444 : move32();
1534 :
1535 2841348 : FOR( j = 0; j < num_outputs; j++ )
1536 : {
1537 2378904 : L_tmp = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e)
1538 2378904 : mixing_matrix_fx[j + i * num_outputs] = L_tmp;
1539 2378904 : move32();
1540 2378904 : mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
1541 2378904 : move16();
1542 : }
1543 : }
1544 :
1545 : /*-----------------------------------------------------------------*
1546 : * Formulate Cr
1547 : *-----------------------------------------------------------------*/
1548 :
1549 : /* Compute Cy_tilde = M*Cx*M' */
1550 :
1551 : Word16 Cx_e_arr[MAX_CICP_CHANNELS];
1552 90900 : set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
1553 :
1554 90900 : matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1555 :
1556 90900 : exp = mixing_matrix_fx_e[0];
1557 90900 : move16();
1558 2378904 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1559 : {
1560 2288004 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1561 : {
1562 9829 : exp = mixing_matrix_fx_e[i];
1563 9829 : move16();
1564 : }
1565 : }
1566 :
1567 2469804 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1568 : {
1569 2378904 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1570 2378904 : move32();
1571 : }
1572 90900 : mixing_matrix_e = exp;
1573 90900 : move16();
1574 :
1575 90900 : exp = mat_mult_buffer1_buff_e[0];
1576 90900 : move16();
1577 2378904 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1578 : {
1579 2288004 : if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
1580 : {
1581 9829 : exp = mat_mult_buffer1_buff_e[i];
1582 9829 : move16();
1583 : }
1584 : }
1585 :
1586 2469804 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1587 : {
1588 2378904 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
1589 2378904 : move32();
1590 : }
1591 90900 : mat_mult_buffer1_e = exp;
1592 90900 : move16();
1593 :
1594 90900 : matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
1595 :
1596 : /*-----------------------------------------------------------------*
1597 : * Energy Compensation
1598 : *-----------------------------------------------------------------*/
1599 :
1600 90900 : adj_fx_p = svd_s_buffer_fx;
1601 :
1602 553344 : FOR( i = 0; i < lengthCy; ++i )
1603 : {
1604 462444 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
1605 462444 : scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
1606 462444 : adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
1607 462444 : move32();
1608 462444 : adj_buff_e[i] = scale;
1609 462444 : move16();
1610 462444 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
1611 462444 : IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
1612 : {
1613 1 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1614 1 : move32();
1615 1 : adj_buff_e[i] = 3;
1616 1 : move16();
1617 : }
1618 : }
1619 :
1620 90900 : adj_e = adj_buff_e[0];
1621 90900 : move16();
1622 :
1623 462444 : FOR( i = 1; i < lengthCy; i++ )
1624 : {
1625 371544 : if ( LT_16( adj_e, adj_buff_e[i] ) )
1626 : {
1627 36417 : adj_e = adj_buff_e[i];
1628 36417 : move16();
1629 : }
1630 : }
1631 :
1632 :
1633 553344 : FOR( i = 0; i < lengthCy; i++ )
1634 : {
1635 462444 : adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
1636 462444 : move32();
1637 : }
1638 :
1639 90900 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1640 90900 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
1641 90900 : *mixing_matrix_ret_e = mat_mult_buffer3_e;
1642 90900 : move16();
1643 :
1644 90900 : pop_wmops();
1645 :
1646 90900 : return out;
1647 : }
1648 :
1649 :
1650 : /*-------------------------------------------------------------------*
1651 : * computeMixingMatricesISM()
1652 : *
1653 : *
1654 : *-------------------------------------------------------------------*/
1655 315180 : Word16 computeMixingMatricesISM_fx(
1656 : const Word16 num_inputs,
1657 : const Word16 num_responses,
1658 : const Word16 num_outputs,
1659 : const Word32 *responses_fx, /*Q(31-responses_e) */
1660 : const Word16 responses_e,
1661 : const Word32 *ener_fx, /*Q(31-ener_e) */
1662 : const Word16 ener_e,
1663 : const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
1664 : const Word16 Cx_diag_e,
1665 : const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
1666 : const Word16 Cy_diag_e,
1667 : const Word16 *Q_16fx, // Q15
1668 : const Word16 energy_compensation_flag,
1669 : const Word32 reg_Sx_fx, /*Q0*/
1670 : const Word32 reg_ghat_fx, /*Q0*/
1671 : Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
1672 : Word16 *mixing_matrix_e )
1673 : {
1674 : Word16 i, out;
1675 : Word16 lengthCx, lengthCy;
1676 : Word16 nL, nC;
1677 :
1678 : Word32 *Cy_tilde_p_fx;
1679 : Word32 *adj_fx;
1680 : Word32 limit_fx;
1681 :
1682 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1683 : Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1684 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1685 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1686 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1687 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1688 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
1689 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1690 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
1691 : Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
1692 :
1693 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1694 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1695 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
1696 : Word16 svd_s_buffer_fx_e;
1697 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1698 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1699 :
1700 : Word16 temp_e[MAX_OUTPUT_CHANNELS];
1701 315180 : push_wmops( "dirac_cov_mix_mat" );
1702 :
1703 315180 : out = EXIT_SUCCESS;
1704 315180 : move16();
1705 315180 : lengthCx = num_inputs;
1706 315180 : move16();
1707 315180 : lengthCy = num_outputs;
1708 315180 : move16();
1709 :
1710 6073020 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1711 : {
1712 5757840 : IF( EQ_16( Q_16fx[i], MAX_16 ) )
1713 : {
1714 2575200 : Q_fx[i] = MAX_32;
1715 2575200 : move32();
1716 : }
1717 : ELSE
1718 : {
1719 3182640 : Q_fx[i] = L_deposit_h( Q_16fx[i] );
1720 3182640 : move32();
1721 : }
1722 : }
1723 315180 : Q_e = 0;
1724 315180 : move16();
1725 315180 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
1726 5358060 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
1727 : {
1728 5042880 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1729 5042880 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1730 5042880 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1731 : }
1732 :
1733 : /* Decomposition of Cy = Ky*Ky' */
1734 : /* Ky = responses*diag(ener) */
1735 315180 : matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
1736 :
1737 : /* Decomposition of Cx -> Computing Kx */
1738 315180 : set16_fx( temp_e, Cx_diag_e, lengthCx );
1739 315180 : v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
1740 315180 : Kx_e = temp_e[0];
1741 315180 : move16();
1742 630360 : FOR( i = 1; i < lengthCx; i++ )
1743 : {
1744 315180 : Kx_e = s_max( Kx_e, temp_e[i] );
1745 : }
1746 945540 : FOR( i = 0; i < lengthCx; i++ )
1747 : {
1748 630360 : Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
1749 630360 : move32();
1750 : }
1751 :
1752 : /* Regularization of Sx */
1753 315180 : maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
1754 315180 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
1755 :
1756 945540 : FOR( i = 0; i < lengthCx; ++i )
1757 : {
1758 630360 : IF( GT_32( Kx_fx[i], limit_fx ) )
1759 : {
1760 595983 : svd_s_buffer_fx[i] = Kx_fx[i];
1761 595983 : move32();
1762 : }
1763 : ELSE
1764 : {
1765 34377 : svd_s_buffer_fx[i] = limit_fx;
1766 34377 : move32();
1767 : }
1768 : }
1769 315180 : svd_s_buffer_fx_e = Kx_e;
1770 315180 : move16();
1771 :
1772 315180 : limit_fx = 0;
1773 315180 : move32();
1774 :
1775 : /* regularized Kx-1 */
1776 :
1777 945540 : FOR( i = 0; i < lengthCx; ++i )
1778 : {
1779 630360 : IF( svd_s_buffer_fx[i] )
1780 : {
1781 : Word32 reg_fac;
1782 630350 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
1783 630350 : Kx_reg_inv_fx[i] = reg_fac;
1784 630350 : move32();
1785 630350 : temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
1786 630350 : move16();
1787 : }
1788 : ELSE
1789 : {
1790 : Word32 reg_fac;
1791 10 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
1792 10 : Kx_reg_inv_fx[i] = reg_fac;
1793 10 : move32();
1794 10 : temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
1795 10 : move16();
1796 : }
1797 : }
1798 315180 : Kx_reg_inv_e = temp_e[0];
1799 315180 : move16();
1800 630360 : FOR( i = 1; i < lengthCx; i++ )
1801 : {
1802 315180 : Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
1803 : }
1804 945540 : FOR( i = 0; i < lengthCx; i++ )
1805 : {
1806 630360 : Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
1807 630360 : move32();
1808 : }
1809 :
1810 : /************************ normalization matrix G hat **********************/
1811 :
1812 : /* Computing Q*Cx*Q' */
1813 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1814 : Word16 Cy_hat_diag_e;
1815 :
1816 315180 : matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
1817 :
1818 315180 : Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
1819 :
1820 6073020 : FOR( i = 0; i < lengthCy * lengthCx; ++i )
1821 : {
1822 5757840 : IF( GT_16( Q_Cx_e, Q_e ) )
1823 : {
1824 5757760 : Q_fx[i] = L_shr( Q_fx[i], guard_bits );
1825 5757760 : move32();
1826 : }
1827 : ELSE
1828 : {
1829 80 : Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
1830 80 : move32();
1831 : }
1832 : }
1833 :
1834 315180 : IF( GT_16( Q_Cx_e, Q_e ) )
1835 : {
1836 315175 : Q_e = add( Q_e, guard_bits );
1837 : }
1838 : ELSE
1839 : {
1840 5 : Q_Cx_e = add( Q_Cx_e, guard_bits );
1841 : }
1842 315180 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
1843 :
1844 : /* Computing Cy_hat_diag */
1845 3194100 : FOR( i = 0; i < lengthCy; ++i )
1846 : {
1847 2878920 : if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
1848 : {
1849 469388 : limit_fx = Cy_hat_diag_fx[i];
1850 469388 : move32();
1851 : }
1852 : }
1853 :
1854 315180 : limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
1855 :
1856 : /* Computing G_hat */
1857 3194100 : FOR( i = 0; i < lengthCy; ++i )
1858 : {
1859 2878920 : if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1860 : {
1861 11966 : Cy_hat_diag_fx[i] = limit_fx;
1862 11966 : move32();
1863 : }
1864 2878920 : IF( Cy_diag_fx[i] )
1865 : {
1866 1875159 : IF( Cy_hat_diag_fx[i] )
1867 : {
1868 1875159 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
1869 1875159 : move32();
1870 1875159 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
1871 1875159 : move16();
1872 1875159 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1873 1875159 : move32();
1874 : }
1875 : ELSE
1876 : {
1877 0 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1878 0 : move32();
1879 0 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1880 0 : move16();
1881 0 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1882 0 : move32();
1883 : }
1884 : }
1885 : ELSE
1886 : {
1887 1003761 : G_hat_fx[i] = 0;
1888 1003761 : move32();
1889 1003761 : temp_e[i] = 0;
1890 1003761 : move16();
1891 : }
1892 : }
1893 315180 : G_hat_e = temp_e[0];
1894 315180 : move16();
1895 2878920 : FOR( i = 1; i < lengthCy; i++ )
1896 : {
1897 2563740 : G_hat_e = s_max( G_hat_e, temp_e[i] );
1898 : }
1899 3194100 : FOR( i = 0; i < lengthCy; i++ )
1900 : {
1901 2878920 : G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
1902 2878920 : move32();
1903 : }
1904 :
1905 : /************************ Formulate optimal P **********************/
1906 :
1907 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
1908 315180 : diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1909 :
1910 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1911 :
1912 315180 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1913 :
1914 315180 : IF( LT_16( lengthCx, num_responses ) )
1915 : {
1916 3600 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
1917 :
1918 3600 : nL = num_responses;
1919 3600 : move16();
1920 3600 : nC = lengthCx;
1921 3600 : move16();
1922 3600 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1923 : }
1924 : ELSE
1925 : {
1926 311580 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
1927 :
1928 311580 : nL = lengthCx;
1929 311580 : move16();
1930 311580 : nC = num_responses;
1931 311580 : move16();
1932 311580 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1933 : }
1934 :
1935 : /* Actually Processing P */
1936 :
1937 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
1938 315180 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
1939 315180 : mat_mult_buffer1_e = 0;
1940 315180 : move16();
1941 315180 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1942 315180 : mat_mult_buffer2_e = 0;
1943 315180 : move16();
1944 :
1945 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1946 :
1947 : /************************ Formulate M **********************/
1948 :
1949 315180 : matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1950 :
1951 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
1952 :
1953 : /*********************** Energy Compensation ****************/
1954 :
1955 : /* Compute Cy_tilde = M*Cx*M' */
1956 315180 : matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1957 :
1958 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1959 :
1960 315180 : IF( EQ_16( energy_compensation_flag, 1 ) )
1961 : {
1962 315180 : adj_fx = svd_s_buffer_fx;
1963 315180 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1964 3194100 : FOR( i = 0; i < lengthCy; ++i )
1965 : {
1966 : /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
1967 2878920 : IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
1968 : {
1969 0 : adj_fx[i] = MAX_32;
1970 0 : move32();
1971 0 : temp_e[i] = 0;
1972 0 : move16();
1973 : }
1974 : ELSE
1975 : {
1976 2878920 : IF( Cy_diag_fx[i] )
1977 : {
1978 1875159 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
1979 : {
1980 1875117 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
1981 1875117 : move32();
1982 1875117 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
1983 1875117 : move16();
1984 1875117 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1985 1875117 : move32();
1986 : }
1987 : ELSE
1988 : {
1989 42 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1990 42 : move32();
1991 42 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1992 42 : move16();
1993 42 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1994 42 : move32();
1995 : }
1996 : }
1997 : ELSE
1998 : {
1999 1003761 : adj_fx[i] = 0;
2000 1003761 : move32();
2001 1003761 : temp_e[i] = 0;
2002 1003761 : move16();
2003 : }
2004 : }
2005 :
2006 2878920 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
2007 2878920 : IF( GT_32( adj_fx[i], temp ) )
2008 : {
2009 1547 : adj_fx[i] = MAX_32;
2010 1547 : move32();
2011 1547 : temp_e[i] = 2;
2012 1547 : move16();
2013 : }
2014 : }
2015 315180 : adj_e = temp_e[0];
2016 315180 : move16();
2017 2878920 : FOR( i = 1; i < lengthCy; i++ )
2018 : {
2019 2563740 : adj_e = s_max( adj_e, temp_e[i] );
2020 : }
2021 3194100 : FOR( i = 0; i < lengthCy; i++ )
2022 : {
2023 2878920 : adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
2024 2878920 : move32();
2025 : }
2026 :
2027 315180 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
2028 :
2029 315180 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
2030 315180 : *mixing_matrix_e = mat_mult_buffer3_e;
2031 315180 : move16();
2032 : }
2033 :
2034 315180 : pop_wmops();
2035 :
2036 315180 : return out;
2037 : }
|