Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include <string.h>
35 : #include <stdio.h>
36 : #include <stdlib.h>
37 : #include <assert.h>
38 : #include <math.h>
39 : #include "options.h"
40 : #include "cnst.h"
41 : #include "rom_enc.h"
42 : #include "rom_com.h"
43 : #include "prot_fx.h"
44 : #include "ivas_stat_dec.h"
45 : #include "ivas_cnst.h"
46 : #include "ivas_rom_com.h"
47 : #include "ivas_rom_dec.h"
48 : #include "wmc_auto.h"
49 : #include "rom_dec.h"
50 : #include "ivas_prot_fx.h"
51 :
52 :
53 : /*-------------------------------------------------------------------*
54 : * ivas_dirac_dec_output_synthesis_cov_open()
55 : *
56 : * Sets up the state and parameters for the Covariance Synthesis
57 : *-------------------------------------------------------------------*/
58 :
59 297 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
60 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
61 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state */
62 : const Word16 max_band_decorr, /* i : uppermost frequency band where decorrelation is applied */
63 : const Word16 interp_length, /* i : length for interpolating the mixing matrices in time slots */
64 : const Word16 num_param_bands, /* i : number of parameter bands */
65 : const Word16 num_param_bands_residual, /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
66 : const Word16 nchan_in, /* i : number of input (transport) channels */
67 : const Word16 nchan_out, /* i : number of output channels */
68 : const Word32 *proto_matrix /* i : the prototype (upmix) matrix (only used if mode == 1) Q(15-proto_matrix_e) */
69 : )
70 : {
71 : Word16 idx;
72 :
73 297 : h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
74 297 : move16();
75 :
76 : /*-----------------------------------------------------------------*
77 : * memory allocation
78 : *-----------------------------------------------------------------*/
79 :
80 : /* buffer length and interpolator */
81 297 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
82 297 : IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
83 : {
84 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
85 : }
86 297 : h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
87 297 : move16();
88 : /* cov buffers */
89 3803 : FOR( idx = 0; idx < num_param_bands; idx++ )
90 : {
91 3506 : IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
92 : {
93 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
94 : }
95 3506 : h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
96 3506 : IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
97 : {
98 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
99 : }
100 3506 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
101 : {
102 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
103 : }
104 3506 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
105 3506 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
106 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
107 :
108 3506 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
109 : {
110 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
111 : }
112 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
113 3506 : h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
114 3506 : move16();
115 : }
116 14611 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
117 : {
118 14314 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
119 14314 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
120 14314 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
121 14314 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
122 : }
123 :
124 3053 : FOR( idx = 0; idx < num_param_bands_residual; idx++ )
125 : {
126 2756 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
127 : {
128 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
129 : }
130 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
131 :
132 2756 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
133 : {
134 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
135 : }
136 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
137 2756 : h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
138 2756 : move16();
139 : }
140 15361 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
141 : {
142 15064 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
143 15064 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
144 : }
145 :
146 297 : IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
147 : {
148 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
149 : }
150 297 : IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
151 : {
152 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
153 : }
154 :
155 297 : set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
156 297 : set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
157 :
158 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
159 : {
160 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
161 : }
162 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
163 :
164 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
165 : {
166 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
167 : }
168 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
169 :
170 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
171 : {
172 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
173 : }
174 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
175 :
176 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
177 : {
178 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
179 : }
180 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
181 :
182 : /*-----------------------------------------------------------------*
183 : * prepare processing parameters
184 : *-----------------------------------------------------------------*/
185 :
186 : /* compute interpolator */
187 297 : IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
188 : {
189 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
190 : }
191 9801 : FOR( idx = 1; idx <= interp_length; ++idx )
192 : {
193 9504 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
194 9504 : move16();
195 : }
196 297 : Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
197 297 : h_dirac_output_synthesis_params->proto_matrix_e = 5;
198 297 : move16();
199 297 : return IVAS_ERR_OK;
200 : }
201 :
202 :
203 : /*-------------------------------------------------------------------*
204 : * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
205 : *
206 : *
207 : *-------------------------------------------------------------------*/
208 :
209 318383 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
210 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
211 : const UWord16 interp_length /* i : interpolator length */
212 : )
213 : {
214 : Word16 idx;
215 318383 : Word16 tmp, exp_diff = 0;
216 318383 : move16();
217 :
218 1583648 : FOR( idx = 1; idx <= interp_length; ++idx )
219 : {
220 1265265 : tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
221 1265265 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff ); // Q15
222 1265265 : move16();
223 : }
224 :
225 318383 : return;
226 : }
227 :
228 :
229 : /*-------------------------------------------------------------------*
230 : * ivas_dirac_dec_output_synthesis_cov_init()
231 : *
232 : * initialize the states for the covariance synthesis
233 : *-------------------------------------------------------------------*/
234 297 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
235 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis */
236 : const Word16 nchan_in, /* i : number of input (tranport) channels */
237 : const Word16 nchan_out, /* i : number of output channels */
238 : const Word16 n_param_bands, /* i : number of total parameter bands */
239 : const Word16 n_param_bands_res /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
240 : )
241 : {
242 :
243 : Word16 idx;
244 :
245 : /* initialize buffers */
246 3803 : FOR( idx = 0; idx < n_param_bands; idx++ )
247 : {
248 3506 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
249 3506 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
250 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
251 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
252 : }
253 :
254 3053 : FOR( idx = 0; idx < n_param_bands_res; idx++ )
255 : {
256 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
257 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
258 : }
259 :
260 :
261 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
262 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
263 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
264 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
265 :
266 297 : return;
267 : }
268 :
269 :
270 : /*-------------------------------------------------------------------*
271 : * ivas_dirac_dec_output_synthesis_cov_close()
272 : *
273 : * deallocate dynamic memory in the covariance synthesis state
274 : *-------------------------------------------------------------------*/
275 :
276 297 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
277 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i : handle for the covariance synthesis parameters */
278 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state */
279 : )
280 : {
281 : Word16 idx;
282 :
283 : /*-----------------------------------------------------------------*
284 : * memory deallocation
285 : *-----------------------------------------------------------------*/
286 :
287 : /* free interpolator */
288 297 : IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
289 : {
290 297 : free( h_dirac_output_synthesis_params->interpolator_fx );
291 297 : h_dirac_output_synthesis_params->interpolator_fx = NULL;
292 : }
293 :
294 : /* free alpha */
295 297 : IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
296 : {
297 0 : free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
298 0 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
299 : }
300 :
301 : /* free proto_matrix */
302 297 : IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
303 : {
304 297 : free( h_dirac_output_synthesis_params->proto_matrix_fx );
305 297 : h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
306 : }
307 :
308 :
309 297 : IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
310 : {
311 297 : free( h_dirac_output_synthesis_state->cx_old_e );
312 297 : h_dirac_output_synthesis_state->cx_old_e = NULL;
313 : }
314 297 : IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
315 : {
316 297 : free( h_dirac_output_synthesis_state->cy_old_e );
317 297 : h_dirac_output_synthesis_state->cy_old_e = NULL;
318 : }
319 :
320 : /* free cov buffers */
321 18117 : FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
322 : {
323 17820 : IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
324 : {
325 3506 : free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
326 3506 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
327 : }
328 :
329 17820 : IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
330 : {
331 3506 : free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
332 3506 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
333 : }
334 :
335 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
336 : {
337 3506 : free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
338 3506 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
339 : }
340 :
341 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
342 : {
343 2756 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
344 2756 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
345 : }
346 :
347 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
348 : {
349 3506 : free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
350 3506 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
351 : }
352 :
353 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
354 : {
355 2756 : free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
356 2756 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
357 : }
358 : }
359 :
360 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
361 : {
362 297 : free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
363 297 : h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
364 : }
365 :
366 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
367 : {
368 297 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
369 297 : h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
370 : }
371 :
372 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
373 : {
374 297 : free( h_dirac_output_synthesis_state->mixing_matrix_exp );
375 297 : h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
376 : }
377 :
378 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
379 : {
380 297 : free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
381 297 : h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
382 : }
383 297 : return;
384 : }
385 :
386 : /*-------------------------------------------------------------------*
387 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
388 : *
389 : * collect the multi channel input covariance for one filter bank time slot
390 : *-------------------------------------------------------------------*/
391 :
392 : /*-------------------------------------------------------------------*
393 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
394 : *
395 : * collect the multi channel input covariance for one filter bank time slot
396 : *-------------------------------------------------------------------*/
397 1776450 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
398 : Word32 *RealBuffer_fx, /* i : input channel filter bank samples (real part) Q(31- RealBuffer_e)*/
399 : Word16 RealBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
400 : Word32 *ImagBuffer_fx, /* i : input channel filter bank samples (imaginary part Q(ImagBuffer_e)*/
401 : Word16 ImagBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
402 : Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (real part) Q(31- cx_e)*/
403 : Word16 *cx_e, /* i : exponent for accumulated input covariance (real part) */
404 : Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (imaginary part) Q(31- cx_imag_e)*/
405 : Word16 *cx_imag_e, /* i : exponent accumulated input covariance (imag part) */
406 : PARAM_MC_DEC_HANDLE hParamMC, /* i : handle to Parametric MC state */
407 : const Word16 param_band, /* i : parameter band */
408 : const Word16 nchan_in /* i : number of input channels */
409 : )
410 : {
411 : Word16 band_idx, ch_idx;
412 : Word16 brange[2];
413 : Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
414 : Word16 real_in_e;
415 : Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
416 : Word16 imag_in_e;
417 : Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
418 : Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
419 : Word16 output_e;
420 : Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
421 : Word16 band, num_bands;
422 : Word16 cx_fx_norm, cx_imag_fx_norm;
423 : /* estimate input covariance */
424 : /* Already stack here instead of in the process_subframe */
425 :
426 : /* collect input frame */
427 1776450 : brange[0] = hParamMC->band_grouping[param_band];
428 1776450 : move16();
429 1776450 : brange[1] = hParamMC->band_grouping[param_band + 1];
430 1776450 : move16();
431 1776450 : num_bands = sub( brange[1], brange[0] );
432 :
433 9379450 : FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
434 : {
435 7603000 : band = add( brange[0], band_idx );
436 22843320 : FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
437 : {
438 15240320 : real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
439 15240320 : move32();
440 15240320 : imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
441 15240320 : move32();
442 : }
443 : }
444 :
445 1776450 : real_in_e = RealBuffer_e;
446 1776450 : move16();
447 1776450 : imag_in_e = ImagBuffer_e;
448 1776450 : move16();
449 :
450 1776450 : Word16 buf_len = imult1616( num_bands, nchan_in );
451 :
452 1776450 : shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
453 1776450 : shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
454 :
455 1776450 : real_in_e = sub( real_in_e, shift_real );
456 1776450 : imag_in_e = sub( imag_in_e, shift_imag );
457 :
458 :
459 1776450 : output_e = s_max( real_in_e, imag_in_e );
460 :
461 1776450 : scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
462 1776450 : scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
463 :
464 1776450 : cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
465 1776450 : v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
466 :
467 1776450 : v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
468 :
469 1776450 : cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
470 1776450 : cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
471 :
472 1776450 : scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
473 1776450 : scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
474 :
475 1776450 : *cx_e = sub( tmp1_e, cx_fx_norm );
476 1776450 : move16();
477 1776450 : *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
478 1776450 : move16();
479 :
480 1776450 : return;
481 : }
482 :
483 : /*-------------------------------------------------------------------*
484 : * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
485 : *
486 : * synthesize one filter bank slot of multi channel output filter bank
487 : * samples with the covariance synthesis
488 : *-------------------------------------------------------------------*/
489 :
490 140160 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
491 : Word32 *Cldfb_RealBuffer_in_fx, /*Q6*/
492 : Word32 *Cldfb_ImagBuffer_in_fx, /*Q6*/
493 : Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (real part) Q6*/
494 : Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (imaginary part) Q6*/
495 : Word32 *mixing_matrix_fx[], /* i : parameter band wise mixing matrices (direct part) Q(31-mixing_matrix_e)*/
496 : Word16 *mixing_matrix_e, /* i : parameter band wise mixing matrices (direct part) */
497 : Word32 *mixing_matrix_res_fx[], /* i : parameter band wise mixing matrices (residual part) Q(31-mixing_matrix_res_e)*/
498 : Word16 *mixing_matrix_res_e, /* i : parameter band wise mixing matrices (residual part) */
499 : const UWord16 slot_idx_sfr, /* i : time slot index for the current slot within the current subframe */
500 : const UWord16 slot_idx_tot, /* i : time slot index for the current slot within the frame */
501 : const Word16 nX, /* i : number of input channels */
502 : const Word16 nY, /* i : number of output channels */
503 : PARAM_MC_DEC_HANDLE hParamMC /* i : handle to the Parametric MC decoder state */
504 : )
505 : {
506 : Word16 param_band_idx, band, ch_idx;
507 : Word16 have_residual;
508 : Word16 brange[2];
509 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
510 : Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
511 : Word16 mixing_matrix_smooth_e;
512 : Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
513 140160 : Word16 mixing_matrix_res_smooth_e = 0;
514 140160 : move16();
515 : Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
516 : Word16 mixing_matrix_buffer_e;
517 : Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
518 : Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
519 : Word32 output_f_real_fx[MAX_CICP_CHANNELS];
520 : Word32 output_f_imag_fx[MAX_CICP_CHANNELS];
521 : Word16 output_f_real_e;
522 : Word16 output_f_imag_e;
523 : Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
524 : Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
525 :
526 140160 : h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
527 :
528 140160 : set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
529 140160 : set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
530 140160 : set_zero_fx( output_f_real_fx, MAX_CICP_CHANNELS );
531 140160 : set_zero_fx( output_f_imag_fx, MAX_CICP_CHANNELS );
532 140160 : set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
533 140160 : set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
534 :
535 1950080 : FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
536 : {
537 : /* final mixing */
538 1809920 : have_residual = 0;
539 1809920 : move16();
540 1809920 : brange[0] = hParamMC->band_grouping[param_band_idx];
541 1809920 : move16();
542 1809920 : brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
543 1809920 : move16();
544 :
545 1809920 : if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
546 : {
547 1454400 : have_residual = 1;
548 1454400 : move16();
549 : }
550 :
551 1809920 : v_multc_fixed( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
552 1809920 : mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
553 1809920 : move16();
554 :
555 1809920 : v_multc_fixed( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
556 1809920 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
557 1809920 : move16();
558 :
559 1809920 : v_add_fixed_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
560 :
561 1809920 : IF( have_residual )
562 : {
563 : /* residual mixing matrix interpolation*/
564 :
565 1454400 : v_multc_fixed( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
566 1454400 : mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
567 1454400 : move16();
568 :
569 1454400 : set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
570 1454400 : v_multc_fixed( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
571 1454400 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
572 1454400 : move16();
573 :
574 1454400 : v_add_fixed_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
575 : }
576 :
577 :
578 9553920 : FOR( band = brange[0]; band < brange[1]; band++ )
579 : {
580 7744000 : assert( band >= 0 );
581 :
582 7744000 : IF( have_residual )
583 : {
584 : /* collect diffuse prototypes */
585 2803200 : assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
586 19833600 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
587 : {
588 17030400 : diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
589 17030400 : move32();
590 17030400 : diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
591 17030400 : move32();
592 : }
593 :
594 : /* apply residual mixing */
595 :
596 2803200 : matrix_product_mant_exp_fx( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, nY, nY, 0, diff_f_real_fx, 25, nY, 1, 0, output_f_real_fx, &output_f_real_e );
597 2803200 : scale_sig32( output_f_real_fx, nY, sub( Q6, sub( Q31, output_f_real_e ) ) ); // Q6
598 :
599 :
600 2803200 : matrix_product_mant_exp_fx( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, nY, nY, 0, diff_f_imag_fx, 25, nY, 1, 0, output_f_imag_fx, &output_f_imag_e );
601 2803200 : scale_sig32( output_f_imag_fx, nY, sub( Q6, sub( Q31, output_f_imag_e ) ) ); // Q6
602 :
603 19833600 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
604 : {
605 17030400 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = output_f_real_fx[ch_idx]; // Q6
606 17030400 : move32();
607 17030400 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = output_f_imag_fx[ch_idx]; // Q6
608 17030400 : move32();
609 : }
610 : }
611 : ELSE
612 : {
613 34969600 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
614 : {
615 30028800 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
616 30028800 : move32();
617 30028800 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
618 30028800 : move32();
619 : }
620 : }
621 :
622 : /* collect input signals, still in cldfb buffers */
623 23270400 : FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
624 : {
625 :
626 15526400 : input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
627 15526400 : move32();
628 15526400 : input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
629 15526400 : move32();
630 : }
631 :
632 : /* apply mixing matrix */
633 :
634 7744000 : matrix_product_mant_exp_fx( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, nY, nX, 0, input_f_real_fx, 25, nX, 1, 0, output_f_real_fx, &output_f_real_e );
635 7744000 : scale_sig32( output_f_real_fx, MAX_CICP_CHANNELS, sub( 6, sub( 31, output_f_real_e ) ) ); // Q6
636 :
637 :
638 7744000 : matrix_product_mant_exp_fx( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, nY, nX, 0, input_f_imag_fx, 25, nX, 1, 0, output_f_imag_fx, &output_f_imag_e );
639 7744000 : scale_sig32( output_f_imag_fx, MAX_CICP_CHANNELS, sub( 6, sub( 31, output_f_imag_e ) ) ); // Q6
640 :
641 : /* collect output */
642 54803200 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
643 : {
644 47059200 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], output_f_real_fx[ch_idx] );
645 47059200 : move32();
646 47059200 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], output_f_imag_fx[ch_idx] );
647 47059200 : move32();
648 : }
649 : }
650 : }
651 :
652 140160 : return;
653 : }
654 :
655 : /*-------------------------------------------------------------------*
656 : * computeMixingMatrices()
657 : *
658 : * compute a mixing matrix using the convariance synthesis approach
659 : *-------------------------------------------------------------------*/
660 :
661 113120 : Word16 computeMixingMatrices_fx(
662 : const Word16 num_inputs, /* i : number of input channels */
663 : const Word16 num_outputs, /* i : number of output channels */
664 : const Word32 *Cx, /* i : input channel covariance matrix Q(31-Cx_e) */
665 : Word16 Cx_e,
666 : const Word32 *Cy, /* i : target covariance matrix Q(31-Cy_e) */
667 : Word16 Cy_e,
668 : const Word32 *Q, /* i : prototype matrix (usually a upmix matrix) Q_fx_e */
669 : Word16 Q_fx_e,
670 : const Word16 energy_compensation_flag, /* i : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
671 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values */
672 : Word16 reg_Sx_e,
673 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
674 : Word16 reg_ghat_e,
675 : Word32 *mixing_matrix_fx, /* o : resulting mixing matrix Q(31-mixing_matrix_out_e) */
676 : Word16 *mixing_matrix_out_e,
677 : Word32 *Cr_fx, /* o : residual covariance matrix Q(31-Cr_e) */
678 : Word16 *Cr_e )
679 : {
680 : Word16 i, j;
681 : Word16 out;
682 : Word16 nL, nC;
683 113120 : Word16 lengthCx = num_inputs;
684 113120 : Word16 lengthCy = num_outputs;
685 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
686 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
687 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
688 : Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
689 : Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
690 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
691 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
692 : Word16 Cx_fx_e;
693 : Word16 Cy_fx_e;
694 :
695 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
696 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
697 :
698 :
699 : Word32 limit_fx;
700 : Word16 limit_e;
701 :
702 : Word32 L_tmp;
703 : Word16 tmp_e, tmp, exp;
704 :
705 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
706 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
707 :
708 : Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
709 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
710 :
711 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
712 : Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
713 :
714 : Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
715 : Word16 Q_e;
716 :
717 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
718 : Word16 Q_Cx_e;
719 :
720 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
721 : Word16 Cy_hat_diag_e;
722 : Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
723 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
724 : Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
725 :
726 : Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
727 :
728 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
729 :
730 : Word16 mixing_matrix_e;
731 :
732 : Word16 Cr_fx_e;
733 :
734 :
735 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
736 : Word16 adj_e[MAX_OUTPUT_CHANNELS];
737 : Word32 *adj_fx_p;
738 : Word16 adj_fx_e;
739 :
740 : Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
741 113120 : push_wmops( "dirac_cov_mix_mat" );
742 :
743 113120 : out = EXIT_SUCCESS;
744 113120 : move16();
745 :
746 113120 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
747 1923040 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
748 : {
749 1809920 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
750 1809920 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
751 1809920 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
752 : }
753 :
754 :
755 113120 : Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
756 113120 : Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
757 113120 : Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
758 :
759 113120 : Q_e = Q_fx_e;
760 113120 : move16();
761 113120 : Cx_fx_e = Cx_e;
762 113120 : move16();
763 113120 : Cy_fx_e = Cy_e;
764 113120 : move16();
765 :
766 :
767 : /*-----------------------------------------------------------------*
768 : * Decomposition of Cy
769 : *-----------------------------------------------------------------*/
770 :
771 : /* Processing the SVD */
772 :
773 113120 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
774 :
775 113120 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
776 113120 : Word16 max_e = -32;
777 : /* Computing Ky */
778 688444 : FOR( i = 0; i < lengthCy; ++i )
779 : {
780 3533968 : FOR( j = 0; j < lengthCy; ++j )
781 : {
782 2958644 : tmp_e = svd_s_buffer_e[j];
783 2958644 : move16();
784 2958644 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
785 2958644 : Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
786 2958644 : move32();
787 2958644 : Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
788 2958644 : move16();
789 2958644 : max_e = s_max( max_e, tmp_e );
790 : }
791 : }
792 3071764 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
793 : {
794 2958644 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
795 2958644 : move32();
796 2958644 : Ky_fx_e[i] = max_e;
797 2958644 : move16();
798 : }
799 :
800 : /*-----------------------------------------------------------------*
801 : * Decomposition of Cx
802 : *-----------------------------------------------------------------*/
803 :
804 : /* Processing the SVD */
805 :
806 113120 : mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
807 :
808 113120 : svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
809 113120 : max_e = -32;
810 339920 : FOR( i = 0; i < lengthCx; ++i )
811 : {
812 682080 : FOR( j = 0; j < lengthCx; ++j )
813 : {
814 455280 : tmp_e = svd_s_buffer_e[j];
815 455280 : move16();
816 455280 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
817 455280 : Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
818 455280 : move32();
819 455280 : Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
820 455280 : move16();
821 455280 : max_e = s_max( max_e, tmp_e );
822 : }
823 : }
824 568400 : FOR( i = 0; i < lengthCx * lengthCx; ++i )
825 : {
826 455280 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
827 455280 : move32();
828 455280 : Kx_fx_e[i] = max_e;
829 455280 : move16();
830 : }
831 :
832 339920 : FOR( i = 0; i < lengthCx; ++i )
833 : {
834 226800 : tmp_e = svd_s_buffer_e[i];
835 226800 : move16();
836 226800 : svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
837 226800 : move32();
838 226800 : svd_s_buffer_e[i] = tmp_e;
839 226800 : move16();
840 : }
841 :
842 : /*-----------------------------------------------------------------*
843 : * Regularization of Sx
844 : *-----------------------------------------------------------------*/
845 :
846 113120 : limit_fx = svd_s_buffer_fx[0];
847 113120 : move32();
848 113120 : limit_e = svd_s_buffer_e[0];
849 113120 : move16();
850 226800 : FOR( i = 1; i < lengthCx; i++ )
851 : {
852 113680 : IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) > 0 )
853 : {
854 0 : limit_fx = svd_s_buffer_fx[i];
855 0 : move32();
856 0 : limit_e = svd_s_buffer_e[i];
857 0 : move16();
858 : }
859 : }
860 :
861 113120 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx );
862 113120 : limit_e = add( limit_e, reg_Sx_e );
863 113120 : limit_fx = L_add( L_tmp, EPSILON_FX );
864 :
865 339920 : FOR( i = 0; i < lengthCx; ++i )
866 : {
867 226800 : IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) < 0 )
868 : {
869 49196 : svd_s_buffer_fx[i] = limit_fx;
870 49196 : move32();
871 49196 : svd_s_buffer_e[i] = limit_e;
872 49196 : move16();
873 : }
874 : }
875 :
876 :
877 113120 : limit_fx = 0;
878 113120 : move32();
879 113120 : limit_e = 0;
880 113120 : move16();
881 :
882 :
883 : /*-----------------------------------------------------------------*
884 : * regularized Kx-1
885 : *-----------------------------------------------------------------*/
886 :
887 339920 : FOR( i = 0; i < lengthCx; ++i )
888 : {
889 : Word16 scale, reg_fac_fx;
890 226800 : reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
891 226800 : scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
892 682080 : FOR( j = 0; j < lengthCx; ++j )
893 : {
894 455280 : Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
895 455280 : move32();
896 455280 : Kx_reg_inv_e[i + j * lengthCx] = scale;
897 455280 : move16();
898 : }
899 : }
900 :
901 : /*-----------------------------------------------------------------*
902 : * normalization matrix G hat
903 : *-----------------------------------------------------------------*/
904 :
905 : /* Computing Q*Cx*Q' */
906 :
907 113120 : matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
908 :
909 113120 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
910 :
911 :
912 688444 : FOR( i = 0; i < lengthCy; ++i )
913 : {
914 575324 : IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
915 : {
916 289583 : limit_fx = Cy_hat_diag_fx[i];
917 289583 : move32();
918 289583 : limit_e = Cy_hat_diag_e;
919 289583 : move16();
920 : }
921 : }
922 113120 : L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
923 113120 : limit_fx = L_add( L_tmp, EPSILON_FX );
924 113120 : limit_e = add( limit_e, reg_ghat_e );
925 :
926 688444 : FOR( i = 0; i < lengthCy; ++i )
927 : {
928 575324 : Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
929 575324 : move16();
930 575324 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_buff_e[i] ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
931 : {
932 210 : Cy_hat_diag_fx[i] = limit_fx;
933 210 : move32();
934 210 : Cy_hat_diag_buff_e[i] = limit_e;
935 210 : move16();
936 : }
937 :
938 575324 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
939 575324 : exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
940 575324 : L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
941 575324 : G_hat_fx[i] = L_tmp;
942 575324 : move32();
943 575324 : G_hat_buff_e[i] = exp;
944 575324 : move16();
945 : }
946 :
947 :
948 : /*-----------------------------------------------------------------*
949 : * Formulate optimal P
950 : *-----------------------------------------------------------------*/
951 :
952 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
953 :
954 : Word16 mat_mult_buffer1_fx_e;
955 :
956 : Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
957 :
958 113120 : matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
959 :
960 113120 : matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
961 :
962 113120 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
963 :
964 113120 : IF( LT_16( lengthCx, lengthCy ) )
965 : {
966 113120 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
967 113120 : nL = lengthCy;
968 113120 : move16();
969 113120 : nC = lengthCx;
970 113120 : move16();
971 113120 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
972 : }
973 : ELSE
974 : {
975 0 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
976 0 : nL = lengthCx;
977 0 : move16();
978 0 : nC = lengthCy;
979 0 : move16();
980 0 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
981 : }
982 :
983 : /* Actually Processing P */
984 :
985 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
986 : truncates V to num_input_channel columns */
987 :
988 113120 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
989 113120 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
990 :
991 113120 : mat_mult_buffer1_fx_e = 0;
992 113120 : move16();
993 113120 : mat_mult_buffer2_e = 0;
994 113120 : move16();
995 :
996 113120 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
997 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
998 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
999 :
1000 : /************************ Formulate M **********************/
1001 :
1002 113120 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1003 :
1004 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
1005 :
1006 : Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1007 113120 : set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1008 :
1009 113120 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
1010 :
1011 : /*-----------------------------------------------------------------*
1012 : * Formulate Cr
1013 : *-----------------------------------------------------------------*/
1014 :
1015 : /* Compute Cy_tilde = M*Cx*M' */
1016 :
1017 :
1018 : Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
1019 113120 : set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
1020 113120 : matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
1021 :
1022 113120 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
1023 :
1024 113120 : exp = mixing_matrix_fx_e[0];
1025 113120 : move16();
1026 1156848 : FOR( i = 1; i < lengthCy * lengthCx; i++ )
1027 : {
1028 1043728 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1029 : {
1030 81545 : exp = mixing_matrix_fx_e[i];
1031 81545 : move16();
1032 : }
1033 : }
1034 :
1035 1269968 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1036 : {
1037 1156848 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1038 1156848 : move32();
1039 : }
1040 :
1041 113120 : mixing_matrix_e = exp;
1042 113120 : move16();
1043 :
1044 113120 : Cr_p_fx = Cr_fx;
1045 113120 : Cy_p_fx = Cy_fx;
1046 113120 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1047 : Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1048 688444 : FOR( i = 0; i < lengthCy; ++i )
1049 : {
1050 3533968 : FOR( j = 0; j < lengthCy; ++j )
1051 : {
1052 :
1053 2958644 : *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
1054 2958644 : move32();
1055 2958644 : Cr_p_fx++;
1056 2958644 : Cy_p_fx++;
1057 2958644 : Cy_tilde_p_fx++;
1058 : }
1059 :
1060 : /* Avoid Meaningless negative main diagonal elements */
1061 575324 : IF( Cr_fx[i + ( i * lengthCy )] < 0 )
1062 : {
1063 62266 : Cr_fx[i + ( i * lengthCy )] = 0;
1064 62266 : move32();
1065 62266 : Cr_e_arr[i + ( i * lengthCy )] = 0;
1066 62266 : move16();
1067 : }
1068 : }
1069 :
1070 113120 : exp = Cr_e_arr[0];
1071 113120 : move16();
1072 2958644 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1073 : {
1074 2845524 : if ( LT_16( exp, Cr_e_arr[i] ) )
1075 : {
1076 103413 : exp = Cr_e_arr[i];
1077 103413 : move16();
1078 : }
1079 : }
1080 :
1081 3071764 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1082 : {
1083 2958644 : Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
1084 2958644 : move32();
1085 : }
1086 :
1087 113120 : Cr_fx_e = exp;
1088 113120 : move16();
1089 :
1090 113120 : exp = mat_mult_buffer2_fx_e[0];
1091 113120 : move16();
1092 2958644 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1093 : {
1094 2845524 : if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
1095 : {
1096 156985 : exp = mat_mult_buffer2_fx_e[i];
1097 156985 : move16();
1098 : }
1099 : }
1100 :
1101 3071764 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1102 : {
1103 2958644 : mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
1104 2958644 : move32();
1105 : }
1106 :
1107 113120 : mat_mult_buffer2_e = exp;
1108 113120 : move16();
1109 :
1110 : /*-----------------------------------------------------------------*
1111 : * Energy Compensation
1112 : *-----------------------------------------------------------------*/
1113 :
1114 113120 : IF( EQ_16( energy_compensation_flag, 1 ) )
1115 : {
1116 22220 : adj_fx_p = svd_s_buffer_fx;
1117 22220 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1118 :
1119 135100 : FOR( i = 0; i < lengthCy; ++i )
1120 : {
1121 : /* Avoid correction for very small energies,
1122 : main diagonal elements of Cy_tilde_p may be negative */
1123 112880 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
1124 : {
1125 0 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1126 0 : move32();
1127 0 : adj_e[i] = 1;
1128 0 : move16();
1129 : }
1130 : ELSE
1131 : {
1132 112880 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
1133 112880 : exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
1134 112880 : L_tmp = L_deposit_h( tmp );
1135 112880 : L_tmp = Sqrt32( L_tmp, &exp );
1136 112880 : adj_fx_p[i] = L_tmp;
1137 112880 : move32();
1138 112880 : adj_e[i] = exp;
1139 112880 : move16();
1140 : }
1141 :
1142 112880 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
1143 112880 : IF( GT_32( adj_fx_p[i], temp ) )
1144 : {
1145 4 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1146 4 : move32();
1147 4 : adj_e[i] = 3;
1148 4 : move16();
1149 : }
1150 : }
1151 :
1152 22220 : exp = adj_e[0];
1153 22220 : move16();
1154 112880 : FOR( i = 1; i < lengthCy; i++ )
1155 : {
1156 90660 : if ( LT_16( exp, adj_e[i] ) )
1157 : {
1158 1050 : exp = adj_e[i];
1159 1050 : move16();
1160 : }
1161 : }
1162 :
1163 135100 : FOR( i = 0; i < lengthCy; i++ )
1164 : {
1165 112880 : adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
1166 112880 : move32();
1167 : }
1168 22220 : adj_fx_e = exp;
1169 22220 : move16();
1170 :
1171 22220 : diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1172 :
1173 22220 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
1174 22220 : mixing_matrix_e = mat_mult_buffer3_e;
1175 22220 : move16();
1176 : }
1177 :
1178 113120 : *mixing_matrix_out_e = mixing_matrix_e;
1179 113120 : move16();
1180 113120 : *Cr_e = Cr_fx_e;
1181 113120 : move16();
1182 113120 : pop_wmops();
1183 :
1184 113120 : return out;
1185 : }
1186 :
1187 : /*-------------------------------------------------------------------*
1188 : * computeMixingMatricesResidual()
1189 : *
1190 : * compute a residual mixing matrix using the covariance synthesis approach
1191 : *-------------------------------------------------------------------*/
1192 :
1193 90900 : Word16 computeMixingMatricesResidual_fx(
1194 : const Word32 num_outputs, /* i : number of output channels */
1195 : const Word32 *Cx_fx, /* i : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e) */
1196 : const Word16 Cx_e,
1197 : const Word32 *Cy_fx, /* i : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
1198 : const Word16 Cy_fx_e,
1199 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values Q(31-reg_Sx_e) */
1200 : const Word16 reg_Sx_e,
1201 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
1202 : const Word16 reg_ghat_e,
1203 : Word32 *mixing_matrix_fx, /* o : resulting residual mixing matrix Q(31-mixing_matrix_ret_e) */
1204 : Word16 *mixing_matrix_ret_e )
1205 : {
1206 : Word16 i, j;
1207 : Word16 out, lengthCx, lengthCy;
1208 90900 : out = EXIT_SUCCESS;
1209 90900 : move16();
1210 90900 : lengthCx = extract_l( num_outputs );
1211 90900 : lengthCy = extract_l( num_outputs );
1212 :
1213 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1214 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1215 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1216 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1217 :
1218 : Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1219 :
1220 90900 : push_wmops( "dirac_cov_mix_mat_r" );
1221 :
1222 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1223 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
1224 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1225 90900 : Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
1226 90900 : move16();
1227 :
1228 90900 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
1229 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1230 : Word32 L_tmp;
1231 : Word16 tmp_e;
1232 : Word16 tmp, scale;
1233 : Word32 div_tmp;
1234 : Word16 exp;
1235 :
1236 : Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
1237 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1238 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1239 : Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
1240 :
1241 : Word32 limit_fx;
1242 : Word16 limit_e;
1243 :
1244 : Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
1245 : Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
1246 :
1247 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1248 : Word16 Cy_hat_diag_e;
1249 :
1250 : Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
1251 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1252 : Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
1253 :
1254 : Word32 *adj_fx_p;
1255 : Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
1256 :
1257 : Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
1258 : Word16 Cy_tilde_e;
1259 :
1260 : /*-----------------------------------------------------------------*
1261 : * Decomposition of Cy
1262 : *-----------------------------------------------------------------*/
1263 :
1264 : /* Processing the SVD */
1265 :
1266 : /* linear array to svd buffer */
1267 90900 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
1268 :
1269 90900 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
1270 :
1271 : /* Computing Ky */
1272 90900 : Word16 max_e = -32;
1273 553344 : FOR( i = 0; i < lengthCy; ++i )
1274 : {
1275 2841348 : FOR( j = 0; j < lengthCy; ++j )
1276 : {
1277 2378904 : tmp_e = svd_s_buffer_e[j];
1278 2378904 : move16();
1279 2378904 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
1280 2378904 : Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
1281 2378904 : move32();
1282 2378904 : Ky_fx_e[i + j * lengthCy] = tmp_e;
1283 2378904 : move16();
1284 2378904 : max_e = s_max( max_e, tmp_e );
1285 : }
1286 : }
1287 :
1288 2469804 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
1289 : {
1290 2378904 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
1291 2378904 : move32();
1292 2378904 : Ky_fx_e[i] = max_e;
1293 2378904 : move16();
1294 : }
1295 :
1296 : /*-----------------------------------------------------------------*
1297 : * Decomposition of Cx
1298 : *-----------------------------------------------------------------*/
1299 :
1300 : /* Processing the SVD of Cx*/
1301 : /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
1302 : * would be just indicating the sorting index, so go straight to Kx as the
1303 : * square root of the diagonal of Cx */
1304 :
1305 : /* Computing Kx */
1306 90900 : max_e = -32;
1307 553344 : FOR( i = 0; i < lengthCx; ++i )
1308 : {
1309 462444 : exp = Cx_e;
1310 462444 : move16();
1311 462444 : Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
1312 462444 : move32();
1313 462444 : Kx_fx_e[i] = exp;
1314 462444 : move16();
1315 462444 : max_e = s_max( max_e, exp );
1316 : }
1317 :
1318 553344 : FOR( i = 0; i < lengthCx; ++i )
1319 : {
1320 462444 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
1321 462444 : move32();
1322 462444 : Kx_fx_e[i] = max_e;
1323 462444 : move16();
1324 : }
1325 :
1326 : /*-----------------------------------------------------------------*
1327 : * Regularization of Sx
1328 : *-----------------------------------------------------------------*/
1329 :
1330 90900 : limit_fx = Kx_fx[0];
1331 90900 : move32();
1332 :
1333 462444 : FOR( i = 1; i < lengthCx; i++ )
1334 : {
1335 371544 : IF( GT_32( Kx_fx[i], limit_fx ) )
1336 : {
1337 141665 : limit_fx = Kx_fx[i];
1338 141665 : move32();
1339 : }
1340 : }
1341 :
1342 90900 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
1343 90900 : L_tmp = L_add( L_tmp, EPSILLON_FX );
1344 90900 : limit_fx = L_tmp;
1345 90900 : move16();
1346 90900 : limit_e = add( Kx_fx_e[0], reg_Sx_e );
1347 :
1348 553344 : FOR( i = 0; i < lengthCx; ++i )
1349 : {
1350 462444 : IF( BASOP_Util_Cmp_Mant32Exp( Kx_fx[i], Kx_fx_e[i], limit_fx, limit_e ) > 0 )
1351 : {
1352 461375 : div_tmp = Kx_fx[i];
1353 461375 : move32();
1354 461375 : exp = Kx_fx_e[i];
1355 461375 : move16();
1356 : }
1357 : ELSE
1358 : {
1359 1069 : div_tmp = limit_fx;
1360 1069 : move32();
1361 1069 : exp = limit_e;
1362 1069 : move16();
1363 : }
1364 462444 : tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
1365 462444 : scale = add( scale, sub( Q1, exp ) );
1366 :
1367 462444 : Kx_reg_inv_fx[i] = L_deposit_h( tmp );
1368 462444 : move32();
1369 462444 : Kx_reg_inv_e[i] = scale;
1370 462444 : move16();
1371 : }
1372 :
1373 90900 : limit_fx = 0;
1374 90900 : move32();
1375 90900 : limit_e = 0;
1376 90900 : move16();
1377 :
1378 : /*-----------------------------------------------------------------*
1379 : * regularized Kx-1
1380 : *-----------------------------------------------------------------*/
1381 :
1382 : /*-----------------------------------------------------------------*
1383 : * normalization matrix G hat
1384 : *-----------------------------------------------------------------*/
1385 :
1386 : /* Computing Cy_hat_diag */
1387 90900 : Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
1388 90900 : Cy_hat_diag_e = Cx_e;
1389 90900 : move16();
1390 :
1391 553344 : FOR( i = 0; i < lengthCy; ++i )
1392 : {
1393 462444 : IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
1394 : {
1395 232531 : limit_fx = Cy_hat_diag_fx[i];
1396 232531 : move32();
1397 232531 : limit_e = Cy_hat_diag_e;
1398 232531 : move16();
1399 : }
1400 : }
1401 :
1402 90900 : L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
1403 90900 : limit_fx = L_add( L_tmp, EPSILON_FX );
1404 90900 : limit_e = add( limit_e, reg_ghat_e );
1405 :
1406 : /* Computing G_hat */
1407 553344 : FOR( i = 0; i < lengthCy; ++i )
1408 : {
1409 462444 : Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
1410 462444 : move16();
1411 462444 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_e ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1412 : {
1413 210 : Cy_hat_diag_fx[i] = limit_fx;
1414 210 : move32();
1415 210 : Cy_hat_diag_fx_e[i] = limit_e;
1416 210 : move16();
1417 : }
1418 462444 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
1419 462444 : scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
1420 462444 : L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
1421 :
1422 462444 : G_hat_fx[i] = L_tmp;
1423 462444 : move32();
1424 462444 : G_hat_e[i] = scale;
1425 462444 : move16();
1426 : }
1427 :
1428 : /*-----------------------------------------------------------------*
1429 : * Formulate optimal P
1430 : *-----------------------------------------------------------------*/
1431 :
1432 553344 : FOR( i = 0; i < num_outputs; i++ )
1433 : {
1434 462444 : L_tmp = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
1435 462444 : Kx_fx[i] = L_tmp;
1436 462444 : move32();
1437 462444 : Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
1438 462444 : move16();
1439 : }
1440 :
1441 553344 : FOR( i = 0; i < num_outputs; i++ )
1442 : {
1443 : Word32 fac_fx;
1444 462444 : fac_fx = Kx_fx[i];
1445 462444 : move32();
1446 :
1447 2841348 : FOR( j = 0; j < num_outputs; j++ )
1448 : {
1449 2378904 : L_tmp = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e))
1450 2378904 : mat_mult_buffer1_fx[i + j * num_outputs] = L_tmp;
1451 2378904 : move32();
1452 2378904 : mat_mult_buffer1_buff_e[i + j * num_outputs] = extract_l( L_add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] ) );
1453 2378904 : move16();
1454 : }
1455 : }
1456 :
1457 90900 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
1458 90900 : move16();
1459 2378904 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1460 : {
1461 2288004 : if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
1462 : {
1463 24305 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
1464 24305 : move16();
1465 : }
1466 : }
1467 :
1468 2469804 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1469 : {
1470 2378904 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
1471 2378904 : move32();
1472 : }
1473 :
1474 90900 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1475 :
1476 90900 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
1477 :
1478 : /* Actually Processing P */
1479 :
1480 90900 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1481 90900 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1482 90900 : mat_mult_buffer1_e = 0;
1483 90900 : move16();
1484 90900 : mat_mult_buffer2_e = 0;
1485 90900 : move16();
1486 :
1487 90900 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
1488 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1489 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1490 :
1491 : /*-----------------------------------------------------------------*
1492 : * Formulate M
1493 : *-----------------------------------------------------------------*/
1494 :
1495 :
1496 90900 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1497 90900 : set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1498 :
1499 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1500 :
1501 553344 : FOR( i = 0; i < num_outputs; i++ )
1502 : {
1503 : Word32 fac_fx;
1504 462444 : fac_fx = Kx_reg_inv_fx[i];
1505 462444 : move32();
1506 :
1507 2841348 : FOR( j = 0; j < num_outputs; j++ )
1508 : {
1509 2378904 : L_tmp = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e)
1510 2378904 : mixing_matrix_fx[j + i * num_outputs] = L_tmp;
1511 2378904 : move32();
1512 2378904 : mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
1513 2378904 : move16();
1514 : }
1515 : }
1516 :
1517 : /*-----------------------------------------------------------------*
1518 : * Formulate Cr
1519 : *-----------------------------------------------------------------*/
1520 :
1521 : /* Compute Cy_tilde = M*Cx*M' */
1522 :
1523 : Word16 Cx_e_arr[MAX_CICP_CHANNELS];
1524 90900 : set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
1525 :
1526 90900 : matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1527 :
1528 90900 : exp = mixing_matrix_fx_e[0];
1529 90900 : move16();
1530 2378904 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1531 : {
1532 2288004 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1533 : {
1534 9848 : exp = mixing_matrix_fx_e[i];
1535 9848 : move16();
1536 : }
1537 : }
1538 :
1539 2469804 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1540 : {
1541 2378904 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1542 2378904 : move32();
1543 : }
1544 90900 : mixing_matrix_e = exp;
1545 90900 : move16();
1546 :
1547 90900 : exp = mat_mult_buffer1_buff_e[0];
1548 90900 : move16();
1549 2378904 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1550 : {
1551 2288004 : if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
1552 : {
1553 9848 : exp = mat_mult_buffer1_buff_e[i];
1554 9848 : move16();
1555 : }
1556 : }
1557 :
1558 2469804 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1559 : {
1560 2378904 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
1561 2378904 : move32();
1562 : }
1563 90900 : mat_mult_buffer1_e = exp;
1564 90900 : move16();
1565 :
1566 90900 : matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
1567 :
1568 : /*-----------------------------------------------------------------*
1569 : * Energy Compensation
1570 : *-----------------------------------------------------------------*/
1571 :
1572 90900 : adj_fx_p = svd_s_buffer_fx;
1573 :
1574 553344 : FOR( i = 0; i < lengthCy; ++i )
1575 : {
1576 462444 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
1577 462444 : scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
1578 462444 : adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
1579 462444 : move32();
1580 462444 : adj_buff_e[i] = scale;
1581 462444 : move16();
1582 462444 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
1583 462444 : IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
1584 : {
1585 1 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1586 1 : move32();
1587 1 : adj_buff_e[i] = 3;
1588 1 : move16();
1589 : }
1590 : }
1591 :
1592 90900 : adj_e = adj_buff_e[0];
1593 90900 : move16();
1594 :
1595 462444 : FOR( i = 1; i < lengthCy; i++ )
1596 : {
1597 371544 : if ( LT_16( adj_e, adj_buff_e[i] ) )
1598 : {
1599 36546 : adj_e = adj_buff_e[i];
1600 36546 : move16();
1601 : }
1602 : }
1603 :
1604 :
1605 553344 : FOR( i = 0; i < lengthCy; i++ )
1606 : {
1607 462444 : adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
1608 462444 : move32();
1609 : }
1610 :
1611 90900 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1612 90900 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
1613 90900 : *mixing_matrix_ret_e = mat_mult_buffer3_e;
1614 90900 : move16();
1615 :
1616 90900 : pop_wmops();
1617 :
1618 90900 : return out;
1619 : }
1620 :
1621 :
1622 : /*-------------------------------------------------------------------*
1623 : * computeMixingMatricesISM()
1624 : *
1625 : *
1626 : *-------------------------------------------------------------------*/
1627 315180 : Word16 computeMixingMatricesISM_fx(
1628 : const Word16 num_inputs,
1629 : const Word16 num_responses,
1630 : const Word16 num_outputs,
1631 : const Word32 *responses_fx, /*Q(31-responses_e) */
1632 : const Word16 responses_e,
1633 : const Word32 *ener_fx, /*Q(31-ener_e) */
1634 : const Word16 ener_e,
1635 : const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
1636 : const Word16 Cx_diag_e,
1637 : const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
1638 : const Word16 Cy_diag_e,
1639 : const Word16 *Q_16fx, // Q15
1640 : const Word16 energy_compensation_flag,
1641 : const Word32 reg_Sx_fx, /*Q0*/
1642 : const Word32 reg_ghat_fx, /*Q0*/
1643 : Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
1644 : Word16 *mixing_matrix_e )
1645 : {
1646 : Word16 i, out;
1647 : Word16 lengthCx, lengthCy;
1648 : Word16 nL, nC;
1649 :
1650 : Word32 *Cy_tilde_p_fx;
1651 : Word32 *adj_fx;
1652 : Word32 limit_fx;
1653 :
1654 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1655 : Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1656 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1657 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1658 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1659 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1660 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
1661 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1662 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
1663 : Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
1664 :
1665 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1666 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1667 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
1668 : Word16 svd_s_buffer_fx_e;
1669 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1670 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1671 :
1672 : Word16 temp_e[MAX_OUTPUT_CHANNELS];
1673 315180 : push_wmops( "dirac_cov_mix_mat" );
1674 :
1675 315180 : out = EXIT_SUCCESS;
1676 315180 : move16();
1677 315180 : lengthCx = num_inputs;
1678 315180 : move16();
1679 315180 : lengthCy = num_outputs;
1680 315180 : move16();
1681 :
1682 6073020 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1683 : {
1684 5757840 : IF( EQ_16( Q_16fx[i], MAX_16 ) )
1685 : {
1686 2575200 : Q_fx[i] = MAX_32;
1687 2575200 : move32();
1688 : }
1689 : ELSE
1690 : {
1691 3182640 : Q_fx[i] = L_deposit_h( Q_16fx[i] );
1692 3182640 : move32();
1693 : }
1694 : }
1695 315180 : Q_e = 0;
1696 315180 : move16();
1697 315180 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
1698 5358060 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
1699 : {
1700 5042880 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1701 5042880 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1702 5042880 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1703 : }
1704 :
1705 : /* Decomposition of Cy = Ky*Ky' */
1706 : /* Ky = responses*diag(ener) */
1707 315180 : matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
1708 :
1709 : /* Decomposition of Cx -> Computing Kx */
1710 315180 : set16_fx( temp_e, Cx_diag_e, lengthCx );
1711 315180 : v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
1712 315180 : Kx_e = temp_e[0];
1713 315180 : move16();
1714 630360 : FOR( i = 1; i < lengthCx; i++ )
1715 : {
1716 315180 : Kx_e = s_max( Kx_e, temp_e[i] );
1717 : }
1718 945540 : FOR( i = 0; i < lengthCx; i++ )
1719 : {
1720 630360 : Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
1721 630360 : move32();
1722 : }
1723 :
1724 : /* Regularization of Sx */
1725 315180 : maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
1726 315180 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
1727 :
1728 945540 : FOR( i = 0; i < lengthCx; ++i )
1729 : {
1730 630360 : IF( GT_32( Kx_fx[i], limit_fx ) )
1731 : {
1732 595148 : svd_s_buffer_fx[i] = Kx_fx[i];
1733 595148 : move32();
1734 : }
1735 : ELSE
1736 : {
1737 35212 : svd_s_buffer_fx[i] = limit_fx;
1738 35212 : move32();
1739 : }
1740 : }
1741 315180 : svd_s_buffer_fx_e = Kx_e;
1742 315180 : move16();
1743 :
1744 315180 : limit_fx = 0;
1745 315180 : move32();
1746 :
1747 : /* regularized Kx-1 */
1748 :
1749 945540 : FOR( i = 0; i < lengthCx; ++i )
1750 : {
1751 630360 : IF( svd_s_buffer_fx[i] )
1752 : {
1753 : Word32 reg_fac;
1754 630356 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
1755 630356 : Kx_reg_inv_fx[i] = reg_fac;
1756 630356 : move32();
1757 630356 : temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
1758 630356 : move16();
1759 : }
1760 : ELSE
1761 : {
1762 : Word32 reg_fac;
1763 4 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
1764 4 : Kx_reg_inv_fx[i] = reg_fac;
1765 4 : move32();
1766 4 : temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
1767 4 : move16();
1768 : }
1769 : }
1770 315180 : Kx_reg_inv_e = temp_e[0];
1771 315180 : move16();
1772 630360 : FOR( i = 1; i < lengthCx; i++ )
1773 : {
1774 315180 : Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
1775 : }
1776 945540 : FOR( i = 0; i < lengthCx; i++ )
1777 : {
1778 630360 : Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
1779 630360 : move32();
1780 : }
1781 :
1782 : /************************ normalization matrix G hat **********************/
1783 :
1784 : /* Computing Q*Cx*Q' */
1785 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1786 : Word16 Cy_hat_diag_e;
1787 :
1788 315180 : matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
1789 :
1790 315180 : Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
1791 :
1792 6073020 : FOR( i = 0; i < lengthCy * lengthCx; ++i )
1793 : {
1794 5757840 : IF( GT_16( Q_Cx_e, Q_e ) )
1795 : {
1796 5757796 : Q_fx[i] = L_shr( Q_fx[i], guard_bits );
1797 5757796 : move32();
1798 : }
1799 : ELSE
1800 : {
1801 44 : Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
1802 44 : move32();
1803 : }
1804 : }
1805 :
1806 315180 : IF( GT_16( Q_Cx_e, Q_e ) )
1807 : {
1808 315178 : Q_e = add( Q_e, guard_bits );
1809 : }
1810 : ELSE
1811 : {
1812 2 : Q_Cx_e = add( Q_Cx_e, guard_bits );
1813 : }
1814 315180 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
1815 :
1816 : /* Computing Cy_hat_diag */
1817 3194100 : FOR( i = 0; i < lengthCy; ++i )
1818 : {
1819 2878920 : if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
1820 : {
1821 467378 : limit_fx = Cy_hat_diag_fx[i];
1822 467378 : move32();
1823 : }
1824 : }
1825 :
1826 315180 : limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
1827 :
1828 : /* Computing G_hat */
1829 3194100 : FOR( i = 0; i < lengthCy; ++i )
1830 : {
1831 2878920 : if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1832 : {
1833 13694 : Cy_hat_diag_fx[i] = limit_fx;
1834 13694 : move32();
1835 : }
1836 2878920 : IF( Cy_diag_fx[i] )
1837 : {
1838 1875173 : IF( Cy_hat_diag_fx[i] )
1839 : {
1840 1875173 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
1841 1875173 : move32();
1842 1875173 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
1843 1875173 : move16();
1844 1875173 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1845 1875173 : move32();
1846 : }
1847 : ELSE
1848 : {
1849 0 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1850 0 : move32();
1851 0 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1852 0 : move16();
1853 0 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1854 0 : move32();
1855 : }
1856 : }
1857 : ELSE
1858 : {
1859 1003747 : G_hat_fx[i] = 0;
1860 1003747 : move32();
1861 1003747 : temp_e[i] = 0;
1862 1003747 : move16();
1863 : }
1864 : }
1865 315180 : G_hat_e = temp_e[0];
1866 315180 : move16();
1867 2878920 : FOR( i = 1; i < lengthCy; i++ )
1868 : {
1869 2563740 : G_hat_e = s_max( G_hat_e, temp_e[i] );
1870 : }
1871 3194100 : FOR( i = 0; i < lengthCy; i++ )
1872 : {
1873 2878920 : G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
1874 2878920 : move32();
1875 : }
1876 :
1877 : /************************ Formulate optimal P **********************/
1878 :
1879 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
1880 315180 : diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1881 :
1882 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1883 :
1884 315180 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1885 :
1886 315180 : IF( LT_16( lengthCx, num_responses ) )
1887 : {
1888 3600 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
1889 :
1890 3600 : nL = num_responses;
1891 3600 : move16();
1892 3600 : nC = lengthCx;
1893 3600 : move16();
1894 3600 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1895 : }
1896 : ELSE
1897 : {
1898 311580 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
1899 :
1900 311580 : nL = lengthCx;
1901 311580 : move16();
1902 311580 : nC = num_responses;
1903 311580 : move16();
1904 311580 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1905 : }
1906 :
1907 : /* Actually Processing P */
1908 :
1909 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
1910 315180 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
1911 315180 : mat_mult_buffer1_e = 0;
1912 315180 : move16();
1913 315180 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1914 315180 : mat_mult_buffer2_e = 0;
1915 315180 : move16();
1916 :
1917 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1918 :
1919 : /************************ Formulate M **********************/
1920 :
1921 315180 : matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1922 :
1923 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
1924 :
1925 : /*********************** Energy Compensation ****************/
1926 :
1927 : /* Compute Cy_tilde = M*Cx*M' */
1928 315180 : matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1929 :
1930 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1931 :
1932 315180 : IF( EQ_16( energy_compensation_flag, 1 ) )
1933 : {
1934 315180 : adj_fx = svd_s_buffer_fx;
1935 315180 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1936 3194100 : FOR( i = 0; i < lengthCy; ++i )
1937 : {
1938 : /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
1939 2878920 : IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
1940 : {
1941 0 : adj_fx[i] = MAX_32;
1942 0 : move32();
1943 0 : temp_e[i] = 0;
1944 0 : move16();
1945 : }
1946 : ELSE
1947 : {
1948 2878920 : IF( Cy_diag_fx[i] )
1949 : {
1950 1875173 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
1951 : {
1952 1874498 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
1953 1874498 : move32();
1954 1874498 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
1955 1874498 : move16();
1956 1874498 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1957 1874498 : move32();
1958 : }
1959 : ELSE
1960 : {
1961 675 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1962 675 : move32();
1963 675 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1964 675 : move16();
1965 675 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1966 675 : move32();
1967 : }
1968 : }
1969 : ELSE
1970 : {
1971 1003747 : adj_fx[i] = 0;
1972 1003747 : move32();
1973 1003747 : temp_e[i] = 0;
1974 1003747 : move16();
1975 : }
1976 : }
1977 :
1978 2878920 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
1979 2878920 : IF( GT_32( adj_fx[i], temp ) )
1980 : {
1981 5518 : adj_fx[i] = MAX_32;
1982 5518 : move32();
1983 5518 : temp_e[i] = 2;
1984 5518 : move16();
1985 : }
1986 : }
1987 315180 : adj_e = temp_e[0];
1988 315180 : move16();
1989 2878920 : FOR( i = 1; i < lengthCy; i++ )
1990 : {
1991 2563740 : adj_e = s_max( adj_e, temp_e[i] );
1992 : }
1993 3194100 : FOR( i = 0; i < lengthCy; i++ )
1994 : {
1995 2878920 : adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
1996 2878920 : move32();
1997 : }
1998 :
1999 315180 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
2000 :
2001 315180 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
2002 315180 : *mixing_matrix_e = mat_mult_buffer3_e;
2003 315180 : move16();
2004 : }
2005 :
2006 315180 : pop_wmops();
2007 :
2008 315180 : return out;
2009 : }
|