Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 :
34 : #include "basop_util.h"
35 : #include "enh32.h"
36 : #include <stdint.h>
37 : #include <string.h>
38 : #include <stdio.h>
39 : #include <stdlib.h>
40 : #include <assert.h>
41 : #include <math.h>
42 : #include "options.h"
43 : #include "cnst.h"
44 : #include "rom_enc.h"
45 : #include "rom_com.h"
46 : #include "prot_fx.h"
47 : #include "ivas_stat_dec.h"
48 : #include "ivas_cnst.h"
49 : #include "ivas_rom_com.h"
50 : #include "ivas_rom_dec.h"
51 : #include "wmc_auto.h"
52 : #include "rom_dec.h"
53 : #include "ivas_prot_fx.h"
54 :
55 : /*-----------------------------------------------------------------------*
56 : * Local constants
57 : *-----------------------------------------------------------------------*/
58 :
59 : #define SQRT_EPSILON_FX 68 /* Q31 square root of EPSILON */
60 :
61 : /*-------------------------------------------------------------------*
62 : * ivas_dirac_dec_output_synthesis_cov_open()
63 : *
64 : * Sets up the state and parameters for the Covariance Synthesis
65 : *-------------------------------------------------------------------*/
66 :
67 341 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
68 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
69 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state */
70 : const Word16 max_band_decorr, /* i : uppermost frequency band where decorrelation is applied */
71 : const Word16 interp_length, /* i : length for interpolating the mixing matrices in time slots */
72 : const Word16 num_param_bands, /* i : number of parameter bands */
73 : const Word16 num_param_bands_residual, /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
74 : const Word16 nchan_in, /* i : number of input (transport) channels */
75 : const Word16 nchan_out, /* i : number of output channels */
76 : const Word32 *proto_matrix /* i : the prototype (upmix) matrix (only used if mode == 1) Q(15-proto_matrix_e) */
77 : )
78 : {
79 : Word16 idx;
80 :
81 341 : h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
82 341 : move16();
83 :
84 : /*-----------------------------------------------------------------*
85 : * memory allocation
86 : *-----------------------------------------------------------------*/
87 :
88 : /* buffer length and interpolator */
89 341 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
90 341 : IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
91 : {
92 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
93 : }
94 341 : h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
95 341 : move16();
96 : /* cov buffers */
97 4445 : FOR( idx = 0; idx < num_param_bands; idx++ )
98 : {
99 4104 : IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
100 : {
101 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
102 : }
103 4104 : h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
104 4104 : IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
105 : {
106 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
107 : }
108 4104 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
109 : {
110 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
111 : }
112 4104 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
113 4104 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
114 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
115 :
116 4104 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
117 : {
118 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
119 : }
120 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
121 4104 : h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
122 4104 : move16();
123 : }
124 16697 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
125 : {
126 16356 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
127 16356 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
128 16356 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
129 16356 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
130 : }
131 :
132 3563 : FOR( idx = 0; idx < num_param_bands_residual; idx++ )
133 : {
134 3222 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
135 : {
136 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
137 : }
138 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
139 :
140 3222 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
141 : {
142 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
143 : }
144 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
145 3222 : h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
146 3222 : move16();
147 : }
148 17579 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
149 : {
150 17238 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
151 17238 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
152 : }
153 :
154 341 : IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
155 : {
156 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
157 : }
158 341 : IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
159 : {
160 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
161 : }
162 :
163 341 : set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
164 341 : set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
165 :
166 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
167 : {
168 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
169 : }
170 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
171 :
172 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
173 : {
174 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
175 : }
176 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
177 :
178 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
179 : {
180 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
181 : }
182 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
183 :
184 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
185 : {
186 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
187 : }
188 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
189 :
190 : /*-----------------------------------------------------------------*
191 : * prepare processing parameters
192 : *-----------------------------------------------------------------*/
193 :
194 : /* compute interpolator */
195 341 : IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
196 : {
197 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
198 : }
199 11253 : FOR( idx = 1; idx <= interp_length; ++idx )
200 : {
201 10912 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
202 10912 : move16();
203 : }
204 341 : Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
205 341 : h_dirac_output_synthesis_params->proto_matrix_e = 5;
206 341 : move16();
207 341 : return IVAS_ERR_OK;
208 : }
209 :
210 :
211 : /*-------------------------------------------------------------------*
212 : * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
213 : *
214 : *
215 : *-------------------------------------------------------------------*/
216 :
217 328605 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
218 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
219 : const UWord16 interp_length /* i : interpolator length */
220 : )
221 : {
222 : Word16 idx;
223 328605 : Word16 tmp, exp_diff = 0;
224 328605 : move16();
225 :
226 1633254 : FOR( idx = 1; idx <= interp_length; ++idx )
227 : {
228 1304649 : tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
229 1304649 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff ); // Q15
230 1304649 : move16();
231 : }
232 :
233 328605 : return;
234 : }
235 :
236 :
237 : /*-------------------------------------------------------------------*
238 : * ivas_dirac_dec_output_synthesis_cov_init()
239 : *
240 : * initialize the states for the covariance synthesis
241 : *-------------------------------------------------------------------*/
242 341 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
243 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis */
244 : const Word16 nchan_in, /* i : number of input (tranport) channels */
245 : const Word16 nchan_out, /* i : number of output channels */
246 : const Word16 n_param_bands, /* i : number of total parameter bands */
247 : const Word16 n_param_bands_res /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
248 : )
249 : {
250 :
251 : Word16 idx;
252 :
253 : /* initialize buffers */
254 4445 : FOR( idx = 0; idx < n_param_bands; idx++ )
255 : {
256 4104 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
257 4104 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
258 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
259 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
260 : }
261 :
262 3563 : FOR( idx = 0; idx < n_param_bands_res; idx++ )
263 : {
264 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
265 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
266 : }
267 :
268 :
269 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
270 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
271 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
272 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
273 :
274 341 : return;
275 : }
276 :
277 :
278 : /*-------------------------------------------------------------------*
279 : * ivas_dirac_dec_output_synthesis_cov_close()
280 : *
281 : * deallocate dynamic memory in the covariance synthesis state
282 : *-------------------------------------------------------------------*/
283 :
284 341 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
285 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i : handle for the covariance synthesis parameters */
286 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state */
287 : )
288 : {
289 : Word16 idx;
290 :
291 : /*-----------------------------------------------------------------*
292 : * memory deallocation
293 : *-----------------------------------------------------------------*/
294 :
295 : /* free interpolator */
296 341 : IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
297 : {
298 341 : free( h_dirac_output_synthesis_params->interpolator_fx );
299 341 : h_dirac_output_synthesis_params->interpolator_fx = NULL;
300 : }
301 :
302 : /* free alpha */
303 341 : IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
304 : {
305 0 : free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
306 0 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
307 : }
308 :
309 : /* free proto_matrix */
310 341 : IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
311 : {
312 341 : free( h_dirac_output_synthesis_params->proto_matrix_fx );
313 341 : h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
314 : }
315 :
316 :
317 341 : IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
318 : {
319 341 : free( h_dirac_output_synthesis_state->cx_old_e );
320 341 : h_dirac_output_synthesis_state->cx_old_e = NULL;
321 : }
322 341 : IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
323 : {
324 341 : free( h_dirac_output_synthesis_state->cy_old_e );
325 341 : h_dirac_output_synthesis_state->cy_old_e = NULL;
326 : }
327 :
328 : /* free cov buffers */
329 20801 : FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
330 : {
331 20460 : IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
332 : {
333 4104 : free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
334 4104 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
335 : }
336 :
337 20460 : IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
338 : {
339 4104 : free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
340 4104 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
341 : }
342 :
343 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
344 : {
345 4104 : free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
346 4104 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
347 : }
348 :
349 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
350 : {
351 3222 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
352 3222 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
353 : }
354 :
355 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
356 : {
357 4104 : free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
358 4104 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
359 : }
360 :
361 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
362 : {
363 3222 : free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
364 3222 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
365 : }
366 : }
367 :
368 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
369 : {
370 341 : free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
371 341 : h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
372 : }
373 :
374 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
375 : {
376 341 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
377 341 : h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
378 : }
379 :
380 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
381 : {
382 341 : free( h_dirac_output_synthesis_state->mixing_matrix_exp );
383 341 : h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
384 : }
385 :
386 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
387 : {
388 341 : free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
389 341 : h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
390 : }
391 341 : return;
392 : }
393 :
394 : /*-------------------------------------------------------------------*
395 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
396 : *
397 : * collect the multi channel input covariance for one filter bank time slot
398 : *-------------------------------------------------------------------*/
399 :
400 : /*-------------------------------------------------------------------*
401 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
402 : *
403 : * collect the multi channel input covariance for one filter bank time slot
404 : *-------------------------------------------------------------------*/
405 2084335 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
406 : Word32 *RealBuffer_fx, /* i : input channel filter bank samples (real part) Q(31- RealBuffer_e)*/
407 : Word16 RealBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
408 : Word32 *ImagBuffer_fx, /* i : input channel filter bank samples (imaginary part Q(ImagBuffer_e)*/
409 : Word16 ImagBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
410 : Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (real part) Q(31- cx_e)*/
411 : Word16 *cx_e, /* i : exponent for accumulated input covariance (real part) */
412 : Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (imaginary part) Q(31- cx_imag_e)*/
413 : Word16 *cx_imag_e, /* i : exponent accumulated input covariance (imag part) */
414 : PARAM_MC_DEC_HANDLE hParamMC, /* i : handle to Parametric MC state */
415 : const Word16 param_band, /* i : parameter band */
416 : const Word16 nchan_in /* i : number of input channels */
417 : )
418 : {
419 : Word16 band_idx, ch_idx;
420 : Word16 brange[2];
421 : Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
422 : Word16 real_in_e;
423 : Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
424 : Word16 imag_in_e;
425 : Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
426 : Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
427 : Word16 output_e;
428 : Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
429 : Word16 band, num_bands;
430 : Word16 cx_fx_norm, cx_imag_fx_norm;
431 : /* estimate input covariance */
432 : /* Already stack here instead of in the process_subframe */
433 :
434 : /* collect input frame */
435 2084335 : brange[0] = hParamMC->band_grouping[param_band];
436 2084335 : move16();
437 2084335 : brange[1] = hParamMC->band_grouping[param_band + 1];
438 2084335 : move16();
439 2084335 : num_bands = sub( brange[1], brange[0] );
440 :
441 10914375 : FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
442 : {
443 8830040 : band = add( brange[0], band_idx );
444 26558880 : FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
445 : {
446 17728840 : real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
447 17728840 : move32();
448 17728840 : imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
449 17728840 : move32();
450 : }
451 : }
452 :
453 2084335 : real_in_e = RealBuffer_e;
454 2084335 : move16();
455 2084335 : imag_in_e = ImagBuffer_e;
456 2084335 : move16();
457 :
458 2084335 : Word16 buf_len = imult1616( num_bands, nchan_in );
459 :
460 2084335 : shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
461 2084335 : shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
462 :
463 2084335 : real_in_e = sub( real_in_e, shift_real );
464 2084335 : imag_in_e = sub( imag_in_e, shift_imag );
465 :
466 :
467 2084335 : output_e = s_max( real_in_e, imag_in_e );
468 :
469 2084335 : scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
470 2084335 : scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
471 :
472 2084335 : cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
473 2084335 : v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
474 :
475 2084335 : v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
476 :
477 2084335 : cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
478 2084335 : cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
479 :
480 2084335 : scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
481 2084335 : scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
482 :
483 2084335 : *cx_e = sub( tmp1_e, cx_fx_norm );
484 2084335 : move16();
485 2084335 : *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
486 2084335 : move16();
487 :
488 2084335 : return;
489 : }
490 :
491 : /*-------------------------------------------------------------------*
492 : * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
493 : *
494 : * synthesize one filter bank slot of multi channel output filter bank
495 : * samples with the covariance synthesis
496 : *-------------------------------------------------------------------*/
497 :
498 163667 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
499 : Word32 *Cldfb_RealBuffer_in_fx, /*Q6*/
500 : Word32 *Cldfb_ImagBuffer_in_fx, /*Q6*/
501 : Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (real part) Q6*/
502 : Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (imaginary part) Q6*/
503 : Word32 *mixing_matrix_fx[], /* i : parameter band wise mixing matrices (direct part) Q(31-mixing_matrix_e)*/
504 : Word16 *mixing_matrix_e, /* i : parameter band wise mixing matrices (direct part) */
505 : Word32 *mixing_matrix_res_fx[], /* i : parameter band wise mixing matrices (residual part) Q(31-mixing_matrix_res_e)*/
506 : Word16 *mixing_matrix_res_e, /* i : parameter band wise mixing matrices (residual part) */
507 : const UWord16 slot_idx_sfr, /* i : time slot index for the current slot within the current subframe */
508 : const UWord16 slot_idx_tot, /* i : time slot index for the current slot within the frame */
509 : const Word16 nX, /* i : number of input channels */
510 : const Word16 nY, /* i : number of output channels */
511 : PARAM_MC_DEC_HANDLE hParamMC /* i : handle to the Parametric MC decoder state */
512 : )
513 : {
514 : Word16 param_band_idx, band, ch_idx;
515 : Word16 have_residual;
516 : Word16 brange[2];
517 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
518 : Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
519 : Word16 mixing_matrix_smooth_e;
520 : Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
521 163667 : Word16 mixing_matrix_res_smooth_e = 0;
522 163667 : move16();
523 : Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
524 : Word16 mixing_matrix_buffer_e;
525 : Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
526 : Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
527 :
528 : Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
529 : Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
530 :
531 163667 : h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
532 :
533 163667 : set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
534 163667 : set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
535 :
536 163667 : set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
537 163667 : set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
538 :
539 2288286 : FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
540 : {
541 : /* final mixing */
542 2124619 : have_residual = 0;
543 2124619 : move16();
544 2124619 : brange[0] = hParamMC->band_grouping[param_band_idx];
545 2124619 : move16();
546 2124619 : brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
547 2124619 : move16();
548 :
549 2124619 : if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
550 : {
551 1709608 : have_residual = 1;
552 1709608 : move16();
553 : }
554 :
555 2124619 : v_multc_fixed( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
556 2124619 : mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
557 2124619 : move16();
558 :
559 2124619 : v_multc_fixed( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
560 2124619 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
561 2124619 : move16();
562 :
563 2124619 : v_add_fixed_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
564 :
565 2124619 : IF( have_residual )
566 : {
567 : /* residual mixing matrix interpolation*/
568 :
569 1709608 : v_multc_fixed( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
570 1709608 : mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
571 1709608 : move16();
572 :
573 1709608 : set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
574 1709608 : v_multc_fixed( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
575 1709608 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
576 1709608 : move16();
577 :
578 1709608 : v_add_fixed_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
579 : }
580 :
581 :
582 11119299 : FOR( band = brange[0]; band < brange[1]; band++ )
583 : {
584 8994680 : assert( band >= 0 );
585 :
586 8994680 : IF( have_residual )
587 : {
588 : /* collect diffuse prototypes */
589 3273340 : assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
590 23265500 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
591 : {
592 19992160 : diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
593 19992160 : move32();
594 19992160 : diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
595 19992160 : move32();
596 : }
597 :
598 : /* apply residual mixing */
599 : {
600 : Word16 shifter;
601 3273340 : shifter = sub( mixing_matrix_res_smooth_e, 32 );
602 23265500 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
603 : {
604 : int i;
605 : Word16 idx;
606 : Word64 temp_real, temp_imag;
607 :
608 :
609 19992160 : idx = ch_idx;
610 19992160 : temp_real = 0;
611 19992160 : temp_imag = 0;
612 19992160 : move64();
613 19992160 : move64();
614 143222880 : for ( i = 0; i < nY; i++ )
615 : {
616 123230720 : temp_real = W_mac_32_32( temp_real, mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] );
617 123230720 : temp_imag = W_mac_32_32( temp_imag, mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] );
618 123230720 : idx += nY;
619 : }
620 19992160 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
621 19992160 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
622 : }
623 : }
624 : }
625 : ELSE
626 : {
627 40612700 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
628 : {
629 34891360 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
630 34891360 : move32();
631 34891360 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
632 34891360 : move32();
633 : }
634 : }
635 :
636 : /* collect input signals, still in cldfb buffers */
637 27060840 : FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
638 : {
639 :
640 18066160 : input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
641 18066160 : move32();
642 18066160 : input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
643 18066160 : move32();
644 : }
645 :
646 : /* apply mixing matrix */
647 : {
648 : Word16 shifter;
649 8994680 : shifter = 31 - mixing_matrix_smooth_e;
650 :
651 63878200 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
652 : {
653 : int i;
654 : Word16 idx;
655 : Word64 temp_real, temp_imag;
656 :
657 :
658 54883520 : idx = ch_idx;
659 54883520 : temp_real = 0;
660 54883520 : temp_imag = 0;
661 54883520 : move64();
662 54883520 : move64();
663 165495360 : for ( i = 0; i < nX; i++ )
664 : {
665 110611840 : temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
666 110611840 : temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
667 110611840 : idx += nY;
668 : }
669 54883520 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
670 54883520 : move32();
671 54883520 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
672 54883520 : move32();
673 : }
674 : }
675 : }
676 : }
677 :
678 163667 : return;
679 : }
680 :
681 : /*-------------------------------------------------------------------*
682 : * computeMixingMatrices()
683 : *
684 : * compute a mixing matrix using the convariance synthesis approach
685 : *-------------------------------------------------------------------*/
686 :
687 132787 : Word16 computeMixingMatrices_fx(
688 : const Word16 num_inputs, /* i : number of input channels */
689 : const Word16 num_outputs, /* i : number of output channels */
690 : const Word32 *Cx, /* i : input channel covariance matrix Q(31-Cx_e) */
691 : Word16 Cx_e,
692 : const Word32 *Cy, /* i : target covariance matrix Q(31-Cy_e) */
693 : Word16 Cy_e,
694 : const Word32 *Q, /* i : prototype matrix (usually a upmix matrix) Q_fx_e */
695 : Word16 Q_fx_e,
696 : const Word16 energy_compensation_flag, /* i : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
697 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values */
698 : Word16 reg_Sx_e,
699 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
700 : Word16 reg_ghat_e,
701 : Word32 *mixing_matrix_fx, /* o : resulting mixing matrix Q(31-mixing_matrix_out_e) */
702 : Word16 *mixing_matrix_out_e,
703 : Word32 *Cr_fx, /* o : residual covariance matrix Q(31-Cr_e) */
704 : Word16 *Cr_e )
705 : {
706 : Word16 i, j;
707 : Word16 out;
708 : Word16 nL, nC;
709 132787 : Word16 lengthCx = num_inputs;
710 132787 : Word16 lengthCy = num_outputs;
711 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
712 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
713 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
714 : Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
715 : Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
716 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
717 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
718 : Word16 Cx_fx_e;
719 : Word16 Cy_fx_e;
720 :
721 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
722 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
723 :
724 :
725 : Word32 limit_fx;
726 : Word16 limit_e;
727 :
728 : Word32 L_tmp;
729 : Word16 tmp_e, tmp, exp;
730 :
731 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
732 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
733 :
734 : Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
735 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
736 :
737 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
738 : Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
739 :
740 : Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
741 : Word16 Q_e;
742 :
743 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
744 : Word16 Q_Cx_e;
745 :
746 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
747 : Word16 Cy_hat_diag_e;
748 : Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
749 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
750 : Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
751 :
752 : Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
753 :
754 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
755 :
756 : Word16 mixing_matrix_e;
757 :
758 : Word16 Cr_fx_e;
759 :
760 :
761 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
762 : Word16 adj_e[MAX_OUTPUT_CHANNELS];
763 : Word32 *adj_fx_p;
764 : Word16 adj_fx_e;
765 :
766 : Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
767 132787 : push_wmops( "dirac_cov_mix_mat" );
768 :
769 132787 : out = EXIT_SUCCESS;
770 132787 : move16();
771 :
772 132787 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
773 2257379 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
774 : {
775 2124592 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
776 2124592 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
777 2124592 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
778 : }
779 :
780 :
781 132787 : Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
782 132787 : Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
783 132787 : Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
784 :
785 132787 : Q_e = Q_fx_e;
786 132787 : move16();
787 132787 : Cx_fx_e = Cx_e;
788 132787 : move16();
789 132787 : Cy_fx_e = Cy_e;
790 132787 : move16();
791 :
792 :
793 : /*-----------------------------------------------------------------*
794 : * Decomposition of Cy
795 : *-----------------------------------------------------------------*/
796 :
797 : /* Processing the SVD */
798 :
799 132787 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
800 :
801 132787 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
802 132787 : Word16 max_e = -32;
803 : /* Computing Ky */
804 812472 : FOR( i = 0; i < lengthCy; ++i )
805 : {
806 4209670 : FOR( j = 0; j < lengthCy; ++j )
807 : {
808 3529985 : tmp_e = svd_s_buffer_e[j];
809 3529985 : move16();
810 3529985 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
811 3529985 : Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
812 3529985 : move32();
813 3529985 : Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
814 3529985 : move16();
815 3529985 : max_e = s_max( max_e, tmp_e );
816 : }
817 : }
818 3662772 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
819 : {
820 3529985 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
821 3529985 : move32();
822 3529985 : Ky_fx_e[i] = max_e;
823 3529985 : move16();
824 : }
825 :
826 : /*-----------------------------------------------------------------*
827 : * Decomposition of Cx
828 : *-----------------------------------------------------------------*/
829 :
830 : /* Processing the SVD */
831 :
832 132787 : mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
833 :
834 132787 : svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
835 132787 : max_e = -32;
836 399721 : FOR( i = 0; i < lengthCx; ++i )
837 : {
838 804882 : FOR( j = 0; j < lengthCx; ++j )
839 : {
840 537948 : tmp_e = svd_s_buffer_e[j];
841 537948 : move16();
842 537948 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
843 537948 : Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
844 537948 : move32();
845 537948 : Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
846 537948 : move16();
847 537948 : max_e = s_max( max_e, tmp_e );
848 : }
849 : }
850 670735 : FOR( i = 0; i < lengthCx * lengthCx; ++i )
851 : {
852 537948 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
853 537948 : move32();
854 537948 : Kx_fx_e[i] = max_e;
855 537948 : move16();
856 : }
857 :
858 399721 : FOR( i = 0; i < lengthCx; ++i )
859 : {
860 266934 : tmp_e = svd_s_buffer_e[i];
861 266934 : move16();
862 266934 : svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
863 266934 : move32();
864 266934 : svd_s_buffer_e[i] = tmp_e;
865 266934 : move16();
866 : }
867 :
868 : /*-----------------------------------------------------------------*
869 : * Regularization of Sx
870 : *-----------------------------------------------------------------*/
871 :
872 132787 : limit_fx = svd_s_buffer_fx[0];
873 132787 : move32();
874 132787 : limit_e = svd_s_buffer_e[0];
875 132787 : move16();
876 266934 : FOR( i = 1; i < lengthCx; i++ )
877 : {
878 134147 : IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) )
879 : {
880 0 : limit_fx = svd_s_buffer_fx[i];
881 0 : move32();
882 0 : limit_e = svd_s_buffer_e[i];
883 0 : move16();
884 : }
885 : }
886 :
887 132787 : limit_e = add( limit_e, reg_Sx_e );
888 132787 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx );
889 132787 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, SQRT_EPSILON_FX /* Q31 */, 0 ) < 0 )
890 : {
891 0 : limit_fx = SQRT_EPSILON_FX;
892 0 : move32();
893 0 : limit_e = 0;
894 0 : move16();
895 : }
896 :
897 399721 : FOR( i = 0; i < lengthCx; ++i )
898 : {
899 266934 : IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) )
900 : {
901 58524 : svd_s_buffer_fx[i] = limit_fx;
902 58524 : move32();
903 58524 : svd_s_buffer_e[i] = limit_e;
904 58524 : move16();
905 : }
906 : }
907 :
908 :
909 132787 : limit_fx = 0;
910 132787 : move32();
911 132787 : limit_e = 0;
912 132787 : move16();
913 :
914 :
915 : /*-----------------------------------------------------------------*
916 : * regularized Kx-1
917 : *-----------------------------------------------------------------*/
918 :
919 399721 : FOR( i = 0; i < lengthCx; ++i )
920 : {
921 : Word16 scale, reg_fac_fx;
922 266934 : reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
923 266934 : scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
924 804882 : FOR( j = 0; j < lengthCx; ++j )
925 : {
926 537948 : Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
927 537948 : move32();
928 537948 : Kx_reg_inv_e[i + j * lengthCx] = scale;
929 537948 : move16();
930 : }
931 : }
932 :
933 : /*-----------------------------------------------------------------*
934 : * normalization matrix G hat
935 : *-----------------------------------------------------------------*/
936 :
937 : /* Computing Q*Cx*Q' */
938 :
939 132787 : matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
940 :
941 132787 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
942 :
943 :
944 132787 : Word16 com_e = sub( limit_e, Cy_hat_diag_e );
945 812472 : FOR( i = 0; i < lengthCy; ++i )
946 : {
947 679685 : IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
948 : {
949 679685 : limit_fx = Cy_hat_diag_fx[i];
950 679685 : move32();
951 679685 : limit_e = Cy_hat_diag_e;
952 679685 : move16();
953 : }
954 : }
955 132787 : limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
956 132787 : limit_e = add( limit_e, reg_ghat_e );
957 :
958 132787 : com_e = sub( Cy_hat_diag_e, limit_e );
959 812472 : FOR( i = 0; i < lengthCy; ++i )
960 : {
961 679685 : Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
962 679685 : move16();
963 :
964 679685 : IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) )
965 : {
966 15 : Cy_hat_diag_fx[i] = limit_fx;
967 15 : move32();
968 15 : Cy_hat_diag_buff_e[i] = limit_e;
969 15 : move16();
970 : }
971 :
972 679685 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
973 679685 : exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
974 679685 : L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
975 679685 : G_hat_fx[i] = L_tmp;
976 679685 : move32();
977 679685 : G_hat_buff_e[i] = exp;
978 679685 : move16();
979 : }
980 :
981 :
982 : /*-----------------------------------------------------------------*
983 : * Formulate optimal P
984 : *-----------------------------------------------------------------*/
985 :
986 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
987 :
988 : Word16 mat_mult_buffer1_fx_e;
989 :
990 : Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
991 :
992 132787 : matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
993 :
994 132787 : matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
995 :
996 132787 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
997 :
998 132787 : IF( LT_16( lengthCx, lengthCy ) )
999 : {
1000 132787 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
1001 132787 : nL = lengthCy;
1002 132787 : move16();
1003 132787 : nC = lengthCx;
1004 132787 : move16();
1005 132787 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1006 : }
1007 : ELSE
1008 : {
1009 0 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1010 0 : nL = lengthCx;
1011 0 : move16();
1012 0 : nC = lengthCy;
1013 0 : move16();
1014 0 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1015 : }
1016 :
1017 : /* Actually Processing P */
1018 :
1019 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
1020 : truncates V to num_input_channel columns */
1021 :
1022 132787 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1023 132787 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1024 :
1025 132787 : mat_mult_buffer1_fx_e = 0;
1026 132787 : move16();
1027 132787 : mat_mult_buffer2_e = 0;
1028 132787 : move16();
1029 :
1030 132787 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
1031 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1032 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1033 :
1034 : /************************ Formulate M **********************/
1035 :
1036 132787 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1037 :
1038 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
1039 :
1040 : Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1041 132787 : set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1042 :
1043 132787 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
1044 :
1045 : /*-----------------------------------------------------------------*
1046 : * Formulate Cr
1047 : *-----------------------------------------------------------------*/
1048 :
1049 : /* Compute Cy_tilde = M*Cx*M' */
1050 :
1051 :
1052 : Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
1053 132787 : set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
1054 132787 : matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
1055 :
1056 132787 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
1057 :
1058 132787 : exp = mixing_matrix_fx_e[0];
1059 132787 : move16();
1060 1372340 : FOR( i = 1; i < lengthCy * lengthCx; i++ )
1061 : {
1062 1239553 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1063 : {
1064 97373 : exp = mixing_matrix_fx_e[i];
1065 97373 : move16();
1066 : }
1067 : }
1068 :
1069 1505127 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1070 : {
1071 1372340 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1072 1372340 : move32();
1073 : }
1074 :
1075 132787 : mixing_matrix_e = exp;
1076 132787 : move16();
1077 :
1078 132787 : Cr_p_fx = Cr_fx;
1079 132787 : Cy_p_fx = Cy_fx;
1080 132787 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1081 : Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1082 812472 : FOR( i = 0; i < lengthCy; ++i )
1083 : {
1084 4209670 : FOR( j = 0; j < lengthCy; ++j )
1085 : {
1086 :
1087 3529985 : *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
1088 3529985 : move32();
1089 3529985 : Cr_p_fx++;
1090 3529985 : Cy_p_fx++;
1091 3529985 : Cy_tilde_p_fx++;
1092 : }
1093 :
1094 : /* Avoid Meaningless negative main diagonal elements */
1095 679685 : IF( Cr_fx[i + ( i * lengthCy )] < 0 )
1096 : {
1097 76181 : Cr_fx[i + ( i * lengthCy )] = 0;
1098 76181 : move32();
1099 76181 : Cr_e_arr[i + ( i * lengthCy )] = 0;
1100 76181 : move16();
1101 : }
1102 : }
1103 :
1104 132787 : exp = Cr_e_arr[0];
1105 132787 : move16();
1106 3529985 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1107 : {
1108 3397198 : if ( LT_16( exp, Cr_e_arr[i] ) )
1109 : {
1110 126463 : exp = Cr_e_arr[i];
1111 126463 : move16();
1112 : }
1113 : }
1114 :
1115 3662772 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1116 : {
1117 3529985 : Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
1118 3529985 : move32();
1119 : }
1120 :
1121 132787 : Cr_fx_e = exp;
1122 132787 : move16();
1123 :
1124 132787 : exp = mat_mult_buffer2_fx_e[0];
1125 132787 : move16();
1126 3529985 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1127 : {
1128 3397198 : if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
1129 : {
1130 184298 : exp = mat_mult_buffer2_fx_e[i];
1131 184298 : move16();
1132 : }
1133 : }
1134 :
1135 3662772 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1136 : {
1137 3529985 : mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
1138 3529985 : move32();
1139 : }
1140 :
1141 132787 : mat_mult_buffer2_e = exp;
1142 132787 : move16();
1143 :
1144 : /*-----------------------------------------------------------------*
1145 : * Energy Compensation
1146 : *-----------------------------------------------------------------*/
1147 :
1148 132787 : IF( EQ_16( energy_compensation_flag, 1 ) )
1149 : {
1150 25938 : adj_fx_p = svd_s_buffer_fx;
1151 25938 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1152 :
1153 158348 : FOR( i = 0; i < lengthCy; ++i )
1154 : {
1155 : /* Avoid correction for very small energies,
1156 : main diagonal elements of Cy_tilde_p may be negative */
1157 132410 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
1158 : {
1159 0 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1160 0 : move32();
1161 0 : adj_e[i] = 1;
1162 0 : move16();
1163 : }
1164 : ELSE
1165 : {
1166 132410 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
1167 132410 : exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
1168 132410 : L_tmp = L_deposit_h( tmp );
1169 132410 : L_tmp = Sqrt32( L_tmp, &exp );
1170 132410 : adj_fx_p[i] = L_tmp;
1171 132410 : move32();
1172 132410 : adj_e[i] = exp;
1173 132410 : move16();
1174 : }
1175 :
1176 132410 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
1177 132410 : IF( GT_32( adj_fx_p[i], temp ) )
1178 : {
1179 6 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1180 6 : move32();
1181 6 : adj_e[i] = 3;
1182 6 : move16();
1183 : }
1184 : }
1185 :
1186 25938 : exp = adj_e[0];
1187 25938 : move16();
1188 132410 : FOR( i = 1; i < lengthCy; i++ )
1189 : {
1190 106472 : if ( LT_16( exp, adj_e[i] ) )
1191 : {
1192 1422 : exp = adj_e[i];
1193 1422 : move16();
1194 : }
1195 : }
1196 :
1197 158348 : FOR( i = 0; i < lengthCy; i++ )
1198 : {
1199 132410 : adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
1200 132410 : move32();
1201 : }
1202 25938 : adj_fx_e = exp;
1203 25938 : move16();
1204 :
1205 25938 : diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1206 :
1207 25938 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
1208 25938 : mixing_matrix_e = mat_mult_buffer3_e;
1209 25938 : move16();
1210 : }
1211 :
1212 132787 : *mixing_matrix_out_e = mixing_matrix_e;
1213 132787 : move16();
1214 132787 : *Cr_e = Cr_fx_e;
1215 132787 : move16();
1216 132787 : pop_wmops();
1217 :
1218 132787 : return out;
1219 : }
1220 :
1221 : /*-------------------------------------------------------------------*
1222 : * computeMixingMatricesResidual()
1223 : *
1224 : * compute a residual mixing matrix using the covariance synthesis approach
1225 : *-------------------------------------------------------------------*/
1226 :
1227 106849 : Word16 computeMixingMatricesResidual_fx(
1228 : const Word32 num_outputs, /* i : number of output channels */
1229 : const Word32 *Cx_fx, /* i : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e) */
1230 : const Word16 Cx_e,
1231 : const Word32 *Cy_fx, /* i : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
1232 : const Word16 Cy_fx_e,
1233 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values Q(31-reg_Sx_e) */
1234 : const Word16 reg_Sx_e,
1235 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
1236 : const Word16 reg_ghat_e,
1237 : Word32 *mixing_matrix_fx, /* o : resulting residual mixing matrix Q(31-mixing_matrix_ret_e) */
1238 : Word16 *mixing_matrix_ret_e )
1239 : {
1240 : Word16 i, j;
1241 : Word16 out, lengthCx, lengthCy;
1242 106849 : out = EXIT_SUCCESS;
1243 106849 : move16();
1244 106849 : lengthCx = extract_l( num_outputs );
1245 106849 : lengthCy = extract_l( num_outputs );
1246 :
1247 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1248 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1249 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1250 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1251 :
1252 : Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1253 :
1254 106849 : push_wmops( "dirac_cov_mix_mat_r" );
1255 :
1256 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1257 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
1258 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1259 106849 : Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
1260 106849 : move16();
1261 :
1262 106849 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
1263 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1264 : Word32 L_tmp;
1265 : Word16 tmp_e;
1266 : Word16 tmp, scale;
1267 : Word32 div_tmp;
1268 : Word16 exp;
1269 :
1270 : Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
1271 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1272 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1273 : Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
1274 :
1275 : Word32 limit_fx;
1276 : Word16 limit_e;
1277 :
1278 : Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
1279 : Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
1280 :
1281 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1282 : Word16 Cy_hat_diag_e;
1283 :
1284 : Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
1285 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1286 : Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
1287 :
1288 : Word32 *adj_fx_p;
1289 : Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
1290 :
1291 : Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
1292 : Word16 Cy_tilde_e;
1293 :
1294 : /*-----------------------------------------------------------------*
1295 : * Decomposition of Cy
1296 : *-----------------------------------------------------------------*/
1297 :
1298 : /* Processing the SVD */
1299 :
1300 : /* linear array to svd buffer */
1301 106849 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
1302 :
1303 106849 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
1304 :
1305 : /* Computing Ky */
1306 106849 : Word16 max_e = -32;
1307 654124 : FOR( i = 0; i < lengthCy; ++i )
1308 : {
1309 3391850 : FOR( j = 0; j < lengthCy; ++j )
1310 : {
1311 2844575 : tmp_e = svd_s_buffer_e[j];
1312 2844575 : move16();
1313 2844575 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
1314 2844575 : Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
1315 2844575 : move32();
1316 2844575 : Ky_fx_e[i + j * lengthCy] = tmp_e;
1317 2844575 : move16();
1318 2844575 : max_e = s_max( max_e, tmp_e );
1319 : }
1320 : }
1321 :
1322 2951424 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
1323 : {
1324 2844575 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
1325 2844575 : move32();
1326 2844575 : Ky_fx_e[i] = max_e;
1327 2844575 : move16();
1328 : }
1329 :
1330 : /*-----------------------------------------------------------------*
1331 : * Decomposition of Cx
1332 : *-----------------------------------------------------------------*/
1333 :
1334 : /* Processing the SVD of Cx*/
1335 : /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
1336 : * would be just indicating the sorting index, so go straight to Kx as the
1337 : * square root of the diagonal of Cx */
1338 :
1339 : /* Computing Kx */
1340 106849 : max_e = -32;
1341 654124 : FOR( i = 0; i < lengthCx; ++i )
1342 : {
1343 547275 : exp = Cx_e;
1344 547275 : move16();
1345 547275 : Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
1346 547275 : move32();
1347 547275 : Kx_fx_e[i] = exp;
1348 547275 : move16();
1349 547275 : max_e = s_max( max_e, exp );
1350 : }
1351 :
1352 654124 : FOR( i = 0; i < lengthCx; ++i )
1353 : {
1354 547275 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
1355 547275 : move32();
1356 547275 : Kx_fx_e[i] = max_e;
1357 547275 : move16();
1358 : }
1359 :
1360 : /*-----------------------------------------------------------------*
1361 : * Regularization of Sx
1362 : *-----------------------------------------------------------------*/
1363 :
1364 106849 : limit_fx = Kx_fx[0];
1365 106849 : move32();
1366 :
1367 547275 : FOR( i = 1; i < lengthCx; i++ )
1368 : {
1369 440426 : IF( GT_32( Kx_fx[i], limit_fx ) )
1370 : {
1371 165660 : limit_fx = Kx_fx[i];
1372 165660 : move32();
1373 : }
1374 : }
1375 :
1376 106849 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
1377 106849 : L_tmp = L_add( L_tmp, EPSILLON_FX );
1378 106849 : limit_fx = L_tmp;
1379 106849 : move16();
1380 106849 : limit_e = add( Kx_fx_e[0], reg_Sx_e );
1381 :
1382 654124 : FOR( i = 0; i < lengthCx; ++i )
1383 : {
1384 547275 : IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) )
1385 : {
1386 545970 : div_tmp = Kx_fx[i];
1387 545970 : move32();
1388 545970 : exp = Kx_fx_e[i];
1389 545970 : move16();
1390 : }
1391 : ELSE
1392 : {
1393 1305 : div_tmp = limit_fx;
1394 1305 : move32();
1395 1305 : exp = limit_e;
1396 1305 : move16();
1397 : }
1398 547275 : tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
1399 547275 : scale = add( scale, sub( Q1, exp ) );
1400 :
1401 547275 : Kx_reg_inv_fx[i] = L_deposit_h( tmp );
1402 547275 : move32();
1403 547275 : Kx_reg_inv_e[i] = scale;
1404 547275 : move16();
1405 : }
1406 :
1407 106849 : limit_fx = 0;
1408 106849 : move32();
1409 106849 : limit_e = 0;
1410 106849 : move16();
1411 :
1412 : /*-----------------------------------------------------------------*
1413 : * regularized Kx-1
1414 : *-----------------------------------------------------------------*/
1415 :
1416 : /*-----------------------------------------------------------------*
1417 : * normalization matrix G hat
1418 : *-----------------------------------------------------------------*/
1419 :
1420 : /* Computing Cy_hat_diag */
1421 106849 : Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
1422 106849 : Cy_hat_diag_e = Cx_e;
1423 106849 : move16();
1424 :
1425 106849 : Word16 com_e = sub( limit_e, Cy_hat_diag_e );
1426 654124 : FOR( i = 0; i < lengthCy; ++i )
1427 : {
1428 547275 : IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
1429 : {
1430 547275 : limit_fx = Cy_hat_diag_fx[i];
1431 547275 : move32();
1432 547275 : limit_e = Cy_hat_diag_e;
1433 547275 : move16();
1434 : }
1435 : }
1436 :
1437 106849 : limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
1438 106849 : limit_e = add( limit_e, reg_ghat_e );
1439 :
1440 : /* Computing G_hat */
1441 :
1442 106849 : com_e = sub( Cy_hat_diag_e, limit_e );
1443 654124 : FOR( i = 0; i < lengthCy; ++i )
1444 : {
1445 547275 : Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
1446 547275 : move16();
1447 547275 : IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1448 : {
1449 15 : Cy_hat_diag_fx[i] = limit_fx;
1450 15 : move32();
1451 15 : Cy_hat_diag_fx_e[i] = limit_e;
1452 15 : move16();
1453 : }
1454 547275 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
1455 547275 : scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
1456 547275 : L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
1457 :
1458 547275 : G_hat_fx[i] = L_tmp;
1459 547275 : move32();
1460 547275 : G_hat_e[i] = scale;
1461 547275 : move16();
1462 : }
1463 :
1464 : /*-----------------------------------------------------------------*
1465 : * Formulate optimal P
1466 : *-----------------------------------------------------------------*/
1467 :
1468 654124 : FOR( i = 0; i < num_outputs; i++ )
1469 : {
1470 547275 : Kx_fx[i] = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
1471 547275 : move32();
1472 547275 : Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
1473 547275 : move16();
1474 : }
1475 :
1476 654124 : FOR( i = 0; i < num_outputs; i++ )
1477 : {
1478 : Word32 fac_fx;
1479 547275 : fac_fx = Kx_fx[i];
1480 547275 : move32();
1481 :
1482 3391850 : FOR( j = 0; j < num_outputs; j++ )
1483 : {
1484 2844575 : mat_mult_buffer1_fx[i + j * num_outputs] = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e));
1485 2844575 : move32();
1486 2844575 : mat_mult_buffer1_buff_e[i + j * num_outputs] = add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] );
1487 2844575 : move16();
1488 : }
1489 : }
1490 :
1491 106849 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
1492 106849 : move16();
1493 2844575 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1494 : {
1495 2737726 : if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
1496 : {
1497 29410 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
1498 29410 : move16();
1499 : }
1500 : }
1501 :
1502 2951424 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1503 : {
1504 2844575 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
1505 2844575 : move32();
1506 : }
1507 :
1508 106849 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1509 :
1510 106849 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
1511 :
1512 : /* Actually Processing P */
1513 :
1514 106849 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1515 106849 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1516 106849 : mat_mult_buffer1_e = 0;
1517 106849 : move16();
1518 106849 : mat_mult_buffer2_e = 0;
1519 106849 : move16();
1520 :
1521 106849 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
1522 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1523 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1524 :
1525 : /*-----------------------------------------------------------------*
1526 : * Formulate M
1527 : *-----------------------------------------------------------------*/
1528 :
1529 :
1530 106849 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1531 106849 : set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1532 :
1533 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1534 :
1535 654124 : FOR( i = 0; i < num_outputs; i++ )
1536 : {
1537 : Word32 fac_fx;
1538 547275 : fac_fx = Kx_reg_inv_fx[i];
1539 547275 : move32();
1540 :
1541 3391850 : FOR( j = 0; j < num_outputs; j++ )
1542 : {
1543 2844575 : mixing_matrix_fx[j + i * num_outputs] = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e);
1544 2844575 : move32();
1545 2844575 : mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
1546 2844575 : move16();
1547 : }
1548 : }
1549 :
1550 : /*-----------------------------------------------------------------*
1551 : * Formulate Cr
1552 : *-----------------------------------------------------------------*/
1553 :
1554 : /* Compute Cy_tilde = M*Cx*M' */
1555 :
1556 : Word16 Cx_e_arr[MAX_CICP_CHANNELS];
1557 106849 : set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
1558 :
1559 106849 : matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1560 :
1561 106849 : exp = mixing_matrix_fx_e[0];
1562 106849 : move16();
1563 2844575 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1564 : {
1565 2737726 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1566 : {
1567 11815 : exp = mixing_matrix_fx_e[i];
1568 11815 : move16();
1569 : }
1570 : }
1571 :
1572 2951424 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1573 : {
1574 2844575 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1575 2844575 : move32();
1576 : }
1577 106849 : mixing_matrix_e = exp;
1578 106849 : move16();
1579 :
1580 106849 : exp = mat_mult_buffer1_buff_e[0];
1581 106849 : move16();
1582 2844575 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1583 : {
1584 2737726 : if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
1585 : {
1586 11815 : exp = mat_mult_buffer1_buff_e[i];
1587 11815 : move16();
1588 : }
1589 : }
1590 :
1591 2951424 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1592 : {
1593 2844575 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
1594 2844575 : move32();
1595 : }
1596 106849 : mat_mult_buffer1_e = exp;
1597 106849 : move16();
1598 :
1599 106849 : matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
1600 :
1601 : /*-----------------------------------------------------------------*
1602 : * Energy Compensation
1603 : *-----------------------------------------------------------------*/
1604 :
1605 106849 : adj_fx_p = svd_s_buffer_fx;
1606 :
1607 654124 : FOR( i = 0; i < lengthCy; ++i )
1608 : {
1609 547275 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
1610 547275 : scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
1611 547275 : adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
1612 547275 : move32();
1613 547275 : adj_buff_e[i] = scale;
1614 547275 : move16();
1615 547275 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
1616 547275 : IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
1617 : {
1618 2 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1619 2 : move32();
1620 2 : adj_buff_e[i] = 3;
1621 2 : move16();
1622 : }
1623 : }
1624 :
1625 106849 : adj_e = adj_buff_e[0];
1626 106849 : move16();
1627 :
1628 547275 : FOR( i = 1; i < lengthCy; i++ )
1629 : {
1630 440426 : if ( LT_16( adj_e, adj_buff_e[i] ) )
1631 : {
1632 42965 : adj_e = adj_buff_e[i];
1633 42965 : move16();
1634 : }
1635 : }
1636 :
1637 :
1638 654124 : FOR( i = 0; i < lengthCy; i++ )
1639 : {
1640 547275 : adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
1641 547275 : move32();
1642 : }
1643 :
1644 106849 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1645 106849 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
1646 106849 : *mixing_matrix_ret_e = mat_mult_buffer3_e;
1647 106849 : move16();
1648 :
1649 106849 : pop_wmops();
1650 :
1651 106849 : return out;
1652 : }
1653 :
1654 :
1655 : /*-------------------------------------------------------------------*
1656 : * computeMixingMatricesISM()
1657 : *
1658 : *
1659 : *-------------------------------------------------------------------*/
1660 315180 : Word16 computeMixingMatricesISM_fx(
1661 : const Word16 num_inputs,
1662 : const Word16 num_responses,
1663 : const Word16 num_outputs,
1664 : const Word32 *responses_fx, /*Q(31-responses_e) */
1665 : const Word16 responses_e,
1666 : const Word32 *ener_fx, /*Q(31-ener_e) */
1667 : const Word16 ener_e,
1668 : const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
1669 : const Word16 Cx_diag_e,
1670 : const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
1671 : const Word16 Cy_diag_e,
1672 : const Word16 *Q_16fx, // Q15
1673 : const Word16 energy_compensation_flag,
1674 : const Word32 reg_Sx_fx, /*Q0*/
1675 : const Word32 reg_ghat_fx, /*Q0*/
1676 : Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
1677 : Word16 *mixing_matrix_e )
1678 : {
1679 : Word16 i, out;
1680 : Word16 lengthCx, lengthCy;
1681 : Word16 nL, nC;
1682 :
1683 : Word32 *Cy_tilde_p_fx;
1684 : Word32 *adj_fx;
1685 : Word32 limit_fx;
1686 :
1687 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1688 : Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1689 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1690 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1691 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1692 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1693 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
1694 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1695 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
1696 : Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
1697 :
1698 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1699 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1700 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
1701 : Word16 svd_s_buffer_fx_e;
1702 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1703 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1704 :
1705 : Word16 temp_e[MAX_OUTPUT_CHANNELS];
1706 315180 : push_wmops( "dirac_cov_mix_mat" );
1707 :
1708 315180 : out = EXIT_SUCCESS;
1709 315180 : move16();
1710 315180 : lengthCx = num_inputs;
1711 315180 : move16();
1712 315180 : lengthCy = num_outputs;
1713 315180 : move16();
1714 :
1715 6073020 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1716 : {
1717 5757840 : IF( EQ_16( Q_16fx[i], MAX_16 ) )
1718 : {
1719 2575200 : Q_fx[i] = MAX_32;
1720 2575200 : move32();
1721 : }
1722 : ELSE
1723 : {
1724 3182640 : Q_fx[i] = L_deposit_h( Q_16fx[i] );
1725 3182640 : move32();
1726 : }
1727 : }
1728 315180 : Q_e = 0;
1729 315180 : move16();
1730 315180 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
1731 5358060 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
1732 : {
1733 5042880 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1734 5042880 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1735 5042880 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1736 : }
1737 :
1738 : /* Decomposition of Cy = Ky*Ky' */
1739 : /* Ky = responses*diag(ener) */
1740 315180 : matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
1741 :
1742 : /* Decomposition of Cx -> Computing Kx */
1743 315180 : set16_fx( temp_e, Cx_diag_e, lengthCx );
1744 315180 : v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
1745 315180 : Kx_e = temp_e[0];
1746 315180 : move16();
1747 630360 : FOR( i = 1; i < lengthCx; i++ )
1748 : {
1749 315180 : Kx_e = s_max( Kx_e, temp_e[i] );
1750 : }
1751 945540 : FOR( i = 0; i < lengthCx; i++ )
1752 : {
1753 630360 : Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
1754 630360 : move32();
1755 : }
1756 :
1757 : /* Regularization of Sx */
1758 315180 : maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
1759 315180 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
1760 :
1761 945540 : FOR( i = 0; i < lengthCx; ++i )
1762 : {
1763 630360 : IF( GT_32( Kx_fx[i], limit_fx ) )
1764 : {
1765 595280 : svd_s_buffer_fx[i] = Kx_fx[i];
1766 595280 : move32();
1767 : }
1768 : ELSE
1769 : {
1770 35080 : svd_s_buffer_fx[i] = limit_fx;
1771 35080 : move32();
1772 : }
1773 : }
1774 315180 : svd_s_buffer_fx_e = Kx_e;
1775 315180 : move16();
1776 :
1777 315180 : limit_fx = 0;
1778 315180 : move32();
1779 :
1780 : /* regularized Kx-1 */
1781 :
1782 945540 : FOR( i = 0; i < lengthCx; ++i )
1783 : {
1784 630360 : IF( svd_s_buffer_fx[i] )
1785 : {
1786 : Word32 reg_fac;
1787 630354 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
1788 630354 : Kx_reg_inv_fx[i] = reg_fac;
1789 630354 : move32();
1790 630354 : temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
1791 630354 : move16();
1792 : }
1793 : ELSE
1794 : {
1795 : Word32 reg_fac;
1796 6 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
1797 6 : Kx_reg_inv_fx[i] = reg_fac;
1798 6 : move32();
1799 6 : temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
1800 6 : move16();
1801 : }
1802 : }
1803 315180 : Kx_reg_inv_e = temp_e[0];
1804 315180 : move16();
1805 630360 : FOR( i = 1; i < lengthCx; i++ )
1806 : {
1807 315180 : Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
1808 : }
1809 945540 : FOR( i = 0; i < lengthCx; i++ )
1810 : {
1811 630360 : Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
1812 630360 : move32();
1813 : }
1814 :
1815 : /************************ normalization matrix G hat **********************/
1816 :
1817 : /* Computing Q*Cx*Q' */
1818 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1819 : Word16 Cy_hat_diag_e;
1820 :
1821 315180 : matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
1822 :
1823 315180 : Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
1824 :
1825 6073020 : FOR( i = 0; i < lengthCy * lengthCx; ++i )
1826 : {
1827 5757840 : IF( GT_16( Q_Cx_e, Q_e ) )
1828 : {
1829 5757782 : Q_fx[i] = L_shr( Q_fx[i], guard_bits );
1830 5757782 : move32();
1831 : }
1832 : ELSE
1833 : {
1834 58 : Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
1835 58 : move32();
1836 : }
1837 : }
1838 :
1839 315180 : IF( GT_16( Q_Cx_e, Q_e ) )
1840 : {
1841 315177 : Q_e = add( Q_e, guard_bits );
1842 : }
1843 : ELSE
1844 : {
1845 3 : Q_Cx_e = add( Q_Cx_e, guard_bits );
1846 : }
1847 315180 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
1848 :
1849 : /* Computing Cy_hat_diag */
1850 3194100 : FOR( i = 0; i < lengthCy; ++i )
1851 : {
1852 2878920 : if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
1853 : {
1854 467644 : limit_fx = Cy_hat_diag_fx[i];
1855 467644 : move32();
1856 : }
1857 : }
1858 :
1859 315180 : limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
1860 :
1861 : /* Computing G_hat */
1862 3194100 : FOR( i = 0; i < lengthCy; ++i )
1863 : {
1864 2878920 : if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1865 : {
1866 12522 : Cy_hat_diag_fx[i] = limit_fx;
1867 12522 : move32();
1868 : }
1869 2878920 : IF( Cy_diag_fx[i] )
1870 : {
1871 1875780 : IF( Cy_hat_diag_fx[i] )
1872 : {
1873 1875780 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
1874 1875780 : move32();
1875 1875780 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
1876 1875780 : move16();
1877 1875780 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1878 1875780 : move32();
1879 : }
1880 : ELSE
1881 : {
1882 0 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1883 0 : move32();
1884 0 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1885 0 : move16();
1886 0 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1887 0 : move32();
1888 : }
1889 : }
1890 : ELSE
1891 : {
1892 1003140 : G_hat_fx[i] = 0;
1893 1003140 : move32();
1894 1003140 : temp_e[i] = 0;
1895 1003140 : move16();
1896 : }
1897 : }
1898 315180 : G_hat_e = temp_e[0];
1899 315180 : move16();
1900 2878920 : FOR( i = 1; i < lengthCy; i++ )
1901 : {
1902 2563740 : G_hat_e = s_max( G_hat_e, temp_e[i] );
1903 : }
1904 3194100 : FOR( i = 0; i < lengthCy; i++ )
1905 : {
1906 2878920 : G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
1907 2878920 : move32();
1908 : }
1909 :
1910 : /************************ Formulate optimal P **********************/
1911 :
1912 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
1913 315180 : diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1914 :
1915 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1916 :
1917 315180 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1918 :
1919 315180 : IF( LT_16( lengthCx, num_responses ) )
1920 : {
1921 3600 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
1922 :
1923 3600 : nL = num_responses;
1924 3600 : move16();
1925 3600 : nC = lengthCx;
1926 3600 : move16();
1927 3600 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1928 : }
1929 : ELSE
1930 : {
1931 311580 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
1932 :
1933 311580 : nL = lengthCx;
1934 311580 : move16();
1935 311580 : nC = num_responses;
1936 311580 : move16();
1937 311580 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1938 : }
1939 :
1940 : /* Actually Processing P */
1941 :
1942 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
1943 315180 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
1944 315180 : mat_mult_buffer1_e = 0;
1945 315180 : move16();
1946 315180 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1947 315180 : mat_mult_buffer2_e = 0;
1948 315180 : move16();
1949 :
1950 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1951 :
1952 : /************************ Formulate M **********************/
1953 :
1954 315180 : matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1955 :
1956 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
1957 :
1958 : /*********************** Energy Compensation ****************/
1959 :
1960 : /* Compute Cy_tilde = M*Cx*M' */
1961 315180 : matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1962 :
1963 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1964 :
1965 315180 : IF( EQ_16( energy_compensation_flag, 1 ) )
1966 : {
1967 315180 : adj_fx = svd_s_buffer_fx;
1968 315180 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1969 3194100 : FOR( i = 0; i < lengthCy; ++i )
1970 : {
1971 : /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
1972 2878920 : IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
1973 : {
1974 0 : adj_fx[i] = MAX_32;
1975 0 : move32();
1976 0 : temp_e[i] = 0;
1977 0 : move16();
1978 : }
1979 : ELSE
1980 : {
1981 2878920 : IF( Cy_diag_fx[i] )
1982 : {
1983 1875780 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
1984 : {
1985 1875750 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
1986 1875750 : move32();
1987 1875750 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
1988 1875750 : move16();
1989 1875750 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1990 1875750 : move32();
1991 : }
1992 : ELSE
1993 : {
1994 30 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1995 30 : move32();
1996 30 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1997 30 : move16();
1998 30 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1999 30 : move32();
2000 : }
2001 : }
2002 : ELSE
2003 : {
2004 1003140 : adj_fx[i] = 0;
2005 1003140 : move32();
2006 1003140 : temp_e[i] = 0;
2007 1003140 : move16();
2008 : }
2009 : }
2010 :
2011 2878920 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
2012 2878920 : IF( GT_32( adj_fx[i], temp ) )
2013 : {
2014 1621 : adj_fx[i] = MAX_32;
2015 1621 : move32();
2016 1621 : temp_e[i] = 2;
2017 1621 : move16();
2018 : }
2019 : }
2020 315180 : adj_e = temp_e[0];
2021 315180 : move16();
2022 2878920 : FOR( i = 1; i < lengthCy; i++ )
2023 : {
2024 2563740 : adj_e = s_max( adj_e, temp_e[i] );
2025 : }
2026 3194100 : FOR( i = 0; i < lengthCy; i++ )
2027 : {
2028 2878920 : adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
2029 2878920 : move32();
2030 : }
2031 :
2032 315180 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
2033 :
2034 315180 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
2035 315180 : *mixing_matrix_e = mat_mult_buffer3_e;
2036 315180 : move16();
2037 : }
2038 :
2039 315180 : pop_wmops();
2040 :
2041 315180 : return out;
2042 : }
|