Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include "basop_util.h"
34 : #include "enh32.h"
35 : #include <stdint.h>
36 : #include <string.h>
37 : #include <stdio.h>
38 : #include <stdlib.h>
39 : #include <assert.h>
40 : #include <math.h>
41 : #include "options.h"
42 : #include "cnst.h"
43 : #include "rom_enc.h"
44 : #include "rom_com.h"
45 : #include "prot_fx.h"
46 : #include "ivas_stat_dec.h"
47 : #include "ivas_cnst.h"
48 : #include "ivas_rom_com.h"
49 : #include "ivas_rom_dec.h"
50 : #include "wmc_auto.h"
51 : #include "rom_dec.h"
52 : #include "ivas_prot_fx.h"
53 :
54 : /*-----------------------------------------------------------------------*
55 : * Local constants
56 : *-----------------------------------------------------------------------*/
57 :
58 : #define SQRT_EPSILON_FX 68 /* Q31 square root of EPSILON */
59 :
60 : /*-------------------------------------------------------------------*
61 : * ivas_dirac_dec_output_synthesis_cov_open()
62 : *
63 : * Sets up the state and parameters for the Covariance Synthesis
64 : *-------------------------------------------------------------------*/
65 :
66 341 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
67 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
68 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state */
69 : const Word16 max_band_decorr, /* i : uppermost frequency band where decorrelation is applied */
70 : const Word16 interp_length, /* i : length for interpolating the mixing matrices in time slots */
71 : const Word16 num_param_bands, /* i : number of parameter bands */
72 : const Word16 num_param_bands_residual, /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
73 : const Word16 nchan_in, /* i : number of input (transport) channels */
74 : const Word16 nchan_out, /* i : number of output channels */
75 : const Word32 *proto_matrix /* i : the prototype (upmix) matrix (only used if mode == 1) Q(15-proto_matrix_e) */
76 : )
77 : {
78 : Word16 idx;
79 :
80 341 : h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
81 341 : move16();
82 :
83 : /*-----------------------------------------------------------------*
84 : * memory allocation
85 : *-----------------------------------------------------------------*/
86 :
87 : /* buffer length and interpolator */
88 341 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
89 341 : IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
90 : {
91 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
92 : }
93 341 : h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
94 341 : move16();
95 : /* cov buffers */
96 4445 : FOR( idx = 0; idx < num_param_bands; idx++ )
97 : {
98 4104 : IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
99 : {
100 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
101 : }
102 4104 : h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
103 4104 : IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
104 : {
105 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
106 : }
107 4104 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
108 : {
109 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
110 : }
111 4104 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
112 4104 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
113 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
114 :
115 4104 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
116 : {
117 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
118 : }
119 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
120 4104 : h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
121 4104 : move16();
122 : }
123 16697 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
124 : {
125 16356 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
126 16356 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
127 16356 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
128 16356 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
129 : }
130 :
131 3563 : FOR( idx = 0; idx < num_param_bands_residual; idx++ )
132 : {
133 3222 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
134 : {
135 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
136 : }
137 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
138 :
139 3222 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
140 : {
141 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
142 : }
143 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
144 3222 : h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
145 3222 : move16();
146 : }
147 17579 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
148 : {
149 17238 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
150 17238 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
151 : }
152 :
153 341 : IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
154 : {
155 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
156 : }
157 341 : IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
158 : {
159 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
160 : }
161 :
162 341 : set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
163 341 : set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
164 :
165 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
166 : {
167 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
168 : }
169 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
170 :
171 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
172 : {
173 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
174 : }
175 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
176 :
177 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
178 : {
179 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
180 : }
181 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
182 :
183 341 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
184 : {
185 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
186 : }
187 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
188 :
189 : /*-----------------------------------------------------------------*
190 : * prepare processing parameters
191 : *-----------------------------------------------------------------*/
192 :
193 : /* compute interpolator */
194 341 : IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
195 : {
196 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
197 : }
198 11253 : FOR( idx = 1; idx <= interp_length; ++idx )
199 : {
200 10912 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
201 10912 : move16();
202 : }
203 341 : Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
204 341 : h_dirac_output_synthesis_params->proto_matrix_e = 5;
205 341 : move16();
206 341 : return IVAS_ERR_OK;
207 : }
208 :
209 :
210 : /*-------------------------------------------------------------------*
211 : * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
212 : *
213 : *
214 : *-------------------------------------------------------------------*/
215 :
216 332209 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
217 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
218 : const UWord16 interp_length /* i : interpolator length */
219 : )
220 : {
221 : Word16 idx;
222 332209 : Word16 tmp, exp_diff = 0;
223 332209 : move16();
224 :
225 1652722 : FOR( idx = 1; idx <= interp_length; ++idx )
226 : {
227 1320513 : tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
228 1320513 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff ); // Q15
229 1320513 : move16();
230 : }
231 :
232 332209 : return;
233 : }
234 :
235 :
236 : /*-------------------------------------------------------------------*
237 : * ivas_dirac_dec_output_synthesis_cov_init()
238 : *
239 : * initialize the states for the covariance synthesis
240 : *-------------------------------------------------------------------*/
241 341 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
242 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis */
243 : const Word16 nchan_in, /* i : number of input (tranport) channels */
244 : const Word16 nchan_out, /* i : number of output channels */
245 : const Word16 n_param_bands, /* i : number of total parameter bands */
246 : const Word16 n_param_bands_res /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
247 : )
248 : {
249 :
250 : Word16 idx;
251 :
252 : /* initialize buffers */
253 4445 : FOR( idx = 0; idx < n_param_bands; idx++ )
254 : {
255 4104 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
256 4104 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
257 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
258 4104 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
259 : }
260 :
261 3563 : FOR( idx = 0; idx < n_param_bands_res; idx++ )
262 : {
263 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
264 3222 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
265 : }
266 :
267 :
268 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
269 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
270 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
271 341 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
272 :
273 341 : return;
274 : }
275 :
276 :
277 : /*-------------------------------------------------------------------*
278 : * ivas_dirac_dec_output_synthesis_cov_close()
279 : *
280 : * deallocate dynamic memory in the covariance synthesis state
281 : *-------------------------------------------------------------------*/
282 :
283 341 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
284 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i : handle for the covariance synthesis parameters */
285 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state */
286 : )
287 : {
288 : Word16 idx;
289 :
290 : /*-----------------------------------------------------------------*
291 : * memory deallocation
292 : *-----------------------------------------------------------------*/
293 :
294 : /* free interpolator */
295 341 : IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
296 : {
297 341 : free( h_dirac_output_synthesis_params->interpolator_fx );
298 341 : h_dirac_output_synthesis_params->interpolator_fx = NULL;
299 : }
300 :
301 : /* free alpha */
302 341 : IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
303 : {
304 0 : free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
305 0 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
306 : }
307 :
308 : /* free proto_matrix */
309 341 : IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
310 : {
311 341 : free( h_dirac_output_synthesis_params->proto_matrix_fx );
312 341 : h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
313 : }
314 :
315 :
316 341 : IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
317 : {
318 341 : free( h_dirac_output_synthesis_state->cx_old_e );
319 341 : h_dirac_output_synthesis_state->cx_old_e = NULL;
320 : }
321 341 : IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
322 : {
323 341 : free( h_dirac_output_synthesis_state->cy_old_e );
324 341 : h_dirac_output_synthesis_state->cy_old_e = NULL;
325 : }
326 :
327 : /* free cov buffers */
328 20801 : FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
329 : {
330 20460 : IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
331 : {
332 4104 : free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
333 4104 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
334 : }
335 :
336 20460 : IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
337 : {
338 4104 : free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
339 4104 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
340 : }
341 :
342 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
343 : {
344 4104 : free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
345 4104 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
346 : }
347 :
348 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
349 : {
350 3222 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
351 3222 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
352 : }
353 :
354 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
355 : {
356 4104 : free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
357 4104 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
358 : }
359 :
360 20460 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
361 : {
362 3222 : free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
363 3222 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
364 : }
365 : }
366 :
367 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
368 : {
369 341 : free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
370 341 : h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
371 : }
372 :
373 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
374 : {
375 341 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
376 341 : h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
377 : }
378 :
379 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
380 : {
381 341 : free( h_dirac_output_synthesis_state->mixing_matrix_exp );
382 341 : h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
383 : }
384 :
385 341 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
386 : {
387 341 : free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
388 341 : h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
389 : }
390 341 : return;
391 : }
392 :
393 : /*-------------------------------------------------------------------*
394 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
395 : *
396 : * collect the multi channel input covariance for one filter bank time slot
397 : *-------------------------------------------------------------------*/
398 :
399 : /*-------------------------------------------------------------------*
400 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
401 : *
402 : * collect the multi channel input covariance for one filter bank time slot
403 : *-------------------------------------------------------------------*/
404 2085319 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
405 : Word32 *RealBuffer_fx, /* i : input channel filter bank samples (real part) Q(31- RealBuffer_e)*/
406 : Word16 RealBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
407 : Word32 *ImagBuffer_fx, /* i : input channel filter bank samples (imaginary part Q(ImagBuffer_e)*/
408 : Word16 ImagBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
409 : Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (real part) Q(31- cx_e)*/
410 : Word16 *cx_e, /* i : exponent for accumulated input covariance (real part) */
411 : Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (imaginary part) Q(31- cx_imag_e)*/
412 : Word16 *cx_imag_e, /* i : exponent accumulated input covariance (imag part) */
413 : PARAM_MC_DEC_HANDLE hParamMC, /* i : handle to Parametric MC state */
414 : const Word16 param_band, /* i : parameter band */
415 : const Word16 nchan_in /* i : number of input channels */
416 : )
417 : {
418 : Word16 band_idx, ch_idx;
419 : Word16 brange[2];
420 : Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
421 : Word16 real_in_e;
422 : Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
423 : Word16 imag_in_e;
424 : Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
425 : Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
426 : Word16 output_e;
427 : Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
428 : Word16 band, num_bands;
429 : Word16 cx_fx_norm, cx_imag_fx_norm;
430 : /* estimate input covariance */
431 : /* Already stack here instead of in the process_subframe */
432 :
433 : /* collect input frame */
434 2085319 : brange[0] = hParamMC->band_grouping[param_band];
435 2085319 : move16();
436 2085319 : brange[1] = hParamMC->band_grouping[param_band + 1];
437 2085319 : move16();
438 2085319 : num_bands = sub( brange[1], brange[0] );
439 :
440 10919279 : FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
441 : {
442 8833960 : band = add( brange[0], band_idx );
443 26570640 : FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
444 : {
445 17736680 : real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
446 17736680 : move32();
447 17736680 : imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
448 17736680 : move32();
449 : }
450 : }
451 :
452 2085319 : real_in_e = RealBuffer_e;
453 2085319 : move16();
454 2085319 : imag_in_e = ImagBuffer_e;
455 2085319 : move16();
456 :
457 2085319 : Word16 buf_len = imult1616( num_bands, nchan_in );
458 :
459 2085319 : shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
460 2085319 : shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
461 :
462 2085319 : real_in_e = sub( real_in_e, shift_real );
463 2085319 : imag_in_e = sub( imag_in_e, shift_imag );
464 :
465 :
466 2085319 : output_e = s_max( real_in_e, imag_in_e );
467 :
468 2085319 : scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
469 2085319 : scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
470 :
471 2085319 : cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
472 2085319 : v_add_fx_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
473 :
474 2085319 : v_add_fx_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
475 :
476 2085319 : cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
477 2085319 : cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
478 :
479 2085319 : scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
480 2085319 : scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
481 :
482 2085319 : *cx_e = sub( tmp1_e, cx_fx_norm );
483 2085319 : move16();
484 2085319 : *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
485 2085319 : move16();
486 :
487 2085319 : return;
488 : }
489 :
490 : /*-------------------------------------------------------------------*
491 : * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
492 : *
493 : * synthesize one filter bank slot of multi channel output filter bank
494 : * samples with the covariance synthesis
495 : *-------------------------------------------------------------------*/
496 :
497 163731 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
498 : Word32 *Cldfb_RealBuffer_in_fx, /*Q6*/
499 : Word32 *Cldfb_ImagBuffer_in_fx, /*Q6*/
500 : Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (real part) Q6*/
501 : Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (imaginary part) Q6*/
502 : Word32 *mixing_matrix_fx[], /* i : parameter band wise mixing matrices (direct part) Q(31-mixing_matrix_e)*/
503 : Word16 *mixing_matrix_e, /* i : parameter band wise mixing matrices (direct part) */
504 : Word32 *mixing_matrix_res_fx[], /* i : parameter band wise mixing matrices (residual part) Q(31-mixing_matrix_res_e)*/
505 : Word16 *mixing_matrix_res_e, /* i : parameter band wise mixing matrices (residual part) */
506 : const UWord16 slot_idx_sfr, /* i : time slot index for the current slot within the current subframe */
507 : const UWord16 slot_idx_tot, /* i : time slot index for the current slot within the frame */
508 : const Word16 nX, /* i : number of input channels */
509 : const Word16 nY, /* i : number of output channels */
510 : PARAM_MC_DEC_HANDLE hParamMC /* i : handle to the Parametric MC decoder state */
511 : )
512 : {
513 : Word16 param_band_idx, band, ch_idx;
514 : Word16 have_residual;
515 : Word16 brange[2];
516 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
517 : Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
518 : Word16 mixing_matrix_smooth_e;
519 : Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
520 163731 : Word16 mixing_matrix_res_smooth_e = 0;
521 163731 : move16();
522 : Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
523 : Word16 mixing_matrix_buffer_e;
524 : Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
525 : Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
526 :
527 : Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
528 : Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
529 :
530 163731 : h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
531 :
532 163731 : set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
533 163731 : set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
534 :
535 163731 : set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
536 163731 : set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
537 :
538 2289070 : FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
539 : {
540 : /* final mixing */
541 2125339 : have_residual = 0;
542 2125339 : move16();
543 2125339 : brange[0] = hParamMC->band_grouping[param_band_idx];
544 2125339 : move16();
545 2125339 : brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
546 2125339 : move16();
547 :
548 2125339 : if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
549 : {
550 1710264 : have_residual = 1;
551 1710264 : move16();
552 : }
553 :
554 2125339 : v_multc_fx( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
555 2125339 : mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
556 2125339 : move16();
557 :
558 2125339 : v_multc_fx( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
559 2125339 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
560 2125339 : move16();
561 :
562 2125339 : v_add_fx_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
563 :
564 2125339 : IF( have_residual )
565 : {
566 : /* residual mixing matrix interpolation*/
567 :
568 1710264 : v_multc_fx( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
569 1710264 : mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
570 1710264 : move16();
571 :
572 1710264 : set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
573 1710264 : v_multc_fx( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
574 1710264 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
575 1710264 : move16();
576 :
577 1710264 : v_add_fx_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
578 : }
579 :
580 11122259 : FOR( band = brange[0]; band < brange[1]; band++ )
581 : {
582 8996920 : assert( band >= 0 );
583 :
584 8996920 : IF( have_residual )
585 : {
586 : /* collect diffuse prototypes */
587 3274620 : assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
588 23274460 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
589 : {
590 19999840 : diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
591 19999840 : move32();
592 19999840 : diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
593 19999840 : move32();
594 : }
595 :
596 : /* apply residual mixing */
597 : {
598 : Word16 shifter;
599 3274620 : shifter = sub( mixing_matrix_res_smooth_e, 32 );
600 23274460 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
601 : {
602 : Word16 i;
603 : Word16 idx;
604 : Word64 temp_real, temp_imag;
605 :
606 19999840 : idx = ch_idx;
607 19999840 : temp_real = 0;
608 19999840 : temp_imag = 0;
609 19999840 : move64();
610 19999840 : move64();
611 143276640 : FOR( i = 0; i < nY; i++ )
612 : {
613 123276800 : temp_real = W_mac_32_32( temp_real, mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] );
614 123276800 : temp_imag = W_mac_32_32( temp_imag, mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] );
615 123276800 : idx += nY;
616 : }
617 19999840 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
618 19999840 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
619 : }
620 : }
621 : }
622 : ELSE
623 : {
624 40619420 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
625 : {
626 34897120 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
627 34897120 : move32();
628 34897120 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
629 34897120 : move32();
630 : }
631 : }
632 :
633 : /* collect input signals, still in cldfb buffers */
634 27067560 : FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
635 : {
636 :
637 18070640 : input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
638 18070640 : move32();
639 18070640 : input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
640 18070640 : move32();
641 : }
642 :
643 : /* apply mixing matrix */
644 : {
645 : Word16 shifter;
646 8996920 : shifter = 31 - mixing_matrix_smooth_e;
647 :
648 63893880 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
649 : {
650 : Word16 i;
651 : Word16 idx;
652 : Word64 temp_real, temp_imag;
653 :
654 54896960 : idx = ch_idx;
655 54896960 : temp_real = 0;
656 54896960 : temp_imag = 0;
657 54896960 : move64();
658 54896960 : move64();
659 165535680 : for ( i = 0; i < nX; i++ )
660 : {
661 110638720 : temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
662 110638720 : temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
663 110638720 : idx += nY;
664 : }
665 54896960 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
666 54896960 : move32();
667 54896960 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
668 54896960 : move32();
669 : }
670 : }
671 : }
672 : }
673 :
674 163731 : return;
675 : }
676 :
677 : /*-------------------------------------------------------------------*
678 : * computeMixingMatrices()
679 : *
680 : * compute a mixing matrix using the convariance synthesis approach
681 : *-------------------------------------------------------------------*/
682 :
683 132832 : Word16 computeMixingMatrices_fx(
684 : const Word16 num_inputs, /* i : number of input channels */
685 : const Word16 num_outputs, /* i : number of output channels */
686 : const Word32 *Cx, /* i : input channel covariance matrix Q(31-Cx_e) */
687 : Word16 Cx_e,
688 : const Word32 *Cy, /* i : target covariance matrix Q(31-Cy_e) */
689 : Word16 Cy_e,
690 : const Word32 *Q, /* i : prototype matrix (usually a upmix matrix) Q_fx_e */
691 : Word16 Q_fx_e,
692 : const Word16 energy_compensation_flag, /* i : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
693 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values */
694 : Word16 reg_Sx_e,
695 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
696 : Word16 reg_ghat_e,
697 : Word32 *mixing_matrix_fx, /* o : resulting mixing matrix Q(31-mixing_matrix_out_e) */
698 : Word16 *mixing_matrix_out_e,
699 : Word32 *Cr_fx, /* o : residual covariance matrix Q(31-Cr_e) */
700 : Word16 *Cr_e )
701 : {
702 : Word16 i, j;
703 : Word16 out;
704 : Word16 nL, nC;
705 132832 : Word16 lengthCx = num_inputs;
706 132832 : Word16 lengthCy = num_outputs;
707 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
708 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
709 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
710 : Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
711 : Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
712 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
713 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
714 : Word16 Cx_fx_e;
715 : Word16 Cy_fx_e;
716 :
717 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
718 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
719 :
720 : Word32 limit_fx;
721 : Word16 limit_e;
722 :
723 : Word32 L_tmp;
724 : Word16 tmp_e, tmp, exp;
725 :
726 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
727 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
728 :
729 : Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
730 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
731 :
732 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
733 : Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
734 :
735 : Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
736 : Word16 Q_e;
737 :
738 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
739 : Word16 Q_Cx_e;
740 :
741 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
742 : Word16 Cy_hat_diag_e;
743 : Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
744 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
745 : Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
746 :
747 : Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
748 :
749 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
750 :
751 : Word16 mixing_matrix_e;
752 :
753 : Word16 Cr_fx_e;
754 :
755 :
756 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
757 : Word16 adj_e[MAX_OUTPUT_CHANNELS];
758 : Word32 *adj_fx_p;
759 : Word16 adj_fx_e;
760 :
761 : Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
762 132832 : push_wmops( "dirac_cov_mix_mat" );
763 :
764 132832 : out = EXIT_SUCCESS;
765 132832 : move16();
766 :
767 132832 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
768 2258144 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
769 : {
770 2125312 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
771 2125312 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
772 2125312 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
773 : }
774 :
775 :
776 132832 : Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
777 132832 : Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
778 132832 : Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
779 :
780 132832 : Q_e = Q_fx_e;
781 132832 : move16();
782 132832 : Cx_fx_e = Cx_e;
783 132832 : move16();
784 132832 : Cy_fx_e = Cy_e;
785 132832 : move16();
786 :
787 : /*-----------------------------------------------------------------*
788 : * Decomposition of Cy
789 : *-----------------------------------------------------------------*/
790 :
791 : /* Processing the SVD */
792 :
793 132832 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
794 :
795 132832 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
796 132832 : Word16 max_e = -32;
797 : /* Computing Ky */
798 812741 : FOR( i = 0; i < lengthCy; ++i )
799 : {
800 4211008 : FOR( j = 0; j < lengthCy; ++j )
801 : {
802 3531099 : tmp_e = svd_s_buffer_e[j];
803 3531099 : move16();
804 3531099 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
805 3531099 : Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
806 3531099 : move32();
807 3531099 : Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
808 3531099 : move16();
809 3531099 : max_e = s_max( max_e, tmp_e );
810 : }
811 : }
812 3663931 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
813 : {
814 3531099 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
815 3531099 : move32();
816 3531099 : Ky_fx_e[i] = max_e;
817 3531099 : move16();
818 : }
819 :
820 : /*-----------------------------------------------------------------*
821 : * Decomposition of Cx
822 : *-----------------------------------------------------------------*/
823 :
824 : /* Processing the SVD */
825 :
826 132832 : mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
827 :
828 132832 : svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
829 132832 : max_e = -32;
830 399856 : FOR( i = 0; i < lengthCx; ++i )
831 : {
832 805152 : FOR( j = 0; j < lengthCx; ++j )
833 : {
834 538128 : tmp_e = svd_s_buffer_e[j];
835 538128 : move16();
836 538128 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
837 538128 : Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
838 538128 : move32();
839 538128 : Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
840 538128 : move16();
841 538128 : max_e = s_max( max_e, tmp_e );
842 : }
843 : }
844 670960 : FOR( i = 0; i < lengthCx * lengthCx; ++i )
845 : {
846 538128 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
847 538128 : move32();
848 538128 : Kx_fx_e[i] = max_e;
849 538128 : move16();
850 : }
851 :
852 399856 : FOR( i = 0; i < lengthCx; ++i )
853 : {
854 267024 : tmp_e = svd_s_buffer_e[i];
855 267024 : move16();
856 267024 : svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
857 267024 : move32();
858 267024 : svd_s_buffer_e[i] = tmp_e;
859 267024 : move16();
860 : }
861 :
862 : /*-----------------------------------------------------------------*
863 : * Regularization of Sx
864 : *-----------------------------------------------------------------*/
865 :
866 132832 : limit_fx = svd_s_buffer_fx[0];
867 132832 : move32();
868 132832 : limit_e = svd_s_buffer_e[0];
869 132832 : move16();
870 267024 : FOR( i = 1; i < lengthCx; i++ )
871 : {
872 134192 : IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) )
873 : {
874 0 : limit_fx = svd_s_buffer_fx[i];
875 0 : move32();
876 0 : limit_e = svd_s_buffer_e[i];
877 0 : move16();
878 : }
879 : }
880 :
881 132832 : limit_e = add( limit_e, reg_Sx_e );
882 132832 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx );
883 132832 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, SQRT_EPSILON_FX /* Q31 */, 0 ) < 0 )
884 : {
885 0 : limit_fx = SQRT_EPSILON_FX;
886 0 : move32();
887 0 : limit_e = 0;
888 0 : move16();
889 : }
890 :
891 399856 : FOR( i = 0; i < lengthCx; ++i )
892 : {
893 267024 : IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) )
894 : {
895 58518 : svd_s_buffer_fx[i] = limit_fx;
896 58518 : move32();
897 58518 : svd_s_buffer_e[i] = limit_e;
898 58518 : move16();
899 : }
900 : }
901 :
902 132832 : limit_fx = 0;
903 132832 : move32();
904 132832 : limit_e = 0;
905 132832 : move16();
906 :
907 : /*-----------------------------------------------------------------*
908 : * regularized Kx-1
909 : *-----------------------------------------------------------------*/
910 :
911 399856 : FOR( i = 0; i < lengthCx; ++i )
912 : {
913 : Word16 scale, reg_fac_fx;
914 267024 : reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
915 267024 : scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
916 805152 : FOR( j = 0; j < lengthCx; ++j )
917 : {
918 538128 : Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
919 538128 : move32();
920 538128 : Kx_reg_inv_e[i + j * lengthCx] = scale;
921 538128 : move16();
922 : }
923 : }
924 :
925 : /*-----------------------------------------------------------------*
926 : * normalization matrix G hat
927 : *-----------------------------------------------------------------*/
928 :
929 : /* Computing Q*Cx*Q' */
930 132832 : matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
931 :
932 132832 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
933 :
934 132832 : Word16 com_e = sub( limit_e, Cy_hat_diag_e );
935 812741 : FOR( i = 0; i < lengthCy; ++i )
936 : {
937 679909 : IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
938 : {
939 679888 : limit_fx = Cy_hat_diag_fx[i];
940 679888 : move32();
941 679888 : limit_e = Cy_hat_diag_e;
942 679888 : move16();
943 : }
944 : }
945 132832 : limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
946 132832 : limit_e = add( limit_e, reg_ghat_e );
947 :
948 132832 : com_e = sub( Cy_hat_diag_e, limit_e );
949 812741 : FOR( i = 0; i < lengthCy; ++i )
950 : {
951 679909 : Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
952 679909 : move16();
953 :
954 679909 : IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) )
955 : {
956 125 : Cy_hat_diag_fx[i] = limit_fx;
957 125 : move32();
958 125 : Cy_hat_diag_buff_e[i] = limit_e;
959 125 : move16();
960 : }
961 :
962 679909 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
963 679909 : exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
964 679909 : L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
965 679909 : G_hat_fx[i] = L_tmp;
966 679909 : move32();
967 679909 : G_hat_buff_e[i] = exp;
968 679909 : move16();
969 : }
970 :
971 : /*-----------------------------------------------------------------*
972 : * Formulate optimal P
973 : *-----------------------------------------------------------------*/
974 :
975 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
976 : Word16 mat_mult_buffer1_fx_e;
977 :
978 : Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
979 :
980 132832 : matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
981 :
982 132832 : matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
983 :
984 132832 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
985 :
986 132832 : IF( LT_16( lengthCx, lengthCy ) )
987 : {
988 132832 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
989 132832 : nL = lengthCy;
990 132832 : move16();
991 132832 : nC = lengthCx;
992 132832 : move16();
993 132832 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
994 : }
995 : ELSE
996 : {
997 0 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
998 0 : nL = lengthCx;
999 0 : move16();
1000 0 : nC = lengthCy;
1001 0 : move16();
1002 0 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1003 : }
1004 :
1005 : /* Actually Processing P */
1006 :
1007 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
1008 : truncates V to num_input_channel columns */
1009 :
1010 132832 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1011 132832 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1012 :
1013 132832 : mat_mult_buffer1_fx_e = 0;
1014 132832 : move16();
1015 132832 : mat_mult_buffer2_e = 0;
1016 132832 : move16();
1017 :
1018 132832 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
1019 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1020 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1021 :
1022 : /************************ Formulate M **********************/
1023 :
1024 132832 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1025 :
1026 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
1027 :
1028 : Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1029 132832 : set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1030 :
1031 132832 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
1032 :
1033 : /*-----------------------------------------------------------------*
1034 : * Formulate Cr
1035 : *-----------------------------------------------------------------*/
1036 :
1037 : /* Compute Cy_tilde = M*Cx*M' */
1038 : Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
1039 132832 : set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
1040 132832 : matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
1041 :
1042 132832 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
1043 :
1044 132832 : exp = mixing_matrix_fx_e[0];
1045 132832 : move16();
1046 1372788 : FOR( i = 1; i < lengthCy * lengthCx; i++ )
1047 : {
1048 1239956 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1049 : {
1050 97394 : exp = mixing_matrix_fx_e[i];
1051 97394 : move16();
1052 : }
1053 : }
1054 :
1055 1505620 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1056 : {
1057 1372788 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1058 1372788 : move32();
1059 : }
1060 :
1061 132832 : mixing_matrix_e = exp;
1062 132832 : move16();
1063 :
1064 132832 : Cr_p_fx = Cr_fx;
1065 132832 : Cy_p_fx = Cy_fx;
1066 132832 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1067 : Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1068 812741 : FOR( i = 0; i < lengthCy; ++i )
1069 : {
1070 4211008 : FOR( j = 0; j < lengthCy; ++j )
1071 : {
1072 :
1073 3531099 : *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
1074 3531099 : move32();
1075 3531099 : Cr_p_fx++;
1076 3531099 : Cy_p_fx++;
1077 3531099 : Cy_tilde_p_fx++;
1078 : }
1079 :
1080 : /* Avoid Meaningless negative main diagonal elements */
1081 679909 : IF( Cr_fx[i + ( i * lengthCy )] < 0 )
1082 : {
1083 76299 : Cr_fx[i + ( i * lengthCy )] = 0;
1084 76299 : move32();
1085 76299 : Cr_e_arr[i + ( i * lengthCy )] = 0;
1086 76299 : move16();
1087 : }
1088 : }
1089 :
1090 132832 : exp = Cr_e_arr[0];
1091 132832 : move16();
1092 3531099 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1093 : {
1094 3398267 : if ( LT_16( exp, Cr_e_arr[i] ) )
1095 : {
1096 126888 : exp = Cr_e_arr[i];
1097 126888 : move16();
1098 : }
1099 : }
1100 :
1101 3663931 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1102 : {
1103 3531099 : Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
1104 3531099 : move32();
1105 : }
1106 :
1107 132832 : Cr_fx_e = exp;
1108 132832 : move16();
1109 :
1110 132832 : exp = mat_mult_buffer2_fx_e[0];
1111 132832 : move16();
1112 3531099 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1113 : {
1114 3398267 : if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
1115 : {
1116 184646 : exp = mat_mult_buffer2_fx_e[i];
1117 184646 : move16();
1118 : }
1119 : }
1120 :
1121 3663931 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1122 : {
1123 3531099 : mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
1124 3531099 : move32();
1125 : }
1126 :
1127 132832 : mat_mult_buffer2_e = exp;
1128 132832 : move16();
1129 :
1130 : /*-----------------------------------------------------------------*
1131 : * Energy Compensation
1132 : *-----------------------------------------------------------------*/
1133 :
1134 132832 : IF( EQ_16( energy_compensation_flag, 1 ) )
1135 : {
1136 25942 : adj_fx_p = svd_s_buffer_fx;
1137 25942 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1138 :
1139 158372 : FOR( i = 0; i < lengthCy; ++i )
1140 : {
1141 : /* Avoid correction for very small energies,
1142 : main diagonal elements of Cy_tilde_p may be negative */
1143 132430 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
1144 : {
1145 0 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1146 0 : move32();
1147 0 : adj_e[i] = 1;
1148 0 : move16();
1149 : }
1150 : ELSE
1151 : {
1152 132430 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
1153 132430 : exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
1154 132430 : L_tmp = L_deposit_h( tmp );
1155 132430 : L_tmp = Sqrt32( L_tmp, &exp );
1156 132430 : adj_fx_p[i] = L_tmp;
1157 132430 : move32();
1158 132430 : adj_e[i] = exp;
1159 132430 : move16();
1160 : }
1161 :
1162 132430 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
1163 132430 : IF( GT_32( adj_fx_p[i], temp ) )
1164 : {
1165 18 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1166 18 : move32();
1167 18 : adj_e[i] = 3;
1168 18 : move16();
1169 : }
1170 : }
1171 :
1172 25942 : exp = adj_e[0];
1173 25942 : move16();
1174 132430 : FOR( i = 1; i < lengthCy; i++ )
1175 : {
1176 106488 : if ( LT_16( exp, adj_e[i] ) )
1177 : {
1178 1423 : exp = adj_e[i];
1179 1423 : move16();
1180 : }
1181 : }
1182 :
1183 158372 : FOR( i = 0; i < lengthCy; i++ )
1184 : {
1185 132430 : adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
1186 132430 : move32();
1187 : }
1188 25942 : adj_fx_e = exp;
1189 25942 : move16();
1190 :
1191 25942 : diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1192 :
1193 25942 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
1194 25942 : mixing_matrix_e = mat_mult_buffer3_e;
1195 25942 : move16();
1196 : }
1197 :
1198 132832 : *mixing_matrix_out_e = mixing_matrix_e;
1199 132832 : move16();
1200 132832 : *Cr_e = Cr_fx_e;
1201 132832 : move16();
1202 132832 : pop_wmops();
1203 :
1204 132832 : return out;
1205 : }
1206 :
1207 :
1208 : /*-------------------------------------------------------------------*
1209 : * computeMixingMatricesResidual()
1210 : *
1211 : * compute a residual mixing matrix using the covariance synthesis approach
1212 : *-------------------------------------------------------------------*/
1213 :
1214 106890 : Word16 computeMixingMatricesResidual_fx(
1215 : const Word32 num_outputs, /* i : number of output channels */
1216 : const Word32 *Cx_fx, /* i : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e) */
1217 : const Word16 Cx_e,
1218 : const Word32 *Cy_fx, /* i : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
1219 : const Word16 Cy_fx_e,
1220 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values Q(31-reg_Sx_e) */
1221 : const Word16 reg_Sx_e,
1222 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
1223 : const Word16 reg_ghat_e,
1224 : Word32 *mixing_matrix_fx, /* o : resulting residual mixing matrix Q(31-mixing_matrix_ret_e) */
1225 : Word16 *mixing_matrix_ret_e )
1226 : {
1227 : Word16 i, j;
1228 : Word16 out, lengthCx, lengthCy;
1229 106890 : out = EXIT_SUCCESS;
1230 106890 : move16();
1231 106890 : lengthCx = extract_l( num_outputs );
1232 106890 : lengthCy = extract_l( num_outputs );
1233 :
1234 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1235 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1236 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1237 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1238 :
1239 : Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1240 :
1241 106890 : push_wmops( "dirac_cov_mix_mat_r" );
1242 :
1243 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1244 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
1245 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1246 106890 : Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
1247 106890 : move16();
1248 :
1249 106890 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
1250 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1251 : Word32 L_tmp;
1252 : Word16 tmp_e;
1253 : Word16 tmp, scale;
1254 : Word32 div_tmp;
1255 : Word16 exp;
1256 :
1257 : Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
1258 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1259 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1260 : Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
1261 :
1262 : Word32 limit_fx;
1263 : Word16 limit_e;
1264 :
1265 : Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
1266 : Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
1267 :
1268 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1269 : Word16 Cy_hat_diag_e;
1270 :
1271 : Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
1272 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1273 : Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
1274 :
1275 : Word32 *adj_fx_p;
1276 : Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
1277 :
1278 : Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
1279 : Word16 Cy_tilde_e;
1280 :
1281 : /*-----------------------------------------------------------------*
1282 : * Decomposition of Cy
1283 : *-----------------------------------------------------------------*/
1284 :
1285 : /* Processing the SVD */
1286 :
1287 : /* linear array to svd buffer */
1288 106890 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
1289 :
1290 106890 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
1291 :
1292 : /* Computing Ky */
1293 106890 : Word16 max_e = -32;
1294 654369 : FOR( i = 0; i < lengthCy; ++i )
1295 : {
1296 3393068 : FOR( j = 0; j < lengthCy; ++j )
1297 : {
1298 2845589 : tmp_e = svd_s_buffer_e[j];
1299 2845589 : move16();
1300 2845589 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
1301 2845589 : Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
1302 2845589 : move32();
1303 2845589 : Ky_fx_e[i + j * lengthCy] = tmp_e;
1304 2845589 : move16();
1305 2845589 : max_e = s_max( max_e, tmp_e );
1306 : }
1307 : }
1308 :
1309 2952479 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
1310 : {
1311 2845589 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
1312 2845589 : move32();
1313 2845589 : Ky_fx_e[i] = max_e;
1314 2845589 : move16();
1315 : }
1316 :
1317 : /*-----------------------------------------------------------------*
1318 : * Decomposition of Cx
1319 : *-----------------------------------------------------------------*/
1320 :
1321 : /* Processing the SVD of Cx*/
1322 : /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
1323 : * would be just indicating the sorting index, so go straight to Kx as the
1324 : * square root of the diagonal of Cx */
1325 :
1326 : /* Computing Kx */
1327 106890 : max_e = -32;
1328 654369 : FOR( i = 0; i < lengthCx; ++i )
1329 : {
1330 547479 : exp = Cx_e;
1331 547479 : move16();
1332 547479 : Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
1333 547479 : move32();
1334 547479 : Kx_fx_e[i] = exp;
1335 547479 : move16();
1336 547479 : max_e = s_max( max_e, exp );
1337 : }
1338 :
1339 654369 : FOR( i = 0; i < lengthCx; ++i )
1340 : {
1341 547479 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
1342 547479 : move32();
1343 547479 : Kx_fx_e[i] = max_e;
1344 547479 : move16();
1345 : }
1346 :
1347 : /*-----------------------------------------------------------------*
1348 : * Regularization of Sx
1349 : *-----------------------------------------------------------------*/
1350 :
1351 106890 : limit_fx = Kx_fx[0];
1352 106890 : move32();
1353 :
1354 547479 : FOR( i = 1; i < lengthCx; i++ )
1355 : {
1356 440589 : IF( GT_32( Kx_fx[i], limit_fx ) )
1357 : {
1358 165586 : limit_fx = Kx_fx[i];
1359 165586 : move32();
1360 : }
1361 : }
1362 :
1363 106890 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
1364 106890 : L_tmp = L_add( L_tmp, EPSILLON_FX );
1365 106890 : limit_fx = L_tmp;
1366 106890 : move16();
1367 106890 : limit_e = add( Kx_fx_e[0], reg_Sx_e );
1368 :
1369 654369 : FOR( i = 0; i < lengthCx; ++i )
1370 : {
1371 547479 : IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) )
1372 : {
1373 546080 : div_tmp = Kx_fx[i];
1374 546080 : move32();
1375 546080 : exp = Kx_fx_e[i];
1376 546080 : move16();
1377 : }
1378 : ELSE
1379 : {
1380 1399 : div_tmp = limit_fx;
1381 1399 : move32();
1382 1399 : exp = limit_e;
1383 1399 : move16();
1384 : }
1385 547479 : tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
1386 547479 : scale = add( scale, sub( Q1, exp ) );
1387 :
1388 547479 : Kx_reg_inv_fx[i] = L_deposit_h( tmp );
1389 547479 : move32();
1390 547479 : Kx_reg_inv_e[i] = scale;
1391 547479 : move16();
1392 : }
1393 :
1394 106890 : limit_fx = 0;
1395 106890 : move32();
1396 106890 : limit_e = 0;
1397 106890 : move16();
1398 :
1399 : /*-----------------------------------------------------------------*
1400 : * regularized Kx-1
1401 : *-----------------------------------------------------------------*/
1402 :
1403 : /*-----------------------------------------------------------------*
1404 : * normalization matrix G hat
1405 : *-----------------------------------------------------------------*/
1406 :
1407 : /* Computing Cy_hat_diag */
1408 106890 : Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
1409 106890 : Cy_hat_diag_e = Cx_e;
1410 106890 : move16();
1411 :
1412 106890 : Word16 com_e = sub( limit_e, Cy_hat_diag_e );
1413 654369 : FOR( i = 0; i < lengthCy; ++i )
1414 : {
1415 547479 : IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
1416 : {
1417 547463 : limit_fx = Cy_hat_diag_fx[i];
1418 547463 : move32();
1419 547463 : limit_e = Cy_hat_diag_e;
1420 547463 : move16();
1421 : }
1422 : }
1423 :
1424 106890 : limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
1425 106890 : limit_e = add( limit_e, reg_ghat_e );
1426 :
1427 : /* Computing G_hat */
1428 :
1429 106890 : com_e = sub( Cy_hat_diag_e, limit_e );
1430 654369 : FOR( i = 0; i < lengthCy; ++i )
1431 : {
1432 547479 : Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
1433 547479 : move16();
1434 547479 : IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1435 : {
1436 101 : Cy_hat_diag_fx[i] = limit_fx;
1437 101 : move32();
1438 101 : Cy_hat_diag_fx_e[i] = limit_e;
1439 101 : move16();
1440 : }
1441 547479 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
1442 547479 : scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
1443 547479 : L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
1444 :
1445 547479 : G_hat_fx[i] = L_tmp;
1446 547479 : move32();
1447 547479 : G_hat_e[i] = scale;
1448 547479 : move16();
1449 : }
1450 :
1451 : /*-----------------------------------------------------------------*
1452 : * Formulate optimal P
1453 : *-----------------------------------------------------------------*/
1454 :
1455 654369 : FOR( i = 0; i < num_outputs; i++ )
1456 : {
1457 547479 : Kx_fx[i] = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
1458 547479 : move32();
1459 547479 : Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
1460 547479 : move16();
1461 : }
1462 :
1463 654369 : FOR( i = 0; i < num_outputs; i++ )
1464 : {
1465 : Word32 fac_fx;
1466 547479 : fac_fx = Kx_fx[i];
1467 547479 : move32();
1468 :
1469 3393068 : FOR( j = 0; j < num_outputs; j++ )
1470 : {
1471 2845589 : mat_mult_buffer1_fx[i + j * num_outputs] = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e));
1472 2845589 : move32();
1473 2845589 : mat_mult_buffer1_buff_e[i + j * num_outputs] = add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] );
1474 2845589 : move16();
1475 : }
1476 : }
1477 :
1478 106890 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
1479 106890 : move16();
1480 2845589 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1481 : {
1482 2738699 : if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
1483 : {
1484 29503 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
1485 29503 : move16();
1486 : }
1487 : }
1488 :
1489 2952479 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1490 : {
1491 2845589 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
1492 2845589 : move32();
1493 : }
1494 :
1495 106890 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1496 :
1497 106890 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
1498 :
1499 : /* Actually Processing P */
1500 :
1501 106890 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1502 106890 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1503 106890 : mat_mult_buffer1_e = 0;
1504 106890 : move16();
1505 106890 : mat_mult_buffer2_e = 0;
1506 106890 : move16();
1507 :
1508 106890 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
1509 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1510 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1511 :
1512 : /*-----------------------------------------------------------------*
1513 : * Formulate M
1514 : *-----------------------------------------------------------------*/
1515 :
1516 :
1517 106890 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1518 106890 : set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1519 :
1520 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1521 :
1522 654369 : FOR( i = 0; i < num_outputs; i++ )
1523 : {
1524 : Word32 fac_fx;
1525 547479 : fac_fx = Kx_reg_inv_fx[i];
1526 547479 : move32();
1527 :
1528 3393068 : FOR( j = 0; j < num_outputs; j++ )
1529 : {
1530 2845589 : mixing_matrix_fx[j + i * num_outputs] = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e);
1531 2845589 : move32();
1532 2845589 : mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
1533 2845589 : move16();
1534 : }
1535 : }
1536 :
1537 : /*-----------------------------------------------------------------*
1538 : * Formulate Cr
1539 : *-----------------------------------------------------------------*/
1540 :
1541 : /* Compute Cy_tilde = M*Cx*M' */
1542 :
1543 : Word16 Cx_e_arr[MAX_CICP_CHANNELS];
1544 106890 : set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
1545 :
1546 106890 : matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1547 :
1548 106890 : exp = mixing_matrix_fx_e[0];
1549 106890 : move16();
1550 2845589 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1551 : {
1552 2738699 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1553 : {
1554 11815 : exp = mixing_matrix_fx_e[i];
1555 11815 : move16();
1556 : }
1557 : }
1558 :
1559 2952479 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1560 : {
1561 2845589 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1562 2845589 : move32();
1563 : }
1564 106890 : mixing_matrix_e = exp;
1565 106890 : move16();
1566 :
1567 106890 : exp = mat_mult_buffer1_buff_e[0];
1568 106890 : move16();
1569 2845589 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1570 : {
1571 2738699 : if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
1572 : {
1573 11815 : exp = mat_mult_buffer1_buff_e[i];
1574 11815 : move16();
1575 : }
1576 : }
1577 :
1578 2952479 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1579 : {
1580 2845589 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
1581 2845589 : move32();
1582 : }
1583 106890 : mat_mult_buffer1_e = exp;
1584 106890 : move16();
1585 :
1586 106890 : matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
1587 :
1588 : /*-----------------------------------------------------------------*
1589 : * Energy Compensation
1590 : *-----------------------------------------------------------------*/
1591 :
1592 106890 : adj_fx_p = svd_s_buffer_fx;
1593 :
1594 654369 : FOR( i = 0; i < lengthCy; ++i )
1595 : {
1596 547479 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
1597 547479 : scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
1598 547479 : adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
1599 547479 : move32();
1600 547479 : adj_buff_e[i] = scale;
1601 547479 : move16();
1602 547479 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
1603 547479 : IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
1604 : {
1605 2 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1606 2 : move32();
1607 2 : adj_buff_e[i] = 3;
1608 2 : move16();
1609 : }
1610 : }
1611 :
1612 106890 : adj_e = adj_buff_e[0];
1613 106890 : move16();
1614 :
1615 547479 : FOR( i = 1; i < lengthCy; i++ )
1616 : {
1617 440589 : if ( LT_16( adj_e, adj_buff_e[i] ) )
1618 : {
1619 43195 : adj_e = adj_buff_e[i];
1620 43195 : move16();
1621 : }
1622 : }
1623 :
1624 :
1625 654369 : FOR( i = 0; i < lengthCy; i++ )
1626 : {
1627 547479 : adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
1628 547479 : move32();
1629 : }
1630 :
1631 106890 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1632 106890 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
1633 106890 : *mixing_matrix_ret_e = mat_mult_buffer3_e;
1634 106890 : move16();
1635 :
1636 106890 : pop_wmops();
1637 :
1638 106890 : return out;
1639 : }
1640 :
1641 :
1642 : /*-------------------------------------------------------------------*
1643 : * computeMixingMatricesISM()
1644 : *
1645 : *
1646 : *-------------------------------------------------------------------*/
1647 315180 : Word16 computeMixingMatricesISM_fx(
1648 : const Word16 num_inputs,
1649 : const Word16 num_responses,
1650 : const Word16 num_outputs,
1651 : const Word32 *responses_fx, /*Q(31-responses_e) */
1652 : const Word16 responses_e,
1653 : const Word32 *ener_fx, /*Q(31-ener_e) */
1654 : const Word16 ener_e,
1655 : const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
1656 : const Word16 Cx_diag_e,
1657 : const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
1658 : const Word16 Cy_diag_e,
1659 : const Word16 *Q_16fx, // Q15
1660 : const Word16 energy_compensation_flag,
1661 : const Word32 reg_Sx_fx, /*Q0*/
1662 : const Word32 reg_ghat_fx, /*Q0*/
1663 : Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
1664 : Word16 *mixing_matrix_e )
1665 : {
1666 : Word16 i, out;
1667 : Word16 lengthCx, lengthCy;
1668 : Word16 nL, nC;
1669 :
1670 : Word32 *Cy_tilde_p_fx;
1671 : Word32 *adj_fx;
1672 : Word32 limit_fx;
1673 :
1674 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1675 : Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1676 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1677 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1678 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1679 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1680 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
1681 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1682 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
1683 : Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
1684 :
1685 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1686 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1687 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
1688 : Word16 svd_s_buffer_fx_e;
1689 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1690 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1691 :
1692 : Word16 temp_e[MAX_OUTPUT_CHANNELS];
1693 315180 : push_wmops( "dirac_cov_mix_mat" );
1694 :
1695 315180 : out = EXIT_SUCCESS;
1696 315180 : move16();
1697 315180 : lengthCx = num_inputs;
1698 315180 : move16();
1699 315180 : lengthCy = num_outputs;
1700 315180 : move16();
1701 :
1702 6073020 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1703 : {
1704 5757840 : IF( EQ_16( Q_16fx[i], MAX_16 ) )
1705 : {
1706 2575200 : Q_fx[i] = MAX_32;
1707 2575200 : move32();
1708 : }
1709 : ELSE
1710 : {
1711 3182640 : Q_fx[i] = L_deposit_h( Q_16fx[i] );
1712 3182640 : move32();
1713 : }
1714 : }
1715 315180 : Q_e = 0;
1716 315180 : move16();
1717 315180 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
1718 5358060 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
1719 : {
1720 5042880 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1721 5042880 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1722 5042880 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1723 : }
1724 :
1725 : /* Decomposition of Cy = Ky*Ky' */
1726 : /* Ky = responses*diag(ener) */
1727 315180 : matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
1728 :
1729 : /* Decomposition of Cx -> Computing Kx */
1730 315180 : set16_fx( temp_e, Cx_diag_e, lengthCx );
1731 315180 : v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
1732 315180 : Kx_e = temp_e[0];
1733 315180 : move16();
1734 630360 : FOR( i = 1; i < lengthCx; i++ )
1735 : {
1736 315180 : Kx_e = s_max( Kx_e, temp_e[i] );
1737 : }
1738 945540 : FOR( i = 0; i < lengthCx; i++ )
1739 : {
1740 630360 : Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
1741 630360 : move32();
1742 : }
1743 :
1744 : /* Regularization of Sx */
1745 315180 : maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
1746 315180 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
1747 :
1748 945540 : FOR( i = 0; i < lengthCx; ++i )
1749 : {
1750 630360 : IF( GT_32( Kx_fx[i], limit_fx ) )
1751 : {
1752 595269 : svd_s_buffer_fx[i] = Kx_fx[i];
1753 595269 : move32();
1754 : }
1755 : ELSE
1756 : {
1757 35091 : svd_s_buffer_fx[i] = limit_fx;
1758 35091 : move32();
1759 : }
1760 : }
1761 315180 : svd_s_buffer_fx_e = Kx_e;
1762 315180 : move16();
1763 :
1764 315180 : limit_fx = 0;
1765 315180 : move32();
1766 :
1767 : /* regularized Kx-1 */
1768 :
1769 945540 : FOR( i = 0; i < lengthCx; ++i )
1770 : {
1771 630360 : IF( svd_s_buffer_fx[i] )
1772 : {
1773 : Word32 reg_fac;
1774 630354 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
1775 630354 : Kx_reg_inv_fx[i] = reg_fac;
1776 630354 : move32();
1777 630354 : temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
1778 630354 : move16();
1779 : }
1780 : ELSE
1781 : {
1782 : Word32 reg_fac;
1783 6 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
1784 6 : Kx_reg_inv_fx[i] = reg_fac;
1785 6 : move32();
1786 6 : temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
1787 6 : move16();
1788 : }
1789 : }
1790 315180 : Kx_reg_inv_e = temp_e[0];
1791 315180 : move16();
1792 630360 : FOR( i = 1; i < lengthCx; i++ )
1793 : {
1794 315180 : Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
1795 : }
1796 945540 : FOR( i = 0; i < lengthCx; i++ )
1797 : {
1798 630360 : Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
1799 630360 : move32();
1800 : }
1801 :
1802 : /************************ normalization matrix G hat **********************/
1803 :
1804 : /* Computing Q*Cx*Q' */
1805 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1806 : Word16 Cy_hat_diag_e;
1807 :
1808 315180 : matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
1809 :
1810 315180 : Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
1811 :
1812 6073020 : FOR( i = 0; i < lengthCy * lengthCx; ++i )
1813 : {
1814 5757840 : IF( GT_16( Q_Cx_e, Q_e ) )
1815 : {
1816 5757782 : Q_fx[i] = L_shr( Q_fx[i], guard_bits );
1817 5757782 : move32();
1818 : }
1819 : ELSE
1820 : {
1821 58 : Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
1822 58 : move32();
1823 : }
1824 : }
1825 :
1826 315180 : IF( GT_16( Q_Cx_e, Q_e ) )
1827 : {
1828 315177 : Q_e = add( Q_e, guard_bits );
1829 : }
1830 : ELSE
1831 : {
1832 3 : Q_Cx_e = add( Q_Cx_e, guard_bits );
1833 : }
1834 315180 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
1835 :
1836 : /* Computing Cy_hat_diag */
1837 3194100 : FOR( i = 0; i < lengthCy; ++i )
1838 : {
1839 2878920 : if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
1840 : {
1841 467716 : limit_fx = Cy_hat_diag_fx[i];
1842 467716 : move32();
1843 : }
1844 : }
1845 :
1846 315180 : limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
1847 :
1848 : /* Computing G_hat */
1849 3194100 : FOR( i = 0; i < lengthCy; ++i )
1850 : {
1851 2878920 : if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1852 : {
1853 12557 : Cy_hat_diag_fx[i] = limit_fx;
1854 12557 : move32();
1855 : }
1856 2878920 : IF( Cy_diag_fx[i] )
1857 : {
1858 1875780 : IF( Cy_hat_diag_fx[i] )
1859 : {
1860 1875780 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
1861 1875780 : move32();
1862 1875780 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
1863 1875780 : move16();
1864 1875780 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1865 1875780 : move32();
1866 : }
1867 : ELSE
1868 : {
1869 0 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1870 0 : move32();
1871 0 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1872 0 : move16();
1873 0 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1874 0 : move32();
1875 : }
1876 : }
1877 : ELSE
1878 : {
1879 1003140 : G_hat_fx[i] = 0;
1880 1003140 : move32();
1881 1003140 : temp_e[i] = 0;
1882 1003140 : move16();
1883 : }
1884 : }
1885 315180 : G_hat_e = temp_e[0];
1886 315180 : move16();
1887 2878920 : FOR( i = 1; i < lengthCy; i++ )
1888 : {
1889 2563740 : G_hat_e = s_max( G_hat_e, temp_e[i] );
1890 : }
1891 3194100 : FOR( i = 0; i < lengthCy; i++ )
1892 : {
1893 2878920 : G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
1894 2878920 : move32();
1895 : }
1896 :
1897 : /************************ Formulate optimal P **********************/
1898 :
1899 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
1900 315180 : diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1901 :
1902 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1903 :
1904 315180 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1905 :
1906 315180 : IF( LT_16( lengthCx, num_responses ) )
1907 : {
1908 3600 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
1909 :
1910 3600 : nL = num_responses;
1911 3600 : move16();
1912 3600 : nC = lengthCx;
1913 3600 : move16();
1914 3600 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1915 : }
1916 : ELSE
1917 : {
1918 311580 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
1919 :
1920 311580 : nL = lengthCx;
1921 311580 : move16();
1922 311580 : nC = num_responses;
1923 311580 : move16();
1924 311580 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1925 : }
1926 :
1927 : /* Actually Processing P */
1928 :
1929 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
1930 315180 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
1931 315180 : mat_mult_buffer1_e = 0;
1932 315180 : move16();
1933 315180 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1934 315180 : mat_mult_buffer2_e = 0;
1935 315180 : move16();
1936 :
1937 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1938 :
1939 : /************************ Formulate M **********************/
1940 :
1941 315180 : matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1942 :
1943 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
1944 :
1945 : /*********************** Energy Compensation ****************/
1946 :
1947 : /* Compute Cy_tilde = M*Cx*M' */
1948 315180 : matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1949 :
1950 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1951 :
1952 315180 : IF( EQ_16( energy_compensation_flag, 1 ) )
1953 : {
1954 315180 : adj_fx = svd_s_buffer_fx;
1955 315180 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1956 3194100 : FOR( i = 0; i < lengthCy; ++i )
1957 : {
1958 : /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
1959 2878920 : IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
1960 : {
1961 0 : adj_fx[i] = MAX_32;
1962 0 : move32();
1963 0 : temp_e[i] = 0;
1964 0 : move16();
1965 : }
1966 : ELSE
1967 : {
1968 2878920 : IF( Cy_diag_fx[i] )
1969 : {
1970 1875780 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
1971 : {
1972 1875751 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
1973 1875751 : move32();
1974 1875751 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
1975 1875751 : move16();
1976 1875751 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1977 1875751 : move32();
1978 : }
1979 : ELSE
1980 : {
1981 29 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1982 29 : move32();
1983 29 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1984 29 : move16();
1985 29 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
1986 29 : move32();
1987 : }
1988 : }
1989 : ELSE
1990 : {
1991 1003140 : adj_fx[i] = 0;
1992 1003140 : move32();
1993 1003140 : temp_e[i] = 0;
1994 1003140 : move16();
1995 : }
1996 : }
1997 :
1998 2878920 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
1999 2878920 : IF( GT_32( adj_fx[i], temp ) )
2000 : {
2001 1613 : adj_fx[i] = MAX_32;
2002 1613 : move32();
2003 1613 : temp_e[i] = 2;
2004 1613 : move16();
2005 : }
2006 : }
2007 315180 : adj_e = temp_e[0];
2008 315180 : move16();
2009 2878920 : FOR( i = 1; i < lengthCy; i++ )
2010 : {
2011 2563740 : adj_e = s_max( adj_e, temp_e[i] );
2012 : }
2013 3194100 : FOR( i = 0; i < lengthCy; i++ )
2014 : {
2015 2878920 : adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
2016 2878920 : move32();
2017 : }
2018 :
2019 315180 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
2020 :
2021 315180 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
2022 315180 : *mixing_matrix_e = mat_mult_buffer3_e;
2023 315180 : move16();
2024 : }
2025 :
2026 315180 : pop_wmops();
2027 :
2028 315180 : return out;
2029 : }
|