Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #define MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
34 :
35 : #include <stdint.h>
36 : #include <string.h>
37 : #include <stdio.h>
38 : #include <stdlib.h>
39 : #include <assert.h>
40 : #include <math.h>
41 : #include "options.h"
42 : #include "cnst.h"
43 : #include "rom_enc.h"
44 : #include "rom_com.h"
45 : #include "prot_fx.h"
46 : #include "ivas_stat_dec.h"
47 : #include "ivas_cnst.h"
48 : #include "ivas_rom_com.h"
49 : #include "ivas_rom_dec.h"
50 : #include "wmc_auto.h"
51 : #include "rom_dec.h"
52 : #include "ivas_prot_fx.h"
53 :
54 :
55 : /*-------------------------------------------------------------------*
56 : * ivas_dirac_dec_output_synthesis_cov_open()
57 : *
58 : * Sets up the state and parameters for the Covariance Synthesis
59 : *-------------------------------------------------------------------*/
60 :
61 297 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
62 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
63 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state */
64 : const Word16 max_band_decorr, /* i : uppermost frequency band where decorrelation is applied */
65 : const Word16 interp_length, /* i : length for interpolating the mixing matrices in time slots */
66 : const Word16 num_param_bands, /* i : number of parameter bands */
67 : const Word16 num_param_bands_residual, /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
68 : const Word16 nchan_in, /* i : number of input (transport) channels */
69 : const Word16 nchan_out, /* i : number of output channels */
70 : const Word32 *proto_matrix /* i : the prototype (upmix) matrix (only used if mode == 1) Q(15-proto_matrix_e) */
71 : )
72 : {
73 : Word16 idx;
74 :
75 297 : h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
76 297 : move16();
77 :
78 : /*-----------------------------------------------------------------*
79 : * memory allocation
80 : *-----------------------------------------------------------------*/
81 :
82 : /* buffer length and interpolator */
83 297 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
84 297 : IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
85 : {
86 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
87 : }
88 297 : h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
89 297 : move16();
90 : /* cov buffers */
91 3803 : FOR( idx = 0; idx < num_param_bands; idx++ )
92 : {
93 3506 : IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
94 : {
95 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
96 : }
97 3506 : h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
98 3506 : IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
99 : {
100 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
101 : }
102 3506 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
103 : {
104 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
105 : }
106 3506 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
107 3506 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
108 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
109 :
110 3506 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
111 : {
112 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
113 : }
114 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
115 3506 : h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
116 3506 : move16();
117 : }
118 14611 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
119 : {
120 14314 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
121 14314 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
122 14314 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
123 14314 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
124 : }
125 :
126 3053 : FOR( idx = 0; idx < num_param_bands_residual; idx++ )
127 : {
128 2756 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
129 : {
130 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
131 : }
132 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
133 :
134 2756 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
135 : {
136 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
137 : }
138 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
139 2756 : h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
140 2756 : move16();
141 : }
142 15361 : FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
143 : {
144 15064 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
145 15064 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
146 : }
147 :
148 297 : IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
149 : {
150 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
151 : }
152 297 : IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
153 : {
154 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
155 : }
156 :
157 297 : set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
158 297 : set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
159 :
160 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
161 : {
162 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
163 : }
164 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
165 :
166 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
167 : {
168 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
169 : }
170 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
171 :
172 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
173 : {
174 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
175 : }
176 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
177 :
178 297 : IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
179 : {
180 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
181 : }
182 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
183 :
184 : /*-----------------------------------------------------------------*
185 : * prepare processing parameters
186 : *-----------------------------------------------------------------*/
187 :
188 : /* compute interpolator */
189 297 : IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
190 : {
191 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
192 : }
193 9801 : FOR( idx = 1; idx <= interp_length; ++idx )
194 : {
195 9504 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
196 9504 : move16();
197 : }
198 297 : Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
199 297 : h_dirac_output_synthesis_params->proto_matrix_e = 5;
200 297 : move16();
201 297 : return IVAS_ERR_OK;
202 : }
203 :
204 :
205 : /*-------------------------------------------------------------------*
206 : * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
207 : *
208 : *
209 : *-------------------------------------------------------------------*/
210 :
211 318383 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
212 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */
213 : const UWord16 interp_length /* i : interpolator length */
214 : )
215 : {
216 : Word16 idx;
217 318383 : Word16 tmp, exp_diff = 0;
218 318383 : move16();
219 :
220 1583648 : FOR( idx = 1; idx <= interp_length; ++idx )
221 : {
222 1265265 : tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
223 1265265 : h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff ); // Q15
224 1265265 : move16();
225 : }
226 :
227 318383 : return;
228 : }
229 :
230 :
231 : /*-------------------------------------------------------------------*
232 : * ivas_dirac_dec_output_synthesis_cov_init()
233 : *
234 : * initialize the states for the covariance synthesis
235 : *-------------------------------------------------------------------*/
236 297 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
237 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis */
238 : const Word16 nchan_in, /* i : number of input (tranport) channels */
239 : const Word16 nchan_out, /* i : number of output channels */
240 : const Word16 n_param_bands, /* i : number of total parameter bands */
241 : const Word16 n_param_bands_res /* i : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
242 : )
243 : {
244 :
245 : Word16 idx;
246 :
247 : /* initialize buffers */
248 3803 : FOR( idx = 0; idx < n_param_bands; idx++ )
249 : {
250 3506 : set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
251 3506 : set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
252 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
253 3506 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
254 : }
255 :
256 3053 : FOR( idx = 0; idx < n_param_bands_res; idx++ )
257 : {
258 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
259 2756 : set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
260 : }
261 :
262 :
263 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
264 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
265 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
266 297 : set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
267 :
268 297 : return;
269 : }
270 :
271 :
272 : /*-------------------------------------------------------------------*
273 : * ivas_dirac_dec_output_synthesis_cov_close()
274 : *
275 : * deallocate dynamic memory in the covariance synthesis state
276 : *-------------------------------------------------------------------*/
277 :
278 297 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
279 : DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i : handle for the covariance synthesis parameters */
280 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state */
281 : )
282 : {
283 : Word16 idx;
284 :
285 : /*-----------------------------------------------------------------*
286 : * memory deallocation
287 : *-----------------------------------------------------------------*/
288 :
289 : /* free interpolator */
290 297 : IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
291 : {
292 297 : free( h_dirac_output_synthesis_params->interpolator_fx );
293 297 : h_dirac_output_synthesis_params->interpolator_fx = NULL;
294 : }
295 :
296 : /* free alpha */
297 297 : IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
298 : {
299 0 : free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
300 0 : h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
301 : }
302 :
303 : /* free proto_matrix */
304 297 : IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
305 : {
306 297 : free( h_dirac_output_synthesis_params->proto_matrix_fx );
307 297 : h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
308 : }
309 :
310 :
311 297 : IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
312 : {
313 297 : free( h_dirac_output_synthesis_state->cx_old_e );
314 297 : h_dirac_output_synthesis_state->cx_old_e = NULL;
315 : }
316 297 : IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
317 : {
318 297 : free( h_dirac_output_synthesis_state->cy_old_e );
319 297 : h_dirac_output_synthesis_state->cy_old_e = NULL;
320 : }
321 :
322 : /* free cov buffers */
323 18117 : FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
324 : {
325 17820 : IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
326 : {
327 3506 : free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
328 3506 : h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
329 : }
330 :
331 17820 : IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
332 : {
333 3506 : free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
334 3506 : h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
335 : }
336 :
337 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
338 : {
339 3506 : free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
340 3506 : h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
341 : }
342 :
343 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
344 : {
345 2756 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
346 2756 : h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
347 : }
348 :
349 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
350 : {
351 3506 : free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
352 3506 : h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
353 : }
354 :
355 17820 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
356 : {
357 2756 : free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
358 2756 : h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
359 : }
360 : }
361 :
362 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
363 : {
364 297 : free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
365 297 : h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
366 : }
367 :
368 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
369 : {
370 297 : free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
371 297 : h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
372 : }
373 :
374 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
375 : {
376 297 : free( h_dirac_output_synthesis_state->mixing_matrix_exp );
377 297 : h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
378 : }
379 :
380 297 : IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
381 : {
382 297 : free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
383 297 : h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
384 : }
385 297 : return;
386 : }
387 :
388 : /*-------------------------------------------------------------------*
389 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
390 : *
391 : * collect the multi channel input covariance for one filter bank time slot
392 : *-------------------------------------------------------------------*/
393 :
394 : /*-------------------------------------------------------------------*
395 : * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
396 : *
397 : * collect the multi channel input covariance for one filter bank time slot
398 : *-------------------------------------------------------------------*/
399 1776450 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
400 : Word32 *RealBuffer_fx, /* i : input channel filter bank samples (real part) Q(31- RealBuffer_e)*/
401 : Word16 RealBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
402 : Word32 *ImagBuffer_fx, /* i : input channel filter bank samples (imaginary part Q(ImagBuffer_e)*/
403 : Word16 ImagBuffer_e, /* i : exponent input channel filter bank samples (real part)*/
404 : Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (real part) Q(31- cx_e)*/
405 : Word16 *cx_e, /* i : exponent for accumulated input covariance (real part) */
406 : Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o : accumulated input covariance (imaginary part) Q(31- cx_imag_e)*/
407 : Word16 *cx_imag_e, /* i : exponent accumulated input covariance (imag part) */
408 : PARAM_MC_DEC_HANDLE hParamMC, /* i : handle to Parametric MC state */
409 : const Word16 param_band, /* i : parameter band */
410 : const Word16 nchan_in /* i : number of input channels */
411 : )
412 : {
413 : Word16 band_idx, ch_idx;
414 : Word16 brange[2];
415 : Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
416 : Word16 real_in_e;
417 : Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
418 : Word16 imag_in_e;
419 : Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
420 : Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
421 : Word16 output_e;
422 : Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
423 : Word16 band, num_bands;
424 : Word16 cx_fx_norm, cx_imag_fx_norm;
425 : /* estimate input covariance */
426 : /* Already stack here instead of in the process_subframe */
427 :
428 : /* collect input frame */
429 1776450 : brange[0] = hParamMC->band_grouping[param_band];
430 1776450 : move16();
431 1776450 : brange[1] = hParamMC->band_grouping[param_band + 1];
432 1776450 : move16();
433 1776450 : num_bands = sub( brange[1], brange[0] );
434 :
435 9379450 : FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
436 : {
437 7603000 : band = add( brange[0], band_idx );
438 22843320 : FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
439 : {
440 15240320 : real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
441 15240320 : move32();
442 15240320 : imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
443 15240320 : move32();
444 : }
445 : }
446 :
447 1776450 : real_in_e = RealBuffer_e;
448 1776450 : move16();
449 1776450 : imag_in_e = ImagBuffer_e;
450 1776450 : move16();
451 :
452 1776450 : Word16 buf_len = imult1616( num_bands, nchan_in );
453 :
454 1776450 : shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
455 1776450 : shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
456 :
457 1776450 : real_in_e = sub( real_in_e, shift_real );
458 1776450 : imag_in_e = sub( imag_in_e, shift_imag );
459 :
460 :
461 1776450 : output_e = s_max( real_in_e, imag_in_e );
462 :
463 1776450 : scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
464 1776450 : scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
465 :
466 1776450 : cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
467 1776450 : v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
468 :
469 1776450 : v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
470 :
471 1776450 : cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
472 1776450 : cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
473 :
474 1776450 : scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
475 1776450 : scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
476 :
477 1776450 : *cx_e = sub( tmp1_e, cx_fx_norm );
478 1776450 : move16();
479 1776450 : *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
480 1776450 : move16();
481 :
482 1776450 : return;
483 : }
484 :
485 : /*-------------------------------------------------------------------*
486 : * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
487 : *
488 : * synthesize one filter bank slot of multi channel output filter bank
489 : * samples with the covariance synthesis
490 : *-------------------------------------------------------------------*/
491 :
492 140160 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
493 : Word32 *Cldfb_RealBuffer_in_fx, /*Q6*/
494 : Word32 *Cldfb_ImagBuffer_in_fx, /*Q6*/
495 : Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (real part) Q6*/
496 : Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o : output channel filter bank samples (imaginary part) Q6*/
497 : Word32 *mixing_matrix_fx[], /* i : parameter band wise mixing matrices (direct part) Q(31-mixing_matrix_e)*/
498 : Word16 *mixing_matrix_e, /* i : parameter band wise mixing matrices (direct part) */
499 : Word32 *mixing_matrix_res_fx[], /* i : parameter band wise mixing matrices (residual part) Q(31-mixing_matrix_res_e)*/
500 : Word16 *mixing_matrix_res_e, /* i : parameter band wise mixing matrices (residual part) */
501 : const UWord16 slot_idx_sfr, /* i : time slot index for the current slot within the current subframe */
502 : const UWord16 slot_idx_tot, /* i : time slot index for the current slot within the frame */
503 : const Word16 nX, /* i : number of input channels */
504 : const Word16 nY, /* i : number of output channels */
505 : PARAM_MC_DEC_HANDLE hParamMC /* i : handle to the Parametric MC decoder state */
506 : )
507 : {
508 : Word16 param_band_idx, band, ch_idx;
509 : Word16 have_residual;
510 : Word16 brange[2];
511 : DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
512 : Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
513 : Word16 mixing_matrix_smooth_e;
514 : Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
515 140160 : Word16 mixing_matrix_res_smooth_e = 0;
516 140160 : move16();
517 : Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
518 : Word16 mixing_matrix_buffer_e;
519 : Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
520 : Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
521 : #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
522 :
523 : #else
524 : Word32 output_f_real_fx[MAX_CICP_CHANNELS];
525 : Word32 output_f_imag_fx[MAX_CICP_CHANNELS];
526 : Word16 output_f_real_e;
527 : Word16 output_f_imag_e;
528 : #endif
529 : Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
530 : Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
531 :
532 140160 : h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
533 :
534 140160 : set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
535 140160 : set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
536 : #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
537 :
538 : #else
539 : set_zero_fx( output_f_real_fx, MAX_CICP_CHANNELS );
540 : set_zero_fx( output_f_imag_fx, MAX_CICP_CHANNELS );
541 : #endif
542 140160 : set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
543 140160 : set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
544 :
545 1950080 : FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
546 : {
547 : /* final mixing */
548 1809920 : have_residual = 0;
549 1809920 : move16();
550 1809920 : brange[0] = hParamMC->band_grouping[param_band_idx];
551 1809920 : move16();
552 1809920 : brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
553 1809920 : move16();
554 :
555 1809920 : if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
556 : {
557 1454400 : have_residual = 1;
558 1454400 : move16();
559 : }
560 :
561 1809920 : v_multc_fixed( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
562 1809920 : mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
563 1809920 : move16();
564 :
565 1809920 : v_multc_fixed( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
566 1809920 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
567 1809920 : move16();
568 :
569 1809920 : v_add_fixed_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
570 :
571 1809920 : IF( have_residual )
572 : {
573 : /* residual mixing matrix interpolation*/
574 :
575 1454400 : v_multc_fixed( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
576 1454400 : mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
577 1454400 : move16();
578 :
579 1454400 : set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
580 1454400 : v_multc_fixed( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
581 1454400 : mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
582 1454400 : move16();
583 :
584 1454400 : v_add_fixed_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
585 : }
586 :
587 :
588 9553920 : FOR( band = brange[0]; band < brange[1]; band++ )
589 : {
590 7744000 : assert( band >= 0 );
591 :
592 7744000 : IF( have_residual )
593 : {
594 : /* collect diffuse prototypes */
595 2803200 : assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
596 19833600 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
597 : {
598 17030400 : diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
599 17030400 : move32();
600 17030400 : diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
601 17030400 : move32();
602 : }
603 :
604 : /* apply residual mixing */
605 : #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
606 : {
607 : Word16 shifter;
608 :
609 2803200 : shifter = 31 - mixing_matrix_res_smooth_e;
610 19833600 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
611 : {
612 : int i;
613 : Word16 idx;
614 : Word64 temp_real, temp_imag;
615 :
616 :
617 17030400 : idx = ch_idx;
618 17030400 : temp_real = 0;
619 17030400 : temp_imag = 0;
620 17030400 : move64();
621 17030400 : move64();
622 121209600 : for ( i = 0; i < nY; i++ )
623 : {
624 104179200 : temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] ) );
625 104179200 : temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) );
626 104179200 : idx += nY;
627 : }
628 17030400 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) );
629 17030400 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) );
630 : }
631 : }
632 :
633 :
634 : #else
635 :
636 : matrix_product_mant_exp_fx( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, nY, nY, 0, diff_f_real_fx, 25, nY, 1, 0, output_f_real_fx, &output_f_real_e );
637 : scale_sig32( output_f_real_fx, nY, sub( Q6, sub( Q31, output_f_real_e ) ) ); // Q6
638 :
639 :
640 : matrix_product_mant_exp_fx( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, nY, nY, 0, diff_f_imag_fx, 25, nY, 1, 0, output_f_imag_fx, &output_f_imag_e );
641 : scale_sig32( output_f_imag_fx, nY, sub( Q6, sub( Q31, output_f_imag_e ) ) ); // Q6
642 :
643 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
644 : {
645 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = output_f_real_fx[ch_idx]; // Q6
646 : move32();
647 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = output_f_imag_fx[ch_idx]; // Q6
648 : move32();
649 : }
650 : #endif
651 : }
652 : ELSE
653 : {
654 34969600 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
655 : {
656 30028800 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
657 30028800 : move32();
658 30028800 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
659 30028800 : move32();
660 : }
661 : }
662 :
663 : /* collect input signals, still in cldfb buffers */
664 23270400 : FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
665 : {
666 :
667 15526400 : input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
668 15526400 : move32();
669 15526400 : input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
670 15526400 : move32();
671 : }
672 :
673 : /* apply mixing matrix */
674 : #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
675 : {
676 : Word16 shifter;
677 7744000 : shifter = 31 - mixing_matrix_smooth_e;
678 :
679 54803200 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
680 : {
681 : int i;
682 : Word16 idx;
683 : Word64 temp_real, temp_imag;
684 :
685 :
686 47059200 : idx = ch_idx;
687 47059200 : temp_real = 0;
688 47059200 : temp_imag = 0;
689 47059200 : move64();
690 47059200 : move64();
691 141638400 : for ( i = 0; i < nX; i++ )
692 : {
693 94579200 : temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
694 94579200 : temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
695 94579200 : idx += nY;
696 : }
697 47059200 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
698 47059200 : move32();
699 47059200 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
700 47059200 : move32();
701 : }
702 : }
703 : #else
704 :
705 : matrix_product_mant_exp_fx( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, nY, nX, 0, input_f_real_fx, 25, nX, 1, 0, output_f_real_fx, &output_f_real_e );
706 : scale_sig32( output_f_real_fx, MAX_CICP_CHANNELS, sub( 6, sub( 31, output_f_real_e ) ) ); // Q6
707 :
708 :
709 : matrix_product_mant_exp_fx( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, nY, nX, 0, input_f_imag_fx, 25, nX, 1, 0, output_f_imag_fx, &output_f_imag_e );
710 : scale_sig32( output_f_imag_fx, MAX_CICP_CHANNELS, sub( 6, sub( 31, output_f_imag_e ) ) ); // Q6
711 :
712 : /* collect output */
713 : FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
714 : {
715 : Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], output_f_real_fx[ch_idx] );
716 : move32();
717 : Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], output_f_imag_fx[ch_idx] );
718 : move32();
719 : }
720 : #endif
721 : }
722 : }
723 :
724 140160 : return;
725 : }
726 :
727 : /*-------------------------------------------------------------------*
728 : * computeMixingMatrices()
729 : *
730 : * compute a mixing matrix using the convariance synthesis approach
731 : *-------------------------------------------------------------------*/
732 :
733 113120 : Word16 computeMixingMatrices_fx(
734 : const Word16 num_inputs, /* i : number of input channels */
735 : const Word16 num_outputs, /* i : number of output channels */
736 : const Word32 *Cx, /* i : input channel covariance matrix Q(31-Cx_e) */
737 : Word16 Cx_e,
738 : const Word32 *Cy, /* i : target covariance matrix Q(31-Cy_e) */
739 : Word16 Cy_e,
740 : const Word32 *Q, /* i : prototype matrix (usually a upmix matrix) Q_fx_e */
741 : Word16 Q_fx_e,
742 : const Word16 energy_compensation_flag, /* i : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
743 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values */
744 : Word16 reg_Sx_e,
745 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
746 : Word16 reg_ghat_e,
747 : Word32 *mixing_matrix_fx, /* o : resulting mixing matrix Q(31-mixing_matrix_out_e) */
748 : Word16 *mixing_matrix_out_e,
749 : Word32 *Cr_fx, /* o : residual covariance matrix Q(31-Cr_e) */
750 : Word16 *Cr_e )
751 : {
752 : Word16 i, j;
753 : Word16 out;
754 : Word16 nL, nC;
755 113120 : Word16 lengthCx = num_inputs;
756 113120 : Word16 lengthCy = num_outputs;
757 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
758 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
759 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
760 : Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
761 : Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
762 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
763 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
764 : Word16 Cx_fx_e;
765 : Word16 Cy_fx_e;
766 :
767 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
768 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
769 :
770 :
771 : Word32 limit_fx;
772 : Word16 limit_e;
773 :
774 : Word32 L_tmp;
775 : Word16 tmp_e, tmp, exp;
776 :
777 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
778 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
779 :
780 : Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
781 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
782 :
783 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
784 : Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
785 :
786 : Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
787 : Word16 Q_e;
788 :
789 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
790 : Word16 Q_Cx_e;
791 :
792 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
793 : Word16 Cy_hat_diag_e;
794 : Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
795 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
796 : Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
797 :
798 : Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
799 :
800 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
801 :
802 : Word16 mixing_matrix_e;
803 :
804 : Word16 Cr_fx_e;
805 :
806 :
807 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
808 : Word16 adj_e[MAX_OUTPUT_CHANNELS];
809 : Word32 *adj_fx_p;
810 : Word16 adj_fx_e;
811 :
812 : Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
813 113120 : push_wmops( "dirac_cov_mix_mat" );
814 :
815 113120 : out = EXIT_SUCCESS;
816 113120 : move16();
817 :
818 113120 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
819 1923040 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
820 : {
821 1809920 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
822 1809920 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
823 1809920 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
824 : }
825 :
826 :
827 113120 : Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
828 113120 : Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
829 113120 : Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
830 :
831 113120 : Q_e = Q_fx_e;
832 113120 : move16();
833 113120 : Cx_fx_e = Cx_e;
834 113120 : move16();
835 113120 : Cy_fx_e = Cy_e;
836 113120 : move16();
837 :
838 :
839 : /*-----------------------------------------------------------------*
840 : * Decomposition of Cy
841 : *-----------------------------------------------------------------*/
842 :
843 : /* Processing the SVD */
844 :
845 113120 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
846 :
847 113120 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
848 113120 : Word16 max_e = -32;
849 : /* Computing Ky */
850 688444 : FOR( i = 0; i < lengthCy; ++i )
851 : {
852 3533968 : FOR( j = 0; j < lengthCy; ++j )
853 : {
854 2958644 : tmp_e = svd_s_buffer_e[j];
855 2958644 : move16();
856 2958644 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
857 2958644 : Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
858 2958644 : move32();
859 2958644 : Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
860 2958644 : move16();
861 2958644 : max_e = s_max( max_e, tmp_e );
862 : }
863 : }
864 3071764 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
865 : {
866 2958644 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
867 2958644 : move32();
868 2958644 : Ky_fx_e[i] = max_e;
869 2958644 : move16();
870 : }
871 :
872 : /*-----------------------------------------------------------------*
873 : * Decomposition of Cx
874 : *-----------------------------------------------------------------*/
875 :
876 : /* Processing the SVD */
877 :
878 113120 : mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
879 :
880 113120 : svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
881 113120 : max_e = -32;
882 339920 : FOR( i = 0; i < lengthCx; ++i )
883 : {
884 682080 : FOR( j = 0; j < lengthCx; ++j )
885 : {
886 455280 : tmp_e = svd_s_buffer_e[j];
887 455280 : move16();
888 455280 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
889 455280 : Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
890 455280 : move32();
891 455280 : Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
892 455280 : move16();
893 455280 : max_e = s_max( max_e, tmp_e );
894 : }
895 : }
896 568400 : FOR( i = 0; i < lengthCx * lengthCx; ++i )
897 : {
898 455280 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
899 455280 : move32();
900 455280 : Kx_fx_e[i] = max_e;
901 455280 : move16();
902 : }
903 :
904 339920 : FOR( i = 0; i < lengthCx; ++i )
905 : {
906 226800 : tmp_e = svd_s_buffer_e[i];
907 226800 : move16();
908 226800 : svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
909 226800 : move32();
910 226800 : svd_s_buffer_e[i] = tmp_e;
911 226800 : move16();
912 : }
913 :
914 : /*-----------------------------------------------------------------*
915 : * Regularization of Sx
916 : *-----------------------------------------------------------------*/
917 :
918 113120 : limit_fx = svd_s_buffer_fx[0];
919 113120 : move32();
920 113120 : limit_e = svd_s_buffer_e[0];
921 113120 : move16();
922 226800 : FOR( i = 1; i < lengthCx; i++ )
923 : {
924 113680 : IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) > 0 )
925 : {
926 0 : limit_fx = svd_s_buffer_fx[i];
927 0 : move32();
928 0 : limit_e = svd_s_buffer_e[i];
929 0 : move16();
930 : }
931 : }
932 :
933 113120 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx );
934 113120 : limit_e = add( limit_e, reg_Sx_e );
935 113120 : limit_fx = L_add( L_tmp, EPSILON_FX );
936 :
937 339920 : FOR( i = 0; i < lengthCx; ++i )
938 : {
939 226800 : IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) < 0 )
940 : {
941 49196 : svd_s_buffer_fx[i] = limit_fx;
942 49196 : move32();
943 49196 : svd_s_buffer_e[i] = limit_e;
944 49196 : move16();
945 : }
946 : }
947 :
948 :
949 113120 : limit_fx = 0;
950 113120 : move32();
951 113120 : limit_e = 0;
952 113120 : move16();
953 :
954 :
955 : /*-----------------------------------------------------------------*
956 : * regularized Kx-1
957 : *-----------------------------------------------------------------*/
958 :
959 339920 : FOR( i = 0; i < lengthCx; ++i )
960 : {
961 : Word16 scale, reg_fac_fx;
962 226800 : reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
963 226800 : scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
964 682080 : FOR( j = 0; j < lengthCx; ++j )
965 : {
966 455280 : Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
967 455280 : move32();
968 455280 : Kx_reg_inv_e[i + j * lengthCx] = scale;
969 455280 : move16();
970 : }
971 : }
972 :
973 : /*-----------------------------------------------------------------*
974 : * normalization matrix G hat
975 : *-----------------------------------------------------------------*/
976 :
977 : /* Computing Q*Cx*Q' */
978 :
979 113120 : matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
980 :
981 113120 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
982 :
983 :
984 688444 : FOR( i = 0; i < lengthCy; ++i )
985 : {
986 575324 : IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
987 : {
988 289583 : limit_fx = Cy_hat_diag_fx[i];
989 289583 : move32();
990 289583 : limit_e = Cy_hat_diag_e;
991 289583 : move16();
992 : }
993 : }
994 113120 : L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
995 113120 : limit_fx = L_add( L_tmp, EPSILON_FX );
996 113120 : limit_e = add( limit_e, reg_ghat_e );
997 :
998 688444 : FOR( i = 0; i < lengthCy; ++i )
999 : {
1000 575324 : Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
1001 575324 : move16();
1002 575324 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_buff_e[i] ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1003 : {
1004 210 : Cy_hat_diag_fx[i] = limit_fx;
1005 210 : move32();
1006 210 : Cy_hat_diag_buff_e[i] = limit_e;
1007 210 : move16();
1008 : }
1009 :
1010 575324 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
1011 575324 : exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
1012 575324 : L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
1013 575324 : G_hat_fx[i] = L_tmp;
1014 575324 : move32();
1015 575324 : G_hat_buff_e[i] = exp;
1016 575324 : move16();
1017 : }
1018 :
1019 :
1020 : /*-----------------------------------------------------------------*
1021 : * Formulate optimal P
1022 : *-----------------------------------------------------------------*/
1023 :
1024 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
1025 :
1026 : Word16 mat_mult_buffer1_fx_e;
1027 :
1028 : Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1029 :
1030 113120 : matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1031 :
1032 113120 : matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
1033 :
1034 113120 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1035 :
1036 113120 : IF( LT_16( lengthCx, lengthCy ) )
1037 : {
1038 113120 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
1039 113120 : nL = lengthCy;
1040 113120 : move16();
1041 113120 : nC = lengthCx;
1042 113120 : move16();
1043 113120 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1044 : }
1045 : ELSE
1046 : {
1047 0 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1048 0 : nL = lengthCx;
1049 0 : move16();
1050 0 : nC = lengthCy;
1051 0 : move16();
1052 0 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1053 : }
1054 :
1055 : /* Actually Processing P */
1056 :
1057 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
1058 : truncates V to num_input_channel columns */
1059 :
1060 113120 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1061 113120 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1062 :
1063 113120 : mat_mult_buffer1_fx_e = 0;
1064 113120 : move16();
1065 113120 : mat_mult_buffer2_e = 0;
1066 113120 : move16();
1067 :
1068 113120 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
1069 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1070 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1071 :
1072 : /************************ Formulate M **********************/
1073 :
1074 113120 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
1075 :
1076 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
1077 :
1078 : Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1079 113120 : set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1080 :
1081 113120 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
1082 :
1083 : /*-----------------------------------------------------------------*
1084 : * Formulate Cr
1085 : *-----------------------------------------------------------------*/
1086 :
1087 : /* Compute Cy_tilde = M*Cx*M' */
1088 :
1089 :
1090 : Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
1091 113120 : set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
1092 113120 : matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
1093 :
1094 113120 : matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
1095 :
1096 113120 : exp = mixing_matrix_fx_e[0];
1097 113120 : move16();
1098 1156848 : FOR( i = 1; i < lengthCy * lengthCx; i++ )
1099 : {
1100 1043728 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1101 : {
1102 81545 : exp = mixing_matrix_fx_e[i];
1103 81545 : move16();
1104 : }
1105 : }
1106 :
1107 1269968 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1108 : {
1109 1156848 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1110 1156848 : move32();
1111 : }
1112 :
1113 113120 : mixing_matrix_e = exp;
1114 113120 : move16();
1115 :
1116 113120 : Cr_p_fx = Cr_fx;
1117 113120 : Cy_p_fx = Cy_fx;
1118 113120 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1119 : Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1120 688444 : FOR( i = 0; i < lengthCy; ++i )
1121 : {
1122 3533968 : FOR( j = 0; j < lengthCy; ++j )
1123 : {
1124 :
1125 2958644 : *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
1126 2958644 : move32();
1127 2958644 : Cr_p_fx++;
1128 2958644 : Cy_p_fx++;
1129 2958644 : Cy_tilde_p_fx++;
1130 : }
1131 :
1132 : /* Avoid Meaningless negative main diagonal elements */
1133 575324 : IF( Cr_fx[i + ( i * lengthCy )] < 0 )
1134 : {
1135 62266 : Cr_fx[i + ( i * lengthCy )] = 0;
1136 62266 : move32();
1137 62266 : Cr_e_arr[i + ( i * lengthCy )] = 0;
1138 62266 : move16();
1139 : }
1140 : }
1141 :
1142 113120 : exp = Cr_e_arr[0];
1143 113120 : move16();
1144 2958644 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1145 : {
1146 2845524 : if ( LT_16( exp, Cr_e_arr[i] ) )
1147 : {
1148 103413 : exp = Cr_e_arr[i];
1149 103413 : move16();
1150 : }
1151 : }
1152 :
1153 3071764 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1154 : {
1155 2958644 : Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
1156 2958644 : move32();
1157 : }
1158 :
1159 113120 : Cr_fx_e = exp;
1160 113120 : move16();
1161 :
1162 113120 : exp = mat_mult_buffer2_fx_e[0];
1163 113120 : move16();
1164 2958644 : FOR( i = 1; i < lengthCy * lengthCy; i++ )
1165 : {
1166 2845524 : if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
1167 : {
1168 156985 : exp = mat_mult_buffer2_fx_e[i];
1169 156985 : move16();
1170 : }
1171 : }
1172 :
1173 3071764 : FOR( i = 0; i < lengthCy * lengthCy; i++ )
1174 : {
1175 2958644 : mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
1176 2958644 : move32();
1177 : }
1178 :
1179 113120 : mat_mult_buffer2_e = exp;
1180 113120 : move16();
1181 :
1182 : /*-----------------------------------------------------------------*
1183 : * Energy Compensation
1184 : *-----------------------------------------------------------------*/
1185 :
1186 113120 : IF( EQ_16( energy_compensation_flag, 1 ) )
1187 : {
1188 22220 : adj_fx_p = svd_s_buffer_fx;
1189 22220 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
1190 :
1191 135100 : FOR( i = 0; i < lengthCy; ++i )
1192 : {
1193 : /* Avoid correction for very small energies,
1194 : main diagonal elements of Cy_tilde_p may be negative */
1195 112880 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
1196 : {
1197 0 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1198 0 : move32();
1199 0 : adj_e[i] = 1;
1200 0 : move16();
1201 : }
1202 : ELSE
1203 : {
1204 112880 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
1205 112880 : exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
1206 112880 : L_tmp = L_deposit_h( tmp );
1207 112880 : L_tmp = Sqrt32( L_tmp, &exp );
1208 112880 : adj_fx_p[i] = L_tmp;
1209 112880 : move32();
1210 112880 : adj_e[i] = exp;
1211 112880 : move16();
1212 : }
1213 :
1214 112880 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
1215 112880 : IF( GT_32( adj_fx_p[i], temp ) )
1216 : {
1217 4 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1218 4 : move32();
1219 4 : adj_e[i] = 3;
1220 4 : move16();
1221 : }
1222 : }
1223 :
1224 22220 : exp = adj_e[0];
1225 22220 : move16();
1226 112880 : FOR( i = 1; i < lengthCy; i++ )
1227 : {
1228 90660 : if ( LT_16( exp, adj_e[i] ) )
1229 : {
1230 1050 : exp = adj_e[i];
1231 1050 : move16();
1232 : }
1233 : }
1234 :
1235 135100 : FOR( i = 0; i < lengthCy; i++ )
1236 : {
1237 112880 : adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
1238 112880 : move32();
1239 : }
1240 22220 : adj_fx_e = exp;
1241 22220 : move16();
1242 :
1243 22220 : diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1244 :
1245 22220 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
1246 22220 : mixing_matrix_e = mat_mult_buffer3_e;
1247 22220 : move16();
1248 : }
1249 :
1250 113120 : *mixing_matrix_out_e = mixing_matrix_e;
1251 113120 : move16();
1252 113120 : *Cr_e = Cr_fx_e;
1253 113120 : move16();
1254 113120 : pop_wmops();
1255 :
1256 113120 : return out;
1257 : }
1258 :
1259 : /*-------------------------------------------------------------------*
1260 : * computeMixingMatricesResidual()
1261 : *
1262 : * compute a residual mixing matrix using the covariance synthesis approach
1263 : *-------------------------------------------------------------------*/
1264 :
1265 90900 : Word16 computeMixingMatricesResidual_fx(
1266 : const Word32 num_outputs, /* i : number of output channels */
1267 : const Word32 *Cx_fx, /* i : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e) */
1268 : const Word16 Cx_e,
1269 : const Word32 *Cy_fx, /* i : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
1270 : const Word16 Cy_fx_e,
1271 : const Word32 reg_Sx_fx, /* i : regularization factor for the input channel singular values Q(31-reg_Sx_e) */
1272 : const Word16 reg_Sx_e,
1273 : const Word32 reg_ghat_fx, /* i : regularization factor for the normalization matrix Q(31-reg_ghat_e) */
1274 : const Word16 reg_ghat_e,
1275 : Word32 *mixing_matrix_fx, /* o : resulting residual mixing matrix Q(31-mixing_matrix_ret_e) */
1276 : Word16 *mixing_matrix_ret_e )
1277 : {
1278 : Word16 i, j;
1279 : Word16 out, lengthCx, lengthCy;
1280 90900 : out = EXIT_SUCCESS;
1281 90900 : move16();
1282 90900 : lengthCx = extract_l( num_outputs );
1283 90900 : lengthCy = extract_l( num_outputs );
1284 :
1285 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1286 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1287 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1288 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
1289 :
1290 : Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1291 :
1292 90900 : push_wmops( "dirac_cov_mix_mat_r" );
1293 :
1294 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1295 : Word32 adj_fx[MAX_OUTPUT_CHANNELS];
1296 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1297 90900 : Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
1298 90900 : move16();
1299 :
1300 90900 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
1301 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1302 : Word32 L_tmp;
1303 : Word16 tmp_e;
1304 : Word16 tmp, scale;
1305 : Word32 div_tmp;
1306 : Word16 exp;
1307 :
1308 : Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
1309 : Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1310 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1311 : Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
1312 :
1313 : Word32 limit_fx;
1314 : Word16 limit_e;
1315 :
1316 : Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
1317 : Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
1318 :
1319 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1320 : Word16 Cy_hat_diag_e;
1321 :
1322 : Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
1323 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1324 : Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
1325 :
1326 : Word32 *adj_fx_p;
1327 : Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
1328 :
1329 : Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
1330 : Word16 Cy_tilde_e;
1331 :
1332 : /*-----------------------------------------------------------------*
1333 : * Decomposition of Cy
1334 : *-----------------------------------------------------------------*/
1335 :
1336 : /* Processing the SVD */
1337 :
1338 : /* linear array to svd buffer */
1339 90900 : mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
1340 :
1341 90900 : svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
1342 :
1343 : /* Computing Ky */
1344 90900 : Word16 max_e = -32;
1345 553344 : FOR( i = 0; i < lengthCy; ++i )
1346 : {
1347 2841348 : FOR( j = 0; j < lengthCy; ++j )
1348 : {
1349 2378904 : tmp_e = svd_s_buffer_e[j];
1350 2378904 : move16();
1351 2378904 : L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
1352 2378904 : Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
1353 2378904 : move32();
1354 2378904 : Ky_fx_e[i + j * lengthCy] = tmp_e;
1355 2378904 : move16();
1356 2378904 : max_e = s_max( max_e, tmp_e );
1357 : }
1358 : }
1359 :
1360 2469804 : FOR( i = 0; i < lengthCy * lengthCy; ++i )
1361 : {
1362 2378904 : Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
1363 2378904 : move32();
1364 2378904 : Ky_fx_e[i] = max_e;
1365 2378904 : move16();
1366 : }
1367 :
1368 : /*-----------------------------------------------------------------*
1369 : * Decomposition of Cx
1370 : *-----------------------------------------------------------------*/
1371 :
1372 : /* Processing the SVD of Cx*/
1373 : /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
1374 : * would be just indicating the sorting index, so go straight to Kx as the
1375 : * square root of the diagonal of Cx */
1376 :
1377 : /* Computing Kx */
1378 90900 : max_e = -32;
1379 553344 : FOR( i = 0; i < lengthCx; ++i )
1380 : {
1381 462444 : exp = Cx_e;
1382 462444 : move16();
1383 462444 : Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
1384 462444 : move32();
1385 462444 : Kx_fx_e[i] = exp;
1386 462444 : move16();
1387 462444 : max_e = s_max( max_e, exp );
1388 : }
1389 :
1390 553344 : FOR( i = 0; i < lengthCx; ++i )
1391 : {
1392 462444 : Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
1393 462444 : move32();
1394 462444 : Kx_fx_e[i] = max_e;
1395 462444 : move16();
1396 : }
1397 :
1398 : /*-----------------------------------------------------------------*
1399 : * Regularization of Sx
1400 : *-----------------------------------------------------------------*/
1401 :
1402 90900 : limit_fx = Kx_fx[0];
1403 90900 : move32();
1404 :
1405 462444 : FOR( i = 1; i < lengthCx; i++ )
1406 : {
1407 371544 : IF( GT_32( Kx_fx[i], limit_fx ) )
1408 : {
1409 141665 : limit_fx = Kx_fx[i];
1410 141665 : move32();
1411 : }
1412 : }
1413 :
1414 90900 : L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
1415 90900 : L_tmp = L_add( L_tmp, EPSILLON_FX );
1416 90900 : limit_fx = L_tmp;
1417 90900 : move16();
1418 90900 : limit_e = add( Kx_fx_e[0], reg_Sx_e );
1419 :
1420 553344 : FOR( i = 0; i < lengthCx; ++i )
1421 : {
1422 462444 : IF( BASOP_Util_Cmp_Mant32Exp( Kx_fx[i], Kx_fx_e[i], limit_fx, limit_e ) > 0 )
1423 : {
1424 461375 : div_tmp = Kx_fx[i];
1425 461375 : move32();
1426 461375 : exp = Kx_fx_e[i];
1427 461375 : move16();
1428 : }
1429 : ELSE
1430 : {
1431 1069 : div_tmp = limit_fx;
1432 1069 : move32();
1433 1069 : exp = limit_e;
1434 1069 : move16();
1435 : }
1436 462444 : tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
1437 462444 : scale = add( scale, sub( Q1, exp ) );
1438 :
1439 462444 : Kx_reg_inv_fx[i] = L_deposit_h( tmp );
1440 462444 : move32();
1441 462444 : Kx_reg_inv_e[i] = scale;
1442 462444 : move16();
1443 : }
1444 :
1445 90900 : limit_fx = 0;
1446 90900 : move32();
1447 90900 : limit_e = 0;
1448 90900 : move16();
1449 :
1450 : /*-----------------------------------------------------------------*
1451 : * regularized Kx-1
1452 : *-----------------------------------------------------------------*/
1453 :
1454 : /*-----------------------------------------------------------------*
1455 : * normalization matrix G hat
1456 : *-----------------------------------------------------------------*/
1457 :
1458 : /* Computing Cy_hat_diag */
1459 90900 : Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
1460 90900 : Cy_hat_diag_e = Cx_e;
1461 90900 : move16();
1462 :
1463 553344 : FOR( i = 0; i < lengthCy; ++i )
1464 : {
1465 462444 : IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
1466 : {
1467 232531 : limit_fx = Cy_hat_diag_fx[i];
1468 232531 : move32();
1469 232531 : limit_e = Cy_hat_diag_e;
1470 232531 : move16();
1471 : }
1472 : }
1473 :
1474 90900 : L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
1475 90900 : limit_fx = L_add( L_tmp, EPSILON_FX );
1476 90900 : limit_e = add( limit_e, reg_ghat_e );
1477 :
1478 : /* Computing G_hat */
1479 553344 : FOR( i = 0; i < lengthCy; ++i )
1480 : {
1481 462444 : Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
1482 462444 : move16();
1483 462444 : IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_e ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1484 : {
1485 210 : Cy_hat_diag_fx[i] = limit_fx;
1486 210 : move32();
1487 210 : Cy_hat_diag_fx_e[i] = limit_e;
1488 210 : move16();
1489 : }
1490 462444 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
1491 462444 : scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
1492 462444 : L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
1493 :
1494 462444 : G_hat_fx[i] = L_tmp;
1495 462444 : move32();
1496 462444 : G_hat_e[i] = scale;
1497 462444 : move16();
1498 : }
1499 :
1500 : /*-----------------------------------------------------------------*
1501 : * Formulate optimal P
1502 : *-----------------------------------------------------------------*/
1503 :
1504 553344 : FOR( i = 0; i < num_outputs; i++ )
1505 : {
1506 462444 : L_tmp = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
1507 462444 : Kx_fx[i] = L_tmp;
1508 462444 : move32();
1509 462444 : Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
1510 462444 : move16();
1511 : }
1512 :
1513 553344 : FOR( i = 0; i < num_outputs; i++ )
1514 : {
1515 : Word32 fac_fx;
1516 462444 : fac_fx = Kx_fx[i];
1517 462444 : move32();
1518 :
1519 2841348 : FOR( j = 0; j < num_outputs; j++ )
1520 : {
1521 2378904 : L_tmp = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e))
1522 2378904 : mat_mult_buffer1_fx[i + j * num_outputs] = L_tmp;
1523 2378904 : move32();
1524 2378904 : mat_mult_buffer1_buff_e[i + j * num_outputs] = extract_l( L_add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] ) );
1525 2378904 : move16();
1526 : }
1527 : }
1528 :
1529 90900 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
1530 90900 : move16();
1531 2378904 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1532 : {
1533 2288004 : if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
1534 : {
1535 24305 : mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
1536 24305 : move16();
1537 : }
1538 : }
1539 :
1540 2469804 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1541 : {
1542 2378904 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
1543 2378904 : move32();
1544 : }
1545 :
1546 90900 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
1547 :
1548 90900 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
1549 :
1550 : /* Actually Processing P */
1551 :
1552 90900 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
1553 90900 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1554 90900 : mat_mult_buffer1_e = 0;
1555 90900 : move16();
1556 90900 : mat_mult_buffer2_e = 0;
1557 90900 : move16();
1558 :
1559 90900 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
1560 : mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
1561 : mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1562 :
1563 : /*-----------------------------------------------------------------*
1564 : * Formulate M
1565 : *-----------------------------------------------------------------*/
1566 :
1567 :
1568 90900 : matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1569 90900 : set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
1570 :
1571 : Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
1572 :
1573 553344 : FOR( i = 0; i < num_outputs; i++ )
1574 : {
1575 : Word32 fac_fx;
1576 462444 : fac_fx = Kx_reg_inv_fx[i];
1577 462444 : move32();
1578 :
1579 2841348 : FOR( j = 0; j < num_outputs; j++ )
1580 : {
1581 2378904 : L_tmp = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e)
1582 2378904 : mixing_matrix_fx[j + i * num_outputs] = L_tmp;
1583 2378904 : move32();
1584 2378904 : mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
1585 2378904 : move16();
1586 : }
1587 : }
1588 :
1589 : /*-----------------------------------------------------------------*
1590 : * Formulate Cr
1591 : *-----------------------------------------------------------------*/
1592 :
1593 : /* Compute Cy_tilde = M*Cx*M' */
1594 :
1595 : Word16 Cx_e_arr[MAX_CICP_CHANNELS];
1596 90900 : set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
1597 :
1598 90900 : matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
1599 :
1600 90900 : exp = mixing_matrix_fx_e[0];
1601 90900 : move16();
1602 2378904 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1603 : {
1604 2288004 : if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
1605 : {
1606 9848 : exp = mixing_matrix_fx_e[i];
1607 9848 : move16();
1608 : }
1609 : }
1610 :
1611 2469804 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1612 : {
1613 2378904 : mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
1614 2378904 : move32();
1615 : }
1616 90900 : mixing_matrix_e = exp;
1617 90900 : move16();
1618 :
1619 90900 : exp = mat_mult_buffer1_buff_e[0];
1620 90900 : move16();
1621 2378904 : FOR( i = 1; i < num_outputs * num_outputs; i++ )
1622 : {
1623 2288004 : if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
1624 : {
1625 9848 : exp = mat_mult_buffer1_buff_e[i];
1626 9848 : move16();
1627 : }
1628 : }
1629 :
1630 2469804 : FOR( i = 0; i < num_outputs * num_outputs; i++ )
1631 : {
1632 2378904 : mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
1633 2378904 : move32();
1634 : }
1635 90900 : mat_mult_buffer1_e = exp;
1636 90900 : move16();
1637 :
1638 90900 : matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
1639 :
1640 : /*-----------------------------------------------------------------*
1641 : * Energy Compensation
1642 : *-----------------------------------------------------------------*/
1643 :
1644 90900 : adj_fx_p = svd_s_buffer_fx;
1645 :
1646 553344 : FOR( i = 0; i < lengthCy; ++i )
1647 : {
1648 462444 : tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
1649 462444 : scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
1650 462444 : adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
1651 462444 : move32();
1652 462444 : adj_buff_e[i] = scale;
1653 462444 : move16();
1654 462444 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
1655 462444 : IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
1656 : {
1657 1 : adj_fx_p[i] = 1073741824; // 1.0f in Q30
1658 1 : move32();
1659 1 : adj_buff_e[i] = 3;
1660 1 : move16();
1661 : }
1662 : }
1663 :
1664 90900 : adj_e = adj_buff_e[0];
1665 90900 : move16();
1666 :
1667 462444 : FOR( i = 1; i < lengthCy; i++ )
1668 : {
1669 371544 : if ( LT_16( adj_e, adj_buff_e[i] ) )
1670 : {
1671 36546 : adj_e = adj_buff_e[i];
1672 36546 : move16();
1673 : }
1674 : }
1675 :
1676 :
1677 553344 : FOR( i = 0; i < lengthCy; i++ )
1678 : {
1679 462444 : adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
1680 462444 : move32();
1681 : }
1682 :
1683 90900 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1684 90900 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
1685 90900 : *mixing_matrix_ret_e = mat_mult_buffer3_e;
1686 90900 : move16();
1687 :
1688 90900 : pop_wmops();
1689 :
1690 90900 : return out;
1691 : }
1692 :
1693 :
1694 : /*-------------------------------------------------------------------*
1695 : * computeMixingMatricesISM()
1696 : *
1697 : *
1698 : *-------------------------------------------------------------------*/
1699 315180 : Word16 computeMixingMatricesISM_fx(
1700 : const Word16 num_inputs,
1701 : const Word16 num_responses,
1702 : const Word16 num_outputs,
1703 : const Word32 *responses_fx, /*Q(31-responses_e) */
1704 : const Word16 responses_e,
1705 : const Word32 *ener_fx, /*Q(31-ener_e) */
1706 : const Word16 ener_e,
1707 : const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
1708 : const Word16 Cx_diag_e,
1709 : const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
1710 : const Word16 Cy_diag_e,
1711 : const Word16 *Q_16fx, // Q15
1712 : const Word16 energy_compensation_flag,
1713 : const Word32 reg_Sx_fx, /*Q0*/
1714 : const Word32 reg_ghat_fx, /*Q0*/
1715 : Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
1716 : Word16 *mixing_matrix_e )
1717 : {
1718 : Word16 i, out;
1719 : Word16 lengthCx, lengthCy;
1720 : Word16 nL, nC;
1721 :
1722 : Word32 *Cy_tilde_p_fx;
1723 : Word32 *adj_fx;
1724 : Word32 limit_fx;
1725 :
1726 : Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1727 : Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1728 : Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1729 : Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1730 : Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
1731 : Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1732 : Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
1733 : Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
1734 : Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
1735 : Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
1736 :
1737 : Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1738 : Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1739 : Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
1740 : Word16 svd_s_buffer_fx_e;
1741 : Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
1742 : Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
1743 :
1744 : Word16 temp_e[MAX_OUTPUT_CHANNELS];
1745 315180 : push_wmops( "dirac_cov_mix_mat" );
1746 :
1747 315180 : out = EXIT_SUCCESS;
1748 315180 : move16();
1749 315180 : lengthCx = num_inputs;
1750 315180 : move16();
1751 315180 : lengthCy = num_outputs;
1752 315180 : move16();
1753 :
1754 6073020 : FOR( i = 0; i < lengthCy * lengthCx; i++ )
1755 : {
1756 5757840 : IF( EQ_16( Q_16fx[i], MAX_16 ) )
1757 : {
1758 2575200 : Q_fx[i] = MAX_32;
1759 2575200 : move32();
1760 : }
1761 : ELSE
1762 : {
1763 3182640 : Q_fx[i] = L_deposit_h( Q_16fx[i] );
1764 3182640 : move32();
1765 : }
1766 : }
1767 315180 : Q_e = 0;
1768 315180 : move16();
1769 315180 : set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
1770 5358060 : FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
1771 : {
1772 5042880 : set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1773 5042880 : set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1774 5042880 : set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
1775 : }
1776 :
1777 : /* Decomposition of Cy = Ky*Ky' */
1778 : /* Ky = responses*diag(ener) */
1779 315180 : matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
1780 :
1781 : /* Decomposition of Cx -> Computing Kx */
1782 315180 : set16_fx( temp_e, Cx_diag_e, lengthCx );
1783 315180 : v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
1784 315180 : Kx_e = temp_e[0];
1785 315180 : move16();
1786 630360 : FOR( i = 1; i < lengthCx; i++ )
1787 : {
1788 315180 : Kx_e = s_max( Kx_e, temp_e[i] );
1789 : }
1790 945540 : FOR( i = 0; i < lengthCx; i++ )
1791 : {
1792 630360 : Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
1793 630360 : move32();
1794 : }
1795 :
1796 : /* Regularization of Sx */
1797 315180 : maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
1798 315180 : limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
1799 :
1800 945540 : FOR( i = 0; i < lengthCx; ++i )
1801 : {
1802 630360 : IF( GT_32( Kx_fx[i], limit_fx ) )
1803 : {
1804 595151 : svd_s_buffer_fx[i] = Kx_fx[i];
1805 595151 : move32();
1806 : }
1807 : ELSE
1808 : {
1809 35209 : svd_s_buffer_fx[i] = limit_fx;
1810 35209 : move32();
1811 : }
1812 : }
1813 315180 : svd_s_buffer_fx_e = Kx_e;
1814 315180 : move16();
1815 :
1816 315180 : limit_fx = 0;
1817 315180 : move32();
1818 :
1819 : /* regularized Kx-1 */
1820 :
1821 945540 : FOR( i = 0; i < lengthCx; ++i )
1822 : {
1823 630360 : IF( svd_s_buffer_fx[i] )
1824 : {
1825 : Word32 reg_fac;
1826 630356 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
1827 630356 : Kx_reg_inv_fx[i] = reg_fac;
1828 630356 : move32();
1829 630356 : temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
1830 630356 : move16();
1831 : }
1832 : ELSE
1833 : {
1834 : Word32 reg_fac;
1835 4 : reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
1836 4 : Kx_reg_inv_fx[i] = reg_fac;
1837 4 : move32();
1838 4 : temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
1839 4 : move16();
1840 : }
1841 : }
1842 315180 : Kx_reg_inv_e = temp_e[0];
1843 315180 : move16();
1844 630360 : FOR( i = 1; i < lengthCx; i++ )
1845 : {
1846 315180 : Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
1847 : }
1848 945540 : FOR( i = 0; i < lengthCx; i++ )
1849 : {
1850 630360 : Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
1851 630360 : move32();
1852 : }
1853 :
1854 : /************************ normalization matrix G hat **********************/
1855 :
1856 : /* Computing Q*Cx*Q' */
1857 : Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
1858 : Word16 Cy_hat_diag_e;
1859 :
1860 315180 : matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
1861 :
1862 315180 : Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
1863 :
1864 6073020 : FOR( i = 0; i < lengthCy * lengthCx; ++i )
1865 : {
1866 5757840 : IF( GT_16( Q_Cx_e, Q_e ) )
1867 : {
1868 5757796 : Q_fx[i] = L_shr( Q_fx[i], guard_bits );
1869 5757796 : move32();
1870 : }
1871 : ELSE
1872 : {
1873 44 : Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
1874 44 : move32();
1875 : }
1876 : }
1877 :
1878 315180 : IF( GT_16( Q_Cx_e, Q_e ) )
1879 : {
1880 315178 : Q_e = add( Q_e, guard_bits );
1881 : }
1882 : ELSE
1883 : {
1884 2 : Q_Cx_e = add( Q_Cx_e, guard_bits );
1885 : }
1886 315180 : matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
1887 :
1888 : /* Computing Cy_hat_diag */
1889 3194100 : FOR( i = 0; i < lengthCy; ++i )
1890 : {
1891 2878920 : if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
1892 : {
1893 467357 : limit_fx = Cy_hat_diag_fx[i];
1894 467357 : move32();
1895 : }
1896 : }
1897 :
1898 315180 : limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
1899 :
1900 : /* Computing G_hat */
1901 3194100 : FOR( i = 0; i < lengthCy; ++i )
1902 : {
1903 2878920 : if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
1904 : {
1905 13691 : Cy_hat_diag_fx[i] = limit_fx;
1906 13691 : move32();
1907 : }
1908 2878920 : IF( Cy_diag_fx[i] )
1909 : {
1910 1875173 : IF( Cy_hat_diag_fx[i] )
1911 : {
1912 1875173 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
1913 1875173 : move32();
1914 1875173 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
1915 1875173 : move16();
1916 1875173 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1917 1875173 : move32();
1918 : }
1919 : ELSE
1920 : {
1921 0 : G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
1922 0 : move32();
1923 0 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
1924 0 : move16();
1925 0 : G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
1926 0 : move32();
1927 : }
1928 : }
1929 : ELSE
1930 : {
1931 1003747 : G_hat_fx[i] = 0;
1932 1003747 : move32();
1933 1003747 : temp_e[i] = 0;
1934 1003747 : move16();
1935 : }
1936 : }
1937 315180 : G_hat_e = temp_e[0];
1938 315180 : move16();
1939 2878920 : FOR( i = 1; i < lengthCy; i++ )
1940 : {
1941 2563740 : G_hat_e = s_max( G_hat_e, temp_e[i] );
1942 : }
1943 3194100 : FOR( i = 0; i < lengthCy; i++ )
1944 : {
1945 2878920 : G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
1946 2878920 : move32();
1947 : }
1948 :
1949 : /************************ Formulate optimal P **********************/
1950 :
1951 : /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
1952 315180 : diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1953 :
1954 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
1955 :
1956 315180 : matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1957 :
1958 315180 : IF( LT_16( lengthCx, num_responses ) )
1959 : {
1960 3600 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
1961 :
1962 3600 : nL = num_responses;
1963 3600 : move16();
1964 3600 : nC = lengthCx;
1965 3600 : move16();
1966 3600 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
1967 : }
1968 : ELSE
1969 : {
1970 311580 : mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
1971 :
1972 311580 : nL = lengthCx;
1973 311580 : move16();
1974 311580 : nC = num_responses;
1975 311580 : move16();
1976 311580 : svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
1977 : }
1978 :
1979 : /* Actually Processing P */
1980 :
1981 : /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
1982 315180 : svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
1983 315180 : mat_mult_buffer1_e = 0;
1984 315180 : move16();
1985 315180 : svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
1986 315180 : mat_mult_buffer2_e = 0;
1987 315180 : move16();
1988 :
1989 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
1990 :
1991 : /************************ Formulate M **********************/
1992 :
1993 315180 : matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
1994 :
1995 315180 : matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
1996 :
1997 : /*********************** Energy Compensation ****************/
1998 :
1999 : /* Compute Cy_tilde = M*Cx*M' */
2000 315180 : matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
2001 :
2002 315180 : matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
2003 :
2004 315180 : IF( EQ_16( energy_compensation_flag, 1 ) )
2005 : {
2006 315180 : adj_fx = svd_s_buffer_fx;
2007 315180 : Cy_tilde_p_fx = mat_mult_buffer2_fx;
2008 3194100 : FOR( i = 0; i < lengthCy; ++i )
2009 : {
2010 : /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
2011 2878920 : IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
2012 : {
2013 0 : adj_fx[i] = MAX_32;
2014 0 : move32();
2015 0 : temp_e[i] = 0;
2016 0 : move16();
2017 : }
2018 : ELSE
2019 : {
2020 2878920 : IF( Cy_diag_fx[i] )
2021 : {
2022 1875173 : IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
2023 : {
2024 1874497 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
2025 1874497 : move32();
2026 1874497 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
2027 1874497 : move16();
2028 1874497 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
2029 1874497 : move32();
2030 : }
2031 : ELSE
2032 : {
2033 676 : adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
2034 676 : move32();
2035 676 : temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
2036 676 : move16();
2037 676 : adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
2038 676 : move32();
2039 : }
2040 : }
2041 : ELSE
2042 : {
2043 1003747 : adj_fx[i] = 0;
2044 1003747 : move32();
2045 1003747 : temp_e[i] = 0;
2046 1003747 : move16();
2047 : }
2048 : }
2049 :
2050 2878920 : Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
2051 2878920 : IF( GT_32( adj_fx[i], temp ) )
2052 : {
2053 5516 : adj_fx[i] = MAX_32;
2054 5516 : move32();
2055 5516 : temp_e[i] = 2;
2056 5516 : move16();
2057 : }
2058 : }
2059 315180 : adj_e = temp_e[0];
2060 315180 : move16();
2061 2878920 : FOR( i = 1; i < lengthCy; i++ )
2062 : {
2063 2563740 : adj_e = s_max( adj_e, temp_e[i] );
2064 : }
2065 3194100 : FOR( i = 0; i < lengthCy; i++ )
2066 : {
2067 2878920 : adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
2068 2878920 : move32();
2069 : }
2070 :
2071 315180 : diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
2072 :
2073 315180 : Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
2074 315180 : *mixing_matrix_e = mat_mult_buffer3_e;
2075 315180 : move16();
2076 : }
2077 :
2078 315180 : pop_wmops();
2079 :
2080 315180 : return out;
2081 : }
|