Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #include "prot_fx.h"
36 : #include "ivas_stat_dec.h"
37 : #include "ivas_cnst.h"
38 : #include <math.h>
39 : #include "wmc_auto.h"
40 : #include "ivas_prot_fx.h"
41 :
42 : /*-----------------------------------------------------------------------*
43 : * Local constants
44 : *-----------------------------------------------------------------------*/
45 :
46 : /* The SVD is sensitive to changes to the following constants, so please be careful when trying to tune things */
47 : #define SVD_MAX_NUM_ITERATION 75 /* maximum number of interations before exiting the SVD */
48 : #define SVD_MINIMUM_VALUE_FX ( 2 ) /* minimum value */
49 : #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
50 : #define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
51 :
52 : /*-----------------------------------------------------------------------*
53 : * Local function prototypes
54 : *-----------------------------------------------------------------------*/
55 :
56 : static void HouseholderReduction_fx(
57 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
58 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* exp(singularValues_fx_e) */
59 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
60 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* exp(secDiag_fx_e) */
61 : Word16 singularVectors_Left_e,
62 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
63 : Word16 *secDiag_fx_e,
64 : const Word16 nChannelsL, /* Q0 */
65 : const Word16 nChannelsC, /* Q0 */
66 : Word32 *eps_x_fx, /* exp(eps_x_fx_e) */
67 : Word16 *eps_x_fx_e );
68 :
69 : static void biDiagonalReductionLeft_fx(
70 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
71 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
72 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
73 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
74 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
75 : Word16 *secDiag_e,
76 : const Word16 nChannelsL, /* Q0 */
77 : const Word16 nChannelsC, /* Q0 */
78 : const Word16 currChannel, /* Q0 */
79 : Word32 *sig_x, /* exp(sig_x_e) */
80 : Word16 *sig_x_e,
81 : Word32 *g /* Q31 */
82 : );
83 :
84 : static void biDiagonalReductionRight_fx(
85 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
86 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
87 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
88 : Word16 *secDiag_e,
89 : const Word16 nChannelsL, /* Q0 */
90 : const Word16 nChannelsC, /* Q0 */
91 : const Word16 currChannel, /* Q0 */
92 : Word32 *sig_x, /* exp(sig_x_e) */
93 : Word16 *sig_x_e,
94 : Word32 *g /* Q31 */
95 : ); // Q31
96 :
97 : static void singularVectorsAccumulationLeft_fx(
98 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
99 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
100 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
101 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
102 : const Word16 nChannelsL, /* Q0 */
103 : const Word16 nChannelsC /* Q0 */
104 : );
105 :
106 : static void singularVectorsAccumulationRight_fx(
107 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
108 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
109 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
110 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
111 : Word16 *secDiag_e,
112 : const Word16 nChannelsC /* Q0 */
113 : );
114 :
115 : static Word32 maxWithSign_fx(
116 : const Word32 a /* Qx */
117 : );
118 :
119 : static Word16 BidagonalDiagonalisation_fx(
120 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_fx_e*/
121 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_fx_e*/
122 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/
123 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/
124 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */
125 : Word16 *secDiag_fx_e, /* i/o: */
126 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
127 : const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/
128 : const Word32 eps_x, /* i : eps_x_e*/
129 : const Word16 eps_x_e /* i : */
130 : );
131 :
132 : static void ApplyRotation_fx(
133 : Word32 singularVector[][MAX_OUTPUT_CHANNELS],
134 : const Word32 c, /* exp(c_e)*/
135 : const Word16 c_e,
136 : const Word32 s, /* exp(s_e) */
137 : const Word16 s_e,
138 : Word32 x11, /* exp(x11_e) */
139 : Word16 x11_e,
140 : Word32 x12, /* exp(x12_e) */
141 : Word16 x12_e,
142 : Word32 *d, /* exp(d_e) */
143 : Word16 *d_e,
144 : Word32 *g, /* exp(g_e) */
145 : Word16 *g_e,
146 : const Word16 currentIndex1, /* Q0 */
147 : const Word16 currentIndex2, /* Q0 */
148 : const Word16 nChannels /* Q0 */
149 : );
150 :
151 : static void GivensRotation2_fx(
152 : const Word32 x, /* exp(x_e) */
153 : const Word16 x_e,
154 : const Word32 z, /* exp(z_e) */
155 : const Word16 z_e,
156 : Word32 *result,
157 : Word32 *resultInv,
158 : Word16 *out_e,
159 : Word16 *outInv_e );
160 :
161 : static Word32 GivensRotation_fx(
162 : const Word32 x, /* exp(x_e) */
163 : const Word16 x_e,
164 : const Word32 z, /* exp(z_e) */
165 : const Word16 z_e,
166 : Word16 *out_e );
167 :
168 : static void ApplyQRTransform_fx(
169 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_e*/
170 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_e*/
171 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_e*/
172 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_e*/
173 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
174 : Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
175 : const Word16 startIndex, /* i : Q0*/
176 : const Word16 currentIndex, /* i : Q0*/
177 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
178 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
179 : );
180 :
181 : /*-------------------------------------------------------------------------
182 : * mat2svdMat()
183 : *
184 : * external matrix format to internal
185 : *-------------------------------------------------------------------------*/
186 :
187 836340 : void mat2svdMat_fx(
188 : const Word32 *mat, /* i : matrix as column ordered vector Qx*/
189 : Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* o : matrix as two-dimensional arry Qx*/
190 : const Word16 nRows, /* i : number of rows of the matrix Q0*/
191 : const Word16 mCols, /* i : number of columns of the matrix Q0*/
192 : const Word16 transpose /* i : flag indication transposition Q0*/
193 : )
194 : {
195 : Word16 i, j;
196 :
197 836340 : IF( transpose )
198 : {
199 706444 : FOR( i = 0; i < mCols; i++ )
200 : {
201 1775372 : FOR( j = 0; j < nRows; j++ )
202 : {
203 1185648 : svdMat[i][j] = mat[j + ( nRows * i )]; /* Qx */
204 1185648 : move32();
205 : }
206 :
207 589724 : set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, nRows ) );
208 : }
209 :
210 1394516 : FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
211 : {
212 1277796 : set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
213 : }
214 : }
215 : ELSE
216 : {
217 3069792 : FOR( i = 0; i < nRows; i++ )
218 : {
219 11768224 : FOR( j = 0; j < mCols; j++ )
220 : {
221 9418052 : svdMat[i][j] = mat[i + ( nRows * j )]; /* Qx */
222 9418052 : move32();
223 : }
224 :
225 2350172 : set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, mCols ) );
226 : }
227 :
228 9883368 : FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
229 : {
230 9163748 : set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
231 : }
232 : }
233 :
234 836340 : return;
235 : }
236 :
237 :
238 : /*---------------------------------------------------------------------*
239 : * svdMat2mat()
240 : *
241 : * transfer a matrix from a two dimensional array to a column wise ordered vector
242 : *---------------------------------------------------------------------*/
243 :
244 1038400 : void svdMat2mat_fx(
245 : Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* i : matrix as two-dimensional arry Qx*/
246 : Word32 *mat, /* o : matrix as column ordered vector Qx*/
247 : const Word16 nRows, /* i : number of rows of the matrix Q0*/
248 : const Word16 mCols /* i : number of columns of the matrix Q0*/
249 : )
250 : {
251 : Word16 i, j;
252 :
253 4033332 : FOR( i = 0; i < nRows; i++ )
254 : {
255 11900708 : FOR( j = 0; j < mCols; j++ )
256 : {
257 8905776 : mat[i + ( nRows * j )] = svdMat[i][j]; /* Qx */
258 8905776 : move32();
259 : }
260 : }
261 :
262 1038400 : return;
263 : }
264 :
265 : /*-------------------------------------------------------------------------
266 : * svd()
267 : *
268 : * perform a singular value decomposition X=USV of a matrix X
269 : *-------------------------------------------------------------------------*/
270 :
271 : /*! r: error or success */
272 836340 : Word16 svd_fx(
273 : Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/
274 : Word16 InputMatrix_e,
275 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) singularValues_fx_e*/
276 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) singularValues_fx_e*/
277 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) singularValues_fx_e*/
278 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
279 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
280 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
281 : )
282 : {
283 : Word16 iCh, jCh;
284 : Word16 lengthSingularValues;
285 : Word16 errorMessage, condition;
286 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
287 : Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
288 836340 : move16();
289 836340 : Word32 eps_x_fx = 0, temp_fx;
290 836340 : move16();
291 836340 : Word16 eps_x_fx_e = 0;
292 836340 : move16();
293 : Word16 temp_fx_e;
294 836340 : push_wmops( "svd_fx" );
295 :
296 :
297 : /* Collecting Values */
298 3776236 : FOR( iCh = 0; iCh < nChannelsL; iCh++ )
299 : {
300 13543596 : FOR( jCh = 0; jCh < nChannelsC; jCh++ )
301 : {
302 10603700 : singularVectors_Left_fx[iCh][jCh] = InputMatrix[iCh][jCh]; /* InputMatrix_e */
303 10603700 : move32();
304 : }
305 : }
306 :
307 : /* Householder reduction */
308 836340 : HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
309 : /* Set extremely small values to zero if needed */
310 : // flushToZeroArray(singularValues, max_length);
311 : // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
312 : // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);
313 :
314 : /* BidagonalDiagonalisation */
315 836340 : errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
316 : /* Sort the singular values descending order */
317 836340 : lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */
318 :
319 : DO
320 : {
321 1156256 : condition = 0;
322 1156256 : move16();
323 3952548 : FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
324 : {
325 : #ifdef OPT_MCH_DEC_V1_NBE
326 2796292 : IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) )
327 : #else /* OPT_MCH_DEC_V1_NBE */
328 : IF( BASOP_Util_Cmp_Mant32Exp( singularValues_fx[iCh], singularValues_fx_e[iCh], singularValues_fx[iCh + 1], singularValues_fx_e[iCh + 1] ) < 0 )
329 : #endif /* OPT_MCH_DEC_V1_NBE */
330 : {
331 403439 : condition = 1;
332 403439 : move16();
333 403439 : temp_fx = singularValues_fx[iCh]; /* singularValues_fx_e */
334 403439 : move32();
335 403439 : singularValues_fx[iCh] = singularValues_fx[iCh + 1]; /* singularValues_fx_e */
336 403439 : move32();
337 403439 : singularValues_fx[iCh + 1] = temp_fx; /* singularValues_fx_e */
338 403439 : move32();
339 403439 : temp_fx_e = singularValues_fx_e[iCh];
340 403439 : move16();
341 403439 : singularValues_fx_e[iCh] = singularValues_fx_e[iCh + 1];
342 403439 : move16();
343 403439 : singularValues_fx_e[iCh + 1] = temp_fx_e;
344 403439 : move16();
345 :
346 2224309 : FOR( jCh = 0; jCh < nChannelsL; ++jCh )
347 : {
348 1820870 : temp_fx = singularVectors_Left_fx[jCh][iCh]; /* singularValues_fx_e */
349 1820870 : move32();
350 1820870 : singularVectors_Left_fx[jCh][iCh] = singularVectors_Left_fx[jCh][iCh + 1]; /* singularValues_fx_e */
351 1820870 : move32();
352 1820870 : singularVectors_Left_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
353 1820870 : move32();
354 : }
355 :
356 2216677 : FOR( jCh = 0; jCh < nChannelsC; ++jCh )
357 : {
358 1813238 : temp_fx = singularVectors_Right_fx[jCh][iCh]; /* singularValues_fx_e */
359 1813238 : move32();
360 1813238 : singularVectors_Right_fx[jCh][iCh] = singularVectors_Right_fx[jCh][iCh + 1]; /* singularValues_fx_e */
361 1813238 : move32();
362 1813238 : singularVectors_Right_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
363 1813238 : move32();
364 : }
365 : }
366 : }
367 : }
368 1156256 : WHILE( EQ_16( condition, 1 ) );
369 :
370 836340 : pop_wmops();
371 836340 : return ( errorMessage );
372 : }
373 :
374 :
375 : /*-----------------------------------------------------------------------*
376 : * Local functions
377 : *-----------------------------------------------------------------------*/
378 :
379 : /*-------------------------------------------------------------------------
380 : * BidagonalDiagonalisation()
381 : *
382 : *
383 : *-------------------------------------------------------------------------*/
384 :
385 836340 : static Word16 BidagonalDiagonalisation_fx(
386 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_fx_e*/
387 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_fx_e*/
388 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/
389 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/
390 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */
391 : Word16 *secDiag_new_e, /* i/o: */
392 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
393 : const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/
394 : const Word32 eps_x, /* i : eps_x_e*/
395 : const Word16 eps_x_e /* i : */
396 : )
397 : {
398 : Word16 kCh, nCh, iCh, jCh, split;
399 : Word32 c, s, f1, f2;
400 : Word16 c_e, s_e, f1_e, f2_e;
401 836340 : Word16 x11_e = 0, x12_e = 0;
402 836340 : move16();
403 836340 : move16();
404 : Word16 temp_exp;
405 : Word16 temp_exp2;
406 836340 : Word32 g = 0;
407 836340 : move16();
408 836340 : Word16 g_e = 0;
409 836340 : move16();
410 : Word16 convergence, iteration, found_split;
411 836340 : Word16 error = 0;
412 836340 : move16();
413 : Word32 temp;
414 : Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
415 836340 : Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
416 :
417 3420512 : FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
418 : {
419 2584172 : convergence = 0;
420 2584172 : move16();
421 2584172 : iteration = 0;
422 2584172 : move16();
423 2584172 : split = iCh - 1; /* Q0 */
424 2584172 : move16();
425 :
426 6893394 : WHILE( EQ_16( convergence, 0 ) )
427 : {
428 4309222 : iteration = add( iteration, 1 ); /* Q0 */
429 4309222 : found_split = 1; /* Q0 */
430 4309222 : move16();
431 :
432 8354566 : FOR( jCh = iCh; jCh >= 0; jCh-- )
433 : {
434 : #ifdef OPT_MCH_DEC_V1_NBE
435 8354566 : Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e );
436 8354566 : IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */
437 : #else
438 : split = sub( jCh, 1 ); /* Q0 */ /* OPT_MCH_DEC_V1_NBE */
439 : IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is secDiag[ch] vanishing compared to eps_x */
440 : #endif /* OPT_MCH_DEC_V1_NBE */
441 : {
442 4266698 : found_split = 0;
443 4266698 : move16();
444 4266698 : BREAK;
445 : }
446 : #ifdef OPT_MCH_DEC_V1_NBE
447 4087868 : com_e = s_max( singularValues_new_e[jCh - 1], eps_x_e );
448 4087868 : IF( LE_32( L_shr( L_abs( singularValues_fx[jCh - 1] ), sub( com_e, singularValues_new_e[jCh - 1] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[jCh - 1] vanishing compared to eps_x */
449 : #else /* OPT_MCH_DEC_V1_NBE */
450 : IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( singularValues_fx[split] ), singularValues_new_e[split], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
451 : #endif /* OPT_MCH_DEC_V1_NBE */
452 : {
453 42524 : BREAK;
454 : }
455 : }
456 :
457 : // convergence = ( jCh == iCh ) ? 1 : 0;
458 4309222 : IF( EQ_16( jCh, iCh ) )
459 : {
460 2584172 : convergence = 1; /* Q0 */
461 2584172 : move16();
462 : }
463 : ELSE
464 : {
465 1725050 : convergence = 0;
466 1725050 : move16();
467 : }
468 :
469 4309222 : IF( found_split )
470 : {
471 42524 : s = MAX_32;
472 42524 : move32();
473 42524 : s_e = 0;
474 42524 : move16();
475 42524 : c = 0;
476 42524 : move32();
477 42524 : c_e = 0;
478 42524 : move16();
479 : #ifdef OPT_MCH_DEC_V1_NBE
480 42524 : split = sub( jCh, 1 ); /* Q0 */
481 : #endif /* OPT_MCH_DEC_V1_NBE */
482 85097 : FOR( kCh = jCh; kCh <= iCh; kCh++ )
483 : {
484 42576 : g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */
485 42576 : g_e = add( s_e, secDiag_new_e[kCh] );
486 42576 : secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
487 42576 : secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
488 : #ifdef OPT_MCH_DEC_V1_NBE
489 42576 : Word16 com_e = s_max( g_e, eps_x_e );
490 42576 : IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) )
491 : #else /* OPT_MCH_DEC_V1_NBE */
492 : IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
493 : #endif /* OPT_MCH_DEC_V1_NBE */
494 : {
495 3 : BREAK;
496 : }
497 :
498 42573 : c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
499 42573 : c_e = singularValues_new_e[kCh];
500 42573 : GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
501 42573 : c = Mpy_32_32( c, temp );
502 42573 : c_e = add( c_e, temp_exp );
503 42573 : temp_exp2 = norm_l( c );
504 42573 : c = L_shl( c, temp_exp2 );
505 42573 : c_e = sub( c_e, temp_exp2 );
506 42573 : s = Mpy_32_32( -g, temp );
507 42573 : s_e = add( g_e, temp_exp );
508 42573 : temp_exp2 = norm_l( s );
509 42573 : s = L_shl( s, temp_exp2 );
510 42573 : s_e = sub( s_e, temp_exp2 );
511 42573 : ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
512 : }
513 : }
514 :
515 4309222 : IF( convergence )
516 : {
517 2584172 : singularValues_fx[iCh] = singularValues_fx[iCh];
518 2584172 : move32();
519 2584172 : IF( singularValues_fx[iCh] < 0 )
520 : {
521 486140 : singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
522 486140 : move32();
523 1773858 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
524 : {
525 1287718 : singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
526 1287718 : move32();
527 : }
528 : }
529 : }
530 : ELSE
531 : {
532 1725050 : IF( GE_16( iteration, SVD_MAX_NUM_ITERATION ) )
533 : {
534 0 : IF( LT_32( singularValues_fx[iCh], 0 ) )
535 : {
536 0 : singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
537 0 : move32();
538 :
539 0 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
540 : {
541 0 : singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
542 0 : move32();
543 : }
544 : }
545 0 : error = 1;
546 0 : move16();
547 0 : convergence = 1;
548 0 : move16();
549 : }
550 : ELSE
551 : {
552 1725050 : ApplyQRTransform_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_new_e, secDiag_new_e, jCh, iCh, nChannelsL, nChannelsC ); /* nChannelsC */
553 : }
554 : }
555 : }
556 : }
557 :
558 : // rescaling block
559 836340 : Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );
560 :
561 :
562 836340 : return ( error );
563 : }
564 :
565 : /*-------------------------------------------------------------------------
566 : * ApplyQRTransform()
567 : *
568 : *
569 : *-------------------------------------------------------------------------*/
570 :
571 1725050 : static void ApplyQRTransform_fx(
572 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_e*/
573 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_e*/
574 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_e*/
575 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_e*/
576 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
577 : Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
578 : const Word16 startIndex, /* i : Q0*/
579 : const Word16 currentIndex, /* i : Q0*/
580 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
581 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
582 : )
583 : {
584 : Word32 temp;
585 : Word16 temp_e;
586 : Word16 temp_norm_e;
587 : Word16 ch, split;
588 1725050 : Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
589 1725050 : move32();
590 1725050 : move32();
591 1725050 : move32();
592 1725050 : move32();
593 1725050 : move32();
594 1725050 : move32();
595 1725050 : move32();
596 1725050 : move32();
597 1725050 : Word16 d_e = 0, g_e = 0, r_e = 0, x_ii_e = 0, x_split_e = 0, x_kk_e = 0, mu_e = 0, aux_e = 0;
598 1725050 : move16();
599 1725050 : move16();
600 1725050 : move16();
601 1725050 : move16();
602 1725050 : move16();
603 1725050 : move16();
604 1725050 : move16();
605 1725050 : move16();
606 : Word32 L_temp1, L_temp2, L_temp3, L_temp4;
607 : Word16 L_temp1_e, L_temp2_e, L_temp3_e, L_temp4_e, temp_exp;
608 1725050 : Word32 c = MAX_32;
609 1725050 : move32();
610 1725050 : Word16 c_e = 0;
611 1725050 : move16();
612 1725050 : Word32 s = MAX_32;
613 1725050 : move32();
614 1725050 : Word16 s_e = 0;
615 1725050 : move16();
616 :
617 1725050 : x_kk = singularValues[currentIndex]; /* exp(singularValues_e) */
618 1725050 : move32();
619 1725050 : x_kk_e = singularValues_e[currentIndex];
620 1725050 : move16();
621 1725050 : x_ii = singularValues[startIndex]; /* exp(singularValues_e) */
622 1725050 : move32();
623 1725050 : x_ii_e = singularValues_e[startIndex];
624 1725050 : move16();
625 1725050 : split = sub( currentIndex, 1 ); /* Q0 */
626 1725050 : move16();
627 :
628 1725050 : x_split = singularValues[split]; /* exp(singularValues_e) */
629 1725050 : move32();
630 1725050 : x_split_e = singularValues_e[split];
631 1725050 : move16();
632 1725050 : g = secDiag[split]; /* exp(secDiag_e) */
633 1725050 : move32();
634 1725050 : g_e = secDiag_e[split];
635 1725050 : move16();
636 1725050 : r = secDiag[currentIndex]; /* exp(secDiag_e) */
637 1725050 : move32();
638 1725050 : r_e = secDiag_e[currentIndex];
639 1725050 : move16();
640 :
641 : // d = (x_split + x_kk) * (x_split - x_kk) + (g + r) * (g - r);
642 1725050 : L_temp1 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, x_kk, x_kk_e, &L_temp1_e ); /* exp(L_temp1_e) */
643 1725050 : L_temp2 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, L_negate( x_kk ), x_kk_e, &L_temp2_e ); /* exp(L_temp2_e) */
644 1725050 : L_temp3 = BASOP_Util_Add_Mant32Exp( g, g_e, r, r_e, &L_temp3_e ); /* exp(L_temp3_e) */
645 1725050 : L_temp4 = BASOP_Util_Add_Mant32Exp( g, g_e, L_negate( r ), r_e, &L_temp4_e ); /* exp(L_temp4_e) */
646 1725050 : d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( L_temp3, L_temp4 ), add( L_temp3_e, L_temp4_e ), &d_e ); /* exp(d_e) */
647 :
648 : // d /= maxWithSign((r + r) * x_split);
649 1725050 : L_temp1 = BASOP_Util_Add_Mant32Exp( r, r_e, r, r_e, &L_temp1_e ); /* exp(L_temp1_e) */
650 1725050 : L_temp1 = maxWithSign_fx( Mpy_32_32( L_temp1, x_split ) ); /* exp(L_temp1_e + x_split_e) */
651 1725050 : L_temp1_e = add( L_temp1_e, x_split_e );
652 1725050 : d = BASOP_Util_Divide3232_Scale_newton( d, L_temp1, &temp_exp ); /* temp_exp + d_e - L_temp1_e */
653 1725050 : d_e = add( temp_exp, sub( d_e, L_temp1_e ) );
654 :
655 1725050 : g = GivensRotation_fx( MAX_32, 0, d, d_e, &g_e );
656 :
657 : // mu = x_split / maxWithSign(d + (d >= 0.0f ? 1 : (-1)) * fabsf(g)) - r;
658 : // L_temp1 = d >= 0 ? L_abs( g ) : -L_abs( g );
659 1725050 : IF( d >= 0 )
660 : {
661 1019752 : L_temp1 = L_abs( g );
662 : }
663 : ELSE
664 : {
665 705298 : L_temp1 = L_negate( L_abs( g ) );
666 : }
667 1725050 : L_temp1_e = g_e;
668 1725050 : move16();
669 1725050 : L_temp2 = maxWithSign_fx( BASOP_Util_Add_Mant32Exp( d, d_e, L_temp1, L_temp1_e, &L_temp2_e ) ); /* exp(L_temp2_e) */
670 1725050 : mu = BASOP_Util_Divide3232_Scale_newton( x_split, L_temp2, &mu_e ); /* exp(mu_e + (x-plit_e - L_temp2_e)) */
671 1725050 : mu_e = add( mu_e, sub( x_split_e, L_temp2_e ) );
672 1725050 : mu = BASOP_Util_Add_Mant32Exp( mu, mu_e, L_negate( r ), r_e, &mu_e ); /* exp(mu_e) */
673 :
674 : // d = ((x_ii + x_kk) * (x_ii - x_kk) + r * mu) / maxWithSign(x_ii);
675 1725050 : L_temp1 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, x_kk, x_kk_e, &L_temp1_e ); /* exp(L_temp1_e) */
676 1725050 : L_temp2 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, L_negate( x_kk ), x_kk_e, &L_temp2_e ); /* exp(L_temp2_e) */
677 1725050 : d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( r, mu ), add( r_e, mu_e ), &d_e ); /* exp(d_e) */
678 1725050 : d = BASOP_Util_Divide3232_Scale_newton( d, maxWithSign_fx( x_ii ), &temp_exp ); /* exp(temp_exp + (d_e - x_ii_e) */
679 1725050 : d_e = add( temp_exp, sub( d_e, x_ii_e ) );
680 :
681 : /*QR transformation*/
682 5770394 : FOR( ch = startIndex; ch <= split; ch++ )
683 : {
684 4045344 : r = Mpy_32_32( s, secDiag[ch + 1] ); /* exp(s_e + secDiag_e) */
685 4045344 : r_e = add( s_e, secDiag_e[ch + 1] );
686 4045344 : g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
687 4045344 : g_e = add( c_e, secDiag_e[ch + 1] );
688 :
689 4045344 : GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
690 4045344 : c = Mpy_32_32( d, temp );
691 4045344 : c_e = add( temp_e, d_e );
692 4045344 : temp_norm_e = norm_l( c );
693 4045344 : c = L_shl( c, temp_norm_e );
694 4045344 : c_e = sub( c_e, temp_norm_e );
695 4045344 : s = Mpy_32_32( r, temp );
696 4045344 : s_e = add( r_e, temp_e );
697 4045344 : temp_norm_e = norm_l( s );
698 4045344 : s = L_shl( s, temp_norm_e );
699 4045344 : s_e = sub( s_e, temp_norm_e );
700 4045344 : r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
701 4045344 : r_e = add( s_e, singularValues_e[ch + 1] );
702 4045344 : x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
703 4045344 : x_split_e = add( c_e, singularValues_e[ch + 1] );
704 :
705 4045344 : aux = g; /* exp(g_e) */
706 4045344 : move32();
707 4045344 : aux_e = g_e;
708 4045344 : move16();
709 :
710 : // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
711 4045344 : ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
712 :
713 4045344 : GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
714 4045344 : IF( singularValues[ch] != 0 )
715 : {
716 :
717 4045344 : c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
718 4045344 : c_e = add( d_e, aux_e );
719 4045344 : temp_norm_e = norm_l( c );
720 4045344 : c = L_shl( c, temp_norm_e );
721 4045344 : c_e = sub( c_e, temp_norm_e );
722 :
723 4045344 : s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
724 4045344 : s_e = add( r_e, aux_e );
725 4045344 : temp_norm_e = norm_l( s );
726 4045344 : s = L_shl( s, temp_norm_e );
727 4045344 : s_e = sub( s_e, temp_norm_e );
728 : }
729 :
730 : // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
731 4045344 : ApplyRotation_fx( singularVectors_Left, c, c_e, s, s_e, g, g_e, x_split, x_split_e, &d, &d_e, &x_ii, &x_ii_e, ch + 1, ch, nChannelsL );
732 : }
733 :
734 1725050 : secDiag[startIndex] = 0;
735 1725050 : move32();
736 1725050 : secDiag[currentIndex] = d; /* exp(d_e) */
737 1725050 : move32();
738 1725050 : secDiag_e[currentIndex] = d_e;
739 1725050 : move16();
740 1725050 : singularValues[currentIndex] = x_ii; /* exp(x_ii_e) */
741 1725050 : move32();
742 1725050 : singularValues_e[currentIndex] = x_ii_e;
743 1725050 : move16();
744 :
745 1725050 : return;
746 : }
747 :
748 : /*-------------------------------------------------------------------------
749 : * ApplyRotation()
750 : *
751 : *
752 : *-------------------------------------------------------------------------*/
753 :
754 8133261 : static void ApplyRotation_fx(
755 : Word32 singularVector[][MAX_OUTPUT_CHANNELS],
756 : const Word32 c, /* exp(c_e)*/
757 : const Word16 c_e,
758 : const Word32 s, /* exp(s_e) */
759 : const Word16 s_e,
760 : Word32 x11, /* exp(x11_e) */
761 : Word16 x11_e,
762 : Word32 x12, /* exp(x12_e) */
763 : Word16 x12_e,
764 : Word32 *d, /* exp(d_e) */
765 : Word16 *d_e,
766 : Word32 *g, /* exp(g_e) */
767 : Word16 *g_e,
768 : const Word16 currentIndex1, /* Q0 */
769 : const Word16 currentIndex2, /* Q0 */
770 : const Word16 nChannels /* Q0 */
771 : )
772 : {
773 : Word16 ch;
774 :
775 8133261 : *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
776 8133261 : move32();
777 8133261 : *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
778 8133261 : move32();
779 :
780 8133261 : Word16 c_q = sub( 31, c_e );
781 8133261 : Word16 s_q = sub( 31, s_e );
782 : Word32 op1, op2;
783 : Word16 op_e;
784 :
785 : // Bring c and s to same Q
786 8133261 : IF( GT_16( c_q, s_q ) )
787 : {
788 711006 : op1 = L_shr( c, sub( c_q, s_q ) );
789 711006 : op2 = s;
790 711006 : move32();
791 711006 : op_e = s_q;
792 711006 : move16();
793 : }
794 : ELSE
795 : {
796 7422255 : op1 = c;
797 7422255 : move32();
798 7422255 : op2 = L_shr( s, sub( s_q, c_q ) );
799 7422255 : op_e = c_q;
800 7422255 : move16();
801 : }
802 8133261 : op_e = add( op_e, 1 ); // 64 bit mac -> +1
803 : #ifdef OPT_MCH_DEC_V1_BE
804 8133261 : op_e = negate( op_e );
805 : #endif /* OPT_MCH_DEC_V1_BE */
806 :
807 47836581 : FOR( ch = 0; ch < nChannels; ch++ )
808 : {
809 39703320 : x11 = singularVector[ch][currentIndex2];
810 39703320 : move32();
811 39703320 : x12 = singularVector[ch][currentIndex1];
812 39703320 : move32();
813 :
814 39703320 : Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
815 : #ifdef OPT_MCH_DEC_V1_BE
816 39703320 : singularVector[ch][currentIndex2] = W_shl_sat_l( temp, op_e ); // Q(singularVector)
817 : #else /* OPT_MCH_DEC_V1_BE */
818 : temp = W_shr( temp, op_e ); // Q(singularVector)
819 : singularVector[ch][currentIndex2] = W_sat_l( temp ); // Q(singularVector)
820 : #endif /* OPT_MCH_DEC_V1_BE */
821 39703320 : move32();
822 :
823 39703320 : temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
824 : #ifdef OPT_MCH_DEC_V1_BE
825 39703320 : singularVector[ch][currentIndex1] = W_shl_sat_l( temp, op_e ); // Q(singularVector)
826 : #else /* OPT_MCH_DEC_V1_BE */
827 : temp = W_shr( temp, op_e ); // Q(singularVector)
828 : singularVector[ch][currentIndex1] = W_sat_l( temp ); // Q(singularVector)
829 : #endif /* OPT_MCH_DEC_V1_BE */
830 39703320 : move32();
831 : }
832 :
833 8133261 : return;
834 : }
835 :
836 : /*-------------------------------------------------------------------------
837 : * HouseholderReduction()
838 : *
839 : *
840 : *-------------------------------------------------------------------------*/
841 :
842 836340 : static void HouseholderReduction_fx(
843 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
844 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* exp(singularValues_fx_e) */
845 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
846 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* exp(secDiag_fx_e) */
847 : Word16 singularVectors_Left_e,
848 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
849 : Word16 *secDiag_fx_e,
850 : const Word16 nChannelsL, /* Q0 */
851 : const Word16 nChannelsC, /* Q0 */
852 : Word32 *eps_x_fx, /* exp(eps_x_fx_e) */
853 : Word16 *eps_x_fx_e )
854 : {
855 : Word16 nCh;
856 : // float g = 0.0f, sig_x = 0.0f;// to be removed
857 836340 : Word32 g_fx = 0, sig_x_fx = 0;
858 836340 : move32();
859 836340 : move32();
860 836340 : Word16 sig_x_fx_e = 0;
861 836340 : move16();
862 :
863 : Word16 iCh, jCh;
864 : Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
865 3776236 : FOR( jCh = 0; jCh < nChannelsL; jCh++ )
866 : {
867 13543596 : FOR( iCh = 0; iCh < nChannelsC; iCh++ )
868 : {
869 10603700 : singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
870 10603700 : move16();
871 : }
872 : }
873 :
874 : /* Bidiagonal Reduction for every channel */
875 3420512 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
876 : {
877 2584172 : biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
878 2584172 : biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
879 :
880 : Word16 L_temp_e;
881 2584172 : Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
882 2584172 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
883 : {
884 1206737 : *eps_x_fx = L_temp; /* exp(L_temp_e) */
885 1206737 : move32();
886 1206737 : *eps_x_fx_e = L_temp_e;
887 1206737 : move32();
888 : }
889 : }
890 :
891 : /* SingularVecotr Accumulation */
892 836340 : singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
893 :
894 :
895 836340 : singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
896 :
897 836340 : return;
898 : }
899 :
900 : /*-------------------------------------------------------------------------
901 : * biDiagonalReductionLeft()
902 : *
903 : *
904 : *-------------------------------------------------------------------------*/
905 :
906 2584172 : static void biDiagonalReductionLeft_fx(
907 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
908 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
909 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
910 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
911 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
912 : Word16 *secDiag_e,
913 : const Word16 nChannelsL, /* Q0 */
914 : const Word16 nChannelsC, /* Q0 */
915 : const Word16 currChannel, /* Q0 */
916 : Word32 *sig_x, /* exp(sig_x_e) */
917 : Word16 *sig_x_e,
918 : Word32 *g /* Q31 */
919 : )
920 : {
921 : Word16 iCh, jCh, idx;
922 : Word32 norm_x, f, r;
923 : Word16 norm_x_e, f_e, r_e;
924 : Word32 L_temp;
925 : Word16 L_temp_e;
926 :
927 2584172 : secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
928 2584172 : move32();
929 2584172 : secDiag_e[currChannel] = *sig_x_e;
930 2584172 : move16();
931 :
932 : /* Setting values to 0 */
933 2584172 : ( *sig_x ) = 0;
934 2584172 : move32();
935 2584172 : ( *g ) = 0;
936 2584172 : move32();
937 :
938 2584172 : IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
939 : {
940 2584172 : idx = currChannel;
941 2584172 : move16();
942 :
943 9536092 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
944 : {
945 6951920 : ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
946 : }
947 :
948 2584172 : IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
949 : {
950 : Word16 invVal_e;
951 : Word32 invVal;
952 2459612 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
953 : #ifdef OPT_MCH_DEC_V1_NBE
954 2459612 : Word64 temp = 0;
955 2459612 : move64();
956 2459612 : Word16 max_e = MIN_16;
957 : #else /* OPT_MCH_DEC_V1_NBE */
958 : norm_x = 0;
959 : move32();
960 : norm_x_e = 0;
961 : #endif /* OPT_MCH_DEC_V1_NBE */
962 2459612 : move16();
963 9285373 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
964 : {
965 6825761 : Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
966 6825761 : singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
967 6825761 : move32();
968 6825761 : singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
969 6825761 : move16();
970 : #ifdef OPT_MCH_DEC_V1_NBE
971 6825761 : max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] );
972 : #else /* OPT_MCH_DEC_V1_NBE */
973 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
974 : #endif /* OPT_MCH_DEC_V1_NBE */
975 : }
976 :
977 : #ifdef OPT_MCH_DEC_V1_NBE
978 9285373 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
979 : {
980 6825761 : temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors2_e[jCh][currChannel] ), 1 ) ) );
981 : }
982 :
983 2459612 : Word16 nrm = W_norm( temp );
984 2459612 : nrm = sub( nrm, 32 );
985 2459612 : norm_x = W_shl_sat_l( temp, nrm );
986 2459612 : norm_x_e = sub( add( max_e, max_e ), nrm );
987 : #endif /* OPT_MCH_DEC_V1_NBE */
988 :
989 2459612 : IF( GT_16( norm_x_e, 0 ) )
990 : {
991 417073 : norm_x = MAX_32;
992 417073 : move32();
993 417073 : norm_x_e = 0;
994 417073 : move16();
995 : }
996 2459612 : L_temp_e = norm_x_e;
997 2459612 : move16();
998 2459612 : L_temp = Sqrt32( norm_x, &L_temp_e );
999 2459612 : L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
1000 : //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
1001 2459612 : if ( singularVectors[currChannel][idx] >= 0 )
1002 : {
1003 1392322 : L_temp = L_negate( L_temp );
1004 : }
1005 2459612 : ( *g ) = L_temp;
1006 2459612 : move32();
1007 :
1008 2459612 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
1009 2459612 : singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
1010 2459612 : move32();
1011 :
1012 2459612 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
1013 :
1014 6109793 : FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1015 : {
1016 : #ifdef OPT_MCH_DEC_V1_NBE
1017 3650181 : Word16 max2_e = MIN_16;
1018 3650181 : max_e = MIN_16;
1019 3650181 : move16();
1020 3650181 : move16();
1021 3650181 : temp = 0;
1022 3650181 : move64();
1023 :
1024 18098633 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1025 : {
1026 14448452 : max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] ); /* exp(norm_x_e) */
1027 14448452 : max2_e = s_max( max2_e, singularVectors2_e[jCh][iCh] ); /* exp(norm_x_e) */
1028 : }
1029 3650181 : max_e = add( max_e, max2_e );
1030 :
1031 18098633 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1032 : {
1033 14448452 : temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ) ) ) );
1034 : }
1035 3650181 : nrm = W_norm( temp );
1036 3650181 : nrm = sub( nrm, 32 );
1037 3650181 : norm_x = W_shl_sat_l( temp, nrm );
1038 3650181 : norm_x_e = sub( max_e, nrm );
1039 : #else /* OPT_MCH_DEC_V1_NBE */
1040 : norm_x = 0;
1041 : move32();
1042 : norm_x_e = 0;
1043 : move16();
1044 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1045 : {
1046 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
1047 : }
1048 : #endif /* OPT_MCH_DEC_V1_NBE */
1049 :
1050 3650181 : f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
1051 3650181 : f_e = add( invVal_e, sub( norm_x_e, r_e ) );
1052 :
1053 18098633 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1054 : {
1055 14448452 : singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
1056 14448452 : move32();
1057 : }
1058 : }
1059 :
1060 :
1061 9285373 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1062 : {
1063 6825761 : singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */
1064 6825761 : move32();
1065 6825761 : singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
1066 6825761 : move16();
1067 : }
1068 : }
1069 :
1070 : // rescaling block
1071 2584172 : singularValues[currChannel] = Mpy_32_32( ( *sig_x ), ( *g ) ); /* sig_x_e */
1072 2584172 : move32();
1073 2584172 : singularValues_e[currChannel] = *sig_x_e;
1074 2584172 : move16();
1075 : }
1076 :
1077 2584172 : return;
1078 : }
1079 :
1080 : /*-------------------------------------------------------------------------
1081 : * biDiagonalReductionRight()
1082 : *
1083 : *
1084 : *-------------------------------------------------------------------------*/
1085 :
1086 2584172 : static void biDiagonalReductionRight_fx(
1087 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
1088 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_exp[]) */
1089 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
1090 : Word16 *secDiag_exp,
1091 : const Word16 nChannelsL, /* Q0 */
1092 : const Word16 nChannelsC, /* Q0 */
1093 : const Word16 currChannel, /* Q0 */
1094 : Word32 *sig_x, /* exp(sig_x_e) */
1095 : Word16 *sig_x_e,
1096 : Word32 *g /* Q31 */
1097 : )
1098 : {
1099 : Word16 iCh, jCh, idx;
1100 : Word32 norm_x, r;
1101 : Word16 norm_x_e, r_e;
1102 : Word32 L_temp;
1103 : Word16 L_temp_e;
1104 :
1105 : /* Setting values to 0 */
1106 2584172 : ( *sig_x ) = 0;
1107 2584172 : move32();
1108 2584172 : ( *g ) = 0;
1109 2584172 : move32();
1110 :
1111 2584172 : IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
1112 : {
1113 1747832 : idx = add( currChannel, 1 ); /* Q0 */
1114 :
1115 5399612 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1116 : {
1117 3651780 : ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */
1118 : }
1119 :
1120 1747832 : IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
1121 : {
1122 1638035 : norm_x = 0;
1123 1638035 : move32();
1124 1638035 : norm_x_e = 0;
1125 1638035 : move16();
1126 :
1127 : Word16 invVal_e, temp_e;
1128 : Word32 invVal;
1129 1638035 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
1130 5179741 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
1131 : {
1132 3541706 : temp_e = norm_l( singularVectors[currChannel][jCh] );
1133 3541706 : singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
1134 3541706 : move32();
1135 3541706 : singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
1136 3541706 : move16();
1137 3541706 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
1138 : }
1139 1638035 : IF( GT_16( norm_x_e, 0 ) )
1140 : {
1141 522785 : norm_x = MAX_32;
1142 522785 : move32();
1143 522785 : norm_x_e = 0;
1144 522785 : move16();
1145 : }
1146 1638035 : L_temp_e = norm_x_e;
1147 1638035 : move16();
1148 1638035 : L_temp = Sqrt32( norm_x, &L_temp_e );
1149 1638035 : L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
1150 1638035 : IF( singularVectors[currChannel][idx] >= 0 )
1151 : {
1152 617906 : ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
1153 617906 : move32();
1154 : }
1155 : ELSE
1156 : {
1157 1020129 : ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
1158 1020129 : move32();
1159 : }
1160 :
1161 1638035 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
1162 1638035 : singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */
1163 1638035 : move32();
1164 :
1165 1638035 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
1166 :
1167 5179741 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1168 : {
1169 3541706 : temp_e = norm_l( singularVectors[currChannel][jCh] );
1170 3541706 : secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
1171 3541706 : move32();
1172 3541706 : secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) );
1173 3541706 : move16();
1174 : }
1175 :
1176 5539985 : FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1177 : {
1178 3901950 : norm_x = 0;
1179 3901950 : move32();
1180 3901950 : norm_x_e = 0;
1181 3901950 : move16();
1182 14591642 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1183 : {
1184 10689692 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
1185 : }
1186 :
1187 14591642 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1188 : {
1189 10689692 : singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
1190 10689692 : move32();
1191 : }
1192 : }
1193 :
1194 5179741 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1195 : {
1196 3541706 : singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */
1197 3541706 : move32();
1198 3541706 : singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
1199 3541706 : move16();
1200 : }
1201 : }
1202 : }
1203 :
1204 2584172 : return;
1205 : }
1206 :
1207 : /*-------------------------------------------------------------------------
1208 : * singularVectorsAccumulationLeft()
1209 : *
1210 : *
1211 : *-------------------------------------------------------------------------*/
1212 :
1213 836340 : static void singularVectorsAccumulationLeft_fx(
1214 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
1215 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
1216 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
1217 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
1218 : const Word16 nChannelsL, /* Q0 */
1219 : const Word16 nChannelsC /* Q0 */
1220 : )
1221 : {
1222 : Word16 nCh, iCh, k;
1223 : Word16 nChannels;
1224 : Word32 norm_y, t_jj, t_ii;
1225 : Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
1226 :
1227 : /* Processing */
1228 836340 : nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
1229 : // FILE *fp = fopen("t_ii_out.txt","a");
1230 3420512 : FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* min(nChannelsL,ChannelsC) */
1231 : {
1232 2584172 : t_ii = singularValues[nCh]; /* exp(singularValues_e) */
1233 2584172 : move32();
1234 2584172 : t_ii_e = singularValues_e[nCh];
1235 2584172 : move16();
1236 :
1237 6235952 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1238 : {
1239 3651780 : singularVectors_Left[nCh][iCh] = 0;
1240 3651780 : move32();
1241 : }
1242 :
1243 2584172 : IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
1244 : {
1245 2459612 : t_ii = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
1246 2459612 : t_ii_e = sub( temp_exp, t_ii_e );
1247 : Word16 tempe;
1248 2459612 : Word32 temp = BASOP_Util_Divide3232_Scale_newton( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
1249 2459612 : tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
1250 : // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
1251 6109793 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1252 : {
1253 3650181 : Word64 acc = 0;
1254 3650181 : move64();
1255 : Word64 prod[16];
1256 : Word16 prod_e[16];
1257 3650181 : Word16 max_e = -31;
1258 3650181 : move16();
1259 14448452 : FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
1260 : {
1261 10798271 : prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
1262 10798271 : prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
1263 10798271 : max_e = s_max( max_e, prod_e[k] );
1264 : }
1265 :
1266 14448452 : FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
1267 : {
1268 10798271 : acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) );
1269 : }
1270 3650181 : Word16 acc_e = W_norm( acc );
1271 3650181 : acc = W_shl( acc, acc_e );
1272 :
1273 3650181 : norm_y = W_extract_h( acc );
1274 3650181 : norm_y_e = add( sub( max_e, acc_e ), 1 );
1275 3650181 : t_jj = Mpy_32_32( temp, norm_y );
1276 3650181 : t_jj_e = add( tempe, norm_y_e );
1277 18098633 : FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
1278 : {
1279 14448452 : singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
1280 14448452 : move32();
1281 : }
1282 : }
1283 :
1284 9285373 : FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1285 : {
1286 6825761 : singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
1287 6825761 : move32();
1288 6825761 : singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
1289 6825761 : move16();
1290 : }
1291 : }
1292 : ELSE
1293 : {
1294 250719 : FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1295 : {
1296 126159 : singularVectors_Left[iCh][nCh] = 0;
1297 126159 : move32();
1298 : }
1299 : }
1300 : #ifdef OPT_MCH_DEC_V1_NBE
1301 2584172 : Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 );
1302 2584172 : singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */
1303 2584172 : move32();
1304 2584172 : singularVectors_Left_e[nCh][nCh] = exp;
1305 2584172 : move16();
1306 : #else /* OPT_MCH_DEC_V1_NBE */
1307 : singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */
1308 : move32();
1309 : #endif /* OPT_MCH_DEC_V1_NBE */
1310 : }
1311 : // fclose(fp);
1312 3776236 : FOR( nCh = 0; nCh < nChannelsL; nCh++ )
1313 : {
1314 13543596 : FOR( iCh = 0; iCh < nChannelsC; iCh++ )
1315 : {
1316 10603700 : singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
1317 10603700 : move32();
1318 : }
1319 : }
1320 :
1321 836340 : return;
1322 : }
1323 :
1324 : /*-------------------------------------------------------------------------
1325 : * singularVectorsAccumulationRight()
1326 : *
1327 : *
1328 : *-------------------------------------------------------------------------*/
1329 :
1330 836340 : static void singularVectorsAccumulationRight_fx(
1331 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
1332 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
1333 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
1334 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
1335 : Word16 *secDiag_e,
1336 : const Word16 nChannelsC /* Q0 */
1337 : )
1338 : {
1339 : Word16 nCh, iCh, k;
1340 : Word16 nChannels;
1341 : Word32 norm_y, t_ii, ratio_float;
1342 836340 : Word16 norm_y_e, temp_exp1, sing_right_exp[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
1343 :
1344 : /* Processing */
1345 836340 : nChannels = nChannelsC; /* nChannelsC Q0*/
1346 :
1347 : /* avoid compiler warning */
1348 836340 : t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
1349 836340 : move32();
1350 :
1351 3420512 : FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
1352 : {
1353 :
1354 2584172 : IF( LT_16( nCh, sub( nChannelsC, 1 ) ) ) /* nChannelsC */
1355 : {
1356 1747832 : IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
1357 : {
1358 :
1359 5179741 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
1360 : {
1361 3541706 : ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
1362 3541706 : singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
1363 3541706 : temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
1364 3541706 : move32();
1365 3541706 : sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
1366 3541706 : move16();
1367 : // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
1368 : }
1369 :
1370 5179741 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1371 : {
1372 : #ifdef OPT_MCH_DEC_V1_NBE
1373 3541706 : Word64 norm_val = 0;
1374 3541706 : move64();
1375 3541706 : Word16 maxL_e = MIN_16;
1376 3541706 : Word16 maxR_e = MIN_16;
1377 3541706 : Word16 maxR2_e = MIN_16;
1378 3541706 : move16();
1379 3541706 : move16();
1380 3541706 : move16();
1381 13866634 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1382 : {
1383 10324928 : maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] );
1384 10324928 : maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] );
1385 10324928 : maxR2_e = s_max( maxR2_e, sing_right_exp[k][nCh] );
1386 : }
1387 : #else /* OPT_MCH_DEC_V1_NBE */
1388 : norm_y = 0;
1389 : move32();
1390 : norm_y_e = 0;
1391 : move16();
1392 : #endif /* OPT_MCH_DEC_V1_NBE */
1393 :
1394 13866634 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1395 : {
1396 : #ifdef OPT_MCH_DEC_V1_NBE
1397 10324928 : norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) );
1398 : #else /* OPT_MCH_DEC_V1_NBE */
1399 : norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
1400 : #endif /* OPT_MCH_DEC_V1_NBE */
1401 : }
1402 : #ifdef OPT_MCH_DEC_V1_NBE
1403 3541706 : norm_y_e = W_norm( norm_val );
1404 3541706 : norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) );
1405 3541706 : norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e );
1406 :
1407 3541706 : Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) );
1408 : #endif /* OPT_MCH_DEC_V1_NBE */
1409 13866634 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1410 : {
1411 : #ifdef OPT_MCH_DEC_V1_NBE
1412 10324928 : Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] );
1413 10324928 : Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) );
1414 10324928 : singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */
1415 10324928 : move32();
1416 10324928 : singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */
1417 : #else /* OPT_MCH_DEC_V1_NBE */
1418 : singularVectors_Right[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Right[k][iCh], sing_right_exp[k][iCh], Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ), add( norm_y_e, sing_right_exp[k][nCh] ), &sing_right_exp[k][iCh] ); /* exp(sing_right_exp) */
1419 : move32();
1420 : singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], sing_right_exp[k][iCh] ); /* Q31 */
1421 : #endif /* OPT_MCH_DEC_V1_NBE */
1422 10324928 : move32();
1423 10324928 : sing_right_exp[k][iCh] = 0;
1424 10324928 : move16();
1425 : }
1426 : }
1427 : }
1428 :
1429 5399612 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1430 : {
1431 3651780 : singularVectors_Right[nCh][iCh] = singularVectors_Right[iCh][nCh] = 0;
1432 3651780 : move32();
1433 3651780 : move32();
1434 : }
1435 : }
1436 2584172 : singularVectors_Right[nCh][nCh] = MAX_32;
1437 2584172 : move32();
1438 2584172 : t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
1439 2584172 : move32();
1440 : }
1441 836340 : return;
1442 : }
1443 :
1444 : /*-------------------------------------------------------------------------
1445 : * GivensRotation()
1446 : *
1447 : *
1448 : *-------------------------------------------------------------------------*/
1449 :
1450 :
1451 8133261 : static void GivensRotation2_fx(
1452 : const Word32 x, /* exp(x_e) */
1453 : const Word16 x_e,
1454 : const Word32 z, /* exp(z_e) */
1455 : const Word16 z_e,
1456 : Word32 *result,
1457 : Word32 *resultInv,
1458 : Word16 *out_e,
1459 : Word16 *outInv_e )
1460 : {
1461 : Word32 r;
1462 8133261 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
1463 8133261 : r = L_max( r, 1 );
1464 8133261 : *outInv_e = *out_e;
1465 8133261 : move16();
1466 8133261 : *result = Sqrt32( r, out_e );
1467 8133261 : move32();
1468 :
1469 8133261 : *resultInv = ISqrt32( r, outInv_e );
1470 8133261 : move32();
1471 8133261 : }
1472 :
1473 1725050 : static Word32 GivensRotation_fx(
1474 : const Word32 x, /* exp(x_e) */
1475 : const Word16 x_e,
1476 : const Word32 z, /* exp(z_e) */
1477 : const Word16 z_e,
1478 : Word16 *out_e )
1479 : {
1480 : Word32 r;
1481 :
1482 1725050 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
1483 1725050 : r = Sqrt32( r, out_e );
1484 1725050 : return ( r );
1485 : }
1486 :
1487 : /*-------------------------------------------------------------------------
1488 : * maxWithSign()
1489 : *
1490 : *
1491 : *-------------------------------------------------------------------------*/
1492 :
1493 25373080 : static Word32 maxWithSign_fx(
1494 : const Word32 a /* Qx */
1495 : )
1496 : {
1497 : Word32 result;
1498 25373080 : IF( a >= 0 )
1499 : {
1500 13241407 : result = L_max( a, SVD_MINIMUM_VALUE_FX );
1501 : }
1502 : ELSE
1503 : {
1504 12131673 : result = L_min( a, -SVD_MINIMUM_VALUE_FX );
1505 : }
1506 25373080 : return result;
1507 : }
1508 :
1509 : /*-------------------------------------------------------------------------
1510 : * flushToZeroArray()
1511 : *
1512 : *
1513 : *-------------------------------------------------------------------------*/
1514 :
1515 :
1516 : /*-------------------------------------------------------------------------
1517 : * flushToZeroMat()
1518 : *
1519 : *
1520 : *-------------------------------------------------------------------------*/
|