Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #include "prot_fx.h"
36 : #include "ivas_stat_dec.h"
37 : #include "ivas_cnst.h"
38 : #include <math.h>
39 : #include "wmc_auto.h"
40 : #include "ivas_prot_fx.h"
41 :
42 : /*-----------------------------------------------------------------------*
43 : * Local constants
44 : *-----------------------------------------------------------------------*/
45 :
46 : /* The SVD is sensitive to changes to the following constants, so please be careful when trying to tune things */
47 : #define SVD_MAX_NUM_ITERATION 75 /* maximum number of interations before exiting the SVD */
48 : #define SVD_MINIMUM_VALUE_FX ( 2 ) /* minimum value */
49 : #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
50 : #define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
51 :
52 : /*-----------------------------------------------------------------------*
53 : * Local function prototypes
54 : *-----------------------------------------------------------------------*/
55 :
56 : static void HouseholderReduction_fx(
57 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
58 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* exp(singularValues_fx_e) */
59 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
60 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* exp(secDiag_fx_e) */
61 : Word16 singularVectors_Left_e,
62 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
63 : Word16 *secDiag_fx_e,
64 : const Word16 nChannelsL, /* Q0 */
65 : const Word16 nChannelsC, /* Q0 */
66 : Word32 *eps_x_fx, /* exp(eps_x_fx_e) */
67 : Word16 *eps_x_fx_e );
68 :
69 : static void biDiagonalReductionLeft_fx(
70 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
71 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
72 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
73 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
74 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
75 : Word16 *secDiag_e,
76 : const Word16 nChannelsL, /* Q0 */
77 : const Word16 nChannelsC, /* Q0 */
78 : const Word16 currChannel, /* Q0 */
79 : Word32 *sig_x, /* exp(sig_x_e) */
80 : Word16 *sig_x_e,
81 : Word32 *g /* Q31 */
82 : );
83 :
84 : static void biDiagonalReductionRight_fx(
85 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
86 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
87 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
88 : Word16 *secDiag_e,
89 : const Word16 nChannelsL, /* Q0 */
90 : const Word16 nChannelsC, /* Q0 */
91 : const Word16 currChannel, /* Q0 */
92 : Word32 *sig_x, /* exp(sig_x_e) */
93 : Word16 *sig_x_e,
94 : Word32 *g /* Q31 */
95 : ); // Q31
96 :
97 : static void singularVectorsAccumulationLeft_fx(
98 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
99 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
100 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
101 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
102 : const Word16 nChannelsL, /* Q0 */
103 : const Word16 nChannelsC /* Q0 */
104 : );
105 :
106 : static void singularVectorsAccumulationRight_fx(
107 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
108 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
109 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
110 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
111 : Word16 *secDiag_e,
112 : const Word16 nChannelsC /* Q0 */
113 : );
114 :
115 : static Word32 maxWithSign_fx(
116 : const Word32 a /* Qx */
117 : );
118 :
119 : static Word16 BidagonalDiagonalisation_fx(
120 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_fx_e*/
121 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_fx_e*/
122 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/
123 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/
124 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */
125 : Word16 *secDiag_fx_e, /* i/o: */
126 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
127 : const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/
128 : const Word32 eps_x, /* i : eps_x_e*/
129 : const Word16 eps_x_e /* i : */
130 : );
131 :
132 : static void ApplyRotation_fx(
133 : Word32 singularVector[][MAX_OUTPUT_CHANNELS],
134 : const Word32 c, /* exp(c_e)*/
135 : const Word16 c_e,
136 : const Word32 s, /* exp(s_e) */
137 : const Word16 s_e,
138 : Word32 x11, /* exp(x11_e) */
139 : Word16 x11_e,
140 : Word32 x12, /* exp(x12_e) */
141 : Word16 x12_e,
142 : Word32 *d, /* exp(d_e) */
143 : Word16 *d_e,
144 : Word32 *g, /* exp(g_e) */
145 : Word16 *g_e,
146 : const Word16 currentIndex1, /* Q0 */
147 : const Word16 currentIndex2, /* Q0 */
148 : const Word16 nChannels /* Q0 */
149 : );
150 :
151 : static void GivensRotation2_fx(
152 : const Word32 x, /* exp(x_e) */
153 : const Word16 x_e,
154 : const Word32 z, /* exp(z_e) */
155 : const Word16 z_e,
156 : Word32 *result,
157 : Word32 *resultInv,
158 : Word16 *out_e,
159 : Word16 *outInv_e );
160 :
161 : static Word32 GivensRotation_fx(
162 : const Word32 x, /* exp(x_e) */
163 : const Word16 x_e,
164 : const Word32 z, /* exp(z_e) */
165 : const Word16 z_e,
166 : Word16 *out_e );
167 :
168 : static void ApplyQRTransform_fx(
169 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_e*/
170 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_e*/
171 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_e*/
172 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_e*/
173 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
174 : Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
175 : const Word16 startIndex, /* i : Q0*/
176 : const Word16 currentIndex, /* i : Q0*/
177 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
178 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
179 : );
180 :
181 : /*-------------------------------------------------------------------------
182 : * mat2svdMat()
183 : *
184 : * external matrix format to internal
185 : *-------------------------------------------------------------------------*/
186 :
187 836340 : void mat2svdMat_fx(
188 : const Word32 *mat, /* i : matrix as column ordered vector Qx*/
189 : Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* o : matrix as two-dimensional arry Qx*/
190 : const Word16 nRows, /* i : number of rows of the matrix Q0*/
191 : const Word16 mCols, /* i : number of columns of the matrix Q0*/
192 : const Word16 transpose /* i : flag indication transposition Q0*/
193 : )
194 : {
195 : Word16 i, j;
196 :
197 836340 : IF( transpose )
198 : {
199 706444 : FOR( i = 0; i < mCols; i++ )
200 : {
201 1775372 : FOR( j = 0; j < nRows; j++ )
202 : {
203 1185648 : svdMat[i][j] = mat[j + ( nRows * i )]; /* Qx */
204 1185648 : move32();
205 : }
206 :
207 589724 : set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, nRows ) );
208 : }
209 :
210 1394516 : FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
211 : {
212 1277796 : set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
213 : }
214 : }
215 : ELSE
216 : {
217 3069792 : FOR( i = 0; i < nRows; i++ )
218 : {
219 11768224 : FOR( j = 0; j < mCols; j++ )
220 : {
221 9418052 : svdMat[i][j] = mat[i + ( nRows * j )]; /* Qx */
222 9418052 : move32();
223 : }
224 :
225 2350172 : set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, mCols ) );
226 : }
227 :
228 9883368 : FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
229 : {
230 9163748 : set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
231 : }
232 : }
233 :
234 836340 : return;
235 : }
236 :
237 :
238 : /*---------------------------------------------------------------------*
239 : * svdMat2mat()
240 : *
241 : * transfer a matrix from a two dimensional array to a column wise ordered vector
242 : *---------------------------------------------------------------------*/
243 :
244 1038400 : void svdMat2mat_fx(
245 : Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* i : matrix as two-dimensional arry Qx*/
246 : Word32 *mat, /* o : matrix as column ordered vector Qx*/
247 : const Word16 nRows, /* i : number of rows of the matrix Q0*/
248 : const Word16 mCols /* i : number of columns of the matrix Q0*/
249 : )
250 : {
251 : Word16 i, j;
252 :
253 4033332 : FOR( i = 0; i < nRows; i++ )
254 : {
255 11900708 : FOR( j = 0; j < mCols; j++ )
256 : {
257 8905776 : mat[i + ( nRows * j )] = svdMat[i][j]; /* Qx */
258 8905776 : move32();
259 : }
260 : }
261 :
262 1038400 : return;
263 : }
264 :
265 : /*-------------------------------------------------------------------------
266 : * svd()
267 : *
268 : * perform a singular value decomposition X=USV of a matrix X
269 : *-------------------------------------------------------------------------*/
270 :
271 : /*! r: error or success */
272 836340 : Word16 svd_fx(
273 : Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/
274 : Word16 InputMatrix_e,
275 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) singularValues_fx_e*/
276 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) singularValues_fx_e*/
277 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) singularValues_fx_e*/
278 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
279 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
280 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
281 : )
282 : {
283 : Word16 iCh, jCh;
284 : Word16 lengthSingularValues;
285 : Word16 errorMessage, condition;
286 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
287 : Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
288 836340 : move16();
289 836340 : Word32 eps_x_fx = 0, temp_fx;
290 836340 : move16();
291 836340 : Word16 eps_x_fx_e = 0;
292 836340 : move16();
293 : Word16 temp_fx_e;
294 836340 : push_wmops( "svd_fx" );
295 :
296 :
297 : /* Collecting Values */
298 3776236 : FOR( iCh = 0; iCh < nChannelsL; iCh++ )
299 : {
300 13543596 : FOR( jCh = 0; jCh < nChannelsC; jCh++ )
301 : {
302 10603700 : singularVectors_Left_fx[iCh][jCh] = InputMatrix[iCh][jCh]; /* InputMatrix_e */
303 10603700 : move32();
304 : }
305 : }
306 :
307 : /* Householder reduction */
308 836340 : HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
309 : /* Set extremely small values to zero if needed */
310 : // flushToZeroArray(singularValues, max_length);
311 : // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
312 : // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);
313 :
314 : /* BidagonalDiagonalisation */
315 836340 : errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
316 : /* Sort the singular values descending order */
317 836340 : lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */
318 :
319 : DO
320 : {
321 1156995 : condition = 0;
322 1156995 : move16();
323 3961884 : FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
324 : {
325 2804889 : IF( BASOP_Util_Cmp_Mant32Exp( singularValues_fx[iCh], singularValues_fx_e[iCh], singularValues_fx[iCh + 1], singularValues_fx_e[iCh + 1] ) < 0 )
326 : {
327 405400 : condition = 1;
328 405400 : move16();
329 405400 : temp_fx = singularValues_fx[iCh]; /* singularValues_fx_e */
330 405400 : move32();
331 405400 : singularValues_fx[iCh] = singularValues_fx[iCh + 1]; /* singularValues_fx_e */
332 405400 : move32();
333 405400 : singularValues_fx[iCh + 1] = temp_fx; /* singularValues_fx_e */
334 405400 : move32();
335 405400 : temp_fx_e = singularValues_fx_e[iCh];
336 405400 : move16();
337 405400 : singularValues_fx_e[iCh] = singularValues_fx_e[iCh + 1];
338 405400 : move16();
339 405400 : singularValues_fx_e[iCh + 1] = temp_fx_e;
340 405400 : move16();
341 :
342 2240259 : FOR( jCh = 0; jCh < nChannelsL; ++jCh )
343 : {
344 1834859 : temp_fx = singularVectors_Left_fx[jCh][iCh]; /* singularValues_fx_e */
345 1834859 : move32();
346 1834859 : singularVectors_Left_fx[jCh][iCh] = singularVectors_Left_fx[jCh][iCh + 1]; /* singularValues_fx_e */
347 1834859 : move32();
348 1834859 : singularVectors_Left_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
349 1834859 : move32();
350 : }
351 :
352 2234267 : FOR( jCh = 0; jCh < nChannelsC; ++jCh )
353 : {
354 1828867 : temp_fx = singularVectors_Right_fx[jCh][iCh]; /* singularValues_fx_e */
355 1828867 : move32();
356 1828867 : singularVectors_Right_fx[jCh][iCh] = singularVectors_Right_fx[jCh][iCh + 1]; /* singularValues_fx_e */
357 1828867 : move32();
358 1828867 : singularVectors_Right_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
359 1828867 : move32();
360 : }
361 : }
362 : }
363 : }
364 1156995 : WHILE( EQ_16( condition, 1 ) );
365 :
366 836340 : pop_wmops();
367 836340 : return ( errorMessage );
368 : }
369 :
370 :
371 : /*-----------------------------------------------------------------------*
372 : * Local functions
373 : *-----------------------------------------------------------------------*/
374 :
375 : /*-------------------------------------------------------------------------
376 : * BidagonalDiagonalisation()
377 : *
378 : *
379 : *-------------------------------------------------------------------------*/
380 :
381 836340 : static Word16 BidagonalDiagonalisation_fx(
382 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_fx_e*/
383 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_fx_e*/
384 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/
385 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/
386 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */
387 : Word16 *secDiag_new_e, /* i/o: */
388 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
389 : const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/
390 : const Word32 eps_x, /* i : eps_x_e*/
391 : const Word16 eps_x_e /* i : */
392 : )
393 : {
394 : Word16 kCh, nCh, iCh, jCh, split;
395 : Word32 c, s, f1, f2;
396 : Word16 c_e, s_e, f1_e, f2_e;
397 836340 : Word16 x11_e = 0, x12_e = 0;
398 836340 : move16();
399 836340 : move16();
400 : Word16 temp_exp;
401 : Word16 temp_exp2;
402 836340 : Word32 g = 0;
403 836340 : move16();
404 836340 : Word16 g_e = 0;
405 836340 : move16();
406 : Word16 convergence, iteration, found_split;
407 836340 : Word16 error = 0;
408 836340 : move16();
409 : Word32 temp;
410 : Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
411 836340 : Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
412 :
413 3420512 : FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
414 : {
415 2584172 : convergence = 0;
416 2584172 : move16();
417 2584172 : iteration = 0;
418 2584172 : move16();
419 2584172 : split = iCh - 1; /* Q0 */
420 2584172 : move16();
421 :
422 6889234 : WHILE( EQ_16( convergence, 0 ) )
423 : {
424 4305062 : iteration = add( iteration, 1 ); /* Q0 */
425 4305062 : found_split = 1; /* Q0 */
426 4305062 : move16();
427 :
428 8342986 : FOR( jCh = iCh; jCh >= 0; jCh-- )
429 : {
430 8342986 : split = sub( jCh, 1 ); /* Q0 */
431 8342986 : IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is secDiag[ch] vanishing compared to eps_x */
432 : {
433 4261956 : found_split = 0;
434 4261956 : move16();
435 4261956 : BREAK;
436 : }
437 4081030 : IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( singularValues_fx[split] ), singularValues_new_e[split], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
438 : {
439 43106 : BREAK;
440 : }
441 : }
442 :
443 : // convergence = ( jCh == iCh ) ? 1 : 0;
444 4305062 : IF( EQ_16( jCh, iCh ) )
445 : {
446 2584172 : convergence = 1; /* Q0 */
447 2584172 : move16();
448 : }
449 : ELSE
450 : {
451 1720890 : convergence = 0;
452 1720890 : move16();
453 : }
454 :
455 4305062 : IF( found_split )
456 : {
457 43106 : s = MAX_32;
458 43106 : move32();
459 43106 : s_e = 0;
460 43106 : move16();
461 43106 : c = 0;
462 43106 : move32();
463 43106 : c_e = 0;
464 43106 : move16();
465 :
466 86253 : FOR( kCh = jCh; kCh <= iCh; kCh++ )
467 : {
468 43149 : g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */
469 43149 : g_e = add( s_e, secDiag_new_e[kCh] );
470 43149 : secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
471 43149 : secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
472 43149 : IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
473 : {
474 2 : BREAK;
475 : }
476 :
477 43147 : c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
478 43147 : c_e = singularValues_new_e[kCh];
479 43147 : GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
480 43147 : c = Mpy_32_32( c, temp );
481 43147 : c_e = add( c_e, temp_exp );
482 43147 : temp_exp2 = norm_l( c );
483 43147 : c = L_shl( c, temp_exp2 );
484 43147 : c_e = sub( c_e, temp_exp2 );
485 43147 : s = Mpy_32_32( -g, temp );
486 43147 : s_e = add( g_e, temp_exp );
487 43147 : temp_exp2 = norm_l( s );
488 43147 : s = L_shl( s, temp_exp2 );
489 43147 : s_e = sub( s_e, temp_exp2 );
490 43147 : ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
491 : }
492 : }
493 :
494 4305062 : IF( convergence )
495 : {
496 2584172 : singularValues_fx[iCh] = singularValues_fx[iCh];
497 2584172 : move32();
498 2584172 : IF( singularValues_fx[iCh] < 0 )
499 : {
500 483797 : singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
501 483797 : move32();
502 1755350 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
503 : {
504 1271553 : singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
505 1271553 : move32();
506 : }
507 : }
508 : }
509 : ELSE
510 : {
511 1720890 : IF( GE_16( iteration, SVD_MAX_NUM_ITERATION ) )
512 : {
513 0 : IF( LT_32( singularValues_fx[iCh], 0 ) )
514 : {
515 0 : singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
516 0 : move32();
517 :
518 0 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
519 : {
520 0 : singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
521 0 : move32();
522 : }
523 : }
524 0 : error = 1;
525 0 : move16();
526 0 : convergence = 1;
527 0 : move16();
528 : }
529 : ELSE
530 : {
531 1720890 : ApplyQRTransform_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_new_e, secDiag_new_e, jCh, iCh, nChannelsL, nChannelsC ); /* nChannelsC */
532 : }
533 : }
534 : }
535 : }
536 :
537 : // rescaling block
538 836340 : Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );
539 :
540 :
541 836340 : return ( error );
542 : }
543 :
544 : /*-------------------------------------------------------------------------
545 : * ApplyQRTransform()
546 : *
547 : *
548 : *-------------------------------------------------------------------------*/
549 :
550 1720890 : static void ApplyQRTransform_fx(
551 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_e*/
552 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_e*/
553 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_e*/
554 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_e*/
555 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
556 : Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
557 : const Word16 startIndex, /* i : Q0*/
558 : const Word16 currentIndex, /* i : Q0*/
559 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
560 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
561 : )
562 : {
563 : Word32 temp;
564 : Word16 temp_e;
565 : Word16 temp_norm_e;
566 : Word16 ch, split;
567 1720890 : Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
568 1720890 : move32();
569 1720890 : move32();
570 1720890 : move32();
571 1720890 : move32();
572 1720890 : move32();
573 1720890 : move32();
574 1720890 : move32();
575 1720890 : move32();
576 1720890 : Word16 d_e = 0, g_e = 0, r_e = 0, x_ii_e = 0, x_split_e = 0, x_kk_e = 0, mu_e = 0, aux_e = 0;
577 1720890 : move16();
578 1720890 : move16();
579 1720890 : move16();
580 1720890 : move16();
581 1720890 : move16();
582 1720890 : move16();
583 1720890 : move16();
584 1720890 : move16();
585 : Word32 L_temp1, L_temp2, L_temp3, L_temp4;
586 : Word16 L_temp1_e, L_temp2_e, L_temp3_e, L_temp4_e, temp_exp;
587 1720890 : Word32 c = MAX_32;
588 1720890 : move32();
589 1720890 : Word16 c_e = 0;
590 1720890 : move16();
591 1720890 : Word32 s = MAX_32;
592 1720890 : move32();
593 1720890 : Word16 s_e = 0;
594 1720890 : move16();
595 :
596 1720890 : x_kk = singularValues[currentIndex]; /* exp(singularValues_e) */
597 1720890 : move32();
598 1720890 : x_kk_e = singularValues_e[currentIndex];
599 1720890 : move16();
600 1720890 : x_ii = singularValues[startIndex]; /* exp(singularValues_e) */
601 1720890 : move32();
602 1720890 : x_ii_e = singularValues_e[startIndex];
603 1720890 : move16();
604 1720890 : split = sub( currentIndex, 1 ); /* Q0 */
605 1720890 : move16();
606 :
607 1720890 : x_split = singularValues[split]; /* exp(singularValues_e) */
608 1720890 : move32();
609 1720890 : x_split_e = singularValues_e[split];
610 1720890 : move16();
611 1720890 : g = secDiag[split]; /* exp(secDiag_e) */
612 1720890 : move32();
613 1720890 : g_e = secDiag_e[split];
614 1720890 : move16();
615 1720890 : r = secDiag[currentIndex]; /* exp(secDiag_e) */
616 1720890 : move32();
617 1720890 : r_e = secDiag_e[currentIndex];
618 1720890 : move16();
619 :
620 : // d = (x_split + x_kk) * (x_split - x_kk) + (g + r) * (g - r);
621 1720890 : L_temp1 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, x_kk, x_kk_e, &L_temp1_e ); /* exp(L_temp1_e) */
622 1720890 : L_temp2 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, L_negate( x_kk ), x_kk_e, &L_temp2_e ); /* exp(L_temp2_e) */
623 1720890 : L_temp3 = BASOP_Util_Add_Mant32Exp( g, g_e, r, r_e, &L_temp3_e ); /* exp(L_temp3_e) */
624 1720890 : L_temp4 = BASOP_Util_Add_Mant32Exp( g, g_e, L_negate( r ), r_e, &L_temp4_e ); /* exp(L_temp4_e) */
625 1720890 : d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( L_temp3, L_temp4 ), add( L_temp3_e, L_temp4_e ), &d_e ); /* exp(d_e) */
626 :
627 : // d /= maxWithSign((r + r) * x_split);
628 1720890 : L_temp1 = BASOP_Util_Add_Mant32Exp( r, r_e, r, r_e, &L_temp1_e ); /* exp(L_temp1_e) */
629 1720890 : L_temp1 = maxWithSign_fx( Mpy_32_32( L_temp1, x_split ) ); /* exp(L_temp1_e + x_split_e) */
630 1720890 : L_temp1_e = add( L_temp1_e, x_split_e );
631 1720890 : d = BASOP_Util_Divide3232_Scale_newton( d, L_temp1, &temp_exp ); /* temp_exp + d_e - L_temp1_e */
632 1720890 : d_e = add( temp_exp, sub( d_e, L_temp1_e ) );
633 :
634 1720890 : g = GivensRotation_fx( MAX_32, 0, d, d_e, &g_e );
635 :
636 : // mu = x_split / maxWithSign(d + (d >= 0.0f ? 1 : (-1)) * fabsf(g)) - r;
637 : // L_temp1 = d >= 0 ? L_abs( g ) : -L_abs( g );
638 1720890 : IF( d >= 0 )
639 : {
640 1011431 : L_temp1 = L_abs( g );
641 : }
642 : ELSE
643 : {
644 709459 : L_temp1 = L_negate( L_abs( g ) );
645 : }
646 1720890 : L_temp1_e = g_e;
647 1720890 : move16();
648 1720890 : L_temp2 = maxWithSign_fx( BASOP_Util_Add_Mant32Exp( d, d_e, L_temp1, L_temp1_e, &L_temp2_e ) ); /* exp(L_temp2_e) */
649 1720890 : mu = BASOP_Util_Divide3232_Scale_newton( x_split, L_temp2, &mu_e ); /* exp(mu_e + (x-plit_e - L_temp2_e)) */
650 1720890 : mu_e = add( mu_e, sub( x_split_e, L_temp2_e ) );
651 1720890 : mu = BASOP_Util_Add_Mant32Exp( mu, mu_e, L_negate( r ), r_e, &mu_e ); /* exp(mu_e) */
652 :
653 : // d = ((x_ii + x_kk) * (x_ii - x_kk) + r * mu) / maxWithSign(x_ii);
654 1720890 : L_temp1 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, x_kk, x_kk_e, &L_temp1_e ); /* exp(L_temp1_e) */
655 1720890 : L_temp2 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, L_negate( x_kk ), x_kk_e, &L_temp2_e ); /* exp(L_temp2_e) */
656 1720890 : d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( r, mu ), add( r_e, mu_e ), &d_e ); /* exp(d_e) */
657 1720890 : d = BASOP_Util_Divide3232_Scale_newton( d, maxWithSign_fx( x_ii ), &temp_exp ); /* exp(temp_exp + (d_e - x_ii_e) */
658 1720890 : d_e = add( temp_exp, sub( d_e, x_ii_e ) );
659 :
660 : /*QR transformation*/
661 5758814 : FOR( ch = startIndex; ch <= split; ch++ )
662 : {
663 4037924 : r = Mpy_32_32( s, secDiag[ch + 1] ); /* exp(s_e + secDiag_e) */
664 4037924 : r_e = add( s_e, secDiag_e[ch + 1] );
665 4037924 : g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
666 4037924 : g_e = add( c_e, secDiag_e[ch + 1] );
667 :
668 4037924 : GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
669 4037924 : c = Mpy_32_32( d, temp );
670 4037924 : c_e = add( temp_e, d_e );
671 4037924 : temp_norm_e = norm_l( c );
672 4037924 : c = L_shl( c, temp_norm_e );
673 4037924 : c_e = sub( c_e, temp_norm_e );
674 4037924 : s = Mpy_32_32( r, temp );
675 4037924 : s_e = add( r_e, temp_e );
676 4037924 : temp_norm_e = norm_l( s );
677 4037924 : s = L_shl( s, temp_norm_e );
678 4037924 : s_e = sub( s_e, temp_norm_e );
679 4037924 : r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
680 4037924 : r_e = add( s_e, singularValues_e[ch + 1] );
681 4037924 : x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
682 4037924 : x_split_e = add( c_e, singularValues_e[ch + 1] );
683 :
684 4037924 : aux = g; /* exp(g_e) */
685 4037924 : move32();
686 4037924 : aux_e = g_e;
687 4037924 : move16();
688 :
689 : // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
690 4037924 : ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
691 :
692 4037924 : GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
693 4037924 : IF( singularValues[ch] != 0 )
694 : {
695 :
696 4037924 : c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
697 4037924 : c_e = add( d_e, aux_e );
698 4037924 : temp_norm_e = norm_l( c );
699 4037924 : c = L_shl( c, temp_norm_e );
700 4037924 : c_e = sub( c_e, temp_norm_e );
701 :
702 4037924 : s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
703 4037924 : s_e = add( r_e, aux_e );
704 4037924 : temp_norm_e = norm_l( s );
705 4037924 : s = L_shl( s, temp_norm_e );
706 4037924 : s_e = sub( s_e, temp_norm_e );
707 : }
708 :
709 : // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
710 4037924 : ApplyRotation_fx( singularVectors_Left, c, c_e, s, s_e, g, g_e, x_split, x_split_e, &d, &d_e, &x_ii, &x_ii_e, ch + 1, ch, nChannelsL );
711 : }
712 :
713 1720890 : secDiag[startIndex] = 0;
714 1720890 : move32();
715 1720890 : secDiag[currentIndex] = d; /* exp(d_e) */
716 1720890 : move32();
717 1720890 : secDiag_e[currentIndex] = d_e;
718 1720890 : move16();
719 1720890 : singularValues[currentIndex] = x_ii; /* exp(x_ii_e) */
720 1720890 : move32();
721 1720890 : singularValues_e[currentIndex] = x_ii_e;
722 1720890 : move16();
723 :
724 1720890 : return;
725 : }
726 :
727 : /*-------------------------------------------------------------------------
728 : * ApplyRotation()
729 : *
730 : *
731 : *-------------------------------------------------------------------------*/
732 :
733 8118995 : static void ApplyRotation_fx(
734 : Word32 singularVector[][MAX_OUTPUT_CHANNELS],
735 : const Word32 c, /* exp(c_e)*/
736 : const Word16 c_e,
737 : const Word32 s, /* exp(s_e) */
738 : const Word16 s_e,
739 : Word32 x11, /* exp(x11_e) */
740 : Word16 x11_e,
741 : Word32 x12, /* exp(x12_e) */
742 : Word16 x12_e,
743 : Word32 *d, /* exp(d_e) */
744 : Word16 *d_e,
745 : Word32 *g, /* exp(g_e) */
746 : Word16 *g_e,
747 : const Word16 currentIndex1, /* Q0 */
748 : const Word16 currentIndex2, /* Q0 */
749 : const Word16 nChannels /* Q0 */
750 : )
751 : {
752 : Word16 ch;
753 :
754 8118995 : *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
755 8118995 : move32();
756 8118995 : *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
757 8118995 : move32();
758 :
759 8118995 : Word16 c_q = sub( 31, c_e );
760 8118995 : Word16 s_q = sub( 31, s_e );
761 : Word32 op1, op2;
762 : Word16 op_e;
763 :
764 : // Bring c and s to same Q
765 8118995 : IF( GT_16( c_q, s_q ) )
766 : {
767 713848 : op1 = L_shr( c, sub( c_q, s_q ) );
768 713848 : op2 = s;
769 713848 : move32();
770 713848 : op_e = s_q;
771 713848 : move16();
772 : }
773 : ELSE
774 : {
775 7405147 : op1 = c;
776 7405147 : move32();
777 7405147 : op2 = L_shr( s, sub( s_q, c_q ) );
778 7405147 : op_e = c_q;
779 7405147 : move16();
780 : }
781 8118995 : op_e = add( op_e, 1 ); // 64 bit mac -> +1
782 :
783 47765580 : FOR( ch = 0; ch < nChannels; ch++ )
784 : {
785 39646585 : x11 = singularVector[ch][currentIndex2];
786 39646585 : move32();
787 39646585 : x12 = singularVector[ch][currentIndex1];
788 39646585 : move32();
789 :
790 39646585 : Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
791 39646585 : temp = W_shr( temp, op_e ); // Q(singularVector)
792 39646585 : singularVector[ch][currentIndex2] = W_sat_l( temp ); // Q(singularVector)
793 39646585 : move32();
794 :
795 39646585 : temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
796 39646585 : temp = W_shr( temp, op_e ); // Q(singularVector)
797 39646585 : singularVector[ch][currentIndex1] = W_sat_l( temp ); // Q(singularVector)
798 39646585 : move32();
799 : }
800 :
801 8118995 : return;
802 : }
803 :
804 : /*-------------------------------------------------------------------------
805 : * HouseholderReduction()
806 : *
807 : *
808 : *-------------------------------------------------------------------------*/
809 :
810 836340 : static void HouseholderReduction_fx(
811 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
812 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* exp(singularValues_fx_e) */
813 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
814 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* exp(secDiag_fx_e) */
815 : Word16 singularVectors_Left_e,
816 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
817 : Word16 *secDiag_fx_e,
818 : const Word16 nChannelsL, /* Q0 */
819 : const Word16 nChannelsC, /* Q0 */
820 : Word32 *eps_x_fx, /* exp(eps_x_fx_e) */
821 : Word16 *eps_x_fx_e )
822 : {
823 : Word16 nCh;
824 : // float g = 0.0f, sig_x = 0.0f;// to be removed
825 836340 : Word32 g_fx = 0, sig_x_fx = 0;
826 836340 : move32();
827 836340 : move32();
828 836340 : Word16 sig_x_fx_e = 0;
829 836340 : move16();
830 :
831 : Word16 iCh, jCh;
832 : Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
833 3776236 : FOR( jCh = 0; jCh < nChannelsL; jCh++ )
834 : {
835 13543596 : FOR( iCh = 0; iCh < nChannelsC; iCh++ )
836 : {
837 10603700 : singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
838 10603700 : move16();
839 : }
840 : }
841 :
842 : /* Bidiagonal Reduction for every channel */
843 3420512 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
844 : {
845 2584172 : biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
846 2584172 : biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
847 :
848 : Word16 L_temp_e;
849 2584172 : Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
850 2584172 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
851 : {
852 1204856 : *eps_x_fx = L_temp; /* exp(L_temp_e) */
853 1204856 : move32();
854 1204856 : *eps_x_fx_e = L_temp_e;
855 1204856 : move32();
856 : }
857 : }
858 :
859 : /* SingularVecotr Accumulation */
860 836340 : singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
861 :
862 :
863 836340 : singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
864 :
865 836340 : return;
866 : }
867 :
868 : /*-------------------------------------------------------------------------
869 : * biDiagonalReductionLeft()
870 : *
871 : *
872 : *-------------------------------------------------------------------------*/
873 :
874 2584172 : static void biDiagonalReductionLeft_fx(
875 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
876 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
877 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
878 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
879 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
880 : Word16 *secDiag_e,
881 : const Word16 nChannelsL, /* Q0 */
882 : const Word16 nChannelsC, /* Q0 */
883 : const Word16 currChannel, /* Q0 */
884 : Word32 *sig_x, /* exp(sig_x_e) */
885 : Word16 *sig_x_e,
886 : Word32 *g /* Q31 */
887 : )
888 : {
889 : Word16 iCh, jCh, idx;
890 : Word32 norm_x, f, r;
891 : Word16 norm_x_e, f_e, r_e;
892 : Word32 L_temp;
893 : Word16 L_temp_e;
894 :
895 2584172 : secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
896 2584172 : move32();
897 2584172 : secDiag_e[currChannel] = *sig_x_e;
898 2584172 : move16();
899 :
900 : /* Setting values to 0 */
901 2584172 : ( *sig_x ) = 0;
902 2584172 : move32();
903 2584172 : ( *g ) = 0;
904 2584172 : move32();
905 :
906 2584172 : IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
907 : {
908 2584172 : idx = currChannel;
909 2584172 : move16();
910 :
911 9536092 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
912 : {
913 6951920 : ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
914 : }
915 :
916 2584172 : IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
917 : {
918 : Word16 invVal_e;
919 : Word32 invVal;
920 2458066 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
921 2458066 : norm_x = 0;
922 2458066 : move32();
923 2458066 : norm_x_e = 0;
924 2458066 : move16();
925 9278973 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
926 : {
927 6820907 : Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
928 6820907 : singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
929 6820907 : move32();
930 6820907 : singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
931 6820907 : move16();
932 6820907 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
933 : }
934 2458066 : IF( GT_16( norm_x_e, 0 ) )
935 : {
936 418400 : norm_x = MAX_32;
937 418400 : move32();
938 418400 : norm_x_e = 0;
939 418400 : move16();
940 : }
941 2458066 : L_temp_e = norm_x_e;
942 2458066 : move16();
943 2458066 : L_temp = Sqrt32( norm_x, &L_temp_e );
944 2458066 : L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
945 : //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
946 2458066 : if ( singularVectors[currChannel][idx] >= 0 )
947 : {
948 1383415 : L_temp = L_negate( L_temp );
949 : }
950 2458066 : ( *g ) = L_temp;
951 2458066 : move32();
952 :
953 2458066 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
954 2458066 : singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
955 2458066 : move32();
956 :
957 2458066 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
958 :
959 6106501 : FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
960 : {
961 3648435 : norm_x = 0;
962 3648435 : move32();
963 3648435 : norm_x_e = 0;
964 3648435 : move16();
965 18090665 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
966 : {
967 14442230 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
968 : }
969 :
970 3648435 : f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
971 3648435 : f_e = add( invVal_e, sub( norm_x_e, r_e ) );
972 :
973 18090665 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
974 : {
975 14442230 : singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
976 14442230 : move32();
977 : }
978 : }
979 :
980 :
981 9278973 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
982 : {
983 6820907 : singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */
984 6820907 : move32();
985 6820907 : singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
986 6820907 : move16();
987 : }
988 : }
989 :
990 : // rescaling block
991 2584172 : singularValues[currChannel] = Mpy_32_32( ( *sig_x ), ( *g ) ); /* sig_x_e */
992 2584172 : move32();
993 2584172 : singularValues_e[currChannel] = *sig_x_e;
994 2584172 : move16();
995 : }
996 :
997 2584172 : return;
998 : }
999 :
1000 : /*-------------------------------------------------------------------------
1001 : * biDiagonalReductionRight()
1002 : *
1003 : *
1004 : *-------------------------------------------------------------------------*/
1005 :
1006 2584172 : static void biDiagonalReductionRight_fx(
1007 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
1008 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_exp[]) */
1009 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
1010 : Word16 *secDiag_exp,
1011 : const Word16 nChannelsL, /* Q0 */
1012 : const Word16 nChannelsC, /* Q0 */
1013 : const Word16 currChannel, /* Q0 */
1014 : Word32 *sig_x, /* exp(sig_x_e) */
1015 : Word16 *sig_x_e,
1016 : Word32 *g /* Q31 */
1017 : )
1018 : {
1019 : Word16 iCh, jCh, idx;
1020 : Word32 norm_x, r;
1021 : Word16 norm_x_e, r_e;
1022 : Word32 L_temp;
1023 : Word16 L_temp_e;
1024 :
1025 : /* Setting values to 0 */
1026 2584172 : ( *sig_x ) = 0;
1027 2584172 : move32();
1028 2584172 : ( *g ) = 0;
1029 2584172 : move32();
1030 :
1031 2584172 : IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
1032 : {
1033 1747832 : idx = add( currChannel, 1 ); /* Q0 */
1034 :
1035 5399612 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1036 : {
1037 3651780 : ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */
1038 : }
1039 :
1040 1747832 : IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
1041 : {
1042 1636776 : norm_x = 0;
1043 1636776 : move32();
1044 1636776 : norm_x_e = 0;
1045 1636776 : move16();
1046 :
1047 : Word16 invVal_e, temp_e;
1048 : Word32 invVal;
1049 1636776 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
1050 5176587 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
1051 : {
1052 3539811 : temp_e = norm_l( singularVectors[currChannel][jCh] );
1053 3539811 : singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
1054 3539811 : move32();
1055 3539811 : singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
1056 3539811 : move16();
1057 3539811 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
1058 : }
1059 1636776 : IF( GT_16( norm_x_e, 0 ) )
1060 : {
1061 522217 : norm_x = MAX_32;
1062 522217 : move32();
1063 522217 : norm_x_e = 0;
1064 522217 : move16();
1065 : }
1066 1636776 : L_temp_e = norm_x_e;
1067 1636776 : move16();
1068 1636776 : L_temp = Sqrt32( norm_x, &L_temp_e );
1069 1636776 : L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
1070 1636776 : IF( singularVectors[currChannel][idx] >= 0 )
1071 : {
1072 612349 : ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
1073 612349 : move32();
1074 : }
1075 : ELSE
1076 : {
1077 1024427 : ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
1078 1024427 : move32();
1079 : }
1080 :
1081 1636776 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
1082 1636776 : singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */
1083 1636776 : move32();
1084 :
1085 1636776 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
1086 :
1087 5176587 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1088 : {
1089 3539811 : temp_e = norm_l( singularVectors[currChannel][jCh] );
1090 3539811 : secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
1091 3539811 : move32();
1092 3539811 : secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) );
1093 3539811 : move16();
1094 : }
1095 :
1096 5535749 : FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1097 : {
1098 3898973 : norm_x = 0;
1099 3898973 : move32();
1100 3898973 : norm_x_e = 0;
1101 3898973 : move16();
1102 14583542 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1103 : {
1104 10684569 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
1105 : }
1106 :
1107 14583542 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1108 : {
1109 10684569 : singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
1110 10684569 : move32();
1111 : }
1112 : }
1113 :
1114 5176587 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1115 : {
1116 3539811 : singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */
1117 3539811 : move32();
1118 3539811 : singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
1119 3539811 : move16();
1120 : }
1121 : }
1122 : }
1123 :
1124 2584172 : return;
1125 : }
1126 :
1127 : /*-------------------------------------------------------------------------
1128 : * singularVectorsAccumulationLeft()
1129 : *
1130 : *
1131 : *-------------------------------------------------------------------------*/
1132 :
1133 836340 : static void singularVectorsAccumulationLeft_fx(
1134 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
1135 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
1136 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
1137 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
1138 : const Word16 nChannelsL, /* Q0 */
1139 : const Word16 nChannelsC /* Q0 */
1140 : )
1141 : {
1142 : Word16 nCh, iCh, k;
1143 : Word16 nChannels;
1144 : Word32 norm_y, t_jj, t_ii;
1145 : Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
1146 :
1147 : /* Processing */
1148 836340 : nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
1149 : // FILE *fp = fopen("t_ii_out.txt","a");
1150 3420512 : FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* min(nChannelsL,ChannelsC) */
1151 : {
1152 2584172 : t_ii = singularValues[nCh]; /* exp(singularValues_e) */
1153 2584172 : move32();
1154 2584172 : t_ii_e = singularValues_e[nCh];
1155 2584172 : move16();
1156 :
1157 6235952 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1158 : {
1159 3651780 : singularVectors_Left[nCh][iCh] = 0;
1160 3651780 : move32();
1161 : }
1162 :
1163 2584172 : IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
1164 : {
1165 2458066 : t_ii = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
1166 2458066 : t_ii_e = sub( temp_exp, t_ii_e );
1167 : Word16 tempe;
1168 2458066 : Word32 temp = BASOP_Util_Divide3232_Scale_newton( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
1169 2458066 : tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
1170 : // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
1171 6106501 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1172 : {
1173 3648435 : Word64 acc = 0;
1174 3648435 : move64();
1175 : Word64 prod[16];
1176 : Word16 prod_e[16];
1177 3648435 : Word16 max_e = -31;
1178 3648435 : move16();
1179 14442230 : FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
1180 : {
1181 10793795 : prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
1182 10793795 : prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
1183 10793795 : max_e = s_max( max_e, prod_e[k] );
1184 : }
1185 :
1186 14442230 : FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
1187 : {
1188 10793795 : acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) );
1189 : }
1190 3648435 : Word16 acc_e = W_norm( acc );
1191 3648435 : acc = W_shl( acc, acc_e );
1192 :
1193 3648435 : norm_y = W_extract_h( acc );
1194 3648435 : norm_y_e = add( sub( max_e, acc_e ), 1 );
1195 3648435 : t_jj = Mpy_32_32( temp, norm_y );
1196 3648435 : t_jj_e = add( tempe, norm_y_e );
1197 18090665 : FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
1198 : {
1199 14442230 : singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
1200 14442230 : move32();
1201 : }
1202 : }
1203 :
1204 9278973 : FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1205 : {
1206 6820907 : singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
1207 6820907 : move32();
1208 6820907 : singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
1209 6820907 : move16();
1210 : }
1211 : }
1212 : ELSE
1213 : {
1214 257119 : FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1215 : {
1216 131013 : singularVectors_Left[iCh][nCh] = 0;
1217 131013 : move32();
1218 : }
1219 : }
1220 2584172 : singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */
1221 2584172 : move32();
1222 : }
1223 : // fclose(fp);
1224 3776236 : FOR( nCh = 0; nCh < nChannelsL; nCh++ )
1225 : {
1226 13543596 : FOR( iCh = 0; iCh < nChannelsC; iCh++ )
1227 : {
1228 10603700 : singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
1229 10603700 : move32();
1230 : }
1231 : }
1232 :
1233 836340 : return;
1234 : }
1235 :
1236 : /*-------------------------------------------------------------------------
1237 : * singularVectorsAccumulationRight()
1238 : *
1239 : *
1240 : *-------------------------------------------------------------------------*/
1241 :
1242 836340 : static void singularVectorsAccumulationRight_fx(
1243 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
1244 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
1245 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
1246 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
1247 : Word16 *secDiag_e,
1248 : const Word16 nChannelsC /* Q0 */
1249 : )
1250 : {
1251 : Word16 nCh, iCh, k;
1252 : Word16 nChannels;
1253 : Word32 norm_y, t_ii, ratio_float;
1254 836340 : Word16 norm_y_e, temp_exp1, sing_right_exp[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
1255 :
1256 : /* Processing */
1257 836340 : nChannels = nChannelsC; /* nChannelsC Q0*/
1258 :
1259 : /* avoid compiler warning */
1260 836340 : t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
1261 836340 : move32();
1262 :
1263 3420512 : FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
1264 : {
1265 :
1266 2584172 : IF( LT_16( nCh, sub( nChannelsC, 1 ) ) ) /* nChannelsC */
1267 : {
1268 1747832 : IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
1269 : {
1270 :
1271 5176587 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
1272 : {
1273 3539811 : ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
1274 3539811 : singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
1275 3539811 : temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
1276 3539811 : move32();
1277 3539811 : sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
1278 3539811 : move16();
1279 : // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
1280 : }
1281 :
1282 5176587 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1283 : {
1284 3539811 : norm_y = 0;
1285 3539811 : move32();
1286 3539811 : norm_y_e = 0;
1287 3539811 : move16();
1288 :
1289 13860698 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1290 : {
1291 10320887 : norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
1292 : }
1293 :
1294 13860698 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1295 : {
1296 10320887 : singularVectors_Right[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Right[k][iCh], sing_right_exp[k][iCh], Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ), add( norm_y_e, sing_right_exp[k][nCh] ), &sing_right_exp[k][iCh] ); /* exp(sing_right_exp) */
1297 10320887 : move32();
1298 10320887 : singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], sing_right_exp[k][iCh] ); /* Q31 */
1299 10320887 : move32();
1300 10320887 : sing_right_exp[k][iCh] = 0;
1301 10320887 : move16();
1302 : }
1303 : }
1304 : }
1305 :
1306 5399612 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1307 : {
1308 3651780 : singularVectors_Right[nCh][iCh] = singularVectors_Right[iCh][nCh] = 0;
1309 3651780 : move32();
1310 3651780 : move32();
1311 : }
1312 : }
1313 2584172 : singularVectors_Right[nCh][nCh] = MAX_32;
1314 2584172 : move32();
1315 2584172 : t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
1316 2584172 : move32();
1317 : }
1318 836340 : return;
1319 : }
1320 :
1321 : /*-------------------------------------------------------------------------
1322 : * GivensRotation()
1323 : *
1324 : *
1325 : *-------------------------------------------------------------------------*/
1326 :
1327 :
1328 8118995 : static void GivensRotation2_fx(
1329 : const Word32 x, /* exp(x_e) */
1330 : const Word16 x_e,
1331 : const Word32 z, /* exp(z_e) */
1332 : const Word16 z_e,
1333 : Word32 *result,
1334 : Word32 *resultInv,
1335 : Word16 *out_e,
1336 : Word16 *outInv_e )
1337 : {
1338 : Word32 r;
1339 8118995 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
1340 8118995 : r = L_max( r, 1 );
1341 8118995 : *outInv_e = *out_e;
1342 8118995 : move16();
1343 8118995 : *result = Sqrt32( r, out_e );
1344 8118995 : move32();
1345 :
1346 8118995 : *resultInv = ISqrt32( r, outInv_e );
1347 8118995 : move32();
1348 8118995 : }
1349 :
1350 1720890 : static Word32 GivensRotation_fx(
1351 : const Word32 x, /* exp(x_e) */
1352 : const Word16 x_e,
1353 : const Word32 z, /* exp(z_e) */
1354 : const Word16 z_e,
1355 : Word16 *out_e )
1356 : {
1357 : Word32 r;
1358 :
1359 1720890 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
1360 1720890 : r = Sqrt32( r, out_e );
1361 1720890 : return ( r );
1362 : }
1363 :
1364 : /*-------------------------------------------------------------------------
1365 : * maxWithSign()
1366 : *
1367 : *
1368 : *-------------------------------------------------------------------------*/
1369 :
1370 25348108 : static Word32 maxWithSign_fx(
1371 : const Word32 a /* Qx */
1372 : )
1373 : {
1374 : Word32 result;
1375 25348108 : IF( a >= 0 )
1376 : {
1377 13220820 : result = L_max( a, SVD_MINIMUM_VALUE_FX );
1378 : }
1379 : ELSE
1380 : {
1381 12127288 : result = L_min( a, -SVD_MINIMUM_VALUE_FX );
1382 : }
1383 25348108 : return result;
1384 : }
1385 :
1386 : /*-------------------------------------------------------------------------
1387 : * flushToZeroArray()
1388 : *
1389 : *
1390 : *-------------------------------------------------------------------------*/
1391 :
1392 :
1393 : /*-------------------------------------------------------------------------
1394 : * flushToZeroMat()
1395 : *
1396 : *
1397 : *-------------------------------------------------------------------------*/
|