Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 : #include <stdint.h>
33 : #include "options.h"
34 : #include "prot_fx.h"
35 : #include "ivas_stat_dec.h"
36 : #include "ivas_cnst.h"
37 : #include <math.h>
38 : #include "wmc_auto.h"
39 : #include "ivas_prot_fx.h"
40 :
41 : /*-----------------------------------------------------------------------*
42 : * Local constants
43 : *-----------------------------------------------------------------------*/
44 :
45 : /* The SVD is sensitive to changes to the following constants, so please be careful when trying to tune things */
46 : #define SVD_MAX_NUM_ITERATION 75 /* maximum number of interations before exiting the SVD */
47 : #define SVD_MINIMUM_VALUE_FX ( 2 ) /* minimum value */
48 : #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
49 : #define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
50 :
51 : /*-----------------------------------------------------------------------*
52 : * Local function prototypes
53 : *-----------------------------------------------------------------------*/
54 :
55 : static void HouseholderReduction_fx(
56 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
57 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* exp(singularValues_fx_e) */
58 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
59 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* exp(secDiag_fx_e) */
60 : Word16 singularVectors_Left_e,
61 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
62 : Word16 *secDiag_fx_e,
63 : const Word16 nChannelsL, /* Q0 */
64 : const Word16 nChannelsC, /* Q0 */
65 : Word32 *eps_x_fx, /* exp(eps_x_fx_e) */
66 : Word16 *eps_x_fx_e );
67 : #ifdef NONBE_SVD_OPTIMIZATION
68 :
69 : static void biDiagonalReductionLeft_fx(
70 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
71 : Word16 singularValues_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
72 : const Word16 nChannelsL,
73 : const Word16 nChannelsC, /* Q0 */
74 : const Word16 currChannel, /* Q0 */
75 : Word32 *g,
76 : Word16 *g_e );
77 :
78 : static void biDiagonalReductionRight_fx(
79 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
80 : Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
81 : const Word16 nChannelsL, /* Q0 */
82 : const Word16 nChannelsC, /* Q0 */
83 : const Word16 currChannel, /* Q0 */
84 : Word32 *g,
85 : Word16 *g_e );
86 : #else
87 : static void biDiagonalReductionLeft_fx(
88 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
89 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
90 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
91 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
92 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
93 : Word16 *secDiag_e,
94 : const Word16 nChannelsL, /* Q0 */
95 : const Word16 nChannelsC, /* Q0 */
96 : const Word16 currChannel, /* Q0 */
97 : Word32 *sig_x, /* exp(sig_x_e) */
98 : Word16 *sig_x_e,
99 : Word32 *g /* Q31 */
100 : );
101 :
102 : static void biDiagonalReductionRight_fx(
103 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
104 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
105 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
106 : Word16 *secDiag_e,
107 : const Word16 nChannelsL, /* Q0 */
108 : const Word16 nChannelsC, /* Q0 */
109 : const Word16 currChannel, /* Q0 */
110 : Word32 *sig_x, /* exp(sig_x_e) */
111 : Word16 *sig_x_e,
112 : Word32 *g /* Q31 */
113 : ); // Q31
114 : #endif
115 :
116 : static void singularVectorsAccumulationLeft_fx(
117 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
118 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
119 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
120 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
121 : const Word16 nChannelsL, /* Q0 */
122 : const Word16 nChannelsC /* Q0 */
123 : );
124 :
125 : static void singularVectorsAccumulationRight_fx(
126 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
127 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
128 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
129 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
130 : Word16 *secDiag_e,
131 : const Word16 nChannelsC /* Q0 */
132 : );
133 :
134 : static Word32 maxWithSign_fx(
135 : const Word32 a /* Qx */
136 : );
137 :
138 : static Word16 BidagonalDiagonalisation_fx(
139 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_fx_e*/
140 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_fx_e*/
141 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/
142 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/
143 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */
144 : Word16 *secDiag_fx_e, /* i/o: */
145 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
146 : const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/
147 : const Word32 eps_x, /* i : eps_x_e*/
148 : const Word16 eps_x_e /* i : */
149 : );
150 :
151 : static void ApplyRotation_fx(
152 : Word32 singularVector[][MAX_OUTPUT_CHANNELS],
153 : const Word32 c, /* exp(c_e)*/
154 : const Word16 c_e,
155 : const Word32 s, /* exp(s_e) */
156 : const Word16 s_e,
157 : Word32 x11, /* exp(x11_e) */
158 : Word16 x11_e,
159 : Word32 x12, /* exp(x12_e) */
160 : Word16 x12_e,
161 : Word32 *d, /* exp(d_e) */
162 : Word16 *d_e,
163 : Word32 *g, /* exp(g_e) */
164 : Word16 *g_e,
165 : const Word16 currentIndex1, /* Q0 */
166 : const Word16 currentIndex2, /* Q0 */
167 : const Word16 nChannels /* Q0 */
168 : );
169 :
170 : static void GivensRotation2_fx(
171 : const Word32 x, /* exp(x_e) */
172 : const Word16 x_e,
173 : const Word32 z, /* exp(z_e) */
174 : const Word16 z_e,
175 : Word32 *result,
176 : Word32 *resultInv,
177 : Word16 *out_e,
178 : Word16 *outInv_e );
179 :
180 : static Word32 GivensRotation_fx(
181 : const Word32 x, /* exp(x_e) */
182 : const Word16 x_e,
183 : const Word32 z, /* exp(z_e) */
184 : const Word16 z_e,
185 : Word16 *out_e );
186 :
187 : static void ApplyQRTransform_fx(
188 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_e*/
189 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_e*/
190 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_e*/
191 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_e*/
192 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
193 : Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
194 : const Word16 startIndex, /* i : Q0*/
195 : const Word16 currentIndex, /* i : Q0*/
196 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
197 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
198 : );
199 :
200 : /*-------------------------------------------------------------------------
201 : * mat2svdMat()
202 : *
203 : * external matrix format to internal
204 : *-------------------------------------------------------------------------*/
205 :
206 927239 : void mat2svdMat_fx(
207 : const Word32 *mat, /* i : matrix as column ordered vector Qx*/
208 : Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* o : matrix as two-dimensional arry Qx*/
209 : const Word16 nRows, /* i : number of rows of the matrix Q0*/
210 : const Word16 mCols, /* i : number of columns of the matrix Q0*/
211 : const Word16 transpose /* i : flag indication transposition Q0*/
212 : )
213 : {
214 : Word16 i, j;
215 :
216 927239 : IF( transpose )
217 : {
218 830472 : FOR( i = 0; i < mCols; i++ )
219 : {
220 2095225 : FOR( j = 0; j < nRows; j++ )
221 : {
222 1401140 : svdMat[i][j] = mat[j + ( nRows * i )]; /* Qx */
223 1401140 : move32();
224 : }
225 :
226 694085 : set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, nRows ) );
227 : }
228 :
229 1624494 : FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
230 : {
231 1488107 : set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
232 : }
233 : }
234 : ELSE
235 : {
236 3455181 : FOR( i = 0; i < nRows; i++ )
237 : {
238 13667732 : FOR( j = 0; j < mCols; j++ )
239 : {
240 11003403 : svdMat[i][j] = mat[i + ( nRows * j )]; /* Qx */
241 11003403 : move32();
242 : }
243 :
244 2664329 : set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, mCols ) );
245 : }
246 :
247 10780155 : FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
248 : {
249 9989303 : set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
250 : }
251 : }
252 :
253 927239 : return;
254 : }
255 :
256 :
257 : /*---------------------------------------------------------------------*
258 : * svdMat2mat()
259 : *
260 : * transfer a matrix from a two dimensional array to a column wise ordered vector
261 : *---------------------------------------------------------------------*/
262 :
263 1109632 : void svdMat2mat_fx(
264 : Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* i : matrix as two-dimensional arry Qx*/
265 : Word32 *mat, /* o : matrix as column ordered vector Qx*/
266 : const Word16 nRows, /* i : number of rows of the matrix Q0*/
267 : const Word16 mCols /* i : number of columns of the matrix Q0*/
268 : )
269 : {
270 : Word16 i, j;
271 :
272 4418721 : FOR( i = 0; i < nRows; i++ )
273 : {
274 13444367 : FOR( j = 0; j < mCols; j++ )
275 : {
276 10135278 : mat[i + ( nRows * j )] = svdMat[i][j]; /* Qx */
277 10135278 : move32();
278 : }
279 : }
280 :
281 1109632 : return;
282 : }
283 :
284 : /*-------------------------------------------------------------------------
285 : * svd()
286 : *
287 : * perform a singular value decomposition X=USV of a matrix X
288 : *-------------------------------------------------------------------------*/
289 :
290 : /*! r: error or success */
291 927239 : Word16 svd_fx(
292 : Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/
293 : Word16 InputMatrix_e,
294 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) singularValues_fx_e*/
295 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) singularValues_fx_e*/
296 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) singularValues_fx_e*/
297 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
298 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
299 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
300 : )
301 : {
302 : Word16 iCh, jCh;
303 : Word16 lengthSingularValues;
304 : Word16 errorMessage, condition;
305 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
306 : Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
307 927239 : move16();
308 927239 : Word32 eps_x_fx = 0, temp_fx;
309 927239 : move16();
310 927239 : Word16 eps_x_fx_e = 0;
311 927239 : move16();
312 : Word16 temp_fx_e;
313 927239 : push_wmops( "svd_fx" );
314 :
315 :
316 : /* Collecting Values */
317 4285653 : FOR( iCh = 0; iCh < nChannelsL; iCh++ )
318 : {
319 15762957 : FOR( jCh = 0; jCh < nChannelsC; jCh++ )
320 : {
321 12404543 : singularVectors_Left_fx[iCh][jCh] = InputMatrix[iCh][jCh]; /* InputMatrix_e */
322 12404543 : move32();
323 : }
324 : }
325 :
326 : /* Householder reduction */
327 927239 : HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
328 : /* Set extremely small values to zero if needed */
329 : // flushToZeroArray(singularValues, max_length);
330 : // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
331 : // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);
332 :
333 : /* BidagonalDiagonalisation */
334 927239 : errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
335 : /* Sort the singular values descending order */
336 927239 : lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */
337 :
338 : DO
339 : {
340 1301628 : condition = 0;
341 1301628 : move16();
342 4558543 : FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
343 : {
344 3256915 : IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) )
345 : {
346 474384 : condition = 1;
347 474384 : move16();
348 474384 : temp_fx = singularValues_fx[iCh]; /* singularValues_fx_e */
349 474384 : move32();
350 474384 : singularValues_fx[iCh] = singularValues_fx[iCh + 1]; /* singularValues_fx_e */
351 474384 : move32();
352 474384 : singularValues_fx[iCh + 1] = temp_fx; /* singularValues_fx_e */
353 474384 : move32();
354 474384 : temp_fx_e = singularValues_fx_e[iCh];
355 474384 : move16();
356 474384 : singularValues_fx_e[iCh] = singularValues_fx_e[iCh + 1];
357 474384 : move16();
358 474384 : singularValues_fx_e[iCh + 1] = temp_fx_e;
359 474384 : move16();
360 :
361 2646240 : FOR( jCh = 0; jCh < nChannelsL; ++jCh )
362 : {
363 2171856 : temp_fx = singularVectors_Left_fx[jCh][iCh]; /* singularValues_fx_e */
364 2171856 : move32();
365 2171856 : singularVectors_Left_fx[jCh][iCh] = singularVectors_Left_fx[jCh][iCh + 1]; /* singularValues_fx_e */
366 2171856 : move32();
367 2171856 : singularVectors_Left_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
368 2171856 : move32();
369 : }
370 :
371 2637281 : FOR( jCh = 0; jCh < nChannelsC; ++jCh )
372 : {
373 2162897 : temp_fx = singularVectors_Right_fx[jCh][iCh]; /* singularValues_fx_e */
374 2162897 : move32();
375 2162897 : singularVectors_Right_fx[jCh][iCh] = singularVectors_Right_fx[jCh][iCh + 1]; /* singularValues_fx_e */
376 2162897 : move32();
377 2162897 : singularVectors_Right_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
378 2162897 : move32();
379 : }
380 : }
381 : }
382 : }
383 1301628 : WHILE( EQ_16( condition, 1 ) );
384 :
385 927239 : pop_wmops();
386 927239 : return ( errorMessage );
387 : }
388 :
389 :
390 : /*-----------------------------------------------------------------------*
391 : * Local functions
392 : *-----------------------------------------------------------------------*/
393 :
394 : /*-------------------------------------------------------------------------
395 : * BidagonalDiagonalisation()
396 : *
397 : *
398 : *-------------------------------------------------------------------------*/
399 :
400 927239 : static Word16 BidagonalDiagonalisation_fx(
401 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_fx_e*/
402 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_fx_e*/
403 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/
404 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/
405 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */
406 : Word16 *secDiag_new_e, /* i/o: */
407 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
408 : const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/
409 : const Word32 eps_x, /* i : eps_x_e*/
410 : const Word16 eps_x_e /* i : */
411 : )
412 : {
413 : Word16 kCh, nCh, iCh, jCh, split;
414 : Word32 c, s, f1, f2;
415 : Word16 c_e, s_e, f1_e, f2_e;
416 927239 : Word16 x11_e = 0, x12_e = 0;
417 927239 : move16();
418 927239 : move16();
419 : Word16 temp_exp;
420 : Word16 temp_exp2;
421 927239 : Word32 g = 0;
422 927239 : move16();
423 927239 : Word16 g_e = 0;
424 927239 : move16();
425 : Word16 convergence, iteration, found_split;
426 927239 : Word16 error = 0;
427 927239 : move16();
428 : Word32 temp;
429 : Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
430 927239 : Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
431 :
432 3865702 : FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
433 : {
434 2938463 : convergence = 0;
435 2938463 : move16();
436 2938463 : iteration = 0;
437 2938463 : move16();
438 2938463 : split = iCh - 1; /* Q0 */
439 2938463 : move16();
440 :
441 7875962 : WHILE( EQ_16( convergence, 0 ) )
442 : {
443 4937499 : iteration = add( iteration, 1 ); /* Q0 */
444 4937499 : found_split = 1; /* Q0 */
445 4937499 : move16();
446 :
447 9698722 : FOR( jCh = iCh; jCh >= 0; jCh-- )
448 : {
449 9698722 : Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e );
450 9698722 : IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */
451 : {
452 4887206 : found_split = 0;
453 4887206 : move16();
454 4887206 : BREAK;
455 : }
456 4811516 : com_e = s_max( singularValues_new_e[jCh - 1], eps_x_e );
457 4811516 : IF( LE_32( L_shr( L_abs( singularValues_fx[jCh - 1] ), sub( com_e, singularValues_new_e[jCh - 1] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[jCh - 1] vanishing compared to eps_x */
458 : {
459 50293 : BREAK;
460 : }
461 : }
462 :
463 : // convergence = ( jCh == iCh ) ? 1 : 0;
464 4937499 : IF( EQ_16( jCh, iCh ) )
465 : {
466 2938463 : convergence = 1; /* Q0 */
467 2938463 : move16();
468 : }
469 : ELSE
470 : {
471 1999036 : convergence = 0;
472 1999036 : move16();
473 : }
474 :
475 4937499 : IF( found_split )
476 : {
477 50293 : s = MAX_32;
478 50293 : move32();
479 50293 : s_e = 0;
480 50293 : move16();
481 50293 : c = 0;
482 50293 : move32();
483 50293 : c_e = 0;
484 50293 : move16();
485 50293 : split = sub( jCh, 1 ); /* Q0 */
486 100647 : FOR( kCh = jCh; kCh <= iCh; kCh++ )
487 : {
488 50359 : g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */
489 50359 : g_e = add( s_e, secDiag_new_e[kCh] );
490 50359 : secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
491 50359 : secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
492 50359 : Word16 com_e = s_max( g_e, eps_x_e );
493 50359 : IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) )
494 : {
495 5 : BREAK;
496 : }
497 :
498 50354 : c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
499 50354 : c_e = singularValues_new_e[kCh];
500 50354 : GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
501 50354 : c = Mpy_32_32( c, temp );
502 50354 : c_e = add( c_e, temp_exp );
503 50354 : temp_exp2 = norm_l( c );
504 50354 : c = L_shl( c, temp_exp2 );
505 50354 : c_e = sub( c_e, temp_exp2 );
506 50354 : s = Mpy_32_32( -g, temp );
507 50354 : s_e = add( g_e, temp_exp );
508 50354 : temp_exp2 = norm_l( s );
509 50354 : s = L_shl( s, temp_exp2 );
510 50354 : s_e = sub( s_e, temp_exp2 );
511 50354 : ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
512 : }
513 : }
514 :
515 4937499 : IF( convergence )
516 : {
517 2938463 : singularValues_fx[iCh] = singularValues_fx[iCh];
518 2938463 : move32();
519 2938463 : IF( singularValues_fx[iCh] < 0 )
520 : {
521 538048 : singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
522 538048 : move32();
523 2016080 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
524 : {
525 1478032 : singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
526 1478032 : move32();
527 : }
528 : }
529 : }
530 : ELSE
531 : {
532 1999036 : IF( GE_16( iteration, SVD_MAX_NUM_ITERATION ) )
533 : {
534 0 : IF( LT_32( singularValues_fx[iCh], 0 ) )
535 : {
536 0 : singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
537 0 : move32();
538 :
539 0 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
540 : {
541 0 : singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
542 0 : move32();
543 : }
544 : }
545 0 : error = 1;
546 0 : move16();
547 0 : convergence = 1;
548 0 : move16();
549 : }
550 : ELSE
551 : {
552 1999036 : ApplyQRTransform_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_new_e, secDiag_new_e, jCh, iCh, nChannelsL, nChannelsC ); /* nChannelsC */
553 : }
554 : }
555 : }
556 : }
557 :
558 : // rescaling block
559 927239 : Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );
560 :
561 :
562 927239 : return ( error );
563 : }
564 :
565 : /*-------------------------------------------------------------------------
566 : * ApplyQRTransform()
567 : *
568 : *
569 : *-------------------------------------------------------------------------*/
570 :
571 1999036 : static void ApplyQRTransform_fx(
572 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_e*/
573 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_e*/
574 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_e*/
575 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_e*/
576 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
577 : Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
578 : const Word16 startIndex, /* i : Q0*/
579 : const Word16 currentIndex, /* i : Q0*/
580 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
581 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
582 : )
583 : {
584 : Word32 temp;
585 : Word16 temp_e;
586 : Word16 temp_norm_e;
587 : Word16 ch, split;
588 1999036 : Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
589 1999036 : move32();
590 1999036 : move32();
591 1999036 : move32();
592 1999036 : move32();
593 1999036 : move32();
594 1999036 : move32();
595 1999036 : move32();
596 1999036 : move32();
597 1999036 : Word16 d_e = 0, g_e = 0, r_e = 0, x_ii_e = 0, x_split_e = 0, x_kk_e = 0, mu_e = 0, aux_e = 0;
598 1999036 : move16();
599 1999036 : move16();
600 1999036 : move16();
601 1999036 : move16();
602 1999036 : move16();
603 1999036 : move16();
604 1999036 : move16();
605 1999036 : move16();
606 : Word32 L_temp1, L_temp2, L_temp3, L_temp4;
607 : Word16 L_temp1_e, L_temp2_e, L_temp3_e, L_temp4_e, temp_exp;
608 1999036 : Word32 c = MAX_32;
609 1999036 : move32();
610 1999036 : Word16 c_e = 0;
611 1999036 : move16();
612 1999036 : Word32 s = MAX_32;
613 1999036 : move32();
614 1999036 : Word16 s_e = 0;
615 1999036 : move16();
616 :
617 1999036 : x_kk = singularValues[currentIndex]; /* exp(singularValues_e) */
618 1999036 : move32();
619 1999036 : x_kk_e = singularValues_e[currentIndex];
620 1999036 : move16();
621 1999036 : x_ii = singularValues[startIndex]; /* exp(singularValues_e) */
622 1999036 : move32();
623 1999036 : x_ii_e = singularValues_e[startIndex];
624 1999036 : move16();
625 1999036 : split = sub( currentIndex, 1 ); /* Q0 */
626 1999036 : move16();
627 :
628 1999036 : x_split = singularValues[split]; /* exp(singularValues_e) */
629 1999036 : move32();
630 1999036 : x_split_e = singularValues_e[split];
631 1999036 : move16();
632 1999036 : g = secDiag[split]; /* exp(secDiag_e) */
633 1999036 : move32();
634 1999036 : g_e = secDiag_e[split];
635 1999036 : move16();
636 1999036 : r = secDiag[currentIndex]; /* exp(secDiag_e) */
637 1999036 : move32();
638 1999036 : r_e = secDiag_e[currentIndex];
639 1999036 : move16();
640 :
641 : // d = (x_split + x_kk) * (x_split - x_kk) + (g + r) * (g - r);
642 1999036 : L_temp1 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, x_kk, x_kk_e, &L_temp1_e ); /* exp(L_temp1_e) */
643 1999036 : L_temp2 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, L_negate( x_kk ), x_kk_e, &L_temp2_e ); /* exp(L_temp2_e) */
644 1999036 : L_temp3 = BASOP_Util_Add_Mant32Exp( g, g_e, r, r_e, &L_temp3_e ); /* exp(L_temp3_e) */
645 1999036 : L_temp4 = BASOP_Util_Add_Mant32Exp( g, g_e, L_negate( r ), r_e, &L_temp4_e ); /* exp(L_temp4_e) */
646 1999036 : d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( L_temp3, L_temp4 ), add( L_temp3_e, L_temp4_e ), &d_e ); /* exp(d_e) */
647 :
648 : // d /= maxWithSign((r + r) * x_split);
649 1999036 : L_temp1 = BASOP_Util_Add_Mant32Exp( r, r_e, r, r_e, &L_temp1_e ); /* exp(L_temp1_e) */
650 1999036 : L_temp1 = maxWithSign_fx( Mpy_32_32( L_temp1, x_split ) ); /* exp(L_temp1_e + x_split_e) */
651 1999036 : L_temp1_e = add( L_temp1_e, x_split_e );
652 1999036 : d = BASOP_Util_Divide3232_Scale_newton( d, L_temp1, &temp_exp ); /* temp_exp + d_e - L_temp1_e */
653 1999036 : d_e = add( temp_exp, sub( d_e, L_temp1_e ) );
654 :
655 1999036 : g = GivensRotation_fx( MAX_32, 0, d, d_e, &g_e );
656 :
657 : // mu = x_split / maxWithSign(d + (d >= 0.0f ? 1 : (-1)) * fabsf(g)) - r;
658 : // L_temp1 = d >= 0 ? L_abs( g ) : -L_abs( g );
659 1999036 : IF( d >= 0 )
660 : {
661 1200425 : L_temp1 = L_abs( g );
662 : }
663 : ELSE
664 : {
665 798611 : L_temp1 = L_negate( L_abs( g ) );
666 : }
667 1999036 : L_temp1_e = g_e;
668 1999036 : move16();
669 1999036 : L_temp2 = maxWithSign_fx( BASOP_Util_Add_Mant32Exp( d, d_e, L_temp1, L_temp1_e, &L_temp2_e ) ); /* exp(L_temp2_e) */
670 1999036 : mu = BASOP_Util_Divide3232_Scale_newton( x_split, L_temp2, &mu_e ); /* exp(mu_e + (x-plit_e - L_temp2_e)) */
671 1999036 : mu_e = add( mu_e, sub( x_split_e, L_temp2_e ) );
672 1999036 : mu = BASOP_Util_Add_Mant32Exp( mu, mu_e, L_negate( r ), r_e, &mu_e ); /* exp(mu_e) */
673 :
674 : // d = ((x_ii + x_kk) * (x_ii - x_kk) + r * mu) / maxWithSign(x_ii);
675 1999036 : L_temp1 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, x_kk, x_kk_e, &L_temp1_e ); /* exp(L_temp1_e) */
676 1999036 : L_temp2 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, L_negate( x_kk ), x_kk_e, &L_temp2_e ); /* exp(L_temp2_e) */
677 1999036 : d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( r, mu ), add( r_e, mu_e ), &d_e ); /* exp(d_e) */
678 1999036 : d = BASOP_Util_Divide3232_Scale_newton( d, maxWithSign_fx( x_ii ), &temp_exp ); /* exp(temp_exp + (d_e - x_ii_e) */
679 1999036 : d_e = add( temp_exp, sub( d_e, x_ii_e ) );
680 :
681 : /*QR transformation*/
682 6760259 : FOR( ch = startIndex; ch <= split; ch++ )
683 : {
684 4761223 : r = Mpy_32_32( s, secDiag[ch + 1] ); /* exp(s_e + secDiag_e) */
685 4761223 : r_e = add( s_e, secDiag_e[ch + 1] );
686 4761223 : g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
687 4761223 : g_e = add( c_e, secDiag_e[ch + 1] );
688 :
689 4761223 : GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
690 4761223 : c = Mpy_32_32( d, temp );
691 4761223 : c_e = add( temp_e, d_e );
692 4761223 : temp_norm_e = norm_l( c );
693 4761223 : c = L_shl( c, temp_norm_e );
694 4761223 : c_e = sub( c_e, temp_norm_e );
695 4761223 : s = Mpy_32_32( r, temp );
696 4761223 : s_e = add( r_e, temp_e );
697 4761223 : temp_norm_e = norm_l( s );
698 4761223 : s = L_shl( s, temp_norm_e );
699 4761223 : s_e = sub( s_e, temp_norm_e );
700 4761223 : r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
701 4761223 : r_e = add( s_e, singularValues_e[ch + 1] );
702 4761223 : x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
703 4761223 : x_split_e = add( c_e, singularValues_e[ch + 1] );
704 :
705 4761223 : aux = g; /* exp(g_e) */
706 4761223 : move32();
707 4761223 : aux_e = g_e;
708 4761223 : move16();
709 :
710 : // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
711 4761223 : ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
712 :
713 4761223 : GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
714 4761223 : IF( singularValues[ch] != 0 )
715 : {
716 :
717 4761223 : c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
718 4761223 : c_e = add( d_e, aux_e );
719 4761223 : temp_norm_e = norm_l( c );
720 4761223 : c = L_shl( c, temp_norm_e );
721 4761223 : c_e = sub( c_e, temp_norm_e );
722 :
723 4761223 : s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
724 4761223 : s_e = add( r_e, aux_e );
725 4761223 : temp_norm_e = norm_l( s );
726 4761223 : s = L_shl( s, temp_norm_e );
727 4761223 : s_e = sub( s_e, temp_norm_e );
728 : }
729 :
730 : // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
731 4761223 : ApplyRotation_fx( singularVectors_Left, c, c_e, s, s_e, g, g_e, x_split, x_split_e, &d, &d_e, &x_ii, &x_ii_e, ch + 1, ch, nChannelsL );
732 : }
733 :
734 1999036 : secDiag[startIndex] = 0;
735 1999036 : move32();
736 1999036 : secDiag[currentIndex] = d; /* exp(d_e) */
737 1999036 : move32();
738 1999036 : secDiag_e[currentIndex] = d_e;
739 1999036 : move16();
740 1999036 : singularValues[currentIndex] = x_ii; /* exp(x_ii_e) */
741 1999036 : move32();
742 1999036 : singularValues_e[currentIndex] = x_ii_e;
743 1999036 : move16();
744 :
745 1999036 : return;
746 : }
747 :
748 : /*-------------------------------------------------------------------------
749 : * ApplyRotation()
750 : *
751 : *
752 : *-------------------------------------------------------------------------*/
753 :
754 9572800 : static void ApplyRotation_fx(
755 : Word32 singularVector[][MAX_OUTPUT_CHANNELS],
756 : const Word32 c, /* exp(c_e)*/
757 : const Word16 c_e,
758 : const Word32 s, /* exp(s_e) */
759 : const Word16 s_e,
760 : Word32 x11, /* exp(x11_e) */
761 : Word16 x11_e,
762 : Word32 x12, /* exp(x12_e) */
763 : Word16 x12_e,
764 : Word32 *d, /* exp(d_e) */
765 : Word16 *d_e,
766 : Word32 *g, /* exp(g_e) */
767 : Word16 *g_e,
768 : const Word16 currentIndex1, /* Q0 */
769 : const Word16 currentIndex2, /* Q0 */
770 : const Word16 nChannels /* Q0 */
771 : )
772 : {
773 : Word16 ch;
774 :
775 9572800 : *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
776 9572800 : move32();
777 9572800 : *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
778 9572800 : move32();
779 :
780 9572800 : Word16 c_q = sub( 31, c_e );
781 9572800 : Word16 s_q = sub( 31, s_e );
782 : Word32 op1, op2;
783 : Word16 op_e;
784 :
785 : // Bring c and s to same Q
786 9572800 : IF( GT_16( c_q, s_q ) )
787 : {
788 838677 : op1 = L_shr( c, sub( c_q, s_q ) );
789 838677 : op2 = s;
790 838677 : move32();
791 838677 : op_e = s_q;
792 838677 : move16();
793 : }
794 : ELSE
795 : {
796 8734123 : op1 = c;
797 8734123 : move32();
798 8734123 : op2 = L_shr( s, sub( s_q, c_q ) );
799 8734123 : op_e = c_q;
800 8734123 : move16();
801 : }
802 9572800 : op_e = add( op_e, 1 ); // 64 bit mac -> +1
803 9572800 : op_e = negate( op_e );
804 :
805 57092883 : FOR( ch = 0; ch < nChannels; ch++ )
806 : {
807 47520083 : x11 = singularVector[ch][currentIndex2];
808 47520083 : move32();
809 47520083 : x12 = singularVector[ch][currentIndex1];
810 47520083 : move32();
811 :
812 47520083 : Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
813 47520083 : singularVector[ch][currentIndex2] = W_shl_sat_l( temp, op_e ); // Q(singularVector)
814 47520083 : move32();
815 :
816 47520083 : temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
817 47520083 : singularVector[ch][currentIndex1] = W_shl_sat_l( temp, op_e ); // Q(singularVector)
818 47520083 : move32();
819 : }
820 :
821 9572800 : return;
822 : }
823 :
824 : /*-------------------------------------------------------------------------
825 : * HouseholderReduction()
826 : *
827 : *
828 : *-------------------------------------------------------------------------*/
829 :
830 927239 : static void HouseholderReduction_fx(
831 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
832 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* exp(singularValues_fx_e) */
833 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
834 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* exp(secDiag_fx_e) */
835 : Word16 singularVectors_Left_e,
836 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
837 : Word16 *secDiag_fx_e,
838 : const Word16 nChannelsL, /* Q0 */
839 : const Word16 nChannelsC, /* Q0 */
840 : Word32 *eps_x_fx, /* exp(eps_x_fx_e) */
841 : Word16 *eps_x_fx_e )
842 : {
843 : Word16 nCh;
844 : #ifdef NONBE_SVD_OPTIMIZATION
845 :
846 927239 : Word32 g_left_fx = 0;
847 927239 : Word16 g_left_e = 0;
848 927239 : move32();
849 927239 : move16();
850 927239 : Word32 g_right_fx = 0;
851 927239 : Word16 g_right_e = 0;
852 927239 : move32();
853 927239 : move16();
854 :
855 : #else
856 :
857 : // float g = 0.0f, sig_x = 0.0f;// to be removed
858 : Word32 g_fx = 0, sig_x_fx = 0;
859 : move32();
860 : move32();
861 : Word16 sig_x_fx_e = 0;
862 : move16();
863 : #endif
864 :
865 : Word16 iCh, jCh;
866 : Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
867 :
868 : #ifdef NONBE_SVD_OPTIMIZATION
869 927239 : Word16 sc = 0;
870 927239 : move16();
871 927239 : sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC );
872 3358414 : FOR( jCh = 1; jCh < nChannelsL; jCh++ )
873 : {
874 2431175 : sc = s_min( sc, getScaleFactor32( singularVectors_Left_fx[jCh], nChannelsC ) );
875 : }
876 4285653 : FOR( jCh = 0; jCh < nChannelsL; jCh++ )
877 : {
878 3358414 : Scale_sig32( singularVectors_Left_fx[jCh], nChannelsC, sc );
879 15762957 : FOR( iCh = 0; iCh < nChannelsC; iCh++ )
880 : {
881 12404543 : singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e - sc;
882 12404543 : move16();
883 : }
884 : }
885 :
886 3865702 : FOR( nCh = 0; nCh < nChannelsC; nCh++ )
887 : {
888 2938463 : secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
889 2938463 : move32();
890 2938463 : secDiag_fx_e[nCh] = g_right_e;
891 :
892 2938463 : biDiagonalReductionLeft_fx(
893 : singularVectors_Left_fx,
894 : singularVectors_Left_fx_e,
895 : nChannelsL,
896 : nChannelsC,
897 : nCh,
898 : &g_left_fx,
899 : &g_left_e );
900 :
901 2938463 : singularValues_fx[nCh] = g_left_fx;
902 2938463 : move32();
903 2938463 : singularValues_fx_e[nCh] = g_left_e;
904 :
905 2938463 : biDiagonalReductionRight_fx(
906 : singularVectors_Left_fx,
907 : singularVectors_Left_fx_e,
908 : nChannelsL,
909 : nChannelsC,
910 : nCh,
911 : &g_right_fx,
912 : &g_right_e );
913 :
914 : Word16 L_temp_e;
915 2938463 : Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
916 2938463 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
917 : {
918 1357226 : *eps_x_fx = L_temp; /* exp(L_temp_e) */
919 1357226 : move32();
920 1357226 : *eps_x_fx_e = L_temp_e;
921 1357226 : move32();
922 : }
923 : }
924 :
925 : #else
926 :
927 : FOR( jCh = 0; jCh < nChannelsL; jCh++ )
928 : {
929 : FOR( iCh = 0; iCh < nChannelsC; iCh++ )
930 : {
931 : singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
932 : move16();
933 : }
934 : }
935 :
936 : /* Bidiagonal Reduction for every channel */
937 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
938 : {
939 : biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
940 : biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
941 :
942 : Word16 L_temp_e;
943 : Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
944 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
945 : {
946 : *eps_x_fx = L_temp; /* exp(L_temp_e) */
947 : move32();
948 : *eps_x_fx_e = L_temp_e;
949 : move32();
950 : }
951 : }
952 : #endif
953 :
954 : /* SingularVecotr Accumulation */
955 927239 : singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
956 :
957 :
958 927239 : singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
959 :
960 927239 : return;
961 : }
962 :
963 : #ifdef NONBE_SVD_OPTIMIZATION
964 : /*-------------------------------------------------------------------------
965 : * biDiagonalReductionLeft()
966 : *
967 : *
968 : *-------------------------------------------------------------------------*/
969 2938463 : static void biDiagonalReductionLeft_fx(
970 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
971 : Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
972 : const Word16 nChannelsL,
973 : const Word16 nChannelsC, /* Q0 */
974 : const Word16 currChannel, /* Q0 */
975 : Word32 *g,
976 : Word16 *g_e )
977 : {
978 : Word16 iCh, jCh;
979 : Word32 norm_x, f, r;
980 : Word16 norm_x_e, f_e, r_e;
981 : Word32 L_temp;
982 : Word16 L_temp_e;
983 :
984 : /* Setting values to 0 */
985 2938463 : *g = 0;
986 2938463 : *g_e = 0;
987 2938463 : move32();
988 2938463 : move16();
989 :
990 2938463 : IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
991 : {
992 2938463 : Word64 temp = 0;
993 2938463 : move64();
994 2938463 : norm_x = 0;
995 2938463 : move32();
996 2938463 : norm_x_e = 0;
997 2938463 : move16();
998 2938463 : Word16 max_e = MIN_16;
999 2938463 : move16();
1000 11034362 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1001 : {
1002 8095899 : max_e = s_max( max_e, singularVectors_e[jCh][currChannel] );
1003 : }
1004 :
1005 11034362 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1006 : {
1007 8095899 : temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors_e[jCh][currChannel] ), 1 ) ) );
1008 : }
1009 :
1010 2938463 : Word16 nrm = W_norm( temp );
1011 2938463 : nrm = sub( nrm, 32 );
1012 2938463 : norm_x = W_shl_sat_l( temp, nrm );
1013 2938463 : norm_x_e = sub( add( max_e, max_e ), nrm );
1014 :
1015 2938463 : IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
1016 : {
1017 : Word16 invVal_e;
1018 : Word32 invVal;
1019 :
1020 2809680 : L_temp_e = norm_x_e;
1021 2809680 : move16();
1022 2809680 : L_temp = Sqrt32( norm_x, &L_temp_e );
1023 : //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
1024 2809680 : if ( singularVectors[currChannel][currChannel] >= 0 )
1025 : {
1026 1572183 : L_temp = L_negate( L_temp );
1027 1572183 : move32();
1028 : }
1029 2809680 : *g = L_temp;
1030 2809680 : move32();
1031 2809680 : *g_e = L_temp_e;
1032 2809680 : move16();
1033 :
1034 2809680 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
1035 2809680 : singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */
1036 2809680 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
1037 :
1038 7113788 : FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1039 : {
1040 4304108 : Word16 max2_e = MIN_16;
1041 4304108 : max_e = MIN_16;
1042 4304108 : move16();
1043 4304108 : move16();
1044 4304108 : temp = 0;
1045 4304108 : move64();
1046 :
1047 21698947 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1048 : {
1049 17394839 : max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); /* exp(norm_x_e) */
1050 17394839 : max2_e = s_max( max2_e, singularVectors_e[jCh][iCh] ); /* exp(norm_x_e) */
1051 : }
1052 4304108 : max_e = add( max_e, max2_e );
1053 :
1054 21698947 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1055 : {
1056 17394839 : temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors_e[jCh][currChannel], singularVectors_e[jCh][iCh] ) ) ) );
1057 : }
1058 4304108 : nrm = W_norm( temp );
1059 4304108 : nrm = sub( nrm, 32 );
1060 4304108 : norm_x = W_shl_sat_l( temp, nrm );
1061 4304108 : norm_x_e = sub( max_e, nrm );
1062 :
1063 4304108 : f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
1064 4304108 : f_e = add( invVal_e, sub( norm_x_e, r_e ) );
1065 :
1066 21698947 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1067 : {
1068 17394839 : singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] );
1069 : }
1070 : }
1071 : }
1072 : }
1073 2938463 : return;
1074 : }
1075 :
1076 2938463 : static void biDiagonalReductionRight_fx(
1077 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
1078 : Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
1079 : const Word16 nChannelsL, /* Q0 */
1080 : const Word16 nChannelsC, /* Q0 */
1081 : const Word16 currChannel, /* Q0 */
1082 : Word32 *g,
1083 : Word16 *g_e )
1084 : {
1085 : Word16 iCh, jCh, idx;
1086 : Word32 norm_x, r;
1087 : Word16 norm_x_e, r_e;
1088 : Word32 L_temp;
1089 : Word16 L_temp_e;
1090 :
1091 : /* Setting values to 0 */
1092 2938463 : *g = 0;
1093 2938463 : *g_e = 0;
1094 2938463 : move32();
1095 2938463 : move16();
1096 2938463 : IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
1097 : {
1098 2011224 : idx = add( currChannel, 1 ); /* Q0 */
1099 :
1100 2011224 : norm_x = 0;
1101 2011224 : move32();
1102 2011224 : norm_x_e = 0;
1103 2011224 : move16();
1104 6319868 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
1105 : {
1106 4308644 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
1107 : }
1108 :
1109 2011224 : IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
1110 : {
1111 : Word16 invVal_e;
1112 : Word32 invVal;
1113 :
1114 1899988 : L_temp_e = norm_x_e;
1115 1899988 : move16();
1116 1899988 : L_temp = Sqrt32( norm_x, &L_temp_e );
1117 : // L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
1118 1899988 : IF( singularVectors[currChannel][idx] >= 0 )
1119 : {
1120 727810 : ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
1121 727810 : move32();
1122 : }
1123 : ELSE
1124 : {
1125 1172178 : ( *g ) = L_temp; /* exp(L_temp_e) */
1126 1172178 : move32();
1127 : }
1128 1899988 : *g_e = L_temp_e;
1129 1899988 : move16();
1130 :
1131 1899988 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
1132 1899988 : singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */
1133 :
1134 1899988 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
1135 :
1136 6525603 : FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1137 : {
1138 4625615 : norm_x = 0;
1139 4625615 : move32();
1140 4625615 : norm_x_e = 0;
1141 4625615 : move16();
1142 17609294 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1143 : {
1144 12983679 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors_e[iCh][jCh], singularVectors_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
1145 : }
1146 :
1147 4625615 : norm_x = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
1148 4625615 : norm_x_e = add( invVal_e, sub( norm_x_e, r_e ) );
1149 :
1150 17609294 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1151 : {
1152 12983679 : singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */
1153 : }
1154 : }
1155 : }
1156 : }
1157 :
1158 2938463 : return;
1159 : }
1160 : #else
1161 : /*-------------------------------------------------------------------------
1162 : * biDiagonalReductionLeft()
1163 : *
1164 : *
1165 : *-------------------------------------------------------------------------*/
1166 :
1167 : static void biDiagonalReductionLeft_fx(
1168 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
1169 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
1170 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
1171 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
1172 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
1173 : Word16 *secDiag_e,
1174 : const Word16 nChannelsL, /* Q0 */
1175 : const Word16 nChannelsC, /* Q0 */
1176 : const Word16 currChannel, /* Q0 */
1177 : Word32 *sig_x, /* exp(sig_x_e) */
1178 : Word16 *sig_x_e,
1179 : Word32 *g /* Q31 */
1180 : )
1181 : {
1182 : Word16 iCh, jCh, idx;
1183 : Word32 norm_x, f, r;
1184 : Word16 norm_x_e, f_e, r_e;
1185 : Word32 L_temp;
1186 : Word16 L_temp_e;
1187 :
1188 : secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
1189 : move32();
1190 : secDiag_e[currChannel] = *sig_x_e;
1191 : move16();
1192 :
1193 : /* Setting values to 0 */
1194 : ( *sig_x ) = 0;
1195 : move32();
1196 : ( *g ) = 0;
1197 : move32();
1198 :
1199 : IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
1200 : {
1201 : idx = currChannel;
1202 : move16();
1203 :
1204 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1205 : {
1206 : ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
1207 : }
1208 :
1209 : IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
1210 : {
1211 : Word16 invVal_e;
1212 : Word32 invVal;
1213 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
1214 : Word64 temp = 0;
1215 : move64();
1216 : Word16 max_e = MIN_16;
1217 : move16();
1218 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1219 : {
1220 : Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
1221 : singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
1222 : move32();
1223 : singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
1224 : move16();
1225 : max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] );
1226 : }
1227 :
1228 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1229 : {
1230 : temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors2_e[jCh][currChannel] ), 1 ) ) );
1231 : }
1232 :
1233 : Word16 nrm = W_norm( temp );
1234 : nrm = sub( nrm, 32 );
1235 : norm_x = W_shl_sat_l( temp, nrm );
1236 : norm_x_e = sub( add( max_e, max_e ), nrm );
1237 :
1238 : IF( GT_16( norm_x_e, 0 ) )
1239 : {
1240 : norm_x = MAX_32;
1241 : move32();
1242 : norm_x_e = 0;
1243 : move16();
1244 : }
1245 : L_temp_e = norm_x_e;
1246 : move16();
1247 : L_temp = Sqrt32( norm_x, &L_temp_e );
1248 : L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
1249 : //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
1250 : if ( singularVectors[currChannel][idx] >= 0 )
1251 : {
1252 : L_temp = L_negate( L_temp );
1253 : }
1254 : ( *g ) = L_temp;
1255 : move32();
1256 :
1257 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
1258 : singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
1259 : move32();
1260 :
1261 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
1262 :
1263 : FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1264 : {
1265 : Word16 max2_e = MIN_16;
1266 : max_e = MIN_16;
1267 : move16();
1268 : move16();
1269 : temp = 0;
1270 : move64();
1271 :
1272 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1273 : {
1274 : max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] ); /* exp(norm_x_e) */
1275 : max2_e = s_max( max2_e, singularVectors2_e[jCh][iCh] ); /* exp(norm_x_e) */
1276 : }
1277 : max_e = add( max_e, max2_e );
1278 :
1279 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1280 : {
1281 : temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ) ) ) );
1282 : }
1283 : nrm = W_norm( temp );
1284 : nrm = sub( nrm, 32 );
1285 : norm_x = W_shl_sat_l( temp, nrm );
1286 : norm_x_e = sub( max_e, nrm );
1287 :
1288 : f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
1289 : f_e = add( invVal_e, sub( norm_x_e, r_e ) );
1290 :
1291 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1292 : {
1293 : singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
1294 : move32();
1295 : }
1296 : }
1297 :
1298 :
1299 : FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
1300 : {
1301 : singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */
1302 : move32();
1303 : singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
1304 : move16();
1305 : }
1306 : }
1307 :
1308 : // rescaling block
1309 : singularValues[currChannel] = Mpy_32_32( ( *sig_x ), ( *g ) ); /* sig_x_e */
1310 : move32();
1311 : singularValues_e[currChannel] = *sig_x_e;
1312 : move16();
1313 : }
1314 :
1315 : return;
1316 : }
1317 :
1318 : /*-------------------------------------------------------------------------
1319 : * biDiagonalReductionRight()
1320 : *
1321 : *
1322 : *-------------------------------------------------------------------------*/
1323 :
1324 : static void biDiagonalReductionRight_fx(
1325 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
1326 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_exp[]) */
1327 : Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
1328 : Word16 *secDiag_exp,
1329 : const Word16 nChannelsL, /* Q0 */
1330 : const Word16 nChannelsC, /* Q0 */
1331 : const Word16 currChannel, /* Q0 */
1332 : Word32 *sig_x, /* exp(sig_x_e) */
1333 : Word16 *sig_x_e,
1334 : Word32 *g /* Q31 */
1335 : )
1336 : {
1337 : Word16 iCh, jCh, idx;
1338 : Word32 norm_x, r;
1339 : Word16 norm_x_e, r_e;
1340 : Word32 L_temp;
1341 : Word16 L_temp_e;
1342 :
1343 : /* Setting values to 0 */
1344 : ( *sig_x ) = 0;
1345 : move32();
1346 : ( *g ) = 0;
1347 : move32();
1348 :
1349 : IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
1350 : {
1351 : idx = add( currChannel, 1 ); /* Q0 */
1352 :
1353 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1354 : {
1355 : ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */
1356 : }
1357 :
1358 : IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
1359 : {
1360 : norm_x = 0;
1361 : move32();
1362 : norm_x_e = 0;
1363 : move16();
1364 :
1365 : Word16 invVal_e, temp_e;
1366 : Word32 invVal;
1367 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
1368 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
1369 : {
1370 : temp_e = norm_l( singularVectors[currChannel][jCh] );
1371 : singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
1372 : move32();
1373 : singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
1374 : move16();
1375 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
1376 : }
1377 : IF( GT_16( norm_x_e, 0 ) )
1378 : {
1379 : norm_x = MAX_32;
1380 : move32();
1381 : norm_x_e = 0;
1382 : move16();
1383 : }
1384 : L_temp_e = norm_x_e;
1385 : move16();
1386 : L_temp = Sqrt32( norm_x, &L_temp_e );
1387 : L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
1388 : IF( singularVectors[currChannel][idx] >= 0 )
1389 : {
1390 : ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
1391 : move32();
1392 : }
1393 : ELSE
1394 : {
1395 : ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
1396 : move32();
1397 : }
1398 :
1399 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
1400 : singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */
1401 : move32();
1402 :
1403 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
1404 :
1405 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1406 : {
1407 : temp_e = norm_l( singularVectors[currChannel][jCh] );
1408 : secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
1409 : move32();
1410 : secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) );
1411 : move16();
1412 : }
1413 :
1414 : FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1415 : {
1416 : norm_x = 0;
1417 : move32();
1418 : norm_x_e = 0;
1419 : move16();
1420 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1421 : {
1422 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
1423 : }
1424 :
1425 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1426 : {
1427 : singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
1428 : move32();
1429 : }
1430 : }
1431 :
1432 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1433 : {
1434 : singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */
1435 : move32();
1436 : singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
1437 : move16();
1438 : }
1439 : }
1440 : }
1441 :
1442 : return;
1443 : }
1444 : #endif
1445 :
1446 : /*-------------------------------------------------------------------------
1447 : * singularVectorsAccumulationLeft()
1448 : *
1449 : *
1450 : *-------------------------------------------------------------------------*/
1451 :
1452 927239 : static void singularVectorsAccumulationLeft_fx(
1453 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
1454 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
1455 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
1456 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
1457 : const Word16 nChannelsL, /* Q0 */
1458 : const Word16 nChannelsC /* Q0 */
1459 : )
1460 : {
1461 : Word16 nCh, iCh, k;
1462 : Word16 nChannels;
1463 : Word32 norm_y, t_jj, t_ii;
1464 : Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
1465 :
1466 : /* Processing */
1467 927239 : nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
1468 : // FILE *fp = fopen("t_ii_out.txt","a");
1469 3865702 : FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* min(nChannelsL,ChannelsC) */
1470 : {
1471 2938463 : t_ii = singularValues[nCh]; /* exp(singularValues_e) */
1472 2938463 : move32();
1473 2938463 : t_ii_e = singularValues_e[nCh];
1474 2938463 : move16();
1475 :
1476 7247107 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1477 : {
1478 4308644 : singularVectors_Left[nCh][iCh] = 0;
1479 4308644 : move32();
1480 : }
1481 :
1482 2938463 : IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
1483 : {
1484 2809680 : t_ii = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
1485 2809680 : t_ii_e = sub( temp_exp, t_ii_e );
1486 : Word16 tempe;
1487 2809680 : Word32 temp = BASOP_Util_Divide3232_Scale_newton( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
1488 2809680 : tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
1489 : // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
1490 7113788 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1491 : {
1492 4304108 : Word64 acc = 0;
1493 4304108 : move64();
1494 : Word64 prod[16];
1495 : Word16 prod_e[16];
1496 4304108 : Word16 max_e = -31;
1497 4304108 : move16();
1498 17394839 : FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
1499 : {
1500 13090731 : prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
1501 13090731 : prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
1502 13090731 : max_e = s_max( max_e, prod_e[k] );
1503 : }
1504 :
1505 17394839 : FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
1506 : {
1507 : #ifdef FIX_ISSUE_1811_EXCEEDING_W_SHIFTS
1508 13090731 : acc = W_add( acc, W_shr( prod[k], s_min( 63, sub( max_e, prod_e[k] ) ) ) );
1509 : #else
1510 : acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) );
1511 : #endif
1512 : }
1513 4304108 : Word16 acc_e = W_norm( acc );
1514 4304108 : acc = W_shl( acc, acc_e );
1515 :
1516 4304108 : norm_y = W_extract_h( acc );
1517 4304108 : norm_y_e = add( sub( max_e, acc_e ), 1 );
1518 4304108 : t_jj = Mpy_32_32( temp, norm_y );
1519 4304108 : t_jj_e = add( tempe, norm_y_e );
1520 21698947 : FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
1521 : {
1522 17394839 : singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
1523 17394839 : move32();
1524 : }
1525 : }
1526 :
1527 10772260 : FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1528 : {
1529 7962580 : singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
1530 7962580 : move32();
1531 7962580 : singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
1532 7962580 : move16();
1533 : }
1534 : }
1535 : ELSE
1536 : {
1537 262102 : FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1538 : {
1539 133319 : singularVectors_Left[iCh][nCh] = 0;
1540 133319 : move32();
1541 : }
1542 : }
1543 2938463 : Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 );
1544 2938463 : singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */
1545 2938463 : move32();
1546 2938463 : singularVectors_Left_e[nCh][nCh] = exp;
1547 2938463 : move16();
1548 : }
1549 : // fclose(fp);
1550 4285653 : FOR( nCh = 0; nCh < nChannelsL; nCh++ )
1551 : {
1552 15762957 : FOR( iCh = 0; iCh < nChannelsC; iCh++ )
1553 : {
1554 12404543 : singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
1555 12404543 : move32();
1556 : }
1557 : }
1558 :
1559 927239 : return;
1560 : }
1561 :
1562 : /*-------------------------------------------------------------------------
1563 : * singularVectorsAccumulationRight()
1564 : *
1565 : *
1566 : *-------------------------------------------------------------------------*/
1567 :
1568 927239 : static void singularVectorsAccumulationRight_fx(
1569 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
1570 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
1571 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
1572 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
1573 : Word16 *secDiag_e,
1574 : const Word16 nChannelsC /* Q0 */
1575 : )
1576 : {
1577 : Word16 nCh, iCh, k;
1578 : Word16 nChannels;
1579 : Word32 norm_y, t_ii, ratio_float;
1580 927239 : Word16 norm_y_e, temp_exp1, sing_right_exp[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
1581 :
1582 : /* Processing */
1583 927239 : nChannels = nChannelsC; /* nChannelsC Q0*/
1584 :
1585 : /* avoid compiler warning */
1586 927239 : t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
1587 927239 : move32();
1588 :
1589 3865702 : FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
1590 : {
1591 :
1592 2938463 : IF( LT_16( nCh, sub( nChannelsC, 1 ) ) ) /* nChannelsC */
1593 : {
1594 2011224 : IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
1595 : {
1596 :
1597 6096762 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
1598 : {
1599 4196774 : ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
1600 4196774 : singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
1601 4196774 : temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
1602 4196774 : move32();
1603 4196774 : sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
1604 4196774 : move16();
1605 : // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
1606 : }
1607 :
1608 6096762 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1609 : {
1610 4196774 : Word64 norm_val = 0;
1611 4196774 : move64();
1612 4196774 : Word16 maxL_e = MIN_16;
1613 4196774 : Word16 maxR_e = MIN_16;
1614 4196774 : Word16 maxR2_e = MIN_16;
1615 4196774 : move16();
1616 4196774 : move16();
1617 4196774 : move16();
1618 16742722 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1619 : {
1620 12545948 : maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] );
1621 12545948 : maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] );
1622 12545948 : maxR2_e = s_max( maxR2_e, sing_right_exp[k][nCh] );
1623 : }
1624 :
1625 16742722 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1626 : {
1627 12545948 : norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) );
1628 : }
1629 4196774 : norm_y_e = W_norm( norm_val );
1630 4196774 : norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) );
1631 4196774 : norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e );
1632 :
1633 4196774 : Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) );
1634 16742722 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1635 : {
1636 12545948 : Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] );
1637 12545948 : Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) );
1638 12545948 : singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */
1639 12545948 : move32();
1640 12545948 : singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */
1641 12545948 : move32();
1642 12545948 : sing_right_exp[k][iCh] = 0;
1643 12545948 : move16();
1644 : }
1645 : }
1646 : }
1647 :
1648 6319868 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1649 : {
1650 4308644 : singularVectors_Right[nCh][iCh] = singularVectors_Right[iCh][nCh] = 0;
1651 4308644 : move32();
1652 4308644 : move32();
1653 : }
1654 : }
1655 2938463 : singularVectors_Right[nCh][nCh] = MAX_32;
1656 2938463 : move32();
1657 2938463 : t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
1658 2938463 : move32();
1659 : }
1660 927239 : return;
1661 : }
1662 :
1663 : /*-------------------------------------------------------------------------
1664 : * GivensRotation()
1665 : *
1666 : *
1667 : *-------------------------------------------------------------------------*/
1668 :
1669 :
1670 9572800 : static void GivensRotation2_fx(
1671 : const Word32 x, /* exp(x_e) */
1672 : const Word16 x_e,
1673 : const Word32 z, /* exp(z_e) */
1674 : const Word16 z_e,
1675 : Word32 *result,
1676 : Word32 *resultInv,
1677 : Word16 *out_e,
1678 : Word16 *outInv_e )
1679 : {
1680 : Word32 r;
1681 9572800 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
1682 9572800 : r = L_max( r, 1 );
1683 9572800 : *outInv_e = *out_e;
1684 9572800 : move16();
1685 9572800 : *result = Sqrt32( r, out_e );
1686 9572800 : move32();
1687 :
1688 9572800 : *resultInv = ISqrt32( r, outInv_e );
1689 9572800 : move32();
1690 9572800 : }
1691 :
1692 1999036 : static Word32 GivensRotation_fx(
1693 : const Word32 x, /* exp(x_e) */
1694 : const Word16 x_e,
1695 : const Word32 z, /* exp(z_e) */
1696 : const Word16 z_e,
1697 : Word16 *out_e )
1698 : {
1699 : Word32 r;
1700 :
1701 1999036 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
1702 1999036 : r = Sqrt32( r, out_e );
1703 1999036 : return ( r );
1704 : }
1705 :
1706 : /*-------------------------------------------------------------------------
1707 : * maxWithSign()
1708 : *
1709 : *
1710 : *-------------------------------------------------------------------------*/
1711 :
1712 24719684 : static Word32 maxWithSign_fx(
1713 : const Word32 a /* Qx */
1714 : )
1715 : {
1716 : Word32 result;
1717 24719684 : IF( a >= 0 )
1718 : {
1719 10712890 : result = L_max( a, SVD_MINIMUM_VALUE_FX );
1720 : }
1721 : ELSE
1722 : {
1723 14006794 : result = L_min( a, -SVD_MINIMUM_VALUE_FX );
1724 : }
1725 24719684 : return result;
1726 : }
1727 :
1728 : /*-------------------------------------------------------------------------
1729 : * flushToZeroArray()
1730 : *
1731 : *
1732 : *-------------------------------------------------------------------------*/
1733 :
1734 :
1735 : /*-------------------------------------------------------------------------
1736 : * flushToZeroMat()
1737 : *
1738 : *
1739 : *-------------------------------------------------------------------------*/
|