Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 : #include <stdint.h>
33 : #include "options.h"
34 : #include "prot_fx.h"
35 : #include "ivas_stat_dec.h"
36 : #include "ivas_cnst.h"
37 : #include <math.h>
38 : #include "wmc_auto.h"
39 : #include "ivas_prot_fx.h"
40 :
41 : /*-----------------------------------------------------------------------*
42 : * Local constants
43 : *-----------------------------------------------------------------------*/
44 :
45 : /* The SVD is sensitive to changes to the following constants, so please be careful when trying to tune things */
46 : #define SVD_MAX_NUM_ITERATION 75 /* maximum number of interations before exiting the SVD */
47 : #define SVD_MINIMUM_VALUE_FX ( 2 ) /* minimum value */
48 : #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
49 : #define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
50 :
51 : /*-----------------------------------------------------------------------*
52 : * Local function prototypes
53 : *-----------------------------------------------------------------------*/
54 :
55 : static void HouseholderReduction_fx(
56 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
57 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* exp(singularValues_fx_e) */
58 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
59 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* exp(secDiag_fx_e) */
60 : Word16 singularVectors_Left_e,
61 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
62 : Word16 *secDiag_fx_e,
63 : const Word16 nChannelsL, /* Q0 */
64 : const Word16 nChannelsC, /* Q0 */
65 : Word32 *eps_x_fx, /* exp(eps_x_fx_e) */
66 : Word16 *eps_x_fx_e );
67 :
68 : static void biDiagonalReductionLeft_fx(
69 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
70 : Word16 singularValues_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
71 : const Word16 nChannelsL,
72 : const Word16 nChannelsC, /* Q0 */
73 : const Word16 currChannel, /* Q0 */
74 : Word32 *g,
75 : Word16 *g_e );
76 :
77 : static void biDiagonalReductionRight_fx(
78 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
79 : Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
80 : const Word16 nChannelsL, /* Q0 */
81 : const Word16 nChannelsC, /* Q0 */
82 : const Word16 currChannel, /* Q0 */
83 : Word32 *g,
84 : Word16 *g_e );
85 :
86 : static void singularVectorsAccumulationLeft_fx(
87 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
88 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
89 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
90 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
91 : const Word16 nChannelsL, /* Q0 */
92 : const Word16 nChannelsC /* Q0 */
93 : );
94 :
95 : static void singularVectorsAccumulationRight_fx(
96 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
97 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
98 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
99 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
100 : Word16 *secDiag_e,
101 : const Word16 nChannelsC /* Q0 */
102 : );
103 :
104 : static Word32 maxWithSign_fx(
105 : const Word32 a /* Qx */
106 : );
107 :
108 : static Word16 BidagonalDiagonalisation_fx(
109 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_fx_e*/
110 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_fx_e*/
111 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/
112 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/
113 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */
114 : Word16 *secDiag_fx_e, /* i/o: */
115 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
116 : const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/
117 : const Word32 eps_x, /* i : eps_x_e*/
118 : const Word16 eps_x_e /* i : */
119 : );
120 :
121 : static void ApplyRotation_fx(
122 : Word32 singularVector[][MAX_OUTPUT_CHANNELS],
123 : const Word32 c, /* exp(c_e)*/
124 : const Word16 c_e,
125 : const Word32 s, /* exp(s_e) */
126 : const Word16 s_e,
127 : Word32 x11, /* exp(x11_e) */
128 : Word16 x11_e,
129 : Word32 x12, /* exp(x12_e) */
130 : Word16 x12_e,
131 : Word32 *d, /* exp(d_e) */
132 : Word16 *d_e,
133 : Word32 *g, /* exp(g_e) */
134 : Word16 *g_e,
135 : const Word16 currentIndex1, /* Q0 */
136 : const Word16 currentIndex2, /* Q0 */
137 : const Word16 nChannels /* Q0 */
138 : );
139 :
140 : static void GivensRotation2_fx(
141 : const Word32 x, /* exp(x_e) */
142 : const Word16 x_e,
143 : const Word32 z, /* exp(z_e) */
144 : const Word16 z_e,
145 : Word32 *result,
146 : Word32 *resultInv,
147 : Word16 *out_e,
148 : Word16 *outInv_e );
149 :
150 : static Word32 GivensRotation_fx(
151 : const Word32 x, /* exp(x_e) */
152 : const Word16 x_e,
153 : const Word32 z, /* exp(z_e) */
154 : const Word16 z_e,
155 : Word16 *out_e );
156 :
157 : static void ApplyQRTransform_fx(
158 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_e*/
159 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_e*/
160 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_e*/
161 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_e*/
162 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
163 : Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
164 : const Word16 startIndex, /* i : Q0*/
165 : const Word16 currentIndex, /* i : Q0*/
166 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
167 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
168 : );
169 :
170 : /*-------------------------------------------------------------------------
171 : * mat2svdMat()
172 : *
173 : * external matrix format to internal
174 : *-------------------------------------------------------------------------*/
175 :
176 1035976 : void mat2svdMat_fx(
177 : const Word32 *mat, /* i : matrix as column ordered vector Qx*/
178 : Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* o : matrix as two-dimensional arry Qx*/
179 : const Word16 nRows, /* i : number of rows of the matrix Q0*/
180 : const Word16 mCols, /* i : number of columns of the matrix Q0*/
181 : const Word16 transpose /* i : flag indication transposition Q0*/
182 : )
183 : {
184 : Word16 i, j;
185 :
186 1035976 : IF( transpose )
187 : {
188 854511 : FOR( i = 0; i < mCols; i++ )
189 : {
190 2155327 : FOR( j = 0; j < nRows; j++ )
191 : {
192 1441208 : svdMat[i][j] = mat[j + ( nRows * i )]; /* Qx */
193 1441208 : move32();
194 : }
195 :
196 714119 : set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, nRows ) );
197 : }
198 :
199 1672545 : FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
200 : {
201 1532153 : set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
202 : }
203 : }
204 : ELSE
205 : {
206 3800505 : FOR( i = 0; i < nRows; i++ )
207 : {
208 14545256 : FOR( j = 0; j < mCols; j++ )
209 : {
210 11640335 : svdMat[i][j] = mat[i + ( nRows * j )]; /* Qx */
211 11640335 : move32();
212 : }
213 :
214 2904921 : set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, mCols ) );
215 : }
216 :
217 12320007 : FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
218 : {
219 11424423 : set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
220 : }
221 : }
222 :
223 1035976 : return;
224 : }
225 :
226 :
227 : /*---------------------------------------------------------------------*
228 : * svdMat2mat()
229 : *
230 : * transfer a matrix from a two dimensional array to a column wise ordered vector
231 : *---------------------------------------------------------------------*/
232 :
233 1304724 : void svdMat2mat_fx(
234 : Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* i : matrix as two-dimensional arry Qx*/
235 : Word32 *mat, /* o : matrix as column ordered vector Qx*/
236 : const Word16 nRows, /* i : number of rows of the matrix Q0*/
237 : const Word16 mCols /* i : number of columns of the matrix Q0*/
238 : )
239 : {
240 : Word16 i, j;
241 :
242 5035125 : FOR( i = 0; i < nRows; i++ )
243 : {
244 14803895 : FOR( j = 0; j < mCols; j++ )
245 : {
246 11073494 : mat[i + ( nRows * j )] = svdMat[i][j]; /* Qx */
247 11073494 : move32();
248 : }
249 : }
250 :
251 1304724 : return;
252 : }
253 :
254 : /*-------------------------------------------------------------------------
255 : * svd()
256 : *
257 : * perform a singular value decomposition X=USV of a matrix X
258 : *-------------------------------------------------------------------------*/
259 :
260 : /*! r: error or success */
261 1035976 : Word16 svd_fx(
262 : Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) InputMatrix_e*/
263 : Word16 InputMatrix_e,
264 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) singularValues_fx_e*/
265 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) singularValues_fx_e*/
266 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) singularValues_fx_e*/
267 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
268 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
269 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
270 : )
271 : {
272 : Word16 iCh, jCh;
273 : Word16 lengthSingularValues;
274 : Word16 errorMessage, condition;
275 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
276 : Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
277 1035976 : move16();
278 1035976 : Word32 eps_x_fx = 0, temp_fx;
279 1035976 : move16();
280 1035976 : Word16 eps_x_fx_e = 0;
281 1035976 : move16();
282 : Word16 temp_fx_e;
283 1035976 : push_wmops( "svd_fx" );
284 :
285 :
286 : /* Collecting Values */
287 4655016 : FOR( iCh = 0; iCh < nChannelsL; iCh++ )
288 : {
289 16700583 : FOR( jCh = 0; jCh < nChannelsC; jCh++ )
290 : {
291 13081543 : singularVectors_Left_fx[iCh][jCh] = InputMatrix[iCh][jCh]; /* InputMatrix_e */
292 13081543 : move32();
293 : }
294 : }
295 :
296 : /* Householder reduction */
297 1035976 : HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
298 : /* Set extremely small values to zero if needed */
299 : // flushToZeroArray(singularValues, max_length);
300 : // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
301 : // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);
302 :
303 : /* BidagonalDiagonalisation */
304 1035976 : errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
305 : /* Sort the singular values descending order */
306 1035976 : lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */
307 :
308 : DO
309 : {
310 1422412 : condition = 0;
311 1422412 : move16();
312 4854429 : FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
313 : {
314 3432017 : IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) )
315 : {
316 488184 : condition = 1;
317 488184 : move16();
318 488184 : temp_fx = singularValues_fx[iCh]; /* singularValues_fx_e */
319 488184 : move32();
320 488184 : singularValues_fx[iCh] = singularValues_fx[iCh + 1]; /* singularValues_fx_e */
321 488184 : move32();
322 488184 : singularValues_fx[iCh + 1] = temp_fx; /* singularValues_fx_e */
323 488184 : move32();
324 488184 : temp_fx_e = singularValues_fx_e[iCh];
325 488184 : move16();
326 488184 : singularValues_fx_e[iCh] = singularValues_fx_e[iCh + 1];
327 488184 : move16();
328 488184 : singularValues_fx_e[iCh + 1] = temp_fx_e;
329 488184 : move16();
330 :
331 2714423 : FOR( jCh = 0; jCh < nChannelsL; ++jCh )
332 : {
333 2226239 : temp_fx = singularVectors_Left_fx[jCh][iCh]; /* singularValues_fx_e */
334 2226239 : move32();
335 2226239 : singularVectors_Left_fx[jCh][iCh] = singularVectors_Left_fx[jCh][iCh + 1]; /* singularValues_fx_e */
336 2226239 : move32();
337 2226239 : singularVectors_Left_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
338 2226239 : move32();
339 : }
340 :
341 2705334 : FOR( jCh = 0; jCh < nChannelsC; ++jCh )
342 : {
343 2217150 : temp_fx = singularVectors_Right_fx[jCh][iCh]; /* singularValues_fx_e */
344 2217150 : move32();
345 2217150 : singularVectors_Right_fx[jCh][iCh] = singularVectors_Right_fx[jCh][iCh + 1]; /* singularValues_fx_e */
346 2217150 : move32();
347 2217150 : singularVectors_Right_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
348 2217150 : move32();
349 : }
350 : }
351 : }
352 : }
353 1422412 : WHILE( EQ_16( condition, 1 ) );
354 :
355 1035976 : pop_wmops();
356 1035976 : return ( errorMessage );
357 : }
358 :
359 :
360 : /*-----------------------------------------------------------------------*
361 : * Local functions
362 : *-----------------------------------------------------------------------*/
363 :
364 : /*-------------------------------------------------------------------------
365 : * BidagonalDiagonalisation()
366 : *
367 : *
368 : *-------------------------------------------------------------------------*/
369 :
370 1035976 : static Word16 BidagonalDiagonalisation_fx(
371 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_fx_e*/
372 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_fx_e*/
373 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_fx_e*/
374 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_fx_e*/
375 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */
376 : Word16 *secDiag_new_e, /* i/o: */
377 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
378 : const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed Q0*/
379 : const Word32 eps_x, /* i : eps_x_e*/
380 : const Word16 eps_x_e /* i : */
381 : )
382 : {
383 : Word16 kCh, nCh, iCh, jCh, split;
384 : Word32 c, s, f1, f2;
385 : Word16 c_e, s_e, f1_e, f2_e;
386 1035976 : Word16 x11_e = 0, x12_e = 0;
387 1035976 : move16();
388 1035976 : move16();
389 : Word16 temp_exp;
390 : Word16 temp_exp2;
391 1035976 : Word32 g = 0;
392 1035976 : move16();
393 1035976 : Word16 g_e = 0;
394 1035976 : move16();
395 : Word16 convergence, iteration, found_split;
396 1035976 : Word16 error = 0;
397 1035976 : move16();
398 : Word32 temp;
399 : Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
400 1035976 : Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
401 :
402 4223041 : FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
403 : {
404 3187065 : convergence = 0;
405 3187065 : move16();
406 3187065 : iteration = 0;
407 3187065 : move16();
408 3187065 : split = iCh - 1; /* Q0 */
409 3187065 : move16();
410 :
411 8471324 : WHILE( EQ_16( convergence, 0 ) )
412 : {
413 5284259 : iteration = add( iteration, 1 ); /* Q0 */
414 5284259 : found_split = 1; /* Q0 */
415 5284259 : move16();
416 :
417 10219182 : FOR( jCh = iCh; jCh >= 0; jCh-- )
418 : {
419 10219182 : Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e );
420 10219182 : IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */
421 : {
422 5231849 : found_split = 0;
423 5231849 : move16();
424 5231849 : BREAK;
425 : }
426 4987333 : com_e = s_max( singularValues_new_e[jCh - 1], eps_x_e );
427 4987333 : IF( LE_32( L_shr( L_abs( singularValues_fx[jCh - 1] ), sub( com_e, singularValues_new_e[jCh - 1] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[jCh - 1] vanishing compared to eps_x */
428 : {
429 52410 : BREAK;
430 : }
431 : }
432 :
433 : // convergence = ( jCh == iCh ) ? 1 : 0;
434 5284259 : IF( EQ_16( jCh, iCh ) )
435 : {
436 3187065 : convergence = 1; /* Q0 */
437 3187065 : move16();
438 : }
439 : ELSE
440 : {
441 2097194 : convergence = 0;
442 2097194 : move16();
443 : }
444 :
445 5284259 : IF( found_split )
446 : {
447 52410 : s = MAX_32;
448 52410 : move32();
449 52410 : s_e = 0;
450 52410 : move16();
451 52410 : c = 0;
452 52410 : move32();
453 52410 : c_e = 0;
454 52410 : move16();
455 52410 : split = sub( jCh, 1 ); /* Q0 */
456 104890 : FOR( kCh = jCh; kCh <= iCh; kCh++ )
457 : {
458 52483 : g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */
459 52483 : g_e = add( s_e, secDiag_new_e[kCh] );
460 52483 : secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
461 52483 : secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
462 52483 : Word16 com_e = s_max( g_e, eps_x_e );
463 52483 : IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) )
464 : {
465 3 : BREAK;
466 : }
467 :
468 52480 : c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
469 52480 : c_e = singularValues_new_e[kCh];
470 52480 : GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
471 52480 : c = Mpy_32_32( c, temp );
472 52480 : c_e = add( c_e, temp_exp );
473 52480 : temp_exp2 = norm_l( c );
474 52480 : c = L_shl( c, temp_exp2 );
475 52480 : c_e = sub( c_e, temp_exp2 );
476 52480 : s = Mpy_32_32( -g, temp );
477 52480 : s_e = add( g_e, temp_exp );
478 52480 : temp_exp2 = norm_l( s );
479 52480 : s = L_shl( s, temp_exp2 );
480 52480 : s_e = sub( s_e, temp_exp2 );
481 52480 : ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
482 : }
483 : }
484 :
485 5284259 : IF( convergence )
486 : {
487 3187065 : singularValues_fx[iCh] = singularValues_fx[iCh];
488 3187065 : move32();
489 3187065 : IF( singularValues_fx[iCh] < 0 )
490 : {
491 612067 : singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
492 612067 : move32();
493 2244377 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
494 : {
495 1632310 : singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
496 1632310 : move32();
497 : }
498 : }
499 : }
500 : ELSE
501 : {
502 2097194 : IF( GE_16( iteration, SVD_MAX_NUM_ITERATION ) )
503 : {
504 0 : IF( LT_32( singularValues_fx[iCh], 0 ) )
505 : {
506 0 : singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
507 0 : move32();
508 :
509 0 : FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
510 : {
511 0 : singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
512 0 : move32();
513 : }
514 : }
515 0 : error = 1;
516 0 : move16();
517 0 : convergence = 1;
518 0 : move16();
519 : }
520 : ELSE
521 : {
522 2097194 : ApplyQRTransform_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_new_e, secDiag_new_e, jCh, iCh, nChannelsL, nChannelsC ); /* nChannelsC */
523 : }
524 : }
525 : }
526 : }
527 :
528 : // rescaling block
529 1035976 : Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );
530 :
531 :
532 1035976 : return ( error );
533 : }
534 :
535 : /*-------------------------------------------------------------------------
536 : * ApplyQRTransform()
537 : *
538 : *
539 : *-------------------------------------------------------------------------*/
540 :
541 2097194 : static void ApplyQRTransform_fx(
542 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) singularValues_e*/
543 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) singularValues_e*/
544 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) singularValues_e*/
545 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* i/o: secDiag_e*/
546 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
547 : Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
548 : const Word16 startIndex, /* i : Q0*/
549 : const Word16 currentIndex, /* i : Q0*/
550 : const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed Q0*/
551 : const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed Q0*/
552 : )
553 : {
554 : Word32 temp;
555 : Word16 temp_e;
556 : Word16 temp_norm_e;
557 : Word16 ch, split;
558 2097194 : Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
559 2097194 : move32();
560 2097194 : move32();
561 2097194 : move32();
562 2097194 : move32();
563 2097194 : move32();
564 2097194 : move32();
565 2097194 : move32();
566 2097194 : move32();
567 2097194 : Word16 d_e = 0, g_e = 0, r_e = 0, x_ii_e = 0, x_split_e = 0, x_kk_e = 0, mu_e = 0, aux_e = 0;
568 2097194 : move16();
569 2097194 : move16();
570 2097194 : move16();
571 2097194 : move16();
572 2097194 : move16();
573 2097194 : move16();
574 2097194 : move16();
575 2097194 : move16();
576 : Word32 L_temp1, L_temp2, L_temp3, L_temp4;
577 : Word16 L_temp1_e, L_temp2_e, L_temp3_e, L_temp4_e, temp_exp;
578 2097194 : Word32 c = MAX_32;
579 2097194 : move32();
580 2097194 : Word16 c_e = 0;
581 2097194 : move16();
582 2097194 : Word32 s = MAX_32;
583 2097194 : move32();
584 2097194 : Word16 s_e = 0;
585 2097194 : move16();
586 :
587 2097194 : x_kk = singularValues[currentIndex]; /* exp(singularValues_e) */
588 2097194 : move32();
589 2097194 : x_kk_e = singularValues_e[currentIndex];
590 2097194 : move16();
591 2097194 : x_ii = singularValues[startIndex]; /* exp(singularValues_e) */
592 2097194 : move32();
593 2097194 : x_ii_e = singularValues_e[startIndex];
594 2097194 : move16();
595 2097194 : split = sub( currentIndex, 1 ); /* Q0 */
596 2097194 : move16();
597 :
598 2097194 : x_split = singularValues[split]; /* exp(singularValues_e) */
599 2097194 : move32();
600 2097194 : x_split_e = singularValues_e[split];
601 2097194 : move16();
602 2097194 : g = secDiag[split]; /* exp(secDiag_e) */
603 2097194 : move32();
604 2097194 : g_e = secDiag_e[split];
605 2097194 : move16();
606 2097194 : r = secDiag[currentIndex]; /* exp(secDiag_e) */
607 2097194 : move32();
608 2097194 : r_e = secDiag_e[currentIndex];
609 2097194 : move16();
610 :
611 : // d = (x_split + x_kk) * (x_split - x_kk) + (g + r) * (g - r);
612 2097194 : L_temp1 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, x_kk, x_kk_e, &L_temp1_e ); /* exp(L_temp1_e) */
613 2097194 : L_temp2 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, L_negate( x_kk ), x_kk_e, &L_temp2_e ); /* exp(L_temp2_e) */
614 2097194 : L_temp3 = BASOP_Util_Add_Mant32Exp( g, g_e, r, r_e, &L_temp3_e ); /* exp(L_temp3_e) */
615 2097194 : L_temp4 = BASOP_Util_Add_Mant32Exp( g, g_e, L_negate( r ), r_e, &L_temp4_e ); /* exp(L_temp4_e) */
616 2097194 : d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( L_temp3, L_temp4 ), add( L_temp3_e, L_temp4_e ), &d_e ); /* exp(d_e) */
617 :
618 : // d /= maxWithSign((r + r) * x_split);
619 2097194 : L_temp1 = BASOP_Util_Add_Mant32Exp( r, r_e, r, r_e, &L_temp1_e ); /* exp(L_temp1_e) */
620 2097194 : L_temp1 = maxWithSign_fx( Mpy_32_32( L_temp1, x_split ) ); /* exp(L_temp1_e + x_split_e) */
621 2097194 : L_temp1_e = add( L_temp1_e, x_split_e );
622 2097194 : d = BASOP_Util_Divide3232_Scale_newton( d, L_temp1, &temp_exp ); /* temp_exp + d_e - L_temp1_e */
623 2097194 : d_e = add( temp_exp, sub( d_e, L_temp1_e ) );
624 :
625 2097194 : g = GivensRotation_fx( MAX_32, 0, d, d_e, &g_e );
626 :
627 : // mu = x_split / maxWithSign(d + (d >= 0.0f ? 1 : (-1)) * fabsf(g)) - r;
628 : // L_temp1 = d >= 0 ? L_abs( g ) : -L_abs( g );
629 2097194 : IF( d >= 0 )
630 : {
631 1234116 : L_temp1 = L_abs( g );
632 : }
633 : ELSE
634 : {
635 863078 : L_temp1 = L_negate( L_abs( g ) );
636 : }
637 2097194 : L_temp1_e = g_e;
638 2097194 : move16();
639 2097194 : L_temp2 = maxWithSign_fx( BASOP_Util_Add_Mant32Exp( d, d_e, L_temp1, L_temp1_e, &L_temp2_e ) ); /* exp(L_temp2_e) */
640 2097194 : mu = BASOP_Util_Divide3232_Scale_newton( x_split, L_temp2, &mu_e ); /* exp(mu_e + (x-plit_e - L_temp2_e)) */
641 2097194 : mu_e = add( mu_e, sub( x_split_e, L_temp2_e ) );
642 2097194 : mu = BASOP_Util_Add_Mant32Exp( mu, mu_e, L_negate( r ), r_e, &mu_e ); /* exp(mu_e) */
643 :
644 : // d = ((x_ii + x_kk) * (x_ii - x_kk) + r * mu) / maxWithSign(x_ii);
645 2097194 : L_temp1 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, x_kk, x_kk_e, &L_temp1_e ); /* exp(L_temp1_e) */
646 2097194 : L_temp2 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, L_negate( x_kk ), x_kk_e, &L_temp2_e ); /* exp(L_temp2_e) */
647 2097194 : d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( r, mu ), add( r_e, mu_e ), &d_e ); /* exp(d_e) */
648 2097194 : d = BASOP_Util_Divide3232_Scale_newton( d, maxWithSign_fx( x_ii ), &temp_exp ); /* exp(temp_exp + (d_e - x_ii_e) */
649 2097194 : d_e = add( temp_exp, sub( d_e, x_ii_e ) );
650 :
651 : /*QR transformation*/
652 7032117 : FOR( ch = startIndex; ch <= split; ch++ )
653 : {
654 4934923 : r = Mpy_32_32( s, secDiag[ch + 1] ); /* exp(s_e + secDiag_e) */
655 4934923 : r_e = add( s_e, secDiag_e[ch + 1] );
656 4934923 : g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
657 4934923 : g_e = add( c_e, secDiag_e[ch + 1] );
658 :
659 4934923 : GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
660 4934923 : c = Mpy_32_32( d, temp );
661 4934923 : c_e = add( temp_e, d_e );
662 4934923 : temp_norm_e = norm_l( c );
663 4934923 : c = L_shl( c, temp_norm_e );
664 4934923 : c_e = sub( c_e, temp_norm_e );
665 4934923 : s = Mpy_32_32( r, temp );
666 4934923 : s_e = add( r_e, temp_e );
667 4934923 : temp_norm_e = norm_l( s );
668 4934923 : s = L_shl( s, temp_norm_e );
669 4934923 : s_e = sub( s_e, temp_norm_e );
670 4934923 : r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
671 4934923 : r_e = add( s_e, singularValues_e[ch + 1] );
672 4934923 : x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
673 4934923 : x_split_e = add( c_e, singularValues_e[ch + 1] );
674 :
675 4934923 : aux = g; /* exp(g_e) */
676 4934923 : move32();
677 4934923 : aux_e = g_e;
678 4934923 : move16();
679 :
680 : // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
681 4934923 : ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
682 :
683 4934923 : GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
684 4934923 : IF( singularValues[ch] != 0 )
685 : {
686 :
687 4934923 : c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
688 4934923 : c_e = add( d_e, aux_e );
689 4934923 : temp_norm_e = norm_l( c );
690 4934923 : c = L_shl( c, temp_norm_e );
691 4934923 : c_e = sub( c_e, temp_norm_e );
692 :
693 4934923 : s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
694 4934923 : s_e = add( r_e, aux_e );
695 4934923 : temp_norm_e = norm_l( s );
696 4934923 : s = L_shl( s, temp_norm_e );
697 4934923 : s_e = sub( s_e, temp_norm_e );
698 : }
699 :
700 : // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
701 4934923 : ApplyRotation_fx( singularVectors_Left, c, c_e, s, s_e, g, g_e, x_split, x_split_e, &d, &d_e, &x_ii, &x_ii_e, ch + 1, ch, nChannelsL );
702 : }
703 :
704 2097194 : secDiag[startIndex] = 0;
705 2097194 : move32();
706 2097194 : secDiag[currentIndex] = d; /* exp(d_e) */
707 2097194 : move32();
708 2097194 : secDiag_e[currentIndex] = d_e;
709 2097194 : move16();
710 2097194 : singularValues[currentIndex] = x_ii; /* exp(x_ii_e) */
711 2097194 : move32();
712 2097194 : singularValues_e[currentIndex] = x_ii_e;
713 2097194 : move16();
714 :
715 2097194 : return;
716 : }
717 :
718 : /*-------------------------------------------------------------------------
719 : * ApplyRotation()
720 : *
721 : *
722 : *-------------------------------------------------------------------------*/
723 :
724 9922326 : static void ApplyRotation_fx(
725 : Word32 singularVector[][MAX_OUTPUT_CHANNELS],
726 : const Word32 c, /* exp(c_e)*/
727 : const Word16 c_e,
728 : const Word32 s, /* exp(s_e) */
729 : const Word16 s_e,
730 : Word32 x11, /* exp(x11_e) */
731 : Word16 x11_e,
732 : Word32 x12, /* exp(x12_e) */
733 : Word16 x12_e,
734 : Word32 *d, /* exp(d_e) */
735 : Word16 *d_e,
736 : Word32 *g, /* exp(g_e) */
737 : Word16 *g_e,
738 : const Word16 currentIndex1, /* Q0 */
739 : const Word16 currentIndex2, /* Q0 */
740 : const Word16 nChannels /* Q0 */
741 : )
742 : {
743 : Word16 ch;
744 :
745 9922326 : *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
746 9922326 : move32();
747 9922326 : *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
748 9922326 : move32();
749 :
750 9922326 : Word16 c_q = sub( 31, c_e );
751 9922326 : Word16 s_q = sub( 31, s_e );
752 : Word32 op1, op2;
753 : Word16 op_e;
754 :
755 : // Bring c and s to same Q
756 9922326 : IF( GT_16( c_q, s_q ) )
757 : {
758 861663 : op1 = L_shr( c, sub( c_q, s_q ) );
759 861663 : op2 = s;
760 861663 : move32();
761 861663 : op_e = s_q;
762 861663 : move16();
763 : }
764 : ELSE
765 : {
766 9060663 : op1 = c;
767 9060663 : move32();
768 9060663 : op2 = L_shr( s, sub( s_q, c_q ) );
769 9060663 : op_e = c_q;
770 9060663 : move16();
771 : }
772 9922326 : op_e = add( op_e, 1 ); // 64 bit mac -> +1
773 9922326 : op_e = negate( op_e );
774 :
775 58859943 : FOR( ch = 0; ch < nChannels; ch++ )
776 : {
777 48937617 : x11 = singularVector[ch][currentIndex2];
778 48937617 : move32();
779 48937617 : x12 = singularVector[ch][currentIndex1];
780 48937617 : move32();
781 :
782 48937617 : Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
783 48937617 : singularVector[ch][currentIndex2] = W_shl_sat_l( temp, op_e ); // Q(singularVector)
784 48937617 : move32();
785 :
786 48937617 : temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
787 48937617 : singularVector[ch][currentIndex1] = W_shl_sat_l( temp, op_e ); // Q(singularVector)
788 48937617 : move32();
789 : }
790 :
791 9922326 : return;
792 : }
793 :
794 : /*-------------------------------------------------------------------------
795 : * HouseholderReduction()
796 : *
797 : *
798 : *-------------------------------------------------------------------------*/
799 :
800 1035976 : static void HouseholderReduction_fx(
801 : Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
802 : Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* exp(singularValues_fx_e) */
803 : Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
804 : Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* exp(secDiag_fx_e) */
805 : Word16 singularVectors_Left_e,
806 : Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
807 : Word16 *secDiag_fx_e,
808 : const Word16 nChannelsL, /* Q0 */
809 : const Word16 nChannelsC, /* Q0 */
810 : Word32 *eps_x_fx, /* exp(eps_x_fx_e) */
811 : Word16 *eps_x_fx_e )
812 : {
813 : Word16 nCh;
814 :
815 1035976 : Word32 g_left_fx = 0;
816 1035976 : Word16 g_left_e = 0;
817 1035976 : move32();
818 1035976 : move16();
819 1035976 : Word32 g_right_fx = 0;
820 1035976 : Word16 g_right_e = 0;
821 1035976 : move32();
822 1035976 : move16();
823 :
824 :
825 : Word16 iCh, jCh;
826 : Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
827 :
828 1035976 : Word16 sc = 0;
829 1035976 : move16();
830 1035976 : sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC );
831 3619040 : FOR( jCh = 1; jCh < nChannelsL; jCh++ )
832 : {
833 2583064 : sc = s_min( sc, getScaleFactor32( singularVectors_Left_fx[jCh], nChannelsC ) );
834 : }
835 4655016 : FOR( jCh = 0; jCh < nChannelsL; jCh++ )
836 : {
837 3619040 : Scale_sig32( singularVectors_Left_fx[jCh], nChannelsC, sc );
838 16700583 : FOR( iCh = 0; iCh < nChannelsC; iCh++ )
839 : {
840 13081543 : singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e - sc;
841 13081543 : move16();
842 : }
843 : }
844 :
845 4223041 : FOR( nCh = 0; nCh < nChannelsC; nCh++ )
846 : {
847 3187065 : secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
848 3187065 : move32();
849 3187065 : secDiag_fx_e[nCh] = g_right_e;
850 :
851 3187065 : biDiagonalReductionLeft_fx(
852 : singularVectors_Left_fx,
853 : singularVectors_Left_fx_e,
854 : nChannelsL,
855 : nChannelsC,
856 : nCh,
857 : &g_left_fx,
858 : &g_left_e );
859 :
860 3187065 : singularValues_fx[nCh] = g_left_fx;
861 3187065 : move32();
862 3187065 : singularValues_fx_e[nCh] = g_left_e;
863 :
864 3187065 : biDiagonalReductionRight_fx(
865 : singularVectors_Left_fx,
866 : singularVectors_Left_fx_e,
867 : nChannelsL,
868 : nChannelsC,
869 : nCh,
870 : &g_right_fx,
871 : &g_right_e );
872 :
873 : Word16 L_temp_e;
874 3187065 : Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
875 3187065 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
876 : {
877 1482400 : *eps_x_fx = L_temp; /* exp(L_temp_e) */
878 1482400 : move32();
879 1482400 : *eps_x_fx_e = L_temp_e;
880 1482400 : move32();
881 : }
882 : }
883 :
884 :
885 : /* SingularVecotr Accumulation */
886 1035976 : singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
887 :
888 :
889 1035976 : singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
890 :
891 1035976 : return;
892 : }
893 :
894 : /*-------------------------------------------------------------------------
895 : * biDiagonalReductionLeft()
896 : *
897 : *
898 : *-------------------------------------------------------------------------*/
899 3187065 : static void biDiagonalReductionLeft_fx(
900 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
901 : Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
902 : const Word16 nChannelsL,
903 : const Word16 nChannelsC, /* Q0 */
904 : const Word16 currChannel, /* Q0 */
905 : Word32 *g,
906 : Word16 *g_e )
907 : {
908 : Word16 iCh, jCh;
909 : Word32 norm_x, f, r;
910 : Word16 norm_x_e, f_e, r_e;
911 : Word32 L_temp;
912 : Word16 L_temp_e;
913 :
914 : /* Setting values to 0 */
915 3187065 : *g = 0;
916 3187065 : *g_e = 0;
917 3187065 : move32();
918 3187065 : move16();
919 :
920 3187065 : IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
921 : {
922 3187065 : Word64 temp = 0;
923 3187065 : move64();
924 3187065 : norm_x = 0;
925 3187065 : move32();
926 3187065 : norm_x_e = 0;
927 3187065 : move16();
928 3187065 : Word16 max_e = MIN_16;
929 3187065 : move16();
930 11757789 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
931 : {
932 8570724 : max_e = s_max( max_e, singularVectors_e[jCh][currChannel] );
933 : }
934 :
935 11757789 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
936 : {
937 8570724 : temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors_e[jCh][currChannel] ), 1 ) ) );
938 : }
939 :
940 3187065 : Word16 nrm = W_norm( temp );
941 3187065 : nrm = sub( nrm, 32 );
942 3187065 : norm_x = W_shl_sat_l( temp, nrm );
943 3187065 : norm_x_e = sub( add( max_e, max_e ), nrm );
944 :
945 3187065 : IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
946 : {
947 : Word16 invVal_e;
948 : Word32 invVal;
949 :
950 3016767 : L_temp_e = norm_x_e;
951 3016767 : move16();
952 3016767 : L_temp = Sqrt32( norm_x, &L_temp_e );
953 : //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
954 3016767 : if ( singularVectors[currChannel][currChannel] >= 0 )
955 : {
956 1719992 : L_temp = L_negate( L_temp );
957 1719992 : move32();
958 : }
959 3016767 : *g = L_temp;
960 3016767 : move32();
961 3016767 : *g_e = L_temp_e;
962 3016767 : move16();
963 :
964 3016767 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
965 3016767 : singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */
966 3016767 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
967 :
968 7523064 : FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
969 : {
970 4506297 : Word16 max2_e = MIN_16;
971 4506297 : max_e = MIN_16;
972 4506297 : move16();
973 4506297 : move16();
974 4506297 : temp = 0;
975 4506297 : move64();
976 :
977 22525682 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
978 : {
979 18019385 : max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); /* exp(norm_x_e) */
980 18019385 : max2_e = s_max( max2_e, singularVectors_e[jCh][iCh] ); /* exp(norm_x_e) */
981 : }
982 4506297 : max_e = add( max_e, max2_e );
983 :
984 22525682 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
985 : {
986 18019385 : temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors_e[jCh][currChannel], singularVectors_e[jCh][iCh] ) ) ) );
987 : }
988 4506297 : nrm = W_norm( temp );
989 4506297 : nrm = sub( nrm, 32 );
990 4506297 : norm_x = W_shl_sat_l( temp, nrm );
991 4506297 : norm_x_e = sub( max_e, nrm );
992 :
993 4506297 : f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
994 4506297 : f_e = add( invVal_e, sub( norm_x_e, r_e ) );
995 :
996 22525682 : FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
997 : {
998 18019385 : singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] );
999 : }
1000 : }
1001 : }
1002 : }
1003 3187065 : return;
1004 : }
1005 :
1006 3187065 : static void biDiagonalReductionRight_fx(
1007 : Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
1008 : Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
1009 : const Word16 nChannelsL, /* Q0 */
1010 : const Word16 nChannelsC, /* Q0 */
1011 : const Word16 currChannel, /* Q0 */
1012 : Word32 *g,
1013 : Word16 *g_e )
1014 : {
1015 : Word16 iCh, jCh, idx;
1016 : Word32 norm_x, r;
1017 : Word16 norm_x_e, r_e;
1018 : Word32 L_temp;
1019 : Word16 L_temp_e;
1020 :
1021 : /* Setting values to 0 */
1022 3187065 : *g = 0;
1023 3187065 : *g_e = 0;
1024 3187065 : move32();
1025 3187065 : move16();
1026 3187065 : IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
1027 : {
1028 2151089 : idx = add( currChannel, 1 ); /* Q0 */
1029 :
1030 2151089 : norm_x = 0;
1031 2151089 : move32();
1032 2151089 : norm_x_e = 0;
1033 2151089 : move16();
1034 6661908 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
1035 : {
1036 4510819 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
1037 : }
1038 :
1039 2151089 : IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
1040 : {
1041 : Word16 invVal_e;
1042 : Word32 invVal;
1043 :
1044 1999460 : L_temp_e = norm_x_e;
1045 1999460 : move16();
1046 1999460 : L_temp = Sqrt32( norm_x, &L_temp_e );
1047 : // L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
1048 1999460 : IF( singularVectors[currChannel][idx] >= 0 )
1049 : {
1050 749583 : ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
1051 749583 : move32();
1052 : }
1053 : ELSE
1054 : {
1055 1249877 : ( *g ) = L_temp; /* exp(L_temp_e) */
1056 1249877 : move32();
1057 : }
1058 1999460 : *g_e = L_temp_e;
1059 1999460 : move16();
1060 :
1061 1999460 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + ( *g_e ), -norm_x, norm_x_e, &r_e ); /* exp(r_e) */
1062 1999460 : singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */
1063 :
1064 1999460 : invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
1065 :
1066 6798925 : FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1067 : {
1068 4799465 : norm_x = 0;
1069 4799465 : move32();
1070 4799465 : norm_x_e = 0;
1071 4799465 : move16();
1072 18165076 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1073 : {
1074 13365611 : norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors_e[iCh][jCh], singularVectors_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
1075 : }
1076 :
1077 4799465 : norm_x = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
1078 4799465 : norm_x_e = add( invVal_e, sub( norm_x_e, r_e ) );
1079 :
1080 18165076 : FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
1081 : {
1082 13365611 : singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */
1083 : }
1084 : }
1085 : }
1086 : }
1087 :
1088 3187065 : return;
1089 : }
1090 :
1091 : /*-------------------------------------------------------------------------
1092 : * singularVectorsAccumulationLeft()
1093 : *
1094 : *
1095 : *-------------------------------------------------------------------------*/
1096 :
1097 1035976 : static void singularVectorsAccumulationLeft_fx(
1098 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
1099 : Word32 singularValues[MAX_OUTPUT_CHANNELS], /* exp(singularValues_e) */
1100 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
1101 : Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
1102 : const Word16 nChannelsL, /* Q0 */
1103 : const Word16 nChannelsC /* Q0 */
1104 : )
1105 : {
1106 : Word16 nCh, iCh, k;
1107 : Word16 nChannels;
1108 : Word32 norm_y, t_jj, t_ii;
1109 : Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
1110 :
1111 : /* Processing */
1112 1035976 : nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
1113 : // FILE *fp = fopen("t_ii_out.txt","a");
1114 4223041 : FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* min(nChannelsL,ChannelsC) */
1115 : {
1116 3187065 : t_ii = singularValues[nCh]; /* exp(singularValues_e) */
1117 3187065 : move32();
1118 3187065 : t_ii_e = singularValues_e[nCh];
1119 3187065 : move16();
1120 :
1121 7697884 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1122 : {
1123 4510819 : singularVectors_Left[nCh][iCh] = 0;
1124 4510819 : move32();
1125 : }
1126 :
1127 3187065 : IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
1128 : {
1129 3016767 : t_ii = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
1130 3016767 : t_ii_e = sub( temp_exp, t_ii_e );
1131 : Word16 tempe;
1132 3016767 : Word32 temp = BASOP_Util_Divide3232_Scale_newton( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
1133 3016767 : tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
1134 : // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
1135 7523064 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1136 : {
1137 4506297 : Word64 acc = 0;
1138 4506297 : move64();
1139 : Word64 prod[16];
1140 : Word16 prod_e[16];
1141 4506297 : Word16 max_e = -31;
1142 4506297 : move16();
1143 18019385 : FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
1144 : {
1145 13513088 : prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
1146 13513088 : prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
1147 13513088 : max_e = s_max( max_e, prod_e[k] );
1148 : }
1149 :
1150 18019385 : FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
1151 : {
1152 13513088 : acc = W_add( acc, W_shr( prod[k], s_min( 63, sub( max_e, prod_e[k] ) ) ) );
1153 : }
1154 4506297 : Word16 acc_e = W_norm( acc );
1155 4506297 : acc = W_shl( acc, acc_e );
1156 :
1157 4506297 : norm_y = W_extract_h( acc );
1158 4506297 : norm_y_e = add( sub( max_e, acc_e ), 1 );
1159 4506297 : t_jj = Mpy_32_32( temp, norm_y );
1160 4506297 : t_jj_e = add( tempe, norm_y_e );
1161 22525682 : FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
1162 : {
1163 18019385 : singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
1164 18019385 : move32();
1165 : }
1166 : }
1167 :
1168 11412638 : FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1169 : {
1170 8395871 : singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
1171 8395871 : move32();
1172 8395871 : singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
1173 8395871 : move16();
1174 : }
1175 : }
1176 : ELSE
1177 : {
1178 345151 : FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
1179 : {
1180 174853 : singularVectors_Left[iCh][nCh] = 0;
1181 174853 : move32();
1182 : }
1183 : }
1184 3187065 : Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 );
1185 3187065 : singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */
1186 3187065 : move32();
1187 3187065 : singularVectors_Left_e[nCh][nCh] = exp;
1188 3187065 : move16();
1189 : }
1190 : // fclose(fp);
1191 4655016 : FOR( nCh = 0; nCh < nChannelsL; nCh++ )
1192 : {
1193 16700583 : FOR( iCh = 0; iCh < nChannelsC; iCh++ )
1194 : {
1195 13081543 : singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
1196 13081543 : move32();
1197 : }
1198 : }
1199 :
1200 1035976 : return;
1201 : }
1202 :
1203 : /*-------------------------------------------------------------------------
1204 : * singularVectorsAccumulationRight()
1205 : *
1206 : *
1207 : *-------------------------------------------------------------------------*/
1208 :
1209 1035976 : static void singularVectorsAccumulationRight_fx(
1210 : Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
1211 : Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
1212 : Word32 secDiag[MAX_OUTPUT_CHANNELS], /* exp(secDiag_e) */
1213 : Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
1214 : Word16 *secDiag_e,
1215 : const Word16 nChannelsC /* Q0 */
1216 : )
1217 : {
1218 : Word16 nCh, iCh, k;
1219 : Word16 nChannels;
1220 : Word32 norm_y, t_ii, ratio_float;
1221 1035976 : Word16 norm_y_e, temp_exp1, sing_right_exp[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
1222 :
1223 : /* Processing */
1224 1035976 : nChannels = nChannelsC; /* nChannelsC Q0*/
1225 :
1226 : /* avoid compiler warning */
1227 1035976 : t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
1228 1035976 : move32();
1229 :
1230 4223041 : FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
1231 : {
1232 :
1233 3187065 : IF( LT_16( nCh, sub( nChannelsC, 1 ) ) ) /* nChannelsC */
1234 : {
1235 2151089 : IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
1236 : {
1237 :
1238 6358093 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
1239 : {
1240 4358633 : ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
1241 4358633 : singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) ); /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
1242 4358633 : temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
1243 4358633 : move32();
1244 4358633 : sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
1245 4358633 : move16();
1246 : // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
1247 : }
1248 :
1249 6358093 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1250 : {
1251 4358633 : Word64 norm_val = 0;
1252 4358633 : move64();
1253 4358633 : Word16 maxL_e = MIN_16;
1254 4358633 : Word16 maxR_e = MIN_16;
1255 4358633 : Word16 maxR2_e = MIN_16;
1256 4358633 : move16();
1257 4358633 : move16();
1258 4358633 : move16();
1259 17274522 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1260 : {
1261 12915889 : maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] );
1262 12915889 : maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] );
1263 12915889 : maxR2_e = s_max( maxR2_e, sing_right_exp[k][nCh] );
1264 : }
1265 :
1266 17274522 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1267 : {
1268 12915889 : norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) );
1269 : }
1270 4358633 : norm_y_e = W_norm( norm_val );
1271 4358633 : norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) );
1272 4358633 : norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e );
1273 :
1274 4358633 : Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) );
1275 17274522 : FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
1276 : {
1277 12915889 : Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] );
1278 12915889 : Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) );
1279 12915889 : singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */
1280 12915889 : move32();
1281 12915889 : singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */
1282 12915889 : move32();
1283 12915889 : sing_right_exp[k][iCh] = 0;
1284 12915889 : move16();
1285 : }
1286 : }
1287 : }
1288 :
1289 6661908 : FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
1290 : {
1291 4510819 : singularVectors_Right[nCh][iCh] = singularVectors_Right[iCh][nCh] = 0;
1292 4510819 : move32();
1293 4510819 : move32();
1294 : }
1295 : }
1296 3187065 : singularVectors_Right[nCh][nCh] = MAX_32;
1297 3187065 : move32();
1298 3187065 : t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
1299 3187065 : move32();
1300 : }
1301 1035976 : return;
1302 : }
1303 :
1304 : /*-------------------------------------------------------------------------
1305 : * GivensRotation()
1306 : *
1307 : *
1308 : *-------------------------------------------------------------------------*/
1309 :
1310 :
1311 9922326 : static void GivensRotation2_fx(
1312 : const Word32 x, /* exp(x_e) */
1313 : const Word16 x_e,
1314 : const Word32 z, /* exp(z_e) */
1315 : const Word16 z_e,
1316 : Word32 *result,
1317 : Word32 *resultInv,
1318 : Word16 *out_e,
1319 : Word16 *outInv_e )
1320 : {
1321 : Word32 r;
1322 9922326 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
1323 9922326 : r = L_max( r, 1 );
1324 9922326 : *outInv_e = *out_e;
1325 9922326 : move16();
1326 9922326 : *result = Sqrt32( r, out_e );
1327 9922326 : move32();
1328 :
1329 9922326 : *resultInv = ISqrt32( r, outInv_e );
1330 9922326 : move32();
1331 9922326 : }
1332 :
1333 2097194 : static Word32 GivensRotation_fx(
1334 : const Word32 x, /* exp(x_e) */
1335 : const Word16 x_e,
1336 : const Word32 z, /* exp(z_e) */
1337 : const Word16 z_e,
1338 : Word16 *out_e )
1339 : {
1340 : Word32 r;
1341 :
1342 2097194 : r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
1343 2097194 : r = Sqrt32( r, out_e );
1344 2097194 : return ( r );
1345 : }
1346 :
1347 : /*-------------------------------------------------------------------------
1348 : * maxWithSign()
1349 : *
1350 : *
1351 : *-------------------------------------------------------------------------*/
1352 :
1353 26058609 : static Word32 maxWithSign_fx(
1354 : const Word32 a /* Qx */
1355 : )
1356 : {
1357 : Word32 result;
1358 26058609 : IF( a >= 0 )
1359 : {
1360 11182931 : result = L_max( a, SVD_MINIMUM_VALUE_FX );
1361 : }
1362 : ELSE
1363 : {
1364 14875678 : result = L_min( a, -SVD_MINIMUM_VALUE_FX );
1365 : }
1366 26058609 : return result;
1367 : }
1368 :
1369 : /*-------------------------------------------------------------------------
1370 : * flushToZeroArray()
1371 : *
1372 : *
1373 : *-------------------------------------------------------------------------*/
1374 :
1375 :
1376 : /*-------------------------------------------------------------------------
1377 : * flushToZeroMat()
1378 : *
1379 : *
1380 : *-------------------------------------------------------------------------*/
|