LCOV - code coverage report
Current view: top level - lib_dec - ivas_svd_dec_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main enc/dec/rend @ 3b2f07138c61dcf997bbf4165d0882f794b2995f Lines: 563 573 98.3 %
Date: 2025-05-03 01:55:50 Functions: 14 14 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : #include <stdint.h>
      34             : #include "options.h"
      35             : #include "prot_fx.h"
      36             : #include "ivas_stat_dec.h"
      37             : #include "ivas_cnst.h"
      38             : #include <math.h>
      39             : #include "wmc_auto.h"
      40             : #include "ivas_prot_fx.h"
      41             : 
      42             : /*-----------------------------------------------------------------------*
      43             :  * Local constants
      44             :  *-----------------------------------------------------------------------*/
      45             : 
      46             : /* The SVD is sensitive to changes to the following constants, so please be careful when trying to tune things   */
      47             : #define SVD_MAX_NUM_ITERATION       75    /* maximum number of interations before exiting the SVD            */
      48             : #define SVD_MINIMUM_VALUE_FX        ( 2 ) /* minimum value                                                   */
      49             : #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
      50             : #define CONVERGENCE_FACTOR_FX       214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
      51             : 
      52             : /*-----------------------------------------------------------------------*
      53             :  * Local function prototypes
      54             :  *-----------------------------------------------------------------------*/
      55             : 
      56             : static void HouseholderReduction_fx(
      57             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
      58             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* exp(singularValues_fx_e) */
      59             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
      60             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_fx_e) */
      61             :     Word16 singularVectors_Left_e,
      62             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
      63             :     Word16 *secDiag_fx_e,
      64             :     const Word16 nChannelsL, /* Q0 */
      65             :     const Word16 nChannelsC, /* Q0 */
      66             :     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
      67             :     Word16 *eps_x_fx_e );
      68             : 
      69             : static void biDiagonalReductionLeft_fx(
      70             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
      71             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
      72             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
      73             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
      74             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
      75             :     Word16 *secDiag_e,
      76             :     const Word16 nChannelsL,  /* Q0 */
      77             :     const Word16 nChannelsC,  /* Q0 */
      78             :     const Word16 currChannel, /* Q0 */
      79             :     Word32 *sig_x,            /* exp(sig_x_e) */
      80             :     Word16 *sig_x_e,
      81             :     Word32 *g /* Q31 */
      82             : );
      83             : 
      84             : static void biDiagonalReductionRight_fx(
      85             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
      86             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
      87             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
      88             :     Word16 *secDiag_e,
      89             :     const Word16 nChannelsL,  /* Q0 */
      90             :     const Word16 nChannelsC,  /* Q0 */
      91             :     const Word16 currChannel, /* Q0 */
      92             :     Word32 *sig_x,            /* exp(sig_x_e) */
      93             :     Word16 *sig_x_e,
      94             :     Word32 *g /* Q31 */
      95             : );            // Q31
      96             : 
      97             : static void singularVectorsAccumulationLeft_fx(
      98             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
      99             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
     100             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
     101             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     102             :     const Word16 nChannelsL, /* Q0 */
     103             :     const Word16 nChannelsC  /* Q0 */
     104             : );
     105             : 
     106             : static void singularVectorsAccumulationRight_fx(
     107             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* singularVectors_e */
     108             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
     109             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
     110             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
     111             :     Word16 *secDiag_e,
     112             :     const Word16 nChannelsC /* Q0 */
     113             : );
     114             : 
     115             : static Word32 maxWithSign_fx(
     116             :     const Word32 a /* Qx */
     117             : );
     118             : 
     119             : static Word16 BidagonalDiagonalisation_fx(
     120             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                   singularValues_fx_e*/
     121             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)         singularValues_fx_e*/
     122             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                  singularValues_fx_e*/
     123             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
     124             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)                                                    */
     125             :     Word16 *secDiag_fx_e,                                   /* i/o:                                                                                                               */
     126             :     const Word16 nChannelsL,                                /* i  : number of rows in the matrix to be decomposed               Q0*/
     127             :     const Word16 nChannelsC,                                /* i  : number of columns in the matrix to be decomposed    Q0*/
     128             :     const Word32 eps_x,                                     /* i  :                                                eps_x_e*/
     129             :     const Word16 eps_x_e                                    /* i  :                                                       */
     130             : );
     131             : 
     132             : static void ApplyRotation_fx(
     133             :     Word32 singularVector[][MAX_OUTPUT_CHANNELS],
     134             :     const Word32 c, /* exp(c_e)*/
     135             :     const Word16 c_e,
     136             :     const Word32 s, /* exp(s_e) */
     137             :     const Word16 s_e,
     138             :     Word32 x11, /* exp(x11_e) */
     139             :     Word16 x11_e,
     140             :     Word32 x12, /* exp(x12_e) */
     141             :     Word16 x12_e,
     142             :     Word32 *d, /* exp(d_e) */
     143             :     Word16 *d_e,
     144             :     Word32 *g, /* exp(g_e) */
     145             :     Word16 *g_e,
     146             :     const Word16 currentIndex1, /* Q0 */
     147             :     const Word16 currentIndex2, /* Q0 */
     148             :     const Word16 nChannels      /* Q0 */
     149             : );
     150             : 
     151             : static void GivensRotation2_fx(
     152             :     const Word32 x, /* exp(x_e) */
     153             :     const Word16 x_e,
     154             :     const Word32 z, /* exp(z_e) */
     155             :     const Word16 z_e,
     156             :     Word32 *result,
     157             :     Word32 *resultInv,
     158             :     Word16 *out_e,
     159             :     Word16 *outInv_e );
     160             : 
     161             : static Word32 GivensRotation_fx(
     162             :     const Word32 x, /* exp(x_e) */
     163             :     const Word16 x_e,
     164             :     const Word32 z, /* exp(z_e) */
     165             :     const Word16 z_e,
     166             :     Word16 *out_e );
     167             : 
     168             : static void ApplyQRTransform_fx(
     169             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                      singularValues_e*/
     170             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)        singularValues_e*/
     171             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                     singularValues_e*/
     172             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* i/o:                                                                                  secDiag_e*/
     173             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     174             :     Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
     175             :     const Word16 startIndex,   /* i  :                                                  Q0*/
     176             :     const Word16 currentIndex, /* i  :                                                  Q0*/
     177             :     const Word16 nChannelsL,   /* i  : number of rows in the matrix to be decomposed                    Q0*/
     178             :     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed                 Q0*/
     179             : );
     180             : 
     181             : /*-------------------------------------------------------------------------
     182             :  * mat2svdMat()
     183             :  *
     184             :  * external matrix format to internal
     185             :  *-------------------------------------------------------------------------*/
     186             : 
     187      836340 : void mat2svdMat_fx(
     188             :     const Word32 *mat,                                       /* i  : matrix as column ordered vector    Qx*/
     189             :     Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* o  : matrix as two-dimensional arry             Qx*/
     190             :     const Word16 nRows,                                      /* i  : number of rows of the matrix               Q0*/
     191             :     const Word16 mCols,                                      /* i  : number of columns of the matrix    Q0*/
     192             :     const Word16 transpose                                   /* i  : flag indication transposition              Q0*/
     193             : )
     194             : {
     195             :     Word16 i, j;
     196             : 
     197      836340 :     IF( transpose )
     198             :     {
     199      706444 :         FOR( i = 0; i < mCols; i++ )
     200             :         {
     201     1775372 :             FOR( j = 0; j < nRows; j++ )
     202             :             {
     203     1185648 :                 svdMat[i][j] = mat[j + ( nRows * i )]; /* Qx */
     204     1185648 :                 move32();
     205             :             }
     206             : 
     207      589724 :             set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, nRows ) );
     208             :         }
     209             : 
     210     1394516 :         FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
     211             :         {
     212     1277796 :             set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
     213             :         }
     214             :     }
     215             :     ELSE
     216             :     {
     217     3069792 :         FOR( i = 0; i < nRows; i++ )
     218             :         {
     219    11768224 :             FOR( j = 0; j < mCols; j++ )
     220             :             {
     221     9418052 :                 svdMat[i][j] = mat[i + ( nRows * j )]; /* Qx */
     222     9418052 :                 move32();
     223             :             }
     224             : 
     225     2350172 :             set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, mCols ) );
     226             :         }
     227             : 
     228     9883368 :         FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
     229             :         {
     230     9163748 :             set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
     231             :         }
     232             :     }
     233             : 
     234      836340 :     return;
     235             : }
     236             : 
     237             : 
     238             : /*---------------------------------------------------------------------*
     239             :  * svdMat2mat()
     240             :  *
     241             :  * transfer a matrix from a two dimensional array  to a column wise ordered vector
     242             :  *---------------------------------------------------------------------*/
     243             : 
     244     1038400 : void svdMat2mat_fx(
     245             :     Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* i  : matrix as two-dimensional arry             Qx*/
     246             :     Word32 *mat,                                             /* o  : matrix as column ordered vector    Qx*/
     247             :     const Word16 nRows,                                      /* i  : number of rows of the matrix               Q0*/
     248             :     const Word16 mCols                                       /* i  : number of columns of the matrix    Q0*/
     249             : )
     250             : {
     251             :     Word16 i, j;
     252             : 
     253     4033332 :     FOR( i = 0; i < nRows; i++ )
     254             :     {
     255    11900708 :         FOR( j = 0; j < mCols; j++ )
     256             :         {
     257     8905776 :             mat[i + ( nRows * j )] = svdMat[i][j]; /* Qx */
     258     8905776 :             move32();
     259             :         }
     260             :     }
     261             : 
     262     1038400 :     return;
     263             : }
     264             : 
     265             : /*-------------------------------------------------------------------------
     266             :  * svd()
     267             :  *
     268             :  * perform a singular value decomposition X=USV of a matrix X
     269             :  *-------------------------------------------------------------------------*/
     270             : 
     271             : /*! r: error or success */
     272      836340 : Word16 svd_fx(
     273             :     Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i  : matrix to be decomposed (M)            InputMatrix_e*/
     274             :     Word16 InputMatrix_e,
     275             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* o  : left singular vectors (U)                   singularValues_fx_e*/
     276             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* o  : singular values vector (S)         singularValues_fx_e*/
     277             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o  : right singular vectors (V)                  singularValues_fx_e*/
     278             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
     279             :     const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed              Q0*/
     280             :     const Word16 nChannelsC  /* i  : number of columns in the matrix to be decomposed   Q0*/
     281             : )
     282             : {
     283             :     Word16 iCh, jCh;
     284             :     Word16 lengthSingularValues;
     285             :     Word16 errorMessage, condition;
     286             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
     287             :     Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
     288      836340 :     move16();
     289      836340 :     Word32 eps_x_fx = 0, temp_fx;
     290      836340 :     move16();
     291      836340 :     Word16 eps_x_fx_e = 0;
     292      836340 :     move16();
     293             :     Word16 temp_fx_e;
     294      836340 :     push_wmops( "svd_fx" );
     295             : 
     296             : 
     297             :     /* Collecting Values */
     298     3776236 :     FOR( iCh = 0; iCh < nChannelsL; iCh++ )
     299             :     {
     300    13543596 :         FOR( jCh = 0; jCh < nChannelsC; jCh++ )
     301             :         {
     302    10603700 :             singularVectors_Left_fx[iCh][jCh] = InputMatrix[iCh][jCh]; /* InputMatrix_e */
     303    10603700 :             move32();
     304             :         }
     305             :     }
     306             : 
     307             :     /* Householder reduction */
     308      836340 :     HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
     309             :     /* Set extremely small values to zero if needed */
     310             :     // flushToZeroArray(singularValues, max_length);
     311             :     // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
     312             :     // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);
     313             : 
     314             :     /* BidagonalDiagonalisation */
     315      836340 :     errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
     316             :     /* Sort the singular values descending order */
     317      836340 :     lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */
     318             : 
     319             :     DO
     320             :     {
     321     1156995 :         condition = 0;
     322     1156995 :         move16();
     323     3961884 :         FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
     324             :         {
     325     2804889 :             IF( BASOP_Util_Cmp_Mant32Exp( singularValues_fx[iCh], singularValues_fx_e[iCh], singularValues_fx[iCh + 1], singularValues_fx_e[iCh + 1] ) < 0 )
     326             :             {
     327      405400 :                 condition = 1;
     328      405400 :                 move16();
     329      405400 :                 temp_fx = singularValues_fx[iCh]; /* singularValues_fx_e */
     330      405400 :                 move32();
     331      405400 :                 singularValues_fx[iCh] = singularValues_fx[iCh + 1]; /* singularValues_fx_e */
     332      405400 :                 move32();
     333      405400 :                 singularValues_fx[iCh + 1] = temp_fx; /* singularValues_fx_e */
     334      405400 :                 move32();
     335      405400 :                 temp_fx_e = singularValues_fx_e[iCh];
     336      405400 :                 move16();
     337      405400 :                 singularValues_fx_e[iCh] = singularValues_fx_e[iCh + 1];
     338      405400 :                 move16();
     339      405400 :                 singularValues_fx_e[iCh + 1] = temp_fx_e;
     340      405400 :                 move16();
     341             : 
     342     2240259 :                 FOR( jCh = 0; jCh < nChannelsL; ++jCh )
     343             :                 {
     344     1834859 :                     temp_fx = singularVectors_Left_fx[jCh][iCh]; /* singularValues_fx_e */
     345     1834859 :                     move32();
     346     1834859 :                     singularVectors_Left_fx[jCh][iCh] = singularVectors_Left_fx[jCh][iCh + 1]; /* singularValues_fx_e */
     347     1834859 :                     move32();
     348     1834859 :                     singularVectors_Left_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
     349     1834859 :                     move32();
     350             :                 }
     351             : 
     352     2234267 :                 FOR( jCh = 0; jCh < nChannelsC; ++jCh )
     353             :                 {
     354     1828867 :                     temp_fx = singularVectors_Right_fx[jCh][iCh]; /* singularValues_fx_e */
     355     1828867 :                     move32();
     356     1828867 :                     singularVectors_Right_fx[jCh][iCh] = singularVectors_Right_fx[jCh][iCh + 1]; /* singularValues_fx_e */
     357     1828867 :                     move32();
     358     1828867 :                     singularVectors_Right_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
     359     1828867 :                     move32();
     360             :                 }
     361             :             }
     362             :         }
     363             :     }
     364     1156995 :     WHILE( EQ_16( condition, 1 ) );
     365             : 
     366      836340 :     pop_wmops();
     367      836340 :     return ( errorMessage );
     368             : }
     369             : 
     370             : 
     371             : /*-----------------------------------------------------------------------*
     372             :  * Local functions
     373             :  *-----------------------------------------------------------------------*/
     374             : 
     375             : /*-------------------------------------------------------------------------
     376             :  * BidagonalDiagonalisation()
     377             :  *
     378             :  *
     379             :  *-------------------------------------------------------------------------*/
     380             : 
     381      836340 : static Word16 BidagonalDiagonalisation_fx(
     382             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)              singularValues_fx_e*/
     383             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)         singularValues_fx_e*/
     384             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)             singularValues_fx_e*/
     385             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
     386             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)                                                    */
     387             :     Word16 *secDiag_new_e,                                  /* i/o:                                                                                                               */
     388             :     const Word16 nChannelsL,                                /* i  : number of rows in the matrix to be decomposed               Q0*/
     389             :     const Word16 nChannelsC,                                /* i  : number of columns in the matrix to be decomposed    Q0*/
     390             :     const Word32 eps_x,                                     /* i  :                                                eps_x_e*/
     391             :     const Word16 eps_x_e                                    /* i  :                                                       */
     392             : )
     393             : {
     394             :     Word16 kCh, nCh, iCh, jCh, split;
     395             :     Word32 c, s, f1, f2;
     396             :     Word16 c_e, s_e, f1_e, f2_e;
     397      836340 :     Word16 x11_e = 0, x12_e = 0;
     398      836340 :     move16();
     399      836340 :     move16();
     400             :     Word16 temp_exp;
     401             :     Word16 temp_exp2;
     402      836340 :     Word32 g = 0;
     403      836340 :     move16();
     404      836340 :     Word16 g_e = 0;
     405      836340 :     move16();
     406             :     Word16 convergence, iteration, found_split;
     407      836340 :     Word16 error = 0;
     408      836340 :     move16();
     409             :     Word32 temp;
     410             :     Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
     411      836340 :     Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
     412             : 
     413     3420512 :     FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
     414             :     {
     415     2584172 :         convergence = 0;
     416     2584172 :         move16();
     417     2584172 :         iteration = 0;
     418     2584172 :         move16();
     419     2584172 :         split = iCh - 1; /* Q0 */
     420     2584172 :         move16();
     421             : 
     422     6889234 :         WHILE( EQ_16( convergence, 0 ) )
     423             :         {
     424     4305062 :             iteration = add( iteration, 1 ); /* Q0 */
     425     4305062 :             found_split = 1;                 /* Q0 */
     426     4305062 :             move16();
     427             : 
     428     8342986 :             FOR( jCh = iCh; jCh >= 0; jCh-- )
     429             :             {
     430     8342986 :                 split = sub( jCh, 1 );                                                                                                                         /* Q0 */
     431     8342986 :                 IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is secDiag[ch] vanishing compared to eps_x */
     432             :                 {
     433     4261956 :                     found_split = 0;
     434     4261956 :                     move16();
     435     4261956 :                     BREAK;
     436             :                 }
     437     4081030 :                 IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( singularValues_fx[split] ), singularValues_new_e[split], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
     438             :                 {
     439       43106 :                     BREAK;
     440             :                 }
     441             :             }
     442             : 
     443             :             // convergence = ( jCh == iCh ) ? 1 : 0;
     444     4305062 :             IF( EQ_16( jCh, iCh ) )
     445             :             {
     446     2584172 :                 convergence = 1; /* Q0 */
     447     2584172 :                 move16();
     448             :             }
     449             :             ELSE
     450             :             {
     451     1720890 :                 convergence = 0;
     452     1720890 :                 move16();
     453             :             }
     454             : 
     455     4305062 :             IF( found_split )
     456             :             {
     457       43106 :                 s = MAX_32;
     458       43106 :                 move32();
     459       43106 :                 s_e = 0;
     460       43106 :                 move16();
     461       43106 :                 c = 0;
     462       43106 :                 move32();
     463       43106 :                 c_e = 0;
     464       43106 :                 move16();
     465             : 
     466       86253 :                 FOR( kCh = jCh; kCh <= iCh; kCh++ )
     467             :                 {
     468       43149 :                     g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */
     469       43149 :                     g_e = add( s_e, secDiag_new_e[kCh] );
     470       43149 :                     secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
     471       43149 :                     secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
     472       43149 :                     IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
     473             :                     {
     474           2 :                         BREAK;
     475             :                     }
     476             : 
     477       43147 :                     c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
     478       43147 :                     c_e = singularValues_new_e[kCh];
     479       43147 :                     GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
     480       43147 :                     c = Mpy_32_32( c, temp );
     481       43147 :                     c_e = add( c_e, temp_exp );
     482       43147 :                     temp_exp2 = norm_l( c );
     483       43147 :                     c = L_shl( c, temp_exp2 );
     484       43147 :                     c_e = sub( c_e, temp_exp2 );
     485       43147 :                     s = Mpy_32_32( -g, temp );
     486       43147 :                     s_e = add( g_e, temp_exp );
     487       43147 :                     temp_exp2 = norm_l( s );
     488       43147 :                     s = L_shl( s, temp_exp2 );
     489       43147 :                     s_e = sub( s_e, temp_exp2 );
     490       43147 :                     ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
     491             :                 }
     492             :             }
     493             : 
     494     4305062 :             IF( convergence )
     495             :             {
     496     2584172 :                 singularValues_fx[iCh] = singularValues_fx[iCh];
     497     2584172 :                 move32();
     498     2584172 :                 IF( singularValues_fx[iCh] < 0 )
     499             :                 {
     500      483797 :                     singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
     501      483797 :                     move32();
     502     1755350 :                     FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     503             :                     {
     504     1271553 :                         singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
     505     1271553 :                         move32();
     506             :                     }
     507             :                 }
     508             :             }
     509             :             ELSE
     510             :             {
     511     1720890 :                 IF( GE_16( iteration, SVD_MAX_NUM_ITERATION ) )
     512             :                 {
     513           0 :                     IF( LT_32( singularValues_fx[iCh], 0 ) )
     514             :                     {
     515           0 :                         singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
     516           0 :                         move32();
     517             : 
     518           0 :                         FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     519             :                         {
     520           0 :                             singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
     521           0 :                             move32();
     522             :                         }
     523             :                     }
     524           0 :                     error = 1;
     525           0 :                     move16();
     526           0 :                     convergence = 1;
     527           0 :                     move16();
     528             :                 }
     529             :                 ELSE
     530             :                 {
     531     1720890 :                     ApplyQRTransform_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_new_e, secDiag_new_e, jCh, iCh, nChannelsL, nChannelsC ); /* nChannelsC */
     532             :                 }
     533             :             }
     534             :         }
     535             :     }
     536             : 
     537             :     // rescaling block
     538      836340 :     Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );
     539             : 
     540             : 
     541      836340 :     return ( error );
     542             : }
     543             : 
     544             : /*-------------------------------------------------------------------------
     545             :  * ApplyQRTransform()
     546             :  *
     547             :  *
     548             :  *-------------------------------------------------------------------------*/
     549             : 
     550     1720890 : static void ApplyQRTransform_fx(
     551             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                                              singularValues_e*/
     552             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)                                             singularValues_e*/
     553             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                                             singularValues_e*/
     554             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* i/o:                                                                                                          secDiag_e*/
     555             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     556             :     Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
     557             :     const Word16 startIndex,   /* i  :                                                  Q0*/
     558             :     const Word16 currentIndex, /* i  :                                                  Q0*/
     559             :     const Word16 nChannelsL,   /* i  : number of rows in the matrix to be decomposed                    Q0*/
     560             :     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed                 Q0*/
     561             : )
     562             : {
     563             :     Word32 temp;
     564             :     Word16 temp_e;
     565             :     Word16 temp_norm_e;
     566             :     Word16 ch, split;
     567     1720890 :     Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
     568     1720890 :     move32();
     569     1720890 :     move32();
     570     1720890 :     move32();
     571     1720890 :     move32();
     572     1720890 :     move32();
     573     1720890 :     move32();
     574     1720890 :     move32();
     575     1720890 :     move32();
     576     1720890 :     Word16 d_e = 0, g_e = 0, r_e = 0, x_ii_e = 0, x_split_e = 0, x_kk_e = 0, mu_e = 0, aux_e = 0;
     577     1720890 :     move16();
     578     1720890 :     move16();
     579     1720890 :     move16();
     580     1720890 :     move16();
     581     1720890 :     move16();
     582     1720890 :     move16();
     583     1720890 :     move16();
     584     1720890 :     move16();
     585             :     Word32 L_temp1, L_temp2, L_temp3, L_temp4;
     586             :     Word16 L_temp1_e, L_temp2_e, L_temp3_e, L_temp4_e, temp_exp;
     587     1720890 :     Word32 c = MAX_32;
     588     1720890 :     move32();
     589     1720890 :     Word16 c_e = 0;
     590     1720890 :     move16();
     591     1720890 :     Word32 s = MAX_32;
     592     1720890 :     move32();
     593     1720890 :     Word16 s_e = 0;
     594     1720890 :     move16();
     595             : 
     596     1720890 :     x_kk = singularValues[currentIndex]; /* exp(singularValues_e) */
     597     1720890 :     move32();
     598     1720890 :     x_kk_e = singularValues_e[currentIndex];
     599     1720890 :     move16();
     600     1720890 :     x_ii = singularValues[startIndex]; /* exp(singularValues_e) */
     601     1720890 :     move32();
     602     1720890 :     x_ii_e = singularValues_e[startIndex];
     603     1720890 :     move16();
     604     1720890 :     split = sub( currentIndex, 1 ); /* Q0 */
     605     1720890 :     move16();
     606             : 
     607     1720890 :     x_split = singularValues[split]; /* exp(singularValues_e) */
     608     1720890 :     move32();
     609     1720890 :     x_split_e = singularValues_e[split];
     610     1720890 :     move16();
     611     1720890 :     g = secDiag[split]; /* exp(secDiag_e) */
     612     1720890 :     move32();
     613     1720890 :     g_e = secDiag_e[split];
     614     1720890 :     move16();
     615     1720890 :     r = secDiag[currentIndex]; /* exp(secDiag_e) */
     616     1720890 :     move32();
     617     1720890 :     r_e = secDiag_e[currentIndex];
     618     1720890 :     move16();
     619             : 
     620             :     // d = (x_split + x_kk) * (x_split - x_kk) + (g + r) * (g - r);
     621     1720890 :     L_temp1 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, x_kk, x_kk_e, &L_temp1_e );                                                                           /* exp(L_temp1_e) */
     622     1720890 :     L_temp2 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, L_negate( x_kk ), x_kk_e, &L_temp2_e );                                                               /* exp(L_temp2_e) */
     623     1720890 :     L_temp3 = BASOP_Util_Add_Mant32Exp( g, g_e, r, r_e, &L_temp3_e );                                                                                             /* exp(L_temp3_e) */
     624     1720890 :     L_temp4 = BASOP_Util_Add_Mant32Exp( g, g_e, L_negate( r ), r_e, &L_temp4_e );                                                                                 /* exp(L_temp4_e) */
     625     1720890 :     d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( L_temp3, L_temp4 ), add( L_temp3_e, L_temp4_e ), &d_e ); /* exp(d_e) */
     626             : 
     627             :     // d /= maxWithSign((r + r) * x_split);
     628     1720890 :     L_temp1 = BASOP_Util_Add_Mant32Exp( r, r_e, r, r_e, &L_temp1_e ); /* exp(L_temp1_e) */
     629     1720890 :     L_temp1 = maxWithSign_fx( Mpy_32_32( L_temp1, x_split ) );        /* exp(L_temp1_e + x_split_e) */
     630     1720890 :     L_temp1_e = add( L_temp1_e, x_split_e );
     631     1720890 :     d = BASOP_Util_Divide3232_Scale_newton( d, L_temp1, &temp_exp ); /* temp_exp + d_e - L_temp1_e */
     632     1720890 :     d_e = add( temp_exp, sub( d_e, L_temp1_e ) );
     633             : 
     634     1720890 :     g = GivensRotation_fx( MAX_32, 0, d, d_e, &g_e );
     635             : 
     636             :     // mu = x_split / maxWithSign(d + (d >= 0.0f ? 1 : (-1)) * fabsf(g)) - r;
     637             :     // L_temp1 = d >= 0 ? L_abs( g ) : -L_abs( g );
     638     1720890 :     IF( d >= 0 )
     639             :     {
     640     1011431 :         L_temp1 = L_abs( g );
     641             :     }
     642             :     ELSE
     643             :     {
     644      709459 :         L_temp1 = L_negate( L_abs( g ) );
     645             :     }
     646     1720890 :     L_temp1_e = g_e;
     647     1720890 :     move16();
     648     1720890 :     L_temp2 = maxWithSign_fx( BASOP_Util_Add_Mant32Exp( d, d_e, L_temp1, L_temp1_e, &L_temp2_e ) ); /* exp(L_temp2_e) */
     649     1720890 :     mu = BASOP_Util_Divide3232_Scale_newton( x_split, L_temp2, &mu_e );                             /* exp(mu_e + (x-plit_e - L_temp2_e)) */
     650     1720890 :     mu_e = add( mu_e, sub( x_split_e, L_temp2_e ) );
     651     1720890 :     mu = BASOP_Util_Add_Mant32Exp( mu, mu_e, L_negate( r ), r_e, &mu_e ); /* exp(mu_e) */
     652             : 
     653             :     // d = ((x_ii + x_kk) * (x_ii - x_kk) + r * mu) / maxWithSign(x_ii);
     654     1720890 :     L_temp1 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, x_kk, x_kk_e, &L_temp1_e );                                                           /* exp(L_temp1_e) */
     655     1720890 :     L_temp2 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, L_negate( x_kk ), x_kk_e, &L_temp2_e );                                               /* exp(L_temp2_e) */
     656     1720890 :     d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( r, mu ), add( r_e, mu_e ), &d_e ); /* exp(d_e) */
     657     1720890 :     d = BASOP_Util_Divide3232_Scale_newton( d, maxWithSign_fx( x_ii ), &temp_exp );                                                         /* exp(temp_exp + (d_e - x_ii_e) */
     658     1720890 :     d_e = add( temp_exp, sub( d_e, x_ii_e ) );
     659             : 
     660             :     /*QR transformation*/
     661     5758814 :     FOR( ch = startIndex; ch <= split; ch++ )
     662             :     {
     663     4037924 :         r = Mpy_32_32( s, secDiag[ch + 1] ); /* exp(s_e + secDiag_e) */
     664     4037924 :         r_e = add( s_e, secDiag_e[ch + 1] );
     665     4037924 :         g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
     666     4037924 :         g_e = add( c_e, secDiag_e[ch + 1] );
     667             : 
     668     4037924 :         GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
     669     4037924 :         c = Mpy_32_32( d, temp );
     670     4037924 :         c_e = add( temp_e, d_e );
     671     4037924 :         temp_norm_e = norm_l( c );
     672     4037924 :         c = L_shl( c, temp_norm_e );
     673     4037924 :         c_e = sub( c_e, temp_norm_e );
     674     4037924 :         s = Mpy_32_32( r, temp );
     675     4037924 :         s_e = add( r_e, temp_e );
     676     4037924 :         temp_norm_e = norm_l( s );
     677     4037924 :         s = L_shl( s, temp_norm_e );
     678     4037924 :         s_e = sub( s_e, temp_norm_e );
     679     4037924 :         r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
     680     4037924 :         r_e = add( s_e, singularValues_e[ch + 1] );
     681     4037924 :         x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
     682     4037924 :         x_split_e = add( c_e, singularValues_e[ch + 1] );
     683             : 
     684     4037924 :         aux = g; /* exp(g_e) */
     685     4037924 :         move32();
     686     4037924 :         aux_e = g_e;
     687     4037924 :         move16();
     688             : 
     689             :         // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
     690     4037924 :         ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
     691             : 
     692     4037924 :         GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
     693     4037924 :         IF( singularValues[ch] != 0 )
     694             :         {
     695             : 
     696     4037924 :             c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
     697     4037924 :             c_e = add( d_e, aux_e );
     698     4037924 :             temp_norm_e = norm_l( c );
     699     4037924 :             c = L_shl( c, temp_norm_e );
     700     4037924 :             c_e = sub( c_e, temp_norm_e );
     701             : 
     702     4037924 :             s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
     703     4037924 :             s_e = add( r_e, aux_e );
     704     4037924 :             temp_norm_e = norm_l( s );
     705     4037924 :             s = L_shl( s, temp_norm_e );
     706     4037924 :             s_e = sub( s_e, temp_norm_e );
     707             :         }
     708             : 
     709             :         // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
     710     4037924 :         ApplyRotation_fx( singularVectors_Left, c, c_e, s, s_e, g, g_e, x_split, x_split_e, &d, &d_e, &x_ii, &x_ii_e, ch + 1, ch, nChannelsL );
     711             :     }
     712             : 
     713     1720890 :     secDiag[startIndex] = 0;
     714     1720890 :     move32();
     715     1720890 :     secDiag[currentIndex] = d; /* exp(d_e) */
     716     1720890 :     move32();
     717     1720890 :     secDiag_e[currentIndex] = d_e;
     718     1720890 :     move16();
     719     1720890 :     singularValues[currentIndex] = x_ii; /* exp(x_ii_e) */
     720     1720890 :     move32();
     721     1720890 :     singularValues_e[currentIndex] = x_ii_e;
     722     1720890 :     move16();
     723             : 
     724     1720890 :     return;
     725             : }
     726             : 
     727             : /*-------------------------------------------------------------------------
     728             :  * ApplyRotation()
     729             :  *
     730             :  *
     731             :  *-------------------------------------------------------------------------*/
     732             : 
     733     8118995 : static void ApplyRotation_fx(
     734             :     Word32 singularVector[][MAX_OUTPUT_CHANNELS],
     735             :     const Word32 c, /* exp(c_e)*/
     736             :     const Word16 c_e,
     737             :     const Word32 s, /* exp(s_e) */
     738             :     const Word16 s_e,
     739             :     Word32 x11, /* exp(x11_e) */
     740             :     Word16 x11_e,
     741             :     Word32 x12, /* exp(x12_e) */
     742             :     Word16 x12_e,
     743             :     Word32 *d, /* exp(d_e) */
     744             :     Word16 *d_e,
     745             :     Word32 *g, /* exp(g_e) */
     746             :     Word16 *g_e,
     747             :     const Word16 currentIndex1, /* Q0 */
     748             :     const Word16 currentIndex2, /* Q0 */
     749             :     const Word16 nChannels      /* Q0 */
     750             : )
     751             : {
     752             :     Word16 ch;
     753             : 
     754     8118995 :     *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
     755     8118995 :     move32();
     756     8118995 :     *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
     757     8118995 :     move32();
     758             : 
     759     8118995 :     Word16 c_q = sub( 31, c_e );
     760     8118995 :     Word16 s_q = sub( 31, s_e );
     761             :     Word32 op1, op2;
     762             :     Word16 op_e;
     763             : 
     764             :     // Bring c and s to same Q
     765     8118995 :     IF( GT_16( c_q, s_q ) )
     766             :     {
     767      713848 :         op1 = L_shr( c, sub( c_q, s_q ) );
     768      713848 :         op2 = s;
     769      713848 :         move32();
     770      713848 :         op_e = s_q;
     771      713848 :         move16();
     772             :     }
     773             :     ELSE
     774             :     {
     775     7405147 :         op1 = c;
     776     7405147 :         move32();
     777     7405147 :         op2 = L_shr( s, sub( s_q, c_q ) );
     778     7405147 :         op_e = c_q;
     779     7405147 :         move16();
     780             :     }
     781     8118995 :     op_e = add( op_e, 1 ); // 64 bit mac -> +1
     782             : 
     783    47765580 :     FOR( ch = 0; ch < nChannels; ch++ )
     784             :     {
     785    39646585 :         x11 = singularVector[ch][currentIndex2];
     786    39646585 :         move32();
     787    39646585 :         x12 = singularVector[ch][currentIndex1];
     788    39646585 :         move32();
     789             : 
     790    39646585 :         Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
     791    39646585 :         temp = W_shr( temp, op_e );                                      // Q(singularVector)
     792    39646585 :         singularVector[ch][currentIndex2] = W_sat_l( temp );             // Q(singularVector)
     793    39646585 :         move32();
     794             : 
     795    39646585 :         temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
     796    39646585 :         temp = W_shr( temp, op_e );                                           // Q(singularVector)
     797    39646585 :         singularVector[ch][currentIndex1] = W_sat_l( temp );                  // Q(singularVector)
     798    39646585 :         move32();
     799             :     }
     800             : 
     801     8118995 :     return;
     802             : }
     803             : 
     804             : /*-------------------------------------------------------------------------
     805             :  * HouseholderReduction()
     806             :  *
     807             :  *
     808             :  *-------------------------------------------------------------------------*/
     809             : 
     810      836340 : static void HouseholderReduction_fx(
     811             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
     812             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* exp(singularValues_fx_e) */
     813             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
     814             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_fx_e) */
     815             :     Word16 singularVectors_Left_e,
     816             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
     817             :     Word16 *secDiag_fx_e,
     818             :     const Word16 nChannelsL, /* Q0 */
     819             :     const Word16 nChannelsC, /* Q0 */
     820             :     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
     821             :     Word16 *eps_x_fx_e )
     822             : {
     823             :     Word16 nCh;
     824             :     // float g = 0.0f, sig_x = 0.0f;// to be removed
     825      836340 :     Word32 g_fx = 0, sig_x_fx = 0;
     826      836340 :     move32();
     827      836340 :     move32();
     828      836340 :     Word16 sig_x_fx_e = 0;
     829      836340 :     move16();
     830             : 
     831             :     Word16 iCh, jCh;
     832             :     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     833     3776236 :     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     834             :     {
     835    13543596 :         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
     836             :         {
     837    10603700 :             singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
     838    10603700 :             move16();
     839             :         }
     840             :     }
     841             : 
     842             :     /* Bidiagonal Reduction for every channel */
     843     3420512 :     FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     844             :     {
     845     2584172 :         biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
     846     2584172 :         biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
     847             : 
     848             :         Word16 L_temp_e;
     849     2584172 :         Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
     850     2584172 :         IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
     851             :         {
     852     1204856 :             *eps_x_fx = L_temp; /* exp(L_temp_e) */
     853     1204856 :             move32();
     854     1204856 :             *eps_x_fx_e = L_temp_e;
     855     1204856 :             move32();
     856             :         }
     857             :     }
     858             : 
     859             :     /* SingularVecotr Accumulation */
     860      836340 :     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
     861             : 
     862             : 
     863      836340 :     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
     864             : 
     865      836340 :     return;
     866             : }
     867             : 
     868             : /*-------------------------------------------------------------------------
     869             :  * biDiagonalReductionLeft()
     870             :  *
     871             :  *
     872             :  *-------------------------------------------------------------------------*/
     873             : 
     874     2584172 : static void biDiagonalReductionLeft_fx(
     875             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
     876             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
     877             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
     878             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
     879             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     880             :     Word16 *secDiag_e,
     881             :     const Word16 nChannelsL,  /* Q0 */
     882             :     const Word16 nChannelsC,  /* Q0 */
     883             :     const Word16 currChannel, /* Q0 */
     884             :     Word32 *sig_x,            /* exp(sig_x_e) */
     885             :     Word16 *sig_x_e,
     886             :     Word32 *g /* Q31 */
     887             : )
     888             : {
     889             :     Word16 iCh, jCh, idx;
     890             :     Word32 norm_x, f, r;
     891             :     Word16 norm_x_e, f_e, r_e;
     892             :     Word32 L_temp;
     893             :     Word16 L_temp_e;
     894             : 
     895     2584172 :     secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
     896     2584172 :     move32();
     897     2584172 :     secDiag_e[currChannel] = *sig_x_e;
     898     2584172 :     move16();
     899             : 
     900             :     /* Setting values to 0 */
     901     2584172 :     ( *sig_x ) = 0;
     902     2584172 :     move32();
     903     2584172 :     ( *g ) = 0;
     904     2584172 :     move32();
     905             : 
     906     2584172 :     IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     907             :     {
     908     2584172 :         idx = currChannel;
     909     2584172 :         move16();
     910             : 
     911     9536092 :         FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     912             :         {
     913     6951920 :             ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
     914             :         }
     915             : 
     916     2584172 :         IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
     917             :         {
     918             :             Word16 invVal_e;
     919             :             Word32 invVal;
     920     2458066 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
     921     2458066 :             norm_x = 0;
     922     2458066 :             move32();
     923     2458066 :             norm_x_e = 0;
     924     2458066 :             move16();
     925     9278973 :             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     926             :             {
     927     6820907 :                 Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
     928     6820907 :                 singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
     929     6820907 :                 move32();
     930     6820907 :                 singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
     931     6820907 :                 move16();
     932     6820907 :                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
     933             :             }
     934     2458066 :             IF( GT_16( norm_x_e, 0 ) )
     935             :             {
     936      418400 :                 norm_x = MAX_32;
     937      418400 :                 move32();
     938      418400 :                 norm_x_e = 0;
     939      418400 :                 move16();
     940             :             }
     941     2458066 :             L_temp_e = norm_x_e;
     942     2458066 :             move16();
     943     2458066 :             L_temp = Sqrt32( norm_x, &L_temp_e );
     944     2458066 :             L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
     945             :                                                   //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
     946     2458066 :             if ( singularVectors[currChannel][idx] >= 0 )
     947             :             {
     948     1383415 :                 L_temp = L_negate( L_temp );
     949             :             }
     950     2458066 :             ( *g ) = L_temp;
     951     2458066 :             move32();
     952             : 
     953     2458066 :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
     954     2458066 :             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
     955     2458066 :             move32();
     956             : 
     957     2458066 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
     958             : 
     959     6106501 :             FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
     960             :             {
     961     3648435 :                 norm_x = 0;
     962     3648435 :                 move32();
     963     3648435 :                 norm_x_e = 0;
     964     3648435 :                 move16();
     965    18090665 :                 FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     966             :                 {
     967    14442230 :                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
     968             :                 }
     969             : 
     970     3648435 :                 f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
     971     3648435 :                 f_e = add( invVal_e, sub( norm_x_e, r_e ) );
     972             : 
     973    18090665 :                 FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     974             :                 {
     975    14442230 :                     singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
     976    14442230 :                     move32();
     977             :                 }
     978             :             }
     979             : 
     980             : 
     981     9278973 :             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     982             :             {
     983     6820907 :                 singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */
     984     6820907 :                 move32();
     985     6820907 :                 singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
     986     6820907 :                 move16();
     987             :             }
     988             :         }
     989             : 
     990             :         // rescaling block
     991     2584172 :         singularValues[currChannel] = Mpy_32_32( ( *sig_x ), ( *g ) ); /* sig_x_e */
     992     2584172 :         move32();
     993     2584172 :         singularValues_e[currChannel] = *sig_x_e;
     994     2584172 :         move16();
     995             :     }
     996             : 
     997     2584172 :     return;
     998             : }
     999             : 
    1000             : /*-------------------------------------------------------------------------
    1001             :  * biDiagonalReductionRight()
    1002             :  *
    1003             :  *
    1004             :  *-------------------------------------------------------------------------*/
    1005             : 
    1006     2584172 : static void biDiagonalReductionRight_fx(
    1007             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    1008             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_exp[]) */
    1009             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
    1010             :     Word16 *secDiag_exp,
    1011             :     const Word16 nChannelsL,  /* Q0 */
    1012             :     const Word16 nChannelsC,  /* Q0 */
    1013             :     const Word16 currChannel, /* Q0 */
    1014             :     Word32 *sig_x,            /* exp(sig_x_e) */
    1015             :     Word16 *sig_x_e,
    1016             :     Word32 *g /* Q31 */
    1017             : )
    1018             : {
    1019             :     Word16 iCh, jCh, idx;
    1020             :     Word32 norm_x, r;
    1021             :     Word16 norm_x_e, r_e;
    1022             :     Word32 L_temp;
    1023             :     Word16 L_temp_e;
    1024             : 
    1025             :     /* Setting values to 0 */
    1026     2584172 :     ( *sig_x ) = 0;
    1027     2584172 :     move32();
    1028     2584172 :     ( *g ) = 0;
    1029     2584172 :     move32();
    1030             : 
    1031     2584172 :     IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
    1032             :     {
    1033     1747832 :         idx = add( currChannel, 1 ); /* Q0 */
    1034             : 
    1035     5399612 :         FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1036             :         {
    1037     3651780 :             ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */
    1038             :         }
    1039             : 
    1040     1747832 :         IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
    1041             :         {
    1042     1636776 :             norm_x = 0;
    1043     1636776 :             move32();
    1044     1636776 :             norm_x_e = 0;
    1045     1636776 :             move16();
    1046             : 
    1047             :             Word16 invVal_e, temp_e;
    1048             :             Word32 invVal;
    1049     1636776 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
    1050     5176587 :             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
    1051             :             {
    1052     3539811 :                 temp_e = norm_l( singularVectors[currChannel][jCh] );
    1053     3539811 :                 singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
    1054     3539811 :                 move32();
    1055     3539811 :                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
    1056     3539811 :                 move16();
    1057     3539811 :                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
    1058             :             }
    1059     1636776 :             IF( GT_16( norm_x_e, 0 ) )
    1060             :             {
    1061      522217 :                 norm_x = MAX_32;
    1062      522217 :                 move32();
    1063      522217 :                 norm_x_e = 0;
    1064      522217 :                 move16();
    1065             :             }
    1066     1636776 :             L_temp_e = norm_x_e;
    1067     1636776 :             move16();
    1068     1636776 :             L_temp = Sqrt32( norm_x, &L_temp_e );
    1069     1636776 :             L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
    1070     1636776 :             IF( singularVectors[currChannel][idx] >= 0 )
    1071             :             {
    1072      612349 :                 ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
    1073      612349 :                 move32();
    1074             :             }
    1075             :             ELSE
    1076             :             {
    1077     1024427 :                 ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
    1078     1024427 :                 move32();
    1079             :             }
    1080             : 
    1081     1636776 :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
    1082     1636776 :             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */
    1083     1636776 :             move32();
    1084             : 
    1085     1636776 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
    1086             : 
    1087     5176587 :             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1088             :             {
    1089     3539811 :                 temp_e = norm_l( singularVectors[currChannel][jCh] );
    1090     3539811 :                 secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
    1091     3539811 :                 move32();
    1092     3539811 :                 secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) );
    1093     3539811 :                 move16();
    1094             :             }
    1095             : 
    1096     5535749 :             FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
    1097             :             {
    1098     3898973 :                 norm_x = 0;
    1099     3898973 :                 move32();
    1100     3898973 :                 norm_x_e = 0;
    1101     3898973 :                 move16();
    1102    14583542 :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1103             :                 {
    1104    10684569 :                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
    1105             :                 }
    1106             : 
    1107    14583542 :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
    1108             :                 {
    1109    10684569 :                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
    1110    10684569 :                     move32();
    1111             :                 }
    1112             :             }
    1113             : 
    1114     5176587 :             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
    1115             :             {
    1116     3539811 :                 singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */
    1117     3539811 :                 move32();
    1118     3539811 :                 singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
    1119     3539811 :                 move16();
    1120             :             }
    1121             :         }
    1122             :     }
    1123             : 
    1124     2584172 :     return;
    1125             : }
    1126             : 
    1127             : /*-------------------------------------------------------------------------
    1128             :  * singularVectorsAccumulationLeft()
    1129             :  *
    1130             :  *
    1131             :  *-------------------------------------------------------------------------*/
    1132             : 
    1133      836340 : static void singularVectorsAccumulationLeft_fx(
    1134             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    1135             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
    1136             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
    1137             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    1138             :     const Word16 nChannelsL, /* Q0 */
    1139             :     const Word16 nChannelsC  /* Q0 */
    1140             : )
    1141             : {
    1142             :     Word16 nCh, iCh, k;
    1143             :     Word16 nChannels;
    1144             :     Word32 norm_y, t_jj, t_ii;
    1145             :     Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
    1146             : 
    1147             :     /* Processing */
    1148      836340 :     nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
    1149             :     // FILE *fp = fopen("t_ii_out.txt","a");
    1150     3420512 :     FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* min(nChannelsL,ChannelsC) */
    1151             :     {
    1152     2584172 :         t_ii = singularValues[nCh]; /* exp(singularValues_e) */
    1153     2584172 :         move32();
    1154     2584172 :         t_ii_e = singularValues_e[nCh];
    1155     2584172 :         move16();
    1156             : 
    1157     6235952 :         FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1158             :         {
    1159     3651780 :             singularVectors_Left[nCh][iCh] = 0;
    1160     3651780 :             move32();
    1161             :         }
    1162             : 
    1163     2584172 :         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
    1164             :         {
    1165     2458066 :             t_ii = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
    1166     2458066 :             t_ii_e = sub( temp_exp, t_ii_e );
    1167             :             Word16 tempe;
    1168     2458066 :             Word32 temp = BASOP_Util_Divide3232_Scale_newton( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
    1169     2458066 :             tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
    1170             :             // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
    1171     6106501 :             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1172             :             {
    1173     3648435 :                 Word64 acc = 0;
    1174     3648435 :                 move64();
    1175             :                 Word64 prod[16];
    1176             :                 Word16 prod_e[16];
    1177     3648435 :                 Word16 max_e = -31;
    1178     3648435 :                 move16();
    1179    14442230 :                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
    1180             :                 {
    1181    10793795 :                     prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
    1182    10793795 :                     prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
    1183    10793795 :                     max_e = s_max( max_e, prod_e[k] );
    1184             :                 }
    1185             : 
    1186    14442230 :                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
    1187             :                 {
    1188    10793795 :                     acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) );
    1189             :                 }
    1190     3648435 :                 Word16 acc_e = W_norm( acc );
    1191     3648435 :                 acc = W_shl( acc, acc_e );
    1192             : 
    1193     3648435 :                 norm_y = W_extract_h( acc );
    1194     3648435 :                 norm_y_e = add( sub( max_e, acc_e ), 1 );
    1195     3648435 :                 t_jj = Mpy_32_32( temp, norm_y );
    1196     3648435 :                 t_jj_e = add( tempe, norm_y_e );
    1197    18090665 :                 FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
    1198             :                 {
    1199    14442230 :                     singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
    1200    14442230 :                     move32();
    1201             :                 }
    1202             :             }
    1203             : 
    1204     9278973 :             FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
    1205             :             {
    1206     6820907 :                 singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
    1207     6820907 :                 move32();
    1208     6820907 :                 singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
    1209     6820907 :                 move16();
    1210             :             }
    1211             :         }
    1212             :         ELSE
    1213             :         {
    1214      257119 :             FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
    1215             :             {
    1216      131013 :                 singularVectors_Left[iCh][nCh] = 0;
    1217      131013 :                 move32();
    1218             :             }
    1219             :         }
    1220     2584172 :         singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */
    1221     2584172 :         move32();
    1222             :     }
    1223             :     // fclose(fp);
    1224     3776236 :     FOR( nCh = 0; nCh < nChannelsL; nCh++ )
    1225             :     {
    1226    13543596 :         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
    1227             :         {
    1228    10603700 :             singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
    1229    10603700 :             move32();
    1230             :         }
    1231             :     }
    1232             : 
    1233      836340 :     return;
    1234             : }
    1235             : 
    1236             : /*-------------------------------------------------------------------------
    1237             :  * singularVectorsAccumulationRight()
    1238             :  *
    1239             :  *
    1240             :  *-------------------------------------------------------------------------*/
    1241             : 
    1242      836340 : static void singularVectorsAccumulationRight_fx(
    1243             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
    1244             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    1245             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
    1246             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
    1247             :     Word16 *secDiag_e,
    1248             :     const Word16 nChannelsC /* Q0 */
    1249             : )
    1250             : {
    1251             :     Word16 nCh, iCh, k;
    1252             :     Word16 nChannels;
    1253             :     Word32 norm_y, t_ii, ratio_float;
    1254      836340 :     Word16 norm_y_e, temp_exp1, sing_right_exp[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
    1255             : 
    1256             :     /* Processing */
    1257      836340 :     nChannels = nChannelsC; /* nChannelsC       Q0*/
    1258             : 
    1259             :     /* avoid compiler warning */
    1260      836340 :     t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
    1261      836340 :     move32();
    1262             : 
    1263     3420512 :     FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
    1264             :     {
    1265             : 
    1266     2584172 :         IF( LT_16( nCh, sub( nChannelsC, 1 ) ) ) /* nChannelsC */
    1267             :         {
    1268     1747832 :             IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
    1269             :             {
    1270             : 
    1271     5176587 :                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
    1272             :                 {
    1273     3539811 :                     ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
    1274     3539811 :                     singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
    1275     3539811 :                     temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
    1276     3539811 :                     move32();
    1277     3539811 :                     sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
    1278     3539811 :                     move16();
    1279             :                     // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
    1280             :                 }
    1281             : 
    1282     5176587 :                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1283             :                 {
    1284     3539811 :                     norm_y = 0;
    1285     3539811 :                     move32();
    1286     3539811 :                     norm_y_e = 0;
    1287     3539811 :                     move16();
    1288             : 
    1289    13860698 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1290             :                     {
    1291    10320887 :                         norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
    1292             :                     }
    1293             : 
    1294    13860698 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1295             :                     {
    1296    10320887 :                         singularVectors_Right[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Right[k][iCh], sing_right_exp[k][iCh], Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ), add( norm_y_e, sing_right_exp[k][nCh] ), &sing_right_exp[k][iCh] ); /* exp(sing_right_exp) */
    1297    10320887 :                         move32();
    1298    10320887 :                         singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], sing_right_exp[k][iCh] ); /* Q31 */
    1299    10320887 :                         move32();
    1300    10320887 :                         sing_right_exp[k][iCh] = 0;
    1301    10320887 :                         move16();
    1302             :                     }
    1303             :                 }
    1304             :             }
    1305             : 
    1306     5399612 :             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1307             :             {
    1308     3651780 :                 singularVectors_Right[nCh][iCh] = singularVectors_Right[iCh][nCh] = 0;
    1309     3651780 :                 move32();
    1310     3651780 :                 move32();
    1311             :             }
    1312             :         }
    1313     2584172 :         singularVectors_Right[nCh][nCh] = MAX_32;
    1314     2584172 :         move32();
    1315     2584172 :         t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
    1316     2584172 :         move32();
    1317             :     }
    1318      836340 :     return;
    1319             : }
    1320             : 
    1321             : /*-------------------------------------------------------------------------
    1322             :  * GivensRotation()
    1323             :  *
    1324             :  *
    1325             :  *-------------------------------------------------------------------------*/
    1326             : 
    1327             : 
    1328     8118995 : static void GivensRotation2_fx(
    1329             :     const Word32 x, /* exp(x_e) */
    1330             :     const Word16 x_e,
    1331             :     const Word32 z, /* exp(z_e) */
    1332             :     const Word16 z_e,
    1333             :     Word32 *result,
    1334             :     Word32 *resultInv,
    1335             :     Word16 *out_e,
    1336             :     Word16 *outInv_e )
    1337             : {
    1338             :     Word32 r;
    1339     8118995 :     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
    1340     8118995 :     r = L_max( r, 1 );
    1341     8118995 :     *outInv_e = *out_e;
    1342     8118995 :     move16();
    1343     8118995 :     *result = Sqrt32( r, out_e );
    1344     8118995 :     move32();
    1345             : 
    1346     8118995 :     *resultInv = ISqrt32( r, outInv_e );
    1347     8118995 :     move32();
    1348     8118995 : }
    1349             : 
    1350     1720890 : static Word32 GivensRotation_fx(
    1351             :     const Word32 x, /* exp(x_e) */
    1352             :     const Word16 x_e,
    1353             :     const Word32 z, /* exp(z_e) */
    1354             :     const Word16 z_e,
    1355             :     Word16 *out_e )
    1356             : {
    1357             :     Word32 r;
    1358             : 
    1359     1720890 :     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
    1360     1720890 :     r = Sqrt32( r, out_e );
    1361     1720890 :     return ( r );
    1362             : }
    1363             : 
    1364             : /*-------------------------------------------------------------------------
    1365             :  * maxWithSign()
    1366             :  *
    1367             :  *
    1368             :  *-------------------------------------------------------------------------*/
    1369             : 
    1370    25348108 : static Word32 maxWithSign_fx(
    1371             :     const Word32 a /* Qx */
    1372             : )
    1373             : {
    1374             :     Word32 result;
    1375    25348108 :     IF( a >= 0 )
    1376             :     {
    1377    13220820 :         result = L_max( a, SVD_MINIMUM_VALUE_FX );
    1378             :     }
    1379             :     ELSE
    1380             :     {
    1381    12127288 :         result = L_min( a, -SVD_MINIMUM_VALUE_FX );
    1382             :     }
    1383    25348108 :     return result;
    1384             : }
    1385             : 
    1386             : /*-------------------------------------------------------------------------
    1387             :  * flushToZeroArray()
    1388             :  *
    1389             :  *
    1390             :  *-------------------------------------------------------------------------*/
    1391             : 
    1392             : 
    1393             : /*-------------------------------------------------------------------------
    1394             :  * flushToZeroMat()
    1395             :  *
    1396             :  *
    1397             :  *-------------------------------------------------------------------------*/

Generated by: LCOV version 1.14