LCOV - code coverage report
Current view: top level - lib_dec - ivas_svd_dec_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main -- dec/rend @ 633e3f2e309758d10805ef21e0436356fe719b7a Lines: 566 576 98.3 %
Date: 2025-08-23 01:22:27 Functions: 14 14 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : #include <stdint.h>
      33             : #include "options.h"
      34             : #include "prot_fx.h"
      35             : #include "ivas_stat_dec.h"
      36             : #include "ivas_cnst.h"
      37             : #include <math.h>
      38             : #include "wmc_auto.h"
      39             : #include "ivas_prot_fx.h"
      40             : 
      41             : /*-----------------------------------------------------------------------*
      42             :  * Local constants
      43             :  *-----------------------------------------------------------------------*/
      44             : 
      45             : /* The SVD is sensitive to changes to the following constants, so please be careful when trying to tune things   */
      46             : #define SVD_MAX_NUM_ITERATION       75    /* maximum number of interations before exiting the SVD            */
      47             : #define SVD_MINIMUM_VALUE_FX        ( 2 ) /* minimum value                                                   */
      48             : #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
      49             : #define CONVERGENCE_FACTOR_FX       214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
      50             : 
      51             : /*-----------------------------------------------------------------------*
      52             :  * Local function prototypes
      53             :  *-----------------------------------------------------------------------*/
      54             : 
      55             : static void HouseholderReduction_fx(
      56             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
      57             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* exp(singularValues_fx_e) */
      58             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
      59             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_fx_e) */
      60             :     Word16 singularVectors_Left_e,
      61             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
      62             :     Word16 *secDiag_fx_e,
      63             :     const Word16 nChannelsL, /* Q0 */
      64             :     const Word16 nChannelsC, /* Q0 */
      65             :     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
      66             :     Word16 *eps_x_fx_e );
      67             : #ifdef NONBE_SVD_OPTIMIZATION
      68             : 
      69             : static void biDiagonalReductionLeft_fx(
      70             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_e) */
      71             :     Word16 singularValues_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
      72             :     const Word16 nChannelsL,
      73             :     const Word16 nChannelsC,  /* Q0 */
      74             :     const Word16 currChannel, /* Q0 */
      75             :     Word32 *g,
      76             :     Word16 *g_e );
      77             : 
      78             : static void biDiagonalReductionRight_fx(
      79             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
      80             :     Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
      81             :     const Word16 nChannelsL,  /* Q0 */
      82             :     const Word16 nChannelsC,  /* Q0 */
      83             :     const Word16 currChannel, /* Q0 */
      84             :     Word32 *g,
      85             :     Word16 *g_e );
      86             : #else
      87             : static void biDiagonalReductionLeft_fx(
      88             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
      89             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
      90             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
      91             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
      92             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
      93             :     Word16 *secDiag_e,
      94             :     const Word16 nChannelsL,  /* Q0 */
      95             :     const Word16 nChannelsC,  /* Q0 */
      96             :     const Word16 currChannel, /* Q0 */
      97             :     Word32 *sig_x,            /* exp(sig_x_e) */
      98             :     Word16 *sig_x_e,
      99             :     Word32 *g /* Q31 */
     100             : );
     101             : 
     102             : static void biDiagonalReductionRight_fx(
     103             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
     104             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
     105             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
     106             :     Word16 *secDiag_e,
     107             :     const Word16 nChannelsL,  /* Q0 */
     108             :     const Word16 nChannelsC,  /* Q0 */
     109             :     const Word16 currChannel, /* Q0 */
     110             :     Word32 *sig_x,            /* exp(sig_x_e) */
     111             :     Word16 *sig_x_e,
     112             :     Word32 *g /* Q31 */
     113             : );            // Q31
     114             : #endif
     115             : 
     116             : static void singularVectorsAccumulationLeft_fx(
     117             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
     118             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
     119             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
     120             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     121             :     const Word16 nChannelsL, /* Q0 */
     122             :     const Word16 nChannelsC  /* Q0 */
     123             : );
     124             : 
     125             : static void singularVectorsAccumulationRight_fx(
     126             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* singularVectors_e */
     127             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
     128             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
     129             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
     130             :     Word16 *secDiag_e,
     131             :     const Word16 nChannelsC /* Q0 */
     132             : );
     133             : 
     134             : static Word32 maxWithSign_fx(
     135             :     const Word32 a /* Qx */
     136             : );
     137             : 
     138             : static Word16 BidagonalDiagonalisation_fx(
     139             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                   singularValues_fx_e*/
     140             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)         singularValues_fx_e*/
     141             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                  singularValues_fx_e*/
     142             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
     143             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)                                                    */
     144             :     Word16 *secDiag_fx_e,                                   /* i/o:                                                                                                               */
     145             :     const Word16 nChannelsL,                                /* i  : number of rows in the matrix to be decomposed               Q0*/
     146             :     const Word16 nChannelsC,                                /* i  : number of columns in the matrix to be decomposed    Q0*/
     147             :     const Word32 eps_x,                                     /* i  :                                                eps_x_e*/
     148             :     const Word16 eps_x_e                                    /* i  :                                                       */
     149             : );
     150             : 
     151             : static void ApplyRotation_fx(
     152             :     Word32 singularVector[][MAX_OUTPUT_CHANNELS],
     153             :     const Word32 c, /* exp(c_e)*/
     154             :     const Word16 c_e,
     155             :     const Word32 s, /* exp(s_e) */
     156             :     const Word16 s_e,
     157             :     Word32 x11, /* exp(x11_e) */
     158             :     Word16 x11_e,
     159             :     Word32 x12, /* exp(x12_e) */
     160             :     Word16 x12_e,
     161             :     Word32 *d, /* exp(d_e) */
     162             :     Word16 *d_e,
     163             :     Word32 *g, /* exp(g_e) */
     164             :     Word16 *g_e,
     165             :     const Word16 currentIndex1, /* Q0 */
     166             :     const Word16 currentIndex2, /* Q0 */
     167             :     const Word16 nChannels      /* Q0 */
     168             : );
     169             : 
     170             : static void GivensRotation2_fx(
     171             :     const Word32 x, /* exp(x_e) */
     172             :     const Word16 x_e,
     173             :     const Word32 z, /* exp(z_e) */
     174             :     const Word16 z_e,
     175             :     Word32 *result,
     176             :     Word32 *resultInv,
     177             :     Word16 *out_e,
     178             :     Word16 *outInv_e );
     179             : 
     180             : static Word32 GivensRotation_fx(
     181             :     const Word32 x, /* exp(x_e) */
     182             :     const Word16 x_e,
     183             :     const Word32 z, /* exp(z_e) */
     184             :     const Word16 z_e,
     185             :     Word16 *out_e );
     186             : 
     187             : static void ApplyQRTransform_fx(
     188             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                      singularValues_e*/
     189             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)        singularValues_e*/
     190             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                     singularValues_e*/
     191             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* i/o:                                                                                  secDiag_e*/
     192             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     193             :     Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
     194             :     const Word16 startIndex,   /* i  :                                                  Q0*/
     195             :     const Word16 currentIndex, /* i  :                                                  Q0*/
     196             :     const Word16 nChannelsL,   /* i  : number of rows in the matrix to be decomposed                    Q0*/
     197             :     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed                 Q0*/
     198             : );
     199             : 
     200             : /*-------------------------------------------------------------------------
     201             :  * mat2svdMat()
     202             :  *
     203             :  * external matrix format to internal
     204             :  *-------------------------------------------------------------------------*/
     205             : 
     206      927239 : void mat2svdMat_fx(
     207             :     const Word32 *mat,                                       /* i  : matrix as column ordered vector    Qx*/
     208             :     Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* o  : matrix as two-dimensional arry             Qx*/
     209             :     const Word16 nRows,                                      /* i  : number of rows of the matrix               Q0*/
     210             :     const Word16 mCols,                                      /* i  : number of columns of the matrix    Q0*/
     211             :     const Word16 transpose                                   /* i  : flag indication transposition              Q0*/
     212             : )
     213             : {
     214             :     Word16 i, j;
     215             : 
     216      927239 :     IF( transpose )
     217             :     {
     218      830472 :         FOR( i = 0; i < mCols; i++ )
     219             :         {
     220     2095225 :             FOR( j = 0; j < nRows; j++ )
     221             :             {
     222     1401140 :                 svdMat[i][j] = mat[j + ( nRows * i )]; /* Qx */
     223     1401140 :                 move32();
     224             :             }
     225             : 
     226      694085 :             set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, nRows ) );
     227             :         }
     228             : 
     229     1624494 :         FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
     230             :         {
     231     1488107 :             set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
     232             :         }
     233             :     }
     234             :     ELSE
     235             :     {
     236     3455181 :         FOR( i = 0; i < nRows; i++ )
     237             :         {
     238    13667732 :             FOR( j = 0; j < mCols; j++ )
     239             :             {
     240    11003403 :                 svdMat[i][j] = mat[i + ( nRows * j )]; /* Qx */
     241    11003403 :                 move32();
     242             :             }
     243             : 
     244     2664329 :             set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, mCols ) );
     245             :         }
     246             : 
     247    10780155 :         FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
     248             :         {
     249     9989303 :             set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
     250             :         }
     251             :     }
     252             : 
     253      927239 :     return;
     254             : }
     255             : 
     256             : 
     257             : /*---------------------------------------------------------------------*
     258             :  * svdMat2mat()
     259             :  *
     260             :  * transfer a matrix from a two dimensional array  to a column wise ordered vector
     261             :  *---------------------------------------------------------------------*/
     262             : 
     263     1109632 : void svdMat2mat_fx(
     264             :     Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* i  : matrix as two-dimensional arry             Qx*/
     265             :     Word32 *mat,                                             /* o  : matrix as column ordered vector    Qx*/
     266             :     const Word16 nRows,                                      /* i  : number of rows of the matrix               Q0*/
     267             :     const Word16 mCols                                       /* i  : number of columns of the matrix    Q0*/
     268             : )
     269             : {
     270             :     Word16 i, j;
     271             : 
     272     4418721 :     FOR( i = 0; i < nRows; i++ )
     273             :     {
     274    13444367 :         FOR( j = 0; j < mCols; j++ )
     275             :         {
     276    10135278 :             mat[i + ( nRows * j )] = svdMat[i][j]; /* Qx */
     277    10135278 :             move32();
     278             :         }
     279             :     }
     280             : 
     281     1109632 :     return;
     282             : }
     283             : 
     284             : /*-------------------------------------------------------------------------
     285             :  * svd()
     286             :  *
     287             :  * perform a singular value decomposition X=USV of a matrix X
     288             :  *-------------------------------------------------------------------------*/
     289             : 
     290             : /*! r: error or success */
     291      927239 : Word16 svd_fx(
     292             :     Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i  : matrix to be decomposed (M)            InputMatrix_e*/
     293             :     Word16 InputMatrix_e,
     294             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* o  : left singular vectors (U)                   singularValues_fx_e*/
     295             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* o  : singular values vector (S)         singularValues_fx_e*/
     296             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o  : right singular vectors (V)                  singularValues_fx_e*/
     297             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
     298             :     const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed              Q0*/
     299             :     const Word16 nChannelsC  /* i  : number of columns in the matrix to be decomposed   Q0*/
     300             : )
     301             : {
     302             :     Word16 iCh, jCh;
     303             :     Word16 lengthSingularValues;
     304             :     Word16 errorMessage, condition;
     305             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
     306             :     Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
     307      927239 :     move16();
     308      927239 :     Word32 eps_x_fx = 0, temp_fx;
     309      927239 :     move16();
     310      927239 :     Word16 eps_x_fx_e = 0;
     311      927239 :     move16();
     312             :     Word16 temp_fx_e;
     313      927239 :     push_wmops( "svd_fx" );
     314             : 
     315             : 
     316             :     /* Collecting Values */
     317     4285653 :     FOR( iCh = 0; iCh < nChannelsL; iCh++ )
     318             :     {
     319    15762957 :         FOR( jCh = 0; jCh < nChannelsC; jCh++ )
     320             :         {
     321    12404543 :             singularVectors_Left_fx[iCh][jCh] = InputMatrix[iCh][jCh]; /* InputMatrix_e */
     322    12404543 :             move32();
     323             :         }
     324             :     }
     325             : 
     326             :     /* Householder reduction */
     327      927239 :     HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
     328             :     /* Set extremely small values to zero if needed */
     329             :     // flushToZeroArray(singularValues, max_length);
     330             :     // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
     331             :     // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);
     332             : 
     333             :     /* BidagonalDiagonalisation */
     334      927239 :     errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
     335             :     /* Sort the singular values descending order */
     336      927239 :     lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */
     337             : 
     338             :     DO
     339             :     {
     340     1301628 :         condition = 0;
     341     1301628 :         move16();
     342     4558543 :         FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
     343             :         {
     344     3256915 :             IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) )
     345             :             {
     346      474384 :                 condition = 1;
     347      474384 :                 move16();
     348      474384 :                 temp_fx = singularValues_fx[iCh]; /* singularValues_fx_e */
     349      474384 :                 move32();
     350      474384 :                 singularValues_fx[iCh] = singularValues_fx[iCh + 1]; /* singularValues_fx_e */
     351      474384 :                 move32();
     352      474384 :                 singularValues_fx[iCh + 1] = temp_fx; /* singularValues_fx_e */
     353      474384 :                 move32();
     354      474384 :                 temp_fx_e = singularValues_fx_e[iCh];
     355      474384 :                 move16();
     356      474384 :                 singularValues_fx_e[iCh] = singularValues_fx_e[iCh + 1];
     357      474384 :                 move16();
     358      474384 :                 singularValues_fx_e[iCh + 1] = temp_fx_e;
     359      474384 :                 move16();
     360             : 
     361     2646240 :                 FOR( jCh = 0; jCh < nChannelsL; ++jCh )
     362             :                 {
     363     2171856 :                     temp_fx = singularVectors_Left_fx[jCh][iCh]; /* singularValues_fx_e */
     364     2171856 :                     move32();
     365     2171856 :                     singularVectors_Left_fx[jCh][iCh] = singularVectors_Left_fx[jCh][iCh + 1]; /* singularValues_fx_e */
     366     2171856 :                     move32();
     367     2171856 :                     singularVectors_Left_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
     368     2171856 :                     move32();
     369             :                 }
     370             : 
     371     2637281 :                 FOR( jCh = 0; jCh < nChannelsC; ++jCh )
     372             :                 {
     373     2162897 :                     temp_fx = singularVectors_Right_fx[jCh][iCh]; /* singularValues_fx_e */
     374     2162897 :                     move32();
     375     2162897 :                     singularVectors_Right_fx[jCh][iCh] = singularVectors_Right_fx[jCh][iCh + 1]; /* singularValues_fx_e */
     376     2162897 :                     move32();
     377     2162897 :                     singularVectors_Right_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
     378     2162897 :                     move32();
     379             :                 }
     380             :             }
     381             :         }
     382             :     }
     383     1301628 :     WHILE( EQ_16( condition, 1 ) );
     384             : 
     385      927239 :     pop_wmops();
     386      927239 :     return ( errorMessage );
     387             : }
     388             : 
     389             : 
     390             : /*-----------------------------------------------------------------------*
     391             :  * Local functions
     392             :  *-----------------------------------------------------------------------*/
     393             : 
     394             : /*-------------------------------------------------------------------------
     395             :  * BidagonalDiagonalisation()
     396             :  *
     397             :  *
     398             :  *-------------------------------------------------------------------------*/
     399             : 
     400      927239 : static Word16 BidagonalDiagonalisation_fx(
     401             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)              singularValues_fx_e*/
     402             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)         singularValues_fx_e*/
     403             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)             singularValues_fx_e*/
     404             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
     405             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)                                                    */
     406             :     Word16 *secDiag_new_e,                                  /* i/o:                                                                                                               */
     407             :     const Word16 nChannelsL,                                /* i  : number of rows in the matrix to be decomposed               Q0*/
     408             :     const Word16 nChannelsC,                                /* i  : number of columns in the matrix to be decomposed    Q0*/
     409             :     const Word32 eps_x,                                     /* i  :                                                eps_x_e*/
     410             :     const Word16 eps_x_e                                    /* i  :                                                       */
     411             : )
     412             : {
     413             :     Word16 kCh, nCh, iCh, jCh, split;
     414             :     Word32 c, s, f1, f2;
     415             :     Word16 c_e, s_e, f1_e, f2_e;
     416      927239 :     Word16 x11_e = 0, x12_e = 0;
     417      927239 :     move16();
     418      927239 :     move16();
     419             :     Word16 temp_exp;
     420             :     Word16 temp_exp2;
     421      927239 :     Word32 g = 0;
     422      927239 :     move16();
     423      927239 :     Word16 g_e = 0;
     424      927239 :     move16();
     425             :     Word16 convergence, iteration, found_split;
     426      927239 :     Word16 error = 0;
     427      927239 :     move16();
     428             :     Word32 temp;
     429             :     Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
     430      927239 :     Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
     431             : 
     432     3865702 :     FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
     433             :     {
     434     2938463 :         convergence = 0;
     435     2938463 :         move16();
     436     2938463 :         iteration = 0;
     437     2938463 :         move16();
     438     2938463 :         split = iCh - 1; /* Q0 */
     439     2938463 :         move16();
     440             : 
     441     7875962 :         WHILE( EQ_16( convergence, 0 ) )
     442             :         {
     443     4937499 :             iteration = add( iteration, 1 ); /* Q0 */
     444     4937499 :             found_split = 1;                 /* Q0 */
     445     4937499 :             move16();
     446             : 
     447     9698722 :             FOR( jCh = iCh; jCh >= 0; jCh-- )
     448             :             {
     449     9698722 :                 Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e );
     450     9698722 :                 IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */
     451             :                 {
     452     4887206 :                     found_split = 0;
     453     4887206 :                     move16();
     454     4887206 :                     BREAK;
     455             :                 }
     456     4811516 :                 com_e = s_max( singularValues_new_e[jCh - 1], eps_x_e );
     457     4811516 :                 IF( LE_32( L_shr( L_abs( singularValues_fx[jCh - 1] ), sub( com_e, singularValues_new_e[jCh - 1] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[jCh - 1] vanishing compared to eps_x */
     458             :                 {
     459       50293 :                     BREAK;
     460             :                 }
     461             :             }
     462             : 
     463             :             // convergence = ( jCh == iCh ) ? 1 : 0;
     464     4937499 :             IF( EQ_16( jCh, iCh ) )
     465             :             {
     466     2938463 :                 convergence = 1; /* Q0 */
     467     2938463 :                 move16();
     468             :             }
     469             :             ELSE
     470             :             {
     471     1999036 :                 convergence = 0;
     472     1999036 :                 move16();
     473             :             }
     474             : 
     475     4937499 :             IF( found_split )
     476             :             {
     477       50293 :                 s = MAX_32;
     478       50293 :                 move32();
     479       50293 :                 s_e = 0;
     480       50293 :                 move16();
     481       50293 :                 c = 0;
     482       50293 :                 move32();
     483       50293 :                 c_e = 0;
     484       50293 :                 move16();
     485       50293 :                 split = sub( jCh, 1 ); /* Q0 */
     486      100647 :                 FOR( kCh = jCh; kCh <= iCh; kCh++ )
     487             :                 {
     488       50359 :                     g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */
     489       50359 :                     g_e = add( s_e, secDiag_new_e[kCh] );
     490       50359 :                     secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
     491       50359 :                     secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
     492       50359 :                     Word16 com_e = s_max( g_e, eps_x_e );
     493       50359 :                     IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) )
     494             :                     {
     495           5 :                         BREAK;
     496             :                     }
     497             : 
     498       50354 :                     c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
     499       50354 :                     c_e = singularValues_new_e[kCh];
     500       50354 :                     GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
     501       50354 :                     c = Mpy_32_32( c, temp );
     502       50354 :                     c_e = add( c_e, temp_exp );
     503       50354 :                     temp_exp2 = norm_l( c );
     504       50354 :                     c = L_shl( c, temp_exp2 );
     505       50354 :                     c_e = sub( c_e, temp_exp2 );
     506       50354 :                     s = Mpy_32_32( -g, temp );
     507       50354 :                     s_e = add( g_e, temp_exp );
     508       50354 :                     temp_exp2 = norm_l( s );
     509       50354 :                     s = L_shl( s, temp_exp2 );
     510       50354 :                     s_e = sub( s_e, temp_exp2 );
     511       50354 :                     ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
     512             :                 }
     513             :             }
     514             : 
     515     4937499 :             IF( convergence )
     516             :             {
     517     2938463 :                 singularValues_fx[iCh] = singularValues_fx[iCh];
     518     2938463 :                 move32();
     519     2938463 :                 IF( singularValues_fx[iCh] < 0 )
     520             :                 {
     521      538048 :                     singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
     522      538048 :                     move32();
     523     2016080 :                     FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     524             :                     {
     525     1478032 :                         singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
     526     1478032 :                         move32();
     527             :                     }
     528             :                 }
     529             :             }
     530             :             ELSE
     531             :             {
     532     1999036 :                 IF( GE_16( iteration, SVD_MAX_NUM_ITERATION ) )
     533             :                 {
     534           0 :                     IF( LT_32( singularValues_fx[iCh], 0 ) )
     535             :                     {
     536           0 :                         singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
     537           0 :                         move32();
     538             : 
     539           0 :                         FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     540             :                         {
     541           0 :                             singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
     542           0 :                             move32();
     543             :                         }
     544             :                     }
     545           0 :                     error = 1;
     546           0 :                     move16();
     547           0 :                     convergence = 1;
     548           0 :                     move16();
     549             :                 }
     550             :                 ELSE
     551             :                 {
     552     1999036 :                     ApplyQRTransform_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_new_e, secDiag_new_e, jCh, iCh, nChannelsL, nChannelsC ); /* nChannelsC */
     553             :                 }
     554             :             }
     555             :         }
     556             :     }
     557             : 
     558             :     // rescaling block
     559      927239 :     Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );
     560             : 
     561             : 
     562      927239 :     return ( error );
     563             : }
     564             : 
     565             : /*-------------------------------------------------------------------------
     566             :  * ApplyQRTransform()
     567             :  *
     568             :  *
     569             :  *-------------------------------------------------------------------------*/
     570             : 
     571     1999036 : static void ApplyQRTransform_fx(
     572             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                                              singularValues_e*/
     573             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)                                             singularValues_e*/
     574             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                                             singularValues_e*/
     575             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* i/o:                                                                                                          secDiag_e*/
     576             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     577             :     Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
     578             :     const Word16 startIndex,   /* i  :                                                  Q0*/
     579             :     const Word16 currentIndex, /* i  :                                                  Q0*/
     580             :     const Word16 nChannelsL,   /* i  : number of rows in the matrix to be decomposed                    Q0*/
     581             :     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed                 Q0*/
     582             : )
     583             : {
     584             :     Word32 temp;
     585             :     Word16 temp_e;
     586             :     Word16 temp_norm_e;
     587             :     Word16 ch, split;
     588     1999036 :     Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
     589     1999036 :     move32();
     590     1999036 :     move32();
     591     1999036 :     move32();
     592     1999036 :     move32();
     593     1999036 :     move32();
     594     1999036 :     move32();
     595     1999036 :     move32();
     596     1999036 :     move32();
     597     1999036 :     Word16 d_e = 0, g_e = 0, r_e = 0, x_ii_e = 0, x_split_e = 0, x_kk_e = 0, mu_e = 0, aux_e = 0;
     598     1999036 :     move16();
     599     1999036 :     move16();
     600     1999036 :     move16();
     601     1999036 :     move16();
     602     1999036 :     move16();
     603     1999036 :     move16();
     604     1999036 :     move16();
     605     1999036 :     move16();
     606             :     Word32 L_temp1, L_temp2, L_temp3, L_temp4;
     607             :     Word16 L_temp1_e, L_temp2_e, L_temp3_e, L_temp4_e, temp_exp;
     608     1999036 :     Word32 c = MAX_32;
     609     1999036 :     move32();
     610     1999036 :     Word16 c_e = 0;
     611     1999036 :     move16();
     612     1999036 :     Word32 s = MAX_32;
     613     1999036 :     move32();
     614     1999036 :     Word16 s_e = 0;
     615     1999036 :     move16();
     616             : 
     617     1999036 :     x_kk = singularValues[currentIndex]; /* exp(singularValues_e) */
     618     1999036 :     move32();
     619     1999036 :     x_kk_e = singularValues_e[currentIndex];
     620     1999036 :     move16();
     621     1999036 :     x_ii = singularValues[startIndex]; /* exp(singularValues_e) */
     622     1999036 :     move32();
     623     1999036 :     x_ii_e = singularValues_e[startIndex];
     624     1999036 :     move16();
     625     1999036 :     split = sub( currentIndex, 1 ); /* Q0 */
     626     1999036 :     move16();
     627             : 
     628     1999036 :     x_split = singularValues[split]; /* exp(singularValues_e) */
     629     1999036 :     move32();
     630     1999036 :     x_split_e = singularValues_e[split];
     631     1999036 :     move16();
     632     1999036 :     g = secDiag[split]; /* exp(secDiag_e) */
     633     1999036 :     move32();
     634     1999036 :     g_e = secDiag_e[split];
     635     1999036 :     move16();
     636     1999036 :     r = secDiag[currentIndex]; /* exp(secDiag_e) */
     637     1999036 :     move32();
     638     1999036 :     r_e = secDiag_e[currentIndex];
     639     1999036 :     move16();
     640             : 
     641             :     // d = (x_split + x_kk) * (x_split - x_kk) + (g + r) * (g - r);
     642     1999036 :     L_temp1 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, x_kk, x_kk_e, &L_temp1_e );                                                                           /* exp(L_temp1_e) */
     643     1999036 :     L_temp2 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, L_negate( x_kk ), x_kk_e, &L_temp2_e );                                                               /* exp(L_temp2_e) */
     644     1999036 :     L_temp3 = BASOP_Util_Add_Mant32Exp( g, g_e, r, r_e, &L_temp3_e );                                                                                             /* exp(L_temp3_e) */
     645     1999036 :     L_temp4 = BASOP_Util_Add_Mant32Exp( g, g_e, L_negate( r ), r_e, &L_temp4_e );                                                                                 /* exp(L_temp4_e) */
     646     1999036 :     d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( L_temp3, L_temp4 ), add( L_temp3_e, L_temp4_e ), &d_e ); /* exp(d_e) */
     647             : 
     648             :     // d /= maxWithSign((r + r) * x_split);
     649     1999036 :     L_temp1 = BASOP_Util_Add_Mant32Exp( r, r_e, r, r_e, &L_temp1_e ); /* exp(L_temp1_e) */
     650     1999036 :     L_temp1 = maxWithSign_fx( Mpy_32_32( L_temp1, x_split ) );        /* exp(L_temp1_e + x_split_e) */
     651     1999036 :     L_temp1_e = add( L_temp1_e, x_split_e );
     652     1999036 :     d = BASOP_Util_Divide3232_Scale_newton( d, L_temp1, &temp_exp ); /* temp_exp + d_e - L_temp1_e */
     653     1999036 :     d_e = add( temp_exp, sub( d_e, L_temp1_e ) );
     654             : 
     655     1999036 :     g = GivensRotation_fx( MAX_32, 0, d, d_e, &g_e );
     656             : 
     657             :     // mu = x_split / maxWithSign(d + (d >= 0.0f ? 1 : (-1)) * fabsf(g)) - r;
     658             :     // L_temp1 = d >= 0 ? L_abs( g ) : -L_abs( g );
     659     1999036 :     IF( d >= 0 )
     660             :     {
     661     1200425 :         L_temp1 = L_abs( g );
     662             :     }
     663             :     ELSE
     664             :     {
     665      798611 :         L_temp1 = L_negate( L_abs( g ) );
     666             :     }
     667     1999036 :     L_temp1_e = g_e;
     668     1999036 :     move16();
     669     1999036 :     L_temp2 = maxWithSign_fx( BASOP_Util_Add_Mant32Exp( d, d_e, L_temp1, L_temp1_e, &L_temp2_e ) ); /* exp(L_temp2_e) */
     670     1999036 :     mu = BASOP_Util_Divide3232_Scale_newton( x_split, L_temp2, &mu_e );                             /* exp(mu_e + (x-plit_e - L_temp2_e)) */
     671     1999036 :     mu_e = add( mu_e, sub( x_split_e, L_temp2_e ) );
     672     1999036 :     mu = BASOP_Util_Add_Mant32Exp( mu, mu_e, L_negate( r ), r_e, &mu_e ); /* exp(mu_e) */
     673             : 
     674             :     // d = ((x_ii + x_kk) * (x_ii - x_kk) + r * mu) / maxWithSign(x_ii);
     675     1999036 :     L_temp1 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, x_kk, x_kk_e, &L_temp1_e );                                                           /* exp(L_temp1_e) */
     676     1999036 :     L_temp2 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, L_negate( x_kk ), x_kk_e, &L_temp2_e );                                               /* exp(L_temp2_e) */
     677     1999036 :     d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( r, mu ), add( r_e, mu_e ), &d_e ); /* exp(d_e) */
     678     1999036 :     d = BASOP_Util_Divide3232_Scale_newton( d, maxWithSign_fx( x_ii ), &temp_exp );                                                         /* exp(temp_exp + (d_e - x_ii_e) */
     679     1999036 :     d_e = add( temp_exp, sub( d_e, x_ii_e ) );
     680             : 
     681             :     /*QR transformation*/
     682     6760259 :     FOR( ch = startIndex; ch <= split; ch++ )
     683             :     {
     684     4761223 :         r = Mpy_32_32( s, secDiag[ch + 1] ); /* exp(s_e + secDiag_e) */
     685     4761223 :         r_e = add( s_e, secDiag_e[ch + 1] );
     686     4761223 :         g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
     687     4761223 :         g_e = add( c_e, secDiag_e[ch + 1] );
     688             : 
     689     4761223 :         GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
     690     4761223 :         c = Mpy_32_32( d, temp );
     691     4761223 :         c_e = add( temp_e, d_e );
     692     4761223 :         temp_norm_e = norm_l( c );
     693     4761223 :         c = L_shl( c, temp_norm_e );
     694     4761223 :         c_e = sub( c_e, temp_norm_e );
     695     4761223 :         s = Mpy_32_32( r, temp );
     696     4761223 :         s_e = add( r_e, temp_e );
     697     4761223 :         temp_norm_e = norm_l( s );
     698     4761223 :         s = L_shl( s, temp_norm_e );
     699     4761223 :         s_e = sub( s_e, temp_norm_e );
     700     4761223 :         r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
     701     4761223 :         r_e = add( s_e, singularValues_e[ch + 1] );
     702     4761223 :         x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
     703     4761223 :         x_split_e = add( c_e, singularValues_e[ch + 1] );
     704             : 
     705     4761223 :         aux = g; /* exp(g_e) */
     706     4761223 :         move32();
     707     4761223 :         aux_e = g_e;
     708     4761223 :         move16();
     709             : 
     710             :         // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
     711     4761223 :         ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
     712             : 
     713     4761223 :         GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
     714     4761223 :         IF( singularValues[ch] != 0 )
     715             :         {
     716             : 
     717     4761223 :             c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
     718     4761223 :             c_e = add( d_e, aux_e );
     719     4761223 :             temp_norm_e = norm_l( c );
     720     4761223 :             c = L_shl( c, temp_norm_e );
     721     4761223 :             c_e = sub( c_e, temp_norm_e );
     722             : 
     723     4761223 :             s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
     724     4761223 :             s_e = add( r_e, aux_e );
     725     4761223 :             temp_norm_e = norm_l( s );
     726     4761223 :             s = L_shl( s, temp_norm_e );
     727     4761223 :             s_e = sub( s_e, temp_norm_e );
     728             :         }
     729             : 
     730             :         // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
     731     4761223 :         ApplyRotation_fx( singularVectors_Left, c, c_e, s, s_e, g, g_e, x_split, x_split_e, &d, &d_e, &x_ii, &x_ii_e, ch + 1, ch, nChannelsL );
     732             :     }
     733             : 
     734     1999036 :     secDiag[startIndex] = 0;
     735     1999036 :     move32();
     736     1999036 :     secDiag[currentIndex] = d; /* exp(d_e) */
     737     1999036 :     move32();
     738     1999036 :     secDiag_e[currentIndex] = d_e;
     739     1999036 :     move16();
     740     1999036 :     singularValues[currentIndex] = x_ii; /* exp(x_ii_e) */
     741     1999036 :     move32();
     742     1999036 :     singularValues_e[currentIndex] = x_ii_e;
     743     1999036 :     move16();
     744             : 
     745     1999036 :     return;
     746             : }
     747             : 
     748             : /*-------------------------------------------------------------------------
     749             :  * ApplyRotation()
     750             :  *
     751             :  *
     752             :  *-------------------------------------------------------------------------*/
     753             : 
     754     9572800 : static void ApplyRotation_fx(
     755             :     Word32 singularVector[][MAX_OUTPUT_CHANNELS],
     756             :     const Word32 c, /* exp(c_e)*/
     757             :     const Word16 c_e,
     758             :     const Word32 s, /* exp(s_e) */
     759             :     const Word16 s_e,
     760             :     Word32 x11, /* exp(x11_e) */
     761             :     Word16 x11_e,
     762             :     Word32 x12, /* exp(x12_e) */
     763             :     Word16 x12_e,
     764             :     Word32 *d, /* exp(d_e) */
     765             :     Word16 *d_e,
     766             :     Word32 *g, /* exp(g_e) */
     767             :     Word16 *g_e,
     768             :     const Word16 currentIndex1, /* Q0 */
     769             :     const Word16 currentIndex2, /* Q0 */
     770             :     const Word16 nChannels      /* Q0 */
     771             : )
     772             : {
     773             :     Word16 ch;
     774             : 
     775     9572800 :     *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
     776     9572800 :     move32();
     777     9572800 :     *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
     778     9572800 :     move32();
     779             : 
     780     9572800 :     Word16 c_q = sub( 31, c_e );
     781     9572800 :     Word16 s_q = sub( 31, s_e );
     782             :     Word32 op1, op2;
     783             :     Word16 op_e;
     784             : 
     785             :     // Bring c and s to same Q
     786     9572800 :     IF( GT_16( c_q, s_q ) )
     787             :     {
     788      838677 :         op1 = L_shr( c, sub( c_q, s_q ) );
     789      838677 :         op2 = s;
     790      838677 :         move32();
     791      838677 :         op_e = s_q;
     792      838677 :         move16();
     793             :     }
     794             :     ELSE
     795             :     {
     796     8734123 :         op1 = c;
     797     8734123 :         move32();
     798     8734123 :         op2 = L_shr( s, sub( s_q, c_q ) );
     799     8734123 :         op_e = c_q;
     800     8734123 :         move16();
     801             :     }
     802     9572800 :     op_e = add( op_e, 1 ); // 64 bit mac -> +1
     803     9572800 :     op_e = negate( op_e );
     804             : 
     805    57092883 :     FOR( ch = 0; ch < nChannels; ch++ )
     806             :     {
     807    47520083 :         x11 = singularVector[ch][currentIndex2];
     808    47520083 :         move32();
     809    47520083 :         x12 = singularVector[ch][currentIndex1];
     810    47520083 :         move32();
     811             : 
     812    47520083 :         Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
     813    47520083 :         singularVector[ch][currentIndex2] = W_shl_sat_l( temp, op_e );   // Q(singularVector)
     814    47520083 :         move32();
     815             : 
     816    47520083 :         temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
     817    47520083 :         singularVector[ch][currentIndex1] = W_shl_sat_l( temp, op_e );        // Q(singularVector)
     818    47520083 :         move32();
     819             :     }
     820             : 
     821     9572800 :     return;
     822             : }
     823             : 
     824             : /*-------------------------------------------------------------------------
     825             :  * HouseholderReduction()
     826             :  *
     827             :  *
     828             :  *-------------------------------------------------------------------------*/
     829             : 
     830      927239 : static void HouseholderReduction_fx(
     831             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
     832             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* exp(singularValues_fx_e) */
     833             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
     834             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_fx_e) */
     835             :     Word16 singularVectors_Left_e,
     836             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
     837             :     Word16 *secDiag_fx_e,
     838             :     const Word16 nChannelsL, /* Q0 */
     839             :     const Word16 nChannelsC, /* Q0 */
     840             :     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
     841             :     Word16 *eps_x_fx_e )
     842             : {
     843             :     Word16 nCh;
     844             : #ifdef NONBE_SVD_OPTIMIZATION
     845             : 
     846      927239 :     Word32 g_left_fx = 0;
     847      927239 :     Word16 g_left_e = 0;
     848      927239 :     move32();
     849      927239 :     move16();
     850      927239 :     Word32 g_right_fx = 0;
     851      927239 :     Word16 g_right_e = 0;
     852      927239 :     move32();
     853      927239 :     move16();
     854             : 
     855             : #else
     856             : 
     857             :     // float g = 0.0f, sig_x = 0.0f;// to be removed
     858             :     Word32 g_fx = 0, sig_x_fx = 0;
     859             :     move32();
     860             :     move32();
     861             :     Word16 sig_x_fx_e = 0;
     862             :     move16();
     863             : #endif
     864             : 
     865             :     Word16 iCh, jCh;
     866             :     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     867             : 
     868             : #ifdef NONBE_SVD_OPTIMIZATION
     869      927239 :     Word16 sc = 0;
     870      927239 :     move16();
     871      927239 :     sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC );
     872     3358414 :     FOR( jCh = 1; jCh < nChannelsL; jCh++ )
     873             :     {
     874     2431175 :         sc = s_min( sc, getScaleFactor32( singularVectors_Left_fx[jCh], nChannelsC ) );
     875             :     }
     876     4285653 :     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     877             :     {
     878     3358414 :         Scale_sig32( singularVectors_Left_fx[jCh], nChannelsC, sc );
     879    15762957 :         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
     880             :         {
     881    12404543 :             singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e - sc;
     882    12404543 :             move16();
     883             :         }
     884             :     }
     885             : 
     886     3865702 :     FOR( nCh = 0; nCh < nChannelsC; nCh++ )
     887             :     {
     888     2938463 :         secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
     889     2938463 :         move32();
     890     2938463 :         secDiag_fx_e[nCh] = g_right_e;
     891             : 
     892     2938463 :         biDiagonalReductionLeft_fx(
     893             :             singularVectors_Left_fx,
     894             :             singularVectors_Left_fx_e,
     895             :             nChannelsL,
     896             :             nChannelsC,
     897             :             nCh,
     898             :             &g_left_fx,
     899             :             &g_left_e );
     900             : 
     901     2938463 :         singularValues_fx[nCh] = g_left_fx;
     902     2938463 :         move32();
     903     2938463 :         singularValues_fx_e[nCh] = g_left_e;
     904             : 
     905     2938463 :         biDiagonalReductionRight_fx(
     906             :             singularVectors_Left_fx,
     907             :             singularVectors_Left_fx_e,
     908             :             nChannelsL,
     909             :             nChannelsC,
     910             :             nCh,
     911             :             &g_right_fx,
     912             :             &g_right_e );
     913             : 
     914             :         Word16 L_temp_e;
     915     2938463 :         Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
     916     2938463 :         IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
     917             :         {
     918     1357226 :             *eps_x_fx = L_temp; /* exp(L_temp_e) */
     919     1357226 :             move32();
     920     1357226 :             *eps_x_fx_e = L_temp_e;
     921     1357226 :             move32();
     922             :         }
     923             :     }
     924             : 
     925             : #else
     926             : 
     927             :     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     928             :     {
     929             :         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
     930             :         {
     931             :             singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
     932             :             move16();
     933             :         }
     934             :     }
     935             : 
     936             :     /* Bidiagonal Reduction for every channel */
     937             :     FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     938             :     {
     939             :         biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
     940             :         biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
     941             : 
     942             :         Word16 L_temp_e;
     943             :         Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
     944             :         IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
     945             :         {
     946             :             *eps_x_fx = L_temp; /* exp(L_temp_e) */
     947             :             move32();
     948             :             *eps_x_fx_e = L_temp_e;
     949             :             move32();
     950             :         }
     951             :     }
     952             : #endif
     953             : 
     954             :     /* SingularVecotr Accumulation */
     955      927239 :     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
     956             : 
     957             : 
     958      927239 :     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
     959             : 
     960      927239 :     return;
     961             : }
     962             : 
     963             : #ifdef NONBE_SVD_OPTIMIZATION
     964             : /*-------------------------------------------------------------------------
     965             :  * biDiagonalReductionLeft()
     966             :  *
     967             :  *
     968             :  *-------------------------------------------------------------------------*/
     969     2938463 : static void biDiagonalReductionLeft_fx(
     970             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS],   /* exp(singularVectors_e) */
     971             :     Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
     972             :     const Word16 nChannelsL,
     973             :     const Word16 nChannelsC,  /* Q0 */
     974             :     const Word16 currChannel, /* Q0 */
     975             :     Word32 *g,
     976             :     Word16 *g_e )
     977             : {
     978             :     Word16 iCh, jCh;
     979             :     Word32 norm_x, f, r;
     980             :     Word16 norm_x_e, f_e, r_e;
     981             :     Word32 L_temp;
     982             :     Word16 L_temp_e;
     983             : 
     984             :     /* Setting values to 0 */
     985     2938463 :     *g = 0;
     986     2938463 :     *g_e = 0;
     987     2938463 :     move32();
     988     2938463 :     move16();
     989             : 
     990     2938463 :     IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     991             :     {
     992     2938463 :         Word64 temp = 0;
     993     2938463 :         move64();
     994     2938463 :         norm_x = 0;
     995     2938463 :         move32();
     996     2938463 :         norm_x_e = 0;
     997     2938463 :         move16();
     998     2938463 :         Word16 max_e = MIN_16;
     999     2938463 :         move16();
    1000    11034362 :         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1001             :         {
    1002     8095899 :             max_e = s_max( max_e, singularVectors_e[jCh][currChannel] );
    1003             :         }
    1004             : 
    1005    11034362 :         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1006             :         {
    1007     8095899 :             temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors_e[jCh][currChannel] ), 1 ) ) );
    1008             :         }
    1009             : 
    1010     2938463 :         Word16 nrm = W_norm( temp );
    1011     2938463 :         nrm = sub( nrm, 32 );
    1012     2938463 :         norm_x = W_shl_sat_l( temp, nrm );
    1013     2938463 :         norm_x_e = sub( add( max_e, max_e ), nrm );
    1014             : 
    1015     2938463 :         IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
    1016             :         {
    1017             :             Word16 invVal_e;
    1018             :             Word32 invVal;
    1019             : 
    1020     2809680 :             L_temp_e = norm_x_e;
    1021     2809680 :             move16();
    1022     2809680 :             L_temp = Sqrt32( norm_x, &L_temp_e );
    1023             :             //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
    1024     2809680 :             if ( singularVectors[currChannel][currChannel] >= 0 )
    1025             :             {
    1026     1572183 :                 L_temp = L_negate( L_temp );
    1027     1572183 :                 move32();
    1028             :             }
    1029     2809680 :             *g = L_temp;
    1030     2809680 :             move32();
    1031     2809680 :             *g_e = L_temp_e;
    1032     2809680 :             move16();
    1033             : 
    1034     2809680 :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e );                                             /* exp(r_e) */
    1035     2809680 :             singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */
    1036     2809680 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
    1037             : 
    1038     7113788 :             FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1039             :             {
    1040     4304108 :                 Word16 max2_e = MIN_16;
    1041     4304108 :                 max_e = MIN_16;
    1042     4304108 :                 move16();
    1043     4304108 :                 move16();
    1044     4304108 :                 temp = 0;
    1045     4304108 :                 move64();
    1046             : 
    1047    21698947 :                 FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1048             :                 {
    1049    17394839 :                     max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); /* exp(norm_x_e) */
    1050    17394839 :                     max2_e = s_max( max2_e, singularVectors_e[jCh][iCh] );       /* exp(norm_x_e) */
    1051             :                 }
    1052     4304108 :                 max_e = add( max_e, max2_e );
    1053             : 
    1054    21698947 :                 FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1055             :                 {
    1056    17394839 :                     temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors_e[jCh][currChannel], singularVectors_e[jCh][iCh] ) ) ) );
    1057             :                 }
    1058     4304108 :                 nrm = W_norm( temp );
    1059     4304108 :                 nrm = sub( nrm, 32 );
    1060     4304108 :                 norm_x = W_shl_sat_l( temp, nrm );
    1061     4304108 :                 norm_x_e = sub( max_e, nrm );
    1062             : 
    1063     4304108 :                 f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
    1064     4304108 :                 f_e = add( invVal_e, sub( norm_x_e, r_e ) );
    1065             : 
    1066    21698947 :                 FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1067             :                 {
    1068    17394839 :                     singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] );
    1069             :                 }
    1070             :             }
    1071             :         }
    1072             :     }
    1073     2938463 :     return;
    1074             : }
    1075             : 
    1076     2938463 : static void biDiagonalReductionRight_fx(
    1077             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    1078             :     Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
    1079             :     const Word16 nChannelsL,  /* Q0 */
    1080             :     const Word16 nChannelsC,  /* Q0 */
    1081             :     const Word16 currChannel, /* Q0 */
    1082             :     Word32 *g,
    1083             :     Word16 *g_e )
    1084             : {
    1085             :     Word16 iCh, jCh, idx;
    1086             :     Word32 norm_x, r;
    1087             :     Word16 norm_x_e, r_e;
    1088             :     Word32 L_temp;
    1089             :     Word16 L_temp_e;
    1090             : 
    1091             :     /* Setting values to 0 */
    1092     2938463 :     *g = 0;
    1093     2938463 :     *g_e = 0;
    1094     2938463 :     move32();
    1095     2938463 :     move16();
    1096     2938463 :     IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
    1097             :     {
    1098     2011224 :         idx = add( currChannel, 1 ); /* Q0 */
    1099             : 
    1100     2011224 :         norm_x = 0;
    1101     2011224 :         move32();
    1102     2011224 :         norm_x_e = 0;
    1103     2011224 :         move16();
    1104     6319868 :         FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
    1105             :         {
    1106     4308644 :             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
    1107             :         }
    1108             : 
    1109     2011224 :         IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
    1110             :         {
    1111             :             Word16 invVal_e;
    1112             :             Word32 invVal;
    1113             : 
    1114     1899988 :             L_temp_e = norm_x_e;
    1115     1899988 :             move16();
    1116     1899988 :             L_temp = Sqrt32( norm_x, &L_temp_e );
    1117             :             // L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
    1118     1899988 :             IF( singularVectors[currChannel][idx] >= 0 )
    1119             :             {
    1120      727810 :                 ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
    1121      727810 :                 move32();
    1122             :             }
    1123             :             ELSE
    1124             :             {
    1125     1172178 :                 ( *g ) = L_temp; /* exp(L_temp_e) */
    1126     1172178 :                 move32();
    1127             :             }
    1128     1899988 :             *g_e = L_temp_e;
    1129     1899988 :             move16();
    1130             : 
    1131     1899988 :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + ( *g_e ), -norm_x, norm_x_e, &r_e );                             /* exp(r_e) */
    1132     1899988 :             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */
    1133             : 
    1134     1899988 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
    1135             : 
    1136     6525603 :             FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
    1137             :             {
    1138     4625615 :                 norm_x = 0;
    1139     4625615 :                 move32();
    1140     4625615 :                 norm_x_e = 0;
    1141     4625615 :                 move16();
    1142    17609294 :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1143             :                 {
    1144    12983679 :                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors_e[iCh][jCh], singularVectors_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
    1145             :                 }
    1146             : 
    1147     4625615 :                 norm_x = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
    1148     4625615 :                 norm_x_e = add( invVal_e, sub( norm_x_e, r_e ) );
    1149             : 
    1150    17609294 :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
    1151             :                 {
    1152    12983679 :                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */
    1153             :                 }
    1154             :             }
    1155             :         }
    1156             :     }
    1157             : 
    1158     2938463 :     return;
    1159             : }
    1160             : #else
    1161             : /*-------------------------------------------------------------------------
    1162             :  * biDiagonalReductionLeft()
    1163             :  *
    1164             :  *
    1165             :  *-------------------------------------------------------------------------*/
    1166             : 
    1167             : static void biDiagonalReductionLeft_fx(
    1168             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    1169             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
    1170             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
    1171             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
    1172             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    1173             :     Word16 *secDiag_e,
    1174             :     const Word16 nChannelsL,  /* Q0 */
    1175             :     const Word16 nChannelsC,  /* Q0 */
    1176             :     const Word16 currChannel, /* Q0 */
    1177             :     Word32 *sig_x,            /* exp(sig_x_e) */
    1178             :     Word16 *sig_x_e,
    1179             :     Word32 *g /* Q31 */
    1180             : )
    1181             : {
    1182             :     Word16 iCh, jCh, idx;
    1183             :     Word32 norm_x, f, r;
    1184             :     Word16 norm_x_e, f_e, r_e;
    1185             :     Word32 L_temp;
    1186             :     Word16 L_temp_e;
    1187             : 
    1188             :     secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
    1189             :     move32();
    1190             :     secDiag_e[currChannel] = *sig_x_e;
    1191             :     move16();
    1192             : 
    1193             :     /* Setting values to 0 */
    1194             :     ( *sig_x ) = 0;
    1195             :     move32();
    1196             :     ( *g ) = 0;
    1197             :     move32();
    1198             : 
    1199             :     IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
    1200             :     {
    1201             :         idx = currChannel;
    1202             :         move16();
    1203             : 
    1204             :         FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1205             :         {
    1206             :             ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
    1207             :         }
    1208             : 
    1209             :         IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
    1210             :         {
    1211             :             Word16 invVal_e;
    1212             :             Word32 invVal;
    1213             :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
    1214             :             Word64 temp = 0;
    1215             :             move64();
    1216             :             Word16 max_e = MIN_16;
    1217             :             move16();
    1218             :             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1219             :             {
    1220             :                 Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
    1221             :                 singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
    1222             :                 move32();
    1223             :                 singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
    1224             :                 move16();
    1225             :                 max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] );
    1226             :             }
    1227             : 
    1228             :             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1229             :             {
    1230             :                 temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors2_e[jCh][currChannel] ), 1 ) ) );
    1231             :             }
    1232             : 
    1233             :             Word16 nrm = W_norm( temp );
    1234             :             nrm = sub( nrm, 32 );
    1235             :             norm_x = W_shl_sat_l( temp, nrm );
    1236             :             norm_x_e = sub( add( max_e, max_e ), nrm );
    1237             : 
    1238             :             IF( GT_16( norm_x_e, 0 ) )
    1239             :             {
    1240             :                 norm_x = MAX_32;
    1241             :                 move32();
    1242             :                 norm_x_e = 0;
    1243             :                 move16();
    1244             :             }
    1245             :             L_temp_e = norm_x_e;
    1246             :             move16();
    1247             :             L_temp = Sqrt32( norm_x, &L_temp_e );
    1248             :             L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
    1249             :                                                   //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
    1250             :             if ( singularVectors[currChannel][idx] >= 0 )
    1251             :             {
    1252             :                 L_temp = L_negate( L_temp );
    1253             :             }
    1254             :             ( *g ) = L_temp;
    1255             :             move32();
    1256             : 
    1257             :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
    1258             :             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
    1259             :             move32();
    1260             : 
    1261             :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
    1262             : 
    1263             :             FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1264             :             {
    1265             :                 Word16 max2_e = MIN_16;
    1266             :                 max_e = MIN_16;
    1267             :                 move16();
    1268             :                 move16();
    1269             :                 temp = 0;
    1270             :                 move64();
    1271             : 
    1272             :                 FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1273             :                 {
    1274             :                     max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] ); /* exp(norm_x_e) */
    1275             :                     max2_e = s_max( max2_e, singularVectors2_e[jCh][iCh] );       /* exp(norm_x_e) */
    1276             :                 }
    1277             :                 max_e = add( max_e, max2_e );
    1278             : 
    1279             :                 FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1280             :                 {
    1281             :                     temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ) ) ) );
    1282             :                 }
    1283             :                 nrm = W_norm( temp );
    1284             :                 nrm = sub( nrm, 32 );
    1285             :                 norm_x = W_shl_sat_l( temp, nrm );
    1286             :                 norm_x_e = sub( max_e, nrm );
    1287             : 
    1288             :                 f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
    1289             :                 f_e = add( invVal_e, sub( norm_x_e, r_e ) );
    1290             : 
    1291             :                 FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1292             :                 {
    1293             :                     singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
    1294             :                     move32();
    1295             :                 }
    1296             :             }
    1297             : 
    1298             : 
    1299             :             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1300             :             {
    1301             :                 singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */
    1302             :                 move32();
    1303             :                 singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
    1304             :                 move16();
    1305             :             }
    1306             :         }
    1307             : 
    1308             :         // rescaling block
    1309             :         singularValues[currChannel] = Mpy_32_32( ( *sig_x ), ( *g ) ); /* sig_x_e */
    1310             :         move32();
    1311             :         singularValues_e[currChannel] = *sig_x_e;
    1312             :         move16();
    1313             :     }
    1314             : 
    1315             :     return;
    1316             : }
    1317             : 
    1318             : /*-------------------------------------------------------------------------
    1319             :  * biDiagonalReductionRight()
    1320             :  *
    1321             :  *
    1322             :  *-------------------------------------------------------------------------*/
    1323             : 
    1324             : static void biDiagonalReductionRight_fx(
    1325             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    1326             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_exp[]) */
    1327             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
    1328             :     Word16 *secDiag_exp,
    1329             :     const Word16 nChannelsL,  /* Q0 */
    1330             :     const Word16 nChannelsC,  /* Q0 */
    1331             :     const Word16 currChannel, /* Q0 */
    1332             :     Word32 *sig_x,            /* exp(sig_x_e) */
    1333             :     Word16 *sig_x_e,
    1334             :     Word32 *g /* Q31 */
    1335             : )
    1336             : {
    1337             :     Word16 iCh, jCh, idx;
    1338             :     Word32 norm_x, r;
    1339             :     Word16 norm_x_e, r_e;
    1340             :     Word32 L_temp;
    1341             :     Word16 L_temp_e;
    1342             : 
    1343             :     /* Setting values to 0 */
    1344             :     ( *sig_x ) = 0;
    1345             :     move32();
    1346             :     ( *g ) = 0;
    1347             :     move32();
    1348             : 
    1349             :     IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
    1350             :     {
    1351             :         idx = add( currChannel, 1 ); /* Q0 */
    1352             : 
    1353             :         FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1354             :         {
    1355             :             ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */
    1356             :         }
    1357             : 
    1358             :         IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
    1359             :         {
    1360             :             norm_x = 0;
    1361             :             move32();
    1362             :             norm_x_e = 0;
    1363             :             move16();
    1364             : 
    1365             :             Word16 invVal_e, temp_e;
    1366             :             Word32 invVal;
    1367             :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
    1368             :             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
    1369             :             {
    1370             :                 temp_e = norm_l( singularVectors[currChannel][jCh] );
    1371             :                 singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
    1372             :                 move32();
    1373             :                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
    1374             :                 move16();
    1375             :                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
    1376             :             }
    1377             :             IF( GT_16( norm_x_e, 0 ) )
    1378             :             {
    1379             :                 norm_x = MAX_32;
    1380             :                 move32();
    1381             :                 norm_x_e = 0;
    1382             :                 move16();
    1383             :             }
    1384             :             L_temp_e = norm_x_e;
    1385             :             move16();
    1386             :             L_temp = Sqrt32( norm_x, &L_temp_e );
    1387             :             L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
    1388             :             IF( singularVectors[currChannel][idx] >= 0 )
    1389             :             {
    1390             :                 ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
    1391             :                 move32();
    1392             :             }
    1393             :             ELSE
    1394             :             {
    1395             :                 ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
    1396             :                 move32();
    1397             :             }
    1398             : 
    1399             :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
    1400             :             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */
    1401             :             move32();
    1402             : 
    1403             :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
    1404             : 
    1405             :             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1406             :             {
    1407             :                 temp_e = norm_l( singularVectors[currChannel][jCh] );
    1408             :                 secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
    1409             :                 move32();
    1410             :                 secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) );
    1411             :                 move16();
    1412             :             }
    1413             : 
    1414             :             FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
    1415             :             {
    1416             :                 norm_x = 0;
    1417             :                 move32();
    1418             :                 norm_x_e = 0;
    1419             :                 move16();
    1420             :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1421             :                 {
    1422             :                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
    1423             :                 }
    1424             : 
    1425             :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
    1426             :                 {
    1427             :                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
    1428             :                     move32();
    1429             :                 }
    1430             :             }
    1431             : 
    1432             :             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
    1433             :             {
    1434             :                 singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */
    1435             :                 move32();
    1436             :                 singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
    1437             :                 move16();
    1438             :             }
    1439             :         }
    1440             :     }
    1441             : 
    1442             :     return;
    1443             : }
    1444             : #endif
    1445             : 
    1446             : /*-------------------------------------------------------------------------
    1447             :  * singularVectorsAccumulationLeft()
    1448             :  *
    1449             :  *
    1450             :  *-------------------------------------------------------------------------*/
    1451             : 
    1452      927239 : static void singularVectorsAccumulationLeft_fx(
    1453             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    1454             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
    1455             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
    1456             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    1457             :     const Word16 nChannelsL, /* Q0 */
    1458             :     const Word16 nChannelsC  /* Q0 */
    1459             : )
    1460             : {
    1461             :     Word16 nCh, iCh, k;
    1462             :     Word16 nChannels;
    1463             :     Word32 norm_y, t_jj, t_ii;
    1464             :     Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
    1465             : 
    1466             :     /* Processing */
    1467      927239 :     nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
    1468             :     // FILE *fp = fopen("t_ii_out.txt","a");
    1469     3865702 :     FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* min(nChannelsL,ChannelsC) */
    1470             :     {
    1471     2938463 :         t_ii = singularValues[nCh]; /* exp(singularValues_e) */
    1472     2938463 :         move32();
    1473     2938463 :         t_ii_e = singularValues_e[nCh];
    1474     2938463 :         move16();
    1475             : 
    1476     7247107 :         FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1477             :         {
    1478     4308644 :             singularVectors_Left[nCh][iCh] = 0;
    1479     4308644 :             move32();
    1480             :         }
    1481             : 
    1482     2938463 :         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
    1483             :         {
    1484     2809680 :             t_ii = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
    1485     2809680 :             t_ii_e = sub( temp_exp, t_ii_e );
    1486             :             Word16 tempe;
    1487     2809680 :             Word32 temp = BASOP_Util_Divide3232_Scale_newton( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
    1488     2809680 :             tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
    1489             :             // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
    1490     7113788 :             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1491             :             {
    1492     4304108 :                 Word64 acc = 0;
    1493     4304108 :                 move64();
    1494             :                 Word64 prod[16];
    1495             :                 Word16 prod_e[16];
    1496     4304108 :                 Word16 max_e = -31;
    1497     4304108 :                 move16();
    1498    17394839 :                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
    1499             :                 {
    1500    13090731 :                     prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
    1501    13090731 :                     prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
    1502    13090731 :                     max_e = s_max( max_e, prod_e[k] );
    1503             :                 }
    1504             : 
    1505    17394839 :                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
    1506             :                 {
    1507             : #ifdef FIX_ISSUE_1811_EXCEEDING_W_SHIFTS
    1508    13090731 :                     acc = W_add( acc, W_shr( prod[k], s_min( 63, sub( max_e, prod_e[k] ) ) ) );
    1509             : #else
    1510             :                     acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) );
    1511             : #endif
    1512             :                 }
    1513     4304108 :                 Word16 acc_e = W_norm( acc );
    1514     4304108 :                 acc = W_shl( acc, acc_e );
    1515             : 
    1516     4304108 :                 norm_y = W_extract_h( acc );
    1517     4304108 :                 norm_y_e = add( sub( max_e, acc_e ), 1 );
    1518     4304108 :                 t_jj = Mpy_32_32( temp, norm_y );
    1519     4304108 :                 t_jj_e = add( tempe, norm_y_e );
    1520    21698947 :                 FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
    1521             :                 {
    1522    17394839 :                     singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
    1523    17394839 :                     move32();
    1524             :                 }
    1525             :             }
    1526             : 
    1527    10772260 :             FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
    1528             :             {
    1529     7962580 :                 singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
    1530     7962580 :                 move32();
    1531     7962580 :                 singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
    1532     7962580 :                 move16();
    1533             :             }
    1534             :         }
    1535             :         ELSE
    1536             :         {
    1537      262102 :             FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
    1538             :             {
    1539      133319 :                 singularVectors_Left[iCh][nCh] = 0;
    1540      133319 :                 move32();
    1541             :             }
    1542             :         }
    1543     2938463 :         Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 );
    1544     2938463 :         singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */
    1545     2938463 :         move32();
    1546     2938463 :         singularVectors_Left_e[nCh][nCh] = exp;
    1547     2938463 :         move16();
    1548             :     }
    1549             :     // fclose(fp);
    1550     4285653 :     FOR( nCh = 0; nCh < nChannelsL; nCh++ )
    1551             :     {
    1552    15762957 :         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
    1553             :         {
    1554    12404543 :             singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
    1555    12404543 :             move32();
    1556             :         }
    1557             :     }
    1558             : 
    1559      927239 :     return;
    1560             : }
    1561             : 
    1562             : /*-------------------------------------------------------------------------
    1563             :  * singularVectorsAccumulationRight()
    1564             :  *
    1565             :  *
    1566             :  *-------------------------------------------------------------------------*/
    1567             : 
    1568      927239 : static void singularVectorsAccumulationRight_fx(
    1569             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
    1570             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    1571             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
    1572             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
    1573             :     Word16 *secDiag_e,
    1574             :     const Word16 nChannelsC /* Q0 */
    1575             : )
    1576             : {
    1577             :     Word16 nCh, iCh, k;
    1578             :     Word16 nChannels;
    1579             :     Word32 norm_y, t_ii, ratio_float;
    1580      927239 :     Word16 norm_y_e, temp_exp1, sing_right_exp[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
    1581             : 
    1582             :     /* Processing */
    1583      927239 :     nChannels = nChannelsC; /* nChannelsC       Q0*/
    1584             : 
    1585             :     /* avoid compiler warning */
    1586      927239 :     t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
    1587      927239 :     move32();
    1588             : 
    1589     3865702 :     FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
    1590             :     {
    1591             : 
    1592     2938463 :         IF( LT_16( nCh, sub( nChannelsC, 1 ) ) ) /* nChannelsC */
    1593             :         {
    1594     2011224 :             IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
    1595             :             {
    1596             : 
    1597     6096762 :                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
    1598             :                 {
    1599     4196774 :                     ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
    1600     4196774 :                     singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
    1601     4196774 :                     temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
    1602     4196774 :                     move32();
    1603     4196774 :                     sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
    1604     4196774 :                     move16();
    1605             :                     // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
    1606             :                 }
    1607             : 
    1608     6096762 :                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1609             :                 {
    1610     4196774 :                     Word64 norm_val = 0;
    1611     4196774 :                     move64();
    1612     4196774 :                     Word16 maxL_e = MIN_16;
    1613     4196774 :                     Word16 maxR_e = MIN_16;
    1614     4196774 :                     Word16 maxR2_e = MIN_16;
    1615     4196774 :                     move16();
    1616     4196774 :                     move16();
    1617     4196774 :                     move16();
    1618    16742722 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1619             :                     {
    1620    12545948 :                         maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] );
    1621    12545948 :                         maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] );
    1622    12545948 :                         maxR2_e = s_max( maxR2_e, sing_right_exp[k][nCh] );
    1623             :                     }
    1624             : 
    1625    16742722 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1626             :                     {
    1627    12545948 :                         norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) );
    1628             :                     }
    1629     4196774 :                     norm_y_e = W_norm( norm_val );
    1630     4196774 :                     norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) );
    1631     4196774 :                     norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e );
    1632             : 
    1633     4196774 :                     Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) );
    1634    16742722 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1635             :                     {
    1636    12545948 :                         Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] );
    1637    12545948 :                         Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) );
    1638    12545948 :                         singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */
    1639    12545948 :                         move32();
    1640    12545948 :                         singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */
    1641    12545948 :                         move32();
    1642    12545948 :                         sing_right_exp[k][iCh] = 0;
    1643    12545948 :                         move16();
    1644             :                     }
    1645             :                 }
    1646             :             }
    1647             : 
    1648     6319868 :             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1649             :             {
    1650     4308644 :                 singularVectors_Right[nCh][iCh] = singularVectors_Right[iCh][nCh] = 0;
    1651     4308644 :                 move32();
    1652     4308644 :                 move32();
    1653             :             }
    1654             :         }
    1655     2938463 :         singularVectors_Right[nCh][nCh] = MAX_32;
    1656     2938463 :         move32();
    1657     2938463 :         t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
    1658     2938463 :         move32();
    1659             :     }
    1660      927239 :     return;
    1661             : }
    1662             : 
    1663             : /*-------------------------------------------------------------------------
    1664             :  * GivensRotation()
    1665             :  *
    1666             :  *
    1667             :  *-------------------------------------------------------------------------*/
    1668             : 
    1669             : 
    1670     9572800 : static void GivensRotation2_fx(
    1671             :     const Word32 x, /* exp(x_e) */
    1672             :     const Word16 x_e,
    1673             :     const Word32 z, /* exp(z_e) */
    1674             :     const Word16 z_e,
    1675             :     Word32 *result,
    1676             :     Word32 *resultInv,
    1677             :     Word16 *out_e,
    1678             :     Word16 *outInv_e )
    1679             : {
    1680             :     Word32 r;
    1681     9572800 :     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
    1682     9572800 :     r = L_max( r, 1 );
    1683     9572800 :     *outInv_e = *out_e;
    1684     9572800 :     move16();
    1685     9572800 :     *result = Sqrt32( r, out_e );
    1686     9572800 :     move32();
    1687             : 
    1688     9572800 :     *resultInv = ISqrt32( r, outInv_e );
    1689     9572800 :     move32();
    1690     9572800 : }
    1691             : 
    1692     1999036 : static Word32 GivensRotation_fx(
    1693             :     const Word32 x, /* exp(x_e) */
    1694             :     const Word16 x_e,
    1695             :     const Word32 z, /* exp(z_e) */
    1696             :     const Word16 z_e,
    1697             :     Word16 *out_e )
    1698             : {
    1699             :     Word32 r;
    1700             : 
    1701     1999036 :     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
    1702     1999036 :     r = Sqrt32( r, out_e );
    1703     1999036 :     return ( r );
    1704             : }
    1705             : 
    1706             : /*-------------------------------------------------------------------------
    1707             :  * maxWithSign()
    1708             :  *
    1709             :  *
    1710             :  *-------------------------------------------------------------------------*/
    1711             : 
    1712    24719684 : static Word32 maxWithSign_fx(
    1713             :     const Word32 a /* Qx */
    1714             : )
    1715             : {
    1716             :     Word32 result;
    1717    24719684 :     IF( a >= 0 )
    1718             :     {
    1719    10712890 :         result = L_max( a, SVD_MINIMUM_VALUE_FX );
    1720             :     }
    1721             :     ELSE
    1722             :     {
    1723    14006794 :         result = L_min( a, -SVD_MINIMUM_VALUE_FX );
    1724             :     }
    1725    24719684 :     return result;
    1726             : }
    1727             : 
    1728             : /*-------------------------------------------------------------------------
    1729             :  * flushToZeroArray()
    1730             :  *
    1731             :  *
    1732             :  *-------------------------------------------------------------------------*/
    1733             : 
    1734             : 
    1735             : /*-------------------------------------------------------------------------
    1736             :  * flushToZeroMat()
    1737             :  *
    1738             :  *
    1739             :  *-------------------------------------------------------------------------*/

Generated by: LCOV version 1.14