LCOV - code coverage report
Current view: top level - lib_dec - ivas_svd_dec_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main enc/dec/rend @ bf9ba511f19a231c475b8e110fdaeacf18aa32c4 Lines: 598 608 98.4 %
Date: 2025-07-11 02:46:47 Functions: 14 14 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : #include <stdint.h>
      34             : #include "options.h"
      35             : #include "prot_fx.h"
      36             : #include "ivas_stat_dec.h"
      37             : #include "ivas_cnst.h"
      38             : #include <math.h>
      39             : #include "wmc_auto.h"
      40             : #include "ivas_prot_fx.h"
      41             : 
      42             : /*-----------------------------------------------------------------------*
      43             :  * Local constants
      44             :  *-----------------------------------------------------------------------*/
      45             : 
      46             : /* The SVD is sensitive to changes to the following constants, so please be careful when trying to tune things   */
      47             : #define SVD_MAX_NUM_ITERATION       75    /* maximum number of interations before exiting the SVD            */
      48             : #define SVD_MINIMUM_VALUE_FX        ( 2 ) /* minimum value                                                   */
      49             : #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
      50             : #define CONVERGENCE_FACTOR_FX       214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
      51             : 
      52             : /*-----------------------------------------------------------------------*
      53             :  * Local function prototypes
      54             :  *-----------------------------------------------------------------------*/
      55             : 
      56             : static void HouseholderReduction_fx(
      57             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
      58             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* exp(singularValues_fx_e) */
      59             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
      60             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_fx_e) */
      61             :     Word16 singularVectors_Left_e,
      62             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
      63             :     Word16 *secDiag_fx_e,
      64             :     const Word16 nChannelsL, /* Q0 */
      65             :     const Word16 nChannelsC, /* Q0 */
      66             :     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
      67             :     Word16 *eps_x_fx_e );
      68             : 
      69             : static void biDiagonalReductionLeft_fx(
      70             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
      71             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
      72             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
      73             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
      74             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
      75             :     Word16 *secDiag_e,
      76             :     const Word16 nChannelsL,  /* Q0 */
      77             :     const Word16 nChannelsC,  /* Q0 */
      78             :     const Word16 currChannel, /* Q0 */
      79             :     Word32 *sig_x,            /* exp(sig_x_e) */
      80             :     Word16 *sig_x_e,
      81             :     Word32 *g /* Q31 */
      82             : );
      83             : 
      84             : static void biDiagonalReductionRight_fx(
      85             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
      86             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
      87             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
      88             :     Word16 *secDiag_e,
      89             :     const Word16 nChannelsL,  /* Q0 */
      90             :     const Word16 nChannelsC,  /* Q0 */
      91             :     const Word16 currChannel, /* Q0 */
      92             :     Word32 *sig_x,            /* exp(sig_x_e) */
      93             :     Word16 *sig_x_e,
      94             :     Word32 *g /* Q31 */
      95             : );            // Q31
      96             : 
      97             : static void singularVectorsAccumulationLeft_fx(
      98             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
      99             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
     100             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
     101             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     102             :     const Word16 nChannelsL, /* Q0 */
     103             :     const Word16 nChannelsC  /* Q0 */
     104             : );
     105             : 
     106             : static void singularVectorsAccumulationRight_fx(
     107             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* singularVectors_e */
     108             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
     109             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
     110             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
     111             :     Word16 *secDiag_e,
     112             :     const Word16 nChannelsC /* Q0 */
     113             : );
     114             : 
     115             : static Word32 maxWithSign_fx(
     116             :     const Word32 a /* Qx */
     117             : );
     118             : 
     119             : static Word16 BidagonalDiagonalisation_fx(
     120             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                   singularValues_fx_e*/
     121             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)         singularValues_fx_e*/
     122             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                  singularValues_fx_e*/
     123             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
     124             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)                                                    */
     125             :     Word16 *secDiag_fx_e,                                   /* i/o:                                                                                                               */
     126             :     const Word16 nChannelsL,                                /* i  : number of rows in the matrix to be decomposed               Q0*/
     127             :     const Word16 nChannelsC,                                /* i  : number of columns in the matrix to be decomposed    Q0*/
     128             :     const Word32 eps_x,                                     /* i  :                                                eps_x_e*/
     129             :     const Word16 eps_x_e                                    /* i  :                                                       */
     130             : );
     131             : 
     132             : static void ApplyRotation_fx(
     133             :     Word32 singularVector[][MAX_OUTPUT_CHANNELS],
     134             :     const Word32 c, /* exp(c_e)*/
     135             :     const Word16 c_e,
     136             :     const Word32 s, /* exp(s_e) */
     137             :     const Word16 s_e,
     138             :     Word32 x11, /* exp(x11_e) */
     139             :     Word16 x11_e,
     140             :     Word32 x12, /* exp(x12_e) */
     141             :     Word16 x12_e,
     142             :     Word32 *d, /* exp(d_e) */
     143             :     Word16 *d_e,
     144             :     Word32 *g, /* exp(g_e) */
     145             :     Word16 *g_e,
     146             :     const Word16 currentIndex1, /* Q0 */
     147             :     const Word16 currentIndex2, /* Q0 */
     148             :     const Word16 nChannels      /* Q0 */
     149             : );
     150             : 
     151             : static void GivensRotation2_fx(
     152             :     const Word32 x, /* exp(x_e) */
     153             :     const Word16 x_e,
     154             :     const Word32 z, /* exp(z_e) */
     155             :     const Word16 z_e,
     156             :     Word32 *result,
     157             :     Word32 *resultInv,
     158             :     Word16 *out_e,
     159             :     Word16 *outInv_e );
     160             : 
     161             : static Word32 GivensRotation_fx(
     162             :     const Word32 x, /* exp(x_e) */
     163             :     const Word16 x_e,
     164             :     const Word32 z, /* exp(z_e) */
     165             :     const Word16 z_e,
     166             :     Word16 *out_e );
     167             : 
     168             : static void ApplyQRTransform_fx(
     169             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                      singularValues_e*/
     170             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)        singularValues_e*/
     171             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                     singularValues_e*/
     172             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* i/o:                                                                                  secDiag_e*/
     173             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     174             :     Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
     175             :     const Word16 startIndex,   /* i  :                                                  Q0*/
     176             :     const Word16 currentIndex, /* i  :                                                  Q0*/
     177             :     const Word16 nChannelsL,   /* i  : number of rows in the matrix to be decomposed                    Q0*/
     178             :     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed                 Q0*/
     179             : );
     180             : 
     181             : /*-------------------------------------------------------------------------
     182             :  * mat2svdMat()
     183             :  *
     184             :  * external matrix format to internal
     185             :  *-------------------------------------------------------------------------*/
     186             : 
     187      836340 : void mat2svdMat_fx(
     188             :     const Word32 *mat,                                       /* i  : matrix as column ordered vector    Qx*/
     189             :     Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* o  : matrix as two-dimensional arry             Qx*/
     190             :     const Word16 nRows,                                      /* i  : number of rows of the matrix               Q0*/
     191             :     const Word16 mCols,                                      /* i  : number of columns of the matrix    Q0*/
     192             :     const Word16 transpose                                   /* i  : flag indication transposition              Q0*/
     193             : )
     194             : {
     195             :     Word16 i, j;
     196             : 
     197      836340 :     IF( transpose )
     198             :     {
     199      706444 :         FOR( i = 0; i < mCols; i++ )
     200             :         {
     201     1775372 :             FOR( j = 0; j < nRows; j++ )
     202             :             {
     203     1185648 :                 svdMat[i][j] = mat[j + ( nRows * i )]; /* Qx */
     204     1185648 :                 move32();
     205             :             }
     206             : 
     207      589724 :             set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, nRows ) );
     208             :         }
     209             : 
     210     1394516 :         FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
     211             :         {
     212     1277796 :             set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
     213             :         }
     214             :     }
     215             :     ELSE
     216             :     {
     217     3069792 :         FOR( i = 0; i < nRows; i++ )
     218             :         {
     219    11768224 :             FOR( j = 0; j < mCols; j++ )
     220             :             {
     221     9418052 :                 svdMat[i][j] = mat[i + ( nRows * j )]; /* Qx */
     222     9418052 :                 move32();
     223             :             }
     224             : 
     225     2350172 :             set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, mCols ) );
     226             :         }
     227             : 
     228     9883368 :         FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
     229             :         {
     230     9163748 :             set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
     231             :         }
     232             :     }
     233             : 
     234      836340 :     return;
     235             : }
     236             : 
     237             : 
     238             : /*---------------------------------------------------------------------*
     239             :  * svdMat2mat()
     240             :  *
     241             :  * transfer a matrix from a two dimensional array  to a column wise ordered vector
     242             :  *---------------------------------------------------------------------*/
     243             : 
     244     1038400 : void svdMat2mat_fx(
     245             :     Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* i  : matrix as two-dimensional arry             Qx*/
     246             :     Word32 *mat,                                             /* o  : matrix as column ordered vector    Qx*/
     247             :     const Word16 nRows,                                      /* i  : number of rows of the matrix               Q0*/
     248             :     const Word16 mCols                                       /* i  : number of columns of the matrix    Q0*/
     249             : )
     250             : {
     251             :     Word16 i, j;
     252             : 
     253     4033332 :     FOR( i = 0; i < nRows; i++ )
     254             :     {
     255    11900708 :         FOR( j = 0; j < mCols; j++ )
     256             :         {
     257     8905776 :             mat[i + ( nRows * j )] = svdMat[i][j]; /* Qx */
     258     8905776 :             move32();
     259             :         }
     260             :     }
     261             : 
     262     1038400 :     return;
     263             : }
     264             : 
     265             : /*-------------------------------------------------------------------------
     266             :  * svd()
     267             :  *
     268             :  * perform a singular value decomposition X=USV of a matrix X
     269             :  *-------------------------------------------------------------------------*/
     270             : 
     271             : /*! r: error or success */
     272      836340 : Word16 svd_fx(
     273             :     Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i  : matrix to be decomposed (M)            InputMatrix_e*/
     274             :     Word16 InputMatrix_e,
     275             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* o  : left singular vectors (U)                   singularValues_fx_e*/
     276             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* o  : singular values vector (S)         singularValues_fx_e*/
     277             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o  : right singular vectors (V)                  singularValues_fx_e*/
     278             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
     279             :     const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed              Q0*/
     280             :     const Word16 nChannelsC  /* i  : number of columns in the matrix to be decomposed   Q0*/
     281             : )
     282             : {
     283             :     Word16 iCh, jCh;
     284             :     Word16 lengthSingularValues;
     285             :     Word16 errorMessage, condition;
     286             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
     287             :     Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
     288      836340 :     move16();
     289      836340 :     Word32 eps_x_fx = 0, temp_fx;
     290      836340 :     move16();
     291      836340 :     Word16 eps_x_fx_e = 0;
     292      836340 :     move16();
     293             :     Word16 temp_fx_e;
     294      836340 :     push_wmops( "svd_fx" );
     295             : 
     296             : 
     297             :     /* Collecting Values */
     298     3776236 :     FOR( iCh = 0; iCh < nChannelsL; iCh++ )
     299             :     {
     300    13543596 :         FOR( jCh = 0; jCh < nChannelsC; jCh++ )
     301             :         {
     302    10603700 :             singularVectors_Left_fx[iCh][jCh] = InputMatrix[iCh][jCh]; /* InputMatrix_e */
     303    10603700 :             move32();
     304             :         }
     305             :     }
     306             : 
     307             :     /* Householder reduction */
     308      836340 :     HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
     309             :     /* Set extremely small values to zero if needed */
     310             :     // flushToZeroArray(singularValues, max_length);
     311             :     // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
     312             :     // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);
     313             : 
     314             :     /* BidagonalDiagonalisation */
     315      836340 :     errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
     316             :     /* Sort the singular values descending order */
     317      836340 :     lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */
     318             : 
     319             :     DO
     320             :     {
     321     1156256 :         condition = 0;
     322     1156256 :         move16();
     323     3952548 :         FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
     324             :         {
     325             : #ifdef OPT_MCH_DEC_V1_NBE
     326     2796292 :             IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) )
     327             : #else  /* OPT_MCH_DEC_V1_NBE */
     328             :             IF( BASOP_Util_Cmp_Mant32Exp( singularValues_fx[iCh], singularValues_fx_e[iCh], singularValues_fx[iCh + 1], singularValues_fx_e[iCh + 1] ) < 0 )
     329             : #endif /* OPT_MCH_DEC_V1_NBE */
     330             :             {
     331      403439 :                 condition = 1;
     332      403439 :                 move16();
     333      403439 :                 temp_fx = singularValues_fx[iCh]; /* singularValues_fx_e */
     334      403439 :                 move32();
     335      403439 :                 singularValues_fx[iCh] = singularValues_fx[iCh + 1]; /* singularValues_fx_e */
     336      403439 :                 move32();
     337      403439 :                 singularValues_fx[iCh + 1] = temp_fx; /* singularValues_fx_e */
     338      403439 :                 move32();
     339      403439 :                 temp_fx_e = singularValues_fx_e[iCh];
     340      403439 :                 move16();
     341      403439 :                 singularValues_fx_e[iCh] = singularValues_fx_e[iCh + 1];
     342      403439 :                 move16();
     343      403439 :                 singularValues_fx_e[iCh + 1] = temp_fx_e;
     344      403439 :                 move16();
     345             : 
     346     2224309 :                 FOR( jCh = 0; jCh < nChannelsL; ++jCh )
     347             :                 {
     348     1820870 :                     temp_fx = singularVectors_Left_fx[jCh][iCh]; /* singularValues_fx_e */
     349     1820870 :                     move32();
     350     1820870 :                     singularVectors_Left_fx[jCh][iCh] = singularVectors_Left_fx[jCh][iCh + 1]; /* singularValues_fx_e */
     351     1820870 :                     move32();
     352     1820870 :                     singularVectors_Left_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
     353     1820870 :                     move32();
     354             :                 }
     355             : 
     356     2216677 :                 FOR( jCh = 0; jCh < nChannelsC; ++jCh )
     357             :                 {
     358     1813238 :                     temp_fx = singularVectors_Right_fx[jCh][iCh]; /* singularValues_fx_e */
     359     1813238 :                     move32();
     360     1813238 :                     singularVectors_Right_fx[jCh][iCh] = singularVectors_Right_fx[jCh][iCh + 1]; /* singularValues_fx_e */
     361     1813238 :                     move32();
     362     1813238 :                     singularVectors_Right_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
     363     1813238 :                     move32();
     364             :                 }
     365             :             }
     366             :         }
     367             :     }
     368     1156256 :     WHILE( EQ_16( condition, 1 ) );
     369             : 
     370      836340 :     pop_wmops();
     371      836340 :     return ( errorMessage );
     372             : }
     373             : 
     374             : 
     375             : /*-----------------------------------------------------------------------*
     376             :  * Local functions
     377             :  *-----------------------------------------------------------------------*/
     378             : 
     379             : /*-------------------------------------------------------------------------
     380             :  * BidagonalDiagonalisation()
     381             :  *
     382             :  *
     383             :  *-------------------------------------------------------------------------*/
     384             : 
     385      836340 : static Word16 BidagonalDiagonalisation_fx(
     386             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)              singularValues_fx_e*/
     387             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)         singularValues_fx_e*/
     388             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)             singularValues_fx_e*/
     389             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
     390             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)                                                    */
     391             :     Word16 *secDiag_new_e,                                  /* i/o:                                                                                                               */
     392             :     const Word16 nChannelsL,                                /* i  : number of rows in the matrix to be decomposed               Q0*/
     393             :     const Word16 nChannelsC,                                /* i  : number of columns in the matrix to be decomposed    Q0*/
     394             :     const Word32 eps_x,                                     /* i  :                                                eps_x_e*/
     395             :     const Word16 eps_x_e                                    /* i  :                                                       */
     396             : )
     397             : {
     398             :     Word16 kCh, nCh, iCh, jCh, split;
     399             :     Word32 c, s, f1, f2;
     400             :     Word16 c_e, s_e, f1_e, f2_e;
     401      836340 :     Word16 x11_e = 0, x12_e = 0;
     402      836340 :     move16();
     403      836340 :     move16();
     404             :     Word16 temp_exp;
     405             :     Word16 temp_exp2;
     406      836340 :     Word32 g = 0;
     407      836340 :     move16();
     408      836340 :     Word16 g_e = 0;
     409      836340 :     move16();
     410             :     Word16 convergence, iteration, found_split;
     411      836340 :     Word16 error = 0;
     412      836340 :     move16();
     413             :     Word32 temp;
     414             :     Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
     415      836340 :     Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
     416             : 
     417     3420512 :     FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
     418             :     {
     419     2584172 :         convergence = 0;
     420     2584172 :         move16();
     421     2584172 :         iteration = 0;
     422     2584172 :         move16();
     423     2584172 :         split = iCh - 1; /* Q0 */
     424     2584172 :         move16();
     425             : 
     426     6893394 :         WHILE( EQ_16( convergence, 0 ) )
     427             :         {
     428     4309222 :             iteration = add( iteration, 1 ); /* Q0 */
     429     4309222 :             found_split = 1;                 /* Q0 */
     430     4309222 :             move16();
     431             : 
     432     8354566 :             FOR( jCh = iCh; jCh >= 0; jCh-- )
     433             :             {
     434             : #ifdef OPT_MCH_DEC_V1_NBE
     435     8354566 :                 Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e );
     436     8354566 :                 IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */
     437             : #else
     438             :                 split = sub( jCh, 1 ); /* Q0 */                                                                                                                                  /* OPT_MCH_DEC_V1_NBE */
     439             :                 IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) )                   /* is secDiag[ch] vanishing compared to eps_x */
     440             : #endif /* OPT_MCH_DEC_V1_NBE */
     441             :                 {
     442     4266698 :                     found_split = 0;
     443     4266698 :                     move16();
     444     4266698 :                     BREAK;
     445             :                 }
     446             : #ifdef OPT_MCH_DEC_V1_NBE
     447     4087868 :                 com_e = s_max( singularValues_new_e[jCh - 1], eps_x_e );
     448     4087868 :                 IF( LE_32( L_shr( L_abs( singularValues_fx[jCh - 1] ), sub( com_e, singularValues_new_e[jCh - 1] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[jCh - 1] vanishing compared to eps_x */
     449             : #else                                                                                                                                                                                               /* OPT_MCH_DEC_V1_NBE */
     450             :                 IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( singularValues_fx[split] ), singularValues_new_e[split], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
     451             : #endif                                                                                                                                                                                              /* OPT_MCH_DEC_V1_NBE */
     452             :                 {
     453       42524 :                     BREAK;
     454             :                 }
     455             :             }
     456             : 
     457             :             // convergence = ( jCh == iCh ) ? 1 : 0;
     458     4309222 :             IF( EQ_16( jCh, iCh ) )
     459             :             {
     460     2584172 :                 convergence = 1; /* Q0 */
     461     2584172 :                 move16();
     462             :             }
     463             :             ELSE
     464             :             {
     465     1725050 :                 convergence = 0;
     466     1725050 :                 move16();
     467             :             }
     468             : 
     469     4309222 :             IF( found_split )
     470             :             {
     471       42524 :                 s = MAX_32;
     472       42524 :                 move32();
     473       42524 :                 s_e = 0;
     474       42524 :                 move16();
     475       42524 :                 c = 0;
     476       42524 :                 move32();
     477       42524 :                 c_e = 0;
     478       42524 :                 move16();
     479             : #ifdef OPT_MCH_DEC_V1_NBE
     480       42524 :                 split = sub( jCh, 1 ); /* Q0 */
     481             : #endif                                 /* OPT_MCH_DEC_V1_NBE */
     482       85097 :                 FOR( kCh = jCh; kCh <= iCh; kCh++ )
     483             :                 {
     484       42576 :                     g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */
     485       42576 :                     g_e = add( s_e, secDiag_new_e[kCh] );
     486       42576 :                     secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
     487       42576 :                     secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
     488             : #ifdef OPT_MCH_DEC_V1_NBE
     489       42576 :                     Word16 com_e = s_max( g_e, eps_x_e );
     490       42576 :                     IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) )
     491             : #else  /* OPT_MCH_DEC_V1_NBE */
     492             :                     IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) )                                            /* is singularValues[split] vanishing compared to eps_x */
     493             : #endif /* OPT_MCH_DEC_V1_NBE */
     494             :                     {
     495           3 :                         BREAK;
     496             :                     }
     497             : 
     498       42573 :                     c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
     499       42573 :                     c_e = singularValues_new_e[kCh];
     500       42573 :                     GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
     501       42573 :                     c = Mpy_32_32( c, temp );
     502       42573 :                     c_e = add( c_e, temp_exp );
     503       42573 :                     temp_exp2 = norm_l( c );
     504       42573 :                     c = L_shl( c, temp_exp2 );
     505       42573 :                     c_e = sub( c_e, temp_exp2 );
     506       42573 :                     s = Mpy_32_32( -g, temp );
     507       42573 :                     s_e = add( g_e, temp_exp );
     508       42573 :                     temp_exp2 = norm_l( s );
     509       42573 :                     s = L_shl( s, temp_exp2 );
     510       42573 :                     s_e = sub( s_e, temp_exp2 );
     511       42573 :                     ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
     512             :                 }
     513             :             }
     514             : 
     515     4309222 :             IF( convergence )
     516             :             {
     517     2584172 :                 singularValues_fx[iCh] = singularValues_fx[iCh];
     518     2584172 :                 move32();
     519     2584172 :                 IF( singularValues_fx[iCh] < 0 )
     520             :                 {
     521      486140 :                     singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
     522      486140 :                     move32();
     523     1773858 :                     FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     524             :                     {
     525     1287718 :                         singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
     526     1287718 :                         move32();
     527             :                     }
     528             :                 }
     529             :             }
     530             :             ELSE
     531             :             {
     532     1725050 :                 IF( GE_16( iteration, SVD_MAX_NUM_ITERATION ) )
     533             :                 {
     534           0 :                     IF( LT_32( singularValues_fx[iCh], 0 ) )
     535             :                     {
     536           0 :                         singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
     537           0 :                         move32();
     538             : 
     539           0 :                         FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     540             :                         {
     541           0 :                             singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
     542           0 :                             move32();
     543             :                         }
     544             :                     }
     545           0 :                     error = 1;
     546           0 :                     move16();
     547           0 :                     convergence = 1;
     548           0 :                     move16();
     549             :                 }
     550             :                 ELSE
     551             :                 {
     552     1725050 :                     ApplyQRTransform_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_new_e, secDiag_new_e, jCh, iCh, nChannelsL, nChannelsC ); /* nChannelsC */
     553             :                 }
     554             :             }
     555             :         }
     556             :     }
     557             : 
     558             :     // rescaling block
     559      836340 :     Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );
     560             : 
     561             : 
     562      836340 :     return ( error );
     563             : }
     564             : 
     565             : /*-------------------------------------------------------------------------
     566             :  * ApplyQRTransform()
     567             :  *
     568             :  *
     569             :  *-------------------------------------------------------------------------*/
     570             : 
     571     1725050 : static void ApplyQRTransform_fx(
     572             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                                              singularValues_e*/
     573             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)                                             singularValues_e*/
     574             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                                             singularValues_e*/
     575             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* i/o:                                                                                                          secDiag_e*/
     576             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     577             :     Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
     578             :     const Word16 startIndex,   /* i  :                                                  Q0*/
     579             :     const Word16 currentIndex, /* i  :                                                  Q0*/
     580             :     const Word16 nChannelsL,   /* i  : number of rows in the matrix to be decomposed                    Q0*/
     581             :     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed                 Q0*/
     582             : )
     583             : {
     584             :     Word32 temp;
     585             :     Word16 temp_e;
     586             :     Word16 temp_norm_e;
     587             :     Word16 ch, split;
     588     1725050 :     Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
     589     1725050 :     move32();
     590     1725050 :     move32();
     591     1725050 :     move32();
     592     1725050 :     move32();
     593     1725050 :     move32();
     594     1725050 :     move32();
     595     1725050 :     move32();
     596     1725050 :     move32();
     597     1725050 :     Word16 d_e = 0, g_e = 0, r_e = 0, x_ii_e = 0, x_split_e = 0, x_kk_e = 0, mu_e = 0, aux_e = 0;
     598     1725050 :     move16();
     599     1725050 :     move16();
     600     1725050 :     move16();
     601     1725050 :     move16();
     602     1725050 :     move16();
     603     1725050 :     move16();
     604     1725050 :     move16();
     605     1725050 :     move16();
     606             :     Word32 L_temp1, L_temp2, L_temp3, L_temp4;
     607             :     Word16 L_temp1_e, L_temp2_e, L_temp3_e, L_temp4_e, temp_exp;
     608     1725050 :     Word32 c = MAX_32;
     609     1725050 :     move32();
     610     1725050 :     Word16 c_e = 0;
     611     1725050 :     move16();
     612     1725050 :     Word32 s = MAX_32;
     613     1725050 :     move32();
     614     1725050 :     Word16 s_e = 0;
     615     1725050 :     move16();
     616             : 
     617     1725050 :     x_kk = singularValues[currentIndex]; /* exp(singularValues_e) */
     618     1725050 :     move32();
     619     1725050 :     x_kk_e = singularValues_e[currentIndex];
     620     1725050 :     move16();
     621     1725050 :     x_ii = singularValues[startIndex]; /* exp(singularValues_e) */
     622     1725050 :     move32();
     623     1725050 :     x_ii_e = singularValues_e[startIndex];
     624     1725050 :     move16();
     625     1725050 :     split = sub( currentIndex, 1 ); /* Q0 */
     626     1725050 :     move16();
     627             : 
     628     1725050 :     x_split = singularValues[split]; /* exp(singularValues_e) */
     629     1725050 :     move32();
     630     1725050 :     x_split_e = singularValues_e[split];
     631     1725050 :     move16();
     632     1725050 :     g = secDiag[split]; /* exp(secDiag_e) */
     633     1725050 :     move32();
     634     1725050 :     g_e = secDiag_e[split];
     635     1725050 :     move16();
     636     1725050 :     r = secDiag[currentIndex]; /* exp(secDiag_e) */
     637     1725050 :     move32();
     638     1725050 :     r_e = secDiag_e[currentIndex];
     639     1725050 :     move16();
     640             : 
     641             :     // d = (x_split + x_kk) * (x_split - x_kk) + (g + r) * (g - r);
     642     1725050 :     L_temp1 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, x_kk, x_kk_e, &L_temp1_e );                                                                           /* exp(L_temp1_e) */
     643     1725050 :     L_temp2 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, L_negate( x_kk ), x_kk_e, &L_temp2_e );                                                               /* exp(L_temp2_e) */
     644     1725050 :     L_temp3 = BASOP_Util_Add_Mant32Exp( g, g_e, r, r_e, &L_temp3_e );                                                                                             /* exp(L_temp3_e) */
     645     1725050 :     L_temp4 = BASOP_Util_Add_Mant32Exp( g, g_e, L_negate( r ), r_e, &L_temp4_e );                                                                                 /* exp(L_temp4_e) */
     646     1725050 :     d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( L_temp3, L_temp4 ), add( L_temp3_e, L_temp4_e ), &d_e ); /* exp(d_e) */
     647             : 
     648             :     // d /= maxWithSign((r + r) * x_split);
     649     1725050 :     L_temp1 = BASOP_Util_Add_Mant32Exp( r, r_e, r, r_e, &L_temp1_e ); /* exp(L_temp1_e) */
     650     1725050 :     L_temp1 = maxWithSign_fx( Mpy_32_32( L_temp1, x_split ) );        /* exp(L_temp1_e + x_split_e) */
     651     1725050 :     L_temp1_e = add( L_temp1_e, x_split_e );
     652     1725050 :     d = BASOP_Util_Divide3232_Scale_newton( d, L_temp1, &temp_exp ); /* temp_exp + d_e - L_temp1_e */
     653     1725050 :     d_e = add( temp_exp, sub( d_e, L_temp1_e ) );
     654             : 
     655     1725050 :     g = GivensRotation_fx( MAX_32, 0, d, d_e, &g_e );
     656             : 
     657             :     // mu = x_split / maxWithSign(d + (d >= 0.0f ? 1 : (-1)) * fabsf(g)) - r;
     658             :     // L_temp1 = d >= 0 ? L_abs( g ) : -L_abs( g );
     659     1725050 :     IF( d >= 0 )
     660             :     {
     661     1019752 :         L_temp1 = L_abs( g );
     662             :     }
     663             :     ELSE
     664             :     {
     665      705298 :         L_temp1 = L_negate( L_abs( g ) );
     666             :     }
     667     1725050 :     L_temp1_e = g_e;
     668     1725050 :     move16();
     669     1725050 :     L_temp2 = maxWithSign_fx( BASOP_Util_Add_Mant32Exp( d, d_e, L_temp1, L_temp1_e, &L_temp2_e ) ); /* exp(L_temp2_e) */
     670     1725050 :     mu = BASOP_Util_Divide3232_Scale_newton( x_split, L_temp2, &mu_e );                             /* exp(mu_e + (x-plit_e - L_temp2_e)) */
     671     1725050 :     mu_e = add( mu_e, sub( x_split_e, L_temp2_e ) );
     672     1725050 :     mu = BASOP_Util_Add_Mant32Exp( mu, mu_e, L_negate( r ), r_e, &mu_e ); /* exp(mu_e) */
     673             : 
     674             :     // d = ((x_ii + x_kk) * (x_ii - x_kk) + r * mu) / maxWithSign(x_ii);
     675     1725050 :     L_temp1 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, x_kk, x_kk_e, &L_temp1_e );                                                           /* exp(L_temp1_e) */
     676     1725050 :     L_temp2 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, L_negate( x_kk ), x_kk_e, &L_temp2_e );                                               /* exp(L_temp2_e) */
     677     1725050 :     d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( r, mu ), add( r_e, mu_e ), &d_e ); /* exp(d_e) */
     678     1725050 :     d = BASOP_Util_Divide3232_Scale_newton( d, maxWithSign_fx( x_ii ), &temp_exp );                                                         /* exp(temp_exp + (d_e - x_ii_e) */
     679     1725050 :     d_e = add( temp_exp, sub( d_e, x_ii_e ) );
     680             : 
     681             :     /*QR transformation*/
     682     5770394 :     FOR( ch = startIndex; ch <= split; ch++ )
     683             :     {
     684     4045344 :         r = Mpy_32_32( s, secDiag[ch + 1] ); /* exp(s_e + secDiag_e) */
     685     4045344 :         r_e = add( s_e, secDiag_e[ch + 1] );
     686     4045344 :         g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
     687     4045344 :         g_e = add( c_e, secDiag_e[ch + 1] );
     688             : 
     689     4045344 :         GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
     690     4045344 :         c = Mpy_32_32( d, temp );
     691     4045344 :         c_e = add( temp_e, d_e );
     692     4045344 :         temp_norm_e = norm_l( c );
     693     4045344 :         c = L_shl( c, temp_norm_e );
     694     4045344 :         c_e = sub( c_e, temp_norm_e );
     695     4045344 :         s = Mpy_32_32( r, temp );
     696     4045344 :         s_e = add( r_e, temp_e );
     697     4045344 :         temp_norm_e = norm_l( s );
     698     4045344 :         s = L_shl( s, temp_norm_e );
     699     4045344 :         s_e = sub( s_e, temp_norm_e );
     700     4045344 :         r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
     701     4045344 :         r_e = add( s_e, singularValues_e[ch + 1] );
     702     4045344 :         x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
     703     4045344 :         x_split_e = add( c_e, singularValues_e[ch + 1] );
     704             : 
     705     4045344 :         aux = g; /* exp(g_e) */
     706     4045344 :         move32();
     707     4045344 :         aux_e = g_e;
     708     4045344 :         move16();
     709             : 
     710             :         // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
     711     4045344 :         ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
     712             : 
     713     4045344 :         GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
     714     4045344 :         IF( singularValues[ch] != 0 )
     715             :         {
     716             : 
     717     4045344 :             c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
     718     4045344 :             c_e = add( d_e, aux_e );
     719     4045344 :             temp_norm_e = norm_l( c );
     720     4045344 :             c = L_shl( c, temp_norm_e );
     721     4045344 :             c_e = sub( c_e, temp_norm_e );
     722             : 
     723     4045344 :             s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
     724     4045344 :             s_e = add( r_e, aux_e );
     725     4045344 :             temp_norm_e = norm_l( s );
     726     4045344 :             s = L_shl( s, temp_norm_e );
     727     4045344 :             s_e = sub( s_e, temp_norm_e );
     728             :         }
     729             : 
     730             :         // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
     731     4045344 :         ApplyRotation_fx( singularVectors_Left, c, c_e, s, s_e, g, g_e, x_split, x_split_e, &d, &d_e, &x_ii, &x_ii_e, ch + 1, ch, nChannelsL );
     732             :     }
     733             : 
     734     1725050 :     secDiag[startIndex] = 0;
     735     1725050 :     move32();
     736     1725050 :     secDiag[currentIndex] = d; /* exp(d_e) */
     737     1725050 :     move32();
     738     1725050 :     secDiag_e[currentIndex] = d_e;
     739     1725050 :     move16();
     740     1725050 :     singularValues[currentIndex] = x_ii; /* exp(x_ii_e) */
     741     1725050 :     move32();
     742     1725050 :     singularValues_e[currentIndex] = x_ii_e;
     743     1725050 :     move16();
     744             : 
     745     1725050 :     return;
     746             : }
     747             : 
     748             : /*-------------------------------------------------------------------------
     749             :  * ApplyRotation()
     750             :  *
     751             :  *
     752             :  *-------------------------------------------------------------------------*/
     753             : 
     754     8133261 : static void ApplyRotation_fx(
     755             :     Word32 singularVector[][MAX_OUTPUT_CHANNELS],
     756             :     const Word32 c, /* exp(c_e)*/
     757             :     const Word16 c_e,
     758             :     const Word32 s, /* exp(s_e) */
     759             :     const Word16 s_e,
     760             :     Word32 x11, /* exp(x11_e) */
     761             :     Word16 x11_e,
     762             :     Word32 x12, /* exp(x12_e) */
     763             :     Word16 x12_e,
     764             :     Word32 *d, /* exp(d_e) */
     765             :     Word16 *d_e,
     766             :     Word32 *g, /* exp(g_e) */
     767             :     Word16 *g_e,
     768             :     const Word16 currentIndex1, /* Q0 */
     769             :     const Word16 currentIndex2, /* Q0 */
     770             :     const Word16 nChannels      /* Q0 */
     771             : )
     772             : {
     773             :     Word16 ch;
     774             : 
     775     8133261 :     *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
     776     8133261 :     move32();
     777     8133261 :     *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
     778     8133261 :     move32();
     779             : 
     780     8133261 :     Word16 c_q = sub( 31, c_e );
     781     8133261 :     Word16 s_q = sub( 31, s_e );
     782             :     Word32 op1, op2;
     783             :     Word16 op_e;
     784             : 
     785             :     // Bring c and s to same Q
     786     8133261 :     IF( GT_16( c_q, s_q ) )
     787             :     {
     788      711006 :         op1 = L_shr( c, sub( c_q, s_q ) );
     789      711006 :         op2 = s;
     790      711006 :         move32();
     791      711006 :         op_e = s_q;
     792      711006 :         move16();
     793             :     }
     794             :     ELSE
     795             :     {
     796     7422255 :         op1 = c;
     797     7422255 :         move32();
     798     7422255 :         op2 = L_shr( s, sub( s_q, c_q ) );
     799     7422255 :         op_e = c_q;
     800     7422255 :         move16();
     801             :     }
     802     8133261 :     op_e = add( op_e, 1 ); // 64 bit mac -> +1
     803             : #ifdef OPT_MCH_DEC_V1_BE
     804     8133261 :     op_e = negate( op_e );
     805             : #endif /* OPT_MCH_DEC_V1_BE */
     806             : 
     807    47836581 :     FOR( ch = 0; ch < nChannels; ch++ )
     808             :     {
     809    39703320 :         x11 = singularVector[ch][currentIndex2];
     810    39703320 :         move32();
     811    39703320 :         x12 = singularVector[ch][currentIndex1];
     812    39703320 :         move32();
     813             : 
     814    39703320 :         Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
     815             : #ifdef OPT_MCH_DEC_V1_BE
     816    39703320 :         singularVector[ch][currentIndex2] = W_shl_sat_l( temp, op_e ); // Q(singularVector)
     817             : #else                                                                  /* OPT_MCH_DEC_V1_BE */
     818             :         temp = W_shr( temp, op_e );                                                                                                                                              // Q(singularVector)
     819             :         singularVector[ch][currentIndex2] = W_sat_l( temp );                                                                                                                     // Q(singularVector)
     820             : #endif                                                                 /* OPT_MCH_DEC_V1_BE */
     821    39703320 :         move32();
     822             : 
     823    39703320 :         temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
     824             : #ifdef OPT_MCH_DEC_V1_BE
     825    39703320 :         singularVector[ch][currentIndex1] = W_shl_sat_l( temp, op_e ); // Q(singularVector)
     826             : #else                                                                  /* OPT_MCH_DEC_V1_BE */
     827             :         temp = W_shr( temp, op_e );                                                                                                                                              // Q(singularVector)
     828             :         singularVector[ch][currentIndex1] = W_sat_l( temp );                                                                                                                     // Q(singularVector)
     829             : #endif                                                                 /* OPT_MCH_DEC_V1_BE */
     830    39703320 :         move32();
     831             :     }
     832             : 
     833     8133261 :     return;
     834             : }
     835             : 
     836             : /*-------------------------------------------------------------------------
     837             :  * HouseholderReduction()
     838             :  *
     839             :  *
     840             :  *-------------------------------------------------------------------------*/
     841             : 
     842      836340 : static void HouseholderReduction_fx(
     843             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
     844             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* exp(singularValues_fx_e) */
     845             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
     846             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_fx_e) */
     847             :     Word16 singularVectors_Left_e,
     848             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
     849             :     Word16 *secDiag_fx_e,
     850             :     const Word16 nChannelsL, /* Q0 */
     851             :     const Word16 nChannelsC, /* Q0 */
     852             :     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
     853             :     Word16 *eps_x_fx_e )
     854             : {
     855             :     Word16 nCh;
     856             :     // float g = 0.0f, sig_x = 0.0f;// to be removed
     857      836340 :     Word32 g_fx = 0, sig_x_fx = 0;
     858      836340 :     move32();
     859      836340 :     move32();
     860      836340 :     Word16 sig_x_fx_e = 0;
     861      836340 :     move16();
     862             : 
     863             :     Word16 iCh, jCh;
     864             :     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     865     3776236 :     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     866             :     {
     867    13543596 :         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
     868             :         {
     869    10603700 :             singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
     870    10603700 :             move16();
     871             :         }
     872             :     }
     873             : 
     874             :     /* Bidiagonal Reduction for every channel */
     875     3420512 :     FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     876             :     {
     877     2584172 :         biDiagonalReductionLeft_fx( singularVectors_Left_fx, singularValues_fx, secDiag_fx, singularVectors_Left_fx_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
     878     2584172 :         biDiagonalReductionRight_fx( singularVectors_Left_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx );
     879             : 
     880             :         Word16 L_temp_e;
     881     2584172 :         Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
     882     2584172 :         IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
     883             :         {
     884     1206737 :             *eps_x_fx = L_temp; /* exp(L_temp_e) */
     885     1206737 :             move32();
     886     1206737 :             *eps_x_fx_e = L_temp_e;
     887     1206737 :             move32();
     888             :         }
     889             :     }
     890             : 
     891             :     /* SingularVecotr Accumulation */
     892      836340 :     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
     893             : 
     894             : 
     895      836340 :     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
     896             : 
     897      836340 :     return;
     898             : }
     899             : 
     900             : /*-------------------------------------------------------------------------
     901             :  * biDiagonalReductionLeft()
     902             :  *
     903             :  *
     904             :  *-------------------------------------------------------------------------*/
     905             : 
     906     2584172 : static void biDiagonalReductionLeft_fx(
     907             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
     908             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
     909             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_e) */
     910             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
     911             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     912             :     Word16 *secDiag_e,
     913             :     const Word16 nChannelsL,  /* Q0 */
     914             :     const Word16 nChannelsC,  /* Q0 */
     915             :     const Word16 currChannel, /* Q0 */
     916             :     Word32 *sig_x,            /* exp(sig_x_e) */
     917             :     Word16 *sig_x_e,
     918             :     Word32 *g /* Q31 */
     919             : )
     920             : {
     921             :     Word16 iCh, jCh, idx;
     922             :     Word32 norm_x, f, r;
     923             :     Word16 norm_x_e, f_e, r_e;
     924             :     Word32 L_temp;
     925             :     Word16 L_temp_e;
     926             : 
     927     2584172 :     secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); /* exp(sig_x_e) */
     928     2584172 :     move32();
     929     2584172 :     secDiag_e[currChannel] = *sig_x_e;
     930     2584172 :     move16();
     931             : 
     932             :     /* Setting values to 0 */
     933     2584172 :     ( *sig_x ) = 0;
     934     2584172 :     move32();
     935     2584172 :     ( *g ) = 0;
     936     2584172 :     move32();
     937             : 
     938     2584172 :     IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     939             :     {
     940     2584172 :         idx = currChannel;
     941     2584172 :         move16();
     942             : 
     943     9536092 :         FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     944             :         {
     945     6951920 :             ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), singularVectors2_e[jCh][currChannel], sig_x_e ); /* exp(sig_x_e) */
     946             :         }
     947             : 
     948     2584172 :         IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
     949             :         {
     950             :             Word16 invVal_e;
     951             :             Word32 invVal;
     952     2459612 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
     953             : #ifdef OPT_MCH_DEC_V1_NBE
     954     2459612 :             Word64 temp = 0;
     955     2459612 :             move64();
     956     2459612 :             Word16 max_e = MIN_16;
     957             : #else  /* OPT_MCH_DEC_V1_NBE */
     958             :             norm_x = 0;
     959             :             move32();
     960             :             norm_x_e = 0;
     961             : #endif /* OPT_MCH_DEC_V1_NBE */
     962     2459612 :             move16();
     963     9285373 :             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     964             :             {
     965     6825761 :                 Word16 temp_e = norm_l( singularVectors[jCh][currChannel] );
     966     6825761 :                 singularVectors[jCh][currChannel] = Mpy_32_32( L_shl( singularVectors[jCh][currChannel], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
     967     6825761 :                 move32();
     968     6825761 :                 singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
     969     6825761 :                 move16();
     970             : #ifdef OPT_MCH_DEC_V1_NBE
     971     6825761 :                 max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] );
     972             : #else  /* OPT_MCH_DEC_V1_NBE */
     973             :                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
     974             : #endif /* OPT_MCH_DEC_V1_NBE */
     975             :             }
     976             : 
     977             : #ifdef OPT_MCH_DEC_V1_NBE
     978     9285373 :             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     979             :             {
     980     6825761 :                 temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors2_e[jCh][currChannel] ), 1 ) ) );
     981             :             }
     982             : 
     983     2459612 :             Word16 nrm = W_norm( temp );
     984     2459612 :             nrm = sub( nrm, 32 );
     985     2459612 :             norm_x = W_shl_sat_l( temp, nrm );
     986     2459612 :             norm_x_e = sub( add( max_e, max_e ), nrm );
     987             : #endif /* OPT_MCH_DEC_V1_NBE */
     988             : 
     989     2459612 :             IF( GT_16( norm_x_e, 0 ) )
     990             :             {
     991      417073 :                 norm_x = MAX_32;
     992      417073 :                 move32();
     993      417073 :                 norm_x_e = 0;
     994      417073 :                 move16();
     995             :             }
     996     2459612 :             L_temp_e = norm_x_e;
     997     2459612 :             move16();
     998     2459612 :             L_temp = Sqrt32( norm_x, &L_temp_e );
     999     2459612 :             L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
    1000             :                                                   //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
    1001     2459612 :             if ( singularVectors[currChannel][idx] >= 0 )
    1002             :             {
    1003     1392322 :                 L_temp = L_negate( L_temp );
    1004             :             }
    1005     2459612 :             ( *g ) = L_temp;
    1006     2459612 :             move32();
    1007             : 
    1008     2459612 :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
    1009     2459612 :             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* sing_exp */
    1010     2459612 :             move32();
    1011             : 
    1012     2459612 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
    1013             : 
    1014     6109793 :             FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1015             :             {
    1016             : #ifdef OPT_MCH_DEC_V1_NBE
    1017     3650181 :                 Word16 max2_e = MIN_16;
    1018     3650181 :                 max_e = MIN_16;
    1019     3650181 :                 move16();
    1020     3650181 :                 move16();
    1021     3650181 :                 temp = 0;
    1022     3650181 :                 move64();
    1023             : 
    1024    18098633 :                 FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1025             :                 {
    1026    14448452 :                     max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] ); /* exp(norm_x_e) */
    1027    14448452 :                     max2_e = s_max( max2_e, singularVectors2_e[jCh][iCh] );       /* exp(norm_x_e) */
    1028             :                 }
    1029     3650181 :                 max_e = add( max_e, max2_e );
    1030             : 
    1031    18098633 :                 FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1032             :                 {
    1033    14448452 :                     temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ) ) ) );
    1034             :                 }
    1035     3650181 :                 nrm = W_norm( temp );
    1036     3650181 :                 nrm = sub( nrm, 32 );
    1037     3650181 :                 norm_x = W_shl_sat_l( temp, nrm );
    1038     3650181 :                 norm_x_e = sub( max_e, nrm );
    1039             : #else  /* OPT_MCH_DEC_V1_NBE */
    1040             :                 norm_x = 0;
    1041             :                 move32();
    1042             :                 norm_x_e = 0;
    1043             :                 move16();
    1044             :                 FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1045             :                 {
    1046             :                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
    1047             :                 }
    1048             : #endif /* OPT_MCH_DEC_V1_NBE */
    1049             : 
    1050     3650181 :                 f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
    1051     3650181 :                 f_e = add( invVal_e, sub( norm_x_e, r_e ) );
    1052             : 
    1053    18098633 :                 FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1054             :                 {
    1055    14448452 :                     singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors2_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors2_e[jCh][currChannel] ), &singularVectors2_e[jCh][iCh] );
    1056    14448452 :                     move32();
    1057             :                 }
    1058             :             }
    1059             : 
    1060             : 
    1061     9285373 :             FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
    1062             :             {
    1063     6825761 :                 singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); /* sing_exp + sig_x_e */
    1064     6825761 :                 move32();
    1065     6825761 :                 singularVectors2_e[jCh][currChannel] = add( singularVectors2_e[jCh][currChannel], *sig_x_e );
    1066     6825761 :                 move16();
    1067             :             }
    1068             :         }
    1069             : 
    1070             :         // rescaling block
    1071     2584172 :         singularValues[currChannel] = Mpy_32_32( ( *sig_x ), ( *g ) ); /* sig_x_e */
    1072     2584172 :         move32();
    1073     2584172 :         singularValues_e[currChannel] = *sig_x_e;
    1074     2584172 :         move16();
    1075             :     }
    1076             : 
    1077     2584172 :     return;
    1078             : }
    1079             : 
    1080             : /*-------------------------------------------------------------------------
    1081             :  * biDiagonalReductionRight()
    1082             :  *
    1083             :  *
    1084             :  *-------------------------------------------------------------------------*/
    1085             : 
    1086     2584172 : static void biDiagonalReductionRight_fx(
    1087             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    1088             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* exp(secDiag_exp[]) */
    1089             :     Word16 singularVectors2_e[][MAX_OUTPUT_CHANNELS],
    1090             :     Word16 *secDiag_exp,
    1091             :     const Word16 nChannelsL,  /* Q0 */
    1092             :     const Word16 nChannelsC,  /* Q0 */
    1093             :     const Word16 currChannel, /* Q0 */
    1094             :     Word32 *sig_x,            /* exp(sig_x_e) */
    1095             :     Word16 *sig_x_e,
    1096             :     Word32 *g /* Q31 */
    1097             : )
    1098             : {
    1099             :     Word16 iCh, jCh, idx;
    1100             :     Word32 norm_x, r;
    1101             :     Word16 norm_x_e, r_e;
    1102             :     Word32 L_temp;
    1103             :     Word16 L_temp_e;
    1104             : 
    1105             :     /* Setting values to 0 */
    1106     2584172 :     ( *sig_x ) = 0;
    1107     2584172 :     move32();
    1108     2584172 :     ( *g ) = 0;
    1109     2584172 :     move32();
    1110             : 
    1111     2584172 :     IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
    1112             :     {
    1113     1747832 :         idx = add( currChannel, 1 ); /* Q0 */
    1114             : 
    1115     5399612 :         FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1116             :         {
    1117     3651780 :             ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh] ), singularVectors2_e[currChannel][jCh], sig_x_e ); /* exp(sig_x_e) */
    1118             :         }
    1119             : 
    1120     1747832 :         IF( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
    1121             :         {
    1122     1638035 :             norm_x = 0;
    1123     1638035 :             move32();
    1124     1638035 :             norm_x_e = 0;
    1125     1638035 :             move16();
    1126             : 
    1127             :             Word16 invVal_e, temp_e;
    1128             :             Word32 invVal;
    1129     1638035 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
    1130     5179741 :             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
    1131             :             {
    1132     3541706 :                 temp_e = norm_l( singularVectors[currChannel][jCh] );
    1133     3541706 :                 singularVectors[currChannel][jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
    1134     3541706 :                 move32();
    1135     3541706 :                 singularVectors2_e[currChannel][jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], *sig_x_e ) );
    1136     3541706 :                 move16();
    1137     3541706 :                 norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors2_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
    1138             :             }
    1139     1638035 :             IF( GT_16( norm_x_e, 0 ) )
    1140             :             {
    1141      522785 :                 norm_x = MAX_32;
    1142      522785 :                 move32();
    1143      522785 :                 norm_x_e = 0;
    1144      522785 :                 move16();
    1145             :             }
    1146     1638035 :             L_temp_e = norm_x_e;
    1147     1638035 :             move16();
    1148     1638035 :             L_temp = Sqrt32( norm_x, &L_temp_e );
    1149     1638035 :             L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
    1150     1638035 :             IF( singularVectors[currChannel][idx] >= 0 )
    1151             :             {
    1152      617906 :                 ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
    1153      617906 :                 move32();
    1154             :             }
    1155             :             ELSE
    1156             :             {
    1157     1020129 :                 ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
    1158     1020129 :                 move32();
    1159             :             }
    1160             : 
    1161     1638035 :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors2_e[currChannel][idx], -norm_x, norm_x_e, &r_e );                                      /* exp(r_e) */
    1162     1638035 :             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors2_e[currChannel][idx], -( *g ), 0, &singularVectors2_e[currChannel][idx] ); /* exp(sing_exp) */
    1163     1638035 :             move32();
    1164             : 
    1165     1638035 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
    1166             : 
    1167     5179741 :             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1168             :             {
    1169     3541706 :                 temp_e = norm_l( singularVectors[currChannel][jCh] );
    1170     3541706 :                 secDiag[jCh] = Mpy_32_32( L_shl( singularVectors[currChannel][jCh], temp_e ), invVal ); /* exp(sing_exp + (singularVectors_e - sig_x_e) */
    1171     3541706 :                 move32();
    1172     3541706 :                 secDiag_exp[jCh] = add( sub( invVal_e, temp_e ), sub( singularVectors2_e[currChannel][jCh], r_e ) );
    1173     3541706 :                 move16();
    1174             :             }
    1175             : 
    1176     5539985 :             FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
    1177             :             {
    1178     3901950 :                 norm_x = 0;
    1179     3901950 :                 move32();
    1180     3901950 :                 norm_x_e = 0;
    1181     3901950 :                 move16();
    1182    14591642 :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1183             :                 {
    1184    10689692 :                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors2_e[iCh][jCh], singularVectors2_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
    1185             :                 }
    1186             : 
    1187    14591642 :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
    1188             :                 {
    1189    10689692 :                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors2_e[iCh][jCh], Mpy_32_32( norm_x, secDiag[jCh] ), add( norm_x_e, secDiag_exp[jCh] ), &singularVectors2_e[iCh][jCh] ); /* exp(sing_exp2) */
    1190    10689692 :                     move32();
    1191             :                 }
    1192             :             }
    1193             : 
    1194     5179741 :             FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
    1195             :             {
    1196     3541706 :                 singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) ); /* exp(sing_exp + sig_x_e) */
    1197     3541706 :                 move32();
    1198     3541706 :                 singularVectors2_e[currChannel][jCh] = add( singularVectors2_e[currChannel][jCh], *sig_x_e );
    1199     3541706 :                 move16();
    1200             :             }
    1201             :         }
    1202             :     }
    1203             : 
    1204     2584172 :     return;
    1205             : }
    1206             : 
    1207             : /*-------------------------------------------------------------------------
    1208             :  * singularVectorsAccumulationLeft()
    1209             :  *
    1210             :  *
    1211             :  *-------------------------------------------------------------------------*/
    1212             : 
    1213      836340 : static void singularVectorsAccumulationLeft_fx(
    1214             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    1215             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
    1216             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
    1217             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    1218             :     const Word16 nChannelsL, /* Q0 */
    1219             :     const Word16 nChannelsC  /* Q0 */
    1220             : )
    1221             : {
    1222             :     Word16 nCh, iCh, k;
    1223             :     Word16 nChannels;
    1224             :     Word32 norm_y, t_jj, t_ii;
    1225             :     Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
    1226             : 
    1227             :     /* Processing */
    1228      836340 :     nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
    1229             :     // FILE *fp = fopen("t_ii_out.txt","a");
    1230     3420512 :     FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* min(nChannelsL,ChannelsC) */
    1231             :     {
    1232     2584172 :         t_ii = singularValues[nCh]; /* exp(singularValues_e) */
    1233     2584172 :         move32();
    1234     2584172 :         t_ii_e = singularValues_e[nCh];
    1235     2584172 :         move16();
    1236             : 
    1237     6235952 :         FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1238             :         {
    1239     3651780 :             singularVectors_Left[nCh][iCh] = 0;
    1240     3651780 :             move32();
    1241             :         }
    1242             : 
    1243     2584172 :         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
    1244             :         {
    1245     2459612 :             t_ii = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
    1246     2459612 :             t_ii_e = sub( temp_exp, t_ii_e );
    1247             :             Word16 tempe;
    1248     2459612 :             Word32 temp = BASOP_Util_Divide3232_Scale_newton( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
    1249     2459612 :             tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
    1250             :             // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
    1251     6109793 :             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1252             :             {
    1253     3650181 :                 Word64 acc = 0;
    1254     3650181 :                 move64();
    1255             :                 Word64 prod[16];
    1256             :                 Word16 prod_e[16];
    1257     3650181 :                 Word16 max_e = -31;
    1258     3650181 :                 move16();
    1259    14448452 :                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
    1260             :                 {
    1261    10798271 :                     prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
    1262    10798271 :                     prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
    1263    10798271 :                     max_e = s_max( max_e, prod_e[k] );
    1264             :                 }
    1265             : 
    1266    14448452 :                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
    1267             :                 {
    1268    10798271 :                     acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) );
    1269             :                 }
    1270     3650181 :                 Word16 acc_e = W_norm( acc );
    1271     3650181 :                 acc = W_shl( acc, acc_e );
    1272             : 
    1273     3650181 :                 norm_y = W_extract_h( acc );
    1274     3650181 :                 norm_y_e = add( sub( max_e, acc_e ), 1 );
    1275     3650181 :                 t_jj = Mpy_32_32( temp, norm_y );
    1276     3650181 :                 t_jj_e = add( tempe, norm_y_e );
    1277    18098633 :                 FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
    1278             :                 {
    1279    14448452 :                     singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
    1280    14448452 :                     move32();
    1281             :                 }
    1282             :             }
    1283             : 
    1284     9285373 :             FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
    1285             :             {
    1286     6825761 :                 singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
    1287     6825761 :                 move32();
    1288     6825761 :                 singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
    1289     6825761 :                 move16();
    1290             :             }
    1291             :         }
    1292             :         ELSE
    1293             :         {
    1294      250719 :             FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
    1295             :             {
    1296      126159 :                 singularVectors_Left[iCh][nCh] = 0;
    1297      126159 :                 move32();
    1298             :             }
    1299             :         }
    1300             : #ifdef OPT_MCH_DEC_V1_NBE
    1301     2584172 :         Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 );
    1302     2584172 :         singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */
    1303     2584172 :         move32();
    1304     2584172 :         singularVectors_Left_e[nCh][nCh] = exp;
    1305     2584172 :         move16();
    1306             : #else  /* OPT_MCH_DEC_V1_NBE */
    1307             :         singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */
    1308             :         move32();
    1309             : #endif /* OPT_MCH_DEC_V1_NBE */
    1310             :     }
    1311             :     // fclose(fp);
    1312     3776236 :     FOR( nCh = 0; nCh < nChannelsL; nCh++ )
    1313             :     {
    1314    13543596 :         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
    1315             :         {
    1316    10603700 :             singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
    1317    10603700 :             move32();
    1318             :         }
    1319             :     }
    1320             : 
    1321      836340 :     return;
    1322             : }
    1323             : 
    1324             : /*-------------------------------------------------------------------------
    1325             :  * singularVectorsAccumulationRight()
    1326             :  *
    1327             :  *
    1328             :  *-------------------------------------------------------------------------*/
    1329             : 
    1330      836340 : static void singularVectorsAccumulationRight_fx(
    1331             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
    1332             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    1333             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
    1334             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
    1335             :     Word16 *secDiag_e,
    1336             :     const Word16 nChannelsC /* Q0 */
    1337             : )
    1338             : {
    1339             :     Word16 nCh, iCh, k;
    1340             :     Word16 nChannels;
    1341             :     Word32 norm_y, t_ii, ratio_float;
    1342      836340 :     Word16 norm_y_e, temp_exp1, sing_right_exp[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
    1343             : 
    1344             :     /* Processing */
    1345      836340 :     nChannels = nChannelsC; /* nChannelsC       Q0*/
    1346             : 
    1347             :     /* avoid compiler warning */
    1348      836340 :     t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
    1349      836340 :     move32();
    1350             : 
    1351     3420512 :     FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
    1352             :     {
    1353             : 
    1354     2584172 :         IF( LT_16( nCh, sub( nChannelsC, 1 ) ) ) /* nChannelsC */
    1355             :         {
    1356     1747832 :             IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
    1357             :             {
    1358             : 
    1359     5179741 :                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
    1360             :                 {
    1361     3541706 :                     ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
    1362     3541706 :                     singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
    1363     3541706 :                     temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
    1364     3541706 :                     move32();
    1365     3541706 :                     sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
    1366     3541706 :                     move16();
    1367             :                     // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
    1368             :                 }
    1369             : 
    1370     5179741 :                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1371             :                 {
    1372             : #ifdef OPT_MCH_DEC_V1_NBE
    1373     3541706 :                     Word64 norm_val = 0;
    1374     3541706 :                     move64();
    1375     3541706 :                     Word16 maxL_e = MIN_16;
    1376     3541706 :                     Word16 maxR_e = MIN_16;
    1377     3541706 :                     Word16 maxR2_e = MIN_16;
    1378     3541706 :                     move16();
    1379     3541706 :                     move16();
    1380     3541706 :                     move16();
    1381    13866634 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1382             :                     {
    1383    10324928 :                         maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] );
    1384    10324928 :                         maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] );
    1385    10324928 :                         maxR2_e = s_max( maxR2_e, sing_right_exp[k][nCh] );
    1386             :                     }
    1387             : #else  /* OPT_MCH_DEC_V1_NBE */
    1388             :                     norm_y = 0;
    1389             :                     move32();
    1390             :                     norm_y_e = 0;
    1391             :                     move16();
    1392             : #endif /* OPT_MCH_DEC_V1_NBE */
    1393             : 
    1394    13866634 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1395             :                     {
    1396             : #ifdef OPT_MCH_DEC_V1_NBE
    1397    10324928 :                         norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) );
    1398             : #else  /* OPT_MCH_DEC_V1_NBE */
    1399             :                         norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e );                               /* exp(norm_y_e) */
    1400             : #endif /* OPT_MCH_DEC_V1_NBE */
    1401             :                     }
    1402             : #ifdef OPT_MCH_DEC_V1_NBE
    1403     3541706 :                     norm_y_e = W_norm( norm_val );
    1404     3541706 :                     norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) );
    1405     3541706 :                     norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e );
    1406             : 
    1407     3541706 :                     Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) );
    1408             : #endif                                                      /* OPT_MCH_DEC_V1_NBE */
    1409    13866634 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1410             :                     {
    1411             : #ifdef OPT_MCH_DEC_V1_NBE
    1412    10324928 :                         Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] );
    1413    10324928 :                         Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) );
    1414    10324928 :                         singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */
    1415    10324928 :                         move32();
    1416    10324928 :                         singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */
    1417             : #else                                                                                                        /* OPT_MCH_DEC_V1_NBE */
    1418             :                         singularVectors_Right[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Right[k][iCh], sing_right_exp[k][iCh], Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ), add( norm_y_e, sing_right_exp[k][nCh] ), &sing_right_exp[k][iCh] ); /* exp(sing_right_exp) */
    1419             :                         move32();
    1420             :                         singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], sing_right_exp[k][iCh] ); /* Q31 */
    1421             : #endif                                                                                                       /* OPT_MCH_DEC_V1_NBE */
    1422    10324928 :                         move32();
    1423    10324928 :                         sing_right_exp[k][iCh] = 0;
    1424    10324928 :                         move16();
    1425             :                     }
    1426             :                 }
    1427             :             }
    1428             : 
    1429     5399612 :             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1430             :             {
    1431     3651780 :                 singularVectors_Right[nCh][iCh] = singularVectors_Right[iCh][nCh] = 0;
    1432     3651780 :                 move32();
    1433     3651780 :                 move32();
    1434             :             }
    1435             :         }
    1436     2584172 :         singularVectors_Right[nCh][nCh] = MAX_32;
    1437     2584172 :         move32();
    1438     2584172 :         t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
    1439     2584172 :         move32();
    1440             :     }
    1441      836340 :     return;
    1442             : }
    1443             : 
    1444             : /*-------------------------------------------------------------------------
    1445             :  * GivensRotation()
    1446             :  *
    1447             :  *
    1448             :  *-------------------------------------------------------------------------*/
    1449             : 
    1450             : 
    1451     8133261 : static void GivensRotation2_fx(
    1452             :     const Word32 x, /* exp(x_e) */
    1453             :     const Word16 x_e,
    1454             :     const Word32 z, /* exp(z_e) */
    1455             :     const Word16 z_e,
    1456             :     Word32 *result,
    1457             :     Word32 *resultInv,
    1458             :     Word16 *out_e,
    1459             :     Word16 *outInv_e )
    1460             : {
    1461             :     Word32 r;
    1462     8133261 :     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
    1463     8133261 :     r = L_max( r, 1 );
    1464     8133261 :     *outInv_e = *out_e;
    1465     8133261 :     move16();
    1466     8133261 :     *result = Sqrt32( r, out_e );
    1467     8133261 :     move32();
    1468             : 
    1469     8133261 :     *resultInv = ISqrt32( r, outInv_e );
    1470     8133261 :     move32();
    1471     8133261 : }
    1472             : 
    1473     1725050 : static Word32 GivensRotation_fx(
    1474             :     const Word32 x, /* exp(x_e) */
    1475             :     const Word16 x_e,
    1476             :     const Word32 z, /* exp(z_e) */
    1477             :     const Word16 z_e,
    1478             :     Word16 *out_e )
    1479             : {
    1480             :     Word32 r;
    1481             : 
    1482     1725050 :     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
    1483     1725050 :     r = Sqrt32( r, out_e );
    1484     1725050 :     return ( r );
    1485             : }
    1486             : 
    1487             : /*-------------------------------------------------------------------------
    1488             :  * maxWithSign()
    1489             :  *
    1490             :  *
    1491             :  *-------------------------------------------------------------------------*/
    1492             : 
    1493    25373080 : static Word32 maxWithSign_fx(
    1494             :     const Word32 a /* Qx */
    1495             : )
    1496             : {
    1497             :     Word32 result;
    1498    25373080 :     IF( a >= 0 )
    1499             :     {
    1500    13241407 :         result = L_max( a, SVD_MINIMUM_VALUE_FX );
    1501             :     }
    1502             :     ELSE
    1503             :     {
    1504    12131673 :         result = L_min( a, -SVD_MINIMUM_VALUE_FX );
    1505             :     }
    1506    25373080 :     return result;
    1507             : }
    1508             : 
    1509             : /*-------------------------------------------------------------------------
    1510             :  * flushToZeroArray()
    1511             :  *
    1512             :  *
    1513             :  *-------------------------------------------------------------------------*/
    1514             : 
    1515             : 
    1516             : /*-------------------------------------------------------------------------
    1517             :  * flushToZeroMat()
    1518             :  *
    1519             :  *
    1520             :  *-------------------------------------------------------------------------*/

Generated by: LCOV version 1.14