LCOV - code coverage report
Current view: top level - lib_dec - ivas_svd_dec_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main @ d6739f868d005b89f6b8a19b0accc1255692b205 Lines: 566 576 98.3 %
Date: 2025-11-21 02:40:13 Functions: 14 14 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : #include <stdint.h>
      33             : #include "options.h"
      34             : #include "prot_fx.h"
      35             : #include "ivas_stat_dec.h"
      36             : #include "ivas_cnst.h"
      37             : #include <math.h>
      38             : #include "wmc_auto.h"
      39             : #include "ivas_prot_fx.h"
      40             : 
      41             : /*-----------------------------------------------------------------------*
      42             :  * Local constants
      43             :  *-----------------------------------------------------------------------*/
      44             : 
      45             : /* The SVD is sensitive to changes to the following constants, so please be careful when trying to tune things   */
      46             : #define SVD_MAX_NUM_ITERATION       75    /* maximum number of interations before exiting the SVD            */
      47             : #define SVD_MINIMUM_VALUE_FX        ( 2 ) /* minimum value                                                   */
      48             : #define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 )
      49             : #define CONVERGENCE_FACTOR_FX       214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */
      50             : 
      51             : /*-----------------------------------------------------------------------*
      52             :  * Local function prototypes
      53             :  *-----------------------------------------------------------------------*/
      54             : 
      55             : static void HouseholderReduction_fx(
      56             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
      57             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* exp(singularValues_fx_e) */
      58             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
      59             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_fx_e) */
      60             :     Word16 singularVectors_Left_e,
      61             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
      62             :     Word16 *secDiag_fx_e,
      63             :     const Word16 nChannelsL, /* Q0 */
      64             :     const Word16 nChannelsC, /* Q0 */
      65             :     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
      66             :     Word16 *eps_x_fx_e );
      67             : 
      68             : static void biDiagonalReductionLeft_fx(
      69             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_e) */
      70             :     Word16 singularValues_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
      71             :     const Word16 nChannelsL,
      72             :     const Word16 nChannelsC,  /* Q0 */
      73             :     const Word16 currChannel, /* Q0 */
      74             :     Word32 *g,
      75             :     Word16 *g_e );
      76             : 
      77             : static void biDiagonalReductionRight_fx(
      78             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
      79             :     Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
      80             :     const Word16 nChannelsL,  /* Q0 */
      81             :     const Word16 nChannelsC,  /* Q0 */
      82             :     const Word16 currChannel, /* Q0 */
      83             :     Word32 *g,
      84             :     Word16 *g_e );
      85             : 
      86             : static void singularVectorsAccumulationLeft_fx(
      87             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
      88             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
      89             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
      90             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
      91             :     const Word16 nChannelsL, /* Q0 */
      92             :     const Word16 nChannelsC  /* Q0 */
      93             : );
      94             : 
      95             : static void singularVectorsAccumulationRight_fx(
      96             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* singularVectors_e */
      97             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* singularVectors_e */
      98             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
      99             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
     100             :     Word16 *secDiag_e,
     101             :     const Word16 nChannelsC /* Q0 */
     102             : );
     103             : 
     104             : static Word32 maxWithSign_fx(
     105             :     const Word32 a /* Qx */
     106             : );
     107             : 
     108             : static Word16 BidagonalDiagonalisation_fx(
     109             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)          singularValues_fx_e*/
     110             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)         singularValues_fx_e*/
     111             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)         singularValues_fx_e*/
     112             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
     113             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)                            */
     114             :     Word16 *secDiag_fx_e,                                   /* i/o:                                                       */
     115             :     const Word16 nChannelsL,                                /* i  : number of rows in the matrix to be decomposed       Q0*/
     116             :     const Word16 nChannelsC,                                /* i  : number of columns in the matrix to be decomposed    Q0*/
     117             :     const Word32 eps_x,                                     /* i  :                                                eps_x_e*/
     118             :     const Word16 eps_x_e                                    /* i  :                                                       */
     119             : );
     120             : 
     121             : static void ApplyRotation_fx(
     122             :     Word32 singularVector[][MAX_OUTPUT_CHANNELS],
     123             :     const Word32 c, /* exp(c_e)*/
     124             :     const Word16 c_e,
     125             :     const Word32 s, /* exp(s_e) */
     126             :     const Word16 s_e,
     127             :     Word32 x11, /* exp(x11_e) */
     128             :     Word16 x11_e,
     129             :     Word32 x12, /* exp(x12_e) */
     130             :     Word16 x12_e,
     131             :     Word32 *d, /* exp(d_e) */
     132             :     Word16 *d_e,
     133             :     Word32 *g, /* exp(g_e) */
     134             :     Word16 *g_e,
     135             :     const Word16 currentIndex1, /* Q0 */
     136             :     const Word16 currentIndex2, /* Q0 */
     137             :     const Word16 nChannels      /* Q0 */
     138             : );
     139             : 
     140             : static void GivensRotation2_fx(
     141             :     const Word32 x, /* exp(x_e) */
     142             :     const Word16 x_e,
     143             :     const Word32 z, /* exp(z_e) */
     144             :     const Word16 z_e,
     145             :     Word32 *result,
     146             :     Word32 *resultInv,
     147             :     Word16 *out_e,
     148             :     Word16 *outInv_e );
     149             : 
     150             : static Word32 GivensRotation_fx(
     151             :     const Word32 x, /* exp(x_e) */
     152             :     const Word16 x_e,
     153             :     const Word32 z, /* exp(z_e) */
     154             :     const Word16 z_e,
     155             :     Word16 *out_e );
     156             : 
     157             : static void ApplyQRTransform_fx(
     158             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)         singularValues_e*/
     159             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)        singularValues_e*/
     160             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)        singularValues_e*/
     161             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* i/o:                                          secDiag_e*/
     162             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     163             :     Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
     164             :     const Word16 startIndex,   /* i  :                            Q0*/
     165             :     const Word16 currentIndex, /* i  :                            Q0*/
     166             :     const Word16 nChannelsL,   /* i  : number of rows in the matrix to be decomposed            Q0*/
     167             :     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed            Q0*/
     168             : );
     169             : 
     170             : /*-------------------------------------------------------------------------
     171             :  * mat2svdMat()
     172             :  *
     173             :  * external matrix format to internal
     174             :  *-------------------------------------------------------------------------*/
     175             : 
     176     1035976 : void mat2svdMat_fx(
     177             :     const Word32 *mat,                                       /* i  : matrix as column ordered vector    Qx*/
     178             :     Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* o  : matrix as two-dimensional arry     Qx*/
     179             :     const Word16 nRows,                                      /* i  : number of rows of the matrix       Q0*/
     180             :     const Word16 mCols,                                      /* i  : number of columns of the matrix    Q0*/
     181             :     const Word16 transpose                                   /* i  : flag indication transposition      Q0*/
     182             : )
     183             : {
     184             :     Word16 i, j;
     185             : 
     186     1035976 :     IF( transpose )
     187             :     {
     188      854511 :         FOR( i = 0; i < mCols; i++ )
     189             :         {
     190     2155327 :             FOR( j = 0; j < nRows; j++ )
     191             :             {
     192     1441208 :                 svdMat[i][j] = mat[j + ( nRows * i )]; /* Qx */
     193     1441208 :                 move32();
     194             :             }
     195             : 
     196      714119 :             set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, nRows ) );
     197             :         }
     198             : 
     199     1672545 :         FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
     200             :         {
     201     1532153 :             set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
     202             :         }
     203             :     }
     204             :     ELSE
     205             :     {
     206     3800505 :         FOR( i = 0; i < nRows; i++ )
     207             :         {
     208    14545256 :             FOR( j = 0; j < mCols; j++ )
     209             :             {
     210    11640335 :                 svdMat[i][j] = mat[i + ( nRows * j )]; /* Qx */
     211    11640335 :                 move32();
     212             :             }
     213             : 
     214     2904921 :             set_zero_fx( &svdMat[i][mCols], sub( MAX_OUTPUT_CHANNELS, mCols ) );
     215             :         }
     216             : 
     217    12320007 :         FOR( ; i < MAX_OUTPUT_CHANNELS; i++ )
     218             :         {
     219    11424423 :             set_zero_fx( svdMat[i], MAX_OUTPUT_CHANNELS );
     220             :         }
     221             :     }
     222             : 
     223     1035976 :     return;
     224             : }
     225             : 
     226             : 
     227             : /*---------------------------------------------------------------------*
     228             :  * svdMat2mat()
     229             :  *
     230             :  * transfer a matrix from a two dimensional array  to a column wise ordered vector
     231             :  *---------------------------------------------------------------------*/
     232             : 
     233     1304724 : void svdMat2mat_fx(
     234             :     Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* i  : matrix as two-dimensional arry     Qx*/
     235             :     Word32 *mat,                                             /* o  : matrix as column ordered vector    Qx*/
     236             :     const Word16 nRows,                                      /* i  : number of rows of the matrix       Q0*/
     237             :     const Word16 mCols                                       /* i  : number of columns of the matrix    Q0*/
     238             : )
     239             : {
     240             :     Word16 i, j;
     241             : 
     242     5035125 :     FOR( i = 0; i < nRows; i++ )
     243             :     {
     244    14803895 :         FOR( j = 0; j < mCols; j++ )
     245             :         {
     246    11073494 :             mat[i + ( nRows * j )] = svdMat[i][j]; /* Qx */
     247    11073494 :             move32();
     248             :         }
     249             :     }
     250             : 
     251     1304724 :     return;
     252             : }
     253             : 
     254             : /*-------------------------------------------------------------------------
     255             :  * svd()
     256             :  *
     257             :  * perform a singular value decomposition X=USV of a matrix X
     258             :  *-------------------------------------------------------------------------*/
     259             : 
     260             : /*! r: error or success */
     261     1035976 : Word16 svd_fx(
     262             :     Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i  : matrix to be decomposed (M)            InputMatrix_e*/
     263             :     Word16 InputMatrix_e,
     264             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* o  : left singular vectors (U)          singularValues_fx_e*/
     265             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* o  : singular values vector (S)         singularValues_fx_e*/
     266             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o  : right singular vectors (V)         singularValues_fx_e*/
     267             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
     268             :     const Word16 nChannelsL, /* i  : number of rows in the matrix to be decomposed       Q0*/
     269             :     const Word16 nChannelsC  /* i  : number of columns in the matrix to be decomposed    Q0*/
     270             : )
     271             : {
     272             :     Word16 iCh, jCh;
     273             :     Word16 lengthSingularValues;
     274             :     Word16 errorMessage, condition;
     275             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS];
     276             :     Word16 secDiag_fx_e[MAX_OUTPUT_CHANNELS];
     277     1035976 :     move16();
     278     1035976 :     Word32 eps_x_fx = 0, temp_fx;
     279     1035976 :     move16();
     280     1035976 :     Word16 eps_x_fx_e = 0;
     281     1035976 :     move16();
     282             :     Word16 temp_fx_e;
     283     1035976 :     push_wmops( "svd_fx" );
     284             : 
     285             : 
     286             :     /* Collecting Values */
     287     4655016 :     FOR( iCh = 0; iCh < nChannelsL; iCh++ )
     288             :     {
     289    16700583 :         FOR( jCh = 0; jCh < nChannelsC; jCh++ )
     290             :         {
     291    13081543 :             singularVectors_Left_fx[iCh][jCh] = InputMatrix[iCh][jCh]; /* InputMatrix_e */
     292    13081543 :             move32();
     293             :         }
     294             :     }
     295             : 
     296             :     /* Householder reduction */
     297     1035976 :     HouseholderReduction_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e );
     298             :     /* Set extremely small values to zero if needed */
     299             :     // flushToZeroArray(singularValues, max_length);
     300             :     // flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL);
     301             :     // flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC);
     302             : 
     303             :     /* BidagonalDiagonalisation */
     304     1035976 :     errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Q0 */
     305             :     /* Sort the singular values descending order */
     306     1035976 :     lengthSingularValues = s_min( nChannelsL, nChannelsC ); /* Q0 */
     307             : 
     308             :     DO
     309             :     {
     310     1422412 :         condition = 0;
     311     1422412 :         move16();
     312     4854429 :         FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
     313             :         {
     314     3432017 :             IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) )
     315             :             {
     316      488184 :                 condition = 1;
     317      488184 :                 move16();
     318      488184 :                 temp_fx = singularValues_fx[iCh]; /* singularValues_fx_e */
     319      488184 :                 move32();
     320      488184 :                 singularValues_fx[iCh] = singularValues_fx[iCh + 1]; /* singularValues_fx_e */
     321      488184 :                 move32();
     322      488184 :                 singularValues_fx[iCh + 1] = temp_fx; /* singularValues_fx_e */
     323      488184 :                 move32();
     324      488184 :                 temp_fx_e = singularValues_fx_e[iCh];
     325      488184 :                 move16();
     326      488184 :                 singularValues_fx_e[iCh] = singularValues_fx_e[iCh + 1];
     327      488184 :                 move16();
     328      488184 :                 singularValues_fx_e[iCh + 1] = temp_fx_e;
     329      488184 :                 move16();
     330             : 
     331     2714423 :                 FOR( jCh = 0; jCh < nChannelsL; ++jCh )
     332             :                 {
     333     2226239 :                     temp_fx = singularVectors_Left_fx[jCh][iCh]; /* singularValues_fx_e */
     334     2226239 :                     move32();
     335     2226239 :                     singularVectors_Left_fx[jCh][iCh] = singularVectors_Left_fx[jCh][iCh + 1]; /* singularValues_fx_e */
     336     2226239 :                     move32();
     337     2226239 :                     singularVectors_Left_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
     338     2226239 :                     move32();
     339             :                 }
     340             : 
     341     2705334 :                 FOR( jCh = 0; jCh < nChannelsC; ++jCh )
     342             :                 {
     343     2217150 :                     temp_fx = singularVectors_Right_fx[jCh][iCh]; /* singularValues_fx_e */
     344     2217150 :                     move32();
     345     2217150 :                     singularVectors_Right_fx[jCh][iCh] = singularVectors_Right_fx[jCh][iCh + 1]; /* singularValues_fx_e */
     346     2217150 :                     move32();
     347     2217150 :                     singularVectors_Right_fx[jCh][iCh + 1] = temp_fx; /* singularValues_fx_e */
     348     2217150 :                     move32();
     349             :                 }
     350             :             }
     351             :         }
     352             :     }
     353     1422412 :     WHILE( EQ_16( condition, 1 ) );
     354             : 
     355     1035976 :     pop_wmops();
     356     1035976 :     return ( errorMessage );
     357             : }
     358             : 
     359             : 
     360             : /*-----------------------------------------------------------------------*
     361             :  * Local functions
     362             :  *-----------------------------------------------------------------------*/
     363             : 
     364             : /*-------------------------------------------------------------------------
     365             :  * BidagonalDiagonalisation()
     366             :  *
     367             :  *
     368             :  *-------------------------------------------------------------------------*/
     369             : 
     370     1035976 : static Word16 BidagonalDiagonalisation_fx(
     371             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)          singularValues_fx_e*/
     372             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)         singularValues_fx_e*/
     373             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)         singularValues_fx_e*/
     374             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o:                                           secDiag_fx_e*/
     375             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],        /* i/o: singular values vector (S)                            */
     376             :     Word16 *secDiag_new_e,                                  /* i/o:                                                       */
     377             :     const Word16 nChannelsL,                                /* i  : number of rows in the matrix to be decomposed       Q0*/
     378             :     const Word16 nChannelsC,                                /* i  : number of columns in the matrix to be decomposed    Q0*/
     379             :     const Word32 eps_x,                                     /* i  :                                                eps_x_e*/
     380             :     const Word16 eps_x_e                                    /* i  :                                                       */
     381             : )
     382             : {
     383             :     Word16 kCh, nCh, iCh, jCh, split;
     384             :     Word32 c, s, f1, f2;
     385             :     Word16 c_e, s_e, f1_e, f2_e;
     386     1035976 :     Word16 x11_e = 0, x12_e = 0;
     387     1035976 :     move16();
     388     1035976 :     move16();
     389             :     Word16 temp_exp;
     390             :     Word16 temp_exp2;
     391     1035976 :     Word32 g = 0;
     392     1035976 :     move16();
     393     1035976 :     Word16 g_e = 0;
     394     1035976 :     move16();
     395             :     Word16 convergence, iteration, found_split;
     396     1035976 :     Word16 error = 0;
     397     1035976 :     move16();
     398             :     Word32 temp;
     399             :     Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS];
     400     1035976 :     Copy( singularValues_fx_e, singularValues_new_e, MAX_OUTPUT_CHANNELS );
     401             : 
     402     4223041 :     FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */
     403             :     {
     404     3187065 :         convergence = 0;
     405     3187065 :         move16();
     406     3187065 :         iteration = 0;
     407     3187065 :         move16();
     408     3187065 :         split = iCh - 1; /* Q0 */
     409     3187065 :         move16();
     410             : 
     411     8471324 :         WHILE( EQ_16( convergence, 0 ) )
     412             :         {
     413     5284259 :             iteration = add( iteration, 1 ); /* Q0 */
     414     5284259 :             found_split = 1;                 /* Q0 */
     415     5284259 :             move16();
     416             : 
     417    10219182 :             FOR( jCh = iCh; jCh >= 0; jCh-- )
     418             :             {
     419    10219182 :                 Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e );
     420    10219182 :                 IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */
     421             :                 {
     422     5231849 :                     found_split = 0;
     423     5231849 :                     move16();
     424     5231849 :                     BREAK;
     425             :                 }
     426     4987333 :                 com_e = s_max( singularValues_new_e[jCh - 1], eps_x_e );
     427     4987333 :                 IF( LE_32( L_shr( L_abs( singularValues_fx[jCh - 1] ), sub( com_e, singularValues_new_e[jCh - 1] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[jCh - 1] vanishing compared to eps_x */
     428             :                 {
     429       52410 :                     BREAK;
     430             :                 }
     431             :             }
     432             : 
     433             :             // convergence = ( jCh == iCh ) ? 1 : 0;
     434     5284259 :             IF( EQ_16( jCh, iCh ) )
     435             :             {
     436     3187065 :                 convergence = 1; /* Q0 */
     437     3187065 :                 move16();
     438             :             }
     439             :             ELSE
     440             :             {
     441     2097194 :                 convergence = 0;
     442     2097194 :                 move16();
     443             :             }
     444             : 
     445     5284259 :             IF( found_split )
     446             :             {
     447       52410 :                 s = MAX_32;
     448       52410 :                 move32();
     449       52410 :                 s_e = 0;
     450       52410 :                 move16();
     451       52410 :                 c = 0;
     452       52410 :                 move32();
     453       52410 :                 c_e = 0;
     454       52410 :                 move16();
     455       52410 :                 split = sub( jCh, 1 ); /* Q0 */
     456      104890 :                 FOR( kCh = jCh; kCh <= iCh; kCh++ )
     457             :                 {
     458       52483 :                     g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */
     459       52483 :                     g_e = add( s_e, secDiag_new_e[kCh] );
     460       52483 :                     secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
     461       52483 :                     secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
     462       52483 :                     Word16 com_e = s_max( g_e, eps_x_e );
     463       52483 :                     IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) )
     464             :                     {
     465           3 :                         BREAK;
     466             :                     }
     467             : 
     468       52480 :                     c = singularValues_fx[kCh]; /* exp(singularValues_new_e) */
     469       52480 :                     c_e = singularValues_new_e[kCh];
     470       52480 :                     GivensRotation2_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_fx[kCh], &temp, &singularValues_new_e[kCh], &temp_exp ); /* exp(singularValues_new_e) */
     471       52480 :                     c = Mpy_32_32( c, temp );
     472       52480 :                     c_e = add( c_e, temp_exp );
     473       52480 :                     temp_exp2 = norm_l( c );
     474       52480 :                     c = L_shl( c, temp_exp2 );
     475       52480 :                     c_e = sub( c_e, temp_exp2 );
     476       52480 :                     s = Mpy_32_32( -g, temp );
     477       52480 :                     s_e = add( g_e, temp_exp );
     478       52480 :                     temp_exp2 = norm_l( s );
     479       52480 :                     s = L_shl( s, temp_exp2 );
     480       52480 :                     s_e = sub( s_e, temp_exp2 );
     481       52480 :                     ApplyRotation_fx( singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */
     482             :                 }
     483             :             }
     484             : 
     485     5284259 :             IF( convergence )
     486             :             {
     487     3187065 :                 singularValues_fx[iCh] = singularValues_fx[iCh];
     488     3187065 :                 move32();
     489     3187065 :                 IF( singularValues_fx[iCh] < 0 )
     490             :                 {
     491      612067 :                     singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
     492      612067 :                     move32();
     493     2244377 :                     FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     494             :                     {
     495     1632310 :                         singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
     496     1632310 :                         move32();
     497             :                     }
     498             :                 }
     499             :             }
     500             :             ELSE
     501             :             {
     502     2097194 :                 IF( GE_16( iteration, SVD_MAX_NUM_ITERATION ) )
     503             :                 {
     504           0 :                     IF( LT_32( singularValues_fx[iCh], 0 ) )
     505             :                     {
     506           0 :                         singularValues_fx[iCh] = L_negate( singularValues_fx[iCh] );
     507           0 :                         move32();
     508             : 
     509           0 :                         FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */
     510             :                         {
     511           0 :                             singularVectors_Right_fx[nCh][iCh] = L_negate( singularVectors_Right_fx[nCh][iCh] );
     512           0 :                             move32();
     513             :                         }
     514             :                     }
     515           0 :                     error = 1;
     516           0 :                     move16();
     517           0 :                     convergence = 1;
     518           0 :                     move16();
     519             :                 }
     520             :                 ELSE
     521             :                 {
     522     2097194 :                     ApplyQRTransform_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_new_e, secDiag_new_e, jCh, iCh, nChannelsL, nChannelsC ); /* nChannelsC */
     523             :                 }
     524             :             }
     525             :         }
     526             :     }
     527             : 
     528             :     // rescaling block
     529     1035976 :     Copy( singularValues_new_e, singularValues_fx_e, MAX_OUTPUT_CHANNELS );
     530             : 
     531             : 
     532     1035976 :     return ( error );
     533             : }
     534             : 
     535             : /*-------------------------------------------------------------------------
     536             :  * ApplyQRTransform()
     537             :  *
     538             :  *
     539             :  *-------------------------------------------------------------------------*/
     540             : 
     541     2097194 : static void ApplyQRTransform_fx(
     542             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U)                     singularValues_e*/
     543             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S)                    singularValues_e*/
     544             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V)                    singularValues_e*/
     545             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* i/o:                                                      secDiag_e*/
     546             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     547             :     Word16 secDiag_e[MAX_OUTPUT_CHANNELS],
     548             :     const Word16 startIndex,   /* i  :                            Q0*/
     549             :     const Word16 currentIndex, /* i  :                            Q0*/
     550             :     const Word16 nChannelsL,   /* i  : number of rows in the matrix to be decomposed            Q0*/
     551             :     const Word16 nChannelsC    /* i  : number of columns in the matrix to be decomposed         Q0*/
     552             : )
     553             : {
     554             :     Word32 temp;
     555             :     Word16 temp_e;
     556             :     Word16 temp_norm_e;
     557             :     Word16 ch, split;
     558     2097194 :     Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
     559     2097194 :     move32();
     560     2097194 :     move32();
     561     2097194 :     move32();
     562     2097194 :     move32();
     563     2097194 :     move32();
     564     2097194 :     move32();
     565     2097194 :     move32();
     566     2097194 :     move32();
     567     2097194 :     Word16 d_e = 0, g_e = 0, r_e = 0, x_ii_e = 0, x_split_e = 0, x_kk_e = 0, mu_e = 0, aux_e = 0;
     568     2097194 :     move16();
     569     2097194 :     move16();
     570     2097194 :     move16();
     571     2097194 :     move16();
     572     2097194 :     move16();
     573     2097194 :     move16();
     574     2097194 :     move16();
     575     2097194 :     move16();
     576             :     Word32 L_temp1, L_temp2, L_temp3, L_temp4;
     577             :     Word16 L_temp1_e, L_temp2_e, L_temp3_e, L_temp4_e, temp_exp;
     578     2097194 :     Word32 c = MAX_32;
     579     2097194 :     move32();
     580     2097194 :     Word16 c_e = 0;
     581     2097194 :     move16();
     582     2097194 :     Word32 s = MAX_32;
     583     2097194 :     move32();
     584     2097194 :     Word16 s_e = 0;
     585     2097194 :     move16();
     586             : 
     587     2097194 :     x_kk = singularValues[currentIndex]; /* exp(singularValues_e) */
     588     2097194 :     move32();
     589     2097194 :     x_kk_e = singularValues_e[currentIndex];
     590     2097194 :     move16();
     591     2097194 :     x_ii = singularValues[startIndex]; /* exp(singularValues_e) */
     592     2097194 :     move32();
     593     2097194 :     x_ii_e = singularValues_e[startIndex];
     594     2097194 :     move16();
     595     2097194 :     split = sub( currentIndex, 1 ); /* Q0 */
     596     2097194 :     move16();
     597             : 
     598     2097194 :     x_split = singularValues[split]; /* exp(singularValues_e) */
     599     2097194 :     move32();
     600     2097194 :     x_split_e = singularValues_e[split];
     601     2097194 :     move16();
     602     2097194 :     g = secDiag[split]; /* exp(secDiag_e) */
     603     2097194 :     move32();
     604     2097194 :     g_e = secDiag_e[split];
     605     2097194 :     move16();
     606     2097194 :     r = secDiag[currentIndex]; /* exp(secDiag_e) */
     607     2097194 :     move32();
     608     2097194 :     r_e = secDiag_e[currentIndex];
     609     2097194 :     move16();
     610             : 
     611             :     // d = (x_split + x_kk) * (x_split - x_kk) + (g + r) * (g - r);
     612     2097194 :     L_temp1 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, x_kk, x_kk_e, &L_temp1_e );                                                                           /* exp(L_temp1_e) */
     613     2097194 :     L_temp2 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, L_negate( x_kk ), x_kk_e, &L_temp2_e );                                                               /* exp(L_temp2_e) */
     614     2097194 :     L_temp3 = BASOP_Util_Add_Mant32Exp( g, g_e, r, r_e, &L_temp3_e );                                                                                             /* exp(L_temp3_e) */
     615     2097194 :     L_temp4 = BASOP_Util_Add_Mant32Exp( g, g_e, L_negate( r ), r_e, &L_temp4_e );                                                                                 /* exp(L_temp4_e) */
     616     2097194 :     d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( L_temp3, L_temp4 ), add( L_temp3_e, L_temp4_e ), &d_e ); /* exp(d_e) */
     617             : 
     618             :     // d /= maxWithSign((r + r) * x_split);
     619     2097194 :     L_temp1 = BASOP_Util_Add_Mant32Exp( r, r_e, r, r_e, &L_temp1_e ); /* exp(L_temp1_e) */
     620     2097194 :     L_temp1 = maxWithSign_fx( Mpy_32_32( L_temp1, x_split ) );        /* exp(L_temp1_e + x_split_e) */
     621     2097194 :     L_temp1_e = add( L_temp1_e, x_split_e );
     622     2097194 :     d = BASOP_Util_Divide3232_Scale_newton( d, L_temp1, &temp_exp ); /* temp_exp + d_e - L_temp1_e */
     623     2097194 :     d_e = add( temp_exp, sub( d_e, L_temp1_e ) );
     624             : 
     625     2097194 :     g = GivensRotation_fx( MAX_32, 0, d, d_e, &g_e );
     626             : 
     627             :     // mu = x_split / maxWithSign(d + (d >= 0.0f ? 1 : (-1)) * fabsf(g)) - r;
     628             :     // L_temp1 = d >= 0 ? L_abs( g ) : -L_abs( g );
     629     2097194 :     IF( d >= 0 )
     630             :     {
     631     1234116 :         L_temp1 = L_abs( g );
     632             :     }
     633             :     ELSE
     634             :     {
     635      863078 :         L_temp1 = L_negate( L_abs( g ) );
     636             :     }
     637     2097194 :     L_temp1_e = g_e;
     638     2097194 :     move16();
     639     2097194 :     L_temp2 = maxWithSign_fx( BASOP_Util_Add_Mant32Exp( d, d_e, L_temp1, L_temp1_e, &L_temp2_e ) ); /* exp(L_temp2_e) */
     640     2097194 :     mu = BASOP_Util_Divide3232_Scale_newton( x_split, L_temp2, &mu_e );                             /* exp(mu_e + (x-plit_e - L_temp2_e)) */
     641     2097194 :     mu_e = add( mu_e, sub( x_split_e, L_temp2_e ) );
     642     2097194 :     mu = BASOP_Util_Add_Mant32Exp( mu, mu_e, L_negate( r ), r_e, &mu_e ); /* exp(mu_e) */
     643             : 
     644             :     // d = ((x_ii + x_kk) * (x_ii - x_kk) + r * mu) / maxWithSign(x_ii);
     645     2097194 :     L_temp1 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, x_kk, x_kk_e, &L_temp1_e );                                                           /* exp(L_temp1_e) */
     646     2097194 :     L_temp2 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, L_negate( x_kk ), x_kk_e, &L_temp2_e );                                               /* exp(L_temp2_e) */
     647     2097194 :     d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( r, mu ), add( r_e, mu_e ), &d_e ); /* exp(d_e) */
     648     2097194 :     d = BASOP_Util_Divide3232_Scale_newton( d, maxWithSign_fx( x_ii ), &temp_exp );                                                         /* exp(temp_exp + (d_e - x_ii_e) */
     649     2097194 :     d_e = add( temp_exp, sub( d_e, x_ii_e ) );
     650             : 
     651             :     /*QR transformation*/
     652     7032117 :     FOR( ch = startIndex; ch <= split; ch++ )
     653             :     {
     654     4934923 :         r = Mpy_32_32( s, secDiag[ch + 1] ); /* exp(s_e + secDiag_e) */
     655     4934923 :         r_e = add( s_e, secDiag_e[ch + 1] );
     656     4934923 :         g = Mpy_32_32( c, secDiag[ch + 1] ); /* exp(c_e + secDiag_e) */
     657     4934923 :         g_e = add( c_e, secDiag_e[ch + 1] );
     658             : 
     659     4934923 :         GivensRotation2_fx( d, d_e, r, r_e, &secDiag[ch], &temp, &secDiag_e[ch], &temp_e ); /* exp(secDiag_e) */
     660     4934923 :         c = Mpy_32_32( d, temp );
     661     4934923 :         c_e = add( temp_e, d_e );
     662     4934923 :         temp_norm_e = norm_l( c );
     663     4934923 :         c = L_shl( c, temp_norm_e );
     664     4934923 :         c_e = sub( c_e, temp_norm_e );
     665     4934923 :         s = Mpy_32_32( r, temp );
     666     4934923 :         s_e = add( r_e, temp_e );
     667     4934923 :         temp_norm_e = norm_l( s );
     668     4934923 :         s = L_shl( s, temp_norm_e );
     669     4934923 :         s_e = sub( s_e, temp_norm_e );
     670     4934923 :         r = Mpy_32_32( s, singularValues[ch + 1] ); /* exp(r_e + secDiag_e) */
     671     4934923 :         r_e = add( s_e, singularValues_e[ch + 1] );
     672     4934923 :         x_split = Mpy_32_32( c, singularValues[ch + 1] ); /* exp(c_e + secDiag_e) */
     673     4934923 :         x_split_e = add( c_e, singularValues_e[ch + 1] );
     674             : 
     675     4934923 :         aux = g; /* exp(g_e) */
     676     4934923 :         move32();
     677     4934923 :         aux_e = g_e;
     678     4934923 :         move16();
     679             : 
     680             :         // ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
     681     4934923 :         ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
     682             : 
     683     4934923 :         GivensRotation2_fx( d, d_e, r, r_e, &singularValues[ch], &aux, &singularValues_e[ch], &aux_e ); /* exp(singularValues_e) */
     684     4934923 :         IF( singularValues[ch] != 0 )
     685             :         {
     686             : 
     687     4934923 :             c = Mpy_32_32( d, aux ); /* exp(d_e + aux_e) */
     688     4934923 :             c_e = add( d_e, aux_e );
     689     4934923 :             temp_norm_e = norm_l( c );
     690     4934923 :             c = L_shl( c, temp_norm_e );
     691     4934923 :             c_e = sub( c_e, temp_norm_e );
     692             : 
     693     4934923 :             s = Mpy_32_32( r, aux ); /* exp(r_e + aux_e) */
     694     4934923 :             s_e = add( r_e, aux_e );
     695     4934923 :             temp_norm_e = norm_l( s );
     696     4934923 :             s = L_shl( s, temp_norm_e );
     697     4934923 :             s_e = sub( s_e, temp_norm_e );
     698             :         }
     699             : 
     700             :         // ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
     701     4934923 :         ApplyRotation_fx( singularVectors_Left, c, c_e, s, s_e, g, g_e, x_split, x_split_e, &d, &d_e, &x_ii, &x_ii_e, ch + 1, ch, nChannelsL );
     702             :     }
     703             : 
     704     2097194 :     secDiag[startIndex] = 0;
     705     2097194 :     move32();
     706     2097194 :     secDiag[currentIndex] = d; /* exp(d_e) */
     707     2097194 :     move32();
     708     2097194 :     secDiag_e[currentIndex] = d_e;
     709     2097194 :     move16();
     710     2097194 :     singularValues[currentIndex] = x_ii; /* exp(x_ii_e) */
     711     2097194 :     move32();
     712     2097194 :     singularValues_e[currentIndex] = x_ii_e;
     713     2097194 :     move16();
     714             : 
     715     2097194 :     return;
     716             : }
     717             : 
     718             : /*-------------------------------------------------------------------------
     719             :  * ApplyRotation()
     720             :  *
     721             :  *
     722             :  *-------------------------------------------------------------------------*/
     723             : 
     724     9922326 : static void ApplyRotation_fx(
     725             :     Word32 singularVector[][MAX_OUTPUT_CHANNELS],
     726             :     const Word32 c, /* exp(c_e)*/
     727             :     const Word16 c_e,
     728             :     const Word32 s, /* exp(s_e) */
     729             :     const Word16 s_e,
     730             :     Word32 x11, /* exp(x11_e) */
     731             :     Word16 x11_e,
     732             :     Word32 x12, /* exp(x12_e) */
     733             :     Word16 x12_e,
     734             :     Word32 *d, /* exp(d_e) */
     735             :     Word16 *d_e,
     736             :     Word32 *g, /* exp(g_e) */
     737             :     Word16 *g_e,
     738             :     const Word16 currentIndex1, /* Q0 */
     739             :     const Word16 currentIndex2, /* Q0 */
     740             :     const Word16 nChannels      /* Q0 */
     741             : )
     742             : {
     743             :     Word16 ch;
     744             : 
     745     9922326 :     *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
     746     9922326 :     move32();
     747     9922326 :     *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
     748     9922326 :     move32();
     749             : 
     750     9922326 :     Word16 c_q = sub( 31, c_e );
     751     9922326 :     Word16 s_q = sub( 31, s_e );
     752             :     Word32 op1, op2;
     753             :     Word16 op_e;
     754             : 
     755             :     // Bring c and s to same Q
     756     9922326 :     IF( GT_16( c_q, s_q ) )
     757             :     {
     758      861663 :         op1 = L_shr( c, sub( c_q, s_q ) );
     759      861663 :         op2 = s;
     760      861663 :         move32();
     761      861663 :         op_e = s_q;
     762      861663 :         move16();
     763             :     }
     764             :     ELSE
     765             :     {
     766     9060663 :         op1 = c;
     767     9060663 :         move32();
     768     9060663 :         op2 = L_shr( s, sub( s_q, c_q ) );
     769     9060663 :         op_e = c_q;
     770     9060663 :         move16();
     771             :     }
     772     9922326 :     op_e = add( op_e, 1 ); // 64 bit mac -> +1
     773     9922326 :     op_e = negate( op_e );
     774             : 
     775    58859943 :     FOR( ch = 0; ch < nChannels; ch++ )
     776             :     {
     777    48937617 :         x11 = singularVector[ch][currentIndex2];
     778    48937617 :         move32();
     779    48937617 :         x12 = singularVector[ch][currentIndex1];
     780    48937617 :         move32();
     781             : 
     782    48937617 :         Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
     783    48937617 :         singularVector[ch][currentIndex2] = W_shl_sat_l( temp, op_e );   // Q(singularVector)
     784    48937617 :         move32();
     785             : 
     786    48937617 :         temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
     787    48937617 :         singularVector[ch][currentIndex1] = W_shl_sat_l( temp, op_e );        // Q(singularVector)
     788    48937617 :         move32();
     789             :     }
     790             : 
     791     9922326 :     return;
     792             : }
     793             : 
     794             : /*-------------------------------------------------------------------------
     795             :  * HouseholderReduction()
     796             :  *
     797             :  *
     798             :  *-------------------------------------------------------------------------*/
     799             : 
     800     1035976 : static void HouseholderReduction_fx(
     801             :     Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
     802             :     Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* exp(singularValues_fx_e) */
     803             :     Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_Left_e) */
     804             :     Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_fx_e) */
     805             :     Word16 singularVectors_Left_e,
     806             :     Word16 singularValues_fx_e[MAX_OUTPUT_CHANNELS],
     807             :     Word16 *secDiag_fx_e,
     808             :     const Word16 nChannelsL, /* Q0 */
     809             :     const Word16 nChannelsC, /* Q0 */
     810             :     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
     811             :     Word16 *eps_x_fx_e )
     812             : {
     813             :     Word16 nCh;
     814             : 
     815     1035976 :     Word32 g_left_fx = 0;
     816     1035976 :     Word16 g_left_e = 0;
     817     1035976 :     move32();
     818     1035976 :     move16();
     819     1035976 :     Word32 g_right_fx = 0;
     820     1035976 :     Word16 g_right_e = 0;
     821     1035976 :     move32();
     822     1035976 :     move16();
     823             : 
     824             : 
     825             :     Word16 iCh, jCh;
     826             :     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     827             : 
     828     1035976 :     Word16 sc = 0;
     829     1035976 :     move16();
     830     1035976 :     sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC );
     831     3619040 :     FOR( jCh = 1; jCh < nChannelsL; jCh++ )
     832             :     {
     833     2583064 :         sc = s_min( sc, getScaleFactor32( singularVectors_Left_fx[jCh], nChannelsC ) );
     834             :     }
     835     4655016 :     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     836             :     {
     837     3619040 :         Scale_sig32( singularVectors_Left_fx[jCh], nChannelsC, sc );
     838    16700583 :         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
     839             :         {
     840    13081543 :             singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e - sc;
     841    13081543 :             move16();
     842             :         }
     843             :     }
     844             : 
     845     4223041 :     FOR( nCh = 0; nCh < nChannelsC; nCh++ )
     846             :     {
     847     3187065 :         secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
     848     3187065 :         move32();
     849     3187065 :         secDiag_fx_e[nCh] = g_right_e;
     850             : 
     851     3187065 :         biDiagonalReductionLeft_fx(
     852             :             singularVectors_Left_fx,
     853             :             singularVectors_Left_fx_e,
     854             :             nChannelsL,
     855             :             nChannelsC,
     856             :             nCh,
     857             :             &g_left_fx,
     858             :             &g_left_e );
     859             : 
     860     3187065 :         singularValues_fx[nCh] = g_left_fx;
     861     3187065 :         move32();
     862     3187065 :         singularValues_fx_e[nCh] = g_left_e;
     863             : 
     864     3187065 :         biDiagonalReductionRight_fx(
     865             :             singularVectors_Left_fx,
     866             :             singularVectors_Left_fx_e,
     867             :             nChannelsL,
     868             :             nChannelsC,
     869             :             nCh,
     870             :             &g_right_fx,
     871             :             &g_right_e );
     872             : 
     873             :         Word16 L_temp_e;
     874     3187065 :         Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
     875     3187065 :         IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
     876             :         {
     877     1482400 :             *eps_x_fx = L_temp; /* exp(L_temp_e) */
     878     1482400 :             move32();
     879     1482400 :             *eps_x_fx_e = L_temp_e;
     880     1482400 :             move32();
     881             :         }
     882             :     }
     883             : 
     884             : 
     885             :     /* SingularVecotr Accumulation */
     886     1035976 :     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
     887             : 
     888             : 
     889     1035976 :     singularVectorsAccumulationLeft_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Left_fx_e, singularValues_fx_e, nChannelsL, nChannelsC );
     890             : 
     891     1035976 :     return;
     892             : }
     893             : 
     894             : /*-------------------------------------------------------------------------
     895             :  * biDiagonalReductionLeft()
     896             :  *
     897             :  *
     898             :  *-------------------------------------------------------------------------*/
     899     3187065 : static void biDiagonalReductionLeft_fx(
     900             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS],   /* exp(singularVectors_e) */
     901             :     Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
     902             :     const Word16 nChannelsL,
     903             :     const Word16 nChannelsC,  /* Q0 */
     904             :     const Word16 currChannel, /* Q0 */
     905             :     Word32 *g,
     906             :     Word16 *g_e )
     907             : {
     908             :     Word16 iCh, jCh;
     909             :     Word32 norm_x, f, r;
     910             :     Word16 norm_x_e, f_e, r_e;
     911             :     Word32 L_temp;
     912             :     Word16 L_temp_e;
     913             : 
     914             :     /* Setting values to 0 */
     915     3187065 :     *g = 0;
     916     3187065 :     *g_e = 0;
     917     3187065 :     move32();
     918     3187065 :     move16();
     919             : 
     920     3187065 :     IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     921             :     {
     922     3187065 :         Word64 temp = 0;
     923     3187065 :         move64();
     924     3187065 :         norm_x = 0;
     925     3187065 :         move32();
     926     3187065 :         norm_x_e = 0;
     927     3187065 :         move16();
     928     3187065 :         Word16 max_e = MIN_16;
     929     3187065 :         move16();
     930    11757789 :         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     931             :         {
     932     8570724 :             max_e = s_max( max_e, singularVectors_e[jCh][currChannel] );
     933             :         }
     934             : 
     935    11757789 :         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     936             :         {
     937     8570724 :             temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors_e[jCh][currChannel] ), 1 ) ) );
     938             :         }
     939             : 
     940     3187065 :         Word16 nrm = W_norm( temp );
     941     3187065 :         nrm = sub( nrm, 32 );
     942     3187065 :         norm_x = W_shl_sat_l( temp, nrm );
     943     3187065 :         norm_x_e = sub( add( max_e, max_e ), nrm );
     944             : 
     945     3187065 :         IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
     946             :         {
     947             :             Word16 invVal_e;
     948             :             Word32 invVal;
     949             : 
     950     3016767 :             L_temp_e = norm_x_e;
     951     3016767 :             move16();
     952     3016767 :             L_temp = Sqrt32( norm_x, &L_temp_e );
     953             :             //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
     954     3016767 :             if ( singularVectors[currChannel][currChannel] >= 0 )
     955             :             {
     956     1719992 :                 L_temp = L_negate( L_temp );
     957     1719992 :                 move32();
     958             :             }
     959     3016767 :             *g = L_temp;
     960     3016767 :             move32();
     961     3016767 :             *g_e = L_temp_e;
     962     3016767 :             move16();
     963             : 
     964     3016767 :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e );                                             /* exp(r_e) */
     965     3016767 :             singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */
     966     3016767 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
     967             : 
     968     7523064 :             FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
     969             :             {
     970     4506297 :                 Word16 max2_e = MIN_16;
     971     4506297 :                 max_e = MIN_16;
     972     4506297 :                 move16();
     973     4506297 :                 move16();
     974     4506297 :                 temp = 0;
     975     4506297 :                 move64();
     976             : 
     977    22525682 :                 FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     978             :                 {
     979    18019385 :                     max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); /* exp(norm_x_e) */
     980    18019385 :                     max2_e = s_max( max2_e, singularVectors_e[jCh][iCh] );       /* exp(norm_x_e) */
     981             :                 }
     982     4506297 :                 max_e = add( max_e, max2_e );
     983             : 
     984    22525682 :                 FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     985             :                 {
     986    18019385 :                     temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors_e[jCh][currChannel], singularVectors_e[jCh][iCh] ) ) ) );
     987             :                 }
     988     4506297 :                 nrm = W_norm( temp );
     989     4506297 :                 nrm = sub( nrm, 32 );
     990     4506297 :                 norm_x = W_shl_sat_l( temp, nrm );
     991     4506297 :                 norm_x_e = sub( max_e, nrm );
     992             : 
     993     4506297 :                 f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
     994     4506297 :                 f_e = add( invVal_e, sub( norm_x_e, r_e ) );
     995             : 
     996    22525682 :                 FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
     997             :                 {
     998    18019385 :                     singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] );
     999             :                 }
    1000             :             }
    1001             :         }
    1002             :     }
    1003     3187065 :     return;
    1004             : }
    1005             : 
    1006     3187065 : static void biDiagonalReductionRight_fx(
    1007             :     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
    1008             :     Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
    1009             :     const Word16 nChannelsL,  /* Q0 */
    1010             :     const Word16 nChannelsC,  /* Q0 */
    1011             :     const Word16 currChannel, /* Q0 */
    1012             :     Word32 *g,
    1013             :     Word16 *g_e )
    1014             : {
    1015             :     Word16 iCh, jCh, idx;
    1016             :     Word32 norm_x, r;
    1017             :     Word16 norm_x_e, r_e;
    1018             :     Word32 L_temp;
    1019             :     Word16 L_temp_e;
    1020             : 
    1021             :     /* Setting values to 0 */
    1022     3187065 :     *g = 0;
    1023     3187065 :     *g_e = 0;
    1024     3187065 :     move32();
    1025     3187065 :     move16();
    1026     3187065 :     IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
    1027             :     {
    1028     2151089 :         idx = add( currChannel, 1 ); /* Q0 */
    1029             : 
    1030     2151089 :         norm_x = 0;
    1031     2151089 :         move32();
    1032     2151089 :         norm_x_e = 0;
    1033     2151089 :         move16();
    1034     6661908 :         FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
    1035             :         {
    1036     4510819 :             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
    1037             :         }
    1038             : 
    1039     2151089 :         IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
    1040             :         {
    1041             :             Word16 invVal_e;
    1042             :             Word32 invVal;
    1043             : 
    1044     1999460 :             L_temp_e = norm_x_e;
    1045     1999460 :             move16();
    1046     1999460 :             L_temp = Sqrt32( norm_x, &L_temp_e );
    1047             :             // L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
    1048     1999460 :             IF( singularVectors[currChannel][idx] >= 0 )
    1049             :             {
    1050      749583 :                 ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
    1051      749583 :                 move32();
    1052             :             }
    1053             :             ELSE
    1054             :             {
    1055     1249877 :                 ( *g ) = L_temp; /* exp(L_temp_e) */
    1056     1249877 :                 move32();
    1057             :             }
    1058     1999460 :             *g_e = L_temp_e;
    1059     1999460 :             move16();
    1060             : 
    1061     1999460 :             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + ( *g_e ), -norm_x, norm_x_e, &r_e );                             /* exp(r_e) */
    1062     1999460 :             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */
    1063             : 
    1064     1999460 :             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
    1065             : 
    1066     6798925 :             FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
    1067             :             {
    1068     4799465 :                 norm_x = 0;
    1069     4799465 :                 move32();
    1070     4799465 :                 norm_x_e = 0;
    1071     4799465 :                 move16();
    1072    18165076 :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
    1073             :                 {
    1074    13365611 :                     norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors_e[iCh][jCh], singularVectors_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
    1075             :                 }
    1076             : 
    1077     4799465 :                 norm_x = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
    1078     4799465 :                 norm_x_e = add( invVal_e, sub( norm_x_e, r_e ) );
    1079             : 
    1080    18165076 :                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
    1081             :                 {
    1082    13365611 :                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */
    1083             :                 }
    1084             :             }
    1085             :         }
    1086             :     }
    1087             : 
    1088     3187065 :     return;
    1089             : }
    1090             : 
    1091             : /*-------------------------------------------------------------------------
    1092             :  * singularVectorsAccumulationLeft()
    1093             :  *
    1094             :  *
    1095             :  *-------------------------------------------------------------------------*/
    1096             : 
    1097     1035976 : static void singularVectorsAccumulationLeft_fx(
    1098             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    1099             :     Word32 singularValues[MAX_OUTPUT_CHANNELS],         /* exp(singularValues_e) */
    1100             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
    1101             :     Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
    1102             :     const Word16 nChannelsL, /* Q0 */
    1103             :     const Word16 nChannelsC  /* Q0 */
    1104             : )
    1105             : {
    1106             :     Word16 nCh, iCh, k;
    1107             :     Word16 nChannels;
    1108             :     Word32 norm_y, t_jj, t_ii;
    1109             :     Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
    1110             : 
    1111             :     /* Processing */
    1112     1035976 :     nChannels = s_min( nChannelsL, nChannelsC ); /* min(nChannelsL,ChannelsC) Q0*/
    1113             :     // FILE *fp = fopen("t_ii_out.txt","a");
    1114     4223041 :     FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* min(nChannelsL,ChannelsC) */
    1115             :     {
    1116     3187065 :         t_ii = singularValues[nCh]; /* exp(singularValues_e) */
    1117     3187065 :         move32();
    1118     3187065 :         t_ii_e = singularValues_e[nCh];
    1119     3187065 :         move16();
    1120             : 
    1121     7697884 :         FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1122             :         {
    1123     4510819 :             singularVectors_Left[nCh][iCh] = 0;
    1124     4510819 :             move32();
    1125             :         }
    1126             : 
    1127     3187065 :         IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
    1128             :         {
    1129     3016767 :             t_ii = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( t_ii ), &temp_exp );
    1130     3016767 :             t_ii_e = sub( temp_exp, t_ii_e );
    1131             :             Word16 tempe;
    1132     3016767 :             Word32 temp = BASOP_Util_Divide3232_Scale_newton( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
    1133     3016767 :             tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
    1134             :             // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
    1135     7523064 :             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1136             :             {
    1137     4506297 :                 Word64 acc = 0;
    1138     4506297 :                 move64();
    1139             :                 Word64 prod[16];
    1140             :                 Word16 prod_e[16];
    1141     4506297 :                 Word16 max_e = -31;
    1142     4506297 :                 move16();
    1143    18019385 :                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
    1144             :                 {
    1145    13513088 :                     prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
    1146    13513088 :                     prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
    1147    13513088 :                     max_e = s_max( max_e, prod_e[k] );
    1148             :                 }
    1149             : 
    1150    18019385 :                 FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
    1151             :                 {
    1152    13513088 :                     acc = W_add( acc, W_shr( prod[k], s_min( 63, sub( max_e, prod_e[k] ) ) ) );
    1153             :                 }
    1154     4506297 :                 Word16 acc_e = W_norm( acc );
    1155     4506297 :                 acc = W_shl( acc, acc_e );
    1156             : 
    1157     4506297 :                 norm_y = W_extract_h( acc );
    1158     4506297 :                 norm_y_e = add( sub( max_e, acc_e ), 1 );
    1159     4506297 :                 t_jj = Mpy_32_32( temp, norm_y );
    1160     4506297 :                 t_jj_e = add( tempe, norm_y_e );
    1161    22525682 :                 FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
    1162             :                 {
    1163    18019385 :                     singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], singularVectors_Left_e[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, singularVectors_Left_e[k][nCh] ), &singularVectors_Left_e[k][iCh] ); /* exp(sing_exp2) */
    1164    18019385 :                     move32();
    1165             :                 }
    1166             :             }
    1167             : 
    1168    11412638 :             FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
    1169             :             {
    1170     8395871 :                 singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); /* exp(sing_exp2 + t_ii_e) */
    1171     8395871 :                 move32();
    1172     8395871 :                 singularVectors_Left_e[iCh][nCh] = add( singularVectors_Left_e[iCh][nCh], t_ii_e );
    1173     8395871 :                 move16();
    1174             :             }
    1175             :         }
    1176             :         ELSE
    1177             :         {
    1178      345151 :             FOR( iCh = nCh; iCh < nChannelsL; iCh++ ) /* nChannelsL */
    1179             :             {
    1180      174853 :                 singularVectors_Left[iCh][nCh] = 0;
    1181      174853 :                 move32();
    1182             :             }
    1183             :         }
    1184     3187065 :         Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 );
    1185     3187065 :         singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */
    1186     3187065 :         move32();
    1187     3187065 :         singularVectors_Left_e[nCh][nCh] = exp;
    1188     3187065 :         move16();
    1189             :     }
    1190             :     // fclose(fp);
    1191     4655016 :     FOR( nCh = 0; nCh < nChannelsL; nCh++ )
    1192             :     {
    1193    16700583 :         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
    1194             :         {
    1195    13081543 :             singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], singularVectors_Left_e[nCh][iCh] ); /* Q31 */
    1196    13081543 :             move32();
    1197             :         }
    1198             :     }
    1199             : 
    1200     1035976 :     return;
    1201             : }
    1202             : 
    1203             : /*-------------------------------------------------------------------------
    1204             :  * singularVectorsAccumulationRight()
    1205             :  *
    1206             :  *
    1207             :  *-------------------------------------------------------------------------*/
    1208             : 
    1209     1035976 : static void singularVectorsAccumulationRight_fx(
    1210             :     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_Left_e) */
    1211             :     Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* input exp(singularVectors_Left_e), output Q31 */
    1212             :     Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* exp(secDiag_e) */
    1213             :     Word16 singularVectors_Left_e[][MAX_OUTPUT_CHANNELS],
    1214             :     Word16 *secDiag_e,
    1215             :     const Word16 nChannelsC /* Q0 */
    1216             : )
    1217             : {
    1218             :     Word16 nCh, iCh, k;
    1219             :     Word16 nChannels;
    1220             :     Word32 norm_y, t_ii, ratio_float;
    1221     1035976 :     Word16 norm_y_e, temp_exp1, sing_right_exp[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
    1222             : 
    1223             :     /* Processing */
    1224     1035976 :     nChannels = nChannelsC; /* nChannelsC    Q0*/
    1225             : 
    1226             :     /* avoid compiler warning */
    1227     1035976 :     t_ii = secDiag[nChannels - 1]; /* exp(secDiag_e[nChannels - 1]) */
    1228     1035976 :     move32();
    1229             : 
    1230     4223041 :     FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */
    1231             :     {
    1232             : 
    1233     3187065 :         IF( LT_16( nCh, sub( nChannelsC, 1 ) ) ) /* nChannelsC */
    1234             :         {
    1235     2151089 :             IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
    1236             :             {
    1237             : 
    1238     6358093 :                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/
    1239             :                 {
    1240     4358633 :                     ratio_float = L_deposit_h( BASOP_Util_Divide3232_Scale( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1] ), &temp_exp1 ) ); /* exp(temp_exp1) */
    1241     4358633 :                     singularVectors_Right[iCh][nCh] = L_deposit_h( BASOP_Util_Divide3232_Scale( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh] ) );               /* exp(sing_right_exp + (temp_exp1 - secDiag_e) */
    1242     4358633 :                     temp_exp1 = add( temp_exp1, sub( singularVectors_Left_e[nCh][iCh], singularVectors_Left_e[nCh][nCh + 1] ) );
    1243     4358633 :                     move32();
    1244     4358633 :                     sing_right_exp[iCh][nCh] = add( sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e[nCh + 1] ) );
    1245     4358633 :                     move16();
    1246             :                     // singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 );
    1247             :                 }
    1248             : 
    1249     6358093 :                 FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1250             :                 {
    1251     4358633 :                     Word64 norm_val = 0;
    1252     4358633 :                     move64();
    1253     4358633 :                     Word16 maxL_e = MIN_16;
    1254     4358633 :                     Word16 maxR_e = MIN_16;
    1255     4358633 :                     Word16 maxR2_e = MIN_16;
    1256     4358633 :                     move16();
    1257     4358633 :                     move16();
    1258     4358633 :                     move16();
    1259    17274522 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1260             :                     {
    1261    12915889 :                         maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] );
    1262    12915889 :                         maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] );
    1263    12915889 :                         maxR2_e = s_max( maxR2_e, sing_right_exp[k][nCh] );
    1264             :                     }
    1265             : 
    1266    17274522 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1267             :                     {
    1268    12915889 :                         norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) );
    1269             :                     }
    1270     4358633 :                     norm_y_e = W_norm( norm_val );
    1271     4358633 :                     norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) );
    1272     4358633 :                     norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e );
    1273             : 
    1274     4358633 :                     Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) );
    1275    17274522 :                     FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
    1276             :                     {
    1277    12915889 :                         Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] );
    1278    12915889 :                         Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) );
    1279    12915889 :                         singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */
    1280    12915889 :                         move32();
    1281    12915889 :                         singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */
    1282    12915889 :                         move32();
    1283    12915889 :                         sing_right_exp[k][iCh] = 0;
    1284    12915889 :                         move16();
    1285             :                     }
    1286             :                 }
    1287             :             }
    1288             : 
    1289     6661908 :             FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
    1290             :             {
    1291     4510819 :                 singularVectors_Right[nCh][iCh] = singularVectors_Right[iCh][nCh] = 0;
    1292     4510819 :                 move32();
    1293     4510819 :                 move32();
    1294             :             }
    1295             :         }
    1296     3187065 :         singularVectors_Right[nCh][nCh] = MAX_32;
    1297     3187065 :         move32();
    1298     3187065 :         t_ii = secDiag[nCh]; /* exp(secDiag_e[nCh]) */
    1299     3187065 :         move32();
    1300             :     }
    1301     1035976 :     return;
    1302             : }
    1303             : 
    1304             : /*-------------------------------------------------------------------------
    1305             :  * GivensRotation()
    1306             :  *
    1307             :  *
    1308             :  *-------------------------------------------------------------------------*/
    1309             : 
    1310             : 
    1311     9922326 : static void GivensRotation2_fx(
    1312             :     const Word32 x, /* exp(x_e) */
    1313             :     const Word16 x_e,
    1314             :     const Word32 z, /* exp(z_e) */
    1315             :     const Word16 z_e,
    1316             :     Word32 *result,
    1317             :     Word32 *resultInv,
    1318             :     Word16 *out_e,
    1319             :     Word16 *outInv_e )
    1320             : {
    1321             :     Word32 r;
    1322     9922326 :     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
    1323     9922326 :     r = L_max( r, 1 );
    1324     9922326 :     *outInv_e = *out_e;
    1325     9922326 :     move16();
    1326     9922326 :     *result = Sqrt32( r, out_e );
    1327     9922326 :     move32();
    1328             : 
    1329     9922326 :     *resultInv = ISqrt32( r, outInv_e );
    1330     9922326 :     move32();
    1331     9922326 : }
    1332             : 
    1333     2097194 : static Word32 GivensRotation_fx(
    1334             :     const Word32 x, /* exp(x_e) */
    1335             :     const Word16 x_e,
    1336             :     const Word32 z, /* exp(z_e) */
    1337             :     const Word16 z_e,
    1338             :     Word16 *out_e )
    1339             : {
    1340             :     Word32 r;
    1341             : 
    1342     2097194 :     r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( z, z ), shl( z_e, 1 ), Mpy_32_32( x, x ), shl( x_e, 1 ), out_e );
    1343     2097194 :     r = Sqrt32( r, out_e );
    1344     2097194 :     return ( r );
    1345             : }
    1346             : 
    1347             : /*-------------------------------------------------------------------------
    1348             :  * maxWithSign()
    1349             :  *
    1350             :  *
    1351             :  *-------------------------------------------------------------------------*/
    1352             : 
    1353    26058609 : static Word32 maxWithSign_fx(
    1354             :     const Word32 a /* Qx */
    1355             : )
    1356             : {
    1357             :     Word32 result;
    1358    26058609 :     IF( a >= 0 )
    1359             :     {
    1360    11182931 :         result = L_max( a, SVD_MINIMUM_VALUE_FX );
    1361             :     }
    1362             :     ELSE
    1363             :     {
    1364    14875678 :         result = L_min( a, -SVD_MINIMUM_VALUE_FX );
    1365             :     }
    1366    26058609 :     return result;
    1367             : }
    1368             : 
    1369             : /*-------------------------------------------------------------------------
    1370             :  * flushToZeroArray()
    1371             :  *
    1372             :  *
    1373             :  *-------------------------------------------------------------------------*/
    1374             : 
    1375             : 
    1376             : /*-------------------------------------------------------------------------
    1377             :  * flushToZeroMat()
    1378             :  *
    1379             :  *
    1380             :  *-------------------------------------------------------------------------*/

Generated by: LCOV version 1.14