LCOV - Coverage on main -- dec/rend @ 4c82f1d24d39d0296b18d775f18a006f4c7d024b

LCOV - code coverage report

Current view:	top level - lib_com - ivas_tools_fx.c (source / functions)		Hit	Total	Coverage
Test:	Coverage on main -- dec/rend @ 4c82f1d24d39d0296b18d775f18a006f4c7d024b	Lines:	733	1159	63.2 %
Date:	2025-05-17 01:59:02	Functions:	34	47	72.3 %

          Line data    Source code

       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : #include <assert.h>
      34             : #include <stdint.h>
      35             : #include "options.h"
      36             : #include <math.h>
      37             : #include "prot_fx.h"
      38             : #include "wmc_auto.h"
      39             : #include "ivas_rom_com.h"
      40             : #include "ivas_prot_fx.h"
      41             : 
      42             : #define ANGLE_90_DEG_Q22  377487360
      43             : #define ANGLE_180_DEG_Q22 754974720
      44             : #define ANGLE_360_DEG_Q22 1509949440
      45             : 
      46             : /*---------------------------------------------------------------
      47             :  * sumAbs()
      48             :  *
      49             :  * sum of absolute values
      50             :  * ---------------------------------------------------------------*/
      51             : 
      52           0 : Word32 sumAbs_fx(
      53             :     const Word32 *vec, /* i  : input vector                     Qx*/
      54             :     const Word16 lvec  /* i  : length of input vector           Q0*/
      55             : )
      56             : {
      57             :     Word16 i;
      58             :     Word32 tmp; /*Qx*/
      59             : 
      60           0 :     tmp = 0;
      61           0 :     move32();
      62           0 :     FOR( i = 0; i < lvec; i++ )
      63             :     {
      64           0 :         tmp = L_add( tmp, L_abs( vec[i] ) ); /*Qx*/
      65             :     }
      66             : 
      67           0 :     return tmp;
      68             : }
      69             : 
      70             : /*---------------------------------------------------------------------*
      71             :  * mvc2c()
      72             :  *
      73             :  * Transfer the contents of vector x[] to vector y[] in char format
      74             :  *---------------------------------------------------------------------*/
      75             : 
      76       38621 : void mvc2c(
      77             :     const UWord8 x[], /* i  : input vector  */
      78             :     UWord8 y[],       /* o  : output vector */
      79             :     const Word16 n    /* i  : vector size   */
      80             : )
      81             : {
      82             :     Word16 i;
      83             : 
      84       38621 :     IF( n <= 0 )
      85             :     {
      86             :         /* no need to transfer vectors with size 0 */
      87           0 :         return;
      88             :     }
      89             : 
      90       38621 :     IF( y < x )
      91             :     {
      92     3748610 :         FOR( i = 0; i < n; i++ )
      93             :         {
      94     3730323 :             y[i] = x[i];
      95     3730323 :             move16();
      96             :         }
      97             :     }
      98             :     ELSE
      99             :     {
     100       87405 :         FOR( i = n - 1; i >= 0; i-- )
     101             :         {
     102       67071 :             y[i] = x[i];
     103       67071 :             move16();
     104             :         }
     105             :     }
     106             : 
     107       38621 :     return;
     108             : }
     109             : 
     110             : 
     111             : /*-------------------------------------------------------------------*
     112             :  * ivas_syn_output()
     113             :  *
     114             :  * Output ivas synthesis signal with compensation for saturation
     115             :  * returns number of clipped samples
     116             :  *-------------------------------------------------------------------*/
     117             : 
     118             : /*! r: number of clipped samples */
     119      412070 : UWord32 ivas_syn_output_fx(
     120             :     Word32 *synth[], /* i/o: float synthesis signal              q_synth*/
     121             :     const Word16 q_synth,
     122             :     const Word16 output_frame, /* i  : output frame length (one channel)   Q0*/
     123             :     const Word16 n_channels,   /* i  : number of output channels           Q0*/
     124             :     Word16 *synth_out          /* o  : integer 16 bits synthesis signal    Q0*/
     125             : )
     126             : {
     127             :     Word16 i, n;
     128             :     Word16 synth_loc[MAX_JBM_L_FRAME48k];
     129             :     UWord32 tmp;
     130      412070 :     UWord32 noClipping = 0;
     131      412070 :     move32();
     132             : 
     133             :     /*-----------------------------------------------------------------*
     134             :      * float to integer conversion with saturation control
     135             :      *-----------------------------------------------------------------*/
     136             : 
     137     1880415 :     FOR( n = 0; n < n_channels; n++ )
     138             :     {
     139     1468345 :         tmp = mvl2s_r( synth[n], q_synth, synth_loc, output_frame ); /*Q0*/
     140     1468345 :         noClipping = UL_addNsD( noClipping, tmp );
     141             : 
     142  1172866425 :         FOR( i = 0; i < output_frame; i++ )
     143             :         {
     144  1171398080 :             synth_out[( ( i * n_channels ) + n )] = synth_loc[i]; /*q_synth*/
     145  1171398080 :             move16();
     146             :         }
     147             :     }
     148             : 
     149      412070 :     return noClipping; /*Q0*/
     150             : }
     151             : 
     152             : 
     153             : /*-------------------------------------------------------------------*
     154             :  * ivas_syn_output_f()
     155             :  *
     156             :  * Output ivas synthesis signal with compensation for saturation
     157             :  * returns number of clipped samples
     158             :  *-------------------------------------------------------------------*/
     159             : 
     160             : /*! r: number of clipped samples */
     161       19405 : void ivas_syn_output_f_fx(
     162             :     Word32 *synth[],           /* i/o: float synthesis signal              Q11*/
     163             :     const Word16 output_frame, /* i  : output frame length (one channel)   Q0*/
     164             :     const Word16 n_channels,   /* i  : number of output channels           Q0*/
     165             :     Word32 *synth_out          /* o  : integer 16 bits synthesis signal    Q11*/
     166             : )
     167             : {
     168             :     Word16 i, n;
     169             : 
     170             :     /*-----------------------------------------------------------------*
     171             :      * float to integer conversion with saturation control
     172             :      *-----------------------------------------------------------------*/
     173             : 
     174       63116 :     FOR( n = 0; n < n_channels; n++ )
     175             :     {
     176    34908031 :         FOR( i = 0; i < output_frame; i++ )
     177             :         {
     178    34864320 :             synth_out[( ( i * n_channels ) + n )] = synth[n][i]; /*Q11*/
     179    34864320 :             move16();
     180             :         }
     181             :     }
     182             : 
     183       19405 :     return;
     184             : }
     185             : 
     186             : 
     187             : /*-------------------------------------------------------------------*
     188             :  * mvr2r_inc()
     189             :  *
     190             :  *
     191             :  *-------------------------------------------------------------------*/
     192           0 : void mvr2r_inc_fixed_one(
     193             :     const Word32 x_fx[], /* i  : input vector               Q29*/
     194             :     const Word16 x_inc,  /* i  : increment for vector x[]   Q0*/
     195             :     Word32 y_fx[],       /* o  : output vector              Q29*/
     196             :     const Word16 y_inc,  /* i  : increment for vector y[]   Q0*/
     197             :     const Word16 n       /* i  : vector size                Q0*/
     198             : )
     199             : {
     200             :     Word16 i;
     201             :     Word16 ix;
     202             :     Word16 iy;
     203             : 
     204           0 :     IF( n <= 0 )
     205             :     {
     206             :         /* cannot transfer vectors with size 0 */
     207           0 :         return;
     208             :     }
     209             : 
     210           0 :     IF( y_fx < x_fx )
     211             :     {
     212           0 :         ix = 0;
     213           0 :         move16();
     214           0 :         iy = 0;
     215           0 :         move16();
     216           0 :         FOR( i = 0; i < n; i++ )
     217             :         {
     218           0 :             y_fx[iy] = x_fx[ix]; /*Q29*/
     219           0 :             move32();
     220             : 
     221           0 :             ix = ix + x_inc;
     222           0 :             iy = iy + y_inc;
     223             :         }
     224             :     }
     225             :     ELSE
     226             :     {
     227           0 :         ix = imult1616( sub( n, 1 ), x_inc ); /*Q0*/
     228           0 :         iy = imult1616( sub( n, 1 ), y_inc ); /*Q0*/
     229           0 :         FOR( i = ( n - 1 ); i >= 0; i-- )
     230             :         {
     231           0 :             y_fx[iy] = x_fx[ix]; /*Q29*/
     232           0 :             move32();
     233             : 
     234           0 :             ix = ix - x_inc;
     235           0 :             iy = iy - y_inc;
     236             :         }
     237             :     }
     238             : 
     239           0 :     return;
     240             : }
     241             : 
     242    80852919 : void mvr2r_inc_fixed(
     243             :     const Word32 x_fx[], /* i  : input vector               Q29*/
     244             :     const Word16 x_inc,  /* i  : increment for vector x[]   Q0*/
     245             :     Word32 y_fx[],       /* o  : output vector              Q29*/
     246             :     const Word16 y_inc,  /* i  : increment for vector y[]   Q0*/
     247             :     const Word16 n       /* i  : vector size                Q0*/
     248             : )
     249             : {
     250             :     Word16 i;
     251             :     Word16 ix;
     252             :     Word16 iy;
     253             : 
     254    80852919 :     IF( n <= 0 )
     255             :     {
     256             :         /* cannot transfer vectors with size 0 */
     257           0 :         return;
     258             :     }
     259             : 
     260    80852919 :     IF( y_fx < x_fx )
     261             :     {
     262    73209942 :         ix = 0;
     263    73209942 :         move16();
     264    73209942 :         iy = 0;
     265    73209942 :         move16();
     266  1127534682 :         FOR( i = 0; i < n; i++ )
     267             :         {
     268  1054324740 :             y_fx[iy] = x_fx[ix]; /*Q29*/
     269  1054324740 :             move32();
     270             : 
     271  1054324740 :             ix = ix + x_inc;
     272  1054324740 :             iy = iy + y_inc;
     273             :         }
     274             :     }
     275             :     ELSE
     276             :     {
     277     7642977 :         ix = i_mult( sub( n, 1 ), x_inc ); /*Q0*/
     278     7642977 :         iy = i_mult( sub( n, 1 ), y_inc );
     279    57807899 :         FOR( i = sub( n, 1 ); i >= 0; i-- )
     280             :         {
     281    50164922 :             y_fx[iy] = x_fx[ix]; /*Q29*/
     282    50164922 :             move32();
     283             : 
     284    50164922 :             ix = ix - x_inc;
     285    50164922 :             iy = iy - y_inc;
     286             :         }
     287             :     }
     288             : 
     289    80852919 :     return;
     290             : }
     291             : 
     292             : /*-------------------------------------------------------------------*
     293             :  * v_add_inc()
     294             :  *
     295             :  * Addition of two vectors sample by sample with explicit increments
     296             :  *-------------------------------------------------------------------*/
     297             : 
     298             : // for same q//
     299     8094690 : void v_add_inc_fx(
     300             :     const Word32 x1[],   /* i  : Input vector 1                       Qx*/
     301             :     const Word16 x_inc,  /* i  : Increment for input vector 1         Q0*/
     302             :     const Word32 x2[],   /* i  : Input vector 2                       Qx*/
     303             :     const Word16 x2_inc, /* i  : Increment for input vector 2         Q0*/
     304             :     Word32 y[],          /* o  : Output vector that contains vector 1 + vector 2  Qx*/
     305             :     const Word16 y_inc,  /* i  : increment for vector y[]              Q0*/
     306             :     const Word16 N       /* i  : Vector length                         Q0*/
     307             : )
     308             : {
     309             :     Word16 i, ix1, ix2, iy;
     310             : 
     311             :     /* The use of this function is currently always for the interleaved input format, */
     312             :     /* that means, the following conditions are always true and thus obsolete.        */
     313     8094690 :     test();
     314     8094690 :     test();
     315     8094690 :     test();
     316     8094690 :     test();
     317     8094690 :     IF( ( sub( x_inc, 2 ) == 0 ) && ( sub( x2_inc, 2 ) == 0 ) && ( sub( y_inc, 1 ) == 0 ) && ( &x1[1] == &x2[0] ) )
     318             :     {
     319             :         /* Interleaved input case, linear output */
     320   189560046 :         FOR( i = 0; i < N; i++ )
     321             :         {
     322   181465356 :             y[i] = L_add( x1[2 * i + 0], x1[2 * i + 1] ); /*Qx*/
     323   181465356 :             move32();
     324             :         }
     325     8094690 :         return;
     326             :     }
     327             : 
     328           0 :     ix1 = 0;
     329           0 :     ix2 = 0;
     330           0 :     iy = 0;
     331           0 :     move16();
     332           0 :     move16();
     333           0 :     move16();
     334           0 :     FOR( i = 0; i < N; i++ )
     335             :     {
     336           0 :         y[iy] = L_add( x1[ix1], x2[ix2] ); /*Qx*/
     337           0 :         move32();
     338           0 :         ix1 = add( ix1, x_inc );  /*Q0*/
     339           0 :         ix2 = add( ix2, x2_inc ); /*Q0*/
     340           0 :         iy = add( iy, y_inc );    /*Q0*/
     341             :     }
     342           0 :     return;
     343             : }
     344             : 
     345             : /*-------------------------------------------------------------------*
     346             :  * v_mult_inc_fx()
     347             :  *
     348             :  * Multiplication of two vectors with explicit increments
     349             :  *-------------------------------------------------------------------*/
     350             : 
     351           0 : void v_mult_inc_fx(
     352             :     const Word32 x1_fx[], /* i  : Input vector 1                                   x1_q_fx*/
     353             :     Word16 *x1_q_fx,
     354             :     const Word16 x1_inc,  /* i  : Increment for input vector 1                     Q0*/
     355             :     const Word32 x2_fx[], /* i  : Input vector 2                                   x2_q_fx*/
     356             :     Word16 *x2_q_fx,
     357             :     const Word16 x2_inc, /* i  : Increment for input vector 1                     Q0*/
     358             :     Word32 y_fx[],       /* o  : Output vector that contains vector 1 .* vector 2 y_q_fx*/
     359             :     Word16 *y_q_fx,
     360             :     const Word16 y_inc, /* i  : increment for vector y[i]                        Q0*/
     361             :     const Word16 N      /* i  : Vector length                                    Q0*/
     362             : )
     363             : {
     364             :     Word16 i;
     365           0 :     Word16 ix1 = 0;
     366           0 :     Word16 ix2 = 0;
     367           0 :     Word16 iy = 0;
     368             : 
     369           0 :     move16();
     370           0 :     move16();
     371           0 :     move16();
     372             : 
     373           0 :     FOR( i = 0; i < N; i++ )
     374             :     {
     375           0 :         y_fx[iy] = Mpy_32_32( x1_fx[ix1], x2_fx[ix2] ); /* x1_q_fx + x2_q_fx - 31 */
     376           0 :         move32();
     377           0 :         y_q_fx[iy] = sub( add( x1_q_fx[ix1], x2_q_fx[ix2] ), 31 );
     378           0 :         move16();
     379             : 
     380           0 :         ix1 = add( ix1, x1_inc ); /*Q0*/
     381           0 :         ix2 = add( ix2, x2_inc ); /*Q0*/
     382           0 :         iy = add( iy, y_inc );    /*Q0*/
     383             :     }
     384             : 
     385           0 :     return;
     386             : }
     387             : 
     388             : // when buffers have constant q/
     389     4284440 : void v_mult_inc_fixed(
     390             :     const Word32 x1_fx[], /* i  : Input vector 1                                   Qx1*/
     391             :     const Word16 x1_inc,  /* i  : Increment for input vector 1                     Q0*/
     392             :     const Word32 x2_fx[], /* i  : Input vector 2                                   Qx2*/
     393             :     const Word16 x2_inc,  /* i  : Increment for input vector 1                     Q0*/
     394             :     Word32 y_fx[],        /* o  : Output vector that contains vector 1 .* vector 2 Qx1 + Qx2 - 31*/
     395             :     const Word16 y_inc,   /* i  : increment for vector y[i]                        Q0*/
     396             :     const Word16 N        /* i  : Vector length                                    Q0*/
     397             : )
     398             : {
     399             :     Word16 i;
     400     4284440 :     Word16 ix1 = 0;
     401     4284440 :     Word16 ix2 = 0;
     402     4284440 :     Word16 iy = 0;
     403             : 
     404     4284440 :     move16();
     405     4284440 :     move16();
     406     4284440 :     move16();
     407             : 
     408    73489728 :     FOR( i = 0; i < N; i++ )
     409             :     {
     410    69205288 :         y_fx[iy] = Mpy_32_32( x1_fx[ix1], x2_fx[ix2] ); /*Qx1 + Qx2 - 31*/
     411    69205288 :         move32();
     412             : 
     413    69205288 :         ix1 = add( ix1, x1_inc );
     414    69205288 :         ix2 = add( ix2, x2_inc );
     415    69205288 :         iy = add( iy, y_inc );
     416             :     }
     417             : 
     418     4284440 :     return;
     419             : }
     420             : 
     421             : /*-------------------------------------------------------------------*
     422             :  * v_addc_fx()
     423             :  *
     424             :  * Addition of constant to vector
     425             :  *-------------------------------------------------------------------*/
     426             : 
     427           0 : void v_addc_fx(
     428             :     const Word32 x_fx[], /* i  : Input vector                                     Qx*/
     429             :     const Word32 c_fx,   /* i  : Constant                                         Qx*/
     430             :     Word32 y_fx[],       /* o  : Output vector that contains c*x                  Qx*/
     431             :     const Word16 N       /* i  : Vector length                                    Q0*/
     432             : )
     433             : {
     434             :     Word16 i;
     435             : 
     436           0 :     FOR( i = 0; i < N; i++ )
     437             :     {
     438           0 :         y_fx[i] = L_add( c_fx, x_fx[i] ); /*Qx*/
     439           0 :         move32();
     440             :     }
     441             : 
     442           0 :     return;
     443             : }
     444             : /*-------------------------------------------------------------------*
     445             :  * v_addc()
     446             :  *
     447             :  * Addition of constant to vector
     448             :  *-------------------------------------------------------------------*/
     449     2047467 : void v_addc_fixed(
     450             :     const Word32 x[], /* i  : Input vector                                     Qx*/
     451             :     const Word32 c,   /* i  : Constant                                         Qx*/
     452             :     Word32 y[],       /* o  : Output vector that contains c*x                  Qx*/
     453             :     const Word16 N    /* i  : Vector length                                    Q0*/
     454             : )
     455             : {
     456             :     Word16 i;
     457             : 
     458    58173287 :     FOR( i = 0; i < N; i++ )
     459             :     {
     460    56125820 :         y[i] = L_add( c, x[i] ); /*Qx*/
     461    56125820 :         move32();
     462             :     }
     463             : 
     464     2047467 :     return;
     465             : }
     466             : 
     467             : /*-------------------------------------------------------------------*
     468             :  * v_min_fx()
     469             :  *
     470             :  * minimum of two vectors
     471             :  *-------------------------------------------------------------------*/
     472             : 
     473        2400 : void v_min_fx(
     474             :     const Word32 x1_fx[], /* i  : Input vector 1                                   x1_q_fx*/
     475             :     Word16 *x1_q_fx,
     476             :     const Word32 x2_fx[], /* i  : Input vector 2                                   x2_q_fx*/
     477             :     Word16 *x2_q_fx,
     478             :     Word32 y_fx[], /* o  : Output vector that contains vector 1 .* vector 2 y_q_fx*/
     479             :     Word16 *y_q_fx,
     480             :     const Word16 N /* i  : Vector length                                    Q0*/
     481             : )
     482             : {
     483             :     Word16 i;
     484             : 
     485       60000 :     FOR( i = 0; i < N; i++ )
     486             :     {
     487       57600 :         IF( GT_16( x1_q_fx[i], x2_q_fx[i] ) )
     488             :         {
     489           0 :             IF( LT_32( L_shr( x1_fx[i], sub( x1_q_fx[i], x2_q_fx[i] ) ), x2_fx[i] ) )
     490             :             {
     491           0 :                 y_fx[i] = x1_fx[i]; /*x1_q_fx*/
     492           0 :                 move32();
     493           0 :                 y_q_fx[i] = x1_q_fx[i];
     494           0 :                 move16();
     495             :             }
     496             :             ELSE
     497             :             {
     498           0 :                 y_fx[i] = x2_fx[i]; /*x2_q_fx*/
     499           0 :                 move32();
     500           0 :                 y_q_fx[i] = x2_q_fx[i];
     501           0 :                 move16();
     502             :             }
     503             :         }
     504             :         ELSE
     505             :         {
     506       57600 :             IF( LT_32( x1_fx[i], L_shr( x2_fx[i], sub( x2_q_fx[i], x1_q_fx[i] ) ) ) )
     507             :             {
     508       52216 :                 y_fx[i] = x1_fx[i]; /*x1_q_fx*/
     509       52216 :                 move32();
     510       52216 :                 y_q_fx[i] = x1_q_fx[i];
     511       52216 :                 move16();
     512             :             }
     513             :             ELSE
     514             :             {
     515        5384 :                 y_fx[i] = x2_fx[i]; /*x2_q_fx*/
     516        5384 :                 move32();
     517        5384 :                 y_q_fx[i] = x2_q_fx[i];
     518        5384 :                 move16();
     519             :             }
     520             :         }
     521             :     }
     522             : 
     523        2400 :     return;
     524             : }
     525             : 
     526             : /*-------------------------------------------------------------------*
     527             :  * v_sqrt()
     528             :  *
     529             :  * square root of vector
     530             :  *-------------------------------------------------------------------*/
     531      315180 : void v_sqrt_fx(
     532             :     const Word32 x[], /* i  : Input vector                                     Qx*/
     533             :     Word16 exp[],
     534             :     Word32 y[],    /* o  : Output vector that contains sqrt(x)              Q31 - exp[]*/
     535             :     const Word16 N /* i  : Vector length                                    Q0*/
     536             : )
     537             : {
     538             :     Word16 i;
     539             : 
     540      945540 :     FOR( i = 0; i < N; i++ )
     541             :     {
     542      630360 :         y[i] = Sqrt32( x[i], &exp[i] ); /*Q31 - exp[i]*/
     543      630360 :         move32();
     544             :     }
     545             : 
     546      315180 :     return;
     547             : }
     548             : 
     549             : /*-------------------------------------------------------------------*
     550             :  * v_sub_s()
     551             :  *
     552             :  * Subtraction of two vectors
     553             :  *-------------------------------------------------------------------*/
     554             : 
     555           0 : void v_sub_s16_fx(
     556             :     const Word16 x1[], /* i  : Input vector 1                                   Qx*/
     557             :     const Word16 x2[], /* i  : Input vector 2                                   Qx*/
     558             :     Word16 y[],        /* o  : Output vector that contains vector 1 - vector 2  Qx*/
     559             :     const Word16 N     /* i  : Vector length                                    Q0*/
     560             : )
     561             : {
     562             :     Word16 i;
     563             : 
     564           0 :     FOR( i = 0; i < N; i++ )
     565             :     {
     566           0 :         y[i] = sub( x1[i], x2[i] ); /*Qx*/
     567           0 :         move16();
     568             :     }
     569             : 
     570           0 :     return;
     571             : }
     572             : 
     573             : 
     574           0 : void v_sub32_fx(
     575             :     const Word32 x1[], /* i  : Input vector 1                                   Qx*/
     576             :     const Word32 x2[], /* i  : Input vector 2                                   Qx*/
     577             :     Word32 y[],        /* o  : Output vector that contains vector 1 - vector 2  Qx*/
     578             :     const Word16 N     /* i  : Vector length                                    Q0*/
     579             : )
     580             : {
     581             :     Word16 i;
     582             : 
     583           0 :     FOR( i = 0; i < N; i++ )
     584             :     {
     585           0 :         y[i] = L_sub( x1[i], x2[i] ); /*Qx*/
     586           0 :         move32();
     587             :     }
     588             : 
     589           0 :     return;
     590             : }
     591             : 
     592             : /*---------------------------------------------------------------------*
     593             :  * dot_product_cholesky()
     594             :  *
     595             :  * Calculates dot product of type x'*A*A'*x, where x is column vector of size m,
     596             :  * and A is a Cholesky decomposition of some Hermitian matrix S whose size is m*m.
     597             :  * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise)
     598             :  *---------------------------------------------------------------------*/
     599             : 
     600             : /*! r: the dot product x'*A*A'*x */
     601           0 : Word64 dot_product_cholesky_fixed(
     602             :     const Word32 *x, /* i  : vector x                        Q31 - exp_x*/
     603             :     const Word32 *A, /* i  : Cholesky  matrix A              Q31 - exp_A*/
     604             :     const Word16 N   /* i  : vector & matrix size            Q0*/
     605             : )
     606             : {
     607             :     Word16 i, j;
     608             :     Word64 suma, tmp_sum;
     609             :     Word32 mul;
     610             :     Word32 tmp;
     611             :     const Word32 *pt_x, *pt_A;
     612           0 :     pt_A = A;
     613           0 :     suma = 0;
     614           0 :     move64();
     615             : 
     616           0 :     FOR( i = 0; i < N; i++ )
     617             :     {
     618           0 :         tmp_sum = 0;
     619           0 :         move32();
     620           0 :         pt_x = x;
     621             : 
     622           0 :         FOR( j = 0; j <= i; j++ )
     623             :         {
     624           0 :             mul = Mpy_32_32( *pt_x++, *pt_A++ );
     625           0 :             tmp_sum = W_add( tmp_sum, W_deposit32_l( mul ) );
     626             :         }
     627             : 
     628           0 :         tmp_sum = W_shr( tmp_sum, 4 ); // to make sure that the tmp_sum will not overflow
     629           0 :         tmp = W_extract_l( tmp_sum );
     630           0 :         suma = W_mac_32_32( suma, tmp, tmp );
     631             :     }
     632             : 
     633           0 :     return suma;
     634             : }
     635             : 
     636           0 : void v_mult_mat_fixed(
     637             :     Word32 *y,       /* o  : the product x*A               Qx - guardbits*/
     638             :     const Word32 *x, /* i  : vector x                      Qx*/
     639             :     const Word32 *A, /* i  : matrix A                      Q31*/
     640             :     const Word16 Nr, /* i  : number of rows                Q0*/
     641             :     const Word16 Nc, /* i  : number of columns             Q0*/
     642             :     Word16 guardbits )
     643             : {
     644             :     Word16 i, j;
     645             :     const Word32 *pt_x, *pt_A;
     646             :     Word32 tmp_y[MAX_V_MULT_MAT];
     647             :     Word32 *pt_y;
     648             : 
     649           0 :     pt_y = tmp_y;
     650           0 :     pt_A = A; /*Q31*/
     651             : 
     652           0 :     FOR( i = 0; i < Nc; i++ )
     653             :     {
     654           0 :         pt_x = x; /*Qx*/
     655           0 :         *pt_y = 0;
     656           0 :         move32();
     657           0 :         FOR( j = 0; j < Nr; j++ )
     658             :         {
     659           0 :             *pt_y = L_add( *pt_y, L_shr( Mpy_32_32( ( *pt_x++ ), ( *pt_A++ ) ), guardbits ) ); /*Qx - guardbits*/
     660           0 :             move32();
     661             :         }
     662           0 :         pt_y++;
     663             :     }
     664             : 
     665           0 :     MVR2R_WORD32( tmp_y, y, Nc ); /*Qx - guardbits*/
     666           0 : }
     667             : 
     668           0 : Word32 dot_product_cholesky_fx(
     669             :     const Word32 *x, /* i  : vector x                        Qx1*/
     670             :     const Word32 *A, /* i  : Cholesky  matrix A              Qx2*/
     671             :     const Word16 N   /* i  : vector & matrix size            Q0*/
     672             : )
     673             : {
     674             :     Word16 i, j;
     675             :     Word32 suma, tmp_sum;
     676             :     const Word32 *pt_x, *pt_A;
     677             : 
     678           0 :     pt_A = A;
     679           0 :     suma = 0;
     680           0 :     move32();
     681             : 
     682           0 :     FOR( i = 0; i < N; i++ )
     683             :     {
     684           0 :         tmp_sum = 0;
     685           0 :         move32();
     686           0 :         pt_x = x;
     687           0 :         FOR( j = 0; j <= i; j++ )
     688             :         {
     689           0 :             tmp_sum = L_add( tmp_sum, Mpy_32_32( *pt_x++, *pt_A++ ) ); /*Qx1 + Qx2 - 31*/
     690             :         }
     691             : 
     692           0 :         suma = L_add( suma, Mpy_32_32( tmp_sum, tmp_sum ) );
     693             :     }
     694             : 
     695           0 :     return suma;
     696             : }
     697             : 
     698             : 
     699             : /*---------------------------------------------------------------------*
     700             :  * v_mult_mat_fx()
     701             :  *
     702             :  * Multiplication of row vector x by matrix A, where x has size Nr and
     703             :  * A has size Nr x Nc ans it is stored column-wise in memory.
     704             :  * The resulting row vector y has size Nc
     705             :  *---------------------------------------------------------------------*/
     706             : 
     707           0 : void v_mult_mat_fx(
     708             :     Word32 *y_fx, /* o  : the product x*A               y_q_fx*/
     709             :     Word16 *y_q_fx,
     710             :     const Word32 *x_fx, /* i  : vector x                      x_q_fx*/
     711             :     Word16 *x_q_fx,
     712             :     const Word32 *A_fx, /* i  : matrix A                      A_q_fx*/
     713             :     Word16 *A_q_fx,
     714             :     const Word16 Nr, /* i  : number of rows                Q0*/
     715             :     const Word16 Nc  /* i  : number of columns             Q0*/
     716             : )
     717             : {
     718             :     Word16 i, j;
     719             :     const Word32 *pt_x_fx, *pt_A_fx;
     720             :     Word32 *pt_y_fx;
     721             :     Word32 tmp_y_fx[MAX_V_MULT_MAT];
     722             :     Word32 temp;
     723             :     Word16 temp_q;
     724             : 
     725           0 :     pt_y_fx = tmp_y_fx;
     726           0 :     pt_A_fx = A_fx; /*A_q_fx*/
     727           0 :     pt_x_fx = x_fx; /*x_q_fx*/
     728             : 
     729           0 :     FOR( i = 0; i < Nc; i++ )
     730             :     {
     731           0 :         pt_x_fx = x_fx; /*x_q_fx*/
     732           0 :         *pt_y_fx = 0;
     733           0 :         move32();
     734           0 :         y_q_fx[i] = 0;
     735           0 :         move32();
     736           0 :         FOR( j = 0; j < Nr; j++ )
     737             :         {
     738           0 :             temp = Mpy_32_32( *pt_x_fx++, *pt_A_fx++ ); /*x_q_fx + A_q_fx - 31*/
     739           0 :             temp_q = sub( add( x_q_fx[j], A_q_fx[Nr * i + j] ), 31 );
     740           0 :             IF( j == 0 )
     741             :             {
     742           0 :                 *pt_y_fx = temp;
     743           0 :                 move32();
     744           0 :                 y_q_fx[i] = temp_q;
     745           0 :                 move16();
     746             :             }
     747             :             ELSE
     748             :             {
     749           0 :                 IF( GT_16( y_q_fx[i], temp_q ) )
     750             :                 {
     751           0 :                     *pt_y_fx = L_add( L_shr( *pt_y_fx, sub( y_q_fx[i], temp_q ) ), temp );
     752           0 :                     move32();
     753           0 :                     y_q_fx[i] = temp_q;
     754           0 :                     move16();
     755             :                 }
     756             :                 ELSE
     757             :                 {
     758           0 :                     *pt_y_fx = L_add( *pt_y_fx, L_shr( temp, sub( temp_q, y_q_fx[i] ) ) );
     759           0 :                     move32();
     760             :                 }
     761             :             }
     762             :         }
     763           0 :         pt_y_fx++;
     764             :     }
     765             : 
     766           0 :     MVR2R_WORD32( tmp_y_fx, y_fx, Nc ); /*y_q_fx*/
     767             : 
     768           0 :     return;
     769             : }
     770             : 
     771             : /*---------------------------------------------------------------------*
     772             :  * logsumexp()
     773             :  *
     774             :  * Compute logarithm of the sum of exponentials of input vector in a numerically stable way
     775             :  *---------------------------------------------------------------------*/
     776             : 
     777             : /*! r: log(sum(exp(x)) of the input array x */
     778           0 : Word32 logsumexp_fx(
     779             :     const Word32 x[], /* i  : input array x                           Q31 - x_e*/
     780             :     const Word16 x_e,
     781             :     const Word16 N /* i  : number of elements in array x           Q0*/
     782             : )
     783             : {
     784             :     Word32 max_exp, temp32_sub;
     785             :     Word32 sum, temp32, pow_temp;
     786           0 :     Word32 log2_e_fx = 1549082005; // Q30 of log2(e);
     787           0 :     Word16 log2_e_fx_e = 1;
     788           0 :     move16();
     789           0 :     move16();
     790             :     Word16 i;
     791           0 :     Word16 pow_e, sum_e = 0;
     792           0 :     move16();
     793           0 :     max_exp = x[0]; /*Q31 - x_e*/
     794           0 :     move32();
     795           0 :     sum = 0;
     796           0 :     move32();
     797           0 :     FOR( i = 1; i < N; i++ )
     798             :     {
     799           0 :         IF( GT_32( x[i], max_exp ) )
     800             :         {
     801           0 :             max_exp = x[i]; /*Q31 - x_e*/
     802           0 :             move32();
     803             :         }
     804             :     }
     805             : 
     806           0 :     FOR( i = 0; i < N; i++ )
     807             :     {
     808           0 :         temp32_sub = L_sub( x[i], max_exp ); /*Q31 - x_e*/
     809           0 :         pow_e = 0;
     810           0 :         move16();
     811           0 :         temp32 = Mpy_32_32( log2_e_fx, temp32_sub );                           /*Q30 - x_e*/
     812           0 :         pow_temp = BASOP_util_Pow2( temp32, add( x_e, log2_e_fx_e ), &pow_e ); /*Q31 - pow_e*/
     813           0 :         sum = BASOP_Util_Add_Mant32Exp( sum, sum_e, pow_temp, pow_e, &sum_e ); /*Q31 - sum_e*/
     814             :     }
     815           0 :     temp32 = L_add( BASOP_Util_Log2( sum ), L_shl( sum_e, Q25 ) ); /*Q25*/
     816           0 :     temp32 = Mpy_32_32( temp32, 1488522239 );                      /*logf(x) = log2(x)*logf(2)   Q25*/
     817           0 :     temp32 = L_add( L_shr( temp32, sub( x_e, 6 ) ), max_exp );     // q = 31-x_e
     818           0 :     return temp32;                                                 /*31-x_e*/
     819             : }
     820             : 
     821             : /*---------------------------------------------------------------------*
     822             :  * lin_interp()
     823             :  *
     824             :  * Linearly maps x from source range <x1, x2> to the target range <y1, y2>
     825             :  *---------------------------------------------------------------------*/
     826             : /*! r: mapped output value */
     827           0 : Word32 lin_interp32_fx(
     828             :     const Word32 x,       /* i  : the value to be mapped                        Qx */
     829             :     const Word32 x1,      /* i  : source range interval: low end                Qx */
     830             :     const Word32 y1,      /* i  : source range interval: high end              Q31 */
     831             :     const Word32 x2,      /* i  : target range interval: low                    Qx */
     832             :     const Word32 y2,      /* i  : target range interval: high                  Q31 */
     833             :     const Word16 flag_sat /* i  : flag to indicate whether to apply saturation     */
     834             : )
     835             : {
     836             :     Word32 temp32;
     837             :     Word32 temp_div;
     838             :     Word16 exp_out;
     839             : 
     840           0 :     IF( L_sub( x2, x1 ) == 0 )
     841             :     {
     842           0 :         return y1;
     843             :     }
     844           0 :     ELSE IF( flag_sat )
     845             :     {
     846           0 :         IF( GE_32( x, L_max( x1, x2 ) ) )
     847             :         {
     848           0 :             return GT_32( x1, x2 ) ? y1 : y2;
     849             :         }
     850           0 :         ELSE IF( LE_32( x, L_min( x1, x2 ) ) )
     851             :         {
     852           0 :             return LT_32( x1, x2 ) ? y1 : y2;
     853             :         }
     854             :     }
     855             : 
     856           0 :     temp32 = Mpy_32_32( L_sub( x, x1 ), L_sub( y2, y1 ) ); /* Qx */
     857           0 :     temp_div = L_deposit_h( BASOP_Util_Divide3232_Scale( temp32, L_sub( x2, x1 ), &exp_out ) );
     858           0 :     temp32 = BASOP_Util_Add_Mant32Exp( y1, 0, temp_div, exp_out, &exp_out );
     859           0 :     temp32 = L_shl_sat( temp32, exp_out ); /* Q31 */
     860             : 
     861           0 :     return temp32;
     862             : }
     863             : 
     864             : /*-------------------------------------------------------------------*
     865             :  * check_bounds_s_fx()
     866             :  *
     867             :  * Ensure the input value is within the given limits
     868             :  *-------------------------------------------------------------------*/
     869             : 
     870             : /*! r: Adjusted value */
     871      839434 : Word16 check_bounds_s_fx(
     872             :     const Word16 value, /* i  : Input value                  Q0*/
     873             :     const Word16 low,   /* i  : Low limit                    Q0*/
     874             :     const Word16 high   /* i  : High limit                   Q0*/
     875             : )
     876             : {
     877             :     Word16 value_adj;
     878             : 
     879      839434 :     value_adj = s_min( s_max( value, low ), high ); /*Q0*/
     880             : 
     881      839434 :     return value_adj; /*Q0*/
     882             : }
     883             : 
     884             : /*-------------------------------------------------------------------*
     885             :  * check_bounds_l()
     886             :  *
     887             :  * Ensure the input value is within the given limits
     888             :  *-------------------------------------------------------------------*/
     889             : 
     890             : /*! r: Adjusted value */
     891      521900 : Word32 check_bounds_l(
     892             :     const Word32 value, /* i  : Input value                  Q0*/
     893             :     const Word32 low,   /* i  : Low limit                    Q0*/
     894             :     const Word32 high   /* i  : High limit                   Q0*/
     895             : )
     896             : {
     897             :     Word32 value_adj;
     898             : 
     899      521900 :     value_adj = L_min( L_max( value, low ), high ); /*Q0*/
     900             : 
     901      521900 :     return value_adj; /*Q0*/
     902             : }
     903             : 
     904             : /****************************************************************************/
     905             : /* matrix functions                                                         */
     906             : /* matrices are ordered column-wise in memory                               */
     907             : /****************************************************************************/
     908             : 
     909             : /*---------------------------------------------------------------------*
     910             :  * matrix product
     911             :  *
     912             :  * comput the matrix product of two matrices (Z=X*Y)
     913             :  *---------------------------------------------------------------------*/
     914             : 
     915     2008120 : Word16 matrix_product_mant_exp_fx(
     916             :     const Word32 *X_fx,   /* i  : left hand matrix                                                                       Q31 - X_fx_e*/
     917             :     const Word16 X_fx_e,  /* i  : left hand matrix                                                                       */
     918             :     const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
     919             :     const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
     920             :     const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
     921             :     const Word32 *Y_fx,   /* i  : right hand matrix                                                                      Q31 - Y_fx_e*/
     922             :     const Word16 Y_fx_e,  /* i  : right hand matrix                                                                      */
     923             :     const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                Q0*/
     924             :     const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             Q0*/
     925             :     const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
     926             :     Word32 *Z_fx,         /* o  : resulting matrix after the matrix multiplication                                       Q31 - Z_fx_e*/
     927             :     Word16 *Z_fx_e        /* o  : resulting matrix after the matrix multiplication                                       */
     928             : )
     929             : {
     930             :     Word16 i, j, k;
     931     2008120 :     Word32 *Zp_fx = Z_fx;
     932             :     Word16 out_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     933     2008120 :     Word16 *Zp_fx_e = out_e;
     934             :     Word16 row, col;
     935             :     Word64 temp;
     936             :     Word16 temp_e;
     937     2008120 :     Word16 prod_e = add( X_fx_e, Y_fx_e );
     938             : 
     939     2008120 :     Word16 max_exp = -31;
     940     2008120 :     move16();
     941             : 
     942             :     /* Processing */
     943     2008120 :     test();
     944     2008120 :     test();
     945     2008120 :     test();
     946     2008120 :     IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */
     947             :     {
     948           0 :         IF( NE_16( rowsX, rowsY ) )
     949             :         {
     950           0 :             return EXIT_FAILURE;
     951             :         }
     952           0 :         FOR( j = 0; j < colsY; ++j )
     953             :         {
     954           0 :             FOR( i = 0; i < colsX; ++i )
     955             :             {
     956           0 :                 temp = 0;
     957           0 :                 move64();
     958             : 
     959           0 :                 FOR( k = 0; k < rowsX; ++k )
     960             :                 {
     961           0 :                     temp = W_mac_32_32( temp, X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ); // X_fx_e + Y_fx_e
     962             :                 }
     963             :                 /* Maximize accumulated value to 32-bit */
     964           0 :                 temp_e = W_norm( temp );
     965           0 :                 temp = W_shl( temp, temp_e );
     966           0 :                 if ( 0 == temp )
     967             :                 {
     968           0 :                     temp_e = prod_e;
     969           0 :                     move16();
     970             :                 }
     971           0 :                 *Zp_fx_e = sub( prod_e, temp_e );
     972           0 :                 move16();
     973           0 :                 ( *Zp_fx ) = W_extract_h( temp );
     974           0 :                 move32();
     975           0 :                 max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
     976           0 :                 Zp_fx++;
     977           0 :                 Zp_fx_e++;
     978             :             }
     979             :         }
     980           0 :         row = colsY; /*Q0*/
     981           0 :         move16();
     982           0 :         col = colsX; /*Q0*/
     983           0 :         move16();
     984             :     }
     985     2008120 :     ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */
     986             :     {
     987      834380 :         IF( NE_16( colsX, colsY ) )
     988             :         {
     989           0 :             return EXIT_FAILURE;
     990             :         }
     991     5032904 :         FOR( j = 0; j < rowsY; ++j )
     992             :         {
     993    37245276 :             FOR( i = 0; i < rowsX; ++i )
     994             :             {
     995    33046752 :                 temp = 0;
     996    33046752 :                 move64();
     997   106886472 :                 FOR( k = 0; k < colsX; ++k )
     998             :                 {
     999    73839720 :                     temp = W_mac_32_32( temp, X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ); // X_fx_e + Y_fx_e
    1000             :                 }
    1001             :                 /* Maximize accumulated value to 32-bit */
    1002    33046752 :                 temp_e = W_norm( temp );
    1003    33046752 :                 temp = W_shl( temp, temp_e );
    1004    33046752 :                 if ( 0 == temp )
    1005             :                 {
    1006    15085835 :                     temp_e = prod_e;
    1007    15085835 :                     move16();
    1008             :                 }
    1009    33046752 :                 *Zp_fx_e = sub( prod_e, temp_e );
    1010    33046752 :                 move16();
    1011    33046752 :                 ( *Zp_fx ) = W_extract_h( temp );
    1012    33046752 :                 move32();
    1013    33046752 :                 max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
    1014    33046752 :                 Zp_fx++;
    1015    33046752 :                 Zp_fx_e++;
    1016             :             }
    1017             :         }
    1018      834380 :         row = rowsY; /*Q0*/
    1019      834380 :         move16();
    1020      834380 :         col = rowsX; /*Q0*/
    1021      834380 :         move16();
    1022             :     }
    1023     1173740 :     ELSE IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 1 ) ) /* We use both transpose */
    1024             :     {
    1025      113120 :         IF( NE_16( rowsX, colsY ) )
    1026             :         {
    1027           0 :             return EXIT_FAILURE;
    1028             :         }
    1029      688444 :         FOR( j = 0; j < rowsY; ++j )
    1030             :         {
    1031     1732172 :             FOR( i = 0; i < colsX; ++i )
    1032             :             {
    1033     1156848 :                 temp = 0;
    1034     1156848 :                 move64();
    1035     3489144 :                 FOR( k = 0; k < colsX; ++k )
    1036             :                 {
    1037     2332296 :                     temp = W_mac_32_32( temp, X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ); // X_fx_e + Y_fx_e
    1038             :                 }
    1039             :                 /* Maximize accumulated value to 32-bit */
    1040     1156848 :                 temp_e = W_norm( temp );
    1041     1156848 :                 temp = W_shl( temp, temp_e );
    1042     1156848 :                 if ( 0 == temp )
    1043             :                 {
    1044         400 :                     temp_e = prod_e;
    1045         400 :                     move16();
    1046             :                 }
    1047     1156848 :                 *Zp_fx_e = sub( prod_e, temp_e );
    1048     1156848 :                 move16();
    1049     1156848 :                 ( *Zp_fx ) = W_extract_h( temp );
    1050     1156848 :                 move32();
    1051     1156848 :                 max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
    1052     1156848 :                 Zp_fx++;
    1053     1156848 :                 Zp_fx_e++;
    1054             :             }
    1055             :         }
    1056      113120 :         row = rowsY; /*Q0*/
    1057      113120 :         move16();
    1058      113120 :         col = colsX; /*Q0*/
    1059      113120 :         move16();
    1060             :     }
    1061             :     ELSE /* Regular case */
    1062             :     {
    1063     1060620 :         IF( NE_16( colsX, rowsY ) )
    1064             :         {
    1065           0 :             return EXIT_FAILURE;
    1066             :         }
    1067             : 
    1068     3819908 :         FOR( j = 0; j < colsY; ++j )
    1069             :         {
    1070    15641696 :             FOR( i = 0; i < rowsX; ++i )
    1071             :             {
    1072    12882408 :                 temp = 0;
    1073    12882408 :                 move64();
    1074    62919424 :                 FOR( k = 0; k < colsX; ++k )
    1075             :                 {
    1076    50037016 :                     temp = W_mac_32_32( temp, X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ); // X_fx_e + Y_fx_e
    1077             :                 }
    1078             :                 /* Maximize accumulated value to 32-bit */
    1079    12882408 :                 temp_e = W_norm( temp );
    1080    12882408 :                 temp = W_shl( temp, temp_e );
    1081    12882408 :                 if ( 0 == temp )
    1082             :                 {
    1083     2422111 :                     temp_e = prod_e;
    1084             :                 }
    1085    12882408 :                 *Zp_fx_e = sub( prod_e, temp_e );
    1086    12882408 :                 move16();
    1087    12882408 :                 ( *Zp_fx ) = W_extract_h( temp );
    1088    12882408 :                 move32();
    1089    12882408 :                 max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
    1090    12882408 :                 Zp_fx++;
    1091    12882408 :                 Zp_fx_e++;
    1092             :             }
    1093             :         }
    1094     1060620 :         row = colsY; /*Q0*/
    1095     1060620 :         move16();
    1096     1060620 :         col = rowsX; /*Q0*/
    1097     1060620 :         move16();
    1098             :     }
    1099     2008120 :     Zp_fx = Z_fx; /*Q31 - Zp_fx_e*/
    1100             : 
    1101     2008120 :     Zp_fx_e = out_e;
    1102     2008120 :     move16();
    1103             : 
    1104             : 
    1105     2008120 :     *Z_fx_e = max_exp;
    1106     2008120 :     move16();
    1107     9541256 :     FOR( j = 0; j < row; ++j )
    1108             :     {
    1109    54619144 :         FOR( i = 0; i < col; ++i )
    1110             :         {
    1111    47086008 :             *Zp_fx = L_shr_r( *Zp_fx, sub( *Z_fx_e, *Zp_fx_e ) ); /*Q31 - Zp_fx_e*/
    1112    47086008 :             move32();
    1113    47086008 :             Zp_fx++;
    1114    47086008 :             Zp_fx_e++;
    1115             :         }
    1116             :     }
    1117             : 
    1118     2008120 :     return EXIT_SUCCESS;
    1119             : }
    1120             : 
    1121      340403 : Word16 matrix_product_fx(
    1122             :     const Word32 *X_fx,   /* i  : left hand matrix                                                                       Qx*/
    1123             :     const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    1124             :     const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    1125             :     const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    1126             :     const Word32 *Y_fx,   /* i  : right hand matrix                                                                      Qy*/
    1127             :     const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                Q0*/
    1128             :     const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             Q0*/
    1129             :     const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
    1130             :     Word32 *Z_fx          /* o  : resulting matrix after the matrix multiplication                                       Qx + Qy - 31*/
    1131             : )
    1132             : {
    1133             :     Word16 i, j, k;
    1134      340403 :     Word32 *Zp_fx = Z_fx;
    1135             : 
    1136             :     /* Processing */
    1137      340403 :     test();
    1138      340403 :     test();
    1139      340403 :     test();
    1140      340403 :     IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */
    1141             :     {
    1142        1040 :         IF( NE_16( rowsX, rowsY ) )
    1143             :         {
    1144           0 :             return EXIT_FAILURE;
    1145             :         }
    1146        5200 :         FOR( j = 0; j < colsY; ++j )
    1147             :         {
    1148       24960 :             FOR( i = 0; i < colsX; ++i )
    1149             :             {
    1150       20800 :                 ( *Zp_fx ) = 0;
    1151       20800 :                 move32();
    1152      124800 :                 FOR( k = 0; k < rowsX; ++k )
    1153             :                 {
    1154      104000 :                     ( *Zp_fx ) = Madd_32_32( *Zp_fx, X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ); /*Qx + Qy - 31*/
    1155      104000 :                     move32();
    1156             :                 }
    1157       20800 :                 Zp_fx++;
    1158             :             }
    1159             :         }
    1160             :     }
    1161      339363 :     ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */
    1162             :     {
    1163      113120 :         IF( NE_16( colsX, colsY ) )
    1164             :         {
    1165           0 :             return EXIT_FAILURE;
    1166             :         }
    1167      804960 :         FOR( j = 0; j < rowsY; ++j )
    1168             :         {
    1169     4961280 :             FOR( i = 0; i < rowsX; ++i )
    1170             :             {
    1171     4269440 :                 ( *Zp_fx ) = 0;
    1172     4269440 :                 move32();
    1173    12888960 :                 FOR( k = 0; k < colsX; ++k )
    1174             :                 {
    1175     8619520 :                     ( *Zp_fx ) = Madd_32_32( *Zp_fx, X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ); /*Qx + Qy - 31*/
    1176     8619520 :                     move32();
    1177             :                 }
    1178     4269440 :                 Zp_fx++;
    1179             :             }
    1180             :         }
    1181             :     }
    1182      226243 :     ELSE IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 1 ) ) /* We use both transpose */
    1183             :     {
    1184           0 :         IF( NE_16( rowsX, colsY ) )
    1185             :         {
    1186           0 :             return EXIT_FAILURE;
    1187             :         }
    1188           0 :         FOR( j = 0; j < rowsY; ++j )
    1189             :         {
    1190           0 :             FOR( i = 0; i < colsX; ++i )
    1191             :             {
    1192           0 :                 ( *Zp_fx ) = 0;
    1193           0 :                 move32();
    1194           0 :                 FOR( k = 0; k < colsX; ++k )
    1195             :                 {
    1196           0 :                     ( *Zp_fx ) = Madd_32_32( *Zp_fx, X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ); /*Qx + Qy - 31*/
    1197           0 :                     move32();
    1198             :                 }
    1199             : 
    1200           0 :                 Zp_fx++;
    1201             :             }
    1202             :         }
    1203             :     }
    1204             :     ELSE /* Regular case */
    1205             :     {
    1206      226243 :         IF( NE_16( colsX, rowsY ) )
    1207             :         {
    1208           0 :             return EXIT_FAILURE;
    1209             :         }
    1210             : 
    1211      679851 :         FOR( j = 0; j < colsY; ++j )
    1212             :         {
    1213     3000880 :             FOR( i = 0; i < rowsX; ++i )
    1214             :             {
    1215     2547272 :                 ( *Zp_fx ) = 0;
    1216     2547272 :                 move32();
    1217     7680768 :                 FOR( k = 0; k < colsX; ++k )
    1218             :                 {
    1219     5133496 :                     ( *Zp_fx ) = L_add_sat( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); /*Qx + Qy - 31*/
    1220             :                     // TODO: overflow of Z_fx to be checked
    1221     5133496 :                     move32();
    1222             :                 }
    1223     2547272 :                 Zp_fx++;
    1224             :             }
    1225             :         }
    1226             :     }
    1227             : 
    1228      340403 :     return EXIT_SUCCESS;
    1229             : }
    1230             : 
    1231        1597 : Word16 matrix_product_q30_fx(
    1232             :     const Word32 *X_fx,   /* i  : left hand matrix                                                                       Q31*/
    1233             :     const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    1234             :     const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    1235             :     const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    1236             :     const Word32 *Y_fx,   /* i  : right hand matrix                                                                      Q25*/
    1237             :     const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                Q0*/
    1238             :     const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             Q0*/
    1239             :     const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
    1240             :     Word32 *Z_fx          /* o  : resulting matrix after the matrix multiplication                                       Q30*/
    1241             : )
    1242             : {
    1243             :     Word16 i, j, k;
    1244        1597 :     Word32 *Zp_fx = Z_fx;
    1245             :     Word64 W_tmp;
    1246             : 
    1247             :     /* Processing */
    1248        1597 :     test();
    1249        1597 :     test();
    1250        1597 :     test();
    1251        1597 :     IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */
    1252             :     {
    1253         557 :         IF( NE_16( rowsX, rowsY ) )
    1254             :         {
    1255           0 :             return EXIT_FAILURE;
    1256             :         }
    1257        1114 :         FOR( j = 0; j < colsY; ++j )
    1258             :         {
    1259        5863 :             FOR( i = 0; i < colsX; ++i )
    1260             :             {
    1261             :                 //( *Zp_fx ) = 0;
    1262        5306 :                 W_tmp = 0;
    1263        5306 :                 move64();
    1264       62248 :                 FOR( k = 0; k < rowsX; ++k )
    1265             :                 {
    1266       56942 :                     W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); // Q56
    1267             :                 }
    1268        5306 :                 W_tmp = W_shl( W_tmp, 6 );         /*Q62*/
    1269        5306 :                 ( *Zp_fx ) = W_round64_L( W_tmp ); /*Q30*/
    1270        5306 :                 move32();
    1271        5306 :                 Zp_fx++;
    1272             :             }
    1273             :         }
    1274             :     }
    1275        1040 :     ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */
    1276             :     {
    1277           0 :         IF( NE_16( colsX, colsY ) )
    1278             :         {
    1279           0 :             return EXIT_FAILURE;
    1280             :         }
    1281           0 :         FOR( j = 0; j < rowsY; ++j )
    1282             :         {
    1283           0 :             FOR( i = 0; i < rowsX; ++i )
    1284             :             {
    1285             :                 //( *Zp_fx ) = 0;
    1286           0 :                 W_tmp = 0;
    1287           0 :                 move64();
    1288           0 :                 FOR( k = 0; k < colsX; ++k )
    1289             :                 {
    1290           0 :                     W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); // Q56
    1291             :                 }
    1292           0 :                 W_tmp = W_shl( W_tmp, 6 );         /*Q62*/
    1293           0 :                 ( *Zp_fx ) = W_round64_L( W_tmp ); /*Q30*/
    1294           0 :                 move32();
    1295           0 :                 Zp_fx++;
    1296             :             }
    1297             :         }
    1298             :     }
    1299        1040 :     ELSE IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 1 ) ) /* We use both transpose */
    1300             :     {
    1301           0 :         IF( NE_16( rowsX, colsY ) )
    1302             :         {
    1303           0 :             return EXIT_FAILURE;
    1304             :         }
    1305           0 :         FOR( j = 0; j < rowsY; ++j )
    1306             :         {
    1307           0 :             FOR( i = 0; i < colsX; ++i )
    1308             :             {
    1309             :                 //( *Zp_fx ) = 0;
    1310           0 :                 W_tmp = 0;
    1311           0 :                 move64();
    1312           0 :                 FOR( k = 0; k < colsX; ++k )
    1313             :                 {
    1314           0 :                     W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); // Q56
    1315             :                 }
    1316             : 
    1317           0 :                 W_tmp = W_shl( W_tmp, 6 );         /*Q62*/
    1318           0 :                 ( *Zp_fx ) = W_round64_L( W_tmp ); /*Q30*/
    1319           0 :                 move32();
    1320           0 :                 Zp_fx++;
    1321             :             }
    1322             :         }
    1323             :     }
    1324             :     ELSE /* Regular case */
    1325             :     {
    1326        1040 :         IF( NE_16( colsX, rowsY ) )
    1327             :         {
    1328           0 :             return EXIT_FAILURE;
    1329             :         }
    1330             : 
    1331        5200 :         FOR( j = 0; j < colsY; ++j )
    1332             :         {
    1333       24960 :             FOR( i = 0; i < rowsX; ++i )
    1334             :             {
    1335             :                 //( *Zp_fx ) = 0;
    1336       20800 :                 W_tmp = 0;
    1337       20800 :                 move64();
    1338      104000 :                 FOR( k = 0; k < colsX; ++k )
    1339             :                 {
    1340       83200 :                     W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); // Q56
    1341             :                 }
    1342       20800 :                 W_tmp = W_shl( W_tmp, 6 );         /*Q62*/
    1343       20800 :                 ( *Zp_fx ) = W_round64_L( W_tmp ); /*Q30*/
    1344       20800 :                 move32();
    1345       20800 :                 Zp_fx++;
    1346             :             }
    1347             :         }
    1348             :     }
    1349             : 
    1350        1597 :     return EXIT_SUCCESS;
    1351             : }
    1352             : /*takes input matrices in mantissa and exponent forms*/
    1353      343560 : Word16 matrix_product_mant_exp(
    1354             :     const Word32 *X_fx,   /* i  : left hand matrix                                                                       Q31 - X_e*/
    1355             :     const Word16 *X_e,    /* i  : left hand matrix                                                                       */
    1356             :     const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    1357             :     const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    1358             :     const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    1359             :     const Word32 *Y_fx,   /* i  : right hand matrix                                                                      Q31 - Y_e*/
    1360             :     const Word16 *Y_e,    /* i  : right hand matrix                                                                      */
    1361             :     const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                Q0*/
    1362             :     const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             Q0*/
    1363             :     const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
    1364             :     Word32 *Z_fx,         /* o  : resulting matrix after the matrix multiplication                                       Q31 - Z_e*/
    1365             :     Word16 *Z_e           /* o  : resulting matrix after the matrix multiplication                                       */
    1366             : )
    1367             : {
    1368             :     Word16 i, j, k;
    1369      343560 :     Word32 *Zp = Z_fx;
    1370      343560 :     Word16 *Zp_e = Z_e;
    1371             :     Word32 L_tmp;
    1372             :     Word16 tmp_e;
    1373             : 
    1374             :     /* Processing */
    1375      343560 :     test();
    1376      343560 :     test();
    1377      343560 :     test();
    1378      343560 :     IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */
    1379             :     {
    1380           0 :         IF( NE_16( rowsX, rowsY ) )
    1381             :         {
    1382           0 :             return EXIT_FAILURE;
    1383             :         }
    1384           0 :         FOR( j = 0; j < colsY; ++j )
    1385             :         {
    1386           0 :             FOR( i = 0; i < colsX; ++i )
    1387             :             {
    1388           0 :                 ( *Zp ) = 0;
    1389           0 :                 move32();
    1390           0 :                 ( *Zp_e ) = 0;
    1391           0 :                 move16();
    1392           0 :                 FOR( k = 0; k < rowsX; ++k )
    1393             :                 {
    1394           0 :                     L_tmp = Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ); /*Q31 - (X_e + Y_e)*/
    1395           0 :                     tmp_e = add( X_e[k + i * rowsX], Y_e[k + j * rowsY] );
    1396             : 
    1397           0 :                     ( *Zp ) = BASOP_Util_Add_Mant32Exp( *Zp, *Zp_e, L_tmp, tmp_e, &tmp_e );
    1398           0 :                     move32();
    1399           0 :                     ( *Zp_e ) = tmp_e;
    1400           0 :                     move16();
    1401             :                 }
    1402           0 :                 Zp++;
    1403           0 :                 Zp_e++;
    1404             :             }
    1405             :         }
    1406             :     }
    1407      343560 :     ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */
    1408             :     {
    1409      115220 :         IF( NE_16( colsX, colsY ) )
    1410             :         {
    1411           0 :             return EXIT_FAILURE;
    1412             :         }
    1413      703144 :         FOR( j = 0; j < rowsY; ++j )
    1414             :         {
    1415     3622168 :             FOR( i = 0; i < rowsX; ++i )
    1416             :             {
    1417     3034244 :                 ( *Zp ) = 0;
    1418     3034244 :                 move32();
    1419     3034244 :                 ( *Zp_e ) = 0;
    1420     3034244 :                 move16();
    1421     9625012 :                 FOR( k = 0; k < colsX; ++k )
    1422             :                 {
    1423     6590768 :                     L_tmp = Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ); /*Q31 - (X_e + Y_e)*/
    1424     6590768 :                     tmp_e = add( X_e[i + k * rowsX], Y_e[j + k * rowsY] );
    1425             : 
    1426     6590768 :                     ( *Zp ) = BASOP_Util_Add_Mant32Exp( *Zp, *Zp_e, L_tmp, tmp_e, &tmp_e );
    1427     6590768 :                     ( *Zp_e ) = tmp_e;
    1428     6590768 :                     move16();
    1429             :                 }
    1430     3034244 :                 Zp++;
    1431     3034244 :                 Zp_e++;
    1432             :             }
    1433             :         }
    1434             :     }
    1435      228340 :     ELSE IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 1 ) ) /* We use both transpose */
    1436             :     {
    1437           0 :         IF( NE_16( rowsX, colsY ) )
    1438             :         {
    1439           0 :             return EXIT_FAILURE;
    1440             :         }
    1441           0 :         FOR( j = 0; j < rowsY; ++j )
    1442             :         {
    1443           0 :             FOR( i = 0; i < colsX; ++i )
    1444             :             {
    1445           0 :                 ( *Zp ) = 0;
    1446           0 :                 move32();
    1447           0 :                 ( *Zp_e ) = 0;
    1448           0 :                 move16();
    1449           0 :                 FOR( k = 0; k < colsX; ++k )
    1450             :                 {
    1451           0 :                     L_tmp = Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ); /*Q31 - (X_e + Y_e)*/
    1452           0 :                     tmp_e = add( X_e[k + i * rowsX], Y_e[j + k * rowsY] );
    1453             : 
    1454           0 :                     ( *Zp ) = BASOP_Util_Add_Mant32Exp( *Zp, *Zp_e, L_tmp, tmp_e, &tmp_e );
    1455           0 :                     move32();
    1456           0 :                     ( *Zp_e ) = tmp_e;
    1457           0 :                     move16();
    1458             :                 }
    1459             : 
    1460           0 :                 Zp++;
    1461           0 :                 Zp_e++;
    1462             :             }
    1463             :         }
    1464             :     }
    1465             :     ELSE /* Regular case */
    1466             :     {
    1467      228340 :         IF( NE_16( colsX, rowsY ) )
    1468             :         {
    1469           0 :             return EXIT_FAILURE;
    1470             :         }
    1471             : 
    1472      698740 :         FOR( j = 0; j < colsY; ++j )
    1473             :         {
    1474     2884896 :             FOR( i = 0; i < rowsX; ++i )
    1475             :             {
    1476     2414496 :                 ( *Zp ) = 0;
    1477     2414496 :                 move32();
    1478     2414496 :                 ( *Zp_e ) = 0;
    1479     2414496 :                 move16();
    1480     7885488 :                 FOR( k = 0; k < colsX; ++k )
    1481             :                 {
    1482     5470992 :                     L_tmp = Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ); /*Q31 - (X_e + Y_e)*/
    1483     5470992 :                     tmp_e = add( X_e[i + k * rowsX], Y_e[k + j * rowsY] );
    1484             : 
    1485     5470992 :                     ( *Zp ) = BASOP_Util_Add_Mant32Exp( *Zp, *Zp_e, L_tmp, tmp_e, &tmp_e );
    1486     5470992 :                     move32();
    1487     5470992 :                     ( *Zp_e ) = tmp_e;
    1488     5470992 :                     move16();
    1489             :                 }
    1490     2414496 :                 Zp++;
    1491     2414496 :                 Zp_e++;
    1492             :             }
    1493             :         }
    1494             :     }
    1495             : 
    1496      343560 :     return EXIT_SUCCESS;
    1497             : }
    1498             : 
    1499             : 
    1500     1575900 : Word16 matrix_diag_product_fx(
    1501             :     const Word32 *X, /* i  : left hand matrix                                                                       Q31 - X_e*/
    1502             :     Word16 X_e,
    1503             :     const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    1504             :     const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    1505             :     const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    1506             :     const Word32 *Y,      /* i  : right hand diagonal matrix as vector containing the diagonal elements                  Q31 - Y_e*/
    1507             :     Word16 Y_e,
    1508             :     const Word16 entriesY, /* i  : number of entries in the diagonal                                                      Q0*/
    1509             :     Word32 *Z,             /* o  : resulting matrix after the matrix multiplication                                       Q31 - Z_e*/
    1510             :     Word16 *Z_e )
    1511             : {
    1512             :     Word16 i, j;
    1513     1575900 :     Word32 *Zp = Z;
    1514             : 
    1515             :     /* Processing */
    1516     1575900 :     IF( EQ_16( transpX, 1 ) ) /* We use X transpose */
    1517             :     {
    1518           0 :         IF( NE_16( rowsX, entriesY ) )
    1519             :         {
    1520           0 :             return EXIT_FAILURE;
    1521             :         }
    1522           0 :         FOR( j = 0; j < entriesY; ++j )
    1523             :         {
    1524           0 :             FOR( i = 0; i < colsX; ++i )
    1525             :             {
    1526           0 :                 *( Zp ) = Mpy_32_32( X[j + i * rowsX], Y[j] ); /*Q31 - (X_e + Y_e)*/
    1527           0 :                 move32();
    1528           0 :                 Zp++;
    1529             :             }
    1530             :         }
    1531             :     }
    1532             :     ELSE /* Regular case */
    1533             :     {
    1534     1575900 :         IF( NE_16( colsX, entriesY ) )
    1535             :         {
    1536           0 :             return EXIT_FAILURE;
    1537             :         }
    1538             : 
    1539     6983460 :         FOR( j = 0; j < entriesY; ++j )
    1540             :         {
    1541    34250760 :             FOR( i = 0; i < rowsX; ++i )
    1542             :             {
    1543    28843200 :                 *( Zp ) = Mpy_32_32( *( X ), Y[j] ); /*Q31 - (X_e + Y_e)*/
    1544    28843200 :                 move32();
    1545    28843200 :                 Zp++;
    1546    28843200 :                 X++;
    1547             :             }
    1548             :         }
    1549             :     }
    1550             : 
    1551     1575900 :     *Z_e = add( X_e, Y_e );
    1552     1575900 :     move16();
    1553             : 
    1554     1575900 :     return EXIT_SUCCESS;
    1555             : }
    1556             : 
    1557      113120 : Word16 matrix_diag_product_fx_2(
    1558             :     const Word32 *X, /* i  : left hand matrix                                                                       Q31 - X_e*/
    1559             :     const Word16 X_e,
    1560             :     const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    1561             :     const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    1562             :     const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    1563             :     const Word32 *Y,      /* i  : right hand diagonal matrix as vector containing the diagonal elements                  Q31 - Y_e*/
    1564             :     const Word16 *Y_e,
    1565             :     const Word16 entriesY, /* i  : number of entries in the diagonal                                                      Q0*/
    1566             :     Word32 *Z,             /* o  : resulting matrix after the matrix multiplication                                       Q31 - Z_e*/
    1567             :     Word16 *Z_e )
    1568             : {
    1569             :     Word16 i, j;
    1570      113120 :     Word32 *Zp = Z;
    1571      113120 :     Word16 *Z_ep = Z_e;
    1572             :     Word16 tmp;
    1573      113120 :     Word16 max_exp = -31;
    1574      113120 :     move16();
    1575             : 
    1576             :     /* Processing */
    1577      113120 :     IF( EQ_16( transpX, 1 ) ) /* We use X transpose */
    1578             :     {
    1579           0 :         IF( NE_16( rowsX, entriesY ) )
    1580             :         {
    1581           0 :             return EXIT_FAILURE;
    1582             :         }
    1583           0 :         FOR( j = 0; j < entriesY; ++j )
    1584             :         {
    1585           0 :             FOR( i = 0; i < colsX; ++i )
    1586             :             {
    1587           0 :                 tmp = j + i * rowsX;                 /*Q0*/
    1588           0 :                 *( Zp ) = Mpy_32_32( X[tmp], Y[j] ); /*Q31 - (X_e + Y_e)*/
    1589           0 :                 move32();
    1590           0 :                 Zp++;
    1591           0 :                 *( Z_ep ) = add( X_e, Y_e[j] );
    1592           0 :                 move16();
    1593           0 :                 max_exp = s_max( max_exp, *Z_ep ); // Find the max exp
    1594           0 :                 Z_ep++;
    1595             :             }
    1596             :         }
    1597             : 
    1598           0 :         Zp = Z;
    1599           0 :         Z_ep = Z_e;
    1600           0 :         FOR( j = 0; j < entriesY; ++j )
    1601             :         {
    1602           0 :             FOR( i = 0; i < colsX; ++i )
    1603             :             {
    1604           0 :                 *Zp = L_shr( *Zp, sub( max_exp, *Z_ep ) );
    1605           0 :                 *Z_ep = max_exp;
    1606           0 :                 Zp++;
    1607           0 :                 Z_ep++;
    1608             :             }
    1609             :         }
    1610             :     }
    1611             :     ELSE /* Regular case */
    1612             :     {
    1613      113120 :         IF( NE_16( colsX, entriesY ) )
    1614             :         {
    1615           0 :             return EXIT_FAILURE;
    1616             :         }
    1617             : 
    1618      688444 :         FOR( j = 0; j < entriesY; ++j )
    1619             :         {
    1620     1732172 :             FOR( i = 0; i < rowsX; ++i )
    1621             :             {
    1622     1156848 :                 *( Zp ) = Mpy_32_32( *( X ), Y[j] ); /*Q31 - (X_e + Y_e)*/
    1623     1156848 :                 move32();
    1624     1156848 :                 Zp++;
    1625     1156848 :                 *( Z_ep ) = add( X_e, Y_e[j] );
    1626     1156848 :                 move16();
    1627     1156848 :                 max_exp = s_max( max_exp, *Z_ep ); // Find the max exp
    1628     1156848 :                 Z_ep++;
    1629     1156848 :                 X++;
    1630             :             }
    1631             :         }
    1632      113120 :         Zp = Z;
    1633      113120 :         Z_ep = Z_e;
    1634      688444 :         FOR( j = 0; j < entriesY; ++j )
    1635             :         {
    1636     1732172 :             FOR( i = 0; i < rowsX; ++i )
    1637             :             {
    1638     1156848 :                 *Zp = L_shr( *Zp, sub( max_exp, *Z_ep ) );
    1639     1156848 :                 *Z_ep = max_exp;
    1640     1156848 :                 Zp++;
    1641     1156848 :                 Z_ep++;
    1642             :             }
    1643             :         }
    1644             :     }
    1645             : 
    1646      113120 :     return EXIT_SUCCESS;
    1647             : }
    1648             : 
    1649       90900 : Word16 matrix_diag_product_fx_1(
    1650             :     const Word32 *X, /* i  : left hand matrix                                                                       Q31 - X_e*/
    1651             :     const Word16 *X_e,
    1652             :     const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    1653             :     const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    1654             :     const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    1655             :     const Word32 *Y,      /* i  : right hand diagonal matrix as vector containing the diagonal elements                  Q31 - Y_e*/
    1656             :     const Word16 *Y_e,
    1657             :     const Word16 entriesY, /* i  : number of entries in the diagonal                                                      Q0*/
    1658             :     Word32 *Z,             /* o  : resulting matrix after the matrix multiplication                                       Q31 - Z_e*/
    1659             :     Word16 *Z_e )
    1660             : {
    1661             :     Word16 i, j;
    1662       90900 :     Word32 *Zp = Z;
    1663       90900 :     Word16 *Z_ep = Z_e;
    1664             : 
    1665             :     /* Processing */
    1666       90900 :     IF( EQ_16( transpX, 1 ) ) /* We use X transpose */
    1667             :     {
    1668           0 :         IF( NE_16( rowsX, entriesY ) )
    1669             :         {
    1670           0 :             return EXIT_FAILURE;
    1671             :         }
    1672           0 :         FOR( j = 0; j < entriesY; ++j )
    1673             :         {
    1674           0 :             FOR( i = 0; i < colsX; ++i )
    1675             :             {
    1676           0 :                 *( Zp ) = Mpy_32_32( X[j + i * rowsX], Y[j] ); /*Q31 - (X_e + Y_e)*/
    1677           0 :                 move32();
    1678           0 :                 Zp++;
    1679           0 :                 *( Z_ep ) = add( X_e[j + i * rowsX], Y_e[j] );
    1680           0 :                 move16();
    1681           0 :                 Z_ep++;
    1682             :             }
    1683             :         }
    1684             :     }
    1685             :     ELSE /* Regular case */
    1686             :     {
    1687       90900 :         IF( NE_16( colsX, entriesY ) )
    1688             :         {
    1689           0 :             return EXIT_FAILURE;
    1690             :         }
    1691             : 
    1692      553344 :         FOR( j = 0; j < entriesY; ++j )
    1693             :         {
    1694     2841348 :             FOR( i = 0; i < rowsX; ++i )
    1695             :             {
    1696     2378904 :                 *( Zp ) = Mpy_32_32( *( X ), Y[j] ); /*Q31 - (X_e + Y_e)*/
    1697     2378904 :                 move32();
    1698     2378904 :                 Zp++;
    1699     2378904 :                 *( Z_ep ) = add( *( X_e ), Y_e[j] );
    1700     2378904 :                 move16();
    1701     2378904 :                 Z_ep++;
    1702     2378904 :                 X++;
    1703     2378904 :                 X_e++;
    1704             :             }
    1705             :         }
    1706             :     }
    1707             : 
    1708       90900 :     return EXIT_SUCCESS;
    1709             : }
    1710             : 
    1711      743480 : Word16 diag_matrix_product_fx(
    1712             :     const Word32 *Y, /* i  : left hand diagonal matrix as vector containing the diagonal elements                   Q31 - Y_e*/
    1713             :     Word16 Y_e,
    1714             :     const Word16 entriesY, /* i  : length of the diagonal of the left hand matrix                                         Q0*/
    1715             :     const Word32 *X,       /* i  : right hand matrix                                                                      Q31 - X_e*/
    1716             :     Word16 X_e,
    1717             :     const Word16 rowsX,   /* i  : number of rows of the right hand matrix                                                Q0*/
    1718             :     const Word16 colsX,   /* i  : number of columns of the right hand matrix                                             Q0*/
    1719             :     const Word16 transpX, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
    1720             :     Word32 *Z,            /* o  : resulting matrix after the matrix multiplication                                       Q31 - Z_e*/
    1721             :     Word16 *Z_e )
    1722             : {
    1723             :     Word16 i, j;
    1724      743480 :     Word32 *Zp = Z;
    1725             : 
    1726             :     /* Processing */
    1727      743480 :     IF( EQ_16( transpX, 1 ) ) /* We use X transpose */
    1728             :     {
    1729      315180 :         IF( NE_16( colsX, entriesY ) )
    1730             :         {
    1731           0 :             return EXIT_FAILURE;
    1732             :         }
    1733     3194100 :         FOR( i = 0; i < rowsX; ++i )
    1734             :         {
    1735     8636760 :             FOR( j = 0; j < entriesY; ++j )
    1736             :             {
    1737     5757840 :                 *( Zp ) = Mpy_32_32( X[i + j * rowsX], Y[j] ); /*Q31 - (X_e + Y_e)*/
    1738     5757840 :                 move32();
    1739     5757840 :                 Zp++;
    1740             :             }
    1741             :         }
    1742             :     }
    1743             :     ELSE /* Regular case */
    1744             :     {
    1745      428300 :         IF( NE_16( rowsX, entriesY ) )
    1746             :         {
    1747           0 :             return EXIT_FAILURE;
    1748             :         }
    1749     1565664 :         FOR( i = 0; i < colsX; ++i )
    1750             :         {
    1751     9501188 :             FOR( j = 0; j < entriesY; ++j )
    1752             :             {
    1753     8363824 :                 *( Zp ) = Mpy_32_32( *( X ), Y[j] ); /*Q31 - (X_e + Y_e)*/
    1754     8363824 :                 move32();
    1755     8363824 :                 Zp++;
    1756     8363824 :                 X++;
    1757             :             }
    1758             :         }
    1759             :     }
    1760             : 
    1761      743480 :     *Z_e = add( Y_e, X_e );
    1762      743480 :     move16();
    1763             : 
    1764      743480 :     return EXIT_SUCCESS;
    1765             : }
    1766             : 
    1767      749206 : Word16 matrix_product_diag_fx(
    1768             :     const Word32 *X, /* i  : left hand matrix                                                                       Q31 - X_e*/
    1769             :     Word16 X_e,
    1770             :     const Word16 rowsX,   /* i  : number of rows of the left hand matrix                                                 Q0*/
    1771             :     const Word16 colsX,   /* i  : number of columns of the left hand matrix                                              Q0*/
    1772             :     const Word16 transpX, /* i  : flag indicating the transposition of the left hand matrix prior to the multiplication  Q0*/
    1773             :     const Word32 *Y,      /* i  : right hand matrix                                                                      Q31 - Y_e*/
    1774             :     Word16 Y_e,
    1775             :     const Word16 rowsY,   /* i  : number of rows of the right hand matrix                                                Q0*/
    1776             :     const Word16 colsY,   /* i  : number of columns of the right hand matrix                                             Q0*/
    1777             :     const Word16 transpY, /* i  : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
    1778             :     Word32 *Z,            /* o  : resulting matrix after the matrix multiplication                                       Q31 - Z_e*/
    1779             :     Word16 *Z_e )
    1780             : {
    1781             :     Word16 j, k;
    1782      749206 :     Word32 *Zp = Z;
    1783             : 
    1784             :     /* Processing */
    1785      749206 :     test();
    1786      749206 :     test();
    1787      749206 :     test();
    1788      749206 :     IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */
    1789             :     {
    1790           0 :         IF( NE_16( rowsX, rowsY ) )
    1791             :         {
    1792           0 :             return EXIT_FAILURE;
    1793             :         }
    1794             : 
    1795           0 :         FOR( j = 0; j < colsY; ++j )
    1796             :         {
    1797           0 :             ( *Zp ) = 0;
    1798           0 :             move32();
    1799           0 :             FOR( k = 0; k < rowsX; ++k )
    1800             :             {
    1801           0 :                 ( *Zp ) = Madd_32_32( ( *Zp ), X[k + j * rowsX], Y[k + j * rowsY] ); /*Q31 - (X_e + Y_e)*/
    1802           0 :                 move32();
    1803             :             }
    1804           0 :             Zp++;
    1805             :         }
    1806             :     }
    1807      749206 :     ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */
    1808             :     {
    1809      632320 :         IF( NE_16( colsX, colsY ) )
    1810             :         {
    1811           0 :             return EXIT_FAILURE;
    1812             :         }
    1813     5124332 :         FOR( j = 0; j < rowsY; ++j )
    1814             :         {
    1815     4492012 :             ( *Zp ) = 0;
    1816     4492012 :             move32();
    1817    14942452 :             FOR( k = 0; k < colsX; ++k )
    1818             :             {
    1819    10450440 :                 ( *Zp ) = Madd_32_32( ( *Zp ), X[j + k * rowsX], Y[j + k * rowsY] ); /*Q31 - (X_e + Y_e)*/
    1820    10450440 :                 move32();
    1821             :             }
    1822     4492012 :             Zp++;
    1823             :         }
    1824             :     }
    1825      116886 :     ELSE IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 1 ) ) /* We use both transpose */
    1826             :     {
    1827           0 :         IF( NE_16( rowsX, colsY ) )
    1828             :         {
    1829           0 :             return EXIT_FAILURE;
    1830             :         }
    1831             : 
    1832           0 :         FOR( j = 0; j < rowsY; ++j )
    1833             :         {
    1834             : 
    1835           0 :             ( *Zp ) = 0;
    1836           0 :             move32();
    1837           0 :             FOR( k = 0; k < colsX; ++k )
    1838             :             {
    1839           0 :                 ( *Zp ) = Madd_32_32( ( *Zp ), X[k + j * rowsX], Y[j + k * rowsY] ); /*Q31 - (X_e + Y_e)*/
    1840           0 :                 move32();
    1841             :             }
    1842             : 
    1843           0 :             Zp++;
    1844             :         }
    1845             :     }
    1846             :     ELSE /* Regular case */
    1847             :     {
    1848      116886 :         IF( NE_16( colsX, rowsY ) )
    1849             :         {
    1850           0 :             return EXIT_FAILURE;
    1851             :         }
    1852             : 
    1853     1182390 :         FOR( j = 0; j < colsY; ++j )
    1854             :         {
    1855     1065504 :             ( *Zp ) = 0;
    1856     1065504 :             move32();
    1857     2131008 :             FOR( k = 0; k < colsX; ++k )
    1858             :             {
    1859     1065504 :                 ( *Zp ) = Madd_32_32( ( *Zp ), X[j + k * rowsX], Y[k + j * rowsY] ); /*Q31 - (X_e + Y_e)*/
    1860     1065504 :                 move32();
    1861             :             }
    1862     1065504 :             Zp++;
    1863             :         }
    1864             :     }
    1865             : 
    1866      749206 :     *Z_e = add( X_e, Y_e );
    1867      749206 :     move16();
    1868             : 
    1869      749206 :     return EXIT_SUCCESS;
    1870             : }
    1871             : 
    1872             : /*---------------------------------------------------------------------*
    1873             :  * matrix_product_diag()
    1874             :  *
    1875             :  * compute only the main diagonal of X*Y (Z=diag(X*Y))
    1876             :  *---------------------------------------------------------------------*/
    1877             : 
    1878     1776710 : void cmplx_matrix_square_fx(
    1879             :     const Word32 *realX, /* i  : real part of the matrix                                                     Q31 - input_exp*/
    1880             :     const Word32 *imagX, /* i  : imaginary part of the matrix                                                Q31 - input_exp*/
    1881             :     const Word16 mRows,  /* i  : number of rows of the matrix                                                Q0*/
    1882             :     const Word16 nCols,  /* i  : number of columns of the matrix                                             Q0*/
    1883             :     Word32 *realZ,       /* o  : real part of the resulting matrix                                           Q31 - output_exp*/
    1884             :     Word32 *imagZ,       /* o  : imaginary part of the resulting matrix                                      Q31 - output_exp*/
    1885             :     Word16 input_exp,
    1886             :     Word16 *output_exp )
    1887             : {
    1888             :     Word16 i, j, k;
    1889             :     Word32 *realZp, *imagZp;
    1890             :     const Word32 *p_real1, *p_real2, *p_imag1, *p_imag2;
    1891             :     Word16 tmp1, tmp2;
    1892             : 
    1893             :     /* resulting matrix is hermitean, we only need to calc the upper triangle */
    1894             :     /* we assume transposition needed */
    1895             : 
    1896             :     /* column*column = column/column */
    1897     5338398 :     FOR( i = 0; i < nCols; i++ )
    1898             :     {
    1899     8916622 :         FOR( j = i; j < nCols; j++ )
    1900             :         {
    1901     5354934 :             p_real1 = realX + imult1616( i, mRows );          /*Q31 - input_exp*/
    1902     5354934 :             p_imag1 = imagX + imult1616( i, mRows );          /*Q31 - input_exp*/
    1903     5354934 :             p_real2 = realX + imult1616( j, mRows );          /*Q31 - input_exp*/
    1904     5354934 :             p_imag2 = imagX + imult1616( j, mRows );          /*Q31 - input_exp*/
    1905     5354934 :             realZp = realZ + add( i, imult1616( nCols, j ) ); /*Q31 - output_exp*/
    1906     5354934 :             imagZp = imagZ + add( i, imult1616( nCols, j ) ); /*Q31 - output_exp*/
    1907     5354934 :             *( realZp ) = 0;
    1908     5354934 :             move32();
    1909     5354934 :             *( imagZp ) = 0;
    1910     5354934 :             move32();
    1911             : 
    1912    28343694 :             FOR( k = 0; k < mRows; k++ )
    1913             :             {
    1914    22988760 :                 *( imagZp ) = L_add( *( imagZp ), L_sub( Mpy_32_32( *( p_real1 ), *( p_imag2 ) ), Mpy_32_32( *( p_real2 ), *( p_imag1 ) ) ) ); /* Q31 - 2*input_exp */
    1915    22988760 :                 move32();
    1916    22988760 :                 *( realZp ) = L_add( *( realZp ), L_add( Mpy_32_32( *( p_real1 ), *( p_real2 ) ), Mpy_32_32( *( p_imag1 ), *( p_imag2 ) ) ) ); /* Q31 - 2*input_exp */
    1917    22988760 :                 move32();
    1918    22988760 :                 p_real1++;
    1919    22988760 :                 p_real2++;
    1920    22988760 :                 p_imag1++;
    1921    22988760 :                 p_imag2++;
    1922             :             }
    1923             :         }
    1924             :     }
    1925             : 
    1926             :     /* fill lower triangle */
    1927     3561688 :     FOR( i = 1; i < nCols; i++ )
    1928             :     {
    1929     3578224 :         FOR( j = 0; j < i; j++ )
    1930             :         {
    1931     1793246 :             tmp1 = add( i, imult1616( nCols, j ) ); /*Q0*/
    1932     1793246 :             tmp2 = add( j, imult1616( nCols, i ) ); /*Q0*/
    1933     1793246 :             realZ[tmp1] = realZ[tmp2];              /*Q31 - output_exp*/
    1934     1793246 :             move32();
    1935     1793246 :             imagZ[tmp1] = imagZ[tmp2]; /*Q31 - output_exp*/
    1936     1793246 :             move32();
    1937             :         }
    1938             :     }
    1939             : 
    1940     1776710 :     *output_exp = shl( input_exp, 1 );
    1941     1776710 :     move16();
    1942             : 
    1943     1776710 :     return;
    1944             : }
    1945             : 
    1946             : 
    1947      739020 : void v_multc_acc_32_16(
    1948             :     const Word32 x[], /* i  : Input vector                                     Qx*/
    1949             :     const Word16 c,   /* i  : Constant                                         Q31*/
    1950             :     Word32 y[],       /* o  : Output vector that contains y + c*x              Qx*/
    1951             :     const Word16 N    /* i  : Vector length                                    Q0*/
    1952             : )
    1953             : {
    1954             :     Word16 i;
    1955             : 
    1956    39585340 :     FOR( i = 0; i < N; i++ )
    1957             :     {
    1958    38846320 :         y[i] = L_add( y[i], Mpy_32_16_1( x[i], c ) );
    1959    38846320 :         move32();
    1960             :     }
    1961             : 
    1962      739020 :     return;
    1963             : }
    1964      240000 : void v_multc_acc_32_32(
    1965             :     const Word32 x[], /* i  : Input vector                                     Qx*/
    1966             :     const Word32 c,   /* i  : Constant                                         Q31*/
    1967             :     Word32 y[],       /* o  : Output vector that contains y + c*x              Qx*/
    1968             :     const Word16 N    /* i  : Vector length                                    Q0*/
    1969             : )
    1970             : {
    1971             :     Word16 i;
    1972             : 
    1973    14640000 :     FOR( i = 0; i < N; i++ )
    1974             :     {
    1975    14400000 :         y[i] = L_add( y[i], Mpy_32_32( x[i], c ) ); /*Qx*/
    1976    14400000 :         move32();
    1977             :     }
    1978             : 
    1979      240000 :     return;
    1980             : }
    1981             : 
    1982             : /*---------------------------------------------------------------------*
    1983             :  * lls_interp_n()
    1984             :  *
    1985             :  * Linear least squares interpolation of an input vector
    1986             :  * The function calculates the slope 'a' and the offset 'b' of a LLS estimate for an input vector x such that
    1987             :  * y_i = a*i + b where i=1,..,N and (a,b) = min(a,b) [sum_N[(y_i - x_i)^2]]
    1988             :  * the interpolated vector is return as x[], if requested
    1989             :  *---------------------------------------------------------------------*/
    1990             : 
    1991        4106 : void lls_interp_n_fx(
    1992             :     Word16 x_fx[],   /* i/o: input/output vector                               Q15*/
    1993             :     const Word16 N,  /* i  : length of the input vector                        Q0*/
    1994             :     Word16 *a_fx,    /* o  : calculated slope                                  Q15*/
    1995             :     Word16 *b_fx,    /* o  : calculated offset                                 Q15*/
    1996             :     const Word16 upd /* i  : use 1 to update x[] with the interpolated output  Q0*/
    1997             : )
    1998             : {
    1999             :     Word16 i;
    2000        4106 :     const Word16 n_i_fx[11] = { 0, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, 20480 }; // Q11
    2001        4106 :     move16();
    2002        4106 :     move16();
    2003        4106 :     move16();
    2004        4106 :     move16();
    2005        4106 :     move16();
    2006        4106 :     move16();
    2007        4106 :     move16();
    2008        4106 :     move16();
    2009        4106 :     move16();
    2010        4106 :     move16();
    2011        4106 :     move16();
    2012             : 
    2013        4106 :     const Word16 one_by_n_fx[11] = { 0, 32767, 16384, 10911, 8192, 6553, 5459, 4681, 4096, 3640, 3276 }; /*Q15*/
    2014        4106 :     move16();
    2015        4106 :     move16();
    2016        4106 :     move16();
    2017        4106 :     move16();
    2018        4106 :     move16();
    2019        4106 :     move16();
    2020        4106 :     move16();
    2021        4106 :     move16();
    2022        4106 :     move16();
    2023        4106 :     move16();
    2024        4106 :     move16();
    2025             : 
    2026        4106 :     const Word16 sum_i_fx[12] = { 0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55 }; /*Q0*/
    2027        4106 :     move16();
    2028        4106 :     move16();
    2029        4106 :     move16();
    2030        4106 :     move16();
    2031        4106 :     move16();
    2032        4106 :     move16();
    2033        4106 :     move16();
    2034        4106 :     move16();
    2035        4106 :     move16();
    2036        4106 :     move16();
    2037        4106 :     move16();
    2038        4106 :     move16();
    2039             : 
    2040             :     // 1.0f/ ( N * sum_ii[N] - sum_i[N] * sum_i[N] )
    2041        4106 :     const Word32 res_table[12] = { 0, 0, 0, 357913952, 107374184, 42949672, 20452226, 10956549, 6391320, 3976821, 2603010, 385 }; /*Q31*/
    2042        4106 :     move16();
    2043        4106 :     move16();
    2044        4106 :     move16();
    2045        4106 :     move16();
    2046        4106 :     move16();
    2047        4106 :     move16();
    2048        4106 :     move16();
    2049        4106 :     move16();
    2050        4106 :     move16();
    2051        4106 :     move16();
    2052        4106 :     move16();
    2053        4106 :     move16();
    2054             : 
    2055             :     Word32 sum_x_fx, sum_ix_fx, slope_fx, offset_fx;
    2056        4106 :     Word16 dot_exp = 0, sum_ix_q = 0;
    2057        4106 :     move16();
    2058        4106 :     move16();
    2059             : 
    2060             :     Word32 num;
    2061        4106 :     assert( N > 0 && LE_16( N, 10 ) );
    2062             : 
    2063        4106 :     sum_x_fx = 0;
    2064        4106 :     move32();
    2065             :     Word16 idx;
    2066       20530 :     FOR( idx = 0; idx < N; idx++ )
    2067             :     {
    2068       16424 :         sum_x_fx = L_add( sum_x_fx, x_fx[idx] ); /*Q15*/
    2069             :     }
    2070        4106 :     sum_ix_fx = dotp_fx( x_fx, n_i_fx, N, &dot_exp ); /*sum_ix_q*/
    2071        4106 :     sum_ix_q = sub( 30, sub( dot_exp, ( 11 + 15 ) ) );
    2072             : 
    2073        4106 :     sum_ix_fx = L_shr( sum_ix_fx, sub( sum_ix_q, 15 ) );                                              /*Q15*/
    2074        4106 :     num = L_sub( imult3216( sum_ix_fx, N ), imult3216( sum_x_fx, sum_i_fx[N] ) );                     /*Q15*/
    2075        4106 :     slope_fx = Mpy_32_32( num, res_table[N] );                                                        /*Q15*/
    2076        4106 :     offset_fx = Mpy_32_16_1( L_sub( sum_x_fx, imult3216( slope_fx, sum_i_fx[N] ) ), one_by_n_fx[N] ); /*Q15*/
    2077             : 
    2078        4106 :     IF( upd )
    2079             :     {
    2080       20530 :         FOR( i = 0; i < N; i++ )
    2081             :         {
    2082       16424 :             IF( GT_32( imult3216( slope_fx, i ), MAX_WORD16 ) )
    2083             :             {
    2084           0 :                 x_fx[i] = MAX_WORD16; /*Q15*/
    2085           0 :                 move16();
    2086             :             }
    2087             :             ELSE
    2088             :             {
    2089       16424 :                 x_fx[i] = extract_l( L_add_sat( imult3216( slope_fx, i ), offset_fx ) ); /*Q15*/
    2090       16424 :                 move16();
    2091             :             }
    2092             :         }
    2093             :     }
    2094             : 
    2095        4106 :     IF( a_fx != NULL )
    2096             :     {
    2097        4106 :         *a_fx = extract_l( slope_fx ); /*Q15*/
    2098        4106 :         move16();
    2099             :     }
    2100             : 
    2101        4106 :     IF( b_fx != NULL )
    2102             :     {
    2103        4106 :         *b_fx = extract_l( offset_fx ); /*Q15*/
    2104        4106 :         move16();
    2105             :     }
    2106             : 
    2107        4106 :     return;
    2108             : }
    2109             : 
    2110             : 
    2111             : /* helper function for panning_wrap_angles */
    2112        1776 : static float wrap_azi(
    2113             :     const float azi_deg )
    2114             : {
    2115        1776 :     float azi = azi_deg;
    2116             : 
    2117             :     /* Wrap azimuth value */
    2118        1776 :     while ( azi > 180 )
    2119             :     {
    2120           0 :         azi -= 360.0f;
    2121             :     }
    2122             : 
    2123        1827 :     while ( azi <= -180 )
    2124             :     {
    2125          51 :         azi += 360;
    2126             :     }
    2127             : 
    2128        1776 :     return azi;
    2129             : }
    2130             : /* helper function for panning_wrap_angles */
    2131     6830268 : static Word32 wrap_azi_fx(
    2132             :     const Word32 azi_deg /* Q22 */ )
    2133             : {
    2134     6830268 :     Word32 azi = azi_deg; /*Q22*/
    2135     6830268 :     move32();
    2136             : 
    2137             :     /* Wrap azimuth value */
    2138     8670789 :     WHILE( GT_32( azi, ANGLE_180_DEG_Q22 ) )
    2139             :     {
    2140     1840521 :         azi = L_sub( azi, ANGLE_360_DEG_Q22 ); /*Q22*/
    2141             :     }
    2142             : 
    2143     6869040 :     WHILE( LE_32( azi, -ANGLE_180_DEG_Q22 ) )
    2144             :     {
    2145       38772 :         azi = L_add( azi, ANGLE_360_DEG_Q22 ); /*Q22*/
    2146             :     }
    2147             : 
    2148     6830268 :     return azi; /*Q22*/
    2149             : }
    2150             : /*-------------------------------------------------------------------*
    2151             :  * panning_wrap_angles()
    2152             :  *
    2153             :  * Wrap angles for amplitude panning to the range:
    2154             :  * azimuth = (-180, 180]
    2155             :  * elevation = [-90, 90]
    2156             :  * Considers direction changes from large elevation values
    2157             :  *-------------------------------------------------------------------*/
    2158        1776 : void panning_wrap_angles(
    2159             :     const float azi_deg, /* i  : azimuth in degrees for panning direction (positive left) */
    2160             :     const float ele_deg, /* i  : elevation in degrees for panning direction (positive up) */
    2161             :     float *azi_wrapped,  /* o  : wrapped azimuth component                                */
    2162             :     float *ele_wrapped   /* o  : wrapped elevation component                              */
    2163             : )
    2164             : {
    2165             :     float azi, ele;
    2166             : 
    2167        1776 :     azi = azi_deg;
    2168        1776 :     ele = ele_deg;
    2169             : 
    2170        1776 :     if ( fabsf( ele ) < 90 )
    2171             :     {
    2172        1776 :         *ele_wrapped = ele;
    2173        1776 :         *azi_wrapped = wrap_azi( azi );
    2174        1776 :         return;
    2175             :     }
    2176             :     else
    2177             :     {
    2178             :         /* Special case when elevation is a multiple of 90; azimuth is irrelevant */
    2179           0 :         if ( ( fmodf( ele, 90 ) == 0 ) && ( fmodf( ele, 180 ) != 0 ) )
    2180             :         {
    2181           0 :             *azi_wrapped = 0;
    2182           0 :             while ( ele > 90 )
    2183             :             {
    2184           0 :                 ele -= 360;
    2185             :             }
    2186           0 :             while ( ele < -90 )
    2187             :             {
    2188           0 :                 ele += 360;
    2189             :             }
    2190           0 :             *ele_wrapped = ele;
    2191             :         }
    2192             :         else
    2193             :         {
    2194             :             /* Wrap elevation and adjust azimuth accordingly */
    2195           0 :             while ( fabsf( ele ) > 90 )
    2196             :             {
    2197             :                 /* Flip to other hemisphere */
    2198           0 :                 azi += 180;
    2199             : 
    2200             :                 /* Compensate elevation accordingly */
    2201           0 :                 if ( ele > 90 )
    2202             :                 {
    2203           0 :                     ele = 180 - ele;
    2204             :                 }
    2205           0 :                 else if ( ele < -90 )
    2206             :                 {
    2207           0 :                     ele = -180 - ele;
    2208             :                 }
    2209             :             }
    2210           0 :             *azi_wrapped = wrap_azi( azi );
    2211           0 :             *ele_wrapped = ele;
    2212             :         }
    2213             : 
    2214           0 :         return;
    2215             :     }
    2216             : }
    2217             : /*-------------------------------------------------------------------*
    2218             :  * panning_wrap_angles_fx()
    2219             :  *
    2220             :  * Wrap angles for amplitude panning to the range:
    2221             :  * azimuth = (-180, 180]
    2222             :  * elevation = [-90, 90]
    2223             :  * Considers direction changes from large elevation values
    2224             :  *-------------------------------------------------------------------*/
    2225     6833930 : void panning_wrap_angles_fx(
    2226             :     const Word32 azi_deg, /* i  : azimuth in degrees for panning direction (positive left) Q22 */
    2227             :     const Word32 ele_deg, /* i  : elevation in degrees for panning direction (positive up) Q22 */
    2228             :     Word32 *azi_wrapped,  /* o  : wrapped azimuth component                                Q22 */
    2229             :     Word32 *ele_wrapped   /* o  : wrapped elevation component                              Q22 */
    2230             : )
    2231             : {
    2232             :     Word32 azi, ele;
    2233             : 
    2234     6833930 :     azi = azi_deg; /*Q22*/
    2235     6833930 :     move32();
    2236     6833930 :     ele = ele_deg; /*Q22*/
    2237     6833930 :     move32();
    2238             : 
    2239     6833930 :     IF( LT_32( L_abs( ele ), ANGLE_90_DEG_Q22 ) )
    2240             :     {
    2241     6830268 :         *ele_wrapped = ele; /*Q22*/
    2242     6830268 :         move32();
    2243     6830268 :         *azi_wrapped = wrap_azi_fx( azi ); /*Q22*/
    2244     6830268 :         move32();
    2245     6830268 :         return;
    2246             :     }
    2247             :     ELSE
    2248             :     {
    2249             :         /* Special case when elevation is a multiple of 90; azimuth is irrelevant */
    2250        3662 :         IF( ( ( ele % ANGLE_90_DEG_Q22 ) == 0 ) && ( ( ele % ANGLE_180_DEG_Q22 ) != 0 ) )
    2251             :         {
    2252        3662 :             *azi_wrapped = 0;
    2253        3662 :             move32();
    2254        3662 :             WHILE( GT_32( ele, ANGLE_90_DEG_Q22 ) )
    2255             :             {
    2256           0 :                 ele = L_sub( ele, ANGLE_360_DEG_Q22 );
    2257             :             }
    2258        3662 :             WHILE( LT_32( ele, -ANGLE_90_DEG_Q22 ) )
    2259             :             {
    2260           0 :                 ele = L_add( ele, ANGLE_360_DEG_Q22 );
    2261             :             }
    2262        3662 :             *ele_wrapped = ele;
    2263        3662 :             move32();
    2264             :         }
    2265             :         ELSE
    2266             :         {
    2267             :             /* Wrap elevation and adjust azimuth accordingly */
    2268           0 :             WHILE( GT_32( L_abs( ele ), ANGLE_90_DEG_Q22 ) )
    2269             :             {
    2270             :                 /* Flip to other hemisphere */
    2271           0 :                 azi = L_add( azi, ANGLE_180_DEG_Q22 );
    2272             : 
    2273             :                 /* Compensate elevation accordingly */
    2274           0 :                 IF( GT_32( ele, ANGLE_90_DEG_Q22 ) )
    2275             :                 {
    2276           0 :                     ele = L_sub( ANGLE_180_DEG_Q22, ele );
    2277             :                 }
    2278           0 :                 ELSE IF( LT_32( ele, -ANGLE_90_DEG_Q22 ) )
    2279             :                 {
    2280           0 :                     ele = L_sub( -ANGLE_180_DEG_Q22, ele );
    2281             :                 }
    2282             :             }
    2283           0 :             *azi_wrapped = wrap_azi_fx( azi ); /*Q22*/
    2284           0 :             move32();
    2285           0 :             *ele_wrapped = ele; /*Q22*/
    2286           0 :             move32();
    2287             :         }
    2288             : 
    2289        3662 :         return;
    2290             :     }
    2291             : }
    2292             : 
    2293             : /*-------------------------------------------------------------------------*
    2294             :  * v_sort_ind_fixed()
    2295             :  *
    2296             :  * Sort a float array
    2297             :  * (modified version of v_sort() to return an index array)
    2298             :  *-------------------------------------------------------------------------*/
    2299             : 
    2300       16711 : void v_sort_ind_fixed(
    2301             :     Word32 *x,       /* i/o: Vector to be sorted      Qx*/
    2302             :     Word16 *idx,     /* o  : Original index positions Q0*/
    2303             :     const Word16 len /* i  : vector length            Q0*/
    2304             : )
    2305             : {
    2306             :     Word16 i, j;
    2307             :     Word32 tempr;
    2308             :     Word16 tempi;
    2309             : 
    2310       75999 :     FOR( i = 0; i < len; i++ )
    2311             :     {
    2312       59288 :         idx[i] = i;
    2313       59288 :         move16();
    2314             :     }
    2315             : 
    2316       59288 :     FOR( i = len - 2; i >= 0; i-- )
    2317             :     {
    2318       42577 :         tempr = x[i]; /*Qx*/
    2319       42577 :         move32();
    2320       42577 :         tempi = idx[i]; /*Qx*/
    2321       42577 :         move16();
    2322       42577 :         test();
    2323       96593 :         FOR( j = ( i + 1 ); LT_16( j, len ) && GT_32( tempr, x[j] ); j++ )
    2324             :         {
    2325       54016 :             test();
    2326       54016 :             x[j - 1] = x[j]; /*Qx*/
    2327       54016 :             move32();
    2328       54016 :             idx[j - 1] = idx[j]; /*Qx*/
    2329       54016 :             move16();
    2330             :         }
    2331       42577 :         x[j - 1] = tempr; /*Qx*/
    2332       42577 :         move32();
    2333       42577 :         idx[j - 1] = tempi; /*Qx*/
    2334       42577 :         move16();
    2335             :     }
    2336             : 
    2337       16711 :     return;
    2338             : }
    2339             : 
    2340             : /*-------------------------------------------------------------------*
    2341             :  * is_IVAS_bitrate()
    2342             :  *
    2343             :  * check if the bitrate is IVAS supported active bitrate
    2344             :  *-------------------------------------------------------------------*/
    2345             : 
    2346             : /*! r: flag indicating a valid bitrate */
    2347           0 : Word16 is_IVAS_bitrate_fx(
    2348             :     const Word32 ivas_total_brate /* i  : IVAS total bitrate  Q0*/
    2349             : )
    2350             : {
    2351             :     Word16 j;
    2352             : 
    2353           0 :     j = SIZE_IVAS_BRATE_TBL - IVAS_NUM_ACTIVE_BRATES; /* skip NO_DATA and SID bitrates */
    2354           0 :     move16();
    2355             : 
    2356           0 :     test();
    2357           0 :     WHILE( LE_16( j, SIZE_IVAS_BRATE_TBL ) && NE_32( ivas_total_brate, ivas_brate_tbl[j] ) )
    2358             :     {
    2359           0 :         test();
    2360           0 :         j = add( j, 1 );
    2361             :     }
    2362             : 
    2363           0 :     IF( GE_16( j, SIZE_IVAS_BRATE_TBL ) )
    2364             :     {
    2365           0 :         return 0;
    2366             :     }
    2367             : 
    2368           0 :     return 1;
    2369             : }
    2370             : 
    2371             : /*-------------------------------------------------------------------*
    2372             :  * is_DTXrate()
    2373             :  *
    2374             :  * identify DTX frame bitrates
    2375             :  *-------------------------------------------------------------------*/
    2376             : 
    2377     4123317 : Word16 is_DTXrate(
    2378             :     const Word32 ivas_total_brate /* i  : IVAS total bitrate   Q0*/
    2379             : )
    2380             : {
    2381     4123317 :     Word16 dtx_rate_flag = 0;
    2382     4123317 :     move16();
    2383             : 
    2384     4123317 :     test();
    2385     4123317 :     IF( is_SIDrate( ivas_total_brate ) || ( EQ_32( ivas_total_brate, FRAME_NO_DATA ) ) )
    2386             :     {
    2387      169534 :         dtx_rate_flag = 1;
    2388      169534 :         move16();
    2389             :     }
    2390             : 
    2391     4123317 :     return dtx_rate_flag; /*Q0*/
    2392             : }
    2393             : 
    2394             : /*-------------------------------------------------------------------*
    2395             :  * is_SIDrate()
    2396             :  *
    2397             :  * identify SID frame bitrates
    2398             :  *-------------------------------------------------------------------*/
    2399             : 
    2400     4523302 : Word16 is_SIDrate(
    2401             :     const Word32 ivas_total_brate /* i  : IVAS total bitrate   Q0*/
    2402             : )
    2403             : {
    2404     4523302 :     Word16 sid_rate_flag = 0;
    2405     4523302 :     move16();
    2406             : 
    2407     4523302 :     test();
    2408     4523302 :     test();
    2409     9046604 :     if ( EQ_32( ivas_total_brate, SID_1k75 ) ||
    2410     9046604 :          EQ_32( ivas_total_brate, SID_2k40 ) ||
    2411     4523302 :          EQ_32( ivas_total_brate, IVAS_SID_5k2 ) )
    2412             :     {
    2413       29148 :         sid_rate_flag = 1;
    2414       29148 :         move16();
    2415             :     }
    2416             : 
    2417     4523302 :     return sid_rate_flag; /*Q0*/
    2418             : }
    2419             : 
    2420             : 
    2421             : /*-------------------------------------------------------------------*
    2422             :  * rand_triangular_signed()
    2423             :  *
    2424             :  * generate a random value with a triangular pdf in [-0.5, 0.5]
    2425             :  *-------------------------------------------------------------------*/
    2426             : 
    2427    24038576 : Word16 rand_triangular_signed_fx(
    2428             :     Word16 *seed, /*Q0*/
    2429             :     Word16 *exp_fac )
    2430             : {
    2431             :     Word16 rand_val;
    2432    24038576 :     rand_val = Random( seed ); // q15
    2433             :     Word16 tmp1, tmp2;
    2434    24038576 :     Word16 exp1, exp = 1;
    2435    24038576 :     move16();
    2436    24038576 :     IF( rand_val <= 0 )
    2437             :     {
    2438             :         /* rand_val in [-1, 0] */
    2439             :         /*0.5f * (sqrtf(rand_val + 1.0f) - 1)*/
    2440    12015506 :         tmp1 = Sqrt16( add( shr( rand_val, 1 ), ONE_IN_Q14 ), &exp );               /*Q15 - exp*/
    2441    12015506 :         exp1 = BASOP_Util_Add_MantExp( tmp1, exp, negate( ONE_IN_Q14 ), 1, &tmp2 ); /*Q15 - exp1*/
    2442    12015506 :         tmp2 = shr( tmp2, 1 );                                                      /*Q15 - exp1*/
    2443    12015506 :         *exp_fac = exp1;
    2444    12015506 :         move16();
    2445    12015506 :         return tmp2;
    2446             :     }
    2447             :     ELSE
    2448             :     {
    2449             :         /* rand_val in (0, 1) */
    2450             :         /*0.5f * ( 1 - sqrtf(1.0f - rand_val))*/
    2451    12023070 :         Word16 one_minus_rand = sub( MAX16B, rand_val ); /*Q15*/
    2452    12023070 :         exp = 0;
    2453    12023070 :         move16();
    2454    12023070 :         tmp1 = Sqrt16( one_minus_rand, &exp );                                      // q15 - exp
    2455    12023070 :         exp1 = BASOP_Util_Add_MantExp( ONE_IN_Q14, 1, negate( tmp1 ), exp, &tmp2 ); /*Q15 - exp1*/
    2456    12023070 :         tmp2 = shr( tmp2, 1 );                                                      /*Q15 - exp1*/
    2457    12023070 :         *exp_fac = exp1;
    2458    12023070 :         move16();
    2459             : 
    2460    12023070 :         return tmp2; /*Q15 - exp_fac*/
    2461             :     }
    2462             : }
    2463             : 
    2464             : /*-------------------------------------------------------------------*
    2465             :  * ceil_log_2()
    2466             :  *
    2467             :  * calculates ceil(log2(val))
    2468             :  *-------------------------------------------------------------------*/
    2469       50649 : Word16 ceil_log_2(
    2470             :     UWord64 val /*Q0*/ )
    2471             : {
    2472             : 
    2473       50649 :     IF( val <= 0 )
    2474             :     {
    2475           0 :         assert( 0 );
    2476             :     }
    2477       50649 :     ELSE IF( LE_64( val, 1 ) )
    2478             :     {
    2479         285 :         return 0;
    2480             :     }
    2481       50364 :     ELSE IF( LE_64( val, 2 ) )
    2482             :     {
    2483        2909 :         return 1;
    2484             :     }
    2485       47455 :     ELSE IF( LE_64( val, 4 ) )
    2486             :     {
    2487        4034 :         return 2;
    2488             :     }
    2489       43421 :     ELSE IF( LE_64( val, 8 ) )
    2490             :     {
    2491        2652 :         return 3;
    2492             :     }
    2493       40769 :     ELSE IF( LE_64( val, 16 ) )
    2494             :     {
    2495        1311 :         return 4;
    2496             :     }
    2497       39458 :     ELSE IF( LE_64( val, 32 ) )
    2498             :     {
    2499        1371 :         return 5;
    2500             :     }
    2501       38087 :     ELSE IF( LE_64( val, 64 ) )
    2502             :     {
    2503        1313 :         return 6;
    2504             :     }
    2505       36774 :     ELSE IF( LE_64( val, 128 ) )
    2506             :     {
    2507        3534 :         return 7;
    2508             :     }
    2509       33240 :     ELSE IF( LE_64( val, 256 ) )
    2510             :     {
    2511        5583 :         return 8;
    2512             :     }
    2513       27657 :     ELSE IF( LE_64( val, 512 ) )
    2514             :     {
    2515        4934 :         return 9;
    2516             :     }
    2517       22723 :     ELSE IF( LE_64( val, 1024 ) )
    2518             :     {
    2519        6894 :         return 10;
    2520             :     }
    2521       15829 :     ELSE IF( LE_64( val, 2048 ) )
    2522             :     {
    2523        2074 :         return 11;
    2524             :     }
    2525       13755 :     ELSE IF( LE_64( val, 4096 ) )
    2526             :     {
    2527        1663 :         return 12;
    2528             :     }
    2529       12092 :     ELSE IF( LE_64( val, 8192 ) )
    2530             :     {
    2531        1247 :         return 13;
    2532             :     }
    2533       10845 :     ELSE IF( LE_64( val, 16384 ) )
    2534             :     {
    2535        1543 :         return 14;
    2536             :     }
    2537        9302 :     ELSE IF( LE_64( val, 32768 ) )
    2538             :     {
    2539        3074 :         return 15;
    2540             :     }
    2541        6228 :     ELSE IF( LE_64( val, 65536 ) )
    2542             :     {
    2543         472 :         return 16;
    2544             :     }
    2545        5756 :     ELSE IF( LE_64( val, 131072 ) )
    2546             :     {
    2547         192 :         return 17;
    2548             :     }
    2549        5564 :     ELSE IF( LE_64( val, 262144 ) )
    2550             :     {
    2551         259 :         return 18;
    2552             :     }
    2553        5305 :     ELSE IF( LE_64( val, 524288 ) )
    2554             :     {
    2555         248 :         return 19;
    2556             :     }
    2557        5057 :     ELSE IF( LE_64( val, 1048576 ) )
    2558             :     {
    2559          92 :         return 20;
    2560             :     }
    2561        4965 :     ELSE IF( LE_64( val, 2097152 ) )
    2562             :     {
    2563         109 :         return 21;
    2564             :     }
    2565        4856 :     ELSE IF( LE_64( val, 4194304 ) )
    2566             :     {
    2567         327 :         return 22;
    2568             :     }
    2569        4529 :     ELSE IF( LE_64( val, 8388608 ) )
    2570             :     {
    2571         438 :         return 23;
    2572             :     }
    2573        4091 :     ELSE IF( LE_64( val, 16777216 ) )
    2574             :     {
    2575         792 :         return 24;
    2576             :     }
    2577        3299 :     ELSE IF( LE_64( val, 33554432 ) )
    2578             :     {
    2579         485 :         return 25;
    2580             :     }
    2581        2814 :     ELSE IF( LE_64( val, 67108864 ) )
    2582             :     {
    2583         329 :         return 26;
    2584             :     }
    2585        2485 :     ELSE IF( LE_64( val, 134217728 ) )
    2586             :     {
    2587         135 :         return 27;
    2588             :     }
    2589        2350 :     ELSE IF( LE_64( val, 268435456 ) )
    2590             :     {
    2591         121 :         return 28;
    2592             :     }
    2593        2229 :     ELSE IF( LE_64( val, 536870912 ) )
    2594             :     {
    2595          51 :         return 29;
    2596             :     }
    2597        2178 :     ELSE IF( LE_64( val, 1073741824 ) )
    2598             :     {
    2599          60 :         return 30;
    2600             :     }
    2601        2118 :     ELSE IF( LE_64( val, 2147483648 ) )
    2602             :     {
    2603          91 :         return 31;
    2604             :     }
    2605        2027 :     ELSE IF( LE_64( val, 4294967296 ) )
    2606             :     {
    2607         145 :         return 32;
    2608             :     }
    2609        1882 :     ELSE IF( LE_64( val, 8589934592 ) )
    2610             :     {
    2611          59 :         return 33;
    2612             :     }
    2613        1823 :     ELSE IF( LE_64( val, 17179869184 ) )
    2614             :     {
    2615         200 :         return 34;
    2616             :     }
    2617        1623 :     ELSE IF( LE_64( val, 34359738368 ) )
    2618             :     {
    2619         216 :         return 35;
    2620             :     }
    2621        1407 :     ELSE IF( LE_64( val, 68719476736 ) )
    2622             :     {
    2623         166 :         return 36;
    2624             :     }
    2625        1241 :     ELSE IF( LE_64( val, 137438953472 ) )
    2626             :     {
    2627         136 :         return 37;
    2628             :     }
    2629        1105 :     ELSE IF( LE_64( val, 274877906944 ) )
    2630             :     {
    2631         144 :         return 38;
    2632             :     }
    2633         961 :     ELSE IF( LE_64( val, 549755813888 ) )
    2634             :     {
    2635         117 :         return 39;
    2636             :     }
    2637         844 :     ELSE IF( LE_64( val, 1099511627776 ) )
    2638             :     {
    2639         116 :         return 40;
    2640             :     }
    2641         728 :     ELSE IF( LE_64( val, 2199023255552 ) )
    2642             :     {
    2643         112 :         return 41;
    2644             :     }
    2645         616 :     ELSE IF( LE_64( val, 4398046511104 ) )
    2646             :     {
    2647         163 :         return 42;
    2648             :     }
    2649         453 :     ELSE IF( LE_64( val, 8796093022208 ) )
    2650             :     {
    2651          80 :         return 43;
    2652             :     }
    2653         373 :     ELSE IF( LE_64( val, 17592186044416 ) )
    2654             :     {
    2655          58 :         return 44;
    2656             :     }
    2657         315 :     ELSE IF( LE_64( val, 35184372088832 ) )
    2658             :     {
    2659         122 :         return 45;
    2660             :     }
    2661         193 :     ELSE IF( LE_64( val, 70368744177664 ) )
    2662             :     {
    2663          90 :         return 46;
    2664             :     }
    2665         103 :     ELSE IF( LE_64( val, 140737488355328 ) )
    2666             :     {
    2667          64 :         return 47;
    2668             :     }
    2669          39 :     ELSE IF( LE_64( val, 281474976710656 ) )
    2670             :     {
    2671          39 :         return 48;
    2672             :     }
    2673           0 :     ELSE IF( LE_64( val, 562949953421312 ) )
    2674             :     {
    2675           0 :         return 49;
    2676             :     }
    2677           0 :     ELSE IF( LE_64( val, 1125899906842624 ) )
    2678             :     {
    2679           0 :         return 50;
    2680             :     }
    2681           0 :     ELSE IF( LE_64( val, 2251799813685248 ) )
    2682             :     {
    2683           0 :         return 51;
    2684             :     }
    2685           0 :     ELSE IF( LE_64( val, 4503599627370496 ) )
    2686             :     {
    2687           0 :         return 52;
    2688             :     }
    2689           0 :     ELSE IF( LE_64( val, 9007199254740992 ) )
    2690             :     {
    2691           0 :         return 53;
    2692             :     }
    2693           0 :     ELSE IF( LE_64( val, 18014398509481984 ) )
    2694             :     {
    2695           0 :         return 54;
    2696             :     }
    2697           0 :     ELSE IF( LE_64( val, 36028797018963968 ) )
    2698             :     {
    2699           0 :         return 55;
    2700             :     }
    2701           0 :     ELSE IF( LE_64( val, 72057594037927936 ) )
    2702             :     {
    2703           0 :         return 56;
    2704             :     }
    2705           0 :     ELSE IF( LE_64( val, 144115188075855872 ) )
    2706             :     {
    2707           0 :         return 57;
    2708             :     }
    2709           0 :     ELSE IF( LE_64( val, 288230376151711744 ) )
    2710             :     {
    2711           0 :         return 58;
    2712             :     }
    2713           0 :     ELSE IF( LE_64( val, 576460752303423488 ) )
    2714             :     {
    2715           0 :         return 59;
    2716             :     }
    2717           0 :     ELSE IF( LE_64( val, 1152921504606846976 ) )
    2718             :     {
    2719           0 :         return 60;
    2720             :     }
    2721           0 :     ELSE IF( LE_64( val, 2305843009213693952 ) )
    2722             :     {
    2723           0 :         return 61;
    2724             :     }
    2725           0 :     ELSE IF( LE_64( val, 4611686018427387904 ) )
    2726             :     {
    2727           0 :         return 62;
    2728             :     }
    2729           0 :     ELSE IF( LE_64( val, 9223372036854775807 ) )
    2730             :     {
    2731           0 :         return 63;
    2732             :     }
    2733           0 :     return 64;
    2734             : }
    2735             : 
    2736             : 
    2737             : /*-------------------------------------------------------------------*
    2738             :  * var_32_fx()
    2739             :  *
    2740             :  * calculates variance of 32-bit array
    2741             :  * Currently using direct division.
    2742             :  * Needs update once required basops are implemented.
    2743             :  *-------------------------------------------------------------------*/
    2744             : 
    2745      129221 : Word64 var_32_fx(
    2746             :     const Word32 *x,  /* i  : input vector                          q*/
    2747             :     const Word16 len, /* i  : length of inputvector                 Q0*/
    2748             :     Word16 q          /* q       : q-factor for the array                                */
    2749             : )
    2750             : {
    2751             :     Word16 i;
    2752             :     Word64 mean, var;
    2753             : 
    2754      129221 :     mean = 0;
    2755      129221 :     move64();
    2756      129221 :     var = 0;
    2757      129221 :     move64();
    2758             : 
    2759      646105 :     FOR( i = 0; i < len; i++ )
    2760             :     {
    2761      516884 :         mean = W_add( mean, x[i] ); /*q*/
    2762             :     }
    2763             : 
    2764      129221 :     mean = mean / len; /* NOTE: No BASOP for 64 bit division q*/
    2765             : 
    2766      646105 :     FOR( i = 0; i < len; i++ )
    2767             :     {
    2768      516884 :         var = W_add( var, Mpy_32_32( L_sub( x[i], W_extract_l( mean ) ), L_sub( x[i], W_extract_l( mean ) ) ) ); /*q + q - 31*/
    2769             :     }
    2770             : 
    2771      129221 :     var = W_shl( var, sub( 31, q ) ); /*q*/
    2772             : 
    2773      129221 :     var = var / len; /* NOTE: No BASOP for 64 bit division q*/
    2774             : 
    2775      129221 :     return var; /*q*/
    2776             : }

Generated by: LCOV version 1.14