LCOV - code coverage report
Current view: top level - lib_com - fft_fx_evs.c (source / functions) Hit Total Coverage
Test: Coverage on main -- dec/rend @ 633e3f2e309758d10805ef21e0436356fe719b7a Lines: 1932 2070 93.3 %
Date: 2025-08-23 01:22:27 Functions: 23 25 92.0 %

          Line data    Source code
       1             : /*====================================================================================
       2             :     EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
       3             :   ====================================================================================*/
       4             : 
       5             : #include "options.h" /* Compilation switches                   */
       6             : #include "cnst.h"    /* Common constants                       */
       7             : #include "prot_fx.h" /* Function prototypes                    */
       8             : #include "rom_com.h" /* Static table prototypes                */
       9             : #include "stl.h"
      10             : #include <assert.h>
      11             : 
      12             : /*-----------------------------------------------------------------*
      13             :  * Local constants and prototypes of the helpers that work on Word16 data
      14             :  *-----------------------------------------------------------------*/
      15             : 
      16             : #define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */
      17             : /* DCT related */
      18             : #define KP559016994_16FX 1200479845 /* EDCT & EMDCT constants Q31 */
      19             : #define KP951056516_16FX 2042378325 /* EDCT & EMDCT constants Q31 */
      20             : #define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31 */
      21             : 
      22             : static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
      23             : static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
      24             : static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
      25             : static void cftmdl_16fx( Word16 n, Word16 l, Word16 *a, const Word32 *w );
      26             : static void cftfsub_16fx( Word16 n, Word16 *a, const Word32 *w );
      27             : static void cft1st_16fx( Word16 n, Word16 *a, const Word32 *w );
      28             : static void cftmdl_16fx( Word16 n, Word16 l, Word16 *a, const Word32 *w ); /* NOTE(review): repeats the declaration on source line 25 */
      29             : static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx ); /* NOTE(review): repeats the declaration on source line 22 */
      30             : static void bitrv2_SR_16fx( Word16 n, const Word16 *ip, Word16 *a );
      31             : static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); /* NOTE(review): repeats the declaration on source line 23 */
      32             : static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx );
      33             : static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w );
      34             : 
      35             : #include "math_32.h"
      36             : 
      37             : /*-----------------------------------------------------------------*
      38             :  * Local functions working on Word32 data with a Word16 cos/sin table
      39             :  *-----------------------------------------------------------------*/
      40             : static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w );
      41             : static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a );
      42             : static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w );
      43             : static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w );
      44             : static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w );
      45             : 
      46             : /* DoRTFTn_fx(): complex DFT of the split arrays x[] (real) and y[] (imaginary); the result overwrites x[] and y[]. */
      47         267 : void DoRTFTn_fx(
      48             :     Word32 *x,     /* i/o : real part of input and output data Q(x)      */
      49             :     Word32 *y,     /* i/o : imaginary part of input and output data Q(x) */
      50             :     const Word16 n /* i : size of the FFT; only 16, 32, 64, 128, 256 and 512 are dispatched, any other value reaches assert( 0 ) */
      51             : )
      52             : {
      53             :     /* z holds the data interleaved as (re, im) pairs: z[2i] = x[i], z[2i+1] = y[i] */
      54             :     Word16 i;
      55             :     Word32 z[2048], *pt;
      56             : 
      57         267 :     pt = z;
      58      136523 :     FOR( i = 0; i < n; i++ )
      59             :     {
      60      136256 :         *pt++ = x[i];
      61      136256 :         move16(); /* NOTE(review): the store above is a Word32; other Word32 stores in this file are followed by move32() - confirm */
      62      136256 :         *pt++ = y[i];
      63      136256 :         move16();
      64             :     }
      65             : 
      66         267 :     IF( EQ_16( n, 16 ) ) /* pick the bit-reversal table matching n; the cos/sin table is the same for every size */
      67             :     {
      68           0 :         cdftForw_fx( 2 * n, z, Ip_fft16, w_fft512_fx_evs );
      69             :     }
      70         267 :     ELSE IF( EQ_16( n, 32 ) )
      71             :     {
      72           0 :         cdftForw_fx( 2 * n, z, Ip_fft32, w_fft512_fx_evs );
      73             :     }
      74         267 :     ELSE IF( EQ_16( n, 64 ) )
      75             :     {
      76           1 :         cdftForw_fx( 2 * n, z, Ip_fft64, w_fft512_fx_evs );
      77             :     }
      78         266 :     ELSE IF( EQ_16( n, 128 ) )
      79             :     {
      80           0 :         cdftForw_fx( 2 * n, z, Ip_fft128, w_fft512_fx_evs );
      81             :     }
      82         266 :     ELSE IF( EQ_16( n, 256 ) )
      83             :     {
      84           0 :         cdftForw_fx( 2 * n, z, Ip_fft256, w_fft512_fx_evs );
      85             :     }
      86         266 :     ELSE IF( EQ_16( n, 512 ) )
      87             :     {
      88         266 :         cdftForw_fx( 2 * n, z, Ip_fft512, w_fft512_fx_evs );
      89             :     }
      90             :     ELSE
      91             :     {
      92           0 :         assert( 0 ); /* unsupported FFT size */
      93             :     }
      94             : 
      95         267 :     x[0] = z[0]; /* entry 0 is copied back unchanged in position */
      96         267 :     move16();
      97         267 :     y[0] = z[1];
      98         267 :     move16();
      99         267 :     pt = &z[2];
     100      136256 :     FOR( i = n - 1; i >= 1; i-- ) /* remaining entries are written back in reverse index order: x[n-1], y[n-1] take z[2], z[3], ..., x[1], y[1] take the last pair */
     101             :     {
     102      135989 :         x[i] = *pt++;
     103      135989 :         move16(); /* NOTE(review): Word32 store counted with move16() - confirm */
     104      135989 :         y[i] = *pt++;
     105      135989 :         move16();
     106             :     }
     107             : 
     108         267 :     return;
     109             : }
     110             : 
     111             : /*-----------------------------------------------------------------*
     112             :  * cdftForw_fx()
     113             :  * Main function of Complex Discrete Fourier Transform: reorders a[] with bitrv2_SR_fx(), then transforms it in place with cftfsub_fx()
     114             :  *-----------------------------------------------------------------*/
     115         267 : static void cdftForw_fx(
     116             :     Word16 n,         /* i    : data length of real and imag (DoRTFTn_fx passes 2 * FFT size) */
     117             :     Word32 *a,        /* i/o  : input/output data                     Q(q)*/
     118             :     const Word16 *ip, /* i    : work area for bit reversal; only the entries from ip[2] onward are handed on */
     119             :     const Word16 *w   /* i    : cos/sin table                                             Q14*/
     120             : )
     121             : {
     122             :     /* bit reversal */
     123         267 :     bitrv2_SR_fx( n, ip + 2, a );
     124             : 
     125             :     /* Do FFT */
     126         267 :     cftfsub_fx( n, a, w );
     127         267 : }
     128             : 
     129             : /*-----------------------------------------------------------------*
     130             :  * bitrv2_SR_fx()
     131             :  * Bit reversal: exchanges (re, im) pairs of a[] at offsets built from the loop indices and the ip[] table
     132             :  *-----------------------------------------------------------------*/
     133       27132 : static void bitrv2_SR_fx(
     134             :     Word16 n,         /* i    : data length of real and imag                      */
     135             :     const Word16 *ip, /* i    : table of offsets for bit reversal (read only)            */
     136             :     Word32 *a         /* i/o  : input/output data                     Q(q)*/
     137             : )
     138             : {
     139             :     Word16 j, j1, k, k1, m, m2;
     140             :     Word16 l;
     141             :     Word32 xr, xi, yr, yi;
     142             : 
     143       27132 :     l = n;
     144       27132 :     move16();
     145       27132 :     m = 1;
     146       27132 :     move16();
     147             : 
     148       81928 :     WHILE( ( ( m << 3 ) < l ) ) /* halve l and double m until 8 * m >= l */
     149             :     {
     150       54796 :         l = shr( l, 1 );
     151       54796 :         m = shl( m, 1 );
     152             :     }
     153             : 
     154       27132 :     m2 = shl( m, 1 ); /* m2 = 2 * m, the offset added between successive exchanges */
     155       27132 :     IF( EQ_16( shl( m, 3 ), l ) ) /* 8 * m == l: four exchanges per (j, k) pair plus one exchange per k */
     156             :     {
     157           5 :         FOR( k = 0; k < m; k++ )
     158             :         {
     159          10 :             FOR( j = 0; j < k; j++ )
     160             :             {
     161           6 :                 j1 = add( shl( j, 1 ), ip[k] );
     162           6 :                 k1 = add( shl( k, 1 ), ip[j] );
     163           6 :                 xr = a[j1];
     164           6 :                 move32();
     165           6 :                 xi = a[j1 + 1];
     166           6 :                 move32();
     167           6 :                 yr = a[k1];
     168           6 :                 move32();
     169           6 :                 yi = a[k1 + 1];
     170           6 :                 move32();
     171           6 :                 a[j1] = yr;
     172           6 :                 move32();
     173           6 :                 a[j1 + 1] = yi;
     174           6 :                 move32();
     175           6 :                 a[k1] = xr;
     176           6 :                 move32();
     177           6 :                 a[k1 + 1] = xi;
     178           6 :                 move32();
     179           6 :                 j1 = add( j1, m2 );
     180           6 :                 k1 = add( k1, shl( m2, 1 ) );
     181           6 :                 xr = a[j1];
     182           6 :                 move32();
     183           6 :                 xi = a[j1 + 1];
     184           6 :                 move32();
     185           6 :                 yr = a[k1];
     186           6 :                 move32();
     187           6 :                 yi = a[k1 + 1];
     188           6 :                 move32();
     189           6 :                 a[j1] = yr;
     190           6 :                 move32();
     191           6 :                 a[j1 + 1] = yi;
     192           6 :                 move32();
     193           6 :                 a[k1] = xr;
     194           6 :                 move32();
     195           6 :                 a[k1 + 1] = xi;
     196           6 :                 move32();
     197           6 :                 j1 = add( j1, m2 );
     198           6 :                 k1 = sub( k1, m2 );
     199           6 :                 xr = a[j1];
     200           6 :                 move32();
     201           6 :                 xi = a[j1 + 1];
     202           6 :                 move32();
     203           6 :                 xi = a[j1 + 1]; /* NOTE(review): repeats the load on source line 201; looks redundant */
     204           6 :                 move32();
     205           6 :                 yr = a[k1];
     206           6 :                 move32();
     207           6 :                 yi = a[k1 + 1];
     208           6 :                 move32();
     209           6 :                 a[j1] = yr;
     210           6 :                 move32();
     211           6 :                 a[j1 + 1] = yi;
     212           6 :                 move32();
     213           6 :                 a[k1] = xr;
     214           6 :                 move32();
     215           6 :                 a[k1 + 1] = xi;
     216           6 :                 move32();
     217           6 :                 j1 = add( j1, m2 );
     218           6 :                 k1 = add( k1, shl( m2, 1 ) );
     219           6 :                 xr = a[j1];
     220           6 :                 move32();
     221           6 :                 xi = a[j1 + 1];
     222           6 :                 move32();
     223           6 :                 yr = a[k1];
     224           6 :                 move32();
     225           6 :                 yi = a[k1 + 1];
     226           6 :                 move32();
     227           6 :                 a[j1] = yr;
     228           6 :                 move32();
     229           6 :                 a[j1 + 1] = yi;
     230           6 :                 move32();
     231           6 :                 a[k1] = xr;
     232           6 :                 move32();
     233           6 :                 a[k1 + 1] = xi;
     234           6 :                 move32();
     235             :             }
     236             :             /* extra exchange for this k: the pair at j1 with the pair at j1 + m2 */
     237           4 :             j1 = add( add( shl( k, 1 ), m2 ), ip[k] );
     238           4 :             k1 = add( j1, m2 );
     239           4 :             xr = a[j1];
     240           4 :             move32();
     241           4 :             xi = a[j1 + 1];
     242           4 :             move32();
     243           4 :             yr = a[k1];
     244           4 :             move32();
     245           4 :             yi = a[k1 + 1];
     246           4 :             move32();
     247           4 :             a[j1] = yr;
     248           4 :             move32();
     249           4 :             a[j1 + 1] = yi;
     250           4 :             move32();
     251           4 :             a[k1] = xr;
     252           4 :             move32();
     253           4 :             a[k1 + 1] = xi;
     254           4 :             move32();
     255             :         }
     256             :     }
     257             :     ELSE /* 8 * m != l: two exchanges per (j, k) pair, no per-k exchange */
     258             :     {
     259      111716 :         FOR( k = 1; k < m; k++ )
     260             :         {
     261      277695 :             FOR( j = 0; j < k; j++ )
     262             :             {
     263      193110 :                 j1 = add( shl( j, 1 ), ip[k] );
     264      193110 :                 k1 = add( shl( k, 1 ), ip[j] );
     265      193110 :                 xr = a[j1];
     266      193110 :                 move32();
     267      193110 :                 xi = a[j1 + 1];
     268      193110 :                 move32();
     269      193110 :                 yr = a[k1];
     270      193110 :                 move32();
     271      193110 :                 yi = a[k1 + 1];
     272      193110 :                 move32();
     273      193110 :                 a[j1] = yr;
     274      193110 :                 move32();
     275      193110 :                 a[j1 + 1] = yi;
     276      193110 :                 move32();
     277      193110 :                 a[k1] = xr;
     278      193110 :                 move32();
     279      193110 :                 a[k1 + 1] = xi;
     280      193110 :                 move32();
     281      193110 :                 j1 = add( j1, m2 );
     282      193110 :                 k1 = add( k1, m2 );
     283      193110 :                 xr = a[j1];
     284      193110 :                 move32();
     285      193110 :                 xi = a[j1 + 1];
     286      193110 :                 move32();
     287      193110 :                 yr = a[k1];
     288      193110 :                 move32();
     289      193110 :                 yi = a[k1 + 1];
     290      193110 :                 move32();
     291      193110 :                 a[j1] = yr;
     292      193110 :                 move32();
     293      193110 :                 a[j1 + 1] = yi;
     294      193110 :                 move32();
     295      193110 :                 a[k1] = xr;
     296      193110 :                 move32();
     297      193110 :                 a[k1 + 1] = xi;
     298      193110 :                 move32();
     299             :             }
     300             :         }
     301             :     }
     302             : 
     303       27132 :     return;
     304             : }
     305             : 
     306             : /*-----------------------------------------------------------------*
     307             :  * cftfsub_fx()
     308             :  * Complex Discrete Fourier Transform: runs cft1st_fx(), the cftmdl_fx() passes and a final butterfly pass in place on a[]
     309             :  *-----------------------------------------------------------------*/
     310       27132 : static void cftfsub_fx(
     311             :     Word16 n,       /* i    : data length of real and imag                        */
     312             :     Word32 *a,      /* i/o  : input/output data                     Q(q)*/
     313             :     const Word16 *w /* i    : cos/sin table                          Q14*/
     314             : )
     315             : {
     316             :     Word16 j, j1, j2, j3, l;
     317             :     Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
     318             : 
     319       27132 :     l = 2;
     320       27132 :     move16();
     321             : 
     322       27132 :     IF( n > 8 ) /* the first pass and the middle passes are only run for more than 8 entries */
     323             :     {
     324       27132 :         cft1st_fx( n, a, w );
     325       27132 :         l = 8;
     326       27132 :         move16();
     327       54796 :         WHILE( ( ( l << 2 ) < n ) ) /* l = 8, 32, 128, ... for as long as 4 * l < n */
     328             :         {
     329       27664 :             cftmdl_fx( n, l, a, w );
     330       27664 :             l = shl( l, 2 );
     331             :         }
     332             :     }
     333       27132 :     IF( shl( l, 2 ) == n ) /* 4 * l == n: the last pass combines four entries (j, j1, j2, j3) spaced l apart */
     334             :     {
     335          17 :         FOR( j = 0; j < l; j += 2 )
     336             :         {
     337          16 :             j1 = add( j, l );
     338          16 :             j2 = add( j1, l );
     339          16 :             j3 = add( j2, l );
     340          16 :             x0r = L_add( a[j], a[j1] );
     341          16 :             x0i = L_add( a[j + 1], a[j1 + 1] );
     342          16 :             x1r = L_sub( a[j], a[j1] );
     343          16 :             x1i = L_sub( a[j + 1], a[j1 + 1] );
     344          16 :             x2r = L_add( a[j2], a[j3] );
     345          16 :             x2i = L_add( a[j2 + 1], a[j3 + 1] );
     346          16 :             x3r = L_sub( a[j2], a[j3] );
     347          16 :             x3i = L_sub( a[j2 + 1], a[j3 + 1] );
     348          16 :             a[j] = L_add( x0r, x2r );
     349          16 :             move32();
     350          16 :             a[j2] = L_sub( x0r, x2r );
     351          16 :             move32();
     352          16 :             a[j + 1] = L_add( x0i, x2i );
     353          16 :             move32();
     354          16 :             a[j2 + 1] = L_sub( x0i, x2i );
     355          16 :             move32();
     356          16 :             a[j1] = L_sub( x1r, x3i );
     357          16 :             move32();
     358          16 :             a[j1 + 1] = L_add( x1i, x3r );
     359          16 :             move32();
     360          16 :             a[j3] = L_add( x1r, x3i );
     361          16 :             move32();
     362          16 :             a[j3 + 1] = L_sub( x1i, x3r );
     363          16 :             move32();
     364             :         }
     365             :     }
     366             :     ELSE /* otherwise the last pass is a plain sum/difference of the two entries j and j1 = j + l */
     367             :     {
     368      525067 :         FOR( j = 0; j < l; j += 2 )
     369             :         {
     370      497936 :             j1 = add( j, l );
     371      497936 :             x0r = L_sub( a[j], a[j1] );
     372      497936 :             x0i = L_sub( a[j + 1], a[j1 + 1] );
     373      497936 :             a[j] = L_add( a[j], a[j1] );
     374      497936 :             move32();
     375      497936 :             a[j + 1] = L_add( a[j + 1], a[j1 + 1] );
     376      497936 :             move32();
     377      497936 :             a[j1] = x0r;
     378      497936 :             move32();
     379      497936 :             move32(); /* NOTE(review): second move32() for a single store - looks like a duplicate, confirm */
     380      497936 :             a[j1 + 1] = x0i;
     381      497936 :             move32();
     382      497936 :             move32(); /* NOTE(review): second move32() for a single store - looks like a duplicate, confirm */
     383             :         }
     384             :     }
     385             : 
     386       27132 :     return;
     387             : }
     388             : 
     389             : /*-----------------------------------------------------------------*
     390             :  * cft1st_fx()
     391             :  * Subfunction of Complex Discrete Fourier Transform: first pass, working on a[] in groups of 16 consecutive entries
     392             :  *-----------------------------------------------------------------*/
     393       27132 : static void cft1st_fx(
     394             :     Word16 n,       /* i    : data length of real and imag              */
     395             :     Word32 *a,      /* i/o  : input/output data                     Q(q)*/
     396             :     const Word16 *w /* i    : cos/sin table                          Q14*/
     397             : )
     398             : {
     399             :     Word16 j, k1, k2;
     400             :     Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
     401             :     Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
     402             :     /* a[0..7]: sums and differences only, no table value is used */
     403       27132 :     x0r = L_add( a[0], a[2] );
     404       27132 :     x0i = L_add( a[1], a[3] );
     405       27132 :     x1r = L_sub( a[0], a[2] );
     406       27132 :     x1i = L_sub( a[1], a[3] );
     407       27132 :     x2r = L_add( a[4], a[6] );
     408       27132 :     x2i = L_add( a[5], a[7] );
     409       27132 :     x3r = L_sub( a[4], a[6] );
     410       27132 :     x3i = L_sub( a[5], a[7] );
     411       27132 :     a[0] = L_add( x0r, x2r );
     412       27132 :     move32();
     413       27132 :     a[1] = L_add( x0i, x2i );
     414       27132 :     move32();
     415       27132 :     a[4] = L_sub( x0r, x2r );
     416       27132 :     move32();
     417       27132 :     a[5] = L_sub( x0i, x2i );
     418       27132 :     move32();
     419       27132 :     a[2] = L_sub( x1r, x3i );
     420       27132 :     move32();
     421       27132 :     a[3] = L_add( x1i, x3r );
     422       27132 :     move32();
     423       27132 :     a[6] = L_add( x1r, x3i );
     424       27132 :     move32();
     425       27132 :     a[7] = L_sub( x1i, x3r );
     426       27132 :     move32();
     427             :     /* a[8..15]: same sums and differences; entries 10, 11, 14 and 15 are additionally multiplied by wk1r = w[2] */
     428       27132 :     wk1r = w[2];
     429       27132 :     move16();
     430       27132 :     x0r = L_add( a[8], a[10] );
     431       27132 :     x0i = L_add( a[9], a[11] );
     432       27132 :     x1r = L_sub( a[8], a[10] );
     433       27132 :     x1i = L_sub( a[9], a[11] );
     434       27132 :     x2r = L_add( a[12], a[14] );
     435       27132 :     x2i = L_add( a[13], a[15] );
     436       27132 :     x3r = L_sub( a[12], a[14] );
     437       27132 :     x3i = L_sub( a[13], a[15] );
     438       27132 :     a[8] = L_add( x0r, x2r );
     439       27132 :     move32();
     440       27132 :     a[9] = L_add( x0i, x2i );
     441       27132 :     move32();
     442       27132 :     a[12] = L_sub( x2i, x0i );
     443       27132 :     move32();
     444       27132 :     a[13] = L_sub( x0r, x2r );
     445       27132 :     move32();
     446             : 
     447       27132 :     x0r = L_sub( x1r, x3i );
     448       27132 :     x0i = L_add( x1i, x3r );
     449       27132 :     a[10] = Mult_32_16( L_shl( L_sub( x0r, x0i ), 1 ), wk1r ); /* NOTE(review): the L_shl( ..., 1 ) ahead of Mult_32_16() presumably compensates for the Q14 table - confirm against Mult_32_16() */
     450       27132 :     move32();
     451       27132 :     a[11] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
     452       27132 :     move32();
     453       27132 :     x0r = L_add( x3i, x1r );
     454       27132 :     x0i = L_sub( x3r, x1i );
     455       27132 :     a[14] = Mult_32_16( L_shl( L_sub( x0i, x0r ), 1 ), wk1r );
     456       27132 :     move32();
     457       27132 :     a[15] = Mult_32_16( L_shl( L_add( x0i, x0r ), 1 ), wk1r );
     458       27132 :     move32();
     459             :     /* remaining groups of 16 entries: k1 advances by 2 per group and k2 = 2 * k1 index the cos/sin table */
     460       27132 :     k1 = 0;
     461       27132 :     move16();
     462      124492 :     FOR( j = 16; j < n; j += 16 )
     463             :     {
     464       97360 :         k1 = add( k1, 2 );
     465       97360 :         k2 = shl( k1, 1 );
     466       97360 :         wk2r = w[k1];
     467       97360 :         move16();
     468       97360 :         wk2i = w[k1 + 1];
     469       97360 :         move16();
     470       97360 :         wk1r = w[k2];
     471       97360 :         move16();
     472       97360 :         wk1i = w[k2 + 1];
     473       97360 :         move16();
     474       97360 :         wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2i, wk1i ), 14 ) ) ); /* third factor derived from the other two: wk1r - 2 * wk2i * wk1i (L_mult doubles the product, the shift by 14 brings it back to the table format) */
     475       97360 :         wk3i = extract_l( L_msu0( L_shr( L_mult( wk2i, wk1r ), 14 ), wk1i, 1 ) ); /* 2 * wk2i * wk1r - wk1i */
     476       97360 :         x0r = L_add( a[j], a[j + 2] );
     477       97360 :         x0i = L_add( a[j + 1], a[j + 3] );
     478       97360 :         x1r = L_sub( a[j], a[j + 2] );
     479       97360 :         x1i = L_sub( a[j + 1], a[j + 3] );
     480       97360 :         x2r = L_add( a[j + 4], a[j + 6] );
     481       97360 :         x2i = L_add( a[j + 5], a[j + 7] );
     482       97360 :         x3r = L_sub( a[j + 4], a[j + 6] );
     483       97360 :         x3i = L_sub( a[j + 5], a[j + 7] );
     484       97360 :         a[j] = L_add( x0r, x2r );
     485       97360 :         move32();
     486       97360 :         a[j + 1] = L_add( x0i, x2i );
     487       97360 :         move32();
     488       97360 :         x0r = L_sub( x0r, x2r );
     489       97360 :         x0i = L_sub( x0i, x2i );
     490       97360 :         a[j + 4] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) ); /* complex product ( x0r + i * x0i ) * ( wk2r + i * wk2i ), real part here, imaginary part next */
     491       97360 :         move32();
     492       97360 :         a[j + 5] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk2r ), Mult_32_16( L_shl( x0r, 1 ), wk2i ) );
     493       97360 :         move32();
     494       97360 :         x0r = L_sub( x1r, x3i );
     495       97360 :         x0i = L_add( x1i, x3r );
     496       97360 :         a[j + 2] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
     497       97360 :         move32();
     498       97360 :         a[j + 3] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
     499       97360 :         move32();
     500       97360 :         x0r = L_add( x1r, x3i );
     501       97360 :         x0i = L_sub( x1i, x3r );
     502       97360 :         a[j + 6] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
     503       97360 :         move32();
     504       97360 :         a[j + 7] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
     505       97360 :         move32();
     506             :         /* second half of the group, a[j + 8 .. j + 15]: wk1 comes from w[k2 + 2], w[k2 + 3]; wk2 is reused */
     507       97360 :         wk1r = w[k2 + 2];
     508       97360 :         move16();
     509       97360 :         wk1i = w[k2 + 3];
     510       97360 :         move16();
     511       97360 :         wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2r, wk1i ), 14 ) ) ); /* wk1r - 2 * wk2r * wk1i */
     512       97360 :         wk3i = extract_l( L_msu0( L_shr( L_mult( wk2r, wk1r ), 14 ), wk1i, 1 ) ); /* 2 * wk2r * wk1r - wk1i */
     513       97360 :         x0r = L_add( a[j + 8], a[j + 10] );
     514       97360 :         x0i = L_add( a[j + 9], a[j + 11] );
     515       97360 :         x1r = L_sub( a[j + 8], a[j + 10] );
     516       97360 :         x1i = L_sub( a[j + 9], a[j + 11] );
     517       97360 :         x2r = L_add( a[j + 12], a[j + 14] );
     518       97360 :         x2i = L_add( a[j + 13], a[j + 15] );
     519       97360 :         x3r = L_sub( a[j + 12], a[j + 14] );
     520       97360 :         x3i = L_sub( a[j + 13], a[j + 15] );
     521       97360 :         a[j + 8] = L_add( x0r, x2r );
     522       97360 :         move32();
     523       97360 :         a[j + 9] = L_add( x0i, x2i );
     524       97360 :         move32();
     525       97360 :         x0r = L_sub( x0r, x2r );
     526       97360 :         x0i = L_sub( x0i, x2i );
     527       97360 :         a[j + 12] = L_negate( L_add( Mult_32_16( L_shl( x0r, 1 ), wk2i ), Mult_32_16( L_shl( x0i, 1 ), wk2r ) ) );
     528       97360 :         move32();
     529       97360 :         a[j + 13] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
     530       97360 :         move32();
     531       97360 :         x0r = L_sub( x1r, x3i );
     532       97360 :         x0i = L_add( x1i, x3r );
     533       97360 :         a[j + 10] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
     534       97360 :         move32();
     535       97360 :         a[j + 11] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
     536       97360 :         move32();
     537       97360 :         x0r = L_add( x1r, x3i );
     538       97360 :         x0i = L_sub( x1i, x3r );
     539       97360 :         a[j + 14] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
     540       97360 :         move32();
     541       97360 :         a[j + 15] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
     542       97360 :         move32();
     543             :     }
     544             : 
     545       27132 :     return;
     546             : }
     547             : 
     548             : /*-----------------------------------------------------------------*
     549             :  * cftmdl_fx()
     550             :  * Subfunction of Complex Discrete Fourier Transform
     551             :  *-----------------------------------------------------------------*/
     552       27664 : static void cftmdl_fx(
     553             :     Word16 n,       /* i    : data length of real and imag                         */
     554             :     Word16 l,       /* i    : initial shift for processing                         */
     555             :     Word32 *a,      /* i/o  : input/output data              Q(Qx+Q_edct)*/
     556             :     const Word16 *w /* i    : cos/sin table, the same Word16 table cftfsub_fx() passes in (documented there as Q14) */
     557             : )
     558             : {
     559             :     Word16 j, j1, j2, j3, k, k1, k2, m, m2;
     560             :     Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
     561             :     Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
     562             :     Word16 tmp;
     563             : 
     564       27664 :     m = shl( l, 2 );
     565      157472 :     FOR( j = 0; j < l; j += 2 )
     566             :     {
     567      129808 :         j1 = add( j, l );
     568      129808 :         j2 = add( j1, l );
     569      129808 :         j3 = add( j2, l );
     570      129808 :         x0r = L_add( a[j], a[j1] );
     571      129808 :         x0i = L_add( a[j + 1], a[j1 + 1] );
     572      129808 :         x1r = L_sub( a[j], a[j1] );
     573      129808 :         x1i = L_sub( a[j + 1], a[j1 + 1] );
     574      129808 :         x2r = L_add( a[j2], a[j3] );
     575      129808 :         x2i = L_add( a[j2 + 1], a[j3 + 1] );
     576      129808 :         x3r = L_sub( a[j2], a[j3] );
     577      129808 :         x3i = L_sub( a[j2 + 1], a[j3 + 1] );
     578      129808 :         a[j] = L_add( x0r, x2r );
     579      129808 :         move32();
     580      129808 :         a[j + 1] = L_add( x0i, x2i );
     581      129808 :         move32();
     582      129808 :         a[j2] = L_sub( x0r, x2r );
     583      129808 :         move32();
     584      129808 :         a[j2 + 1] = L_sub( x0i, x2i );
     585      129808 :         move32();
     586      129808 :         a[j1] = L_sub( x1r, x3i );
     587      129808 :         move32();
     588      129808 :         a[j1 + 1] = L_add( x1i, x3r );
     589      129808 :         move32();
     590      129808 :         a[j3] = L_add( x1r, x3i );
     591      129808 :         move32();
     592      129808 :         a[j3 + 1] = L_sub( x1i, x3r );
     593      129808 :         move32();
     594             :     }
     595             : 
     596       27664 :     wk1r = w[2];
     597       27664 :     move16();
     598       27664 :     tmp = add( l, m );
     599      157472 :     FOR( j = m; j < tmp; j += 2 )
     600             :     {
     601      129808 :         j1 = add( j, l );
     602      129808 :         j2 = add( j1, l );
     603      129808 :         j3 = add( j2, l );
     604      129808 :         x0r = L_add( a[j], a[j1] );
     605      129808 :         x0i = L_add( a[j + 1], a[j1 + 1] );
     606      129808 :         x1r = L_sub( a[j], a[j1] );
     607      129808 :         x1i = L_sub( a[j + 1], a[j1 + 1] );
     608      129808 :         x2r = L_add( a[j2], a[j3] );
     609      129808 :         x2i = L_add( a[j2 + 1], a[j3 + 1] );
     610      129808 :         x3r = L_sub( a[j2], a[j3] );
     611      129808 :         x3i = L_sub( a[j2 + 1], a[j3 + 1] );
     612      129808 :         a[j] = L_add( x0r, x2r );
     613      129808 :         move32();
     614      129808 :         a[j + 1] = L_add( x0i, x2i );
     615      129808 :         move32();
     616      129808 :         a[j2] = L_sub( x2i, x0i );
     617      129808 :         move32();
     618      129808 :         a[j2 + 1] = L_sub( x0r, x2r );
     619      129808 :         move32();
     620      129808 :         x0r = L_sub( x1r, x3i );
     621      129808 :         x0i = L_add( x1i, x3r );
     622      129808 :         a[j1] = Mult_32_16( L_shl( L_sub( x0r, x0i ), 1 ), wk1r );
     623      129808 :         move32();
     624      129808 :         a[j1 + 1] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
     625      129808 :         move32();
     626      129808 :         x0r = L_add( x3i, x1r );
     627      129808 :         x0i = L_sub( x3r, x1i );
     628      129808 :         a[j3] = Mult_32_16( L_shl( L_sub( x0i, x0r ), 1 ), wk1r );
     629      129808 :         move32();
     630      129808 :         a[j3 + 1] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
     631      129808 :         move32();
     632             :     }
     633             : 
     634       27664 :     k1 = 0;
     635       27664 :     move16();
     636       27664 :     m2 = shl( m, 1 );
     637       32453 :     FOR( k = m2; k < n; k += m2 )
     638             :     {
     639        4789 :         k1 = add( k1, 2 );
     640        4789 :         k2 = shl( k1, 1 );
     641        4789 :         wk2r = w[k1];
     642        4789 :         move16();
     643        4789 :         wk2i = w[k1 + 1];
     644        4789 :         move16();
     645        4789 :         wk1r = w[k2];
     646        4789 :         move16();
     647        4789 :         wk1i = w[k2 + 1];
     648        4789 :         move16();
     649        4789 :         wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2i, wk1i ), 14 ) ) );
     650        4789 :         wk3i = extract_l( L_msu0( L_shr( L_mult( wk2i, wk1r ), 14 ), wk1i, 1 ) );
     651             : 
     652        4789 :         tmp = add( l, k );
     653       33521 :         FOR( j = k; j < tmp; j += 2 )
     654             :         {
     655       28732 :             j1 = add( j, l );
     656       28732 :             j2 = add( j1, l );
     657       28732 :             j3 = add( j2, l );
     658       28732 :             x0r = L_add( a[j], a[j1] );
     659       28732 :             x0i = L_add( a[j + 1], a[j1 + 1] );
     660       28732 :             x1r = L_sub( a[j], a[j1] );
     661       28732 :             x1i = L_sub( a[j + 1], a[j1 + 1] );
     662       28732 :             x2r = L_add( a[j2], a[j3] );
     663       28732 :             x2i = L_add( a[j2 + 1], a[j3 + 1] );
     664       28732 :             x3r = L_sub( a[j2], a[j3] );
     665       28732 :             x3i = L_sub( a[j2 + 1], a[j3 + 1] );
     666       28732 :             a[j] = L_add( x0r, x2r );
     667       28732 :             move32();
     668       28732 :             a[j + 1] = L_add( x0i, x2i );
     669       28732 :             move32();
     670       28732 :             x0r = L_sub( x0r, x2r );
     671       28732 :             x0i = L_sub( x0i, x2i );
     672       28732 :             a[j2] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
     673       28732 :             move32();
     674       28732 :             a[j2 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk2r ), Mult_32_16( L_shl( x0r, 1 ), wk2i ) );
     675       28732 :             move32();
     676       28732 :             x0r = L_sub( x1r, x3i );
     677       28732 :             x0i = L_add( x1i, x3r );
     678       28732 :             a[j1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
     679       28732 :             move32();
     680       28732 :             a[j1 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
     681       28732 :             move32();
     682       28732 :             x0r = L_add( x1r, x3i );
     683       28732 :             x0i = L_sub( x1i, x3r );
     684       28732 :             a[j3] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
     685       28732 :             move32();
     686       28732 :             a[j3 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
     687       28732 :             move32();
     688             :         }
     689             : 
     690        4789 :         wk1r = w[k2 + 2];
     691        4789 :         move16();
     692        4789 :         wk1i = w[k2 + 3];
     693        4789 :         move16();
     694        4789 :         wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2r, wk1i ), 14 ) ) );
     695        4789 :         wk3i = extract_l( L_msu0( L_shr( L_mult( wk2r, wk1r ), 14 ), wk1i, 1 ) );
     696             : 
     697        4789 :         tmp = add( l, add( k, m ) );
     698       33521 :         FOR( j = k + m; j < tmp; j += 2 )
     699             :         {
     700       28732 :             j1 = add( j, l );
     701       28732 :             j2 = add( j1, l );
     702       28732 :             j3 = add( j2, l );
     703       28732 :             x0r = L_add( a[j], a[j1] );
     704       28732 :             x0i = L_add( a[j + 1], a[j1 + 1] );
     705       28732 :             x1r = L_sub( a[j], a[j1] );
     706       28732 :             x1i = L_sub( a[j + 1], a[j1 + 1] );
     707       28732 :             x2r = L_add( a[j2], a[j3] );
     708       28732 :             x2i = L_add( a[j2 + 1], a[j3 + 1] );
     709       28732 :             x3r = L_sub( a[j2], a[j3] );
     710       28732 :             x3i = L_sub( a[j2 + 1], a[j3 + 1] );
     711       28732 :             a[j] = L_add( x0r, x2r );
     712       28732 :             move32();
     713       28732 :             a[j + 1] = L_add( x0i, x2i );
     714       28732 :             move32();
     715       28732 :             x0r = L_sub( x0r, x2r );
     716       28732 :             x0i = L_sub( x0i, x2i );
     717       28732 :             a[j2] = L_negate( L_add( Mult_32_16( L_shl( x0r, 1 ), wk2i ), Mult_32_16( L_shl( x0i, 1 ), wk2r ) ) );
     718       28732 :             move32();
     719       28732 :             a[j2 + 1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
     720       28732 :             move32();
     721       28732 :             x0r = L_sub( x1r, x3i );
     722       28732 :             x0i = L_add( x1i, x3r );
     723       28732 :             a[j1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
     724       28732 :             move32();
     725       28732 :             a[j1 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
     726       28732 :             move32();
     727       28732 :             x0r = L_add( x1r, x3i );
     728       28732 :             x0i = L_sub( x1i, x3r );
     729       28732 :             a[j3] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
     730       28732 :             move32();
     731       28732 :             a[j3 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
     732       28732 :             move32();
     733             :         }
     734             :     }
     735             : 
     736       27664 :     return;
     737             : }
     738             : 
     739             : 
     740           0 : static void cftbsub_fx( /* In-place complex butterfly driver with imaginary parts sign-flipped in the last stage; hit count 0 in this coverage run */
     741             :     Word16 n,       /* i    : length of a[] in Word32 elements (re/im interleaved) */
     742             :     Word32 *a,      // Q(Qx+Q_edct)
     743             :     const Word16 *w /* i    : cos/sin table Q14                */
     744             : )
     745             : {
     746             :     Word16 j, j1, j2, j3, l;
     747             :     Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
     748             : 
     749           0 :     l = 2;
     750           0 :     move16();
     751           0 :     IF( GT_16( n, 8 ) )
     752             :     {
     753           0 :         cft1st_fx( n, a, w ); /* first stage, then repeated cftmdl_fx passes with l growing by 4x */
     754           0 :         l = 8;
     755           0 :         move16();
     756             : 
     757           0 :         WHILE( ( ( l << 2 ) < n ) ) /* NOTE(review): raw '<<' here, whereas the rest of the function uses shl() */
     758             :         {
     759           0 :             cftmdl_fx( n, l, a, w );
     760           0 :             l = shl( l, 2 );
     761             :         }
     762             :     }
     763             : 
     764           0 :     IF( EQ_16( shl( l, 2 ), n ) ) /* 4*l == n: finish with a radix-4 stage */
     765             :     {
     766           0 :         FOR( j = 0; j < l; j += 2 )
     767             :         {
     768           0 :             j1 = add( j, l );
     769           0 :             j2 = add( j1, l );
     770           0 :             j3 = add( j2, l );
     771           0 :             x0r = L_add( a[j], a[j1] );
     772           0 :             x0i = L_negate( L_add( a[j + 1], a[j1 + 1] ) ); /* imaginary sums of the first pair are negated */
     773           0 :             x1r = L_sub( a[j], a[j1] );
     774           0 :             x1i = L_sub( a[j1 + 1], a[j + 1] ); /* negated imaginary difference of the first pair */
     775           0 :             x2r = L_add( a[j2], a[j3] );
     776           0 :             x2i = L_add( a[j2 + 1], a[j3 + 1] );
     777           0 :             x3r = L_sub( a[j2], a[j3] );
     778           0 :             x3i = L_sub( a[j2 + 1], a[j3 + 1] );
     779           0 :             a[j] = L_add( x0r, x2r );
     780           0 :             move32();
     781           0 :             a[j + 1] = L_sub( x0i, x2i );
     782           0 :             move32();
     783           0 :             a[j2] = L_sub( x0r, x2r );
     784           0 :             move32();
     785           0 :             a[j2 + 1] = L_add( x0i, x2i );
     786           0 :             move32();
     787           0 :             a[j1] = L_sub( x1r, x3i );
     788           0 :             move32();
     789           0 :             a[j1 + 1] = L_sub( x1i, x3r );
     790           0 :             move32();
     791           0 :             a[j3] = L_add( x1r, x3i );
     792           0 :             move32();
     793           0 :             a[j3 + 1] = L_add( x1i, x3r );
     794           0 :             move32();
     795             :         }
     796             :     }
     797             :     ELSE /* otherwise finish with a radix-2 stage */
     798             :     {
     799           0 :         FOR( j = 0; j < l; j += 2 )
     800             :         {
     801           0 :             j1 = add( j, l );
     802           0 :             x0r = L_sub( a[j], a[j1] ); /* differences are saved first because a[j], a[j + 1] are overwritten below */
     803           0 :             x0i = L_sub( a[j1 + 1], a[j + 1] );
     804           0 :             a[j] = L_add( a[j], a[j1] );
     805           0 :             move32();
     806           0 :             a[j + 1] = L_negate( L_add( a[j + 1], a[j1 + 1] ) );
     807           0 :             move32();
     808           0 :             a[j1] = x0r;
     809           0 :             move32();
     810           0 :             a[j1 + 1] = x0i;
     811           0 :             move32();
     812             :         }
     813             :     }
     814           0 : }
     815             : 
     816       26865 : static void rftfsub_fx( /* In-place update of the mirrored pairs (a[j], a[n - j]) using table c[] */
     817             :     Word16 n,  /* i   : length of a[] in Word32 elements */
     818             :     Word32 *a, // Qx
     819             :     Word16 nc, /* i   : highest index read from c[] (c[nc - kk] with kk >= ks) */
     820             :     const Word16 *c /*Q14*/ )
     821             : {
     822             :     Word16 j, k, kk, ks, m, tmp;
     823             :     Word32 xr, xi, yr, yi;
     824             :     Word16 wkr, wki;
     825             : 
     826       26865 :     m = shr( n, 1 );
     827             :     /*ks = 2 * nc / m; */
     828       26865 :     tmp = shl( nc, 1 );
     829       26865 :     ks = 0;
     830       26865 :     move16();
     831      134325 :     WHILE( ( tmp >= m ) ) /* integer division by repeated subtraction; NOTE(review): needs m > 0 to terminate */
     832             :     {
     833      107460 :         ks = add( ks, 1 );
     834      107460 :         tmp = sub( tmp, m );
     835             :     }
     836       26865 :     kk = 0;
     837       26865 :     move16();
     838      429840 :     FOR( j = 2; j < m; j += 2 ) /* j walks up from 2 while k = n - j walks down; index 0/1 and m/m+1 are left untouched */
     839             :     {
     840      402975 :         k = sub( n, j );
     841      402975 :         kk = add( kk, ks ); /* table stride ks per pair */
     842      402975 :         wkr = sub( 8192 /*0.5.Q14*/, c[( nc - kk )] ); // Q14
     843      402975 :         wki = c[kk];                                   // Q14
     844      402975 :         move16();
     845      402975 :         xr = L_sub( a[j], a[k] );                                                           // Qx
     846      402975 :         xi = L_add( a[j + 1], a[k + 1] );                                                   // Qx
     847      402975 :         yr = L_sub( Mult_32_16( L_shl( xr, 1 ), wkr ), Mult_32_16( L_shl( xi, 1 ), wki ) ); // Qx, yr = xr*wkr - xi*wki
     848      402975 :         yi = L_add( Mult_32_16( L_shl( xi, 1 ), wkr ), Mult_32_16( L_shl( xr, 1 ), wki ) ); // Qx, yi = xi*wkr + xr*wki
     849      402975 :         a[j] = L_sub( a[j], yr );
     850      402975 :         move32();
     851      402975 :         a[j + 1] = L_sub( a[j + 1], yi );
     852      402975 :         move32();
     853      402975 :         a[k] = L_add( a[k], yr );
     854      402975 :         move32();
     855      402975 :         a[k + 1] = L_sub( a[k + 1], yi );
     856      402975 :         move32();
     857             :     }
     858       26865 : }
     859             : 
     860             : 
     861           0 : static void rftbsub_fx( /* Counterpart of rftfsub_fx with opposite rotation and negated imaginary outputs; hit count 0 in this coverage run */
     862             :     Word16 n,  /* i   : length of a[] in Word32 elements */
     863             :     Word32 *a, // Qx
     864             :     Word16 nc, /* i   : highest index read from c[] (c[nc - kk] with kk >= ks) */
     865             :     const Word16 *c /*Q14*/ )
     866             : {
     867             :     Word16 j, k, kk, ks, m, tmp;
     868             :     Word32 xr, xi, yr, yi;
     869             :     Word16 wkr, wki;
     870             : 
     871           0 :     a[1] = L_negate( a[1] ); /* NOTE(review): no move32() follows this store, unlike every other store to a[] in this function */
     872           0 :     m = shr( n, 1 );
     873             :     /*ks = 2 * nc / m; */
     874           0 :     tmp = shl( nc, 1 );
     875           0 :     ks = 0;
     876           0 :     move16();
     877           0 :     WHILE( ( tmp >= m ) ) /* integer division by repeated subtraction; NOTE(review): needs m > 0 to terminate */
     878             :     {
     879           0 :         ks = add( ks, 1 );
     880           0 :         tmp = sub( tmp, m );
     881             :     }
     882           0 :     kk = 0;
     883           0 :     move16();
     884           0 :     FOR( j = 2; j < m; j += 2 ) /* j walks up from 2 while k = n - j walks down */
     885             :     {
     886           0 :         k = sub( n, j );
     887           0 :         kk = add( kk, ks );
     888           0 :         wkr = sub( 8192 /*0.5.Q14*/, c[( nc - kk )] ); // Q14
     889           0 :         wki = c[kk];                                   // Q14
     890           0 :         move16();
     891           0 :         xr = L_sub( a[j], a[k] );                                                           // Qx
     892           0 :         xi = L_add( a[j + 1], a[k + 1] );                                                   // Qx
     893           0 :         yr = L_add( Mult_32_16( L_shl( xr, 1 ), wkr ), Mult_32_16( L_shl( xi, 1 ), wki ) ); // Qx, yr = xr*wkr + xi*wki
     894           0 :         yi = L_sub( Mult_32_16( L_shl( xi, 1 ), wkr ), Mult_32_16( L_shl( xr, 1 ), wki ) ); // Qx, yi = xi*wkr - xr*wki
     895           0 :         a[j] = L_sub( a[j], yr );
     896           0 :         move32();
     897           0 :         a[j + 1] = L_sub( yi, a[j + 1] );
     898           0 :         move32();
     899           0 :         a[k] = L_add( a[k], yr );
     900           0 :         move32();
     901           0 :         a[k + 1] = L_sub( yi, a[k + 1] );
     902           0 :         move32();
     903             :     }
     904           0 :     a[m + 1] = L_negate( a[m + 1] ); /* imaginary part at the midpoint is negated as well */
     905           0 :     move32();
     906           0 : }
     907             : 
     908             : 
     909       26865 : static void dctsub_fx( /* In-place rotation of the mirrored pairs (a[j], a[n - j]) plus scaling of the midpoint a[n/2] */
     910             :     Word16 n,  /* i   : length of a[] in Word32 elements */
     911             :     Word32 *a, // Qx
     912             :     Word16 nc, /* i   : highest index read from c[] (c[nc - kk] with kk >= ks) */
     913             :     const Word16 *c /*Q14*/ )
     914             : {
     915             :     Word16 j, k, kk, ks, m, tmp;
     916             :     Word16 wkr, wki;
     917             :     Word32 xr;
     918             : 
     919       26865 :     m = shr( n, 1 );
     920             :     /*ks = nc / n; */
     921       26865 :     tmp = nc;
     922       26865 :     move16();
     923       26865 :     ks = 0;
     924       26865 :     move16();
     925       53730 :     WHILE( ( tmp >= n ) ) /* integer division by repeated subtraction; NOTE(review): needs n > 0 to terminate */
     926             :     {
     927       26865 :         ks = add( ks, 1 );
     928       26865 :         tmp = sub( tmp, n );
     929             :     }
     930       26865 :     kk = 0;
     931       26865 :     move16();
     932      859680 :     FOR( j = 1; j < m; j++ )
     933             :     {
     934      832815 :         k = sub( n, j );
     935      832815 :         kk = add( kk, ks );
     936      832815 :         wkr = sub( c[kk], c[( nc - kk )] );                                                       // Q14
     937      832815 :         wki = add( c[kk], c[( nc - kk )] );                                                       // Q14
     938      832815 :         xr = L_sub( Mult_32_16( L_shl( a[j], 1 ), wki ), Mult_32_16( L_shl( a[k], 1 ), wkr ) );   // Qx, new a[k]; held in xr because a[j] is overwritten next
     939      832815 :         a[j] = L_add( Mult_32_16( L_shl( a[j], 1 ), wkr ), Mult_32_16( L_shl( a[k], 1 ), wki ) ); // Qx
     940      832815 :         move32();
     941      832815 :         a[k] = xr;
     942      832815 :         move32();
     943             :     }
     944       26865 :     a[m] = Mult_32_16( L_shl( a[m], 1 ), c[0] ); // Qx
     945       26865 :     move16(); /* NOTE(review): a[m] is Word32; the other Word32 stores in this file are followed by move32() */
     946       26865 : }
     947             : 
     948             : /*-----------------------------------------------------------------*
     949             :  * edct2_fx()
     950             :  *
     951             :  * Transformation of the signal to DCT domain
     952             :  * OR Inverse EDCT-II for short frames
     953             :  *-----------------------------------------------------------------*/
     954             : 
     955       26865 : void edct2_fx( /* Scales the 16-bit input into a[] and runs the forward (isgn >= 0) or inverse (isgn < 0) transform in place on a[] */
     956             :     Word16 n,    /* i   : number of samples in in[] and a[] */
     957             :     Word16 isgn, /* i   : transform direction selector: >= 0 forward path, < 0 inverse path */
     958             :     Word16 *in, // Q(q)
     959             :     Word32 *a,  // Qx
     960             :     Word16 *q,   /* o   : left shift applied to in[] when it is copied into a[] */
     961             :     const Word16 *ip, /* i   : ip[0], ip[1] give the table sizes nw, nc; ip + 2 is passed to bitrv2_SR_fx */
     962             :     const Word16 *w /*Q14*/ )
     963             : {
     964             :     Word16 j, nw, nc;
     965             :     Word32 xr;
     966             : 
     967       26865 :     *q = Exp16Array( n, in ); /* presumably the common headroom of in[] - confirm against Exp16Array */
     968       26865 :     move16();
     969       26865 :     *q = add( *q, 6 );
     970       26865 :     move16();
     971     1746225 :     FOR( j = 0; j < n; j++ )
     972             :     {
     973     1719360 :         a[j] = L_shl( (Word32) in[j], *q ); /* widen to 32 bits and apply the shift reported in *q */
     974     1719360 :         move32();
     975             :     }
     976             : 
     977       26865 :     nw = ip[0];
     978       26865 :     move16();
     979       26865 :     if ( GT_16( n, shl( nw, 2 ) ) ) /* n > 4*nw: use n/4 as the offset of the second table (w + nw) */
     980             :     {
     981           0 :         nw = shr( n, 2 );
     982             :     }
     983             : 
     984       26865 :     nc = ip[1];
     985       26865 :     move16();
     986       26865 :     if ( GT_16( n, nc ) ) /* n > nc: use n as the size passed to the *sub_fx helpers */
     987             :     {
     988           0 :         nc = n;
     989           0 :         move16();
     990             :     }
     991             : 
     992       26865 :     IF( isgn < 0 ) /* inverse path: not executed in this coverage run (hit count 0) */
     993             :     {
     994           0 :         xr = a[n - 1];
     995           0 :         move32();
     996           0 :         FOR( j = n - 2; j >= 2; j -= 2 ) /* walk downwards so a[j - 1] is read before it is overwritten */
     997             :         {
     998           0 :             a[j + 1] = L_sub( a[j], a[j - 1] );
     999           0 :             move32();
    1000           0 :             a[j] = L_add( a[j], a[j - 1] );
    1001           0 :             move32();
    1002             :         }
    1003           0 :         a[1] = L_sub( a[0], xr );
    1004           0 :         move32();
    1005           0 :         a[0] = L_add( a[0], xr );
    1006           0 :         move32();
    1007             : 
    1008           0 :         IF( GT_16( n, 4 ) )
    1009             :         {
    1010           0 :             rftbsub_fx( n, a, nc, w + nw );
    1011           0 :             bitrv2_SR_fx( n, ip + 2, a );
    1012           0 :             cftbsub_fx( n, a, w );
    1013             :         }
    1014           0 :         ELSE IF( EQ_16( n, 4 ) )
    1015             :         {
    1016           0 :             cftfsub_fx( n, a, w );
    1017             :         }
    1018             :     }
    1019             : 
    1020       26865 :     IF( isgn >= 0 )
    1021             :     {
    1022       26865 :         a[0] = L_shr( a[0], 1 ); /* forward path halves the first sample before dctsub_fx */
    1023       26865 :         move32();
    1024             :     }
    1025             : 
    1026       26865 :     dctsub_fx( n, a, nc, w + nw ); /* run for both directions: after the inverse steps, before the forward steps */
    1027             : 
    1028       26865 :     IF( isgn >= 0 )
    1029             :     {
    1030       26865 :         IF( GT_16( n, 4 ) )
    1031             :         {
    1032       26865 :             bitrv2_SR_fx( n, ip + 2, a );
    1033       26865 :             cftfsub_fx( n, a, w );
    1034       26865 :             rftfsub_fx( n, a, nc, w + nw );
    1035             :         }
    1036           0 :         ELSE IF( EQ_16( n, 4 ) )
    1037             :         {
    1038           0 :             cftfsub_fx( n, a, w );
    1039             :         }
    1040       26865 :         xr = L_sub( a[0], a[1] ); /* kept aside; stored to a[n - 1] after the loop */
    1041       26865 :         a[0] = L_add( a[0], a[1] );
    1042       26865 :         move32();
    1043      859680 :         FOR( j = 2; j < n; j += 2 ) /* difference of each pair goes to a[j - 1], sum stays at a[j] */
    1044             :         {
    1045      832815 :             a[j - 1] = L_sub( a[j], a[j + 1] );
    1046      832815 :             move32();
    1047      832815 :             a[j] = L_add( a[j], a[j + 1] );
    1048      832815 :             move32();
    1049             :         }
    1050       26865 :         a[n - 1] = xr;
    1051       26865 :         move32();
    1052             : 
    1053     1746225 :         FOR( j = 0; j < n; j++ )
    1054             :         {
    1055     1719360 :             a[j] = L_shr( a[j], 5 ); // a[j] / 32.0f; NOTE(review): *q is not adjusted for this shift - presumably handled by the caller
    1056     1719360 :             move32();
    1057             :         }
    1058             :     }
    1059       26865 : }
    1060             : 
    1061             : 
    1062             : /*-----------------------------------------------------------------*
    1063             :  * fft5_shift4_16fx()
    1064             :  * 5-point FFT with 4-point circular shift
    1065             :  *-----------------------------------------------------------------*/
    1066             : 
    1067     1193856 : static void fft5_shift4_16fx( /* In-place 5-point butterfly on the elements addressed by Idx[0], Idx[n1], ..., Idx[4 * n1] */
    1068             :     Word16 n1,        /* i   : stride (in entries) between the five addresses read from Idx */
    1069             :     Word16 *zRe,      /* i/o : real part of input and output data Q(Qx+Q_edct)      */
    1070             :     Word16 *zIm,      /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
    1071             :     const Word16 *Idx /* i   : pointer of the address table Q0            */
    1072             : )
    1073             : {
    1074             :     Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn, T2, T3, T4, T5, T6, T7;
    1075             :     Word16 i0, i1, i2, i3, i4;
    1076             :     Word32 L_tmp;
    1077             : 
    1078             : 
    1079     1193856 :     i0 = Idx[0]; /* i0..i4: positions in zRe/zIm of the five points */
    1080     1193856 :     move16();
    1081     1193856 :     i1 = Idx[n1];
    1082     1193856 :     move16();
    1083     1193856 :     i2 = Idx[n1 * 2];
    1084     1193856 :     move16();
    1085     1193856 :     i3 = Idx[n1 * 3];
    1086     1193856 :     move16();
    1087     1193856 :     i4 = Idx[n1 * 4];
    1088     1193856 :     move16();
    1089             : 
    1090     1193856 :     T1 = zRe[i0]; // Qx
    1091     1193856 :     move16();
    1092     1193856 :     To = zIm[i0]; // Qx
    1093     1193856 :     move16();
    1094             : 
    1095     1193856 :     T2 = zRe[i1];
    1096     1193856 :     move16();
    1097     1193856 :     T3 = zRe[i4];
    1098     1193856 :     move16();
    1099     1193856 :     T4 = add_sat( T2, T3 );
    1100     1193856 :     T5 = zRe[i2];
    1101     1193856 :     move16();
    1102     1193856 :     T6 = zRe[i3];
    1103     1193856 :     move16();
    1104     1193856 :     T7 = add_sat( T5, T6 );
    1105     1193856 :     T8 = add_sat( T4, T7 ); /* sum of real parts of points 1..4 */
    1106     1193856 :     Tt = sub_sat( T5, T6 );
    1107             :     /*    T9 = KP559016994 * (T4 - T7); */
    1108     1193856 :     L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
    1109     1193856 :     T9 = round_fx_sat( L_tmp );                                // Qx
    1110     1193856 :     Ts = sub_sat( T2, T3 );
    1111             : 
    1112     1193856 :     T2 = zIm[i1]; /* T2..T7 are reused for the imaginary parts from here on */
    1113     1193856 :     move16();
    1114     1193856 :     T3 = zIm[i4];
    1115     1193856 :     move16();
    1116     1193856 :     T4 = add( T2, T3 ); /* NOTE(review): plain add() here, whereas the matching real-part sum above uses add_sat() */
    1117     1193856 :     T5 = zIm[i2];
    1118     1193856 :     move16();
    1119     1193856 :     T6 = zIm[i3];
    1120     1193856 :     move16();
    1121     1193856 :     T7 = add_sat( T5, T6 );
    1122     1193856 :     Te = sub_sat( T2, T3 );
    1123     1193856 :     Tp = add_sat( T4, T7 ); /* sum of imaginary parts of points 1..4 */
    1124     1193856 :     Th = sub_sat( T5, T6 );
    1125             : 
    1126             :     /*       Tn = KP559016994 * (T4 - T7); */
    1127     1193856 :     L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
    1128     1193856 :     Tn = round_fx_sat( L_tmp );                                // Qx
    1129     1193856 :     zRe[i0] = add_sat( T1, T8 ); /* output 0 = sum of all five points */
    1130     1193856 :     move16();
    1131     1193856 :     zIm[i0] = add_sat( To, Tp );
    1132     1193856 :     move16();
    1133             : 
    1134             :     /*        T2 = KP951056516*Te + KP587785252*Th; */
    1135     1193856 :     L_tmp = Mult_32_16( KP951056516_16FX, Te );        // Q(16 +x)
    1136     1193856 :     L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Th ); // Q(16 +x)
    1137     1193856 :     T2 = round_fx_sat( L_tmp );                        // Qx
    1138             :     /*T3 = KP951056516*Th - KP587785252*Te; */
    1139     1193856 :     L_tmp = Mult_32_16( KP951056516_16FX, Th );        // Q(16 +x)
    1140     1193856 :     L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Te ); // Q(16 +x)
    1141     1193856 :     T3 = round_fx_sat( L_tmp );                        // Qx
    1142     1193856 :     T6 = sub_sat( T1, shr_sat( T8, 2 ) );              // T1 - (T8 / 4)
    1143     1193856 :     T4 = add_sat( T9, T6 );
    1144     1193856 :     T5 = sub_sat( T6, T9 );
    1145     1193856 :     zRe[i1] = sub_sat( T4, T2 );
    1146     1193856 :     move16();
    1147     1193856 :     zRe[i2] = add_sat( T5, T3 );
    1148     1193856 :     move16();
    1149     1193856 :     zRe[i4] = add_sat( T4, T2 );
    1150     1193856 :     move16();
    1151     1193856 :     zRe[i3] = sub_sat( T5, T3 );
    1152     1193856 :     move16();
    1153             : 
    1154             :     /*    T2 = KP951056516 * Ts + KP587785252 * Tt; */
    1155     1193856 :     L_tmp = Mult_32_16( KP951056516_16FX, Ts );        // Q(16 +x)
    1156     1193856 :     L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Tt ); // Q(16 +x)
    1157     1193856 :     T2 = round_fx_sat( L_tmp );                        // Qx
    1158             :     /*                T3 = KP951056516 * Tt - KP587785252 * Ts; */
    1159     1193856 :     L_tmp = Mult_32_16( KP951056516_16FX, Tt );        // Q(16 +x)
    1160     1193856 :     L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Ts ); // Q(16 +x)
    1161     1193856 :     T3 = round_fx_sat( L_tmp );                        // Qx
    1162     1193856 :     T6 = sub_sat( To, shr( Tp, 2 ) );                  // To - (Tp / 4); NOTE(review): shr() here vs shr_sat() in the real-part counterpart
    1163     1193856 :     T4 = add_sat( Tn, T6 );
    1164     1193856 :     T5 = sub_sat( T6, Tn );
    1165     1193856 :     zIm[i4] = sub_sat( T4, T2 );
    1166     1193856 :     move16();
    1167     1193856 :     zIm[i2] = sub_sat( T5, T3 );
    1168     1193856 :     move16();
    1169     1193856 :     zIm[i1] = add_sat( T2, T4 );
    1170     1193856 :     move16();
    1171     1193856 :     zIm[i3] = add_sat( T3, T5 );
    1172     1193856 :     move16();
    1173     1193856 :     return;
    1174             : }
    1175             : 
    1176             : /*-----------------------------------------------------------------*
    1177             :  * fft5_32_16fx()
    1178             :  * 5-point FFT, called 32 times
    1179             :  *-----------------------------------------------------------------*/
    1180     2486656 : static void fft5_32_16fx(
    1181             :     Word16 *zRe,      /* i/o : real part of input and output data Qx      */
    1182             :     Word16 *zIm,      /* i/o : imaginary part of input and output data Qx */
    1183             :     const Word16 *Idx /* i   : pointer of the address table Q0            */
    1184             : )
    1185             : {
    1186             :     Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn, T2, T3, T4, T5, T6, T7;
    1187             :     Word16 i0, i1, i2, i3, i4;
    1188             :     Word32 L_tmp;
    1189             : #ifndef ISSUE_1836_replace_overflow_libcom
    1190             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
    1191             :     Flag Overflow = 0;
    1192             :     move32();
    1193             : #endif
    1194             : #endif
    1195     2486656 :     i0 = Idx[0];
    1196     2486656 :     move16();
    1197     2486656 :     i1 = Idx[32];
    1198     2486656 :     move16();
    1199     2486656 :     i2 = Idx[64];
    1200     2486656 :     move16();
    1201     2486656 :     i3 = Idx[96];
    1202     2486656 :     move16();
    1203     2486656 :     i4 = Idx[128];
    1204     2486656 :     move16();
    1205             : 
    1206     2486656 :     T1 = zRe[i0]; // Qx
    1207     2486656 :     move16();
    1208     2486656 :     To = zIm[i0]; // Qx
    1209     2486656 :     move16();
    1210             : 
    1211     2486656 :     T2 = zRe[i1]; // Qx
    1212     2486656 :     move16();
    1213     2486656 :     T3 = zRe[i4]; // Qx
    1214     2486656 :     move16();
    1215             : 
    1216     2486656 :     T4 = add_sat( T2, T3 );
    1217     2486656 :     T5 = zRe[i2];
    1218     2486656 :     move16();
    1219     2486656 :     T6 = zRe[i3];
    1220     2486656 :     move16();
    1221             : #ifdef ISSUE_1836_replace_overflow_libcom
    1222     2486656 :     T7 = add_sat( T5, T6 );
    1223     2486656 :     T8 = add_sat( T4, T7 );
    1224     2486656 :     Tt = sub_sat( T5, T6 );
    1225             : #else
    1226             :     T7 = add_o( T5, T6, &Overflow );
    1227             :     T8 = add_o( T4, T7, &Overflow );
    1228             :     Tt = sub_o( T5, T6, &Overflow );
    1229             : #endif
    1230             :     /* T9 = KP559016994 * (T4 - T7); */
    1231     2486656 :     L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
    1232     2486656 :     T9 = round_fx_sat( L_tmp );                                // Qx
    1233     2486656 :     Ts = sub_sat( T2, T3 );
    1234             : 
    1235     2486656 :     T2 = zIm[i1];
    1236     2486656 :     move16();
    1237     2486656 :     T3 = zIm[i4];
    1238     2486656 :     move16();
    1239     2486656 :     T4 = add_sat( T2, T3 );
    1240     2486656 :     T5 = zIm[i2];
    1241     2486656 :     move16();
    1242     2486656 :     T6 = zIm[i3];
    1243     2486656 :     move16();
    1244     2486656 :     T7 = add_sat( T5, T6 );
    1245     2486656 :     Te = sub_sat( T2, T3 );
    1246     2486656 :     Tp = add_sat( T4, T7 );
    1247     2486656 :     Th = sub_sat( T5, T6 );
    1248     2486656 :     L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
    1249     2486656 :     Tn = round_fx_sat( L_tmp );                                // Qx
    1250             : 
    1251             : #ifdef ISSUE_1836_replace_overflow_libcom
    1252     2486656 :     zRe[i0] = add_sat( T1, T8 );
    1253     2486656 :     move16();
    1254     2486656 :     zIm[i0] = add_sat( To, Tp );
    1255     2486656 :     move32();
    1256             : #else
    1257             :     zRe[i0] = add_o( T1, T8, &Overflow );
    1258             :     move16();
    1259             :     zIm[i0] = add_o( To, Tp, &Overflow );
    1260             :     move32();
    1261             : #endif
    1262             : 
    1263             :     /*T2 = KP951056516*Te + KP587785252*Th; */
    1264     2486656 :     L_tmp = Mult_32_16( KP951056516_16FX, Te );        // Q(16 +x)
    1265     2486656 :     L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Th ); // Q(16 +x)
    1266     2486656 :     T2 = round_fx_sat( L_tmp );                        // Qx
    1267             : 
    1268             :     /*T3 = KP951056516*Th - KP587785252*Te; */
    1269     2486656 :     L_tmp = Mult_32_16( KP951056516_16FX, Th );        // Q(16 +x)
    1270     2486656 :     L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Te ); // Q(16 +x)
    1271     2486656 :     T3 = round_fx_sat( L_tmp );                        // Qx
    1272             : 
    1273             : 
    1274     2486656 :     T6 = sub_sat( T1, shr( T8, 2 ) );
    1275     2486656 :     T4 = add_sat( T9, T6 );
    1276     2486656 :     T5 = sub_sat( T6, T9 );
    1277             : 
    1278             : #ifdef ISSUE_1836_replace_overflow_libcom
    1279     2486656 :     zRe[i3] = sub_sat( T4, T2 );
    1280     2486656 :     move32();
    1281     2486656 :     zRe[i1] = add_sat( T5, T3 );
    1282     2486656 :     move32();
    1283     2486656 :     zRe[i2] = add_sat( T4, T2 );
    1284     2486656 :     move32();
    1285     2486656 :     zRe[i4] = sub_sat( T5, T3 );
    1286     2486656 :     move32();
    1287             : #else
    1288             :     zRe[i3] = sub_o( T4, T2, &Overflow );
    1289             :     move32();
    1290             :     zRe[i1] = add_o( T5, T3, &Overflow );
    1291             :     move32();
    1292             :     zRe[i2] = add_o( T4, T2, &Overflow );
    1293             :     move32();
    1294             :     zRe[i4] = sub_o( T5, T3, &Overflow );
    1295             :     move32();
    1296             : #endif
    1297             : 
    1298             :     /*    T2 = KP951056516 * Ts + KP587785252 * Tt; */
    1299     2486656 :     L_tmp = Mult_32_16( KP951056516_16FX, Ts );        // Q(16 +x)
    1300     2486656 :     L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Tt ); // Q(16 +x)
    1301     2486656 :     T2 = round_fx_sat( L_tmp );                        // Qx
    1302             : 
    1303             :     /*                T3 = KP951056516 * Tt - KP587785252 * Ts; */
    1304     2486656 :     L_tmp = Mult_32_16( KP951056516_16FX, Tt );        // Q(16 +x)
    1305     2486656 :     L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Ts ); // Q(16 +x)
    1306             : 
    1307     2486656 :     T3 = round_fx_sat( L_tmp ); // Qx
    1308             : 
    1309     2486656 :     T6 = sub_sat( To, shr( Tp, 2 ) );
    1310     2486656 :     T4 = add_sat( Tn, T6 );
    1311     2486656 :     T5 = sub_sat( T6, Tn );
    1312     2486656 :     zIm[i2] = sub_sat( T4, T2 );
    1313     2486656 :     move16();
    1314     2486656 :     zIm[i1] = sub_sat( T5, T3 );
    1315     2486656 :     move16();
    1316     2486656 :     zIm[i3] = add_sat( T2, T4 );
    1317     2486656 :     move16();
    1318     2486656 :     zIm[i4] = add_sat( T3, T5 );
    1319     2486656 :     move16();
    1320             : 
    1321     2486656 :     return;
    1322             : }
    1323             : 
    1324             : /*-----------------------------------------------------------------*
    1325             :  * fft64_16fx()
    1326             :  * 64-point FFT: gathers x/y through Idx, transforms with cdftForw_16fx, scatters back through Odx_fft64 and Idx
    1327             :  *-----------------------------------------------------------------*/
    1328       93270 : static void fft64_16fx(
    1329             :     Word16 *x,        /* i/o : real part of input and output data Q(Qx+Q_edct)      */
    1330             :     Word16 *y,        /* i/o : imaginary part of input and output data Q(Qx+Q_edct)  */
    1331             :     const Word16 *Idx /* i   : pointer of the address table Q0            */
    1332             : )
    1333             : {
    1334             :     Word16 i, id, jd;
    1335             :     Word16 z[128]; /* interleaved work buffer: z[2*i] holds the real part, z[2*i+1] the imaginary part */
    1336       93270 :     move16(); /*penalty for 1 ptr init */
    1337     6062550 :     FOR( i = 0; i < 64; i++ )
    1338             :     {
    1339     5969280 :         id = Idx[i]; /* position in x/y of the i-th input */
    1340     5969280 :         move16();
    1341     5969280 :         z[2 * i] = x[id];
    1342     5969280 :         move16();
    1343     5969280 :         z[2 * i + 1] = y[id];
    1344     5969280 :         move16();
    1345             :     }
    1346             : 
    1347       93270 :     cdftForw_16fx( 128, z, Ip_fft64, w_fft128_16fx ); /* 128 = number of Word16 values in z (64 real + 64 imaginary) */
    1348             : 
    1349       93270 :     move16(); /*penalty for 1 ptr init */
    1350     6062550 :     FOR( i = 0; i < 64; i++ )
    1351             :     {
    1352     5969280 :         jd = Odx_fft64[i]; /* table entry that selects where transform output i is stored */
    1353     5969280 :         move16();
    1354     5969280 :         id = Idx[jd];
    1355     5969280 :         move16();
    1356     5969280 :         x[id] = z[2 * i];
    1357     5969280 :         move16();
    1358     5969280 :         y[id] = z[2 * i + 1];
    1359     5969280 :         move16();
    1360             :     }
    1361             : 
    1362       93270 :     return;
    1363             : }
    1364             : 
    1365             : 
    1366             : /*-----------------------------------------------------------------*
    1367             :  * fft32_5_16fx()
    1368             :  * 32-point FFT called for 5 times: gathers x/y through Idx, transforms, scatters back through Odx_fft32_5 and Idx
    1369             :  *-----------------------------------------------------------------*/
    1370      388540 : static void fft32_5_16fx(
    1371             :     Word16 *x,        /* i/o : real part of input and output data Q(Qx+Q_edct) */
    1372             :     Word16 *y,        /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
    1373             :     const Word16 *Idx /* i   : pointer of the address table             */
    1374             : )
    1375             : {
    1376             :     Word16 i, id, jd;
    1377             :     Word16 z[64]; /* interleaved work buffer: z[2*i] holds the real part, z[2*i+1] the imaginary part */
    1378             : 
    1379    12821820 :     FOR( i = 0; i < 32; i++ )
    1380             :     {
    1381    12433280 :         id = Idx[i]; /* position in x/y of the i-th input */
    1382    12433280 :         move16();
    1383    12433280 :         z[2 * i] = x[id];
    1384    12433280 :         move16();
    1385    12433280 :         z[2 * i + 1] = y[id];
    1386    12433280 :         move16();
    1387             :     }
    1388             : 
    1389      388540 :     cdftForw_16fx( 64, z, Ip_fft32, w_fft32_16fx ); /* 64 = number of Word16 values in z (32 real + 32 imaginary) */
    1390             : 
    1391    12821820 :     FOR( i = 0; i < 32; i++ )
    1392             :     {
    1393    12433280 :         jd = Odx_fft32_5[i]; /* table entry that selects where transform output i is stored */
    1394    12433280 :         move16();
    1395    12433280 :         id = Idx[jd];
    1396    12433280 :         move16();
    1397    12433280 :         x[id] = z[2 * i];
    1398    12433280 :         move16();
    1399    12433280 :         y[id] = z[2 * i + 1];
    1400    12433280 :         move16();
    1401             :     }
    1402             : 
    1403      388540 :     return;
    1404             : }
    1405             : 
    1406             : 
    1407             : /*-----------------------------------------------------------------*
    1408             :  * DoRTFT160_16fx()
    1409             :  * a low complexity 2-dimensional DFT of 160 points (5 x 32-point FFTs followed by 32 x 5-point FFTs, in place)
    1410             :  *-----------------------------------------------------------------*/
    1411       77708 : void DoRTFT160_16fx(
    1412             :     Word16 x[], /* i/o : real part of input and output data Q(Qx+Q_edct)      */
    1413             :     Word16 y[]  /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
    1414             : )
    1415             : {
    1416             :     Word16 j;
    1417             : 
    1418             :     /* Applying 32-point FFT for 5 times based on the address table Idx_dortft160 */
    1419      466248 :     FOR( j = 0; j < 5; j++ )
    1420             :     {
    1421      388540 :         fft32_5_16fx( x, y, Idx_dortft160 + shl( j, 5 ) /*32*j*/ ); /* each call works on its own run of 32 table entries */
    1422             :     }
    1423             : 
    1424             :     /* Applying 5-point FFT for 32 times based on the address table Idx_dortft160 */
    1425     2564364 :     FOR( j = 0; j < 32; j++ )
    1426             :     {
    1427     2486656 :         fft5_32_16fx( x, y, Idx_dortft160 + j ); /* the callee reads table entries j, j+32, j+64, j+96, j+128 */
    1428             :     }
    1429             : 
    1430       77708 :     return;
    1431             : }
    1432             : 
    1433             : /*-----------------------------------------------------------------*
    1434             :  * DoRTFT320_16fx()
    1435             :  * a low complexity 2-dimensional DFT of 320 points (5 x 64-point FFTs followed by 64 x 5-point FFTs, in place)
    1436             :  *-----------------------------------------------------------------*/
    1437       18654 : void DoRTFT320_16fx(
    1438             :     Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct)      */
    1439             :     Word16 *y  /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
    1440             : )
    1441             : {
    1442             :     Word16 j;
    1443             : 
    1444             :     /* Applying 64-point FFT for 5 times based on the address table Idx_dortft320 */
    1445      111924 :     FOR( j = 0; j < 5; j++ )
    1446             :     {
    1447       93270 :         fft64_16fx( x, y, Idx_dortft320 + shl( j, 6 ) /*64*j*/ ); /* each call works on its own run of 64 table entries */
    1448             :     }
    1449             : 
    1450             :     /* Applying 5-point FFT for 64 times based on the address table Idx_dortft320 */
    1451     1212510 :     FOR( j = 0; j < 64; j++ )
    1452             :     {
    1453     1193856 :         fft5_shift4_16fx( 64, x, y, Idx_dortft320 + j );
    1454             :     }
    1455             : 
    1456       18654 :     return;
    1457             : }
    1458             : 
    1459             : /*-----------------------------------------------------------------*
    1460             :  * DoRTFT128_16fx()
    1461             :  * FFT with 128 points, in place; transform output i is stored at index 128 - i (output 0 stays at index 0)
    1462             :  *-----------------------------------------------------------------*/
    1463      121189 : void DoRTFT128_16fx(
    1464             :     Word16 *x, /* i/o : real part of input and output data       Q(Qx+Q_edct)*/
    1465             :     Word16 *y  /* i/o : imaginary part of input and output data  Q(Qx+Q_edct)*/
    1466             : )
    1467             : {
    1468             : 
    1469             :     Word16 i;
    1470             :     Word16 z[256]; /* interleaved work buffer: z[2*i] holds the real part, z[2*i+1] the imaginary part */
    1471             : 
    1472    15633381 :     FOR( i = 0; i < 128; i++ )
    1473             :     {
    1474    15512192 :         z[2 * i] = x[i];
    1475    15512192 :         move16();
    1476    15512192 :         z[2 * i + 1] = y[i];
    1477    15512192 :         move16();
    1478             :     }
    1479             : 
    1480      121189 :     cdftForw_16fx( 256, z, Ip_fft128, w_fft128_16fx ); /* 256 = number of Word16 values in z (128 real + 128 imaginary) */
    1481             : 
    1482      121189 :     x[0] = z[0]; /* output 0 keeps its position */
    1483      121189 :     move16();
    1484      121189 :     y[0] = z[1];
    1485      121189 :     move16();
    1486    15512192 :     FOR( i = 1; i < 128; i++ ) /* outputs 1..127 are written back in reversed index order */
    1487             :     {
    1488    15391003 :         x[128 - i] = z[2 * i];
    1489    15391003 :         move16();
    1490    15391003 :         y[128 - i] = z[2 * i + 1];
    1491    15391003 :         move16();
    1492             :     }
    1493             : 
    1494      121189 :     return;
    1495             : }
    1496             : /*-----------------------------------------------------------------*
    1497             :  * cdftForw_16fx()
    1498             :  * Main function of Complex Discrete Fourier Transform: bit-reversal reordering followed by the FFT stages, in place on a[]
    1499             :  *-----------------------------------------------------------------*/
    1500      602999 : static void cdftForw_16fx(
    1501             :     Word16 n,         /* i    : data length of real and imag                             */
    1502             :     Word16 *a,        /* i/o  : input/output data             Q(Qx+Q_edct)*/
    1503             :     const Word16 *ip, /* i    : work area for bit reversal                               */
    1504             :     const Word32 *w   /* i    : cos/sin table                                             Q30*/
    1505             : )
    1506             : {
    1507             :     /* bit reversal */
    1508      602999 :     bitrv2_SR_16fx( n, ip + 2, a ); /* the reordering routine is handed the table starting at ip[2] */
    1509             : 
    1510             :     /* Do FFT */
    1511      602999 :     cftfsub_16fx( n, a, w );
    1512      602999 : }
    1513             : 
    1514             : /*-----------------------------------------------------------------*
    1515             :  * bitrv2_SR_16fx()
    1516             :  * Bit reversal: reorders a[] in place by swapping pairs of adjacent entries at positions derived from the ip[] table
    1517             :  *-----------------------------------------------------------------*/
    1518      602999 : static void bitrv2_SR_16fx(
    1519             :     Word16 n,         /* i    : data length of real and imag                      */
    1520             :     const Word16 *ip, /* i    : table of offsets for bit reversal (read only)             */
    1521             :     Word16 *a         /* i/o  : input/output data             Q(Qx+Q_edct)*/
    1522             : )
    1523             : {
    1524             :     Word16 j, j1, k, k1, m, m2;
    1525             :     Word16 l;
    1526             :     Word16 xr, xi, yr, yi;
    1527             : 
    1528      602999 :     l = n;
    1529      602999 :     move16();
    1530      602999 :     m = 1;
    1531      602999 :     move16();
    1532             : 
    1533     1930186 :     WHILE( ( ( m << 3 ) < l ) ) /* halve l and double m until 8*m >= l */
    1534             :     {
    1535     1327187 :         l = shr( l, 1 );
    1536     1327187 :         m = shl( m, 1 );
    1537             :     }
    1538             : 
    1539      602999 :     m2 = shl( m, 1 );
    1540      602999 :     IF( EQ_16( shl( m, 3 ), l ) ) /* the loop stopped with 8*m == l (e.g. n = 128): four swaps per (j,k) plus one per k */
    1541             :     {
    1542      466350 :         FOR( k = 0; k < m; k++ )
    1543             :         {
    1544      932700 :             FOR( j = 0; j < k; j++ )
    1545             :             {
    1546      559620 :                 j1 = add( shl( j, 1 ), ip[k] );
    1547      559620 :                 k1 = add( shl( k, 1 ), ip[j] );
    1548      559620 :                 xr = a[j1]; /* swap 1: exchange the pairs a[j1],a[j1+1] and a[k1],a[k1+1] */
    1549      559620 :                 move16();
    1550      559620 :                 xi = a[j1 + 1];
    1551      559620 :                 move16();
    1552      559620 :                 yr = a[k1];
    1553      559620 :                 move16();
    1554      559620 :                 yi = a[k1 + 1];
    1555      559620 :                 move16();
    1556      559620 :                 a[j1] = yr;
    1557      559620 :                 move16();
    1558      559620 :                 a[j1 + 1] = yi;
    1559      559620 :                 move16();
    1560      559620 :                 a[k1] = xr;
    1561      559620 :                 move16();
    1562      559620 :                 a[k1 + 1] = xi;
    1563      559620 :                 move16();
    1564      559620 :                 j1 = add( j1, m2 );
    1565      559620 :                 k1 = add( k1, shl( m2, 1 ) );
    1566      559620 :                 xr = a[j1]; /* swap 2: j1 advanced by m2, k1 advanced by 2*m2 */
    1567      559620 :                 move16();
    1568      559620 :                 xi = a[j1 + 1];
    1569      559620 :                 move16();
    1570      559620 :                 yr = a[k1];
    1571      559620 :                 move16();
    1572      559620 :                 yi = a[k1 + 1];
    1573      559620 :                 move16();
    1574      559620 :                 a[j1] = yr;
    1575      559620 :                 move16();
    1576      559620 :                 a[j1 + 1] = yi;
    1577      559620 :                 move16();
    1578      559620 :                 a[k1] = xr;
    1579      559620 :                 move16();
    1580      559620 :                 a[k1 + 1] = xi;
    1581      559620 :                 move16();
    1582      559620 :                 j1 = add( j1, m2 );
    1583      559620 :                 k1 = sub( k1, m2 );
    1584      559620 :                 xr = a[j1]; /* swap 3: j1 advanced by m2, k1 moved back by m2 */
    1585      559620 :                 move16();
    1586      559620 :                 xi = a[j1 + 1];
    1587      559620 :                 move16();
    1588      559620 :                 xi = a[j1 + 1]; /* NOTE(review): repeats the load of the previous statement; looks redundant - confirm */
    1589      559620 :                 move16();
    1590      559620 :                 yr = a[k1];
    1591      559620 :                 move16();
    1592      559620 :                 yi = a[k1 + 1];
    1593      559620 :                 move16();
    1594      559620 :                 a[j1] = yr;
    1595      559620 :                 move16();
    1596      559620 :                 a[j1 + 1] = yi;
    1597      559620 :                 move16();
    1598      559620 :                 a[k1] = xr;
    1599      559620 :                 move16();
    1600      559620 :                 a[k1 + 1] = xi;
    1601      559620 :                 move16();
    1602      559620 :                 j1 = add( j1, m2 );
    1603      559620 :                 k1 = add( k1, shl( m2, 1 ) );
    1604      559620 :                 xr = a[j1]; /* swap 4: j1 advanced by m2, k1 advanced by 2*m2 */
    1605      559620 :                 move16();
    1606      559620 :                 xi = a[j1 + 1];
    1607      559620 :                 move16();
    1608      559620 :                 yr = a[k1];
    1609      559620 :                 move16();
    1610      559620 :                 yi = a[k1 + 1];
    1611      559620 :                 move16();
    1612      559620 :                 a[j1] = yr;
    1613      559620 :                 move16();
    1614      559620 :                 a[j1 + 1] = yi;
    1615      559620 :                 move16();
    1616      559620 :                 a[k1] = xr;
    1617      559620 :                 move16();
    1618      559620 :                 a[k1 + 1] = xi;
    1619      559620 :                 move16();
    1620             :             }
    1621             : 
    1622      373080 :             j1 = add( add( shl( k, 1 ), m2 ), ip[k] ); /* per-k swap outside the j loop: positions 2*k + m2 + ip[k] and that plus m2 */
    1623      373080 :             k1 = add( j1, m2 );
    1624      373080 :             xr = a[j1];
    1625      373080 :             move16();
    1626      373080 :             xi = a[j1 + 1];
    1627      373080 :             move16();
    1628      373080 :             yr = a[k1];
    1629      373080 :             move16();
    1630      373080 :             yi = a[k1 + 1];
    1631      373080 :             move16();
    1632      373080 :             a[j1] = yr;
    1633      373080 :             move16();
    1634      373080 :             a[j1 + 1] = yi;
    1635      373080 :             move16();
    1636      373080 :             a[k1] = xr;
    1637      373080 :             move16();
    1638      373080 :             a[k1 + 1] = xi;
    1639      373080 :             move16();
    1640             :         }
    1641             :     }
    1642             :     ELSE
    1643             :     {
    1644     2523672 :         FOR( k = 1; k < m; k++ ) /* 8*m != l: two swaps per (j,k), no extra per-k swap */
    1645             :         {
    1646     7738475 :             FOR( j = 0; j < k; j++ )
    1647             :             {
    1648     5724532 :                 j1 = add( shl( j, 1 ), ip[k] );
    1649     5724532 :                 k1 = add( shl( k, 1 ), ip[j] );
    1650     5724532 :                 xr = a[j1]; /* swap 1: exchange the pairs a[j1],a[j1+1] and a[k1],a[k1+1] */
    1651     5724532 :                 move16();
    1652     5724532 :                 xi = a[j1 + 1];
    1653     5724532 :                 move16();
    1654     5724532 :                 yr = a[k1];
    1655     5724532 :                 move16();
    1656     5724532 :                 yi = a[k1 + 1];
    1657     5724532 :                 move16();
    1658     5724532 :                 a[j1] = yr;
    1659     5724532 :                 move16();
    1660     5724532 :                 a[j1 + 1] = yi;
    1661     5724532 :                 move16();
    1662     5724532 :                 a[k1] = xr;
    1663     5724532 :                 move16();
    1664     5724532 :                 a[k1 + 1] = xi;
    1665     5724532 :                 move16();
    1666     5724532 :                 j1 = add( j1, m2 );
    1667     5724532 :                 k1 = add( k1, m2 );
    1668     5724532 :                 xr = a[j1]; /* swap 2: both positions advanced by m2 */
    1669     5724532 :                 move16();
    1670     5724532 :                 xi = a[j1 + 1];
    1671     5724532 :                 move16();
    1672     5724532 :                 yr = a[k1];
    1673     5724532 :                 move16();
    1674     5724532 :                 yi = a[k1 + 1];
    1675     5724532 :                 move16();
    1676     5724532 :                 a[j1] = yr;
    1677     5724532 :                 move16();
    1678     5724532 :                 a[j1 + 1] = yi;
    1679     5724532 :                 move16();
    1680     5724532 :                 a[k1] = xr;
    1681     5724532 :                 move16();
    1682     5724532 :                 a[k1 + 1] = xi;
    1683     5724532 :                 move16();
    1684             :             }
    1685             :         }
    1686             :     }
    1687             : 
    1688      602999 :     return;
    1689             : }
    1690             : 
    1691             : /*-----------------------------------------------------------------*
    1692             :  * cftfsub_16fx()
    1693             :  * Complex Discrete Fourier Transform: runs cft1st_16fx, then cftmdl_16fx while 4*l < n, then a final pass over a[]
    1694             :  *-----------------------------------------------------------------*/
    1695      602999 : static void cftfsub_16fx(
    1696             :     Word16 n,       /* i    : data length of real and imag                        */
    1697             :     Word16 *a,      /* i/o  : input/output data             Q(Qx+Q_edct)*/
    1698             :     const Word32 *w /* i    : cos/sin table                          Q30*/
    1699             : )
    1700             : {
    1701             :     Word16 j, j1, j2, j3, l;
    1702             :     Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    1703             : #ifndef ISSUE_1836_replace_overflow_libcom
    1704             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
    1705             :     Flag Overflow = 0;
    1706             :     move32();
    1707             : #endif
    1708             : #endif
    1709             : 
    1710      602999 :     l = 2;
    1711      602999 :     move16();
    1712      602999 :     IF( GT_16( n, 8 ) )
    1713             :     {
    1714      602999 :         cft1st_16fx( n, a, w );
    1715      602999 :         l = 8;
    1716      602999 :         move16();
    1717     1327187 :         WHILE( ( ( l << 2 ) < n ) ) /* l is multiplied by 4 after each cftmdl_16fx pass */
    1718             :         {
    1719      724188 :             cftmdl_16fx( n, l, a, w );
    1720      724188 :             l = shl( l, 2 );
    1721             :         }
    1722             :     }
    1723             : 
    1724      602999 :     IF( EQ_16( shl( l, 2 ), n ) ) /* 4*l == n: final pass combines four complex values spaced l apart */
    1725             :     {
    1726     1585590 :         FOR( j = 0; j < l; j += 2 )
    1727             :         {
    1728     1492320 :             j1 = add( j, l );
    1729     1492320 :             j2 = add( j1, l );
    1730     1492320 :             j3 = add( j2, l );
    1731     1492320 :             x0r = add( a[j], a[j1] ); /* NOTE(review): this branch uses add()/sub() while the ELSE branch uses add_sat()/sub_sat() - confirm the difference is intentional */
    1732     1492320 :             x0i = add( a[j + 1], a[j1 + 1] );
    1733     1492320 :             x1r = sub( a[j], a[j1] );
    1734     1492320 :             x1i = sub( a[j + 1], a[j1 + 1] );
    1735     1492320 :             x2r = add( a[j2], a[j3] );
    1736     1492320 :             x2i = add( a[j2 + 1], a[j3 + 1] );
    1737     1492320 :             x3r = sub( a[j2], a[j3] );
    1738     1492320 :             x3i = sub( a[j2 + 1], a[j3 + 1] );
    1739     1492320 :             a[j] = add( x0r, x2r );
    1740     1492320 :             move16();
    1741     1492320 :             a[j + 1] = add( x0i, x2i );
    1742     1492320 :             move16();
    1743     1492320 :             a[j2] = sub( x0r, x2r );
    1744     1492320 :             move16();
    1745     1492320 :             a[j2 + 1] = sub( x0i, x2i );
    1746     1492320 :             move16();
    1747     1492320 :             a[j1] = sub( x1r, x3i ); /* the j1/j3 outputs cross the real and imaginary parts of x3 */
    1748     1492320 :             move16();
    1749     1492320 :             a[j1 + 1] = add( x1i, x3r );
    1750     1492320 :             move16();
    1751     1492320 :             a[j3] = add( x1r, x3i );
    1752     1492320 :             move16();
    1753     1492320 :             a[j3 + 1] = sub( x1i, x3r );
    1754     1492320 :             move16();
    1755             :         }
    1756             :     }
    1757             :     ELSE
    1758             :     {
    1759    14482465 :         FOR( j = 0; j < l; j += 2 ) /* otherwise: final pass forms the sum and difference of two complex values spaced l apart */
    1760             :         {
    1761             : #ifdef ISSUE_1836_replace_overflow_libcom
    1762    13972736 :             j1 = add_sat( j, l );
    1763    13972736 :             x0r = sub_sat( a[j], a[j1] ); /* differences are computed first because a[j] is overwritten by the sums below */
    1764    13972736 :             x0i = sub_sat( a[j + 1], a[j1 + 1] );
    1765    13972736 :             a[j] = add_sat( a[j], a[j1] );
    1766    13972736 :             move16();
    1767    13972736 :             a[j + 1] = add_sat( a[j + 1], a[j1 + 1] );
    1768    13972736 :             move16();
    1769             : #else
    1770             :             j1 = add_o( j, l, &Overflow );
    1771             :             x0r = sub_o( a[j], a[j1], &Overflow );
    1772             :             x0i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
    1773             :             a[j] = add_o( a[j], a[j1], &Overflow );
    1774             :             move16();
    1775             :             a[j + 1] = add_o( a[j + 1], a[j1 + 1], &Overflow );
    1776             :             move16();
    1777             : #endif
    1778    13972736 :             a[j1] = x0r;
    1779    13972736 :             move16();
    1780    13972736 :             a[j1 + 1] = x0i;
    1781    13972736 :             move16();
    1782             :         }
    1783             :     }
    1784      602999 :     return;
    1785             : }
    1786             : 
    1787             : /*-----------------------------------------------------------------*
    1788             :  * cft1st()
    1789             :  * Subfunction of Complex Discrete Fourier Transform
    1790             :  *-----------------------------------------------------------------*/
    1791      602999 : static void cft1st_16fx(
    1792             :     Word16 n,       /* i    : data length of real and imag              */
    1793             :     Word16 *a,      /* i/o  : input/output data             Q(Qx+Q_edct)*/
    1794             :     const Word32 *w /* i    : cos/sin table                          Q30*/
    1795             : )
    1796             : {
    1797             :     Word16 j, k1, k2;
    1798             :     Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
    1799             :     Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    1800             :     Word16 tmp;
    1801             :     Word32 L_tmp;
    1802             : #ifndef ISSUE_1836_replace_overflow_libcom
    1803             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
    1804             :     Flag Overflow = 0;
    1805             :     move32();
    1806             : #endif
    1807             : #endif
    1808             : 
    1809             : #ifdef ISSUE_1836_replace_overflow_libcom
    1810      602999 :     x0r = add_sat( a[0], a[2] );
    1811      602999 :     x0i = add_sat( a[1], a[3] );
    1812      602999 :     x1r = sub_sat( a[0], a[2] );
    1813      602999 :     x1i = sub_sat( a[1], a[3] );
    1814      602999 :     x2r = add_sat( a[4], a[6] );
    1815      602999 :     x2i = add_sat( a[5], a[7] );
    1816      602999 :     x3r = sub_sat( a[4], a[6] );
    1817      602999 :     x3i = sub_sat( a[5], a[7] );
    1818      602999 :     a[0] = add_sat( x0r, x2r );
    1819      602999 :     move16();
    1820      602999 :     a[1] = add_sat( x0i, x2i );
    1821      602999 :     move16();
    1822      602999 :     a[4] = sub_sat( x0r, x2r );
    1823      602999 :     move16();
    1824      602999 :     a[5] = sub_sat( x0i, x2i );
    1825      602999 :     move16();
    1826      602999 :     a[2] = sub_sat( x1r, x3i );
    1827      602999 :     move16();
    1828      602999 :     a[3] = add_sat( x1i, x3r );
    1829      602999 :     move16();
    1830      602999 :     a[6] = add_sat( x1r, x3i );
    1831      602999 :     move16();
    1832      602999 :     a[7] = sub_sat( x1i, x3r );
    1833      602999 :     wk1r = w[2];
    1834      602999 :     move32();
    1835             : 
    1836      602999 :     x0r = add_sat( a[8], a[10] );
    1837      602999 :     x0i = add_sat( a[9], a[11] );
    1838      602999 :     x1r = sub_sat( a[8], a[10] );
    1839      602999 :     x1i = sub_sat( a[9], a[11] );
    1840      602999 :     x2r = add_sat( a[12], a[14] );
    1841      602999 :     x2i = add_sat( a[13], a[15] );
    1842      602999 :     x3r = sub_sat( a[12], a[14] );
    1843      602999 :     x3i = sub_sat( a[13], a[15] );
    1844      602999 :     a[8] = add_sat( x0r, x2r );
    1845      602999 :     move16();
    1846      602999 :     a[9] = add_sat( x0i, x2i );
    1847      602999 :     move16();
    1848      602999 :     a[12] = sub_sat( x2i, x0i );
    1849      602999 :     move16();
    1850      602999 :     a[13] = sub_sat( x0r, x2r );
    1851      602999 :     move16();
    1852             : 
    1853      602999 :     x0r = sub_sat( x1r, x3i );
    1854      602999 :     x0i = add_sat( x1i, x3r );
    1855      602999 :     tmp = sub_sat( x0r, x0i );
    1856      602999 :     L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
    1857             : 
    1858      602999 :     a[10] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    1859      602999 :     move16();
    1860             : 
    1861      602999 :     tmp = add_sat( x0r, x0i );
    1862      602999 :     L_tmp = Mult_32_16( wk1r, tmp );               /*Q(15+Qx+Q_edct) */
    1863      602999 :     a[11] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /* Q(Qx+Q_edct)*/
    1864      602999 :     move16();
    1865             : 
    1866      602999 :     x0r = add_sat( x3i, x1r );
    1867      602999 :     x0i = sub_sat( x3r, x1i );
    1868      602999 :     tmp = sub_sat( x0i, x0r );
    1869      602999 :     L_tmp = Mult_32_16( wk1r, tmp );               /*Q(15+Qx+Q_edct) */
    1870      602999 :     a[14] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    1871      602999 :     move16();
    1872             : 
    1873      602999 :     tmp = add_sat( x0i, x0r );
    1874      602999 :     L_tmp = Mult_32_16( wk1r, tmp );               /*Q(15+Qx+Q_edct) */
    1875      602999 :     a[15] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    1876      602999 :     move16();
    1877      602999 :     k1 = 0;
    1878      602999 :     move16();
    1879             : #else
    1880             :     x0r = add_o( a[0], a[2], &Overflow );
    1881             :     x0i = add_o( a[1], a[3], &Overflow );
    1882             :     x1r = sub_o( a[0], a[2], &Overflow );
    1883             :     x1i = sub_o( a[1], a[3], &Overflow );
    1884             :     x2r = add_o( a[4], a[6], &Overflow );
    1885             :     x2i = add_o( a[5], a[7], &Overflow );
    1886             :     x3r = sub_o( a[4], a[6], &Overflow );
    1887             :     x3i = sub_o( a[5], a[7], &Overflow );
    1888             :     a[0] = add_o( x0r, x2r, &Overflow );
    1889             :     move16();
    1890             :     a[1] = add_o( x0i, x2i, &Overflow );
    1891             :     move16();
    1892             :     a[4] = sub_o( x0r, x2r, &Overflow );
    1893             :     move16();
    1894             :     a[5] = sub_o( x0i, x2i, &Overflow );
    1895             :     move16();
    1896             :     a[2] = sub_o( x1r, x3i, &Overflow );
    1897             :     move16();
    1898             :     a[3] = add_o( x1i, x3r, &Overflow );
    1899             :     move16();
    1900             :     a[6] = add_o( x1r, x3i, &Overflow );
    1901             :     move16();
    1902             :     a[7] = sub_o( x1i, x3r, &Overflow );
    1903             :     wk1r = w[2];
    1904             :     move32();
    1905             : 
    1906             :     x0r = add_o( a[8], a[10], &Overflow );
    1907             :     x0i = add_o( a[9], a[11], &Overflow );
    1908             :     x1r = sub_o( a[8], a[10], &Overflow );
    1909             :     x1i = sub_o( a[9], a[11], &Overflow );
    1910             :     x2r = add_o( a[12], a[14], &Overflow );
    1911             :     x2i = add_o( a[13], a[15], &Overflow );
    1912             :     x3r = sub_o( a[12], a[14], &Overflow );
    1913             :     x3i = sub_o( a[13], a[15], &Overflow );
    1914             :     a[8] = add_o( x0r, x2r, &Overflow );
    1915             :     move16();
    1916             :     a[9] = add_o( x0i, x2i, &Overflow );
    1917             :     move16();
    1918             :     a[12] = sub_o( x2i, x0i, &Overflow );
    1919             :     move16();
    1920             :     a[13] = sub_o( x0r, x2r, &Overflow );
    1921             :     move16();
    1922             : 
    1923             :     x0r = sub_o( x1r, x3i, &Overflow );
    1924             :     x0i = add_o( x1i, x3r, &Overflow );
    1925             :     tmp = sub_o( x0r, x0i, &Overflow );
    1926             :     L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
    1927             : 
    1928             :     a[10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    1929             :     move16();
    1930             : 
    1931             :     tmp = add_o( x0r, x0i, &Overflow );
    1932             :     L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
    1933             :     a[11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /* Q(Qx+Q_edct) */
    1934             :     move16();
    1935             : 
    1936             :     x0r = add_o( x3i, x1r, &Overflow );
    1937             :     x0i = sub_o( x3r, x1i, &Overflow );
    1938             :     tmp = sub_o( x0i, x0r, &Overflow );
    1939             :     L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
    1940             :     a[14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    1941             :     move16();
    1942             : 
    1943             :     tmp = add_o( x0i, x0r, &Overflow );
    1944             :     L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
    1945             :     a[15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    1946             :     move16();
    1947             :     k1 = 0;
    1948             :     move16();
    1949             : #endif
    1950             : 
    1951     4239344 :     FOR( j = 16; j < n; j += 16 )
    1952             :     {
    1953     3636345 :         k1 = add( k1, 2 );
    1954     3636345 :         k2 = shl( k1, 1 );
    1955             : 
    1956     3636345 :         wk2r = w[k1];
    1957     3636345 :         move32();
    1958     3636345 :         wk2i = w[k1 + 1];
    1959     3636345 :         move32();
    1960     3636345 :         wk1r = w[k2];
    1961     3636345 :         move32();
    1962     3636345 :         wk1i = w[k2 + 1];
    1963     3636345 :         move32();
    1964             : 
    1965     3636345 :         L_tmp = L_shl( Mult_32_32( wk2i, wk1i ), 1 ); /*Q29 */
    1966     3636345 :         wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) );      /*Q30 */
    1967             : 
    1968     3636345 :         L_tmp = L_shl( Mult_32_32( wk2i, wk1r ), 1 ); /*Q29 */
    1969     3636345 :         wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i );      /*Q30 */
    1970             : #ifdef ISSUE_1836_replace_overflow_libcom
    1971     3636345 :         x0r = add_sat( a[j], a[j + 2] );
    1972     3636345 :         x0i = add_sat( a[j + 1], a[j + 3] );
    1973     3636345 :         x1r = sub_sat( a[j], a[j + 2] );
    1974     3636345 :         x1i = sub_sat( a[j + 1], a[j + 3] );
    1975     3636345 :         x2r = add_sat( a[j + 4], a[j + 6] );
    1976     3636345 :         x2i = add_sat( a[j + 5], a[j + 7] );
    1977     3636345 :         x3r = sub_sat( a[j + 4], a[j + 6] );
    1978     3636345 :         x3i = sub_sat( a[j + 5], a[j + 7] );
    1979     3636345 :         a[j] = add_sat( x0r, x2r );
    1980     3636345 :         move16();
    1981     3636345 :         a[j + 1] = add_sat( x0i, x2i );
    1982     3636345 :         move16();
    1983             : 
    1984     3636345 :         x0r = sub_sat( x0r, x2r );
    1985     3636345 :         x0i = sub_sat( x0i, x2i );
    1986     3636345 :         L_tmp = Mult_32_16( wk2r, x0r );                  /*Q(15+Qx+Q_edct) */
    1987     3636345 :         L_tmp = Msub_32_16( L_tmp, wk2i, x0i );           /*Q(15+Qx+Q_edct) */
    1988     3636345 :         a[j + 4] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    1989     3636345 :         move16();
    1990             : 
    1991     3636345 :         L_tmp = Mult_32_16( wk2r, x0i );                  /*Q(15+Qx+Q_edct) */
    1992     3636345 :         L_tmp = Madd_32_16( L_tmp, wk2i, x0r );           /*Q(15+Qx+Q_edct) */
    1993     3636345 :         a[j + 5] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    1994     3636345 :         move16();
    1995             : 
    1996     3636345 :         x0r = sub_sat( x1r, x3i );
    1997     3636345 :         x0i = add_sat( x1i, x3r );
    1998     3636345 :         L_tmp = Mult_32_16( wk1r, x0r );                  /*Q(15+Qx+Q_edct) */
    1999     3636345 :         L_tmp = Msub_32_16( L_tmp, wk1i, x0i );           /*Q(15+Qx+Q_edct) */
    2000     3636345 :         a[j + 2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2001     3636345 :         move16();
    2002             : 
    2003     3636345 :         L_tmp = Mult_32_16( wk1r, x0i );                  /*Q(15+Qx+Q_edct) */
    2004     3636345 :         L_tmp = Madd_32_16( L_tmp, wk1i, x0r );           /*Q(15+Qx+Q_edct) */
    2005     3636345 :         a[j + 3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2006     3636345 :         move16();
    2007             : 
    2008     3636345 :         x0r = add_sat( x1r, x3i );
    2009     3636345 :         x0i = sub_sat( x1i, x3r );
    2010     3636345 :         L_tmp = Mult_32_16( wk3r, x0r );                  /*Q(15+Qx+Q_edct) */
    2011     3636345 :         L_tmp = Msub_32_16( L_tmp, wk3i, x0i );           /*Q(15+Qx+Q_edct) */
    2012     3636345 :         a[j + 6] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2013     3636345 :         move16();
    2014             : 
    2015     3636345 :         L_tmp = Mult_32_16( wk3r, x0i );                  /*Q(15+Qx+Q_edct) */
    2016     3636345 :         L_tmp = Madd_32_16( L_tmp, wk3i, x0r );           /*Q(15+Qx+Q_edct) */
    2017     3636345 :         a[j + 7] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2018     3636345 :         move16();
    2019             : 
    2020     3636345 :         wk1r = w[k2 + 2];
    2021     3636345 :         move32();
    2022     3636345 :         wk1i = w[k2 + 3];
    2023     3636345 :         move32();
    2024     3636345 :         L_tmp = L_shl( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
    2025     3636345 :         wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) );      /*Q30  */
    2026             : 
    2027     3636345 :         L_tmp = L_shl( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
    2028     3636345 :         wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i );      /*Q30 */
    2029             : 
    2030     3636345 :         x0r = add_sat( a[j + 8], a[j + 10] );
    2031     3636345 :         x0i = add_sat( a[j + 9], a[j + 11] );
    2032     3636345 :         x1r = sub_sat( a[j + 8], a[j + 10] );
    2033     3636345 :         x1i = sub_sat( a[j + 9], a[j + 11] );
    2034     3636345 :         x2r = add_sat( a[j + 12], a[j + 14] );
    2035     3636345 :         x2i = add_sat( a[j + 13], a[j + 15] );
    2036     3636345 :         x3r = sub_sat( a[j + 12], a[j + 14] );
    2037     3636345 :         x3i = sub_sat( a[j + 13], a[j + 15] );
    2038     3636345 :         a[j + 8] = add_sat( x0r, x2r );
    2039     3636345 :         move16();
    2040     3636345 :         a[j + 9] = add_sat( x0i, x2i );
    2041     3636345 :         move16();
    2042             : 
    2043     3636345 :         x0r = sub_sat( x0r, x2r );
    2044     3636345 :         x0i = sub_sat( x0i, x2i );
    2045     3636345 :         tmp = negate( x0r );
    2046     3636345 :         L_tmp = Mult_32_16( wk2i, tmp );                   /*Q(15+Qx+Q_edct) */
    2047     3636345 :         L_tmp = Msub_32_16( L_tmp, wk2r, x0i );            /*Q(15+Qx+Q_edct) */
    2048     3636345 :         a[j + 12] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2049     3636345 :         move16();
    2050             : 
    2051     3636345 :         tmp = negate( x0i );
    2052     3636345 :         L_tmp = Mult_32_16( wk2i, tmp );                   /*Q(15+Qx+Q_edct) */
    2053     3636345 :         L_tmp = Madd_32_16( L_tmp, wk2r, x0r );            /*Q(15+Qx+Q_edct) */
    2054     3636345 :         a[j + 13] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2055     3636345 :         move16();
    2056             : 
    2057     3636345 :         x0r = sub_sat( x1r, x3i );
    2058     3636345 :         x0i = add_sat( x1i, x3r );
    2059     3636345 :         L_tmp = Mult_32_16( wk1r, x0r );                   /*Q(15+Qx+Q_edct) */
    2060     3636345 :         L_tmp = Msub_32_16( L_tmp, wk1i, x0i );            /*Q(15+Qx+Q_edct) */
    2061     3636345 :         a[j + 10] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2062     3636345 :         move16();
    2063             : 
    2064     3636345 :         L_tmp = Mult_32_16( wk1r, x0i );                   /*Q(15+Qx+Q_edct) */
    2065     3636345 :         L_tmp = Madd_32_16( L_tmp, wk1i, x0r );            /*Q(15+Qx+Q_edct) */
    2066     3636345 :         a[j + 11] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2067     3636345 :         move16();
    2068             : 
    2069     3636345 :         x0r = add_sat( x1r, x3i );
    2070     3636345 :         x0i = sub_sat( x1i, x3r );
    2071             : 
    2072     3636345 :         L_tmp = Mult_32_16( wk3r, x0r );                   /*Q(15+Qx+Q_edct) */
    2073     3636345 :         L_tmp = Msub_32_16( L_tmp, wk3i, x0i );            /*Q(15+Qx+Q_edct) */
    2074     3636345 :         a[j + 14] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2075     3636345 :         move16();
    2076             : 
    2077     3636345 :         L_tmp = Mult_32_16( wk3r, x0i );                   /*Q(15+Qx+Q_edct) */
    2078     3636345 :         L_tmp = Madd_32_16( L_tmp, wk3i, x0r );            /*Q(15+Qx+Q_edct) */
    2079     3636345 :         a[j + 15] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2080     3636345 :         move16();
    2081             : #else
    2082             :         x0r = add_o( a[j], a[j + 2], &Overflow );
    2083             :         x0i = add_o( a[j + 1], a[j + 3], &Overflow );
    2084             :         x1r = sub_o( a[j], a[j + 2], &Overflow );
    2085             :         x1i = sub_o( a[j + 1], a[j + 3], &Overflow );
    2086             :         x2r = add_o( a[j + 4], a[j + 6], &Overflow );
    2087             :         x2i = add_o( a[j + 5], a[j + 7], &Overflow );
    2088             :         x3r = sub_o( a[j + 4], a[j + 6], &Overflow );
    2089             :         x3i = sub_o( a[j + 5], a[j + 7], &Overflow );
    2090             :         a[j] = add_o( x0r, x2r, &Overflow );
    2091             :         move16();
    2092             :         a[j + 1] = add_o( x0i, x2i, &Overflow );
    2093             :         move16();
    2094             : 
    2095             :         x0r = sub_o( x0r, x2r, &Overflow );
    2096             :         x0i = sub_o( x0i, x2i, &Overflow );
    2097             :         L_tmp = Mult_32_16( wk2r, x0r );                                    /*Q(15+Qx+Q_edct) */
    2098             :         L_tmp = Msub_32_16( L_tmp, wk2i, x0i );                             /*Q(15+Qx+Q_edct) */
    2099             :         a[j + 4] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2100             :         move16();
    2101             : 
    2102             :         L_tmp = Mult_32_16( wk2r, x0i );                                    /*Q(15+Qx+Q_edct) */
    2103             :         L_tmp = Madd_32_16( L_tmp, wk2i, x0r );                             /*Q(15+Qx+Q_edct) */
    2104             :         a[j + 5] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2105             :         move16();
    2106             : 
    2107             :         x0r = sub_o( x1r, x3i, &Overflow );
    2108             :         x0i = add_o( x1i, x3r, &Overflow );
    2109             :         L_tmp = Mult_32_16( wk1r, x0r );                                    /*Q(15+Qx+Q_edct) */
    2110             :         L_tmp = Msub_32_16( L_tmp, wk1i, x0i );                             /*Q(15+Qx+Q_edct) */
    2111             :         a[j + 2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2112             :         move16();
    2113             : 
    2114             :         L_tmp = Mult_32_16( wk1r, x0i );                                    /*Q(15+Qx+Q_edct) */
    2115             :         L_tmp = Madd_32_16( L_tmp, wk1i, x0r );                             /*Q(15+Qx+Q_edct) */
    2116             :         a[j + 3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2117             :         move16();
    2118             : 
    2119             :         x0r = add_o( x1r, x3i, &Overflow );
    2120             :         x0i = sub_o( x1i, x3r, &Overflow );
    2121             :         L_tmp = Mult_32_16( wk3r, x0r );                                    /*Q(15+Qx+Q_edct) */
    2122             :         L_tmp = Msub_32_16( L_tmp, wk3i, x0i );                             /*Q(15+Qx+Q_edct) */
    2123             :         a[j + 6] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2124             :         move16();
    2125             : 
    2126             :         L_tmp = Mult_32_16( wk3r, x0i );                                    /*Q(15+Qx+Q_edct) */
    2127             :         L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                             /*Q(15+Qx+Q_edct) */
    2128             :         a[j + 7] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2129             :         move16();
    2130             : 
    2131             :         wk1r = w[k2 + 2];
    2132             :         move32();
    2133             :         wk1i = w[k2 + 3];
    2134             :         move32();
    2135             :         L_tmp = L_shl( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
    2136             :         wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) );      /*Q30  */
    2137             : 
    2138             :         L_tmp = L_shl( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
    2139             :         wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i );      /*Q30 */
    2140             : 
    2141             :         x0r = add_o( a[j + 8], a[j + 10], &Overflow );
    2142             :         x0i = add_o( a[j + 9], a[j + 11], &Overflow );
    2143             :         x1r = sub_o( a[j + 8], a[j + 10], &Overflow );
    2144             :         x1i = sub_o( a[j + 9], a[j + 11], &Overflow );
    2145             :         x2r = add_o( a[j + 12], a[j + 14], &Overflow );
    2146             :         x2i = add_o( a[j + 13], a[j + 15], &Overflow );
    2147             :         x3r = sub_o( a[j + 12], a[j + 14], &Overflow );
    2148             :         x3i = sub_o( a[j + 13], a[j + 15], &Overflow );
    2149             :         a[j + 8] = add_o( x0r, x2r, &Overflow );
    2150             :         move16();
    2151             :         a[j + 9] = add_o( x0i, x2i, &Overflow );
    2152             :         move16();
    2153             : 
    2154             :         x0r = sub_o( x0r, x2r, &Overflow );
    2155             :         x0i = sub_o( x0i, x2i, &Overflow );
    2156             :         tmp = negate( x0r );
    2157             :         L_tmp = Mult_32_16( wk2i, tmp );                                     /*Q(15+Qx+Q_edct) */
    2158             :         L_tmp = Msub_32_16( L_tmp, wk2r, x0i );                              /*Q(15+Qx+Q_edct) */
    2159             :         a[j + 12] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2160             :         move16();
    2161             : 
    2162             :         tmp = negate( x0i );
    2163             :         L_tmp = Mult_32_16( wk2i, tmp );                                     /*Q(15+Qx+Q_edct) */
    2164             :         L_tmp = Madd_32_16( L_tmp, wk2r, x0r );                              /*Q(15+Qx+Q_edct) */
    2165             :         a[j + 13] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2166             :         move16();
    2167             : 
    2168             :         x0r = sub_o( x1r, x3i, &Overflow );
    2169             :         x0i = add_o( x1i, x3r, &Overflow );
    2170             :         L_tmp = Mult_32_16( wk1r, x0r );                                     /*Q(15+Qx+Q_edct) */
    2171             :         L_tmp = Msub_32_16( L_tmp, wk1i, x0i );                              /*Q(15+Qx+Q_edct) */
    2172             :         a[j + 10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2173             :         move16();
    2174             : 
    2175             :         L_tmp = Mult_32_16( wk1r, x0i );                                     /*Q(15+Qx+Q_edct) */
    2176             :         L_tmp = Madd_32_16( L_tmp, wk1i, x0r );                              /*Q(15+Qx+Q_edct) */
    2177             :         a[j + 11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2178             :         move16();
    2179             : 
    2180             :         x0r = add_o( x1r, x3i, &Overflow );
    2181             :         x0i = sub_o( x1i, x3r, &Overflow );
    2182             : 
    2183             :         L_tmp = Mult_32_16( wk3r, x0r );                                     /*Q(15+Qx+Q_edct) */
    2184             :         L_tmp = Msub_32_16( L_tmp, wk3i, x0i );                              /*Q(15+Qx+Q_edct) */
    2185             :         a[j + 14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2186             :         move16();
    2187             : 
    2188             :         L_tmp = Mult_32_16( wk3r, x0i );                                     /*Q(15+Qx+Q_edct) */
    2189             :         L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                              /*Q(15+Qx+Q_edct) */
    2190             :         a[j + 15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2191             :         move16();
    2192             : #endif
    2193             :     }
    2194             : 
    2195      602999 :     return;
    2196             : }
    2197             : 
    2198             : /*-----------------------------------------------------------------*
    2199             :  * cftmdl_16fx()
    2200             :  * Subfunction of Complex Discrete Fourier Transform
    2201             :  *-----------------------------------------------------------------*/
    2202      724188 : static void cftmdl_16fx(
    2203             :     Word16 n,       /* i    : data length of real and imag                         */
    2204             :     Word16 l,       /* i    : initial shift for processing                         */
    2205             :     Word16 *a,      /* i/o  : input/output data              Q(Qx+Q_edct)*/
    2206             :     const Word32 *w /* i    : cos/sin table                                                     Q30*/
    2207             : )
    2208             : {
    2209             :     Word16 j, j1, j2, j3, k, k1, k2, m, m2;
    2210             :     Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
    2211             :     Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    2212             :     Word16 tmp, tmp2;
    2213             :     Word32 L_tmp;
    2214             :     Word32 L_x0r, L_x0i;
    2215             : #ifndef ISSUE_1836_replace_overflow_libcom
    2216             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
    2217             :     Flag Overflow = 0;
    2218             :     move32();
    2219             : #endif
    2220             : #endif
    2221      724188 :     m = shl( l, 2 );
    2222     5075208 :     FOR( j = 0; j < l; j += 2 )
    2223             :     {
    2224             : #ifdef ISSUE_1836_replace_overflow_libcom
    2225     4351020 :         j1 = add_sat( j, l );
    2226     4351020 :         j2 = add_sat( j1, l );
    2227     4351020 :         j3 = add_sat( j2, l );
    2228     4351020 :         x0r = add_sat( a[j], a[j1] );
    2229     4351020 :         x0i = add_sat( a[j + 1], a[j1 + 1] );
    2230     4351020 :         x1r = sub_sat( a[j], a[j1] );
    2231     4351020 :         x1i = sub_sat( a[j + 1], a[j1 + 1] );
    2232     4351020 :         x2r = add_sat( a[j2], a[j3] );
    2233     4351020 :         x2i = add_sat( a[j2 + 1], a[j3 + 1] );
    2234     4351020 :         x3r = sub_sat( a[j2], a[j3] );
    2235     4351020 :         x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
    2236     4351020 :         a[j] = add_sat( x0r, x2r );
    2237     4351020 :         move16();
    2238     4351020 :         a[j + 1] = add_sat( x0i, x2i );
    2239     4351020 :         move16();
    2240     4351020 :         a[j2] = sub_sat( x0r, x2r );
    2241     4351020 :         move16();
    2242     4351020 :         a[j2 + 1] = sub_sat( x0i, x2i );
    2243     4351020 :         move16();
    2244     4351020 :         a[j1] = sub_sat( x1r, x3i );
    2245     4351020 :         move16();
    2246     4351020 :         a[j1 + 1] = add_sat( x1i, x3r );
    2247     4351020 :         move16();
    2248     4351020 :         a[j3] = add_sat( x1r, x3i );
    2249     4351020 :         move16();
    2250     4351020 :         a[j3 + 1] = sub_sat( x1i, x3r );
    2251     4351020 :         move16();
    2252             : #else
    2253             :         j1 = add_o( j, l, &Overflow );
    2254             :         j2 = add_o( j1, l, &Overflow );
    2255             :         j3 = add_o( j2, l, &Overflow );
    2256             :         x0r = add_o( a[j], a[j1], &Overflow );
    2257             :         x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
    2258             :         x1r = sub_o( a[j], a[j1], &Overflow );
    2259             :         x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
    2260             :         x2r = add_o( a[j2], a[j3], &Overflow );
    2261             :         x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
    2262             :         x3r = sub_o( a[j2], a[j3], &Overflow );
    2263             :         x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
    2264             :         a[j] = add_o( x0r, x2r, &Overflow );
    2265             :         move16();
    2266             :         a[j + 1] = add_o( x0i, x2i, &Overflow );
    2267             :         move16();
    2268             :         a[j2] = sub_o( x0r, x2r, &Overflow );
    2269             :         move16();
    2270             :         a[j2 + 1] = sub_o( x0i, x2i, &Overflow );
    2271             :         move16();
    2272             :         a[j1] = sub_o( x1r, x3i, &Overflow );
    2273             :         move16();
    2274             :         a[j1 + 1] = add_o( x1i, x3r, &Overflow );
    2275             :         move16();
    2276             :         a[j3] = add_o( x1r, x3i, &Overflow );
    2277             :         move16();
    2278             :         a[j3 + 1] = sub_o( x1i, x3r, &Overflow );
    2279             :         move16();
    2280             : #endif
    2281             :     }
    2282             : 
    2283      724188 :     wk1r = w[2];
    2284      724188 :     move32();
    2285      724188 :     tmp2 = add( l, m );
    2286     5075208 :     FOR( j = m; j < tmp2; j += 2 )
    2287             :     {
    2288             : #ifdef ISSUE_1836_replace_overflow_libcom
    2289     4351020 :         j1 = add_sat( j, l );
    2290     4351020 :         j2 = add_sat( j1, l );
    2291     4351020 :         j3 = add_sat( j2, l );
    2292     4351020 :         x0r = add_sat( a[j], a[j1] );
    2293     4351020 :         x0i = add_sat( a[j + 1], a[j1 + 1] );
    2294     4351020 :         x1r = sub_sat( a[j], a[j1] );
    2295     4351020 :         x1i = sub_sat( a[j + 1], a[j1 + 1] );
    2296     4351020 :         x2r = add_sat( a[j2], a[j3] );
    2297     4351020 :         x2i = add_sat( a[j2 + 1], a[j3 + 1] );
    2298     4351020 :         x3r = sub_sat( a[j2], a[j3] );
    2299     4351020 :         x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
    2300     4351020 :         a[j] = add_sat( x0r, x2r );
    2301     4351020 :         move16();
    2302     4351020 :         a[j + 1] = add_sat( x0i, x2i );
    2303     4351020 :         move16();
    2304     4351020 :         a[j2] = sub_sat( x2i, x0i );
    2305     4351020 :         move16();
    2306     4351020 :         a[j2 + 1] = sub_sat( x0r, x2r );
    2307     4351020 :         move16();
    2308             : 
    2309     4351020 :         x0r = sub_sat( x1r, x3i );
    2310     4351020 :         x0i = add_sat( x1i, x3r );
    2311     4351020 :         tmp = sub_sat( x0r, x0i );
    2312     4351020 :         L_tmp = Mult_32_16( wk1r, tmp );               /*Q(15+Qx+Q_edct) */
    2313     4351020 :         a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2314     4351020 :         move16();
    2315             : 
    2316     4351020 :         tmp = add_sat( x0r, x0i );
    2317     4351020 :         L_tmp = Mult_32_16( wk1r, tmp );                   /*Q(15+Qx+Q_edct) */
    2318     4351020 :         a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2319     4351020 :         move16();
    2320             : 
    2321     4351020 :         x0r = add_sat( x3i, x1r );
    2322     4351020 :         x0i = sub_sat( x3r, x1i );
    2323     4351020 :         tmp = sub_sat( x0i, x0r );
    2324     4351020 :         L_tmp = Mult_32_16( wk1r, tmp );               /*Q(15+Qx+Q_edct) */
    2325     4351020 :         a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2326     4351020 :         move16();
    2327             : 
    2328     4351020 :         tmp = add_sat( x0i, x0r );
    2329     4351020 :         L_tmp = Mult_32_16( wk1r, tmp );                   /*Q(15+Qx+Q_edct) */
    2330     4351020 :         a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2331     4351020 :         move16();
    2332             : #else
    2333             :         j1 = add_o( j, l, &Overflow );
    2334             :         j2 = add_o( j1, l, &Overflow );
    2335             :         j3 = add_o( j2, l, &Overflow );
    2336             :         x0r = add_o( a[j], a[j1], &Overflow );
    2337             :         x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
    2338             :         x1r = sub_o( a[j], a[j1], &Overflow );
    2339             :         x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
    2340             :         x2r = add_o( a[j2], a[j3], &Overflow );
    2341             :         x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
    2342             :         x3r = sub_o( a[j2], a[j3], &Overflow );
    2343             :         x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
    2344             :         a[j] = add_o( x0r, x2r, &Overflow );
    2345             :         move16();
    2346             :         a[j + 1] = add_o( x0i, x2i, &Overflow );
    2347             :         move16();
    2348             :         a[j2] = sub_o( x2i, x0i, &Overflow );
    2349             :         move16();
    2350             :         a[j2 + 1] = sub_o( x0r, x2r, &Overflow );
    2351             :         move16();
    2352             : 
    2353             :         x0r = sub_o( x1r, x3i, &Overflow );
    2354             :         x0i = add_o( x1i, x3r, &Overflow );
    2355             :         tmp = sub_o( x0r, x0i, &Overflow );
    2356             :         L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
    2357             :         a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2358             :         move16();
    2359             : 
    2360             :         tmp = add_o( x0r, x0i, &Overflow );
    2361             :         L_tmp = Mult_32_16( wk1r, tmp );                                     /*Q(15+Qx+Q_edct) */
    2362             :         a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2363             :         move16();
    2364             : 
    2365             :         x0r = add_o( x3i, x1r, &Overflow );
    2366             :         x0i = sub_o( x3r, x1i, &Overflow );
    2367             :         tmp = sub_o( x0i, x0r, &Overflow );
    2368             :         L_tmp = Mult_32_16( wk1r, tmp );                                 /*Q(15+Qx+Q_edct) */
    2369             :         a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2370             :         move16();
    2371             : 
    2372             :         tmp = add_o( x0i, x0r, &Overflow );
    2373             :         L_tmp = Mult_32_16( wk1r, tmp );                                     /*Q(15+Qx+Q_edct) */
    2374             :         a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2375             :         move16();
    2376             : #endif
    2377             :     }
    2378             : 
    2379      724188 :     k1 = 0;
    2380      724188 :     move16();
    2381      724188 :     m2 = shl( m, 1 );
    2382     1181025 :     FOR( k = m2; k < n; k += m2 )
    2383             :     {
    2384      456837 :         k1 = add( k1, 2 );
    2385      456837 :         k2 = shl( k1, 1 );
    2386      456837 :         wk2r = w[k1];
    2387      456837 :         move32();
    2388      456837 :         wk2i = w[k1 + 1];
    2389      456837 :         move32();
    2390      456837 :         wk1r = w[k2];
    2391      456837 :         move32();
    2392      456837 :         wk1i = w[k2 + 1];
    2393      456837 :         move32();
    2394      456837 :         L_tmp = L_shl( Mult_32_32( wk2i, wk1i ), 1 ); /*Q29 */
    2395      456837 :         wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) );      /*Q30 */
    2396             : 
    2397      456837 :         L_tmp = L_shl( Mult_32_32( wk2i, wk1r ), 1 ); /*Q29 */
    2398      456837 :         wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i );      /*Q30 */
    2399             : 
    2400      456837 :         tmp2 = add( l, k );
    2401     2284185 :         FOR( j = k; j < tmp2; j += 2 )
    2402             :         {
    2403             : #ifdef ISSUE_1836_replace_overflow_libcom
    2404     1827348 :             j1 = add_sat( j, l );
    2405     1827348 :             j2 = add_sat( j1, l );
    2406     1827348 :             j3 = add_sat( j2, l );
    2407     1827348 :             x0r = add_sat( a[j], a[j1] );
    2408     1827348 :             x0i = add_sat( a[j + 1], a[j1 + 1] );
    2409     1827348 :             x1r = sub_sat( a[j], a[j1] );
    2410     1827348 :             x1i = sub_sat( a[j + 1], a[j1 + 1] );
    2411     1827348 :             x2r = add_sat( a[j2], a[j3] );
    2412     1827348 :             x2i = add_sat( a[j2 + 1], a[j3 + 1] );
    2413     1827348 :             x3r = sub_sat( a[j2], a[j3] );
    2414     1827348 :             x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
    2415     1827348 :             a[j] = add_sat( x0r, x2r );
    2416     1827348 :             move16();
    2417     1827348 :             a[j + 1] = add_sat( x0i, x2i );
    2418     1827348 :             move16();
    2419             : 
    2420     1827348 :             x0r = sub_sat( x0r, x2r );
    2421     1827348 :             x0i = sub_sat( x0i, x2i );
    2422             : 
    2423     1827348 :             L_tmp = Mult_32_16( wk2r, x0r );               /*Q(15+Qx+Q_edct) */
    2424     1827348 :             L_tmp = Msub_32_16( L_tmp, wk2i, x0i );        /*Q(15+Qx+Q_edct) */
    2425     1827348 :             a[j2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2426     1827348 :             move16();
    2427             : 
    2428     1827348 :             L_tmp = Mult_32_16( wk2r, x0i );                   /*Q(15+Qx+Q_edct) */
    2429     1827348 :             L_tmp = Madd_32_16( L_tmp, wk2i, x0r );            /*Q(15+Qx+Q_edct) */
    2430     1827348 :             a[j2 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2431     1827348 :             move16();
    2432             : 
    2433     1827348 :             x0r = sub_sat( x1r, x3i );
    2434     1827348 :             x0i = add_sat( x1i, x3r );
    2435             : 
    2436     1827348 :             L_tmp = Mult_32_16( wk1r, x0r );               /*Q(15+Qx+Q_edct) */
    2437     1827348 :             L_tmp = Msub_32_16( L_tmp, wk1i, x0i );        /*Q(15+Qx+Q_edct) */
    2438     1827348 :             a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2439     1827348 :             move16();
    2440             : 
    2441     1827348 :             L_tmp = Mult_32_16( wk1r, x0i );                   /*Q(15+Qx+Q_edct) */
    2442     1827348 :             L_tmp = Madd_32_16( L_tmp, wk1i, x0r );            /*Q(15+Qx+Q_edct) */
    2443     1827348 :             a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2444     1827348 :             move16();
    2445             : 
    2446     1827348 :             L_x0r = L_add( (Word32) x1r, (Word32) x3i );
    2447     1827348 :             L_x0i = L_sub( (Word32) x1i, (Word32) x3r );
    2448     1827348 :             x0r = extract_l( L_x0r );
    2449     1827348 :             x0i = extract_l( L_x0i );
    2450     1827348 :             L_tmp = Mult_32_16( wk3r, x0r );               /*Q(15+Qx+Q_edct) */
    2451     1827348 :             L_tmp = Msub_32_16( L_tmp, wk3i, x0i );        /*Q(15+Qx+Q_edct) */
    2452     1827348 :             a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2453     1827348 :             move16();
    2454             : 
    2455     1827348 :             L_tmp = Mult_32_16( wk3r, x0i );                   /*Q(15+Qx+Q_edct) */
    2456     1827348 :             L_tmp = Madd_32_16( L_tmp, wk3i, x0r );            /*Q(15+Qx+Q_edct) */
    2457     1827348 :             a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2458     1827348 :             move16();
    2459             : #else
    2460             :             j1 = add_o( j, l, &Overflow );
    2461             :             j2 = add_o( j1, l, &Overflow );
    2462             :             j3 = add_o( j2, l, &Overflow );
    2463             :             x0r = add_o( a[j], a[j1], &Overflow );
    2464             :             x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
    2465             :             x1r = sub_o( a[j], a[j1], &Overflow );
    2466             :             x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
    2467             :             x2r = add_o( a[j2], a[j3], &Overflow );
    2468             :             x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
    2469             :             x3r = sub_o( a[j2], a[j3], &Overflow );
    2470             :             x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
    2471             :             a[j] = add_o( x0r, x2r, &Overflow );
    2472             :             move16();
    2473             :             a[j + 1] = add_o( x0i, x2i, &Overflow );
    2474             :             move16();
    2475             : 
    2476             :             x0r = sub_o( x0r, x2r, &Overflow );
    2477             :             x0i = sub_o( x0i, x2i, &Overflow );
    2478             : 
    2479             :             L_tmp = Mult_32_16( wk2r, x0r );                                 /*Q(15+Qx+Q_edct) */
    2480             :             L_tmp = Msub_32_16( L_tmp, wk2i, x0i );                          /*Q(15+Qx+Q_edct) */
    2481             :             a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2482             :             move16();
    2483             : 
    2484             :             L_tmp = Mult_32_16( wk2r, x0i );                                     /*Q(15+Qx+Q_edct) */
    2485             :             L_tmp = Madd_32_16( L_tmp, wk2i, x0r );                              /*Q(15+Qx+Q_edct) */
    2486             :             a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2487             :             move16();
    2488             : 
    2489             :             x0r = sub_o( x1r, x3i, &Overflow );
    2490             :             x0i = add_o( x1i, x3r, &Overflow );
    2491             : 
    2492             :             L_tmp = Mult_32_16( wk1r, x0r );                                 /*Q(15+Qx+Q_edct) */
    2493             :             L_tmp = Msub_32_16( L_tmp, wk1i, x0i );                          /*Q(15+Qx+Q_edct) */
    2494             :             a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2495             :             move16();
    2496             : 
    2497             :             L_tmp = Mult_32_16( wk1r, x0i );                                     /*Q(15+Qx+Q_edct) */
    2498             :             L_tmp = Madd_32_16( L_tmp, wk1i, x0r );                              /*Q(15+Qx+Q_edct) */
    2499             :             a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2500             :             move16();
    2501             : 
    2502             :             L_x0r = L_add( (Word32) x1r, (Word32) x3i );
    2503             :             L_x0i = L_sub( (Word32) x1i, (Word32) x3r );
    2504             :             x0r = extract_l( L_x0r );
    2505             :             x0i = extract_l( L_x0i );
    2506             :             L_tmp = Mult_32_16( wk3r, x0r );                                 /*Q(15+Qx+Q_edct) */
    2507             :             L_tmp = Msub_32_16( L_tmp, wk3i, x0i );                          /*Q(15+Qx+Q_edct) */
    2508             :             a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2509             :             move16();
    2510             : 
    2511             :             L_tmp = Mult_32_16( wk3r, x0i );                                     /*Q(15+Qx+Q_edct) */
    2512             :             L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                              /*Q(15+Qx+Q_edct) */
    2513             :             a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2514             :             move16();
    2515             : #endif
    2516             :         }
    2517             : 
    2518      456837 :         wk1r = w[k2 + 2];
    2519      456837 :         move32();
    2520      456837 :         wk1i = w[k2 + 3];
    2521      456837 :         move32();
    2522             : #ifdef ISSUE_1836_replace_overflow_libcom
    2523      456837 :         L_tmp = L_shl_sat( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
    2524      456837 :         wk3r = L_sub_sat( wk1r, L_shl_sat( L_tmp, 1 ) );  /*Q30  */
    2525             : 
    2526      456837 :         L_tmp = L_shl_sat( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
    2527      456837 :         wk3i = L_sub_sat( L_shl_sat( L_tmp, 1 ), wk1i );  /*Q30 */
    2528      456837 :         tmp2 = add( l, add( k, m ) );
    2529             : #else
    2530             :         L_tmp = L_shl_o( Mult_32_32( wk2r, wk1i ), 1, &Overflow );         /*Q29 */
    2531             :         wk3r = L_sub_o( wk1r, L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q30  */
    2532             : 
    2533             :         L_tmp = L_shl_o( Mult_32_32( wk2r, wk1r ), 1, &Overflow );         /*Q29 */
    2534             :         wk3i = L_sub_o( L_shl_o( L_tmp, 1, &Overflow ), wk1i, &Overflow ); /*Q30 */
    2535             :         tmp2 = add( l, add( k, m ) );
    2536             : #endif
    2537     2284185 :         FOR( j = add( k, m ); j < tmp2; j += 2 )
    2538             :         {
    2539             : #ifdef ISSUE_1836_replace_overflow_libcom
    2540     1827348 :             j1 = add_sat( j, l );
    2541     1827348 :             j2 = add_sat( j1, l );
    2542     1827348 :             j3 = add_sat( j2, l );
    2543     1827348 :             x0r = add_sat( a[j], a[j1] );
    2544     1827348 :             x0i = add_sat( a[j + 1], a[j1 + 1] );
    2545     1827348 :             x1r = sub_sat( a[j], a[j1] );
    2546     1827348 :             x1i = sub_sat( a[j + 1], a[j1 + 1] );
    2547     1827348 :             x2r = add_sat( a[j2], a[j3] );
    2548     1827348 :             x2i = add_sat( a[j2 + 1], a[j3 + 1] );
    2549     1827348 :             x3r = sub_sat( a[j2], a[j3] );
    2550     1827348 :             x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
    2551     1827348 :             a[j] = add_sat( x0r, x2r );
    2552     1827348 :             move16();
    2553     1827348 :             a[j + 1] = add_sat( x0i, x2i );
    2554     1827348 :             move16();
    2555             : 
    2556     1827348 :             x0r = sub_sat( x0r, x2r );
    2557     1827348 :             x0i = sub_sat( x0i, x2i );
    2558             : 
    2559     1827348 :             tmp = negate( x0r );
    2560     1827348 :             L_tmp = Mult_32_16( wk2i, tmp );               /*Q(15+Qx+Q_edct) */
    2561     1827348 :             L_tmp = Msub_32_16( L_tmp, wk2r, x0i );        /*Q(15+Qx+Q_edct) */
    2562     1827348 :             a[j2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2563     1827348 :             move16();
    2564             : 
    2565     1827348 :             tmp = negate( x0i );
    2566     1827348 :             L_tmp = Mult_32_16( wk2i, tmp );                   /*Q(15+Qx+Q_edct) */
    2567     1827348 :             L_tmp = Madd_32_16( L_tmp, wk2r, x0r );            /*Q(15+Qx+Q_edct) */
    2568     1827348 :             a[j2 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2569     1827348 :             move16();
    2570             : 
    2571     1827348 :             x0r = sub_sat( x1r, x3i );
    2572     1827348 :             x0i = add_sat( x1i, x3r );
    2573             : 
    2574     1827348 :             L_tmp = Mult_32_16( wk1r, x0r );               /*Q(15+Qx+Q_edct) */
    2575     1827348 :             L_tmp = Msub_32_16( L_tmp, wk1i, x0i );        /*Q(15+Qx+Q_edct) */
    2576     1827348 :             a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2577     1827348 :             move16();
    2578             : 
    2579     1827348 :             L_tmp = Mult_32_16( wk1r, x0i );                   /*Q(15+Qx+Q_edct) */
    2580     1827348 :             L_tmp = Madd_32_16( L_tmp, wk1i, x0r );            /*Q(15+Qx+Q_edct) */
    2581     1827348 :             a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2582     1827348 :             move16();
    2583             : 
    2584     1827348 :             x0r = add_sat( x1r, x3i );
    2585     1827348 :             x0i = sub_sat( x1i, x3r );
    2586             : 
    2587     1827348 :             L_tmp = Mult_32_16( wk3r, x0r );               /*Q(15+Qx+Q_edct) */
    2588     1827348 :             L_tmp = Msub_32_16( L_tmp, wk3i, x0i );        /*Q(15+Qx+Q_edct) */
    2589     1827348 :             a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2590     1827348 :             move16();
    2591             : 
    2592     1827348 :             L_tmp = Mult_32_16( wk3r, x0i );                   /*Q(15+Qx+Q_edct) */
    2593     1827348 :             L_tmp = Madd_32_16( L_tmp, wk3i, x0r );            /*Q(15+Qx+Q_edct) */
    2594     1827348 :             a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
    2595     1827348 :             move16();
    2596             : #else
    2597             :             j1 = add_o( j, l, &Overflow );
    2598             :             j2 = add_o( j1, l, &Overflow );
    2599             :             j3 = add_o( j2, l, &Overflow );
    2600             :             x0r = add_o( a[j], a[j1], &Overflow );
    2601             :             x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
    2602             :             x1r = sub_o( a[j], a[j1], &Overflow );
    2603             :             x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
    2604             :             x2r = add_o( a[j2], a[j3], &Overflow );
    2605             :             x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
    2606             :             x3r = sub_o( a[j2], a[j3], &Overflow );
    2607             :             x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
    2608             :             a[j] = add_o( x0r, x2r, &Overflow );
    2609             :             move16();
    2610             :             a[j + 1] = add_o( x0i, x2i, &Overflow );
    2611             :             move16();
    2612             : 
    2613             :             x0r = sub_o( x0r, x2r, &Overflow );
    2614             :             x0i = sub_o( x0i, x2i, &Overflow );
    2615             : 
    2616             :             tmp = negate( x0r );
    2617             :             L_tmp = Mult_32_16( wk2i, tmp );                                 /*Q(15+Qx+Q_edct) */
    2618             :             L_tmp = Msub_32_16( L_tmp, wk2r, x0i );                          /*Q(15+Qx+Q_edct) */
    2619             :             a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2620             :             move16();
    2621             : 
    2622             :             tmp = negate( x0i );
    2623             :             L_tmp = Mult_32_16( wk2i, tmp );                                     /*Q(15+Qx+Q_edct) */
    2624             :             L_tmp = Madd_32_16( L_tmp, wk2r, x0r );                              /*Q(15+Qx+Q_edct) */
    2625             :             a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2626             :             move16();
    2627             : 
    2628             :             x0r = sub_o( x1r, x3i, &Overflow );
    2629             :             x0i = add_o( x1i, x3r, &Overflow );
    2630             : 
    2631             :             L_tmp = Mult_32_16( wk1r, x0r );                                 /*Q(15+Qx+Q_edct) */
    2632             :             L_tmp = Msub_32_16( L_tmp, wk1i, x0i );                          /*Q(15+Qx+Q_edct) */
    2633             :             a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2634             :             move16();
    2635             : 
    2636             :             L_tmp = Mult_32_16( wk1r, x0i );                                     /*Q(15+Qx+Q_edct) */
    2637             :             L_tmp = Madd_32_16( L_tmp, wk1i, x0r );                              /*Q(15+Qx+Q_edct) */
    2638             :             a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2639             :             move16();
    2640             : 
    2641             :             x0r = add_o( x1r, x3i, &Overflow );
    2642             :             x0i = sub_o( x1i, x3r, &Overflow );
    2643             : 
    2644             :             L_tmp = Mult_32_16( wk3r, x0r );                                 /*Q(15+Qx+Q_edct) */
    2645             :             L_tmp = Msub_32_16( L_tmp, wk3i, x0i );                          /*Q(15+Qx+Q_edct) */
    2646             :             a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2647             :             move16();
    2648             : 
    2649             :             L_tmp = Mult_32_16( wk3r, x0i );                                     /*Q(15+Qx+Q_edct) */
    2650             :             L_tmp = Madd_32_16( L_tmp, wk3i, x0r );                              /*Q(15+Qx+Q_edct) */
    2651             :             a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
    2652             :             move16();
    2653             : #endif
    2654             :         }
    2655             :     }
    2656             : 
    2657      724188 :     return;
    2658             : }
    2659             : 
    2660          93 : void fft3_fx( const Word16 X[] /*Qx*/, Word16 Y[] /*Qx*/, const Word16 n )
    2661             : {
    2662             :     Word16 Z[PH_ECU_SPEC_SIZE];
    2663             :     Word16 *Z0, *Z1, *Z2;
    2664             :     Word16 *z0, *z1, *z2;
    2665             :     const Word16 *x;
    2666          93 :     const Word16 *t_sin = sincos_t_rad3_fx; // Q15
    2667             :     Word16 m, mMinus1, step;
    2668             :     Word16 i, l;
    2669             :     Word16 c1_ind, s1_ind, c2_ind, s2_ind;
    2670             :     Word16 c1_step, s1_step, c2_step, s2_step;
    2671             :     Word16 *RY, *IY, *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
    2672             :     Word32 acc;
    2673             :     Word16 mBy2, orderMinus1;
    2674             :     const Word16 *pPhaseTbl;
    2675             : 
    2676             :     /* Determine the order of the transform, the length of decimated  */
    2677             :     /* transforms m, and the step for the sine and cosine tables.     */
    2678          93 :     SWITCH( n )
    2679             :     {
    2680          31 :         case 1536:
    2681          31 :             orderMinus1 = 9 - 1;
    2682          31 :             move16();
    2683          31 :             m = 512;
    2684          31 :             move16();
    2685          31 :             step = 1;
    2686          31 :             move16();
    2687          31 :             pPhaseTbl = FFT_W256;
    2688          31 :             BREAK;
    2689          62 :         case 384:
    2690          62 :             orderMinus1 = 7 - 1;
    2691          62 :             move16();
    2692          62 :             m = 128;
    2693          62 :             move16();
    2694          62 :             step = 4;
    2695          62 :             move16();
    2696          62 :             pPhaseTbl = FFT_W64;
    2697          62 :             BREAK;
    2698           0 :         default:
    2699           0 :             orderMinus1 = 7 - 1;
    2700           0 :             move16();
    2701           0 :             m = 128;
    2702           0 :             move16();
    2703           0 :             step = 4;
    2704           0 :             move16();
    2705           0 :             pPhaseTbl = FFT_W64;
    2706           0 :             BREAK;
    2707             :     }
    2708             : 
    2709             :     /* Compose decimated sequences X[3i], X[3i+1], X[3i+2] and        */
    2710             :     /* compute their FFT of length m.                                 */
    2711          93 :     Z0 = &Z[0];
    2712          93 :     z0 = &Z0[0];
    2713          93 :     Z1 = &Z0[m];
    2714          93 :     z1 = &Z1[0]; /* Z1 = &Z[ m];     */
    2715          93 :     Z2 = &Z1[m];
    2716          93 :     z2 = &Z2[0]; /* Z2 = &Z[2m];     */
    2717          93 :     x = &X[0];   // Qx
    2718       23901 :     FOR( i = 0; i < m; i++ )
    2719             :     {
    2720       23808 :         *z0++ = *x++; /* Z0[i] = X[3i]; Qx   */
    2721       23808 :         move16();
    2722       23808 :         *z1++ = *x++; /* Z1[i] = X[3i+1]; Qx */
    2723       23808 :         move16();
    2724       23808 :         *z2++ = *x++; /* Z2[i] = X[3i+2]; Qx */
    2725       23808 :         move16();
    2726             :     }
    2727          93 :     mBy2 = shr( m, 1 );
    2728          93 :     r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z0, Z0, 1 );
    2729          93 :     r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z1, Z1, 1 );
    2730          93 :     r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z2, Z2, 1 );
    2731             : 
    2732             :     /* Butterflies of order 3. */
    2733             :     /* pointer initialization */
    2734          93 :     mMinus1 = sub( m, 1 );
    2735          93 :     RY = &Y[0];   // Qx
    2736          93 :     IY = &Y[n];   // Qx
    2737          93 :     IY--;         /* Decrement the address counter.*/
    2738          93 :     RZ0 = &Z0[0]; // Qx
    2739          93 :     IZ0 = &Z0[mMinus1];
    2740          93 :     RZ1 = &Z1[0];       // Qx
    2741          93 :     IZ1 = &Z1[mMinus1]; // Qx
    2742          93 :     RZ2 = &Z2[0];       // Qx
    2743          93 :     IZ2 = &Z2[mMinus1]; // Qx
    2744             : 
    2745          93 :     c1_step = negate( step );
    2746          93 :     s1_step = step;
    2747          93 :     move16();
    2748          93 :     c2_step = shl( c1_step, 1 );
    2749          93 :     s2_step = shl( s1_step, 1 );
    2750          93 :     c1_ind = add( T_SIN_PI_2, c1_step );
    2751          93 :     s1_ind = s1_step;
    2752          93 :     move16();
    2753          93 :     c2_ind = add( T_SIN_PI_2, c2_step );
    2754          93 :     s2_ind = s2_step;
    2755          93 :     move16();
    2756             : 
    2757             :     /* special case: i = 0 */
    2758          93 :     acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ );           // Q15 + Qx
    2759          93 :     acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ );       // Q15 + Qx
    2760          93 :     *RY++ = mac_r_sat( acc, *RZ2++, 0x4000 /*1.Q14*/ ); // Qx
    2761          93 :     move16();
    2762             : 
    2763             :     /* first 3/12-- from 1 to (3*m/8)-1 */
    2764          93 :     l = sub( shr( n, 3 ), 1 ); /* (3*m/8) - 1 = (n/8) - 1 */
    2765        8928 :     FOR( i = 0; i < l; i++ )
    2766             :     {
    2767        8835 :         acc = L_shl( *RZ0++, 15 );                /* Align with the following non-fractional mode so as to gain 1 more bit headroom. Q15 + Qx*/
    2768        8835 :         acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
    2769        8835 :         acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
    2770        8835 :         acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
    2771        8835 :         acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
    2772        8835 :         *RY++ = round_fx( acc );                  /* bit growth = 1 (compensated by non-fractional mode MAC). Qx - 1*/
    2773        8835 :         move16();
    2774             : 
    2775        8835 :         acc = L_shl( *IZ0--, 15 );                  // Q15 + Qx
    2776        8835 :         acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
    2777        8835 :         acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
    2778        8835 :         acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
    2779        8835 :         acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
    2780        8835 :         *IY-- = round_fx( acc );                    // Qx - 1
    2781        8835 :         move16();
    2782             : 
    2783        8835 :         c1_ind = add( c1_ind, c1_step );
    2784        8835 :         s1_ind = add( s1_ind, s1_step );
    2785        8835 :         c2_ind = add( c2_ind, c2_step );
    2786        8835 :         s2_ind = add( s2_ind, s2_step );
    2787             :     }
    2788             : 
    2789             :     /* next 1/12-- from (3*m/8) to (4*m/8)-1 */
    2790          93 :     l = shr( m, 3 ); /* (4*m/8) - (3*m/8) = m/8 */
    2791        3069 :     FOR( i = 0; i < l; i++ )
    2792             :     {
    2793        2976 :         acc = L_shl( *RZ0++, 15 );                // Q15 + Qx
    2794        2976 :         acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
    2795        2976 :         acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
    2796        2976 :         acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
    2797        2976 :         acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
    2798        2976 :         *RY++ = round_fx( acc );                  // Qx - 1
    2799        2976 :         move16();
    2800             : 
    2801        2976 :         acc = L_shl( *IZ0--, 15 );                  // Q15 + Qx
    2802        2976 :         acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
    2803        2976 :         acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
    2804        2976 :         acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
    2805        2976 :         acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
    2806        2976 :         *IY-- = round_fx( acc );                    // Qx - 1
    2807        2976 :         move16();
    2808             : 
    2809        2976 :         c1_ind = add( c1_ind, c1_step );
    2810        2976 :         s1_ind = add( s1_ind, s1_step );
    2811        2976 :         c2_ind = sub( c2_ind, c2_step );
    2812        2976 :         s2_ind = sub( s2_ind, s2_step );
    2813             :     }
    2814             : 
    2815             :     /* special case: i = m/2 i.e. 1/3 */
    2816          93 :     acc = L_shl( *RZ0--, 15 );                // Q15 + Qx
    2817          93 :     acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
    2818          93 :     acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
    2819          93 :     *RY++ = round_fx( acc );                  // Qx - 1
    2820          93 :     move16();
    2821             : 
    2822          93 :     acc = 0;
    2823          93 :     move32();
    2824          93 :     acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
    2825          93 :     acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
    2826          93 :     *IY-- = round_fx( acc );                    // Qx - 1
    2827          93 :     move16();
    2828          93 :     IZ0++;
    2829          93 :     IZ1++;
    2830          93 :     IZ2++;
    2831             : 
    2832          93 :     c1_ind = add( c1_ind, c1_step );
    2833          93 :     s1_ind = add( s1_ind, s1_step );
    2834          93 :     c2_ind = sub( c2_ind, c2_step );
    2835          93 :     s2_ind = sub( s2_ind, s2_step );
    2836             : 
    2837             :     /* next  2/12-- from ((m/2)+1) to (6*m/8)-1 */
    2838          93 :     l = sub( shr( m, 2 ), 1 ); /* (6*m/8) - ((m/2)+1) = m/4 - 1 */
    2839        5952 :     FOR( i = 0; i < l; i++ )
    2840             :     {
    2841        5859 :         acc = L_shl( *RZ0--, 15 );                // Q15 + Qx
    2842        5859 :         acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
    2843        5859 :         acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
    2844        5859 :         acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
    2845        5859 :         acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
    2846        5859 :         *RY++ = round_fx( acc );                  // Qx - 1
    2847        5859 :         move16();
    2848             : 
    2849        5859 :         acc = L_mult0( *IZ0++, -32768 );            // Q15 + Qx
    2850        5859 :         acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
    2851        5859 :         acc = L_msu0( acc, *IZ1++, t_sin[c1_ind] ); // Q15 + Qx
    2852        5859 :         acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
    2853        5859 :         acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Q15 + Qx
    2854        5859 :         *IY-- = round_fx( acc );                    // Qx - 1
    2855        5859 :         move16();
    2856             : 
    2857        5859 :         c1_ind = add( c1_ind, c1_step );
    2858        5859 :         s1_ind = add( s1_ind, s1_step );
    2859        5859 :         c2_ind = sub( c2_ind, c2_step );
    2860        5859 :         s2_ind = sub( s2_ind, s2_step );
    2861             :     }
    2862             : 
    2863             :     /*--------------------------half--------------------------// */
    2864             :     /* next 2/12-- from (6*m/8) to (8*m/8) - 1 */
    2865          93 :     l = shr( m, 2 );
    2866        6045 :     FOR( i = 0; i < l; i++ )
    2867             :     {
    2868        5952 :         acc = L_shl( *RZ0--, 15 );                // Q15 + Qx
    2869        5952 :         acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
    2870        5952 :         acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
    2871        5952 :         acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
    2872        5952 :         acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
    2873        5952 :         *RY++ = round_fx( acc );                  // Qx - 1
    2874        5952 :         move16();
    2875             : 
    2876        5952 :         acc = L_mult0( *IZ0++, -32768 );            // Q15 + Qx
    2877        5952 :         acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
    2878        5952 :         acc = L_mac0( acc, *IZ1++, t_sin[c1_ind] ); // Q15 + Qx
    2879        5952 :         acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
    2880        5952 :         acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Q15 + Qx
    2881        5952 :         *IY-- = round_fx( acc );                    // Qx - 1
    2882        5952 :         move16();
    2883             : 
    2884        5952 :         c1_ind = sub( c1_ind, c1_step );
    2885        5952 :         s1_ind = sub( s1_ind, s1_step );
    2886        5952 :         c2_ind = add( c2_ind, c2_step );
    2887        5952 :         s2_ind = add( s2_ind, s2_step );
    2888             :     }
    2889             : 
    2890             :     /* special case: i = m, i.e 2/3 */
    2891          93 :     acc = L_shl( *RZ0++, 15 );                // Q15 + Qx
    2892          93 :     acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
    2893          93 :     acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
    2894          93 :     *RY++ = round_fx( acc );                  // Qx - 1
    2895          93 :     move16();
    2896             : 
    2897          93 :     acc = L_deposit_l( 0 );
    2898          93 :     acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
    2899          93 :     acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
    2900          93 :     *IY-- = round_fx( acc );                    // Qx - 1
    2901          93 :     move16();
    2902          93 :     IZ0--; /* Just decrement the address counter */
    2903          93 :     IZ1--;
    2904          93 :     IZ2--;
    2905             : 
    2906          93 :     c1_ind = sub( c1_ind, c1_step );
    2907          93 :     s1_ind = sub( s1_ind, s1_step );
    2908          93 :     c2_ind = add( c2_ind, c2_step );
    2909          93 :     s2_ind = add( s2_ind, s2_step );
    2910             : 
    2911             :     /* next 1/12-- from (m + 1) to (9*m/8) - 1 */
    2912          93 :     l = sub( shr( m, 3 ), 1 ); /* (9*m/8) - (m +1) = m/8 - 1 */
    2913        2976 :     FOR( i = 0; i < l; i++ )
    2914             :     {
    2915        2883 :         acc = L_shl( *RZ0++, 15 );                // Q15 + Qx
    2916        2883 :         acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
    2917        2883 :         acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
    2918        2883 :         acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
    2919        2883 :         acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
    2920        2883 :         *RY++ = round_fx( acc );                  // Qx - 1
    2921        2883 :         move16();
    2922             : 
    2923        2883 :         acc = L_shl( *IZ0--, 15 );                  // Q15 + Qx
    2924        2883 :         acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
    2925        2883 :         acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
    2926        2883 :         acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
    2927        2883 :         acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
    2928        2883 :         *IY-- = round_fx( acc );                    // Qx - 1
    2929        2883 :         move16();
    2930             : 
    2931        2883 :         c1_ind = sub( c1_ind, c1_step );
    2932        2883 :         s1_ind = sub( s1_ind, s1_step );
    2933        2883 :         c2_ind = add( c2_ind, c2_step );
    2934        2883 :         s2_ind = add( s2_ind, s2_step );
    2935             :     }
    2936             : 
    2937             :     /* last 3/12-- from (9*m/8) to (12*m/8) - 1 */
    2938          93 :     l = shr( n, 3 ); /* (12*m/8) - (9*m/8) = 3*m/8 = n/8 */
    2939        9021 :     FOR( i = 0; i < l; i++ )
    2940             :     {
    2941        8928 :         acc = L_shl( *RZ0++, 15 );                // Q15 + Qx
    2942        8928 :         acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
    2943        8928 :         acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
    2944        8928 :         acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
    2945        8928 :         acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
    2946        8928 :         *RY++ = round_fx( acc );                  // Qx - 1
    2947        8928 :         move16();
    2948             : 
    2949        8928 :         acc = L_shl( *IZ0--, 15 );                  // Q15 + Qx
    2950        8928 :         acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
    2951        8928 :         acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
    2952        8928 :         acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
    2953        8928 :         acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
    2954        8928 :         *IY-- = round_fx( acc );                    // Qx - 1
    2955        8928 :         move16();
    2956             : 
    2957        8928 :         c1_ind = sub( c1_ind, c1_step );
    2958        8928 :         s1_ind = sub( s1_ind, s1_step );
    2959        8928 :         c2_ind = sub( c2_ind, c2_step );
    2960        8928 :         s2_ind = sub( s2_ind, s2_step );
    2961             :     }
    2962             : 
    2963             :     /* special case: i = 3*m/2 */
    2964          93 :     acc = L_shl( *RZ0, 15 );                  // Q15 + Qx
    2965          93 :     acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
    2966          93 :     acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
    2967          93 :     *RY = round_fx( acc );                    // Qx - 1
    2968          93 :     move16();
    2969             : 
    2970          93 :     return;
    2971             : }
    2972             : 
    2973             : 
    2974         109 : void ifft3_fx( const Word16 Z[] /*Qx*/, Word16 X[] /*Qx*/, const Word16 n )
    2975             : {
                           /*
                            * ifft3_fx(): inverse transform of length n, assembled from three
                            * length-m transforms (n == 3*m for the two sizes handled explicitly).
                            *
                            *   Z : input, annotated Qx. Read forward from Z[0] through the RZ*
                            *       pointers and backward from Z[n] through the IZ* pointers
                            *       (presumably real parts ascending and imaginary parts
                            *       descending - confirm against the matching forward transform).
                            *   X : output, annotated Qx; 3*m values are written.
                            *   n : transform length. 1536 and 384 have dedicated cases; any
                            *       other value silently reuses the 384 settings.
                            *
                            * Steps:
                            *   1. Build three m-entry blocks Y0, Y1, Y2 in the local buffer Y from
                            *      Z and the table sincos_t_rad3_fx ("inverse butterflies of order 3").
                            *   2. Run r_fft_fx_lc() on each block, same pointer as source and
                            *      destination.
                            *   3. Interleave the three blocks into X, scaling each sample with
                            *      FFT3_ONE_THIRD followed by a saturating left shift by one.
                            *
                            * NOTE(review): Y is addressed up to Y[3*m - 1]; PH_ECU_SPEC_SIZE is
                            * defined elsewhere - confirm it is at least 1536.
                            */
    2976             :     Word16 Y[PH_ECU_SPEC_SIZE];
    2977         109 :     const Word16 *t_sin = sincos_t_rad3_fx; // Q15
    2978             :     Word16 m, mMinus1, step, step2;
    2979             :     Word16 i, l;
    2980             :     Word16 c0_ind, s0_ind, c1_ind, s1_ind, c2_ind, s2_ind;
    2981             :     const Word16 *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
    2982             :     const Word16 *RZ00, *IZ00, *RZ10, *IZ10, *RZ20, *IZ20;
    2983             :     Word16 *RY0, *IY0, *RY1, *IY1, *RY2, *IY2, *y0, *y1, *y2, *pX;
    2984             :     Word32 acc;
    2985             :     Word16 mBy2, orderMinus1, nMinusMBy2;
    2986             :     const Word16 *pPhaseTbl;
    2987             : 
    2988             :     /* Determine the order of the transform, the length of decimated  */
    2989             :     /* transforms m, and the step for the sine and cosine tables.     */
    2990         109 :     SWITCH( n )
    2991             :     {
    2992         109 :         case 1536:
    2993         109 :             orderMinus1 = 9 - 1;
    2994         109 :             move16();
    2995         109 :             m = 512;
    2996         109 :             move16();
    2997         109 :             step = 1;
    2998         109 :             move16();
    2999         109 :             pPhaseTbl = FFT_W256;
    3000         109 :             BREAK;
    3001           0 :         case 384:
    3002           0 :             orderMinus1 = 7 - 1;
    3003           0 :             move16();
    3004           0 :             m = 128;
    3005           0 :             move16();
    3006           0 :             step = 4;
    3007           0 :             move16();
    3008           0 :             pPhaseTbl = FFT_W64;
    3009           0 :             BREAK;
    3010           0 :         default:
                                   /* Any other n gets exactly the 384 settings; n itself is not    */
                                   /* validated, yet it is still used below to derive nMinusMBy2    */
                                   /* and IZ00. NOTE(review): confirm callers only pass 1536 or 384. */
    3011           0 :             orderMinus1 = 7 - 1;
    3012           0 :             move16();
    3013           0 :             m = 128;
    3014           0 :             move16();
    3015           0 :             step = 4;
    3016           0 :             move16();
    3017           0 :             pPhaseTbl = FFT_W64;
    3018           0 :             BREAK;
    3019             :     }
    3020             : 
                           /* (n - m) / 2 equals m when n == 3*m, which holds for 1536/512 and 384/128. */
    3021         109 :     nMinusMBy2 = shr( sub( n, m ), 1 );
    3022         109 :     mMinus1 = sub( m, 1 );
    3023             :     /* pointer initialization */
                           /* Y is split into three consecutive m-entry blocks. In each block the  */
                           /* RYk pointer fills from the front (post-increment) and the IYk        */
                           /* pointer fills from the back (post-decrement). IY0 starts one past    */
                           /* its block and is stepped back once before its first write.           */
    3024         109 :     RY0 = &Y[0];         // Qx
    3025         109 :     IY0 = &Y[m];         // Qx
    3026         109 :     RY1 = &RY0[m];       // Qx
    3027         109 :     IY1 = &RY1[mMinus1]; // Qx
    3028         109 :     RY2 = &RY1[m];       // Qx
    3029         109 :     IY2 = &RY2[mMinus1]; // Qx
    3030             : 
                           /* Start ("zero") positions inside Z. With nMinusMBy2 == m, RZ10 and    */
                           /* RZ20 address the same element, as do IZ10 and IZ20; the "1" pointers */
                           /* and the "2" pointers are then stepped in opposite directions.        */
                           /* IZ00 is one past Z[n - 1] and is decremented before it is first read. */
    3031         109 :     RZ00 = &Z[0];              /* The zero positions of the pointers Qx*/
    3032         109 :     RZ10 = &RZ00[m];           // Qx
    3033         109 :     RZ20 = &RZ00[nMinusMBy2];  // Qx
    3034         109 :     IZ00 = &Z[n];              // Qx
    3035         109 :     IZ10 = &IZ00[-m];          // Qx
    3036         109 :     IZ20 = &IZ00[-nMinusMBy2]; // Qx
    3037             : 
    3038         109 :     RZ0 = RZ00; /* Reset the pointers to zero positions.  */
    3039         109 :     RZ1 = RZ10;
    3040         109 :     RZ2 = RZ20;
    3041         109 :     IZ0 = IZ00;
    3042         109 :     IZ1 = IZ10;
    3043         109 :     IZ2 = IZ20;
    3044             : 
    3045             :     /* Inverse butterflies of order 3. */
    3046             : 
    3047             :     /* Construction of Y0 */
                           /* Y0 is a plain sum of the three Z segments, no table lookups. Each    */
                           /* term is weighted by 0x4000 through L_mult/L_mac and the accumulator  */
                           /* is rounded to 16 bits by mac_r/msu_r.                                */
                           /* NOTE(review): with the STL definitions this yields half the sum,     */
                           /* i.e. the same Qx - 1 scaling annotated on Y1/Y2, although the        */
                           /* per-line annotations here say Qx - confirm.                          */
                           /* First entry: only the RZ* values contribute; the IZ* and IY0         */
                           /* pointers are merely stepped to their next positions.                 */
    3048         109 :     acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ );        // Qx + Q15
    3049         109 :     acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ );    // Qx + Q15
    3050         109 :     *RY0++ = mac_r( acc, *RZ2--, 0x4000 /*1.Q14*/ ); // Qx
    3051         109 :     move16();
    3052         109 :     IZ0--;
    3053         109 :     IZ1--;
    3054         109 :     IZ2++;
    3055         109 :     IY0--;
    3056             : 
                           /* Entries 1 .. m/2 - 1: one RY0 and one IY0 value per iteration.       */
                           /* The IZ2 term is subtracted (msu_r) where the RZ2 term is added.      */
    3057         109 :     l = sub( shr( m, 1 ), 1 );
    3058       27904 :     FOR( i = 0; i < l; i++ )
    3059             :     {
    3060       27795 :         acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ );        // Qx + Q15
    3061       27795 :         acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ );    // Qx + Q15
    3062       27795 :         *RY0++ = mac_r( acc, *RZ2--, 0x4000 /*1.Q14*/ ); // Qx
    3063       27795 :         move16();
    3064             : 
    3065       27795 :         acc = L_mult( *IZ0--, 0x4000 /*1.Q14*/ );        // Qx + Q15
    3066       27795 :         acc = L_mac( acc, *IZ1--, 0x4000 /*1.Q14*/ );    // Qx + Q15
    3067       27795 :         *IY0-- = msu_r( acc, *IZ2++, 0x4000 /*1.Q14*/ ); // Qx
    3068       27795 :         move16();
    3069             :     }
    3070             : 
    3071             :     /* m/2 */
                           /* Entry m/2 is written through RY0 only; no IY0 value is produced. */
    3072         109 :     acc = L_mult( *RZ0, 0x4000 /*1.Q14*/ );        // Qx + Q15
    3073         109 :     acc = L_mac( acc, *RZ1, 0x4000 /*1.Q14*/ );    // Qx + Q15
    3074         109 :     *RY0++ = mac_r( acc, *RZ2, 0x4000 /*1.Q14*/ ); // Qx
    3075         109 :     move16();
    3076             : 
    3077             : 
    3078             :     /* Construction of Y1 */
                           /* Table start indices: c0 at T_SIN_PI_2 and s0 at 0; c1/c2 at one      */
                           /* third and s1/s2 at two thirds of T_SIN_PI_2. All six indices move by */
                           /* 'step' after every output entry; the direction of each one is given  */
                           /* by the add()/sub() calls that close each section.                    */
    3079         109 :     c0_ind = T_SIN_PI_2;
    3080         109 :     s0_ind = 0;
    3081         109 :     c1_ind = T_SIN_PI_2 * 1 / 3;
    3082         109 :     s1_ind = T_SIN_PI_2 * 2 / 3;
    3083         109 :     c2_ind = T_SIN_PI_2 * 1 / 3;
    3084         109 :     s2_ind = T_SIN_PI_2 * 2 / 3;
    3085             : 
    3086         109 :     RZ0 = RZ00; /* Reset pointers to zero positions. */
    3087         109 :     RZ1 = RZ10;
    3088         109 :     RZ2 = RZ20;
    3089         109 :     IZ0 = IZ00;
    3090         109 :     IZ1 = IZ10;
    3091         109 :     IZ2 = IZ20;
                           /* First entry: IZ0 is only stepped (no s0 term); IZ1 and IZ2 are read  */
                           /* at their zero positions. Only RY1 is written.                        */
    3092         109 :     acc = L_mult0( *RZ0++, t_sin[c0_ind] );     // Qx + Q15
    3093         109 :     acc = L_msu0( acc, *RZ1++, t_sin[c1_ind] ); // Qx + Q15
    3094         109 :     acc = L_msu0( acc, *RZ2--, t_sin[c2_ind] ); // Qx + Q15
    3095         109 :     IZ0--;
    3096         109 :     acc = L_msu0( acc, *IZ1--, t_sin[s1_ind] ); // Qx + Q15
    3097         109 :     acc = L_msu0( acc, *IZ2++, t_sin[s2_ind] ); // Qx + Q15
    3098         109 :     *RY1++ = round_fx( acc );                   // Qx - 1
    3099         109 :     move16();
    3100             : 
    3101         109 :     c0_ind = sub( c0_ind, step );
    3102         109 :     s0_ind = add( s0_ind, step );
    3103         109 :     c1_ind = add( c1_ind, step );
    3104         109 :     s1_ind = sub( s1_ind, step );
    3105         109 :     c2_ind = sub( c2_ind, step );
    3106         109 :     s2_ind = add( s2_ind, step );
    3107             : 
    3108             :     /* From 1 to (m/4) - 1. */
                           /* The real-output statements read through the pointers without moving  */
                           /* them; the imaginary-output statements re-read the same six inputs    */
                           /* and advance all six pointers.                                        */
    3109         109 :     l = sub( shr( m, 2 ), 1 );
    3110       13952 :     FOR( i = 0; i < l; i++ )
    3111             :     {
    3112       13843 :         acc = L_mult0( *RZ0, t_sin[c0_ind] );     // Qx + Q15
    3113       13843 :         acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
    3114       13843 :         acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
    3115       13843 :         acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
    3116       13843 :         acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
    3117       13843 :         acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
    3118       13843 :         *RY1++ = round_fx( acc );                 // Qx - 1
    3119       13843 :         move16();
    3120             : 
    3121       13843 :         acc = L_mult0( *IZ0--, t_sin[c0_ind] );     // Qx + Q15
    3122       13843 :         acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
    3123       13843 :         acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
    3124       13843 :         acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
    3125       13843 :         acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
    3126       13843 :         acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
    3127       13843 :         *IY1-- = round_fx( acc );                   // Qx - 1
    3128       13843 :         move16();
    3129             : 
    3130       13843 :         c0_ind = sub( c0_ind, step );
    3131       13843 :         s0_ind = add( s0_ind, step );
    3132       13843 :         c1_ind = add( c1_ind, step );
    3133       13843 :         s1_ind = sub( s1_ind, step );
    3134       13843 :         c2_ind = sub( c2_ind, step );
    3135       13843 :         s2_ind = add( s2_ind, step );
    3136             :     }
    3137             : 
    3138             :     /* From m/4 to m/2 -1. */
                           /* Compared with the previous section, both c2 terms change sign and    */
                           /* the c2/s2 indices reverse direction (presumably the c2/s2 angle      */
                           /* crosses a quadrant boundary of the table here - confirm).            */
    3139         109 :     l = shr( m, 2 ); /* m/2 - m/4 = m/4 */
    3140       14061 :     FOR( i = 0; i < l; i++ )
    3141             :     {
    3142       13952 :         acc = L_mult0( *RZ0, t_sin[c0_ind] );     // Qx + Q15
    3143       13952 :         acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
    3144       13952 :         acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
    3145       13952 :         acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
    3146       13952 :         acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
    3147       13952 :         acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
    3148       13952 :         *RY1++ = round_fx( acc );                 // Qx - 1
    3149       13952 :         move16();
    3150             : 
    3151       13952 :         acc = L_mult0( *IZ0--, t_sin[c0_ind] );     // Qx + Q15
    3152       13952 :         acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
    3153       13952 :         acc = L_msu0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
    3154       13952 :         acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
    3155       13952 :         acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
    3156       13952 :         acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
    3157       13952 :         *IY1-- = round_fx( acc );                   // Qx - 1
    3158       13952 :         move16();
    3159             : 
    3160       13952 :         c0_ind = sub( c0_ind, step );
    3161       13952 :         s0_ind = add( s0_ind, step );
    3162       13952 :         c1_ind = add( c1_ind, step );
    3163       13952 :         s1_ind = sub( s1_ind, step );
    3164       13952 :         c2_ind = add( c2_ind, step );
    3165       13952 :         s2_ind = sub( s2_ind, step );
    3166             :     }
    3167             : 
    3168             :     /* m/2 */
                           /* Entry m/2: same real-output formula as the preceding section; only   */
                           /* RY1 is written and no pointer or table index is advanced afterwards. */
    3169         109 :     acc = L_mult0( *RZ0, t_sin[c0_ind] );     // Qx + Q15
    3170         109 :     acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
    3171         109 :     acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
    3172         109 :     acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
    3173         109 :     acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
    3174         109 :     acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
    3175         109 :     *RY1++ = round_fx( acc );                 // Qx - 1
    3176         109 :     move16();
    3177             : 
    3178             :     /* Construction of Y2 */
                           /* Same start indices as for Y1, but the table is traversed with        */
                           /* step2 = 2 * step, and the range 1 .. m/2 - 1 is split into four      */
                           /* sections instead of two.                                             */
    3179         109 :     c0_ind = T_SIN_PI_2;
    3180         109 :     s0_ind = 0;
    3181         109 :     c1_ind = T_SIN_PI_2 * 1 / 3;
    3182         109 :     s1_ind = T_SIN_PI_2 * 2 / 3;
    3183         109 :     c2_ind = T_SIN_PI_2 * 1 / 3;
    3184         109 :     s2_ind = T_SIN_PI_2 * 2 / 3;
    3185         109 :     step2 = shl( step, 1 );
    3186             : 
    3187         109 :     RZ0 = RZ00; /* Reset pointers to zero positions. */
    3188         109 :     RZ1 = RZ10;
    3189         109 :     RZ2 = RZ20;
    3190         109 :     IZ0 = IZ00;
    3191         109 :     IZ1 = IZ10;
    3192         109 :     IZ2 = IZ20;
                           /* First entry: as for Y1, except that the s1 and s2 terms are added    */
                           /* instead of subtracted.                                               */
    3193         109 :     acc = L_mult0( *RZ0++, t_sin[c0_ind] );     // Qx + Q15
    3194         109 :     acc = L_msu0( acc, *RZ1++, t_sin[c1_ind] ); // Qx + Q15
    3195         109 :     acc = L_msu0( acc, *RZ2--, t_sin[c2_ind] ); // Qx + Q15
    3196         109 :     IZ0--;
    3197         109 :     acc = L_mac0( acc, *IZ1--, t_sin[s1_ind] ); // Qx + Q15
    3198         109 :     acc = L_mac0( acc, *IZ2++, t_sin[s2_ind] ); // Qx + Q15
    3199         109 :     *RY2++ = round_fx( acc );                   // Qx - 1
    3200         109 :     move16();
    3201             : 
    3202         109 :     c0_ind = sub( c0_ind, step2 );
    3203         109 :     s0_ind = add( s0_ind, step2 );
    3204         109 :     c1_ind = sub( c1_ind, step2 );
    3205         109 :     s1_ind = add( s1_ind, step2 );
    3206         109 :     c2_ind = add( c2_ind, step2 );
    3207         109 :     s2_ind = sub( s2_ind, step2 );
    3208             : 
    3209             :     /* From 1 to (m/8) - 1. */
    3210         109 :     l = sub( shr( m, 3 ), 1 ); /* m/8 - 1. */
    3211        6976 :     FOR( i = 0; i < l; i++ )
    3212             :     {
    3213        6867 :         acc = L_mult0( *RZ0, t_sin[c0_ind] );     // Qx + Q15
    3214        6867 :         acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
    3215        6867 :         acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
    3216        6867 :         acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
    3217        6867 :         acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
    3218        6867 :         acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
    3219        6867 :         *RY2++ = round_fx( acc );                 // Qx - 1
    3220        6867 :         move16();
    3221             : 
    3222        6867 :         acc = L_mult0( *IZ0--, t_sin[c0_ind] );     // Qx + Q15
    3223        6867 :         acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
    3224        6867 :         acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
    3225        6867 :         acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
    3226        6867 :         acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
    3227        6867 :         acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
    3228        6867 :         *IY2-- = round_fx( acc );                   // Qx - 1
    3229        6867 :         move16();
    3230             : 
    3231        6867 :         c0_ind = sub( c0_ind, step2 );
    3232        6867 :         s0_ind = add( s0_ind, step2 );
    3233        6867 :         c1_ind = sub( c1_ind, step2 );
    3234        6867 :         s1_ind = add( s1_ind, step2 );
    3235        6867 :         c2_ind = add( c2_ind, step2 );
    3236        6867 :         s2_ind = sub( s2_ind, step2 );
    3237             :     }
    3238             : 
    3239             :     /* From (m/8) to (m/4) - 1. */
                           /* Change from the previous section: both c1 terms switch sign and the  */
                           /* c1/s1 indices reverse direction.                                     */
    3240         109 :     l = shr( m, 3 ); /* m/4 - m/8 = m/8 */
    3241        7085 :     FOR( i = 0; i < l; i++ )
    3242             :     {
    3243        6976 :         acc = L_mult0( *RZ0, t_sin[c0_ind] );     // Qx + Q15
    3244        6976 :         acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
    3245        6976 :         acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
    3246        6976 :         acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
    3247        6976 :         acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
    3248        6976 :         acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
    3249        6976 :         *RY2++ = round_fx( acc );                 // Qx - 1
    3250        6976 :         move16();
    3251             : 
    3252        6976 :         acc = L_mult0( *IZ0--, t_sin[c0_ind] );     // Qx + Q15
    3253        6976 :         acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
    3254        6976 :         acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
    3255        6976 :         acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
    3256        6976 :         acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
    3257        6976 :         acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
    3258        6976 :         *IY2-- = round_fx( acc );                   // Qx - 1
    3259        6976 :         move16();
    3260             : 
    3261        6976 :         c0_ind = sub( c0_ind, step2 );
    3262        6976 :         s0_ind = add( s0_ind, step2 );
    3263        6976 :         c1_ind = add( c1_ind, step2 );
    3264        6976 :         s1_ind = sub( s1_ind, step2 );
    3265        6976 :         c2_ind = add( c2_ind, step2 );
    3266        6976 :         s2_ind = sub( s2_ind, step2 );
    3267             :     }
    3268             : 
    3269             :     /* From m/4 to 3*m/8 - 1. */
                           /* Change from the previous section: both s2 terms switch sign and the  */
                           /* c2/s2 indices reverse direction.                                     */
    3270         109 :     l = shr( m, 3 ); /* 3*m/8 - m/4 = m/8 */
    3271        7085 :     FOR( i = 0; i < l; i++ )
    3272             :     {
    3273        6976 :         acc = L_mult0( *RZ0, t_sin[c0_ind] );     // Qx + Q15
    3274        6976 :         acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
    3275        6976 :         acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
    3276        6976 :         acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
    3277        6976 :         acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
    3278        6976 :         acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
    3279        6976 :         *RY2++ = round_fx( acc );                 // Qx - 1
    3280        6976 :         move16();
    3281             : 
    3282        6976 :         acc = L_mult0( *IZ0--, t_sin[c0_ind] );     // Qx + Q15
    3283        6976 :         acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
    3284        6976 :         acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
    3285        6976 :         acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
    3286        6976 :         acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
    3287        6976 :         acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
    3288        6976 :         *IY2-- = round_fx( acc );                   // Qx - 1
    3289        6976 :         move16();
    3290             : 
    3291        6976 :         c0_ind = sub( c0_ind, step2 );
    3292        6976 :         s0_ind = add( s0_ind, step2 );
    3293        6976 :         c1_ind = add( c1_ind, step2 );
    3294        6976 :         s1_ind = sub( s1_ind, step2 );
    3295        6976 :         c2_ind = sub( c2_ind, step2 );
    3296        6976 :         s2_ind = add( s2_ind, step2 );
    3297             :     }
    3298             : 
    3299             :     /* From 3*m/8 to m/2 - 1*/
                           /* Change from the previous section: both c0 terms switch sign (the     */
                           /* accumulator is now seeded with the c1 term and the c0 term is        */
                           /* subtracted) and the c0/s0 indices reverse direction.                 */
    3300         109 :     l = shr( m, 3 ); /* m/2 - 3*m/8 = m/8 */
    3301        7085 :     FOR( i = 0; i < l; i++ )
    3302             :     {
    3303        6976 :         acc = L_mult0( *RZ1, t_sin[c1_ind] );     // Qx + Q15
    3304        6976 :         acc = L_msu0( acc, *RZ0, t_sin[c0_ind] ); // Qx + Q15
    3305        6976 :         acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
    3306        6976 :         acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
    3307        6976 :         acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
    3308        6976 :         acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
    3309        6976 :         *RY2++ = round_fx( acc );                 // Qx - 1
    3310        6976 :         move16();
    3311             : 
    3312        6976 :         acc = L_mult0( *IZ1--, t_sin[c1_ind] );     // Qx + Q15
    3313        6976 :         acc = L_msu0( acc, *IZ0--, t_sin[c0_ind] ); // Qx + Q15
    3314        6976 :         acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
    3315        6976 :         acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
    3316        6976 :         acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
    3317        6976 :         acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
    3318        6976 :         *IY2-- = round_fx( acc );                   // Qx - 1
    3319        6976 :         move16();
    3320             : 
    3321        6976 :         c0_ind = add( c0_ind, step2 );
    3322        6976 :         s0_ind = sub( s0_ind, step2 );
    3323        6976 :         c1_ind = add( c1_ind, step2 );
    3324        6976 :         s1_ind = sub( s1_ind, step2 );
    3325        6976 :         c2_ind = sub( c2_ind, step2 );
    3326        6976 :         s2_ind = add( s2_ind, step2 );
    3327             :     }
    3328             : 
    3329             :     /* m/2 */
                           /* Entry m/2: same real-output formula as the preceding section; only   */
                           /* RY2 is written.                                                      */
    3330         109 :     acc = L_mult0( *RZ1, t_sin[c1_ind] );     // Qx + Q15
    3331         109 :     acc = L_msu0( acc, *RZ0, t_sin[c0_ind] ); // Qx + Q15
    3332         109 :     acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
    3333         109 :     acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
    3334         109 :     acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
    3335         109 :     acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
    3336         109 :     *RY2++ = round_fx( acc );                 // Qx - 1
    3337         109 :     move16();
    3338             : 
    3339             :     /* Compute the inverse FFT for all 3 blocks. */
                           /* Each call receives the same block pointer twice (presumably as       */
                           /* source and destination, i.e. in place) and a final argument of 0;    */
                           /* r_fft_fx_lc() is defined elsewhere - confirm its argument meanings.  */
    3340         109 :     RY0 = &Y[0]; /* Rewind the pointers. */
    3341         109 :     RY1 = &Y[m];
    3342         109 :     RY2 = &RY1[m];
    3343         109 :     mBy2 = shr( m, 1 );
    3344         109 :     r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY0, RY0, 0 ); /* inverse FFT */
    3345         109 :     r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY1, RY1, 0 ); /* inverse FFT */
    3346         109 :     r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY2, RY2, 0 ); /* inverse FFT */
    3347             : 
    3348         109 :     y0 = RY0;
    3349         109 :     y1 = RY1;
    3350         109 :     y2 = RY2;
    3351             : 
    3352             :     /* Interlacing and scaling, scale = 1/3 */
                           /* X[3*i], X[3*i + 1], X[3*i + 2] take entry i of block 0, 1 and 2.     */
                           /* Each sample goes through mult_r() with FFT3_ONE_THIRD and then a     */
                           /* saturating left shift by one.                                        */
                           /* NOTE(review): the blocks were annotated Qx - 1 when built, so the    */
                           /* shift presumably restores Qx - confirm against r_fft_fx_lc() scaling. */
    3353         109 :     pX = X;
    3354       55917 :     FOR( i = 0; i < m; i++ )
    3355             :     {
    3356       55808 :         *pX++ = shl_sat( mult_r( *y0++, FFT3_ONE_THIRD ), 1 ); // Qx
    3357       55808 :         move16();
    3358       55808 :         *pX++ = shl_sat( mult_r( *y1++, FFT3_ONE_THIRD ), 1 ); // Qx
    3359       55808 :         move16();
    3360       55808 :         *pX++ = shl_sat( mult_r( *y2++, FFT3_ONE_THIRD ), 1 ); // Qx
    3361       55808 :         move16();
    3362             :     }
    3363             : 
    3364         109 :     return;
    3365             : }

Generated by: LCOV version 1.14