LCOV - code coverage report
Current view: top level - lib_com - fft_rel_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main @ da9cc8ead0679b4682d329fdff98cf1616159273 Lines: 470 519 90.6 %
Date: 2025-10-13 22:24:20 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*====================================================================================
       2             :     EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
       3             :   ====================================================================================*/
       4             : 
       5             : #include <stdint.h>
       6             : #include "options.h" /* Compilation switches                   */
       7             : #include "prot_fx.h" /* Function prototypes                    */
       8             : #include "rom_com.h" /* Static table prototypes                */
       9             : #include "stl.h"
      10             : #include "stdint.h"
      11             : #include "wmc_auto.h"
      12             : 
      13             : /*---------------------------------------------------------------------*
      14             :  * Local constants
      15             :  *---------------------------------------------------------------------*/
      16             : 
      17             : #define N_MAX_FFT   1024
      18             : #define N_MAX_DIV2  ( N_MAX_FFT >> 1 )
      19             : #define N_MAX_DIV4  ( N_MAX_DIV2 >> 1 )
      20             : #define INV_SQR2_FX 23170
      21             : #define N_MAX_SAS   256
      22             : 
      23             : /*------------------------------------------------------------------
      24             :  *
      25             :  * This is an implementation of decimation-in-time FFT algorithm for
      26             :  * real sequences.  The techniques used here can be found in several
      27             :  * books, e.g., i) Proakis and Manolakis, "Digital Signal Processing",
      28             :  * 2nd Edition, Chapter 9, and ii) W.H. Press et. al., "Numerical
      29             :  * Recipes in C", 2nd Edition, Chapter 12.
      30             :  *
      31             :  * Input -  There are two inputs to this function:
      32             :  *
      33             :  *       1) An integer pointer to the input data array
      34             :  *       2) An integer value which should be set as +1 for FFT
      35             :  *          and some other value, e.g., -1 for ifFT
      36             :  *
      37             :  * Output - There is no return value.
      38             :  *       The input data are replaced with transformed data.  if the
      39             :  *       input is a real time domain sequence, it is replaced with
      40             :  *       the complex FFT for positive frequencies.  The FFT value
      41             :  *       for DC and the foldover frequency are combined to form the
      42             :  *       first complex number in the array.  The remaining complex
      43             :  *       numbers correspond to increasing frequencies.  if the input
      44             :  *       is a complex frequency domain sequence arranged as above,
      45             :  *       it is replaced with the corresponding time domain sequence.
      46             :  *
      47             :  * Notes:
      48             :  *
      49             :  *       1) This function is designed to be a part of a noise supp-
      50             :  *          ression algorithm that requires 128-point FFT of real
      51             :  *          sequences.  This is achieved here through a 64-point
      52             :  *          complex FFT.  Consequently, the FFT size information is
      53             :  *          not transmitted explicitly.  However, some flexibility
      54             :  *          is provided in the function to change the size of the
      55             :  *          FFT by specifying the size information through "define"
      56             :  *          statements.
      57             :  *
      58             :  *       2) The values of the complex sinusoids used in the FFT
      59             :  *          algorithm are computed once (i.e., the first time the
      60             :  *          r_fft function is called) and stored in a table. To
      61             :  *          further speed up the algorithm, these values can be
      62             :  *          precomputed and stored in a ROM table in actual DSP
      63             :  *          based implementations.
      64             :  *
      65             :  *       3) In the c_fft function, the FFT values are divided by
      66             :  *          2 after each stage of computation thus dividing the
      67             :  *          final FFT values by 64.  No multiplying factor is used
      68             :  *          for the ifFT.  This is somewhat different from the usual
      69             :  *          definition of FFT where the factor 1/N, i.e., 1/64, is
      70             :  *          used for the ifFT and not the FFT.  No factor is used in
      71             :  *          the r_fft function.
      72             :  *
      73             :  *       4) Much of the code for the FFT and ifFT parts in r_fft
      74             :  *          and c_fft functions are similar and can be combined.
      75             :  *          They are, however, kept separate here to speed up the
      76             :  *          execution.
      77             :  *------------------------------------------------------------------------*/
      78             : /*------------------------------------------------------------------------*
      79             :  * c_fft_fx:
      80             :  *
      81             :  * Computes the complex part of the split-radix FFT
      82             :  *------------------------------------------------------------------------*/
      83             : 
      84       70648 : static void c_fft_fx(
      85             :     const Word16 *phs_tbl, /* i  : Table of phases            */
      86             :     Word16 SIZE,           /* i  : Size of the FFT           */
      87             :     Word16 NUM_STAGE,      /* i  : Number of stages           */
      88             :     const Word16 *in_ptr,  /* i  : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Qx*/
      89             :     Word16 *out_ptr,       /* o  : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Q(x - 1)*/
      90             :     /* in_ptr & out_ptr must not overlap! */
      91             :     const Word16 isign ) /* i  : 1=fft, otherwise it is ifft*/
      92             : {
      93             :     Word16 i, j, k, ii, jj, kk, ji, kj;
      94             :     Word32 L_tmp1, L_tmp2;
      95             :     Word16 tmp1, tmp2, tmp3, tmp4;
      96             :     const Word16 *table_ptr;
      97             :     const Word16 *input_ptr1, *input_ptr2, *input_ptr3, *input_ptr4;
      98       70648 :     Word16 shift = 0;
      99       70648 :     move16();
     100             :     /* Setup Reorder Variables */
     101       70648 :     table_ptr = NULL;
     102       70648 :     table_ptr = FFT_REORDER_1024;
     103       70648 :     SWITCH( SIZE )
     104             :     {
     105          58 :         case 1024:
     106          58 :             shift = 0;
     107          58 :             move16();
     108          58 :             BREAK;
     109         384 :         case 512:
     110         384 :             shift = 1;
     111         384 :             move16();
     112         384 :             BREAK;
     113        6258 :         case 256:
     114        6258 :             shift = 2;
     115        6258 :             move16();
     116        6258 :             BREAK;
     117         150 :         case 128:
     118         150 :             shift = 3;
     119         150 :             move16();
     120         150 :             BREAK;
     121       63798 :         case 64:
     122       63798 :             shift = 4;
     123       63798 :             move16();
     124       63798 :             BREAK;
     125             :     }
     126             :     /* The FFT part */
     127       70648 :     IF( isign != 0 )
     128             :     {
     129             :         /* Unrolled 1st/2nd Stage
     130             :          * 1) to take advantage of Table Values (0 & +/- 16384)
     131             :          * 2) to perform reordering of Input Values
     132             :          */
     133      504771 :         FOR( k = 0; k < SIZE; k += 8 )
     134             :         {
     135             :             /*
     136             :              * This loop use:
     137             :              *   4 Word16 (tmp1...tmp4)
     138             :              *   2 Word32 (L_tmp1 & L_tmp2)
     139             :              *   4 Pointers (table_ptr, input_ptr1, input_ptr2, input_ptr3)
     140             :              *
     141             :              * The addition of 'in_ptr' + and index value from 'reorder_ptr'
     142             :              * is counted as a move16()
     143             :              */
     144             : 
     145      466360 :             input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift ); // Qx
     146             : 
     147      466360 :             L_tmp1 = L_mult( *input_ptr1++, 16384 );
     148      466360 :             L_tmp2 = L_mult( *input_ptr1, 16384 );
     149             : 
     150      466360 :             input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     151             : 
     152      466360 :             tmp1 = msu_r_sat( L_tmp1, *input_ptr1, 16384 );
     153      466360 :             tmp3 = mac_r_sat( L_tmp1, *input_ptr1++, 16384 );
     154      466360 :             input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     155      466360 :             input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     156             : 
     157      466360 :             L_tmp1 = L_mult( *input_ptr2++, 16384 );
     158      466360 :             tmp2 = mac_r_sat( L_tmp1, *input_ptr3, 16384 );
     159      466360 :             tmp4 = msu_r_sat( L_tmp1, *input_ptr3++, 16384 );
     160      466360 :             L_tmp1 = L_mult( tmp3, 16384 );
     161      466360 :             out_ptr[k] = mac_r_sat( L_tmp1, tmp2, 16384 );
     162      466360 :             move16();
     163      466360 :             out_ptr[k + 4] = msu_r_sat( L_tmp1, tmp2, 16384 );
     164      466360 :             move16();
     165             : 
     166      466360 :             tmp2 = mac_r_sat( L_tmp2, *input_ptr1, 16384 );
     167      466360 :             tmp3 = msu_r_sat( L_tmp2, *input_ptr1, 16384 );
     168      466360 :             L_tmp2 = L_mult( *input_ptr2, 16384 );
     169             : 
     170      466360 :             L_tmp1 = L_mult( tmp1, 16384 );
     171      466360 :             tmp1 = msu_r_sat( L_tmp2, *input_ptr3, 16384 );
     172      466360 :             out_ptr[k + 2] = mac_r_sat( L_tmp1, tmp1, 16384 );
     173      466360 :             move16();
     174      466360 :             out_ptr[k + 6] = msu_r_sat( L_tmp1, tmp1, 16384 );
     175      466360 :             move16();
     176      466360 :             L_tmp1 = L_mult( tmp2, 16384 );
     177      466360 :             tmp2 = mac_r_sat( L_tmp2, *input_ptr3, 16384 );
     178      466360 :             out_ptr[k + 1] = mac_r_sat( L_tmp1, tmp2, 16384 );
     179      466360 :             move16();
     180      466360 :             out_ptr[k + 5] = msu_r_sat( L_tmp1, tmp2, 16384 );
     181      466360 :             move16();
     182      466360 :             L_tmp1 = L_mult( tmp3, 16384 );
     183      466360 :             out_ptr[k + 3] = msu_r_sat( L_tmp1, tmp4, 16384 );
     184      466360 :             move16();
     185      466360 :             out_ptr[k + 7] = mac_r_sat( L_tmp1, tmp4, 16384 );
     186      466360 :             move16();
     187             :         }
     188             : 
     189             :         /* Remaining Stages */
     190      166651 :         FOR( i = 2; i < NUM_STAGE; i++ )
     191             :         {
     192             :             /* i is stage counter      */
     193      128240 :             jj = shl( 2, i );  /* FFT size                */
     194      128240 :             kk = shl( jj, 1 ); /* 2 * FFT size            */
     195      128240 :             ii = shr( SIZE, i );
     196      128240 :             ji = 0;
     197      128240 :             move16(); /* ji is phase table index */
     198             : 
     199     1840036 :             FOR( j = 0; j < jj; j += 2 )
     200             :             {
     201             :                 /* j is sample counter     */
     202     5374276 :                 FOR( k = j; k < SIZE; k += kk )
     203             :                 {
     204             :                     /* k is butterfly top     */
     205     3662480 :                     kj = add( k, jj ); /* kj is butterfly bottom */
     206             : 
     207             :                     /* Butterfly computations */
     208     3662480 :                     L_tmp1 = L_msu( L_mult( *( out_ptr + kj ), phs_tbl[ji] ),
     209     3662480 :                                     *( out_ptr + kj + 1 ), phs_tbl[ji + 1] );
     210     3662480 :                     L_tmp2 = L_mac( L_mult( *( out_ptr + kj + 1 ), phs_tbl[ji] ),
     211     3662480 :                                     *( out_ptr + kj ), phs_tbl[ji + 1] );
     212             : 
     213     3662480 :                     out_ptr[kj] = mac_r( L_negate( L_tmp1 ), out_ptr[k], 16384 );
     214     3662480 :                     move16();
     215     3662480 :                     out_ptr[kj + 1] = mac_r( L_negate( L_tmp2 ), out_ptr[k + 1], 16384 );
     216     3662480 :                     move16();
     217     3662480 :                     out_ptr[k] = mac_r( L_tmp1, out_ptr[k], 16384 );
     218     3662480 :                     move16();
     219     3662480 :                     out_ptr[k + 1] = mac_r( L_tmp2, out_ptr[k + 1], 16384 );
     220     3662480 :                     move16();
     221             :                 }
     222     1711796 :                 ji = add( ji, ii );
     223             :             }
     224             :         }
     225             :     }
     226             :     ELSE /* The ifFT part */
     227             :     {
     228             :         /* Unrolled 1st/2nd Stage
     229             :          * 1) to take advantage of Table Values (0 & +/- 16384)
     230             :          * 2) to perform reordering of Input Values
     231             :          */
     232      310917 :         FOR( k = 0; k < SIZE; k += 8 )
     233             :         {
     234             :             /*
     235             :              * This loop use:
     236             :              *   4 Word16 (tmp1...tmp4)
     237             :              *   2 Word32 (L_tmp1 & L_tmp2)
     238             :              *   5 Pointers (reorder_ptr, input_ptr1...input_ptr4)
     239             :              *
     240             :              * The addition of 'in_ptr' + and index value from 'reorder_ptr'
     241             :              * is counted as a move16()
     242             :              */
     243      278680 :             input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     244      278680 :             input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     245             : 
     246      278680 :             input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     247      278680 :             input_ptr4 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     248             : 
     249             : 
     250      278680 :             tmp3 = sub( *input_ptr1, *input_ptr2 );
     251      278680 :             tmp4 = add( *input_ptr1++, *input_ptr2++ );
     252             : 
     253      278680 :             tmp2 = sub( input_ptr3[0], input_ptr4[0] );
     254      278680 :             tmp1 = sub( input_ptr3[1], input_ptr4[1] );
     255             : 
     256      278680 :             out_ptr[k + 2] = sub( tmp3, tmp1 );
     257      278680 :             move16();
     258      278680 :             out_ptr[k + 6] = add( tmp3, tmp1 );
     259      278680 :             move16();
     260             : 
     261      278680 :             tmp1 = sub( *input_ptr1, *input_ptr2 );
     262      278680 :             out_ptr[k + 3] = add( tmp1, tmp2 );
     263      278680 :             move16();
     264      278680 :             out_ptr[k + 7] = sub( tmp1, tmp2 );
     265      278680 :             move16();
     266             : 
     267      278680 :             tmp1 = add( input_ptr3[0], input_ptr4[0] );
     268      278680 :             tmp3 = add( input_ptr3[1], input_ptr4[1] );
     269             : 
     270      278680 :             out_ptr[k] = add( tmp4, tmp1 );
     271      278680 :             move16();
     272      278680 :             out_ptr[k + 4] = sub( tmp4, tmp1 );
     273      278680 :             move16();
     274             : 
     275      278680 :             tmp4 = add( *input_ptr1, *input_ptr2 );
     276      278680 :             out_ptr[k + 1] = add( tmp4, tmp3 );
     277      278680 :             move16();
     278      278680 :             out_ptr[k + 5] = sub( tmp4, tmp3 );
     279      278680 :             move16();
     280             :         }
     281             : 
     282       32237 :         table_ptr = phs_tbl + SIZE; /* access part of table that is scaled by 2 */
     283             : 
     284             :         /* Remaining Stages */
     285      129991 :         FOR( i = 2; i < NUM_STAGE; i++ )
     286             :         {
     287             :             /* i is stage counter      */
     288       97754 :             jj = shl( 2, i );  /* FFT size                */
     289       97754 :             kk = shl( jj, 1 ); /* 2 * FFT size            */
     290       97754 :             ii = shr( SIZE, i );
     291       97754 :             ji = 0;
     292       97754 :             move16(); /* ji is phase table index */
     293             : 
     294     1083526 :             FOR( j = 0; j < jj; j += 2 )
     295             :             {
     296             :                 /* j is sample counter     */
     297             :                 /* This can be computed by successive add_fxitions of ii to ji, starting from 0
     298             :                    hence line-count it as a one-line add (still need to increment op count!!) */
     299             : 
     300     2806204 :                 FOR( k = j; k < SIZE; k += kk )
     301             :                 {
     302             :                     /* k is butterfly top     */
     303     1820432 :                     kj = add( k, jj ); /* kj is butterfly bottom */
     304             : 
     305             :                     /* Butterfly computations */
     306     1820432 :                     tmp1 = mac_r_sat( L_mult_sat( out_ptr[kj], table_ptr[ji] ), out_ptr[kj + 1], table_ptr[ji + 1] );
     307             : 
     308     1820432 :                     tmp2 = msu_r_sat( L_mult_sat( out_ptr[kj + 1], table_ptr[ji] ), out_ptr[kj], table_ptr[ji + 1] );
     309             : 
     310     1820432 :                     out_ptr[kj] = sub_sat( out_ptr[k], tmp1 );
     311     1820432 :                     move16();
     312     1820432 :                     out_ptr[kj + 1] = sub_sat( out_ptr[k + 1], tmp2 );
     313     1820432 :                     move16();
     314     1820432 :                     out_ptr[k] = add_sat( out_ptr[k], tmp1 );
     315     1820432 :                     move16();
     316     1820432 :                     out_ptr[k + 1] = add_sat( out_ptr[k + 1], tmp2 );
     317     1820432 :                     move16();
     318             :                 }
     319      985772 :                 ji = add( ji, ii );
     320             :             }
     321             :         }
     322             :     }
     323       70648 : }
     324             : 
     325             : /*--------------------------------------------------------------------------------*
     326             :  * r_fft_fx:
     327             :  *
     328             :  * Perform FFT fixed-point for real-valued sequences of length 32, 64 or 128
     329             :  *--------------------------------------------------------------------------------*/
     330       70648 : void r_fft_fx_lc(
     331             :     const Word16 *phs_tbl,  /* i  : Table of phase            */
     332             :     const Word16 SIZE,      /* i  : Size of the FFT           */
     333             :     const Word16 SIZE2,     /* i  : Size / 2                  */
     334             :     const Word16 NUM_STAGE, /* i  : Number of stage           */
     335             :     const Word16 *in_ptr,   /* i  : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Qx*/
     336             :     Word16 *out_ptr,        /* o  : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Q(x - 1)*/
     337             :     const Word16 isign      /* i  : 1=fft, otherwize it's ifft                                                      */
     338             : )
     339             : {
     340             :     Word16 tmp2_real, tmp2_imag;
     341             :     Word32 Ltmp1_real, Ltmp1_imag;
     342             :     Word16 i;
     343             :     Word32 Ltmp1;
     344             :     const Word16 *phstbl_ptrDn;
     345             :     Word16 *ptrDn;
     346             :     Word16 temp[1024]; /* Accommodates real input FFT size up to 1024. */
     347             : 
     348             :     /* Setup Pointers */
     349       70648 :     phstbl_ptrDn = &phs_tbl[SIZE - 1];
     350             : 
     351             :     /* The FFT part */
     352       70648 :     IF( isign != 0 )
     353             :     {
     354             :         Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
     355             : 
     356             :         /* Perform the complex FFT */
     357       38411 :         c_fft_fx( phs_tbl, SIZE, NUM_STAGE, in_ptr, temp, isign );
     358             : 
     359             :         /* First, handle the DC and foldover frequencies */
     360       38411 :         out_ptr[SIZE2] = sub( temp[0], temp[1] );
     361       38411 :         move16();
     362       38411 :         out_ptr[0] = sub( add( temp[0], temp[1] ), shr( NUM_STAGE, 1 ) );
     363       38411 :         move16(); /* DC have a small offset */
     364             : 
     365       38411 :         ptrDn = &temp[SIZE - 1];
     366             : 
     367       38411 :         ptImaDn = &out_ptr[SIZE - 1];
     368       38411 :         ptRealUp = &out_ptr[1];
     369       38411 :         ptImaUp = &out_ptr[SIZE2 + 1];
     370       38411 :         ptRealDn = &out_ptr[SIZE2 - 1];
     371             : 
     372             :         /* Now, handle the remaining positive frequencies */
     373      971131 :         FOR( i = 2; i <= SIZE2; i += 2 )
     374             :         {
     375      932720 :             Ltmp1_imag = L_mult( temp[i + 1], 16384 );
     376      932720 :             Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptrDn, 16384 );
     377      932720 :             tmp2_real = add_sat( temp[i + 1], *ptrDn-- );
     378             : 
     379      932720 :             Ltmp1_real = L_mult( temp[i], 16384 );
     380      932720 :             Ltmp1_real = L_mac_sat( Ltmp1_real, *ptrDn, 16384 );
     381      932720 :             tmp2_imag = sub( *ptrDn--, temp[i] );
     382             : 
     383             : 
     384      932720 :             *ptRealUp++ = msu_r_sat( L_mac_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
     385      932720 :             move16();
     386      932720 :             *ptImaDn-- = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
     387      932720 :             move16();
     388      932720 :             Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
     389      932720 :             Ltmp1_real = L_mac_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
     390      932720 :             *ptImaUp++ = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
     391      932720 :             move16();
     392      932720 :             *ptRealDn-- = mac_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
     393      932720 :             move16();
     394             :         }
     395             :     }
     396             :     ELSE /* The ifFT part */
     397             :     {
     398             :         const Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
     399             : 
     400             :         /* First, handle the DC and foldover frequencies */
     401       32237 :         Ltmp1 = L_mult( in_ptr[0], 16384 );
     402       32237 :         temp[0] = mac_r( Ltmp1, in_ptr[SIZE2], 16384 );
     403       32237 :         move16();
     404       32237 :         temp[1] = msu_r( Ltmp1, in_ptr[SIZE2], 16384 );
     405       32237 :         move16();
     406             : 
     407       32237 :         ptrDn = &temp[SIZE - 1];
     408             : 
     409             :         /* Here we cast to Word16 * from a const Word16 *. */
     410             :         /* This is ok because we use these pointers for    */
     411             :         /* reading only. This is just to avoid declaring a */
     412             :         /* bunch of 4 other pointer with const Word16 *.   */
     413       32237 :         ptImaDn = &in_ptr[SIZE - 1];   // Qx
     414       32237 :         ptRealUp = &in_ptr[1];         // Qx
     415       32237 :         ptImaUp = &in_ptr[SIZE2 + 1];  // Qx
     416       32237 :         ptRealDn = &in_ptr[SIZE2 - 1]; // Qx
     417             : 
     418             :         /* Now, handle the remaining positive frequencies */
     419      589597 :         FOR( i = 2; i <= SIZE2; i += 2 )
     420             :         {
     421      557360 :             Ltmp1_imag = L_mult( *ptImaDn, 16384 );
     422      557360 :             Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptImaUp, 16384 );
     423      557360 :             tmp2_real = add_sat( *ptImaDn--, *ptImaUp++ );
     424      557360 :             Ltmp1_real = L_mult( *ptRealUp, 16384 );
     425      557360 :             Ltmp1_real = L_mac_sat( Ltmp1_real, *ptRealDn, 16384 );
     426      557360 :             tmp2_imag = sub_sat( *ptRealUp++, *ptRealDn-- );
     427      557360 :             temp[i] = mac_r_sat( L_msu_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
     428      557360 :             move16();
     429      557360 :             temp[i + 1] = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
     430      557360 :             move16();
     431      557360 :             Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
     432      557360 :             Ltmp1_real = L_msu_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
     433      557360 :             *ptrDn-- = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
     434      557360 :             move16();
     435      557360 :             *ptrDn-- = msu_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
     436      557360 :             move16();
     437             :         }
     438             : 
     439             :         /* Perform the complex ifFT */
     440       32237 :         c_fft_fx( phs_tbl, SIZE, NUM_STAGE, temp, out_ptr, isign );
     441             :     }
     442       70648 : }
     443             : 
     444             : 
     445             : /*---------------------------------------------------------------------*
     446             :  *  fft_rel()
     447             :  *
     448             :  *  Computes the split-radix FFT in place for the real-valued
     449             :  *  signal x of length n.  The algorithm has been ported from
     450             :  *  the Fortran code of [1].
     451             :  *
     452             :  *  The function  needs sine and cosine tables t_sin and t_cos,
     453             :  *  and the constant N_MAX_FFT.  The table  entries  are defined as
     454             :  *  sin(2*pi*i) and cos(2*pi*i) for i = 0, 1, ..., N_MAX_FFT-1. The
     455             :  *  implementation  assumes  that any entry  will not be needed
     456             :  *  outside the tables. Therefore, N_MAX_FFT and n must be properly
     457             :  *  set.  The function has been tested  with the values n = 16,
     458             :  *  32, 64, 128, 256, and N_MAX_FFT = 1280.
     459             :  *
     460             :  *  References
     461             :  *  [1] H.V. Sorensen,  D.L. Jones, M.T. Heideman, C.S. Burrus,
     462             :  *      "Real-valued fast  Fourier transform  algorithm,"  IEEE
     463             :  *      Trans. on Signal Processing,  Vol.35, No.6, pp 849-863,
     464             :  *      1987.
     465             :  *
     466             :  *  OUTPUT
     467             :  *      x[0:n-1]  Transform coeffients in the order re[0], re[1],
     468             :  *                ..., re[n/2], im[n/2-1], ..., im[1].
     469             :  *---------------------------------------------------------------------*/
     470             : 
     471     2186812 : void fft_rel_16_32fx(
     472             :     Word16 x[],  /* i/o: input/output vector Qx   */
     473             :     Word16 *q_x, /* extra scaling added on speech buffer*/
     474             :     Word16 i_subfr,
     475             :     const Word16 n, /* i  : vector length          */
     476             :     const Word16 m  /* i  : log2 of vector length  */
     477             : )
     478             : {
     479             :     Word16 i, j, k, n1, n2, n4;
     480             :     Word16 step;
     481             :     Word32 xt, t1, t2;
     482             :     Word32 *x0, *x1, *x2;
     483             :     const Word16 *s, *c;
     484             :     Word32 *xi2, *xi3, *xi4, *xi1;
     485             : 
     486             :     Word32 fft_bff32[L_FFT];
     487     2186812 :     Copy_Scale_sig_16_32_no_sat( x, fft_bff32, L_FFT, 0 ); // copying x to fft_bff32 without scaling
     488             : 
     489             :     /*-----------------------------------------------------------------*
     490             :      * Digit reverse counter
     491             :      *-----------------------------------------------------------------*/
     492             : 
     493     2186812 :     j = 0;
     494     2186812 :     move16();
     495     2186812 :     x0 = &fft_bff32[0]; // Qx
     496   559823872 :     FOR( i = 0; i < n - 1; i++ )
     497             :     {
     498   557637060 :         IF( LT_16( i, j ) )
     499             :         {
     500   262417440 :             xt = fft_bff32[j]; // Qx
     501   262417440 :             move32();
     502   262417440 :             fft_bff32[j] = *x0; // Qx
     503   262417440 :             move32();
     504   262417440 :             *x0 = xt; // Qx
     505   262417440 :             move32();
     506             :         }
     507   557637060 :         x0++;
     508   557637060 :         k = shr( n, 1 );
     509  1097779624 :         WHILE( ( k <= j ) )
     510             :         {
     511   540142564 :             j = sub( j, k );
     512   540142564 :             k = shr( k, 1 );
     513             :         }
     514   557637060 :         j = add( j, k );
     515             :     }
     516             : 
     517             :     /*-----------------------------------------------------------------*
     518             :      * Length two butterflies
     519             :      *-----------------------------------------------------------------*/
     520             : 
     521     2186812 :     x0 = &fft_bff32[0];
     522     2186812 :     x1 = &fft_bff32[1];
     523   282098748 :     FOR( i = 0; i < ( n >> 1 ); i++ )
     524             :     {
     525   279911936 :         xt = *x0;
     526   279911936 :         move32();
     527   279911936 :         *x0 = L_add( xt, *x1 );
     528   279911936 :         move32();
     529   279911936 :         *x1 = L_sub( xt, *x1 );
     530   279911936 :         move32();
     531   279911936 :         x0++;
     532   279911936 :         x0++;
     533   279911936 :         x1++;
     534   279911936 :         x1++;
     535             :     }
     536             : 
     537             :     /*-----------------------------------------------------------------*
     538             :      * Other butterflies
     539             :      *
     540             :      * The implementation described in [1] has been changed by using
     541             :      * table lookup for evaluating sine and cosine functions.  The
     542             :      * variable ind and its increment step are needed to access table
     543             :      * entries.  Note that this implementation assumes n4 to be so
     544             :      * small that ind will never exceed the table.  Thus the input
     545             :      * argument n and the constant N_MAX_SAS must be set properly.
     546             :      *-----------------------------------------------------------------*/
     547             : 
     548     2186812 :     n2 = 1;
     549     2186812 :     move16();
     550             :     /* step = N_MAX_SAS/4; */
     551    17494496 :     FOR( k = 2; k <= m; k++ )
     552             :     {
     553    15307684 :         n4 = n2;
     554    15307684 :         move16();
     555    15307684 :         n2 = shl( n4, 1 );
     556    15307684 :         n1 = shl( n2, 1 );
     557             : 
     558    15307684 :         step = idiv1616( N_MAX_SAS, n1 );
     559             : 
     560    15307684 :         x0 = fft_bff32;
     561    15307684 :         x1 = fft_bff32 + n2;
     562    15307684 :         x2 = fft_bff32 + add( n2, n4 );
     563   293032808 :         FOR( i = 0; i < n; i += n1 )
     564             :         {
     565   277725124 :             xt = *x0;
     566   277725124 :             move32(); /* xt = x[i];   */
     567   277725124 :             *x0 = L_add( xt, *x1 );
     568   277725124 :             move32(); /* x[i] = xt + x[i+n2];    */
     569   277725124 :             *x1 = L_sub( xt, *x1 );
     570   277725124 :             move32(); /* x[i+n2] = xt - x[i+n2];      */
     571   277725124 :             *x2 = L_negate( *x2 );
     572   277725124 :             move32(); /* x[i+n2+n4] = -x[i+n2+n4];     */
     573             : 
     574             : 
     575   277725124 :             s = sincos_t_fx + step; // Q15
     576   277725124 :             c = s + 64;             // Q15
     577   277725124 :             xi1 = fft_bff32 + add( i, 1 );
     578   277725124 :             xi3 = xi1 + n2;
     579   277725124 :             xi2 = xi3 - 2;
     580   277725124 :             xi4 = xi1 + sub( n1, 2 );
     581             : 
     582   979691776 :             FOR( j = 1; j < n4; j++ )
     583             :             {
     584   701966652 :                 t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx  */
     585   701966652 :                 t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx    */
     586   701966652 :                 *xi4 = L_sub( *xi2, t2 );
     587   701966652 :                 move32();
     588   701966652 :                 *xi3 = L_negate( L_add( *xi2, t2 ) );
     589   701966652 :                 move32();
     590   701966652 :                 *xi2 = L_sub( *xi1, t1 );
     591   701966652 :                 move32();
     592   701966652 :                 *xi1 = L_add( *xi1, t1 );
     593   701966652 :                 move32();
     594             : 
     595   701966652 :                 xi4--;
     596   701966652 :                 xi2--;
     597   701966652 :                 xi3++;
     598   701966652 :                 xi1++;
     599   701966652 :                 c += step;
     600   701966652 :                 s += step; /* autoincrement by ar0 */
     601             :             }
     602             : 
     603   277725124 :             x0 += n1;
     604   277725124 :             x1 += n1;
     605   277725124 :             x2 += n1;
     606             :         }
     607             :         /* step = shr(step, 1); */
     608             :     }
     609     2186812 :     Word16 norm = L_norm_arr( fft_bff32, L_FFT );
     610     2186812 :     IF( i_subfr == 0 )
     611             :     {
     612     1093406 :         Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
     613     1093406 :         *q_x = sub( norm, 16 );
     614     1093406 :         move16();
     615             :     }
     616             :     ELSE
     617             :     {
     618     1093406 :         IF( LT_16( sub( norm, 16 ), *q_x ) )
     619             :         {
     620      218471 :             scale_sig( x - L_FFT, L_FFT, sub( sub( norm, 16 ), *q_x ) );
     621      218471 :             Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
     622      218471 :             *q_x = sub( norm, 16 );
     623      218471 :             move16();
     624             :         }
     625             :         ELSE
     626             :         {
     627      874935 :             Copy_Scale_sig32_16( fft_bff32, x, L_FFT, add( 16, *q_x ) );
     628             :         }
     629             :     }
     630             : 
     631     2186812 :     return;
     632             : }
     633             : 
     634      211030 : void fft_rel_fx(
     635             :     Word16 x[],     /* i/o: input/output vector Qx   */
     636             :     const Word16 n, /* i  : vector length          */
     637             :     const Word16 m  /* i  : log2 of vector length  */
     638             : )
     639             : {
     640             :     Word16 i, j, k, n1, n2, n4;
     641             :     Word16 step;
     642             :     Word16 xt, t1, t2;
     643             :     Word16 *x0, *x1, *x2;
     644             :     const Word16 *s, *c;
     645             :     Word16 *xi2, *xi3, *xi4, *xi1;
     646             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     647      211030 :     Flag Overflow = 0;
     648      211030 :     move32();
     649             : #endif
     650             : 
     651             : 
     652             :     /*-----------------------------------------------------------------*
     653             :      * Digit reverse counter
     654             :      *-----------------------------------------------------------------*/
     655             : 
     656      211030 :     j = 0;
     657      211030 :     move16();
     658      211030 :     x0 = &x[0]; // Qx
     659      211030 :     move16();
     660    47101744 :     FOR( i = 0; i < n - 1; i++ )
     661             :     {
     662    46890714 :         IF( LT_16( i, j ) )
     663             :         {
     664    22054908 :             xt = x[j]; // Qx
     665    22054908 :             move16();
     666    22054908 :             x[j] = *x0; // Qx
     667    22054908 :             move16();
     668    22054908 :             *x0 = xt; // Qx
     669    22054908 :             move16();
     670             :         }
     671    46890714 :         x0++;
     672    46890714 :         k = shr( n, 1 );
     673    92257996 :         WHILE( ( k <= j ) )
     674             :         {
     675    45367282 :             j = sub( j, k );
     676    45367282 :             k = shr( k, 1 );
     677             :         }
     678    46890714 :         j = add( j, k );
     679             :     }
     680             : 
     681             :     /*-----------------------------------------------------------------*
     682             :      * Length two butterflies
     683             :      *-----------------------------------------------------------------*/
     684             : 
     685      211030 :     x0 = &x[0];
     686      211030 :     move16();
     687      211030 :     x1 = &x[1];
     688      211030 :     move16();
     689    23761902 :     FOR( i = 0; i < ( n >> 1 ); i++ )
     690             :     {
     691    23550872 :         xt = *x0;
     692    23550872 :         move16();
     693    23550872 :         *x0 = add_o( xt, *x1, &Overflow );
     694    23550872 :         move16();
     695    23550872 :         *x1 = sub_o( xt, *x1, &Overflow );
     696    23550872 :         move16();
     697    23550872 :         x0++;
     698    23550872 :         x0++;
     699    23550872 :         x1++;
     700    23550872 :         x1++;
     701             :     }
     702             : 
     703             :     /*-----------------------------------------------------------------*
     704             :      * Other butterflies
     705             :      *
     706             :      * The implementation described in [1] has been changed by using
     707             :      * table lookup for evaluating sine and cosine functions.  The
     708             :      * variable ind and its increment step are needed to access table
     709             :      * entries.  Note that this implementation assumes n4 to be so
     710             :      * small that ind will never exceed the table.  Thus the input
     711             :      * argument n and the constant N_MAX_SAS must be set properly.
     712             :      *-----------------------------------------------------------------*/
     713             : 
     714      211030 :     n2 = 1;
     715      211030 :     move16();
     716             :     /* step = N_MAX_SAS/4; */
     717     1523432 :     FOR( k = 2; k <= m; k++ )
     718             :     {
     719     1312402 :         n4 = n2;
     720     1312402 :         move16();
     721     1312402 :         n2 = shl( n4, 1 );
     722     1312402 :         n1 = shl( n2, 1 );
     723             : 
     724     1312402 :         step = idiv1616( N_MAX_SAS, n1 );
     725             : 
     726     1312402 :         x0 = x;
     727     1312402 :         x1 = x + n2;
     728     1312402 :         x2 = x + add( n2, n4 );
     729    24652244 :         FOR( i = 0; i < n; i += n1 )
     730             :         {
     731    23339842 :             xt = *x0;
     732    23339842 :             move16(); /* xt = x[i];   */
     733    23339842 :             *x0 = add_o( xt, *x1, &Overflow );
     734    23339842 :             move16(); /* x[i] = xt + x[i+n2];    */
     735    23339842 :             *x1 = sub_o( xt, *x1, &Overflow );
     736    23339842 :             move16(); /* x[i+n2] = xt - x[i+n2];      */
     737    23339842 :             *x2 = negate( *x2 );
     738    23339842 :             move16(); /* x[i+n2+n4] = -x[i+n2+n4];     */
     739             : 
     740             : 
     741    23339842 :             s = sincos_t_fx + step; // Q15
     742    23339842 :             c = s + 64;             // Q15
     743    23339842 :             xi1 = x + add( i, 1 );
     744    23339842 :             xi3 = xi1 + n2;
     745    23339842 :             xi2 = xi3 - 2;
     746    23339842 :             xi4 = xi1 + sub( n1, 2 );
     747             : 
     748    82263244 :             FOR( j = 1; j < n4; j++ )
     749             :             {
     750    58923402 :                 t1 = add_o( mult_r( *xi3, *c ), mult_r( *xi4, *s ), &Overflow ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx  */
     751    58923402 :                 t2 = sub_o( mult_r( *xi3, *s ), mult_r( *xi4, *c ), &Overflow ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx    */
     752    58923402 :                 *xi4 = sub_o( *xi2, t2, &Overflow );
     753    58923402 :                 move16();
     754    58923402 :                 *xi3 = negate( add_o( *xi2, t2, &Overflow ) );
     755    58923402 :                 move16();
     756    58923402 :                 *xi2 = sub_o( *xi1, t1, &Overflow );
     757    58923402 :                 move16();
     758    58923402 :                 *xi1 = add_o( *xi1, t1, &Overflow );
     759    58923402 :                 move16();
     760             : 
     761    58923402 :                 xi4--;
     762    58923402 :                 xi2--;
     763    58923402 :                 xi3++;
     764    58923402 :                 xi1++;
     765    58923402 :                 c += step;
     766    58923402 :                 s += step; /* autoincrement by ar0 */
     767             :             }
     768             : 
     769    23339842 :             x0 += n1;
     770    23339842 :             x1 += n1;
     771    23339842 :             x2 += n1;
     772             :         }
     773             :         /* step = shr(step, 1); */
     774             :     }
     775             : 
     776      211030 :     return;
     777             : }
     778             : 
     779       45232 : void fft_rel_fx32(
     780             :     Word32 x[],     /* i/o: input/output vector Qx   */
     781             :     const Word16 n, /* i  : vector length          */
     782             :     const Word16 m  /* i  : log2 of vector length  */
     783             : )
     784             : {
     785             :     Word16 i, j, k, n1, n2, n4;
     786             :     Word16 step;
     787             :     Word32 xt, t1, t2;
     788             :     Word32 *x0, *x1, *x2;
     789             :     Word32 *xi2, *xi3, *xi4, *xi1;
     790             :     const Word16 *s, *c;
     791             :     const Word16 *idx;
     792             : 
     793             :     /* !!!! NMAX = 256 is hardcoded here  (similar optimizations should be done for NMAX > 256) !!! */
     794             : 
     795             :     Word32 *x2even, *x2odd;
     796             :     Word32 temp[512];
     797             : 
     798       45232 :     test();
     799       45232 :     test();
     800       45232 :     IF( EQ_16( n, 128 ) || EQ_16( n, 256 ) || EQ_16( n, 512 ) )
     801             :     {
     802       45232 :         idx = fft256_read_indexes;
     803             : 
     804             :         /* Combined Digit reverse counter & Length two butterflies */
     805       45232 :         IF( EQ_16( n, 128 ) )
     806             :         {
     807           0 :             x2 = temp;
     808           0 :             FOR( i = 0; i < 64; i++ )
     809             :             {
     810           0 :                 j = *idx++;
     811           0 :                 move16();
     812           0 :                 k = *idx++;
     813           0 :                 move16();
     814             : 
     815           0 :                 *x2++ = L_add( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
     816           0 :                 move16();
     817           0 :                 *x2++ = L_sub( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
     818           0 :                 move16();
     819             :             }
     820             :         }
     821       45232 :         ELSE IF( EQ_16( n, 256 ) )
     822             :         {
     823         488 :             x2 = temp;
     824       62952 :             FOR( i = 0; i < 128; i++ )
     825             :             {
     826       62464 :                 j = *idx++;
     827       62464 :                 move16();
     828       62464 :                 k = *idx++;
     829       62464 :                 move16();
     830             : 
     831       62464 :                 *x2++ = L_add( x[j], x[k] ); // Qx
     832       62464 :                 move16();
     833       62464 :                 *x2++ = L_sub( x[j], x[k] ); // Qx
     834       62464 :                 move16();
     835             :             }
     836             :         }
     837       44744 :         ELSE IF( EQ_16( n, 512 ) )
     838             :         {
     839       44744 :             x2even = temp;
     840       44744 :             x2odd = temp + 256;
     841             : 
     842     5771976 :             FOR( i = 0; i < 128; i++ )
     843             :             {
     844     5727232 :                 j = shl( *idx, 1 );
     845     5727232 :                 idx++;
     846     5727232 :                 k = shl( *idx, 1 );
     847     5727232 :                 idx++;
     848             : 
     849     5727232 :                 *x2even++ = L_add( x[j], x[k] ); // Qx
     850     5727232 :                 move16();
     851     5727232 :                 *x2even++ = L_sub( x[j], x[k] ); // Qx
     852     5727232 :                 move16();
     853     5727232 :                 j = add( j, 1 );
     854     5727232 :                 k = add( k, 1 );
     855     5727232 :                 *x2odd++ = L_add( x[j], x[k] ); // Qx
     856     5727232 :                 move16();
     857     5727232 :                 *x2odd++ = L_sub( x[j], x[k] ); // Qx
     858     5727232 :                 move16();
     859             :             }
     860             :         }
     861             : 
     862             :         /*-----------------------------------------------------------------*
     863             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     864             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     865             :          * and the associated pointers initialization.
     866             :          * Also, it allows to Put the Data from 'temp' back into 'x' due
     867             :          * to the previous Combined Digit Reverse and Length two butterflies
     868             :          *-----------------------------------------------------------------*/
     869             : 
     870             :         /*for_ (k = 2; k < 3; k++)*/
     871             :         {
     872       45232 :             x0 = temp;
     873       45232 :             x1 = x0 + 2;
     874       45232 :             x2 = x; // Qx
     875             : 
     876     5803696 :             FOR( i = 0; i < n; i += 4 )
     877             :             {
     878     5758464 :                 *x2++ = L_add( *x0++, *x1 ); /* x[i] = xt + x[i+n2];    */
     879     5758464 :                 move16();
     880     5758464 :                 *x2++ = *x0;
     881     5758464 :                 move16();
     882     5758464 :                 x0--;
     883     5758464 :                 *x2++ = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      */
     884     5758464 :                 move16();
     885     5758464 :                 x1++;
     886     5758464 :                 *x2++ = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4];     */
     887     5758464 :                 move16();
     888             : 
     889     5758464 :                 x0 += 4;
     890     5758464 :                 x1 += 3; /* x1 has already advanced */
     891             :             }
     892             :         }
     893             :     }
     894             :     ELSE
     895             :     {
     896             :         /*-----------------------------------------------------------------*
     897             :          * Digit reverse counter
     898             :          *-----------------------------------------------------------------*/
     899             : 
     900           0 :         j = 0;
     901           0 :         move16();
     902           0 :         x0 = &x[0]; // Qx
     903           0 :         FOR( i = 0; i < ( n - 1 ); i++ )
     904             :         {
     905           0 :             IF( LT_16( i, j ) )
     906             :             {
     907           0 :                 xt = x[j]; // Qx
     908           0 :                 move32();
     909           0 :                 x[j] = *x0;
     910           0 :                 move32();
     911           0 :                 *x0 = xt;
     912           0 :                 move32();
     913             :             }
     914           0 :             x0++;
     915           0 :             k = shr( n, 1 );
     916           0 :             WHILE( ( k <= j ) )
     917             :             {
     918           0 :                 j = sub( j, k );
     919           0 :                 k = shr( k, 1 );
     920             :             }
     921           0 :             j = add( j, k );
     922             :         }
     923             : 
     924             :         /*-----------------------------------------------------------------*
     925             :          * Length two butterflies
     926             :          *-----------------------------------------------------------------*/
     927             : 
     928           0 :         x0 = &x[0]; // Qx
     929           0 :         x1 = &x[1]; // Qx
     930           0 :         FOR( i = 0; i < ( n >> 1 ); i++ )
     931             :         {
     932           0 :             *x1 = L_sub( *x0, *x1 ); // Qx
     933           0 :             move32();
     934           0 :             *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); //*x0 * 2 - *x1 (Qx)
     935           0 :             move32();
     936             : 
     937           0 :             x0++;
     938           0 :             x0++;
     939           0 :             x1++;
     940           0 :             x1++;
     941             :         }
     942             : 
     943             :         /*-----------------------------------------------------------------*
     944             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     945             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     946             :          * and the associated pointers initialization.
     947             :          *-----------------------------------------------------------------*/
     948             : 
     949             :         /* for_ (k = 2; k < 3; k++) */
     950             :         {
     951           0 :             x0 = x; // Qx
     952           0 :             x1 = x0 + 2;
     953             : 
     954           0 :             FOR( i = 0; i < n; i += 4 )
     955             :             {
     956           0 :                 *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      Qx*/
     957           0 :                 move32();
     958           0 :                 *x0 = L_sub( L_shl( *x0, 1 ), *x1++ ); /* x[i] = xt + x[i+n2];    */ /**x0 * 2 - *x1 (Qx)*/
     959           0 :                 move32();
     960           0 :                 *x1 = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4];     Qx*/
     961           0 :                 move32();
     962             : 
     963           0 :                 x0 += 4;
     964           0 :                 x1 += 3; /* x1 has already advanced */
     965             :             }
     966             :         }
     967             :     }
     968             : 
     969             :     /*-----------------------------------------------------------------*
     970             :      * Other butterflies
     971             :      *
     972             :      * The implementation described in [1] has been changed by using
     973             :      * table lookup for evaluating sine and cosine functions.  The
     974             :      * variable ind and its increment step are needed to access table
     975             :      * entries.  Note that this implementation assumes n4 to be so
     976             :      * small that ind will never exceed the table.  Thus the input
     977             :      * argument n and the constant N_MAX_FFT must be set properly.
     978             :      *-----------------------------------------------------------------*/
     979             : 
     980       45232 :     n4 = 1;
     981       45232 :     move16();
     982       45232 :     n2 = 2;
     983       45232 :     move16();
     984       45232 :     n1 = 4;
     985       45232 :     move16();
     986             : 
     987       45232 :     step = N_MAX_DIV4;
     988       45232 :     move16();
     989             : 
     990      361368 :     FOR( k = 3; k <= m; k++ )
     991             :     {
     992      316136 :         step = shr( step, 1 );
     993      316136 :         n4 = shl( n4, 1 );
     994      316136 :         n2 = shl( n2, 1 );
     995      316136 :         n1 = shl( n1, 1 );
     996             : 
     997      316136 :         x0 = x;
     998      316136 :         x1 = x0 + n2;
     999      316136 :         x2 = x1 + n4;
    1000             : 
    1001     6029368 :         FOR( i = 0; i < n; i += n1 )
    1002             :         {
    1003     5713232 :             *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      */
    1004     5713232 :             move32();
    1005     5713232 :             *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); /* x[i] = xt + x[i+n2];    */ /**x0 * 2 - *x1 (Qx)*/
    1006     5713232 :             move32();
    1007     5713232 :             *x2 = L_negate( *x2 ); /* x[i+n2+n4] = -x[i+n2+n4];    Qx */
    1008     5713232 :             move32();
    1009             : 
    1010     5713232 :             s = sincos_t_ext_fx;   // Q15
    1011     5713232 :             c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 Q15*/
    1012     5713232 :             xi1 = x0;
    1013     5713232 :             xi3 = xi1 + n2;
    1014     5713232 :             xi2 = xi3;
    1015     5713232 :             x0 += n1;
    1016     5713232 :             xi4 = x0;
    1017             : 
    1018    40278016 :             FOR( j = 1; j < n4; j++ )
    1019             :             {
    1020    34564784 :                 xi3++;
    1021    34564784 :                 xi1++;
    1022    34564784 :                 xi4--;
    1023    34564784 :                 xi2--;
    1024    34564784 :                 c += step;
    1025    34564784 :                 s += step; /* autoincrement by ar0 */
    1026             : 
    1027    34564784 :                 t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind);   Qx*/
    1028    34564784 :                 t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind);   Qx*/
    1029             : 
    1030    34564784 :                 *xi4 = L_sub( *xi2, t2 );
    1031    34564784 :                 move32();
    1032    34564784 :                 *xi2 = L_sub( *xi1, t1 );
    1033    34564784 :                 move32();
    1034    34564784 :                 *xi1 = L_sub( L_shl( *xi1, 1 ), *xi2 ); // Qx
    1035    34564784 :                 move32();
    1036    34564784 :                 *xi3 = L_negate( L_add( L_shl( t2, 1 ), *xi4 ) ); // Qx
    1037    34564784 :                 move32();
    1038             :             }
    1039             : 
    1040     5713232 :             x1 += n1;
    1041     5713232 :             x2 += n1;
    1042             :         }
    1043             :     }
    1044             : 
    1045       45232 :     return;
    1046             : }

Generated by: LCOV version 1.14