LCOV - code coverage report
Current view: top level - lib_com - fft_rel_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main enc/dec/rend @ 574a190e3c6896c6c4ed10d7f23649709a0c4347 Lines: 470 625 75.2 %
Date: 2025-06-27 02:59:36 Functions: 5 6 83.3 %

          Line data    Source code
       1             : /*====================================================================================
       2             :     EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
       3             :   ====================================================================================*/
       4             : 
       5             : #include <stdint.h>
       6             : #include "options.h" /* Compilation switches                   */
       7             : #include "prot_fx.h" /* Function prototypes                    */
       8             : #include "rom_com.h" /* Static table prototypes                */
       9             : #include "stl.h"
      10             : #include "stdint.h"
      11             : #include "wmc_auto.h"
      12             : 
      13             : /*---------------------------------------------------------------------*
      14             :  * Local constants
      15             :  *---------------------------------------------------------------------*/
      16             : 
      17             : #define N_MAX_FFT   1024
      18             : #define N_MAX_DIV2  ( N_MAX_FFT >> 1 )
      19             : #define N_MAX_DIV4  ( N_MAX_DIV2 >> 1 )
      20             : #define INV_SQR2_FX 23170
      21             : #define N_MAX_SAS   256
      22             : 
      23             : /*------------------------------------------------------------------
      24             :  *
      25             :  * This is an implementation of decimation-in-time FFT algorithm for
      26             :  * real sequences.  The techniques used here can be found in several
      27             :  * books, e.g., i) Proakis and Manolakis, "Digital Signal Processing",
      28             :  * 2nd Edition, Chapter 9, and ii) W.H. Press et. al., "Numerical
      29             :  * Recipes in C", 2nd Edition, Chapter 12.
      30             :  *
      31             :  * Input -  There are two inputs to this function:
      32             :  *
      33             :  *       1) An integer pointer to the input data array
      34             :  *       2) An integer value which should be set as +1 for FFT
      35             :  *          and some other value, e.g., -1 for ifFT
      36             :  *
      37             :  * Output - There is no return value.
      38             :  *       The input data are replaced with transformed data.  if the
      39             :  *       input is a real time domain sequence, it is replaced with
      40             :  *       the complex FFT for positive frequencies.  The FFT value
      41             :  *       for DC and the foldover frequency are combined to form the
      42             :  *       first complex number in the array.  The remaining complex
      43             :  *       numbers correspond to increasing frequencies.  if the input
      44             :  *       is a complex frequency domain sequence arranged as above,
      45             :  *       it is replaced with the corresponding time domain sequence.
      46             :  *
      47             :  * Notes:
      48             :  *
      49             :  *       1) This function is designed to be a part of a noise supp-
      50             :  *          ression algorithm that requires 128-point FFT of real
      51             :  *          sequences.  This is achieved here through a 64-point
      52             :  *          complex FFT.  Consequently, the FFT size information is
      53             :  *          not transmitted explicitly.  However, some flexibility
      54             :  *          is provided in the function to change the size of the
      55             :  *          FFT by specifying the size information through "define"
      56             :  *          statements.
      57             :  *
      58             :  *       2) The values of the complex sinusoids used in the FFT
      59             :  *          algorithm are computed once (i.e., the first time the
      60             :  *          r_fft function is called) and stored in a table. To
      61             :  *          further speed up the algorithm, these values can be
      62             :  *          precomputed and stored in a ROM table in actual DSP
      63             :  *          based implementations.
      64             :  *
      65             :  *       3) In the c_fft function, the FFT values are divided by
      66             :  *          2 after each stage of computation thus dividing the
      67             :  *          final FFT values by 64.  No multiplying factor is used
      68             :  *          for the ifFT.  This is somewhat different from the usual
      69             :  *          definition of FFT where the factor 1/N, i.e., 1/64, is
      70             :  *          used for the ifFT and not the FFT.  No factor is used in
      71             :  *          the r_fft function.
      72             :  *
      73             :  *       4) Much of the code for the FFT and ifFT parts in r_fft
      74             :  *          and c_fft functions are similar and can be combined.
      75             :  *          They are, however, kept separate here to speed up the
      76             :  *          execution.
      77             :  *------------------------------------------------------------------------*/
      78             : /*------------------------------------------------------------------------*
      79             :  * c_fft_fx:
      80             :  *
      81             :  * Computes the complex part of the split-radix FFT
      82             :  *------------------------------------------------------------------------*/
      83             : 
      84       69200 : static void c_fft_fx(
      85             :     const Word16 *phs_tbl, /* i  : Table of phases            */
      86             :     Word16 SIZE,           /* i  : Size of the FFT           */
      87             :     Word16 NUM_STAGE,      /* i  : Number of stages           */
      88             :     const Word16 *in_ptr,  /* i  : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Qx*/
      89             :     Word16 *out_ptr,       /* o  : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Q(x - 1)*/
      90             :     /* in_ptr & out_ptr must not overlap! */
      91             :     const Word16 isign ) /* i  : 1=fft, otherwise it is ifft*/
      92             : {
      93             :     Word16 i, j, k, ii, jj, kk, ji, kj;
      94             :     Word32 L_tmp1, L_tmp2;
      95             :     Word16 tmp1, tmp2, tmp3, tmp4;
      96             :     const Word16 *table_ptr;
      97             :     const Word16 *input_ptr1, *input_ptr2, *input_ptr3, *input_ptr4;
      98       69200 :     Word16 shift = 0;
      99       69200 :     move16();
     100             :     /* Setup Reorder Variables */
     101       69200 :     table_ptr = NULL;
     102       69200 :     table_ptr = FFT_REORDER_1024;
     103       69200 :     SWITCH( SIZE )
     104             :     {
     105          68 :         case 1024:
     106          68 :             shift = 0;
     107          68 :             move16();
     108          68 :             BREAK;
     109         420 :         case 512:
     110         420 :             shift = 1;
     111         420 :             move16();
     112         420 :             BREAK;
     113        6268 :         case 256:
     114        6268 :             shift = 2;
     115        6268 :             move16();
     116        6268 :             BREAK;
     117         186 :         case 128:
     118         186 :             shift = 3;
     119         186 :             move16();
     120         186 :             BREAK;
     121       62258 :         case 64:
     122       62258 :             shift = 4;
     123       62258 :             move16();
     124       62258 :             BREAK;
     125             :     }
     126             :     /* The FFT part */
     127       69200 :     IF( isign != 0 )
     128             :     {
     129             :         /* Unrolled 1st/2nd Stage
     130             :          * 1) to take advantage of Table Values (0 & +/- 16384)
     131             :          * 2) to perform reordering of Input Values
     132             :          */
     133      500598 :         FOR( k = 0; k < SIZE; k += 8 )
     134             :         {
     135             :             /*
     136             :              * This loop use:
     137             :              *   4 Word16 (tmp1...tmp4)
     138             :              *   2 Word32 (L_tmp1 & L_tmp2)
     139             :              *   4 Pointers (table_ptr, input_ptr1, input_ptr2, input_ptr3)
     140             :              *
     141             :              * The addition of 'in_ptr' + and index value from 'reorder_ptr'
     142             :              * is counted as a move16()
     143             :              */
     144             : 
     145      462888 :             input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift ); // Qx
     146             : 
     147      462888 :             L_tmp1 = L_mult( *input_ptr1++, 16384 );
     148      462888 :             L_tmp2 = L_mult( *input_ptr1, 16384 );
     149             : 
     150      462888 :             input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     151             : 
     152      462888 :             tmp1 = msu_r_sat( L_tmp1, *input_ptr1, 16384 );
     153      462888 :             tmp3 = mac_r_sat( L_tmp1, *input_ptr1++, 16384 );
     154      462888 :             input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     155      462888 :             input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     156             : 
     157      462888 :             L_tmp1 = L_mult( *input_ptr2++, 16384 );
     158      462888 :             tmp2 = mac_r_sat( L_tmp1, *input_ptr3, 16384 );
     159      462888 :             tmp4 = msu_r_sat( L_tmp1, *input_ptr3++, 16384 );
     160      462888 :             L_tmp1 = L_mult( tmp3, 16384 );
     161      462888 :             out_ptr[k] = mac_r_sat( L_tmp1, tmp2, 16384 );
     162      462888 :             move16();
     163      462888 :             out_ptr[k + 4] = msu_r_sat( L_tmp1, tmp2, 16384 );
     164      462888 :             move16();
     165             : 
     166      462888 :             tmp2 = mac_r_sat( L_tmp2, *input_ptr1, 16384 );
     167      462888 :             tmp3 = msu_r_sat( L_tmp2, *input_ptr1, 16384 );
     168      462888 :             L_tmp2 = L_mult( *input_ptr2, 16384 );
     169             : 
     170      462888 :             L_tmp1 = L_mult( tmp1, 16384 );
     171      462888 :             tmp1 = msu_r_sat( L_tmp2, *input_ptr3, 16384 );
     172      462888 :             out_ptr[k + 2] = mac_r_sat( L_tmp1, tmp1, 16384 );
     173      462888 :             move16();
     174      462888 :             out_ptr[k + 6] = msu_r_sat( L_tmp1, tmp1, 16384 );
     175      462888 :             move16();
     176      462888 :             L_tmp1 = L_mult( tmp2, 16384 );
     177      462888 :             tmp2 = mac_r_sat( L_tmp2, *input_ptr3, 16384 );
     178      462888 :             out_ptr[k + 1] = mac_r_sat( L_tmp1, tmp2, 16384 );
     179      462888 :             move16();
     180      462888 :             out_ptr[k + 5] = msu_r_sat( L_tmp1, tmp2, 16384 );
     181      462888 :             move16();
     182      462888 :             L_tmp1 = L_mult( tmp3, 16384 );
     183      462888 :             out_ptr[k + 3] = msu_r_sat( L_tmp1, tmp4, 16384 );
     184      462888 :             move16();
     185      462888 :             out_ptr[k + 7] = mac_r_sat( L_tmp1, tmp4, 16384 );
     186      462888 :             move16();
     187             :         }
     188             : 
     189             :         /* Remaining Stages */
     190      163977 :         FOR( i = 2; i < NUM_STAGE; i++ )
     191             :         {
     192             :             /* i is stage counter      */
     193      126267 :             jj = shl( 2, i );  /* FFT size                */
     194      126267 :             kk = shl( jj, 1 ); /* 2 * FFT size            */
     195      126267 :             ii = shr( SIZE, i );
     196      126267 :             ji = 0;
     197      126267 :             move16(); /* ji is phase table index */
     198             : 
     199     1826979 :             FOR( j = 0; j < jj; j += 2 )
     200             :             {
     201             :                 /* j is sample counter     */
     202     5356824 :                 FOR( k = j; k < SIZE; k += kk )
     203             :                 {
     204             :                     /* k is butterfly top     */
     205     3656112 :                     kj = add( k, jj ); /* kj is butterfly bottom */
     206             : 
     207             :                     /* Butterfly computations */
     208     3656112 :                     L_tmp1 = L_msu( L_mult( *( out_ptr + kj ), phs_tbl[ji] ),
     209     3656112 :                                     *( out_ptr + kj + 1 ), phs_tbl[ji + 1] );
     210     3656112 :                     L_tmp2 = L_mac( L_mult( *( out_ptr + kj + 1 ), phs_tbl[ji] ),
     211     3656112 :                                     *( out_ptr + kj ), phs_tbl[ji + 1] );
     212             : 
     213     3656112 :                     out_ptr[kj] = mac_r( L_negate( L_tmp1 ), out_ptr[k], 16384 );
     214     3656112 :                     move16();
     215     3656112 :                     out_ptr[kj + 1] = mac_r( L_negate( L_tmp2 ), out_ptr[k + 1], 16384 );
     216     3656112 :                     move16();
     217     3656112 :                     out_ptr[k] = mac_r( L_tmp1, out_ptr[k], 16384 );
     218     3656112 :                     move16();
     219     3656112 :                     out_ptr[k + 1] = mac_r( L_tmp2, out_ptr[k + 1], 16384 );
     220     3656112 :                     move16();
     221             :                 }
     222     1700712 :                 ji = add( ji, ii );
     223             :             }
     224             :         }
     225             :     }
     226             :     ELSE /* The ifFT part */
     227             :     {
     228             :         /* Unrolled 1st/2nd Stage
     229             :          * 1) to take advantage of Table Values (0 & +/- 16384)
     230             :          * 2) to perform reordering of Input Values
     231             :          */
     232      305802 :         FOR( k = 0; k < SIZE; k += 8 )
     233             :         {
     234             :             /*
     235             :              * This loop use:
     236             :              *   4 Word16 (tmp1...tmp4)
     237             :              *   2 Word32 (L_tmp1 & L_tmp2)
     238             :              *   5 Pointers (reorder_ptr, input_ptr1...input_ptr4)
     239             :              *
     240             :              * The addition of 'in_ptr' + and index value from 'reorder_ptr'
     241             :              * is counted as a move16()
     242             :              */
     243      274312 :             input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     244      274312 :             input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     245             : 
     246      274312 :             input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     247      274312 :             input_ptr4 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     248             : 
     249             : 
     250      274312 :             tmp3 = sub( *input_ptr1, *input_ptr2 );
     251      274312 :             tmp4 = add( *input_ptr1++, *input_ptr2++ );
     252             : 
     253      274312 :             tmp2 = sub( input_ptr3[0], input_ptr4[0] );
     254      274312 :             tmp1 = sub( input_ptr3[1], input_ptr4[1] );
     255             : 
     256      274312 :             out_ptr[k + 2] = sub( tmp3, tmp1 );
     257      274312 :             move16();
     258      274312 :             out_ptr[k + 6] = add( tmp3, tmp1 );
     259      274312 :             move16();
     260             : 
     261      274312 :             tmp1 = sub( *input_ptr1, *input_ptr2 );
     262      274312 :             out_ptr[k + 3] = add( tmp1, tmp2 );
     263      274312 :             move16();
     264      274312 :             out_ptr[k + 7] = sub( tmp1, tmp2 );
     265      274312 :             move16();
     266             : 
     267      274312 :             tmp1 = add( input_ptr3[0], input_ptr4[0] );
     268      274312 :             tmp3 = add( input_ptr3[1], input_ptr4[1] );
     269             : 
     270      274312 :             out_ptr[k] = add( tmp4, tmp1 );
     271      274312 :             move16();
     272      274312 :             out_ptr[k + 4] = sub( tmp4, tmp1 );
     273      274312 :             move16();
     274             : 
     275      274312 :             tmp4 = add( *input_ptr1, *input_ptr2 );
     276      274312 :             out_ptr[k + 1] = add( tmp4, tmp3 );
     277      274312 :             move16();
     278      274312 :             out_ptr[k + 5] = sub( tmp4, tmp3 );
     279      274312 :             move16();
     280             :         }
     281             : 
     282       31490 :         table_ptr = phs_tbl + SIZE; /* access part of table that is scaled by 2 */
     283             : 
     284             :         /* Remaining Stages */
     285      127077 :         FOR( i = 2; i < NUM_STAGE; i++ )
     286             :         {
     287             :             /* i is stage counter      */
     288       95587 :             jj = shl( 2, i );  /* FFT size                */
     289       95587 :             kk = shl( jj, 1 ); /* 2 * FFT size            */
     290       95587 :             ii = shr( SIZE, i );
     291       95587 :             ji = 0;
     292       95587 :             move16(); /* ji is phase table index */
     293             : 
     294     1066875 :             FOR( j = 0; j < jj; j += 2 )
     295             :             {
     296             :                 /* j is sample counter     */
     297             :                 /* This can be computed by successive add_fxitions of ii to ji, starting from 0
     298             :                    hence line-count it as a one-line add (still need to increment op count!!) */
     299             : 
     300     2777544 :                 FOR( k = j; k < SIZE; k += kk )
     301             :                 {
     302             :                     /* k is butterfly top     */
     303     1806256 :                     kj = add( k, jj ); /* kj is butterfly bottom */
     304             : 
     305             :                     /* Butterfly computations */
     306     1806256 :                     tmp1 = mac_r_sat( L_mult_sat( out_ptr[kj], table_ptr[ji] ), out_ptr[kj + 1], table_ptr[ji + 1] );
     307             : 
     308     1806256 :                     tmp2 = msu_r_sat( L_mult_sat( out_ptr[kj + 1], table_ptr[ji] ), out_ptr[kj], table_ptr[ji + 1] );
     309             : 
     310     1806256 :                     out_ptr[kj] = sub_sat( out_ptr[k], tmp1 );
     311     1806256 :                     move16();
     312     1806256 :                     out_ptr[kj + 1] = sub_sat( out_ptr[k + 1], tmp2 );
     313     1806256 :                     move16();
     314     1806256 :                     out_ptr[k] = add_sat( out_ptr[k], tmp1 );
     315     1806256 :                     move16();
     316     1806256 :                     out_ptr[k + 1] = add_sat( out_ptr[k + 1], tmp2 );
     317     1806256 :                     move16();
     318             :                 }
     319      971288 :                 ji = add( ji, ii );
     320             :             }
     321             :         }
     322             :     }
     323       69200 : }
     324             : 
     325             : /*--------------------------------------------------------------------------------*
     326             :  * r_fft_fx:
     327             :  *
     328             :  * Perform FFT fixed-point for real-valued sequences of length 32, 64 or 128
     329             :  *--------------------------------------------------------------------------------*/
     330       69200 : void r_fft_fx_lc(
     331             :     const Word16 *phs_tbl,  /* i  : Table of phase            */
     332             :     const Word16 SIZE,      /* i  : Size of the FFT           */
     333             :     const Word16 SIZE2,     /* i  : Size / 2                  */
     334             :     const Word16 NUM_STAGE, /* i  : Number of stage           */
     335             :     const Word16 *in_ptr,   /* i  : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Qx*/
     336             :     Word16 *out_ptr,        /* o  : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Q(x - 1)*/
     337             :     const Word16 isign      /* i  : 1=fft, otherwize it's ifft                                                      */
     338             : )
     339             : {
     340             :     Word16 tmp2_real, tmp2_imag;
     341             :     Word32 Ltmp1_real, Ltmp1_imag;
     342             :     Word16 i;
     343             :     Word32 Ltmp1;
     344             :     const Word16 *phstbl_ptrDn;
     345             :     Word16 *ptrDn;
     346             :     Word16 temp[1024]; /* Accommodates real input FFT size up to 1024. */
     347             : 
     348             :     /* Setup Pointers */
     349       69200 :     phstbl_ptrDn = &phs_tbl[SIZE - 1];
     350             : 
     351             :     /* The FFT part */
     352       69200 :     IF( isign != 0 )
     353             :     {
     354             :         Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
     355             : 
     356             :         /* Perform the complex FFT */
     357       37710 :         c_fft_fx( phs_tbl, SIZE, NUM_STAGE, in_ptr, temp, isign );
     358             : 
     359             :         /* First, handle the DC and foldover frequencies */
     360       37710 :         out_ptr[SIZE2] = sub( temp[0], temp[1] );
     361       37710 :         move16();
     362       37710 :         out_ptr[0] = sub( add( temp[0], temp[1] ), shr( NUM_STAGE, 1 ) );
     363       37710 :         move16(); /* DC have a small offset */
     364             : 
     365       37710 :         ptrDn = &temp[SIZE - 1];
     366             : 
     367       37710 :         ptImaDn = &out_ptr[SIZE - 1];
     368       37710 :         ptRealUp = &out_ptr[1];
     369       37710 :         ptImaUp = &out_ptr[SIZE2 + 1];
     370       37710 :         ptRealDn = &out_ptr[SIZE2 - 1];
     371             : 
     372             :         /* Now, handle the remaining positive frequencies */
     373      963486 :         FOR( i = 2; i <= SIZE2; i += 2 )
     374             :         {
     375      925776 :             Ltmp1_imag = L_mult( temp[i + 1], 16384 );
     376      925776 :             Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptrDn, 16384 );
     377      925776 :             tmp2_real = add_sat( temp[i + 1], *ptrDn-- );
     378             : 
     379      925776 :             Ltmp1_real = L_mult( temp[i], 16384 );
     380      925776 :             Ltmp1_real = L_mac_sat( Ltmp1_real, *ptrDn, 16384 );
     381      925776 :             tmp2_imag = sub( *ptrDn--, temp[i] );
     382             : 
     383             : 
     384      925776 :             *ptRealUp++ = msu_r_sat( L_mac_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
     385      925776 :             move16();
     386      925776 :             *ptImaDn-- = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
     387      925776 :             move16();
     388      925776 :             Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
     389      925776 :             Ltmp1_real = L_mac_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
     390      925776 :             *ptImaUp++ = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
     391      925776 :             move16();
     392      925776 :             *ptRealDn-- = mac_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
     393      925776 :             move16();
     394             :         }
     395             :     }
     396             :     ELSE /* The ifFT part */
     397             :     {
     398             :         const Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
     399             : 
     400             :         /* First, handle the DC and foldover frequencies */
     401       31490 :         Ltmp1 = L_mult( in_ptr[0], 16384 );
     402       31490 :         temp[0] = mac_r( Ltmp1, in_ptr[SIZE2], 16384 );
     403       31490 :         move16();
     404       31490 :         temp[1] = msu_r( Ltmp1, in_ptr[SIZE2], 16384 );
     405       31490 :         move16();
     406             : 
     407       31490 :         ptrDn = &temp[SIZE - 1];
     408             : 
     409             :         /* Here we cast to Word16 * from a const Word16 *. */
     410             :         /* This is ok because we use these pointers for    */
     411             :         /* reading only. This is just to avoid declaring a */
     412             :         /* bunch of 4 other pointer with const Word16 *.   */
     413       31490 :         ptImaDn = &in_ptr[SIZE - 1];   // Qx
     414       31490 :         ptRealUp = &in_ptr[1];         // Qx
     415       31490 :         ptImaUp = &in_ptr[SIZE2 + 1];  // Qx
     416       31490 :         ptRealDn = &in_ptr[SIZE2 - 1]; // Qx
     417             : 
     418             :         /* Now, handle the remaining positive frequencies */
     419      580114 :         FOR( i = 2; i <= SIZE2; i += 2 )
     420             :         {
     421      548624 :             Ltmp1_imag = L_mult( *ptImaDn, 16384 );
     422      548624 :             Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptImaUp, 16384 );
     423      548624 :             tmp2_real = add_sat( *ptImaDn--, *ptImaUp++ );
     424      548624 :             Ltmp1_real = L_mult( *ptRealUp, 16384 );
     425      548624 :             Ltmp1_real = L_mac_sat( Ltmp1_real, *ptRealDn, 16384 );
     426      548624 :             tmp2_imag = sub_sat( *ptRealUp++, *ptRealDn-- );
     427      548624 :             temp[i] = mac_r_sat( L_msu_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
     428      548624 :             move16();
     429      548624 :             temp[i + 1] = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
     430      548624 :             move16();
     431      548624 :             Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
     432      548624 :             Ltmp1_real = L_msu_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
     433      548624 :             *ptrDn-- = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
     434      548624 :             move16();
     435      548624 :             *ptrDn-- = msu_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
     436      548624 :             move16();
     437             :         }
     438             : 
     439             :         /* Perform the complex ifFT */
     440       31490 :         c_fft_fx( phs_tbl, SIZE, NUM_STAGE, temp, out_ptr, isign );
     441             :     }
     442       69200 : }
     443             : 
     444             : 
     445             : /*---------------------------------------------------------------------*
     446             :  *  fft_rel()
     447             :  *
     448             :  *  Computes the split-radix FFT in place for the real-valued
     449             :  *  signal x of length n.  The algorithm has been ported from
     450             :  *  the Fortran code of [1].
     451             :  *
     452             :  *  The function  needs sine and cosine tables t_sin and t_cos,
     453             :  *  and the constant N_MAX_FFT.  The table  entries  are defined as
     454             :  *  sin(2*pi*i) and cos(2*pi*i) for i = 0, 1, ..., N_MAX_FFT-1. The
     455             :  *  implementation  assumes  that any entry  will not be needed
     456             :  *  outside the tables. Therefore, N_MAX_FFT and n must be properly
     457             :  *  set.  The function has been tested  with the values n = 16,
     458             :  *  32, 64, 128, 256, and N_MAX_FFT = 1280.
     459             :  *
     460             :  *  References
     461             :  *  [1] H.V. Sorensen,  D.L. Jones, M.T. Heideman, C.S. Burrus,
     462             :  *      "Real-valued fast  Fourier transform  algorithm,"  IEEE
     463             :  *      Trans. on Signal Processing,  Vol.35, No.6, pp 849-863,
     464             :  *      1987.
     465             :  *
     466             :  *  OUTPUT
     467             :  *      x[0:n-1]  Transform coeffients in the order re[0], re[1],
     468             :  *                ..., re[n/2], im[n/2-1], ..., im[1].
     469             :  *---------------------------------------------------------------------*/
     470             : 
     471           0 : void fft_rel(
     472             :     float x[],       /* i/o: input/output vector    */
     473             :     const int16_t n, /* i  : vector length          */
     474             :     const int16_t m  /* i  : log2 of vector length  */
     475             : )
     476             : {
     477             :     int16_t i, j, k, n1, n2, n4;
     478             :     int16_t step;
     479             :     float xt, t1, t2;
     480             :     float *x0, *x1, *x2;
     481             :     float *xi2, *xi3, *xi4, *xi1;
     482             :     const float *s, *c;
     483             :     const int16_t *idx;
     484             : 
     485             :     /* !!!! NMAX = 256 is hardcoded here  (similar optimizations should be done for NMAX > 256) !!! */
     486             : 
     487             :     float *x2even, *x2odd;
     488             :     float temp[512];
     489             : 
     490           0 :     if ( n == 128 || n == 256 || n == 512 )
     491             :     {
     492           0 :         idx = fft256_read_indexes;
     493             : 
     494             :         /* Combined Digit reverse counter & Length two butterflies */
     495           0 :         if ( n == 128 )
     496             :         {
     497           0 :             x2 = temp;
     498           0 :             for ( i = 0; i < 64; i++ )
     499             :             {
     500           0 :                 j = *idx++;
     501           0 :                 k = *idx++;
     502             : 
     503           0 :                 *x2++ = x[j >> 1] + x[k >> 1];
     504           0 :                 *x2++ = x[j >> 1] - x[k >> 1];
     505             :             }
     506             :         }
     507           0 :         else if ( n == 256 )
     508             :         {
     509           0 :             x2 = temp;
     510           0 :             for ( i = 0; i < 128; i++ )
     511             :             {
     512           0 :                 j = *idx++;
     513           0 :                 k = *idx++;
     514             : 
     515           0 :                 *x2++ = x[j] + x[k];
     516           0 :                 *x2++ = x[j] - x[k];
     517             :             }
     518             :         }
     519           0 :         else if ( n == 512 )
     520             :         {
     521           0 :             x2even = temp;
     522           0 :             x2odd = temp + 256;
     523             : 
     524           0 :             for ( i = 0; i < 128; i++ )
     525             :             {
     526           0 :                 j = 2 * *idx++;
     527           0 :                 k = 2 * *idx++;
     528             : 
     529           0 :                 *x2even++ = x[j] + x[k];
     530           0 :                 *x2even++ = x[j] - x[k];
     531           0 :                 *x2odd++ = x[++j] + x[++k];
     532           0 :                 *x2odd++ = x[j] - x[k];
     533             :             }
     534             :         }
     535             : 
     536             :         /*-----------------------------------------------------------------*
     537             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     538             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     539             :          * and the associated pointers initialization.
     540             :          * Also, it allows to Put the Data from 'temp' back into 'x' due
     541             :          * to the previous Combined Digit Reverse and Length two butterflies
     542             :          *-----------------------------------------------------------------*/
     543             : 
     544             :         /*for_ (k = 2; k < 3; k++)*/
     545             :         {
     546           0 :             x0 = temp;
     547           0 :             x1 = x0 + 2;
     548           0 :             x2 = x;
     549             : 
     550           0 :             for ( i = 0; i < n; i += 4 )
     551             :             {
     552           0 :                 *x2++ = *x0++ + *x1; /* x[i] = xt + x[i+n2];    */
     553           0 :                 *x2++ = *x0;
     554           0 :                 *x2++ = *--x0 - *x1++; /* x[i+n2] = xt - x[i+n2];      */
     555           0 :                 *x2++ = -*x1;          /* x[i+n2+n4] = -x[i+n2+n4];     */
     556             : 
     557           0 :                 x0 += 4;
     558           0 :                 x1 += 3; /* x1 has already advanced */
     559             :             }
     560             :         }
     561             :     }
     562             :     else
     563             :     {
     564             :         /*-----------------------------------------------------------------*
     565             :          * Digit reverse counter
     566             :          *-----------------------------------------------------------------*/
     567             : 
     568           0 :         j = 0;
     569           0 :         x0 = &x[0];
     570           0 :         for ( i = 0; i < n - 1; i++ )
     571             :         {
     572           0 :             if ( i < j )
     573             :             {
     574           0 :                 xt = x[j];
     575           0 :                 x[j] = *x0;
     576           0 :                 *x0 = xt;
     577             :             }
     578           0 :             x0++;
     579           0 :             k = n / 2;
     580           0 :             while ( k <= j )
     581             :             {
     582           0 :                 j -= k;
     583           0 :                 k = k >> 1;
     584             :             }
     585           0 :             j += k;
     586             :         }
     587             : 
     588             :         /*-----------------------------------------------------------------*
     589             :          * Length two butterflies
     590             :          *-----------------------------------------------------------------*/
     591             : 
     592           0 :         x0 = &x[0];
     593           0 :         x1 = &x[1];
     594           0 :         for ( i = 0; i < n / 2; i++ )
     595             :         {
     596           0 :             *x1 = *x0 - *x1;
     597           0 :             *x0 = *x0 * 2 - *x1;
     598             : 
     599           0 :             x0++;
     600           0 :             x0++;
     601           0 :             x1++;
     602           0 :             x1++;
     603             :         }
     604             : 
     605             :         /*-----------------------------------------------------------------*
     606             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     607             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     608             :          * and the associated pointers initialization.
     609             :          *-----------------------------------------------------------------*/
     610             : 
     611             :         /* for_ (k = 2; k < 3; k++) */
     612             :         {
     613           0 :             x0 = x;
     614           0 :             x1 = x0 + 2;
     615             : 
     616           0 :             for ( i = 0; i < n; i += 4 )
     617             :             {
     618           0 :                 *x1 = *x0 - *x1;       /* x[i+n2] = xt - x[i+n2];      */
     619           0 :                 *x0 = *x0 * 2 - *x1++; /* x[i] = xt + x[i+n2];    */
     620           0 :                 *x1 = -*x1;            /* x[i+n2+n4] = -x[i+n2+n4];     */
     621             : 
     622           0 :                 x0 += 4;
     623           0 :                 x1 += 3; /* x1 has already advanced */
     624             :             }
     625             :         }
     626             :     }
     627             : 
     628             :     /*-----------------------------------------------------------------*
     629             :      * Other butterflies
     630             :      *
     631             :      * The implementation described in [1] has been changed by using
     632             :      * table lookup for evaluating sine and cosine functions.  The
     633             :      * variable ind and its increment step are needed to access table
     634             :      * entries.  Note that this implementation assumes n4 to be so
     635             :      * small that ind will never exceed the table.  Thus the input
     636             :      * argument n and the constant N_MAX_FFT must be set properly.
     637             :      *-----------------------------------------------------------------*/
     638             : 
     639           0 :     n4 = 1;
     640           0 :     n2 = 2;
     641           0 :     n1 = 4;
     642             : 
     643           0 :     step = N_MAX_DIV4;
     644             : 
     645           0 :     for ( k = 3; k <= m; k++ )
     646             :     {
     647           0 :         step >>= 1;
     648           0 :         n4 <<= 1;
     649           0 :         n2 <<= 1;
     650           0 :         n1 <<= 1;
     651             : 
     652           0 :         x0 = x;
     653           0 :         x1 = x0 + n2;
     654           0 :         x2 = x1 + n4;
     655             : 
     656           0 :         for ( i = 0; i < n; i += n1 )
     657             :         {
     658           0 :             *x1 = *x0 - *x1;     /* x[i+n2] = xt - x[i+n2];      */
     659           0 :             *x0 = *x0 * 2 - *x1; /* x[i] = xt + x[i+n2];    */
     660           0 :             *x2 = -*x2;          /* x[i+n2+n4] = -x[i+n2+n4];     */
     661             : 
     662           0 :             s = sincos_t_ext;
     663           0 :             c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 */
     664           0 :             xi1 = x0;
     665           0 :             xi3 = xi1 + n2;
     666           0 :             xi2 = xi3;
     667           0 :             x0 += n1;
     668           0 :             xi4 = x0;
     669             : 
     670           0 :             for ( j = 1; j < n4; j++ )
     671             :             {
     672           0 :                 xi3++;
     673           0 :                 xi1++;
     674           0 :                 xi4--;
     675           0 :                 xi2--;
     676           0 :                 c += step;
     677           0 :                 s += step; /* autoincrement by ar0 */
     678             : 
     679           0 :                 t1 = *xi3 * *c + *xi4 * *s; /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind);   */
     680           0 :                 t2 = *xi3 * *s - *xi4 * *c; /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind);     */
     681             : 
     682           0 :                 *xi4 = *xi2 - t2;
     683           0 :                 *xi2 = *xi1 - t1;
     684           0 :                 *xi1 = *xi1 * 2 - *xi2;
     685           0 :                 *xi3 = -2 * t2 - *xi4;
     686             :             }
     687             : 
     688           0 :             x1 += n1;
     689           0 :             x2 += n1;
     690             :         }
     691             :     }
     692             : 
     693           0 :     return;
     694             : }
     695             : 
     696     2118622 : void fft_rel_16_32fx(
     697             :     Word16 x[],  /* i/o: input/output vector Qx   */
     698             :     Word16 *q_x, /* extra scaling added on speech buffer*/
     699             :     Word16 i_subfr,
     700             :     const Word16 n, /* i  : vector length          */
     701             :     const Word16 m  /* i  : log2 of vector length  */
     702             : )
     703             : {
     704             :     Word16 i, j, k, n1, n2, n4;
     705             :     Word16 step;
     706             :     Word32 xt, t1, t2;
     707             :     Word32 *x0, *x1, *x2;
     708             :     const Word16 *s, *c;
     709             :     Word32 *xi2, *xi3, *xi4, *xi1;
     710             : 
     711             :     Word32 fft_bff32[L_FFT];
     712     2118622 :     Copy_Scale_sig_16_32_no_sat( x, fft_bff32, L_FFT, 0 ); // copying x to fft_bff32 without scaling
     713             : 
     714             :     /*-----------------------------------------------------------------*
     715             :      * Digit reverse counter
     716             :      *-----------------------------------------------------------------*/
     717             : 
     718     2118622 :     j = 0;
     719     2118622 :     move16();
     720     2118622 :     x0 = &fft_bff32[0]; // Qx
     721   542367232 :     FOR( i = 0; i < n - 1; i++ )
     722             :     {
     723   540248610 :         IF( LT_16( i, j ) )
     724             :         {
     725   254234640 :             xt = fft_bff32[j]; // Qx
     726   254234640 :             move32();
     727   254234640 :             fft_bff32[j] = *x0; // Qx
     728   254234640 :             move32();
     729   254234640 :             *x0 = xt; // Qx
     730   254234640 :             move32();
     731             :         }
     732   540248610 :         x0++;
     733   540248610 :         k = shr( n, 1 );
     734  1063548244 :         WHILE( ( k <= j ) )
     735             :         {
     736   523299634 :             j = sub( j, k );
     737   523299634 :             k = shr( k, 1 );
     738             :         }
     739   540248610 :         j = add( j, k );
     740             :     }
     741             : 
     742             :     /*-----------------------------------------------------------------*
     743             :      * Length two butterflies
     744             :      *-----------------------------------------------------------------*/
     745             : 
     746     2118622 :     x0 = &fft_bff32[0];
     747     2118622 :     x1 = &fft_bff32[1];
     748   273302238 :     FOR( i = 0; i < ( n >> 1 ); i++ )
     749             :     {
     750   271183616 :         xt = *x0;
     751   271183616 :         move32();
     752   271183616 :         *x0 = L_add( xt, *x1 );
     753   271183616 :         move32();
     754   271183616 :         *x1 = L_sub( xt, *x1 );
     755   271183616 :         move32();
     756   271183616 :         x0++;
     757   271183616 :         x0++;
     758   271183616 :         x1++;
     759   271183616 :         x1++;
     760             :     }
     761             : 
     762             :     /*-----------------------------------------------------------------*
     763             :      * Other butterflies
     764             :      *
     765             :      * The implementation described in [1] has been changed by using
     766             :      * table lookup for evaluating sine and cosine functions.  The
     767             :      * variable ind and its increment step are needed to access table
     768             :      * entries.  Note that this implementation assumes n4 to be so
     769             :      * small that ind will never exceed the table.  Thus the input
     770             :      * argument n and the constant N_MAX_SAS must be set properly.
     771             :      *-----------------------------------------------------------------*/
     772             : 
     773     2118622 :     n2 = 1;
     774     2118622 :     move16();
     775             :     /* step = N_MAX_SAS/4; */
     776    16948976 :     FOR( k = 2; k <= m; k++ )
     777             :     {
     778    14830354 :         n4 = n2;
     779    14830354 :         move16();
     780    14830354 :         n2 = shl( n4, 1 );
     781    14830354 :         n1 = shl( n2, 1 );
     782             : 
     783    14830354 :         step = idiv1616( N_MAX_SAS, n1 );
     784             : 
     785    14830354 :         x0 = fft_bff32;
     786    14830354 :         x1 = fft_bff32 + n2;
     787    14830354 :         x2 = fft_bff32 + add( n2, n4 );
     788   283895348 :         FOR( i = 0; i < n; i += n1 )
     789             :         {
     790   269064994 :             xt = *x0;
     791   269064994 :             move32(); /* xt = x[i];   */
     792   269064994 :             *x0 = L_add( xt, *x1 );
     793   269064994 :             move32(); /* x[i] = xt + x[i+n2];    */
     794   269064994 :             *x1 = L_sub( xt, *x1 );
     795   269064994 :             move32(); /* x[i+n2] = xt - x[i+n2];      */
     796   269064994 :             *x2 = L_negate( *x2 );
     797   269064994 :             move32(); /* x[i+n2+n4] = -x[i+n2+n4];     */
     798             : 
     799             : 
     800   269064994 :             s = sincos_t_fx + step; // Q15
     801   269064994 :             c = s + 64;             // Q15
     802   269064994 :             xi1 = fft_bff32 + add( i, 1 );
     803   269064994 :             xi3 = xi1 + n2;
     804   269064994 :             xi2 = xi3 - 2;
     805   269064994 :             xi4 = xi1 + sub( n1, 2 );
     806             : 
     807   949142656 :             FOR( j = 1; j < n4; j++ )
     808             :             {
     809   680077662 :                 t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx  */
     810   680077662 :                 t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx    */
     811   680077662 :                 *xi4 = L_sub( *xi2, t2 );
     812   680077662 :                 move32();
     813   680077662 :                 *xi3 = L_negate( L_add( *xi2, t2 ) );
     814   680077662 :                 move32();
     815   680077662 :                 *xi2 = L_sub( *xi1, t1 );
     816   680077662 :                 move32();
     817   680077662 :                 *xi1 = L_add( *xi1, t1 );
     818   680077662 :                 move32();
     819             : 
     820   680077662 :                 xi4--;
     821   680077662 :                 xi2--;
     822   680077662 :                 xi3++;
     823   680077662 :                 xi1++;
     824   680077662 :                 c += step;
     825   680077662 :                 s += step; /* autoincrement by ar0 */
     826             :             }
     827             : 
     828   269064994 :             x0 += n1;
     829   269064994 :             x1 += n1;
     830   269064994 :             x2 += n1;
     831             :         }
     832             :         /* step = shr(step, 1); */
     833             :     }
     834     2118622 :     Word16 norm = L_norm_arr( fft_bff32, L_FFT );
     835     2118622 :     IF( i_subfr == 0 )
     836             :     {
     837     1059311 :         Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
     838     1059311 :         *q_x = sub( norm, 16 );
     839     1059311 :         move16();
     840             :     }
     841             :     ELSE
     842             :     {
     843     1059311 :         IF( LT_16( sub( norm, 16 ), *q_x ) )
     844             :         {
     845      211536 :             scale_sig( x - L_FFT, L_FFT, sub( sub( norm, 16 ), *q_x ) );
     846      211536 :             Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
     847      211536 :             *q_x = sub( norm, 16 );
     848      211536 :             move16();
     849             :         }
     850             :         ELSE
     851             :         {
     852      847775 :             Copy_Scale_sig32_16( fft_bff32, x, L_FFT, add( 16, *q_x ) );
     853             :         }
     854             :     }
     855             : 
     856     2118622 :     return;
     857             : }
     858             : 
     859      206912 : void fft_rel_fx(
     860             :     Word16 x[],     /* i/o: input/output vector Qx   */
     861             :     const Word16 n, /* i  : vector length          */
     862             :     const Word16 m  /* i  : log2 of vector length  */
     863             : )
     864             : {
     865             :     Word16 i, j, k, n1, n2, n4;
     866             :     Word16 step;
     867             :     Word16 xt, t1, t2;
     868             :     Word16 *x0, *x1, *x2;
     869             :     const Word16 *s, *c;
     870             :     Word16 *xi2, *xi3, *xi4, *xi1;
     871             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     872      206912 :     Flag Overflow = 0;
     873      206912 :     move32();
     874             : #endif
     875             : 
     876             : 
     877             :     /*-----------------------------------------------------------------*
     878             :      * Digit reverse counter
     879             :      *-----------------------------------------------------------------*/
     880             : 
     881      206912 :     j = 0;
     882      206912 :     move16();
     883      206912 :     x0 = &x[0]; // Qx
     884      206912 :     move16();
     885    46000916 :     FOR( i = 0; i < n - 1; i++ )
     886             :     {
     887    45794004 :         IF( LT_16( i, j ) )
     888             :         {
     889    21538733 :             xt = x[j]; // Qx
     890    21538733 :             move16();
     891    21538733 :             x[j] = *x0; // Qx
     892    21538733 :             move16();
     893    21538733 :             *x0 = xt; // Qx
     894    21538733 :             move16();
     895             :         }
     896    45794004 :         x0++;
     897    45794004 :         k = shr( n, 1 );
     898    90098630 :         WHILE( ( k <= j ) )
     899             :         {
     900    44304626 :             j = sub( j, k );
     901    44304626 :             k = shr( k, 1 );
     902             :         }
     903    45794004 :         j = add( j, k );
     904             :     }
     905             : 
     906             :     /*-----------------------------------------------------------------*
     907             :      * Length two butterflies
     908             :      *-----------------------------------------------------------------*/
     909             : 
     910      206912 :     x0 = &x[0];
     911      206912 :     move16();
     912      206912 :     x1 = &x[1];
     913      206912 :     move16();
     914    23207370 :     FOR( i = 0; i < ( n >> 1 ); i++ )
     915             :     {
     916    23000458 :         xt = *x0;
     917    23000458 :         move16();
     918    23000458 :         *x0 = add_o( xt, *x1, &Overflow );
     919    23000458 :         move16();
     920    23000458 :         *x1 = sub_o( xt, *x1, &Overflow );
     921    23000458 :         move16();
     922    23000458 :         x0++;
     923    23000458 :         x0++;
     924    23000458 :         x1++;
     925    23000458 :         x1++;
     926             :     }
     927             : 
     928             :     /*-----------------------------------------------------------------*
     929             :      * Other butterflies
     930             :      *
     931             :      * The implementation described in [1] has been changed by using
     932             :      * table lookup for evaluating sine and cosine functions.  The
     933             :      * variable ind and its increment step are needed to access table
     934             :      * entries.  Note that this implementation assumes n4 to be so
     935             :      * small that ind will never exceed the table.  Thus the input
     936             :      * argument n and the constant N_MAX_SAS must be set properly.
     937             :      *-----------------------------------------------------------------*/
     938             : 
     939      206912 :     n2 = 1;
     940      206912 :     move16();
     941             :     /* step = N_MAX_SAS/4; */
     942     1489378 :     FOR( k = 2; k <= m; k++ )
     943             :     {
     944     1282466 :         n4 = n2;
     945     1282466 :         move16();
     946     1282466 :         n2 = shl( n4, 1 );
     947     1282466 :         n1 = shl( n2, 1 );
     948             : 
     949     1282466 :         step = idiv1616( N_MAX_SAS, n1 );
     950             : 
     951     1282466 :         x0 = x;
     952     1282466 :         x1 = x + n2;
     953     1282466 :         x2 = x + add( n2, n4 );
     954    24076012 :         FOR( i = 0; i < n; i += n1 )
     955             :         {
     956    22793546 :             xt = *x0;
     957    22793546 :             move16(); /* xt = x[i];   */
     958    22793546 :             *x0 = add_o( xt, *x1, &Overflow );
     959    22793546 :             move16(); /* x[i] = xt + x[i+n2];    */
     960    22793546 :             *x1 = sub_o( xt, *x1, &Overflow );
     961    22793546 :             move16(); /* x[i+n2] = xt - x[i+n2];      */
     962    22793546 :             *x2 = negate( *x2 );
     963    22793546 :             move16(); /* x[i+n2+n4] = -x[i+n2+n4];     */
     964             : 
     965             : 
     966    22793546 :             s = sincos_t_fx + step; // Q15
     967    22793546 :             c = s + 64;             // Q15
     968    22793546 :             xi1 = x + add( i, 1 );
     969    22793546 :             xi3 = xi1 + n2;
     970    22793546 :             xi2 = xi3 - 2;
     971    22793546 :             xi4 = xi1 + sub( n1, 2 );
     972             : 
     973    80335685 :             FOR( j = 1; j < n4; j++ )
     974             :             {
     975    57542139 :                 t1 = add_o( mult_r( *xi3, *c ), mult_r( *xi4, *s ), &Overflow ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx  */
     976    57542139 :                 t2 = sub_o( mult_r( *xi3, *s ), mult_r( *xi4, *c ), &Overflow ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx    */
     977    57542139 :                 *xi4 = sub_o( *xi2, t2, &Overflow );
     978    57542139 :                 move16();
     979    57542139 :                 *xi3 = negate( add_o( *xi2, t2, &Overflow ) );
     980    57542139 :                 move16();
     981    57542139 :                 *xi2 = sub_o( *xi1, t1, &Overflow );
     982    57542139 :                 move16();
     983    57542139 :                 *xi1 = add_o( *xi1, t1, &Overflow );
     984    57542139 :                 move16();
     985             : 
     986    57542139 :                 xi4--;
     987    57542139 :                 xi2--;
     988    57542139 :                 xi3++;
     989    57542139 :                 xi1++;
     990    57542139 :                 c += step;
     991    57542139 :                 s += step; /* autoincrement by ar0 */
     992             :             }
     993             : 
     994    22793546 :             x0 += n1;
     995    22793546 :             x1 += n1;
     996    22793546 :             x2 += n1;
     997             :         }
     998             :         /* step = shr(step, 1); */
     999             :     }
    1000             : 
    1001      206912 :     return;
    1002             : }
    1003             : 
    1004       44321 : void fft_rel_fx32(
    1005             :     Word32 x[],     /* i/o: input/output vector Qx   */
    1006             :     const Word16 n, /* i  : vector length          */
    1007             :     const Word16 m  /* i  : log2 of vector length  */
    1008             : )
    1009             : {
    1010             :     Word16 i, j, k, n1, n2, n4;
    1011             :     Word16 step;
    1012             :     Word32 xt, t1, t2;
    1013             :     Word32 *x0, *x1, *x2;
    1014             :     Word32 *xi2, *xi3, *xi4, *xi1;
    1015             :     const Word16 *s, *c;
    1016             :     const Word16 *idx;
    1017             : 
    1018             :     /* !!!! NMAX = 256 is hardcoded here  (similar optimizations should be done for NMAX > 256) !!! */
    1019             : 
    1020             :     Word32 *x2even, *x2odd;
    1021             :     Word32 temp[512];
    1022             : 
    1023       44321 :     test();
    1024       44321 :     test();
    1025       44321 :     IF( EQ_16( n, 128 ) || EQ_16( n, 256 ) || EQ_16( n, 512 ) )
    1026             :     {
    1027       44321 :         idx = fft256_read_indexes;
    1028             : 
    1029             :         /* Combined Digit reverse counter & Length two butterflies */
    1030       44321 :         IF( EQ_16( n, 128 ) )
    1031             :         {
    1032           0 :             x2 = temp;
    1033           0 :             FOR( i = 0; i < 64; i++ )
    1034             :             {
    1035           0 :                 j = *idx++;
    1036           0 :                 move16();
    1037           0 :                 k = *idx++;
    1038           0 :                 move16();
    1039             : 
    1040           0 :                 *x2++ = L_add( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
    1041           0 :                 move16();
    1042           0 :                 *x2++ = L_sub( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
    1043           0 :                 move16();
    1044             :             }
    1045             :         }
    1046       44321 :         ELSE IF( EQ_16( n, 256 ) )
    1047             :         {
    1048         488 :             x2 = temp;
    1049       62952 :             FOR( i = 0; i < 128; i++ )
    1050             :             {
    1051       62464 :                 j = *idx++;
    1052       62464 :                 move16();
    1053       62464 :                 k = *idx++;
    1054       62464 :                 move16();
    1055             : 
    1056       62464 :                 *x2++ = L_add( x[j], x[k] ); // Qx
    1057       62464 :                 move16();
    1058       62464 :                 *x2++ = L_sub( x[j], x[k] ); // Qx
    1059       62464 :                 move16();
    1060             :             }
    1061             :         }
    1062       43833 :         ELSE IF( EQ_16( n, 512 ) )
    1063             :         {
    1064       43833 :             x2even = temp;
    1065       43833 :             x2odd = temp + 256;
    1066             : 
    1067     5654457 :             FOR( i = 0; i < 128; i++ )
    1068             :             {
    1069     5610624 :                 j = shl( *idx, 1 );
    1070     5610624 :                 idx++;
    1071     5610624 :                 k = shl( *idx, 1 );
    1072     5610624 :                 idx++;
    1073             : 
    1074     5610624 :                 *x2even++ = L_add( x[j], x[k] ); // Qx
    1075     5610624 :                 move16();
    1076     5610624 :                 *x2even++ = L_sub( x[j], x[k] ); // Qx
    1077     5610624 :                 move16();
    1078     5610624 :                 j = add( j, 1 );
    1079     5610624 :                 k = add( k, 1 );
    1080     5610624 :                 *x2odd++ = L_add( x[j], x[k] ); // Qx
    1081     5610624 :                 move16();
    1082     5610624 :                 *x2odd++ = L_sub( x[j], x[k] ); // Qx
    1083     5610624 :                 move16();
    1084             :             }
    1085             :         }
    1086             : 
    1087             :         /*-----------------------------------------------------------------*
    1088             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
    1089             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
    1090             :          * and the associated pointers initialization.
    1091             :          * Also, it allows to Put the Data from 'temp' back into 'x' due
    1092             :          * to the previous Combined Digit Reverse and Length two butterflies
    1093             :          *-----------------------------------------------------------------*/
    1094             : 
    1095             :         /*for_ (k = 2; k < 3; k++)*/
    1096             :         {
    1097       44321 :             x0 = temp;
    1098       44321 :             x1 = x0 + 2;
    1099       44321 :             x2 = x; // Qx
    1100             : 
    1101     5686177 :             FOR( i = 0; i < n; i += 4 )
    1102             :             {
    1103     5641856 :                 *x2++ = L_add( *x0++, *x1 ); /* x[i] = xt + x[i+n2];    */
    1104     5641856 :                 move16();
    1105     5641856 :                 *x2++ = *x0;
    1106     5641856 :                 move16();
    1107     5641856 :                 x0--;
    1108     5641856 :                 *x2++ = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      */
    1109     5641856 :                 move16();
    1110     5641856 :                 x1++;
    1111     5641856 :                 *x2++ = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4];     */
    1112     5641856 :                 move16();
    1113             : 
    1114     5641856 :                 x0 += 4;
    1115     5641856 :                 x1 += 3; /* x1 has already advanced */
    1116             :             }
    1117             :         }
    1118             :     }
    1119             :     ELSE
    1120             :     {
    1121             :         /*-----------------------------------------------------------------*
    1122             :          * Digit reverse counter
    1123             :          *-----------------------------------------------------------------*/
    1124             : 
    1125           0 :         j = 0;
    1126           0 :         move16();
    1127           0 :         x0 = &x[0]; // Qx
    1128           0 :         FOR( i = 0; i < ( n - 1 ); i++ )
    1129             :         {
    1130           0 :             IF( LT_16( i, j ) )
    1131             :             {
    1132           0 :                 xt = x[j]; // Qx
    1133           0 :                 move32();
    1134           0 :                 x[j] = *x0;
    1135           0 :                 move32();
    1136           0 :                 *x0 = xt;
    1137           0 :                 move32();
    1138             :             }
    1139           0 :             x0++;
    1140           0 :             k = shr( n, 1 );
    1141           0 :             WHILE( ( k <= j ) )
    1142             :             {
    1143           0 :                 j = sub( j, k );
    1144           0 :                 k = shr( k, 1 );
    1145             :             }
    1146           0 :             j = add( j, k );
    1147             :         }
    1148             : 
    1149             :         /*-----------------------------------------------------------------*
    1150             :          * Length two butterflies
    1151             :          *-----------------------------------------------------------------*/
    1152             : 
    1153           0 :         x0 = &x[0]; // Qx
    1154           0 :         x1 = &x[1]; // Qx
    1155           0 :         FOR( i = 0; i < ( n >> 1 ); i++ )
    1156             :         {
    1157           0 :             *x1 = L_sub( *x0, *x1 ); // Qx
    1158           0 :             move32();
    1159           0 :             *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); //*x0 * 2 - *x1 (Qx)
    1160           0 :             move32();
    1161             : 
    1162           0 :             x0++;
    1163           0 :             x0++;
    1164           0 :             x1++;
    1165           0 :             x1++;
    1166             :         }
    1167             : 
    1168             :         /*-----------------------------------------------------------------*
    1169             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
    1170             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
    1171             :          * and the associated pointers initialization.
    1172             :          *-----------------------------------------------------------------*/
    1173             : 
    1174             :         /* for_ (k = 2; k < 3; k++) */
    1175             :         {
    1176           0 :             x0 = x; // Qx
    1177           0 :             x1 = x0 + 2;
    1178             : 
    1179           0 :             FOR( i = 0; i < n; i += 4 )
    1180             :             {
    1181           0 :                 *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      Qx*/
    1182           0 :                 move32();
    1183           0 :                 *x0 = L_sub( L_shl( *x0, 1 ), *x1++ ); /* x[i] = xt + x[i+n2];    */ /**x0 * 2 - *x1 (Qx)*/
    1184           0 :                 move32();
    1185           0 :                 *x1 = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4];     Qx*/
    1186           0 :                 move32();
    1187             : 
    1188           0 :                 x0 += 4;
    1189           0 :                 x1 += 3; /* x1 has already advanced */
    1190             :             }
    1191             :         }
    1192             :     }
    1193             : 
    1194             :     /*-----------------------------------------------------------------*
    1195             :      * Other butterflies
    1196             :      *
    1197             :      * The implementation described in [1] has been changed by using
    1198             :      * table lookup for evaluating sine and cosine functions.  The
    1199             :      * variable ind and its increment step are needed to access table
    1200             :      * entries.  Note that this implementation assumes n4 to be so
    1201             :      * small that ind will never exceed the table.  Thus the input
    1202             :      * argument n and the constant N_MAX_FFT must be set properly.
    1203             :      *-----------------------------------------------------------------*/
    1204             : 
    1205       44321 :     n4 = 1;
    1206       44321 :     move16();
    1207       44321 :     n2 = 2;
    1208       44321 :     move16();
    1209       44321 :     n1 = 4;
    1210       44321 :     move16();
    1211             : 
    1212       44321 :     step = N_MAX_DIV4;
    1213       44321 :     move16();
    1214             : 
    1215      354080 :     FOR( k = 3; k <= m; k++ )
    1216             :     {
    1217      309759 :         step = shr( step, 1 );
    1218      309759 :         n4 = shl( n4, 1 );
    1219      309759 :         n2 = shl( n2, 1 );
    1220      309759 :         n1 = shl( n1, 1 );
    1221             : 
    1222      309759 :         x0 = x;
    1223      309759 :         x1 = x0 + n2;
    1224      309759 :         x2 = x1 + n4;
    1225             : 
    1226     5907294 :         FOR( i = 0; i < n; i += n1 )
    1227             :         {
    1228     5597535 :             *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      */
    1229     5597535 :             move32();
    1230     5597535 :             *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); /* x[i] = xt + x[i+n2];    */ /**x0 * 2 - *x1 (Qx)*/
    1231     5597535 :             move32();
    1232     5597535 :             *x2 = L_negate( *x2 ); /* x[i+n2+n4] = -x[i+n2+n4];    Qx */
    1233     5597535 :             move32();
    1234             : 
    1235     5597535 :             s = sincos_t_ext_fx;   // Q15
    1236     5597535 :             c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 Q15*/
    1237     5597535 :             xi1 = x0;
    1238     5597535 :             xi3 = xi1 + n2;
    1239     5597535 :             xi2 = xi3;
    1240     5597535 :             x0 += n1;
    1241     5597535 :             xi4 = x0;
    1242             : 
    1243    39461760 :             FOR( j = 1; j < n4; j++ )
    1244             :             {
    1245    33864225 :                 xi3++;
    1246    33864225 :                 xi1++;
    1247    33864225 :                 xi4--;
    1248    33864225 :                 xi2--;
    1249    33864225 :                 c += step;
    1250    33864225 :                 s += step; /* autoincrement by ar0 */
    1251             : 
    1252    33864225 :                 t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind);   Qx*/
    1253    33864225 :                 t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind);   Qx*/
    1254             : 
    1255    33864225 :                 *xi4 = L_sub( *xi2, t2 );
    1256    33864225 :                 move32();
    1257    33864225 :                 *xi2 = L_sub( *xi1, t1 );
    1258    33864225 :                 move32();
    1259    33864225 :                 *xi1 = L_sub( L_shl( *xi1, 1 ), *xi2 ); // Qx
    1260    33864225 :                 move32();
    1261    33864225 :                 *xi3 = L_negate( L_add( L_shl( t2, 1 ), *xi4 ) ); // Qx
    1262    33864225 :                 move32();
    1263             :             }
    1264             : 
    1265     5597535 :             x1 += n1;
    1266     5597535 :             x2 += n1;
    1267             :         }
    1268             :     }
    1269             : 
    1270       44321 :     return;
    1271             : }

Generated by: LCOV version 1.14