LCOV - code coverage report
Current view: top level - lib_com - fft_rel_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main @ 2ac0e6bcc2725ab07bd14076a8fbdebaaf98bf20 Lines: 470 519 90.6 %
Date: 2025-11-08 23:21:44 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*====================================================================================
       2             :     EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
       3             :   ====================================================================================*/
       4             : 
       5             : #include <stdint.h>
       6             : #include "options.h" /* Compilation switches                   */
       7             : #include "prot_fx.h" /* Function prototypes                    */
       8             : #include "rom_com.h" /* Static table prototypes                */
       9             : #include "stl.h"
      10             : #include "stdint.h"
      11             : #include "wmc_auto.h"
      12             : 
      13             : /*---------------------------------------------------------------------*
      14             :  * Local constants
      15             :  *---------------------------------------------------------------------*/
      16             : 
      17             : #define N_MAX_FFT   1024
      18             : #define N_MAX_DIV2  ( N_MAX_FFT >> 1 )
      19             : #define N_MAX_DIV4  ( N_MAX_DIV2 >> 1 )
      20             : #define INV_SQR2_FX 23170
      21             : #define N_MAX_SAS   256
      22             : 
      23             : /*------------------------------------------------------------------
      24             :  *
      25             :  * This is an implementation of decimation-in-time FFT algorithm for
      26             :  * real sequences.  The techniques used here can be found in several
      27             :  * books, e.g., i) Proakis and Manolakis, "Digital Signal Processing",
      28             :  * 2nd Edition, Chapter 9, and ii) W.H. Press et. al., "Numerical
      29             :  * Recipes in C", 2nd Edition, Chapter 12.
      30             :  *
      31             :  * Input -  There are two inputs to this function:
      32             :  *
      33             :  *       1) An integer pointer to the input data array
      34             :  *       2) An integer value which should be set as +1 for FFT
      35             :  *          and some other value, e.g., -1 for ifFT
      36             :  *
      37             :  * Output - There is no return value.
      38             :  *       The input data are replaced with transformed data.  if the
      39             :  *       input is a real time domain sequence, it is replaced with
      40             :  *       the complex FFT for positive frequencies.  The FFT value
      41             :  *       for DC and the foldover frequency are combined to form the
      42             :  *       first complex number in the array.  The remaining complex
      43             :  *       numbers correspond to increasing frequencies.  if the input
      44             :  *       is a complex frequency domain sequence arranged as above,
      45             :  *       it is replaced with the corresponding time domain sequence.
      46             :  *
      47             :  * Notes:
      48             :  *
      49             :  *       1) This function is designed to be a part of a noise supp-
      50             :  *          ression algorithm that requires 128-point FFT of real
      51             :  *          sequences.  This is achieved here through a 64-point
      52             :  *          complex FFT.  Consequently, the FFT size information is
      53             :  *          not transmitted explicitly.  However, some flexibility
      54             :  *          is provided in the function to change the size of the
      55             :  *          FFT by specifying the size information through "define"
      56             :  *          statements.
      57             :  *
      58             :  *       2) The values of the complex sinusoids used in the FFT
      59             :  *          algorithm are computed once (i.e., the first time the
      60             :  *          r_fft function is called) and stored in a table. To
      61             :  *          further speed up the algorithm, these values can be
      62             :  *          precomputed and stored in a ROM table in actual DSP
      63             :  *          based implementations.
      64             :  *
      65             :  *       3) In the c_fft function, the FFT values are divided by
      66             :  *          2 after each stage of computation thus dividing the
      67             :  *          final FFT values by 64.  No multiplying factor is used
      68             :  *          for the ifFT.  This is somewhat different from the usual
      69             :  *          definition of FFT where the factor 1/N, i.e., 1/64, is
      70             :  *          used for the ifFT and not the FFT.  No factor is used in
      71             :  *          the r_fft function.
      72             :  *
      73             :  *       4) Much of the code for the FFT and ifFT parts in r_fft
      74             :  *          and c_fft functions are similar and can be combined.
      75             :  *          They are, however, kept separate here to speed up the
      76             :  *          execution.
      77             :  *------------------------------------------------------------------------*/
      78             : /*------------------------------------------------------------------------*
      79             :  * c_fft_fx:
      80             :  *
      81             :  * Computes the complex part of the split-radix FFT
      82             :  *------------------------------------------------------------------------*/
      83             : 
      84       88063 : static void c_fft_fx(
      85             :     const Word16 *phs_tbl, /* i  : Table of phases            */
      86             :     Word16 SIZE,           /* i  : Size of the FFT           */
      87             :     Word16 NUM_STAGE,      /* i  : Number of stages           */
      88             :     const Word16 *in_ptr,  /* i  : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Qx*/
      89             :     Word16 *out_ptr,       /* o  : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Q(x - 1)*/
      90             :     /* in_ptr & out_ptr must not overlap! */
      91             :     const Word16 isign ) /* i  : 1=fft, otherwise it is ifft*/
      92             : {
      93             :     Word16 i, j, k, ii, jj, kk, ji, kj;
      94             :     Word32 L_tmp1, L_tmp2;
      95             :     Word16 tmp1, tmp2, tmp3, tmp4;
      96             :     const Word16 *table_ptr;
      97             :     const Word16 *input_ptr1, *input_ptr2, *input_ptr3, *input_ptr4;
      98       88063 :     Word16 shift = 0;
      99       88063 :     move16();
     100             :     /* Setup Reorder Variables */
     101       88063 :     table_ptr = NULL;
     102       88063 :     table_ptr = FFT_REORDER_1024;
     103       88063 :     SWITCH( SIZE )
     104             :     {
     105         105 :         case 1024:
     106         105 :             shift = 0;
     107         105 :             move16();
     108         105 :             BREAK;
     109         686 :         case 512:
     110         686 :             shift = 1;
     111         686 :             move16();
     112         686 :             BREAK;
     113        6302 :         case 256:
     114        6302 :             shift = 2;
     115        6302 :             move16();
     116        6302 :             BREAK;
     117         420 :         case 128:
     118         420 :             shift = 3;
     119         420 :             move16();
     120         420 :             BREAK;
     121       80550 :         case 64:
     122       80550 :             shift = 4;
     123       80550 :             move16();
     124       80550 :             BREAK;
     125             :     }
     126             :     /* The FFT part */
     127       88063 :     IF( isign != 0 )
     128             :     {
     129             :         /* Unrolled 1st/2nd Stage
     130             :          * 1) to take advantage of Table Values (0 & +/- 16384)
     131             :          * 2) to perform reordering of Input Values
     132             :          */
     133      597810 :         FOR( k = 0; k < SIZE; k += 8 )
     134             :         {
     135             :             /*
     136             :              * This loop use:
     137             :              *   4 Word16 (tmp1...tmp4)
     138             :              *   2 Word32 (L_tmp1 & L_tmp2)
     139             :              *   4 Pointers (table_ptr, input_ptr1, input_ptr2, input_ptr3)
     140             :              *
     141             :              * The addition of 'in_ptr' + and index value from 'reorder_ptr'
     142             :              * is counted as a move16()
     143             :              */
     144             : 
     145      550552 :             input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift ); // Qx
     146             : 
     147      550552 :             L_tmp1 = L_mult( *input_ptr1++, 16384 );
     148      550552 :             L_tmp2 = L_mult( *input_ptr1, 16384 );
     149             : 
     150      550552 :             input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     151             : 
     152      550552 :             tmp1 = msu_r_sat( L_tmp1, *input_ptr1, 16384 );
     153      550552 :             tmp3 = mac_r_sat( L_tmp1, *input_ptr1++, 16384 );
     154      550552 :             input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     155      550552 :             input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     156             : 
     157      550552 :             L_tmp1 = L_mult( *input_ptr2++, 16384 );
     158      550552 :             tmp2 = mac_r_sat( L_tmp1, *input_ptr3, 16384 );
     159      550552 :             tmp4 = msu_r_sat( L_tmp1, *input_ptr3++, 16384 );
     160      550552 :             L_tmp1 = L_mult( tmp3, 16384 );
     161      550552 :             out_ptr[k] = mac_r_sat( L_tmp1, tmp2, 16384 );
     162      550552 :             move16();
     163      550552 :             out_ptr[k + 4] = msu_r_sat( L_tmp1, tmp2, 16384 );
     164      550552 :             move16();
     165             : 
     166      550552 :             tmp2 = mac_r_sat( L_tmp2, *input_ptr1, 16384 );
     167      550552 :             tmp3 = msu_r_sat( L_tmp2, *input_ptr1, 16384 );
     168      550552 :             L_tmp2 = L_mult( *input_ptr2, 16384 );
     169             : 
     170      550552 :             L_tmp1 = L_mult( tmp1, 16384 );
     171      550552 :             tmp1 = msu_r_sat( L_tmp2, *input_ptr3, 16384 );
     172      550552 :             out_ptr[k + 2] = mac_r_sat( L_tmp1, tmp1, 16384 );
     173      550552 :             move16();
     174      550552 :             out_ptr[k + 6] = msu_r_sat( L_tmp1, tmp1, 16384 );
     175      550552 :             move16();
     176      550552 :             L_tmp1 = L_mult( tmp2, 16384 );
     177      550552 :             tmp2 = mac_r_sat( L_tmp2, *input_ptr3, 16384 );
     178      550552 :             out_ptr[k + 1] = mac_r_sat( L_tmp1, tmp2, 16384 );
     179      550552 :             move16();
     180      550552 :             out_ptr[k + 5] = msu_r_sat( L_tmp1, tmp2, 16384 );
     181      550552 :             move16();
     182      550552 :             L_tmp1 = L_mult( tmp3, 16384 );
     183      550552 :             out_ptr[k + 3] = msu_r_sat( L_tmp1, tmp4, 16384 );
     184      550552 :             move16();
     185      550552 :             out_ptr[k + 7] = mac_r_sat( L_tmp1, tmp4, 16384 );
     186      550552 :             move16();
     187             :         }
     188             : 
     189             :         /* Remaining Stages */
     190      202890 :         FOR( i = 2; i < NUM_STAGE; i++ )
     191             :         {
     192             :             /* i is stage counter      */
     193      155632 :             jj = shl( 2, i );  /* FFT size                */
     194      155632 :             kk = shl( jj, 1 ); /* 2 * FFT size            */
     195      155632 :             ii = shr( SIZE, i );
     196      155632 :             ji = 0;
     197      155632 :             move16(); /* ji is phase table index */
     198             : 
     199     2168808 :             FOR( j = 0; j < jj; j += 2 )
     200             :             {
     201             :                 /* j is sample counter     */
     202     6269448 :                 FOR( k = j; k < SIZE; k += kk )
     203             :                 {
     204             :                     /* k is butterfly top     */
     205     4256272 :                     kj = add( k, jj ); /* kj is butterfly bottom */
     206             : 
     207             :                     /* Butterfly computations */
     208     4256272 :                     L_tmp1 = L_msu( L_mult( *( out_ptr + kj ), phs_tbl[ji] ),
     209     4256272 :                                     *( out_ptr + kj + 1 ), phs_tbl[ji + 1] );
     210     4256272 :                     L_tmp2 = L_mac( L_mult( *( out_ptr + kj + 1 ), phs_tbl[ji] ),
     211     4256272 :                                     *( out_ptr + kj ), phs_tbl[ji + 1] );
     212             : 
     213     4256272 :                     out_ptr[kj] = mac_r( L_negate( L_tmp1 ), out_ptr[k], 16384 );
     214     4256272 :                     move16();
     215     4256272 :                     out_ptr[kj + 1] = mac_r( L_negate( L_tmp2 ), out_ptr[k + 1], 16384 );
     216     4256272 :                     move16();
     217     4256272 :                     out_ptr[k] = mac_r( L_tmp1, out_ptr[k], 16384 );
     218     4256272 :                     move16();
     219     4256272 :                     out_ptr[k + 1] = mac_r( L_tmp2, out_ptr[k + 1], 16384 );
     220     4256272 :                     move16();
     221             :                 }
     222     2013176 :                 ji = add( ji, ii );
     223             :             }
     224             :         }
     225             :     }
     226             :     ELSE /* The ifFT part */
     227             :     {
     228             :         /* Unrolled 1st/2nd Stage
     229             :          * 1) to take advantage of Table Values (0 & +/- 16384)
     230             :          * 2) to perform reordering of Input Values
     231             :          */
     232      400381 :         FOR( k = 0; k < SIZE; k += 8 )
     233             :         {
     234             :             /*
     235             :              * This loop use:
     236             :              *   4 Word16 (tmp1...tmp4)
     237             :              *   2 Word32 (L_tmp1 & L_tmp2)
     238             :              *   5 Pointers (reorder_ptr, input_ptr1...input_ptr4)
     239             :              *
     240             :              * The addition of 'in_ptr' + and index value from 'reorder_ptr'
     241             :              * is counted as a move16()
     242             :              */
     243      359576 :             input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     244      359576 :             input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     245             : 
     246      359576 :             input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     247      359576 :             input_ptr4 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
     248             : 
     249             : 
     250      359576 :             tmp3 = sub( *input_ptr1, *input_ptr2 );
     251      359576 :             tmp4 = add( *input_ptr1++, *input_ptr2++ );
     252             : 
     253      359576 :             tmp2 = sub( input_ptr3[0], input_ptr4[0] );
     254      359576 :             tmp1 = sub( input_ptr3[1], input_ptr4[1] );
     255             : 
     256      359576 :             out_ptr[k + 2] = sub( tmp3, tmp1 );
     257      359576 :             move16();
     258      359576 :             out_ptr[k + 6] = add( tmp3, tmp1 );
     259      359576 :             move16();
     260             : 
     261      359576 :             tmp1 = sub( *input_ptr1, *input_ptr2 );
     262      359576 :             out_ptr[k + 3] = add( tmp1, tmp2 );
     263      359576 :             move16();
     264      359576 :             out_ptr[k + 7] = sub( tmp1, tmp2 );
     265      359576 :             move16();
     266             : 
     267      359576 :             tmp1 = add( input_ptr3[0], input_ptr4[0] );
     268      359576 :             tmp3 = add( input_ptr3[1], input_ptr4[1] );
     269             : 
     270      359576 :             out_ptr[k] = add( tmp4, tmp1 );
     271      359576 :             move16();
     272      359576 :             out_ptr[k + 4] = sub( tmp4, tmp1 );
     273      359576 :             move16();
     274             : 
     275      359576 :             tmp4 = add( *input_ptr1, *input_ptr2 );
     276      359576 :             out_ptr[k + 1] = add( tmp4, tmp3 );
     277      359576 :             move16();
     278      359576 :             out_ptr[k + 5] = sub( tmp4, tmp3 );
     279      359576 :             move16();
     280             :         }
     281             : 
     282       40805 :         table_ptr = phs_tbl + SIZE; /* access part of table that is scaled by 2 */
     283             : 
     284             :         /* Remaining Stages */
     285      164864 :         FOR( i = 2; i < NUM_STAGE; i++ )
     286             :         {
     287             :             /* i is stage counter      */
     288      124059 :             jj = shl( 2, i );  /* FFT size                */
     289      124059 :             kk = shl( jj, 1 ); /* 2 * FFT size            */
     290      124059 :             ii = shr( SIZE, i );
     291      124059 :             ji = 0;
     292      124059 :             move16(); /* ji is phase table index */
     293             : 
     294     1399143 :             FOR( j = 0; j < jj; j += 2 )
     295             :             {
     296             :                 /* j is sample counter     */
     297             :                 /* This can be computed by successive add_fxitions of ii to ji, starting from 0
     298             :                    hence line-count it as a one-line add (still need to increment op count!!) */
     299             : 
     300     3670620 :                 FOR( k = j; k < SIZE; k += kk )
     301             :                 {
     302             :                     /* k is butterfly top     */
     303     2395536 :                     kj = add( k, jj ); /* kj is butterfly bottom */
     304             : 
     305             :                     /* Butterfly computations */
     306     2395536 :                     tmp1 = mac_r_sat( L_mult_sat( out_ptr[kj], table_ptr[ji] ), out_ptr[kj + 1], table_ptr[ji + 1] );
     307             : 
     308     2395536 :                     tmp2 = msu_r_sat( L_mult_sat( out_ptr[kj + 1], table_ptr[ji] ), out_ptr[kj], table_ptr[ji + 1] );
     309             : 
     310     2395536 :                     out_ptr[kj] = sub_sat( out_ptr[k], tmp1 );
     311     2395536 :                     move16();
     312     2395536 :                     out_ptr[kj + 1] = sub_sat( out_ptr[k + 1], tmp2 );
     313     2395536 :                     move16();
     314     2395536 :                     out_ptr[k] = add_sat( out_ptr[k], tmp1 );
     315     2395536 :                     move16();
     316     2395536 :                     out_ptr[k + 1] = add_sat( out_ptr[k + 1], tmp2 );
     317     2395536 :                     move16();
     318             :                 }
     319     1275084 :                 ji = add( ji, ii );
     320             :             }
     321             :         }
     322             :     }
     323       88063 : }
     324             : 
     325             : /*--------------------------------------------------------------------------------*
     326             :  * r_fft_fx:
     327             :  *
     328             :  * Perform FFT fixed-point for real-valued sequences of length 32, 64 or 128
     329             :  *--------------------------------------------------------------------------------*/
     330       88063 : void r_fft_fx_lc(
     331             :     const Word16 *phs_tbl,  /* i  : Table of phase            */
     332             :     const Word16 SIZE,      /* i  : Size of the FFT           */
     333             :     const Word16 SIZE2,     /* i  : Size / 2                  */
     334             :     const Word16 NUM_STAGE, /* i  : Number of stage           */
     335             :     const Word16 *in_ptr,   /* i  : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Qx*/
     336             :     Word16 *out_ptr,        /* o  : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Q(x - 1)*/
     337             :     const Word16 isign      /* i  : 1=fft, otherwize it's ifft                                                      */
     338             : )
     339             : {
     340             :     Word16 tmp2_real, tmp2_imag;
     341             :     Word32 Ltmp1_real, Ltmp1_imag;
     342             :     Word16 i;
     343             :     Word32 Ltmp1;
     344             :     const Word16 *phstbl_ptrDn;
     345             :     Word16 *ptrDn;
     346             :     Word16 temp[1024]; /* Accommodates real input FFT size up to 1024. */
     347             : 
     348             :     /* Setup Pointers */
     349       88063 :     phstbl_ptrDn = &phs_tbl[SIZE - 1];
     350             : 
     351             :     /* The FFT part */
     352       88063 :     IF( isign != 0 )
     353             :     {
     354             :         Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
     355             : 
     356             :         /* Perform the complex FFT */
     357       47258 :         c_fft_fx( phs_tbl, SIZE, NUM_STAGE, in_ptr, temp, isign );
     358             : 
     359             :         /* First, handle the DC and foldover frequencies */
     360       47258 :         out_ptr[SIZE2] = sub( temp[0], temp[1] );
     361       47258 :         move16();
     362       47258 :         out_ptr[0] = sub( add( temp[0], temp[1] ), shr( NUM_STAGE, 1 ) );
     363       47258 :         move16(); /* DC have a small offset */
     364             : 
     365       47258 :         ptrDn = &temp[SIZE - 1];
     366             : 
     367       47258 :         ptImaDn = &out_ptr[SIZE - 1];
     368       47258 :         ptRealUp = &out_ptr[1];
     369       47258 :         ptImaUp = &out_ptr[SIZE2 + 1];
     370       47258 :         ptRealDn = &out_ptr[SIZE2 - 1];
     371             : 
     372             :         /* Now, handle the remaining positive frequencies */
     373     1148362 :         FOR( i = 2; i <= SIZE2; i += 2 )
     374             :         {
     375     1101104 :             Ltmp1_imag = L_mult( temp[i + 1], 16384 );
     376     1101104 :             Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptrDn, 16384 );
     377     1101104 :             tmp2_real = add_sat( temp[i + 1], *ptrDn-- );
     378             : 
     379     1101104 :             Ltmp1_real = L_mult( temp[i], 16384 );
     380     1101104 :             Ltmp1_real = L_mac_sat( Ltmp1_real, *ptrDn, 16384 );
     381     1101104 :             tmp2_imag = sub( *ptrDn--, temp[i] );
     382             : 
     383             : 
     384     1101104 :             *ptRealUp++ = msu_r_sat( L_mac_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
     385     1101104 :             move16();
     386     1101104 :             *ptImaDn-- = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
     387     1101104 :             move16();
     388     1101104 :             Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
     389     1101104 :             Ltmp1_real = L_mac_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
     390     1101104 :             *ptImaUp++ = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
     391     1101104 :             move16();
     392     1101104 :             *ptRealDn-- = mac_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
     393     1101104 :             move16();
     394             :         }
     395             :     }
     396             :     ELSE /* The ifFT part */
     397             :     {
     398             :         const Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
     399             : 
     400             :         /* First, handle the DC and foldover frequencies */
     401       40805 :         Ltmp1 = L_mult( in_ptr[0], 16384 );
     402       40805 :         temp[0] = mac_r( Ltmp1, in_ptr[SIZE2], 16384 );
     403       40805 :         move16();
     404       40805 :         temp[1] = msu_r( Ltmp1, in_ptr[SIZE2], 16384 );
     405       40805 :         move16();
     406             : 
     407       40805 :         ptrDn = &temp[SIZE - 1];
     408             : 
     409             :         /* Here we cast to Word16 * from a const Word16 *. */
     410             :         /* This is ok because we use these pointers for    */
     411             :         /* reading only. This is just to avoid declaring a */
     412             :         /* bunch of 4 other pointer with const Word16 *.   */
     413       40805 :         ptImaDn = &in_ptr[SIZE - 1];   // Qx
     414       40805 :         ptRealUp = &in_ptr[1];         // Qx
     415       40805 :         ptImaUp = &in_ptr[SIZE2 + 1];  // Qx
     416       40805 :         ptRealDn = &in_ptr[SIZE2 - 1]; // Qx
     417             : 
     418             :         /* Now, handle the remaining positive frequencies */
     419      759957 :         FOR( i = 2; i <= SIZE2; i += 2 )
     420             :         {
     421      719152 :             Ltmp1_imag = L_mult( *ptImaDn, 16384 );
     422      719152 :             Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptImaUp, 16384 );
     423      719152 :             tmp2_real = add_sat( *ptImaDn--, *ptImaUp++ );
     424      719152 :             Ltmp1_real = L_mult( *ptRealUp, 16384 );
     425      719152 :             Ltmp1_real = L_mac_sat( Ltmp1_real, *ptRealDn, 16384 );
     426      719152 :             tmp2_imag = sub_sat( *ptRealUp++, *ptRealDn-- );
     427      719152 :             temp[i] = mac_r_sat( L_msu_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
     428      719152 :             move16();
     429      719152 :             temp[i + 1] = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
     430      719152 :             move16();
     431      719152 :             Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
     432      719152 :             Ltmp1_real = L_msu_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
     433      719152 :             *ptrDn-- = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
     434      719152 :             move16();
     435      719152 :             *ptrDn-- = msu_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
     436      719152 :             move16();
     437             :         }
     438             : 
     439             :         /* Perform the complex ifFT */
     440       40805 :         c_fft_fx( phs_tbl, SIZE, NUM_STAGE, temp, out_ptr, isign );
     441             :     }
     442       88063 : }
     443             : 
     444             : 
     445             : /*---------------------------------------------------------------------*
     446             :  *  fft_rel()
     447             :  *
     448             :  *  Computes the split-radix FFT in place for the real-valued
     449             :  *  signal x of length n.  The algorithm has been ported from
     450             :  *  the Fortran code of [1].
     451             :  *
     452             :  *  The function  needs sine and cosine tables t_sin and t_cos,
     453             :  *  and the constant N_MAX_FFT.  The table  entries  are defined as
     454             :  *  sin(2*pi*i) and cos(2*pi*i) for i = 0, 1, ..., N_MAX_FFT-1. The
     455             :  *  implementation  assumes  that any entry  will not be needed
     456             :  *  outside the tables. Therefore, N_MAX_FFT and n must be properly
     457             :  *  set.  The function has been tested  with the values n = 16,
     458             :  *  32, 64, 128, 256, and N_MAX_FFT = 1280.
     459             :  *
     460             :  *  References
     461             :  *  [1] H.V. Sorensen,  D.L. Jones, M.T. Heideman, C.S. Burrus,
     462             :  *      "Real-valued fast  Fourier transform  algorithm,"  IEEE
     463             :  *      Trans. on Signal Processing,  Vol.35, No.6, pp 849-863,
     464             :  *      1987.
     465             :  *
     466             :  *  OUTPUT
     467             :  *      x[0:n-1]  Transform coeffients in the order re[0], re[1],
     468             :  *                ..., re[n/2], im[n/2-1], ..., im[1].
     469             :  *---------------------------------------------------------------------*/
     470             : 
     471     2393694 : void fft_rel_16_32fx(
     472             :     Word16 x[],  /* i/o: input/output vector Qx   */
     473             :     Word16 *q_x, /* extra scaling added on speech buffer*/
     474             :     Word16 i_subfr,
     475             :     const Word16 n, /* i  : vector length          */
     476             :     const Word16 m  /* i  : log2 of vector length  */
     477             : )
     478             : {
     479             :     Word16 i, j, k, n1, n2, n4;
     480             :     Word16 step;
     481             :     Word32 xt, t1, t2;
     482             :     Word32 *x0, *x1, *x2;
     483             :     const Word16 *s, *c;
     484             :     Word32 *xi2, *xi3, *xi4, *xi1;
     485             : 
     486             :     Word32 fft_bff32[L_FFT];
     487     2393694 :     Copy_Scale_sig_16_32_no_sat( x, fft_bff32, L_FFT, 0 ); // copying x to fft_bff32 without scaling
     488             : 
     489             :     /*-----------------------------------------------------------------*
     490             :      * Digit reverse counter
     491             :      *-----------------------------------------------------------------*/
     492             : 
     493     2393694 :     j = 0;
     494     2393694 :     move16();
     495     2393694 :     x0 = &fft_bff32[0]; // Qx
     496   612785664 :     FOR( i = 0; i < n - 1; i++ )
     497             :     {
     498   610391970 :         IF( LT_16( i, j ) )
     499             :         {
     500   287243280 :             xt = fft_bff32[j]; // Qx
     501   287243280 :             move32();
     502   287243280 :             fft_bff32[j] = *x0; // Qx
     503   287243280 :             move32();
     504   287243280 :             *x0 = xt; // Qx
     505   287243280 :             move32();
     506             :         }
     507   610391970 :         x0++;
     508   610391970 :         k = shr( n, 1 );
     509  1201634388 :         WHILE( ( k <= j ) )
     510             :         {
     511   591242418 :             j = sub( j, k );
     512   591242418 :             k = shr( k, 1 );
     513             :         }
     514   610391970 :         j = add( j, k );
     515             :     }
     516             : 
     517             :     /*-----------------------------------------------------------------*
     518             :      * Length two butterflies
     519             :      *-----------------------------------------------------------------*/
     520             : 
     521     2393694 :     x0 = &fft_bff32[0];
     522     2393694 :     x1 = &fft_bff32[1];
     523   308786526 :     FOR( i = 0; i < ( n >> 1 ); i++ )
     524             :     {
     525   306392832 :         xt = *x0;
     526   306392832 :         move32();
     527   306392832 :         *x0 = L_add( xt, *x1 );
     528   306392832 :         move32();
     529   306392832 :         *x1 = L_sub( xt, *x1 );
     530   306392832 :         move32();
     531   306392832 :         x0++;
     532   306392832 :         x0++;
     533   306392832 :         x1++;
     534   306392832 :         x1++;
     535             :     }
     536             : 
     537             :     /*-----------------------------------------------------------------*
     538             :      * Other butterflies
     539             :      *
     540             :      * The implementation described in [1] has been changed by using
     541             :      * table lookup for evaluating sine and cosine functions.  The
     542             :      * variable ind and its increment step are needed to access table
     543             :      * entries.  Note that this implementation assumes n4 to be so
     544             :      * small that ind will never exceed the table.  Thus the input
     545             :      * argument n and the constant N_MAX_SAS must be set properly.
     546             :      *-----------------------------------------------------------------*/
     547             : 
     548     2393694 :     n2 = 1;
     549     2393694 :     move16();
     550             :     /* step = N_MAX_SAS/4; */
     551    19149552 :     FOR( k = 2; k <= m; k++ )
     552             :     {
     553    16755858 :         n4 = n2;
     554    16755858 :         move16();
     555    16755858 :         n2 = shl( n4, 1 );
     556    16755858 :         n1 = shl( n2, 1 );
     557             : 
     558    16755858 :         step = idiv1616( N_MAX_SAS, n1 );
     559             : 
     560    16755858 :         x0 = fft_bff32;
     561    16755858 :         x1 = fft_bff32 + n2;
     562    16755858 :         x2 = fft_bff32 + add( n2, n4 );
     563   320754996 :         FOR( i = 0; i < n; i += n1 )
     564             :         {
     565   303999138 :             xt = *x0;
     566   303999138 :             move32(); /* xt = x[i];   */
     567   303999138 :             *x0 = L_add( xt, *x1 );
     568   303999138 :             move32(); /* x[i] = xt + x[i+n2];    */
     569   303999138 :             *x1 = L_sub( xt, *x1 );
     570   303999138 :             move32(); /* x[i+n2] = xt - x[i+n2];      */
     571   303999138 :             *x2 = L_negate( *x2 );
     572   303999138 :             move32(); /* x[i+n2+n4] = -x[i+n2+n4];     */
     573             : 
     574             : 
     575   303999138 :             s = sincos_t_fx + step; // Q15
     576   303999138 :             c = s + 64;             // Q15
     577   303999138 :             xi1 = fft_bff32 + add( i, 1 );
     578   303999138 :             xi3 = xi1 + n2;
     579   303999138 :             xi2 = xi3 - 2;
     580   303999138 :             xi4 = xi1 + sub( n1, 2 );
     581             : 
     582  1072374912 :             FOR( j = 1; j < n4; j++ )
     583             :             {
     584   768375774 :                 t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx  */
     585   768375774 :                 t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx    */
     586   768375774 :                 *xi4 = L_sub( *xi2, t2 );
     587   768375774 :                 move32();
     588   768375774 :                 *xi3 = L_negate( L_add( *xi2, t2 ) );
     589   768375774 :                 move32();
     590   768375774 :                 *xi2 = L_sub( *xi1, t1 );
     591   768375774 :                 move32();
     592   768375774 :                 *xi1 = L_add( *xi1, t1 );
     593   768375774 :                 move32();
     594             : 
     595   768375774 :                 xi4--;
     596   768375774 :                 xi2--;
     597   768375774 :                 xi3++;
     598   768375774 :                 xi1++;
     599   768375774 :                 c += step;
     600   768375774 :                 s += step; /* autoincrement by ar0 */
     601             :             }
     602             : 
     603   303999138 :             x0 += n1;
     604   303999138 :             x1 += n1;
     605   303999138 :             x2 += n1;
     606             :         }
     607             :         /* step = shr(step, 1); */
     608             :     }
     609     2393694 :     Word16 norm = L_norm_arr( fft_bff32, L_FFT );
     610     2393694 :     IF( i_subfr == 0 )
     611             :     {
     612     1196847 :         Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
     613     1196847 :         *q_x = sub( norm, 16 );
     614     1196847 :         move16();
     615             :     }
     616             :     ELSE
     617             :     {
     618     1196847 :         IF( LT_16( sub( norm, 16 ), *q_x ) )
     619             :         {
     620      240144 :             scale_sig( x - L_FFT, L_FFT, sub( sub( norm, 16 ), *q_x ) );
     621      240144 :             Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
     622      240144 :             *q_x = sub( norm, 16 );
     623      240144 :             move16();
     624             :         }
     625             :         ELSE
     626             :         {
     627      956703 :             Copy_Scale_sig32_16( fft_bff32, x, L_FFT, add( 16, *q_x ) );
     628             :         }
     629             :     }
     630             : 
     631     2393694 :     return;
     632             : }
     633             : 
     634      268148 : void fft_rel_fx(
     635             :     Word16 x[],     /* i/o: input/output vector Qx   */
     636             :     const Word16 n, /* i  : vector length          */
     637             :     const Word16 m  /* i  : log2 of vector length  */
     638             : )
     639             : {
     640             :     Word16 i, j, k, n1, n2, n4;
     641             :     Word16 step;
     642             :     Word16 xt, t1, t2;
     643             :     Word16 *x0, *x1, *x2;
     644             :     const Word16 *s, *c;
     645             :     Word16 *xi2, *xi3, *xi4, *xi1;
     646             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     647      268148 :     Flag Overflow = 0;
     648      268148 :     move32();
     649             : #endif
     650             : 
     651             : 
     652             :     /*-----------------------------------------------------------------*
     653             :      * Digit reverse counter
     654             :      *-----------------------------------------------------------------*/
     655             : 
     656      268148 :     j = 0;
     657      268148 :     move16();
     658      268148 :     x0 = &x[0]; // Qx
     659      268148 :     move16();
     660    60958376 :     FOR( i = 0; i < n - 1; i++ )
     661             :     {
     662    60690228 :         IF( LT_16( i, j ) )
     663             :         {
     664    28547546 :             xt = x[j]; // Qx
     665    28547546 :             move16();
     666    28547546 :             x[j] = *x0; // Qx
     667    28547546 :             move16();
     668    28547546 :             *x0 = xt; // Qx
     669    28547546 :             move16();
     670             :         }
     671    60690228 :         x0++;
     672    60690228 :         k = shr( n, 1 );
     673   119418308 :         WHILE( ( k <= j ) )
     674             :         {
     675    58728080 :             j = sub( j, k );
     676    58728080 :             k = shr( k, 1 );
     677             :         }
     678    60690228 :         j = add( j, k );
     679             :     }
     680             : 
     681             :     /*-----------------------------------------------------------------*
     682             :      * Length two butterflies
     683             :      *-----------------------------------------------------------------*/
     684             : 
     685      268148 :     x0 = &x[0];
     686      268148 :     move16();
     687      268148 :     x1 = &x[1];
     688      268148 :     move16();
     689    30747336 :     FOR( i = 0; i < ( n >> 1 ); i++ )
     690             :     {
     691    30479188 :         xt = *x0;
     692    30479188 :         move16();
     693    30479188 :         *x0 = add_o( xt, *x1, &Overflow );
     694    30479188 :         move16();
     695    30479188 :         *x1 = sub_o( xt, *x1, &Overflow );
     696    30479188 :         move16();
     697    30479188 :         x0++;
     698    30479188 :         x0++;
     699    30479188 :         x1++;
     700    30479188 :         x1++;
     701             :     }
     702             : 
     703             :     /*-----------------------------------------------------------------*
     704             :      * Other butterflies
     705             :      *
     706             :      * The implementation described in [1] has been changed by using
     707             :      * table lookup for evaluating sine and cosine functions.  The
     708             :      * variable ind and its increment step are needed to access table
     709             :      * entries.  Note that this implementation assumes n4 to be so
     710             :      * small that ind will never exceed the table.  Thus the input
     711             :      * argument n and the constant N_MAX_SAS must be set properly.
     712             :      *-----------------------------------------------------------------*/
     713             : 
     714      268148 :     n2 = 1;
     715      268148 :     move16();
     716             :     /* step = N_MAX_SAS/4; */
     717     1962148 :     FOR( k = 2; k <= m; k++ )
     718             :     {
     719     1694000 :         n4 = n2;
     720     1694000 :         move16();
     721     1694000 :         n2 = shl( n4, 1 );
     722     1694000 :         n1 = shl( n2, 1 );
     723             : 
     724     1694000 :         step = idiv1616( N_MAX_SAS, n1 );
     725             : 
     726     1694000 :         x0 = x;
     727     1694000 :         x1 = x + n2;
     728     1694000 :         x2 = x + add( n2, n4 );
     729    31905040 :         FOR( i = 0; i < n; i += n1 )
     730             :         {
     731    30211040 :             xt = *x0;
     732    30211040 :             move16(); /* xt = x[i];   */
     733    30211040 :             *x0 = add_o( xt, *x1, &Overflow );
     734    30211040 :             move16(); /* x[i] = xt + x[i+n2];    */
     735    30211040 :             *x1 = sub_o( xt, *x1, &Overflow );
     736    30211040 :             move16(); /* x[i+n2] = xt - x[i+n2];      */
     737    30211040 :             *x2 = negate( *x2 );
     738    30211040 :             move16(); /* x[i+n2+n4] = -x[i+n2+n4];     */
     739             : 
     740             : 
     741    30211040 :             s = sincos_t_fx + step; // Q15
     742    30211040 :             c = s + 64;             // Q15
     743    30211040 :             xi1 = x + add( i, 1 );
     744    30211040 :             xi3 = xi1 + n2;
     745    30211040 :             xi2 = xi3 - 2;
     746    30211040 :             xi4 = xi1 + sub( n1, 2 );
     747             : 
     748   106494122 :             FOR( j = 1; j < n4; j++ )
     749             :             {
     750    76283082 :                 t1 = add_o( mult_r( *xi3, *c ), mult_r( *xi4, *s ), &Overflow ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx  */
     751    76283082 :                 t2 = sub_o( mult_r( *xi3, *s ), mult_r( *xi4, *c ), &Overflow ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx    */
     752    76283082 :                 *xi4 = sub_o( *xi2, t2, &Overflow );
     753    76283082 :                 move16();
     754    76283082 :                 *xi3 = negate( add_o( *xi2, t2, &Overflow ) );
     755    76283082 :                 move16();
     756    76283082 :                 *xi2 = sub_o( *xi1, t1, &Overflow );
     757    76283082 :                 move16();
     758    76283082 :                 *xi1 = add_o( *xi1, t1, &Overflow );
     759    76283082 :                 move16();
     760             : 
     761    76283082 :                 xi4--;
     762    76283082 :                 xi2--;
     763    76283082 :                 xi3++;
     764    76283082 :                 xi1++;
     765    76283082 :                 c += step;
     766    76283082 :                 s += step; /* autoincrement by ar0 */
     767             :             }
     768             : 
     769    30211040 :             x0 += n1;
     770    30211040 :             x1 += n1;
     771    30211040 :             x2 += n1;
     772             :         }
     773             :         /* step = shr(step, 1); */
     774             :     }
     775             : 
     776      268148 :     return;
     777             : }
     778             : 
     779       60557 : void fft_rel_fx32(
     780             :     Word32 x[],     /* i/o: input/output vector Qx   */
     781             :     const Word16 n, /* i  : vector length          */
     782             :     const Word16 m  /* i  : log2 of vector length  */
     783             : )
     784             : {
     785             :     Word16 i, j, k, n1, n2, n4;
     786             :     Word16 step;
     787             :     Word32 xt, t1, t2;
     788             :     Word32 *x0, *x1, *x2;
     789             :     Word32 *xi2, *xi3, *xi4, *xi1;
     790             :     const Word16 *s, *c;
     791             :     const Word16 *idx;
     792             : 
     793             :     /* !!!! NMAX = 256 is hardcoded here  (similar optimizations should be done for NMAX > 256) !!! */
     794             : 
     795             :     Word32 *x2even, *x2odd;
     796             :     Word32 temp[512];
     797             : 
     798       60557 :     test();
     799       60557 :     test();
     800       60557 :     IF( EQ_16( n, 128 ) || EQ_16( n, 256 ) || EQ_16( n, 512 ) )
     801             :     {
     802       60557 :         idx = fft256_read_indexes;
     803             : 
     804             :         /* Combined Digit reverse counter & Length two butterflies */
     805       60557 :         IF( EQ_16( n, 128 ) )
     806             :         {
     807           0 :             x2 = temp;
     808           0 :             FOR( i = 0; i < 64; i++ )
     809             :             {
     810           0 :                 j = *idx++;
     811           0 :                 move16();
     812           0 :                 k = *idx++;
     813           0 :                 move16();
     814             : 
     815           0 :                 *x2++ = L_add( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
     816           0 :                 move16();
     817           0 :                 *x2++ = L_sub( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
     818           0 :                 move16();
     819             :             }
     820             :         }
     821       60557 :         ELSE IF( EQ_16( n, 256 ) )
     822             :         {
     823         488 :             x2 = temp;
     824       62952 :             FOR( i = 0; i < 128; i++ )
     825             :             {
     826       62464 :                 j = *idx++;
     827       62464 :                 move16();
     828       62464 :                 k = *idx++;
     829       62464 :                 move16();
     830             : 
     831       62464 :                 *x2++ = L_add( x[j], x[k] ); // Qx
     832       62464 :                 move16();
     833       62464 :                 *x2++ = L_sub( x[j], x[k] ); // Qx
     834       62464 :                 move16();
     835             :             }
     836             :         }
     837       60069 :         ELSE IF( EQ_16( n, 512 ) )
     838             :         {
     839       60069 :             x2even = temp;
     840       60069 :             x2odd = temp + 256;
     841             : 
     842     7748901 :             FOR( i = 0; i < 128; i++ )
     843             :             {
     844     7688832 :                 j = shl( *idx, 1 );
     845     7688832 :                 idx++;
     846     7688832 :                 k = shl( *idx, 1 );
     847     7688832 :                 idx++;
     848             : 
     849     7688832 :                 *x2even++ = L_add( x[j], x[k] ); // Qx
     850     7688832 :                 move16();
     851     7688832 :                 *x2even++ = L_sub( x[j], x[k] ); // Qx
     852     7688832 :                 move16();
     853     7688832 :                 j = add( j, 1 );
     854     7688832 :                 k = add( k, 1 );
     855     7688832 :                 *x2odd++ = L_add( x[j], x[k] ); // Qx
     856     7688832 :                 move16();
     857     7688832 :                 *x2odd++ = L_sub( x[j], x[k] ); // Qx
     858     7688832 :                 move16();
     859             :             }
     860             :         }
     861             : 
     862             :         /*-----------------------------------------------------------------*
     863             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     864             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     865             :          * and the associated pointers initialization.
     866             :          * Also, it allows to Put the Data from 'temp' back into 'x' due
     867             :          * to the previous Combined Digit Reverse and Length two butterflies
     868             :          *-----------------------------------------------------------------*/
     869             : 
     870             :         /*for_ (k = 2; k < 3; k++)*/
     871             :         {
     872       60557 :             x0 = temp;
     873       60557 :             x1 = x0 + 2;
     874       60557 :             x2 = x; // Qx
     875             : 
     876     7780621 :             FOR( i = 0; i < n; i += 4 )
     877             :             {
     878     7720064 :                 *x2++ = L_add( *x0++, *x1 ); /* x[i] = xt + x[i+n2];    */
     879     7720064 :                 move16();
     880     7720064 :                 *x2++ = *x0;
     881     7720064 :                 move16();
     882     7720064 :                 x0--;
     883     7720064 :                 *x2++ = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      */
     884     7720064 :                 move16();
     885     7720064 :                 x1++;
     886     7720064 :                 *x2++ = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4];     */
     887     7720064 :                 move16();
     888             : 
     889     7720064 :                 x0 += 4;
     890     7720064 :                 x1 += 3; /* x1 has already advanced */
     891             :             }
     892             :         }
     893             :     }
     894             :     ELSE
     895             :     {
     896             :         /*-----------------------------------------------------------------*
     897             :          * Digit reverse counter
     898             :          *-----------------------------------------------------------------*/
     899             : 
     900           0 :         j = 0;
     901           0 :         move16();
     902           0 :         x0 = &x[0]; // Qx
     903           0 :         FOR( i = 0; i < ( n - 1 ); i++ )
     904             :         {
     905           0 :             IF( LT_16( i, j ) )
     906             :             {
     907           0 :                 xt = x[j]; // Qx
     908           0 :                 move32();
     909           0 :                 x[j] = *x0;
     910           0 :                 move32();
     911           0 :                 *x0 = xt;
     912           0 :                 move32();
     913             :             }
     914           0 :             x0++;
     915           0 :             k = shr( n, 1 );
     916           0 :             WHILE( ( k <= j ) )
     917             :             {
     918           0 :                 j = sub( j, k );
     919           0 :                 k = shr( k, 1 );
     920             :             }
     921           0 :             j = add( j, k );
     922             :         }
     923             : 
     924             :         /*-----------------------------------------------------------------*
     925             :          * Length two butterflies
     926             :          *-----------------------------------------------------------------*/
     927             : 
     928           0 :         x0 = &x[0]; // Qx
     929           0 :         x1 = &x[1]; // Qx
     930           0 :         FOR( i = 0; i < ( n >> 1 ); i++ )
     931             :         {
     932           0 :             *x1 = L_sub( *x0, *x1 ); // Qx
     933           0 :             move32();
     934           0 :             *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); //*x0 * 2 - *x1 (Qx)
     935           0 :             move32();
     936             : 
     937           0 :             x0++;
     938           0 :             x0++;
     939           0 :             x1++;
     940           0 :             x1++;
     941             :         }
     942             : 
     943             :         /*-----------------------------------------------------------------*
     944             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     945             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     946             :          * and the associated pointers initialization.
     947             :          *-----------------------------------------------------------------*/
     948             : 
     949             :         /* for_ (k = 2; k < 3; k++) */
     950             :         {
     951           0 :             x0 = x; // Qx
     952           0 :             x1 = x0 + 2;
     953             : 
     954           0 :             FOR( i = 0; i < n; i += 4 )
     955             :             {
     956           0 :                 *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      Qx*/
     957           0 :                 move32();
     958           0 :                 *x0 = L_sub( L_shl( *x0, 1 ), *x1++ ); /* x[i] = xt + x[i+n2];    */ /**x0 * 2 - *x1 (Qx)*/
     959           0 :                 move32();
     960           0 :                 *x1 = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4];     Qx*/
     961           0 :                 move32();
     962             : 
     963           0 :                 x0 += 4;
     964           0 :                 x1 += 3; /* x1 has already advanced */
     965             :             }
     966             :         }
     967             :     }
     968             : 
     969             :     /*-----------------------------------------------------------------*
     970             :      * Other butterflies
     971             :      *
     972             :      * The implementation described in [1] has been changed by using
     973             :      * table lookup for evaluating sine and cosine functions.  The
     974             :      * variable ind and its increment step are needed to access table
     975             :      * entries.  Note that this implementation assumes n4 to be so
     976             :      * small that ind will never exceed the table.  Thus the input
     977             :      * argument n and the constant N_MAX_FFT must be set properly.
     978             :      *-----------------------------------------------------------------*/
     979             : 
     980       60557 :     n4 = 1;
     981       60557 :     move16();
     982       60557 :     n2 = 2;
     983       60557 :     move16();
     984       60557 :     n1 = 4;
     985       60557 :     move16();
     986             : 
     987       60557 :     step = N_MAX_DIV4;
     988       60557 :     move16();
     989             : 
     990      483968 :     FOR( k = 3; k <= m; k++ )
     991             :     {
     992      423411 :         step = shr( step, 1 );
     993      423411 :         n4 = shl( n4, 1 );
     994      423411 :         n2 = shl( n2, 1 );
     995      423411 :         n1 = shl( n1, 1 );
     996             : 
     997      423411 :         x0 = x;
     998      423411 :         x1 = x0 + n2;
     999      423411 :         x2 = x1 + n4;
    1000             : 
    1001     8082918 :         FOR( i = 0; i < n; i += n1 )
    1002             :         {
    1003     7659507 :             *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      */
    1004     7659507 :             move32();
    1005     7659507 :             *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); /* x[i] = xt + x[i+n2];    */ /**x0 * 2 - *x1 (Qx)*/
    1006     7659507 :             move32();
    1007     7659507 :             *x2 = L_negate( *x2 ); /* x[i+n2+n4] = -x[i+n2+n4];    Qx */
    1008     7659507 :             move32();
    1009             : 
    1010     7659507 :             s = sincos_t_ext_fx;   // Q15
    1011     7659507 :             c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 Q15*/
    1012     7659507 :             xi1 = x0;
    1013     7659507 :             xi3 = xi1 + n2;
    1014     7659507 :             xi2 = xi3;
    1015     7659507 :             x0 += n1;
    1016     7659507 :             xi4 = x0;
    1017             : 
    1018    54009216 :             FOR( j = 1; j < n4; j++ )
    1019             :             {
    1020    46349709 :                 xi3++;
    1021    46349709 :                 xi1++;
    1022    46349709 :                 xi4--;
    1023    46349709 :                 xi2--;
    1024    46349709 :                 c += step;
    1025    46349709 :                 s += step; /* autoincrement by ar0 */
    1026             : 
    1027    46349709 :                 t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind);   Qx*/
    1028    46349709 :                 t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind);   Qx*/
    1029             : 
    1030    46349709 :                 *xi4 = L_sub( *xi2, t2 );
    1031    46349709 :                 move32();
    1032    46349709 :                 *xi2 = L_sub( *xi1, t1 );
    1033    46349709 :                 move32();
    1034    46349709 :                 *xi1 = L_sub( L_shl( *xi1, 1 ), *xi2 ); // Qx
    1035    46349709 :                 move32();
    1036    46349709 :                 *xi3 = L_negate( L_add( L_shl( t2, 1 ), *xi4 ) ); // Qx
    1037    46349709 :                 move32();
    1038             :             }
    1039             : 
    1040     7659507 :             x1 += n1;
    1041     7659507 :             x2 += n1;
    1042             :         }
    1043             :     }
    1044             : 
    1045       60557 :     return;
    1046             : }

Generated by: LCOV version 1.14