LCOV - code coverage report
Current view: top level - lib_com - fft_rel.c (source / functions) Hit Total Coverage
Test: Coverage on main enc/dec/rend @ 3b2f07138c61dcf997bbf4165d0882f794b2995f Lines: 274 429 63.9 %
Date: 2025-05-03 01:55:50 Functions: 3 4 75.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <stdint.h>
      38             : #include "options.h"
      39             : #include "prot_fx.h"
      40             : #include "rom_com.h"
      41             : #include "wmc_auto.h"
      42             : 
      43             : /*---------------------------------------------------------------------*
      44             :  * Local constants
      45             :  *---------------------------------------------------------------------*/
      46             : 
      47             : #define N_MAX_FFT   1024
      48             : #define N_MAX_DIV2  ( N_MAX_FFT >> 1 )
      49             : #define N_MAX_DIV4  ( N_MAX_DIV2 >> 1 )
      50             : #define INV_SQR2_FX 23170
      51             : #define N_MAX_SAS   256
      52             : /*---------------------------------------------------------------------*
      53             :  *  fft_rel()
      54             :  *
      55             :  *  Computes the split-radix FFT in place for the real-valued
      56             :  *  signal x of length n.  The algorithm has been ported from
      57             :  *  the Fortran code of [1].
      58             :  *
      59             :  *  The function  needs sine and cosine tables t_sin and t_cos,
      60             :  *  and the constant N_MAX_FFT.  The table  entries  are defined as
      61             :  *  sin(2*pi*i) and cos(2*pi*i) for i = 0, 1, ..., N_MAX_FFT-1. The
      62             :  *  implementation  assumes  that any entry  will not be needed
      63             :  *  outside the tables. Therefore, N_MAX_FFT and n must be properly
      64             :  *  set.  The function has been tested  with the values n = 16,
      65             :  *  32, 64, 128, 256, and N_MAX_FFT = 1280.
      66             :  *
      67             :  *  References
      68             :  *  [1] H.V. Sorensen,  D.L. Jones, M.T. Heideman, C.S. Burrus,
      69             :  *      "Real-valued fast  Fourier transform  algorithm,"  IEEE
      70             :  *      Trans. on Signal Processing,  Vol.35, No.6, pp 849-863,
      71             :  *      1987.
      72             :  *
      73             :  *  OUTPUT
      74             :  *      x[0:n-1]  Transform coeffients in the order re[0], re[1],
      75             :  *                ..., re[n/2], im[n/2-1], ..., im[1].
      76             :  *---------------------------------------------------------------------*/
      77             : 
      78           0 : void fft_rel(
      79             :     float x[],       /* i/o: input/output vector    */
      80             :     const int16_t n, /* i  : vector length          */
      81             :     const int16_t m  /* i  : log2 of vector length  */
      82             : )
      83             : {
      84             :     int16_t i, j, k, n1, n2, n4;
      85             :     int16_t step;
      86             :     float xt, t1, t2;
      87             :     float *x0, *x1, *x2;
      88             :     float *xi2, *xi3, *xi4, *xi1;
      89             :     const float *s, *c;
      90             :     const int16_t *idx;
      91             : 
      92             :     /* !!!! NMAX = 256 is hardcoded here  (similar optimizations should be done for NMAX > 256) !!! */
      93             : 
      94             :     float *x2even, *x2odd;
      95             :     float temp[512];
      96             : 
      97           0 :     if ( n == 128 || n == 256 || n == 512 )
      98             :     {
      99           0 :         idx = fft256_read_indexes;
     100             : 
     101             :         /* Combined Digit reverse counter & Length two butterflies */
     102           0 :         if ( n == 128 )
     103             :         {
     104           0 :             x2 = temp;
     105           0 :             for ( i = 0; i < 64; i++ )
     106             :             {
     107           0 :                 j = *idx++;
     108           0 :                 k = *idx++;
     109             : 
     110           0 :                 *x2++ = x[j >> 1] + x[k >> 1];
     111           0 :                 *x2++ = x[j >> 1] - x[k >> 1];
     112             :             }
     113             :         }
     114           0 :         else if ( n == 256 )
     115             :         {
     116           0 :             x2 = temp;
     117           0 :             for ( i = 0; i < 128; i++ )
     118             :             {
     119           0 :                 j = *idx++;
     120           0 :                 k = *idx++;
     121             : 
     122           0 :                 *x2++ = x[j] + x[k];
     123           0 :                 *x2++ = x[j] - x[k];
     124             :             }
     125             :         }
     126           0 :         else if ( n == 512 )
     127             :         {
     128           0 :             x2even = temp;
     129           0 :             x2odd = temp + 256;
     130             : 
     131           0 :             for ( i = 0; i < 128; i++ )
     132             :             {
     133           0 :                 j = 2 * *idx++;
     134           0 :                 k = 2 * *idx++;
     135             : 
     136           0 :                 *x2even++ = x[j] + x[k];
     137           0 :                 *x2even++ = x[j] - x[k];
     138           0 :                 *x2odd++ = x[++j] + x[++k];
     139           0 :                 *x2odd++ = x[j] - x[k];
     140             :             }
     141             :         }
     142             : 
     143             :         /*-----------------------------------------------------------------*
     144             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     145             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     146             :          * and the associated pointers initialization.
     147             :          * Also, it allows to Put the Data from 'temp' back into 'x' due
     148             :          * to the previous Combined Digit Reverse and Length two butterflies
     149             :          *-----------------------------------------------------------------*/
     150             : 
     151             :         /*for_ (k = 2; k < 3; k++)*/
     152             :         {
     153           0 :             x0 = temp;
     154           0 :             x1 = x0 + 2;
     155           0 :             x2 = x;
     156             : 
     157           0 :             for ( i = 0; i < n; i += 4 )
     158             :             {
     159           0 :                 *x2++ = *x0++ + *x1; /* x[i] = xt + x[i+n2];    */
     160           0 :                 *x2++ = *x0;
     161           0 :                 *x2++ = *--x0 - *x1++; /* x[i+n2] = xt - x[i+n2];      */
     162           0 :                 *x2++ = -*x1;          /* x[i+n2+n4] = -x[i+n2+n4];     */
     163             : 
     164           0 :                 x0 += 4;
     165           0 :                 x1 += 3; /* x1 has already advanced */
     166             :             }
     167             :         }
     168             :     }
     169             :     else
     170             :     {
     171             :         /*-----------------------------------------------------------------*
     172             :          * Digit reverse counter
     173             :          *-----------------------------------------------------------------*/
     174             : 
     175           0 :         j = 0;
     176           0 :         x0 = &x[0];
     177           0 :         for ( i = 0; i < n - 1; i++ )
     178             :         {
     179           0 :             if ( i < j )
     180             :             {
     181           0 :                 xt = x[j];
     182           0 :                 x[j] = *x0;
     183           0 :                 *x0 = xt;
     184             :             }
     185           0 :             x0++;
     186           0 :             k = n / 2;
     187           0 :             while ( k <= j )
     188             :             {
     189           0 :                 j -= k;
     190           0 :                 k = k >> 1;
     191             :             }
     192           0 :             j += k;
     193             :         }
     194             : 
     195             :         /*-----------------------------------------------------------------*
     196             :          * Length two butterflies
     197             :          *-----------------------------------------------------------------*/
     198             : 
     199           0 :         x0 = &x[0];
     200           0 :         x1 = &x[1];
     201           0 :         for ( i = 0; i < n / 2; i++ )
     202             :         {
     203           0 :             *x1 = *x0 - *x1;
     204           0 :             *x0 = *x0 * 2 - *x1;
     205             : 
     206           0 :             x0++;
     207           0 :             x0++;
     208           0 :             x1++;
     209           0 :             x1++;
     210             :         }
     211             : 
     212             :         /*-----------------------------------------------------------------*
     213             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     214             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     215             :          * and the associated pointers initialization.
     216             :          *-----------------------------------------------------------------*/
     217             : 
     218             :         /* for_ (k = 2; k < 3; k++) */
     219             :         {
     220           0 :             x0 = x;
     221           0 :             x1 = x0 + 2;
     222             : 
     223           0 :             for ( i = 0; i < n; i += 4 )
     224             :             {
     225           0 :                 *x1 = *x0 - *x1;       /* x[i+n2] = xt - x[i+n2];      */
     226           0 :                 *x0 = *x0 * 2 - *x1++; /* x[i] = xt + x[i+n2];    */
     227           0 :                 *x1 = -*x1;            /* x[i+n2+n4] = -x[i+n2+n4];     */
     228             : 
     229           0 :                 x0 += 4;
     230           0 :                 x1 += 3; /* x1 has already advanced */
     231             :             }
     232             :         }
     233             :     }
     234             : 
     235             :     /*-----------------------------------------------------------------*
     236             :      * Other butterflies
     237             :      *
     238             :      * The implementation described in [1] has been changed by using
     239             :      * table lookup for evaluating sine and cosine functions.  The
     240             :      * variable ind and its increment step are needed to access table
     241             :      * entries.  Note that this implementation assumes n4 to be so
     242             :      * small that ind will never exceed the table.  Thus the input
     243             :      * argument n and the constant N_MAX_FFT must be set properly.
     244             :      *-----------------------------------------------------------------*/
     245             : 
     246           0 :     n4 = 1;
     247           0 :     n2 = 2;
     248           0 :     n1 = 4;
     249             : 
     250           0 :     step = N_MAX_DIV4;
     251             : 
     252           0 :     for ( k = 3; k <= m; k++ )
     253             :     {
     254           0 :         step >>= 1;
     255           0 :         n4 <<= 1;
     256           0 :         n2 <<= 1;
     257           0 :         n1 <<= 1;
     258             : 
     259           0 :         x0 = x;
     260           0 :         x1 = x0 + n2;
     261           0 :         x2 = x1 + n4;
     262             : 
     263           0 :         for ( i = 0; i < n; i += n1 )
     264             :         {
     265           0 :             *x1 = *x0 - *x1;     /* x[i+n2] = xt - x[i+n2];      */
     266           0 :             *x0 = *x0 * 2 - *x1; /* x[i] = xt + x[i+n2];    */
     267           0 :             *x2 = -*x2;          /* x[i+n2+n4] = -x[i+n2+n4];     */
     268             : 
     269           0 :             s = sincos_t_ext;
     270           0 :             c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 */
     271           0 :             xi1 = x0;
     272           0 :             xi3 = xi1 + n2;
     273           0 :             xi2 = xi3;
     274           0 :             x0 += n1;
     275           0 :             xi4 = x0;
     276             : 
     277           0 :             for ( j = 1; j < n4; j++ )
     278             :             {
     279           0 :                 xi3++;
     280           0 :                 xi1++;
     281           0 :                 xi4--;
     282           0 :                 xi2--;
     283           0 :                 c += step;
     284           0 :                 s += step; /* autoincrement by ar0 */
     285             : 
     286           0 :                 t1 = *xi3 * *c + *xi4 * *s; /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind);   */
     287           0 :                 t2 = *xi3 * *s - *xi4 * *c; /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind);     */
     288             : 
     289           0 :                 *xi4 = *xi2 - t2;
     290           0 :                 *xi2 = *xi1 - t1;
     291           0 :                 *xi1 = *xi1 * 2 - *xi2;
     292           0 :                 *xi3 = -2 * t2 - *xi4;
     293             :             }
     294             : 
     295           0 :             x1 += n1;
     296           0 :             x2 += n1;
     297             :         }
     298             :     }
     299             : 
     300           0 :     return;
     301             : }
     302             : 
     303     2118790 : void fft_rel_16_32fx(
     304             :     Word16 x[],  /* i/o: input/output vector Qx   */
     305             :     Word16 *q_x, /* extra scaling added on speech buffer*/
     306             :     Word16 i_subfr,
     307             :     const Word16 n, /* i  : vector length          */
     308             :     const Word16 m  /* i  : log2 of vector length  */
     309             : )
     310             : {
     311             :     Word16 i, j, k, n1, n2, n4;
     312             :     Word16 step;
     313             :     Word32 xt, t1, t2;
     314             :     Word32 *x0, *x1, *x2;
     315             :     const Word16 *s, *c;
     316             :     Word32 *xi2, *xi3, *xi4, *xi1;
     317             : 
     318             :     Word32 fft_bff32[L_FFT];
     319     2118790 :     Copy_Scale_sig_16_32_no_sat( x, fft_bff32, L_FFT, 0 ); // copying x to fft_bff32 without scaling
     320             : 
     321             :     /*-----------------------------------------------------------------*
     322             :      * Digit reverse counter
     323             :      *-----------------------------------------------------------------*/
     324             : 
     325     2118790 :     j = 0;
     326     2118790 :     move16();
     327     2118790 :     x0 = &fft_bff32[0]; // Qx
     328   542410240 :     FOR( i = 0; i < n - 1; i++ )
     329             :     {
     330   540291450 :         IF( LT_16( i, j ) )
     331             :         {
     332   254254800 :             xt = fft_bff32[j]; // Qx
     333   254254800 :             move32();
     334   254254800 :             fft_bff32[j] = *x0; // Qx
     335   254254800 :             move32();
     336   254254800 :             *x0 = xt; // Qx
     337   254254800 :             move32();
     338             :         }
     339   540291450 :         x0++;
     340   540291450 :         k = shr( n, 1 );
     341  1063632580 :         WHILE( ( k <= j ) )
     342             :         {
     343   523341130 :             j = sub( j, k );
     344   523341130 :             k = shr( k, 1 );
     345             :         }
     346   540291450 :         j = add( j, k );
     347             :     }
     348             : 
     349             :     /*-----------------------------------------------------------------*
     350             :      * Length two butterflies
     351             :      *-----------------------------------------------------------------*/
     352             : 
     353     2118790 :     x0 = &fft_bff32[0];
     354     2118790 :     x1 = &fft_bff32[1];
     355   273323910 :     FOR( i = 0; i < ( n >> 1 ); i++ )
     356             :     {
     357   271205120 :         xt = *x0;
     358   271205120 :         move32();
     359   271205120 :         *x0 = L_add( xt, *x1 );
     360   271205120 :         move32();
     361   271205120 :         *x1 = L_sub( xt, *x1 );
     362   271205120 :         move32();
     363   271205120 :         x0++;
     364   271205120 :         x0++;
     365   271205120 :         x1++;
     366   271205120 :         x1++;
     367             :     }
     368             : 
     369             :     /*-----------------------------------------------------------------*
     370             :      * Other butterflies
     371             :      *
     372             :      * The implementation described in [1] has been changed by using
     373             :      * table lookup for evaluating sine and cosine functions.  The
     374             :      * variable ind and its increment step are needed to access table
     375             :      * entries.  Note that this implementation assumes n4 to be so
     376             :      * small that ind will never exceed the table.  Thus the input
     377             :      * argument n and the constant N_MAX_SAS must be set properly.
     378             :      *-----------------------------------------------------------------*/
     379             : 
     380     2118790 :     n2 = 1;
     381     2118790 :     move16();
     382             :     /* step = N_MAX_SAS/4; */
     383    16950320 :     FOR( k = 2; k <= m; k++ )
     384             :     {
     385    14831530 :         n4 = n2;
     386    14831530 :         move16();
     387    14831530 :         n2 = shl( n4, 1 );
     388    14831530 :         n1 = shl( n2, 1 );
     389             : 
     390    14831530 :         step = idiv1616( N_MAX_SAS, n1 );
     391             : 
     392    14831530 :         x0 = fft_bff32;
     393    14831530 :         x1 = fft_bff32 + n2;
     394    14831530 :         x2 = fft_bff32 + add( n2, n4 );
     395   283917860 :         FOR( i = 0; i < n; i += n1 )
     396             :         {
     397   269086330 :             xt = *x0;
     398   269086330 :             move32(); /* xt = x[i];   */
     399   269086330 :             *x0 = L_add( xt, *x1 );
     400   269086330 :             move32(); /* x[i] = xt + x[i+n2];    */
     401   269086330 :             *x1 = L_sub( xt, *x1 );
     402   269086330 :             move32(); /* x[i+n2] = xt - x[i+n2];      */
     403   269086330 :             *x2 = L_negate( *x2 );
     404   269086330 :             move32(); /* x[i+n2+n4] = -x[i+n2+n4];     */
     405             : 
     406             : 
     407   269086330 :             s = sincos_t_fx + step; // Q15
     408   269086330 :             c = s + 64;             // Q15
     409   269086330 :             xi1 = fft_bff32 + add( i, 1 );
     410   269086330 :             xi3 = xi1 + n2;
     411   269086330 :             xi2 = xi3 - 2;
     412   269086330 :             xi4 = xi1 + sub( n1, 2 );
     413             : 
     414   949217920 :             FOR( j = 1; j < n4; j++ )
     415             :             {
     416   680131590 :                 t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx  */
     417   680131590 :                 t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx    */
     418   680131590 :                 *xi4 = L_sub( *xi2, t2 );
     419   680131590 :                 move32();
     420   680131590 :                 *xi3 = L_negate( L_add( *xi2, t2 ) );
     421   680131590 :                 move32();
     422   680131590 :                 *xi2 = L_sub( *xi1, t1 );
     423   680131590 :                 move32();
     424   680131590 :                 *xi1 = L_add( *xi1, t1 );
     425   680131590 :                 move32();
     426             : 
     427   680131590 :                 xi4--;
     428   680131590 :                 xi2--;
     429   680131590 :                 xi3++;
     430   680131590 :                 xi1++;
     431   680131590 :                 c += step;
     432   680131590 :                 s += step; /* autoincrement by ar0 */
     433             :             }
     434             : 
     435   269086330 :             x0 += n1;
     436   269086330 :             x1 += n1;
     437   269086330 :             x2 += n1;
     438             :         }
     439             :         /* step = shr(step, 1); */
     440             :     }
     441     2118790 :     Word16 norm = L_norm_arr( fft_bff32, L_FFT );
     442     2118790 :     IF( i_subfr == 0 )
     443             :     {
     444     1059395 :         Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
     445     1059395 :         *q_x = sub( norm, 16 );
     446     1059395 :         move16();
     447             :     }
     448             :     ELSE
     449             :     {
     450     1059395 :         IF( LT_16( sub( norm, 16 ), *q_x ) )
     451             :         {
     452      211597 :             scale_sig( x - L_FFT, L_FFT, sub( sub( norm, 16 ), *q_x ) );
     453      211597 :             Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
     454      211597 :             *q_x = sub( norm, 16 );
     455      211597 :             move16();
     456             :         }
     457             :         ELSE
     458             :         {
     459      847798 :             Copy_Scale_sig32_16( fft_bff32, x, L_FFT, add( 16, *q_x ) );
     460             :         }
     461             :     }
     462             : 
     463     2118790 :     return;
     464             : }
     465             : 
     466      206951 : void fft_rel_fx(
     467             :     Word16 x[],     /* i/o: input/output vector Qx   */
     468             :     const Word16 n, /* i  : vector length          */
     469             :     const Word16 m  /* i  : log2 of vector length  */
     470             : )
     471             : {
     472             :     Word16 i, j, k, n1, n2, n4;
     473             :     Word16 step;
     474             :     Word16 xt, t1, t2;
     475             :     Word16 *x0, *x1, *x2;
     476             :     const Word16 *s, *c;
     477             :     Word16 *xi2, *xi3, *xi4, *xi1;
     478             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     479      206951 :     Flag Overflow = 0;
     480      206951 :     move32();
     481             : #endif
     482             : 
     483             : 
     484             :     /*-----------------------------------------------------------------*
     485             :      * Digit reverse counter
     486             :      *-----------------------------------------------------------------*/
     487             : 
     488      206951 :     j = 0;
     489      206951 :     move16();
     490      206951 :     x0 = &x[0]; // Qx
     491      206951 :     move16();
     492    46004096 :     FOR( i = 0; i < n - 1; i++ )
     493             :     {
     494    45797145 :         IF( LT_16( i, j ) )
     495             :         {
     496    21540200 :             xt = x[j]; // Qx
     497    21540200 :             move16();
     498    21540200 :             x[j] = *x0; // Qx
     499    21540200 :             move16();
     500    21540200 :             *x0 = xt; // Qx
     501    21540200 :             move16();
     502             :         }
     503    45797145 :         x0++;
     504    45797145 :         k = shr( n, 1 );
     505    90104762 :         WHILE( ( k <= j ) )
     506             :         {
     507    44307617 :             j = sub( j, k );
     508    44307617 :             k = shr( k, 1 );
     509             :         }
     510    45797145 :         j = add( j, k );
     511             :     }
     512             : 
     513             :     /*-----------------------------------------------------------------*
     514             :      * Length two butterflies
     515             :      *-----------------------------------------------------------------*/
     516             : 
     517      206951 :     x0 = &x[0];
     518      206951 :     move16();
     519      206951 :     x1 = &x[1];
     520      206951 :     move16();
     521    23208999 :     FOR( i = 0; i < ( n >> 1 ); i++ )
     522             :     {
     523    23002048 :         xt = *x0;
     524    23002048 :         move16();
     525    23002048 :         *x0 = add_o( xt, *x1, &Overflow );
     526    23002048 :         move16();
     527    23002048 :         *x1 = sub_o( xt, *x1, &Overflow );
     528    23002048 :         move16();
     529    23002048 :         x0++;
     530    23002048 :         x0++;
     531    23002048 :         x1++;
     532    23002048 :         x1++;
     533             :     }
     534             : 
     535             :     /*-----------------------------------------------------------------*
     536             :      * Other butterflies
     537             :      *
     538             :      * The implementation described in [1] has been changed by using
     539             :      * table lookup for evaluating sine and cosine functions.  The
     540             :      * variable ind and its increment step are needed to access table
     541             :      * entries.  Note that this implementation assumes n4 to be so
     542             :      * small that ind will never exceed the table.  Thus the input
     543             :      * argument n and the constant N_MAX_SAS must be set properly.
     544             :      *-----------------------------------------------------------------*/
     545             : 
     546      206951 :     n2 = 1;
     547      206951 :     move16();
     548             :     /* step = N_MAX_SAS/4; */
     549     1489528 :     FOR( k = 2; k <= m; k++ )
     550             :     {
     551     1282577 :         n4 = n2;
     552     1282577 :         move16();
     553     1282577 :         n2 = shl( n4, 1 );
     554     1282577 :         n1 = shl( n2, 1 );
     555             : 
     556     1282577 :         step = idiv1616( N_MAX_SAS, n1 );
     557             : 
     558     1282577 :         x0 = x;
     559     1282577 :         x1 = x + n2;
     560     1282577 :         x2 = x + add( n2, n4 );
     561    24077674 :         FOR( i = 0; i < n; i += n1 )
     562             :         {
     563    22795097 :             xt = *x0;
     564    22795097 :             move16(); /* xt = x[i];   */
     565    22795097 :             *x0 = add_o( xt, *x1, &Overflow );
     566    22795097 :             move16(); /* x[i] = xt + x[i+n2];    */
     567    22795097 :             *x1 = sub_o( xt, *x1, &Overflow );
     568    22795097 :             move16(); /* x[i+n2] = xt - x[i+n2];      */
     569    22795097 :             *x2 = negate( *x2 );
     570    22795097 :             move16(); /* x[i+n2+n4] = -x[i+n2+n4];     */
     571             : 
     572             : 
     573    22795097 :             s = sincos_t_fx + step; // Q15
     574    22795097 :             c = s + 64;             // Q15
     575    22795097 :             xi1 = x + add( i, 1 );
     576    22795097 :             xi3 = xi1 + n2;
     577    22795097 :             xi2 = xi3 - 2;
     578    22795097 :             xi4 = xi1 + sub( n1, 2 );
     579             : 
     580    80341088 :             FOR( j = 1; j < n4; j++ )
     581             :             {
     582    57545991 :                 t1 = add_o( mult_r( *xi3, *c ), mult_r( *xi4, *s ), &Overflow ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx  */
     583    57545991 :                 t2 = sub_o( mult_r( *xi3, *s ), mult_r( *xi4, *c ), &Overflow ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx    */
     584    57545991 :                 *xi4 = sub_o( *xi2, t2, &Overflow );
     585    57545991 :                 move16();
     586    57545991 :                 *xi3 = negate( add_o( *xi2, t2, &Overflow ) );
     587    57545991 :                 move16();
     588    57545991 :                 *xi2 = sub_o( *xi1, t1, &Overflow );
     589    57545991 :                 move16();
     590    57545991 :                 *xi1 = add_o( *xi1, t1, &Overflow );
     591    57545991 :                 move16();
     592             : 
     593    57545991 :                 xi4--;
     594    57545991 :                 xi2--;
     595    57545991 :                 xi3++;
     596    57545991 :                 xi1++;
     597    57545991 :                 c += step;
     598    57545991 :                 s += step; /* autoincrement by ar0 */
     599             :             }
     600             : 
     601    22795097 :             x0 += n1;
     602    22795097 :             x1 += n1;
     603    22795097 :             x2 += n1;
     604             :         }
     605             :         /* step = shr(step, 1); */
     606             :     }
     607             : 
     608      206951 :     return;
     609             : }
     610             : 
     611       44321 : void fft_rel_fx32(
     612             :     Word32 x[],     /* i/o: input/output vector Qx   */
     613             :     const Word16 n, /* i  : vector length          */
     614             :     const Word16 m  /* i  : log2 of vector length  */
     615             : )
     616             : {
     617             :     Word16 i, j, k, n1, n2, n4;
     618             :     Word16 step;
     619             :     Word32 xt, t1, t2;
     620             :     Word32 *x0, *x1, *x2;
     621             :     Word32 *xi2, *xi3, *xi4, *xi1;
     622             :     const Word16 *s, *c;
     623             :     const Word16 *idx;
     624             : 
     625             :     /* !!!! NMAX = 256 is hardcoded here  (similar optimizations should be done for NMAX > 256) !!! */
     626             : 
     627             :     Word32 *x2even, *x2odd;
     628             :     Word32 temp[512];
     629             : 
     630       44321 :     test();
     631       44321 :     test();
     632       44321 :     IF( EQ_16( n, 128 ) || EQ_16( n, 256 ) || EQ_16( n, 512 ) )
     633             :     {
     634       44321 :         idx = fft256_read_indexes;
     635             : 
     636             :         /* Combined Digit reverse counter & Length two butterflies */
     637       44321 :         IF( EQ_16( n, 128 ) )
     638             :         {
     639           0 :             x2 = temp;
     640           0 :             FOR( i = 0; i < 64; i++ )
     641             :             {
     642           0 :                 j = *idx++;
     643           0 :                 move16();
     644           0 :                 k = *idx++;
     645           0 :                 move16();
     646             : 
     647           0 :                 *x2++ = L_add( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
     648           0 :                 move16();
     649           0 :                 *x2++ = L_sub( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
     650           0 :                 move16();
     651             :             }
     652             :         }
     653       44321 :         ELSE IF( EQ_16( n, 256 ) )
     654             :         {
     655         488 :             x2 = temp;
     656       62952 :             FOR( i = 0; i < 128; i++ )
     657             :             {
     658       62464 :                 j = *idx++;
     659       62464 :                 move16();
     660       62464 :                 k = *idx++;
     661       62464 :                 move16();
     662             : 
     663       62464 :                 *x2++ = L_add( x[j], x[k] ); // Qx
     664       62464 :                 move16();
     665       62464 :                 *x2++ = L_sub( x[j], x[k] ); // Qx
     666       62464 :                 move16();
     667             :             }
     668             :         }
     669       43833 :         ELSE IF( EQ_16( n, 512 ) )
     670             :         {
     671       43833 :             x2even = temp;
     672       43833 :             x2odd = temp + 256;
     673             : 
     674     5654457 :             FOR( i = 0; i < 128; i++ )
     675             :             {
     676     5610624 :                 j = shl( *idx, 1 );
     677     5610624 :                 idx++;
     678     5610624 :                 k = shl( *idx, 1 );
     679     5610624 :                 idx++;
     680             : 
     681     5610624 :                 *x2even++ = L_add( x[j], x[k] ); // Qx
     682     5610624 :                 move16();
     683     5610624 :                 *x2even++ = L_sub( x[j], x[k] ); // Qx
     684     5610624 :                 move16();
     685     5610624 :                 j = add( j, 1 );
     686     5610624 :                 k = add( k, 1 );
     687     5610624 :                 *x2odd++ = L_add( x[j], x[k] ); // Qx
     688     5610624 :                 move16();
     689     5610624 :                 *x2odd++ = L_sub( x[j], x[k] ); // Qx
     690     5610624 :                 move16();
     691             :             }
     692             :         }
     693             : 
     694             :         /*-----------------------------------------------------------------*
     695             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     696             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     697             :          * and the associated pointers initialization.
     698             :          * Also, it allows to Put the Data from 'temp' back into 'x' due
     699             :          * to the previous Combined Digit Reverse and Length two butterflies
     700             :          *-----------------------------------------------------------------*/
     701             : 
     702             :         /*for_ (k = 2; k < 3; k++)*/
     703             :         {
     704       44321 :             x0 = temp;
     705       44321 :             x1 = x0 + 2;
     706       44321 :             x2 = x; // Qx
     707             : 
     708     5686177 :             FOR( i = 0; i < n; i += 4 )
     709             :             {
     710     5641856 :                 *x2++ = L_add( *x0++, *x1 ); /* x[i] = xt + x[i+n2];    */
     711     5641856 :                 move16();
     712     5641856 :                 *x2++ = *x0;
     713     5641856 :                 move16();
     714     5641856 :                 x0--;
     715     5641856 :                 *x2++ = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      */
     716     5641856 :                 move16();
     717     5641856 :                 x1++;
     718     5641856 :                 *x2++ = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4];     */
     719     5641856 :                 move16();
     720             : 
     721     5641856 :                 x0 += 4;
     722     5641856 :                 x1 += 3; /* x1 has already advanced */
     723             :             }
     724             :         }
     725             :     }
     726             :     ELSE
     727             :     {
     728             :         /*-----------------------------------------------------------------*
     729             :          * Digit reverse counter
     730             :          *-----------------------------------------------------------------*/
     731             : 
     732           0 :         j = 0;
     733           0 :         move16();
     734           0 :         x0 = &x[0]; // Qx
     735           0 :         FOR( i = 0; i < ( n - 1 ); i++ )
     736             :         {
     737           0 :             IF( LT_16( i, j ) )
     738             :             {
     739           0 :                 xt = x[j]; // Qx
     740           0 :                 move32();
     741           0 :                 x[j] = *x0;
     742           0 :                 move32();
     743           0 :                 *x0 = xt;
     744           0 :                 move32();
     745             :             }
     746           0 :             x0++;
     747           0 :             k = shr( n, 1 );
     748           0 :             WHILE( ( k <= j ) )
     749             :             {
     750           0 :                 j = sub( j, k );
     751           0 :                 k = shr( k, 1 );
     752             :             }
     753           0 :             j = add( j, k );
     754             :         }
     755             : 
     756             :         /*-----------------------------------------------------------------*
     757             :          * Length two butterflies
     758             :          *-----------------------------------------------------------------*/
     759             : 
     760           0 :         x0 = &x[0]; // Qx
     761           0 :         x1 = &x[1]; // Qx
     762           0 :         FOR( i = 0; i < ( n >> 1 ); i++ )
     763             :         {
     764           0 :             *x1 = L_sub( *x0, *x1 ); // Qx
     765           0 :             move32();
     766           0 :             *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); //*x0 * 2 - *x1 (Qx)
     767           0 :             move32();
     768             : 
     769           0 :             x0++;
     770           0 :             x0++;
     771           0 :             x1++;
     772           0 :             x1++;
     773             :         }
     774             : 
     775             :         /*-----------------------------------------------------------------*
     776             :          * 1st Stage Loop has been Unrolled because n4 is '1' and that
     777             :          * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
     778             :          * and the associated pointers initialization.
     779             :          *-----------------------------------------------------------------*/
     780             : 
     781             :         /* for_ (k = 2; k < 3; k++) */
     782             :         {
     783           0 :             x0 = x; // Qx
     784           0 :             x1 = x0 + 2;
     785             : 
     786           0 :             FOR( i = 0; i < n; i += 4 )
     787             :             {
     788           0 :                 *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      Qx*/
     789           0 :                 move32();
     790           0 :                 *x0 = L_sub( L_shl( *x0, 1 ), *x1++ ); /* x[i] = xt + x[i+n2];    */ /**x0 * 2 - *x1 (Qx)*/
     791           0 :                 move32();
     792           0 :                 *x1 = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4];     Qx*/
     793           0 :                 move32();
     794             : 
     795           0 :                 x0 += 4;
     796           0 :                 x1 += 3; /* x1 has already advanced */
     797             :             }
     798             :         }
     799             :     }
     800             : 
     801             :     /*-----------------------------------------------------------------*
     802             :      * Other butterflies
     803             :      *
     804             :      * The implementation described in [1] has been changed by using
     805             :      * table lookup for evaluating sine and cosine functions.  The
     806             :      * variable ind and its increment step are needed to access table
     807             :      * entries.  Note that this implementation assumes n4 to be so
     808             :      * small that ind will never exceed the table.  Thus the input
     809             :      * argument n and the constant N_MAX_FFT must be set properly.
     810             :      *-----------------------------------------------------------------*/
     811             : 
     812       44321 :     n4 = 1;
     813       44321 :     move16();
     814       44321 :     n2 = 2;
     815       44321 :     move16();
     816       44321 :     n1 = 4;
     817       44321 :     move16();
     818             : 
     819       44321 :     step = N_MAX_DIV4;
     820       44321 :     move16();
     821             : 
     822      354080 :     FOR( k = 3; k <= m; k++ )
     823             :     {
     824      309759 :         step = shr( step, 1 );
     825      309759 :         n4 = shl( n4, 1 );
     826      309759 :         n2 = shl( n2, 1 );
     827      309759 :         n1 = shl( n1, 1 );
     828             : 
     829      309759 :         x0 = x;
     830      309759 :         x1 = x0 + n2;
     831      309759 :         x2 = x1 + n4;
     832             : 
     833     5907294 :         FOR( i = 0; i < n; i += n1 )
     834             :         {
     835     5597535 :             *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2];      */
     836     5597535 :             move32();
     837     5597535 :             *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); /* x[i] = xt + x[i+n2];    */ /**x0 * 2 - *x1 (Qx)*/
     838     5597535 :             move32();
     839     5597535 :             *x2 = L_negate( *x2 ); /* x[i+n2+n4] = -x[i+n2+n4];    Qx */
     840     5597535 :             move32();
     841             : 
     842     5597535 :             s = sincos_t_ext_fx;   // Q15
     843     5597535 :             c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 Q15*/
     844     5597535 :             xi1 = x0;
     845     5597535 :             xi3 = xi1 + n2;
     846     5597535 :             xi2 = xi3;
     847     5597535 :             x0 += n1;
     848     5597535 :             xi4 = x0;
     849             : 
     850    39461760 :             FOR( j = 1; j < n4; j++ )
     851             :             {
     852    33864225 :                 xi3++;
     853    33864225 :                 xi1++;
     854    33864225 :                 xi4--;
     855    33864225 :                 xi2--;
     856    33864225 :                 c += step;
     857    33864225 :                 s += step; /* autoincrement by ar0 */
     858             : 
     859    33864225 :                 t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind);   Qx*/
     860    33864225 :                 t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind);   Qx*/
     861             : 
     862    33864225 :                 *xi4 = L_sub( *xi2, t2 );
     863    33864225 :                 move32();
     864    33864225 :                 *xi2 = L_sub( *xi1, t1 );
     865    33864225 :                 move32();
     866    33864225 :                 *xi1 = L_sub( L_shl( *xi1, 1 ), *xi2 ); // Qx
     867    33864225 :                 move32();
     868    33864225 :                 *xi3 = L_negate( L_add( L_shl( t2, 1 ), *xi4 ) ); // Qx
     869    33864225 :                 move32();
     870             :             }
     871             : 
     872     5597535 :             x1 += n1;
     873     5597535 :             x2 += n1;
     874             :         }
     875             :     }
     876             : 
     877       44321 :     return;
     878             : }

Generated by: LCOV version 1.14