LCOV - code coverage report
Current view: top level - lib_com - fft_cldfb_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main @ da9cc8ead0679b4682d329fdff98cf1616159273 Lines: 553 582 95.0 %
Date: 2025-10-13 22:24:20 Functions: 5 6 83.3 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <stdint.h>
      38             : #include "options.h"
      39             : #include <assert.h>
      40             : #include "prot_fx.h"
      41             : #include "ivas_cnst.h"
      42             : #include "wmc_auto.h"
      43             : #include "basop_util.h"
      44             : #include "complex_basop.h"
      45             : 
      46             : #define Mpy_32_xx Mpy_32_16_1
      47             : 
      48             : #define FFTC( x ) WORD322WORD16( (Word32) x )
      49             : 
      50             : #define C31 ( FFTC( 0x91261468 ) ) /* FL2WORD32( -0.86602540) -sqrt(3)/2 */
      51             : 
      52             : #define C51 ( FFTC( 0x79bc3854 ) ) /* FL2WORD32( 0.95105652)   */
      53             : #define C52 ( FFTC( 0x9d839db0 ) ) /* FL2WORD32(-1.53884180/2) */
      54             : #define C53 ( FFTC( 0xd18053ce ) ) /* FL2WORD32(-0.36327126)   */
      55             : #define C54 ( FFTC( 0x478dde64 ) ) /* FL2WORD32( 0.55901699)   */
      56             : #define C55 ( FFTC( 0xb0000001 ) ) /* FL2WORD32(-1.25/2)       */
      57             : 
      58             : #define C81 ( FFTC( 0x5a82799a ) ) /* FL2WORD32( 7.071067811865475e-1) */
      59             : #define C82 ( FFTC( 0xa57d8666 ) ) /* FL2WORD32(-7.071067811865475e-1) */
      60             : 
      61             : #define C161 ( FFTC( 0x5a82799a ) ) /* FL2WORD32( 7.071067811865475e-1)  INV_SQRT2    */
      62             : #define C162 ( FFTC( 0xa57d8666 ) ) /* FL2WORD32(-7.071067811865475e-1) -INV_SQRT2    */
      63             : 
      64             : #define C163 ( FFTC( 0x7641af3d ) ) /* FL2WORD32( 9.238795325112867e-1)  COS_PI_DIV8  */
      65             : #define C164 ( FFTC( 0x89be50c3 ) ) /* FL2WORD32(-9.238795325112867e-1) -COS_PI_DIV8  */
      66             : 
      67             : #define C165 ( FFTC( 0x30fbc54d ) ) /* FL2WORD32( 3.826834323650898e-1)  COS_3PI_DIV8 */
      68             : #define C166 ( FFTC( 0xcf043ab3 ) ) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 */
      69             : 
      70             : 
      71             : #define cplxMpy4_8_0( re, im, a, b, c, d )                          \
      72             :     re = L_shr( L_sub( Mpy_32_xx( a, c ), Mpy_32_xx( b, d ) ), 1 ); \
      73             :     im = L_shr( L_add( Mpy_32_xx( a, d ), Mpy_32_xx( b, c ) ), 1 );
      74             : 
      75             : #define cplxMpy4_8_1( re, im, a, b ) \
      76             :     re = L_shr( a, 1 );              \
      77             :     im = L_shr( b, 1 );
      78             : 
      79             : void fft16_with_cmplx_data( cmplx *pInp, Word16 bsacle );
      80             : 
      81             : 
      82             : #undef SCALEFACTOR5
      83             : #undef SCALEFACTOR8
      84             : #undef SCALEFACTOR10
      85             : #undef SCALEFACTOR16
      86             : #undef SCALEFACTOR20
      87             : #undef SCALEFACTOR30
      88             : #undef SCALEFACTOR30_1
      89             : #undef SCALEFACTOR30_2
      90             : 
      91             : #define SCALEFACTOR5    ( 0 )
      92             : #define SCALEFACTOR8    ( 0 )
      93             : #define SCALEFACTOR10   ( 0 )
      94             : #define SCALEFACTOR16   ( 0 )
      95             : #define SCALEFACTOR20   ( 0 )
      96             : #define SCALEFACTOR30   ( 0 )
      97             : #define SCALEFACTOR30_1 ( 0 )
      98             : #define SCALEFACTOR30_2 ( 0 )
      99             : 
     100             : cmplx CL_scale_t( cmplx x, Word16 y );
     101             : cmplx CL_dscale_t( cmplx x, Word16 y1, Word16 y2 );
     102             : 
     103             : /**
     104             :  * \brief    Function performs a complex 8-point FFT
     105             :  *           The FFT is performed inplace. The result of the FFT
     106             :  *           is scaled by SCALEFACTOR8 bits.
     107             :  *
     108             :  *           WOPS with 32x16 bit multiplications: 108 cycles
     109             :  *
     110             :  * \param    [i/o] inp   complex input / output buffer; elements inp[0..7]
     111             :  *                       (real and imaginary parts together) are read and
     112             :  *                       overwritten in place, there is no stride argument
     113             :  *
     114             :  * \return   void
     115             :  */
     116     4964772 : static void fft8_with_cmplx_data( cmplx *inp /*Qx*/ )
     117             : {
     118             :     cmplx x0, x1, x2, x3, x4, x5, x6, x7;
     119             :     cmplx s0, s1, s2, s3, s4, s5, s6, s7;
     120             :     cmplx t0, t1, t2, t3, t4, t5, t6, t7;
     121             : 
     122             :     /* Pre-additions */
     123     4964772 :     x0 = CL_shr( inp[0], SCALEFACTOR8 );
     124     4964772 :     x1 = CL_shr( inp[1], SCALEFACTOR8 );
     125     4964772 :     x2 = CL_shr( inp[2], SCALEFACTOR8 );
     126     4964772 :     x3 = CL_shr( inp[3], SCALEFACTOR8 );
     127     4964772 :     x4 = CL_shr( inp[4], SCALEFACTOR8 );
     128     4964772 :     x5 = CL_shr( inp[5], SCALEFACTOR8 );
     129     4964772 :     x6 = CL_shr( inp[6], SCALEFACTOR8 );
     130     4964772 :     x7 = CL_shr( inp[7], SCALEFACTOR8 );
     131             : 
     132             :     /* loops are unrolled */
     133             :     {
     134     4964772 :         t0 = CL_add( x0, x4 );
     135     4964772 :         t1 = CL_sub( x0, x4 );
     136             : 
     137     4964772 :         t2 = CL_add( x1, x5 );
     138     4964772 :         t3 = CL_sub( x1, x5 );
     139             : 
     140     4964772 :         t4 = CL_add( x2, x6 );
     141     4964772 :         t5 = CL_sub( x2, x6 );
     142             : 
     143     4964772 :         t6 = CL_add( x3, x7 );
     144     4964772 :         t7 = CL_sub( x3, x7 );
     145             :     }
     146             : 
     147             :     /* Pre-additions and core multiplications */
     148             : 
     149     4964772 :     s0 = CL_add( t0, t4 );
     150     4964772 :     s2 = CL_sub( t0, t4 );
     151             : 
     152     4964772 :     s4 = CL_mac_j( t1, t5 );
     153     4964772 :     s5 = CL_msu_j( t1, t5 );
     154             : 
     155     4964772 :     s1 = CL_add( t2, t6 );
     156     4964772 :     s3 = CL_sub( t2, t6 );
     157     4964772 :     s3 = CL_mul_j( s3 );
     158             : 
     159     4964772 :     t0 = CL_add( t3, t7 );
     160     4964772 :     t1 = CL_sub( t3, t7 );
     161             : 
     162     4964772 :     s6 = CL_scale_t( CL_msu_j( t1, t0 ), C81 );
     163     4964772 :     s7 = CL_dscale_t( CL_swap_real_imag( CL_msu_j( t0, t1 ) ), C81, C82 );
     164             : 
     165             :     /* Post-additions */
     166             : 
     167     4964772 :     inp[0] = CL_add( s0, s1 );
     168     4964772 :     inp[4] = CL_sub( s0, s1 );
     169             : 
     170     4964772 :     inp[2] = CL_sub( s2, s3 );
     171     4964772 :     inp[6] = CL_add( s2, s3 );
     172             : 
     173     4964772 :     inp[3] = CL_add( s4, s7 );
     174     4964772 :     inp[7] = CL_sub( s4, s7 );
     175             : 
     176     4964772 :     inp[1] = CL_add( s5, s6 );
     177     4964772 :     inp[5] = CL_sub( s5, s6 );
     178             : #ifdef WMOPS
     179             :     multiCounter[currCounter].CL_move += 8;
     180             : #endif
     181     4964772 : }
     182             : 
     183             : /**
     184             :  * \brief    Function performs a complex 5-point FFT
     185             :  *           The FFT is performed inplace. The result of the FFT
     186             :  *           is scaled by SCALEFACTOR5 bits.
     187             :  *
     188             :  *           WOPS with 32x16 bit multiplications:  88 cycles
     189             :  *
     190             :  * \param    [i/o] inp   complex input / output buffer; elements inp[0..4]
     191             :  *                       (real and imaginary parts together) are read and
     192             :  *                       overwritten in place, there is no stride argument
     193             :  *
     194             :  * \return   void
     195             :  */
     196           0 : static void fft5_with_cmplx_data( cmplx *inp /*Qx*/ )
     197             : {
     198             :     cmplx x0, x1, x2, x3, x4;
     199             :     cmplx y1, y2, y3, y4;
     200             :     cmplx t;
     201             : 
     202           0 :     x0 = CL_shr( inp[0], SCALEFACTOR5 );
     203           0 :     x1 = CL_shr( inp[1], SCALEFACTOR5 );
     204           0 :     x2 = CL_shr( inp[2], SCALEFACTOR5 );
     205           0 :     x3 = CL_shr( inp[3], SCALEFACTOR5 );
     206           0 :     x4 = CL_shr( inp[4], SCALEFACTOR5 );
     207             : 
     208           0 :     y1 = CL_add( x1, x4 );
     209           0 :     y4 = CL_sub( x1, x4 );
     210           0 :     y3 = CL_add( x2, x3 );
     211           0 :     y2 = CL_sub( x2, x3 );
     212           0 :     t = CL_scale_t( CL_sub( y1, y3 ), C54 );
     213           0 :     y1 = CL_add( y1, y3 );
     214           0 :     inp[0] = CL_add( x0, y1 );
     215             : 
     216             :     /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
     217             :     the values as fracts */
     218           0 :     y1 = CL_add( inp[0], ( CL_shl( CL_scale_t( y1, C55 ), 1 ) ) );
     219           0 :     y3 = CL_sub( y1, t );
     220           0 :     y1 = CL_add( y1, t );
     221             : 
     222           0 :     t = CL_scale_t( CL_add( y4, y2 ), C51 );
     223             :     /* Bit shift left because of the constant C52 which was scaled with the factor 0.5 because of the representation of
     224             :     the values as fracts */
     225           0 :     y4 = CL_add( t, CL_shl( CL_scale_t( y4, C52 ), 1 ) );
     226           0 :     y2 = CL_add( t, CL_scale_t( y2, C53 ) );
     227             : 
     228             : 
     229             :     /* combination */
     230           0 :     inp[1] = CL_msu_j( y1, y2 );
     231           0 :     inp[4] = CL_mac_j( y1, y2 );
     232             : 
     233           0 :     inp[2] = CL_mac_j( y3, y4 );
     234           0 :     inp[3] = CL_msu_j( y3, y4 );
     235             : 
     236             : #ifdef WMOPS
     237             :     multiCounter[currCounter].CL_move += 5;
     238             : #endif
     239           0 : }
     240             : 
     241             : /**
     242             :  * \brief    Function performs a complex 10-point FFT
     243             :  *           The FFT is performed inplace. The result of the FFT
     244             :  *           is scaled by SCALEFACTOR10 bits.
     245             :  *
     246             :  *           WOPS with 32x16 bit multiplications:  196 cycles
     247             :  *
     248             :  * \param    [i/o] inp_data  complex input / output buffer; elements
     249             :  *                       inp_data[0..9] (real and imaginary parts together) are
     250             :  *                       read and overwritten in place, there is no stride argument
     251             :  *
     252             :  * \return   void
     253             :  */
     254    13411028 : static void fft10_with_cmplx_data( cmplx *inp_data /*Qx*/ )
     255             : {
     256             :     cmplx r1, r2, r3, r4;
     257             :     cmplx x0, x1, x2, x3, x4, t;
     258             :     cmplx y[10];
     259             : 
     260             :     /* FOR i=0 */
     261             :     {
     262    13411028 :         x0 = CL_shr( inp_data[0], SCALEFACTOR10 );
     263    13411028 :         x1 = CL_shr( inp_data[2], SCALEFACTOR10 );
     264    13411028 :         x2 = CL_shr( inp_data[4], SCALEFACTOR10 );
     265    13411028 :         x3 = CL_shr( inp_data[6], SCALEFACTOR10 );
     266    13411028 :         x4 = CL_shr( inp_data[8], SCALEFACTOR10 );
     267             : 
     268    13411028 :         r1 = CL_add( x3, x2 );
     269    13411028 :         r4 = CL_sub( x3, x2 );
     270    13411028 :         r3 = CL_add( x1, x4 );
     271    13411028 :         r2 = CL_sub( x1, x4 );
     272    13411028 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     273    13411028 :         r1 = CL_add( r1, r3 );
     274    13411028 :         y[0] = CL_add( x0, r1 );
     275    13411028 :         r1 = CL_add( y[0], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     276    13411028 :         r3 = CL_sub( r1, t );
     277    13411028 :         r1 = CL_add( r1, t );
     278    13411028 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     279    13411028 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     280    13411028 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     281             : 
     282             : 
     283    13411028 :         y[2] = CL_msu_j( r1, r2 );
     284    13411028 :         y[8] = CL_mac_j( r1, r2 );
     285    13411028 :         y[4] = CL_mac_j( r3, r4 );
     286    13411028 :         y[6] = CL_msu_j( r3, r4 );
     287             :     }
     288             :     /* FOR i=1 */
     289             :     {
     290    13411028 :         x0 = CL_shr( inp_data[5], SCALEFACTOR10 );
     291    13411028 :         x1 = CL_shr( inp_data[1], SCALEFACTOR10 );
     292    13411028 :         x2 = CL_shr( inp_data[3], SCALEFACTOR10 );
     293    13411028 :         x3 = CL_shr( inp_data[7], SCALEFACTOR10 );
     294    13411028 :         x4 = CL_shr( inp_data[9], SCALEFACTOR10 );
     295             : 
     296    13411028 :         r1 = CL_add( x1, x4 );
     297    13411028 :         r4 = CL_sub( x1, x4 );
     298    13411028 :         r3 = CL_add( x3, x2 );
     299    13411028 :         r2 = CL_sub( x3, x2 );
     300    13411028 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     301    13411028 :         r1 = CL_add( r1, r3 );
     302    13411028 :         y[1] = CL_add( x0, r1 );
     303    13411028 :         r1 = CL_add( y[1], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     304    13411028 :         r3 = CL_sub( r1, t );
     305    13411028 :         r1 = CL_add( r1, t );
     306    13411028 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     307    13411028 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     308    13411028 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     309             : 
     310             : 
     311    13411028 :         y[3] = CL_msu_j( r1, r2 );
     312    13411028 :         y[9] = CL_mac_j( r1, r2 );
     313    13411028 :         y[5] = CL_mac_j( r3, r4 );
     314    13411028 :         y[7] = CL_msu_j( r3, r4 );
     315             :     }
     316             : 
     317             :     /* FOR i=0 */
     318             :     {
     319    13411028 :         inp_data[0] = CL_add( y[0], y[1] );
     320    13411028 :         inp_data[5] = CL_sub( y[0], y[1] );
     321             :     }
     322             :     /* FOR i=2 */
     323             :     {
     324    13411028 :         inp_data[2] = CL_add( y[2], y[3] );
     325    13411028 :         inp_data[7] = CL_sub( y[2], y[3] );
     326             :     }
     327             :     /* FOR i=4 */
     328             :     {
     329    13411028 :         inp_data[4] = CL_add( y[4], y[5] );
     330    13411028 :         inp_data[9] = CL_sub( y[4], y[5] );
     331             :     }
     332             :     /* FOR i=6 */
     333             :     {
     334    13411028 :         inp_data[6] = CL_add( y[6], y[7] );
     335    13411028 :         inp_data[1] = CL_sub( y[6], y[7] );
     336             :     }
     337             :     /* FOR i=8 */
     338             :     {
     339    13411028 :         inp_data[8] = CL_add( y[8], y[9] );
     340    13411028 :         inp_data[3] = CL_sub( y[8], y[9] );
     341             :     }
     342             : 
     343             : #ifdef WMOPS
     344             :     multiCounter[currCounter].CL_move += 10;
     345             : #endif
     346    13411028 : }
     347             : 
     348             : /**
     349             :  * \brief    Function performs a complex 20-point FFT
     350             :  *           The FFT is performed inplace. The result of the FFT
     351             :  *           is scaled by SCALEFACTOR20 bits.
     352             :  *
     353             :  *           WOPS with 32x16 bit multiplications:  432 cycles
     354             :  *
     355             :  * \param    [i/o] inp_data  complex input / output buffer; elements
     356             :  *                       inp_data[0..19] (real and imaginary parts together) are
     357             :  *                       read and overwritten in place, there is no stride argument
     358             :  *
     359             :  * \return   void
     360             :  */
     361    17314904 : static void fft20_with_cmplx_data( cmplx *inp_data /*Qx*/ )
     362             : {
     363             :     cmplx r1, r2, r3, r4;
     364             :     cmplx x0, x1, x2, x3, x4;
     365             :     cmplx t, t0, t1, t2, t3;
     366             :     cmplx y[20];
     367             :     cmplx *y0, *y1, *y2, *y3, *y4;
     368             : 
     369    17314904 :     y0 = y;
     370    17314904 :     y1 = &y[4];
     371    17314904 :     y2 = &y[16];
     372    17314904 :     y3 = &y[8];
     373    17314904 :     y4 = &y[12];
     374             : 
     375             :     {
     376    17314904 :         x0 = CL_shr( inp_data[0], SCALEFACTOR20 );
     377    17314904 :         x1 = CL_shr( inp_data[16], SCALEFACTOR20 );
     378    17314904 :         x2 = CL_shr( inp_data[12], SCALEFACTOR20 );
     379    17314904 :         x3 = CL_shr( inp_data[8], SCALEFACTOR20 );
     380    17314904 :         x4 = CL_shr( inp_data[4], SCALEFACTOR20 );
     381             : 
     382    17314904 :         r4 = CL_sub( x1, x4 );
     383    17314904 :         r2 = CL_sub( x2, x3 );
     384    17314904 :         r1 = CL_add( x1, x4 );
     385    17314904 :         r3 = CL_add( x2, x3 );
     386    17314904 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     387    17314904 :         r1 = CL_add( r1, r3 );
     388    17314904 :         y0[0] = CL_add( x0, r1 );
     389    17314904 :         r1 = CL_add( y0[0], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     390    17314904 :         r3 = CL_sub( r1, t );
     391    17314904 :         r1 = CL_add( r1, t );
     392    17314904 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     393    17314904 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     394    17314904 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     395             : 
     396             : 
     397    17314904 :         y1[0] = CL_msu_j( r1, r2 );
     398    17314904 :         y2[0] = CL_mac_j( r1, r2 );
     399    17314904 :         y3[0] = CL_mac_j( r3, r4 );
     400    17314904 :         y4[0] = CL_msu_j( r3, r4 );
     401             :     }
     402             :     {
     403    17314904 :         x0 = CL_shr( inp_data[5], SCALEFACTOR20 );
     404    17314904 :         x1 = CL_shr( inp_data[1], SCALEFACTOR20 );
     405    17314904 :         x2 = CL_shr( inp_data[17], SCALEFACTOR20 );
     406    17314904 :         x3 = CL_shr( inp_data[13], SCALEFACTOR20 );
     407    17314904 :         x4 = CL_shr( inp_data[9], SCALEFACTOR20 );
     408             : 
     409    17314904 :         r4 = CL_sub( x1, x4 );
     410    17314904 :         r2 = CL_sub( x2, x3 );
     411    17314904 :         r1 = CL_add( x1, x4 );
     412    17314904 :         r3 = CL_add( x2, x3 );
     413    17314904 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     414    17314904 :         r1 = CL_add( r1, r3 );
     415    17314904 :         y0[1] = CL_add( x0, r1 );
     416    17314904 :         r1 = CL_add( y0[1], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     417    17314904 :         r3 = CL_sub( r1, t );
     418    17314904 :         r1 = CL_add( r1, t );
     419    17314904 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     420    17314904 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     421    17314904 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     422             : 
     423             : 
     424    17314904 :         y1[1] = CL_msu_j( r1, r2 );
     425    17314904 :         y2[1] = CL_mac_j( r1, r2 );
     426    17314904 :         y3[1] = CL_mac_j( r3, r4 );
     427    17314904 :         y4[1] = CL_msu_j( r3, r4 );
     428             :     }
     429             :     {
     430    17314904 :         x0 = CL_shr( inp_data[10], SCALEFACTOR20 );
     431    17314904 :         x1 = CL_shr( inp_data[6], SCALEFACTOR20 );
     432    17314904 :         x2 = CL_shr( inp_data[2], SCALEFACTOR20 );
     433    17314904 :         x3 = CL_shr( inp_data[18], SCALEFACTOR20 );
     434    17314904 :         x4 = CL_shr( inp_data[14], SCALEFACTOR20 );
     435             : 
     436    17314904 :         r4 = CL_sub( x1, x4 );
     437    17314904 :         r2 = CL_sub( x2, x3 );
     438    17314904 :         r1 = CL_add( x1, x4 );
     439    17314904 :         r3 = CL_add( x2, x3 );
     440    17314904 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     441    17314904 :         r1 = CL_add( r1, r3 );
     442    17314904 :         y0[2] = CL_add( x0, r1 );
     443    17314904 :         r1 = CL_add( y0[2], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     444    17314904 :         r3 = CL_sub( r1, t );
     445    17314904 :         r1 = CL_add( r1, t );
     446    17314904 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     447    17314904 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     448    17314904 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     449             : 
     450             : 
     451    17314904 :         y1[2] = CL_msu_j( r1, r2 );
     452    17314904 :         y2[2] = CL_mac_j( r1, r2 );
     453    17314904 :         y3[2] = CL_mac_j( r3, r4 );
     454    17314904 :         y4[2] = CL_msu_j( r3, r4 );
     455             :     }
     456             :     {
     457    17314904 :         x0 = CL_shr( inp_data[15], SCALEFACTOR20 );
     458    17314904 :         x1 = CL_shr( inp_data[11], SCALEFACTOR20 );
     459    17314904 :         x2 = CL_shr( inp_data[7], SCALEFACTOR20 );
     460    17314904 :         x3 = CL_shr( inp_data[3], SCALEFACTOR20 );
     461    17314904 :         x4 = CL_shr( inp_data[19], SCALEFACTOR20 );
     462             : 
     463    17314904 :         r4 = CL_sub( x1, x4 );
     464    17314904 :         r2 = CL_sub( x2, x3 );
     465    17314904 :         r1 = CL_add( x1, x4 );
     466    17314904 :         r3 = CL_add( x2, x3 );
     467    17314904 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     468    17314904 :         r1 = CL_add( r1, r3 );
     469    17314904 :         y0[3] = CL_add( x0, r1 );
     470    17314904 :         r1 = CL_add( y0[3], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     471    17314904 :         r3 = CL_sub( r1, t );
     472    17314904 :         r1 = CL_add( r1, t );
     473    17314904 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     474    17314904 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     475    17314904 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     476             : 
     477             : 
     478    17314904 :         y1[3] = CL_msu_j( r1, r2 );
     479    17314904 :         y2[3] = CL_mac_j( r1, r2 );
     480    17314904 :         y3[3] = CL_mac_j( r3, r4 );
     481    17314904 :         y4[3] = CL_msu_j( r3, r4 );
     482             :     }
     483             : 
     484             :     {
     485    17314904 :         cmplx *ptr_y = y;
     486             :         {
     487             :             cmplx Cy0, Cy1, Cy2, Cy3;
     488             : 
     489    17314904 :             Cy0 = *ptr_y++;
     490    17314904 :             Cy1 = *ptr_y++;
     491    17314904 :             Cy2 = *ptr_y++;
     492    17314904 :             Cy3 = *ptr_y++;
     493             : 
     494             :             /*  Pre-additions */
     495    17314904 :             t0 = CL_add( Cy0, Cy2 );
     496    17314904 :             t1 = CL_sub( Cy0, Cy2 );
     497    17314904 :             t2 = CL_add( Cy1, Cy3 );
     498    17314904 :             t3 = CL_sub( Cy1, Cy3 );
     499             : 
     500             : 
     501    17314904 :             inp_data[0] = CL_add( t0, t2 );
     502    17314904 :             inp_data[5] = CL_msu_j( t1, t3 );
     503    17314904 :             inp_data[10] = CL_sub( t0, t2 );
     504    17314904 :             inp_data[15] = CL_mac_j( t1, t3 );
     505             :         }
     506             : 
     507             :         {
     508             :             cmplx Cy0, Cy1, Cy2, Cy3;
     509             : 
     510    17314904 :             Cy0 = *ptr_y++;
     511    17314904 :             Cy1 = *ptr_y++;
     512    17314904 :             Cy2 = *ptr_y++;
     513    17314904 :             Cy3 = *ptr_y++;
     514             : 
     515             :             /*  Pre-additions */
     516    17314904 :             t0 = CL_add( Cy0, Cy2 );
     517    17314904 :             t1 = CL_sub( Cy0, Cy2 );
     518    17314904 :             t2 = CL_add( Cy1, Cy3 );
     519    17314904 :             t3 = CL_sub( Cy1, Cy3 );
     520             : 
     521             : 
     522    17314904 :             inp_data[4] = CL_add( t0, t2 );
     523    17314904 :             inp_data[9] = CL_msu_j( t1, t3 );
     524    17314904 :             inp_data[14] = CL_sub( t0, t2 );
     525    17314904 :             inp_data[19] = CL_mac_j( t1, t3 );
     526             :         }
     527             : 
     528             :         {
     529             :             cmplx Cy0, Cy1, Cy2, Cy3;
     530             : 
     531    17314904 :             Cy0 = *ptr_y++;
     532    17314904 :             Cy1 = *ptr_y++;
     533    17314904 :             Cy2 = *ptr_y++;
     534    17314904 :             Cy3 = *ptr_y++;
     535             : 
     536             :             /*  Pre-additions */
     537    17314904 :             t0 = CL_add( Cy0, Cy2 );
     538    17314904 :             t1 = CL_sub( Cy0, Cy2 );
     539    17314904 :             t2 = CL_add( Cy1, Cy3 );
     540    17314904 :             t3 = CL_sub( Cy1, Cy3 );
     541             : 
     542             : 
     543    17314904 :             inp_data[8] = CL_add( t0, t2 );
     544    17314904 :             inp_data[13] = CL_msu_j( t1, t3 );
     545    17314904 :             inp_data[18] = CL_sub( t0, t2 );
     546    17314904 :             inp_data[3] = CL_mac_j( t1, t3 );
     547             :         }
     548             : 
     549             :         {
     550             :             cmplx Cy0, Cy1, Cy2, Cy3;
     551             : 
     552    17314904 :             Cy0 = *ptr_y++;
     553    17314904 :             Cy1 = *ptr_y++;
     554    17314904 :             Cy2 = *ptr_y++;
     555    17314904 :             Cy3 = *ptr_y++;
     556             : 
     557             :             /*  Pre-additions */
     558    17314904 :             t0 = CL_add( Cy0, Cy2 );
     559    17314904 :             t1 = CL_sub( Cy0, Cy2 );
     560    17314904 :             t2 = CL_add( Cy1, Cy3 );
     561    17314904 :             t3 = CL_sub( Cy1, Cy3 );
     562             : 
     563    17314904 :             inp_data[12] = CL_add( t0, t2 );
     564    17314904 :             inp_data[17] = CL_msu_j( t1, t3 );
     565    17314904 :             inp_data[2] = CL_sub( t0, t2 );
     566    17314904 :             inp_data[7] = CL_mac_j( t1, t3 );
     567             :         }
     568             : 
     569             :         {
     570             :             cmplx Cy0, Cy1, Cy2, Cy3;
     571             : 
     572    17314904 :             Cy0 = *ptr_y++;
     573    17314904 :             Cy1 = *ptr_y++;
     574    17314904 :             Cy2 = *ptr_y++;
     575    17314904 :             Cy3 = *ptr_y++;
     576             : 
     577             :             /*  Pre-additions */
     578    17314904 :             t0 = CL_add( Cy0, Cy2 );
     579    17314904 :             t1 = CL_sub( Cy0, Cy2 );
     580    17314904 :             t2 = CL_add( Cy1, Cy3 );
     581    17314904 :             t3 = CL_sub( Cy1, Cy3 );
     582             : 
     583             : 
     584    17314904 :             inp_data[16] = CL_add( t0, t2 );
     585    17314904 :             inp_data[1] = CL_msu_j( t1, t3 );
     586    17314904 :             inp_data[6] = CL_sub( t0, t2 );
     587    17314904 :             inp_data[11] = CL_mac_j( t1, t3 );
     588             :         }
     589             :     }
     590             : #ifdef WMOPS
     591             :     multiCounter[currCounter].CL_move += 20;
     592             : #endif
     593    17314904 : }
     594             : 
     595             : 
     596             : /**
     597             :  * \brief    Function performs a complex 30-point FFT
     598             :  *           The FFT is performed inplace on inp[0..29]. The data is shifted right by
     599             :  *           SCALEFACTOR30_1 on input and by SCALEFACTOR30_2 before the final radix-2 stage
     600             :  *           (presumably SCALEFACTOR30 bits in total - confirm against the macro definitions).
     601             :  *           WOPS with 32x16 bit multiplications:  828 cycles
     602             :  *
     603             :  * \param    [i/o] inp   30 complex values (Qx), read as input and overwritten with the
     604             :  *                       result. The even-indexed and the odd-indexed inputs each feed
     605             :  *                       one 15-point FFT (three 5-point stages, then five 3-point stages).
     606             :  *
     607             :  * \return   void
     608             :  */
     609    54109742 : static void fft30_with_cmplx_data( cmplx *inp /*Qx*/ )
     610             : {
     611    54109742 :     cmplx *l = &inp[0]; /* write pointer into the lower output half, inp[0..14] */
     612    54109742 :     cmplx *h = &inp[15]; /* write pointer into the upper output half, inp[15..29] */
     613             : 
     614             :     cmplx z[30], y[15], x[15], rs1, rs2, rs3, rs4, t; /* x: permuted, pre-scaled inputs; y: FFT5 outputs; z: outputs of both 15-point FFTs */
     615             : 
     616             :     /* 1. FFT15 stage: even-indexed inputs, pre-scaled by SCALEFACTOR30_1, results go to z[0..14] */
     617             : 
     618    54109742 :     x[0] = CL_shr( inp[0], SCALEFACTOR30_1 );
     619    54109742 :     x[1] = CL_shr( inp[18], SCALEFACTOR30_1 );
     620    54109742 :     x[2] = CL_shr( inp[6], SCALEFACTOR30_1 );
     621    54109742 :     x[3] = CL_shr( inp[24], SCALEFACTOR30_1 );
     622    54109742 :     x[4] = CL_shr( inp[12], SCALEFACTOR30_1 );
     623             : 
     624    54109742 :     x[5] = CL_shr( inp[20], SCALEFACTOR30_1 );
     625    54109742 :     x[6] = CL_shr( inp[8], SCALEFACTOR30_1 );
     626    54109742 :     x[7] = CL_shr( inp[26], SCALEFACTOR30_1 );
     627    54109742 :     x[8] = CL_shr( inp[14], SCALEFACTOR30_1 );
     628    54109742 :     x[9] = CL_shr( inp[2], SCALEFACTOR30_1 );
     629             : 
     630    54109742 :     x[10] = CL_shr( inp[10], SCALEFACTOR30_1 );
     631    54109742 :     x[11] = CL_shr( inp[28], SCALEFACTOR30_1 );
     632    54109742 :     x[12] = CL_shr( inp[16], SCALEFACTOR30_1 );
     633    54109742 :     x[13] = CL_shr( inp[4], SCALEFACTOR30_1 );
     634    54109742 :     x[14] = CL_shr( inp[22], SCALEFACTOR30_1 );
     635             : 
     636             : 
     637             :     /* 1. FFT5 stage: x[0..4] -> y[0..4] */
     638    54109742 :     rs1 = CL_add( x[1], x[4] );
     639    54109742 :     rs4 = CL_sub( x[1], x[4] );
     640    54109742 :     rs3 = CL_add( x[2], x[3] );
     641    54109742 :     rs2 = CL_sub( x[2], x[3] );
     642    54109742 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     643    54109742 :     rs1 = CL_add( rs1, rs3 );
     644    54109742 :     y[0] = CL_add( x[0], rs1 );
     645    54109742 :     rs1 = CL_add( y[0], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     646    54109742 :     rs3 = CL_sub( rs1, t );
     647    54109742 :     rs1 = CL_add( rs1, t );
     648    54109742 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     649    54109742 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     650    54109742 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     651             : 
     652             :     /* combination */
     653    54109742 :     y[1] = CL_msu_j( rs1, rs2 );
     654    54109742 :     y[4] = CL_mac_j( rs1, rs2 );
     655    54109742 :     y[2] = CL_mac_j( rs3, rs4 );
     656    54109742 :     y[3] = CL_msu_j( rs3, rs4 );
     657             : 
     658             : 
     659             :     /* 2. FFT5 stage: x[5..9] -> y[5..9] */
     660    54109742 :     rs1 = CL_add( x[6], x[9] );
     661    54109742 :     rs4 = CL_sub( x[6], x[9] );
     662    54109742 :     rs3 = CL_add( x[7], x[8] );
     663    54109742 :     rs2 = CL_sub( x[7], x[8] );
     664    54109742 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     665    54109742 :     rs1 = CL_add( rs1, rs3 );
     666    54109742 :     y[5] = CL_add( x[5], rs1 );
     667    54109742 :     rs1 = CL_add( y[5], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     668    54109742 :     rs3 = CL_sub( rs1, t );
     669    54109742 :     rs1 = CL_add( rs1, t );
     670    54109742 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     671    54109742 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     672    54109742 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     673             : 
     674             :     /* combination */
     675    54109742 :     y[6] = CL_msu_j( rs1, rs2 );
     676    54109742 :     y[9] = CL_mac_j( rs1, rs2 );
     677    54109742 :     y[7] = CL_mac_j( rs3, rs4 );
     678    54109742 :     y[8] = CL_msu_j( rs3, rs4 );
     679             : 
     680             : 
     681             :     /* 3. FFT5 stage: x[10..14] -> y[10..14] */
     682    54109742 :     rs1 = CL_add( x[11], x[14] );
     683    54109742 :     rs4 = CL_sub( x[11], x[14] );
     684    54109742 :     rs3 = CL_add( x[12], x[13] );
     685    54109742 :     rs2 = CL_sub( x[12], x[13] );
     686    54109742 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     687    54109742 :     rs1 = CL_add( rs1, rs3 );
     688    54109742 :     y[10] = CL_add( x[10], rs1 );
     689    54109742 :     rs1 = CL_add( y[10], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     690    54109742 :     rs3 = CL_sub( rs1, t );
     691    54109742 :     rs1 = CL_add( rs1, t );
     692    54109742 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     693    54109742 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     694    54109742 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     695             : 
     696             :     /* combination */
     697    54109742 :     y[11] = CL_msu_j( rs1, rs2 );
     698    54109742 :     y[14] = CL_mac_j( rs1, rs2 );
     699    54109742 :     y[12] = CL_mac_j( rs3, rs4 );
     700    54109742 :     y[13] = CL_msu_j( rs3, rs4 );
     701             : 
     702             :     /* 1. FFT3 stage */
     703             :     /* y[0], y[5], y[10] -> z[0], z[10], z[5]; operands are whole cmplx values */
     704    54109742 :     rs1 = CL_add( y[5], y[10] );
     705    54109742 :     rs2 = CL_scale_t( CL_sub( y[5], y[10] ), C31 );
     706    54109742 :     z[0] = CL_add( y[0], rs1 );
     707    54109742 :     rs1 = CL_sub( y[0], CL_shr( rs1, 1 ) );
     708             : 
     709    54109742 :     z[10] = CL_mac_j( rs1, rs2 );
     710    54109742 :     z[5] = CL_msu_j( rs1, rs2 );
     711             : 
     712             :     /* 2. FFT3 stage: y[1], y[6], y[11] -> z[6], z[1], z[11] */
     713    54109742 :     rs1 = CL_add( y[6], y[11] );
     714    54109742 :     rs2 = CL_scale_t( CL_sub( y[6], y[11] ), C31 );
     715    54109742 :     z[6] = CL_add( y[1], rs1 );
     716    54109742 :     rs1 = CL_sub( y[1], CL_shr( rs1, 1 ) );
     717             : 
     718    54109742 :     z[1] = CL_mac_j( rs1, rs2 );
     719    54109742 :     z[11] = CL_msu_j( rs1, rs2 );
     720             : 
     721             : 
     722             :     /* 3. FFT3 stage: y[2], y[7], y[12] -> z[12], z[7], z[2] */
     723    54109742 :     rs1 = CL_add( y[7], y[12] );
     724    54109742 :     rs2 = CL_scale_t( CL_sub( y[7], y[12] ), C31 );
     725    54109742 :     z[12] = CL_add( y[2], rs1 );
     726    54109742 :     rs1 = CL_sub( y[2], CL_shr( rs1, 1 ) );
     727             : 
     728    54109742 :     z[7] = CL_mac_j( rs1, rs2 );
     729    54109742 :     z[2] = CL_msu_j( rs1, rs2 );
     730             : 
     731             : 
     732             :     /* 4. FFT3 stage: y[3], y[8], y[13] -> z[3], z[13], z[8] */
     733    54109742 :     rs1 = CL_add( y[8], y[13] );
     734    54109742 :     rs2 = CL_scale_t( CL_sub( y[8], y[13] ), C31 );
     735    54109742 :     z[3] = CL_add( y[3], rs1 );
     736    54109742 :     rs1 = CL_sub( y[3], CL_shr( rs1, 1 ) );
     737             : 
     738    54109742 :     z[13] = CL_mac_j( rs1, rs2 );
     739    54109742 :     z[8] = CL_msu_j( rs1, rs2 );
     740             : 
     741             : 
     742             :     /* 5. FFT3 stage: y[4], y[9], y[14] -> z[9], z[4], z[14] */
     743    54109742 :     rs1 = CL_add( y[9], y[14] );
     744    54109742 :     rs2 = CL_scale_t( CL_sub( y[9], y[14] ), C31 );
     745    54109742 :     z[9] = CL_add( y[4], rs1 );
     746    54109742 :     rs1 = CL_sub( y[4], CL_shr( rs1, 1 ) );
     747             : 
     748    54109742 :     z[4] = CL_mac_j( rs1, rs2 );
     749    54109742 :     z[14] = CL_msu_j( rs1, rs2 );
     750             : 
     751             :     /* 2. FFT15 stage: odd-indexed inputs, pre-scaled by SCALEFACTOR30_1, results go to z[15..29] */
     752    54109742 :     x[0] = CL_shr( inp[15], SCALEFACTOR30_1 );
     753    54109742 :     x[1] = CL_shr( inp[3], SCALEFACTOR30_1 );
     754    54109742 :     x[2] = CL_shr( inp[21], SCALEFACTOR30_1 );
     755    54109742 :     x[3] = CL_shr( inp[9], SCALEFACTOR30_1 );
     756    54109742 :     x[4] = CL_shr( inp[27], SCALEFACTOR30_1 );
     757             : 
     758    54109742 :     x[5] = CL_shr( inp[5], SCALEFACTOR30_1 );
     759    54109742 :     x[6] = CL_shr( inp[23], SCALEFACTOR30_1 );
     760    54109742 :     x[7] = CL_shr( inp[11], SCALEFACTOR30_1 );
     761    54109742 :     x[8] = CL_shr( inp[29], SCALEFACTOR30_1 );
     762    54109742 :     x[9] = CL_shr( inp[17], SCALEFACTOR30_1 );
     763             : 
     764    54109742 :     x[10] = CL_shr( inp[25], SCALEFACTOR30_1 );
     765    54109742 :     x[11] = CL_shr( inp[13], SCALEFACTOR30_1 );
     766    54109742 :     x[12] = CL_shr( inp[1], SCALEFACTOR30_1 );
     767    54109742 :     x[13] = CL_shr( inp[19], SCALEFACTOR30_1 );
     768    54109742 :     x[14] = CL_shr( inp[7], SCALEFACTOR30_1 );
     769             : 
     770             :     /* 1. FFT5 stage: x[0..4] -> y[0..4] */
     771    54109742 :     rs1 = CL_add( x[1], x[4] );
     772    54109742 :     rs4 = CL_sub( x[1], x[4] );
     773    54109742 :     rs3 = CL_add( x[2], x[3] );
     774    54109742 :     rs2 = CL_sub( x[2], x[3] );
     775    54109742 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     776    54109742 :     rs1 = CL_add( rs1, rs3 );
     777    54109742 :     y[0] = CL_add( x[0], rs1 );
     778    54109742 :     rs1 = CL_add( y[0], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     779    54109742 :     rs3 = CL_sub( rs1, t );
     780    54109742 :     rs1 = CL_add( rs1, t );
     781    54109742 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     782    54109742 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     783    54109742 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     784             : 
     785             :     /* combination */
     786    54109742 :     y[1] = CL_msu_j( rs1, rs2 );
     787    54109742 :     y[4] = CL_mac_j( rs1, rs2 );
     788    54109742 :     y[2] = CL_mac_j( rs3, rs4 );
     789    54109742 :     y[3] = CL_msu_j( rs3, rs4 );
     790             : 
     791             : 
     792             :     /* 2. FFT5 stage: x[5..9] -> y[5..9] */
     793    54109742 :     rs1 = CL_add( x[6], x[9] );
     794    54109742 :     rs4 = CL_sub( x[6], x[9] );
     795    54109742 :     rs3 = CL_add( x[7], x[8] );
     796    54109742 :     rs2 = CL_sub( x[7], x[8] );
     797    54109742 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     798    54109742 :     rs1 = CL_add( rs1, rs3 );
     799    54109742 :     y[5] = CL_add( x[5], rs1 );
     800    54109742 :     rs1 = CL_add( y[5], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     801    54109742 :     rs3 = CL_sub( rs1, t );
     802    54109742 :     rs1 = CL_add( rs1, t );
     803    54109742 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     804    54109742 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     805    54109742 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     806             : 
     807             :     /* combination */
     808    54109742 :     y[6] = CL_msu_j( rs1, rs2 );
     809    54109742 :     y[9] = CL_mac_j( rs1, rs2 );
     810    54109742 :     y[7] = CL_mac_j( rs3, rs4 );
     811    54109742 :     y[8] = CL_msu_j( rs3, rs4 );
     812             : 
     813             : 
     814             :     /* 3. FFT5 stage: x[10..14] -> y[10..14] */
     815    54109742 :     rs1 = CL_add( x[11], x[14] );
     816    54109742 :     rs4 = CL_sub( x[11], x[14] );
     817    54109742 :     rs3 = CL_add( x[12], x[13] );
     818    54109742 :     rs2 = CL_sub( x[12], x[13] );
     819    54109742 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     820    54109742 :     rs1 = CL_add( rs1, rs3 );
     821    54109742 :     y[10] = CL_add( x[10], rs1 );
     822    54109742 :     rs1 = CL_add( y[10], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     823    54109742 :     rs3 = CL_sub( rs1, t );
     824    54109742 :     rs1 = CL_add( rs1, t );
     825    54109742 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     826    54109742 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     827    54109742 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     828             : 
     829             :     /* combination */
     830    54109742 :     y[11] = CL_msu_j( rs1, rs2 );
     831    54109742 :     y[14] = CL_mac_j( rs1, rs2 );
     832    54109742 :     y[12] = CL_mac_j( rs3, rs4 );
     833    54109742 :     y[13] = CL_msu_j( rs3, rs4 );
     834             : 
     835             :     /* 1. FFT3 stage */
     836             :     /* y[0], y[5], y[10] -> z[15], z[25], z[20]; operands are whole cmplx values */
     837    54109742 :     rs1 = CL_add( y[5], y[10] );
     838    54109742 :     rs2 = CL_scale_t( CL_sub( y[5], y[10] ), C31 );
     839    54109742 :     z[15] = CL_add( y[0], rs1 );
     840    54109742 :     rs1 = CL_sub( y[0], CL_shr( rs1, 1 ) );
     841             : 
     842    54109742 :     z[25] = CL_mac_j( rs1, rs2 );
     843    54109742 :     z[20] = CL_msu_j( rs1, rs2 );
     844             : 
     845             :     /* 2. FFT3 stage: y[1], y[6], y[11] -> z[21], z[16], z[26] */
     846    54109742 :     rs1 = CL_add( y[6], y[11] );
     847    54109742 :     rs2 = CL_scale_t( CL_sub( y[6], y[11] ), C31 );
     848    54109742 :     z[21] = CL_add( y[1], rs1 );
     849    54109742 :     rs1 = CL_sub( y[1], CL_shr( rs1, 1 ) );
     850             : 
     851    54109742 :     z[16] = CL_mac_j( rs1, rs2 );
     852    54109742 :     z[26] = CL_msu_j( rs1, rs2 );
     853             : 
     854             : 
     855             :     /* 3. FFT3 stage: y[2], y[7], y[12] -> z[27], z[22], z[17] */
     856    54109742 :     rs1 = CL_add( y[7], y[12] );
     857    54109742 :     rs2 = CL_scale_t( CL_sub( y[7], y[12] ), C31 );
     858    54109742 :     z[27] = CL_add( y[2], rs1 );
     859    54109742 :     rs1 = CL_sub( y[2], CL_shr( rs1, 1 ) );
     860             : 
     861    54109742 :     z[22] = CL_mac_j( rs1, rs2 );
     862    54109742 :     z[17] = CL_msu_j( rs1, rs2 );
     863             : 
     864             : 
     865             :     /* 4. FFT3 stage: y[3], y[8], y[13] -> z[18], z[28], z[23] */
     866    54109742 :     rs1 = CL_add( y[8], y[13] );
     867    54109742 :     rs2 = CL_scale_t( CL_sub( y[8], y[13] ), C31 );
     868    54109742 :     z[18] = CL_add( y[3], rs1 );
     869    54109742 :     rs1 = CL_sub( y[3], CL_shr( rs1, 1 ) );
     870             : 
     871    54109742 :     z[28] = CL_mac_j( rs1, rs2 );
     872    54109742 :     z[23] = CL_msu_j( rs1, rs2 );
     873             : 
     874             : 
     875             :     /* 5. FFT3 stage: y[4], y[9], y[14] -> z[24], z[19], z[29] */
     876    54109742 :     rs1 = CL_add( y[9], y[14] );
     877    54109742 :     rs2 = CL_scale_t( CL_sub( y[9], y[14] ), C31 );
     878    54109742 :     z[24] = CL_add( y[4], rs1 );
     879    54109742 :     rs1 = CL_sub( y[4], CL_shr( rs1, 1 ) );
     880             : 
     881    54109742 :     z[19] = CL_mac_j( rs1, rs2 );
     882    54109742 :     z[29] = CL_msu_j( rs1, rs2 );
     883             : 
     884             :     /* 1. FFT2 stage: each FFT2 stage pairs z[k] with z[k+15], both shifted right by SCALEFACTOR30_2; odd-numbered stages store the sum via l and the difference via h */
     885    54109742 :     rs1 = CL_shr( z[0], SCALEFACTOR30_2 );
     886    54109742 :     rs2 = CL_shr( z[15], SCALEFACTOR30_2 );
     887    54109742 :     *l = CL_add( rs1, rs2 );
     888    54109742 :     *h = CL_sub( rs1, rs2 );
     889    54109742 :     l += 1;
     890    54109742 :     h += 1;
     891             : 
     892             :     /* 2. FFT2 stage: even-numbered stages swap the targets - sum via h, difference via l */
     893    54109742 :     rs1 = CL_shr( z[8], SCALEFACTOR30_2 );
     894    54109742 :     rs2 = CL_shr( z[23], SCALEFACTOR30_2 );
     895    54109742 :     *h = CL_add( rs1, rs2 );
     896    54109742 :     *l = CL_sub( rs1, rs2 );
     897    54109742 :     l += 1;
     898    54109742 :     h += 1;
     899             : 
     900             :     /* 3. FFT2 stage */
     901    54109742 :     rs1 = CL_shr( z[1], SCALEFACTOR30_2 );
     902    54109742 :     rs2 = CL_shr( z[16], SCALEFACTOR30_2 );
     903    54109742 :     *l = CL_add( rs1, rs2 );
     904    54109742 :     *h = CL_sub( rs1, rs2 );
     905    54109742 :     l += 1;
     906    54109742 :     h += 1;
     907             : 
     908             :     /* 4. FFT2 stage */
     909    54109742 :     rs1 = CL_shr( z[9], SCALEFACTOR30_2 );
     910    54109742 :     rs2 = CL_shr( z[24], SCALEFACTOR30_2 );
     911    54109742 :     *h = CL_add( rs1, rs2 );
     912    54109742 :     *l = CL_sub( rs1, rs2 );
     913    54109742 :     l += 1;
     914    54109742 :     h += 1;
     915             : 
     916             :     /* 5. FFT2 stage */
     917    54109742 :     rs1 = CL_shr( z[2], SCALEFACTOR30_2 );
     918    54109742 :     rs2 = CL_shr( z[17], SCALEFACTOR30_2 );
     919    54109742 :     *l = CL_add( rs1, rs2 );
     920    54109742 :     *h = CL_sub( rs1, rs2 );
     921    54109742 :     l += 1;
     922    54109742 :     h += 1;
     923             : 
     924             :     /* 6. FFT2 stage */
     925    54109742 :     rs1 = CL_shr( z[10], SCALEFACTOR30_2 );
     926    54109742 :     rs2 = CL_shr( z[25], SCALEFACTOR30_2 );
     927    54109742 :     *h = CL_add( rs1, rs2 );
     928    54109742 :     *l = CL_sub( rs1, rs2 );
     929    54109742 :     l += 1;
     930    54109742 :     h += 1;
     931             : 
     932             :     /* 7. FFT2 stage */
     933    54109742 :     rs1 = CL_shr( z[3], SCALEFACTOR30_2 );
     934    54109742 :     rs2 = CL_shr( z[18], SCALEFACTOR30_2 );
     935    54109742 :     *l = CL_add( rs1, rs2 );
     936    54109742 :     *h = CL_sub( rs1, rs2 );
     937    54109742 :     l += 1;
     938    54109742 :     h += 1;
     939             : 
     940             :     /* 8. FFT2 stage */
     941    54109742 :     rs1 = CL_shr( z[11], SCALEFACTOR30_2 );
     942    54109742 :     rs2 = CL_shr( z[26], SCALEFACTOR30_2 );
     943    54109742 :     *h = CL_add( rs1, rs2 );
     944    54109742 :     *l = CL_sub( rs1, rs2 );
     945    54109742 :     l += 1;
     946    54109742 :     h += 1;
     947             : 
     948             :     /* 9. FFT2 stage */
     949    54109742 :     rs1 = CL_shr( z[4], SCALEFACTOR30_2 );
     950    54109742 :     rs2 = CL_shr( z[19], SCALEFACTOR30_2 );
     951    54109742 :     *l = CL_add( rs1, rs2 );
     952    54109742 :     *h = CL_sub( rs1, rs2 );
     953    54109742 :     l += 1;
     954    54109742 :     h += 1;
     955             : 
     956             :     /* 10. FFT2 stage */
     957    54109742 :     rs1 = CL_shr( z[12], SCALEFACTOR30_2 );
     958    54109742 :     rs2 = CL_shr( z[27], SCALEFACTOR30_2 );
     959    54109742 :     *h = CL_add( rs1, rs2 );
     960    54109742 :     *l = CL_sub( rs1, rs2 );
     961    54109742 :     l += 1;
     962    54109742 :     h += 1;
     963             : 
     964             :     /* 11. FFT2 stage */
     965    54109742 :     rs1 = CL_shr( z[5], SCALEFACTOR30_2 );
     966    54109742 :     rs2 = CL_shr( z[20], SCALEFACTOR30_2 );
     967    54109742 :     *l = CL_add( rs1, rs2 );
     968    54109742 :     *h = CL_sub( rs1, rs2 );
     969    54109742 :     l += 1;
     970    54109742 :     h += 1;
     971             : 
     972             :     /* 12. FFT2 stage */
     973    54109742 :     rs1 = CL_shr( z[13], SCALEFACTOR30_2 );
     974    54109742 :     rs2 = CL_shr( z[28], SCALEFACTOR30_2 );
     975    54109742 :     *h = CL_add( rs1, rs2 );
     976    54109742 :     *l = CL_sub( rs1, rs2 );
     977    54109742 :     l += 1;
     978    54109742 :     h += 1;
     979             : 
     980             :     /* 13. FFT2 stage */
     981    54109742 :     rs1 = CL_shr( z[6], SCALEFACTOR30_2 );
     982    54109742 :     rs2 = CL_shr( z[21], SCALEFACTOR30_2 );
     983    54109742 :     *l = CL_add( rs1, rs2 );
     984    54109742 :     *h = CL_sub( rs1, rs2 );
     985    54109742 :     l += 1;
     986    54109742 :     h += 1;
     987             : 
     988             :     /* 14. FFT2 stage */
     989    54109742 :     rs1 = CL_shr( z[14], SCALEFACTOR30_2 );
     990    54109742 :     rs2 = CL_shr( z[29], SCALEFACTOR30_2 );
     991    54109742 :     *h = CL_add( rs1, rs2 );
     992    54109742 :     *l = CL_sub( rs1, rs2 );
     993    54109742 :     l += 1;
     994    54109742 :     h += 1;
     995             : 
     996             :     /* 15. FFT2 stage */
     997    54109742 :     rs1 = CL_shr( z[7], SCALEFACTOR30_2 );
     998    54109742 :     rs2 = CL_shr( z[22], SCALEFACTOR30_2 );
     999    54109742 :     *l = CL_add( rs1, rs2 );
    1000    54109742 :     *h = CL_sub( rs1, rs2 );
    1001    54109742 :     l += 1;
    1002    54109742 :     h += 1;
    1003             : 
    1004             : #ifdef WMOPS
    1005             :     multiCounter[currCounter].CL_move += 30; /* NOTE(review): presumably accounts for the 30 complex stores to inp - confirm */
    1006             : #endif
    1007    54109742 : }
    1008             : 
    1009             : /*-------------------------------------------------------------------*
    1010             :  * fft_cldfb_fx()
    1011             :  * Interface function for the FFT subroutines: casts data to cmplx * and calls
    1012             :  * the fixed-size routine for size 5, 8, 10, 16, 20 or 30; any other size hits assert( 0 ).
    1013             :  *--------------------------------------------------------------------*/
    1014    89804010 : void fft_cldfb_fx(
    1015             :     Word32 *data,     /* i/o: input/output vector Qx, passed on as a cmplx array */
    1016             :     const Word16 size /* i  : size of fft operation; supported: 5, 8, 10, 16, 20, 30 */
    1017             : )
    1018             : {
    1019             : 
    1020    89804010 :     SWITCH( size )
    1021             :     {
    1022           0 :         case 5:
    1023           0 :             fft5_with_cmplx_data( (cmplx *) data );
    1024           0 :             BREAK;
    1025     4964772 :         case 8:
    1026     4964772 :             fft8_with_cmplx_data( (cmplx *) data );
    1027     4964772 :             BREAK;
    1028    13411028 :         case 10:
    1029    13411028 :             fft10_with_cmplx_data( (cmplx *) data );
    1030    13411028 :             BREAK;
    1031        3564 :         case 16:
    1032        3564 :             fft16_with_cmplx_data( (cmplx *) data, 0 ); /* NOTE(review): meaning of the second argument (0) is not visible here - confirm */
    1033        3564 :             BREAK;
    1034    17314904 :         case 20:
    1035    17314904 :             fft20_with_cmplx_data( (cmplx *) data );
    1036    17314904 :             BREAK;
    1037    54109742 :         case 30:
    1038    54109742 :             fft30_with_cmplx_data( (cmplx *) data );
    1039    54109742 :             BREAK;
    1040             : 
    1041           0 :         default:
    1042           0 :             assert( 0 ); /* unsupported FFT size */
    1043             :             BREAK;
    1044             :     }
    1045             : 
    1046    89804010 :     return;
    1047             : }

Generated by: LCOV version 1.14