LCOV - code coverage report
Current view: top level - lib_com - fft_cldfb_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main enc/dec/rend @ 3b2f07138c61dcf997bbf4165d0882f794b2995f Lines: 553 582 95.0 %
Date: 2025-05-03 01:55:50 Functions: 5 6 83.3 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <stdint.h>
      38             : #include "options.h"
      39             : #include <assert.h>
      40             : #include "prot_fx.h"
      41             : #include "ivas_cnst.h"
      42             : #include "wmc_auto.h"
      43             : #include "basop_util.h"
      44             : #include "complex_basop.h"
      45             : 
      46             : #define Mpy_32_xx Mpy_32_16_1
      47             : 
      48             : #define FFTC( x ) WORD322WORD16( (Word32) x )
      49             : 
      50             : #define C31 ( FFTC( 0x91261468 ) ) /* FL2WORD32( -0.86602540) -sqrt(3)/2 */
      51             : 
      52             : #define C51 ( FFTC( 0x79bc3854 ) ) /* FL2WORD32( 0.95105652)   */
      53             : #define C52 ( FFTC( 0x9d839db0 ) ) /* FL2WORD32(-1.53884180/2) */
      54             : #define C53 ( FFTC( 0xd18053ce ) ) /* FL2WORD32(-0.36327126)   */
      55             : #define C54 ( FFTC( 0x478dde64 ) ) /* FL2WORD32( 0.55901699)   */
      56             : #define C55 ( FFTC( 0xb0000001 ) ) /* FL2WORD32(-1.25/2)       */
      57             : 
      58             : #define C81 ( FFTC( 0x5a82799a ) ) /* FL2WORD32( 7.071067811865475e-1) */
      59             : #define C82 ( FFTC( 0xa57d8666 ) ) /* FL2WORD32(-7.071067811865475e-1) */
      60             : 
      61             : #define C161 ( FFTC( 0x5a82799a ) ) /* FL2WORD32( 7.071067811865475e-1)  INV_SQRT2    */
      62             : #define C162 ( FFTC( 0xa57d8666 ) ) /* FL2WORD32(-7.071067811865475e-1) -INV_SQRT2    */
      63             : 
      64             : #define C163 ( FFTC( 0x7641af3d ) ) /* FL2WORD32( 9.238795325112867e-1)  COS_PI_DIV8  */
      65             : #define C164 ( FFTC( 0x89be50c3 ) ) /* FL2WORD32(-9.238795325112867e-1) -COS_PI_DIV8  */
      66             : 
      67             : #define C165 ( FFTC( 0x30fbc54d ) ) /* FL2WORD32( 3.826834323650898e-1)  COS_3PI_DIV8 */
      68             : #define C166 ( FFTC( 0xcf043ab3 ) ) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 */
      69             : 
      70             : 
      71             : #define cplxMpy4_8_0( re, im, a, b, c, d )                          \
      72             :     re = L_shr( L_sub( Mpy_32_xx( a, c ), Mpy_32_xx( b, d ) ), 1 ); \
      73             :     im = L_shr( L_add( Mpy_32_xx( a, d ), Mpy_32_xx( b, c ) ), 1 );
      74             : 
      75             : #define cplxMpy4_8_1( re, im, a, b ) \
      76             :     re = L_shr( a, 1 );              \
      77             :     im = L_shr( b, 1 );
      78             : 
      79             : void fft16_with_cmplx_data( cmplx *pInp, Word16 bsacle );
      80             : 
      81             : 
      82             : #undef SCALEFACTOR5
      83             : #undef SCALEFACTOR8
      84             : #undef SCALEFACTOR10
      85             : #undef SCALEFACTOR16
      86             : #undef SCALEFACTOR20
      87             : #undef SCALEFACTOR30
      88             : #undef SCALEFACTOR30_1
      89             : #undef SCALEFACTOR30_2
      90             : 
      91             : #define SCALEFACTOR5    ( 0 )
      92             : #define SCALEFACTOR8    ( 0 )
      93             : #define SCALEFACTOR10   ( 0 )
      94             : #define SCALEFACTOR16   ( 0 )
      95             : #define SCALEFACTOR20   ( 0 )
      96             : #define SCALEFACTOR30   ( 0 )
      97             : #define SCALEFACTOR30_1 ( 0 )
      98             : #define SCALEFACTOR30_2 ( 0 )
      99             : 
     100             : cmplx CL_scale_t( cmplx x, Word16 y );
     101             : cmplx CL_dscale_t( cmplx x, Word16 y1, Word16 y2 );
     102             : 
     103             : /**
     104             :  * \brief    Function performs a complex 8-point FFT
     105             :  *           The FFT is performed inplace. The result of the FFT
     106             :  *           is scaled by SCALEFACTOR8 bits.
     107             :  *
     108             :  *           WOPS with 32x16 bit multiplications: 108 cycles
     109             :  *
     110             :  * \param    [i/o] inp   complex input / output buffer (format Qx):
     111             :  *                       the 8 points inp[0]..inp[7] are read and then
     112             :  *                       overwritten with the FFT result
     113             :  *
     114             :  * \return   void
     115             :  */
     116     4917700 : static void fft8_with_cmplx_data( cmplx *inp /*Qx*/ )
     117             : {
     118             :     cmplx x0, x1, x2, x3, x4, x5, x6, x7;
     119             :     cmplx s0, s1, s2, s3, s4, s5, s6, s7;
     120             :     cmplx t0, t1, t2, t3, t4, t5, t6, t7;
     121             : 
     122             :     /* Pre-additions */
     123     4917700 :     x0 = CL_shr( inp[0], SCALEFACTOR8 );
     124     4917700 :     x1 = CL_shr( inp[1], SCALEFACTOR8 );
     125     4917700 :     x2 = CL_shr( inp[2], SCALEFACTOR8 );
     126     4917700 :     x3 = CL_shr( inp[3], SCALEFACTOR8 );
     127     4917700 :     x4 = CL_shr( inp[4], SCALEFACTOR8 );
     128     4917700 :     x5 = CL_shr( inp[5], SCALEFACTOR8 );
     129     4917700 :     x6 = CL_shr( inp[6], SCALEFACTOR8 );
     130     4917700 :     x7 = CL_shr( inp[7], SCALEFACTOR8 );
     131             : 
     132             :     /* loops are unrolled */
     133             :     {
     134     4917700 :         t0 = CL_add( x0, x4 );
     135     4917700 :         t1 = CL_sub( x0, x4 );
     136             : 
     137     4917700 :         t2 = CL_add( x1, x5 );
     138     4917700 :         t3 = CL_sub( x1, x5 );
     139             : 
     140     4917700 :         t4 = CL_add( x2, x6 );
     141     4917700 :         t5 = CL_sub( x2, x6 );
     142             : 
     143     4917700 :         t6 = CL_add( x3, x7 );
     144     4917700 :         t7 = CL_sub( x3, x7 );
     145             :     }
     146             : 
     147             :     /* Pre-additions and core multiplications */
     148             : 
     149     4917700 :     s0 = CL_add( t0, t4 );
     150     4917700 :     s2 = CL_sub( t0, t4 );
     151             : 
     152     4917700 :     s4 = CL_mac_j( t1, t5 );
     153     4917700 :     s5 = CL_msu_j( t1, t5 );
     154             : 
     155     4917700 :     s1 = CL_add( t2, t6 );
     156     4917700 :     s3 = CL_sub( t2, t6 );
     157     4917700 :     s3 = CL_mul_j( s3 );
     158             : 
     159     4917700 :     t0 = CL_add( t3, t7 );
     160     4917700 :     t1 = CL_sub( t3, t7 );
     161             : 
     162     4917700 :     s6 = CL_scale_t( CL_msu_j( t1, t0 ), C81 );
     163     4917700 :     s7 = CL_dscale_t( CL_swap_real_imag( CL_msu_j( t0, t1 ) ), C81, C82 );
     164             : 
     165             :     /* Post-additions */
     166             : 
     167     4917700 :     inp[0] = CL_add( s0, s1 );
     168     4917700 :     inp[4] = CL_sub( s0, s1 );
     169             : 
     170     4917700 :     inp[2] = CL_sub( s2, s3 );
     171     4917700 :     inp[6] = CL_add( s2, s3 );
     172             : 
     173     4917700 :     inp[3] = CL_add( s4, s7 );
     174     4917700 :     inp[7] = CL_sub( s4, s7 );
     175             : 
     176     4917700 :     inp[1] = CL_add( s5, s6 );
     177     4917700 :     inp[5] = CL_sub( s5, s6 );
     178             : #ifdef WMOPS
     179             :     multiCounter[currCounter].CL_move += 8;
     180             : #endif
     181     4917700 : }
     182             : 
     183             : /**
     184             :  * \brief    Function performs a complex 5-point FFT
     185             :  *           The FFT is performed inplace. The result of the FFT
     186             :  *           is scaled by SCALEFACTOR5 bits.
     187             :  *
     188             :  *           WOPS with 32x16 bit multiplications:  88 cycles
     189             :  *
     190             :  * \param    [i/o] inp   complex input / output buffer (format Qx):
     191             :  *                       the 5 points inp[0]..inp[4] are read and then
     192             :  *                       overwritten with the FFT result
     193             :  *
     194             :  * \return   void
     195             :  */
     196           0 : static void fft5_with_cmplx_data( cmplx *inp /*Qx*/ )
     197             : {
     198             :     cmplx x0, x1, x2, x3, x4;
     199             :     cmplx y1, y2, y3, y4;
     200             :     cmplx t;
     201             : 
     202           0 :     x0 = CL_shr( inp[0], SCALEFACTOR5 );
     203           0 :     x1 = CL_shr( inp[1], SCALEFACTOR5 );
     204           0 :     x2 = CL_shr( inp[2], SCALEFACTOR5 );
     205           0 :     x3 = CL_shr( inp[3], SCALEFACTOR5 );
     206           0 :     x4 = CL_shr( inp[4], SCALEFACTOR5 );
     207             : 
     208           0 :     y1 = CL_add( x1, x4 );
     209           0 :     y4 = CL_sub( x1, x4 );
     210           0 :     y3 = CL_add( x2, x3 );
     211           0 :     y2 = CL_sub( x2, x3 );
     212           0 :     t = CL_scale_t( CL_sub( y1, y3 ), C54 );
     213           0 :     y1 = CL_add( y1, y3 );
     214           0 :     inp[0] = CL_add( x0, y1 );
     215             : 
     216             :     /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
     217             :     the values as fracts */
     218           0 :     y1 = CL_add( inp[0], ( CL_shl( CL_scale_t( y1, C55 ), 1 ) ) );
     219           0 :     y3 = CL_sub( y1, t );
     220           0 :     y1 = CL_add( y1, t );
     221             : 
     222           0 :     t = CL_scale_t( CL_add( y4, y2 ), C51 );
     223             :     /* Bit shift left because of the constant C52 which was scaled with the factor 0.5 because of the representation of
     224             :     the values as fracts */
     225           0 :     y4 = CL_add( t, CL_shl( CL_scale_t( y4, C52 ), 1 ) );
     226           0 :     y2 = CL_add( t, CL_scale_t( y2, C53 ) );
     227             : 
     228             : 
     229             :     /* combination */
     230           0 :     inp[1] = CL_msu_j( y1, y2 );
     231           0 :     inp[4] = CL_mac_j( y1, y2 );
     232             : 
     233           0 :     inp[2] = CL_mac_j( y3, y4 );
     234           0 :     inp[3] = CL_msu_j( y3, y4 );
     235             : 
     236             : #ifdef WMOPS
     237             :     multiCounter[currCounter].CL_move += 5;
     238             : #endif
     239           0 : }
     240             : 
     241             : /**
     242             :  * \brief    Function performs a complex 10-point FFT
     243             :  *           The FFT is performed inplace. The result of the FFT
     244             :  *           is scaled by SCALEFACTOR10 bits.
     245             :  *
     246             :  *           WOPS with 32x16 bit multiplications:  196 cycles
     247             :  *
     248             :  * \param    [i/o] inp_data   complex input / output buffer (format Qx):
     249             :  *                            the 10 points inp_data[0]..inp_data[9] are read
     250             :  *                            and then overwritten with the FFT result
     251             :  *
     252             :  * \return   void
     253             :  */
     254    13174712 : static void fft10_with_cmplx_data( cmplx *inp_data /*Qx*/ )
     255             : {
     256             :     cmplx r1, r2, r3, r4;
     257             :     cmplx x0, x1, x2, x3, x4, t;
     258             :     cmplx y[10];
     259             : 
     260             :     /* FOR i=0 */
     261             :     {
     262    13174712 :         x0 = CL_shr( inp_data[0], SCALEFACTOR10 );
     263    13174712 :         x1 = CL_shr( inp_data[2], SCALEFACTOR10 );
     264    13174712 :         x2 = CL_shr( inp_data[4], SCALEFACTOR10 );
     265    13174712 :         x3 = CL_shr( inp_data[6], SCALEFACTOR10 );
     266    13174712 :         x4 = CL_shr( inp_data[8], SCALEFACTOR10 );
     267             : 
     268    13174712 :         r1 = CL_add( x3, x2 );
     269    13174712 :         r4 = CL_sub( x3, x2 );
     270    13174712 :         r3 = CL_add( x1, x4 );
     271    13174712 :         r2 = CL_sub( x1, x4 );
     272    13174712 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     273    13174712 :         r1 = CL_add( r1, r3 );
     274    13174712 :         y[0] = CL_add( x0, r1 );
     275    13174712 :         r1 = CL_add( y[0], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     276    13174712 :         r3 = CL_sub( r1, t );
     277    13174712 :         r1 = CL_add( r1, t );
     278    13174712 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     279    13174712 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     280    13174712 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     281             : 
     282             : 
     283    13174712 :         y[2] = CL_msu_j( r1, r2 );
     284    13174712 :         y[8] = CL_mac_j( r1, r2 );
     285    13174712 :         y[4] = CL_mac_j( r3, r4 );
     286    13174712 :         y[6] = CL_msu_j( r3, r4 );
     287             :     }
     288             :     /* FOR i=1 */
     289             :     {
     290    13174712 :         x0 = CL_shr( inp_data[5], SCALEFACTOR10 );
     291    13174712 :         x1 = CL_shr( inp_data[1], SCALEFACTOR10 );
     292    13174712 :         x2 = CL_shr( inp_data[3], SCALEFACTOR10 );
     293    13174712 :         x3 = CL_shr( inp_data[7], SCALEFACTOR10 );
     294    13174712 :         x4 = CL_shr( inp_data[9], SCALEFACTOR10 );
     295             : 
     296    13174712 :         r1 = CL_add( x1, x4 );
     297    13174712 :         r4 = CL_sub( x1, x4 );
     298    13174712 :         r3 = CL_add( x3, x2 );
     299    13174712 :         r2 = CL_sub( x3, x2 );
     300    13174712 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     301    13174712 :         r1 = CL_add( r1, r3 );
     302    13174712 :         y[1] = CL_add( x0, r1 );
     303    13174712 :         r1 = CL_add( y[1], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     304    13174712 :         r3 = CL_sub( r1, t );
     305    13174712 :         r1 = CL_add( r1, t );
     306    13174712 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     307    13174712 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     308    13174712 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     309             : 
     310             : 
     311    13174712 :         y[3] = CL_msu_j( r1, r2 );
     312    13174712 :         y[9] = CL_mac_j( r1, r2 );
     313    13174712 :         y[5] = CL_mac_j( r3, r4 );
     314    13174712 :         y[7] = CL_msu_j( r3, r4 );
     315             :     }
     316             : 
     317             :     /* FOR i=0 */
     318             :     {
     319    13174712 :         inp_data[0] = CL_add( y[0], y[1] );
     320    13174712 :         inp_data[5] = CL_sub( y[0], y[1] );
     321             :     }
     322             :     /* FOR i=2 */
     323             :     {
     324    13174712 :         inp_data[2] = CL_add( y[2], y[3] );
     325    13174712 :         inp_data[7] = CL_sub( y[2], y[3] );
     326             :     }
     327             :     /* FOR i=4 */
     328             :     {
     329    13174712 :         inp_data[4] = CL_add( y[4], y[5] );
     330    13174712 :         inp_data[9] = CL_sub( y[4], y[5] );
     331             :     }
     332             :     /* FOR i=6 */
     333             :     {
     334    13174712 :         inp_data[6] = CL_add( y[6], y[7] );
     335    13174712 :         inp_data[1] = CL_sub( y[6], y[7] );
     336             :     }
     337             :     /* FOR i=8 */
     338             :     {
     339    13174712 :         inp_data[8] = CL_add( y[8], y[9] );
     340    13174712 :         inp_data[3] = CL_sub( y[8], y[9] );
     341             :     }
     342             : 
     343             : #ifdef WMOPS
     344             :     multiCounter[currCounter].CL_move += 10;
     345             : #endif
     346    13174712 : }
     347             : 
     348             : /**
     349             :  * \brief    Function performs a complex 20-point FFT
     350             :  *           The FFT is performed inplace. The result of the FFT
     351             :  *           is scaled by SCALEFACTOR20 bits.
     352             :  *
     353             :  *           WOPS with 32x16 bit multiplications:  432 cycles
     354             :  *
     355             :  * \param    [i/o] inp_data   complex input / output buffer (format Qx):
     356             :  *                            the 20 points inp_data[0]..inp_data[19] are read
     357             :  *                            and then overwritten with the FFT result
     358             :  *
     359             :  * \return   void
     360             :  */
     361    16880132 : static void fft20_with_cmplx_data( cmplx *inp_data /*Qx*/ )
     362             : {
     363             :     cmplx r1, r2, r3, r4;
     364             :     cmplx x0, x1, x2, x3, x4;
     365             :     cmplx t, t0, t1, t2, t3;
     366             :     cmplx y[20];
     367             :     cmplx *y0, *y1, *y2, *y3, *y4;
     368             : 
     369    16880132 :     y0 = y;
     370    16880132 :     y1 = &y[4];
     371    16880132 :     y2 = &y[16];
     372    16880132 :     y3 = &y[8];
     373    16880132 :     y4 = &y[12];
     374             : 
     375             :     {
     376    16880132 :         x0 = CL_shr( inp_data[0], SCALEFACTOR20 );
     377    16880132 :         x1 = CL_shr( inp_data[16], SCALEFACTOR20 );
     378    16880132 :         x2 = CL_shr( inp_data[12], SCALEFACTOR20 );
     379    16880132 :         x3 = CL_shr( inp_data[8], SCALEFACTOR20 );
     380    16880132 :         x4 = CL_shr( inp_data[4], SCALEFACTOR20 );
     381             : 
     382    16880132 :         r4 = CL_sub( x1, x4 );
     383    16880132 :         r2 = CL_sub( x2, x3 );
     384    16880132 :         r1 = CL_add( x1, x4 );
     385    16880132 :         r3 = CL_add( x2, x3 );
     386    16880132 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     387    16880132 :         r1 = CL_add( r1, r3 );
     388    16880132 :         y0[0] = CL_add( x0, r1 );
     389    16880132 :         r1 = CL_add( y0[0], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     390    16880132 :         r3 = CL_sub( r1, t );
     391    16880132 :         r1 = CL_add( r1, t );
     392    16880132 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     393    16880132 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     394    16880132 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     395             : 
     396             : 
     397    16880132 :         y1[0] = CL_msu_j( r1, r2 );
     398    16880132 :         y2[0] = CL_mac_j( r1, r2 );
     399    16880132 :         y3[0] = CL_mac_j( r3, r4 );
     400    16880132 :         y4[0] = CL_msu_j( r3, r4 );
     401             :     }
     402             :     {
     403    16880132 :         x0 = CL_shr( inp_data[5], SCALEFACTOR20 );
     404    16880132 :         x1 = CL_shr( inp_data[1], SCALEFACTOR20 );
     405    16880132 :         x2 = CL_shr( inp_data[17], SCALEFACTOR20 );
     406    16880132 :         x3 = CL_shr( inp_data[13], SCALEFACTOR20 );
     407    16880132 :         x4 = CL_shr( inp_data[9], SCALEFACTOR20 );
     408             : 
     409    16880132 :         r4 = CL_sub( x1, x4 );
     410    16880132 :         r2 = CL_sub( x2, x3 );
     411    16880132 :         r1 = CL_add( x1, x4 );
     412    16880132 :         r3 = CL_add( x2, x3 );
     413    16880132 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     414    16880132 :         r1 = CL_add( r1, r3 );
     415    16880132 :         y0[1] = CL_add( x0, r1 );
     416    16880132 :         r1 = CL_add( y0[1], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     417    16880132 :         r3 = CL_sub( r1, t );
     418    16880132 :         r1 = CL_add( r1, t );
     419    16880132 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     420    16880132 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     421    16880132 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     422             : 
     423             : 
     424    16880132 :         y1[1] = CL_msu_j( r1, r2 );
     425    16880132 :         y2[1] = CL_mac_j( r1, r2 );
     426    16880132 :         y3[1] = CL_mac_j( r3, r4 );
     427    16880132 :         y4[1] = CL_msu_j( r3, r4 );
     428             :     }
     429             :     {
     430    16880132 :         x0 = CL_shr( inp_data[10], SCALEFACTOR20 );
     431    16880132 :         x1 = CL_shr( inp_data[6], SCALEFACTOR20 );
     432    16880132 :         x2 = CL_shr( inp_data[2], SCALEFACTOR20 );
     433    16880132 :         x3 = CL_shr( inp_data[18], SCALEFACTOR20 );
     434    16880132 :         x4 = CL_shr( inp_data[14], SCALEFACTOR20 );
     435             : 
     436    16880132 :         r4 = CL_sub( x1, x4 );
     437    16880132 :         r2 = CL_sub( x2, x3 );
     438    16880132 :         r1 = CL_add( x1, x4 );
     439    16880132 :         r3 = CL_add( x2, x3 );
     440    16880132 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     441    16880132 :         r1 = CL_add( r1, r3 );
     442    16880132 :         y0[2] = CL_add( x0, r1 );
     443    16880132 :         r1 = CL_add( y0[2], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     444    16880132 :         r3 = CL_sub( r1, t );
     445    16880132 :         r1 = CL_add( r1, t );
     446    16880132 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     447    16880132 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     448    16880132 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     449             : 
     450             : 
     451    16880132 :         y1[2] = CL_msu_j( r1, r2 );
     452    16880132 :         y2[2] = CL_mac_j( r1, r2 );
     453    16880132 :         y3[2] = CL_mac_j( r3, r4 );
     454    16880132 :         y4[2] = CL_msu_j( r3, r4 );
     455             :     }
     456             :     {
     457    16880132 :         x0 = CL_shr( inp_data[15], SCALEFACTOR20 );
     458    16880132 :         x1 = CL_shr( inp_data[11], SCALEFACTOR20 );
     459    16880132 :         x2 = CL_shr( inp_data[7], SCALEFACTOR20 );
     460    16880132 :         x3 = CL_shr( inp_data[3], SCALEFACTOR20 );
     461    16880132 :         x4 = CL_shr( inp_data[19], SCALEFACTOR20 );
     462             : 
     463    16880132 :         r4 = CL_sub( x1, x4 );
     464    16880132 :         r2 = CL_sub( x2, x3 );
     465    16880132 :         r1 = CL_add( x1, x4 );
     466    16880132 :         r3 = CL_add( x2, x3 );
     467    16880132 :         t = CL_scale_t( CL_sub( r1, r3 ), C54 );
     468    16880132 :         r1 = CL_add( r1, r3 );
     469    16880132 :         y0[3] = CL_add( x0, r1 );
     470    16880132 :         r1 = CL_add( y0[3], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
     471    16880132 :         r3 = CL_sub( r1, t );
     472    16880132 :         r1 = CL_add( r1, t );
     473    16880132 :         t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
     474    16880132 :         r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
     475    16880132 :         r2 = CL_add( t, CL_scale_t( r2, C53 ) );
     476             : 
     477             : 
     478    16880132 :         y1[3] = CL_msu_j( r1, r2 );
     479    16880132 :         y2[3] = CL_mac_j( r1, r2 );
     480    16880132 :         y3[3] = CL_mac_j( r3, r4 );
     481    16880132 :         y4[3] = CL_msu_j( r3, r4 );
     482             :     }
     483             : 
     484             :     {
     485    16880132 :         cmplx *ptr_y = y;
     486             :         {
     487             :             cmplx Cy0, Cy1, Cy2, Cy3;
     488             : 
     489    16880132 :             Cy0 = *ptr_y++;
     490    16880132 :             Cy1 = *ptr_y++;
     491    16880132 :             Cy2 = *ptr_y++;
     492    16880132 :             Cy3 = *ptr_y++;
     493             : 
     494             :             /*  Pre-additions */
     495    16880132 :             t0 = CL_add( Cy0, Cy2 );
     496    16880132 :             t1 = CL_sub( Cy0, Cy2 );
     497    16880132 :             t2 = CL_add( Cy1, Cy3 );
     498    16880132 :             t3 = CL_sub( Cy1, Cy3 );
     499             : 
     500             : 
     501    16880132 :             inp_data[0] = CL_add( t0, t2 );
     502    16880132 :             inp_data[5] = CL_msu_j( t1, t3 );
     503    16880132 :             inp_data[10] = CL_sub( t0, t2 );
     504    16880132 :             inp_data[15] = CL_mac_j( t1, t3 );
     505             :         }
     506             : 
     507             :         {
     508             :             cmplx Cy0, Cy1, Cy2, Cy3;
     509             : 
     510    16880132 :             Cy0 = *ptr_y++;
     511    16880132 :             Cy1 = *ptr_y++;
     512    16880132 :             Cy2 = *ptr_y++;
     513    16880132 :             Cy3 = *ptr_y++;
     514             : 
     515             :             /*  Pre-additions */
     516    16880132 :             t0 = CL_add( Cy0, Cy2 );
     517    16880132 :             t1 = CL_sub( Cy0, Cy2 );
     518    16880132 :             t2 = CL_add( Cy1, Cy3 );
     519    16880132 :             t3 = CL_sub( Cy1, Cy3 );
     520             : 
     521             : 
     522    16880132 :             inp_data[4] = CL_add( t0, t2 );
     523    16880132 :             inp_data[9] = CL_msu_j( t1, t3 );
     524    16880132 :             inp_data[14] = CL_sub( t0, t2 );
     525    16880132 :             inp_data[19] = CL_mac_j( t1, t3 );
     526             :         }
     527             : 
     528             :         {
     529             :             cmplx Cy0, Cy1, Cy2, Cy3;
     530             : 
     531    16880132 :             Cy0 = *ptr_y++;
     532    16880132 :             Cy1 = *ptr_y++;
     533    16880132 :             Cy2 = *ptr_y++;
     534    16880132 :             Cy3 = *ptr_y++;
     535             : 
     536             :             /*  Pre-additions */
     537    16880132 :             t0 = CL_add( Cy0, Cy2 );
     538    16880132 :             t1 = CL_sub( Cy0, Cy2 );
     539    16880132 :             t2 = CL_add( Cy1, Cy3 );
     540    16880132 :             t3 = CL_sub( Cy1, Cy3 );
     541             : 
     542             : 
     543    16880132 :             inp_data[8] = CL_add( t0, t2 );
     544    16880132 :             inp_data[13] = CL_msu_j( t1, t3 );
     545    16880132 :             inp_data[18] = CL_sub( t0, t2 );
     546    16880132 :             inp_data[3] = CL_mac_j( t1, t3 );
     547             :         }
     548             : 
     549             :         {
     550             :             cmplx Cy0, Cy1, Cy2, Cy3;
     551             : 
     552    16880132 :             Cy0 = *ptr_y++;
     553    16880132 :             Cy1 = *ptr_y++;
     554    16880132 :             Cy2 = *ptr_y++;
     555    16880132 :             Cy3 = *ptr_y++;
     556             : 
     557             :             /*  Pre-additions */
     558    16880132 :             t0 = CL_add( Cy0, Cy2 );
     559    16880132 :             t1 = CL_sub( Cy0, Cy2 );
     560    16880132 :             t2 = CL_add( Cy1, Cy3 );
     561    16880132 :             t3 = CL_sub( Cy1, Cy3 );
     562             : 
     563    16880132 :             inp_data[12] = CL_add( t0, t2 );
     564    16880132 :             inp_data[17] = CL_msu_j( t1, t3 );
     565    16880132 :             inp_data[2] = CL_sub( t0, t2 );
     566    16880132 :             inp_data[7] = CL_mac_j( t1, t3 );
     567             :         }
     568             : 
     569             :         {
     570             :             cmplx Cy0, Cy1, Cy2, Cy3;
     571             : 
     572    16880132 :             Cy0 = *ptr_y++;
     573    16880132 :             Cy1 = *ptr_y++;
     574    16880132 :             Cy2 = *ptr_y++;
     575    16880132 :             Cy3 = *ptr_y++;
     576             : 
     577             :             /*  Pre-additions */
     578    16880132 :             t0 = CL_add( Cy0, Cy2 );
     579    16880132 :             t1 = CL_sub( Cy0, Cy2 );
     580    16880132 :             t2 = CL_add( Cy1, Cy3 );
     581    16880132 :             t3 = CL_sub( Cy1, Cy3 );
     582             : 
     583             : 
     584    16880132 :             inp_data[16] = CL_add( t0, t2 );
     585    16880132 :             inp_data[1] = CL_msu_j( t1, t3 );
     586    16880132 :             inp_data[6] = CL_sub( t0, t2 );
     587    16880132 :             inp_data[11] = CL_mac_j( t1, t3 );
     588             :         }
     589             :     }
     590             : #ifdef WMOPS
     591             :     multiCounter[currCounter].CL_move += 20;
     592             : #endif
     593    16880132 : }
     594             : 
     595             : 
     596             : /**
     597             :  * \brief    Function performs a complex 30-point FFT
     598             :  *           The FFT is performed in place. The result of the FFT
     599             :  *           is scaled by SCALEFACTOR30 bits.
     600             :  *
     601             :  *           WOPS with 32x16 bit multiplications:  828 cycles
     602             :  *
     603             :  * \param    [i/o] inp   30 complex values (Qx); all are read before any is
     604             :  *                       overwritten with the transform output
     605             :  *                       (CL_shr by SCALEFACTOR30_1 on input, by SCALEFACTOR30_2 ahead of the FFT2 stages)
     606             :  *
     607             :  * \return   void
     608             :  */
     609    51796414 : static void fft30_with_cmplx_data( cmplx *inp /*Qx*/ )
     610             : {
     611    51796414 :     cmplx *l = &inp[0];  /* output cursor, walks inp[0..14] */
     612    51796414 :     cmplx *h = &inp[15]; /* output cursor, walks inp[15..29] */
     613             : 
     614             :     cmplx z[30], y[15], x[15], rs1, rs2, rs3, rs4, t; /* x: permuted inputs, y: FFT5 outputs, z: outputs of both FFT15 stages */
     615             : 
     616             :     /* 1. FFT15 stage: the 15 even-indexed inputs, permuted into x[] -> z[0..14] */
     617             : 
     618    51796414 :     x[0] = CL_shr( inp[0], SCALEFACTOR30_1 );
     619    51796414 :     x[1] = CL_shr( inp[18], SCALEFACTOR30_1 );
     620    51796414 :     x[2] = CL_shr( inp[6], SCALEFACTOR30_1 );
     621    51796414 :     x[3] = CL_shr( inp[24], SCALEFACTOR30_1 );
     622    51796414 :     x[4] = CL_shr( inp[12], SCALEFACTOR30_1 );
     623             : 
     624    51796414 :     x[5] = CL_shr( inp[20], SCALEFACTOR30_1 );
     625    51796414 :     x[6] = CL_shr( inp[8], SCALEFACTOR30_1 );
     626    51796414 :     x[7] = CL_shr( inp[26], SCALEFACTOR30_1 );
     627    51796414 :     x[8] = CL_shr( inp[14], SCALEFACTOR30_1 );
     628    51796414 :     x[9] = CL_shr( inp[2], SCALEFACTOR30_1 );
     629             : 
     630    51796414 :     x[10] = CL_shr( inp[10], SCALEFACTOR30_1 );
     631    51796414 :     x[11] = CL_shr( inp[28], SCALEFACTOR30_1 );
     632    51796414 :     x[12] = CL_shr( inp[16], SCALEFACTOR30_1 );
     633    51796414 :     x[13] = CL_shr( inp[4], SCALEFACTOR30_1 );
     634    51796414 :     x[14] = CL_shr( inp[22], SCALEFACTOR30_1 );
     635             : 
     636             : 
     637             :     /* 1. FFT5 stage: x[0..4] -> y[0..4] */
     638    51796414 :     rs1 = CL_add( x[1], x[4] );
     639    51796414 :     rs4 = CL_sub( x[1], x[4] );
     640    51796414 :     rs3 = CL_add( x[2], x[3] );
     641    51796414 :     rs2 = CL_sub( x[2], x[3] );
     642    51796414 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     643    51796414 :     rs1 = CL_add( rs1, rs3 );
     644    51796414 :     y[0] = CL_add( x[0], rs1 );
     645    51796414 :     rs1 = CL_add( y[0], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     646    51796414 :     rs3 = CL_sub( rs1, t );
     647    51796414 :     rs1 = CL_add( rs1, t );
     648    51796414 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     649    51796414 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     650    51796414 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     651             : 
     652             :     /* combination: rs1/rs2 give y[1], y[4]; rs3/rs4 give y[2], y[3] */
     653    51796414 :     y[1] = CL_msu_j( rs1, rs2 );
     654    51796414 :     y[4] = CL_mac_j( rs1, rs2 );
     655    51796414 :     y[2] = CL_mac_j( rs3, rs4 );
     656    51796414 :     y[3] = CL_msu_j( rs3, rs4 );
     657             : 
     658             : 
     659             :     /* 2. FFT5 stage: x[5..9] -> y[5..9] */
     660    51796414 :     rs1 = CL_add( x[6], x[9] );
     661    51796414 :     rs4 = CL_sub( x[6], x[9] );
     662    51796414 :     rs3 = CL_add( x[7], x[8] );
     663    51796414 :     rs2 = CL_sub( x[7], x[8] );
     664    51796414 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     665    51796414 :     rs1 = CL_add( rs1, rs3 );
     666    51796414 :     y[5] = CL_add( x[5], rs1 );
     667    51796414 :     rs1 = CL_add( y[5], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     668    51796414 :     rs3 = CL_sub( rs1, t );
     669    51796414 :     rs1 = CL_add( rs1, t );
     670    51796414 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     671    51796414 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     672    51796414 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     673             : 
     674             :     /* combination: rs1/rs2 give y[6], y[9]; rs3/rs4 give y[7], y[8] */
     675    51796414 :     y[6] = CL_msu_j( rs1, rs2 );
     676    51796414 :     y[9] = CL_mac_j( rs1, rs2 );
     677    51796414 :     y[7] = CL_mac_j( rs3, rs4 );
     678    51796414 :     y[8] = CL_msu_j( rs3, rs4 );
     679             : 
     680             : 
     681             :     /* 3. FFT5 stage: x[10..14] -> y[10..14] */
     682    51796414 :     rs1 = CL_add( x[11], x[14] );
     683    51796414 :     rs4 = CL_sub( x[11], x[14] );
     684    51796414 :     rs3 = CL_add( x[12], x[13] );
     685    51796414 :     rs2 = CL_sub( x[12], x[13] );
     686    51796414 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     687    51796414 :     rs1 = CL_add( rs1, rs3 );
     688    51796414 :     y[10] = CL_add( x[10], rs1 );
     689    51796414 :     rs1 = CL_add( y[10], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     690    51796414 :     rs3 = CL_sub( rs1, t );
     691    51796414 :     rs1 = CL_add( rs1, t );
     692    51796414 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     693    51796414 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     694    51796414 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     695             : 
     696             :     /* combination: rs1/rs2 give y[11], y[14]; rs3/rs4 give y[12], y[13] */
     697    51796414 :     y[11] = CL_msu_j( rs1, rs2 );
     698    51796414 :     y[14] = CL_mac_j( rs1, rs2 );
     699    51796414 :     y[12] = CL_mac_j( rs3, rs4 );
     700    51796414 :     y[13] = CL_msu_j( rs3, rs4 );
     701             : 
     702             :     /* 1. FFT3 stage */
     703             :     /* y[0], y[5], y[10] -> z[0], z[10], z[5] (full complex values) */
     704    51796414 :     rs1 = CL_add( y[5], y[10] );
     705    51796414 :     rs2 = CL_scale_t( CL_sub( y[5], y[10] ), C31 );
     706    51796414 :     z[0] = CL_add( y[0], rs1 );
     707    51796414 :     rs1 = CL_sub( y[0], CL_shr( rs1, 1 ) );
     708             : 
     709    51796414 :     z[10] = CL_mac_j( rs1, rs2 );
     710    51796414 :     z[5] = CL_msu_j( rs1, rs2 );
     711             : 
     712             :     /* 2. FFT3 stage: y[1], y[6], y[11] -> z[6], z[1], z[11] */
     713    51796414 :     rs1 = CL_add( y[6], y[11] );
     714    51796414 :     rs2 = CL_scale_t( CL_sub( y[6], y[11] ), C31 );
     715    51796414 :     z[6] = CL_add( y[1], rs1 );
     716    51796414 :     rs1 = CL_sub( y[1], CL_shr( rs1, 1 ) );
     717             : 
     718    51796414 :     z[1] = CL_mac_j( rs1, rs2 );
     719    51796414 :     z[11] = CL_msu_j( rs1, rs2 );
     720             : 
     721             : 
     722             :     /* 3. FFT3 stage: y[2], y[7], y[12] -> z[12], z[7], z[2] */
     723    51796414 :     rs1 = CL_add( y[7], y[12] );
     724    51796414 :     rs2 = CL_scale_t( CL_sub( y[7], y[12] ), C31 );
     725    51796414 :     z[12] = CL_add( y[2], rs1 );
     726    51796414 :     rs1 = CL_sub( y[2], CL_shr( rs1, 1 ) );
     727             : 
     728    51796414 :     z[7] = CL_mac_j( rs1, rs2 );
     729    51796414 :     z[2] = CL_msu_j( rs1, rs2 );
     730             : 
     731             : 
     732             :     /* 4. FFT3 stage: y[3], y[8], y[13] -> z[3], z[13], z[8] */
     733    51796414 :     rs1 = CL_add( y[8], y[13] );
     734    51796414 :     rs2 = CL_scale_t( CL_sub( y[8], y[13] ), C31 );
     735    51796414 :     z[3] = CL_add( y[3], rs1 );
     736    51796414 :     rs1 = CL_sub( y[3], CL_shr( rs1, 1 ) );
     737             : 
     738    51796414 :     z[13] = CL_mac_j( rs1, rs2 );
     739    51796414 :     z[8] = CL_msu_j( rs1, rs2 );
     740             : 
     741             : 
     742             :     /* 5. FFT3 stage: y[4], y[9], y[14] -> z[9], z[4], z[14] */
     743    51796414 :     rs1 = CL_add( y[9], y[14] );
     744    51796414 :     rs2 = CL_scale_t( CL_sub( y[9], y[14] ), C31 );
     745    51796414 :     z[9] = CL_add( y[4], rs1 );
     746    51796414 :     rs1 = CL_sub( y[4], CL_shr( rs1, 1 ) );
     747             : 
     748    51796414 :     z[4] = CL_mac_j( rs1, rs2 );
     749    51796414 :     z[14] = CL_msu_j( rs1, rs2 );
     750             : 
     751             :     /* 2. FFT15 stage: the 15 odd-indexed inputs, permuted into x[] -> z[15..29] */
     752    51796414 :     x[0] = CL_shr( inp[15], SCALEFACTOR30_1 );
     753    51796414 :     x[1] = CL_shr( inp[3], SCALEFACTOR30_1 );
     754    51796414 :     x[2] = CL_shr( inp[21], SCALEFACTOR30_1 );
     755    51796414 :     x[3] = CL_shr( inp[9], SCALEFACTOR30_1 );
     756    51796414 :     x[4] = CL_shr( inp[27], SCALEFACTOR30_1 );
     757             : 
     758    51796414 :     x[5] = CL_shr( inp[5], SCALEFACTOR30_1 );
     759    51796414 :     x[6] = CL_shr( inp[23], SCALEFACTOR30_1 );
     760    51796414 :     x[7] = CL_shr( inp[11], SCALEFACTOR30_1 );
     761    51796414 :     x[8] = CL_shr( inp[29], SCALEFACTOR30_1 );
     762    51796414 :     x[9] = CL_shr( inp[17], SCALEFACTOR30_1 );
     763             : 
     764    51796414 :     x[10] = CL_shr( inp[25], SCALEFACTOR30_1 );
     765    51796414 :     x[11] = CL_shr( inp[13], SCALEFACTOR30_1 );
     766    51796414 :     x[12] = CL_shr( inp[1], SCALEFACTOR30_1 );
     767    51796414 :     x[13] = CL_shr( inp[19], SCALEFACTOR30_1 );
     768    51796414 :     x[14] = CL_shr( inp[7], SCALEFACTOR30_1 );
     769             : 
     770             :     /* 1. FFT5 stage: x[0..4] -> y[0..4] */
     771    51796414 :     rs1 = CL_add( x[1], x[4] );
     772    51796414 :     rs4 = CL_sub( x[1], x[4] );
     773    51796414 :     rs3 = CL_add( x[2], x[3] );
     774    51796414 :     rs2 = CL_sub( x[2], x[3] );
     775    51796414 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     776    51796414 :     rs1 = CL_add( rs1, rs3 );
     777    51796414 :     y[0] = CL_add( x[0], rs1 );
     778    51796414 :     rs1 = CL_add( y[0], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     779    51796414 :     rs3 = CL_sub( rs1, t );
     780    51796414 :     rs1 = CL_add( rs1, t );
     781    51796414 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     782    51796414 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     783    51796414 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     784             : 
     785             :     /* combination: rs1/rs2 give y[1], y[4]; rs3/rs4 give y[2], y[3] */
     786    51796414 :     y[1] = CL_msu_j( rs1, rs2 );
     787    51796414 :     y[4] = CL_mac_j( rs1, rs2 );
     788    51796414 :     y[2] = CL_mac_j( rs3, rs4 );
     789    51796414 :     y[3] = CL_msu_j( rs3, rs4 );
     790             : 
     791             : 
     792             :     /* 2. FFT5 stage: x[5..9] -> y[5..9] */
     793    51796414 :     rs1 = CL_add( x[6], x[9] );
     794    51796414 :     rs4 = CL_sub( x[6], x[9] );
     795    51796414 :     rs3 = CL_add( x[7], x[8] );
     796    51796414 :     rs2 = CL_sub( x[7], x[8] );
     797    51796414 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     798    51796414 :     rs1 = CL_add( rs1, rs3 );
     799    51796414 :     y[5] = CL_add( x[5], rs1 );
     800    51796414 :     rs1 = CL_add( y[5], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     801    51796414 :     rs3 = CL_sub( rs1, t );
     802    51796414 :     rs1 = CL_add( rs1, t );
     803    51796414 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     804    51796414 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     805    51796414 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     806             : 
     807             :     /* combination: rs1/rs2 give y[6], y[9]; rs3/rs4 give y[7], y[8] */
     808    51796414 :     y[6] = CL_msu_j( rs1, rs2 );
     809    51796414 :     y[9] = CL_mac_j( rs1, rs2 );
     810    51796414 :     y[7] = CL_mac_j( rs3, rs4 );
     811    51796414 :     y[8] = CL_msu_j( rs3, rs4 );
     812             : 
     813             : 
     814             :     /* 3. FFT5 stage: x[10..14] -> y[10..14] */
     815    51796414 :     rs1 = CL_add( x[11], x[14] );
     816    51796414 :     rs4 = CL_sub( x[11], x[14] );
     817    51796414 :     rs3 = CL_add( x[12], x[13] );
     818    51796414 :     rs2 = CL_sub( x[12], x[13] );
     819    51796414 :     t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
     820    51796414 :     rs1 = CL_add( rs1, rs3 );
     821    51796414 :     y[10] = CL_add( x[10], rs1 );
     822    51796414 :     rs1 = CL_add( y[10], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
     823    51796414 :     rs3 = CL_sub( rs1, t );
     824    51796414 :     rs1 = CL_add( rs1, t );
     825    51796414 :     t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
     826    51796414 :     rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
     827    51796414 :     rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
     828             : 
     829             :     /* combination: rs1/rs2 give y[11], y[14]; rs3/rs4 give y[12], y[13] */
     830    51796414 :     y[11] = CL_msu_j( rs1, rs2 );
     831    51796414 :     y[14] = CL_mac_j( rs1, rs2 );
     832    51796414 :     y[12] = CL_mac_j( rs3, rs4 );
     833    51796414 :     y[13] = CL_msu_j( rs3, rs4 );
     834             : 
     835             :     /* 1. FFT3 stage */
     836             :     /* y[0], y[5], y[10] -> z[15], z[25], z[20] (full complex values) */
     837    51796414 :     rs1 = CL_add( y[5], y[10] );
     838    51796414 :     rs2 = CL_scale_t( CL_sub( y[5], y[10] ), C31 );
     839    51796414 :     z[15] = CL_add( y[0], rs1 );
     840    51796414 :     rs1 = CL_sub( y[0], CL_shr( rs1, 1 ) );
     841             : 
     842    51796414 :     z[25] = CL_mac_j( rs1, rs2 );
     843    51796414 :     z[20] = CL_msu_j( rs1, rs2 );
     844             : 
     845             :     /* 2. FFT3 stage: y[1], y[6], y[11] -> z[21], z[16], z[26] */
     846    51796414 :     rs1 = CL_add( y[6], y[11] );
     847    51796414 :     rs2 = CL_scale_t( CL_sub( y[6], y[11] ), C31 );
     848    51796414 :     z[21] = CL_add( y[1], rs1 );
     849    51796414 :     rs1 = CL_sub( y[1], CL_shr( rs1, 1 ) );
     850             : 
     851    51796414 :     z[16] = CL_mac_j( rs1, rs2 );
     852    51796414 :     z[26] = CL_msu_j( rs1, rs2 );
     853             : 
     854             : 
     855             :     /* 3. FFT3 stage: y[2], y[7], y[12] -> z[27], z[22], z[17] */
     856    51796414 :     rs1 = CL_add( y[7], y[12] );
     857    51796414 :     rs2 = CL_scale_t( CL_sub( y[7], y[12] ), C31 );
     858    51796414 :     z[27] = CL_add( y[2], rs1 );
     859    51796414 :     rs1 = CL_sub( y[2], CL_shr( rs1, 1 ) );
     860             : 
     861    51796414 :     z[22] = CL_mac_j( rs1, rs2 );
     862    51796414 :     z[17] = CL_msu_j( rs1, rs2 );
     863             : 
     864             : 
     865             :     /* 4. FFT3 stage: y[3], y[8], y[13] -> z[18], z[28], z[23] */
     866    51796414 :     rs1 = CL_add( y[8], y[13] );
     867    51796414 :     rs2 = CL_scale_t( CL_sub( y[8], y[13] ), C31 );
     868    51796414 :     z[18] = CL_add( y[3], rs1 );
     869    51796414 :     rs1 = CL_sub( y[3], CL_shr( rs1, 1 ) );
     870             : 
     871    51796414 :     z[28] = CL_mac_j( rs1, rs2 );
     872    51796414 :     z[23] = CL_msu_j( rs1, rs2 );
     873             : 
     874             : 
     875             :     /* 5. FFT3 stage: y[4], y[9], y[14] -> z[24], z[19], z[29] */
     876    51796414 :     rs1 = CL_add( y[9], y[14] );
     877    51796414 :     rs2 = CL_scale_t( CL_sub( y[9], y[14] ), C31 );
     878    51796414 :     z[24] = CL_add( y[4], rs1 );
     879    51796414 :     rs1 = CL_sub( y[4], CL_shr( rs1, 1 ) );
     880             : 
     881    51796414 :     z[19] = CL_mac_j( rs1, rs2 );
     882    51796414 :     z[29] = CL_msu_j( rs1, rs2 );
     883             : 
     884             :     /* 1. FFT2 stage: inp[0] = sum, inp[15] = difference (sum goes to *l on odd stages, to *h on even stages) */
     885    51796414 :     rs1 = CL_shr( z[0], SCALEFACTOR30_2 );
     886    51796414 :     rs2 = CL_shr( z[15], SCALEFACTOR30_2 );
     887    51796414 :     *l = CL_add( rs1, rs2 );
     888    51796414 :     *h = CL_sub( rs1, rs2 );
     889    51796414 :     l += 1;
     890    51796414 :     h += 1;
     891             : 
     892             :     /* 2. FFT2 stage: inp[16] = sum, inp[1] = difference */
     893    51796414 :     rs1 = CL_shr( z[8], SCALEFACTOR30_2 );
     894    51796414 :     rs2 = CL_shr( z[23], SCALEFACTOR30_2 );
     895    51796414 :     *h = CL_add( rs1, rs2 );
     896    51796414 :     *l = CL_sub( rs1, rs2 );
     897    51796414 :     l += 1;
     898    51796414 :     h += 1;
     899             : 
     900             :     /* 3. FFT2 stage: inp[2] = sum, inp[17] = difference */
     901    51796414 :     rs1 = CL_shr( z[1], SCALEFACTOR30_2 );
     902    51796414 :     rs2 = CL_shr( z[16], SCALEFACTOR30_2 );
     903    51796414 :     *l = CL_add( rs1, rs2 );
     904    51796414 :     *h = CL_sub( rs1, rs2 );
     905    51796414 :     l += 1;
     906    51796414 :     h += 1;
     907             : 
     908             :     /* 4. FFT2 stage: inp[18] = sum, inp[3] = difference */
     909    51796414 :     rs1 = CL_shr( z[9], SCALEFACTOR30_2 );
     910    51796414 :     rs2 = CL_shr( z[24], SCALEFACTOR30_2 );
     911    51796414 :     *h = CL_add( rs1, rs2 );
     912    51796414 :     *l = CL_sub( rs1, rs2 );
     913    51796414 :     l += 1;
     914    51796414 :     h += 1;
     915             : 
     916             :     /* 5. FFT2 stage: inp[4] = sum, inp[19] = difference */
     917    51796414 :     rs1 = CL_shr( z[2], SCALEFACTOR30_2 );
     918    51796414 :     rs2 = CL_shr( z[17], SCALEFACTOR30_2 );
     919    51796414 :     *l = CL_add( rs1, rs2 );
     920    51796414 :     *h = CL_sub( rs1, rs2 );
     921    51796414 :     l += 1;
     922    51796414 :     h += 1;
     923             : 
     924             :     /* 6. FFT2 stage: inp[20] = sum, inp[5] = difference */
     925    51796414 :     rs1 = CL_shr( z[10], SCALEFACTOR30_2 );
     926    51796414 :     rs2 = CL_shr( z[25], SCALEFACTOR30_2 );
     927    51796414 :     *h = CL_add( rs1, rs2 );
     928    51796414 :     *l = CL_sub( rs1, rs2 );
     929    51796414 :     l += 1;
     930    51796414 :     h += 1;
     931             : 
     932             :     /* 7. FFT2 stage: inp[6] = sum, inp[21] = difference */
     933    51796414 :     rs1 = CL_shr( z[3], SCALEFACTOR30_2 );
     934    51796414 :     rs2 = CL_shr( z[18], SCALEFACTOR30_2 );
     935    51796414 :     *l = CL_add( rs1, rs2 );
     936    51796414 :     *h = CL_sub( rs1, rs2 );
     937    51796414 :     l += 1;
     938    51796414 :     h += 1;
     939             : 
     940             :     /* 8. FFT2 stage: inp[22] = sum, inp[7] = difference */
     941    51796414 :     rs1 = CL_shr( z[11], SCALEFACTOR30_2 );
     942    51796414 :     rs2 = CL_shr( z[26], SCALEFACTOR30_2 );
     943    51796414 :     *h = CL_add( rs1, rs2 );
     944    51796414 :     *l = CL_sub( rs1, rs2 );
     945    51796414 :     l += 1;
     946    51796414 :     h += 1;
     947             : 
     948             :     /* 9. FFT2 stage: inp[8] = sum, inp[23] = difference */
     949    51796414 :     rs1 = CL_shr( z[4], SCALEFACTOR30_2 );
     950    51796414 :     rs2 = CL_shr( z[19], SCALEFACTOR30_2 );
     951    51796414 :     *l = CL_add( rs1, rs2 );
     952    51796414 :     *h = CL_sub( rs1, rs2 );
     953    51796414 :     l += 1;
     954    51796414 :     h += 1;
     955             : 
     956             :     /* 10. FFT2 stage: inp[24] = sum, inp[9] = difference */
     957    51796414 :     rs1 = CL_shr( z[12], SCALEFACTOR30_2 );
     958    51796414 :     rs2 = CL_shr( z[27], SCALEFACTOR30_2 );
     959    51796414 :     *h = CL_add( rs1, rs2 );
     960    51796414 :     *l = CL_sub( rs1, rs2 );
     961    51796414 :     l += 1;
     962    51796414 :     h += 1;
     963             : 
     964             :     /* 11. FFT2 stage: inp[10] = sum, inp[25] = difference */
     965    51796414 :     rs1 = CL_shr( z[5], SCALEFACTOR30_2 );
     966    51796414 :     rs2 = CL_shr( z[20], SCALEFACTOR30_2 );
     967    51796414 :     *l = CL_add( rs1, rs2 );
     968    51796414 :     *h = CL_sub( rs1, rs2 );
     969    51796414 :     l += 1;
     970    51796414 :     h += 1;
     971             : 
     972             :     /* 12. FFT2 stage: inp[26] = sum, inp[11] = difference */
     973    51796414 :     rs1 = CL_shr( z[13], SCALEFACTOR30_2 );
     974    51796414 :     rs2 = CL_shr( z[28], SCALEFACTOR30_2 );
     975    51796414 :     *h = CL_add( rs1, rs2 );
     976    51796414 :     *l = CL_sub( rs1, rs2 );
     977    51796414 :     l += 1;
     978    51796414 :     h += 1;
     979             : 
     980             :     /* 13. FFT2 stage: inp[12] = sum, inp[27] = difference */
     981    51796414 :     rs1 = CL_shr( z[6], SCALEFACTOR30_2 );
     982    51796414 :     rs2 = CL_shr( z[21], SCALEFACTOR30_2 );
     983    51796414 :     *l = CL_add( rs1, rs2 );
     984    51796414 :     *h = CL_sub( rs1, rs2 );
     985    51796414 :     l += 1;
     986    51796414 :     h += 1;
     987             : 
     988             :     /* 14. FFT2 stage: inp[28] = sum, inp[13] = difference */
     989    51796414 :     rs1 = CL_shr( z[14], SCALEFACTOR30_2 );
     990    51796414 :     rs2 = CL_shr( z[29], SCALEFACTOR30_2 );
     991    51796414 :     *h = CL_add( rs1, rs2 );
     992    51796414 :     *l = CL_sub( rs1, rs2 );
     993    51796414 :     l += 1;
     994    51796414 :     h += 1;
     995             : 
     996             :     /* 15. FFT2 stage: inp[14] = sum, inp[29] = difference */
     997    51796414 :     rs1 = CL_shr( z[7], SCALEFACTOR30_2 );
     998    51796414 :     rs2 = CL_shr( z[22], SCALEFACTOR30_2 );
     999    51796414 :     *l = CL_add( rs1, rs2 );
    1000    51796414 :     *h = CL_sub( rs1, rs2 );
    1001    51796414 :     l += 1;
    1002    51796414 :     h += 1;
    1003             : 
    1004             : #ifdef WMOPS
    1005             :     multiCounter[currCounter].CL_move += 30;
    1006             : #endif
    1007    51796414 : }
    1008             : 
    1009             : /*-------------------------------------------------------------------*
    1010             :  * fft_cldfb_fx()
    1011             :  *
    1012             :  * Interface function for the FFT subroutines: dispatches on size (5, 8, 10, 16, 20 or 30); any other size hits assert( 0 )
    1013             :  *--------------------------------------------------------------------*/
    1014    86772582 : void fft_cldfb_fx(
    1015             :     Word32 *data,     /* i/o: input/output vector Qx, handed to the FFT routines as cmplx * */
    1016             :     const Word16 size /* size of fft operation: 5, 8, 10, 16, 20 or 30 */
    1017             : )
    1018             : {
    1019             :     /* each case casts data to cmplx * and calls the fixed-size routine for that size */
    1020    86772582 :     SWITCH( size )
    1021             :     {
    1022           0 :         case 5:
    1023           0 :             fft5_with_cmplx_data( (cmplx *) data );
    1024           0 :             BREAK;
    1025     4917700 :         case 8:
    1026     4917700 :             fft8_with_cmplx_data( (cmplx *) data );
    1027     4917700 :             BREAK;
    1028    13174712 :         case 10:
    1029    13174712 :             fft10_with_cmplx_data( (cmplx *) data );
    1030    13174712 :             BREAK;
    1031        3624 :         case 16:
    1032        3624 :             fft16_with_cmplx_data( (cmplx *) data, 0 ); /* NOTE(review): meaning of the second argument (0) is defined by fft16_with_cmplx_data, not visible here */
    1033        3624 :             BREAK;
    1034    16880132 :         case 20:
    1035    16880132 :             fft20_with_cmplx_data( (cmplx *) data );
    1036    16880132 :             BREAK;
    1037    51796414 :         case 30:
    1038    51796414 :             fft30_with_cmplx_data( (cmplx *) data );
    1039    51796414 :             BREAK;
    1040             : 
    1041           0 :         default:
    1042           0 :             assert( 0 ); /* unsupported FFT size */
    1043             :             BREAK;
    1044             :     }
    1045             : 
    1046    86772582 :     return;
    1047             : }

Generated by: LCOV version 1.14