LCOV - code coverage report
Current view: top level - lib_lc3plus - fft_lc3plus.c (source / functions) Hit Total Coverage
Test: Coverage on main -- dec/rend @ 633e3f2e309758d10805ef21e0436356fe719b7a Lines: 0 2881 0.0 %
Date: 2025-08-23 01:22:27 Functions: 0 17 0.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             : *                        ETSI TS 103 634 V1.5.1                               *
       3             : *              Low Complexity Communication Codec Plus (LC3plus)              *
       4             : *                                                                             *
       5             : * Copyright licence is solely granted through ETSI Intellectual Property      *
       6             : * Rights Policy, 3rd April 2019. No patent licence is granted by implication, *
       7             : * estoppel or otherwise.                                                      *
       8             : ******************************************************************************/
       9             :                                                                               
      10             : #include "functions.h"
      11             : #include "rom_basop_util_lc3plus.h"
      12             : 
      13             : #ifdef ENABLE_FFT_RESCALE
      14             : #ifndef FFT_RESCALE_HR
      15             : #define FFT_RESCALE_HR 3 /* default; only defined when ENABLE_FFT_RESCALE is set and no value was supplied */
      16             : #endif
      17             : #endif
      18             : /* Scaling shifts: fft4/fft5/fft6/fft8 below shift every input right by SCALEFACTOR4/5/6/8 bits before the butterflies. */
      19             : #define SCALEFACTORN2 3
      20             : #define SCALEFACTOR4 3
      21             : #define SCALEFACTOR5 4
      22             : #define SCALEFACTOR8 4
      23             : #define SCALEFACTOR15 5
      24             : #define SCALEFACTOR30_1 5
      25             : #define SCALEFACTOR30_2 1
      26             : #define SCALEFACTOR32_1 5
      27             : #define SCALEFACTOR32_2 1
      28             : /* Mpy_32_xx aliases Mpy_32_32_lc3plus when ENABLE_HR_MODE is defined and Mpy_32_16_lc3plus otherwise. */
      29             : #ifdef ENABLE_HR_MODE
      30             : #define Mpy_32_xx Mpy_32_32_lc3plus
      31             : #else
      32             : #define Mpy_32_xx Mpy_32_16_lc3plus
      33             : #endif
      34             : 
      35             : #define SCALEFACTOR6 4
      36             : #define C61_32 (0x6ed9eba1) /* 0x6ed9eba1 / 2^31 = 0.8660254 = sqrt(3)/2, read as Q31 like the FL2WORD32 constants of this file */
      37             : /* Scaling shifts for the larger transform sizes; most of their users are not part of this excerpt. */
      38             : #define SCALEFACTOR10 5
      39             : #define SCALEFACTOR16 5
      40             : #define SCALEFACTOR20 5
      41             : #define SCALEFACTOR30 6
      42             : #define SCALEFACTOR32 6
      43             : #define SCALEFACTOR40 7
      44             : #define SCALEFACTOR48 8
      45             : #define SCALEFACTOR60 7
      46             : #define SCALEFACTOR64 7
      47             : #define SCALEFACTOR80 8
      48             : #define SCALEFACTOR90 9
      49             : #define SCALEFACTOR96 9
      50             : #define SCALEFACTOR120 8
      51             : #define SCALEFACTOR128 8
      52             : #define SCALEFACTOR160 8
      53             : #define SCALEFACTOR180 10
      54             : #define SCALEFACTOR192 10
      55             : #define SCALEFACTOR240 9
      56             : #define SCALEFACTOR256 9
      57             : #define SCALEFACTOR384 11
      58             : /* SCALEFACTOR360/480 exist only in HR mode; SCALEFACTOR480 is 10 unless ENABLE_FFT_30X16 is defined, in which case it is 11. */
      59             : #ifdef ENABLE_HR_MODE
      60             : #define SCALEFACTOR360 11
      61             : #ifndef ENABLE_FFT_30X16
      62             : #define SCALEFACTOR480 10
      63             : #else
      64             : #define SCALEFACTOR480 11
      65             : #endif
      66             : #endif
      67             : /* HR mode replaces L_shr_pos by a rounding shift: shift right by (y - 1), add 1, then shift right once more. NOTE(review): y == 0 would hand -1 to L_shr; confirm no caller passes 0. */
      68             : #ifdef ENABLE_HR_MODE
      69             : #undef  L_shr_pos
      70             : #define L_shr_pos(x, y) (L_shr(L_add(L_shr((x), ((y)-1)),1),1))
      71             : #endif
      72             : /* FFTC(x): the constant cast to Word32 in HR mode, passed through WORD322WORD16 otherwise. NOTE(review): x is not parenthesised inside the cast; harmless for the plain literals used below. */
      73             : #ifdef ENABLE_HR_MODE
      74             : #define FFTC(x) ((Word32)x)
      75             : #else
      76             : #define FFTC(x) WORD322WORD16((Word32)x)
      77             : #endif
      78             : 
      79             : #define C31 (FFTC(0x91261468)) /* FL2WORD32( -0.86602540) -sqrt(3)/2 */
      80             : 
      81             : #define C51 (FFTC(0x79bc3854)) /* FL2WORD32( 0.95105652)   */
      82             : #define C52 (FFTC(0x9d839db0)) /* FL2WORD32(-1.53884180/2) */
      83             : #define C53 (FFTC(0xd18053ce)) /* FL2WORD32(-0.36327126)   */
      84             : #define C54 (FFTC(0x478dde64)) /* FL2WORD32( 0.55901699)   */
      85             : #define C55 (FFTC(0xb0000001)) /* FL2WORD32(-1.25/2)       */
      86             : 
      87             : #define C81 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1) */
      88             : #define C82 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) */
      89             : 
      90             : #define C161 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1)  INV_SQRT2    */
      91             : #define C162 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) -INV_SQRT2    */
      92             : 
      93             : #define C163 (FFTC(0x7641af3d)) /* FL2WORD32( 9.238795325112867e-1)  COS_PI_DIV8  */
      94             : #define C164 (FFTC(0x89be50c3)) /* FL2WORD32(-9.238795325112867e-1) -COS_PI_DIV8  */
      95             : 
      96             : #define C165 (FFTC(0x30fbc54d)) /* FL2WORD32( 3.826834323650898e-1)  COS_3PI_DIV8 */
      97             : #define C166 (FFTC(0xcf043ab3)) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 */
      98             : /* Raw 32-bit copies of the C5x / C8x values above (no FFTC wrapper); fft5 and Mpy3_0 pass them to Mpy_32_32_lc3plus. */
      99             : #define C51_32 (0x79bc3854) /* FL2WORD32( 0.95105652)   */
     100             : #define C52_32 (0x9d839db0) /* FL2WORD32(-1.53884180/2) */
     101             : #define C53_32 (0xd18053ce) /* FL2WORD32(-0.36327126)   */
     102             : #define C54_32 (0x478dde64) /* FL2WORD32( 0.55901699)   */
     103             : #define C55_32 (0xb0000001) /* FL2WORD32(-1.25/2)       */
     104             : 
     105             : 
     106             : #define C81_32 (0x5a82799a) /* FL2WORD32( 7.071067811865475e-1) */
     107             : #define C82_32 (0xa57d8666) /* FL2WORD32(-7.071067811865475e-1) */
     108             : 
     109             : #if defined(ENABLE_HR_MODE)
     110             : /* cplxMpy4_16_0 (HR mode only): (re + j*im) = (a + j*b) * (c + j*d); products via Mpy_32_xx, no additional shift. */
     111             : #define cplxMpy4_16_0(re, im, a, b, c, d)                                                                              \
     112             :      do                                                                                                                 \
     113             :     {                                                                                                                  \
     114             :         re = L_sub(Mpy_32_xx(a, c), Mpy_32_xx(b, d));                                                                  \
     115             :         move32();                                                                                                      \
     116             :         im = L_add(Mpy_32_xx(a, d), Mpy_32_xx(b, c));                                                                  \
     117             :         move32();                                                                                                      \
     118             :     } while (0)
     119             : /* cplxMpy4_16_1 (HR mode only): plain copy, re = a and im = b (no multiplication is performed). */
     120             : #define cplxMpy4_16_1(re, im, a, b)                                                                                    \
     121             :     do                                                                                                                 \
     122             :     {                                                                                                                  \
     123             :         re = a;                                                                                                        \
     124             :         move32();                                                                                                      \
     125             :         im = b;                                                                                                        \
     126             :         move32();                                                                                                      \
     127             :     } while (0)
     128             : 
     129             : #endif
     130             : /* Mpy3_0: s12 = (t0 + t2) * C81_32, s14 = (t0 - t2) * C81_32, s13 = (t3 - t1) * C81_32, s15 = (t1 + t3) * C82_32. */
     131             : #define Mpy3_0(s12, s13, s14, s15, t0, t1, t2, t3)                                                                     \
     132             :     do                                                                                                                 \
     133             :     {                                                                                                                  \
     134             :         s12 = Mpy_32_32_lc3plus(L_add(t0, t2), C81_32);                                                                        \
     135             :         s14 = Mpy_32_32_lc3plus(L_sub(t0, t2), C81_32);                                                                        \
     136             :         s13 = Mpy_32_32_lc3plus(L_sub(t3, t1), C81_32);                                                                        \
     137             :         s15 = Mpy_32_32_lc3plus(L_add(t1, t3), C82_32);                                                                        \
     138             :     } while (0)
     139             : /* cplxMpy3_0: overwrites (a, b) with (a/2 + j*b/2) * (c + j*d). It writes the halved inputs to `as` and `bs`, which the macro does not declare: both must exist at the expansion site. */
     140             : #define cplxMpy3_0(a, b, c, d)                                                                                         \
     141             :     do                                                                                                                 \
     142             :     {                                                                                                                  \
     143             :         as = L_shr_pos(a, 1);                                                                                          \
     144             :         bs = L_shr_pos(b, 1);                                                                                          \
     145             :         a  = L_sub(Mpy_32_32_lc3plus(as, c), Mpy_32_32_lc3plus(bs, d));                                                                \
     146             :         b  = L_add(Mpy_32_32_lc3plus(as, d), Mpy_32_32_lc3plus(bs, c));                                                                \
     147             :     } while (0)
     148             : /* cplxMpy4_4_0: complex product (a + j*b) * (c + j*d), shifted right by SCALEFACTOR60 - SCALEFACTOR15 bits. cplxMpy4_4_1: the same shift applied to a and b only. */
     149             : #ifdef ENABLE_HR_MODE
     150             : #define cplxMpy4_4_0(re, im, a, b, c, d)                                                                               \
     151             :     re = L_shr_pos(L_sub(Mpy_32_xx(a, c), Mpy_32_xx(b, d)), SCALEFACTOR60 - SCALEFACTOR15);                            \
     152             :     im = L_shr_pos(L_add(Mpy_32_xx(a, d), Mpy_32_xx(b, c)), SCALEFACTOR60 - SCALEFACTOR15);
     153             : /* NOTE(review): the cplxMpy4_4 macros expand to two bare statements (no do/while(0) wrapper, unlike their neighbours); do not use them as the body of an unbraced if/else/loop. */
     154             : #define cplxMpy4_4_1(re, im, a, b)                                                                                     \
     155             :     re = L_shr_pos(a, SCALEFACTOR60 - SCALEFACTOR15);                                                                  \
     156             :     im = L_shr_pos(b, SCALEFACTOR60 - SCALEFACTOR15);
     157             : #else /* non-HR build: same two macros, using L_shr instead of L_shr_pos */
     158             : #define cplxMpy4_4_0(re, im, a, b, c, d)                                                                               \
     159             :     re = L_shr(L_sub(Mpy_32_xx(a, c), Mpy_32_xx(b, d)), SCALEFACTOR60 - SCALEFACTOR15);                                \
     160             :     im = L_shr(L_add(Mpy_32_xx(a, d), Mpy_32_xx(b, c)), SCALEFACTOR60 - SCALEFACTOR15);
     161             : 
     162             : #define cplxMpy4_4_1(re, im, a, b)                                                                                     \
     163             :     re = L_shr(a, SCALEFACTOR60 - SCALEFACTOR15);                                                                      \
     164             :     im = L_shr(b, SCALEFACTOR60 - SCALEFACTOR15);
     165             : #endif
     166             : /* cplxMpy4_8_0: (re + j*im) = ((a + j*b) * (c + j*d)) shifted right by one bit; products via Mpy_32_xx. */
     167             : #define cplxMpy4_8_0(re, im, a, b, c, d)                                                                               \
     168             :     do                                                                                                                 \
     169             :     {                                                                                                                  \
     170             :         re = L_shr_pos(L_sub(Mpy_32_xx(a, c), Mpy_32_xx(b, d)), 1);                                                    \
     171             :         im = L_shr_pos(L_add(Mpy_32_xx(a, d), Mpy_32_xx(b, c)), 1);                                                    \
     172             :     } while (0)
     173             : 
     174             : /* cplxMpy4_8_1: no multiplication, re = a and im = b each shifted right by one bit. */
     175             : #define cplxMpy4_8_1(re, im, a, b)                                                                                     \
     176             :     do                                                                                                                 \
     177             :     {                                                                                                                  \
     178             :         re = L_shr_pos(a, 1);                                                                                          \
     179             :         im = L_shr_pos(b, 1);                                                                                          \
     180             :     } while (0)
     181             : 
     182             : /* cplxMpy4_8_2: (re + j*im) = ((a + j*b) * (c - j*d)) shifted right by one bit, i.e. the conjugate of (c + j*d) is applied; always uses Mpy_32_32_lc3plus rather than Mpy_32_xx. */
     183             : #define cplxMpy4_8_2(re, im, a, b, c, d)                                                                               \
     184             :     do                                                                                                                 \
     185             :     {                                                                                                                  \
     186             :         re = L_shr_pos(L_add(Mpy_32_32_lc3plus(a, c), Mpy_32_32_lc3plus(b, d)), 1);                                                    \
     187             :         im = L_shr_pos(L_sub(Mpy_32_32_lc3plus(b, c), Mpy_32_32_lc3plus(a, d)), 1);                                                    \
     188             :     } while (0)
     189             : 
     190             : /* cplxMpy4_12_0: (re + j*im) = (a + j*b) * (c + j*d) via Mpy_32_xx, no shift; the body matches cplxMpy4_16_0 but is defined in every build. */
     191             : #define cplxMpy4_12_0(re, im, a, b, c, d)                                                                              \
     192             :     do                                                                                                                 \
     193             :     {                                                                                                                  \
     194             :         re = L_sub(Mpy_32_xx(a, c), Mpy_32_xx(b, d));                                                                  \
     195             :         move32();                                                                                                      \
     196             :         im = L_add(Mpy_32_xx(a, d), Mpy_32_xx(b, c));                                                                  \
     197             :         move32();                                                                                                      \
     198             :     } while (0)
     199             : /* cplxMpy4_12_1: plain copy, re = a and im = b (no multiplication is performed). */
     200             : #define cplxMpy4_12_1(re, im, a, b)                                                                                    \
     201             :     do                                                                                                                 \
     202             :     {                                                                                                                  \
     203             :         re = a;                                                                                                        \
     204             :         move32();                                                                                                      \
     205             :         im = b;                                                                                                        \
     206             :         move32();                                                                                                      \
     207             :     } while (0)
     208             : /* fft4: in-place 4-point complex FFT over x[0..7]. The butterflies are those of a forward DFT when x[2k] holds the real and x[2k+1] the imaginary part of sample k. */
     209             : /* Every input is shifted right by SCALEFACTOR4 bits (L_shr_pos) first; no multiplications are needed. Returns nothing; x is overwritten with the four output bins. */
     210           0 : static void fft4(Word32 *x)
     211             : {
     212             :     Dyn_Mem_Deluxe_In(Word32 x0, x1, x2, x3, x4, x5, x6, x7; Word32 t0, t1, t2, t3, t4, t5, t6, t7;); /* presumably declares these locals (macro defined elsewhere) */
     213             : 
     214           0 :     x0 = L_shr_pos(x[0], SCALEFACTOR4);
     215           0 :     x1 = L_shr_pos(x[1], SCALEFACTOR4);
     216           0 :     x2 = L_shr_pos(x[2], SCALEFACTOR4);
     217           0 :     x3 = L_shr_pos(x[3], SCALEFACTOR4);
     218           0 :     x4 = L_shr_pos(x[4], SCALEFACTOR4);
     219           0 :     x5 = L_shr_pos(x[5], SCALEFACTOR4);
     220           0 :     x6 = L_shr_pos(x[6], SCALEFACTOR4);
     221           0 :     x7 = L_shr_pos(x[7], SCALEFACTOR4);
     222             : 
     223             :     /* Pre-additions: sums and differences of samples 0/2 (x0,x1 with x4,x5) and of samples 1/3 (x2,x3 with x6,x7) */
     224           0 :     t0 = L_add(x0, x4);
     225           0 :     t2 = L_sub(x0, x4);
     226           0 :     t1 = L_add(x1, x5);
     227           0 :     t3 = L_sub(x1, x5);
     228           0 :     t4 = L_add(x2, x6);
     229           0 :     t7 = L_sub(x2, x6);
     230           0 :     t5 = L_add(x7, x3);
     231           0 :     t6 = L_sub(x7, x3); /* operand order is x7 - x3 on purpose: this difference is subtracted for x[2] and added for x[6] */
     232             : 
     233             :     /* Post-additions: x[0],x[1] = bin 0, x[2],x[3] = bin 1, x[4],x[5] = bin 2, x[6],x[7] = bin 3 */
     234           0 :     x[0] = L_add(t0, t4);
     235           0 :     x[1] = L_add(t1, t5);
     236           0 :     x[2] = L_sub(t2, t6);
     237           0 :     x[3] = L_sub(t3, t7);
     238           0 :     x[4] = L_sub(t0, t4);
     239           0 :     x[5] = L_sub(t1, t5);
     240           0 :     x[6] = L_add(t2, t6);
     241           0 :     x[7] = L_add(t3, t7);
     242             : 
     243             :     Dyn_Mem_Deluxe_Out();
     244           0 : }
     245             : 
     246             : /**
     247             :  * \brief    Performs a complex 5-point FFT.
     248             :  *           The FFT is performed in place. Every input sample is shifted
     249             :  *           right by SCALEFACTOR5 bits before the butterflies.
     250             :  *           Samples are read at re[s * k] / im[s * k], k = 0..4, and the
     251             :  *           results are written back to the same positions.
     252             :  * \param    [i/o] re    real input / output
     253             :  * \param    [i/o] im    imag input / output
     254             :  * \param    [i  ] s     stride (in elements) between consecutive samples in re and im
     255             :  *
     256             :  * \return   void
     257             :  */
     258             : 
     259             : 
     260             : 
     261           0 : static void fft5(Word32 *re, Word32 *im, Word16 s)
     262             : {
     263             :     Dyn_Mem_Deluxe_In(Word32 x0, x1, x2, x3, x4; Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4; Word32 t;);
     264             : 
     265             :     /* real part */
     266           0 :     x0 = L_shr_pos(re[s * 0], SCALEFACTOR5);
     267           0 :     x1 = L_shr_pos(re[s * 1], SCALEFACTOR5);
     268           0 :     x2 = L_shr_pos(re[s * 2], SCALEFACTOR5);
     269           0 :     x3 = L_shr_pos(re[s * 3], SCALEFACTOR5);
     270           0 :     x4 = L_shr_pos(re[s * 4], SCALEFACTOR5);
     271             : 
     272           0 :     r1    = L_add(x1, x4);
     273           0 :     r4    = L_sub(x1, x4);
     274           0 :     r3    = L_add(x2, x3);
     275           0 :     r2    = L_sub(x2, x3);
     276           0 :     t     = Mpy_32_32_lc3plus(L_sub(r1, r3), C54_32);
     277           0 :     r1    = L_add(r1, r3);
     278           0 :     re[0] = L_add(x0, r1); /* bin 0: sum of all five scaled real inputs */
     279           0 :     move32();
     280             :     /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
     281             :        the values as fracts */
     282           0 :     r1 = L_add(re[0], (L_shl_pos(Mpy_32_32_lc3plus(r1, C55_32), 1)));
     283           0 :     r3 = L_sub(r1, t);
     284           0 :     r1 = L_add(r1, t);
     285           0 :     t  = Mpy_32_32_lc3plus(L_add(r4, r2), C51_32);
     286             :     /* Bit shift left because of the constant C52 which was scaled with the factor 0.5 because of the representation of
     287             :        the values as fracts */
     288           0 :     r4 = L_add(t, L_shl_pos(Mpy_32_32_lc3plus(r4, C52_32), 1));
     289           0 :     r2 = L_add(t, Mpy_32_32_lc3plus(r2, C53_32));
     290             : 
     291             :     /* imaginary part */
     292           0 :     x0 = L_shr_pos(im[s * 0], SCALEFACTOR5);
     293           0 :     x1 = L_shr_pos(im[s * 1], SCALEFACTOR5);
     294           0 :     x2 = L_shr_pos(im[s * 2], SCALEFACTOR5);
     295           0 :     x3 = L_shr_pos(im[s * 3], SCALEFACTOR5);
     296           0 :     x4 = L_shr_pos(im[s * 4], SCALEFACTOR5);
     297             : 
     298           0 :     s1    = L_add(x1, x4);
     299           0 :     s4    = L_sub(x1, x4);
     300           0 :     s3    = L_add(x2, x3);
     301           0 :     s2    = L_sub(x2, x3);
     302           0 :     t     = Mpy_32_32_lc3plus(L_sub(s1, s3), C54_32);
     303           0 :     s1    = L_add(s1, s3);
     304           0 :     im[0] = L_add(x0, s1); /* bin 0: sum of all five scaled imaginary inputs */
     305           0 :     move32();
     306             :     /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
     307             :        the values as fracts */
     308           0 :     s1 = L_add(im[0], L_shl_pos(Mpy_32_32_lc3plus(s1, C55_32), 1));
     309           0 :     s3 = L_sub(s1, t);
     310           0 :     s1 = L_add(s1, t);
     311           0 :     t  = Mpy_32_32_lc3plus(L_add(s4, s2), C51_32);
     312             :     /* Bit shift left because of the constant C52 which was scaled with the factor 0.5 because of the representation of
     313             :        the values as fracts */
     314           0 :     s4 = L_add(t, L_shl_pos(Mpy_32_32_lc3plus(s4, C52_32), 1));
     315           0 :     s2 = L_add(t, Mpy_32_32_lc3plus(s2, C53_32));
     316             : 
     317             :     /* combination: real outputs mix the r* terms with the imaginary-path s2/s4, imaginary outputs mix s1/s3 with the real-path r2/r4 */
     318           0 :     re[s * 1] = L_add(r1, s2);
     319           0 :     move32();
     320           0 :     re[s * 4] = L_sub(r1, s2);
     321           0 :     move32();
     322           0 :     re[s * 2] = L_sub(r3, s4);
     323           0 :     move32();
     324           0 :     re[s * 3] = L_add(r3, s4);
     325           0 :     move32();
     326             : 
     327           0 :     im[s * 1] = L_sub(s1, r2);
     328           0 :     move32();
     329           0 :     im[s * 4] = L_add(s1, r2);
     330           0 :     move32();
     331           0 :     im[s * 2] = L_add(s3, r4);
     332           0 :     move32();
     333           0 :     im[s * 3] = L_sub(s3, r4);
     334           0 :     move32();
     335             : 
     336             :     Dyn_Mem_Deluxe_Out();
     337           0 : }
     338             : 
     339             : /**
     340             :  * \brief    Performs a complex 6-point FFT.
     341             :  *           The FFT is performed in place. Every input sample is shifted
     342             :  *           right by SCALEFACTOR6 bits before the butterflies.
     343             :  *           Samples are read from and written to re[k * st] / im[k * st], k = 0..5.
     344             :  *
     345             :  * \param    [i/o] re    real input / output
     346             :  * \param    [i/o] im    imag input / output
     347             :  * \param    [i  ] st    stride (in elements) between consecutive samples in re and im
     348             :  *
     349             :  * \return   void
     350             :  */
     351             : 
     352           0 : static void fft6(Word32 *re, Word32 *im, Word16 st)
     353             : {
     354             :     Dyn_Mem_Deluxe_In(Word32 x0, x1, x2, x3, x4, x5; Word32 r1o, r2o, i1e, i2e, i1o, i2o; Word32 t, s;);
     355             : 
     356             :     /* process real parts */
     357             : 
     358           0 :     x0 = L_shr_pos(re[0 * st], SCALEFACTOR6);
     359           0 :     x1 = L_shr_pos(re[1 * st], SCALEFACTOR6);
     360           0 :     x2 = L_shr_pos(re[2 * st], SCALEFACTOR6);
     361           0 :     x3 = L_shr_pos(re[3 * st], SCALEFACTOR6);
     362           0 :     x4 = L_shr_pos(re[4 * st], SCALEFACTOR6);
     363           0 :     x5 = L_shr_pos(re[5 * st], SCALEFACTOR6);
     364             : 
     365           0 :     t          = L_add(x0, L_add(x2, x4)); /* sum of the even-indexed real inputs */
     366           0 :     s          = L_add(x1, L_add(x3, x5)); /* sum of the odd-indexed real inputs */
     367           0 :     re[0 * st] = L_add(t, s);
     368           0 :     move32();
     369           0 :     re[3 * st] = L_sub(t, s);
     370           0 :     move32();
     371           0 :     t = L_sub(x0, L_shr_pos(L_add(x2, x4), 1)); /* x0 - (x2 + x4) / 2 */
     372             : 
     373           0 :     re[1 * st] = t; /* re[1], re[2], re[4], re[5] are seeded with the same value and accumulated onto further down */
     374           0 :     move32();
     375           0 :     re[2 * st] = t;
     376           0 :     move32();
     377           0 :     re[4 * st] = t;
     378           0 :     move32();
     379           0 :     re[5 * st] = t;
     380           0 :     move32();
     381             : 
     382           0 :     s = Mpy_32_32_lc3plus(L_sub(x4, x2), C61_32);
     383             : 
     384           0 :     i1e = s;
     385           0 :     i2e = -s; /* NOTE(review): plain C negation here and for i2o below, whereas L_negate is used near the end of the function */
     386             : 
     387           0 :     t = L_sub(x1, L_shr_pos(L_add(x3, x5), 1)); /* x1 - (x3 + x5) / 2 */
     388           0 :     s = Mpy_32_32_lc3plus(L_sub(x5, x3), C61_32);
     389             : 
     390           0 :     r1o = r2o = t;
     391           0 :     i1o       = s;
     392           0 :     i2o       = -s;
     393             : /* process imaginary parts: x0..x5 are reloaded from im, so the real inputs are no longer available from here on */
     394           0 :     x0 = L_shr_pos(im[0 * st], SCALEFACTOR6);
     395           0 :     x1 = L_shr_pos(im[1 * st], SCALEFACTOR6);
     396           0 :     x2 = L_shr_pos(im[2 * st], SCALEFACTOR6);
     397           0 :     x3 = L_shr_pos(im[3 * st], SCALEFACTOR6);
     398           0 :     x4 = L_shr_pos(im[4 * st], SCALEFACTOR6);
     399           0 :     x5 = L_shr_pos(im[5 * st], SCALEFACTOR6);
     400             : 
     401           0 :     t = L_add(x0, L_add(x2, x4)); /* sum of the even-indexed imaginary inputs */
     402           0 :     s = L_add(x1, L_add(x3, x5)); /* sum of the odd-indexed imaginary inputs */
     403             : 
     404           0 :     im[0 * st] = L_add(t, s);
     405           0 :     move32();
     406           0 :     im[3 * st] = L_sub(t, s);
     407           0 :     move32();
     408             : 
     409           0 :     t = Mpy_32_32_lc3plus(L_sub(x2, x4), C61_32);
     410           0 :     s = L_sub(x0, L_shr_pos(L_add(x2, x4), 1));
     411             : 
     412           0 :     re[1 * st] = L_add(re[1 * st], t);
     413           0 :     move32();
     414           0 :     re[2 * st] = L_sub(re[2 * st], t);
     415           0 :     move32();
     416           0 :     re[4 * st] = L_add(re[4 * st], t);
     417           0 :     move32();
     418           0 :     re[5 * st] = L_sub(re[5 * st], t);
     419           0 :     move32();
     420             : 
     421           0 :     i1e = L_add(i1e, s);
     422           0 :     i2e = L_add(i2e, s);
     423             : 
     424           0 :     t = Mpy_32_32_lc3plus(L_sub(x3, x5), C61_32);
     425           0 :     s = L_sub(x1, L_shr_pos(L_add(x5, x3), 1));
     426             : 
     427           0 :     r1o = L_add(r1o, t);
     428           0 :     r2o = L_sub(r2o, t);
     429             : 
     430           0 :     i1o = L_add(i1o, s);
     431           0 :     i2o = L_add(i2o, s);
     432             : /* (t + j*s) = (r1o + j*i1o) * (1/2 - j*C61_32); added for output 1 and subtracted for output 4 */
     433           0 :     t = L_add(L_shr_pos(r1o, 1), Mpy_32_32_lc3plus(i1o, C61_32));
     434           0 :     s = L_sub(L_shr_pos(i1o, 1), Mpy_32_32_lc3plus(r1o, C61_32));
     435             : 
     436           0 :     re[1 * st] = L_add(re[1 * st], t);
     437           0 :     move32();
     438           0 :     im[1 * st] = L_add(i1e, s);
     439           0 :     move32();
     440             : 
     441           0 :     re[4 * st] = L_sub(re[4 * st], t);
     442           0 :     move32();
     443           0 :     im[4 * st] = L_sub(i1e, s);
     444           0 :     move32();
     445             : /* (t + j*s) = (r2o + j*i2o) * (-1/2 - j*C61_32); added for output 2 and subtracted for output 5 */
     446           0 :     t = L_sub(Mpy_32_32_lc3plus(i2o, C61_32), L_shr_pos(r2o, 1));
     447           0 :     s = L_negate(L_add(Mpy_32_32_lc3plus(r2o, C61_32), L_shr_pos(i2o, 1)));
     448             : 
     449           0 :     re[2 * st] = L_add(re[2 * st], t);
     450           0 :     move32();
     451           0 :     im[2 * st] = L_add(i2e, s);
     452           0 :     move32();
     453             : 
     454           0 :     re[5 * st] = L_sub(re[5 * st], t);
     455           0 :     move32();
     456           0 :     im[5 * st] = L_sub(i2e, s);
     457           0 :     move32();
     458             : 
     459             :     Dyn_Mem_Deluxe_Out();
     460           0 : }
     461             : 
     462             : /**
     463             :  * \brief    Function performs a complex 8-point FFT
     464             :  *           The FFT is performed inplace. The result of the FFT
     465             :  *           is scaled by SCALEFACTOR8 bits (every input goes through L_shr_pos(.., SCALEFACTOR8) on load).
     466             :  *
     467             :  *           WOPS with 32x16 bit multiplications: 108 cycles
     468             :  *
     469             :  * \param    [i/o] re    real input / output, accessed at re[s * k] for k = 0..7
     470             :  * \param    [i/o] im    imag input / output, accessed at im[s * k] for k = 0..7
     471             :  * \param    [i  ] s     stride (in elements) between consecutive points of re and im
     472             :  *
     473             :  * \return   void
     474             :  */
     475             : 
     476             : 
     477           0 : static void fft8(Word32 *re, Word32 *im, Word16 s)
     478             : {
     479             :     Dyn_Mem_Deluxe_In(Word32 x00, x01, x02, x03, x04, x05, x06, x07; Word32 x08, x09, x10, x11, x12, x13, x14, x15;
     480             :                       Word32 t00, t01, t02, t03, t04, t05, t06, t07; Word32 t08, t09, t10, t11, t12, t13, t14, t15;
     481             :                       Word32 s00, s01, s02, s03, s04, s05, s06, s07; Word32 s08, s09, s10, s11, s12, s13, s14, s15;); /* all locals are declared through this macro */
     482             : 
     483             :     /* Pre-additions: load the 8 complex points (even x = real, odd x = imag), then sum/difference of point k and point k + 4 */
     484             : 
     485           0 :     x00 = L_shr_pos(re[s * 0], SCALEFACTOR8);
     486           0 :     x01 = L_shr_pos(im[s * 0], SCALEFACTOR8);
     487           0 :     x02 = L_shr_pos(re[s * 1], SCALEFACTOR8);
     488           0 :     x03 = L_shr_pos(im[s * 1], SCALEFACTOR8);
     489           0 :     x04 = L_shr_pos(re[s * 2], SCALEFACTOR8);
     490           0 :     x05 = L_shr_pos(im[s * 2], SCALEFACTOR8);
     491           0 :     x06 = L_shr_pos(re[s * 3], SCALEFACTOR8);
     492           0 :     x07 = L_shr_pos(im[s * 3], SCALEFACTOR8);
     493           0 :     x08 = L_shr_pos(re[s * 4], SCALEFACTOR8);
     494           0 :     x09 = L_shr_pos(im[s * 4], SCALEFACTOR8);
     495           0 :     x10 = L_shr_pos(re[s * 5], SCALEFACTOR8);
     496           0 :     x11 = L_shr_pos(im[s * 5], SCALEFACTOR8);
     497           0 :     x12 = L_shr_pos(re[s * 6], SCALEFACTOR8);
     498           0 :     x13 = L_shr_pos(im[s * 6], SCALEFACTOR8);
     499           0 :     x14 = L_shr_pos(re[s * 7], SCALEFACTOR8);
     500           0 :     x15 = L_shr_pos(im[s * 7], SCALEFACTOR8);
     501             : 
     502           0 :     t00 = L_add(x00, x08);
     503           0 :     t02 = L_sub(x00, x08);
     504           0 :     t01 = L_add(x01, x09);
     505           0 :     t03 = L_sub(x01, x09);
     506           0 :     t04 = L_add(x02, x10);
     507           0 :     t06 = L_sub(x02, x10);
     508           0 :     t05 = L_add(x03, x11);
     509           0 :     t07 = L_sub(x03, x11);
     510           0 :     t08 = L_add(x04, x12);
     511           0 :     t10 = L_sub(x04, x12);
     512           0 :     t09 = L_add(x05, x13);
     513           0 :     t11 = L_sub(x05, x13);
     514           0 :     t12 = L_add(x06, x14);
     515           0 :     t14 = L_sub(x06, x14);
     516           0 :     t13 = L_add(x07, x15);
     517           0 :     t15 = L_sub(x07, x15);
     518             : 
     519             :     /* Pre-additions and core multiplications: the four Mpy_32_xx products below are the only multiplications in this function */
     520             : 
     521           0 :     s00 = L_add(t00, t08);
     522           0 :     s04 = L_sub(t00, t08);
     523           0 :     s01 = L_add(t01, t09);
     524           0 :     s05 = L_sub(t01, t09);
     525           0 :     s08 = L_sub(t02, t11);
     526           0 :     s10 = L_add(t02, t11);
     527           0 :     s09 = L_add(t03, t10);
     528           0 :     s11 = L_sub(t03, t10);
     529           0 :     s02 = L_add(t04, t12);
     530           0 :     s07 = L_sub(t04, t12);
     531           0 :     s03 = L_add(t05, t13);
     532           0 :     s06 = L_sub(t13, t05); /* operand order is t13 - t05, i.e. swapped relative to s03 and s07 */
     533             : 
     534           0 :     t01 = L_add(t06, t14); /* t00..t03 are reused as scratch from here on */
     535           0 :     t02 = L_sub(t06, t14);
     536           0 :     t00 = L_add(t07, t15);
     537           0 :     t03 = L_sub(t07, t15);
     538             : 
     539           0 :     s12 = Mpy_32_xx(L_add(t00, t02), C81);
     540           0 :     s14 = Mpy_32_xx(L_sub(t00, t02), C81);
     541           0 :     s13 = Mpy_32_xx(L_sub(t03, t01), C81);
     542           0 :     s15 = Mpy_32_xx(L_add(t01, t03), C82); /* C82 here, C81 in the three products above; NOTE(review): presumably the sign is folded into the constant - confirm against the ROM table */
     543             : 
     544             :     /* Post-additions: results overwrite the inputs in place; every store is followed by move32() */
     545             : 
     546           0 :     re[s * 0] = L_add(s00, s02);
     547           0 :     move32();
     548           0 :     re[s * 4] = L_sub(s00, s02);
     549           0 :     move32();
     550           0 :     im[s * 0] = L_add(s01, s03);
     551           0 :     move32();
     552           0 :     im[s * 4] = L_sub(s01, s03);
     553           0 :     move32();
     554           0 :     re[s * 2] = L_sub(s04, s06);
     555           0 :     move32();
     556           0 :     re[s * 6] = L_add(s04, s06);
     557           0 :     move32();
     558           0 :     im[s * 2] = L_sub(s05, s07);
     559           0 :     move32();
     560           0 :     im[s * 6] = L_add(s05, s07);
     561           0 :     move32();
     562           0 :     re[s * 3] = L_add(s08, s14);
     563           0 :     move32();
     564           0 :     re[s * 7] = L_sub(s08, s14);
     565           0 :     move32();
     566           0 :     im[s * 3] = L_add(s09, s15);
     567           0 :     move32();
     568           0 :     im[s * 7] = L_sub(s09, s15);
     569           0 :     move32();
     570           0 :     re[s * 1] = L_add(s10, s12);
     571           0 :     move32();
     572           0 :     re[s * 5] = L_sub(s10, s12);
     573           0 :     move32();
     574           0 :     im[s * 1] = L_add(s11, s13);
     575           0 :     move32();
     576           0 :     im[s * 5] = L_sub(s11, s13);
     577           0 :     move32();
     578             : 
     579             :     Dyn_Mem_Deluxe_Out();
     580           0 : }
     581             : 
     582             : /**
     583             :  * \brief    Function performs a complex 10-point FFT
     584             :  *           The FFT is performed inplace. The result of the FFT
     585             :  *           is scaled by SCALEFACTOR10 bits (every input goes through L_shr_pos(.., SCALEFACTOR10) on load).
     586             :  *
     587             :  *           WOPS with 32x16 bit multiplications:  196 cycles
     588             :  *
     589             :  * \param    [i/o] re    real input / output, accessed at re[s * k] for k = 0..9
     590             :  * \param    [i/o] im    imag input / output, accessed at im[s * k] for k = 0..9
     591             :  * \param    [i  ] s     stride (in elements) between consecutive points of re and im
     592             :  *
     593             :  * \return   void
     594             :  */
     595             : 
     596             : 
     597             : 
     598           0 : static void fft10(Word32 *re, Word32 *im, Word16 s)
     599             : {
     600             :     Dyn_Mem_Deluxe_In(Word32 t; Word32 x0, x1, x2, x3, x4; Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4;
     601             :                       Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
     602             :                       Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;);
     603             : 
     604             :     /* 2 fft5 stages: the first reads points 0, 2, 4, 6, 8; the second reads points 5, 1, 3, 7, 9 */
     605             : 
     606             :     /* real part of the first fft5 (points 0, 2, 4, 6, 8) */
     607           0 :     x0 = L_shr_pos(re[s * 0], SCALEFACTOR10);
     608           0 :     x1 = L_shr_pos(re[s * 2], SCALEFACTOR10);
     609           0 :     x2 = L_shr_pos(re[s * 4], SCALEFACTOR10);
     610           0 :     x3 = L_shr_pos(re[s * 6], SCALEFACTOR10);
     611           0 :     x4 = L_shr_pos(re[s * 8], SCALEFACTOR10);
     612             : 
     613           0 :     r1  = L_add(x3, x2);
     614           0 :     r4  = L_sub(x3, x2);
     615           0 :     r3  = L_add(x1, x4);
     616           0 :     r2  = L_sub(x1, x4);
     617           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
     618           0 :     r1  = L_add(r1, r3);
     619           0 :     y00 = L_add(x0, r1); /* sum of all five real inputs of this stage */
     620           0 :     r1  = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
     621           0 :     r3  = L_sub(r1, t);
     622           0 :     r1  = L_add(r1, t);
     623           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
     624           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
     625           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
     626             : 
     627             :     /* imaginary part of the first fft5: same sequence as the real part, with s1..s4 in place of r1..r4 */
     628           0 :     x0 = L_shr_pos(im[s * 0], SCALEFACTOR10);
     629           0 :     x1 = L_shr_pos(im[s * 2], SCALEFACTOR10);
     630           0 :     x2 = L_shr_pos(im[s * 4], SCALEFACTOR10);
     631           0 :     x3 = L_shr_pos(im[s * 6], SCALEFACTOR10);
     632           0 :     x4 = L_shr_pos(im[s * 8], SCALEFACTOR10);
     633             : 
     634           0 :     s1  = L_add(x3, x2);
     635           0 :     s4  = L_sub(x3, x2);
     636           0 :     s3  = L_add(x1, x4);
     637           0 :     s2  = L_sub(x1, x4);
     638           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
     639           0 :     s1  = L_add(s1, s3);
     640           0 :     y01 = L_add(x0, s1); /* sum of all five imaginary inputs of this stage */
     641           0 :     s1  = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
     642           0 :     s3  = L_sub(s1, t);
     643           0 :     s1  = L_add(s1, t);
     644           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
     645           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
     646           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
     647             : 
     648             :     /* combination: mix the real (r*) and imaginary (s*) terms into the remaining first-stage outputs */
     649           0 :     y04 = L_add(r1, s2);
     650           0 :     y16 = L_sub(r1, s2);
     651           0 :     y08 = L_sub(r3, s4);
     652           0 :     y12 = L_add(r3, s4);
     653             : 
     654           0 :     y05 = L_sub(s1, r2);
     655           0 :     y17 = L_add(s1, r2);
     656           0 :     y09 = L_add(s3, r4);
     657           0 :     y13 = L_sub(s3, r4);
     658             : 
     659             :     /* real part of the second fft5 (points 5, 1, 3, 7, 9); r1/r4 and r3/r2 pair the inputs differently than in the first stage */
     660           0 :     x0 = L_shr_pos(re[s * 5], SCALEFACTOR10);
     661           0 :     x1 = L_shr_pos(re[s * 1], SCALEFACTOR10);
     662           0 :     x2 = L_shr_pos(re[s * 3], SCALEFACTOR10);
     663           0 :     x3 = L_shr_pos(re[s * 7], SCALEFACTOR10);
     664           0 :     x4 = L_shr_pos(re[s * 9], SCALEFACTOR10);
     665             : 
     666           0 :     r1  = L_add(x1, x4);
     667           0 :     r4  = L_sub(x1, x4);
     668           0 :     r3  = L_add(x3, x2);
     669           0 :     r2  = L_sub(x3, x2);
     670           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
     671           0 :     r1  = L_add(r1, r3);
     672           0 :     y02 = L_add(x0, r1);
     673           0 :     r1  = L_add(y02, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
     674           0 :     r3  = L_sub(r1, t);
     675           0 :     r1  = L_add(r1, t);
     676           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
     677           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
     678           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
     679             : 
     680             :     /* imaginary part of the second fft5 (points 5, 1, 3, 7, 9) */
     681           0 :     x0 = L_shr_pos(im[s * 5], SCALEFACTOR10);
     682           0 :     x1 = L_shr_pos(im[s * 1], SCALEFACTOR10);
     683           0 :     x2 = L_shr_pos(im[s * 3], SCALEFACTOR10);
     684           0 :     x3 = L_shr_pos(im[s * 7], SCALEFACTOR10);
     685           0 :     x4 = L_shr_pos(im[s * 9], SCALEFACTOR10);
     686             : 
     687           0 :     s1  = L_add(x1, x4);
     688           0 :     s4  = L_sub(x1, x4);
     689           0 :     s3  = L_add(x3, x2);
     690           0 :     s2  = L_sub(x3, x2);
     691           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
     692           0 :     s1  = L_add(s1, s3);
     693           0 :     y03 = L_add(x0, s1);
     694           0 :     s1  = L_add(y03, L_shl_pos(Mpy_32_xx(s1, C55), 1));
     695           0 :     s3  = L_sub(s1, t);
     696           0 :     s1  = L_add(s1, t);
     697           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
     698           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
     699           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
     700             : 
     701             :     /* combination: same mixing as above, producing the remaining second-stage outputs */
     702           0 :     y06 = L_add(r1, s2);
     703           0 :     y18 = L_sub(r1, s2);
     704           0 :     y10 = L_sub(r3, s4);
     705           0 :     y14 = L_add(r3, s4);
     706             : 
     707           0 :     y07 = L_sub(s1, r2);
     708           0 :     y19 = L_add(s1, r2);
     709           0 :     y11 = L_add(s3, r4);
     710           0 :     y15 = L_sub(s3, r4);
     711             : 
     712             :     /* 5 fft2 stages: sum and difference of matching first-stage / second-stage outputs, written back in place */
     713           0 :     re[s * 0] = L_add(y00, y02);
     714           0 :     move32();
     715           0 :     im[s * 0] = L_add(y01, y03);
     716           0 :     move32();
     717           0 :     re[s * 5] = L_sub(y00, y02);
     718           0 :     move32();
     719           0 :     im[s * 5] = L_sub(y01, y03);
     720           0 :     move32();
     721             : 
     722           0 :     re[s * 2] = L_add(y04, y06);
     723           0 :     move32();
     724           0 :     im[s * 2] = L_add(y05, y07);
     725           0 :     move32();
     726           0 :     re[s * 7] = L_sub(y04, y06);
     727           0 :     move32();
     728           0 :     im[s * 7] = L_sub(y05, y07);
     729           0 :     move32();
     730             : 
     731           0 :     re[s * 4] = L_add(y08, y10);
     732           0 :     move32();
     733           0 :     im[s * 4] = L_add(y09, y11);
     734           0 :     move32();
     735           0 :     re[s * 9] = L_sub(y08, y10);
     736           0 :     move32();
     737           0 :     im[s * 9] = L_sub(y09, y11);
     738           0 :     move32();
     739             : 
     740           0 :     re[s * 6] = L_add(y12, y14);
     741           0 :     move32();
     742           0 :     im[s * 6] = L_add(y13, y15);
     743           0 :     move32();
     744           0 :     re[s * 1] = L_sub(y12, y14);
     745           0 :     move32();
     746           0 :     im[s * 1] = L_sub(y13, y15);
     747           0 :     move32();
     748             : 
     749           0 :     re[s * 8] = L_add(y16, y18);
     750           0 :     move32();
     751           0 :     im[s * 8] = L_add(y17, y19);
     752           0 :     move32();
     753           0 :     re[s * 3] = L_sub(y16, y18);
     754           0 :     move32();
     755           0 :     im[s * 3] = L_sub(y17, y19);
     756           0 :     move32();
     757             : 
     758             :     Dyn_Mem_Deluxe_Out();
     759           0 : }
     760             : 
     761             : /**
     762             :  * \brief    Function performs a complex 15-point FFT
     763             :  *           The FFT is performed inplace. The result of the FFT
     764             :  *           is scaled by SCALEFACTOR15 bits (every input goes through L_shr_pos(.., SCALEFACTOR15) on load).
     765             :  *
     766             :  *           WOPS with 32x16 bit multiplications:  354 cycles
     767             :  *
     768             :  * \param    [i/o] re    real input / output, accessed at re[s * k] for k = 0..14
     769             :  * \param    [i/o] im    imag input / output, accessed at im[s * k] for k = 0..14
     770             :  * \param    [i  ] s     stride (in elements) between consecutive points of re and im
     771             :  *
     772             :  * \return   void
     773             :  */
     774             : 
     775             : 
     776           0 : static void fft15(Word32 *re, Word32 *im, Word16 s)
     777             : {
     778             :     Dyn_Mem_Deluxe_In(Word32 t; Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4;
     779             :                       Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09;
     780             :                       Word32 x10, x11, x12, x13, x14, x15, x16, x17, x18, x19;
     781             :                       Word32 x20, x21, x22, x23, x24, x25, x26, x27, x28, x29;
     782             :                       Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
     783             :                       Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
     784             :                       Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;);
     785             : 
     786           0 :     x00 = L_shr_pos(re[s * 0], SCALEFACTOR15); /* x00..x09: points 0, 3, 6, 9, 12 (even x = real, odd x = imag) */
     787           0 :     x01 = L_shr_pos(im[s * 0], SCALEFACTOR15);
     788           0 :     x02 = L_shr_pos(re[s * 3], SCALEFACTOR15);
     789           0 :     x03 = L_shr_pos(im[s * 3], SCALEFACTOR15);
     790           0 :     x04 = L_shr_pos(re[s * 6], SCALEFACTOR15);
     791           0 :     x05 = L_shr_pos(im[s * 6], SCALEFACTOR15);
     792           0 :     x06 = L_shr_pos(re[s * 9], SCALEFACTOR15);
     793           0 :     x07 = L_shr_pos(im[s * 9], SCALEFACTOR15);
     794           0 :     x08 = L_shr_pos(re[s * 12], SCALEFACTOR15);
     795           0 :     x09 = L_shr_pos(im[s * 12], SCALEFACTOR15);
     796             : 
     797           0 :     x10 = L_shr_pos(re[s * 5], SCALEFACTOR15); /* x10..x19: points 5, 8, 11, 14, 2 */
     798           0 :     x11 = L_shr_pos(im[s * 5], SCALEFACTOR15);
     799           0 :     x12 = L_shr_pos(re[s * 8], SCALEFACTOR15);
     800           0 :     x13 = L_shr_pos(im[s * 8], SCALEFACTOR15);
     801           0 :     x14 = L_shr_pos(re[s * 11], SCALEFACTOR15);
     802           0 :     x15 = L_shr_pos(im[s * 11], SCALEFACTOR15);
     803           0 :     x16 = L_shr_pos(re[s * 14], SCALEFACTOR15);
     804           0 :     x17 = L_shr_pos(im[s * 14], SCALEFACTOR15);
     805           0 :     x18 = L_shr_pos(re[s * 2], SCALEFACTOR15);
     806           0 :     x19 = L_shr_pos(im[s * 2], SCALEFACTOR15);
     807             : 
     808           0 :     x20 = L_shr_pos(re[s * 10], SCALEFACTOR15); /* x20..x29: points 10, 13, 1, 4, 7 */
     809           0 :     x21 = L_shr_pos(im[s * 10], SCALEFACTOR15);
     810           0 :     x22 = L_shr_pos(re[s * 13], SCALEFACTOR15);
     811           0 :     x23 = L_shr_pos(im[s * 13], SCALEFACTOR15);
     812           0 :     x24 = L_shr_pos(re[s * 1], SCALEFACTOR15);
     813           0 :     x25 = L_shr_pos(im[s * 1], SCALEFACTOR15);
     814           0 :     x26 = L_shr_pos(re[s * 4], SCALEFACTOR15);
     815           0 :     x27 = L_shr_pos(im[s * 4], SCALEFACTOR15);
     816           0 :     x28 = L_shr_pos(re[s * 7], SCALEFACTOR15);
     817           0 :     x29 = L_shr_pos(im[s * 7], SCALEFACTOR15);
     818             : 
     819             :     /* 1. FFT5 stage: inputs x00..x09, outputs y00..y09 */
     820             : 
     821             :     /* real part */
     822           0 :     r1  = L_add(x02, x08);
     823           0 :     r4  = L_sub(x02, x08);
     824           0 :     r3  = L_add(x04, x06);
     825           0 :     r2  = L_sub(x04, x06);
     826           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
     827           0 :     r1  = L_add(r1, r3);
     828           0 :     y00 = L_add(x00, r1);
     829           0 :     r1  = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
     830           0 :     r3  = L_sub(r1, t);
     831           0 :     r1  = L_add(r1, t);
     832           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
     833           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
     834           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
     835             : 
     836             :     /* imaginary part: same sequence as the real part, with s1..s4 in place of r1..r4 */
     837           0 :     s1  = L_add(x03, x09);
     838           0 :     s4  = L_sub(x03, x09);
     839           0 :     s3  = L_add(x05, x07);
     840           0 :     s2  = L_sub(x05, x07);
     841           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
     842           0 :     s1  = L_add(s1, s3);
     843           0 :     y01 = L_add(x01, s1);
     844           0 :     s1  = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
     845           0 :     s3  = L_sub(s1, t);
     846           0 :     s1  = L_add(s1, t);
     847           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
     848           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
     849           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
     850             : 
     851             :     /* combination: mix the real (r*) and imaginary (s*) terms into y02..y09 */
     852           0 :     y02 = L_add(r1, s2);
     853           0 :     y08 = L_sub(r1, s2);
     854           0 :     y04 = L_sub(r3, s4);
     855           0 :     y06 = L_add(r3, s4);
     856             : 
     857           0 :     y03 = L_sub(s1, r2);
     858           0 :     y09 = L_add(s1, r2);
     859           0 :     y05 = L_add(s3, r4);
     860           0 :     y07 = L_sub(s3, r4);
     861             : 
     862             :     /* 2. FFT5 stage: inputs x10..x19, outputs y10..y19 */
     863             : 
     864             :     /* real part */
     865           0 :     r1  = L_add(x12, x18);
     866           0 :     r4  = L_sub(x12, x18);
     867           0 :     r3  = L_add(x14, x16);
     868           0 :     r2  = L_sub(x14, x16);
     869           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
     870           0 :     r1  = L_add(r1, r3);
     871           0 :     y10 = L_add(x10, r1);
     872           0 :     r1  = L_add(y10, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
     873           0 :     r3  = L_sub(r1, t);
     874           0 :     r1  = L_add(r1, t);
     875           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
     876           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
     877           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
     878             : 
     879             :     /* imaginary part */
     880           0 :     s1  = L_add(x13, x19);
     881           0 :     s4  = L_sub(x13, x19);
     882           0 :     s3  = L_add(x15, x17);
     883           0 :     s2  = L_sub(x15, x17);
     884           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
     885           0 :     s1  = L_add(s1, s3);
     886           0 :     y11 = L_add(x11, s1);
     887           0 :     s1  = L_add(y11, L_shl_pos(Mpy_32_xx(s1, C55), 1));
     888           0 :     s3  = L_sub(s1, t);
     889           0 :     s1  = L_add(s1, t);
     890           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
     891           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
     892           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
     893             : 
     894             :     /* combination: mix the real (r*) and imaginary (s*) terms into y12..y19 */
     895           0 :     y12 = L_add(r1, s2);
     896           0 :     y18 = L_sub(r1, s2);
     897           0 :     y14 = L_sub(r3, s4);
     898           0 :     y16 = L_add(r3, s4);
     899             : 
     900           0 :     y13 = L_sub(s1, r2);
     901           0 :     y19 = L_add(s1, r2);
     902           0 :     y15 = L_add(s3, r4);
     903           0 :     y17 = L_sub(s3, r4);
     904             : 
     905             :     /* 3. FFT5 stage: inputs x20..x29, outputs y20..y29 */
     906             : 
     907             :     /* real part */
     908           0 :     r1  = L_add(x22, x28);
     909           0 :     r4  = L_sub(x22, x28);
     910           0 :     r3  = L_add(x24, x26);
     911           0 :     r2  = L_sub(x24, x26);
     912           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
     913           0 :     r1  = L_add(r1, r3);
     914           0 :     y20 = L_add(x20, r1);
     915           0 :     r1  = L_add(y20, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
     916           0 :     r3  = L_sub(r1, t);
     917           0 :     r1  = L_add(r1, t);
     918           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
     919           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
     920           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
     921             : 
     922             :     /* imaginary part */
     923           0 :     s1  = L_add(x23, x29);
     924           0 :     s4  = L_sub(x23, x29);
     925           0 :     s3  = L_add(x25, x27);
     926           0 :     s2  = L_sub(x25, x27);
     927           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
     928           0 :     s1  = L_add(s1, s3);
     929           0 :     y21 = L_add(x21, s1);
     930           0 :     s1  = L_add(y21, L_shl_pos(Mpy_32_xx(s1, C55), 1));
     931           0 :     s3  = L_sub(s1, t);
     932           0 :     s1  = L_add(s1, t);
     933           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
     934           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
     935           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
     936             : 
     937             :     /* combination: mix the real (r*) and imaginary (s*) terms into y22..y29 */
     938           0 :     y22 = L_add(r1, s2);
     939           0 :     y28 = L_sub(r1, s2);
     940           0 :     y24 = L_sub(r3, s4);
     941           0 :     y26 = L_add(r3, s4);
     942             : 
     943           0 :     y23 = L_sub(s1, r2);
     944           0 :     y29 = L_add(s1, r2);
     945           0 :     y25 = L_add(s3, r4);
     946           0 :     y27 = L_sub(s3, r4);
     947             : 
     948             :     /* 1. FFT3 stage: combines y00/y01, y10/y11, y20/y21 and writes points 0, 10, 5 */
     949             : 
     950             :     /* real part */
     951           0 :     r1        = L_add(y10, y20);
     952           0 :     r2        = Mpy_32_xx(L_sub(y10, y20), C31);
     953           0 :     re[s * 0] = L_add(y00, r1);
     954           0 :     move32();
     955           0 :     r1 = L_sub(y00, L_shr_pos(r1, 1));
     956             : 
     957             :     /* imaginary part */
     958           0 :     s1        = L_add(y11, y21);
     959           0 :     s2        = Mpy_32_xx(L_sub(y11, y21), C31);
     960           0 :     im[s * 0] = L_add(y01, s1);
     961           0 :     move32();
     962           0 :     s1 = L_sub(y01, L_shr_pos(s1, 1));
     963             : 
     964             :     /* combination */
     965           0 :     re[s * 10] = L_sub(r1, s2);
     966           0 :     move32();
     967           0 :     re[s * 5] = L_add(r1, s2);
     968           0 :     move32();
     969           0 :     im[s * 10] = L_add(s1, r2);
     970           0 :     move32();
     971           0 :     im[s * 5] = L_sub(s1, r2);
     972           0 :     move32();
     973             : 
     974             :     /* 2. FFT3 stage: combines y02/y03, y12/y13, y22/y23 and writes points 6, 1, 11 */
     975             : 
     976             :     /* real part */
     977           0 :     r1        = L_add(y12, y22);
     978           0 :     r2        = Mpy_32_xx(L_sub(y12, y22), C31);
     979           0 :     re[s * 6] = L_add(y02, r1);
     980           0 :     move32();
     981           0 :     r1 = L_sub(y02, L_shr_pos(r1, 1));
     982             : 
     983             :     /* imaginary part */
     984           0 :     s1        = L_add(y13, y23);
     985           0 :     s2        = Mpy_32_xx(L_sub(y13, y23), C31);
     986           0 :     im[s * 6] = L_add(y03, s1);
     987           0 :     move32();
     988           0 :     s1 = L_sub(y03, L_shr_pos(s1, 1));
     989             : 
     990             :     /* combination */
     991           0 :     re[s * 1] = L_sub(r1, s2);
     992           0 :     move32();
     993           0 :     re[s * 11] = L_add(r1, s2);
     994           0 :     move32();
     995           0 :     im[s * 1] = L_add(s1, r2);
     996           0 :     move32();
     997           0 :     im[s * 11] = L_sub(s1, r2);
     998           0 :     move32();
     999             : 
    1000             :     /* 3. FFT3 stage: combines y04/y05, y14/y15, y24/y25 and writes points 12, 7, 2 */
    1001             : 
    1002             :     /* real part */
    1003           0 :     r1         = L_add(y14, y24);
    1004           0 :     r2         = Mpy_32_xx(L_sub(y14, y24), C31);
    1005           0 :     re[s * 12] = L_add(y04, r1);
    1006           0 :     move32();
    1007           0 :     r1 = L_sub(y04, L_shr_pos(r1, 1));
    1008             : 
    1009             :     /* imaginary part */
    1010           0 :     s1         = L_add(y15, y25);
    1011           0 :     s2         = Mpy_32_xx(L_sub(y15, y25), C31);
    1012           0 :     im[s * 12] = L_add(y05, s1);
    1013           0 :     move32();
    1014           0 :     s1 = L_sub(y05, L_shr_pos(s1, 1));
    1015             : 
    1016             :     /* combination */
    1017           0 :     re[s * 7] = L_sub(r1, s2);
    1018           0 :     move32();
    1019           0 :     re[s * 2] = L_add(r1, s2);
    1020           0 :     move32();
    1021           0 :     im[s * 7] = L_add(s1, r2);
    1022           0 :     move32();
    1023           0 :     im[s * 2] = L_sub(s1, r2);
    1024           0 :     move32();
    1025             : 
    1026             :     /* 4. FFT3 stage: combines y06/y07, y16/y17, y26/y27 and writes points 3, 13, 8 */
    1027             : 
    1028             :     /* real part */
    1029           0 :     r1        = L_add(y16, y26);
    1030           0 :     r2        = Mpy_32_xx(L_sub(y16, y26), C31);
    1031           0 :     re[s * 3] = L_add(y06, r1);
    1032           0 :     move32();
    1033           0 :     r1 = L_sub(y06, L_shr_pos(r1, 1));
    1034             : 
    1035             :     /* imaginary part */
    1036           0 :     s1        = L_add(y17, y27);
    1037           0 :     s2        = Mpy_32_xx(L_sub(y17, y27), C31);
    1038           0 :     im[s * 3] = L_add(y07, s1);
    1039           0 :     move32();
    1040           0 :     s1 = L_sub(y07, L_shr_pos(s1, 1));
    1041             : 
    1042             :     /* combination */
    1043           0 :     re[s * 13] = L_sub(r1, s2);
    1044           0 :     move32();
    1045           0 :     re[s * 8] = L_add(r1, s2);
    1046           0 :     move32();
    1047           0 :     im[s * 13] = L_add(s1, r2);
    1048           0 :     move32();
    1049           0 :     im[s * 8] = L_sub(s1, r2);
    1050           0 :     move32();
    1051             : 
    1052             :     /* 5. FFT3 stage: combines y08/y09, y18/y19, y28/y29 and writes points 9, 4, 14 */
    1053             : 
    1054             :     /* real part */
    1055           0 :     r1        = L_add(y18, y28);
    1056           0 :     r2        = Mpy_32_xx(L_sub(y18, y28), C31);
    1057           0 :     re[s * 9] = L_add(y08, r1);
    1058           0 :     move32();
    1059           0 :     r1 = L_sub(y08, L_shr_pos(r1, 1));
    1060             : 
    1061             :     /* imaginary part */
    1062           0 :     s1        = L_add(y19, y29);
    1063           0 :     s2        = Mpy_32_xx(L_sub(y19, y29), C31);
    1064           0 :     im[s * 9] = L_add(y09, s1);
    1065           0 :     move32();
    1066           0 :     s1 = L_sub(y09, L_shr_pos(s1, 1));
    1067             : 
    1068             :     /* combination */
    1069           0 :     re[s * 4] = L_sub(r1, s2);
    1070           0 :     move32();
    1071           0 :     re[s * 14] = L_add(r1, s2);
    1072           0 :     move32();
    1073           0 :     im[s * 4] = L_add(s1, r2);
    1074           0 :     move32();
    1075           0 :     im[s * 14] = L_sub(s1, r2);
    1076           0 :     move32();
    1077             : 
    1078             :     Dyn_Mem_Deluxe_Out();
    1079           0 : }
    1080             :     
    1081             : #define STC(x) (x) /* identity wrapper applied to every table constant below */
    1082             : const Word32 RotVectorReal12[] = /* real-part constant table; fft12 below points pVecRe at it */
    1083             : {
    1084             :   STC(0x6ed9eba1), STC(0x40000000),
    1085             :   STC(0x40000000), STC(0xc0000000), /* NOTE(review): 0xc0000000 exceeds 0x7fffffff; presumably relies on conversion to a signed 32-bit Word32 - confirm */
    1086             : #ifndef FFT12_UNROLLED_ENABLE /* the pair below is only compiled in when FFT12_UNROLLED_ENABLE is not defined */
    1087             :   STC(0x00000000), STC(0x80000000),
    1088             : #endif
    1089             : };
    1090             : 
    1091             : const Word32 RotVectorImag12[] = /* imaginary-part constant table; fft12 below points pVecIm at it */
    1092             : {
    1093             :   STC(0x40000000), STC(0x6ed9eba1),
    1094             :   STC(0x6ed9eba1), STC(0x6ed9eba1),
    1095             : #ifndef FFT12_UNROLLED_ENABLE /* the pair below is only compiled in when FFT12_UNROLLED_ENABLE is not defined */
    1096             :   STC(0x7fffffff), STC(0x00000000),
    1097             : #endif
    1098             : };
    1099             :     
    1100           0 : static void fft12(Word32 *pInput) /* 12-point complex FFT, in place on 24 interleaved re/im words */
    1101             : {
    1102             :     Dyn_Mem_Deluxe_In(Word32 aDst[24]; Word32 * pSrc, *pDst; Counter i; Word32 r1, r2, s1, s2, pD; Word32 re, im;
    1103             :                       Word32 vre, vim;);
    1104             :     /* Stage 1 fills aDst from pInput with four 3-point butterflies; stage 2 writes pInput back from aDst. */
    1105           0 :     pSrc = pInput;
    1106           0 :     move16();
    1107           0 :     pDst = aDst;
    1108           0 :     move16();
    1109             : 
    1110             :     /* First 3*2 samples are shifted right by 2 before output */
    1111           0 :     r1      = L_add(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2));
    1112           0 :     r2      = Mpy_32_xx(L_sub(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2)), C31); /* C31 is defined outside this chunk */
    1113           0 :     pD      = L_shr_pos(pSrc[0], 2);
    1114           0 :     pDst[0] = L_shr_pos(L_add(pD, r1), 1); /* sum of the three real inputs, one more bit of down-scaling */
    1115           0 :     r1      = L_sub(pD, L_shr_pos(r1, 1));
    1116             : 
    1117             :     /* imaginary part */
    1118           0 :     s1      = L_add(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2));
    1119           0 :     s2      = Mpy_32_xx(L_sub(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2)), C31);
    1120           0 :     pD      = L_shr_pos(pSrc[1], 2);
    1121           0 :     pDst[1] = L_shr_pos(L_add(pD, s1), 1); /* sum of the three imaginary inputs, same scaling as pDst[0] */
    1122           0 :     s1      = L_sub(pD, L_shr_pos(s1, 1));
    1123             :     /* Bring the four partial terms to the same scaling as pDst[0] and pDst[1]. */
    1124           0 :     r1 = L_shr_pos(r1, 1);
    1125           0 :     r2 = L_shr_pos(r2, 1);
    1126           0 :     s1 = L_shr_pos(s1, 1);
    1127           0 :     s2 = L_shr_pos(s2, 1);
    1128             : 
    1129             :     /* combination */
    1130           0 :     pDst[2] = L_sub(r1, s2);
    1131           0 :     pDst[3] = L_add(s1, r2);
    1132           0 :     pDst[4] = L_add(r1, s2);
    1133           0 :     pDst[5] = L_sub(s1, r2);
    1134           0 :     pSrc += 2; /* next complex input */
    1135           0 :     pDst += 6; /* next group of three complex outputs */
    1136             :     /* Rotation factors for the looped butterflies: two complex factors are read per pass. */
    1137           0 :   const Word32 *pVecRe = RotVectorReal12;
    1138           0 :   const Word32 *pVecIm = RotVectorImag12;
    1139             : 
    1140             : 
    1141             : 
    1142           0 :     FOR (i = 0; i < 2; i++)
    1143             :     {
    1144             :         /* sample 0,1 are shifted right by 2 before output */
    1145             :         /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before output */
    1146             : 
    1147           0 :         r1      = L_add(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2));
    1148           0 :         r2      = Mpy_32_xx(L_sub(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2)), C31);
    1149           0 :         pD      = L_shr_pos(pSrc[0], 2);
    1150           0 :         pDst[0] = L_shr_pos(L_add(pD, r1), 1);
    1151           0 :         r1      = L_sub(pD, L_shr_pos(r1, 1));
    1152             : 
    1153             :         /* imaginary part */
    1154           0 :         s1      = L_add(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2));
    1155           0 :         s2      = Mpy_32_xx(L_sub(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2)), C31);
    1156           0 :         pD      = L_shr_pos(pSrc[1], 2);
    1157           0 :         pDst[1] = L_shr_pos(L_add(pD, s1), 1);
    1158           0 :         s1      = L_sub(pD, L_shr_pos(s1, 1));
    1159             : 
    1160           0 :         r1 = L_shr_pos(r1, 1);
    1161           0 :         r2 = L_shr_pos(r2, 1);
    1162           0 :         s1 = L_shr_pos(s1, 1);
    1163           0 :         s2 = L_shr_pos(s2, 1);
    1164             : 
    1165             :         /* combination */
    1166           0 :         re = L_sub(r1, s2);
    1167           0 :         im = L_add(s1, r2);
    1168           0 :         vre = *pVecRe++;
    1169           0 :         vim = *pVecIm++;
    1170           0 :         cplxMpy_32_32(&pDst[3], &pDst[2], im, re, vre, vim); /* helper defined elsewhere; odd slot and im are passed first */
    1171             :         
    1172           0 :         re  = L_add(r1, s2);
    1173           0 :         im  = L_sub(s1, r2);
    1174             : 
    1175           0 :         vre = *pVecRe++;
    1176           0 :         vim = *pVecIm++;
    1177           0 :         cplxMpy_32_32(&pDst[5], &pDst[4], im, re, vre, vim); /* same argument order as the call above */
    1178             : 
    1179           0 :         pDst += 6;
    1180           0 :         pSrc += 2;
    1181             :     }
    1182             :     /* sample 0,1 are shifted right by 2 before output */
    1183             :     /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
    1184             :     /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
    1185           0 :     r1      = L_add(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2));
    1186           0 :     r2      = Mpy_32_xx(L_sub(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2)), C31);
    1187           0 :     pD      = L_shr_pos(pSrc[0], 2);
    1188           0 :     pDst[0] = L_shr_pos(L_add(pD, r1), 1);
    1189           0 :     r1      = L_sub(pD, L_shr_pos(r1, 1));
    1190             : 
    1191             :     /* imaginary part */
    1192           0 :     s1      = L_add(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2));
    1193           0 :     s2      = Mpy_32_xx(L_sub(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2)), C31);
    1194           0 :     pD      = L_shr_pos(pSrc[1], 2);
    1195           0 :     pDst[1] = L_shr_pos(L_add(pD, s1), 1);
    1196           0 :     s1      = L_sub(pD, L_shr_pos(s1, 1));
    1197             : 
    1198           0 :     r1 = L_shr_pos(r1, 1);
    1199           0 :     r2 = L_shr_pos(r2, 1);
    1200           0 :     s1 = L_shr_pos(s1, 1);
    1201           0 :     s2 = L_shr_pos(s2, 1);
    1202             : 
    1203             :     /* combination */
    1204             :     /* The two rotations are hard-coded here: only sums, differences and one negation, no table read. */
    1205           0 :     pDst[2] = L_add(s1, r2);
    1206           0 :     move32();
    1207           0 :     pDst[3] = L_sub(s2, r1);
    1208           0 :     move32();
    1209           0 :     pDst[4] = L_negate(L_add(r1, s2));
    1210           0 :     move32();
    1211           0 :     pDst[5] = L_sub(r2, s1);
    1212           0 :     move32();
    1213             :     /* Perform 3 times the fft of length 4. The input samples are at the address of aDst and the
    1214             :      output samples are at the address of pInput. The input vector for the fft of length 4 is built
    1215             :      of the interleaved samples in aDst, the output samples are stored consecutively at the address
    1216             :      of pInput.
    1217             :      */
    1218           0 :     move16();
    1219           0 :     move16();
    1220           0 :     pSrc = aDst;
    1221           0 :     pDst = pInput;
    1222           0 :     FOR (i = 0; i < 3; i++)
    1223             :     {
    1224             :         /* inline FFT4 merged with incoming resorting loop */
    1225           0 :         r1 = L_add(L_shr_pos(pSrc[0], 2), L_shr_pos(pSrc[12], 2)); /* Re A + Re B */
    1226           0 :         r2 = L_add(L_shr_pos(pSrc[6], 2), L_shr_pos(pSrc[18], 2)); /* Re C + Re D */
    1227           0 :         s1 = L_add(L_shr_pos(pSrc[1], 2), L_shr_pos(pSrc[13], 2)); /* Im A + Im B */
    1228           0 :         s2 = L_add(L_shr_pos(pSrc[7], 2), L_shr_pos(pSrc[19], 2)); /* Im C + Im D */
    1229             : 
    1230           0 :         pDst[0] = L_add(r1, r2); /* Re A' = Re A + Re B + Re C + Re D */
    1231           0 :         pDst[1] = L_add(s1, s2); /* Im A' = Im A + Im B + Im C + Im D */
    1232             :         /* The differences are formed by removing the second operand twice (its >>1 value) from the >>2 sums. */
    1233           0 :         re = L_sub(r1, L_shr_pos(pSrc[12], 1)); /* Re A - Re B */
    1234           0 :         im = L_sub(s1, L_shr_pos(pSrc[13], 1)); /* Im A - Im B */
    1235             : 
    1236           0 :         pDst[12] = L_sub(r1, r2); /* Re C' = Re A + Re B - Re C - Re D */
    1237           0 :         pDst[13] = L_sub(s1, s2); /* Im C' = Im A + Im B - Im C - Im D */
    1238             : 
    1239           0 :         r2 = L_sub(r2, L_shr_pos(pSrc[18], 1)); /* Re C - Re D */
    1240           0 :         s2 = L_sub(s2, L_shr_pos(pSrc[19], 1)); /* Im C - Im D */
    1241             : 
    1242           0 :         pDst[6]  = L_add(re, s2); /* Re B' = Re A - Re B + Im C - Im D */
    1243           0 :         pDst[18] = L_sub(re, s2); /* Re D' = Re A - Re B - Im C + Im D */
    1244           0 :         pDst[7]  = L_sub(im, r2); /* Im B' = Im A - Im B - Re C + Re D */
    1245           0 :         pDst[19] = L_add(im, r2); /* Im D' = Im A - Im B + Re C - Re D */
    1246             : 
    1247           0 :         pSrc += 2; /* next complex column of aDst */
    1248           0 :         pDst += 2; /* next complex output slot in pInput */
    1249             :     }
    1250             : 
    1251             :     Dyn_Mem_Deluxe_Out();
    1252           0 : }
    1253             : 
    1254             : /**
    1255             :  * \brief    Function performs a complex 16-point FFT
    1256             :  *           The FFT is performed inplace. The result of the FFT
    1257             :  *           is scaled by SCALEFACTOR16 bits.
    1258             :  *
    1259             :  *           WOPS with 32x16 bit multiplications (scale on ):  288 cycles
    1260             :  *           WOPS with 32x16 bit multiplications (scale off):  256 cycles
    1261             :  *
    1262             :  * \param    [i/o] re    real input / output
    1263             :  * \param    [i/o] im    imag input / output
    1264             :  * \param    [i  ] s     stride real and imag input / output
    1265             :  *
    1266             :  * \return   void
    1267             :  */
    1268             : 
    1269             : 
    1270             : 
    1271           0 : static void fft16(Word32 *re, Word32 *im, Word16 s)
    1272             : {
    1273             :     Dyn_Mem_Deluxe_In(Word32 x0, x1, x2, x3, x4, x5, x6, x7; Word32 t0, t1, t2, t3, t4, t5, t6, t7;
    1274             :                       Word32 y00, y01, y02, y03, y04, y05, y06, y07; Word32 y08, y09, y10, y11, y12, y13, y14, y15;
    1275             :                       Word32 y16, y17, y18, y19, y20, y21, y22, y23; Word32 y24, y25, y26, y27, y28, y29, y30, y31;);
    1276             :     /* First pass, group 0: inputs 0, 4, 8, 12 (times stride s), each scaled down by SCALEFACTOR16, give y00..y07. */
    1277           0 :     x0 = L_shr_pos(re[s * 0], SCALEFACTOR16);
    1278           0 :     x1 = L_shr_pos(im[s * 0], SCALEFACTOR16);
    1279           0 :     x2 = L_shr_pos(re[s * 4], SCALEFACTOR16);
    1280           0 :     x3 = L_shr_pos(im[s * 4], SCALEFACTOR16);
    1281           0 :     x4 = L_shr_pos(re[s * 8], SCALEFACTOR16);
    1282           0 :     x5 = L_shr_pos(im[s * 8], SCALEFACTOR16);
    1283           0 :     x6 = L_shr_pos(re[s * 12], SCALEFACTOR16);
    1284           0 :     x7 = L_shr_pos(im[s * 12], SCALEFACTOR16);
    1285             : 
    1286             :     /* Pre-additions */
    1287           0 :     t0 = L_add(x0, x4);
    1288           0 :     t2 = L_sub(x0, x4);
    1289           0 :     t1 = L_add(x1, x5);
    1290           0 :     t3 = L_sub(x1, x5);
    1291           0 :     t4 = L_add(x2, x6);
    1292           0 :     t7 = L_sub(x2, x6);
    1293           0 :     t5 = L_add(x7, x3);
    1294           0 :     t6 = L_sub(x7, x3);
    1295             : 
    1296             :     /* Post-additions */
    1297           0 :     y00 = L_add(t0, t4);
    1298           0 :     y01 = L_add(t1, t5);
    1299           0 :     y02 = L_sub(t2, t6);
    1300           0 :     y03 = L_sub(t3, t7);
    1301           0 :     y04 = L_sub(t0, t4);
    1302           0 :     y05 = L_sub(t1, t5);
    1303           0 :     y06 = L_add(t2, t6);
    1304           0 :     y07 = L_add(t3, t7);
    1305             :     /* First pass, group 1: inputs 1, 5, 9, 13 give y08..y15. */
    1306           0 :     x0 = L_shr_pos(re[s * 1], SCALEFACTOR16);
    1307           0 :     x1 = L_shr_pos(im[s * 1], SCALEFACTOR16);
    1308           0 :     x2 = L_shr_pos(re[s * 5], SCALEFACTOR16);
    1309           0 :     x3 = L_shr_pos(im[s * 5], SCALEFACTOR16);
    1310           0 :     x4 = L_shr_pos(re[s * 9], SCALEFACTOR16);
    1311           0 :     x5 = L_shr_pos(im[s * 9], SCALEFACTOR16);
    1312           0 :     x6 = L_shr_pos(re[s * 13], SCALEFACTOR16);
    1313           0 :     x7 = L_shr_pos(im[s * 13], SCALEFACTOR16);
    1314             : 
    1315             :     /* Pre-additions */
    1316           0 :     t0 = L_add(x0, x4);
    1317           0 :     t2 = L_sub(x0, x4);
    1318           0 :     t1 = L_add(x1, x5);
    1319           0 :     t3 = L_sub(x1, x5);
    1320           0 :     t4 = L_add(x2, x6);
    1321           0 :     t7 = L_sub(x2, x6);
    1322           0 :     t5 = L_add(x7, x3);
    1323           0 :     t6 = L_sub(x7, x3);
    1324             : 
    1325             :     /* Post-additions */
    1326           0 :     y08 = L_add(t0, t4);
    1327           0 :     y09 = L_add(t1, t5);
    1328           0 :     y10 = L_sub(t2, t6);
    1329           0 :     y11 = L_sub(t3, t7);
    1330           0 :     y12 = L_sub(t0, t4);
    1331           0 :     y13 = L_sub(t1, t5);
    1332           0 :     y14 = L_add(t2, t6);
    1333           0 :     y15 = L_add(t3, t7);
    1334             :     /* First pass, group 2: inputs 2, 6, 10, 14 give y16..y23. */
    1335           0 :     x0 = L_shr_pos(re[s * 2], SCALEFACTOR16);
    1336           0 :     x1 = L_shr_pos(im[s * 2], SCALEFACTOR16);
    1337           0 :     x2 = L_shr_pos(re[s * 6], SCALEFACTOR16);
    1338           0 :     x3 = L_shr_pos(im[s * 6], SCALEFACTOR16);
    1339           0 :     x4 = L_shr_pos(re[s * 10], SCALEFACTOR16);
    1340           0 :     x5 = L_shr_pos(im[s * 10], SCALEFACTOR16);
    1341           0 :     x6 = L_shr_pos(re[s * 14], SCALEFACTOR16);
    1342           0 :     x7 = L_shr_pos(im[s * 14], SCALEFACTOR16);
    1343             : 
    1344             :     /* Pre-additions */
    1345           0 :     t0 = L_add(x0, x4);
    1346           0 :     t2 = L_sub(x0, x4);
    1347           0 :     t1 = L_add(x1, x5);
    1348           0 :     t3 = L_sub(x1, x5);
    1349           0 :     t4 = L_add(x2, x6);
    1350           0 :     t7 = L_sub(x2, x6);
    1351           0 :     t5 = L_add(x7, x3);
    1352           0 :     t6 = L_sub(x7, x3);
    1353             : 
    1354             :     /* Post-additions */
    1355           0 :     y16 = L_add(t0, t4);
    1356           0 :     y17 = L_add(t1, t5);
    1357           0 :     y18 = L_sub(t2, t6);
    1358           0 :     y19 = L_sub(t3, t7);
    1359           0 :     y20 = L_sub(t1, t5); /* intentional: unlike the other groups, this pair is stored swapped ... */
    1360           0 :     y21 = L_sub(t4, t0); /* ... with the second part negated, i.e. (t0 - t4, t1 - t5) multiplied by -j */
    1361           0 :     y22 = L_add(t2, t6);
    1362           0 :     y23 = L_add(t3, t7);
    1363             :     /* First pass, group 3: inputs 3, 7, 11, 15 give y24..y31. */
    1364           0 :     x0 = L_shr_pos(re[s * 3], SCALEFACTOR16);
    1365           0 :     x1 = L_shr_pos(im[s * 3], SCALEFACTOR16);
    1366           0 :     x2 = L_shr_pos(re[s * 7], SCALEFACTOR16);
    1367           0 :     x3 = L_shr_pos(im[s * 7], SCALEFACTOR16);
    1368           0 :     x4 = L_shr_pos(re[s * 11], SCALEFACTOR16);
    1369           0 :     x5 = L_shr_pos(im[s * 11], SCALEFACTOR16);
    1370           0 :     x6 = L_shr_pos(re[s * 15], SCALEFACTOR16);
    1371           0 :     x7 = L_shr_pos(im[s * 15], SCALEFACTOR16);
    1372             : 
    1373             :     /* Pre-additions */
    1374           0 :     t0 = L_add(x0, x4);
    1375           0 :     t2 = L_sub(x0, x4);
    1376           0 :     t1 = L_add(x1, x5);
    1377           0 :     t3 = L_sub(x1, x5);
    1378           0 :     t4 = L_add(x2, x6);
    1379           0 :     t7 = L_sub(x2, x6);
    1380           0 :     t5 = L_add(x7, x3);
    1381           0 :     t6 = L_sub(x7, x3);
    1382             : 
    1383             :     /* Post-additions */
    1384           0 :     y24 = L_add(t0, t4);
    1385           0 :     y25 = L_add(t1, t5);
    1386           0 :     y26 = L_sub(t2, t6);
    1387           0 :     y27 = L_sub(t3, t7);
    1388           0 :     y28 = L_sub(t0, t4);
    1389           0 :     y29 = L_sub(t1, t5);
    1390           0 :     y30 = L_add(t2, t6);
    1391           0 :     y31 = L_add(t3, t7);
    1392             : 
    1393             :     /* rotation */
    1394             :     /* Only the pairs below are rotated; y00..y09, y16, y17, y20, y21, y24 and y25 are used as they are. */
    1395           0 :     x0  = Mpy_32_xx(y22, C162);
    1396           0 :     x1  = Mpy_32_xx(y23, C162);
    1397           0 :     y22 = L_sub(x0, x1);
    1398           0 :     y23 = L_add(x0, x1);
    1399             : 
    1400           0 :     x0  = Mpy_32_xx(y28, C162);
    1401           0 :     x1  = Mpy_32_xx(y29, C162);
    1402           0 :     y28 = L_sub(x0, x1);
    1403           0 :     y29 = L_add(x0, x1);
    1404             : 
    1405           0 :     x0  = Mpy_32_xx(y12, C161);
    1406           0 :     x1  = Mpy_32_xx(y13, C161);
    1407           0 :     y12 = L_add(x0, x1);
    1408           0 :     y13 = L_sub(x1, x0);
    1409             : 
    1410           0 :     x0  = Mpy_32_xx(y18, C161);
    1411           0 :     x1  = Mpy_32_xx(y19, C161);
    1412           0 :     y18 = L_add(x0, x1);
    1413           0 :     y19 = L_sub(x1, x0);
    1414             :     /* The remaining pairs use two different constants each, so four products are needed per pair. */
    1415           0 :     x0  = Mpy_32_xx(y10, C163);
    1416           0 :     x1  = Mpy_32_xx(y11, C166);
    1417           0 :     x2  = Mpy_32_xx(y10, C166);
    1418           0 :     x3  = Mpy_32_xx(y11, C163);
    1419           0 :     y10 = L_sub(x0, x1);
    1420           0 :     y11 = L_add(x2, x3);
    1421             : 
    1422           0 :     x0  = Mpy_32_xx(y14, C165);
    1423           0 :     x1  = Mpy_32_xx(y15, C164);
    1424           0 :     x2  = Mpy_32_xx(y14, C164);
    1425           0 :     x3  = Mpy_32_xx(y15, C165);
    1426           0 :     y14 = L_sub(x0, x1);
    1427           0 :     y15 = L_add(x2, x3);
    1428             : 
    1429           0 :     x0  = Mpy_32_xx(y26, C165);
    1430           0 :     x1  = Mpy_32_xx(y27, C164);
    1431           0 :     x2  = Mpy_32_xx(y26, C164);
    1432           0 :     x3  = Mpy_32_xx(y27, C165);
    1433           0 :     y26 = L_sub(x0, x1);
    1434           0 :     y27 = L_add(x2, x3);
    1435             : 
    1436           0 :     x0  = Mpy_32_xx(y30, C164);
    1437           0 :     x1  = Mpy_32_xx(y31, C165);
    1438           0 :     x2  = Mpy_32_xx(y30, C165);
    1439           0 :     x3  = Mpy_32_xx(y31, C164);
    1440           0 :     y30 = L_sub(x0, x1);
    1441           0 :     y31 = L_add(x2, x3);
    1442             :     /* Second pass: one butterfly per output set, taking the matching pair from each group; results overwrite re[]/im[]. */
    1443             :     /* Pre-additions */
    1444             :     /* Output set 0, 4, 8, 12 from the pairs y00/y01, y08/y09, y16/y17, y24/y25. */
    1445           0 :     t0 = L_add(y00, y16);
    1446           0 :     t2 = L_sub(y00, y16);
    1447           0 :     t1 = L_add(y01, y17);
    1448           0 :     t3 = L_sub(y01, y17);
    1449           0 :     t4 = L_add(y08, y24);
    1450           0 :     t7 = L_sub(y08, y24);
    1451           0 :     t5 = L_add(y25, y09);
    1452           0 :     t6 = L_sub(y25, y09);
    1453             : 
    1454             :     /* Post-additions */
    1455             : 
    1456           0 :     re[s * 0] = L_add(t0, t4);
    1457           0 :     move32();
    1458           0 :     im[s * 0] = L_add(t1, t5);
    1459           0 :     move32();
    1460           0 :     re[s * 4] = L_sub(t2, t6);
    1461           0 :     move32();
    1462           0 :     im[s * 4] = L_sub(t3, t7);
    1463           0 :     move32();
    1464           0 :     re[s * 8] = L_sub(t0, t4);
    1465           0 :     move32();
    1466           0 :     im[s * 8] = L_sub(t1, t5);
    1467           0 :     move32();
    1468           0 :     re[s * 12] = L_add(t2, t6);
    1469           0 :     move32();
    1470           0 :     im[s * 12] = L_add(t3, t7);
    1471           0 :     move32();
    1472             : 
    1473             :     /* Pre-additions */
    1474             :     /* Output set 1, 5, 9, 13 from the pairs y02/y03, y10/y11, y18/y19, y26/y27. */
    1475           0 :     t0 = L_add(y02, y18);
    1476           0 :     t2 = L_sub(y02, y18);
    1477           0 :     t1 = L_add(y03, y19);
    1478           0 :     t3 = L_sub(y03, y19);
    1479           0 :     t4 = L_add(y10, y26);
    1480           0 :     t7 = L_sub(y10, y26);
    1481           0 :     t5 = L_add(y27, y11);
    1482           0 :     t6 = L_sub(y27, y11);
    1483             : 
    1484             :     /* Post-additions */
    1485             : 
    1486           0 :     re[s * 1] = L_add(t0, t4);
    1487           0 :     move32();
    1488           0 :     im[s * 1] = L_add(t1, t5);
    1489           0 :     move32();
    1490           0 :     re[s * 5] = L_sub(t2, t6);
    1491           0 :     move32();
    1492           0 :     im[s * 5] = L_sub(t3, t7);
    1493           0 :     move32();
    1494           0 :     re[s * 9] = L_sub(t0, t4);
    1495           0 :     move32();
    1496           0 :     im[s * 9] = L_sub(t1, t5);
    1497           0 :     move32();
    1498           0 :     re[s * 13] = L_add(t2, t6);
    1499           0 :     move32();
    1500           0 :     im[s * 13] = L_add(t3, t7);
    1501           0 :     move32();
    1502             : 
    1503             :     /* Pre-additions */
    1504             :     /* Output set 2, 6, 10, 14 from the pairs y04/y05, y12/y13, y20/y21, y28/y29. */
    1505           0 :     t0 = L_add(y04, y20);
    1506           0 :     t2 = L_sub(y04, y20);
    1507           0 :     t1 = L_add(y05, y21);
    1508           0 :     t3 = L_sub(y05, y21);
    1509           0 :     t4 = L_add(y12, y28);
    1510           0 :     t7 = L_sub(y12, y28);
    1511           0 :     t5 = L_add(y29, y13);
    1512           0 :     t6 = L_sub(y29, y13);
    1513             : 
    1514             :     /* Post-additions */
    1515             : 
    1516           0 :     re[s * 2] = L_add(t0, t4);
    1517           0 :     move32();
    1518           0 :     im[s * 2] = L_add(t1, t5);
    1519           0 :     move32();
    1520           0 :     re[s * 6] = L_sub(t2, t6);
    1521           0 :     move32();
    1522           0 :     im[s * 6] = L_sub(t3, t7);
    1523           0 :     move32();
    1524           0 :     re[s * 10] = L_sub(t0, t4);
    1525           0 :     move32();
    1526           0 :     im[s * 10] = L_sub(t1, t5);
    1527           0 :     move32();
    1528           0 :     re[s * 14] = L_add(t2, t6);
    1529           0 :     move32();
    1530           0 :     im[s * 14] = L_add(t3, t7);
    1531           0 :     move32();
    1532             : 
    1533             :     /* Pre-additions */
    1534             :     /* Output set 3, 7, 11, 15 from the pairs y06/y07, y14/y15, y22/y23, y30/y31. */
    1535           0 :     t0 = L_add(y06, y22);
    1536           0 :     t2 = L_sub(y06, y22);
    1537           0 :     t1 = L_add(y07, y23);
    1538           0 :     t3 = L_sub(y07, y23);
    1539           0 :     t4 = L_add(y14, y30);
    1540           0 :     t7 = L_sub(y14, y30);
    1541           0 :     t5 = L_add(y31, y15);
    1542           0 :     t6 = L_sub(y31, y15);
    1543             : 
    1544             :     /* Post-additions */
    1545             : 
    1546           0 :     re[s * 3] = L_add(t0, t4);
    1547           0 :     move32();
    1548           0 :     im[s * 3] = L_add(t1, t5);
    1549           0 :     move32();
    1550           0 :     re[s * 7] = L_sub(t2, t6);
    1551           0 :     move32();
    1552           0 :     im[s * 7] = L_sub(t3, t7);
    1553           0 :     move32();
    1554           0 :     re[s * 11] = L_sub(t0, t4);
    1555           0 :     move32();
    1556           0 :     im[s * 11] = L_sub(t1, t5);
    1557           0 :     move32();
    1558           0 :     re[s * 15] = L_add(t2, t6);
    1559           0 :     move32();
    1560           0 :     im[s * 15] = L_add(t3, t7);
    1561           0 :     move32();
    1562             : 
    1563             :     Dyn_Mem_Deluxe_Out();
    1564           0 : }
    1565             : 
    1566             : /**
    1567             :  * \brief    Function performs a complex 20-point FFT
    1568             :  *           The FFT is performed inplace. The result of the FFT
    1569             :  *           is scaled by SCALEFACTOR20 bits.
    1570             :  *
    1571             :  *           WOPS with 32x16 bit multiplications:  432 cycles
    1572             :  *
    1573             :  * \param    [i/o] re    real input / output
    1574             :  * \param    [i/o] im    imag input / output
    1575             :  * \param    [i  ] s     stride real and imag input / output
    1576             :  *
    1577             :  * \return   void
    1578             :  */
    1579             : 
    1580             : 
    1581           0 : static void fft20(Word32 *re, Word32 *im, Word16 s)
    1582             : {
    1583             :     Dyn_Mem_Deluxe_In(Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4; Word32 x0, x1, x2, x3, x4;
    1584             :                       Word32 t, t0, t1, t2, t3, t4, t5, t6, t7; Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
    1585             :                       Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
    1586             :                       Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
    1587             :                       Word32 y30, y31, y32, y33, y34, y35, y36, y37, y38, y39;);
    1588             : 
    1589             :     /* 1. FFT5 stage */
    1590             : 
    1591             :     /* real part */
    1592           0 :     x0 = L_shr_pos(re[s * 0], SCALEFACTOR20);
    1593           0 :     x1 = L_shr_pos(re[s * 16], SCALEFACTOR20);
    1594           0 :     x2 = L_shr_pos(re[s * 12], SCALEFACTOR20);
    1595           0 :     x3 = L_shr_pos(re[s * 8], SCALEFACTOR20);
    1596           0 :     x4 = L_shr_pos(re[s * 4], SCALEFACTOR20);
    1597             : 
    1598           0 :     r1  = L_add(x1, x4);
    1599           0 :     r4  = L_sub(x1, x4);
    1600           0 :     r3  = L_add(x2, x3);
    1601           0 :     r2  = L_sub(x2, x3);
    1602           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    1603           0 :     r1  = L_add(r1, r3);
    1604           0 :     y00 = L_add(x0, r1);
    1605           0 :     r1  = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    1606           0 :     r3  = L_sub(r1, t);
    1607           0 :     r1  = L_add(r1, t);
    1608           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    1609           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    1610           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    1611             : 
    1612             :     /* imaginary part */
    1613           0 :     x0 = L_shr_pos(im[s * 0], SCALEFACTOR20);
    1614           0 :     x1 = L_shr_pos(im[s * 16], SCALEFACTOR20);
    1615           0 :     x2 = L_shr_pos(im[s * 12], SCALEFACTOR20);
    1616           0 :     x3 = L_shr_pos(im[s * 8], SCALEFACTOR20);
    1617           0 :     x4 = L_shr_pos(im[s * 4], SCALEFACTOR20);
    1618             : 
    1619           0 :     s1  = L_add(x1, x4);
    1620           0 :     s4  = L_sub(x1, x4);
    1621           0 :     s3  = L_add(x2, x3);
    1622           0 :     s2  = L_sub(x2, x3);
    1623           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    1624           0 :     s1  = L_add(s1, s3);
    1625           0 :     y01 = L_add(x0, s1);
    1626           0 :     s1  = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    1627           0 :     s3  = L_sub(s1, t);
    1628           0 :     s1  = L_add(s1, t);
    1629           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    1630           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    1631           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    1632             : 
    1633             :     /* combination */
    1634           0 :     y08 = L_add(r1, s2);
    1635           0 :     y32 = L_sub(r1, s2);
    1636           0 :     y16 = L_sub(r3, s4);
    1637           0 :     y24 = L_add(r3, s4);
    1638             : 
    1639           0 :     y09 = L_sub(s1, r2);
    1640           0 :     y33 = L_add(s1, r2);
    1641           0 :     y17 = L_add(s3, r4);
    1642           0 :     y25 = L_sub(s3, r4);
    1643             : 
    1644             :     /* 2. FFT5 stage */
    1645             : 
    1646             :     /* real part */
    1647           0 :     x0 = L_shr_pos(re[s * 5], SCALEFACTOR20);
    1648           0 :     x1 = L_shr_pos(re[s * 1], SCALEFACTOR20);
    1649           0 :     x2 = L_shr_pos(re[s * 17], SCALEFACTOR20);
    1650           0 :     x3 = L_shr_pos(re[s * 13], SCALEFACTOR20);
    1651           0 :     x4 = L_shr_pos(re[s * 9], SCALEFACTOR20);
    1652             : 
    1653           0 :     r1  = L_add(x1, x4);
    1654           0 :     r4  = L_sub(x1, x4);
    1655           0 :     r3  = L_add(x2, x3);
    1656           0 :     r2  = L_sub(x2, x3);
    1657           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    1658           0 :     r1  = L_add(r1, r3);
    1659           0 :     y02 = L_add(x0, r1);
    1660           0 :     r1  = L_add(y02, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    1661           0 :     r3  = L_sub(r1, t);
    1662           0 :     r1  = L_add(r1, t);
    1663           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    1664           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    1665           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    1666             : 
    1667             :     /* imaginary part */
    1668           0 :     x0 = L_shr_pos(im[s * 5], SCALEFACTOR20);
    1669           0 :     x1 = L_shr_pos(im[s * 1], SCALEFACTOR20);
    1670           0 :     x2 = L_shr_pos(im[s * 17], SCALEFACTOR20);
    1671           0 :     x3 = L_shr_pos(im[s * 13], SCALEFACTOR20);
    1672           0 :     x4 = L_shr_pos(im[s * 9], SCALEFACTOR20);
    1673             : 
    1674           0 :     s1  = L_add(x1, x4);
    1675           0 :     s4  = L_sub(x1, x4);
    1676           0 :     s3  = L_add(x2, x3);
    1677           0 :     s2  = L_sub(x2, x3);
    1678           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    1679           0 :     s1  = L_add(s1, s3);
    1680           0 :     y03 = L_add(x0, s1);
    1681           0 :     s1  = L_add(y03, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    1682           0 :     s3  = L_sub(s1, t);
    1683           0 :     s1  = L_add(s1, t);
    1684           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    1685           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    1686           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    1687             : 
    1688             :     /* combination */
    1689           0 :     y10 = L_add(r1, s2);
    1690           0 :     y34 = L_sub(r1, s2);
    1691           0 :     y18 = L_sub(r3, s4);
    1692           0 :     y26 = L_add(r3, s4);
    1693             : 
    1694           0 :     y11 = L_sub(s1, r2);
    1695           0 :     y35 = L_add(s1, r2);
    1696           0 :     y19 = L_add(s3, r4);
    1697           0 :     y27 = L_sub(s3, r4);
    1698             : 
    1699             :     /* 3. FFT5 stage */
    1700             : 
    1701             :     /* real part */
    1702           0 :     x0 = L_shr_pos(re[s * 10], SCALEFACTOR20);
    1703           0 :     x1 = L_shr_pos(re[s * 6], SCALEFACTOR20);
    1704           0 :     x2 = L_shr_pos(re[s * 2], SCALEFACTOR20);
    1705           0 :     x3 = L_shr_pos(re[s * 18], SCALEFACTOR20);
    1706           0 :     x4 = L_shr_pos(re[s * 14], SCALEFACTOR20);
    1707             : 
    1708           0 :     r1  = L_add(x1, x4);
    1709           0 :     r4  = L_sub(x1, x4);
    1710           0 :     r3  = L_add(x2, x3);
    1711           0 :     r2  = L_sub(x2, x3);
    1712           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    1713           0 :     r1  = L_add(r1, r3);
    1714           0 :     y04 = L_add(x0, r1);
    1715           0 :     r1  = L_add(y04, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    1716           0 :     r3  = L_sub(r1, t);
    1717           0 :     r1  = L_add(r1, t);
    1718           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    1719           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    1720           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    1721             : 
    1722             :     /* imaginary part */
    1723           0 :     x0 = L_shr_pos(im[s * 10], SCALEFACTOR20);
    1724           0 :     x1 = L_shr_pos(im[s * 6], SCALEFACTOR20);
    1725           0 :     x2 = L_shr_pos(im[s * 2], SCALEFACTOR20);
    1726           0 :     x3 = L_shr_pos(im[s * 18], SCALEFACTOR20);
    1727           0 :     x4 = L_shr_pos(im[s * 14], SCALEFACTOR20);
    1728             : 
    1729           0 :     s1  = L_add(x1, x4);
    1730           0 :     s4  = L_sub(x1, x4);
    1731           0 :     s3  = L_add(x2, x3);
    1732           0 :     s2  = L_sub(x2, x3);
    1733           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    1734           0 :     s1  = L_add(s1, s3);
    1735           0 :     y05 = L_add(x0, s1);
    1736           0 :     s1  = L_add(y05, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    1737           0 :     s3  = L_sub(s1, t);
    1738           0 :     s1  = L_add(s1, t);
    1739           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    1740           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    1741           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    1742             : 
    1743             :     /* combination */
    1744           0 :     y12 = L_add(r1, s2);
    1745           0 :     y36 = L_sub(r1, s2);
    1746           0 :     y20 = L_sub(r3, s4);
    1747           0 :     y28 = L_add(r3, s4);
    1748             : 
    1749           0 :     y13 = L_sub(s1, r2);
    1750           0 :     y37 = L_add(s1, r2);
    1751           0 :     y21 = L_add(s3, r4);
    1752           0 :     y29 = L_sub(s3, r4);
    1753             : 
    1754             :     /* 4. FFT5 stage */
    1755             : 
    1756             :     /* real part */
    1757           0 :     x0 = L_shr_pos(re[s * 15], SCALEFACTOR20);
    1758           0 :     x1 = L_shr_pos(re[s * 11], SCALEFACTOR20);
    1759           0 :     x2 = L_shr_pos(re[s * 7], SCALEFACTOR20);
    1760           0 :     x3 = L_shr_pos(re[s * 3], SCALEFACTOR20);
    1761           0 :     x4 = L_shr_pos(re[s * 19], SCALEFACTOR20);
    1762             : 
    1763           0 :     r1  = L_add(x1, x4);
    1764           0 :     r4  = L_sub(x1, x4);
    1765           0 :     r3  = L_add(x2, x3);
    1766           0 :     r2  = L_sub(x2, x3);
    1767           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    1768           0 :     r1  = L_add(r1, r3);
    1769           0 :     y06 = L_add(x0, r1);
    1770           0 :     r1  = L_add(y06, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    1771           0 :     r3  = L_sub(r1, t);
    1772           0 :     r1  = L_add(r1, t);
    1773           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    1774           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    1775           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    1776             : 
    1777             :     /* imaginary part */
    1778           0 :     x0 = L_shr_pos(im[s * 15], SCALEFACTOR20);
    1779           0 :     x1 = L_shr_pos(im[s * 11], SCALEFACTOR20);
    1780           0 :     x2 = L_shr_pos(im[s * 7], SCALEFACTOR20);
    1781           0 :     x3 = L_shr_pos(im[s * 3], SCALEFACTOR20);
    1782           0 :     x4 = L_shr_pos(im[s * 19], SCALEFACTOR20);
    1783             : 
    1784           0 :     s1  = L_add(x1, x4);
    1785           0 :     s4  = L_sub(x1, x4);
    1786           0 :     s3  = L_add(x2, x3);
    1787           0 :     s2  = L_sub(x2, x3);
    1788           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    1789           0 :     s1  = L_add(s1, s3);
    1790           0 :     y07 = L_add(x0, s1);
    1791           0 :     s1  = L_add(y07, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    1792           0 :     s3  = L_sub(s1, t);
    1793           0 :     s1  = L_add(s1, t);
    1794           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    1795           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    1796           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    1797             : 
    1798             :     /* combination */
    1799           0 :     y14 = L_add(r1, s2);
    1800           0 :     y38 = L_sub(r1, s2);
    1801           0 :     y22 = L_sub(r3, s4);
    1802           0 :     y30 = L_add(r3, s4);
    1803             : 
    1804           0 :     y15 = L_sub(s1, r2);
    1805           0 :     y39 = L_add(s1, r2);
    1806           0 :     y23 = L_add(s3, r4);
    1807           0 :     y31 = L_sub(s3, r4);
    1808             : 
    1809             :     /* 1. FFT4 stage */
    1810             : 
    1811             :     /* Pre-additions */
    1812           0 :     t0 = L_add(y00, y04);
    1813           0 :     t2 = L_sub(y00, y04);
    1814           0 :     t1 = L_add(y01, y05);
    1815           0 :     t3 = L_sub(y01, y05);
    1816           0 :     t4 = L_add(y02, y06);
    1817           0 :     t7 = L_sub(y02, y06);
    1818           0 :     t5 = L_add(y07, y03);
    1819           0 :     t6 = L_sub(y07, y03);
    1820             : 
    1821             :     /* Post-additions */
    1822           0 :     re[s * 0] = L_add(t0, t4);
    1823           0 :     move32();
    1824           0 :     im[s * 0] = L_add(t1, t5);
    1825           0 :     move32();
    1826           0 :     re[s * 5] = L_sub(t2, t6);
    1827           0 :     move32();
    1828           0 :     im[s * 5] = L_sub(t3, t7);
    1829           0 :     move32();
    1830           0 :     re[s * 10] = L_sub(t0, t4);
    1831           0 :     move32();
    1832           0 :     im[s * 10] = L_sub(t1, t5);
    1833           0 :     move32();
    1834           0 :     re[s * 15] = L_add(t2, t6);
    1835           0 :     move32();
    1836           0 :     im[s * 15] = L_add(t3, t7);
    1837           0 :     move32();
    1838             : 
    1839             :     /* 2. FFT4 stage */
    1840             : 
    1841             :     /* Pre-additions */
    1842           0 :     t0 = L_add(y08, y12);
    1843           0 :     t2 = L_sub(y08, y12);
    1844           0 :     t1 = L_add(y09, y13);
    1845           0 :     t3 = L_sub(y09, y13);
    1846           0 :     t4 = L_add(y10, y14);
    1847           0 :     t7 = L_sub(y10, y14);
    1848           0 :     t5 = L_add(y15, y11);
    1849           0 :     t6 = L_sub(y15, y11);
    1850             : 
    1851             :     /* Post-additions */
    1852           0 :     re[s * 4] = L_add(t0, t4);
    1853           0 :     move32();
    1854           0 :     im[s * 4] = L_add(t1, t5);
    1855           0 :     move32();
    1856           0 :     re[s * 9] = L_sub(t2, t6);
    1857           0 :     move32();
    1858           0 :     im[s * 9] = L_sub(t3, t7);
    1859           0 :     move32();
    1860           0 :     re[s * 14] = L_sub(t0, t4);
    1861           0 :     move32();
    1862           0 :     im[s * 14] = L_sub(t1, t5);
    1863           0 :     move32();
    1864           0 :     re[s * 19] = L_add(t2, t6);
    1865           0 :     move32();
    1866           0 :     im[s * 19] = L_add(t3, t7);
    1867           0 :     move32();
    1868             : 
    1869             :     /* 3. FFT4 stage */
    1870             : 
    1871             :     /* Pre-additions */
    1872           0 :     t0 = L_add(y16, y20);
    1873           0 :     t2 = L_sub(y16, y20);
    1874           0 :     t1 = L_add(y17, y21);
    1875           0 :     t3 = L_sub(y17, y21);
    1876           0 :     t4 = L_add(y18, y22);
    1877           0 :     t7 = L_sub(y18, y22);
    1878           0 :     t5 = L_add(y23, y19);
    1879           0 :     t6 = L_sub(y23, y19);
    1880             : 
    1881             :     /* Post-additions */
    1882           0 :     re[s * 8] = L_add(t0, t4);
    1883           0 :     move32();
    1884           0 :     im[s * 8] = L_add(t1, t5);
    1885           0 :     move32();
    1886           0 :     re[s * 13] = L_sub(t2, t6);
    1887           0 :     move32();
    1888           0 :     im[s * 13] = L_sub(t3, t7);
    1889           0 :     move32();
    1890           0 :     re[s * 18] = L_sub(t0, t4);
    1891           0 :     move32();
    1892           0 :     im[s * 18] = L_sub(t1, t5);
    1893           0 :     move32();
    1894           0 :     re[s * 3] = L_add(t2, t6);
    1895           0 :     move32();
    1896           0 :     im[s * 3] = L_add(t3, t7);
    1897           0 :     move32();
    1898             : 
    1899             :     /* 4. FFT4 stage */
    1900             : 
    1901             :     /* Pre-additions */
    1902           0 :     t0 = L_add(y24, y28);
    1903           0 :     t2 = L_sub(y24, y28);
    1904           0 :     t1 = L_add(y25, y29);
    1905           0 :     t3 = L_sub(y25, y29);
    1906           0 :     t4 = L_add(y26, y30);
    1907           0 :     t7 = L_sub(y26, y30);
    1908           0 :     t5 = L_add(y31, y27);
    1909           0 :     t6 = L_sub(y31, y27);
    1910             : 
    1911             :     /* Post-additions */
    1912           0 :     re[s * 12] = L_add(t0, t4);
    1913           0 :     move32();
    1914           0 :     im[s * 12] = L_add(t1, t5);
    1915           0 :     move32();
    1916           0 :     re[s * 17] = L_sub(t2, t6);
    1917           0 :     move32();
    1918           0 :     im[s * 17] = L_sub(t3, t7);
    1919           0 :     move32();
    1920           0 :     re[s * 2] = L_sub(t0, t4);
    1921           0 :     move32();
    1922           0 :     im[s * 2] = L_sub(t1, t5);
    1923           0 :     move32();
    1924           0 :     re[s * 7] = L_add(t2, t6);
    1925           0 :     move32();
    1926           0 :     im[s * 7] = L_add(t3, t7);
    1927           0 :     move32();
    1928             : 
    1929             :     /* 5. FFT4 stage */
    1930             : 
    1931             :     /* Pre-additions */
    1932           0 :     t0 = L_add(y32, y36);
    1933           0 :     t2 = L_sub(y32, y36);
    1934           0 :     t1 = L_add(y33, y37);
    1935           0 :     t3 = L_sub(y33, y37);
    1936           0 :     t4 = L_add(y34, y38);
    1937           0 :     t7 = L_sub(y34, y38);
    1938           0 :     t5 = L_add(y39, y35);
    1939           0 :     t6 = L_sub(y39, y35);
    1940             : 
    1941             :     /* Post-additions */
    1942           0 :     re[s * 16] = L_add(t0, t4);
    1943           0 :     move32();
    1944           0 :     im[s * 16] = L_add(t1, t5);
    1945           0 :     move32();
    1946           0 :     re[s * 1] = L_sub(t2, t6);
    1947           0 :     move32();
    1948           0 :     im[s * 1] = L_sub(t3, t7);
    1949           0 :     move32();
    1950           0 :     re[s * 6] = L_sub(t0, t4);
    1951           0 :     move32();
    1952           0 :     im[s * 6] = L_sub(t1, t5);
    1953           0 :     move32();
    1954           0 :     re[s * 11] = L_add(t2, t6);
    1955           0 :     move32();
    1956           0 :     im[s * 11] = L_add(t3, t7);
    1957           0 :     move32();
    1958             : 
    1959             :     Dyn_Mem_Deluxe_Out();
    1960           0 : }
    1961             : 
    1962             : /**
    1963             :  * \brief    Function performs a complex 30-point FFT
    1964             :  *           The FFT is performed in place. The result of the FFT
    1965             :  *           is scaled by SCALEFACTOR30 bits.
    1966             :  *
    1967             :  *           WOPS with 32x16 bit multiplications:  828 cycles
    1968             :  *
    1969             :  * \param    [i/o] re    real input / output
    1970             :  * \param    [i/o] im    imag input / output
    1971             :  * \param    [i  ] s     stride real and imag input / output
    1972             :  *
    1973             :  * \return   void
    1974             :  */
    1975             : 
    1976             : 
    1977           0 : static void fft30(Word32 *re, Word32 *im, Word16 s)
    1978             : {
    1979             :     Dyn_Mem_Deluxe_In(Word32 t; Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4;
    1980             :                       Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09;
    1981             :                       Word32 x10, x11, x12, x13, x14, x15, x16, x17, x18, x19;
    1982             :                       Word32 x20, x21, x22, x23, x24, x25, x26, x27, x28, x29;
    1983             : 
    1984             :                       Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
    1985             :                       Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
    1986             :                       Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
    1987             : 
    1988             :                       Word32 z00, z01, z02, z03, z04, z05, z06, z07, z08, z09;
    1989             :                       Word32 z10, z11, z12, z13, z14, z15, z16, z17, z18, z19;
    1990             :                       Word32 z20, z21, z22, z23, z24, z25, z26, z27, z28, z29;
    1991             :                       Word32 z30, z31, z32, z33, z34, z35, z36, z37, z38, z39;
    1992             :                       Word32 z40, z41, z42, z43, z44, z45, z46, z47, z48, z49;
    1993             :                       Word32 z50, z51, z52, z53, z54, z55, z56, z57, z58, z59;
    1994             : 
    1995             :                       Word32 * rel, *reh, *iml, *imh;);
    1996             : 
    1997           0 :     rel = &re[s * 0];
    1998           0 :     reh = &re[s * 15];
    1999           0 :     iml = &im[s * 0];
    2000           0 :     imh = &im[s * 15];
    2001             : 
    2002             :     /* 1. FFT15 stage */
    2003           0 :     x00 = L_shr_pos(re[s * 0], SCALEFACTOR30_1);
    2004           0 :     x01 = L_shr_pos(im[s * 0], SCALEFACTOR30_1);
    2005           0 :     x02 = L_shr_pos(re[s * 18], SCALEFACTOR30_1);
    2006           0 :     x03 = L_shr_pos(im[s * 18], SCALEFACTOR30_1);
    2007           0 :     x04 = L_shr_pos(re[s * 6], SCALEFACTOR30_1);
    2008           0 :     x05 = L_shr_pos(im[s * 6], SCALEFACTOR30_1);
    2009           0 :     x06 = L_shr_pos(re[s * 24], SCALEFACTOR30_1);
    2010           0 :     x07 = L_shr_pos(im[s * 24], SCALEFACTOR30_1);
    2011           0 :     x08 = L_shr_pos(re[s * 12], SCALEFACTOR30_1);
    2012           0 :     x09 = L_shr_pos(im[s * 12], SCALEFACTOR30_1);
    2013             : 
    2014           0 :     x10 = L_shr_pos(re[s * 20], SCALEFACTOR30_1);
    2015           0 :     x11 = L_shr_pos(im[s * 20], SCALEFACTOR30_1);
    2016           0 :     x12 = L_shr_pos(re[s * 8], SCALEFACTOR30_1);
    2017           0 :     x13 = L_shr_pos(im[s * 8], SCALEFACTOR30_1);
    2018           0 :     x14 = L_shr_pos(re[s * 26], SCALEFACTOR30_1);
    2019           0 :     x15 = L_shr_pos(im[s * 26], SCALEFACTOR30_1);
    2020           0 :     x16 = L_shr_pos(re[s * 14], SCALEFACTOR30_1);
    2021           0 :     x17 = L_shr_pos(im[s * 14], SCALEFACTOR30_1);
    2022           0 :     x18 = L_shr_pos(re[s * 2], SCALEFACTOR30_1);
    2023           0 :     x19 = L_shr_pos(im[s * 2], SCALEFACTOR30_1);
    2024             : 
    2025           0 :     x20 = L_shr_pos(re[s * 10], SCALEFACTOR30_1);
    2026           0 :     x21 = L_shr_pos(im[s * 10], SCALEFACTOR30_1);
    2027           0 :     x22 = L_shr_pos(re[s * 28], SCALEFACTOR30_1);
    2028           0 :     x23 = L_shr_pos(im[s * 28], SCALEFACTOR30_1);
    2029           0 :     x24 = L_shr_pos(re[s * 16], SCALEFACTOR30_1);
    2030           0 :     x25 = L_shr_pos(im[s * 16], SCALEFACTOR30_1);
    2031           0 :     x26 = L_shr_pos(re[s * 4], SCALEFACTOR30_1);
    2032           0 :     x27 = L_shr_pos(im[s * 4], SCALEFACTOR30_1);
    2033           0 :     x28 = L_shr_pos(re[s * 22], SCALEFACTOR30_1);
    2034           0 :     x29 = L_shr_pos(im[s * 22], SCALEFACTOR30_1);
    2035             : 
    2036             :     /* 1. FFT5 stage */
    2037             : 
    2038             :     /* real part */
    2039           0 :     r1  = L_add(x02, x08);
    2040           0 :     r4  = L_sub(x02, x08);
    2041           0 :     r3  = L_add(x04, x06);
    2042           0 :     r2  = L_sub(x04, x06);
    2043           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    2044           0 :     r1  = L_add(r1, r3);
    2045           0 :     y00 = L_add(x00, r1);
    2046           0 :     r1  = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    2047           0 :     r3  = L_sub(r1, t);
    2048           0 :     r1  = L_add(r1, t);
    2049           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    2050           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    2051           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    2052             : 
    2053             :     /* imaginary part */
    2054           0 :     s1  = L_add(x03, x09);
    2055           0 :     s4  = L_sub(x03, x09);
    2056           0 :     s3  = L_add(x05, x07);
    2057           0 :     s2  = L_sub(x05, x07);
    2058           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    2059           0 :     s1  = L_add(s1, s3);
    2060           0 :     y01 = L_add(x01, s1);
    2061           0 :     s1  = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    2062           0 :     s3  = L_sub(s1, t);
    2063           0 :     s1  = L_add(s1, t);
    2064           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    2065           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    2066           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    2067             : 
    2068             :     /* combination */
    2069           0 :     y02 = L_add(r1, s2);
    2070           0 :     y08 = L_sub(r1, s2);
    2071           0 :     y04 = L_sub(r3, s4);
    2072           0 :     y06 = L_add(r3, s4);
    2073             : 
    2074           0 :     y03 = L_sub(s1, r2);
    2075           0 :     y09 = L_add(s1, r2);
    2076           0 :     y05 = L_add(s3, r4);
    2077           0 :     y07 = L_sub(s3, r4);
    2078             : 
    2079             :     /* 2. FFT5 stage */
    2080             : 
    2081             :     /* real part */
    2082           0 :     r1  = L_add(x12, x18);
    2083           0 :     r4  = L_sub(x12, x18);
    2084           0 :     r3  = L_add(x14, x16);
    2085           0 :     r2  = L_sub(x14, x16);
    2086           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    2087           0 :     r1  = L_add(r1, r3);
    2088           0 :     y10 = L_add(x10, r1);
    2089           0 :     r1  = L_add(y10, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    2090           0 :     r3  = L_sub(r1, t);
    2091           0 :     r1  = L_add(r1, t);
    2092           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    2093           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    2094           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    2095             : 
    2096             :     /* imaginary part */
    2097           0 :     s1  = L_add(x13, x19);
    2098           0 :     s4  = L_sub(x13, x19);
    2099           0 :     s3  = L_add(x15, x17);
    2100           0 :     s2  = L_sub(x15, x17);
    2101           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    2102           0 :     s1  = L_add(s1, s3);
    2103           0 :     y11 = L_add(x11, s1);
    2104           0 :     s1  = L_add(y11, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    2105           0 :     s3  = L_sub(s1, t);
    2106           0 :     s1  = L_add(s1, t);
    2107           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    2108           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    2109           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    2110             : 
    2111             :     /* combination */
    2112           0 :     y12 = L_add(r1, s2);
    2113           0 :     y18 = L_sub(r1, s2);
    2114           0 :     y14 = L_sub(r3, s4);
    2115           0 :     y16 = L_add(r3, s4);
    2116             : 
    2117           0 :     y13 = L_sub(s1, r2);
    2118           0 :     y19 = L_add(s1, r2);
    2119           0 :     y15 = L_add(s3, r4);
    2120           0 :     y17 = L_sub(s3, r4);
    2121             : 
    2122             :     /* 3. FFT5 stage */
    2123             : 
    2124             :     /* real part */
    2125           0 :     r1  = L_add(x22, x28);
    2126           0 :     r4  = L_sub(x22, x28);
    2127           0 :     r3  = L_add(x24, x26);
    2128           0 :     r2  = L_sub(x24, x26);
    2129           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    2130           0 :     r1  = L_add(r1, r3);
    2131           0 :     y20 = L_add(x20, r1);
    2132           0 :     r1  = L_add(y20, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    2133           0 :     r3  = L_sub(r1, t);
    2134           0 :     r1  = L_add(r1, t);
    2135           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    2136           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    2137           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    2138             : 
    2139             :     /* imaginary part */
    2140           0 :     s1  = L_add(x23, x29);
    2141           0 :     s4  = L_sub(x23, x29);
    2142           0 :     s3  = L_add(x25, x27);
    2143           0 :     s2  = L_sub(x25, x27);
    2144           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    2145           0 :     s1  = L_add(s1, s3);
    2146           0 :     y21 = L_add(x21, s1);
    2147           0 :     s1  = L_add(y21, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    2148           0 :     s3  = L_sub(s1, t);
    2149           0 :     s1  = L_add(s1, t);
    2150           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    2151           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    2152           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    2153             : 
    2154             :     /* combination */
    2155           0 :     y22 = L_add(r1, s2);
    2156           0 :     y28 = L_sub(r1, s2);
    2157           0 :     y24 = L_sub(r3, s4);
    2158           0 :     y26 = L_add(r3, s4);
    2159             : 
    2160           0 :     y23 = L_sub(s1, r2);
    2161           0 :     y29 = L_add(s1, r2);
    2162           0 :     y25 = L_add(s3, r4);
    2163           0 :     y27 = L_sub(s3, r4);
    2164             : 
    2165             :     /* 1. FFT3 stage */
    2166             : 
    2167             :     /* real part */
    2168           0 :     r1  = L_add(y10, y20);
    2169           0 :     r2  = Mpy_32_xx(L_sub(y10, y20), C31);
    2170           0 :     z00 = L_add(y00, r1);
    2171           0 :     r1  = L_sub(y00, L_shr_pos(r1, 1));
    2172             : 
    2173             :     /* imaginary part */
    2174           0 :     s1  = L_add(y11, y21);
    2175           0 :     s2  = Mpy_32_xx(L_sub(y11, y21), C31);
    2176           0 :     z01 = L_add(y01, s1);
    2177           0 :     s1  = L_sub(y01, L_shr_pos(s1, 1));
    2178             : 
    2179             :     /* combination */
    2180           0 :     z20 = L_sub(r1, s2);
    2181           0 :     z10 = L_add(r1, s2);
    2182           0 :     z21 = L_add(s1, r2);
    2183           0 :     z11 = L_sub(s1, r2);
    2184             : 
    2185             :     /* 2. FFT3 stage */
    2186             : 
    2187             :     /* real part */
    2188           0 :     r1  = L_add(y12, y22);
    2189           0 :     r2  = Mpy_32_xx(L_sub(y12, y22), C31);
    2190           0 :     z12 = L_add(y02, r1);
    2191           0 :     r1  = L_sub(y02, L_shr_pos(r1, 1));
    2192             : 
    2193             :     /* imaginary part */
    2194           0 :     s1  = L_add(y13, y23);
    2195           0 :     s2  = Mpy_32_xx(L_sub(y13, y23), C31);
    2196           0 :     z13 = L_add(y03, s1);
    2197           0 :     s1  = L_sub(y03, L_shr_pos(s1, 1));
    2198             : 
    2199             :     /* combination */
    2200           0 :     z02 = L_sub(r1, s2);
    2201           0 :     z22 = L_add(r1, s2);
    2202           0 :     z03 = L_add(s1, r2);
    2203           0 :     z23 = L_sub(s1, r2);
    2204             : 
    2205             :     /* 3. FFT3 stage */
    2206             : 
    2207             :     /* real part */
    2208           0 :     r1  = L_add(y14, y24);
    2209           0 :     r2  = Mpy_32_xx(L_sub(y14, y24), C31);
    2210           0 :     z24 = L_add(y04, r1);
    2211           0 :     r1  = L_sub(y04, L_shr_pos(r1, 1));
    2212             : 
    2213             :     /* imaginary part */
    2214           0 :     s1  = L_add(y15, y25);
    2215           0 :     s2  = Mpy_32_xx(L_sub(y15, y25), C31);
    2216           0 :     z25 = L_add(y05, s1);
    2217           0 :     s1  = L_sub(y05, L_shr_pos(s1, 1));
    2218             : 
    2219             :     /* combination */
    2220           0 :     z14 = L_sub(r1, s2);
    2221           0 :     z04 = L_add(r1, s2);
    2222           0 :     z15 = L_add(s1, r2);
    2223           0 :     z05 = L_sub(s1, r2);
    2224             : 
    2225             :     /* 4. FFT3 stage */
    2226             : 
    2227             :     /* real part */
    2228           0 :     r1  = L_add(y16, y26);
    2229           0 :     r2  = Mpy_32_xx(L_sub(y16, y26), C31);
    2230           0 :     z06 = L_add(y06, r1);
    2231           0 :     r1  = L_sub(y06, L_shr_pos(r1, 1));
    2232             : 
    2233             :     /* imaginary part */
    2234           0 :     s1  = L_add(y17, y27);
    2235           0 :     s2  = Mpy_32_xx(L_sub(y17, y27), C31);
    2236           0 :     z07 = L_add(y07, s1);
    2237           0 :     s1  = L_sub(y07, L_shr_pos(s1, 1));
    2238             : 
    2239             :     /* combination */
    2240           0 :     z26 = L_sub(r1, s2);
    2241           0 :     z16 = L_add(r1, s2);
    2242           0 :     z27 = L_add(s1, r2);
    2243           0 :     z17 = L_sub(s1, r2);
    2244             : 
    2245             :     /* 5. FFT3 stage */
    2246             : 
    2247             :     /* real part */
    2248           0 :     r1  = L_add(y18, y28);
    2249           0 :     r2  = Mpy_32_xx(L_sub(y18, y28), C31);
    2250           0 :     z18 = L_add(y08, r1);
    2251           0 :     r1  = L_sub(y08, L_shr_pos(r1, 1));
    2252             : 
    2253             :     /* imaginary part */
    2254           0 :     s1  = L_add(y19, y29);
    2255           0 :     s2  = Mpy_32_xx(L_sub(y19, y29), C31);
    2256           0 :     z19 = L_add(y09, s1);
    2257           0 :     s1  = L_sub(y09, L_shr_pos(s1, 1));
    2258             : 
    2259             :     /* combination */
    2260           0 :     z08 = L_sub(r1, s2);
    2261           0 :     z28 = L_add(r1, s2);
    2262           0 :     z09 = L_add(s1, r2);
    2263           0 :     z29 = L_sub(s1, r2);
    2264             : 
    2265             :     /* 2. FFT15 stage */
    2266           0 :     x00 = L_shr_pos(re[s * 15], SCALEFACTOR30_1);
    2267           0 :     x01 = L_shr_pos(im[s * 15], SCALEFACTOR30_1);
    2268           0 :     x02 = L_shr_pos(re[s * 3], SCALEFACTOR30_1);
    2269           0 :     x03 = L_shr_pos(im[s * 3], SCALEFACTOR30_1);
    2270           0 :     x04 = L_shr_pos(re[s * 21], SCALEFACTOR30_1);
    2271           0 :     x05 = L_shr_pos(im[s * 21], SCALEFACTOR30_1);
    2272           0 :     x06 = L_shr_pos(re[s * 9], SCALEFACTOR30_1);
    2273           0 :     x07 = L_shr_pos(im[s * 9], SCALEFACTOR30_1);
    2274           0 :     x08 = L_shr_pos(re[s * 27], SCALEFACTOR30_1);
    2275           0 :     x09 = L_shr_pos(im[s * 27], SCALEFACTOR30_1);
    2276             : 
    2277           0 :     x10 = L_shr_pos(re[s * 5], SCALEFACTOR30_1);
    2278           0 :     x11 = L_shr_pos(im[s * 5], SCALEFACTOR30_1);
    2279           0 :     x12 = L_shr_pos(re[s * 23], SCALEFACTOR30_1);
    2280           0 :     x13 = L_shr_pos(im[s * 23], SCALEFACTOR30_1);
    2281           0 :     x14 = L_shr_pos(re[s * 11], SCALEFACTOR30_1);
    2282           0 :     x15 = L_shr_pos(im[s * 11], SCALEFACTOR30_1);
    2283           0 :     x16 = L_shr_pos(re[s * 29], SCALEFACTOR30_1);
    2284           0 :     x17 = L_shr_pos(im[s * 29], SCALEFACTOR30_1);
    2285           0 :     x18 = L_shr_pos(re[s * 17], SCALEFACTOR30_1);
    2286           0 :     x19 = L_shr_pos(im[s * 17], SCALEFACTOR30_1);
    2287             : 
    2288           0 :     x20 = L_shr_pos(re[s * 25], SCALEFACTOR30_1);
    2289           0 :     x21 = L_shr_pos(im[s * 25], SCALEFACTOR30_1);
    2290           0 :     x22 = L_shr_pos(re[s * 13], SCALEFACTOR30_1);
    2291           0 :     x23 = L_shr_pos(im[s * 13], SCALEFACTOR30_1);
    2292           0 :     x24 = L_shr_pos(re[s * 1], SCALEFACTOR30_1);
    2293           0 :     x25 = L_shr_pos(im[s * 1], SCALEFACTOR30_1);
    2294           0 :     x26 = L_shr_pos(re[s * 19], SCALEFACTOR30_1);
    2295           0 :     x27 = L_shr_pos(im[s * 19], SCALEFACTOR30_1);
    2296           0 :     x28 = L_shr_pos(re[s * 7], SCALEFACTOR30_1);
    2297           0 :     x29 = L_shr_pos(im[s * 7], SCALEFACTOR30_1);
    2298             : 
    2299             :     /* 1. FFT5 stage */
    2300             : 
    2301             :     /* real part */
    2302           0 :     r1  = L_add(x02, x08);
    2303           0 :     r4  = L_sub(x02, x08);
    2304           0 :     r3  = L_add(x04, x06);
    2305           0 :     r2  = L_sub(x04, x06);
    2306           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    2307           0 :     r1  = L_add(r1, r3);
    2308           0 :     y00 = L_add(x00, r1);
    2309           0 :     r1  = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    2310           0 :     r3  = L_sub(r1, t);
    2311           0 :     r1  = L_add(r1, t);
    2312           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    2313           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    2314           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    2315             : 
    2316             :     /* imaginary part */
    2317           0 :     s1  = L_add(x03, x09);
    2318           0 :     s4  = L_sub(x03, x09);
    2319           0 :     s3  = L_add(x05, x07);
    2320           0 :     s2  = L_sub(x05, x07);
    2321           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    2322           0 :     s1  = L_add(s1, s3);
    2323           0 :     y01 = L_add(x01, s1);
    2324           0 :     s1  = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    2325           0 :     s3  = L_sub(s1, t);
    2326           0 :     s1  = L_add(s1, t);
    2327           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    2328           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    2329           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    2330             : 
    2331             :     /* combination */
    2332           0 :     y02 = L_add(r1, s2);
    2333           0 :     y08 = L_sub(r1, s2);
    2334           0 :     y04 = L_sub(r3, s4);
    2335           0 :     y06 = L_add(r3, s4);
    2336             : 
    2337           0 :     y03 = L_sub(s1, r2);
    2338           0 :     y09 = L_add(s1, r2);
    2339           0 :     y05 = L_add(s3, r4);
    2340           0 :     y07 = L_sub(s3, r4);
    2341             : 
    2342             :     /* 2. FFT5 stage */
    2343             : 
    2344             :     /* real part */
    2345           0 :     r1  = L_add(x12, x18);
    2346           0 :     r4  = L_sub(x12, x18);
    2347           0 :     r3  = L_add(x14, x16);
    2348           0 :     r2  = L_sub(x14, x16);
    2349           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    2350           0 :     r1  = L_add(r1, r3);
    2351           0 :     y10 = L_add(x10, r1);
    2352           0 :     r1  = L_add(y10, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    2353           0 :     r3  = L_sub(r1, t);
    2354           0 :     r1  = L_add(r1, t);
    2355           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    2356           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    2357           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    2358             : 
    2359             :     /* imaginary part */
    2360           0 :     s1  = L_add(x13, x19);
    2361           0 :     s4  = L_sub(x13, x19);
    2362           0 :     s3  = L_add(x15, x17);
    2363           0 :     s2  = L_sub(x15, x17);
    2364           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    2365           0 :     s1  = L_add(s1, s3);
    2366           0 :     y11 = L_add(x11, s1);
    2367           0 :     s1  = L_add(y11, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    2368           0 :     s3  = L_sub(s1, t);
    2369           0 :     s1  = L_add(s1, t);
    2370           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    2371           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    2372           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    2373             : 
    2374             :     /* combination */
    2375           0 :     y12 = L_add(r1, s2);
    2376           0 :     y18 = L_sub(r1, s2);
    2377           0 :     y14 = L_sub(r3, s4);
    2378           0 :     y16 = L_add(r3, s4);
    2379             : 
    2380           0 :     y13 = L_sub(s1, r2);
    2381           0 :     y19 = L_add(s1, r2);
    2382           0 :     y15 = L_add(s3, r4);
    2383           0 :     y17 = L_sub(s3, r4);
    2384             : 
    2385             :     /* 3. FFT5 stage */
    2386             : 
    2387             :     /* real part */
    2388           0 :     r1  = L_add(x22, x28);
    2389           0 :     r4  = L_sub(x22, x28);
    2390           0 :     r3  = L_add(x24, x26);
    2391           0 :     r2  = L_sub(x24, x26);
    2392           0 :     t   = Mpy_32_xx(L_sub(r1, r3), C54);
    2393           0 :     r1  = L_add(r1, r3);
    2394           0 :     y20 = L_add(x20, r1);
    2395           0 :     r1  = L_add(y20, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
    2396           0 :     r3  = L_sub(r1, t);
    2397           0 :     r1  = L_add(r1, t);
    2398           0 :     t   = Mpy_32_xx((L_add(r4, r2)), C51);
    2399           0 :     r4  = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
    2400           0 :     r2  = L_add(t, Mpy_32_xx(r2, C53));
    2401             : 
    2402             :     /* imaginary part */
    2403           0 :     s1  = L_add(x23, x29);
    2404           0 :     s4  = L_sub(x23, x29);
    2405           0 :     s3  = L_add(x25, x27);
    2406           0 :     s2  = L_sub(x25, x27);
    2407           0 :     t   = Mpy_32_xx(L_sub(s1, s3), C54);
    2408           0 :     s1  = L_add(s1, s3);
    2409           0 :     y21 = L_add(x21, s1);
    2410           0 :     s1  = L_add(y21, L_shl_pos(Mpy_32_xx(s1, C55), 1));
    2411           0 :     s3  = L_sub(s1, t);
    2412           0 :     s1  = L_add(s1, t);
    2413           0 :     t   = Mpy_32_xx(L_add(s4, s2), C51);
    2414           0 :     s4  = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
    2415           0 :     s2  = L_add(t, Mpy_32_xx(s2, C53));
    2416             : 
    2417             :     /* combination */
    2418           0 :     y22 = L_add(r1, s2);
    2419           0 :     y28 = L_sub(r1, s2);
    2420           0 :     y24 = L_sub(r3, s4);
    2421           0 :     y26 = L_add(r3, s4);
    2422             : 
    2423           0 :     y23 = L_sub(s1, r2);
    2424           0 :     y29 = L_add(s1, r2);
    2425           0 :     y25 = L_add(s3, r4);
    2426           0 :     y27 = L_sub(s3, r4);
    2427             : 
    2428             :     /* 1. FFT3 stage */
    2429             : 
    2430             :     /* real part */
    2431           0 :     r1  = L_add(y10, y20);
    2432           0 :     r2  = Mpy_32_xx(L_sub(y10, y20), C31);
    2433           0 :     z30 = L_add(y00, r1);
    2434           0 :     r1  = L_sub(y00, L_shr_pos(r1, 1));
    2435             : 
    2436             :     /* imaginary part */
    2437           0 :     s1  = L_add(y11, y21);
    2438           0 :     s2  = Mpy_32_xx(L_sub(y11, y21), C31);
    2439           0 :     z31 = L_add(y01, s1);
    2440           0 :     s1  = L_sub(y01, L_shr_pos(s1, 1));
    2441             : 
    2442             :     /* combination */
    2443           0 :     z50 = L_sub(r1, s2);
    2444           0 :     z40 = L_add(r1, s2);
    2445           0 :     z51 = L_add(s1, r2);
    2446           0 :     z41 = L_sub(s1, r2);
    2447             : 
    2448             :     /* 2. FFT3 stage */
    2449             : 
    2450             :     /* real part */
    2451           0 :     r1  = L_add(y12, y22);
    2452           0 :     r2  = Mpy_32_xx(L_sub(y12, y22), C31);
    2453           0 :     z42 = L_add(y02, r1);
    2454           0 :     r1  = L_sub(y02, L_shr_pos(r1, 1));
    2455             : 
    2456             :     /* imaginary part */
    2457           0 :     s1  = L_add(y13, y23);
    2458           0 :     s2  = Mpy_32_xx(L_sub(y13, y23), C31);
    2459           0 :     z43 = L_add(y03, s1);
    2460           0 :     s1  = L_sub(y03, L_shr_pos(s1, 1));
    2461             : 
    2462             :     /* combination */
    2463           0 :     z32 = L_sub(r1, s2);
    2464           0 :     z52 = L_add(r1, s2);
    2465           0 :     z33 = L_add(s1, r2);
    2466           0 :     z53 = L_sub(s1, r2);
    2467             : 
    2468             :     /* 3. FFT3 stage */
    2469             : 
    2470             :     /* real part */
    2471           0 :     r1  = L_add(y14, y24);
    2472           0 :     r2  = Mpy_32_xx(L_sub(y14, y24), C31);
    2473           0 :     z54 = L_add(y04, r1);
    2474           0 :     r1  = L_sub(y04, L_shr_pos(r1, 1));
    2475             : 
    2476             :     /* imaginary part */
    2477           0 :     s1  = L_add(y15, y25);
    2478           0 :     s2  = Mpy_32_xx(L_sub(y15, y25), C31);
    2479           0 :     z55 = L_add(y05, s1);
    2480           0 :     s1  = L_sub(y05, L_shr_pos(s1, 1));
    2481             : 
    2482             :     /* combination */
    2483           0 :     z44 = L_sub(r1, s2);
    2484           0 :     z34 = L_add(r1, s2);
    2485           0 :     z45 = L_add(s1, r2);
    2486           0 :     z35 = L_sub(s1, r2);
    2487             : 
    2488             :     /* 4. FFT3 stage */
    2489             : 
    2490             :     /* real part */
    2491           0 :     r1  = L_add(y16, y26);
    2492           0 :     r2  = Mpy_32_xx(L_sub(y16, y26), C31);
    2493           0 :     z36 = L_add(y06, r1);
    2494           0 :     r1  = L_sub(y06, L_shr_pos(r1, 1));
    2495             : 
    2496             :     /* imaginary part */
    2497           0 :     s1  = L_add(y17, y27);
    2498           0 :     s2  = Mpy_32_xx(L_sub(y17, y27), C31);
    2499           0 :     z37 = L_add(y07, s1);
    2500           0 :     s1  = L_sub(y07, L_shr_pos(s1, 1));
    2501             : 
    2502             :     /* combination */
    2503           0 :     z56 = L_sub(r1, s2);
    2504           0 :     z46 = L_add(r1, s2);
    2505           0 :     z57 = L_add(s1, r2);
    2506           0 :     z47 = L_sub(s1, r2);
    2507             : 
    2508             :     /* 5. FFT3 stage */
    2509             : 
    2510             :     /* real part */
    2511           0 :     r1  = L_add(y18, y28);
    2512           0 :     r2  = Mpy_32_xx(L_sub(y18, y28), C31);
    2513           0 :     z48 = L_add(y08, r1);
    2514           0 :     r1  = L_sub(y08, L_shr_pos(r1, 1));
    2515             : 
    2516             :     /* imaginary part */
    2517           0 :     s1  = L_add(y19, y29);
    2518           0 :     s2  = Mpy_32_xx(L_sub(y19, y29), C31);
    2519           0 :     z49 = L_add(y09, s1);
    2520           0 :     s1  = L_sub(y09, L_shr_pos(s1, 1));
    2521             : 
    2522             :     /* combination */
    2523           0 :     z38 = L_sub(r1, s2);
    2524           0 :     z58 = L_add(r1, s2);
    2525           0 :     z39 = L_add(s1, r2);
    2526           0 :     z59 = L_sub(s1, r2);
    2527             : 
    2528             :     /* 1. FFT2 stage */
    2529           0 :     r1   = L_shr_pos(z00, SCALEFACTOR30_2);
    2530           0 :     r2   = L_shr_pos(z30, SCALEFACTOR30_2);
    2531           0 :     r3   = L_shr_pos(z01, SCALEFACTOR30_2);
    2532           0 :     r4   = L_shr_pos(z31, SCALEFACTOR30_2);
    2533           0 :     *rel = L_add(r1, r2);
    2534           0 :     move32();
    2535           0 :     *reh = L_sub(r1, r2);
    2536           0 :     move32();
    2537           0 :     *iml = L_add(r3, r4);
    2538           0 :     move32();
    2539           0 :     *imh = L_sub(r3, r4);
    2540           0 :     move32();
    2541           0 :     rel += s, reh += s, iml += s;
    2542           0 :     imh += s;
    2543             : 
    2544             :     /* 2. FFT2 stage */
    2545           0 :     r1   = L_shr_pos(z16, SCALEFACTOR30_2);
    2546           0 :     r2   = L_shr_pos(z46, SCALEFACTOR30_2);
    2547           0 :     r3   = L_shr_pos(z17, SCALEFACTOR30_2);
    2548           0 :     r4   = L_shr_pos(z47, SCALEFACTOR30_2);
    2549           0 :     *reh = L_add(r1, r2);
    2550           0 :     move32();
    2551           0 :     *rel = L_sub(r1, r2);
    2552           0 :     move32();
    2553           0 :     *imh = L_add(r3, r4);
    2554           0 :     move32();
    2555           0 :     *iml = L_sub(r3, r4);
    2556           0 :     move32();
    2557           0 :     rel += s, reh += s, iml += s;
    2558           0 :     imh += s;
    2559             : 
    2560             :     /* 3. FFT2 stage */
    2561           0 :     r1   = L_shr_pos(z02, SCALEFACTOR30_2);
    2562           0 :     r2   = L_shr_pos(z32, SCALEFACTOR30_2);
    2563           0 :     r3   = L_shr_pos(z03, SCALEFACTOR30_2);
    2564           0 :     r4   = L_shr_pos(z33, SCALEFACTOR30_2);
    2565           0 :     *rel = L_add(r1, r2);
    2566           0 :     move32();
    2567           0 :     *reh = L_sub(r1, r2);
    2568           0 :     move32();
    2569           0 :     *iml = L_add(r3, r4);
    2570           0 :     move32();
    2571           0 :     *imh = L_sub(r3, r4);
    2572           0 :     move32();
    2573           0 :     rel += s, reh += s, iml += s;
    2574           0 :     imh += s;
    2575             : 
    2576             :     /* 4. FFT2 stage */
    2577           0 :     r1   = L_shr_pos(z18, SCALEFACTOR30_2);
    2578           0 :     r2   = L_shr_pos(z48, SCALEFACTOR30_2);
    2579           0 :     r3   = L_shr_pos(z19, SCALEFACTOR30_2);
    2580           0 :     r4   = L_shr_pos(z49, SCALEFACTOR30_2);
    2581           0 :     *reh = L_add(r1, r2);
    2582           0 :     move32();
    2583           0 :     *rel = L_sub(r1, r2);
    2584           0 :     move32();
    2585           0 :     *imh = L_add(r3, r4);
    2586           0 :     move32();
    2587           0 :     *iml = L_sub(r3, r4);
    2588           0 :     move32();
    2589           0 :     rel += s, reh += s, iml += s;
    2590           0 :     imh += s;
    2591             : 
    2592             :     /* 5. FFT2 stage */
    2593           0 :     r1   = L_shr_pos(z04, SCALEFACTOR30_2);
    2594           0 :     r2   = L_shr_pos(z34, SCALEFACTOR30_2);
    2595           0 :     r3   = L_shr_pos(z05, SCALEFACTOR30_2);
    2596           0 :     r4   = L_shr_pos(z35, SCALEFACTOR30_2);
    2597           0 :     *rel = L_add(r1, r2);
    2598           0 :     move32();
    2599           0 :     *reh = L_sub(r1, r2);
    2600           0 :     move32();
    2601           0 :     *iml = L_add(r3, r4);
    2602           0 :     move32();
    2603           0 :     *imh = L_sub(r3, r4);
    2604           0 :     move32();
    2605           0 :     rel += s, reh += s, iml += s;
    2606           0 :     imh += s;
    2607             : 
    2608             :     /* 6. FFT2 stage */
    2609           0 :     r1   = L_shr_pos(z20, SCALEFACTOR30_2);
    2610           0 :     r2   = L_shr_pos(z50, SCALEFACTOR30_2);
    2611           0 :     r3   = L_shr_pos(z21, SCALEFACTOR30_2);
    2612           0 :     r4   = L_shr_pos(z51, SCALEFACTOR30_2);
    2613           0 :     *reh = L_add(r1, r2);
    2614           0 :     move32();
    2615           0 :     *rel = L_sub(r1, r2);
    2616           0 :     move32();
    2617           0 :     *imh = L_add(r3, r4);
    2618           0 :     move32();
    2619           0 :     *iml = L_sub(r3, r4);
    2620           0 :     move32();
    2621           0 :     rel += s, reh += s, iml += s;
    2622           0 :     imh += s;
    2623             : 
    2624             :     /* 7. FFT2 stage */
    2625           0 :     r1   = L_shr_pos(z06, SCALEFACTOR30_2);
    2626           0 :     r2   = L_shr_pos(z36, SCALEFACTOR30_2);
    2627           0 :     r3   = L_shr_pos(z07, SCALEFACTOR30_2);
    2628           0 :     r4   = L_shr_pos(z37, SCALEFACTOR30_2);
    2629           0 :     *rel = L_add(r1, r2);
    2630           0 :     move32();
    2631           0 :     *reh = L_sub(r1, r2);
    2632           0 :     move32();
    2633           0 :     *iml = L_add(r3, r4);
    2634           0 :     move32();
    2635           0 :     *imh = L_sub(r3, r4);
    2636           0 :     move32();
    2637           0 :     rel += s, reh += s, iml += s;
    2638           0 :     imh += s;
    2639             : 
    2640             :     /* 8. FFT2 stage */
    2641           0 :     r1   = L_shr_pos(z22, SCALEFACTOR30_2);
    2642           0 :     r2   = L_shr_pos(z52, SCALEFACTOR30_2);
    2643           0 :     r3   = L_shr_pos(z23, SCALEFACTOR30_2);
    2644           0 :     r4   = L_shr_pos(z53, SCALEFACTOR30_2);
    2645           0 :     *reh = L_add(r1, r2);
    2646           0 :     move32();
    2647           0 :     *rel = L_sub(r1, r2);
    2648           0 :     move32();
    2649           0 :     *imh = L_add(r3, r4);
    2650           0 :     move32();
    2651           0 :     *iml = L_sub(r3, r4);
    2652           0 :     move32();
    2653           0 :     rel += s, reh += s, iml += s;
    2654           0 :     imh += s;
    2655             : 
    2656             :     /* 9. FFT2 stage */
    2657           0 :     r1   = L_shr_pos(z08, SCALEFACTOR30_2);
    2658           0 :     r2   = L_shr_pos(z38, SCALEFACTOR30_2);
    2659           0 :     r3   = L_shr_pos(z09, SCALEFACTOR30_2);
    2660           0 :     r4   = L_shr_pos(z39, SCALEFACTOR30_2);
    2661           0 :     *rel = L_add(r1, r2);
    2662           0 :     move32();
    2663           0 :     *reh = L_sub(r1, r2);
    2664           0 :     move32();
    2665           0 :     *iml = L_add(r3, r4);
    2666           0 :     move32();
    2667           0 :     *imh = L_sub(r3, r4);
    2668           0 :     move32();
    2669           0 :     rel += s, reh += s, iml += s;
    2670           0 :     imh += s;
    2671             : 
    2672             :     /* 10. FFT2 stage */
    2673           0 :     r1   = L_shr_pos(z24, SCALEFACTOR30_2);
    2674           0 :     r2   = L_shr_pos(z54, SCALEFACTOR30_2);
    2675           0 :     r3   = L_shr_pos(z25, SCALEFACTOR30_2);
    2676           0 :     r4   = L_shr_pos(z55, SCALEFACTOR30_2);
    2677           0 :     *reh = L_add(r1, r2);
    2678           0 :     move32();
    2679           0 :     *rel = L_sub(r1, r2);
    2680           0 :     move32();
    2681           0 :     *imh = L_add(r3, r4);
    2682           0 :     move32();
    2683           0 :     *iml = L_sub(r3, r4);
    2684           0 :     move32();
    2685           0 :     rel += s, reh += s, iml += s;
    2686           0 :     imh += s;
    2687             : 
    2688             :     /* 11. FFT2 stage */
    2689           0 :     r1   = L_shr_pos(z10, SCALEFACTOR30_2);
    2690           0 :     r2   = L_shr_pos(z40, SCALEFACTOR30_2);
    2691           0 :     r3   = L_shr_pos(z11, SCALEFACTOR30_2);
    2692           0 :     r4   = L_shr_pos(z41, SCALEFACTOR30_2);
    2693           0 :     *rel = L_add(r1, r2);
    2694           0 :     move32();
    2695           0 :     *reh = L_sub(r1, r2);
    2696           0 :     move32();
    2697           0 :     *iml = L_add(r3, r4);
    2698           0 :     move32();
    2699           0 :     *imh = L_sub(r3, r4);
    2700           0 :     move32();
    2701           0 :     rel += s, reh += s, iml += s;
    2702           0 :     imh += s;
    2703             : 
    2704             :     /* 12. FFT2 stage */
    2705           0 :     r1   = L_shr_pos(z26, SCALEFACTOR30_2);
    2706           0 :     r2   = L_shr_pos(z56, SCALEFACTOR30_2);
    2707           0 :     r3   = L_shr_pos(z27, SCALEFACTOR30_2);
    2708           0 :     r4   = L_shr_pos(z57, SCALEFACTOR30_2);
    2709           0 :     *reh = L_add(r1, r2);
    2710           0 :     move32();
    2711           0 :     *rel = L_sub(r1, r2);
    2712           0 :     move32();
    2713           0 :     *imh = L_add(r3, r4);
    2714           0 :     move32();
    2715           0 :     *iml = L_sub(r3, r4);
    2716           0 :     move32();
    2717           0 :     rel += s, reh += s, iml += s;
    2718           0 :     imh += s;
    2719             : 
    2720             :     /* 13. FFT2 stage */
    2721           0 :     r1   = L_shr_pos(z12, SCALEFACTOR30_2);
    2722           0 :     r2   = L_shr_pos(z42, SCALEFACTOR30_2);
    2723           0 :     r3   = L_shr_pos(z13, SCALEFACTOR30_2);
    2724           0 :     r4   = L_shr_pos(z43, SCALEFACTOR30_2);
    2725           0 :     *rel = L_add(r1, r2);
    2726           0 :     move32();
    2727           0 :     *reh = L_sub(r1, r2);
    2728           0 :     move32();
    2729           0 :     *iml = L_add(r3, r4);
    2730           0 :     move32();
    2731           0 :     *imh = L_sub(r3, r4);
    2732           0 :     move32();
    2733           0 :     rel += s, reh += s, iml += s;
    2734           0 :     imh += s;
    2735             : 
    2736             :     /* 14. FFT2 stage */
    2737           0 :     r1   = L_shr_pos(z28, SCALEFACTOR30_2);
    2738           0 :     r2   = L_shr_pos(z58, SCALEFACTOR30_2);
    2739           0 :     r3   = L_shr_pos(z29, SCALEFACTOR30_2);
    2740           0 :     r4   = L_shr_pos(z59, SCALEFACTOR30_2);
    2741           0 :     *reh = L_add(r1, r2);
    2742           0 :     move32();
    2743           0 :     *rel = L_sub(r1, r2);
    2744           0 :     move32();
    2745           0 :     *imh = L_add(r3, r4);
    2746           0 :     move32();
    2747           0 :     *iml = L_sub(r3, r4);
    2748           0 :     move32();
    2749           0 :     rel += s, reh += s, iml += s;
    2750           0 :     imh += s;
    2751             : 
    2752             :     /* 15. FFT2 stage */
    2753           0 :     r1   = L_shr_pos(z14, SCALEFACTOR30_2);
    2754           0 :     r2   = L_shr_pos(z44, SCALEFACTOR30_2);
    2755           0 :     r3   = L_shr_pos(z15, SCALEFACTOR30_2);
    2756           0 :     r4   = L_shr_pos(z45, SCALEFACTOR30_2);
    2757           0 :     *rel = L_add(r1, r2);
    2758           0 :     move32();
    2759           0 :     *reh = L_sub(r1, r2);
    2760           0 :     move32();
    2761           0 :     *iml = L_add(r3, r4);
    2762           0 :     move32();
    2763           0 :     *imh = L_sub(r3, r4);
    2764           0 :     move32();
    2765           0 :     rel += s, reh += s, iml += s;
    2766           0 :     imh += s;
    2767             : 
    2768             :     Dyn_Mem_Deluxe_Out();
    2769           0 : }
    2770             : 
    2771             : /**
    2772             :  * \brief    Performs a complex 32-point FFT.
    2773             :  *           The FFT is performed in place. The result of the FFT
    2774             :  *           is scaled by SCALEFACTOR32 bits.
    2775             :  *
    2776             :  *           WOPS with 32x16 bit multiplications:  752 cycles
    2777             :  *
    2778             :  * \param    [i/o] re    real input / output
    2779             :  * \param    [i/o] im    imag input / output
    2780             :  * \param    [i  ] s     stride real and imag input / output
    2781             :  *
    2782             :  * \return   void
    2783             :  */
    2784             : 
    2785             : 
    2786           0 : static void fft32(Word32 *re, Word32 *im, Word16 s)
    2787             : {
    2788             :     Dyn_Mem_Deluxe_In(Word32 as, bs; Word32 x00, x01, x02, x03, x04, x05, x06, x07;
    2789             :                       Word32 x08, x09, x10, x11, x12, x13, x14, x15; Word32 t00, t01, t02, t03, t04, t05, t06, t07;
    2790             :                       Word32 t08, t09, t10, t11, t12, t13, t14, t15; Word32 s00, s01, s02, s03, s04, s05, s06, s07;
    2791             :                       Word32 s08, s09, s10, s11, s12, s13, s14, s15;
    2792             : 
    2793             :                       Word32 y00, y01, y02, y03, y04, y05, y06, y07; Word32 y08, y09, y10, y11, y12, y13, y14, y15;
    2794             :                       Word32 y16, y17, y18, y19, y20, y21, y22, y23; Word32 y24, y25, y26, y27, y28, y29, y30, y31;
    2795             :                       Word32 y32, y33, y34, y35, y36, y37, y38, y39; Word32 y40, y41, y42, y43, y44, y45, y46, y47;
    2796             :                       Word32 y48, y49, y50, y51, y52, y53, y54, y55; Word32 y56, y57, y58, y59, y60, y61, y62, y63;);
    2797             : 
    2798             :     /* 1. FFT8 stage */
    2799           0 :     x00 = L_shr_pos(re[s * 0], SCALEFACTOR32_1);
    2800           0 :     x01 = L_shr_pos(im[s * 0], SCALEFACTOR32_1);
    2801           0 :     x02 = L_shr_pos(re[s * 4], SCALEFACTOR32_1);
    2802           0 :     x03 = L_shr_pos(im[s * 4], SCALEFACTOR32_1);
    2803           0 :     x04 = L_shr_pos(re[s * 8], SCALEFACTOR32_1);
    2804           0 :     x05 = L_shr_pos(im[s * 8], SCALEFACTOR32_1);
    2805           0 :     x06 = L_shr_pos(re[s * 12], SCALEFACTOR32_1);
    2806           0 :     x07 = L_shr_pos(im[s * 12], SCALEFACTOR32_1);
    2807           0 :     x08 = L_shr_pos(re[s * 16], SCALEFACTOR32_1);
    2808           0 :     x09 = L_shr_pos(im[s * 16], SCALEFACTOR32_1);
    2809           0 :     x10 = L_shr_pos(re[s * 20], SCALEFACTOR32_1);
    2810           0 :     x11 = L_shr_pos(im[s * 20], SCALEFACTOR32_1);
    2811           0 :     x12 = L_shr_pos(re[s * 24], SCALEFACTOR32_1);
    2812           0 :     x13 = L_shr_pos(im[s * 24], SCALEFACTOR32_1);
    2813           0 :     x14 = L_shr_pos(re[s * 28], SCALEFACTOR32_1);
    2814           0 :     x15 = L_shr_pos(im[s * 28], SCALEFACTOR32_1);
    2815             : 
    2816           0 :     t00 = L_add(x00, x08);
    2817           0 :     t02 = L_sub(x00, x08);
    2818           0 :     t01 = L_add(x01, x09);
    2819           0 :     t03 = L_sub(x01, x09);
    2820           0 :     t04 = L_add(x02, x10);
    2821           0 :     t06 = L_sub(x02, x10);
    2822           0 :     t05 = L_add(x03, x11);
    2823           0 :     t07 = L_sub(x03, x11);
    2824           0 :     t08 = L_add(x04, x12);
    2825           0 :     t10 = L_sub(x04, x12);
    2826           0 :     t09 = L_add(x05, x13);
    2827           0 :     t11 = L_sub(x05, x13);
    2828           0 :     t12 = L_add(x06, x14);
    2829           0 :     t14 = L_sub(x06, x14);
    2830           0 :     t13 = L_add(x07, x15);
    2831           0 :     t15 = L_sub(x07, x15);
    2832             : 
    2833             :     /* Pre-additions and core multiplications */
    2834           0 :     s00 = L_add(t00, t08);
    2835           0 :     s04 = L_sub(t00, t08);
    2836           0 :     s01 = L_add(t01, t09);
    2837           0 :     s05 = L_sub(t01, t09);
    2838           0 :     s08 = L_sub(t02, t11);
    2839           0 :     s10 = L_add(t02, t11);
    2840           0 :     s09 = L_add(t03, t10);
    2841           0 :     s11 = L_sub(t03, t10);
    2842           0 :     s02 = L_add(t04, t12);
    2843           0 :     s07 = L_sub(t04, t12);
    2844           0 :     s03 = L_add(t05, t13);
    2845           0 :     s06 = L_sub(t13, t05);
    2846           0 :     t01 = L_add(t06, t14);
    2847           0 :     t02 = L_sub(t06, t14);
    2848           0 :     t00 = L_add(t07, t15);
    2849           0 :     t03 = L_sub(t07, t15);
    2850             : 
    2851           0 :     Mpy3_0(s12, s13, s14, s15, t00, t01, t02, t03);
    2852             : 
    2853             :     /* Post-additions */
    2854           0 :     y00 = L_add(s00, s02);
    2855           0 :     y08 = L_sub(s00, s02);
    2856           0 :     y01 = L_add(s01, s03);
    2857           0 :     y09 = L_sub(s01, s03);
    2858           0 :     y04 = L_sub(s04, s06);
    2859           0 :     y12 = L_add(s04, s06);
    2860           0 :     y05 = L_sub(s05, s07);
    2861           0 :     y13 = L_add(s05, s07);
    2862           0 :     y06 = L_add(s08, s14);
    2863           0 :     y14 = L_sub(s08, s14);
    2864           0 :     y07 = L_add(s09, s15);
    2865           0 :     y15 = L_sub(s09, s15);
    2866           0 :     y02 = L_add(s10, s12);
    2867           0 :     y10 = L_sub(s10, s12);
    2868           0 :     y03 = L_add(s11, s13);
    2869           0 :     y11 = L_sub(s11, s13);
    2870             : 
    2871             :     /* 2. FFT8 stage */
    2872           0 :     x00 = L_shr_pos(re[s * 1], SCALEFACTOR32_1);
    2873           0 :     x01 = L_shr_pos(im[s * 1], SCALEFACTOR32_1);
    2874           0 :     x02 = L_shr_pos(re[s * 5], SCALEFACTOR32_1);
    2875           0 :     x03 = L_shr_pos(im[s * 5], SCALEFACTOR32_1);
    2876           0 :     x04 = L_shr_pos(re[s * 9], SCALEFACTOR32_1);
    2877           0 :     x05 = L_shr_pos(im[s * 9], SCALEFACTOR32_1);
    2878           0 :     x06 = L_shr_pos(re[s * 13], SCALEFACTOR32_1);
    2879           0 :     x07 = L_shr_pos(im[s * 13], SCALEFACTOR32_1);
    2880           0 :     x08 = L_shr_pos(re[s * 17], SCALEFACTOR32_1);
    2881           0 :     x09 = L_shr_pos(im[s * 17], SCALEFACTOR32_1);
    2882           0 :     x10 = L_shr_pos(re[s * 21], SCALEFACTOR32_1);
    2883           0 :     x11 = L_shr_pos(im[s * 21], SCALEFACTOR32_1);
    2884           0 :     x12 = L_shr_pos(re[s * 25], SCALEFACTOR32_1);
    2885           0 :     x13 = L_shr_pos(im[s * 25], SCALEFACTOR32_1);
    2886           0 :     x14 = L_shr_pos(re[s * 29], SCALEFACTOR32_1);
    2887           0 :     x15 = L_shr_pos(im[s * 29], SCALEFACTOR32_1);
    2888             : 
    2889           0 :     t00 = L_add(x00, x08);
    2890           0 :     t02 = L_sub(x00, x08);
    2891           0 :     t01 = L_add(x01, x09);
    2892           0 :     t03 = L_sub(x01, x09);
    2893           0 :     t04 = L_add(x02, x10);
    2894           0 :     t06 = L_sub(x02, x10);
    2895           0 :     t05 = L_add(x03, x11);
    2896           0 :     t07 = L_sub(x03, x11);
    2897           0 :     t08 = L_add(x04, x12);
    2898           0 :     t10 = L_sub(x04, x12);
    2899           0 :     t09 = L_add(x05, x13);
    2900           0 :     t11 = L_sub(x05, x13);
    2901           0 :     t12 = L_add(x06, x14);
    2902           0 :     t14 = L_sub(x06, x14);
    2903           0 :     t13 = L_add(x07, x15);
    2904           0 :     t15 = L_sub(x07, x15);
    2905             : 
    2906             :     /* Pre-additions and core multiplications */
    2907           0 :     s00 = L_add(t00, t08);
    2908           0 :     s04 = L_sub(t00, t08);
    2909           0 :     s01 = L_add(t01, t09);
    2910           0 :     s05 = L_sub(t01, t09);
    2911           0 :     s08 = L_sub(t02, t11);
    2912           0 :     s10 = L_add(t02, t11);
    2913           0 :     s09 = L_add(t03, t10);
    2914           0 :     s11 = L_sub(t03, t10);
    2915           0 :     s02 = L_add(t04, t12);
    2916           0 :     s07 = L_sub(t04, t12);
    2917           0 :     s03 = L_add(t05, t13);
    2918           0 :     s06 = L_sub(t13, t05);
    2919           0 :     t01 = L_add(t06, t14);
    2920           0 :     t02 = L_sub(t06, t14);
    2921           0 :     t00 = L_add(t07, t15);
    2922           0 :     t03 = L_sub(t07, t15);
    2923             : 
    2924           0 :     Mpy3_0(s12, s13, s14, s15, t00, t01, t02, t03);
    2925             : 
    2926             :     /* Post-additions */
    2927           0 :     y16 = L_add(s00, s02);
    2928           0 :     y24 = L_sub(s00, s02);
    2929           0 :     y17 = L_add(s01, s03);
    2930           0 :     y25 = L_sub(s01, s03);
    2931           0 :     y20 = L_sub(s04, s06);
    2932           0 :     y28 = L_add(s04, s06);
    2933           0 :     y21 = L_sub(s05, s07);
    2934           0 :     y29 = L_add(s05, s07);
    2935           0 :     y22 = L_add(s08, s14);
    2936           0 :     y30 = L_sub(s08, s14);
    2937           0 :     y23 = L_add(s09, s15);
    2938           0 :     y31 = L_sub(s09, s15);
    2939           0 :     y18 = L_add(s10, s12);
    2940           0 :     y26 = L_sub(s10, s12);
    2941           0 :     y19 = L_add(s11, s13);
    2942           0 :     y27 = L_sub(s11, s13);
    2943             : 
    2944             :     /* 3. FFT8 stage */
    2945           0 :     x00 = L_shr_pos(re[s * 2], SCALEFACTOR32_1);
    2946           0 :     x01 = L_shr_pos(im[s * 2], SCALEFACTOR32_1);
    2947           0 :     x02 = L_shr_pos(re[s * 6], SCALEFACTOR32_1);
    2948           0 :     x03 = L_shr_pos(im[s * 6], SCALEFACTOR32_1);
    2949           0 :     x04 = L_shr_pos(re[s * 10], SCALEFACTOR32_1);
    2950           0 :     x05 = L_shr_pos(im[s * 10], SCALEFACTOR32_1);
    2951           0 :     x06 = L_shr_pos(re[s * 14], SCALEFACTOR32_1);
    2952           0 :     x07 = L_shr_pos(im[s * 14], SCALEFACTOR32_1);
    2953           0 :     x08 = L_shr_pos(re[s * 18], SCALEFACTOR32_1);
    2954           0 :     x09 = L_shr_pos(im[s * 18], SCALEFACTOR32_1);
    2955           0 :     x10 = L_shr_pos(re[s * 22], SCALEFACTOR32_1);
    2956           0 :     x11 = L_shr_pos(im[s * 22], SCALEFACTOR32_1);
    2957           0 :     x12 = L_shr_pos(re[s * 26], SCALEFACTOR32_1);
    2958           0 :     x13 = L_shr_pos(im[s * 26], SCALEFACTOR32_1);
    2959           0 :     x14 = L_shr_pos(re[s * 30], SCALEFACTOR32_1);
    2960           0 :     x15 = L_shr_pos(im[s * 30], SCALEFACTOR32_1);
    2961             : 
    2962           0 :     t00 = L_add(x00, x08);
    2963           0 :     t02 = L_sub(x00, x08);
    2964           0 :     t01 = L_add(x01, x09);
    2965           0 :     t03 = L_sub(x01, x09);
    2966           0 :     t04 = L_add(x02, x10);
    2967           0 :     t06 = L_sub(x02, x10);
    2968           0 :     t05 = L_add(x03, x11);
    2969           0 :     t07 = L_sub(x03, x11);
    2970           0 :     t08 = L_add(x04, x12);
    2971           0 :     t10 = L_sub(x04, x12);
    2972           0 :     t09 = L_add(x05, x13);
    2973           0 :     t11 = L_sub(x05, x13);
    2974           0 :     t12 = L_add(x06, x14);
    2975           0 :     t14 = L_sub(x06, x14);
    2976           0 :     t13 = L_add(x07, x15);
    2977           0 :     t15 = L_sub(x07, x15);
    2978             : 
    2979             :     /* Pre-additions and core multiplications */
    2980           0 :     s00 = L_add(t00, t08);
    2981           0 :     s04 = L_sub(t00, t08);
    2982           0 :     s01 = L_add(t01, t09);
    2983           0 :     s05 = L_sub(t01, t09);
    2984           0 :     s08 = L_sub(t02, t11);
    2985           0 :     s10 = L_add(t02, t11);
    2986           0 :     s09 = L_add(t03, t10);
    2987           0 :     s11 = L_sub(t03, t10);
    2988           0 :     s02 = L_add(t04, t12);
    2989           0 :     s07 = L_sub(t04, t12);
    2990           0 :     s03 = L_add(t05, t13);
    2991           0 :     s06 = L_sub(t13, t05);
    2992           0 :     t01 = L_add(t06, t14);
    2993           0 :     t02 = L_sub(t06, t14);
    2994           0 :     t00 = L_add(t07, t15);
    2995           0 :     t03 = L_sub(t07, t15);
    2996             : 
    2997           0 :     Mpy3_0(s12, s13, s14, s15, t00, t01, t02, t03);
    2998             : 
    2999             :     /* Post-additions */
    3000           0 :     y32 = L_add(s00, s02);
    3001           0 :     y40 = L_sub(s00, s02);
    3002           0 :     y33 = L_add(s01, s03);
    3003           0 :     y41 = L_sub(s01, s03);
    3004           0 :     y36 = L_sub(s04, s06);
    3005           0 :     y44 = L_add(s04, s06);
    3006           0 :     y37 = L_sub(s05, s07);
    3007           0 :     y45 = L_add(s05, s07);
    3008           0 :     y38 = L_add(s08, s14);
    3009           0 :     y46 = L_sub(s08, s14);
    3010           0 :     y39 = L_add(s09, s15);
    3011           0 :     y47 = L_sub(s09, s15);
    3012           0 :     y34 = L_add(s10, s12);
    3013           0 :     y42 = L_sub(s10, s12);
    3014           0 :     y35 = L_add(s11, s13);
    3015           0 :     y43 = L_sub(s11, s13);
    3016             : 
    3017             :     /* 4. FFT8 stage */
    3018           0 :     x00 = L_shr_pos(re[s * 3], SCALEFACTOR32_1);
    3019           0 :     x01 = L_shr_pos(im[s * 3], SCALEFACTOR32_1);
    3020           0 :     x02 = L_shr_pos(re[s * 7], SCALEFACTOR32_1);
    3021           0 :     x03 = L_shr_pos(im[s * 7], SCALEFACTOR32_1);
    3022           0 :     x04 = L_shr_pos(re[s * 11], SCALEFACTOR32_1);
    3023           0 :     x05 = L_shr_pos(im[s * 11], SCALEFACTOR32_1);
    3024           0 :     x06 = L_shr_pos(re[s * 15], SCALEFACTOR32_1);
    3025           0 :     x07 = L_shr_pos(im[s * 15], SCALEFACTOR32_1);
    3026           0 :     x08 = L_shr_pos(re[s * 19], SCALEFACTOR32_1);
    3027           0 :     x09 = L_shr_pos(im[s * 19], SCALEFACTOR32_1);
    3028           0 :     x10 = L_shr_pos(re[s * 23], SCALEFACTOR32_1);
    3029           0 :     x11 = L_shr_pos(im[s * 23], SCALEFACTOR32_1);
    3030           0 :     x12 = L_shr_pos(re[s * 27], SCALEFACTOR32_1);
    3031           0 :     x13 = L_shr_pos(im[s * 27], SCALEFACTOR32_1);
    3032           0 :     x14 = L_shr_pos(re[s * 31], SCALEFACTOR32_1);
    3033           0 :     x15 = L_shr_pos(im[s * 31], SCALEFACTOR32_1);
    3034             : 
    3035           0 :     t00 = L_add(x00, x08);
    3036           0 :     t02 = L_sub(x00, x08);
    3037           0 :     t01 = L_add(x01, x09);
    3038           0 :     t03 = L_sub(x01, x09);
    3039           0 :     t04 = L_add(x02, x10);
    3040           0 :     t06 = L_sub(x02, x10);
    3041           0 :     t05 = L_add(x03, x11);
    3042           0 :     t07 = L_sub(x03, x11);
    3043           0 :     t08 = L_add(x04, x12);
    3044           0 :     t10 = L_sub(x04, x12);
    3045           0 :     t09 = L_add(x05, x13);
    3046           0 :     t11 = L_sub(x05, x13);
    3047           0 :     t12 = L_add(x06, x14);
    3048           0 :     t14 = L_sub(x06, x14);
    3049           0 :     t13 = L_add(x07, x15);
    3050           0 :     t15 = L_sub(x07, x15);
    3051             : 
    3052             :     /* Pre-additions and core multiplications */
    3053           0 :     s00 = L_add(t00, t08);
    3054           0 :     s04 = L_sub(t00, t08);
    3055           0 :     s01 = L_add(t01, t09);
    3056           0 :     s05 = L_sub(t01, t09);
    3057           0 :     s08 = L_sub(t02, t11);
    3058           0 :     s10 = L_add(t02, t11);
    3059           0 :     s09 = L_add(t03, t10);
    3060           0 :     s11 = L_sub(t03, t10);
    3061           0 :     s02 = L_add(t04, t12);
    3062           0 :     s07 = L_sub(t04, t12);
    3063           0 :     s03 = L_add(t05, t13);
    3064           0 :     s06 = L_sub(t13, t05);
    3065           0 :     t01 = L_add(t06, t14);
    3066           0 :     t02 = L_sub(t06, t14);
    3067           0 :     t00 = L_add(t07, t15);
    3068           0 :     t03 = L_sub(t07, t15);
    3069             : 
    3070           0 :     Mpy3_0(s12, s13, s14, s15, t00, t01, t02, t03);
    3071             : 
    3072             :     /* Post-additions */
    3073           0 :     y48 = L_add(s00, s02);
    3074           0 :     y56 = L_sub(s00, s02);
    3075           0 :     y49 = L_add(s01, s03);
    3076           0 :     y57 = L_sub(s01, s03);
    3077           0 :     y52 = L_sub(s04, s06);
    3078           0 :     y60 = L_add(s04, s06);
    3079           0 :     y53 = L_sub(s05, s07);
    3080           0 :     y61 = L_add(s05, s07);
    3081           0 :     y54 = L_add(s08, s14);
    3082           0 :     y62 = L_sub(s08, s14);
    3083           0 :     y55 = L_add(s09, s15);
    3084           0 :     y63 = L_sub(s09, s15);
    3085           0 :     y50 = L_add(s10, s12);
    3086           0 :     y58 = L_sub(s10, s12);
    3087           0 :     y51 = L_add(s11, s13);
    3088           0 :     y59 = L_sub(s11, s13);
    3089             : 
    3090             :     /* apply twiddle factors */
    3091           0 :     y00 = L_shr_pos(y00, SCALEFACTOR32_2);
    3092           0 :     y01 = L_shr_pos(y01, SCALEFACTOR32_2);
    3093           0 :     y02 = L_shr_pos(y02, SCALEFACTOR32_2);
    3094           0 :     y03 = L_shr_pos(y03, SCALEFACTOR32_2);
    3095           0 :     y04 = L_shr_pos(y04, SCALEFACTOR32_2);
    3096           0 :     y05 = L_shr_pos(y05, SCALEFACTOR32_2);
    3097           0 :     y06 = L_shr_pos(y06, SCALEFACTOR32_2);
    3098           0 :     y07 = L_shr_pos(y07, SCALEFACTOR32_2);
    3099           0 :     y08 = L_shr_pos(y08, SCALEFACTOR32_2);
    3100           0 :     y09 = L_shr_pos(y09, SCALEFACTOR32_2);
    3101           0 :     y10 = L_shr_pos(y10, SCALEFACTOR32_2);
    3102           0 :     y11 = L_shr_pos(y11, SCALEFACTOR32_2);
    3103           0 :     y12 = L_shr_pos(y12, SCALEFACTOR32_2);
    3104           0 :     y13 = L_shr_pos(y13, SCALEFACTOR32_2);
    3105           0 :     y14 = L_shr_pos(y14, SCALEFACTOR32_2);
    3106           0 :     y15 = L_shr_pos(y15, SCALEFACTOR32_2);
    3107           0 :     y16 = L_shr_pos(y16, SCALEFACTOR32_2);
    3108           0 :     y17 = L_shr_pos(y17, SCALEFACTOR32_2);
    3109           0 :     y32 = L_shr_pos(y32, SCALEFACTOR32_2);
    3110           0 :     y33 = L_shr_pos(y33, SCALEFACTOR32_2);
    3111           0 :     y48 = L_shr_pos(y48, SCALEFACTOR32_2);
    3112           0 :     y49 = L_shr_pos(y49, SCALEFACTOR32_2);
    3113           0 :     y40 = L_shr_pos(y40, SCALEFACTOR32_2);
    3114           0 :     y41 = L_shr_pos(y41, SCALEFACTOR32_2);
    3115             : 
    3116           0 :     cplxMpy3_0(y18, y19, RotVector_32_32[2 * 0 + 0], RotVector_32_32[2 * 0 + 1]);
    3117           0 :     cplxMpy3_0(y20, y21, RotVector_32_32[2 * 1 + 0], RotVector_32_32[2 * 1 + 1]);
    3118           0 :     cplxMpy3_0(y22, y23, RotVector_32_32[2 * 2 + 0], RotVector_32_32[2 * 2 + 1]);
    3119           0 :     cplxMpy3_0(y24, y25, RotVector_32_32[2 * 3 + 0], RotVector_32_32[2 * 3 + 1]);
    3120           0 :     cplxMpy3_0(y26, y27, RotVector_32_32[2 * 4 + 0], RotVector_32_32[2 * 4 + 1]);
    3121           0 :     cplxMpy3_0(y28, y29, RotVector_32_32[2 * 5 + 0], RotVector_32_32[2 * 5 + 1]);
    3122           0 :     cplxMpy3_0(y30, y31, RotVector_32_32[2 * 6 + 0], RotVector_32_32[2 * 6 + 1]);
    3123           0 :     cplxMpy3_0(y34, y35, RotVector_32_32[2 * 7 + 0], RotVector_32_32[2 * 7 + 1]);
    3124           0 :     cplxMpy3_0(y36, y37, RotVector_32_32[2 * 8 + 0], RotVector_32_32[2 * 8 + 1]);
    3125           0 :     cplxMpy3_0(y38, y39, RotVector_32_32[2 * 9 + 0], RotVector_32_32[2 * 9 + 1]);
    3126           0 :     cplxMpy3_0(y42, y43, RotVector_32_32[2 * 10 + 0], RotVector_32_32[2 * 10 + 1]);
    3127           0 :     cplxMpy3_0(y44, y45, RotVector_32_32[2 * 11 + 0], RotVector_32_32[2 * 11 + 1]);
    3128           0 :     cplxMpy3_0(y46, y47, RotVector_32_32[2 * 12 + 0], RotVector_32_32[2 * 12 + 1]);
    3129           0 :     cplxMpy3_0(y50, y51, RotVector_32_32[2 * 13 + 0], RotVector_32_32[2 * 13 + 1]);
    3130           0 :     cplxMpy3_0(y52, y53, RotVector_32_32[2 * 14 + 0], RotVector_32_32[2 * 14 + 1]);
    3131           0 :     cplxMpy3_0(y54, y55, RotVector_32_32[2 * 15 + 0], RotVector_32_32[2 * 15 + 1]);
    3132           0 :     cplxMpy3_0(y56, y57, RotVector_32_32[2 * 16 + 0], RotVector_32_32[2 * 16 + 1]);
    3133           0 :     cplxMpy3_0(y58, y59, RotVector_32_32[2 * 17 + 0], RotVector_32_32[2 * 17 + 1]);
    3134           0 :     cplxMpy3_0(y60, y61, RotVector_32_32[2 * 18 + 0], RotVector_32_32[2 * 18 + 1]);
    3135           0 :     cplxMpy3_0(y62, y63, RotVector_32_32[2 * 19 + 0], RotVector_32_32[2 * 19 + 1]);
    3136             : 
    3137             :     /* 1. FFT4 stage */
    3138             : 
    3139             :     /* Pre-additions */
    3140           0 :     t00 = L_add(y00, y32);
    3141           0 :     t02 = L_sub(y00, y32);
    3142           0 :     t01 = L_add(y01, y33);
    3143           0 :     t03 = L_sub(y01, y33);
    3144           0 :     t04 = L_add(y16, y48);
    3145           0 :     t07 = L_sub(y16, y48);
    3146           0 :     t05 = L_add(y49, y17);
    3147           0 :     t06 = L_sub(y49, y17);
    3148             : 
    3149             :     /* Post-additions */
    3150           0 :     re[s * 0] = L_add(t00, t04);
    3151           0 :     move32();
    3152           0 :     im[s * 0] = L_add(t01, t05);
    3153           0 :     move32();
    3154           0 :     re[s * 8] = L_sub(t02, t06);
    3155           0 :     move32();
    3156           0 :     im[s * 8] = L_sub(t03, t07);
    3157           0 :     move32();
    3158           0 :     re[s * 16] = L_sub(t00, t04);
    3159           0 :     move32();
    3160           0 :     im[s * 16] = L_sub(t01, t05);
    3161           0 :     move32();
    3162           0 :     re[s * 24] = L_add(t02, t06);
    3163           0 :     move32();
    3164           0 :     im[s * 24] = L_add(t03, t07);
    3165           0 :     move32();
    3166             : 
    3167             :     /* 2. FFT4 stage */
    3168             : 
    3169             :     /* Pre-additions */
    3170           0 :     t00 = L_add(y02, y34);
    3171           0 :     t02 = L_sub(y02, y34);
    3172           0 :     t01 = L_add(y03, y35);
    3173           0 :     t03 = L_sub(y03, y35);
    3174           0 :     t04 = L_add(y18, y50);
    3175           0 :     t07 = L_sub(y18, y50);
    3176           0 :     t05 = L_add(y51, y19);
    3177           0 :     t06 = L_sub(y51, y19);
    3178             : 
    3179             :     /* Post-additions */
    3180           0 :     re[s * 1] = L_add(t00, t04);
    3181           0 :     move32();
    3182           0 :     im[s * 1] = L_add(t01, t05);
    3183           0 :     move32();
    3184           0 :     re[s * 9] = L_sub(t02, t06);
    3185           0 :     move32();
    3186           0 :     im[s * 9] = L_sub(t03, t07);
    3187           0 :     move32();
    3188           0 :     re[s * 17] = L_sub(t00, t04);
    3189           0 :     move32();
    3190           0 :     im[s * 17] = L_sub(t01, t05);
    3191           0 :     move32();
    3192           0 :     re[s * 25] = L_add(t02, t06);
    3193           0 :     move32();
    3194           0 :     im[s * 25] = L_add(t03, t07);
    3195           0 :     move32();
    3196             : 
    3197             :     /* 3. FFT4 stage */
    3198             : 
    3199             :     /* Pre-additions */
    3200           0 :     t00 = L_add(y04, y36);
    3201           0 :     t02 = L_sub(y04, y36);
    3202           0 :     t01 = L_add(y05, y37);
    3203           0 :     t03 = L_sub(y05, y37);
    3204           0 :     t04 = L_add(y20, y52);
    3205           0 :     t07 = L_sub(y20, y52);
    3206           0 :     t05 = L_add(y53, y21);
    3207           0 :     t06 = L_sub(y53, y21);
    3208             : 
    3209             :     /* Post-additions */
    3210           0 :     re[s * 2] = L_add(t00, t04);
    3211           0 :     move32();
    3212           0 :     im[s * 2] = L_add(t01, t05);
    3213           0 :     move32();
    3214           0 :     re[s * 10] = L_sub(t02, t06);
    3215           0 :     move32();
    3216           0 :     im[s * 10] = L_sub(t03, t07);
    3217           0 :     move32();
    3218           0 :     re[s * 18] = L_sub(t00, t04);
    3219           0 :     move32();
    3220           0 :     im[s * 18] = L_sub(t01, t05);
    3221           0 :     move32();
    3222           0 :     re[s * 26] = L_add(t02, t06);
    3223           0 :     move32();
    3224           0 :     im[s * 26] = L_add(t03, t07);
    3225           0 :     move32();
    3226             : 
    3227             :     /* 4. FFT4 stage */
    3228             : 
    3229             :     /* Pre-additions */
    3230           0 :     t00 = L_add(y06, y38);
    3231           0 :     t02 = L_sub(y06, y38);
    3232           0 :     t01 = L_add(y07, y39);
    3233           0 :     t03 = L_sub(y07, y39);
    3234           0 :     t04 = L_add(y22, y54);
    3235           0 :     t07 = L_sub(y22, y54);
    3236           0 :     t05 = L_add(y55, y23);
    3237           0 :     t06 = L_sub(y55, y23);
    3238             : 
    3239             :     /* Post-additions */
    3240           0 :     re[s * 3] = L_add(t00, t04);
    3241           0 :     move32();
    3242           0 :     im[s * 3] = L_add(t01, t05);
    3243           0 :     move32();
    3244           0 :     re[s * 11] = L_sub(t02, t06);
    3245           0 :     move32();
    3246           0 :     im[s * 11] = L_sub(t03, t07);
    3247           0 :     move32();
    3248           0 :     re[s * 19] = L_sub(t00, t04);
    3249           0 :     move32();
    3250           0 :     im[s * 19] = L_sub(t01, t05);
    3251           0 :     move32();
    3252           0 :     re[s * 27] = L_add(t02, t06);
    3253           0 :     move32();
    3254           0 :     im[s * 27] = L_add(t03, t07);
    3255           0 :     move32();
    3256             : 
    3257             :     /* 5. FFT4 stage */
    3258             : 
    3259             :     /* Pre-additions */
    3260           0 :     t00 = L_add(y08, y41);
    3261           0 :     t02 = L_sub(y08, y41);
    3262           0 :     t01 = L_sub(y09, y40);
    3263           0 :     t03 = L_add(y09, y40);
    3264           0 :     t04 = L_add(y24, y56);
    3265           0 :     t07 = L_sub(y24, y56);
    3266           0 :     t05 = L_add(y57, y25);
    3267           0 :     t06 = L_sub(y57, y25);
    3268             : 
    3269             :     /* Post-additions */
    3270           0 :     re[s * 4] = L_add(t00, t04);
    3271           0 :     move32();
    3272           0 :     im[s * 4] = L_add(t01, t05);
    3273           0 :     move32();
    3274           0 :     re[s * 12] = L_sub(t02, t06);
    3275           0 :     move32();
    3276           0 :     im[s * 12] = L_sub(t03, t07);
    3277           0 :     move32();
    3278           0 :     re[s * 20] = L_sub(t00, t04);
    3279           0 :     move32();
    3280           0 :     im[s * 20] = L_sub(t01, t05);
    3281           0 :     move32();
    3282           0 :     re[s * 28] = L_add(t02, t06);
    3283           0 :     move32();
    3284           0 :     im[s * 28] = L_add(t03, t07);
    3285           0 :     move32();
    3286             : 
    3287             :     /* 6. FFT4 stage */
    3288             : 
    3289             :     /* Pre-additions */
    3290           0 :     t00 = L_add(y10, y42);
    3291           0 :     t02 = L_sub(y10, y42);
    3292           0 :     t01 = L_add(y11, y43);
    3293           0 :     t03 = L_sub(y11, y43);
    3294           0 :     t04 = L_add(y26, y58);
    3295           0 :     t07 = L_sub(y26, y58);
    3296           0 :     t05 = L_add(y59, y27);
    3297           0 :     t06 = L_sub(y59, y27);
    3298             : 
    3299             :     /* Post-additions */
    3300           0 :     re[s * 5] = L_add(t00, t04);
    3301           0 :     move32();
    3302           0 :     im[s * 5] = L_add(t01, t05);
    3303           0 :     move32();
    3304           0 :     re[s * 13] = L_sub(t02, t06);
    3305           0 :     move32();
    3306           0 :     im[s * 13] = L_sub(t03, t07);
    3307           0 :     move32();
    3308           0 :     re[s * 21] = L_sub(t00, t04);
    3309           0 :     move32();
    3310           0 :     im[s * 21] = L_sub(t01, t05);
    3311           0 :     move32();
    3312           0 :     re[s * 29] = L_add(t02, t06);
    3313           0 :     move32();
    3314           0 :     im[s * 29] = L_add(t03, t07);
    3315           0 :     move32();
    3316             : 
    3317             :     /* 7. FFT4 stage */
    3318             : 
    3319             :     /* Pre-additions */
    3320           0 :     t00 = L_add(y12, y44);
    3321           0 :     t02 = L_sub(y12, y44);
    3322           0 :     t01 = L_add(y13, y45);
    3323           0 :     t03 = L_sub(y13, y45);
    3324           0 :     t04 = L_add(y28, y60);
    3325           0 :     t07 = L_sub(y28, y60);
    3326           0 :     t05 = L_add(y61, y29);
    3327           0 :     t06 = L_sub(y61, y29);
    3328             : 
    3329             :     /* Post-additions */
    3330           0 :     re[s * 6] = L_add(t00, t04);
    3331           0 :     move32();
    3332           0 :     im[s * 6] = L_add(t01, t05);
    3333           0 :     move32();
    3334           0 :     re[s * 14] = L_sub(t02, t06);
    3335           0 :     move32();
    3336           0 :     im[s * 14] = L_sub(t03, t07);
    3337           0 :     move32();
    3338           0 :     re[s * 22] = L_sub(t00, t04);
    3339           0 :     move32();
    3340           0 :     im[s * 22] = L_sub(t01, t05);
    3341           0 :     move32();
    3342           0 :     re[s * 30] = L_add(t02, t06);
    3343           0 :     move32();
    3344           0 :     im[s * 30] = L_add(t03, t07);
    3345           0 :     move32();
    3346             : 
    3347             :     /* 8. FFT4 stage */
    3348             : 
    3349             :     /* Pre-additions */
    3350           0 :     t00 = L_add(y14, y46);
    3351           0 :     t02 = L_sub(y14, y46);
    3352           0 :     t01 = L_add(y15, y47);
    3353           0 :     t03 = L_sub(y15, y47);
    3354           0 :     t04 = L_add(y30, y62);
    3355           0 :     t07 = L_sub(y30, y62);
    3356           0 :     t05 = L_add(y63, y31);
    3357           0 :     t06 = L_sub(y63, y31);
    3358             : 
    3359             :     /* Post-additions */
    3360           0 :     re[s * 7] = L_add(t00, t04);
    3361           0 :     move32();
    3362           0 :     im[s * 7] = L_add(t01, t05);
    3363           0 :     move32();
    3364           0 :     re[s * 15] = L_sub(t02, t06);
    3365           0 :     move32();
    3366           0 :     im[s * 15] = L_sub(t03, t07);
    3367           0 :     move32();
    3368           0 :     re[s * 23] = L_sub(t00, t04);
    3369           0 :     move32();
    3370           0 :     im[s * 23] = L_sub(t01, t05);
    3371           0 :     move32();
    3372           0 :     re[s * 31] = L_add(t02, t06);
    3373           0 :     move32();
    3374           0 :     im[s * 31] = L_add(t03, t07);
    3375           0 :     move32();
    3376             : 
    3377             :     Dyn_Mem_Deluxe_Out();
    3378           0 : }
    3379             : 
    3380             : /**
    3381             :  * \brief    Function performs a complex 40-point FFT
    3382             :  *           The FFT is performed inplace. The result of the FFT
    3383             :  *           is scaled by SCALEFACTOR40 bits.
    3384             :  *
    3385             :  * \param    [i/o] re     real part
    3386             :  * \param    [i/o] im     imag part
    3387             :  * \param    [i  ] sx     stride of real and imag part
    3388             :  * \param    [tmp] x      32-bit work buffer; fft40 accesses x[0] .. x[79]
    3389             :  * \return void
    3390             :  */
    3391             : 
    3392             : 
    3393             : 
    3394           0 : static void fft40(Word32 *re, Word32 *im, Word16 sx, Word32 *x) /* in-place 40-point complex FFT on re/im (stride sx): 8 fft5 calls, then 5 eight-point combine steps; x is a work buffer accessed at x[0] .. x[79] */
    3395             : {
    3396             :     Dyn_Mem_Deluxe_In(const Word32 *W; Word16 dim1, dim2; Counter i, j;
    3397             :                       Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;
    3398             :                       Word32 t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15;
    3399             :                       Word32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09, s10, s11, s12, s13, s14, s15;);
    3400             :     /* Decomposition sizes: 40 = dim1 * dim2 (first-stage length 5, second-stage length 8). */
    3401           0 :     dim1 = 5;
    3402           0 :     move16();
    3403           0 :     dim2 = 8;
    3404           0 :     move16();
    3405             :     /* Rotation factors passed to cplxMpy4_8_2 in the second stage; only read when i > 0. */
    3406           0 :     W = RotVector_40_32;
    3407             :     /* Gather: x[2*(i*dim1+j)] and x[2*(i*dim1+j)+1] receive re and im of input sample i + j*dim2, so row i of x holds dim1 interleaved (re, im) pairs. */
    3408           0 :     FOR (i = 0; i < dim2; i++)
    3409             :     {
    3410           0 :         FOR (j = 0; j < dim1; j++)
    3411             :         {
    3412           0 :             x[2 * i * dim1 + 2 * j] = re[sx * i + sx * j * dim2];
    3413           0 :             move32();
    3414           0 :             x[2 * i * dim1 + 2 * j + 1] = im[sx * i + sx * j * dim2];
    3415           0 :             move32();
    3416             :         }
    3417             :     }
    3418             :     /* First stage: one fft5 call per row of x; re and im pointers are one element apart and the stride argument is 2 (interleaved layout). */
    3419           0 :     FOR (i = 0; i < dim2; i++)
    3420             :     {
    3421           0 :         fft5(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
    3422             :     }
    3423             :     /* Second stage: for each column i, load the dim2 row values into x00/x01 .. x14/x15 (re/im) and combine them. */
    3424           0 :     FOR (i = 0; i < dim1; i++)
    3425             :     {
    3426           0 :         cplxMpy4_8_1(x00, x01, x[2 * i + 2 * 0 * dim1], x[2 * i + 2 * 0 * dim1 + 1]);
    3427             :         /* Column 0 uses cplxMpy4_8_1 (no W operand) for every row; other columns use cplxMpy4_8_2 with the W pair at index 2*(i-1) + (k-1)*8 for row k = 1..7. */
    3428           0 :         IF (i == 0)
    3429             :         {
    3430           0 :             cplxMpy4_8_1(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1]);
    3431           0 :             cplxMpy4_8_1(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1]);
    3432           0 :             cplxMpy4_8_1(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1]);
    3433           0 :             cplxMpy4_8_1(x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1]);
    3434           0 :             cplxMpy4_8_1(x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1]);
    3435           0 :             cplxMpy4_8_1(x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1]);
    3436           0 :             cplxMpy4_8_1(x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1]);
    3437             :         }
    3438             :         ELSE
    3439             :         {
    3440           0 :             cplxMpy4_8_2(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1], W[2 * (i - 1) + 0 * 2 * 4],
    3441             :                          W[2 * (i - 1) + 0 * 2 * 4 + 1]);
    3442           0 :             cplxMpy4_8_2(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1], W[2 * (i - 1) + 1 * 2 * 4],
    3443             :                          W[2 * (i - 1) + 1 * 2 * 4 + 1]);
    3444           0 :             cplxMpy4_8_2(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1], W[2 * (i - 1) + 2 * 2 * 4],
    3445             :                          W[2 * (i - 1) + 2 * 2 * 4 + 1]);
    3446           0 :             cplxMpy4_8_2(x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1], W[2 * (i - 1) + 3 * 2 * 4],
    3447             :                          W[2 * (i - 1) + 3 * 2 * 4 + 1]);
    3448           0 :             cplxMpy4_8_2(x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1], W[2 * (i - 1) + 4 * 2 * 4],
    3449             :                          W[2 * (i - 1) + 4 * 2 * 4 + 1]);
    3450           0 :             cplxMpy4_8_2(x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1], W[2 * (i - 1) + 5 * 2 * 4],
    3451             :                          W[2 * (i - 1) + 5 * 2 * 4 + 1]);
    3452           0 :             cplxMpy4_8_2(x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1], W[2 * (i - 1) + 6 * 2 * 4],
    3453             :                          W[2 * (i - 1) + 6 * 2 * 4 + 1]);
    3454             :         }
    3455             :         /* Level 1: pairwise sums/differences shifted right by SCALEFACTORN2 - 1; t06, t07, t14 and t15 stay unshifted here and are shifted after they are combined further down. */
    3456           0 :         t00 = L_shr_pos(L_add(x00, x08), SCALEFACTORN2 - 1);
    3457           0 :         t02 = L_shr_pos(L_sub(x00, x08), SCALEFACTORN2 - 1);
    3458           0 :         t01 = L_shr_pos(L_add(x01, x09), SCALEFACTORN2 - 1);
    3459           0 :         t03 = L_shr_pos(L_sub(x01, x09), SCALEFACTORN2 - 1);
    3460           0 :         t04 = L_shr_pos(L_add(x02, x10), SCALEFACTORN2 - 1);
    3461           0 :         t06 = L_sub(x02, x10);
    3462           0 :         t05 = L_shr_pos(L_add(x03, x11), SCALEFACTORN2 - 1);
    3463           0 :         t07 = L_sub(x03, x11);
    3464           0 :         t08 = L_shr_pos(L_add(x04, x12), SCALEFACTORN2 - 1);
    3465           0 :         t10 = L_shr_pos(L_sub(x04, x12), SCALEFACTORN2 - 1);
    3466           0 :         t09 = L_shr_pos(L_add(x05, x13), SCALEFACTORN2 - 1);
    3467           0 :         t11 = L_shr_pos(L_sub(x05, x13), SCALEFACTORN2 - 1);
    3468           0 :         t12 = L_shr_pos(L_add(x06, x14), SCALEFACTORN2 - 1);
    3469           0 :         t14 = L_sub(x06, x14);
    3470           0 :         t13 = L_shr_pos(L_add(x07, x15), SCALEFACTORN2 - 1);
    3471           0 :         t15 = L_sub(x07, x15);
    3472             :         /* Level 2: combine the shifted level-1 terms (same add/sub pattern as the FFT8 stages of fft32). */
    3473           0 :         s00 = L_add(t00, t08);
    3474           0 :         s04 = L_sub(t00, t08);
    3475           0 :         s01 = L_add(t01, t09);
    3476           0 :         s05 = L_sub(t01, t09);
    3477           0 :         s08 = L_sub(t02, t11);
    3478           0 :         s10 = L_add(t02, t11);
    3479           0 :         s09 = L_add(t03, t10);
    3480           0 :         s11 = L_sub(t03, t10);
    3481           0 :         s02 = L_add(t04, t12);
    3482           0 :         s07 = L_sub(t04, t12);
    3483           0 :         s03 = L_add(t05, t13);
    3484           0 :         s06 = L_sub(t13, t05);
    3485             :         /* Deferred shift for the terms built from t06, t07, t14 and t15; t00..t03 are reused as temporaries from here on. */
    3486           0 :         t01 = L_shr_pos(L_add(t06, t14), SCALEFACTORN2 - 1);
    3487           0 :         t02 = L_shr_pos(L_sub(t06, t14), SCALEFACTORN2 - 1);
    3488           0 :         t00 = L_shr_pos(L_add(t07, t15), SCALEFACTORN2 - 1);
    3489           0 :         t03 = L_shr_pos(L_sub(t07, t15), SCALEFACTORN2 - 1);
    3490             :         /* Products with the constants C81_32 / C82_32 (defined outside this view); s15 uses C82_32 while s12..s14 use C81_32. */
    3491           0 :         s12 = Mpy_32_32_lc3plus(L_add(t00, t02), C81_32);
    3492           0 :         s14 = Mpy_32_32_lc3plus(L_sub(t00, t02), C81_32);
    3493           0 :         s13 = Mpy_32_32_lc3plus(L_sub(t03, t01), C81_32);
    3494           0 :         s15 = Mpy_32_32_lc3plus(L_add(t01, t03), C82_32);
    3495             :         /* Scatter: output k (k = 0..7) of column i is written back to re/im at index sx * (i + k * dim1). */
    3496           0 :         re[sx * i + sx * 0 * dim1] = L_add(s00, s02);
    3497           0 :         move32();
    3498           0 :         im[sx * i + sx * 0 * dim1] = L_add(s01, s03);
    3499           0 :         move32();
    3500           0 :         re[sx * i + sx * 1 * dim1] = L_add(s10, s12);
    3501           0 :         move32();
    3502           0 :         im[sx * i + sx * 1 * dim1] = L_add(s11, s13);
    3503           0 :         move32();
    3504           0 :         re[sx * i + sx * 2 * dim1] = L_sub(s04, s06);
    3505           0 :         move32();
    3506           0 :         im[sx * i + sx * 2 * dim1] = L_sub(s05, s07);
    3507           0 :         move32();
    3508           0 :         re[sx * i + sx * 3 * dim1] = L_add(s08, s14);
    3509           0 :         move32();
    3510           0 :         im[sx * i + sx * 3 * dim1] = L_add(s09, s15);
    3511           0 :         move32();
    3512           0 :         re[sx * i + sx * 4 * dim1] = L_sub(s00, s02);
    3513           0 :         move32();
    3514           0 :         im[sx * i + sx * 4 * dim1] = L_sub(s01, s03);
    3515           0 :         move32();
    3516           0 :         re[sx * i + sx * 5 * dim1] = L_sub(s10, s12);
    3517           0 :         move32();
    3518           0 :         im[sx * i + sx * 5 * dim1] = L_sub(s11, s13);
    3519           0 :         move32();
    3520           0 :         re[sx * i + sx * 6 * dim1] = L_add(s04, s06);
    3521           0 :         move32();
    3522           0 :         im[sx * i + sx * 6 * dim1] = L_add(s05, s07);
    3523           0 :         move32();
    3524           0 :         re[sx * i + sx * 7 * dim1] = L_sub(s08, s14);
    3525           0 :         move32();
    3526           0 :         im[sx * i + sx * 7 * dim1] = L_sub(s09, s15);
    3527           0 :         move32();
    3528             :     }
    3529             : 
    3530             :     Dyn_Mem_Deluxe_Out();
    3531           0 : }
    3532             : 
    3533             : /**
    3534             :  * \brief Combined FFT
    3535             :  *
    3536             :  * \param    [i/o] re     real part
    3537             :  * \param    [i/o] im     imag part
    3538             :  * \param    [i  ] W      rotation factor
    3539             :  * \param    [i  ] dim1   length of fft1
    3540             :  * \param    [i  ] dim2   length of fft2
    3541             :  * \param    [i  ] sx     stride real and imag part
    3542             :  * \param    [i  ] sc     stride phase rotation coefficients
    3543             :  * \param    [tmp] scratchBuffer  scratch memory; after scratchAlign() it serves as the 32-bit work buffer x, indexed up to 2*dim1*dim2 - 1
    3544             :  * \param    [i  ] Woff   offset for addressing the rotation vector table
    3545             :  *
    3546             :  * \return void
    3547             :  */
    3548             : 
    3549             : 
    3550           0 : static void fftN2(Word32 *re, Word32 *im,
    3551             : #ifdef ENABLE_HR_MODE
    3552             :                   const Word32 *W,
    3553             : #else
    3554             :                   const Word16 *W,
    3555             : #endif
    3556             :                   Word16 dim1, Word16 dim2, Word16 sx, Word16 sc,
    3557             :                   Word16 Woff
    3558             :                   , Word8 *scratchBuffer /* backing store for the work buffer x */
    3559             : #ifdef ENABLE_FFT_RESCALE
    3560             :                   , Word16 *scale /* may be NULL; the rescale step below is skipped then */
    3561             : #endif
    3562             :                   )
    3563             : {
    3564             :     Dyn_Mem_Deluxe_In(Counter i, j;);
    3565             :     /* x: work buffer of interleaved (re, im) pairs, obtained via scratchAlign(scratchBuffer, 0) */
    3566           0 :     Word32 *x = scratchAlign(scratchBuffer, 0);
    3567             :     /* Gather: row i of x (dim1 pairs) receives input elements i, i + dim2, i + 2*dim2, ... read with stride sx */
    3568           0 :     FOR (i = 0; i < dim2; i++)
    3569             :     {
    3570           0 :         FOR (j = 0; j < dim1; j++)
    3571             :         {
    3572           0 :             x[2 * i * dim1 + 2 * j] = re[sx * i + sx * j * dim2];
    3573           0 :             move32();
    3574           0 :             x[2 * i * dim1 + 2 * j + 1] = im[sx * i + sx * j * dim2];
    3575           0 :             move32();
    3576             :         }
    3577             :     }
    3578             :     /* Stage 1: hand each of the dim2 rows of x to the dim1-point kernel (fft4 ... fft32), re/im stride 2 */
    3579           0 :     SWITCH (dim1)
    3580             :     {
    3581             : 
    3582           0 :     case 4:
    3583           0 :         FOR (i = 0; i < dim2; i++)
    3584             :         {
    3585           0 :             fft4(&x[i * 2 * dim1]);
    3586             :         }
    3587           0 :         BREAK;
    3588           0 :     case 8:
    3589           0 :         FOR (i = 0; i < dim2; i++)
    3590             :         {
    3591           0 :             fft8(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
    3592             :         }
    3593           0 :         BREAK;
    3594             : 
    3595           0 :     case 10:
    3596           0 :         FOR (i = 0; i < dim2; i++)
    3597             :         {
    3598           0 :             fft10(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
    3599             :         }
    3600           0 :         BREAK;
    3601           0 :     case 15:
    3602           0 :         FOR (i = 0; i < dim2; i++)
    3603             :         {
    3604           0 :             fft15(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
    3605             :         }
    3606             :         
    3607           0 :         BREAK;
    3608           0 :     case 16:
    3609           0 :         FOR (i = 0; i < dim2; i++)
    3610             :         {
    3611           0 :             fft16(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
    3612             :         }
    3613           0 :         BREAK;
    3614           0 :     case 20:
    3615           0 :         FOR (i = 0; i < dim2; i++)
    3616             :         {
    3617           0 :             fft20(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
    3618             :         }
    3619           0 :         BREAK;
    3620           0 :     case 30:
    3621           0 :         FOR (i = 0; i < dim2; i++)
    3622             :         {
    3623           0 :             fft30(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
    3624             :         }
    3625           0 :         BREAK;
    3626           0 :     case 32:
    3627           0 :         FOR (i = 0; i < dim2; i++)
    3628             :         {
    3629           0 :             fft32(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
    3630             :         }
    3631           0 :         BREAK;
    3632             : #ifdef ENABLE_HR_MODE
    3633             : #if (defined LC3_FFT15)
    3634             :     case 60: /* NOTE(review): the nested calls pass `scratch`, but no such parameter or local is visible in fftN2 (the parameter is scratchBuffer) - confirm this branch builds when LC3_FFT15 is defined */
    3635             :         FOR (i = 0; i < dim2; i++)
    3636             :         {
    3637             : #ifndef ENABLE_FFT_RESCALE
    3638             :             fftN2(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], RotVector_480, 15, 4, sx, 4, 60, scratch);
    3639             : #else
    3640             :             fftN2(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], RotVector_480, 15, 4, sx, 4, 60, scratch, NULL);
    3641             : #endif
    3642             :         }
    3643             :         BREAK;
    3644             : #endif
    3645             : #endif
    3646           0 :     default: ASSERT(0); /* dim1 value with no kernel above */
    3647             :     }
    3648             :     /* Optional rescale (ENABLE_FFT_RESCALE only): shift every entry of x left by *scale and report the shift to the caller */
    3649             : #ifdef ENABLE_FFT_RESCALE
    3650           0 :     IF (scale)
    3651             :     {
    3652           0 :         *scale = s_max(sub(getScaleFactor32_lc3plus(x, dim1 * dim2 * 2), FFT_RESCALE_HR), 0); move16();
    3653             :         /* NOTE(review): the closing brace of IF (scale) sits inside the #else arm below, so the block looks unbalanced when FUNCTION_scaleValues_32 is defined; that arm also passes dim1 * dim2 while the loop covers dim1 * dim2 * 2 - confirm */
    3654             : #if defined(FUNCTION_scaleValues_32)
    3655             :         scaleValues_32(x, dim1 * dim2, *scale);
    3656             : #else
    3657           0 :         FOR (i = 0; i < dim1 * dim2 * 2; i++)
    3658             :         {
    3659           0 :             x[i] = L_shl_pos(x[i], *scale); move32();
    3660             :         }
    3661             :     }
    3662             : #endif
    3663             : #endif
    3664             :     /* Stage 2: for each column i of x, multiply the dim2 row entries with cplxMpy4_* (W pairs for the *_0 variants), combine them with a dim2-point transform and write output k to re/im index sx * (i + k * dim1) */
    3665           0 :     SWITCH (dim2)
    3666             :     {
    3667           0 :     case 4:
    3668             :     {
    3669             :         Word32 x00, x01, x02, x03, x04, x05, x06, x07;
    3670             :         Word32 t00, t01, t02, t03, t04, t05, t06, t07;
    3671             :         /* j is fixed at 8 here and used as the row factor in the W index (the other cases use sc in that position) */
    3672           0 :         j = add(8, 0);
    3673           0 :         FOR (i = 0; i < dim1; i++)
    3674             :         {
    3675           0 :             cplxMpy4_4_1(x00, x01, x[2 * i + 2 * 0 * dim1], x[2 * i + 2 * 0 * dim1 + 1]);
    3676           0 :             IF (i == 0) /* column 0: the W-less *_1 variant is used for every row */
    3677             :             {
    3678           0 :                 cplxMpy4_4_1(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1]);
    3679           0 :                 cplxMpy4_4_1(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1]);
    3680           0 :                 cplxMpy4_4_1(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1]);
    3681             :             }
    3682             :             ELSE
    3683             :             {
    3684           0 :                 cplxMpy4_4_0(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1],
    3685             :                              W[sc * i + j * 1 * dim1 - Woff], W[sc * i + j * 1 * dim1 + 1 - Woff]);
    3686           0 :                 cplxMpy4_4_0(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1],
    3687             :                              W[sc * i + j * 2 * dim1 - Woff], W[sc * i + j * 2 * dim1 + 1 - Woff]);
    3688           0 :                 cplxMpy4_4_0(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1],
    3689             :                              W[sc * i + j * 3 * dim1 - Woff], W[sc * i + j * 3 * dim1 + 1 - Woff]);
    3690             :             }
    3691             :             /* sums and differences of the four complex inputs (even index = real part, odd index = imaginary part) */
    3692           0 :             t00 = L_add(x00, x04);
    3693           0 :             t02 = L_sub(x00, x04);
    3694           0 :             t01 = L_add(x01, x05);
    3695           0 :             t03 = L_sub(x01, x05);
    3696           0 :             t04 = L_add(x02, x06);
    3697           0 :             t07 = L_sub(x02, x06);
    3698           0 :             t05 = L_add(x07, x03);
    3699           0 :             t06 = L_sub(x07, x03);
    3700             :             /* scatter the four outputs of column i to rows 0..3 of the result */
    3701           0 :             re[sx * i + sx * 0 * dim1] = L_add(t00, t04);
    3702           0 :             move32();
    3703           0 :             im[sx * i + sx * 0 * dim1] = L_add(t01, t05);
    3704           0 :             move32();
    3705           0 :             re[sx * i + sx * 1 * dim1] = L_sub(t02, t06);
    3706           0 :             move32();
    3707           0 :             im[sx * i + sx * 1 * dim1] = L_sub(t03, t07);
    3708           0 :             move32();
    3709           0 :             re[sx * i + sx * 2 * dim1] = L_sub(t00, t04);
    3710           0 :             move32();
    3711           0 :             im[sx * i + sx * 2 * dim1] = L_sub(t01, t05);
    3712           0 :             move32();
    3713           0 :             re[sx * i + sx * 3 * dim1] = L_add(t02, t06);
    3714           0 :             move32();
    3715           0 :             im[sx * i + sx * 3 * dim1] = L_add(t03, t07);
    3716           0 :             move32();
    3717             :         }
    3718             :         
    3719           0 :         BREAK;
    3720             :     }
    3721             :     /* dim2 == 6: collect one column into the local buffer y, run fft6 on it, then scatter y back to re/im */
    3722           0 :     case 6:
    3723             :     {
    3724             :         Word32 y[2 * 10]; /* only the first 2 * dim2 = 12 entries are accessed in this case */
    3725           0 :         FOR (j = 0; j < dim2; j++) /* column 0: W-less multiply for every row (this case uses the cplxMpy4_12_* macros) */
    3726             :         {
    3727           0 :             cplxMpy4_12_1(y[2 * j], y[2 * j + 1], x[2 * 0 + 2 * j * dim1], x[2 * 0 + 2 * j * dim1 + 1]);
    3728             :         }
    3729           0 :         fft6(&y[0], &y[1], 2);
    3730           0 :         FOR (j = 0; j < dim2; j++)
    3731             :         {
    3732           0 :             re[sx * 0 + sx * j * dim1] = y[2 * j];
    3733           0 :             move32();
    3734           0 :             im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
    3735           0 :             move32();
    3736             :         }
    3737             :         /* columns 1..dim1-1: row 0 uses the W-less multiply, row r >= 1 uses W[sc * (i + r * dim1) - Woff] and the entry after it */
    3738           0 :         FOR (i = 1; i < dim1; i++)
    3739             :         {
    3740           0 :             cplxMpy4_12_1(y[2 * (0 + 0)], y[2 * (0 + 0) + 1], x[2 * i + 2 * (0 + 0) * dim1],
    3741             :                           x[2 * i + 2 * (0 + 0) * dim1 + 1]);
    3742           0 :             cplxMpy4_12_0(y[2 * (0 + 1)], y[2 * (0 + 1) + 1], x[2 * i + 2 * (0 + 1) * dim1],
    3743             :                           x[2 * i + 2 * (0 + 1) * dim1 + 1], W[sc * i + sc * (0 + 1) * dim1 - Woff],
    3744             :                           W[sc * i + sc * (0 + 1) * dim1 + 1 - Woff]);
    3745           0 :             FOR (j = 2; j < dim2; j = j + 2) /* remaining rows, two per iteration */
    3746             :             {
    3747           0 :                 cplxMpy4_12_0(y[2 * (j + 0)], y[2 * (j + 0) + 1], x[2 * i + 2 * (j + 0) * dim1],
    3748             :                               x[2 * i + 2 * (j + 0) * dim1 + 1], W[sc * i + sc * (j + 0) * dim1 - Woff],
    3749             :                               W[sc * i + sc * (j + 0) * dim1 + 1 - Woff]);
    3750           0 :                 cplxMpy4_12_0(y[2 * (j + 1)], y[2 * (j + 1) + 1], x[2 * i + 2 * (j + 1) * dim1],
    3751             :                               x[2 * i + 2 * (j + 1) * dim1 + 1], W[sc * i + sc * (j + 1) * dim1 - Woff],
    3752             :                               W[sc * i + sc * (j + 1) * dim1 + 1 - Woff]);
    3753             :             }
    3754           0 :             fft6(&y[0], &y[1], 2);
    3755           0 :             FOR (j = 0; j < dim2; j++)
    3756             :             {
    3757           0 :                 re[sx * i + sx * j * dim1] = y[2 * j];
    3758           0 :                 move32();
    3759           0 :                 im[sx * i + sx * j * dim1] = y[2 * j + 1];
    3760           0 :                 move32();
    3761             :             }
    3762             :         }
    3763           0 :         BREAK;
    3764             :     }
    3765             :     /* dim2 == 8: the eight complex inputs of a column are combined on scalars (x*, t*, s*) instead of a local buffer */
    3766           0 :     case 8:
    3767             :     {
    3768             :         Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;
    3769             :         Word32 t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15;
    3770             :         Word32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09, s10, s11, s12, s13, s14, s15;
    3771             : 
    3772           0 :         FOR (i = 0; i < dim1; i++)
    3773             :         {
    3774           0 :             cplxMpy4_8_1(x00, x01, x[2 * i + 2 * 0 * dim1], x[2 * i + 2 * 0 * dim1 + 1]);
    3775           0 :             IF (i == 0) /* column 0: the W-less *_1 variant is used for every row */
    3776             :             {
    3777           0 :                 cplxMpy4_8_1(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1]);
    3778           0 :                 cplxMpy4_8_1(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1]);
    3779           0 :                 cplxMpy4_8_1(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1]);
    3780           0 :                 cplxMpy4_8_1(x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1]);
    3781           0 :                 cplxMpy4_8_1(x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1]);
    3782           0 :                 cplxMpy4_8_1(x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1]);
    3783           0 :                 cplxMpy4_8_1(x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1]);
    3784             :             }
    3785             :             ELSE
    3786             :             {
    3787           0 :                 cplxMpy4_8_0(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1],
    3788             :                              W[sc * i + sc * 1 * dim1 - Woff], W[sc * i + sc * 1 * dim1 + 1 - Woff]);
    3789           0 :                 cplxMpy4_8_0(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1],
    3790             :                              W[sc * i + sc * 2 * dim1 - Woff], W[sc * i + sc * 2 * dim1 + 1 - Woff]);
    3791           0 :                 cplxMpy4_8_0(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1],
    3792             :                              W[sc * i + sc * 3 * dim1 - Woff], W[sc * i + sc * 3 * dim1 + 1 - Woff]);
    3793           0 :                 cplxMpy4_8_0(x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1],
    3794             :                              W[sc * i + sc * 4 * dim1 - Woff], W[sc * i + sc * 4 * dim1 + 1 - Woff]);
    3795           0 :                 cplxMpy4_8_0(x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1],
    3796             :                              W[sc * i + sc * 5 * dim1 - Woff], W[sc * i + sc * 5 * dim1 + 1 - Woff]);
    3797           0 :                 cplxMpy4_8_0(x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1],
    3798             :                              W[sc * i + sc * 6 * dim1 - Woff], W[sc * i + sc * 6 * dim1 + 1 - Woff]);
    3799           0 :                 cplxMpy4_8_0(x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1],
    3800             :                              W[sc * i + sc * 7 * dim1 - Woff], W[sc * i + sc * 7 * dim1 + 1 - Woff]);
    3801             :             }
    3802             :             /* first layer: pair input k with input k + 4; results are shifted right by SCALEFACTORN2 - 1 except t06, t07, t14, t15, which are shifted after being combined further down */
    3803           0 :             t00 = L_shr_pos(L_add(x00, x08), SCALEFACTORN2 - 1);
    3804           0 :             t02 = L_shr_pos(L_sub(x00, x08), SCALEFACTORN2 - 1);
    3805           0 :             t01 = L_shr_pos(L_add(x01, x09), SCALEFACTORN2 - 1);
    3806           0 :             t03 = L_shr_pos(L_sub(x01, x09), SCALEFACTORN2 - 1);
    3807           0 :             t04 = L_shr_pos(L_add(x02, x10), SCALEFACTORN2 - 1);
    3808           0 :             t06 = L_sub(x02, x10);
    3809           0 :             t05 = L_shr_pos(L_add(x03, x11), SCALEFACTORN2 - 1);
    3810           0 :             t07 = L_sub(x03, x11);
    3811           0 :             t08 = L_shr_pos(L_add(x04, x12), SCALEFACTORN2 - 1);
    3812           0 :             t10 = L_shr_pos(L_sub(x04, x12), SCALEFACTORN2 - 1);
    3813           0 :             t09 = L_shr_pos(L_add(x05, x13), SCALEFACTORN2 - 1);
    3814           0 :             t11 = L_shr_pos(L_sub(x05, x13), SCALEFACTORN2 - 1);
    3815           0 :             t12 = L_shr_pos(L_add(x06, x14), SCALEFACTORN2 - 1);
    3816           0 :             t14 = L_sub(x06, x14);
    3817           0 :             t13 = L_shr_pos(L_add(x07, x15), SCALEFACTORN2 - 1);
    3818           0 :             t15 = L_sub(x07, x15);
    3819             :             /* second layer on the already shifted terms */
    3820           0 :             s00 = L_add(t00, t08);
    3821           0 :             s04 = L_sub(t00, t08);
    3822           0 :             s01 = L_add(t01, t09);
    3823           0 :             s05 = L_sub(t01, t09);
    3824           0 :             s08 = L_sub(t02, t11);
    3825           0 :             s10 = L_add(t02, t11);
    3826           0 :             s09 = L_add(t03, t10);
    3827           0 :             s11 = L_sub(t03, t10);
    3828           0 :             s02 = L_add(t04, t12);
    3829           0 :             s07 = L_sub(t04, t12);
    3830           0 :             s03 = L_add(t05, t13);
    3831           0 :             s06 = L_sub(t13, t05);
    3832             :             /* t00..t03 are reused from here on: they now hold the shifted sums/differences of t06, t07, t14, t15 */
    3833           0 :             t01 = L_shr_pos(L_add(t06, t14), SCALEFACTORN2 - 1);
    3834           0 :             t02 = L_shr_pos(L_sub(t06, t14), SCALEFACTORN2 - 1);
    3835           0 :             t00 = L_shr_pos(L_add(t07, t15), SCALEFACTORN2 - 1);
    3836           0 :             t03 = L_shr_pos(L_sub(t07, t15), SCALEFACTORN2 - 1);
    3837             :             /* s12..s14 are multiplied by C81, s15 by C82 (both constants are defined outside this function) */
    3838           0 :             s12 = Mpy_32_xx(L_add(t00, t02), C81);
    3839           0 :             s14 = Mpy_32_xx(L_sub(t00, t02), C81);
    3840           0 :             s13 = Mpy_32_xx(L_sub(t03, t01), C81);
    3841           0 :             s15 = Mpy_32_xx(L_add(t01, t03), C82);
    3842             :             /* scatter the eight outputs of column i to rows 0..7 of the result */
    3843           0 :             re[sx * i + sx * 0 * dim1] = L_add(s00, s02);
    3844           0 :             move32();
    3845           0 :             im[sx * i + sx * 0 * dim1] = L_add(s01, s03);
    3846           0 :             move32();
    3847           0 :             re[sx * i + sx * 1 * dim1] = L_add(s10, s12);
    3848           0 :             move32();
    3849           0 :             im[sx * i + sx * 1 * dim1] = L_add(s11, s13);
    3850           0 :             move32();
    3851           0 :             re[sx * i + sx * 2 * dim1] = L_sub(s04, s06);
    3852           0 :             move32();
    3853           0 :             im[sx * i + sx * 2 * dim1] = L_sub(s05, s07);
    3854           0 :             move32();
    3855           0 :             re[sx * i + sx * 3 * dim1] = L_add(s08, s14);
    3856           0 :             move32();
    3857           0 :             im[sx * i + sx * 3 * dim1] = L_add(s09, s15);
    3858           0 :             move32();
    3859           0 :             re[sx * i + sx * 4 * dim1] = L_sub(s00, s02);
    3860           0 :             move32();
    3861           0 :             im[sx * i + sx * 4 * dim1] = L_sub(s01, s03);
    3862           0 :             move32();
    3863           0 :             re[sx * i + sx * 5 * dim1] = L_sub(s10, s12);
    3864           0 :             move32();
    3865           0 :             im[sx * i + sx * 5 * dim1] = L_sub(s11, s13);
    3866           0 :             move32();
    3867           0 :             re[sx * i + sx * 6 * dim1] = L_add(s04, s06);
    3868           0 :             move32();
    3869           0 :             im[sx * i + sx * 6 * dim1] = L_add(s05, s07);
    3870           0 :             move32();
    3871           0 :             re[sx * i + sx * 7 * dim1] = L_sub(s08, s14);
    3872           0 :             move32();
    3873           0 :             im[sx * i + sx * 7 * dim1] = L_sub(s09, s15);
    3874           0 :             move32();
    3875             :         }
    3876           0 :         BREAK;
    3877             :     }
    3878             :     /* dim2 == 12: same layout as case 6, with fft12 as the column transform */
    3879           0 :     case 12:
    3880             :     {
    3881             :         Word32 y[2 * 20]; /* only the first 2 * dim2 = 24 entries are accessed in this case */
    3882           0 :         FOR (j = 0; j < dim2; j++)
    3883             :         {
    3884           0 :             cplxMpy4_12_1(y[2 * j], y[2 * j + 1], x[2 * 0 + 2 * j * dim1], x[2 * 0 + 2 * j * dim1 + 1]);
    3885             :         }
    3886           0 :         fft12(y);
    3887           0 :         FOR (j = 0; j < dim2; j++)
    3888             :         {
    3889           0 :             re[sx * 0 + sx * j * dim1] = y[2 * j];
    3890           0 :             move32();
    3891           0 :             im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
    3892           0 :             move32();
    3893             :         }
    3894             :         /* columns 1..dim1-1: row 0 uses the W-less multiply, row r >= 1 uses W[sc * (i + r * dim1) - Woff] and the entry after it */
    3895           0 :         FOR (i = 1; i < dim1; i++)
    3896             :         {
    3897           0 :             cplxMpy4_12_1(y[2 * (0 + 0)], y[2 * (0 + 0) + 1], x[2 * i + 2 * (0 + 0) * dim1],
    3898             :                           x[2 * i + 2 * (0 + 0) * dim1 + 1]);
    3899           0 :             cplxMpy4_12_0(y[2 * (0 + 1)], y[2 * (0 + 1) + 1], x[2 * i + 2 * (0 + 1) * dim1],
    3900             :                           x[2 * i + 2 * (0 + 1) * dim1 + 1], W[sc * i + sc * (0 + 1) * dim1 - Woff],
    3901             :                           W[sc * i + sc * (0 + 1) * dim1 + 1 - Woff]);
    3902           0 :             FOR (j = 2; j < dim2; j = j + 2) /* remaining rows, two per iteration */
    3903             :             {
    3904           0 :                 cplxMpy4_12_0(y[2 * (j + 0)], y[2 * (j + 0) + 1], x[2 * i + 2 * (j + 0) * dim1],
    3905             :                               x[2 * i + 2 * (j + 0) * dim1 + 1], W[sc * i + sc * (j + 0) * dim1 - Woff],
    3906             :                               W[sc * i + sc * (j + 0) * dim1 + 1 - Woff]);
    3907           0 :                 cplxMpy4_12_0(y[2 * (j + 1)], y[2 * (j + 1) + 1], x[2 * i + 2 * (j + 1) * dim1],
    3908             :                               x[2 * i + 2 * (j + 1) * dim1 + 1], W[sc * i + sc * (j + 1) * dim1 - Woff],
    3909             :                               W[sc * i + sc * (j + 1) * dim1 + 1 - Woff]);
    3910             :             }
    3911           0 :             fft12(y);
    3912           0 :             FOR (j = 0; j < dim2; j++)
    3913             :             {
    3914           0 :                 re[sx * i + sx * j * dim1] = y[2 * j];
    3915           0 :                 move32();
    3916           0 :                 im[sx * i + sx * j * dim1] = y[2 * j + 1];
    3917           0 :                 move32();
    3918             :             }
    3919             :         }
    3920           0 :         BREAK;
    3921             :     }
    3922             :     /* dim2 == 16 is only compiled in with ENABLE_HR_MODE; same layout as case 12, with fft16 and the cplxMpy4_16_* macros */
    3923             : #if defined(ENABLE_HR_MODE)
    3924           0 :     case 16:
    3925             :     {
    3926             :         Word32 y[2 * 20]; /* only the first 2 * dim2 = 32 entries are accessed in this case */
    3927           0 :         FOR (j = 0; j < dim2; j++)
    3928             :         {
    3929           0 :             cplxMpy4_16_1(y[2 * j], y[2 * j + 1], x[2 * 0 + 2 * j * dim1], x[2 * 0 + 2 * j * dim1 + 1]);
    3930             :         }
    3931             :         
    3932           0 :         fft16(&y[0], &y[1], 2);
    3933           0 :         FOR (j = 0; j < dim2; j++)
    3934             :         {
    3935           0 :             re[sx * 0 + sx * j * dim1] = y[2 * j];
    3936           0 :             move32();
    3937           0 :             im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
    3938           0 :             move32();
    3939             :         }
    3940             :         /* columns 1..dim1-1: row 0 uses the W-less multiply, row r >= 1 uses W[sc * (i + r * dim1) - Woff] and the entry after it */
    3941           0 :         FOR (i = 1; i < dim1; i++)
    3942             :         {
    3943           0 :             cplxMpy4_16_1(y[2 * (0 + 0)], y[2 * (0 + 0) + 1], x[2 * i + 2 * (0 + 0) * dim1],
    3944             :                           x[2 * i + 2 * (0 + 0) * dim1 + 1]);
    3945           0 :             cplxMpy4_16_0(y[2 * (0 + 1)], y[2 * (0 + 1) + 1], x[2 * i + 2 * (0 + 1) * dim1],
    3946             :                           x[2 * i + 2 * (0 + 1) * dim1 + 1], W[sc * i + sc * (0 + 1) * dim1 - Woff],
    3947             :                           W[sc * i + sc * (0 + 1) * dim1 + 1 - Woff]);
    3948           0 :             FOR (j = 2; j < dim2; j = j + 2) /* remaining rows, two per iteration */
    3949             :             {
    3950           0 :                 cplxMpy4_16_0(y[2 * (j + 0)], y[2 * (j + 0) + 1], x[2 * i + 2 * (j + 0) * dim1],
    3951             :                               x[2 * i + 2 * (j + 0) * dim1 + 1], W[sc * i + sc * (j + 0) * dim1 - Woff],
    3952             :                               W[sc * i + sc * (j + 0) * dim1 + 1 - Woff]);
    3953           0 :                 cplxMpy4_16_0(y[2 * (j + 1)], y[2 * (j + 1) + 1], x[2 * i + 2 * (j + 1) * dim1],
    3954             :                               x[2 * i + 2 * (j + 1) * dim1 + 1], W[sc * i + sc * (j + 1) * dim1 - Woff],
    3955             :                               W[sc * i + sc * (j + 1) * dim1 + 1 - Woff]);
    3956             :             }
    3957           0 :             fft16(&y[0], &y[1], 2);
    3958           0 :             FOR (j = 0; j < dim2; j++)
    3959             :             {
    3960           0 :                 re[sx * i + sx * j * dim1] = y[2 * j];
    3961           0 :                 move32();
    3962           0 :                 im[sx * i + sx * j * dim1] = y[2 * j + 1];
    3963           0 :                 move32();
    3964             :             }
    3965             :         }
    3966           0 :         BREAK;
    3967             :     }
    3968             : #endif
    3969           0 :     default: ASSERT(0); /* dim2 value with no handler above */
    3970             :     }
    3971             : 
    3972             :     Dyn_Mem_Deluxe_Out();
    3973           0 : }
    3974             : 
    3975             : /**
    3976             :  * \brief Complex valued FFT
    3977             :  *
    3978             :  * \param    [i/o] re          real part
    3979             :  * \param    [i/o] im          imag part
    3980             :  * \param    [i  ] length      length of fft
    3981             :  * \param    [i  ] s           stride real and imag part
    3982             :  * \param    [i/o] scale       scalefactor, updated in place
    3983             :  *
    3984             :  * \return void
    3985             :  */
    3986             : 
    3987             : 
    3988             : 
    3989             : /* x is the scratch buffer */
    3990           0 : void BASOP_cfft_lc3plus(Word32 *re, Word32 *im, Word16 length, Word16 s, Word16 *scale, Word32 *x)
    3991             : {
    3992             : #if (defined ENABLE_FFT_RESCALE) && ((defined LC3_FFT30) || (defined ENABLE_HR_MODE))
    3993           0 :     Word16 fftN2scale = 0;
    3994             : #endif
    3995             : 
    3996             : #ifdef ENABLE_HR_MODE
    3997           0 :     Word8 scratch[6128] = {0};
    3998             : #else
    3999             :     Word8 scratch[4068] = {0};
    4000             : #endif
    4001             : 
    4002           0 :     SWITCH (length)
    4003             :     {
    4004             : 
    4005           0 :     case 10:
    4006           0 :         fft10(re, im, s);
    4007           0 :         *scale = add(*scale, SCALEFACTOR10);
    4008           0 :         move16();
    4009           0 :         BREAK;
    4010           0 :     case 16:
    4011           0 :         fft16(re, im, s);
    4012           0 :         *scale = add(*scale, SCALEFACTOR16);
    4013           0 :         move16();
    4014           0 :         BREAK;
    4015           0 :     case 20:
    4016           0 :         fft20(re, im, s);
    4017           0 :         *scale = add(*scale, SCALEFACTOR20);
    4018           0 :         move16();
    4019           0 :         BREAK;
    4020           0 :     case 30:
    4021           0 :         fft30(re, im, s);
    4022           0 :         *scale = add(*scale, SCALEFACTOR30);
    4023           0 :         move16();
    4024           0 :         BREAK;
    4025           0 :     case 32:
    4026           0 :         fft32(re, im, s);
    4027           0 :         *scale = add(*scale, SCALEFACTOR32);
    4028           0 :         move16();
    4029           0 :         BREAK;
    4030           0 :     case 40:
    4031           0 :         fft40(re, im, s, x);
    4032           0 :         *scale = add(*scale, SCALEFACTOR40);
    4033           0 :         move16();
    4034           0 :         BREAK;
    4035           0 :     case 48:
    4036             : #ifndef ENABLE_FFT_RESCALE
    4037             :         fftN2(re, im, RotVector_32_12, 4, 12, s, 16, 64, scratch);
    4038             : #else
    4039           0 :         fftN2(re, im, RotVector_32_12, 4, 12, s, 16, 64, scratch, NULL);
    4040             : #endif
    4041           0 :         *scale = add(*scale, SCALEFACTOR48);
    4042           0 :         move16();
    4043           0 :         BREAK;
    4044           0 :     case 60:
    4045             : #ifndef ENABLE_FFT_RESCALE
    4046             :         fftN2(re, im, RotVector_480, 15, 4, s, 4, 60, scratch);
    4047             : #else
    4048           0 :         fftN2(re, im, RotVector_480, 15, 4, s, 4, 60, scratch, NULL);
    4049             : #endif
    4050           0 :         *scale = add(*scale, SCALEFACTOR60);
    4051           0 :         move16();
    4052           0 :         BREAK;
    4053           0 :     case 64:
    4054             : #ifndef ENABLE_FFT_RESCALE
    4055             :         fftN2(re, im, RotVector_32_8, 8, 8, s, 8, 64, scratch);
    4056             : #else
    4057           0 :         fftN2(re, im, RotVector_32_8, 8, 8, s, 8, 64, scratch, NULL);
    4058             : #endif
    4059           0 :         *scale = add(*scale, SCALEFACTOR64);
    4060           0 :         move16();
    4061           0 :         BREAK;
    4062           0 :     case 80:
    4063             : #ifndef ENABLE_FFT_RESCALE
    4064             :         fftN2(re, im, RotVector_320, 10, 8, s, 4, 40, scratch);
    4065             : #else
    4066           0 :         fftN2(re, im, RotVector_320, 10, 8, s, 4, 40, scratch, NULL);
    4067             : #endif
    4068           0 :         *scale = add(*scale, SCALEFACTOR80);
    4069           0 :         move16();
    4070           0 :         BREAK;
    4071           0 :     case 90:
    4072             : #ifndef ENABLE_FFT_RESCALE
    4073             :         fftN2(re, im, RotVector_15_6, 15, 6, s, 2, 30, scratch);
    4074             : #else
    4075           0 :         fftN2(re, im, RotVector_15_6, 15, 6, s, 2, 30, scratch, NULL);
    4076             : #endif
    4077           0 :         *scale = add(*scale, SCALEFACTOR90);
    4078           0 :         move16();
    4079           0 :         BREAK;
    4080             : 
    4081           0 :     case 120:
    4082             : #ifndef ENABLE_FFT_RESCALE
    4083             :         fftN2(re, im, RotVector_480, 15, 8, s, 4, 60, scratch);
    4084             : #else
    4085           0 :         fftN2(re, im, RotVector_480, 15, 8, s, 4, 60, scratch, NULL);
    4086             : #endif
    4087           0 :         *scale = add(*scale, SCALEFACTOR120);
    4088           0 :         move16();
    4089           0 :         BREAK;
    4090           0 :     case 128:
    4091             : #ifndef ENABLE_FFT_RESCALE
    4092             :         fftN2(re, im, RotVector_32_8, 16, 8, s, 4, 64, scratch);
    4093             : #else
    4094           0 :         fftN2(re, im, RotVector_32_8, 16, 8, s, 4, 64, scratch, NULL);
    4095             : #endif
    4096           0 :         *scale = add(*scale, SCALEFACTOR128);
    4097           0 :         move16();
    4098           0 :         BREAK;
    4099           0 :     case 160:
    4100             : #ifndef ENABLE_FFT_RESCALE
    4101             :         fftN2(re, im, RotVector_320, 20, 8, s, 2, 40, scratch);
    4102             : #else
    4103           0 :         fftN2(re, im, RotVector_320, 20, 8, s, 2, 40, scratch, NULL);
    4104             : #endif
    4105           0 :         *scale = add(*scale, SCALEFACTOR160);
    4106           0 :         move16();
    4107           0 :         BREAK;
    4108           0 :     case 180:
    4109             : #ifndef ENABLE_FFT_RESCALE
    4110             :         fftN2(re, im, RotVector_360, 15, 12, s, 4, 60, scratch);
    4111             :         *scale = add(*scale, SCALEFACTOR180);
    4112             : #else
    4113           0 :         fftN2(re, im, RotVector_360, 15, 12, s, 4, 60, scratch, &fftN2scale);
    4114           0 :         *scale = add(*scale, SCALEFACTOR180);
    4115           0 :         *scale = sub(*scale, fftN2scale); move16();
    4116             : #endif
    4117             : 
    4118           0 :         move16();
    4119           0 :         BREAK;
    4120           0 :     case 192:
    4121             : #ifndef ENABLE_FFT_RESCALE
    4122             :         fftN2(re, im, RotVector_32_12, 16, 12, s, 4, 64, scratch);
    4123             : #else
    4124           0 :         fftN2(re, im, RotVector_32_12, 16, 12, s, 4, 64, scratch, NULL);
    4125             : #endif
    4126           0 :         *scale = add(*scale, SCALEFACTOR192);
    4127           0 :         move16();
    4128           0 :         BREAK;
    4129           0 :     case 240:
    4130             : #ifndef ENABLE_FFT_RESCALE
    4131             :         fftN2(re, im, RotVector_480, 30, 8, s, 2, 60, scratch);
    4132             :         *scale = add(*scale, SCALEFACTOR240);
    4133             : #else
    4134           0 :         fftN2(re, im, RotVector_480, 30, 8, s, 2, 60, scratch, &fftN2scale);
    4135           0 :         *scale = add(*scale, SCALEFACTOR240);
    4136           0 :         *scale = sub(*scale, fftN2scale); move16();
    4137             : #endif
    4138           0 :         move16();
    4139           0 :         BREAK;
    4140           0 :     case 256:
    4141             : #ifndef ENABLE_FFT_RESCALE
    4142             :         fftN2(re, im, RotVector_32_8, 32, 8, s, 2, 64, scratch);
    4143             : #else
    4144           0 :         fftN2(re, im, RotVector_32_8, 32, 8, s, 2, 64, scratch, NULL);
    4145             : #endif
    4146           0 :         *scale = add(*scale, SCALEFACTOR256);
    4147           0 :         move16();
    4148           0 :         BREAK;
    4149           0 :     case 384:
    4150             : #ifndef ENABLE_FFT_RESCALE
    4151             :         fftN2(re, im, RotVector_32_12, 32, 12, s, 2, 64, scratch);
    4152             : #else
    4153           0 :         fftN2(re, im, RotVector_32_12, 32, 12, s, 2, 64, scratch, NULL);
    4154             : #endif
    4155           0 :         *scale = add(*scale, SCALEFACTOR384);
    4156           0 :         move16();
    4157           0 :         BREAK;
    4158             : #ifdef ENABLE_HR_MODE
    4159           0 :     case 360:
    4160           0 :         fftN2(re, im, RotVector_720, 30, 12, s, 2, 60, scratch, &fftN2scale);
    4161           0 :         *scale = add(*scale, SCALEFACTOR360); move16();
    4162           0 :         *scale = sub(*scale, fftN2scale); move16();
    4163           0 :         BREAK;
    4164           0 :     case 480:
    4165             : #ifndef ENABLE_FFT_RESCALE
    4166             : #ifndef ENABLE_FFT_30X16
    4167             :         fftN2(re, im, RotVector_960, 60, 8, s, 2, 120, scratch);
    4168             : #else
    4169             :         fftN2(re, im, RotVector_30_16, 30, 16, s, 2, 60, scratch);  
    4170             : #endif
    4171             :         *scale = add(*scale, SCALEFACTOR480); move16();
    4172             : #else
    4173             : #ifndef ENABLE_FFT_30X16
    4174             :         fftN2(re, im, RotVector_960, 60, 8, s, 2, 120, scratch, &fftN2scale);
    4175             : #else
    4176           0 :         fftN2(re, im, RotVector_30_16, 30, 16, s, 2, 60, scratch, &fftN2scale);
    4177             : #endif
    4178           0 :         *scale = add(*scale, SCALEFACTOR480); move16();
    4179           0 :         *scale = sub(*scale, fftN2scale); move16();
    4180             : #endif
    4181           0 :         BREAK;
    4182             : #endif
    4183           0 :     default: ASSERT(0);
    4184             :     }
    4185           0 : }
    4186             : /* RFFT_TWIDDLE1: loads xb0/xb1 = x[2i], x[2i+1] and xt0/xt1 = x[sizeOfFft-2i], x[sizeOfFft-2i+1] (each through L_shr_pos(., 2)), forms t1 = xb0-xt0 and t2 = xb1+xt1, sets t3 = t1*w1 - t2*w2 and t4 = t1*w2 + t2*w1 (products via Mpy_32_32_lc3plus), and finally overwrites t1 = xb0+xt0, t2 = xb1-xt1. */
    4187             : /* NOTE: `i` and `sizeOfFft` are not macro parameters; they are taken from the scope in which the macro is expanded. The w1 and w2 arguments are each expanded twice. */
    4188             : #define RFFT_TWIDDLE1(x, t1, t2, t3, t4, w1, w2, xb0, xb1, xt0, xt1)                                                   \
    4189             :     do                                                                                                                 \
    4190             :     {                                                                                                                  \
    4191             :         xb0 = L_shr_pos(x[2 * i + 0], 2);                                                                              \
    4192             :         xb1 = L_shr_pos(x[2 * i + 1], 2);                                                                              \
    4193             :         xt0 = L_shr_pos(x[sizeOfFft - 2 * i + 0], 2);                                                                  \
    4194             :         xt1 = L_shr_pos(x[sizeOfFft - 2 * i + 1], 2);                                                                  \
    4195             :         t1  = L_sub(xb0, xt0);                                                                                         \
    4196             :         t2  = L_add(xb1, xt1);                                                                                         \
    4197             :         t3  = L_sub(Mpy_32_32_lc3plus(t1, w1), Mpy_32_32_lc3plus(t2, w2));                                                             \
    4198             :         t4  = L_add(Mpy_32_32_lc3plus(t1, w2), Mpy_32_32_lc3plus(t2, w1));                                                             \
    4199             :         t1  = L_add(xb0, xt0);                                                                                         \
    4200             :         t2  = L_sub(xb1, xt1);                                                                                         \
    4201             :     } while (0)
    4202             : /* RFFT_TWIDDLE2: same loads and same t1/t2 handling as RFFT_TWIDDLE1, but with t3 = t1*w1 + t2*w2 and t4 = t2*w1 - t1*w2 (products via Mpy_32_32_lc3plus). */ /* NOTE: `i` and `sizeOfFft` come from the expansion site, not from the parameter list; w1 and w2 are each expanded twice. */
    4203             : #define RFFT_TWIDDLE2(x, t1, t2, t3, t4, w1, w2, xb0, xb1, xt0, xt1)                                                   \
    4204             :     do                                                                                                                 \
    4205             :     {                                                                                                                  \
    4206             :         xb0 = L_shr_pos(x[2 * i + 0], 2);                                                                              \
    4207             :         xb1 = L_shr_pos(x[2 * i + 1], 2);                                                                              \
    4208             :         xt0 = L_shr_pos(x[sizeOfFft - 2 * i + 0], 2);                                                                  \
    4209             :         xt1 = L_shr_pos(x[sizeOfFft - 2 * i + 1], 2);                                                                  \
    4210             :         t1  = L_sub(xb0, xt0);                                                                                         \
    4211             :         t2  = L_add(xb1, xt1);                                                                                         \
    4212             :         t3  = L_add(Mpy_32_32_lc3plus(t1, w1), Mpy_32_32_lc3plus(t2, w2));                                                             \
    4213             :         t4  = L_sub(Mpy_32_32_lc3plus(t2, w1), Mpy_32_32_lc3plus(t1, w2));                                                             \
    4214             :         t1  = L_add(xb0, xt0);                                                                                         \
    4215             :         t2  = L_sub(xb1, xt1);                                                                                         \
    4216             :     } while (0)
    4217             : /* rfft_twid: selects the twiddle table that BASOP_rfftN / BASOP_irfftN use for a given transform length. */
    4218             : /* size: transform length; the supported values are 32, 40, 64, 80, 96, 128, 192, 256, 384, 512 and 768. */
    4219             : /* Returns the matching RealFFT<size>_twid table. Any other size hits ASSERT(0) and, if that does not stop execution, NULL is returned. */
    4220           0 : static const Word32 *rfft_twid(int size)
    4221             : {
    4222           0 :     SWITCH (size)
    4223             :     {
    4224           0 :     case 32: return RealFFT32_twid;
    4225           0 :     case 40: return RealFFT40_twid;
    4226           0 :     case 64: return RealFFT64_twid;
    4227           0 :     case 80: return RealFFT80_twid;
    4228           0 :     case 96: return RealFFT96_twid;
    4229           0 :     case 128: return RealFFT128_twid;
    4230           0 :     case 192: return RealFFT192_twid;
    4231           0 :     case 256: return RealFFT256_twid;
    4232           0 :     case 384: return RealFFT384_twid;
    4233           0 :     case 512: return RealFFT512_twid;
    4234           0 :     case 768: return RealFFT768_twid;
    4235           0 :     default: ASSERT(0); /* unsupported length */
    4236             :     }
    4237             :     return NULL; /* only reachable through the default case above */
    4238             : }
    4239             : /* BASOP_rfftN: in-place transform of x. Runs BASOP_cfft_lc3plus with &x[0] / &x[1] as real / imaginary pointers and length sizeOfFft/2, then combines mirrored element pairs using the rfft_twid() table (RFFT_TWIDDLE1). */
    4240             : /* x: sizeOfFft values, overwritten with the result. sizeOfFft: must be a length known to rfft_twid(). scale: updated by the complex FFT and then increased by 1 here. scratchBuffer: passed through scratchAlign() and handed to the complex FFT as workspace. */
    4241           0 : void BASOP_rfftN(Word32 *x, Word16 sizeOfFft, Word16 *scale, Word8 *scratchBuffer)
    4242             : {
    4243             :     Dyn_Mem_Deluxe_In(Counter i; Word16 sizeOfFft2, sizeOfFft4, sizeOfFft8; Word32 t1, t2, t3, t4, xb0, xb1, xt0, xt1;
    4244             :                       Word32 * workBuffer; const Word32 *w32;);
    4245             : 
    4246           0 :     workBuffer = (Word32 *)scratchAlign(scratchBuffer, 0); /* Size = 4 * sizeOfFft */
    4247           0 :     w32        = rfft_twid(sizeOfFft); /* twiddle table for this length */
    4248             : 
    4249           0 :     sizeOfFft2 = shr_pos(sizeOfFft, 1); /* presumably sizeOfFft / 2 (assuming shr_pos is a right shift) */
    4250           0 :     sizeOfFft4 = shr_pos(sizeOfFft, 2); /* presumably sizeOfFft / 4 */
    4251           0 :     sizeOfFft8 = shr_pos(sizeOfFft, 3); /* presumably sizeOfFft / 8 */
    4252             : 
    4253           0 :     BASOP_cfft_lc3plus(&x[0], &x[1], sizeOfFft2, 2, scale, workBuffer); /* half-length complex FFT; the argument 2 is presumably the element stride of the interleaved data - TODO confirm */
    4254             : 
    4255           0 :     xb0  = L_shr_pos(x[0], 1); /* first pair: x[0] and x[1] become the sum and the difference of the two shifted values */
    4256           0 :     xb1  = L_shr_pos(x[1], 1);
    4257           0 :     x[0] = L_add(xb0, xb1);
    4258           0 :     move32();
    4259           0 :     x[1] = L_sub(xb0, xb1);
    4260           0 :     move32();
    4261             : 
    4262           0 :     FOR (i = 1; i < sizeOfFft8; i++) /* first range: w1 = w32[2i+1], w2 = w32[2i] */
    4263             :     {
    4264           0 :         RFFT_TWIDDLE1(x, t1, t2, t3, t4, w32[2 * i + 1], w32[2 * i], xb0, xb1, xt0, xt1);
    4265           0 :         x[2 * i] = L_sub(t1, t3);
    4266           0 :         move32();
    4267           0 :         x[2 * i + 1] = L_sub(t2, t4);
    4268           0 :         move32();
    4269           0 :         x[sizeOfFft - 2 * i] = L_add(t1, t3); /* mirrored pair at sizeOfFft - 2i */
    4270           0 :         move32();
    4271           0 :         x[sizeOfFft - 2 * i + 1] = L_negate(L_add(t2, t4));
    4272           0 :         move32();
    4273             :     }
    4274             : 
    4275           0 :     FOR (i = sizeOfFft8; i < sizeOfFft4; i++) /* second range: table read at the mirrored index 2*sizeOfFft4 - 2i, with the even/odd entries swapped between w1 and w2 */
    4276             :     {
    4277           0 :         RFFT_TWIDDLE1(x, t1, t2, t3, t4, w32[(2 * sizeOfFft4 - 2 * i)], w32[(2 * sizeOfFft4 - 2 * i + 1)], xb0, xb1,
    4278             :                       xt0, xt1);
    4279           0 :         x[2 * i] = L_sub(t1, t3);
    4280           0 :         move32();
    4281           0 :         x[2 * i + 1] = L_sub(t2, t4);
    4282           0 :         move32();
    4283           0 :         x[sizeOfFft - 2 * i] = L_add(t1, t3);
    4284           0 :         move32();
    4285           0 :         x[sizeOfFft - 2 * i + 1] = L_negate(L_add(t2, t4));
    4286           0 :         move32();
    4287             :     }
    4288             : 
    4289           0 :     x[sizeOfFft - 2 * i] = L_shr_pos(x[2 * i + 0], 1); /* uses the loop exit value of i (sizeOfFft4 when sizeOfFft8 <= sizeOfFft4): handles the remaining middle pair; source and destination are the same element when sizeOfFft is a multiple of 4 */
    4290           0 :     move32();
    4291           0 :     x[sizeOfFft - 2 * i + 1] = L_negate(L_shr_pos(x[2 * i + 1], 1));
    4292           0 :     move32();
    4293             : 
    4294           0 :     *scale = add(*scale, 1); /* this stage adds 1 to *scale on top of what the complex FFT added */
    4295           0 :     move16();
    4296             : 
    4297             :     Dyn_Mem_Deluxe_Out();
    4298           0 : }
    4299             : /* BASOP_irfftN: in-place counterpart of BASOP_rfftN. First combines mirrored element pairs of x using the rfft_twid() table (RFFT_TWIDDLE2), then runs BASOP_cfft_lc3plus with &x[0] / &x[1] as real / imaginary pointers and length sizeOfFft/2, and finally negates every odd-indexed element. */
    4300             : /* x: sizeOfFft values, overwritten with the result. sizeOfFft: must be a length known to rfft_twid(). scale: updated by the complex FFT and then increased by 2 here. scratchBuffer: passed through scratchAlign() and handed to the complex FFT as workspace. */
    4301             : /* The result is not normalised by sizeOfFft; see the note in front of the final loop. */
    4302           0 : void BASOP_irfftN(Word32 *x, Word16 sizeOfFft, Word16 *scale, Word8 *scratchBuffer)
    4303             : {
    4304             :     Dyn_Mem_Deluxe_In(Word16 sizeOfFft2, sizeOfFft4, sizeOfFft8; Word32 t1, t2, t3, t4, xb0, xb1, xt0, xt1;
    4305             :                       Word32 * workBuffer; const Word32 *w32; Counter i;);
    4306             : 
    4307           0 :     workBuffer = (Word32 *)scratchAlign(scratchBuffer, 0); /* Size = 2 * BASOP_CFFT_MAX_LENGTH */
    4308             : 
    4309           0 :     w32 = rfft_twid(sizeOfFft); /* twiddle table for this length */
    4310             : 
    4311           0 :     sizeOfFft2 = shr_pos(sizeOfFft, 1); /* presumably sizeOfFft / 2 (assuming shr_pos is a right shift) */
    4312           0 :     sizeOfFft4 = shr_pos(sizeOfFft, 2); /* presumably sizeOfFft / 4 */
    4313           0 :     sizeOfFft8 = shr_pos(sizeOfFft, 3); /* presumably sizeOfFft / 8 */
    4314             : 
    4315           0 :     xb0  = L_shr_pos(x[0], 2); /* first pair: x[0] = xb0 + xb1, x[1] = xb1 - xb0 of the two shifted values */
    4316           0 :     xb1  = L_shr_pos(x[1], 2);
    4317           0 :     x[0] = L_add(xb0, xb1);
    4318           0 :     move32();
    4319           0 :     x[1] = L_sub(xb1, xb0);
    4320           0 :     move32();
    4321             : 
    4322           0 :     FOR (i = 1; i < sizeOfFft8; i++) /* first range: w1 = w32[2i+1], w2 = w32[2i] */
    4323             :     {
    4324           0 :         RFFT_TWIDDLE2(x, t1, t2, t3, t4, w32[2 * i + 1], w32[2 * i], xb0, xb1, xt0, xt1);
    4325           0 :         x[2 * i] = L_sub(t1, t3);
    4326           0 :         move32();
    4327           0 :         x[2 * i + 1] = L_sub(t4, t2);
    4328           0 :         move32();
    4329           0 :         x[sizeOfFft - 2 * i] = L_add(t1, t3); /* mirrored pair at sizeOfFft - 2i */
    4330           0 :         move32();
    4331           0 :         x[sizeOfFft - 2 * i + 1] = L_add(t2, t4);
    4332           0 :         move32();
    4333             :     }
    4334             : 
    4335           0 :     FOR (i = sizeOfFft8; i < sizeOfFft4; i++) /* second range: table read at the mirrored index 2*sizeOfFft4 - 2i, with the even/odd entries swapped between w1 and w2 */
    4336             :     {
    4337           0 :         RFFT_TWIDDLE2(x, t1, t2, t3, t4, w32[(2 * sizeOfFft4 - 2 * i)], w32[(2 * sizeOfFft4 - 2 * i + 1)], xb0, xb1,
    4338             :                       xt0, xt1);
    4339           0 :         x[2 * i] = L_sub(t1, t3);
    4340           0 :         move32();
    4341           0 :         x[2 * i + 1] = L_sub(t4, t2);
    4342           0 :         move32();
    4343           0 :         x[sizeOfFft - 2 * i] = L_add(t1, t3);
    4344           0 :         move32();
    4345           0 :         x[sizeOfFft - 2 * i + 1] = L_add(t2, t4);
    4346           0 :         move32();
    4347             :     }
    4348             : 
    4349           0 :     x[sizeOfFft - 2 * i] = L_shr_pos(x[2 * i + 0], 1); /* uses the loop exit value of i (sizeOfFft4 when sizeOfFft8 <= sizeOfFft4): handles the remaining middle pair; source and destination are the same element when sizeOfFft is a multiple of 4 */
    4350           0 :     move32();
    4351           0 :     x[sizeOfFft - 2 * i + 1] = L_shr_pos(x[2 * i + 1], 1);
    4352           0 :     move32();
    4353             : 
    4354           0 :     BASOP_cfft_lc3plus(&x[0], &x[1], sizeOfFft2, 2, scale, workBuffer); /* half-length complex FFT; the argument 2 is presumably the element stride of the interleaved data - TODO confirm */
    4355             : 
    4356             :     /* If you want BASOP_irfft to be inverse to BASOP_rfft then the result needs
    4357             :      * to be normalised by sizeOfFft */
    4358           0 :     FOR (i = 0; i < sizeOfFft2; i++) /* negate every odd-indexed element of x */
    4359             :     {
    4360           0 :         x[2 * i + 1] = L_negate(x[2 * i + 1]);
    4361           0 :         move32();
    4362             :     }
    4363             : 
    4364           0 :     *scale = add(*scale, 2); /* this stage adds 2 to *scale on top of what the complex FFT added */
    4365           0 :     move16();
    4366             : 
    4367             :     Dyn_Mem_Deluxe_Out();
    4368           0 : }
    4369             : 
    4370             : 

Generated by: LCOV version 1.14