Line data Source code
1 : /******************************************************************************
2 : * ETSI TS 103 634 V1.5.1 *
3 : * Low Complexity Communication Codec Plus (LC3plus) *
4 : * *
5 : * Copyright licence is solely granted through ETSI Intellectual Property *
6 : * Rights Policy, 3rd April 2019. No patent licence is granted by implication, *
7 : * estoppel or otherwise. *
8 : ******************************************************************************/
9 :
10 : #include "functions.h"
11 : #include "rom_basop_util_lc3plus.h"
12 :
/* Default rescale amount for the high-resolution FFT. It is only supplied
 * when rescaling is enabled and the build has not already picked a value. */
#if defined(ENABLE_FFT_RESCALE) && !defined(FFT_RESCALE_HR)
#define FFT_RESCALE_HR 3
#endif

/* Right-shift amounts (in bits) used by the FFT kernels of the given length. */
#define SCALEFACTORN2   3
#define SCALEFACTOR4    3
#define SCALEFACTOR5    4
#define SCALEFACTOR6    4
#define SCALEFACTOR8    4
#define SCALEFACTOR10   5
#define SCALEFACTOR15   5
#define SCALEFACTOR16   5
#define SCALEFACTOR20   5
#define SCALEFACTOR30   6
#define SCALEFACTOR30_1 5
#define SCALEFACTOR30_2 1
#define SCALEFACTOR32   6
#define SCALEFACTOR32_1 5
#define SCALEFACTOR32_2 1
#define SCALEFACTOR40   7
#define SCALEFACTOR48   8
#define SCALEFACTOR60   7
#define SCALEFACTOR64   7
#define SCALEFACTOR80   8
#define SCALEFACTOR90   9
#define SCALEFACTOR96   9
#define SCALEFACTOR120  8
#define SCALEFACTOR128  8
#define SCALEFACTOR160  8
#define SCALEFACTOR180  10
#define SCALEFACTOR192  10
#define SCALEFACTOR240  9
#define SCALEFACTOR256  9
#define SCALEFACTOR384  11

/* Everything that depends on the high-resolution mode switch is kept in one
 * place: the multiplier alias, the HR-only scale factors, the rounding
 * variant of L_shr_pos and the constant wrapper FFTC(). */
#ifdef ENABLE_HR_MODE

/* 32x32 bit multiplications in HR mode. */
#define Mpy_32_xx Mpy_32_32_lc3plus

#define SCALEFACTOR360 11
#ifdef ENABLE_FFT_30X16
#define SCALEFACTOR480 11
#else
#define SCALEFACTOR480 10
#endif

/* Replace L_shr_pos by a rounding shift: shift by (y - 1), add 1, then shift
 * by one more bit. */
#undef L_shr_pos
#define L_shr_pos(x, y) (L_shr(L_add(L_shr((x), ((y)-1)),1),1))

/* Twiddle constants are used as full 32 bit values. */
#define FFTC(x) ((Word32)x)

#else /* !ENABLE_HR_MODE */

/* 32x16 bit multiplications otherwise. */
#define Mpy_32_xx Mpy_32_16_lc3plus

/* Twiddle constants are passed through WORD322WORD16
 * (presumably a reduction to 16 bit - confirm against its definition). */
#define FFTC(x) WORD322WORD16((Word32)x)

#endif /* ENABLE_HR_MODE */

/* Radix-3 twiddle */
#define C31 (FFTC(0x91261468)) /* FL2WORD32( -0.86602540) -sqrt(3)/2 */

/* Radix-5 twiddles */
#define C51 (FFTC(0x79bc3854)) /* FL2WORD32( 0.95105652)   */
#define C52 (FFTC(0x9d839db0)) /* FL2WORD32(-1.53884180/2) */
#define C53 (FFTC(0xd18053ce)) /* FL2WORD32(-0.36327126)   */
#define C54 (FFTC(0x478dde64)) /* FL2WORD32( 0.55901699)   */
#define C55 (FFTC(0xb0000001)) /* FL2WORD32(-1.25/2)       */

/* Radix-8 twiddles */
#define C81 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1) */
#define C82 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) */

/* Radix-16 twiddles */
#define C161 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1)  INV_SQRT2    */
#define C162 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) -INV_SQRT2    */
#define C163 (FFTC(0x7641af3d)) /* FL2WORD32( 9.238795325112867e-1)  COS_PI_DIV8  */
#define C164 (FFTC(0x89be50c3)) /* FL2WORD32(-9.238795325112867e-1) -COS_PI_DIV8  */
#define C165 (FFTC(0x30fbc54d)) /* FL2WORD32( 3.826834323650898e-1)  COS_3PI_DIV8 */
#define C166 (FFTC(0xcf043ab3)) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 */

/* Twiddles that are always 32 bit wide, independent of the HR mode switch. */
#define C51_32 (0x79bc3854) /* FL2WORD32( 0.95105652)   */
#define C52_32 (0x9d839db0) /* FL2WORD32(-1.53884180/2) */
#define C53_32 (0xd18053ce) /* FL2WORD32(-0.36327126)   */
#define C54_32 (0x478dde64) /* FL2WORD32( 0.55901699)   */
#define C55_32 (0xb0000001) /* FL2WORD32(-1.25/2)       */

#define C61_32 (0x6ed9eba1) /* radix-6 twiddle, used by fft6 */

#define C81_32 (0x5a82799a) /* FL2WORD32( 7.071067811865475e-1) */
#define C82_32 (0xa57d8666) /* FL2WORD32(-7.071067811865475e-1) */
108 :
#ifdef ENABLE_HR_MODE

/* (re + j*im) = (a + j*b) * (c + j*d), no extra scaling. HR mode only. */
#define cplxMpy4_16_0(re, im, a, b, c, d)             \
    do                                                \
    {                                                 \
        re = L_sub(Mpy_32_xx(a, c), Mpy_32_xx(b, d)); \
        move32();                                     \
        im = L_add(Mpy_32_xx(a, d), Mpy_32_xx(b, c)); \
        move32();                                     \
    } while (0)

/* Trivial twiddle: plain copy of (a, b) into (re, im). HR mode only. */
#define cplxMpy4_16_1(re, im, a, b) \
    do                              \
    {                               \
        re = a;                     \
        move32();                   \
        im = b;                     \
        move32();                   \
    } while (0)

#endif /* ENABLE_HR_MODE */

/* Four 32x32 products of sums/differences of t0..t3 with C81_32 / C82_32. */
#define Mpy3_0(s12, s13, s14, s15, t0, t1, t2, t3)      \
    do                                                  \
    {                                                   \
        s12 = Mpy_32_32_lc3plus(L_add(t0, t2), C81_32); \
        s14 = Mpy_32_32_lc3plus(L_sub(t0, t2), C81_32); \
        s13 = Mpy_32_32_lc3plus(L_sub(t3, t1), C81_32); \
        s15 = Mpy_32_32_lc3plus(L_add(t1, t3), C82_32); \
    } while (0)

/* In-place complex multiply of (a + j*b) by (c + j*d) on inputs that are
 * shifted right by one bit first. The temporaries `as` and `bs` are NOT
 * declared here; they have to exist in the scope where the macro is used. */
#define cplxMpy3_0(a, b, c, d)                                         \
    do                                                                 \
    {                                                                  \
        as = L_shr_pos(a, 1);                                          \
        bs = L_shr_pos(b, 1);                                          \
        a  = L_sub(Mpy_32_32_lc3plus(as, c), Mpy_32_32_lc3plus(bs, d)); \
        b  = L_add(Mpy_32_32_lc3plus(as, d), Mpy_32_32_lc3plus(bs, c)); \
    } while (0)

/* Shift used by the cplxMpy4_4_* macros: L_shr_pos in HR mode, L_shr
 * otherwise, always by (SCALEFACTOR60 - SCALEFACTOR15) bits. */
#ifdef ENABLE_HR_MODE
#define CPLXMPY4_4_SHR(v) L_shr_pos(v, SCALEFACTOR60 - SCALEFACTOR15)
#else
#define CPLXMPY4_4_SHR(v) L_shr(v, SCALEFACTOR60 - SCALEFACTOR15)
#endif

/* Complex multiply followed by the shift above.
 * Note: expands to two bare statements (no do/while wrapper). */
#define cplxMpy4_4_0(re, im, a, b, c, d)                          \
    re = CPLXMPY4_4_SHR(L_sub(Mpy_32_xx(a, c), Mpy_32_xx(b, d))); \
    im = CPLXMPY4_4_SHR(L_add(Mpy_32_xx(a, d), Mpy_32_xx(b, c)));

/* Trivial twiddle variant: only the shift is applied.
 * Note: expands to two bare statements (no do/while wrapper). */
#define cplxMpy4_4_1(re, im, a, b) \
    re = CPLXMPY4_4_SHR(a);        \
    im = CPLXMPY4_4_SHR(b);

/* (re + j*im) = ((a + j*b) * (c + j*d)) shifted right by one bit. */
#define cplxMpy4_8_0(re, im, a, b, c, d)                            \
    do                                                              \
    {                                                               \
        re = L_shr_pos(L_sub(Mpy_32_xx(a, c), Mpy_32_xx(b, d)), 1); \
        im = L_shr_pos(L_add(Mpy_32_xx(a, d), Mpy_32_xx(b, c)), 1); \
    } while (0)

/* Trivial twiddle variant: (a, b) shifted right by one bit. */
#define cplxMpy4_8_1(re, im, a, b) \
    do                             \
    {                              \
        re = L_shr_pos(a, 1);      \
        im = L_shr_pos(b, 1);      \
    } while (0)

/* (re + j*im) = ((a + j*b) * (c - j*d)) shifted right by one bit,
 * always with 32x32 bit products. */
#define cplxMpy4_8_2(re, im, a, b, c, d)                                            \
    do                                                                              \
    {                                                                               \
        re = L_shr_pos(L_add(Mpy_32_32_lc3plus(a, c), Mpy_32_32_lc3plus(b, d)), 1); \
        im = L_shr_pos(L_sub(Mpy_32_32_lc3plus(b, c), Mpy_32_32_lc3plus(a, d)), 1); \
    } while (0)

/* (re + j*im) = (a + j*b) * (c + j*d), no extra scaling. */
#define cplxMpy4_12_0(re, im, a, b, c, d)             \
    do                                                \
    {                                                 \
        re = L_sub(Mpy_32_xx(a, c), Mpy_32_xx(b, d)); \
        move32();                                     \
        im = L_add(Mpy_32_xx(a, d), Mpy_32_xx(b, c)); \
        move32();                                     \
    } while (0)

/* Trivial twiddle: plain copy of (a, b) into (re, im). */
#define cplxMpy4_12_1(re, im, a, b) \
    do                              \
    {                               \
        re = a;                     \
        move32();                   \
        im = b;                     \
        move32();                   \
    } while (0)
208 :
209 :
210 0 : static void fft4(Word32 *x)
211 : {
212 : Dyn_Mem_Deluxe_In(Word32 x0, x1, x2, x3, x4, x5, x6, x7; Word32 t0, t1, t2, t3, t4, t5, t6, t7;);
213 :
214 0 : x0 = L_shr_pos(x[0], SCALEFACTOR4);
215 0 : x1 = L_shr_pos(x[1], SCALEFACTOR4);
216 0 : x2 = L_shr_pos(x[2], SCALEFACTOR4);
217 0 : x3 = L_shr_pos(x[3], SCALEFACTOR4);
218 0 : x4 = L_shr_pos(x[4], SCALEFACTOR4);
219 0 : x5 = L_shr_pos(x[5], SCALEFACTOR4);
220 0 : x6 = L_shr_pos(x[6], SCALEFACTOR4);
221 0 : x7 = L_shr_pos(x[7], SCALEFACTOR4);
222 :
223 : /* Pre-additions */
224 0 : t0 = L_add(x0, x4);
225 0 : t2 = L_sub(x0, x4);
226 0 : t1 = L_add(x1, x5);
227 0 : t3 = L_sub(x1, x5);
228 0 : t4 = L_add(x2, x6);
229 0 : t7 = L_sub(x2, x6);
230 0 : t5 = L_add(x7, x3);
231 0 : t6 = L_sub(x7, x3);
232 :
233 : /* Post-additions */
234 0 : x[0] = L_add(t0, t4);
235 0 : x[1] = L_add(t1, t5);
236 0 : x[2] = L_sub(t2, t6);
237 0 : x[3] = L_sub(t3, t7);
238 0 : x[4] = L_sub(t0, t4);
239 0 : x[5] = L_sub(t1, t5);
240 0 : x[6] = L_add(t2, t6);
241 0 : x[7] = L_add(t3, t7);
242 :
243 : Dyn_Mem_Deluxe_Out();
244 0 : }
245 :
246 : /**
247 : * \brief Function performs a complex 5-point FFT
248 : * The FFT is performed inplace. The result of the FFT
249 : * is scaled by SCALEFACTOR5 bits.
250 : *
251 : *
252 : * \param [i/o] re real input / output
253 : * \param [i/o] im imag input / output
254 : * \param [i ] s stride real and imag input / output
255 : *
256 : * \return void
257 : */
258 :
259 :
260 :
261 0 : static void fft5(Word32 *re, Word32 *im, Word16 s)
262 : {
263 : Dyn_Mem_Deluxe_In(Word32 x0, x1, x2, x3, x4; Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4; Word32 t;);
264 :
265 : /* real part */
266 0 : x0 = L_shr_pos(re[s * 0], SCALEFACTOR5);
267 0 : x1 = L_shr_pos(re[s * 1], SCALEFACTOR5);
268 0 : x2 = L_shr_pos(re[s * 2], SCALEFACTOR5);
269 0 : x3 = L_shr_pos(re[s * 3], SCALEFACTOR5);
270 0 : x4 = L_shr_pos(re[s * 4], SCALEFACTOR5);
271 :
272 0 : r1 = L_add(x1, x4);
273 0 : r4 = L_sub(x1, x4);
274 0 : r3 = L_add(x2, x3);
275 0 : r2 = L_sub(x2, x3);
276 0 : t = Mpy_32_32_lc3plus(L_sub(r1, r3), C54_32);
277 0 : r1 = L_add(r1, r3);
278 0 : re[0] = L_add(x0, r1);
279 0 : move32();
280 : /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
281 : the values as fracts */
282 0 : r1 = L_add(re[0], (L_shl_pos(Mpy_32_32_lc3plus(r1, C55_32), 1)));
283 0 : r3 = L_sub(r1, t);
284 0 : r1 = L_add(r1, t);
285 0 : t = Mpy_32_32_lc3plus(L_add(r4, r2), C51_32);
286 : /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
287 : the values as fracts */
288 0 : r4 = L_add(t, L_shl_pos(Mpy_32_32_lc3plus(r4, C52_32), 1));
289 0 : r2 = L_add(t, Mpy_32_32_lc3plus(r2, C53_32));
290 :
291 : /* imaginary part */
292 0 : x0 = L_shr_pos(im[s * 0], SCALEFACTOR5);
293 0 : x1 = L_shr_pos(im[s * 1], SCALEFACTOR5);
294 0 : x2 = L_shr_pos(im[s * 2], SCALEFACTOR5);
295 0 : x3 = L_shr_pos(im[s * 3], SCALEFACTOR5);
296 0 : x4 = L_shr_pos(im[s * 4], SCALEFACTOR5);
297 :
298 0 : s1 = L_add(x1, x4);
299 0 : s4 = L_sub(x1, x4);
300 0 : s3 = L_add(x2, x3);
301 0 : s2 = L_sub(x2, x3);
302 0 : t = Mpy_32_32_lc3plus(L_sub(s1, s3), C54_32);
303 0 : s1 = L_add(s1, s3);
304 0 : im[0] = L_add(x0, s1);
305 0 : move32();
306 : /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
307 : the values as fracts */
308 0 : s1 = L_add(im[0], L_shl_pos(Mpy_32_32_lc3plus(s1, C55_32), 1));
309 0 : s3 = L_sub(s1, t);
310 0 : s1 = L_add(s1, t);
311 0 : t = Mpy_32_32_lc3plus(L_add(s4, s2), C51_32);
312 : /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
313 : the values as fracts */
314 0 : s4 = L_add(t, L_shl_pos(Mpy_32_32_lc3plus(s4, C52_32), 1));
315 0 : s2 = L_add(t, Mpy_32_32_lc3plus(s2, C53_32));
316 :
317 : /* combination */
318 0 : re[s * 1] = L_add(r1, s2);
319 0 : move32();
320 0 : re[s * 4] = L_sub(r1, s2);
321 0 : move32();
322 0 : re[s * 2] = L_sub(r3, s4);
323 0 : move32();
324 0 : re[s * 3] = L_add(r3, s4);
325 0 : move32();
326 :
327 0 : im[s * 1] = L_sub(s1, r2);
328 0 : move32();
329 0 : im[s * 4] = L_add(s1, r2);
330 0 : move32();
331 0 : im[s * 2] = L_add(s3, r4);
332 0 : move32();
333 0 : im[s * 3] = L_sub(s3, r4);
334 0 : move32();
335 :
336 : Dyn_Mem_Deluxe_Out();
337 0 : }
338 :
339 : /**
340 : * \brief Function performs a complex 6-point FFT
341 : * The FFT is performed inplace. The result of the FFT
342 : * is scaled by SCALEFACTOR6 bits.
343 : *
344 : *
345 : * \param [i/o] re real input / output
346 : * \param [i/o] im imag input / output
347 : * \param [i ] st stride real and imag input / output
348 : *
349 : * \return void
350 : */
351 :
352 0 : static void fft6(Word32 *re, Word32 *im, Word16 st)
353 : {
354 : Dyn_Mem_Deluxe_In(Word32 x0, x1, x2, x3, x4, x5; Word32 r1o, r2o, i1e, i2e, i1o, i2o; Word32 t, s;);
355 :
356 : /* process real parts */
357 :
358 0 : x0 = L_shr_pos(re[0 * st], SCALEFACTOR6);
359 0 : x1 = L_shr_pos(re[1 * st], SCALEFACTOR6);
360 0 : x2 = L_shr_pos(re[2 * st], SCALEFACTOR6);
361 0 : x3 = L_shr_pos(re[3 * st], SCALEFACTOR6);
362 0 : x4 = L_shr_pos(re[4 * st], SCALEFACTOR6);
363 0 : x5 = L_shr_pos(re[5 * st], SCALEFACTOR6);
364 :
365 0 : t = L_add(x0, L_add(x2, x4));
366 0 : s = L_add(x1, L_add(x3, x5));
367 0 : re[0 * st] = L_add(t, s);
368 0 : move32();
369 0 : re[3 * st] = L_sub(t, s);
370 0 : move32();
371 0 : t = L_sub(x0, L_shr_pos(L_add(x2, x4), 1));
372 :
373 0 : re[1 * st] = t;
374 0 : move32();
375 0 : re[2 * st] = t;
376 0 : move32();
377 0 : re[4 * st] = t;
378 0 : move32();
379 0 : re[5 * st] = t;
380 0 : move32();
381 :
382 0 : s = Mpy_32_32_lc3plus(L_sub(x4, x2), C61_32);
383 :
384 0 : i1e = s;
385 0 : i2e = -s;
386 :
387 0 : t = L_sub(x1, L_shr_pos(L_add(x3, x5), 1));
388 0 : s = Mpy_32_32_lc3plus(L_sub(x5, x3), C61_32);
389 :
390 0 : r1o = r2o = t;
391 0 : i1o = s;
392 0 : i2o = -s;
393 :
394 0 : x0 = L_shr_pos(im[0 * st], SCALEFACTOR6);
395 0 : x1 = L_shr_pos(im[1 * st], SCALEFACTOR6);
396 0 : x2 = L_shr_pos(im[2 * st], SCALEFACTOR6);
397 0 : x3 = L_shr_pos(im[3 * st], SCALEFACTOR6);
398 0 : x4 = L_shr_pos(im[4 * st], SCALEFACTOR6);
399 0 : x5 = L_shr_pos(im[5 * st], SCALEFACTOR6);
400 :
401 0 : t = L_add(x0, L_add(x2, x4));
402 0 : s = L_add(x1, L_add(x3, x5));
403 :
404 0 : im[0 * st] = L_add(t, s);
405 0 : move32();
406 0 : im[3 * st] = L_sub(t, s);
407 0 : move32();
408 :
409 0 : t = Mpy_32_32_lc3plus(L_sub(x2, x4), C61_32);
410 0 : s = L_sub(x0, L_shr_pos(L_add(x2, x4), 1));
411 :
412 0 : re[1 * st] = L_add(re[1 * st], t);
413 0 : move32();
414 0 : re[2 * st] = L_sub(re[2 * st], t);
415 0 : move32();
416 0 : re[4 * st] = L_add(re[4 * st], t);
417 0 : move32();
418 0 : re[5 * st] = L_sub(re[5 * st], t);
419 0 : move32();
420 :
421 0 : i1e = L_add(i1e, s);
422 0 : i2e = L_add(i2e, s);
423 :
424 0 : t = Mpy_32_32_lc3plus(L_sub(x3, x5), C61_32);
425 0 : s = L_sub(x1, L_shr_pos(L_add(x5, x3), 1));
426 :
427 0 : r1o = L_add(r1o, t);
428 0 : r2o = L_sub(r2o, t);
429 :
430 0 : i1o = L_add(i1o, s);
431 0 : i2o = L_add(i2o, s);
432 :
433 0 : t = L_add(L_shr_pos(r1o, 1), Mpy_32_32_lc3plus(i1o, C61_32));
434 0 : s = L_sub(L_shr_pos(i1o, 1), Mpy_32_32_lc3plus(r1o, C61_32));
435 :
436 0 : re[1 * st] = L_add(re[1 * st], t);
437 0 : move32();
438 0 : im[1 * st] = L_add(i1e, s);
439 0 : move32();
440 :
441 0 : re[4 * st] = L_sub(re[4 * st], t);
442 0 : move32();
443 0 : im[4 * st] = L_sub(i1e, s);
444 0 : move32();
445 :
446 0 : t = L_sub(Mpy_32_32_lc3plus(i2o, C61_32), L_shr_pos(r2o, 1));
447 0 : s = L_negate(L_add(Mpy_32_32_lc3plus(r2o, C61_32), L_shr_pos(i2o, 1)));
448 :
449 0 : re[2 * st] = L_add(re[2 * st], t);
450 0 : move32();
451 0 : im[2 * st] = L_add(i2e, s);
452 0 : move32();
453 :
454 0 : re[5 * st] = L_sub(re[5 * st], t);
455 0 : move32();
456 0 : im[5 * st] = L_sub(i2e, s);
457 0 : move32();
458 :
459 : Dyn_Mem_Deluxe_Out();
460 0 : }
461 :
462 : /**
463 : * \brief Function performs a complex 8-point FFT
464 : * The FFT is performed inplace. The result of the FFT
465 : * is scaled by SCALEFACTOR8 bits.
466 : *
467 : * WOPS with 32x16 bit multiplications: 108 cycles
468 : *
469 : * \param [i/o] re real input / output
470 : * \param [i/o] im imag input / output
471 : * \param [i ] s stride real and imag input / output
472 : *
473 : * \return void
474 : */
475 :
476 :
477 0 : static void fft8(Word32 *re, Word32 *im, Word16 s)
478 : {
479 : Dyn_Mem_Deluxe_In(Word32 x00, x01, x02, x03, x04, x05, x06, x07; Word32 x08, x09, x10, x11, x12, x13, x14, x15;
480 : Word32 t00, t01, t02, t03, t04, t05, t06, t07; Word32 t08, t09, t10, t11, t12, t13, t14, t15;
481 : Word32 s00, s01, s02, s03, s04, s05, s06, s07; Word32 s08, s09, s10, s11, s12, s13, s14, s15;);
482 :
483 : /* Pre-additions */
484 :
485 0 : x00 = L_shr_pos(re[s * 0], SCALEFACTOR8);
486 0 : x01 = L_shr_pos(im[s * 0], SCALEFACTOR8);
487 0 : x02 = L_shr_pos(re[s * 1], SCALEFACTOR8);
488 0 : x03 = L_shr_pos(im[s * 1], SCALEFACTOR8);
489 0 : x04 = L_shr_pos(re[s * 2], SCALEFACTOR8);
490 0 : x05 = L_shr_pos(im[s * 2], SCALEFACTOR8);
491 0 : x06 = L_shr_pos(re[s * 3], SCALEFACTOR8);
492 0 : x07 = L_shr_pos(im[s * 3], SCALEFACTOR8);
493 0 : x08 = L_shr_pos(re[s * 4], SCALEFACTOR8);
494 0 : x09 = L_shr_pos(im[s * 4], SCALEFACTOR8);
495 0 : x10 = L_shr_pos(re[s * 5], SCALEFACTOR8);
496 0 : x11 = L_shr_pos(im[s * 5], SCALEFACTOR8);
497 0 : x12 = L_shr_pos(re[s * 6], SCALEFACTOR8);
498 0 : x13 = L_shr_pos(im[s * 6], SCALEFACTOR8);
499 0 : x14 = L_shr_pos(re[s * 7], SCALEFACTOR8);
500 0 : x15 = L_shr_pos(im[s * 7], SCALEFACTOR8);
501 :
502 0 : t00 = L_add(x00, x08);
503 0 : t02 = L_sub(x00, x08);
504 0 : t01 = L_add(x01, x09);
505 0 : t03 = L_sub(x01, x09);
506 0 : t04 = L_add(x02, x10);
507 0 : t06 = L_sub(x02, x10);
508 0 : t05 = L_add(x03, x11);
509 0 : t07 = L_sub(x03, x11);
510 0 : t08 = L_add(x04, x12);
511 0 : t10 = L_sub(x04, x12);
512 0 : t09 = L_add(x05, x13);
513 0 : t11 = L_sub(x05, x13);
514 0 : t12 = L_add(x06, x14);
515 0 : t14 = L_sub(x06, x14);
516 0 : t13 = L_add(x07, x15);
517 0 : t15 = L_sub(x07, x15);
518 :
519 : /* Pre-additions and core multiplications */
520 :
521 0 : s00 = L_add(t00, t08);
522 0 : s04 = L_sub(t00, t08);
523 0 : s01 = L_add(t01, t09);
524 0 : s05 = L_sub(t01, t09);
525 0 : s08 = L_sub(t02, t11);
526 0 : s10 = L_add(t02, t11);
527 0 : s09 = L_add(t03, t10);
528 0 : s11 = L_sub(t03, t10);
529 0 : s02 = L_add(t04, t12);
530 0 : s07 = L_sub(t04, t12);
531 0 : s03 = L_add(t05, t13);
532 0 : s06 = L_sub(t13, t05);
533 :
534 0 : t01 = L_add(t06, t14);
535 0 : t02 = L_sub(t06, t14);
536 0 : t00 = L_add(t07, t15);
537 0 : t03 = L_sub(t07, t15);
538 :
539 0 : s12 = Mpy_32_xx(L_add(t00, t02), C81);
540 0 : s14 = Mpy_32_xx(L_sub(t00, t02), C81);
541 0 : s13 = Mpy_32_xx(L_sub(t03, t01), C81);
542 0 : s15 = Mpy_32_xx(L_add(t01, t03), C82);
543 :
544 : /* Post-additions */
545 :
546 0 : re[s * 0] = L_add(s00, s02);
547 0 : move32();
548 0 : re[s * 4] = L_sub(s00, s02);
549 0 : move32();
550 0 : im[s * 0] = L_add(s01, s03);
551 0 : move32();
552 0 : im[s * 4] = L_sub(s01, s03);
553 0 : move32();
554 0 : re[s * 2] = L_sub(s04, s06);
555 0 : move32();
556 0 : re[s * 6] = L_add(s04, s06);
557 0 : move32();
558 0 : im[s * 2] = L_sub(s05, s07);
559 0 : move32();
560 0 : im[s * 6] = L_add(s05, s07);
561 0 : move32();
562 0 : re[s * 3] = L_add(s08, s14);
563 0 : move32();
564 0 : re[s * 7] = L_sub(s08, s14);
565 0 : move32();
566 0 : im[s * 3] = L_add(s09, s15);
567 0 : move32();
568 0 : im[s * 7] = L_sub(s09, s15);
569 0 : move32();
570 0 : re[s * 1] = L_add(s10, s12);
571 0 : move32();
572 0 : re[s * 5] = L_sub(s10, s12);
573 0 : move32();
574 0 : im[s * 1] = L_add(s11, s13);
575 0 : move32();
576 0 : im[s * 5] = L_sub(s11, s13);
577 0 : move32();
578 :
579 : Dyn_Mem_Deluxe_Out();
580 0 : }
581 :
582 : /**
583 : * \brief Function performs a complex 10-point FFT
584 : * The FFT is performed inplace. The result of the FFT
585 : * is scaled by SCALEFACTOR10 bits.
586 : *
587 : * WOPS with 32x16 bit multiplications: 196 cycles
588 : *
589 : * \param [i/o] re real input / output
590 : * \param [i/o] im imag input / output
591 : * \param [i ] s stride real and imag input / output
592 : *
593 : * \return void
594 : */
595 :
596 :
597 :
598 0 : static void fft10(Word32 *re, Word32 *im, Word16 s)
599 : {
600 : Dyn_Mem_Deluxe_In(Word32 t; Word32 x0, x1, x2, x3, x4; Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4;
601 : Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
602 : Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;);
603 :
604 : /* 2 fft5 stages */
605 :
606 : /* real part */
607 0 : x0 = L_shr_pos(re[s * 0], SCALEFACTOR10);
608 0 : x1 = L_shr_pos(re[s * 2], SCALEFACTOR10);
609 0 : x2 = L_shr_pos(re[s * 4], SCALEFACTOR10);
610 0 : x3 = L_shr_pos(re[s * 6], SCALEFACTOR10);
611 0 : x4 = L_shr_pos(re[s * 8], SCALEFACTOR10);
612 :
613 0 : r1 = L_add(x3, x2);
614 0 : r4 = L_sub(x3, x2);
615 0 : r3 = L_add(x1, x4);
616 0 : r2 = L_sub(x1, x4);
617 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
618 0 : r1 = L_add(r1, r3);
619 0 : y00 = L_add(x0, r1);
620 0 : r1 = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
621 0 : r3 = L_sub(r1, t);
622 0 : r1 = L_add(r1, t);
623 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
624 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
625 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
626 :
627 : /* imaginary part */
628 0 : x0 = L_shr_pos(im[s * 0], SCALEFACTOR10);
629 0 : x1 = L_shr_pos(im[s * 2], SCALEFACTOR10);
630 0 : x2 = L_shr_pos(im[s * 4], SCALEFACTOR10);
631 0 : x3 = L_shr_pos(im[s * 6], SCALEFACTOR10);
632 0 : x4 = L_shr_pos(im[s * 8], SCALEFACTOR10);
633 :
634 0 : s1 = L_add(x3, x2);
635 0 : s4 = L_sub(x3, x2);
636 0 : s3 = L_add(x1, x4);
637 0 : s2 = L_sub(x1, x4);
638 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
639 0 : s1 = L_add(s1, s3);
640 0 : y01 = L_add(x0, s1);
641 0 : s1 = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
642 0 : s3 = L_sub(s1, t);
643 0 : s1 = L_add(s1, t);
644 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
645 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
646 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
647 :
648 : /* combination */
649 0 : y04 = L_add(r1, s2);
650 0 : y16 = L_sub(r1, s2);
651 0 : y08 = L_sub(r3, s4);
652 0 : y12 = L_add(r3, s4);
653 :
654 0 : y05 = L_sub(s1, r2);
655 0 : y17 = L_add(s1, r2);
656 0 : y09 = L_add(s3, r4);
657 0 : y13 = L_sub(s3, r4);
658 :
659 : /* real part */
660 0 : x0 = L_shr_pos(re[s * 5], SCALEFACTOR10);
661 0 : x1 = L_shr_pos(re[s * 1], SCALEFACTOR10);
662 0 : x2 = L_shr_pos(re[s * 3], SCALEFACTOR10);
663 0 : x3 = L_shr_pos(re[s * 7], SCALEFACTOR10);
664 0 : x4 = L_shr_pos(re[s * 9], SCALEFACTOR10);
665 :
666 0 : r1 = L_add(x1, x4);
667 0 : r4 = L_sub(x1, x4);
668 0 : r3 = L_add(x3, x2);
669 0 : r2 = L_sub(x3, x2);
670 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
671 0 : r1 = L_add(r1, r3);
672 0 : y02 = L_add(x0, r1);
673 0 : r1 = L_add(y02, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
674 0 : r3 = L_sub(r1, t);
675 0 : r1 = L_add(r1, t);
676 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
677 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
678 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
679 :
680 : /* imaginary part */
681 0 : x0 = L_shr_pos(im[s * 5], SCALEFACTOR10);
682 0 : x1 = L_shr_pos(im[s * 1], SCALEFACTOR10);
683 0 : x2 = L_shr_pos(im[s * 3], SCALEFACTOR10);
684 0 : x3 = L_shr_pos(im[s * 7], SCALEFACTOR10);
685 0 : x4 = L_shr_pos(im[s * 9], SCALEFACTOR10);
686 :
687 0 : s1 = L_add(x1, x4);
688 0 : s4 = L_sub(x1, x4);
689 0 : s3 = L_add(x3, x2);
690 0 : s2 = L_sub(x3, x2);
691 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
692 0 : s1 = L_add(s1, s3);
693 0 : y03 = L_add(x0, s1);
694 0 : s1 = L_add(y03, L_shl_pos(Mpy_32_xx(s1, C55), 1));
695 0 : s3 = L_sub(s1, t);
696 0 : s1 = L_add(s1, t);
697 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
698 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
699 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
700 :
701 : /* combination */
702 0 : y06 = L_add(r1, s2);
703 0 : y18 = L_sub(r1, s2);
704 0 : y10 = L_sub(r3, s4);
705 0 : y14 = L_add(r3, s4);
706 :
707 0 : y07 = L_sub(s1, r2);
708 0 : y19 = L_add(s1, r2);
709 0 : y11 = L_add(s3, r4);
710 0 : y15 = L_sub(s3, r4);
711 :
712 : /* 5 fft2 stages */
713 0 : re[s * 0] = L_add(y00, y02);
714 0 : move32();
715 0 : im[s * 0] = L_add(y01, y03);
716 0 : move32();
717 0 : re[s * 5] = L_sub(y00, y02);
718 0 : move32();
719 0 : im[s * 5] = L_sub(y01, y03);
720 0 : move32();
721 :
722 0 : re[s * 2] = L_add(y04, y06);
723 0 : move32();
724 0 : im[s * 2] = L_add(y05, y07);
725 0 : move32();
726 0 : re[s * 7] = L_sub(y04, y06);
727 0 : move32();
728 0 : im[s * 7] = L_sub(y05, y07);
729 0 : move32();
730 :
731 0 : re[s * 4] = L_add(y08, y10);
732 0 : move32();
733 0 : im[s * 4] = L_add(y09, y11);
734 0 : move32();
735 0 : re[s * 9] = L_sub(y08, y10);
736 0 : move32();
737 0 : im[s * 9] = L_sub(y09, y11);
738 0 : move32();
739 :
740 0 : re[s * 6] = L_add(y12, y14);
741 0 : move32();
742 0 : im[s * 6] = L_add(y13, y15);
743 0 : move32();
744 0 : re[s * 1] = L_sub(y12, y14);
745 0 : move32();
746 0 : im[s * 1] = L_sub(y13, y15);
747 0 : move32();
748 :
749 0 : re[s * 8] = L_add(y16, y18);
750 0 : move32();
751 0 : im[s * 8] = L_add(y17, y19);
752 0 : move32();
753 0 : re[s * 3] = L_sub(y16, y18);
754 0 : move32();
755 0 : im[s * 3] = L_sub(y17, y19);
756 0 : move32();
757 :
758 : Dyn_Mem_Deluxe_Out();
759 0 : }
760 :
761 : /**
762 : * \brief Function performs a complex 15-point FFT
763 : * The FFT is performed inplace. The result of the FFT
764 : * is scaled by SCALEFACTOR15 bits.
765 : *
766 : * WOPS with 32x16 bit multiplications: 354 cycles
767 : *
768 : * \param [i/o] re real input / output
769 : * \param [i/o] im imag input / output
770 : * \param [i ] s stride real and imag input / output
771 : *
772 : * \return void
773 : */
774 :
775 :
776 0 : static void fft15(Word32 *re, Word32 *im, Word16 s)
777 : {
778 : Dyn_Mem_Deluxe_In(Word32 t; Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4;
779 : Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09;
780 : Word32 x10, x11, x12, x13, x14, x15, x16, x17, x18, x19;
781 : Word32 x20, x21, x22, x23, x24, x25, x26, x27, x28, x29;
782 : Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
783 : Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
784 : Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;);
785 :
786 0 : x00 = L_shr_pos(re[s * 0], SCALEFACTOR15);
787 0 : x01 = L_shr_pos(im[s * 0], SCALEFACTOR15);
788 0 : x02 = L_shr_pos(re[s * 3], SCALEFACTOR15);
789 0 : x03 = L_shr_pos(im[s * 3], SCALEFACTOR15);
790 0 : x04 = L_shr_pos(re[s * 6], SCALEFACTOR15);
791 0 : x05 = L_shr_pos(im[s * 6], SCALEFACTOR15);
792 0 : x06 = L_shr_pos(re[s * 9], SCALEFACTOR15);
793 0 : x07 = L_shr_pos(im[s * 9], SCALEFACTOR15);
794 0 : x08 = L_shr_pos(re[s * 12], SCALEFACTOR15);
795 0 : x09 = L_shr_pos(im[s * 12], SCALEFACTOR15);
796 :
797 0 : x10 = L_shr_pos(re[s * 5], SCALEFACTOR15);
798 0 : x11 = L_shr_pos(im[s * 5], SCALEFACTOR15);
799 0 : x12 = L_shr_pos(re[s * 8], SCALEFACTOR15);
800 0 : x13 = L_shr_pos(im[s * 8], SCALEFACTOR15);
801 0 : x14 = L_shr_pos(re[s * 11], SCALEFACTOR15);
802 0 : x15 = L_shr_pos(im[s * 11], SCALEFACTOR15);
803 0 : x16 = L_shr_pos(re[s * 14], SCALEFACTOR15);
804 0 : x17 = L_shr_pos(im[s * 14], SCALEFACTOR15);
805 0 : x18 = L_shr_pos(re[s * 2], SCALEFACTOR15);
806 0 : x19 = L_shr_pos(im[s * 2], SCALEFACTOR15);
807 :
808 0 : x20 = L_shr_pos(re[s * 10], SCALEFACTOR15);
809 0 : x21 = L_shr_pos(im[s * 10], SCALEFACTOR15);
810 0 : x22 = L_shr_pos(re[s * 13], SCALEFACTOR15);
811 0 : x23 = L_shr_pos(im[s * 13], SCALEFACTOR15);
812 0 : x24 = L_shr_pos(re[s * 1], SCALEFACTOR15);
813 0 : x25 = L_shr_pos(im[s * 1], SCALEFACTOR15);
814 0 : x26 = L_shr_pos(re[s * 4], SCALEFACTOR15);
815 0 : x27 = L_shr_pos(im[s * 4], SCALEFACTOR15);
816 0 : x28 = L_shr_pos(re[s * 7], SCALEFACTOR15);
817 0 : x29 = L_shr_pos(im[s * 7], SCALEFACTOR15);
818 :
819 : /* 1. FFT5 stage */
820 :
821 : /* real part */
822 0 : r1 = L_add(x02, x08);
823 0 : r4 = L_sub(x02, x08);
824 0 : r3 = L_add(x04, x06);
825 0 : r2 = L_sub(x04, x06);
826 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
827 0 : r1 = L_add(r1, r3);
828 0 : y00 = L_add(x00, r1);
829 0 : r1 = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
830 0 : r3 = L_sub(r1, t);
831 0 : r1 = L_add(r1, t);
832 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
833 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
834 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
835 :
836 : /* imaginary part */
837 0 : s1 = L_add(x03, x09);
838 0 : s4 = L_sub(x03, x09);
839 0 : s3 = L_add(x05, x07);
840 0 : s2 = L_sub(x05, x07);
841 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
842 0 : s1 = L_add(s1, s3);
843 0 : y01 = L_add(x01, s1);
844 0 : s1 = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
845 0 : s3 = L_sub(s1, t);
846 0 : s1 = L_add(s1, t);
847 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
848 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
849 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
850 :
851 : /* combination */
852 0 : y02 = L_add(r1, s2);
853 0 : y08 = L_sub(r1, s2);
854 0 : y04 = L_sub(r3, s4);
855 0 : y06 = L_add(r3, s4);
856 :
857 0 : y03 = L_sub(s1, r2);
858 0 : y09 = L_add(s1, r2);
859 0 : y05 = L_add(s3, r4);
860 0 : y07 = L_sub(s3, r4);
861 :
862 : /* 2. FFT5 stage */
863 :
864 : /* real part */
865 0 : r1 = L_add(x12, x18);
866 0 : r4 = L_sub(x12, x18);
867 0 : r3 = L_add(x14, x16);
868 0 : r2 = L_sub(x14, x16);
869 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
870 0 : r1 = L_add(r1, r3);
871 0 : y10 = L_add(x10, r1);
872 0 : r1 = L_add(y10, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
873 0 : r3 = L_sub(r1, t);
874 0 : r1 = L_add(r1, t);
875 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
876 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
877 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
878 :
879 : /* imaginary part */
880 0 : s1 = L_add(x13, x19);
881 0 : s4 = L_sub(x13, x19);
882 0 : s3 = L_add(x15, x17);
883 0 : s2 = L_sub(x15, x17);
884 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
885 0 : s1 = L_add(s1, s3);
886 0 : y11 = L_add(x11, s1);
887 0 : s1 = L_add(y11, L_shl_pos(Mpy_32_xx(s1, C55), 1));
888 0 : s3 = L_sub(s1, t);
889 0 : s1 = L_add(s1, t);
890 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
891 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
892 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
893 :
894 : /* combination */
895 0 : y12 = L_add(r1, s2);
896 0 : y18 = L_sub(r1, s2);
897 0 : y14 = L_sub(r3, s4);
898 0 : y16 = L_add(r3, s4);
899 :
900 0 : y13 = L_sub(s1, r2);
901 0 : y19 = L_add(s1, r2);
902 0 : y15 = L_add(s3, r4);
903 0 : y17 = L_sub(s3, r4);
904 :
905 : /* 3. FFT5 stage */
906 :
907 : /* real part */
908 0 : r1 = L_add(x22, x28);
909 0 : r4 = L_sub(x22, x28);
910 0 : r3 = L_add(x24, x26);
911 0 : r2 = L_sub(x24, x26);
912 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
913 0 : r1 = L_add(r1, r3);
914 0 : y20 = L_add(x20, r1);
915 0 : r1 = L_add(y20, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
916 0 : r3 = L_sub(r1, t);
917 0 : r1 = L_add(r1, t);
918 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
919 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
920 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
921 :
922 : /* imaginary part */
923 0 : s1 = L_add(x23, x29);
924 0 : s4 = L_sub(x23, x29);
925 0 : s3 = L_add(x25, x27);
926 0 : s2 = L_sub(x25, x27);
927 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
928 0 : s1 = L_add(s1, s3);
929 0 : y21 = L_add(x21, s1);
930 0 : s1 = L_add(y21, L_shl_pos(Mpy_32_xx(s1, C55), 1));
931 0 : s3 = L_sub(s1, t);
932 0 : s1 = L_add(s1, t);
933 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
934 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
935 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
936 :
937 : /* combination */
938 0 : y22 = L_add(r1, s2);
939 0 : y28 = L_sub(r1, s2);
940 0 : y24 = L_sub(r3, s4);
941 0 : y26 = L_add(r3, s4);
942 :
943 0 : y23 = L_sub(s1, r2);
944 0 : y29 = L_add(s1, r2);
945 0 : y25 = L_add(s3, r4);
946 0 : y27 = L_sub(s3, r4);
947 :
948 : /* 1. FFT3 stage */
949 :
950 : /* real part */
951 0 : r1 = L_add(y10, y20);
952 0 : r2 = Mpy_32_xx(L_sub(y10, y20), C31);
953 0 : re[s * 0] = L_add(y00, r1);
954 0 : move32();
955 0 : r1 = L_sub(y00, L_shr_pos(r1, 1));
956 :
957 : /* imaginary part */
958 0 : s1 = L_add(y11, y21);
959 0 : s2 = Mpy_32_xx(L_sub(y11, y21), C31);
960 0 : im[s * 0] = L_add(y01, s1);
961 0 : move32();
962 0 : s1 = L_sub(y01, L_shr_pos(s1, 1));
963 :
964 : /* combination */
965 0 : re[s * 10] = L_sub(r1, s2);
966 0 : move32();
967 0 : re[s * 5] = L_add(r1, s2);
968 0 : move32();
969 0 : im[s * 10] = L_add(s1, r2);
970 0 : move32();
971 0 : im[s * 5] = L_sub(s1, r2);
972 0 : move32();
973 :
974 : /* 2. FFT3 stage */
975 :
976 : /* real part */
977 0 : r1 = L_add(y12, y22);
978 0 : r2 = Mpy_32_xx(L_sub(y12, y22), C31);
979 0 : re[s * 6] = L_add(y02, r1);
980 0 : move32();
981 0 : r1 = L_sub(y02, L_shr_pos(r1, 1));
982 :
983 : /* imaginary part */
984 0 : s1 = L_add(y13, y23);
985 0 : s2 = Mpy_32_xx(L_sub(y13, y23), C31);
986 0 : im[s * 6] = L_add(y03, s1);
987 0 : move32();
988 0 : s1 = L_sub(y03, L_shr_pos(s1, 1));
989 :
990 : /* combination */
991 0 : re[s * 1] = L_sub(r1, s2);
992 0 : move32();
993 0 : re[s * 11] = L_add(r1, s2);
994 0 : move32();
995 0 : im[s * 1] = L_add(s1, r2);
996 0 : move32();
997 0 : im[s * 11] = L_sub(s1, r2);
998 0 : move32();
999 :
1000 : /* 3. FFT3 stage */
1001 :
1002 : /* real part */
1003 0 : r1 = L_add(y14, y24);
1004 0 : r2 = Mpy_32_xx(L_sub(y14, y24), C31);
1005 0 : re[s * 12] = L_add(y04, r1);
1006 0 : move32();
1007 0 : r1 = L_sub(y04, L_shr_pos(r1, 1));
1008 :
1009 : /* imaginary part */
1010 0 : s1 = L_add(y15, y25);
1011 0 : s2 = Mpy_32_xx(L_sub(y15, y25), C31);
1012 0 : im[s * 12] = L_add(y05, s1);
1013 0 : move32();
1014 0 : s1 = L_sub(y05, L_shr_pos(s1, 1));
1015 :
1016 : /* combination */
1017 0 : re[s * 7] = L_sub(r1, s2);
1018 0 : move32();
1019 0 : re[s * 2] = L_add(r1, s2);
1020 0 : move32();
1021 0 : im[s * 7] = L_add(s1, r2);
1022 0 : move32();
1023 0 : im[s * 2] = L_sub(s1, r2);
1024 0 : move32();
1025 :
1026 : /* 4. FFT3 stage */
1027 :
1028 : /* real part */
1029 0 : r1 = L_add(y16, y26);
1030 0 : r2 = Mpy_32_xx(L_sub(y16, y26), C31);
1031 0 : re[s * 3] = L_add(y06, r1);
1032 0 : move32();
1033 0 : r1 = L_sub(y06, L_shr_pos(r1, 1));
1034 :
1035 : /* imaginary part */
1036 0 : s1 = L_add(y17, y27);
1037 0 : s2 = Mpy_32_xx(L_sub(y17, y27), C31);
1038 0 : im[s * 3] = L_add(y07, s1);
1039 0 : move32();
1040 0 : s1 = L_sub(y07, L_shr_pos(s1, 1));
1041 :
1042 : /* combination */
1043 0 : re[s * 13] = L_sub(r1, s2);
1044 0 : move32();
1045 0 : re[s * 8] = L_add(r1, s2);
1046 0 : move32();
1047 0 : im[s * 13] = L_add(s1, r2);
1048 0 : move32();
1049 0 : im[s * 8] = L_sub(s1, r2);
1050 0 : move32();
1051 :
1052 : /* 5. FFT3 stage */
1053 :
1054 : /* real part */
1055 0 : r1 = L_add(y18, y28);
1056 0 : r2 = Mpy_32_xx(L_sub(y18, y28), C31);
1057 0 : re[s * 9] = L_add(y08, r1);
1058 0 : move32();
1059 0 : r1 = L_sub(y08, L_shr_pos(r1, 1));
1060 :
1061 : /* imaginary part */
1062 0 : s1 = L_add(y19, y29);
1063 0 : s2 = Mpy_32_xx(L_sub(y19, y29), C31);
1064 0 : im[s * 9] = L_add(y09, s1);
1065 0 : move32();
1066 0 : s1 = L_sub(y09, L_shr_pos(s1, 1));
1067 :
1068 : /* combination */
1069 0 : re[s * 4] = L_sub(r1, s2);
1070 0 : move32();
1071 0 : re[s * 14] = L_add(r1, s2);
1072 0 : move32();
1073 0 : im[s * 4] = L_add(s1, r2);
1074 0 : move32();
1075 0 : im[s * 14] = L_sub(s1, r2);
1076 0 : move32();
1077 :
1078 : Dyn_Mem_Deluxe_Out();
1079 0 : }
1080 :
/* STC() wraps every table constant below; as defined here it expands to its argument unchanged. */
1081 : #define STC(x) (x)
/* Real parts of the rotation factors that fft12() reads sequentially through pVecRe
 * (two entries per iteration of its 2-iteration loop, i.e. the first four entries).
 * The values look like Q31 fractions (0x40000000 ~ 0.5, 0x6ed9eba1 ~ 0.866,
 * 0xc0000000 ~ -0.5) -- NOTE(review): confirm the fixed-point format.
 * The trailing pair is only compiled in when FFT12_UNROLLED_ENABLE is not defined. */
1082 : const Word32 RotVectorReal12[] =
1083 : {
1084 : STC(0x6ed9eba1), STC(0x40000000),
1085 : STC(0x40000000), STC(0xc0000000),
1086 : #ifndef FFT12_UNROLLED_ENABLE
1087 : STC(0x00000000), STC(0x80000000),
1088 : #endif
1089 : };
1090 :
/* Imaginary parts of the rotation factors that fft12() reads sequentially through pVecIm,
 * entry by entry in step with RotVectorReal12 (the first four entries are consumed there).
 * The values look like Q31 fractions (0x40000000 ~ 0.5, 0x6ed9eba1 ~ 0.866) --
 * NOTE(review): confirm the fixed-point format.
 * The trailing pair is only compiled in when FFT12_UNROLLED_ENABLE is not defined. */
1091 : const Word32 RotVectorImag12[] =
1092 : {
1093 : STC(0x40000000), STC(0x6ed9eba1),
1094 : STC(0x6ed9eba1), STC(0x6ed9eba1),
1095 : #ifndef FFT12_UNROLLED_ENABLE
1096 : STC(0x7fffffff), STC(0x00000000),
1097 : #endif
1098 : };
1099 :
/**
 * \brief Complex 12-point FFT, computed in place on one interleaved buffer.
 *
 * Pass 1 runs four 3-point butterflies over inputs spaced 8 words apart
 * (pSrc[0]/[8]/[16] real, pSrc[1]/[9]/[17] imaginary) and stores the results in the
 * local buffer aDst. Each input is shifted right by 2 on load and every result by
 * one more bit. Butterflies 2 and 3 additionally rotate two of their outputs with
 * cplxMpy_32_32() and entries of RotVectorReal12 / RotVectorImag12; butterfly 4
 * applies its rotation with swaps and sign changes only.
 * Pass 2 runs three 4-point butterflies that read aDst (inputs shifted right by 2)
 * and write the final values back to pInput.
 *
 * \param [i/o] pInput 24 Word32 values accessed as (re, im) pairs at indices 2k, 2k + 1
 *
 * \return void
 */
1100 0 : static void fft12(Word32 *pInput)
1101 : {
1102 : Dyn_Mem_Deluxe_In(Word32 aDst[24]; Word32 * pSrc, *pDst; Counter i; Word32 r1, r2, s1, s2, pD; Word32 re, im;
1103 : Word32 vre, vim;);
1104 :
1105 0 : pSrc = pInput;
1106 0 : move16();
1107 0 : pDst = aDst;
1108 0 : move16();
1109 :
1110 : /* 1st 3-point butterfly (no rotation): inputs are shifted right by 2, results by one more bit */
1111 0 : r1 = L_add(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2));
1112 0 : r2 = Mpy_32_xx(L_sub(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2)), C31);
1113 0 : pD = L_shr_pos(pSrc[0], 2);
1114 0 : pDst[0] = L_shr_pos(L_add(pD, r1), 1);
1115 0 : r1 = L_sub(pD, L_shr_pos(r1, 1));
1116 :
1117 : /* imaginary part */
1118 0 : s1 = L_add(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2));
1119 0 : s2 = Mpy_32_xx(L_sub(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2)), C31);
1120 0 : pD = L_shr_pos(pSrc[1], 2);
1121 0 : pDst[1] = L_shr_pos(L_add(pD, s1), 1);
1122 0 : s1 = L_sub(pD, L_shr_pos(s1, 1));
1123 :
/* apply the extra 1-bit shift to the remaining terms so they match pDst[0] and pDst[1] */
1124 0 : r1 = L_shr_pos(r1, 1);
1125 0 : r2 = L_shr_pos(r2, 1);
1126 0 : s1 = L_shr_pos(s1, 1);
1127 0 : s2 = L_shr_pos(s2, 1);
1128 :
1129 : /* combination */
1130 0 : pDst[2] = L_sub(r1, s2);
1131 0 : pDst[3] = L_add(s1, r2);
1132 0 : pDst[4] = L_add(r1, s2);
1133 0 : pDst[5] = L_sub(s1, r2);
/* next butterfly: one complex input sample further, six output words further */
1134 0 : pSrc += 2;
1135 0 : pDst += 6;
1136 :
/* rotation factor tables, consumed one entry at a time inside the loop below */
1137 0 : const Word32 *pVecRe = RotVectorReal12;
1138 0 : const Word32 *pVecIm = RotVectorImag12;
1139 :
1140 :
1141 :
/* 2nd and 3rd 3-point butterflies */
1142 0 : FOR (i = 0; i < 2; i++)
1143 : {
1144 : /* outputs 0,1: input shift of 2 plus a final shift of 1, no rotation */
1145 : /* outputs 2,3 and 4,5: same shifts, then complex multiplied by the next table entry */
1146 :
1147 0 : r1 = L_add(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2));
1148 0 : r2 = Mpy_32_xx(L_sub(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2)), C31);
1149 0 : pD = L_shr_pos(pSrc[0], 2);
1150 0 : pDst[0] = L_shr_pos(L_add(pD, r1), 1);
1151 0 : r1 = L_sub(pD, L_shr_pos(r1, 1));
1152 :
1153 : /* imaginary part */
1154 0 : s1 = L_add(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2));
1155 0 : s2 = Mpy_32_xx(L_sub(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2)), C31);
1156 0 : pD = L_shr_pos(pSrc[1], 2);
1157 0 : pDst[1] = L_shr_pos(L_add(pD, s1), 1);
1158 0 : s1 = L_sub(pD, L_shr_pos(s1, 1));
1159 :
1160 0 : r1 = L_shr_pos(r1, 1);
1161 0 : r2 = L_shr_pos(r2, 1);
1162 0 : s1 = L_shr_pos(s1, 1);
1163 0 : s2 = L_shr_pos(s2, 1);
1164 :
1165 : /* combination: rotate the first butterfly output and store it in pDst[2], pDst[3] */
1166 0 : re = L_sub(r1, s2);
1167 0 : im = L_add(s1, r2);
1168 0 : vre = *pVecRe++;
1169 0 : vim = *pVecIm++;
1170 0 : cplxMpy_32_32(&pDst[3], &pDst[2], im, re, vre, vim);
1171 :
/* second butterfly output, rotated by the following table entry, goes to pDst[4], pDst[5] */
1172 0 : re = L_add(r1, s2);
1173 0 : im = L_sub(s1, r2);
1174 :
1175 0 : vre = *pVecRe++;
1176 0 : vim = *pVecIm++;
1177 0 : cplxMpy_32_32(&pDst[5], &pDst[4], im, re, vre, vim);
1178 :
1179 0 : pDst += 6;
1180 0 : pSrc += 2;
1181 : }
1182 : /* 4th 3-point butterfly; outputs 0,1: input shift of 2 plus a final shift of 1, no rotation */
1183 : /* outputs 2,3: same shifts, then complex multiplied with (0.0,+1.0) */
1184 : /* outputs 4,5: same shifts, then complex multiplied with (-1.0,0.0) */
1185 0 : r1 = L_add(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2));
1186 0 : r2 = Mpy_32_xx(L_sub(L_shr_pos(pSrc[8], 2), L_shr_pos(pSrc[16], 2)), C31);
1187 0 : pD = L_shr_pos(pSrc[0], 2);
1188 0 : pDst[0] = L_shr_pos(L_add(pD, r1), 1);
1189 0 : r1 = L_sub(pD, L_shr_pos(r1, 1));
1190 :
1191 : /* imaginary part */
1192 0 : s1 = L_add(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2));
1193 0 : s2 = Mpy_32_xx(L_sub(L_shr_pos(pSrc[9], 2), L_shr_pos(pSrc[17], 2)), C31);
1194 0 : pD = L_shr_pos(pSrc[1], 2);
1195 0 : pDst[1] = L_shr_pos(L_add(pD, s1), 1);
1196 0 : s1 = L_sub(pD, L_shr_pos(s1, 1));
1197 :
1198 0 : r1 = L_shr_pos(r1, 1);
1199 0 : r2 = L_shr_pos(r2, 1);
1200 0 : s1 = L_shr_pos(s1, 1);
1201 0 : s2 = L_shr_pos(s2, 1);
1202 :
1203 : /* combination: the two rotations are folded into swaps and sign changes, no multiply needed */
1204 :
1205 0 : pDst[2] = L_add(s1, r2);
1206 0 : move32();
1207 0 : pDst[3] = L_sub(s2, r1);
1208 0 : move32();
1209 0 : pDst[4] = L_negate(L_add(r1, s2));
1210 0 : move32();
1211 0 : pDst[5] = L_sub(r2, s1);
1212 0 : move32();
1213 : /* Perform 3 times the fft of length 4. The input samples are at the address of aDst and the
1214 : output samples are at the address of pInput. The input vector for the fft of length 4 is built
1215 : of the interleaved samples in aDst, the output samples are stored consecutively at the address
1216 : of pInput.
1217 : */
1218 0 : move16();
1219 0 : move16();
1220 0 : pSrc = aDst;
1221 0 : pDst = pInput;
1222 0 : FOR (i = 0; i < 3; i++)
1223 : {
1224 : /* inline FFT4 merged with incoming resorting loop; A, B, C, D are read at word offsets 0, 12, 6, 18 */
1225 0 : r1 = L_add(L_shr_pos(pSrc[0], 2), L_shr_pos(pSrc[12], 2)); /* Re A + Re B */
1226 0 : r2 = L_add(L_shr_pos(pSrc[6], 2), L_shr_pos(pSrc[18], 2)); /* Re C + Re D */
1227 0 : s1 = L_add(L_shr_pos(pSrc[1], 2), L_shr_pos(pSrc[13], 2)); /* Im A + Im B */
1228 0 : s2 = L_add(L_shr_pos(pSrc[7], 2), L_shr_pos(pSrc[19], 2)); /* Im C + Im D */
1229 :
1230 0 : pDst[0] = L_add(r1, r2); /* Re A' = Re A + Re B + Re C + Re D */
1231 0 : pDst[1] = L_add(s1, s2); /* Im A' = Im A + Im B + Im C + Im D */
1232 :
/* differences are formed as (sum - 2 * B / 4), i.e. by subtracting B shifted right by 1 from the sum */
1233 0 : re = L_sub(r1, L_shr_pos(pSrc[12], 1)); /* Re A - Re B */
1234 0 : im = L_sub(s1, L_shr_pos(pSrc[13], 1)); /* Im A - Im B */
1235 :
1236 0 : pDst[12] = L_sub(r1, r2); /* Re C' = Re A + Re B - Re C - Re D */
1237 0 : pDst[13] = L_sub(s1, s2); /* Im C' = Im A + Im B - Im C - Im D */
1238 :
1239 0 : r2 = L_sub(r2, L_shr_pos(pSrc[18], 1)); /* Re C - Re D */
1240 0 : s2 = L_sub(s2, L_shr_pos(pSrc[19], 1)); /* Im C - Im D */
1241 :
1242 0 : pDst[6] = L_add(re, s2); /* Re B' = Re A - Re B + Im C - Im D */
1243 0 : pDst[18] = L_sub(re, s2); /* Re D' = Re A - Re B - Im C + Im D */
1244 0 : pDst[7] = L_sub(im, r2); /* Im B' = Im A - Im B - Re C + Re D */
1245 0 : pDst[19] = L_add(im, r2); /* Im D' = Im A - Im B + Re C - Re D */
1246 :
1247 0 : pSrc += 2;
1248 0 : pDst += 2;
1249 : }
1250 :
1251 : Dyn_Mem_Deluxe_Out();
1252 0 : }
1253 :
1254 : /**
1255 : * \brief Function performs a complex 16-point FFT
1256 : * The FFT is performed inplace. Every input sample is shifted right by
1257 : * SCALEFACTOR16 bits when it is loaded, so the result is scaled down accordingly.
1258 : *
1259 : * WOPS with 32x16 bit multiplications (scale on ): 288 cycles
1260 : * WOPS with 32x16 bit multiplications (scale off): 256 cycles
1261 : *
1262 : * \param [i/o] re real input / output, 16 values accessed at re[s * k], k = 0..15
1263 : * \param [i/o] im imag input / output, 16 values accessed at im[s * k], k = 0..15
1264 : * \param [i ] s stride real and imag input / output
1265 : *
1266 : * \return void
1267 : */
1268 :
1269 :
1270 :
1271 0 : static void fft16(Word32 *re, Word32 *im, Word16 s)
1272 : {
1273 : Dyn_Mem_Deluxe_In(Word32 x0, x1, x2, x3, x4, x5, x6, x7; Word32 t0, t1, t2, t3, t4, t5, t6, t7;
1274 : Word32 y00, y01, y02, y03, y04, y05, y06, y07; Word32 y08, y09, y10, y11, y12, y13, y14, y15;
1275 : Word32 y16, y17, y18, y19, y20, y21, y22, y23; Word32 y24, y25, y26, y27, y28, y29, y30, y31;);
1276 :
/* 1st input 4-point butterfly: samples 0, 4, 8, 12 (scaled on load) -> y00..y07 */
1277 0 : x0 = L_shr_pos(re[s * 0], SCALEFACTOR16);
1278 0 : x1 = L_shr_pos(im[s * 0], SCALEFACTOR16);
1279 0 : x2 = L_shr_pos(re[s * 4], SCALEFACTOR16);
1280 0 : x3 = L_shr_pos(im[s * 4], SCALEFACTOR16);
1281 0 : x4 = L_shr_pos(re[s * 8], SCALEFACTOR16);
1282 0 : x5 = L_shr_pos(im[s * 8], SCALEFACTOR16);
1283 0 : x6 = L_shr_pos(re[s * 12], SCALEFACTOR16);
1284 0 : x7 = L_shr_pos(im[s * 12], SCALEFACTOR16);
1285 :
1286 : /* Pre-additions */
1287 0 : t0 = L_add(x0, x4);
1288 0 : t2 = L_sub(x0, x4);
1289 0 : t1 = L_add(x1, x5);
1290 0 : t3 = L_sub(x1, x5);
1291 0 : t4 = L_add(x2, x6);
1292 0 : t7 = L_sub(x2, x6);
1293 0 : t5 = L_add(x7, x3);
1294 0 : t6 = L_sub(x7, x3);
1295 :
1296 : /* Post-additions */
1297 0 : y00 = L_add(t0, t4);
1298 0 : y01 = L_add(t1, t5);
1299 0 : y02 = L_sub(t2, t6);
1300 0 : y03 = L_sub(t3, t7);
1301 0 : y04 = L_sub(t0, t4);
1302 0 : y05 = L_sub(t1, t5);
1303 0 : y06 = L_add(t2, t6);
1304 0 : y07 = L_add(t3, t7);
1305 :
/* 2nd input 4-point butterfly: samples 1, 5, 9, 13 -> y08..y15 */
1306 0 : x0 = L_shr_pos(re[s * 1], SCALEFACTOR16);
1307 0 : x1 = L_shr_pos(im[s * 1], SCALEFACTOR16);
1308 0 : x2 = L_shr_pos(re[s * 5], SCALEFACTOR16);
1309 0 : x3 = L_shr_pos(im[s * 5], SCALEFACTOR16);
1310 0 : x4 = L_shr_pos(re[s * 9], SCALEFACTOR16);
1311 0 : x5 = L_shr_pos(im[s * 9], SCALEFACTOR16);
1312 0 : x6 = L_shr_pos(re[s * 13], SCALEFACTOR16);
1313 0 : x7 = L_shr_pos(im[s * 13], SCALEFACTOR16);
1314 :
1315 : /* Pre-additions */
1316 0 : t0 = L_add(x0, x4);
1317 0 : t2 = L_sub(x0, x4);
1318 0 : t1 = L_add(x1, x5);
1319 0 : t3 = L_sub(x1, x5);
1320 0 : t4 = L_add(x2, x6);
1321 0 : t7 = L_sub(x2, x6);
1322 0 : t5 = L_add(x7, x3);
1323 0 : t6 = L_sub(x7, x3);
1324 :
1325 : /* Post-additions */
1326 0 : y08 = L_add(t0, t4);
1327 0 : y09 = L_add(t1, t5);
1328 0 : y10 = L_sub(t2, t6);
1329 0 : y11 = L_sub(t3, t7);
1330 0 : y12 = L_sub(t0, t4);
1331 0 : y13 = L_sub(t1, t5);
1332 0 : y14 = L_add(t2, t6);
1333 0 : y15 = L_add(t3, t7);
1334 :
/* 3rd input 4-point butterfly: samples 2, 6, 10, 14 -> y16..y23 */
1335 0 : x0 = L_shr_pos(re[s * 2], SCALEFACTOR16);
1336 0 : x1 = L_shr_pos(im[s * 2], SCALEFACTOR16);
1337 0 : x2 = L_shr_pos(re[s * 6], SCALEFACTOR16);
1338 0 : x3 = L_shr_pos(im[s * 6], SCALEFACTOR16);
1339 0 : x4 = L_shr_pos(re[s * 10], SCALEFACTOR16);
1340 0 : x5 = L_shr_pos(im[s * 10], SCALEFACTOR16);
1341 0 : x6 = L_shr_pos(re[s * 14], SCALEFACTOR16);
1342 0 : x7 = L_shr_pos(im[s * 14], SCALEFACTOR16);
1343 :
1344 : /* Pre-additions */
1345 0 : t0 = L_add(x0, x4);
1346 0 : t2 = L_sub(x0, x4);
1347 0 : t1 = L_add(x1, x5);
1348 0 : t3 = L_sub(x1, x5);
1349 0 : t4 = L_add(x2, x6);
1350 0 : t7 = L_sub(x2, x6);
1351 0 : t5 = L_add(x7, x3);
1352 0 : t6 = L_sub(x7, x3);
1353 :
1354 : /* Post-additions; unlike the other groups, y20/y21 store (t0 - t4) + j(t1 - t5) already multiplied by -j */
1355 0 : y16 = L_add(t0, t4);
1356 0 : y17 = L_add(t1, t5);
1357 0 : y18 = L_sub(t2, t6);
1358 0 : y19 = L_sub(t3, t7);
1359 0 : y20 = L_sub(t1, t5);
1360 0 : y21 = L_sub(t4, t0);
1361 0 : y22 = L_add(t2, t6);
1362 0 : y23 = L_add(t3, t7);
1363 :
/* 4th input 4-point butterfly: samples 3, 7, 11, 15 -> y24..y31 */
1364 0 : x0 = L_shr_pos(re[s * 3], SCALEFACTOR16);
1365 0 : x1 = L_shr_pos(im[s * 3], SCALEFACTOR16);
1366 0 : x2 = L_shr_pos(re[s * 7], SCALEFACTOR16);
1367 0 : x3 = L_shr_pos(im[s * 7], SCALEFACTOR16);
1368 0 : x4 = L_shr_pos(re[s * 11], SCALEFACTOR16);
1369 0 : x5 = L_shr_pos(im[s * 11], SCALEFACTOR16);
1370 0 : x6 = L_shr_pos(re[s * 15], SCALEFACTOR16);
1371 0 : x7 = L_shr_pos(im[s * 15], SCALEFACTOR16);
1372 :
1373 : /* Pre-additions */
1374 0 : t0 = L_add(x0, x4);
1375 0 : t2 = L_sub(x0, x4);
1376 0 : t1 = L_add(x1, x5);
1377 0 : t3 = L_sub(x1, x5);
1378 0 : t4 = L_add(x2, x6);
1379 0 : t7 = L_sub(x2, x6);
1380 0 : t5 = L_add(x7, x3);
1381 0 : t6 = L_sub(x7, x3);
1382 :
1383 : /* Post-additions */
1384 0 : y24 = L_add(t0, t4);
1385 0 : y25 = L_add(t1, t5);
1386 0 : y26 = L_sub(t2, t6);
1387 0 : y27 = L_sub(t3, t7);
1388 0 : y28 = L_sub(t0, t4);
1389 0 : y29 = L_sub(t1, t5);
1390 0 : y30 = L_add(t2, t6);
1391 0 : y31 = L_add(t3, t7);
1392 :
1393 : /* rotation: intermediate values are multiplied by the constants C161..C166 (defined outside this function) */
1394 :
/* pairs rotated with a single constant need two multiplies ... */
1395 0 : x0 = Mpy_32_xx(y22, C162);
1396 0 : x1 = Mpy_32_xx(y23, C162);
1397 0 : y22 = L_sub(x0, x1);
1398 0 : y23 = L_add(x0, x1);
1399 :
1400 0 : x0 = Mpy_32_xx(y28, C162);
1401 0 : x1 = Mpy_32_xx(y29, C162);
1402 0 : y28 = L_sub(x0, x1);
1403 0 : y29 = L_add(x0, x1);
1404 :
1405 0 : x0 = Mpy_32_xx(y12, C161);
1406 0 : x1 = Mpy_32_xx(y13, C161);
1407 0 : y12 = L_add(x0, x1);
1408 0 : y13 = L_sub(x1, x0);
1409 :
1410 0 : x0 = Mpy_32_xx(y18, C161);
1411 0 : x1 = Mpy_32_xx(y19, C161);
1412 0 : y18 = L_add(x0, x1);
1413 0 : y19 = L_sub(x1, x0);
1414 :
/* ... while pairs rotated with two different constants need four multiplies */
1415 0 : x0 = Mpy_32_xx(y10, C163);
1416 0 : x1 = Mpy_32_xx(y11, C166);
1417 0 : x2 = Mpy_32_xx(y10, C166);
1418 0 : x3 = Mpy_32_xx(y11, C163);
1419 0 : y10 = L_sub(x0, x1);
1420 0 : y11 = L_add(x2, x3);
1421 :
1422 0 : x0 = Mpy_32_xx(y14, C165);
1423 0 : x1 = Mpy_32_xx(y15, C164);
1424 0 : x2 = Mpy_32_xx(y14, C164);
1425 0 : x3 = Mpy_32_xx(y15, C165);
1426 0 : y14 = L_sub(x0, x1);
1427 0 : y15 = L_add(x2, x3);
1428 :
1429 0 : x0 = Mpy_32_xx(y26, C165);
1430 0 : x1 = Mpy_32_xx(y27, C164);
1431 0 : x2 = Mpy_32_xx(y26, C164);
1432 0 : x3 = Mpy_32_xx(y27, C165);
1433 0 : y26 = L_sub(x0, x1);
1434 0 : y27 = L_add(x2, x3);
1435 :
1436 0 : x0 = Mpy_32_xx(y30, C164);
1437 0 : x1 = Mpy_32_xx(y31, C165);
1438 0 : x2 = Mpy_32_xx(y30, C165);
1439 0 : x3 = Mpy_32_xx(y31, C164);
1440 0 : y30 = L_sub(x0, x1);
1441 0 : y31 = L_add(x2, x3);
1442 :
1443 : /* Pre-additions (1st output butterfly: y00/y01, y08/y09, y16/y17, y24/y25 -> samples 0, 4, 8, 12) */
1444 :
1445 0 : t0 = L_add(y00, y16);
1446 0 : t2 = L_sub(y00, y16);
1447 0 : t1 = L_add(y01, y17);
1448 0 : t3 = L_sub(y01, y17);
1449 0 : t4 = L_add(y08, y24);
1450 0 : t7 = L_sub(y08, y24);
1451 0 : t5 = L_add(y25, y09);
1452 0 : t6 = L_sub(y25, y09);
1453 :
1454 : /* Post-additions */
1455 :
1456 0 : re[s * 0] = L_add(t0, t4);
1457 0 : move32();
1458 0 : im[s * 0] = L_add(t1, t5);
1459 0 : move32();
1460 0 : re[s * 4] = L_sub(t2, t6);
1461 0 : move32();
1462 0 : im[s * 4] = L_sub(t3, t7);
1463 0 : move32();
1464 0 : re[s * 8] = L_sub(t0, t4);
1465 0 : move32();
1466 0 : im[s * 8] = L_sub(t1, t5);
1467 0 : move32();
1468 0 : re[s * 12] = L_add(t2, t6);
1469 0 : move32();
1470 0 : im[s * 12] = L_add(t3, t7);
1471 0 : move32();
1472 :
1473 : /* Pre-additions (2nd output butterfly: y02/y03, y10/y11, y18/y19, y26/y27 -> samples 1, 5, 9, 13) */
1474 :
1475 0 : t0 = L_add(y02, y18);
1476 0 : t2 = L_sub(y02, y18);
1477 0 : t1 = L_add(y03, y19);
1478 0 : t3 = L_sub(y03, y19);
1479 0 : t4 = L_add(y10, y26);
1480 0 : t7 = L_sub(y10, y26);
1481 0 : t5 = L_add(y27, y11);
1482 0 : t6 = L_sub(y27, y11);
1483 :
1484 : /* Post-additions */
1485 :
1486 0 : re[s * 1] = L_add(t0, t4);
1487 0 : move32();
1488 0 : im[s * 1] = L_add(t1, t5);
1489 0 : move32();
1490 0 : re[s * 5] = L_sub(t2, t6);
1491 0 : move32();
1492 0 : im[s * 5] = L_sub(t3, t7);
1493 0 : move32();
1494 0 : re[s * 9] = L_sub(t0, t4);
1495 0 : move32();
1496 0 : im[s * 9] = L_sub(t1, t5);
1497 0 : move32();
1498 0 : re[s * 13] = L_add(t2, t6);
1499 0 : move32();
1500 0 : im[s * 13] = L_add(t3, t7);
1501 0 : move32();
1502 :
1503 : /* Pre-additions (3rd output butterfly: y04/y05, y12/y13, y20/y21, y28/y29 -> samples 2, 6, 10, 14) */
1504 :
1505 0 : t0 = L_add(y04, y20);
1506 0 : t2 = L_sub(y04, y20);
1507 0 : t1 = L_add(y05, y21);
1508 0 : t3 = L_sub(y05, y21);
1509 0 : t4 = L_add(y12, y28);
1510 0 : t7 = L_sub(y12, y28);
1511 0 : t5 = L_add(y29, y13);
1512 0 : t6 = L_sub(y29, y13);
1513 :
1514 : /* Post-additions */
1515 :
1516 0 : re[s * 2] = L_add(t0, t4);
1517 0 : move32();
1518 0 : im[s * 2] = L_add(t1, t5);
1519 0 : move32();
1520 0 : re[s * 6] = L_sub(t2, t6);
1521 0 : move32();
1522 0 : im[s * 6] = L_sub(t3, t7);
1523 0 : move32();
1524 0 : re[s * 10] = L_sub(t0, t4);
1525 0 : move32();
1526 0 : im[s * 10] = L_sub(t1, t5);
1527 0 : move32();
1528 0 : re[s * 14] = L_add(t2, t6);
1529 0 : move32();
1530 0 : im[s * 14] = L_add(t3, t7);
1531 0 : move32();
1532 :
1533 : /* Pre-additions (4th output butterfly: y06/y07, y14/y15, y22/y23, y30/y31 -> samples 3, 7, 11, 15) */
1534 :
1535 0 : t0 = L_add(y06, y22);
1536 0 : t2 = L_sub(y06, y22);
1537 0 : t1 = L_add(y07, y23);
1538 0 : t3 = L_sub(y07, y23);
1539 0 : t4 = L_add(y14, y30);
1540 0 : t7 = L_sub(y14, y30);
1541 0 : t5 = L_add(y31, y15);
1542 0 : t6 = L_sub(y31, y15);
1543 :
1544 : /* Post-additions */
1545 :
1546 0 : re[s * 3] = L_add(t0, t4);
1547 0 : move32();
1548 0 : im[s * 3] = L_add(t1, t5);
1549 0 : move32();
1550 0 : re[s * 7] = L_sub(t2, t6);
1551 0 : move32();
1552 0 : im[s * 7] = L_sub(t3, t7);
1553 0 : move32();
1554 0 : re[s * 11] = L_sub(t0, t4);
1555 0 : move32();
1556 0 : im[s * 11] = L_sub(t1, t5);
1557 0 : move32();
1558 0 : re[s * 15] = L_add(t2, t6);
1559 0 : move32();
1560 0 : im[s * 15] = L_add(t3, t7);
1561 0 : move32();
1562 :
1563 : Dyn_Mem_Deluxe_Out();
1564 0 : }
1565 :
1566 : /**
1567 : * \brief Function performs a complex 20-point FFT
1568 : * The FFT is performed inplace. Every input sample is shifted right by
1569 : * SCALEFACTOR20 bits when it is loaded. Four 5-point stages are followed by five 4-point stages.
1570 : *
1571 : * WOPS with 32x16 bit multiplications: 432 cycles
1572 : *
1573 : * \param [i/o] re real input / output, 20 values accessed at re[s * k], k = 0..19
1574 : * \param [i/o] im imag input / output, 20 values accessed at im[s * k], k = 0..19
1575 : * \param [i ] s stride real and imag input / output
1576 : *
1577 : * \return void
1578 : */
1579 :
1580 :
1581 0 : static void fft20(Word32 *re, Word32 *im, Word16 s)
1582 : {
1583 : Dyn_Mem_Deluxe_In(Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4; Word32 x0, x1, x2, x3, x4;
1584 : Word32 t, t0, t1, t2, t3, t4, t5, t6, t7; Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
1585 : Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
1586 : Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
1587 : Word32 y30, y31, y32, y33, y34, y35, y36, y37, y38, y39;);
1588 :
1589 : /* 1. FFT5 stage: samples 0, 16, 12, 8, 4 -> y00/y01, y08/y09, y16/y17, y24/y25, y32/y33 */
1590 :
1591 : /* real part */
1592 0 : x0 = L_shr_pos(re[s * 0], SCALEFACTOR20);
1593 0 : x1 = L_shr_pos(re[s * 16], SCALEFACTOR20);
1594 0 : x2 = L_shr_pos(re[s * 12], SCALEFACTOR20);
1595 0 : x3 = L_shr_pos(re[s * 8], SCALEFACTOR20);
1596 0 : x4 = L_shr_pos(re[s * 4], SCALEFACTOR20);
1597 :
1598 0 : r1 = L_add(x1, x4);
1599 0 : r4 = L_sub(x1, x4);
1600 0 : r3 = L_add(x2, x3);
1601 0 : r2 = L_sub(x2, x3);
1602 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
1603 0 : r1 = L_add(r1, r3);
1604 0 : y00 = L_add(x0, r1);
1605 0 : r1 = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
1606 0 : r3 = L_sub(r1, t);
1607 0 : r1 = L_add(r1, t);
1608 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
1609 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
1610 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
1611 :
1612 : /* imaginary part */
1613 0 : x0 = L_shr_pos(im[s * 0], SCALEFACTOR20);
1614 0 : x1 = L_shr_pos(im[s * 16], SCALEFACTOR20);
1615 0 : x2 = L_shr_pos(im[s * 12], SCALEFACTOR20);
1616 0 : x3 = L_shr_pos(im[s * 8], SCALEFACTOR20);
1617 0 : x4 = L_shr_pos(im[s * 4], SCALEFACTOR20);
1618 :
1619 0 : s1 = L_add(x1, x4);
1620 0 : s4 = L_sub(x1, x4);
1621 0 : s3 = L_add(x2, x3);
1622 0 : s2 = L_sub(x2, x3);
1623 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
1624 0 : s1 = L_add(s1, s3);
1625 0 : y01 = L_add(x0, s1);
1626 0 : s1 = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
1627 0 : s3 = L_sub(s1, t);
1628 0 : s1 = L_add(s1, t);
1629 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
1630 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
1631 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
1632 :
1633 : /* combination: real outputs mix in the imaginary terms and vice versa */
1634 0 : y08 = L_add(r1, s2);
1635 0 : y32 = L_sub(r1, s2);
1636 0 : y16 = L_sub(r3, s4);
1637 0 : y24 = L_add(r3, s4);
1638 :
1639 0 : y09 = L_sub(s1, r2);
1640 0 : y33 = L_add(s1, r2);
1641 0 : y17 = L_add(s3, r4);
1642 0 : y25 = L_sub(s3, r4);
1643 :
1644 : /* 2. FFT5 stage: samples 5, 1, 17, 13, 9 -> y02/y03, y10/y11, y18/y19, y26/y27, y34/y35 */
1645 :
1646 : /* real part */
1647 0 : x0 = L_shr_pos(re[s * 5], SCALEFACTOR20);
1648 0 : x1 = L_shr_pos(re[s * 1], SCALEFACTOR20);
1649 0 : x2 = L_shr_pos(re[s * 17], SCALEFACTOR20);
1650 0 : x3 = L_shr_pos(re[s * 13], SCALEFACTOR20);
1651 0 : x4 = L_shr_pos(re[s * 9], SCALEFACTOR20);
1652 :
1653 0 : r1 = L_add(x1, x4);
1654 0 : r4 = L_sub(x1, x4);
1655 0 : r3 = L_add(x2, x3);
1656 0 : r2 = L_sub(x2, x3);
1657 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
1658 0 : r1 = L_add(r1, r3);
1659 0 : y02 = L_add(x0, r1);
1660 0 : r1 = L_add(y02, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
1661 0 : r3 = L_sub(r1, t);
1662 0 : r1 = L_add(r1, t);
1663 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
1664 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
1665 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
1666 :
1667 : /* imaginary part */
1668 0 : x0 = L_shr_pos(im[s * 5], SCALEFACTOR20);
1669 0 : x1 = L_shr_pos(im[s * 1], SCALEFACTOR20);
1670 0 : x2 = L_shr_pos(im[s * 17], SCALEFACTOR20);
1671 0 : x3 = L_shr_pos(im[s * 13], SCALEFACTOR20);
1672 0 : x4 = L_shr_pos(im[s * 9], SCALEFACTOR20);
1673 :
1674 0 : s1 = L_add(x1, x4);
1675 0 : s4 = L_sub(x1, x4);
1676 0 : s3 = L_add(x2, x3);
1677 0 : s2 = L_sub(x2, x3);
1678 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
1679 0 : s1 = L_add(s1, s3);
1680 0 : y03 = L_add(x0, s1);
1681 0 : s1 = L_add(y03, L_shl_pos(Mpy_32_xx(s1, C55), 1));
1682 0 : s3 = L_sub(s1, t);
1683 0 : s1 = L_add(s1, t);
1684 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
1685 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
1686 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
1687 :
1688 : /* combination */
1689 0 : y10 = L_add(r1, s2);
1690 0 : y34 = L_sub(r1, s2);
1691 0 : y18 = L_sub(r3, s4);
1692 0 : y26 = L_add(r3, s4);
1693 :
1694 0 : y11 = L_sub(s1, r2);
1695 0 : y35 = L_add(s1, r2);
1696 0 : y19 = L_add(s3, r4);
1697 0 : y27 = L_sub(s3, r4);
1698 :
1699 : /* 3. FFT5 stage: samples 10, 6, 2, 18, 14 -> y04/y05, y12/y13, y20/y21, y28/y29, y36/y37 */
1700 :
1701 : /* real part */
1702 0 : x0 = L_shr_pos(re[s * 10], SCALEFACTOR20);
1703 0 : x1 = L_shr_pos(re[s * 6], SCALEFACTOR20);
1704 0 : x2 = L_shr_pos(re[s * 2], SCALEFACTOR20);
1705 0 : x3 = L_shr_pos(re[s * 18], SCALEFACTOR20);
1706 0 : x4 = L_shr_pos(re[s * 14], SCALEFACTOR20);
1707 :
1708 0 : r1 = L_add(x1, x4);
1709 0 : r4 = L_sub(x1, x4);
1710 0 : r3 = L_add(x2, x3);
1711 0 : r2 = L_sub(x2, x3);
1712 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
1713 0 : r1 = L_add(r1, r3);
1714 0 : y04 = L_add(x0, r1);
1715 0 : r1 = L_add(y04, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
1716 0 : r3 = L_sub(r1, t);
1717 0 : r1 = L_add(r1, t);
1718 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
1719 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
1720 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
1721 :
1722 : /* imaginary part */
1723 0 : x0 = L_shr_pos(im[s * 10], SCALEFACTOR20);
1724 0 : x1 = L_shr_pos(im[s * 6], SCALEFACTOR20);
1725 0 : x2 = L_shr_pos(im[s * 2], SCALEFACTOR20);
1726 0 : x3 = L_shr_pos(im[s * 18], SCALEFACTOR20);
1727 0 : x4 = L_shr_pos(im[s * 14], SCALEFACTOR20);
1728 :
1729 0 : s1 = L_add(x1, x4);
1730 0 : s4 = L_sub(x1, x4);
1731 0 : s3 = L_add(x2, x3);
1732 0 : s2 = L_sub(x2, x3);
1733 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
1734 0 : s1 = L_add(s1, s3);
1735 0 : y05 = L_add(x0, s1);
1736 0 : s1 = L_add(y05, L_shl_pos(Mpy_32_xx(s1, C55), 1));
1737 0 : s3 = L_sub(s1, t);
1738 0 : s1 = L_add(s1, t);
1739 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
1740 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
1741 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
1742 :
1743 : /* combination */
1744 0 : y12 = L_add(r1, s2);
1745 0 : y36 = L_sub(r1, s2);
1746 0 : y20 = L_sub(r3, s4);
1747 0 : y28 = L_add(r3, s4);
1748 :
1749 0 : y13 = L_sub(s1, r2);
1750 0 : y37 = L_add(s1, r2);
1751 0 : y21 = L_add(s3, r4);
1752 0 : y29 = L_sub(s3, r4);
1753 :
1754 : /* 4. FFT5 stage: samples 15, 11, 7, 3, 19 -> y06/y07, y14/y15, y22/y23, y30/y31, y38/y39 */
1755 :
1756 : /* real part */
1757 0 : x0 = L_shr_pos(re[s * 15], SCALEFACTOR20);
1758 0 : x1 = L_shr_pos(re[s * 11], SCALEFACTOR20);
1759 0 : x2 = L_shr_pos(re[s * 7], SCALEFACTOR20);
1760 0 : x3 = L_shr_pos(re[s * 3], SCALEFACTOR20);
1761 0 : x4 = L_shr_pos(re[s * 19], SCALEFACTOR20);
1762 :
1763 0 : r1 = L_add(x1, x4);
1764 0 : r4 = L_sub(x1, x4);
1765 0 : r3 = L_add(x2, x3);
1766 0 : r2 = L_sub(x2, x3);
1767 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
1768 0 : r1 = L_add(r1, r3);
1769 0 : y06 = L_add(x0, r1);
1770 0 : r1 = L_add(y06, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
1771 0 : r3 = L_sub(r1, t);
1772 0 : r1 = L_add(r1, t);
1773 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
1774 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
1775 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
1776 :
1777 : /* imaginary part */
1778 0 : x0 = L_shr_pos(im[s * 15], SCALEFACTOR20);
1779 0 : x1 = L_shr_pos(im[s * 11], SCALEFACTOR20);
1780 0 : x2 = L_shr_pos(im[s * 7], SCALEFACTOR20);
1781 0 : x3 = L_shr_pos(im[s * 3], SCALEFACTOR20);
1782 0 : x4 = L_shr_pos(im[s * 19], SCALEFACTOR20);
1783 :
1784 0 : s1 = L_add(x1, x4);
1785 0 : s4 = L_sub(x1, x4);
1786 0 : s3 = L_add(x2, x3);
1787 0 : s2 = L_sub(x2, x3);
1788 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
1789 0 : s1 = L_add(s1, s3);
1790 0 : y07 = L_add(x0, s1);
1791 0 : s1 = L_add(y07, L_shl_pos(Mpy_32_xx(s1, C55), 1));
1792 0 : s3 = L_sub(s1, t);
1793 0 : s1 = L_add(s1, t);
1794 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
1795 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
1796 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
1797 :
1798 : /* combination */
1799 0 : y14 = L_add(r1, s2);
1800 0 : y38 = L_sub(r1, s2);
1801 0 : y22 = L_sub(r3, s4);
1802 0 : y30 = L_add(r3, s4);
1803 :
1804 0 : y15 = L_sub(s1, r2);
1805 0 : y39 = L_add(s1, r2);
1806 0 : y23 = L_add(s3, r4);
1807 0 : y31 = L_sub(s3, r4);
1808 :
1809 : /* 1. FFT4 stage: y00..y07 -> samples 0, 5, 10, 15 */
1810 :
1811 : /* Pre-additions */
1812 0 : t0 = L_add(y00, y04);
1813 0 : t2 = L_sub(y00, y04);
1814 0 : t1 = L_add(y01, y05);
1815 0 : t3 = L_sub(y01, y05);
1816 0 : t4 = L_add(y02, y06);
1817 0 : t7 = L_sub(y02, y06);
1818 0 : t5 = L_add(y07, y03);
1819 0 : t6 = L_sub(y07, y03);
1820 :
1821 : /* Post-additions */
1822 0 : re[s * 0] = L_add(t0, t4);
1823 0 : move32();
1824 0 : im[s * 0] = L_add(t1, t5);
1825 0 : move32();
1826 0 : re[s * 5] = L_sub(t2, t6);
1827 0 : move32();
1828 0 : im[s * 5] = L_sub(t3, t7);
1829 0 : move32();
1830 0 : re[s * 10] = L_sub(t0, t4);
1831 0 : move32();
1832 0 : im[s * 10] = L_sub(t1, t5);
1833 0 : move32();
1834 0 : re[s * 15] = L_add(t2, t6);
1835 0 : move32();
1836 0 : im[s * 15] = L_add(t3, t7);
1837 0 : move32();
1838 :
1839 : /* 2. FFT4 stage: y08..y15 -> samples 4, 9, 14, 19 */
1840 :
1841 : /* Pre-additions */
1842 0 : t0 = L_add(y08, y12);
1843 0 : t2 = L_sub(y08, y12);
1844 0 : t1 = L_add(y09, y13);
1845 0 : t3 = L_sub(y09, y13);
1846 0 : t4 = L_add(y10, y14);
1847 0 : t7 = L_sub(y10, y14);
1848 0 : t5 = L_add(y15, y11);
1849 0 : t6 = L_sub(y15, y11);
1850 :
1851 : /* Post-additions */
1852 0 : re[s * 4] = L_add(t0, t4);
1853 0 : move32();
1854 0 : im[s * 4] = L_add(t1, t5);
1855 0 : move32();
1856 0 : re[s * 9] = L_sub(t2, t6);
1857 0 : move32();
1858 0 : im[s * 9] = L_sub(t3, t7);
1859 0 : move32();
1860 0 : re[s * 14] = L_sub(t0, t4);
1861 0 : move32();
1862 0 : im[s * 14] = L_sub(t1, t5);
1863 0 : move32();
1864 0 : re[s * 19] = L_add(t2, t6);
1865 0 : move32();
1866 0 : im[s * 19] = L_add(t3, t7);
1867 0 : move32();
1868 :
1869 : /* 3. FFT4 stage: y16..y23 -> samples 8, 13, 18, 3 */
1870 :
1871 : /* Pre-additions */
1872 0 : t0 = L_add(y16, y20);
1873 0 : t2 = L_sub(y16, y20);
1874 0 : t1 = L_add(y17, y21);
1875 0 : t3 = L_sub(y17, y21);
1876 0 : t4 = L_add(y18, y22);
1877 0 : t7 = L_sub(y18, y22);
1878 0 : t5 = L_add(y23, y19);
1879 0 : t6 = L_sub(y23, y19);
1880 :
1881 : /* Post-additions */
1882 0 : re[s * 8] = L_add(t0, t4);
1883 0 : move32();
1884 0 : im[s * 8] = L_add(t1, t5);
1885 0 : move32();
1886 0 : re[s * 13] = L_sub(t2, t6);
1887 0 : move32();
1888 0 : im[s * 13] = L_sub(t3, t7);
1889 0 : move32();
1890 0 : re[s * 18] = L_sub(t0, t4);
1891 0 : move32();
1892 0 : im[s * 18] = L_sub(t1, t5);
1893 0 : move32();
1894 0 : re[s * 3] = L_add(t2, t6);
1895 0 : move32();
1896 0 : im[s * 3] = L_add(t3, t7);
1897 0 : move32();
1898 :
1899 : /* 4. FFT4 stage: y24..y31 -> samples 12, 17, 2, 7 */
1900 :
1901 : /* Pre-additions */
1902 0 : t0 = L_add(y24, y28);
1903 0 : t2 = L_sub(y24, y28);
1904 0 : t1 = L_add(y25, y29);
1905 0 : t3 = L_sub(y25, y29);
1906 0 : t4 = L_add(y26, y30);
1907 0 : t7 = L_sub(y26, y30);
1908 0 : t5 = L_add(y31, y27);
1909 0 : t6 = L_sub(y31, y27);
1910 :
1911 : /* Post-additions */
1912 0 : re[s * 12] = L_add(t0, t4);
1913 0 : move32();
1914 0 : im[s * 12] = L_add(t1, t5);
1915 0 : move32();
1916 0 : re[s * 17] = L_sub(t2, t6);
1917 0 : move32();
1918 0 : im[s * 17] = L_sub(t3, t7);
1919 0 : move32();
1920 0 : re[s * 2] = L_sub(t0, t4);
1921 0 : move32();
1922 0 : im[s * 2] = L_sub(t1, t5);
1923 0 : move32();
1924 0 : re[s * 7] = L_add(t2, t6);
1925 0 : move32();
1926 0 : im[s * 7] = L_add(t3, t7);
1927 0 : move32();
1928 :
1929 : /* 5. FFT4 stage: y32..y39 -> samples 16, 1, 6, 11 */
1930 :
1931 : /* Pre-additions */
1932 0 : t0 = L_add(y32, y36);
1933 0 : t2 = L_sub(y32, y36);
1934 0 : t1 = L_add(y33, y37);
1935 0 : t3 = L_sub(y33, y37);
1936 0 : t4 = L_add(y34, y38);
1937 0 : t7 = L_sub(y34, y38);
1938 0 : t5 = L_add(y39, y35);
1939 0 : t6 = L_sub(y39, y35);
1940 :
1941 : /* Post-additions */
1942 0 : re[s * 16] = L_add(t0, t4);
1943 0 : move32();
1944 0 : im[s * 16] = L_add(t1, t5);
1945 0 : move32();
1946 0 : re[s * 1] = L_sub(t2, t6);
1947 0 : move32();
1948 0 : im[s * 1] = L_sub(t3, t7);
1949 0 : move32();
1950 0 : re[s * 6] = L_sub(t0, t4);
1951 0 : move32();
1952 0 : im[s * 6] = L_sub(t1, t5);
1953 0 : move32();
1954 0 : re[s * 11] = L_add(t2, t6);
1955 0 : move32();
1956 0 : im[s * 11] = L_add(t3, t7);
1957 0 : move32();
1958 :
1959 : Dyn_Mem_Deluxe_Out();
1960 0 : }
1961 :
1962 : /**
1963 : * \brief Function performs a complex 30-point FFT
1964 : * The FFT is performed inplace. The result of the FFT
1965 : * is scaled by SCALEFACTOR30 bits, applied as SCALEFACTOR30_1 bits on the inputs and SCALEFACTOR30_2 bits before the final 2-point butterflies.
1966 : *
1967 : * WOPS with 32x16 bit multiplications: 828 cycles
1968 : *
1969 : * \param [i/o] re real input / output, 30 values accessed at re[0], re[s], ..., re[29 * s]
1970 : * \param [i/o] im imag input / output, 30 values accessed at im[0], im[s], ..., im[29 * s]
1971 : * \param [i ] s stride real and imag input / output (counted in Word32 elements)
1972 : *
1973 : * \return void
1974 : */
1975 : /* Structure: two 15-point FFTs (each three 5-point followed by five 3-point transforms), one over the even-indexed and one over the odd-indexed inputs, merged by fifteen 2-point butterflies. */
1976 :
1977 0 : static void fft30(Word32 *re, Word32 *im, Word16 s)
1978 : {
1979 : Dyn_Mem_Deluxe_In(Word32 t; Word32 r1, r2, r3, r4; Word32 s1, s2, s3, s4;
1980 : Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09;
1981 : Word32 x10, x11, x12, x13, x14, x15, x16, x17, x18, x19;
1982 : Word32 x20, x21, x22, x23, x24, x25, x26, x27, x28, x29;
1983 :
1984 : Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
1985 : Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
1986 : Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
1987 :
1988 : Word32 z00, z01, z02, z03, z04, z05, z06, z07, z08, z09;
1989 : Word32 z10, z11, z12, z13, z14, z15, z16, z17, z18, z19;
1990 : Word32 z20, z21, z22, z23, z24, z25, z26, z27, z28, z29;
1991 : Word32 z30, z31, z32, z33, z34, z35, z36, z37, z38, z39;
1992 : Word32 z40, z41, z42, z43, z44, z45, z46, z47, z48, z49;
1993 : Word32 z50, z51, z52, z53, z54, z55, z56, z57, z58, z59;
1994 :
1995 : Word32 * rel, *reh, *iml, *imh;);
1996 : /* Output cursors: rel/iml start at element 0, reh/imh at element 15 (times stride s); they are only written through in the FFT2 stages below. */
1997 0 : rel = &re[s * 0];
1998 0 : reh = &re[s * 15];
1999 0 : iml = &im[s * 0];
2000 0 : imh = &im[s * 15];
2001 :
2002 : /* 1. FFT15 stage: even-indexed inputs, each shifted right by SCALEFACTOR30_1 on load; results end up in z00..z29 */
2003 0 : x00 = L_shr_pos(re[s * 0], SCALEFACTOR30_1);
2004 0 : x01 = L_shr_pos(im[s * 0], SCALEFACTOR30_1);
2005 0 : x02 = L_shr_pos(re[s * 18], SCALEFACTOR30_1);
2006 0 : x03 = L_shr_pos(im[s * 18], SCALEFACTOR30_1);
2007 0 : x04 = L_shr_pos(re[s * 6], SCALEFACTOR30_1);
2008 0 : x05 = L_shr_pos(im[s * 6], SCALEFACTOR30_1);
2009 0 : x06 = L_shr_pos(re[s * 24], SCALEFACTOR30_1);
2010 0 : x07 = L_shr_pos(im[s * 24], SCALEFACTOR30_1);
2011 0 : x08 = L_shr_pos(re[s * 12], SCALEFACTOR30_1);
2012 0 : x09 = L_shr_pos(im[s * 12], SCALEFACTOR30_1);
2013 :
2014 0 : x10 = L_shr_pos(re[s * 20], SCALEFACTOR30_1);
2015 0 : x11 = L_shr_pos(im[s * 20], SCALEFACTOR30_1);
2016 0 : x12 = L_shr_pos(re[s * 8], SCALEFACTOR30_1);
2017 0 : x13 = L_shr_pos(im[s * 8], SCALEFACTOR30_1);
2018 0 : x14 = L_shr_pos(re[s * 26], SCALEFACTOR30_1);
2019 0 : x15 = L_shr_pos(im[s * 26], SCALEFACTOR30_1);
2020 0 : x16 = L_shr_pos(re[s * 14], SCALEFACTOR30_1);
2021 0 : x17 = L_shr_pos(im[s * 14], SCALEFACTOR30_1);
2022 0 : x18 = L_shr_pos(re[s * 2], SCALEFACTOR30_1);
2023 0 : x19 = L_shr_pos(im[s * 2], SCALEFACTOR30_1);
2024 :
2025 0 : x20 = L_shr_pos(re[s * 10], SCALEFACTOR30_1);
2026 0 : x21 = L_shr_pos(im[s * 10], SCALEFACTOR30_1);
2027 0 : x22 = L_shr_pos(re[s * 28], SCALEFACTOR30_1);
2028 0 : x23 = L_shr_pos(im[s * 28], SCALEFACTOR30_1);
2029 0 : x24 = L_shr_pos(re[s * 16], SCALEFACTOR30_1);
2030 0 : x25 = L_shr_pos(im[s * 16], SCALEFACTOR30_1);
2031 0 : x26 = L_shr_pos(re[s * 4], SCALEFACTOR30_1);
2032 0 : x27 = L_shr_pos(im[s * 4], SCALEFACTOR30_1);
2033 0 : x28 = L_shr_pos(re[s * 22], SCALEFACTOR30_1);
2034 0 : x29 = L_shr_pos(im[s * 22], SCALEFACTOR30_1);
2035 :
2036 : /* 1. FFT5 stage: x00..x09 (real parts in even, imaginary parts in odd names) -> y00..y09 */
2037 :
2038 : /* real part */
2039 0 : r1 = L_add(x02, x08);
2040 0 : r4 = L_sub(x02, x08);
2041 0 : r3 = L_add(x04, x06);
2042 0 : r2 = L_sub(x04, x06);
2043 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
2044 0 : r1 = L_add(r1, r3);
2045 0 : y00 = L_add(x00, r1);
2046 0 : r1 = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
2047 0 : r3 = L_sub(r1, t);
2048 0 : r1 = L_add(r1, t);
2049 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
2050 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
2051 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
2052 :
2053 : /* imaginary part */
2054 0 : s1 = L_add(x03, x09);
2055 0 : s4 = L_sub(x03, x09);
2056 0 : s3 = L_add(x05, x07);
2057 0 : s2 = L_sub(x05, x07);
2058 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
2059 0 : s1 = L_add(s1, s3);
2060 0 : y01 = L_add(x01, s1);
2061 0 : s1 = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
2062 0 : s3 = L_sub(s1, t);
2063 0 : s1 = L_add(s1, t);
2064 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
2065 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
2066 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
2067 :
2068 : /* combination */
2069 0 : y02 = L_add(r1, s2);
2070 0 : y08 = L_sub(r1, s2);
2071 0 : y04 = L_sub(r3, s4);
2072 0 : y06 = L_add(r3, s4);
2073 :
2074 0 : y03 = L_sub(s1, r2);
2075 0 : y09 = L_add(s1, r2);
2076 0 : y05 = L_add(s3, r4);
2077 0 : y07 = L_sub(s3, r4);
2078 :
2079 : /* 2. FFT5 stage */
2080 :
2081 : /* real part */
2082 0 : r1 = L_add(x12, x18);
2083 0 : r4 = L_sub(x12, x18);
2084 0 : r3 = L_add(x14, x16);
2085 0 : r2 = L_sub(x14, x16);
2086 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
2087 0 : r1 = L_add(r1, r3);
2088 0 : y10 = L_add(x10, r1);
2089 0 : r1 = L_add(y10, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
2090 0 : r3 = L_sub(r1, t);
2091 0 : r1 = L_add(r1, t);
2092 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
2093 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
2094 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
2095 :
2096 : /* imaginary part */
2097 0 : s1 = L_add(x13, x19);
2098 0 : s4 = L_sub(x13, x19);
2099 0 : s3 = L_add(x15, x17);
2100 0 : s2 = L_sub(x15, x17);
2101 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
2102 0 : s1 = L_add(s1, s3);
2103 0 : y11 = L_add(x11, s1);
2104 0 : s1 = L_add(y11, L_shl_pos(Mpy_32_xx(s1, C55), 1));
2105 0 : s3 = L_sub(s1, t);
2106 0 : s1 = L_add(s1, t);
2107 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
2108 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
2109 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
2110 :
2111 : /* combination */
2112 0 : y12 = L_add(r1, s2);
2113 0 : y18 = L_sub(r1, s2);
2114 0 : y14 = L_sub(r3, s4);
2115 0 : y16 = L_add(r3, s4);
2116 :
2117 0 : y13 = L_sub(s1, r2);
2118 0 : y19 = L_add(s1, r2);
2119 0 : y15 = L_add(s3, r4);
2120 0 : y17 = L_sub(s3, r4);
2121 :
2122 : /* 3. FFT5 stage */
2123 :
2124 : /* real part */
2125 0 : r1 = L_add(x22, x28);
2126 0 : r4 = L_sub(x22, x28);
2127 0 : r3 = L_add(x24, x26);
2128 0 : r2 = L_sub(x24, x26);
2129 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
2130 0 : r1 = L_add(r1, r3);
2131 0 : y20 = L_add(x20, r1);
2132 0 : r1 = L_add(y20, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
2133 0 : r3 = L_sub(r1, t);
2134 0 : r1 = L_add(r1, t);
2135 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
2136 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
2137 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
2138 :
2139 : /* imaginary part */
2140 0 : s1 = L_add(x23, x29);
2141 0 : s4 = L_sub(x23, x29);
2142 0 : s3 = L_add(x25, x27);
2143 0 : s2 = L_sub(x25, x27);
2144 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
2145 0 : s1 = L_add(s1, s3);
2146 0 : y21 = L_add(x21, s1);
2147 0 : s1 = L_add(y21, L_shl_pos(Mpy_32_xx(s1, C55), 1));
2148 0 : s3 = L_sub(s1, t);
2149 0 : s1 = L_add(s1, t);
2150 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
2151 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
2152 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
2153 :
2154 : /* combination */
2155 0 : y22 = L_add(r1, s2);
2156 0 : y28 = L_sub(r1, s2);
2157 0 : y24 = L_sub(r3, s4);
2158 0 : y26 = L_add(r3, s4);
2159 :
2160 0 : y23 = L_sub(s1, r2);
2161 0 : y29 = L_add(s1, r2);
2162 0 : y25 = L_add(s3, r4);
2163 0 : y27 = L_sub(s3, r4);
2164 :
2165 : /* 1. FFT3 stage: 3-point transform over y00/y01, y10/y11, y20/y21 (one complex value from each FFT5 stage) */
2166 :
2167 : /* real part */
2168 0 : r1 = L_add(y10, y20);
2169 0 : r2 = Mpy_32_xx(L_sub(y10, y20), C31);
2170 0 : z00 = L_add(y00, r1);
2171 0 : r1 = L_sub(y00, L_shr_pos(r1, 1));
2172 :
2173 : /* imaginary part */
2174 0 : s1 = L_add(y11, y21);
2175 0 : s2 = Mpy_32_xx(L_sub(y11, y21), C31);
2176 0 : z01 = L_add(y01, s1);
2177 0 : s1 = L_sub(y01, L_shr_pos(s1, 1));
2178 :
2179 : /* combination */
2180 0 : z20 = L_sub(r1, s2);
2181 0 : z10 = L_add(r1, s2);
2182 0 : z21 = L_add(s1, r2);
2183 0 : z11 = L_sub(s1, r2);
2184 :
2185 : /* 2. FFT3 stage */
2186 :
2187 : /* real part */
2188 0 : r1 = L_add(y12, y22);
2189 0 : r2 = Mpy_32_xx(L_sub(y12, y22), C31);
2190 0 : z12 = L_add(y02, r1);
2191 0 : r1 = L_sub(y02, L_shr_pos(r1, 1));
2192 :
2193 : /* imaginary part */
2194 0 : s1 = L_add(y13, y23);
2195 0 : s2 = Mpy_32_xx(L_sub(y13, y23), C31);
2196 0 : z13 = L_add(y03, s1);
2197 0 : s1 = L_sub(y03, L_shr_pos(s1, 1));
2198 :
2199 : /* combination */
2200 0 : z02 = L_sub(r1, s2);
2201 0 : z22 = L_add(r1, s2);
2202 0 : z03 = L_add(s1, r2);
2203 0 : z23 = L_sub(s1, r2);
2204 :
2205 : /* 3. FFT3 stage */
2206 :
2207 : /* real part */
2208 0 : r1 = L_add(y14, y24);
2209 0 : r2 = Mpy_32_xx(L_sub(y14, y24), C31);
2210 0 : z24 = L_add(y04, r1);
2211 0 : r1 = L_sub(y04, L_shr_pos(r1, 1));
2212 :
2213 : /* imaginary part */
2214 0 : s1 = L_add(y15, y25);
2215 0 : s2 = Mpy_32_xx(L_sub(y15, y25), C31);
2216 0 : z25 = L_add(y05, s1);
2217 0 : s1 = L_sub(y05, L_shr_pos(s1, 1));
2218 :
2219 : /* combination */
2220 0 : z14 = L_sub(r1, s2);
2221 0 : z04 = L_add(r1, s2);
2222 0 : z15 = L_add(s1, r2);
2223 0 : z05 = L_sub(s1, r2);
2224 :
2225 : /* 4. FFT3 stage */
2226 :
2227 : /* real part */
2228 0 : r1 = L_add(y16, y26);
2229 0 : r2 = Mpy_32_xx(L_sub(y16, y26), C31);
2230 0 : z06 = L_add(y06, r1);
2231 0 : r1 = L_sub(y06, L_shr_pos(r1, 1));
2232 :
2233 : /* imaginary part */
2234 0 : s1 = L_add(y17, y27);
2235 0 : s2 = Mpy_32_xx(L_sub(y17, y27), C31);
2236 0 : z07 = L_add(y07, s1);
2237 0 : s1 = L_sub(y07, L_shr_pos(s1, 1));
2238 :
2239 : /* combination */
2240 0 : z26 = L_sub(r1, s2);
2241 0 : z16 = L_add(r1, s2);
2242 0 : z27 = L_add(s1, r2);
2243 0 : z17 = L_sub(s1, r2);
2244 :
2245 : /* 5. FFT3 stage */
2246 :
2247 : /* real part */
2248 0 : r1 = L_add(y18, y28);
2249 0 : r2 = Mpy_32_xx(L_sub(y18, y28), C31);
2250 0 : z18 = L_add(y08, r1);
2251 0 : r1 = L_sub(y08, L_shr_pos(r1, 1));
2252 :
2253 : /* imaginary part */
2254 0 : s1 = L_add(y19, y29);
2255 0 : s2 = Mpy_32_xx(L_sub(y19, y29), C31);
2256 0 : z19 = L_add(y09, s1);
2257 0 : s1 = L_sub(y09, L_shr_pos(s1, 1));
2258 :
2259 : /* combination */
2260 0 : z08 = L_sub(r1, s2);
2261 0 : z28 = L_add(r1, s2);
2262 0 : z09 = L_add(s1, r2);
2263 0 : z29 = L_sub(s1, r2);
2264 :
2265 : /* 2. FFT15 stage: odd-indexed inputs, same sequence of operations as the first FFT15 stage; results end up in z30..z59 */
2266 0 : x00 = L_shr_pos(re[s * 15], SCALEFACTOR30_1);
2267 0 : x01 = L_shr_pos(im[s * 15], SCALEFACTOR30_1);
2268 0 : x02 = L_shr_pos(re[s * 3], SCALEFACTOR30_1);
2269 0 : x03 = L_shr_pos(im[s * 3], SCALEFACTOR30_1);
2270 0 : x04 = L_shr_pos(re[s * 21], SCALEFACTOR30_1);
2271 0 : x05 = L_shr_pos(im[s * 21], SCALEFACTOR30_1);
2272 0 : x06 = L_shr_pos(re[s * 9], SCALEFACTOR30_1);
2273 0 : x07 = L_shr_pos(im[s * 9], SCALEFACTOR30_1);
2274 0 : x08 = L_shr_pos(re[s * 27], SCALEFACTOR30_1);
2275 0 : x09 = L_shr_pos(im[s * 27], SCALEFACTOR30_1);
2276 :
2277 0 : x10 = L_shr_pos(re[s * 5], SCALEFACTOR30_1);
2278 0 : x11 = L_shr_pos(im[s * 5], SCALEFACTOR30_1);
2279 0 : x12 = L_shr_pos(re[s * 23], SCALEFACTOR30_1);
2280 0 : x13 = L_shr_pos(im[s * 23], SCALEFACTOR30_1);
2281 0 : x14 = L_shr_pos(re[s * 11], SCALEFACTOR30_1);
2282 0 : x15 = L_shr_pos(im[s * 11], SCALEFACTOR30_1);
2283 0 : x16 = L_shr_pos(re[s * 29], SCALEFACTOR30_1);
2284 0 : x17 = L_shr_pos(im[s * 29], SCALEFACTOR30_1);
2285 0 : x18 = L_shr_pos(re[s * 17], SCALEFACTOR30_1);
2286 0 : x19 = L_shr_pos(im[s * 17], SCALEFACTOR30_1);
2287 :
2288 0 : x20 = L_shr_pos(re[s * 25], SCALEFACTOR30_1);
2289 0 : x21 = L_shr_pos(im[s * 25], SCALEFACTOR30_1);
2290 0 : x22 = L_shr_pos(re[s * 13], SCALEFACTOR30_1);
2291 0 : x23 = L_shr_pos(im[s * 13], SCALEFACTOR30_1);
2292 0 : x24 = L_shr_pos(re[s * 1], SCALEFACTOR30_1);
2293 0 : x25 = L_shr_pos(im[s * 1], SCALEFACTOR30_1);
2294 0 : x26 = L_shr_pos(re[s * 19], SCALEFACTOR30_1);
2295 0 : x27 = L_shr_pos(im[s * 19], SCALEFACTOR30_1);
2296 0 : x28 = L_shr_pos(re[s * 7], SCALEFACTOR30_1);
2297 0 : x29 = L_shr_pos(im[s * 7], SCALEFACTOR30_1);
2298 :
2299 : /* 1. FFT5 stage */
2300 :
2301 : /* real part */
2302 0 : r1 = L_add(x02, x08);
2303 0 : r4 = L_sub(x02, x08);
2304 0 : r3 = L_add(x04, x06);
2305 0 : r2 = L_sub(x04, x06);
2306 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
2307 0 : r1 = L_add(r1, r3);
2308 0 : y00 = L_add(x00, r1);
2309 0 : r1 = L_add(y00, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
2310 0 : r3 = L_sub(r1, t);
2311 0 : r1 = L_add(r1, t);
2312 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
2313 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
2314 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
2315 :
2316 : /* imaginary part */
2317 0 : s1 = L_add(x03, x09);
2318 0 : s4 = L_sub(x03, x09);
2319 0 : s3 = L_add(x05, x07);
2320 0 : s2 = L_sub(x05, x07);
2321 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
2322 0 : s1 = L_add(s1, s3);
2323 0 : y01 = L_add(x01, s1);
2324 0 : s1 = L_add(y01, L_shl_pos(Mpy_32_xx(s1, C55), 1));
2325 0 : s3 = L_sub(s1, t);
2326 0 : s1 = L_add(s1, t);
2327 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
2328 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
2329 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
2330 :
2331 : /* combination */
2332 0 : y02 = L_add(r1, s2);
2333 0 : y08 = L_sub(r1, s2);
2334 0 : y04 = L_sub(r3, s4);
2335 0 : y06 = L_add(r3, s4);
2336 :
2337 0 : y03 = L_sub(s1, r2);
2338 0 : y09 = L_add(s1, r2);
2339 0 : y05 = L_add(s3, r4);
2340 0 : y07 = L_sub(s3, r4);
2341 :
2342 : /* 2. FFT5 stage */
2343 :
2344 : /* real part */
2345 0 : r1 = L_add(x12, x18);
2346 0 : r4 = L_sub(x12, x18);
2347 0 : r3 = L_add(x14, x16);
2348 0 : r2 = L_sub(x14, x16);
2349 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
2350 0 : r1 = L_add(r1, r3);
2351 0 : y10 = L_add(x10, r1);
2352 0 : r1 = L_add(y10, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
2353 0 : r3 = L_sub(r1, t);
2354 0 : r1 = L_add(r1, t);
2355 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
2356 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
2357 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
2358 :
2359 : /* imaginary part */
2360 0 : s1 = L_add(x13, x19);
2361 0 : s4 = L_sub(x13, x19);
2362 0 : s3 = L_add(x15, x17);
2363 0 : s2 = L_sub(x15, x17);
2364 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
2365 0 : s1 = L_add(s1, s3);
2366 0 : y11 = L_add(x11, s1);
2367 0 : s1 = L_add(y11, L_shl_pos(Mpy_32_xx(s1, C55), 1));
2368 0 : s3 = L_sub(s1, t);
2369 0 : s1 = L_add(s1, t);
2370 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
2371 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
2372 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
2373 :
2374 : /* combination */
2375 0 : y12 = L_add(r1, s2);
2376 0 : y18 = L_sub(r1, s2);
2377 0 : y14 = L_sub(r3, s4);
2378 0 : y16 = L_add(r3, s4);
2379 :
2380 0 : y13 = L_sub(s1, r2);
2381 0 : y19 = L_add(s1, r2);
2382 0 : y15 = L_add(s3, r4);
2383 0 : y17 = L_sub(s3, r4);
2384 :
2385 : /* 3. FFT5 stage */
2386 :
2387 : /* real part */
2388 0 : r1 = L_add(x22, x28);
2389 0 : r4 = L_sub(x22, x28);
2390 0 : r3 = L_add(x24, x26);
2391 0 : r2 = L_sub(x24, x26);
2392 0 : t = Mpy_32_xx(L_sub(r1, r3), C54);
2393 0 : r1 = L_add(r1, r3);
2394 0 : y20 = L_add(x20, r1);
2395 0 : r1 = L_add(y20, (L_shl_pos(Mpy_32_xx(r1, C55), 1)));
2396 0 : r3 = L_sub(r1, t);
2397 0 : r1 = L_add(r1, t);
2398 0 : t = Mpy_32_xx((L_add(r4, r2)), C51);
2399 0 : r4 = L_add(t, L_shl_pos(Mpy_32_xx(r4, C52), 1));
2400 0 : r2 = L_add(t, Mpy_32_xx(r2, C53));
2401 :
2402 : /* imaginary part */
2403 0 : s1 = L_add(x23, x29);
2404 0 : s4 = L_sub(x23, x29);
2405 0 : s3 = L_add(x25, x27);
2406 0 : s2 = L_sub(x25, x27);
2407 0 : t = Mpy_32_xx(L_sub(s1, s3), C54);
2408 0 : s1 = L_add(s1, s3);
2409 0 : y21 = L_add(x21, s1);
2410 0 : s1 = L_add(y21, L_shl_pos(Mpy_32_xx(s1, C55), 1));
2411 0 : s3 = L_sub(s1, t);
2412 0 : s1 = L_add(s1, t);
2413 0 : t = Mpy_32_xx(L_add(s4, s2), C51);
2414 0 : s4 = L_add(t, L_shl_pos(Mpy_32_xx(s4, C52), 1));
2415 0 : s2 = L_add(t, Mpy_32_xx(s2, C53));
2416 :
2417 : /* combination */
2418 0 : y22 = L_add(r1, s2);
2419 0 : y28 = L_sub(r1, s2);
2420 0 : y24 = L_sub(r3, s4);
2421 0 : y26 = L_add(r3, s4);
2422 :
2423 0 : y23 = L_sub(s1, r2);
2424 0 : y29 = L_add(s1, r2);
2425 0 : y25 = L_add(s3, r4);
2426 0 : y27 = L_sub(s3, r4);
2427 :
2428 : /* 1. FFT3 stage */
2429 :
2430 : /* real part */
2431 0 : r1 = L_add(y10, y20);
2432 0 : r2 = Mpy_32_xx(L_sub(y10, y20), C31);
2433 0 : z30 = L_add(y00, r1);
2434 0 : r1 = L_sub(y00, L_shr_pos(r1, 1));
2435 :
2436 : /* imaginary part */
2437 0 : s1 = L_add(y11, y21);
2438 0 : s2 = Mpy_32_xx(L_sub(y11, y21), C31);
2439 0 : z31 = L_add(y01, s1);
2440 0 : s1 = L_sub(y01, L_shr_pos(s1, 1));
2441 :
2442 : /* combination */
2443 0 : z50 = L_sub(r1, s2);
2444 0 : z40 = L_add(r1, s2);
2445 0 : z51 = L_add(s1, r2);
2446 0 : z41 = L_sub(s1, r2);
2447 :
2448 : /* 2. FFT3 stage */
2449 :
2450 : /* real part */
2451 0 : r1 = L_add(y12, y22);
2452 0 : r2 = Mpy_32_xx(L_sub(y12, y22), C31);
2453 0 : z42 = L_add(y02, r1);
2454 0 : r1 = L_sub(y02, L_shr_pos(r1, 1));
2455 :
2456 : /* imaginary part */
2457 0 : s1 = L_add(y13, y23);
2458 0 : s2 = Mpy_32_xx(L_sub(y13, y23), C31);
2459 0 : z43 = L_add(y03, s1);
2460 0 : s1 = L_sub(y03, L_shr_pos(s1, 1));
2461 :
2462 : /* combination */
2463 0 : z32 = L_sub(r1, s2);
2464 0 : z52 = L_add(r1, s2);
2465 0 : z33 = L_add(s1, r2);
2466 0 : z53 = L_sub(s1, r2);
2467 :
2468 : /* 3. FFT3 stage */
2469 :
2470 : /* real part */
2471 0 : r1 = L_add(y14, y24);
2472 0 : r2 = Mpy_32_xx(L_sub(y14, y24), C31);
2473 0 : z54 = L_add(y04, r1);
2474 0 : r1 = L_sub(y04, L_shr_pos(r1, 1));
2475 :
2476 : /* imaginary part */
2477 0 : s1 = L_add(y15, y25);
2478 0 : s2 = Mpy_32_xx(L_sub(y15, y25), C31);
2479 0 : z55 = L_add(y05, s1);
2480 0 : s1 = L_sub(y05, L_shr_pos(s1, 1));
2481 :
2482 : /* combination */
2483 0 : z44 = L_sub(r1, s2);
2484 0 : z34 = L_add(r1, s2);
2485 0 : z45 = L_add(s1, r2);
2486 0 : z35 = L_sub(s1, r2);
2487 :
2488 : /* 4. FFT3 stage */
2489 :
2490 : /* real part */
2491 0 : r1 = L_add(y16, y26);
2492 0 : r2 = Mpy_32_xx(L_sub(y16, y26), C31);
2493 0 : z36 = L_add(y06, r1);
2494 0 : r1 = L_sub(y06, L_shr_pos(r1, 1));
2495 :
2496 : /* imaginary part */
2497 0 : s1 = L_add(y17, y27);
2498 0 : s2 = Mpy_32_xx(L_sub(y17, y27), C31);
2499 0 : z37 = L_add(y07, s1);
2500 0 : s1 = L_sub(y07, L_shr_pos(s1, 1));
2501 :
2502 : /* combination */
2503 0 : z56 = L_sub(r1, s2);
2504 0 : z46 = L_add(r1, s2);
2505 0 : z57 = L_add(s1, r2);
2506 0 : z47 = L_sub(s1, r2);
2507 :
2508 : /* 5. FFT3 stage */
2509 :
2510 : /* real part */
2511 0 : r1 = L_add(y18, y28);
2512 0 : r2 = Mpy_32_xx(L_sub(y18, y28), C31);
2513 0 : z48 = L_add(y08, r1);
2514 0 : r1 = L_sub(y08, L_shr_pos(r1, 1));
2515 :
2516 : /* imaginary part */
2517 0 : s1 = L_add(y19, y29);
2518 0 : s2 = Mpy_32_xx(L_sub(y19, y29), C31);
2519 0 : z49 = L_add(y09, s1);
2520 0 : s1 = L_sub(y09, L_shr_pos(s1, 1));
2521 :
2522 : /* combination */
2523 0 : z38 = L_sub(r1, s2);
2524 0 : z58 = L_add(r1, s2);
2525 0 : z39 = L_add(s1, r2);
2526 0 : z59 = L_sub(s1, r2);
2527 :
2528 : /* 1. FFT2 stage: operands are shifted right by SCALEFACTOR30_2 first; odd-numbered stages store the sum via rel/iml, even-numbered stages store it via reh/imh */
2529 0 : r1 = L_shr_pos(z00, SCALEFACTOR30_2);
2530 0 : r2 = L_shr_pos(z30, SCALEFACTOR30_2);
2531 0 : r3 = L_shr_pos(z01, SCALEFACTOR30_2);
2532 0 : r4 = L_shr_pos(z31, SCALEFACTOR30_2);
2533 0 : *rel = L_add(r1, r2);
2534 0 : move32();
2535 0 : *reh = L_sub(r1, r2);
2536 0 : move32();
2537 0 : *iml = L_add(r3, r4);
2538 0 : move32();
2539 0 : *imh = L_sub(r3, r4);
2540 0 : move32();
2541 0 : rel += s, reh += s, iml += s;
2542 0 : imh += s;
2543 :
2544 : /* 2. FFT2 stage */
2545 0 : r1 = L_shr_pos(z16, SCALEFACTOR30_2);
2546 0 : r2 = L_shr_pos(z46, SCALEFACTOR30_2);
2547 0 : r3 = L_shr_pos(z17, SCALEFACTOR30_2);
2548 0 : r4 = L_shr_pos(z47, SCALEFACTOR30_2);
2549 0 : *reh = L_add(r1, r2);
2550 0 : move32();
2551 0 : *rel = L_sub(r1, r2);
2552 0 : move32();
2553 0 : *imh = L_add(r3, r4);
2554 0 : move32();
2555 0 : *iml = L_sub(r3, r4);
2556 0 : move32();
2557 0 : rel += s, reh += s, iml += s;
2558 0 : imh += s;
2559 :
2560 : /* 3. FFT2 stage */
2561 0 : r1 = L_shr_pos(z02, SCALEFACTOR30_2);
2562 0 : r2 = L_shr_pos(z32, SCALEFACTOR30_2);
2563 0 : r3 = L_shr_pos(z03, SCALEFACTOR30_2);
2564 0 : r4 = L_shr_pos(z33, SCALEFACTOR30_2);
2565 0 : *rel = L_add(r1, r2);
2566 0 : move32();
2567 0 : *reh = L_sub(r1, r2);
2568 0 : move32();
2569 0 : *iml = L_add(r3, r4);
2570 0 : move32();
2571 0 : *imh = L_sub(r3, r4);
2572 0 : move32();
2573 0 : rel += s, reh += s, iml += s;
2574 0 : imh += s;
2575 :
2576 : /* 4. FFT2 stage */
2577 0 : r1 = L_shr_pos(z18, SCALEFACTOR30_2);
2578 0 : r2 = L_shr_pos(z48, SCALEFACTOR30_2);
2579 0 : r3 = L_shr_pos(z19, SCALEFACTOR30_2);
2580 0 : r4 = L_shr_pos(z49, SCALEFACTOR30_2);
2581 0 : *reh = L_add(r1, r2);
2582 0 : move32();
2583 0 : *rel = L_sub(r1, r2);
2584 0 : move32();
2585 0 : *imh = L_add(r3, r4);
2586 0 : move32();
2587 0 : *iml = L_sub(r3, r4);
2588 0 : move32();
2589 0 : rel += s, reh += s, iml += s;
2590 0 : imh += s;
2591 :
2592 : /* 5. FFT2 stage */
2593 0 : r1 = L_shr_pos(z04, SCALEFACTOR30_2);
2594 0 : r2 = L_shr_pos(z34, SCALEFACTOR30_2);
2595 0 : r3 = L_shr_pos(z05, SCALEFACTOR30_2);
2596 0 : r4 = L_shr_pos(z35, SCALEFACTOR30_2);
2597 0 : *rel = L_add(r1, r2);
2598 0 : move32();
2599 0 : *reh = L_sub(r1, r2);
2600 0 : move32();
2601 0 : *iml = L_add(r3, r4);
2602 0 : move32();
2603 0 : *imh = L_sub(r3, r4);
2604 0 : move32();
2605 0 : rel += s, reh += s, iml += s;
2606 0 : imh += s;
2607 :
2608 : /* 6. FFT2 stage */
2609 0 : r1 = L_shr_pos(z20, SCALEFACTOR30_2);
2610 0 : r2 = L_shr_pos(z50, SCALEFACTOR30_2);
2611 0 : r3 = L_shr_pos(z21, SCALEFACTOR30_2);
2612 0 : r4 = L_shr_pos(z51, SCALEFACTOR30_2);
2613 0 : *reh = L_add(r1, r2);
2614 0 : move32();
2615 0 : *rel = L_sub(r1, r2);
2616 0 : move32();
2617 0 : *imh = L_add(r3, r4);
2618 0 : move32();
2619 0 : *iml = L_sub(r3, r4);
2620 0 : move32();
2621 0 : rel += s, reh += s, iml += s;
2622 0 : imh += s;
2623 :
2624 : /* 7. FFT2 stage */
2625 0 : r1 = L_shr_pos(z06, SCALEFACTOR30_2);
2626 0 : r2 = L_shr_pos(z36, SCALEFACTOR30_2);
2627 0 : r3 = L_shr_pos(z07, SCALEFACTOR30_2);
2628 0 : r4 = L_shr_pos(z37, SCALEFACTOR30_2);
2629 0 : *rel = L_add(r1, r2);
2630 0 : move32();
2631 0 : *reh = L_sub(r1, r2);
2632 0 : move32();
2633 0 : *iml = L_add(r3, r4);
2634 0 : move32();
2635 0 : *imh = L_sub(r3, r4);
2636 0 : move32();
2637 0 : rel += s, reh += s, iml += s;
2638 0 : imh += s;
2639 :
2640 : /* 8. FFT2 stage */
2641 0 : r1 = L_shr_pos(z22, SCALEFACTOR30_2);
2642 0 : r2 = L_shr_pos(z52, SCALEFACTOR30_2);
2643 0 : r3 = L_shr_pos(z23, SCALEFACTOR30_2);
2644 0 : r4 = L_shr_pos(z53, SCALEFACTOR30_2);
2645 0 : *reh = L_add(r1, r2);
2646 0 : move32();
2647 0 : *rel = L_sub(r1, r2);
2648 0 : move32();
2649 0 : *imh = L_add(r3, r4);
2650 0 : move32();
2651 0 : *iml = L_sub(r3, r4);
2652 0 : move32();
2653 0 : rel += s, reh += s, iml += s;
2654 0 : imh += s;
2655 :
2656 : /* 9. FFT2 stage */
2657 0 : r1 = L_shr_pos(z08, SCALEFACTOR30_2);
2658 0 : r2 = L_shr_pos(z38, SCALEFACTOR30_2);
2659 0 : r3 = L_shr_pos(z09, SCALEFACTOR30_2);
2660 0 : r4 = L_shr_pos(z39, SCALEFACTOR30_2);
2661 0 : *rel = L_add(r1, r2);
2662 0 : move32();
2663 0 : *reh = L_sub(r1, r2);
2664 0 : move32();
2665 0 : *iml = L_add(r3, r4);
2666 0 : move32();
2667 0 : *imh = L_sub(r3, r4);
2668 0 : move32();
2669 0 : rel += s, reh += s, iml += s;
2670 0 : imh += s;
2671 :
2672 : /* 10. FFT2 stage */
2673 0 : r1 = L_shr_pos(z24, SCALEFACTOR30_2);
2674 0 : r2 = L_shr_pos(z54, SCALEFACTOR30_2);
2675 0 : r3 = L_shr_pos(z25, SCALEFACTOR30_2);
2676 0 : r4 = L_shr_pos(z55, SCALEFACTOR30_2);
2677 0 : *reh = L_add(r1, r2);
2678 0 : move32();
2679 0 : *rel = L_sub(r1, r2);
2680 0 : move32();
2681 0 : *imh = L_add(r3, r4);
2682 0 : move32();
2683 0 : *iml = L_sub(r3, r4);
2684 0 : move32();
2685 0 : rel += s, reh += s, iml += s;
2686 0 : imh += s;
2687 :
2688 : /* 11. FFT2 stage */
2689 0 : r1 = L_shr_pos(z10, SCALEFACTOR30_2);
2690 0 : r2 = L_shr_pos(z40, SCALEFACTOR30_2);
2691 0 : r3 = L_shr_pos(z11, SCALEFACTOR30_2);
2692 0 : r4 = L_shr_pos(z41, SCALEFACTOR30_2);
2693 0 : *rel = L_add(r1, r2);
2694 0 : move32();
2695 0 : *reh = L_sub(r1, r2);
2696 0 : move32();
2697 0 : *iml = L_add(r3, r4);
2698 0 : move32();
2699 0 : *imh = L_sub(r3, r4);
2700 0 : move32();
2701 0 : rel += s, reh += s, iml += s;
2702 0 : imh += s;
2703 :
2704 : /* 12. FFT2 stage */
2705 0 : r1 = L_shr_pos(z26, SCALEFACTOR30_2);
2706 0 : r2 = L_shr_pos(z56, SCALEFACTOR30_2);
2707 0 : r3 = L_shr_pos(z27, SCALEFACTOR30_2);
2708 0 : r4 = L_shr_pos(z57, SCALEFACTOR30_2);
2709 0 : *reh = L_add(r1, r2);
2710 0 : move32();
2711 0 : *rel = L_sub(r1, r2);
2712 0 : move32();
2713 0 : *imh = L_add(r3, r4);
2714 0 : move32();
2715 0 : *iml = L_sub(r3, r4);
2716 0 : move32();
2717 0 : rel += s, reh += s, iml += s;
2718 0 : imh += s;
2719 :
2720 : /* 13. FFT2 stage */
2721 0 : r1 = L_shr_pos(z12, SCALEFACTOR30_2);
2722 0 : r2 = L_shr_pos(z42, SCALEFACTOR30_2);
2723 0 : r3 = L_shr_pos(z13, SCALEFACTOR30_2);
2724 0 : r4 = L_shr_pos(z43, SCALEFACTOR30_2);
2725 0 : *rel = L_add(r1, r2);
2726 0 : move32();
2727 0 : *reh = L_sub(r1, r2);
2728 0 : move32();
2729 0 : *iml = L_add(r3, r4);
2730 0 : move32();
2731 0 : *imh = L_sub(r3, r4);
2732 0 : move32();
2733 0 : rel += s, reh += s, iml += s;
2734 0 : imh += s;
2735 :
2736 : /* 14. FFT2 stage */
2737 0 : r1 = L_shr_pos(z28, SCALEFACTOR30_2);
2738 0 : r2 = L_shr_pos(z58, SCALEFACTOR30_2);
2739 0 : r3 = L_shr_pos(z29, SCALEFACTOR30_2);
2740 0 : r4 = L_shr_pos(z59, SCALEFACTOR30_2);
2741 0 : *reh = L_add(r1, r2);
2742 0 : move32();
2743 0 : *rel = L_sub(r1, r2);
2744 0 : move32();
2745 0 : *imh = L_add(r3, r4);
2746 0 : move32();
2747 0 : *iml = L_sub(r3, r4);
2748 0 : move32();
2749 0 : rel += s, reh += s, iml += s;
2750 0 : imh += s;
2751 :
2752 : /* 15. FFT2 stage */
2753 0 : r1 = L_shr_pos(z14, SCALEFACTOR30_2);
2754 0 : r2 = L_shr_pos(z44, SCALEFACTOR30_2);
2755 0 : r3 = L_shr_pos(z15, SCALEFACTOR30_2);
2756 0 : r4 = L_shr_pos(z45, SCALEFACTOR30_2);
2757 0 : *rel = L_add(r1, r2);
2758 0 : move32();
2759 0 : *reh = L_sub(r1, r2);
2760 0 : move32();
2761 0 : *iml = L_add(r3, r4);
2762 0 : move32();
2763 0 : *imh = L_sub(r3, r4);
2764 0 : move32();
2765 0 : rel += s, reh += s, iml += s;
2766 0 : imh += s;
2767 : /* The four cursors are not read again after this final update. */
2768 : Dyn_Mem_Deluxe_Out();
2769 0 : }
2770 :
2771 : /**
2772 : * \brief Function performs a complex 32-point FFT
2773 : * The FFT is performed inplace. The result of the FFT
2774 : * is scaled by SCALEFACTOR32 bits.
2775 : *
2776 : * WOPS with 32x16 bit multiplications: 752 cycles
2777 : *
2778 : * \param [i/o] re real input / output
2779 : * \param [i/o] im imag input / output
2780 : * \param [i ] s stride real and imag input / output
2781 : *
2782 : * \return void
2783 : */
2784 :
2785 :
2786 0 : static void fft32(Word32 *re, Word32 *im, Word16 s)
2787 : {
2788 : Dyn_Mem_Deluxe_In(Word32 as, bs; Word32 x00, x01, x02, x03, x04, x05, x06, x07;
2789 : Word32 x08, x09, x10, x11, x12, x13, x14, x15; Word32 t00, t01, t02, t03, t04, t05, t06, t07;
2790 : Word32 t08, t09, t10, t11, t12, t13, t14, t15; Word32 s00, s01, s02, s03, s04, s05, s06, s07;
2791 : Word32 s08, s09, s10, s11, s12, s13, s14, s15;
2792 :
2793 : Word32 y00, y01, y02, y03, y04, y05, y06, y07; Word32 y08, y09, y10, y11, y12, y13, y14, y15;
2794 : Word32 y16, y17, y18, y19, y20, y21, y22, y23; Word32 y24, y25, y26, y27, y28, y29, y30, y31;
2795 : Word32 y32, y33, y34, y35, y36, y37, y38, y39; Word32 y40, y41, y42, y43, y44, y45, y46, y47;
2796 : Word32 y48, y49, y50, y51, y52, y53, y54, y55; Word32 y56, y57, y58, y59, y60, y61, y62, y63;);
2797 :
2798 : /* 1. FFT8 stage */
2799 0 : x00 = L_shr_pos(re[s * 0], SCALEFACTOR32_1);
2800 0 : x01 = L_shr_pos(im[s * 0], SCALEFACTOR32_1);
2801 0 : x02 = L_shr_pos(re[s * 4], SCALEFACTOR32_1);
2802 0 : x03 = L_shr_pos(im[s * 4], SCALEFACTOR32_1);
2803 0 : x04 = L_shr_pos(re[s * 8], SCALEFACTOR32_1);
2804 0 : x05 = L_shr_pos(im[s * 8], SCALEFACTOR32_1);
2805 0 : x06 = L_shr_pos(re[s * 12], SCALEFACTOR32_1);
2806 0 : x07 = L_shr_pos(im[s * 12], SCALEFACTOR32_1);
2807 0 : x08 = L_shr_pos(re[s * 16], SCALEFACTOR32_1);
2808 0 : x09 = L_shr_pos(im[s * 16], SCALEFACTOR32_1);
2809 0 : x10 = L_shr_pos(re[s * 20], SCALEFACTOR32_1);
2810 0 : x11 = L_shr_pos(im[s * 20], SCALEFACTOR32_1);
2811 0 : x12 = L_shr_pos(re[s * 24], SCALEFACTOR32_1);
2812 0 : x13 = L_shr_pos(im[s * 24], SCALEFACTOR32_1);
2813 0 : x14 = L_shr_pos(re[s * 28], SCALEFACTOR32_1);
2814 0 : x15 = L_shr_pos(im[s * 28], SCALEFACTOR32_1);
2815 :
2816 0 : t00 = L_add(x00, x08);
2817 0 : t02 = L_sub(x00, x08);
2818 0 : t01 = L_add(x01, x09);
2819 0 : t03 = L_sub(x01, x09);
2820 0 : t04 = L_add(x02, x10);
2821 0 : t06 = L_sub(x02, x10);
2822 0 : t05 = L_add(x03, x11);
2823 0 : t07 = L_sub(x03, x11);
2824 0 : t08 = L_add(x04, x12);
2825 0 : t10 = L_sub(x04, x12);
2826 0 : t09 = L_add(x05, x13);
2827 0 : t11 = L_sub(x05, x13);
2828 0 : t12 = L_add(x06, x14);
2829 0 : t14 = L_sub(x06, x14);
2830 0 : t13 = L_add(x07, x15);
2831 0 : t15 = L_sub(x07, x15);
2832 :
2833 : /* Pre-additions and core multiplications */
2834 0 : s00 = L_add(t00, t08);
2835 0 : s04 = L_sub(t00, t08);
2836 0 : s01 = L_add(t01, t09);
2837 0 : s05 = L_sub(t01, t09);
2838 0 : s08 = L_sub(t02, t11);
2839 0 : s10 = L_add(t02, t11);
2840 0 : s09 = L_add(t03, t10);
2841 0 : s11 = L_sub(t03, t10);
2842 0 : s02 = L_add(t04, t12);
2843 0 : s07 = L_sub(t04, t12);
2844 0 : s03 = L_add(t05, t13);
2845 0 : s06 = L_sub(t13, t05);
2846 0 : t01 = L_add(t06, t14);
2847 0 : t02 = L_sub(t06, t14);
2848 0 : t00 = L_add(t07, t15);
2849 0 : t03 = L_sub(t07, t15);
2850 :
2851 0 : Mpy3_0(s12, s13, s14, s15, t00, t01, t02, t03);
2852 :
2853 : /* Post-additions */
2854 0 : y00 = L_add(s00, s02);
2855 0 : y08 = L_sub(s00, s02);
2856 0 : y01 = L_add(s01, s03);
2857 0 : y09 = L_sub(s01, s03);
2858 0 : y04 = L_sub(s04, s06);
2859 0 : y12 = L_add(s04, s06);
2860 0 : y05 = L_sub(s05, s07);
2861 0 : y13 = L_add(s05, s07);
2862 0 : y06 = L_add(s08, s14);
2863 0 : y14 = L_sub(s08, s14);
2864 0 : y07 = L_add(s09, s15);
2865 0 : y15 = L_sub(s09, s15);
2866 0 : y02 = L_add(s10, s12);
2867 0 : y10 = L_sub(s10, s12);
2868 0 : y03 = L_add(s11, s13);
2869 0 : y11 = L_sub(s11, s13);
2870 :
2871 : /* 2. FFT8 stage */
2872 0 : x00 = L_shr_pos(re[s * 1], SCALEFACTOR32_1);
2873 0 : x01 = L_shr_pos(im[s * 1], SCALEFACTOR32_1);
2874 0 : x02 = L_shr_pos(re[s * 5], SCALEFACTOR32_1);
2875 0 : x03 = L_shr_pos(im[s * 5], SCALEFACTOR32_1);
2876 0 : x04 = L_shr_pos(re[s * 9], SCALEFACTOR32_1);
2877 0 : x05 = L_shr_pos(im[s * 9], SCALEFACTOR32_1);
2878 0 : x06 = L_shr_pos(re[s * 13], SCALEFACTOR32_1);
2879 0 : x07 = L_shr_pos(im[s * 13], SCALEFACTOR32_1);
2880 0 : x08 = L_shr_pos(re[s * 17], SCALEFACTOR32_1);
2881 0 : x09 = L_shr_pos(im[s * 17], SCALEFACTOR32_1);
2882 0 : x10 = L_shr_pos(re[s * 21], SCALEFACTOR32_1);
2883 0 : x11 = L_shr_pos(im[s * 21], SCALEFACTOR32_1);
2884 0 : x12 = L_shr_pos(re[s * 25], SCALEFACTOR32_1);
2885 0 : x13 = L_shr_pos(im[s * 25], SCALEFACTOR32_1);
2886 0 : x14 = L_shr_pos(re[s * 29], SCALEFACTOR32_1);
2887 0 : x15 = L_shr_pos(im[s * 29], SCALEFACTOR32_1);
2888 :
2889 0 : t00 = L_add(x00, x08);
2890 0 : t02 = L_sub(x00, x08);
2891 0 : t01 = L_add(x01, x09);
2892 0 : t03 = L_sub(x01, x09);
2893 0 : t04 = L_add(x02, x10);
2894 0 : t06 = L_sub(x02, x10);
2895 0 : t05 = L_add(x03, x11);
2896 0 : t07 = L_sub(x03, x11);
2897 0 : t08 = L_add(x04, x12);
2898 0 : t10 = L_sub(x04, x12);
2899 0 : t09 = L_add(x05, x13);
2900 0 : t11 = L_sub(x05, x13);
2901 0 : t12 = L_add(x06, x14);
2902 0 : t14 = L_sub(x06, x14);
2903 0 : t13 = L_add(x07, x15);
2904 0 : t15 = L_sub(x07, x15);
2905 :
2906 : /* Pre-additions and core multiplications */
2907 0 : s00 = L_add(t00, t08);
2908 0 : s04 = L_sub(t00, t08);
2909 0 : s01 = L_add(t01, t09);
2910 0 : s05 = L_sub(t01, t09);
2911 0 : s08 = L_sub(t02, t11);
2912 0 : s10 = L_add(t02, t11);
2913 0 : s09 = L_add(t03, t10);
2914 0 : s11 = L_sub(t03, t10);
2915 0 : s02 = L_add(t04, t12);
2916 0 : s07 = L_sub(t04, t12);
2917 0 : s03 = L_add(t05, t13);
2918 0 : s06 = L_sub(t13, t05);
2919 0 : t01 = L_add(t06, t14);
2920 0 : t02 = L_sub(t06, t14);
2921 0 : t00 = L_add(t07, t15);
2922 0 : t03 = L_sub(t07, t15);
2923 :
2924 0 : Mpy3_0(s12, s13, s14, s15, t00, t01, t02, t03);
2925 :
2926 : /* Post-additions */
2927 0 : y16 = L_add(s00, s02);
2928 0 : y24 = L_sub(s00, s02);
2929 0 : y17 = L_add(s01, s03);
2930 0 : y25 = L_sub(s01, s03);
2931 0 : y20 = L_sub(s04, s06);
2932 0 : y28 = L_add(s04, s06);
2933 0 : y21 = L_sub(s05, s07);
2934 0 : y29 = L_add(s05, s07);
2935 0 : y22 = L_add(s08, s14);
2936 0 : y30 = L_sub(s08, s14);
2937 0 : y23 = L_add(s09, s15);
2938 0 : y31 = L_sub(s09, s15);
2939 0 : y18 = L_add(s10, s12);
2940 0 : y26 = L_sub(s10, s12);
2941 0 : y19 = L_add(s11, s13);
2942 0 : y27 = L_sub(s11, s13);
2943 :
2944 : /* 3. FFT8 stage */
2945 0 : x00 = L_shr_pos(re[s * 2], SCALEFACTOR32_1);
2946 0 : x01 = L_shr_pos(im[s * 2], SCALEFACTOR32_1);
2947 0 : x02 = L_shr_pos(re[s * 6], SCALEFACTOR32_1);
2948 0 : x03 = L_shr_pos(im[s * 6], SCALEFACTOR32_1);
2949 0 : x04 = L_shr_pos(re[s * 10], SCALEFACTOR32_1);
2950 0 : x05 = L_shr_pos(im[s * 10], SCALEFACTOR32_1);
2951 0 : x06 = L_shr_pos(re[s * 14], SCALEFACTOR32_1);
2952 0 : x07 = L_shr_pos(im[s * 14], SCALEFACTOR32_1);
2953 0 : x08 = L_shr_pos(re[s * 18], SCALEFACTOR32_1);
2954 0 : x09 = L_shr_pos(im[s * 18], SCALEFACTOR32_1);
2955 0 : x10 = L_shr_pos(re[s * 22], SCALEFACTOR32_1);
2956 0 : x11 = L_shr_pos(im[s * 22], SCALEFACTOR32_1);
2957 0 : x12 = L_shr_pos(re[s * 26], SCALEFACTOR32_1);
2958 0 : x13 = L_shr_pos(im[s * 26], SCALEFACTOR32_1);
2959 0 : x14 = L_shr_pos(re[s * 30], SCALEFACTOR32_1);
2960 0 : x15 = L_shr_pos(im[s * 30], SCALEFACTOR32_1);
2961 :
2962 0 : t00 = L_add(x00, x08);
2963 0 : t02 = L_sub(x00, x08);
2964 0 : t01 = L_add(x01, x09);
2965 0 : t03 = L_sub(x01, x09);
2966 0 : t04 = L_add(x02, x10);
2967 0 : t06 = L_sub(x02, x10);
2968 0 : t05 = L_add(x03, x11);
2969 0 : t07 = L_sub(x03, x11);
2970 0 : t08 = L_add(x04, x12);
2971 0 : t10 = L_sub(x04, x12);
2972 0 : t09 = L_add(x05, x13);
2973 0 : t11 = L_sub(x05, x13);
2974 0 : t12 = L_add(x06, x14);
2975 0 : t14 = L_sub(x06, x14);
2976 0 : t13 = L_add(x07, x15);
2977 0 : t15 = L_sub(x07, x15);
2978 :
2979 : /* Pre-additions and core multiplications */
2980 0 : s00 = L_add(t00, t08);
2981 0 : s04 = L_sub(t00, t08);
2982 0 : s01 = L_add(t01, t09);
2983 0 : s05 = L_sub(t01, t09);
2984 0 : s08 = L_sub(t02, t11);
2985 0 : s10 = L_add(t02, t11);
2986 0 : s09 = L_add(t03, t10);
2987 0 : s11 = L_sub(t03, t10);
2988 0 : s02 = L_add(t04, t12);
2989 0 : s07 = L_sub(t04, t12);
2990 0 : s03 = L_add(t05, t13);
2991 0 : s06 = L_sub(t13, t05);
2992 0 : t01 = L_add(t06, t14);
2993 0 : t02 = L_sub(t06, t14);
2994 0 : t00 = L_add(t07, t15);
2995 0 : t03 = L_sub(t07, t15);
2996 :
2997 0 : Mpy3_0(s12, s13, s14, s15, t00, t01, t02, t03);
2998 :
2999 : /* Post-additions */
3000 0 : y32 = L_add(s00, s02);
3001 0 : y40 = L_sub(s00, s02);
3002 0 : y33 = L_add(s01, s03);
3003 0 : y41 = L_sub(s01, s03);
3004 0 : y36 = L_sub(s04, s06);
3005 0 : y44 = L_add(s04, s06);
3006 0 : y37 = L_sub(s05, s07);
3007 0 : y45 = L_add(s05, s07);
3008 0 : y38 = L_add(s08, s14);
3009 0 : y46 = L_sub(s08, s14);
3010 0 : y39 = L_add(s09, s15);
3011 0 : y47 = L_sub(s09, s15);
3012 0 : y34 = L_add(s10, s12);
3013 0 : y42 = L_sub(s10, s12);
3014 0 : y35 = L_add(s11, s13);
3015 0 : y43 = L_sub(s11, s13);
3016 :
3017 : /* 4. FFT8 stage */
3018 0 : x00 = L_shr_pos(re[s * 3], SCALEFACTOR32_1);
3019 0 : x01 = L_shr_pos(im[s * 3], SCALEFACTOR32_1);
3020 0 : x02 = L_shr_pos(re[s * 7], SCALEFACTOR32_1);
3021 0 : x03 = L_shr_pos(im[s * 7], SCALEFACTOR32_1);
3022 0 : x04 = L_shr_pos(re[s * 11], SCALEFACTOR32_1);
3023 0 : x05 = L_shr_pos(im[s * 11], SCALEFACTOR32_1);
3024 0 : x06 = L_shr_pos(re[s * 15], SCALEFACTOR32_1);
3025 0 : x07 = L_shr_pos(im[s * 15], SCALEFACTOR32_1);
3026 0 : x08 = L_shr_pos(re[s * 19], SCALEFACTOR32_1);
3027 0 : x09 = L_shr_pos(im[s * 19], SCALEFACTOR32_1);
3028 0 : x10 = L_shr_pos(re[s * 23], SCALEFACTOR32_1);
3029 0 : x11 = L_shr_pos(im[s * 23], SCALEFACTOR32_1);
3030 0 : x12 = L_shr_pos(re[s * 27], SCALEFACTOR32_1);
3031 0 : x13 = L_shr_pos(im[s * 27], SCALEFACTOR32_1);
3032 0 : x14 = L_shr_pos(re[s * 31], SCALEFACTOR32_1);
3033 0 : x15 = L_shr_pos(im[s * 31], SCALEFACTOR32_1);
3034 :
3035 0 : t00 = L_add(x00, x08);
3036 0 : t02 = L_sub(x00, x08);
3037 0 : t01 = L_add(x01, x09);
3038 0 : t03 = L_sub(x01, x09);
3039 0 : t04 = L_add(x02, x10);
3040 0 : t06 = L_sub(x02, x10);
3041 0 : t05 = L_add(x03, x11);
3042 0 : t07 = L_sub(x03, x11);
3043 0 : t08 = L_add(x04, x12);
3044 0 : t10 = L_sub(x04, x12);
3045 0 : t09 = L_add(x05, x13);
3046 0 : t11 = L_sub(x05, x13);
3047 0 : t12 = L_add(x06, x14);
3048 0 : t14 = L_sub(x06, x14);
3049 0 : t13 = L_add(x07, x15);
3050 0 : t15 = L_sub(x07, x15);
3051 :
3052 : /* Pre-additions and core multiplications */
3053 0 : s00 = L_add(t00, t08);
3054 0 : s04 = L_sub(t00, t08);
3055 0 : s01 = L_add(t01, t09);
3056 0 : s05 = L_sub(t01, t09);
3057 0 : s08 = L_sub(t02, t11);
3058 0 : s10 = L_add(t02, t11);
3059 0 : s09 = L_add(t03, t10);
3060 0 : s11 = L_sub(t03, t10);
3061 0 : s02 = L_add(t04, t12);
3062 0 : s07 = L_sub(t04, t12);
3063 0 : s03 = L_add(t05, t13);
3064 0 : s06 = L_sub(t13, t05);
3065 0 : t01 = L_add(t06, t14);
3066 0 : t02 = L_sub(t06, t14);
3067 0 : t00 = L_add(t07, t15);
3068 0 : t03 = L_sub(t07, t15);
3069 :
3070 0 : Mpy3_0(s12, s13, s14, s15, t00, t01, t02, t03);
3071 :
3072 : /* Post-additions */
3073 0 : y48 = L_add(s00, s02);
3074 0 : y56 = L_sub(s00, s02);
3075 0 : y49 = L_add(s01, s03);
3076 0 : y57 = L_sub(s01, s03);
3077 0 : y52 = L_sub(s04, s06);
3078 0 : y60 = L_add(s04, s06);
3079 0 : y53 = L_sub(s05, s07);
3080 0 : y61 = L_add(s05, s07);
3081 0 : y54 = L_add(s08, s14);
3082 0 : y62 = L_sub(s08, s14);
3083 0 : y55 = L_add(s09, s15);
3084 0 : y63 = L_sub(s09, s15);
3085 0 : y50 = L_add(s10, s12);
3086 0 : y58 = L_sub(s10, s12);
3087 0 : y51 = L_add(s11, s13);
3088 0 : y59 = L_sub(s11, s13);
3089 :
3090 : /* apply twiddle factors */
3091 0 : y00 = L_shr_pos(y00, SCALEFACTOR32_2);
3092 0 : y01 = L_shr_pos(y01, SCALEFACTOR32_2);
3093 0 : y02 = L_shr_pos(y02, SCALEFACTOR32_2);
3094 0 : y03 = L_shr_pos(y03, SCALEFACTOR32_2);
3095 0 : y04 = L_shr_pos(y04, SCALEFACTOR32_2);
3096 0 : y05 = L_shr_pos(y05, SCALEFACTOR32_2);
3097 0 : y06 = L_shr_pos(y06, SCALEFACTOR32_2);
3098 0 : y07 = L_shr_pos(y07, SCALEFACTOR32_2);
3099 0 : y08 = L_shr_pos(y08, SCALEFACTOR32_2);
3100 0 : y09 = L_shr_pos(y09, SCALEFACTOR32_2);
3101 0 : y10 = L_shr_pos(y10, SCALEFACTOR32_2);
3102 0 : y11 = L_shr_pos(y11, SCALEFACTOR32_2);
3103 0 : y12 = L_shr_pos(y12, SCALEFACTOR32_2);
3104 0 : y13 = L_shr_pos(y13, SCALEFACTOR32_2);
3105 0 : y14 = L_shr_pos(y14, SCALEFACTOR32_2);
3106 0 : y15 = L_shr_pos(y15, SCALEFACTOR32_2);
3107 0 : y16 = L_shr_pos(y16, SCALEFACTOR32_2);
3108 0 : y17 = L_shr_pos(y17, SCALEFACTOR32_2);
3109 0 : y32 = L_shr_pos(y32, SCALEFACTOR32_2);
3110 0 : y33 = L_shr_pos(y33, SCALEFACTOR32_2);
3111 0 : y48 = L_shr_pos(y48, SCALEFACTOR32_2);
3112 0 : y49 = L_shr_pos(y49, SCALEFACTOR32_2);
3113 0 : y40 = L_shr_pos(y40, SCALEFACTOR32_2);
3114 0 : y41 = L_shr_pos(y41, SCALEFACTOR32_2);
3115 :
3116 0 : cplxMpy3_0(y18, y19, RotVector_32_32[2 * 0 + 0], RotVector_32_32[2 * 0 + 1]);
3117 0 : cplxMpy3_0(y20, y21, RotVector_32_32[2 * 1 + 0], RotVector_32_32[2 * 1 + 1]);
3118 0 : cplxMpy3_0(y22, y23, RotVector_32_32[2 * 2 + 0], RotVector_32_32[2 * 2 + 1]);
3119 0 : cplxMpy3_0(y24, y25, RotVector_32_32[2 * 3 + 0], RotVector_32_32[2 * 3 + 1]);
3120 0 : cplxMpy3_0(y26, y27, RotVector_32_32[2 * 4 + 0], RotVector_32_32[2 * 4 + 1]);
3121 0 : cplxMpy3_0(y28, y29, RotVector_32_32[2 * 5 + 0], RotVector_32_32[2 * 5 + 1]);
3122 0 : cplxMpy3_0(y30, y31, RotVector_32_32[2 * 6 + 0], RotVector_32_32[2 * 6 + 1]);
3123 0 : cplxMpy3_0(y34, y35, RotVector_32_32[2 * 7 + 0], RotVector_32_32[2 * 7 + 1]);
3124 0 : cplxMpy3_0(y36, y37, RotVector_32_32[2 * 8 + 0], RotVector_32_32[2 * 8 + 1]);
3125 0 : cplxMpy3_0(y38, y39, RotVector_32_32[2 * 9 + 0], RotVector_32_32[2 * 9 + 1]);
3126 0 : cplxMpy3_0(y42, y43, RotVector_32_32[2 * 10 + 0], RotVector_32_32[2 * 10 + 1]);
3127 0 : cplxMpy3_0(y44, y45, RotVector_32_32[2 * 11 + 0], RotVector_32_32[2 * 11 + 1]);
3128 0 : cplxMpy3_0(y46, y47, RotVector_32_32[2 * 12 + 0], RotVector_32_32[2 * 12 + 1]);
3129 0 : cplxMpy3_0(y50, y51, RotVector_32_32[2 * 13 + 0], RotVector_32_32[2 * 13 + 1]);
3130 0 : cplxMpy3_0(y52, y53, RotVector_32_32[2 * 14 + 0], RotVector_32_32[2 * 14 + 1]);
3131 0 : cplxMpy3_0(y54, y55, RotVector_32_32[2 * 15 + 0], RotVector_32_32[2 * 15 + 1]);
3132 0 : cplxMpy3_0(y56, y57, RotVector_32_32[2 * 16 + 0], RotVector_32_32[2 * 16 + 1]);
3133 0 : cplxMpy3_0(y58, y59, RotVector_32_32[2 * 17 + 0], RotVector_32_32[2 * 17 + 1]);
3134 0 : cplxMpy3_0(y60, y61, RotVector_32_32[2 * 18 + 0], RotVector_32_32[2 * 18 + 1]);
3135 0 : cplxMpy3_0(y62, y63, RotVector_32_32[2 * 19 + 0], RotVector_32_32[2 * 19 + 1]);
3136 :
3137 : /* 1. FFT4 stage */
3138 :
3139 : /* Pre-additions */
3140 0 : t00 = L_add(y00, y32);
3141 0 : t02 = L_sub(y00, y32);
3142 0 : t01 = L_add(y01, y33);
3143 0 : t03 = L_sub(y01, y33);
3144 0 : t04 = L_add(y16, y48);
3145 0 : t07 = L_sub(y16, y48);
3146 0 : t05 = L_add(y49, y17);
3147 0 : t06 = L_sub(y49, y17);
3148 :
3149 : /* Post-additions */
3150 0 : re[s * 0] = L_add(t00, t04);
3151 0 : move32();
3152 0 : im[s * 0] = L_add(t01, t05);
3153 0 : move32();
3154 0 : re[s * 8] = L_sub(t02, t06);
3155 0 : move32();
3156 0 : im[s * 8] = L_sub(t03, t07);
3157 0 : move32();
3158 0 : re[s * 16] = L_sub(t00, t04);
3159 0 : move32();
3160 0 : im[s * 16] = L_sub(t01, t05);
3161 0 : move32();
3162 0 : re[s * 24] = L_add(t02, t06);
3163 0 : move32();
3164 0 : im[s * 24] = L_add(t03, t07);
3165 0 : move32();
3166 :
3167 : /* 2. FFT4 stage */
3168 :
3169 : /* Pre-additions */
3170 0 : t00 = L_add(y02, y34);
3171 0 : t02 = L_sub(y02, y34);
3172 0 : t01 = L_add(y03, y35);
3173 0 : t03 = L_sub(y03, y35);
3174 0 : t04 = L_add(y18, y50);
3175 0 : t07 = L_sub(y18, y50);
3176 0 : t05 = L_add(y51, y19);
3177 0 : t06 = L_sub(y51, y19);
3178 :
3179 : /* Post-additions */
3180 0 : re[s * 1] = L_add(t00, t04);
3181 0 : move32();
3182 0 : im[s * 1] = L_add(t01, t05);
3183 0 : move32();
3184 0 : re[s * 9] = L_sub(t02, t06);
3185 0 : move32();
3186 0 : im[s * 9] = L_sub(t03, t07);
3187 0 : move32();
3188 0 : re[s * 17] = L_sub(t00, t04);
3189 0 : move32();
3190 0 : im[s * 17] = L_sub(t01, t05);
3191 0 : move32();
3192 0 : re[s * 25] = L_add(t02, t06);
3193 0 : move32();
3194 0 : im[s * 25] = L_add(t03, t07);
3195 0 : move32();
3196 :
3197 : /* 3. FFT4 stage */
3198 :
3199 : /* Pre-additions */
3200 0 : t00 = L_add(y04, y36);
3201 0 : t02 = L_sub(y04, y36);
3202 0 : t01 = L_add(y05, y37);
3203 0 : t03 = L_sub(y05, y37);
3204 0 : t04 = L_add(y20, y52);
3205 0 : t07 = L_sub(y20, y52);
3206 0 : t05 = L_add(y53, y21);
3207 0 : t06 = L_sub(y53, y21);
3208 :
3209 : /* Post-additions */
3210 0 : re[s * 2] = L_add(t00, t04);
3211 0 : move32();
3212 0 : im[s * 2] = L_add(t01, t05);
3213 0 : move32();
3214 0 : re[s * 10] = L_sub(t02, t06);
3215 0 : move32();
3216 0 : im[s * 10] = L_sub(t03, t07);
3217 0 : move32();
3218 0 : re[s * 18] = L_sub(t00, t04);
3219 0 : move32();
3220 0 : im[s * 18] = L_sub(t01, t05);
3221 0 : move32();
3222 0 : re[s * 26] = L_add(t02, t06);
3223 0 : move32();
3224 0 : im[s * 26] = L_add(t03, t07);
3225 0 : move32();
3226 :
3227 : /* 4. FFT4 stage */
3228 :
3229 : /* Pre-additions */
3230 0 : t00 = L_add(y06, y38);
3231 0 : t02 = L_sub(y06, y38);
3232 0 : t01 = L_add(y07, y39);
3233 0 : t03 = L_sub(y07, y39);
3234 0 : t04 = L_add(y22, y54);
3235 0 : t07 = L_sub(y22, y54);
3236 0 : t05 = L_add(y55, y23);
3237 0 : t06 = L_sub(y55, y23);
3238 :
3239 : /* Post-additions */
3240 0 : re[s * 3] = L_add(t00, t04);
3241 0 : move32();
3242 0 : im[s * 3] = L_add(t01, t05);
3243 0 : move32();
3244 0 : re[s * 11] = L_sub(t02, t06);
3245 0 : move32();
3246 0 : im[s * 11] = L_sub(t03, t07);
3247 0 : move32();
3248 0 : re[s * 19] = L_sub(t00, t04);
3249 0 : move32();
3250 0 : im[s * 19] = L_sub(t01, t05);
3251 0 : move32();
3252 0 : re[s * 27] = L_add(t02, t06);
3253 0 : move32();
3254 0 : im[s * 27] = L_add(t03, t07);
3255 0 : move32();
3256 :
3257 : /* 5. FFT4 stage */
3258 :
3259 : /* Pre-additions */
3260 0 : t00 = L_add(y08, y41);
3261 0 : t02 = L_sub(y08, y41);
3262 0 : t01 = L_sub(y09, y40);
3263 0 : t03 = L_add(y09, y40);
3264 0 : t04 = L_add(y24, y56);
3265 0 : t07 = L_sub(y24, y56);
3266 0 : t05 = L_add(y57, y25);
3267 0 : t06 = L_sub(y57, y25);
3268 :
3269 : /* Post-additions */
3270 0 : re[s * 4] = L_add(t00, t04);
3271 0 : move32();
3272 0 : im[s * 4] = L_add(t01, t05);
3273 0 : move32();
3274 0 : re[s * 12] = L_sub(t02, t06);
3275 0 : move32();
3276 0 : im[s * 12] = L_sub(t03, t07);
3277 0 : move32();
3278 0 : re[s * 20] = L_sub(t00, t04);
3279 0 : move32();
3280 0 : im[s * 20] = L_sub(t01, t05);
3281 0 : move32();
3282 0 : re[s * 28] = L_add(t02, t06);
3283 0 : move32();
3284 0 : im[s * 28] = L_add(t03, t07);
3285 0 : move32();
3286 :
3287 : /* 6. FFT4 stage */
3288 :
3289 : /* Pre-additions */
3290 0 : t00 = L_add(y10, y42);
3291 0 : t02 = L_sub(y10, y42);
3292 0 : t01 = L_add(y11, y43);
3293 0 : t03 = L_sub(y11, y43);
3294 0 : t04 = L_add(y26, y58);
3295 0 : t07 = L_sub(y26, y58);
3296 0 : t05 = L_add(y59, y27);
3297 0 : t06 = L_sub(y59, y27);
3298 :
3299 : /* Post-additions */
3300 0 : re[s * 5] = L_add(t00, t04);
3301 0 : move32();
3302 0 : im[s * 5] = L_add(t01, t05);
3303 0 : move32();
3304 0 : re[s * 13] = L_sub(t02, t06);
3305 0 : move32();
3306 0 : im[s * 13] = L_sub(t03, t07);
3307 0 : move32();
3308 0 : re[s * 21] = L_sub(t00, t04);
3309 0 : move32();
3310 0 : im[s * 21] = L_sub(t01, t05);
3311 0 : move32();
3312 0 : re[s * 29] = L_add(t02, t06);
3313 0 : move32();
3314 0 : im[s * 29] = L_add(t03, t07);
3315 0 : move32();
3316 :
3317 : /* 7. FFT4 stage */
3318 :
3319 : /* Pre-additions */
3320 0 : t00 = L_add(y12, y44);
3321 0 : t02 = L_sub(y12, y44);
3322 0 : t01 = L_add(y13, y45);
3323 0 : t03 = L_sub(y13, y45);
3324 0 : t04 = L_add(y28, y60);
3325 0 : t07 = L_sub(y28, y60);
3326 0 : t05 = L_add(y61, y29);
3327 0 : t06 = L_sub(y61, y29);
3328 :
3329 : /* Post-additions */
3330 0 : re[s * 6] = L_add(t00, t04);
3331 0 : move32();
3332 0 : im[s * 6] = L_add(t01, t05);
3333 0 : move32();
3334 0 : re[s * 14] = L_sub(t02, t06);
3335 0 : move32();
3336 0 : im[s * 14] = L_sub(t03, t07);
3337 0 : move32();
3338 0 : re[s * 22] = L_sub(t00, t04);
3339 0 : move32();
3340 0 : im[s * 22] = L_sub(t01, t05);
3341 0 : move32();
3342 0 : re[s * 30] = L_add(t02, t06);
3343 0 : move32();
3344 0 : im[s * 30] = L_add(t03, t07);
3345 0 : move32();
3346 :
3347 : /* 8. FFT4 stage */
3348 :
3349 : /* Pre-additions */
3350 0 : t00 = L_add(y14, y46);
3351 0 : t02 = L_sub(y14, y46);
3352 0 : t01 = L_add(y15, y47);
3353 0 : t03 = L_sub(y15, y47);
3354 0 : t04 = L_add(y30, y62);
3355 0 : t07 = L_sub(y30, y62);
3356 0 : t05 = L_add(y63, y31);
3357 0 : t06 = L_sub(y63, y31);
3358 :
3359 : /* Post-additions */
3360 0 : re[s * 7] = L_add(t00, t04);
3361 0 : move32();
3362 0 : im[s * 7] = L_add(t01, t05);
3363 0 : move32();
3364 0 : re[s * 15] = L_sub(t02, t06);
3365 0 : move32();
3366 0 : im[s * 15] = L_sub(t03, t07);
3367 0 : move32();
3368 0 : re[s * 23] = L_sub(t00, t04);
3369 0 : move32();
3370 0 : im[s * 23] = L_sub(t01, t05);
3371 0 : move32();
3372 0 : re[s * 31] = L_add(t02, t06);
3373 0 : move32();
3374 0 : im[s * 31] = L_add(t03, t07);
3375 0 : move32();
3376 :
3377 : Dyn_Mem_Deluxe_Out();
3378 0 : }
3379 :
3380 : /**
3381 : * \brief Function performs a complex 40-point FFT
3382 : * The FFT is performed inplace. The result of the FFT
3383 : * is scaled by SCALEFACTOR40 bits.
3384 : *
3385 : * \param [i/o] re real part
3386 : * \param [i/o] im imag part
3387 : * \param [i ] sx stride real and imag part
3388 : * \param [tmp] x 32-bit workbuffer; elements 0..79 (2*40 values, interleaved re/im) are overwritten
3389 : * \return void
3390 : */
3391 :
3392 :
3393 :
3394 0 : static void fft40(Word32 *re, Word32 *im, Word16 sx, Word32 *x)
3395 : {
3396 : Dyn_Mem_Deluxe_In(const Word32 *W; Word16 dim1, dim2; Counter i, j;
3397 : Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;
3398 : Word32 t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15;
3399 : Word32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09, s10, s11, s12, s13, s14, s15;);
3400 : /* The 40 points are handled as a dim2 x dim1 = 8 x 5 array of complex values held in x */
3401 0 : dim1 = 5;
3402 0 : move16();
3403 0 : dim2 = 8;
3404 0 : move16();
3405 : /* Factor table passed to cplxMpy4_8_2 for elements with row >= 1 and column >= 1 (presumably rotation/twiddle factors; table is defined elsewhere) */
3406 0 : W = RotVector_40_32;
3407 : /* Gather: row i of x receives the dim1 inputs found at sx * (i + j * dim2), stored as interleaved re/im pairs */
3408 0 : FOR (i = 0; i < dim2; i++)
3409 : {
3410 0 : FOR (j = 0; j < dim1; j++)
3411 : {
3412 0 : x[2 * i * dim1 + 2 * j] = re[sx * i + sx * j * dim2];
3413 0 : move32();
3414 0 : x[2 * i * dim1 + 2 * j + 1] = im[sx * i + sx * j * dim2];
3415 0 : move32();
3416 : }
3417 : }
3418 : /* Stage 1: fft5() on each of the dim2 rows of x; re and im pointers are one element apart, stride 2 */
3419 0 : FOR (i = 0; i < dim2; i++)
3420 : {
3421 0 : fft5(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
3422 : }
3423 : /* Stage 2: for each column i, fetch the 8 values of rows 0..7, run an 8-point butterfly and write the results to re/im */
3424 0 : FOR (i = 0; i < dim1; i++)
3425 : {
3426 0 : cplxMpy4_8_1(x00, x01, x[2 * i + 2 * 0 * dim1], x[2 * i + 2 * 0 * dim1 + 1]);
3427 : /* Column 0 uses cplxMpy4_8_1 (no W operand) for every row; other columns pass W[2*(i-1) + 8*(k-1)] and the following entry for row k */
3428 0 : IF (i == 0)
3429 : {
3430 0 : cplxMpy4_8_1(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1]);
3431 0 : cplxMpy4_8_1(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1]);
3432 0 : cplxMpy4_8_1(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1]);
3433 0 : cplxMpy4_8_1(x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1]);
3434 0 : cplxMpy4_8_1(x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1]);
3435 0 : cplxMpy4_8_1(x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1]);
3436 0 : cplxMpy4_8_1(x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1]);
3437 : }
3438 : ELSE
3439 : {
3440 0 : cplxMpy4_8_2(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1], W[2 * (i - 1) + 0 * 2 * 4],
3441 : W[2 * (i - 1) + 0 * 2 * 4 + 1]);
3442 0 : cplxMpy4_8_2(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1], W[2 * (i - 1) + 1 * 2 * 4],
3443 : W[2 * (i - 1) + 1 * 2 * 4 + 1]);
3444 0 : cplxMpy4_8_2(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1], W[2 * (i - 1) + 2 * 2 * 4],
3445 : W[2 * (i - 1) + 2 * 2 * 4 + 1]);
3446 0 : cplxMpy4_8_2(x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1], W[2 * (i - 1) + 3 * 2 * 4],
3447 : W[2 * (i - 1) + 3 * 2 * 4 + 1]);
3448 0 : cplxMpy4_8_2(x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1], W[2 * (i - 1) + 4 * 2 * 4],
3449 : W[2 * (i - 1) + 4 * 2 * 4 + 1]);
3450 0 : cplxMpy4_8_2(x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1], W[2 * (i - 1) + 5 * 2 * 4],
3451 : W[2 * (i - 1) + 5 * 2 * 4 + 1]);
3452 0 : cplxMpy4_8_2(x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1], W[2 * (i - 1) + 6 * 2 * 4],
3453 : W[2 * (i - 1) + 6 * 2 * 4 + 1]);
3454 : }
3455 : /* First butterfly level: sums/differences are shifted right by SCALEFACTORN2 - 1, except t06/t07/t14/t15 which are shifted after being combined below */
3456 0 : t00 = L_shr_pos(L_add(x00, x08), SCALEFACTORN2 - 1);
3457 0 : t02 = L_shr_pos(L_sub(x00, x08), SCALEFACTORN2 - 1);
3458 0 : t01 = L_shr_pos(L_add(x01, x09), SCALEFACTORN2 - 1);
3459 0 : t03 = L_shr_pos(L_sub(x01, x09), SCALEFACTORN2 - 1);
3460 0 : t04 = L_shr_pos(L_add(x02, x10), SCALEFACTORN2 - 1);
3461 0 : t06 = L_sub(x02, x10);
3462 0 : t05 = L_shr_pos(L_add(x03, x11), SCALEFACTORN2 - 1);
3463 0 : t07 = L_sub(x03, x11);
3464 0 : t08 = L_shr_pos(L_add(x04, x12), SCALEFACTORN2 - 1);
3465 0 : t10 = L_shr_pos(L_sub(x04, x12), SCALEFACTORN2 - 1);
3466 0 : t09 = L_shr_pos(L_add(x05, x13), SCALEFACTORN2 - 1);
3467 0 : t11 = L_shr_pos(L_sub(x05, x13), SCALEFACTORN2 - 1);
3468 0 : t12 = L_shr_pos(L_add(x06, x14), SCALEFACTORN2 - 1);
3469 0 : t14 = L_sub(x06, x14);
3470 0 : t13 = L_shr_pos(L_add(x07, x15), SCALEFACTORN2 - 1);
3471 0 : t15 = L_sub(x07, x15);
3472 : /* Second butterfly level on the already shifted terms */
3473 0 : s00 = L_add(t00, t08);
3474 0 : s04 = L_sub(t00, t08);
3475 0 : s01 = L_add(t01, t09);
3476 0 : s05 = L_sub(t01, t09);
3477 0 : s08 = L_sub(t02, t11);
3478 0 : s10 = L_add(t02, t11);
3479 0 : s09 = L_add(t03, t10);
3480 0 : s11 = L_sub(t03, t10);
3481 0 : s02 = L_add(t04, t12);
3482 0 : s07 = L_sub(t04, t12);
3483 0 : s03 = L_add(t05, t13);
3484 0 : s06 = L_sub(t13, t05);
3485 : /* Deferred shift for the t06/t07/t14/t15 terms; t00..t03 are overwritten and reused as temporaries from here on */
3486 0 : t01 = L_shr_pos(L_add(t06, t14), SCALEFACTORN2 - 1);
3487 0 : t02 = L_shr_pos(L_sub(t06, t14), SCALEFACTORN2 - 1);
3488 0 : t00 = L_shr_pos(L_add(t07, t15), SCALEFACTORN2 - 1);
3489 0 : t03 = L_shr_pos(L_sub(t07, t15), SCALEFACTORN2 - 1);
3490 : /* Terms for outputs 1, 3, 5 and 7: products with C81_32 / C82_32 (constants defined elsewhere) */
3491 0 : s12 = Mpy_32_32_lc3plus(L_add(t00, t02), C81_32);
3492 0 : s14 = Mpy_32_32_lc3plus(L_sub(t00, t02), C81_32);
3493 0 : s13 = Mpy_32_32_lc3plus(L_sub(t03, t01), C81_32);
3494 0 : s15 = Mpy_32_32_lc3plus(L_add(t01, t03), C82_32);
3495 : /* Scatter: output k (0..7) of column i is stored at index sx * (i + k * dim1) of re/im */
3496 0 : re[sx * i + sx * 0 * dim1] = L_add(s00, s02);
3497 0 : move32();
3498 0 : im[sx * i + sx * 0 * dim1] = L_add(s01, s03);
3499 0 : move32();
3500 0 : re[sx * i + sx * 1 * dim1] = L_add(s10, s12);
3501 0 : move32();
3502 0 : im[sx * i + sx * 1 * dim1] = L_add(s11, s13);
3503 0 : move32();
3504 0 : re[sx * i + sx * 2 * dim1] = L_sub(s04, s06);
3505 0 : move32();
3506 0 : im[sx * i + sx * 2 * dim1] = L_sub(s05, s07);
3507 0 : move32();
3508 0 : re[sx * i + sx * 3 * dim1] = L_add(s08, s14);
3509 0 : move32();
3510 0 : im[sx * i + sx * 3 * dim1] = L_add(s09, s15);
3511 0 : move32();
3512 0 : re[sx * i + sx * 4 * dim1] = L_sub(s00, s02);
3513 0 : move32();
3514 0 : im[sx * i + sx * 4 * dim1] = L_sub(s01, s03);
3515 0 : move32();
3516 0 : re[sx * i + sx * 5 * dim1] = L_sub(s10, s12);
3517 0 : move32();
3518 0 : im[sx * i + sx * 5 * dim1] = L_sub(s11, s13);
3519 0 : move32();
3520 0 : re[sx * i + sx * 6 * dim1] = L_add(s04, s06);
3521 0 : move32();
3522 0 : im[sx * i + sx * 6 * dim1] = L_add(s05, s07);
3523 0 : move32();
3524 0 : re[sx * i + sx * 7 * dim1] = L_sub(s08, s14);
3525 0 : move32();
3526 0 : im[sx * i + sx * 7 * dim1] = L_sub(s09, s15);
3527 0 : move32();
3528 : }
3529 :
3530 : Dyn_Mem_Deluxe_Out();
3531 0 : }
3532 :
3533 : /**
3534 : * \brief Combined FFT
3535 : *
3536 : * \param [i/o] re real part
3537 : * \param [i/o] im imag part
3538 : * \param [i ] W rotation factor
3539 : * \param [i ] dim1 length of fft1
3540 : * \param [i ] dim2 length of fft2
3541 : * \param [i ] sx stride real and imag part
3542 : * \param [i ] sc stride phase rotation coefficients
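3543 : * \param [tmp] scratchBuffer scratch memory; the 32-bit workbuffer x (2*dim1*dim2 values) is obtained from it via scratchAlign()
3544 : * \param [i ] Woff offset for addressing the rotation vector table
3545 : * \param [o ] scale only with ENABLE_FFT_RESCALE: if non-NULL, receives the left shift applied to x after the dim1 stage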
3546 : * \return void
3547 : */
3548 :
3549 :
3550 0 : static void fftN2(Word32 *re, Word32 *im,
3551 : #ifdef ENABLE_HR_MODE
3552 : const Word32 *W,
3553 : #else
3554 : const Word16 *W,
3555 : #endif
3556 : Word16 dim1, Word16 dim2, Word16 sx, Word16 sc,
3557 : Word16 Woff
3558 : , Word8 *scratchBuffer
3559 : #ifdef ENABLE_FFT_RESCALE
3560 : , Word16 *scale
3561 : #endif
3562 : )
3563 : {
3564 : Dyn_Mem_Deluxe_In(Counter i, j;);
3565 :
3566 0 : Word32 *x = scratchAlign(scratchBuffer, 0);
3567 :
3568 0 : FOR (i = 0; i < dim2; i++)
3569 : {
3570 0 : FOR (j = 0; j < dim1; j++)
3571 : {
3572 0 : x[2 * i * dim1 + 2 * j] = re[sx * i + sx * j * dim2];
3573 0 : move32();
3574 0 : x[2 * i * dim1 + 2 * j + 1] = im[sx * i + sx * j * dim2];
3575 0 : move32();
3576 : }
3577 : }
3578 :
3579 0 : SWITCH (dim1)
3580 : {
3581 :
3582 0 : case 4:
3583 0 : FOR (i = 0; i < dim2; i++)
3584 : {
3585 0 : fft4(&x[i * 2 * dim1]);
3586 : }
3587 0 : BREAK;
3588 0 : case 8:
3589 0 : FOR (i = 0; i < dim2; i++)
3590 : {
3591 0 : fft8(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
3592 : }
3593 0 : BREAK;
3594 :
3595 0 : case 10:
3596 0 : FOR (i = 0; i < dim2; i++)
3597 : {
3598 0 : fft10(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
3599 : }
3600 0 : BREAK;
3601 0 : case 15:
3602 0 : FOR (i = 0; i < dim2; i++)
3603 : {
3604 0 : fft15(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
3605 : }
3606 :
3607 0 : BREAK;
3608 0 : case 16:
3609 0 : FOR (i = 0; i < dim2; i++)
3610 : {
3611 0 : fft16(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
3612 : }
3613 0 : BREAK;
3614 0 : case 20:
3615 0 : FOR (i = 0; i < dim2; i++)
3616 : {
3617 0 : fft20(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
3618 : }
3619 0 : BREAK;
3620 0 : case 30:
3621 0 : FOR (i = 0; i < dim2; i++)
3622 : {
3623 0 : fft30(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
3624 : }
3625 0 : BREAK;
3626 0 : case 32:
3627 0 : FOR (i = 0; i < dim2; i++)
3628 : {
3629 0 : fft32(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2);
3630 : }
3631 0 : BREAK;
3632 : #ifdef ENABLE_HR_MODE
3633 : #if (defined LC3_FFT15)
3634 : case 60:
3635 : FOR (i = 0; i < dim2; i++)
3636 : {
3637 : #ifndef ENABLE_FFT_RESCALE
3638 : fftN2(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], RotVector_480, 15, 4, sx, 4, 60, scratch);
3639 : #else
3640 : fftN2(&x[i * 2 * dim1], &x[i * 2 * dim1 + 1], RotVector_480, 15, 4, sx, 4, 60, scratch, NULL);
3641 : #endif
3642 : }
3643 : BREAK;
3644 : #endif
3645 : #endif
3646 0 : default: ASSERT(0);
3647 : }
3648 :
3649 : #ifdef ENABLE_FFT_RESCALE
3650 0 : IF (scale)
3651 : {
3652 0 : *scale = s_max(sub(getScaleFactor32_lc3plus(x, dim1 * dim2 * 2), FFT_RESCALE_HR), 0); move16();
3653 :
3654 : #if defined(FUNCTION_scaleValues_32)
3655 : scaleValues_32(x, dim1 * dim2, *scale);
3656 : #else
3657 0 : FOR (i = 0; i < dim1 * dim2 * 2; i++)
3658 : {
3659 0 : x[i] = L_shl_pos(x[i], *scale); move32();
3660 : }
3661 : }
3662 : #endif
3663 : #endif
3664 :
3665 0 : SWITCH (dim2)
3666 : {
3667 0 : case 4:
3668 : {
3669 : Word32 x00, x01, x02, x03, x04, x05, x06, x07;
3670 : Word32 t00, t01, t02, t03, t04, t05, t06, t07;
3671 :
3672 0 : j = add(8, 0);
3673 0 : FOR (i = 0; i < dim1; i++)
3674 : {
3675 0 : cplxMpy4_4_1(x00, x01, x[2 * i + 2 * 0 * dim1], x[2 * i + 2 * 0 * dim1 + 1]);
3676 0 : IF (i == 0)
3677 : {
3678 0 : cplxMpy4_4_1(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1]);
3679 0 : cplxMpy4_4_1(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1]);
3680 0 : cplxMpy4_4_1(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1]);
3681 : }
3682 : ELSE
3683 : {
3684 0 : cplxMpy4_4_0(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1],
3685 : W[sc * i + j * 1 * dim1 - Woff], W[sc * i + j * 1 * dim1 + 1 - Woff]);
3686 0 : cplxMpy4_4_0(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1],
3687 : W[sc * i + j * 2 * dim1 - Woff], W[sc * i + j * 2 * dim1 + 1 - Woff]);
3688 0 : cplxMpy4_4_0(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1],
3689 : W[sc * i + j * 3 * dim1 - Woff], W[sc * i + j * 3 * dim1 + 1 - Woff]);
3690 : }
3691 :
3692 0 : t00 = L_add(x00, x04);
3693 0 : t02 = L_sub(x00, x04);
3694 0 : t01 = L_add(x01, x05);
3695 0 : t03 = L_sub(x01, x05);
3696 0 : t04 = L_add(x02, x06);
3697 0 : t07 = L_sub(x02, x06);
3698 0 : t05 = L_add(x07, x03);
3699 0 : t06 = L_sub(x07, x03);
3700 :
3701 0 : re[sx * i + sx * 0 * dim1] = L_add(t00, t04);
3702 0 : move32();
3703 0 : im[sx * i + sx * 0 * dim1] = L_add(t01, t05);
3704 0 : move32();
3705 0 : re[sx * i + sx * 1 * dim1] = L_sub(t02, t06);
3706 0 : move32();
3707 0 : im[sx * i + sx * 1 * dim1] = L_sub(t03, t07);
3708 0 : move32();
3709 0 : re[sx * i + sx * 2 * dim1] = L_sub(t00, t04);
3710 0 : move32();
3711 0 : im[sx * i + sx * 2 * dim1] = L_sub(t01, t05);
3712 0 : move32();
3713 0 : re[sx * i + sx * 3 * dim1] = L_add(t02, t06);
3714 0 : move32();
3715 0 : im[sx * i + sx * 3 * dim1] = L_add(t03, t07);
3716 0 : move32();
3717 : }
3718 :
3719 0 : BREAK;
3720 : }
3721 :
3722 0 : case 6:
3723 : {
3724 : Word32 y[2 * 10];
3725 0 : FOR (j = 0; j < dim2; j++)
3726 : {
3727 0 : cplxMpy4_12_1(y[2 * j], y[2 * j + 1], x[2 * 0 + 2 * j * dim1], x[2 * 0 + 2 * j * dim1 + 1]);
3728 : }
3729 0 : fft6(&y[0], &y[1], 2);
3730 0 : FOR (j = 0; j < dim2; j++)
3731 : {
3732 0 : re[sx * 0 + sx * j * dim1] = y[2 * j];
3733 0 : move32();
3734 0 : im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
3735 0 : move32();
3736 : }
3737 :
3738 0 : FOR (i = 1; i < dim1; i++)
3739 : {
3740 0 : cplxMpy4_12_1(y[2 * (0 + 0)], y[2 * (0 + 0) + 1], x[2 * i + 2 * (0 + 0) * dim1],
3741 : x[2 * i + 2 * (0 + 0) * dim1 + 1]);
3742 0 : cplxMpy4_12_0(y[2 * (0 + 1)], y[2 * (0 + 1) + 1], x[2 * i + 2 * (0 + 1) * dim1],
3743 : x[2 * i + 2 * (0 + 1) * dim1 + 1], W[sc * i + sc * (0 + 1) * dim1 - Woff],
3744 : W[sc * i + sc * (0 + 1) * dim1 + 1 - Woff]);
3745 0 : FOR (j = 2; j < dim2; j = j + 2)
3746 : {
3747 0 : cplxMpy4_12_0(y[2 * (j + 0)], y[2 * (j + 0) + 1], x[2 * i + 2 * (j + 0) * dim1],
3748 : x[2 * i + 2 * (j + 0) * dim1 + 1], W[sc * i + sc * (j + 0) * dim1 - Woff],
3749 : W[sc * i + sc * (j + 0) * dim1 + 1 - Woff]);
3750 0 : cplxMpy4_12_0(y[2 * (j + 1)], y[2 * (j + 1) + 1], x[2 * i + 2 * (j + 1) * dim1],
3751 : x[2 * i + 2 * (j + 1) * dim1 + 1], W[sc * i + sc * (j + 1) * dim1 - Woff],
3752 : W[sc * i + sc * (j + 1) * dim1 + 1 - Woff]);
3753 : }
3754 0 : fft6(&y[0], &y[1], 2);
3755 0 : FOR (j = 0; j < dim2; j++)
3756 : {
3757 0 : re[sx * i + sx * j * dim1] = y[2 * j];
3758 0 : move32();
3759 0 : im[sx * i + sx * j * dim1] = y[2 * j + 1];
3760 0 : move32();
3761 : }
3762 : }
3763 0 : BREAK;
3764 : }
3765 :
3766 0 : case 8:
3767 : {
3768 : Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;
3769 : Word32 t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15;
3770 : Word32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09, s10, s11, s12, s13, s14, s15;
3771 :
3772 0 : FOR (i = 0; i < dim1; i++)
3773 : {
3774 0 : cplxMpy4_8_1(x00, x01, x[2 * i + 2 * 0 * dim1], x[2 * i + 2 * 0 * dim1 + 1]);
3775 0 : IF (i == 0)
3776 : {
3777 0 : cplxMpy4_8_1(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1]);
3778 0 : cplxMpy4_8_1(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1]);
3779 0 : cplxMpy4_8_1(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1]);
3780 0 : cplxMpy4_8_1(x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1]);
3781 0 : cplxMpy4_8_1(x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1]);
3782 0 : cplxMpy4_8_1(x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1]);
3783 0 : cplxMpy4_8_1(x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1]);
3784 : }
3785 : ELSE
3786 : {
3787 0 : cplxMpy4_8_0(x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1],
3788 : W[sc * i + sc * 1 * dim1 - Woff], W[sc * i + sc * 1 * dim1 + 1 - Woff]);
3789 0 : cplxMpy4_8_0(x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1],
3790 : W[sc * i + sc * 2 * dim1 - Woff], W[sc * i + sc * 2 * dim1 + 1 - Woff]);
3791 0 : cplxMpy4_8_0(x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1],
3792 : W[sc * i + sc * 3 * dim1 - Woff], W[sc * i + sc * 3 * dim1 + 1 - Woff]);
3793 0 : cplxMpy4_8_0(x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1],
3794 : W[sc * i + sc * 4 * dim1 - Woff], W[sc * i + sc * 4 * dim1 + 1 - Woff]);
3795 0 : cplxMpy4_8_0(x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1],
3796 : W[sc * i + sc * 5 * dim1 - Woff], W[sc * i + sc * 5 * dim1 + 1 - Woff]);
3797 0 : cplxMpy4_8_0(x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1],
3798 : W[sc * i + sc * 6 * dim1 - Woff], W[sc * i + sc * 6 * dim1 + 1 - Woff]);
3799 0 : cplxMpy4_8_0(x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1],
3800 : W[sc * i + sc * 7 * dim1 - Woff], W[sc * i + sc * 7 * dim1 + 1 - Woff]);
3801 : }
3802 :
3803 0 : t00 = L_shr_pos(L_add(x00, x08), SCALEFACTORN2 - 1);
3804 0 : t02 = L_shr_pos(L_sub(x00, x08), SCALEFACTORN2 - 1);
3805 0 : t01 = L_shr_pos(L_add(x01, x09), SCALEFACTORN2 - 1);
3806 0 : t03 = L_shr_pos(L_sub(x01, x09), SCALEFACTORN2 - 1);
3807 0 : t04 = L_shr_pos(L_add(x02, x10), SCALEFACTORN2 - 1);
3808 0 : t06 = L_sub(x02, x10);
3809 0 : t05 = L_shr_pos(L_add(x03, x11), SCALEFACTORN2 - 1);
3810 0 : t07 = L_sub(x03, x11);
3811 0 : t08 = L_shr_pos(L_add(x04, x12), SCALEFACTORN2 - 1);
3812 0 : t10 = L_shr_pos(L_sub(x04, x12), SCALEFACTORN2 - 1);
3813 0 : t09 = L_shr_pos(L_add(x05, x13), SCALEFACTORN2 - 1);
3814 0 : t11 = L_shr_pos(L_sub(x05, x13), SCALEFACTORN2 - 1);
3815 0 : t12 = L_shr_pos(L_add(x06, x14), SCALEFACTORN2 - 1);
3816 0 : t14 = L_sub(x06, x14);
3817 0 : t13 = L_shr_pos(L_add(x07, x15), SCALEFACTORN2 - 1);
3818 0 : t15 = L_sub(x07, x15);
3819 :
3820 0 : s00 = L_add(t00, t08);
3821 0 : s04 = L_sub(t00, t08);
3822 0 : s01 = L_add(t01, t09);
3823 0 : s05 = L_sub(t01, t09);
3824 0 : s08 = L_sub(t02, t11);
3825 0 : s10 = L_add(t02, t11);
3826 0 : s09 = L_add(t03, t10);
3827 0 : s11 = L_sub(t03, t10);
3828 0 : s02 = L_add(t04, t12);
3829 0 : s07 = L_sub(t04, t12);
3830 0 : s03 = L_add(t05, t13);
3831 0 : s06 = L_sub(t13, t05);
3832 :
3833 0 : t01 = L_shr_pos(L_add(t06, t14), SCALEFACTORN2 - 1);
3834 0 : t02 = L_shr_pos(L_sub(t06, t14), SCALEFACTORN2 - 1);
3835 0 : t00 = L_shr_pos(L_add(t07, t15), SCALEFACTORN2 - 1);
3836 0 : t03 = L_shr_pos(L_sub(t07, t15), SCALEFACTORN2 - 1);
3837 :
3838 0 : s12 = Mpy_32_xx(L_add(t00, t02), C81);
3839 0 : s14 = Mpy_32_xx(L_sub(t00, t02), C81);
3840 0 : s13 = Mpy_32_xx(L_sub(t03, t01), C81);
3841 0 : s15 = Mpy_32_xx(L_add(t01, t03), C82);
3842 :
3843 0 : re[sx * i + sx * 0 * dim1] = L_add(s00, s02);
3844 0 : move32();
3845 0 : im[sx * i + sx * 0 * dim1] = L_add(s01, s03);
3846 0 : move32();
3847 0 : re[sx * i + sx * 1 * dim1] = L_add(s10, s12);
3848 0 : move32();
3849 0 : im[sx * i + sx * 1 * dim1] = L_add(s11, s13);
3850 0 : move32();
3851 0 : re[sx * i + sx * 2 * dim1] = L_sub(s04, s06);
3852 0 : move32();
3853 0 : im[sx * i + sx * 2 * dim1] = L_sub(s05, s07);
3854 0 : move32();
3855 0 : re[sx * i + sx * 3 * dim1] = L_add(s08, s14);
3856 0 : move32();
3857 0 : im[sx * i + sx * 3 * dim1] = L_add(s09, s15);
3858 0 : move32();
3859 0 : re[sx * i + sx * 4 * dim1] = L_sub(s00, s02);
3860 0 : move32();
3861 0 : im[sx * i + sx * 4 * dim1] = L_sub(s01, s03);
3862 0 : move32();
3863 0 : re[sx * i + sx * 5 * dim1] = L_sub(s10, s12);
3864 0 : move32();
3865 0 : im[sx * i + sx * 5 * dim1] = L_sub(s11, s13);
3866 0 : move32();
3867 0 : re[sx * i + sx * 6 * dim1] = L_add(s04, s06);
3868 0 : move32();
3869 0 : im[sx * i + sx * 6 * dim1] = L_add(s05, s07);
3870 0 : move32();
3871 0 : re[sx * i + sx * 7 * dim1] = L_sub(s08, s14);
3872 0 : move32();
3873 0 : im[sx * i + sx * 7 * dim1] = L_sub(s09, s15);
3874 0 : move32();
3875 : }
3876 0 : BREAK;
3877 : }
3878 :
3879 0 : case 12:
3880 : {
3881 : Word32 y[2 * 20];
3882 0 : FOR (j = 0; j < dim2; j++)
3883 : {
3884 0 : cplxMpy4_12_1(y[2 * j], y[2 * j + 1], x[2 * 0 + 2 * j * dim1], x[2 * 0 + 2 * j * dim1 + 1]);
3885 : }
3886 0 : fft12(y);
3887 0 : FOR (j = 0; j < dim2; j++)
3888 : {
3889 0 : re[sx * 0 + sx * j * dim1] = y[2 * j];
3890 0 : move32();
3891 0 : im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
3892 0 : move32();
3893 : }
3894 :
3895 0 : FOR (i = 1; i < dim1; i++)
3896 : {
3897 0 : cplxMpy4_12_1(y[2 * (0 + 0)], y[2 * (0 + 0) + 1], x[2 * i + 2 * (0 + 0) * dim1],
3898 : x[2 * i + 2 * (0 + 0) * dim1 + 1]);
3899 0 : cplxMpy4_12_0(y[2 * (0 + 1)], y[2 * (0 + 1) + 1], x[2 * i + 2 * (0 + 1) * dim1],
3900 : x[2 * i + 2 * (0 + 1) * dim1 + 1], W[sc * i + sc * (0 + 1) * dim1 - Woff],
3901 : W[sc * i + sc * (0 + 1) * dim1 + 1 - Woff]);
3902 0 : FOR (j = 2; j < dim2; j = j + 2)
3903 : {
3904 0 : cplxMpy4_12_0(y[2 * (j + 0)], y[2 * (j + 0) + 1], x[2 * i + 2 * (j + 0) * dim1],
3905 : x[2 * i + 2 * (j + 0) * dim1 + 1], W[sc * i + sc * (j + 0) * dim1 - Woff],
3906 : W[sc * i + sc * (j + 0) * dim1 + 1 - Woff]);
3907 0 : cplxMpy4_12_0(y[2 * (j + 1)], y[2 * (j + 1) + 1], x[2 * i + 2 * (j + 1) * dim1],
3908 : x[2 * i + 2 * (j + 1) * dim1 + 1], W[sc * i + sc * (j + 1) * dim1 - Woff],
3909 : W[sc * i + sc * (j + 1) * dim1 + 1 - Woff]);
3910 : }
3911 0 : fft12(y);
3912 0 : FOR (j = 0; j < dim2; j++)
3913 : {
3914 0 : re[sx * i + sx * j * dim1] = y[2 * j];
3915 0 : move32();
3916 0 : im[sx * i + sx * j * dim1] = y[2 * j + 1];
3917 0 : move32();
3918 : }
3919 : }
3920 0 : BREAK;
3921 : }
3922 :
3923 : #if defined(ENABLE_HR_MODE)
3924 0 : case 16:
3925 : {
3926 : Word32 y[2 * 20];
3927 0 : FOR (j = 0; j < dim2; j++)
3928 : {
3929 0 : cplxMpy4_16_1(y[2 * j], y[2 * j + 1], x[2 * 0 + 2 * j * dim1], x[2 * 0 + 2 * j * dim1 + 1]);
3930 : }
3931 :
3932 0 : fft16(&y[0], &y[1], 2);
3933 0 : FOR (j = 0; j < dim2; j++)
3934 : {
3935 0 : re[sx * 0 + sx * j * dim1] = y[2 * j];
3936 0 : move32();
3937 0 : im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
3938 0 : move32();
3939 : }
3940 :
3941 0 : FOR (i = 1; i < dim1; i++)
3942 : {
3943 0 : cplxMpy4_16_1(y[2 * (0 + 0)], y[2 * (0 + 0) + 1], x[2 * i + 2 * (0 + 0) * dim1],
3944 : x[2 * i + 2 * (0 + 0) * dim1 + 1]);
3945 0 : cplxMpy4_16_0(y[2 * (0 + 1)], y[2 * (0 + 1) + 1], x[2 * i + 2 * (0 + 1) * dim1],
3946 : x[2 * i + 2 * (0 + 1) * dim1 + 1], W[sc * i + sc * (0 + 1) * dim1 - Woff],
3947 : W[sc * i + sc * (0 + 1) * dim1 + 1 - Woff]);
3948 0 : FOR (j = 2; j < dim2; j = j + 2)
3949 : {
3950 0 : cplxMpy4_16_0(y[2 * (j + 0)], y[2 * (j + 0) + 1], x[2 * i + 2 * (j + 0) * dim1],
3951 : x[2 * i + 2 * (j + 0) * dim1 + 1], W[sc * i + sc * (j + 0) * dim1 - Woff],
3952 : W[sc * i + sc * (j + 0) * dim1 + 1 - Woff]);
3953 0 : cplxMpy4_16_0(y[2 * (j + 1)], y[2 * (j + 1) + 1], x[2 * i + 2 * (j + 1) * dim1],
3954 : x[2 * i + 2 * (j + 1) * dim1 + 1], W[sc * i + sc * (j + 1) * dim1 - Woff],
3955 : W[sc * i + sc * (j + 1) * dim1 + 1 - Woff]);
3956 : }
3957 0 : fft16(&y[0], &y[1], 2);
3958 0 : FOR (j = 0; j < dim2; j++)
3959 : {
3960 0 : re[sx * i + sx * j * dim1] = y[2 * j];
3961 0 : move32();
3962 0 : im[sx * i + sx * j * dim1] = y[2 * j + 1];
3963 0 : move32();
3964 : }
3965 : }
3966 0 : BREAK;
3967 : }
3968 : #endif
3969 0 : default: ASSERT(0);
3970 : }
3971 :
3972 : Dyn_Mem_Deluxe_Out();
3973 0 : }
3974 :
3975 : /**
3976 : * \brief Complex valued FFT
3977 : *
3978 : * \param [i/o] re real part
3979 : * \param [i/o] im imag part
3980 : * \param [i ] length length of fft
3981 : * \param [i ] s stride real and imag part
3982 : * \param [i/o] scale scalefactor, incremented by the scaling applied for the given length
3983 : * \param [tmp] x scratch buffer
3984 : * \return void
3985 : */
3986 :
3987 :
3988 :
3989 : /* x is the scratch buffer */
/*
 * BASOP_cfft_lc3plus: dispatch a complex FFT of the requested length.
 *
 * re, im  : passed unchanged to the selected kernel (real / imaginary data).
 * length  : selects the kernel; an unsupported value hits ASSERT(0).
 * s       : passed unchanged to the selected kernel.
 * scale   : *scale is increased by the SCALEFACTOR<length> constant of the
 *           selected case; for the cases that hand &fftN2scale to fftN2
 *           the value written there is subtracted again.
 * x       : caller-provided scratch; in this function it is forwarded to
 *           fft40 only.
 *
 * Lengths 10, 16, 20, 30, 32 and 40 call a dedicated kernel. All other
 * lengths call fftN2 with a rotation table, two sizes whose product equals
 * `length` (presumably the two factors of the decomposition - confirm
 * against fftN2), s, two further integer arguments and the local byte
 * buffer `scratch`.
 * When ENABLE_FFT_RESCALE is defined fftN2 takes one extra trailing
 * argument: NULL, or &fftN2scale.
 *
 * NOTE(review): fftN2scale is declared only when ENABLE_FFT_RESCALE and
 * (LC3_FFT30 or ENABLE_HR_MODE) are defined, yet cases 180 and 240 use it
 * whenever ENABLE_FFT_RESCALE is defined, and case 360 uses it (with the
 * 10-argument fftN2 form) whenever ENABLE_HR_MODE is defined. Confirm that
 * the supported build configurations never hit those combinations.
 */
3990 0 : void BASOP_cfft_lc3plus(Word32 *re, Word32 *im, Word16 length, Word16 s, Word16 *scale, Word32 *x)
3991 : {
/* Receives the extra scaling reported by fftN2 in the rescale-enabled cases. */
3992 : #if (defined ENABLE_FFT_RESCALE) && ((defined LC3_FFT30) || (defined ENABLE_HR_MODE))
3993 0 : Word16 fftN2scale = 0;
3994 : #endif
3995 :
/* Zero-initialised local byte buffer handed to fftN2; larger in HR mode. */
3996 : #ifdef ENABLE_HR_MODE
3997 0 : Word8 scratch[6128] = {0};
3998 : #else
3999 : Word8 scratch[4068] = {0};
4000 : #endif
4001 :
4002 0 : SWITCH (length)
4003 : {
4004 :
/* Dedicated kernels for the small lengths. */
4005 0 : case 10:
4006 0 : fft10(re, im, s);
4007 0 : *scale = add(*scale, SCALEFACTOR10);
4008 0 : move16();
4009 0 : BREAK;
4010 0 : case 16:
4011 0 : fft16(re, im, s);
4012 0 : *scale = add(*scale, SCALEFACTOR16);
4013 0 : move16();
4014 0 : BREAK;
4015 0 : case 20:
4016 0 : fft20(re, im, s);
4017 0 : *scale = add(*scale, SCALEFACTOR20);
4018 0 : move16();
4019 0 : BREAK;
4020 0 : case 30:
4021 0 : fft30(re, im, s);
4022 0 : *scale = add(*scale, SCALEFACTOR30);
4023 0 : move16();
4024 0 : BREAK;
4025 0 : case 32:
4026 0 : fft32(re, im, s);
4027 0 : *scale = add(*scale, SCALEFACTOR32);
4028 0 : move16();
4029 0 : BREAK;
/* fft40 is the only kernel here that receives the caller's scratch x. */
4030 0 : case 40:
4031 0 : fft40(re, im, s, x);
4032 0 : *scale = add(*scale, SCALEFACTOR40);
4033 0 : move16();
4034 0 : BREAK;
/* From here on every length goes through fftN2 (48 = 4 * 12). */
4035 0 : case 48:
4036 : #ifndef ENABLE_FFT_RESCALE
4037 : fftN2(re, im, RotVector_32_12, 4, 12, s, 16, 64, scratch);
4038 : #else
4039 0 : fftN2(re, im, RotVector_32_12, 4, 12, s, 16, 64, scratch, NULL);
4040 : #endif
4041 0 : *scale = add(*scale, SCALEFACTOR48);
4042 0 : move16();
4043 0 : BREAK;
4044 0 : case 60:
4045 : #ifndef ENABLE_FFT_RESCALE
4046 : fftN2(re, im, RotVector_480, 15, 4, s, 4, 60, scratch);
4047 : #else
4048 0 : fftN2(re, im, RotVector_480, 15, 4, s, 4, 60, scratch, NULL);
4049 : #endif
4050 0 : *scale = add(*scale, SCALEFACTOR60);
4051 0 : move16();
4052 0 : BREAK;
4053 0 : case 64:
4054 : #ifndef ENABLE_FFT_RESCALE
4055 : fftN2(re, im, RotVector_32_8, 8, 8, s, 8, 64, scratch);
4056 : #else
4057 0 : fftN2(re, im, RotVector_32_8, 8, 8, s, 8, 64, scratch, NULL);
4058 : #endif
4059 0 : *scale = add(*scale, SCALEFACTOR64);
4060 0 : move16();
4061 0 : BREAK;
4062 0 : case 80:
4063 : #ifndef ENABLE_FFT_RESCALE
4064 : fftN2(re, im, RotVector_320, 10, 8, s, 4, 40, scratch);
4065 : #else
4066 0 : fftN2(re, im, RotVector_320, 10, 8, s, 4, 40, scratch, NULL);
4067 : #endif
4068 0 : *scale = add(*scale, SCALEFACTOR80);
4069 0 : move16();
4070 0 : BREAK;
4071 0 : case 90:
4072 : #ifndef ENABLE_FFT_RESCALE
4073 : fftN2(re, im, RotVector_15_6, 15, 6, s, 2, 30, scratch);
4074 : #else
4075 0 : fftN2(re, im, RotVector_15_6, 15, 6, s, 2, 30, scratch, NULL);
4076 : #endif
4077 0 : *scale = add(*scale, SCALEFACTOR90);
4078 0 : move16();
4079 0 : BREAK;
4080 :
4081 0 : case 120:
4082 : #ifndef ENABLE_FFT_RESCALE
4083 : fftN2(re, im, RotVector_480, 15, 8, s, 4, 60, scratch);
4084 : #else
4085 0 : fftN2(re, im, RotVector_480, 15, 8, s, 4, 60, scratch, NULL);
4086 : #endif
4087 0 : *scale = add(*scale, SCALEFACTOR120);
4088 0 : move16();
4089 0 : BREAK;
4090 0 : case 128:
4091 : #ifndef ENABLE_FFT_RESCALE
4092 : fftN2(re, im, RotVector_32_8, 16, 8, s, 4, 64, scratch);
4093 : #else
4094 0 : fftN2(re, im, RotVector_32_8, 16, 8, s, 4, 64, scratch, NULL);
4095 : #endif
4096 0 : *scale = add(*scale, SCALEFACTOR128);
4097 0 : move16();
4098 0 : BREAK;
4099 0 : case 160:
4100 : #ifndef ENABLE_FFT_RESCALE
4101 : fftN2(re, im, RotVector_320, 20, 8, s, 2, 40, scratch);
4102 : #else
4103 0 : fftN2(re, im, RotVector_320, 20, 8, s, 2, 40, scratch, NULL);
4104 : #endif
4105 0 : *scale = add(*scale, SCALEFACTOR160);
4106 0 : move16();
4107 0 : BREAK;
/* Length 180: with rescaling enabled, fftN2 reports its scaling through
 * fftN2scale, which is subtracted from *scale after the constant is added. */
4108 0 : case 180:
4109 : #ifndef ENABLE_FFT_RESCALE
4110 : fftN2(re, im, RotVector_360, 15, 12, s, 4, 60, scratch);
4111 : *scale = add(*scale, SCALEFACTOR180);
4112 : #else
4113 0 : fftN2(re, im, RotVector_360, 15, 12, s, 4, 60, scratch, &fftN2scale);
4114 0 : *scale = add(*scale, SCALEFACTOR180);
4115 0 : *scale = sub(*scale, fftN2scale); move16();
4116 : #endif
4117 :
4118 0 : move16();
4119 0 : BREAK;
4120 0 : case 192:
4121 : #ifndef ENABLE_FFT_RESCALE
4122 : fftN2(re, im, RotVector_32_12, 16, 12, s, 4, 64, scratch);
4123 : #else
4124 0 : fftN2(re, im, RotVector_32_12, 16, 12, s, 4, 64, scratch, NULL);
4125 : #endif
4126 0 : *scale = add(*scale, SCALEFACTOR192);
4127 0 : move16();
4128 0 : BREAK;
/* Length 240: same rescale handling as length 180. */
4129 0 : case 240:
4130 : #ifndef ENABLE_FFT_RESCALE
4131 : fftN2(re, im, RotVector_480, 30, 8, s, 2, 60, scratch);
4132 : *scale = add(*scale, SCALEFACTOR240);
4133 : #else
4134 0 : fftN2(re, im, RotVector_480, 30, 8, s, 2, 60, scratch, &fftN2scale);
4135 0 : *scale = add(*scale, SCALEFACTOR240);
4136 0 : *scale = sub(*scale, fftN2scale); move16();
4137 : #endif
4138 0 : move16();
4139 0 : BREAK;
4140 0 : case 256:
4141 : #ifndef ENABLE_FFT_RESCALE
4142 : fftN2(re, im, RotVector_32_8, 32, 8, s, 2, 64, scratch);
4143 : #else
4144 0 : fftN2(re, im, RotVector_32_8, 32, 8, s, 2, 64, scratch, NULL);
4145 : #endif
4146 0 : *scale = add(*scale, SCALEFACTOR256);
4147 0 : move16();
4148 0 : BREAK;
4149 0 : case 384:
4150 : #ifndef ENABLE_FFT_RESCALE
4151 : fftN2(re, im, RotVector_32_12, 32, 12, s, 2, 64, scratch);
4152 : #else
4153 0 : fftN2(re, im, RotVector_32_12, 32, 12, s, 2, 64, scratch, NULL);
4154 : #endif
4155 0 : *scale = add(*scale, SCALEFACTOR384);
4156 0 : move16();
4157 0 : BREAK;
/* Lengths 360 and 480 exist only in HR mode. */
4158 : #ifdef ENABLE_HR_MODE
/* Length 360 always uses the rescale form of fftN2 (no ENABLE_FFT_RESCALE guard here). */
4159 0 : case 360:
4160 0 : fftN2(re, im, RotVector_720, 30, 12, s, 2, 60, scratch, &fftN2scale);
4161 0 : *scale = add(*scale, SCALEFACTOR360); move16();
4162 0 : *scale = sub(*scale, fftN2scale); move16();
4163 0 : BREAK;
/* Length 480 is split as 60 x 8 by default, or 30 x 16 when ENABLE_FFT_30X16 is defined. */
4164 0 : case 480:
4165 : #ifndef ENABLE_FFT_RESCALE
4166 : #ifndef ENABLE_FFT_30X16
4167 : fftN2(re, im, RotVector_960, 60, 8, s, 2, 120, scratch);
4168 : #else
4169 : fftN2(re, im, RotVector_30_16, 30, 16, s, 2, 60, scratch);
4170 : #endif
4171 : *scale = add(*scale, SCALEFACTOR480); move16();
4172 : #else
4173 : #ifndef ENABLE_FFT_30X16
4174 : fftN2(re, im, RotVector_960, 60, 8, s, 2, 120, scratch, &fftN2scale);
4175 : #else
4176 0 : fftN2(re, im, RotVector_30_16, 30, 16, s, 2, 60, scratch, &fftN2scale);
4177 : #endif
4178 0 : *scale = add(*scale, SCALEFACTOR480); move16();
4179 0 : *scale = sub(*scale, fftN2scale); move16();
4180 : #endif
4181 0 : BREAK;
4182 : #endif
/* Any other length is a programming error. */
4183 0 : default: ASSERT(0);
4184 : }
4185 0 : }
4186 :
4187 :
/*
 * RFFT_TWIDDLE1: butterfly step used by BASOP_rfftN.
 *
 * Besides its arguments the macro reads `i` and `sizeOfFft` from the
 * enclosing scope.
 *
 * It loads the pair x[2i], x[2i+1] into xb0/xb1 and the mirrored pair
 * x[sizeOfFft-2i], x[sizeOfFft-2i+1] into xt0/xt1, each shifted right by 2
 * (L_shr_pos; in HR mode that name is redefined at the top of the file as
 * a rounding shift). It then produces
 *   t3 = (xb0 - xt0) * w1 - (xb1 + xt1) * w2
 *   t4 = (xb0 - xt0) * w2 + (xb1 + xt1) * w1
 *   t1 = xb0 + xt0
 *   t2 = xb1 - xt1
 * t1 and t2 first hold the difference/sum used for t3/t4 and are then
 * overwritten with the final values listed above.
 */
4188 : #define RFFT_TWIDDLE1(x, t1, t2, t3, t4, w1, w2, xb0, xb1, xt0, xt1) \
4189 : do \
4190 : { \
4191 : xb0 = L_shr_pos(x[2 * i + 0], 2); \
4192 : xb1 = L_shr_pos(x[2 * i + 1], 2); \
4193 : xt0 = L_shr_pos(x[sizeOfFft - 2 * i + 0], 2); \
4194 : xt1 = L_shr_pos(x[sizeOfFft - 2 * i + 1], 2); \
4195 : t1 = L_sub(xb0, xt0); \
4196 : t2 = L_add(xb1, xt1); \
4197 : t3 = L_sub(Mpy_32_32_lc3plus(t1, w1), Mpy_32_32_lc3plus(t2, w2)); \
4198 : t4 = L_add(Mpy_32_32_lc3plus(t1, w2), Mpy_32_32_lc3plus(t2, w1)); \
4199 : t1 = L_add(xb0, xt0); \
4200 : t2 = L_sub(xb1, xt1); \
4201 : } while (0)
4202 :
/*
 * RFFT_TWIDDLE2: butterfly step used by BASOP_irfftN.
 *
 * Besides its arguments the macro reads `i` and `sizeOfFft` from the
 * enclosing scope.
 *
 * The loads and the final t1/t2 are the same as in RFFT_TWIDDLE1; only the
 * signs in the rotated terms differ:
 *   t3 = (xb0 - xt0) * w1 + (xb1 + xt1) * w2
 *   t4 = (xb1 + xt1) * w1 - (xb0 - xt0) * w2
 *   t1 = xb0 + xt0
 *   t2 = xb1 - xt1
 */
4203 : #define RFFT_TWIDDLE2(x, t1, t2, t3, t4, w1, w2, xb0, xb1, xt0, xt1) \
4204 : do \
4205 : { \
4206 : xb0 = L_shr_pos(x[2 * i + 0], 2); \
4207 : xb1 = L_shr_pos(x[2 * i + 1], 2); \
4208 : xt0 = L_shr_pos(x[sizeOfFft - 2 * i + 0], 2); \
4209 : xt1 = L_shr_pos(x[sizeOfFft - 2 * i + 1], 2); \
4210 : t1 = L_sub(xb0, xt0); \
4211 : t2 = L_add(xb1, xt1); \
4212 : t3 = L_add(Mpy_32_32_lc3plus(t1, w1), Mpy_32_32_lc3plus(t2, w2)); \
4213 : t4 = L_sub(Mpy_32_32_lc3plus(t2, w1), Mpy_32_32_lc3plus(t1, w2)); \
4214 : t1 = L_add(xb0, xt0); \
4215 : t2 = L_sub(xb1, xt1); \
4216 : } while (0)
4217 :
4218 :
4219 :
/*
 * rfft_twid: return the twiddle table that belongs to a real FFT of `size`
 * points. Supported sizes are 32, 40, 64, 80, 96, 128, 192, 256, 384, 512
 * and 768. Any other size hits ASSERT(0); if execution continues past the
 * assertion the function returns NULL.
 */
4220 0 : static const Word32 *rfft_twid(int size)
4221 : {
4222 0 : SWITCH (size)
4223 : {
4224 0 : case 32: return RealFFT32_twid;
4225 0 : case 40: return RealFFT40_twid;
4226 0 : case 64: return RealFFT64_twid;
4227 0 : case 80: return RealFFT80_twid;
4228 0 : case 96: return RealFFT96_twid;
4229 0 : case 128: return RealFFT128_twid;
4230 0 : case 192: return RealFFT192_twid;
4231 0 : case 256: return RealFFT256_twid;
4232 0 : case 384: return RealFFT384_twid;
4233 0 : case 512: return RealFFT512_twid;
4234 0 : case 768: return RealFFT768_twid;
4235 0 : default: ASSERT(0);
4236 : }
/* Reached only for an unsupported size when ASSERT does not abort. */
4237 : return NULL;
4238 : }
4239 :
4240 :
/*
 * BASOP_rfftN: in-place real FFT built on a half-length complex FFT.
 *
 * x             : [i/o] data buffer of sizeOfFft Word32 values; it is handed
 *                 to the complex FFT as interleaved pairs (&x[0], &x[1],
 *                 stride 2) and rewritten by the post-processing below.
 * sizeOfFft     : [i]   transform size; must be one of the sizes known to
 *                 rfft_twid.
 * scale         : [i/o] updated by BASOP_cfft_lc3plus and then increased by 1.
 * scratchBuffer : [tmp] aligned with scratchAlign() and passed to the
 *                 complex FFT as its work buffer.
 *
 * Steps: complex FFT of length sizeOfFft / 2, special handling of the first
 * pair, two twiddle loops that combine each pair with its mirrored pair, and
 * finally the pair in the middle of the buffer.
 */
4241 0 : void BASOP_rfftN(Word32 *x, Word16 sizeOfFft, Word16 *scale, Word8 *scratchBuffer)
4242 : {
4243 : Dyn_Mem_Deluxe_In(Counter i; Word16 sizeOfFft2, sizeOfFft4, sizeOfFft8; Word32 t1, t2, t3, t4, xb0, xb1, xt0, xt1;
4244 : Word32 * workBuffer; const Word32 *w32;);
4245 :
4246 0 : workBuffer = (Word32 *)scratchAlign(scratchBuffer, 0); /* Size = 4 * sizeOfFft */
4247 0 : w32 = rfft_twid(sizeOfFft);
4248 :
4249 0 : sizeOfFft2 = shr_pos(sizeOfFft, 1);
4250 0 : sizeOfFft4 = shr_pos(sizeOfFft, 2);
4251 0 : sizeOfFft8 = shr_pos(sizeOfFft, 3);
4252 :
/* Half-length complex FFT over the interleaved pairs of x. */
4253 0 : BASOP_cfft_lc3plus(&x[0], &x[1], sizeOfFft2, 2, scale, workBuffer);
4254 :
/* First pair: sum and difference of the two halved values. */
4255 0 : xb0 = L_shr_pos(x[0], 1);
4256 0 : xb1 = L_shr_pos(x[1], 1);
4257 0 : x[0] = L_add(xb0, xb1);
4258 0 : move32();
4259 0 : x[1] = L_sub(xb0, xb1);
4260 0 : move32();
4261 :
/* First part: twiddle table read forwards, odd entry as w1 and even entry as w2. */
4262 0 : FOR (i = 1; i < sizeOfFft8; i++)
4263 : {
4264 0 : RFFT_TWIDDLE1(x, t1, t2, t3, t4, w32[2 * i + 1], w32[2 * i], xb0, xb1, xt0, xt1);
4265 0 : x[2 * i] = L_sub(t1, t3);
4266 0 : move32();
4267 0 : x[2 * i + 1] = L_sub(t2, t4);
4268 0 : move32();
4269 0 : x[sizeOfFft - 2 * i] = L_add(t1, t3);
4270 0 : move32();
4271 0 : x[sizeOfFft - 2 * i + 1] = L_negate(L_add(t2, t4));
4272 0 : move32();
4273 : }
4274 :
/* Second part: same table read backwards, with the even entry as w1 and the odd entry as w2. */
4275 0 : FOR (i = sizeOfFft8; i < sizeOfFft4; i++)
4276 : {
4277 0 : RFFT_TWIDDLE1(x, t1, t2, t3, t4, w32[(2 * sizeOfFft4 - 2 * i)], w32[(2 * sizeOfFft4 - 2 * i + 1)], xb0, xb1,
4278 : xt0, xt1);
4279 0 : x[2 * i] = L_sub(t1, t3);
4280 0 : move32();
4281 0 : x[2 * i + 1] = L_sub(t2, t4);
4282 0 : move32();
4283 0 : x[sizeOfFft - 2 * i] = L_add(t1, t3);
4284 0 : move32();
4285 0 : x[sizeOfFft - 2 * i + 1] = L_negate(L_add(t2, t4));
4286 0 : move32();
4287 : }
4288 :
/* The loop leaves i == sizeOfFft4, so for a size that is a multiple of 4 the
 * source and destination index coincide: the middle pair is halved in place
 * and its second element negated. */
4289 0 : x[sizeOfFft - 2 * i] = L_shr_pos(x[2 * i + 0], 1);
4290 0 : move32();
4291 0 : x[sizeOfFft - 2 * i + 1] = L_negate(L_shr_pos(x[2 * i + 1], 1));
4292 0 : move32();
4293 :
/* Report one additional bit of scaling on top of what the complex FFT added. */
4294 0 : *scale = add(*scale, 1);
4295 0 : move16();
4296 :
4297 : Dyn_Mem_Deluxe_Out();
4298 0 : }
4299 :
4300 :
4301 :
/*
 * BASOP_irfftN: in-place inverse counterpart of BASOP_rfftN.
 *
 * x             : [i/o] data buffer of sizeOfFft Word32 values, rewritten in
 *                 place.
 * sizeOfFft     : [i]   transform size; must be one of the sizes known to
 *                 rfft_twid.
 * scale         : [i/o] updated by BASOP_cfft_lc3plus and then increased by 2.
 * scratchBuffer : [tmp] aligned with scratchAlign() and passed to the
 *                 complex FFT as its work buffer.
 *
 * Steps: pre-processing of the first pair, two twiddle loops, the middle
 * pair, a complex FFT of length sizeOfFft / 2 over the interleaved pairs,
 * and a final negation of every odd-indexed element. As the comment inside
 * the body states, the result is not normalised by sizeOfFft.
 */
4302 0 : void BASOP_irfftN(Word32 *x, Word16 sizeOfFft, Word16 *scale, Word8 *scratchBuffer)
4303 : {
4304 : Dyn_Mem_Deluxe_In(Word16 sizeOfFft2, sizeOfFft4, sizeOfFft8; Word32 t1, t2, t3, t4, xb0, xb1, xt0, xt1;
4305 : Word32 * workBuffer; const Word32 *w32; Counter i;);
4306 :
4307 0 : workBuffer = (Word32 *)scratchAlign(scratchBuffer, 0); /* Size = 2 * BASOP_CFFT_MAX_LENGTH */
4308 :
4309 0 : w32 = rfft_twid(sizeOfFft);
4310 :
4311 0 : sizeOfFft2 = shr_pos(sizeOfFft, 1);
4312 0 : sizeOfFft4 = shr_pos(sizeOfFft, 2);
4313 0 : sizeOfFft8 = shr_pos(sizeOfFft, 3);
4314 :
/* First pair: both values are shifted right by 2; x[1] becomes xb1 - xb0. */
4315 0 : xb0 = L_shr_pos(x[0], 2);
4316 0 : xb1 = L_shr_pos(x[1], 2);
4317 0 : x[0] = L_add(xb0, xb1);
4318 0 : move32();
4319 0 : x[1] = L_sub(xb1, xb0);
4320 0 : move32();
4321 :
/* First part: twiddle table read forwards, odd entry as w1 and even entry as w2. */
4322 0 : FOR (i = 1; i < sizeOfFft8; i++)
4323 : {
4324 0 : RFFT_TWIDDLE2(x, t1, t2, t3, t4, w32[2 * i + 1], w32[2 * i], xb0, xb1, xt0, xt1);
4325 0 : x[2 * i] = L_sub(t1, t3);
4326 0 : move32();
4327 0 : x[2 * i + 1] = L_sub(t4, t2);
4328 0 : move32();
4329 0 : x[sizeOfFft - 2 * i] = L_add(t1, t3);
4330 0 : move32();
4331 0 : x[sizeOfFft - 2 * i + 1] = L_add(t2, t4);
4332 0 : move32();
4333 : }
4334 :
/* Second part: same table read backwards, with the even entry as w1 and the odd entry as w2. */
4335 0 : FOR (i = sizeOfFft8; i < sizeOfFft4; i++)
4336 : {
4337 0 : RFFT_TWIDDLE2(x, t1, t2, t3, t4, w32[(2 * sizeOfFft4 - 2 * i)], w32[(2 * sizeOfFft4 - 2 * i + 1)], xb0, xb1,
4338 : xt0, xt1);
4339 0 : x[2 * i] = L_sub(t1, t3);
4340 0 : move32();
4341 0 : x[2 * i + 1] = L_sub(t4, t2);
4342 0 : move32();
4343 0 : x[sizeOfFft - 2 * i] = L_add(t1, t3);
4344 0 : move32();
4345 0 : x[sizeOfFft - 2 * i + 1] = L_add(t2, t4);
4346 0 : move32();
4347 : }
4348 :
/* The loop leaves i == sizeOfFft4, so for a size that is a multiple of 4 the
 * source and destination index coincide: the middle pair is halved in place. */
4349 0 : x[sizeOfFft - 2 * i] = L_shr_pos(x[2 * i + 0], 1);
4350 0 : move32();
4351 0 : x[sizeOfFft - 2 * i + 1] = L_shr_pos(x[2 * i + 1], 1);
4352 0 : move32();
4353 :
/* Half-length complex FFT over the interleaved pairs of x. */
4354 0 : BASOP_cfft_lc3plus(&x[0], &x[1], sizeOfFft2, 2, scale, workBuffer);
4355 :
4356 : /* If you want BASOP_irfft to be inverse to BASOP_rfft then the result needs
4357 : * to be normalised by sizeOfFft */
/* Negate every odd-indexed element of the result. */
4358 0 : FOR (i = 0; i < sizeOfFft2; i++)
4359 : {
4360 0 : x[2 * i + 1] = L_negate(x[2 * i + 1]);
4361 0 : move32();
4362 : }
4363 :
/* Report two additional bits of scaling on top of what the complex FFT added. */
4364 0 : *scale = add(*scale, 2);
4365 0 : move16();
4366 :
4367 : Dyn_Mem_Deluxe_Out();
4368 0 : }
4369 :
4370 :
|