Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #include <assert.h>
40 : #include "prot_fx.h"
41 : #include "ivas_cnst.h"
42 : #include "wmc_auto.h"
43 : #include "basop_util.h"
44 : #include "complex_basop.h"
45 :
46 : #define Mpy_32_xx Mpy_32_16_1 /* multiplication used by the cplxMpy4_8_0 macro below */
47 : /* The constants below are written as 32-bit hex literals and passed through WORD322WORD16 */
48 : #define FFTC( x ) WORD322WORD16( (Word32) x )
49 : /* Constant used by the 3-point stages */
50 : #define C31 ( FFTC( 0x91261468 ) ) /* FL2WORD32( -0.86602540) -sqrt(3)/2 */
51 : /* Constants used by the 5-point stages; C52 and C55 are stored halved, the code shifts their products left by 1 */
52 : #define C51 ( FFTC( 0x79bc3854 ) ) /* FL2WORD32( 0.95105652) */
53 : #define C52 ( FFTC( 0x9d839db0 ) ) /* FL2WORD32(-1.53884180/2) */
54 : #define C53 ( FFTC( 0xd18053ce ) ) /* FL2WORD32(-0.36327126) */
55 : #define C54 ( FFTC( 0x478dde64 ) ) /* FL2WORD32( 0.55901699) */
56 : #define C55 ( FFTC( 0xb0000001 ) ) /* FL2WORD32(-1.25/2) */
57 : /* Constants used by the 8-point FFT */
58 : #define C81 ( FFTC( 0x5a82799a ) ) /* FL2WORD32( 7.071067811865475e-1) */
59 : #define C82 ( FFTC( 0xa57d8666 ) ) /* FL2WORD32(-7.071067811865475e-1) */
60 : /* Constants named for the 16-point FFT */
61 : #define C161 ( FFTC( 0x5a82799a ) ) /* FL2WORD32( 7.071067811865475e-1) INV_SQRT2 */
62 : #define C162 ( FFTC( 0xa57d8666 ) ) /* FL2WORD32(-7.071067811865475e-1) -INV_SQRT2 */
63 :
64 : #define C163 ( FFTC( 0x7641af3d ) ) /* FL2WORD32( 9.238795325112867e-1) COS_PI_DIV8 */
65 : #define C164 ( FFTC( 0x89be50c3 ) ) /* FL2WORD32(-9.238795325112867e-1) -COS_PI_DIV8 */
66 :
67 : #define C165 ( FFTC( 0x30fbc54d ) ) /* FL2WORD32( 3.826834323650898e-1) COS_3PI_DIV8 */
68 : #define C166 ( FFTC( 0xcf043ab3 ) ) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 */
69 :
70 : /* Complex product ( a + j*b ) * ( c + j*d ) with each part shifted by 1; expands to TWO statements, so never use it as an unbraced if/else body */
71 : #define cplxMpy4_8_0( re, im, a, b, c, d ) \
72 : re = L_shr( L_sub( Mpy_32_xx( a, c ), Mpy_32_xx( b, d ) ), 1 ); \
73 : im = L_shr( L_add( Mpy_32_xx( a, d ), Mpy_32_xx( b, c ) ), 1 );
74 : /* Variant without multiplication: only the shift by 1 is applied to a and b; also expands to two statements */
75 : #define cplxMpy4_8_1( re, im, a, b ) \
76 : re = L_shr( a, 1 ); \
77 : im = L_shr( b, 1 );
78 : /* NOTE(review): 'bsacle' is presumably a typo of 'bscale'; only the parameter name of this prototype is affected */
79 : void fft16_with_cmplx_data( cmplx *pInp, Word16 bsacle );
80 :
81 : /* All per-size scale factors are reset to 0 for this file, so the CL_shr( ., SCALEFACTORx ) calls below shift by 0 */
82 : #undef SCALEFACTOR5
83 : #undef SCALEFACTOR8
84 : #undef SCALEFACTOR10
85 : #undef SCALEFACTOR16
86 : #undef SCALEFACTOR20
87 : #undef SCALEFACTOR30
88 : #undef SCALEFACTOR30_1
89 : #undef SCALEFACTOR30_2
90 :
91 : #define SCALEFACTOR5 ( 0 )
92 : #define SCALEFACTOR8 ( 0 )
93 : #define SCALEFACTOR10 ( 0 )
94 : #define SCALEFACTOR16 ( 0 )
95 : #define SCALEFACTOR20 ( 0 )
96 : #define SCALEFACTOR30 ( 0 )
97 : #define SCALEFACTOR30_1 ( 0 )
98 : #define SCALEFACTOR30_2 ( 0 )
99 : /* Forward declarations of the complex scaling helpers used by the FFT kernels below */
100 : cmplx CL_scale_t( cmplx x, Word16 y );
101 : cmplx CL_dscale_t( cmplx x, Word16 y1, Word16 y2 );
102 :
103 : /**
104 : * \brief Function performs a complex 8-point FFT
105 : * The FFT is performed inplace. The result of the FFT
106 : * is scaled by SCALEFACTOR8 bits (SCALEFACTOR8 is defined as 0 in this file).
107 : *
108 : * WOPS with 32x16 bit multiplications: 108 cycles
109 : *
110 : * \param [i/o] inp complex input / output buffer; inp[0]..inp[7] are read
111 : * and then overwritten with the result
112 : * (single cmplx array; there are no separate re / im / stride arguments)
113 : *
114 : * \return void
115 : */
116 4917700 : static void fft8_with_cmplx_data( cmplx *inp /*Qx*/ )
117 : {
118 : cmplx x0, x1, x2, x3, x4, x5, x6, x7;
119 : cmplx s0, s1, s2, s3, s4, s5, s6, s7;
120 : cmplx t0, t1, t2, t3, t4, t5, t6, t7;
121 :
122 : /* Load the 8 inputs through CL_shr( ., SCALEFACTOR8 ) */
123 4917700 : x0 = CL_shr( inp[0], SCALEFACTOR8 );
124 4917700 : x1 = CL_shr( inp[1], SCALEFACTOR8 );
125 4917700 : x2 = CL_shr( inp[2], SCALEFACTOR8 );
126 4917700 : x3 = CL_shr( inp[3], SCALEFACTOR8 );
127 4917700 : x4 = CL_shr( inp[4], SCALEFACTOR8 );
128 4917700 : x5 = CL_shr( inp[5], SCALEFACTOR8 );
129 4917700 : x6 = CL_shr( inp[6], SCALEFACTOR8 );
130 4917700 : x7 = CL_shr( inp[7], SCALEFACTOR8 );
131 :
132 : /* loops are unrolled: sums and differences of inputs that are 4 elements apart */
133 : {
134 4917700 : t0 = CL_add( x0, x4 );
135 4917700 : t1 = CL_sub( x0, x4 );
136 :
137 4917700 : t2 = CL_add( x1, x5 );
138 4917700 : t3 = CL_sub( x1, x5 );
139 :
140 4917700 : t4 = CL_add( x2, x6 );
141 4917700 : t5 = CL_sub( x2, x6 );
142 :
143 4917700 : t6 = CL_add( x3, x7 );
144 4917700 : t7 = CL_sub( x3, x7 );
145 : }
146 :
147 : /* Pre-additions and core multiplications */
148 :
149 4917700 : s0 = CL_add( t0, t4 );
150 4917700 : s2 = CL_sub( t0, t4 );
151 :
152 4917700 : s4 = CL_mac_j( t1, t5 );
153 4917700 : s5 = CL_msu_j( t1, t5 );
154 :
155 4917700 : s1 = CL_add( t2, t6 );
156 4917700 : s3 = CL_sub( t2, t6 );
157 4917700 : s3 = CL_mul_j( s3 );
158 : /* t0 and t1 are reused here; their first-stage values were already consumed by s0, s2, s4 and s5 */
159 4917700 : t0 = CL_add( t3, t7 );
160 4917700 : t1 = CL_sub( t3, t7 );
161 : /* s6 and s7 are the only terms scaled by constants (C81 / C82) */
162 4917700 : s6 = CL_scale_t( CL_msu_j( t1, t0 ), C81 );
163 4917700 : s7 = CL_dscale_t( CL_swap_real_imag( CL_msu_j( t0, t1 ) ), C81, C82 );
164 :
165 : /* Post-additions: write the 8 outputs back to inp */
166 :
167 4917700 : inp[0] = CL_add( s0, s1 );
168 4917700 : inp[4] = CL_sub( s0, s1 );
169 :
170 4917700 : inp[2] = CL_sub( s2, s3 );
171 4917700 : inp[6] = CL_add( s2, s3 );
172 :
173 4917700 : inp[3] = CL_add( s4, s7 );
174 4917700 : inp[7] = CL_sub( s4, s7 );
175 :
176 4917700 : inp[1] = CL_add( s5, s6 );
177 4917700 : inp[5] = CL_sub( s5, s6 );
178 : #ifdef WMOPS
179 : multiCounter[currCounter].CL_move += 8; /* complexity accounting: 8 is added to the CL_move counter */
180 : #endif
181 4917700 : }
182 :
183 : /**
184 : * \brief Function performs a complex 5-point FFT
185 : * The FFT is performed inplace. The result of the FFT
186 : * is scaled by SCALEFACTOR5 bits (SCALEFACTOR5 is defined as 0 in this file).
187 : *
188 : * WOPS with 32x16 bit multiplications: 88 cycles
189 : *
190 : * \param [i/o] inp complex input / output buffer; inp[0]..inp[4] are read
191 : * and then overwritten with the result
192 : * (single cmplx array; there are no separate re / im / stride arguments)
193 : *
194 : * \return void
195 : */
196 0 : static void fft5_with_cmplx_data( cmplx *inp /*Qx*/ )
197 : {
198 : cmplx x0, x1, x2, x3, x4;
199 : cmplx y1, y2, y3, y4;
200 : cmplx t;
201 : /* Load the 5 inputs through CL_shr( ., SCALEFACTOR5 ) */
202 0 : x0 = CL_shr( inp[0], SCALEFACTOR5 );
203 0 : x1 = CL_shr( inp[1], SCALEFACTOR5 );
204 0 : x2 = CL_shr( inp[2], SCALEFACTOR5 );
205 0 : x3 = CL_shr( inp[3], SCALEFACTOR5 );
206 0 : x4 = CL_shr( inp[4], SCALEFACTOR5 );
207 : /* Sums and differences of the pairs ( x1, x4 ) and ( x2, x3 ); inp[0] receives x0 plus the sum of the other four inputs */
208 0 : y1 = CL_add( x1, x4 );
209 0 : y4 = CL_sub( x1, x4 );
210 0 : y3 = CL_add( x2, x3 );
211 0 : y2 = CL_sub( x2, x3 );
212 0 : t = CL_scale_t( CL_sub( y1, y3 ), C54 );
213 0 : y1 = CL_add( y1, y3 );
214 0 : inp[0] = CL_add( x0, y1 );
215 :
216 : /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
217 : the values as fracts */
218 0 : y1 = CL_add( inp[0], ( CL_shl( CL_scale_t( y1, C55 ), 1 ) ) );
219 0 : y3 = CL_sub( y1, t );
220 0 : y1 = CL_add( y1, t );
221 :
222 0 : t = CL_scale_t( CL_add( y4, y2 ), C51 );
223 : /* Bit shift left because of the constant C52 which was scaled with the factor 0.5 because of the representation of
224 : the values as fracts */
225 0 : y4 = CL_add( t, CL_shl( CL_scale_t( y4, C52 ), 1 ) );
226 0 : y2 = CL_add( t, CL_scale_t( y2, C53 ) );
227 :
228 :
229 : /* combination: write outputs 1..4 back to inp */
230 0 : inp[1] = CL_msu_j( y1, y2 );
231 0 : inp[4] = CL_mac_j( y1, y2 );
232 :
233 0 : inp[2] = CL_mac_j( y3, y4 );
234 0 : inp[3] = CL_msu_j( y3, y4 );
235 :
236 : #ifdef WMOPS
237 : multiCounter[currCounter].CL_move += 5; /* complexity accounting: 5 is added to the CL_move counter */
238 : #endif
239 0 : }
240 :
241 : /**
242 : * \brief Function performs a complex 10-point FFT
243 : * The FFT is performed inplace. The result of the FFT
244 : * is scaled by SCALEFACTOR10 bits (SCALEFACTOR10 is defined as 0 in this file).
245 : *
246 : * WOPS with 32x16 bit multiplications: 196 cycles
247 : *
248 : * \param [i/o] inp_data complex input / output buffer; inp_data[0]..inp_data[9]
249 : * are read and then overwritten with the result
250 : * (single cmplx array; there are no separate re / im / stride arguments)
251 : *
252 : * \return void
253 : */
254 13174712 : static void fft10_with_cmplx_data( cmplx *inp_data /*Qx*/ )
255 : {
256 : cmplx r1, r2, r3, r4;
257 : cmplx x0, x1, x2, x3, x4, t;
258 : cmplx y[10];
259 :
260 : /* FOR i=0: 5-point stage on the even-indexed inputs 0, 2, 4, 6, 8; results go to y[0], y[2], y[4], y[6], y[8] */
261 : {
262 13174712 : x0 = CL_shr( inp_data[0], SCALEFACTOR10 );
263 13174712 : x1 = CL_shr( inp_data[2], SCALEFACTOR10 );
264 13174712 : x2 = CL_shr( inp_data[4], SCALEFACTOR10 );
265 13174712 : x3 = CL_shr( inp_data[6], SCALEFACTOR10 );
266 13174712 : x4 = CL_shr( inp_data[8], SCALEFACTOR10 );
267 :
268 13174712 : r1 = CL_add( x3, x2 );
269 13174712 : r4 = CL_sub( x3, x2 );
270 13174712 : r3 = CL_add( x1, x4 );
271 13174712 : r2 = CL_sub( x1, x4 );
272 13174712 : t = CL_scale_t( CL_sub( r1, r3 ), C54 );
273 13174712 : r1 = CL_add( r1, r3 );
274 13174712 : y[0] = CL_add( x0, r1 );
275 13174712 : r1 = CL_add( y[0], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) ); /* shift left by 1: C55 is stored halved */
276 13174712 : r3 = CL_sub( r1, t );
277 13174712 : r1 = CL_add( r1, t );
278 13174712 : t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
279 13174712 : r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) ); /* shift left by 1: C52 is stored halved */
280 13174712 : r2 = CL_add( t, CL_scale_t( r2, C53 ) );
281 :
282 :
283 13174712 : y[2] = CL_msu_j( r1, r2 );
284 13174712 : y[8] = CL_mac_j( r1, r2 );
285 13174712 : y[4] = CL_mac_j( r3, r4 );
286 13174712 : y[6] = CL_msu_j( r3, r4 );
287 : }
288 : /* FOR i=1: 5-point stage on the odd-indexed inputs 5, 1, 3, 7, 9; results go to y[1], y[3], y[5], y[7], y[9] */
289 : {
290 13174712 : x0 = CL_shr( inp_data[5], SCALEFACTOR10 );
291 13174712 : x1 = CL_shr( inp_data[1], SCALEFACTOR10 );
292 13174712 : x2 = CL_shr( inp_data[3], SCALEFACTOR10 );
293 13174712 : x3 = CL_shr( inp_data[7], SCALEFACTOR10 );
294 13174712 : x4 = CL_shr( inp_data[9], SCALEFACTOR10 );
295 :
296 13174712 : r1 = CL_add( x1, x4 );
297 13174712 : r4 = CL_sub( x1, x4 );
298 13174712 : r3 = CL_add( x3, x2 );
299 13174712 : r2 = CL_sub( x3, x2 );
300 13174712 : t = CL_scale_t( CL_sub( r1, r3 ), C54 );
301 13174712 : r1 = CL_add( r1, r3 );
302 13174712 : y[1] = CL_add( x0, r1 );
303 13174712 : r1 = CL_add( y[1], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) ); /* shift left by 1: C55 is stored halved */
304 13174712 : r3 = CL_sub( r1, t );
305 13174712 : r1 = CL_add( r1, t );
306 13174712 : t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
307 13174712 : r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) ); /* shift left by 1: C52 is stored halved */
308 13174712 : r2 = CL_add( t, CL_scale_t( r2, C53 ) );
309 :
310 :
311 13174712 : y[3] = CL_msu_j( r1, r2 );
312 13174712 : y[9] = CL_mac_j( r1, r2 );
313 13174712 : y[5] = CL_mac_j( r3, r4 );
314 13174712 : y[7] = CL_msu_j( r3, r4 );
315 : }
316 : /* Final 2-point stages: y[k] + y[k+1] goes to inp_data[k], y[k] - y[k+1] goes to inp_data[( k + 5 ) % 10] */
317 : /* FOR i=0 */
318 : {
319 13174712 : inp_data[0] = CL_add( y[0], y[1] );
320 13174712 : inp_data[5] = CL_sub( y[0], y[1] );
321 : }
322 : /* FOR i=2 */
323 : {
324 13174712 : inp_data[2] = CL_add( y[2], y[3] );
325 13174712 : inp_data[7] = CL_sub( y[2], y[3] );
326 : }
327 : /* FOR i=4 */
328 : {
329 13174712 : inp_data[4] = CL_add( y[4], y[5] );
330 13174712 : inp_data[9] = CL_sub( y[4], y[5] );
331 : }
332 : /* FOR i=6 */
333 : {
334 13174712 : inp_data[6] = CL_add( y[6], y[7] );
335 13174712 : inp_data[1] = CL_sub( y[6], y[7] );
336 : }
337 : /* FOR i=8 */
338 : {
339 13174712 : inp_data[8] = CL_add( y[8], y[9] );
340 13174712 : inp_data[3] = CL_sub( y[8], y[9] );
341 : }
342 :
343 : #ifdef WMOPS
344 : multiCounter[currCounter].CL_move += 10; /* complexity accounting: 10 is added to the CL_move counter */
345 : #endif
346 13174712 : }
347 :
348 : /**
349 : * \brief Function performs a complex 20-point FFT
350 : * The FFT is performed inplace. The result of the FFT
351 : * is scaled by SCALEFACTOR20 bits (SCALEFACTOR20 is defined as 0 in this file).
352 : *
353 : * WOPS with 32x16 bit multiplications: 432 cycles
354 : *
355 : * \param [i/o] inp_data complex input / output buffer; inp_data[0]..inp_data[19]
356 : * are read and then overwritten with the result
357 : * (single cmplx array; there are no separate re / im / stride arguments)
358 : *
359 : * \return void
360 : */
361 16880132 : static void fft20_with_cmplx_data( cmplx *inp_data /*Qx*/ )
362 : {
363 : cmplx r1, r2, r3, r4;
364 : cmplx x0, x1, x2, x3, x4;
365 : cmplx t, t0, t1, t2, t3;
366 : cmplx y[20];
367 : cmplx *y0, *y1, *y2, *y3, *y4;
368 : /* y0..y4 address five groups of four entries inside y (offsets 0, 4, 16, 8, 12); each 5-point stage fills one entry per group */
369 16880132 : y0 = y;
370 16880132 : y1 = &y[4];
371 16880132 : y2 = &y[16];
372 16880132 : y3 = &y[8];
373 16880132 : y4 = &y[12];
374 :
375 : { /* 1st 5-point stage: inputs 0, 16, 12, 8, 4 -> entry 0 of each group */
376 16880132 : x0 = CL_shr( inp_data[0], SCALEFACTOR20 );
377 16880132 : x1 = CL_shr( inp_data[16], SCALEFACTOR20 );
378 16880132 : x2 = CL_shr( inp_data[12], SCALEFACTOR20 );
379 16880132 : x3 = CL_shr( inp_data[8], SCALEFACTOR20 );
380 16880132 : x4 = CL_shr( inp_data[4], SCALEFACTOR20 );
381 :
382 16880132 : r4 = CL_sub( x1, x4 );
383 16880132 : r2 = CL_sub( x2, x3 );
384 16880132 : r1 = CL_add( x1, x4 );
385 16880132 : r3 = CL_add( x2, x3 );
386 16880132 : t = CL_scale_t( CL_sub( r1, r3 ), C54 );
387 16880132 : r1 = CL_add( r1, r3 );
388 16880132 : y0[0] = CL_add( x0, r1 );
389 16880132 : r1 = CL_add( y0[0], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) ); /* shift left by 1: C55 is stored halved */
390 16880132 : r3 = CL_sub( r1, t );
391 16880132 : r1 = CL_add( r1, t );
392 16880132 : t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
393 16880132 : r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) ); /* shift left by 1: C52 is stored halved */
394 16880132 : r2 = CL_add( t, CL_scale_t( r2, C53 ) );
395 :
396 :
397 16880132 : y1[0] = CL_msu_j( r1, r2 );
398 16880132 : y2[0] = CL_mac_j( r1, r2 );
399 16880132 : y3[0] = CL_mac_j( r3, r4 );
400 16880132 : y4[0] = CL_msu_j( r3, r4 );
401 : }
402 : { /* 2nd 5-point stage: inputs 5, 1, 17, 13, 9 -> entry 1 of each group */
403 16880132 : x0 = CL_shr( inp_data[5], SCALEFACTOR20 );
404 16880132 : x1 = CL_shr( inp_data[1], SCALEFACTOR20 );
405 16880132 : x2 = CL_shr( inp_data[17], SCALEFACTOR20 );
406 16880132 : x3 = CL_shr( inp_data[13], SCALEFACTOR20 );
407 16880132 : x4 = CL_shr( inp_data[9], SCALEFACTOR20 );
408 :
409 16880132 : r4 = CL_sub( x1, x4 );
410 16880132 : r2 = CL_sub( x2, x3 );
411 16880132 : r1 = CL_add( x1, x4 );
412 16880132 : r3 = CL_add( x2, x3 );
413 16880132 : t = CL_scale_t( CL_sub( r1, r3 ), C54 );
414 16880132 : r1 = CL_add( r1, r3 );
415 16880132 : y0[1] = CL_add( x0, r1 );
416 16880132 : r1 = CL_add( y0[1], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
417 16880132 : r3 = CL_sub( r1, t );
418 16880132 : r1 = CL_add( r1, t );
419 16880132 : t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
420 16880132 : r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
421 16880132 : r2 = CL_add( t, CL_scale_t( r2, C53 ) );
422 :
423 :
424 16880132 : y1[1] = CL_msu_j( r1, r2 );
425 16880132 : y2[1] = CL_mac_j( r1, r2 );
426 16880132 : y3[1] = CL_mac_j( r3, r4 );
427 16880132 : y4[1] = CL_msu_j( r3, r4 );
428 : }
429 : { /* 3rd 5-point stage: inputs 10, 6, 2, 18, 14 -> entry 2 of each group */
430 16880132 : x0 = CL_shr( inp_data[10], SCALEFACTOR20 );
431 16880132 : x1 = CL_shr( inp_data[6], SCALEFACTOR20 );
432 16880132 : x2 = CL_shr( inp_data[2], SCALEFACTOR20 );
433 16880132 : x3 = CL_shr( inp_data[18], SCALEFACTOR20 );
434 16880132 : x4 = CL_shr( inp_data[14], SCALEFACTOR20 );
435 :
436 16880132 : r4 = CL_sub( x1, x4 );
437 16880132 : r2 = CL_sub( x2, x3 );
438 16880132 : r1 = CL_add( x1, x4 );
439 16880132 : r3 = CL_add( x2, x3 );
440 16880132 : t = CL_scale_t( CL_sub( r1, r3 ), C54 );
441 16880132 : r1 = CL_add( r1, r3 );
442 16880132 : y0[2] = CL_add( x0, r1 );
443 16880132 : r1 = CL_add( y0[2], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
444 16880132 : r3 = CL_sub( r1, t );
445 16880132 : r1 = CL_add( r1, t );
446 16880132 : t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
447 16880132 : r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
448 16880132 : r2 = CL_add( t, CL_scale_t( r2, C53 ) );
449 :
450 :
451 16880132 : y1[2] = CL_msu_j( r1, r2 );
452 16880132 : y2[2] = CL_mac_j( r1, r2 );
453 16880132 : y3[2] = CL_mac_j( r3, r4 );
454 16880132 : y4[2] = CL_msu_j( r3, r4 );
455 : }
456 : { /* 4th 5-point stage: inputs 15, 11, 7, 3, 19 -> entry 3 of each group */
457 16880132 : x0 = CL_shr( inp_data[15], SCALEFACTOR20 );
458 16880132 : x1 = CL_shr( inp_data[11], SCALEFACTOR20 );
459 16880132 : x2 = CL_shr( inp_data[7], SCALEFACTOR20 );
460 16880132 : x3 = CL_shr( inp_data[3], SCALEFACTOR20 );
461 16880132 : x4 = CL_shr( inp_data[19], SCALEFACTOR20 );
462 :
463 16880132 : r4 = CL_sub( x1, x4 );
464 16880132 : r2 = CL_sub( x2, x3 );
465 16880132 : r1 = CL_add( x1, x4 );
466 16880132 : r3 = CL_add( x2, x3 );
467 16880132 : t = CL_scale_t( CL_sub( r1, r3 ), C54 );
468 16880132 : r1 = CL_add( r1, r3 );
469 16880132 : y0[3] = CL_add( x0, r1 );
470 16880132 : r1 = CL_add( y0[3], ( CL_shl( CL_scale_t( r1, C55 ), 1 ) ) );
471 16880132 : r3 = CL_sub( r1, t );
472 16880132 : r1 = CL_add( r1, t );
473 16880132 : t = CL_scale_t( ( CL_add( r4, r2 ) ), C51 );
474 16880132 : r4 = CL_add( t, CL_shl( CL_scale_t( r4, C52 ), 1 ) );
475 16880132 : r2 = CL_add( t, CL_scale_t( r2, C53 ) );
476 :
477 :
478 16880132 : y1[3] = CL_msu_j( r1, r2 );
479 16880132 : y2[3] = CL_mac_j( r1, r2 );
480 16880132 : y3[3] = CL_mac_j( r3, r4 );
481 16880132 : y4[3] = CL_msu_j( r3, r4 );
482 : }
483 : /* Five 4-point stages; ptr_y walks y sequentially, so each stage consumes the next four consecutive entries */
484 : {
485 16880132 : cmplx *ptr_y = y;
486 : { /* y[0..3] -> inp_data[0], [5], [10], [15] */
487 : cmplx Cy0, Cy1, Cy2, Cy3;
488 :
489 16880132 : Cy0 = *ptr_y++;
490 16880132 : Cy1 = *ptr_y++;
491 16880132 : Cy2 = *ptr_y++;
492 16880132 : Cy3 = *ptr_y++;
493 :
494 : /* Pre-additions */
495 16880132 : t0 = CL_add( Cy0, Cy2 );
496 16880132 : t1 = CL_sub( Cy0, Cy2 );
497 16880132 : t2 = CL_add( Cy1, Cy3 );
498 16880132 : t3 = CL_sub( Cy1, Cy3 );
499 :
500 :
501 16880132 : inp_data[0] = CL_add( t0, t2 );
502 16880132 : inp_data[5] = CL_msu_j( t1, t3 );
503 16880132 : inp_data[10] = CL_sub( t0, t2 );
504 16880132 : inp_data[15] = CL_mac_j( t1, t3 );
505 : }
506 :
507 : { /* y[4..7] -> inp_data[4], [9], [14], [19] */
508 : cmplx Cy0, Cy1, Cy2, Cy3;
509 :
510 16880132 : Cy0 = *ptr_y++;
511 16880132 : Cy1 = *ptr_y++;
512 16880132 : Cy2 = *ptr_y++;
513 16880132 : Cy3 = *ptr_y++;
514 :
515 : /* Pre-additions */
516 16880132 : t0 = CL_add( Cy0, Cy2 );
517 16880132 : t1 = CL_sub( Cy0, Cy2 );
518 16880132 : t2 = CL_add( Cy1, Cy3 );
519 16880132 : t3 = CL_sub( Cy1, Cy3 );
520 :
521 :
522 16880132 : inp_data[4] = CL_add( t0, t2 );
523 16880132 : inp_data[9] = CL_msu_j( t1, t3 );
524 16880132 : inp_data[14] = CL_sub( t0, t2 );
525 16880132 : inp_data[19] = CL_mac_j( t1, t3 );
526 : }
527 :
528 : { /* y[8..11] -> inp_data[8], [13], [18], [3] */
529 : cmplx Cy0, Cy1, Cy2, Cy3;
530 :
531 16880132 : Cy0 = *ptr_y++;
532 16880132 : Cy1 = *ptr_y++;
533 16880132 : Cy2 = *ptr_y++;
534 16880132 : Cy3 = *ptr_y++;
535 :
536 : /* Pre-additions */
537 16880132 : t0 = CL_add( Cy0, Cy2 );
538 16880132 : t1 = CL_sub( Cy0, Cy2 );
539 16880132 : t2 = CL_add( Cy1, Cy3 );
540 16880132 : t3 = CL_sub( Cy1, Cy3 );
541 :
542 :
543 16880132 : inp_data[8] = CL_add( t0, t2 );
544 16880132 : inp_data[13] = CL_msu_j( t1, t3 );
545 16880132 : inp_data[18] = CL_sub( t0, t2 );
546 16880132 : inp_data[3] = CL_mac_j( t1, t3 );
547 : }
548 :
549 : { /* y[12..15] -> inp_data[12], [17], [2], [7] */
550 : cmplx Cy0, Cy1, Cy2, Cy3;
551 :
552 16880132 : Cy0 = *ptr_y++;
553 16880132 : Cy1 = *ptr_y++;
554 16880132 : Cy2 = *ptr_y++;
555 16880132 : Cy3 = *ptr_y++;
556 :
557 : /* Pre-additions */
558 16880132 : t0 = CL_add( Cy0, Cy2 );
559 16880132 : t1 = CL_sub( Cy0, Cy2 );
560 16880132 : t2 = CL_add( Cy1, Cy3 );
561 16880132 : t3 = CL_sub( Cy1, Cy3 );
562 :
563 16880132 : inp_data[12] = CL_add( t0, t2 );
564 16880132 : inp_data[17] = CL_msu_j( t1, t3 );
565 16880132 : inp_data[2] = CL_sub( t0, t2 );
566 16880132 : inp_data[7] = CL_mac_j( t1, t3 );
567 : }
568 :
569 : { /* y[16..19] -> inp_data[16], [1], [6], [11] */
570 : cmplx Cy0, Cy1, Cy2, Cy3;
571 :
572 16880132 : Cy0 = *ptr_y++;
573 16880132 : Cy1 = *ptr_y++;
574 16880132 : Cy2 = *ptr_y++;
575 16880132 : Cy3 = *ptr_y++;
576 :
577 : /* Pre-additions */
578 16880132 : t0 = CL_add( Cy0, Cy2 );
579 16880132 : t1 = CL_sub( Cy0, Cy2 );
580 16880132 : t2 = CL_add( Cy1, Cy3 );
581 16880132 : t3 = CL_sub( Cy1, Cy3 );
582 :
583 :
584 16880132 : inp_data[16] = CL_add( t0, t2 );
585 16880132 : inp_data[1] = CL_msu_j( t1, t3 );
586 16880132 : inp_data[6] = CL_sub( t0, t2 );
587 16880132 : inp_data[11] = CL_mac_j( t1, t3 );
588 : }
589 : }
590 : #ifdef WMOPS
591 : multiCounter[currCounter].CL_move += 20; /* complexity accounting: 20 is added to the CL_move counter */
592 : #endif
593 16880132 : }
594 :
595 :
596 : /**
597 : * \brief Function performs a complex 30-point FFT
598 : * The FFT is performed inplace. The result of the FFT
599 : * is scaled by SCALEFACTOR30 bits.
600 : *
601 : * WOPS with 32x16 bit multiplications: 828 cycles
602 : *
603 : * \param [i/o] inp complex input / output buffer; inp[0]..inp[29] are read,
604 : * the result is written back in place
605 : * (single cmplx array; there are no separate re / im / stride arguments)
606 : *
607 : * \return void
608 : */
609 51796414 : static void fft30_with_cmplx_data( cmplx *inp /*Qx*/ )
610 : {
611 51796414 : cmplx *l = &inp[0];
612 51796414 : cmplx *h = &inp[15];
613 :
614 : cmplx z[30], y[15], x[15], rs1, rs2, rs3, rs4, t;
615 :
616 : /* 1. FFT15 stage */
617 :
618 51796414 : x[0] = CL_shr( inp[0], SCALEFACTOR30_1 );
619 51796414 : x[1] = CL_shr( inp[18], SCALEFACTOR30_1 );
620 51796414 : x[2] = CL_shr( inp[6], SCALEFACTOR30_1 );
621 51796414 : x[3] = CL_shr( inp[24], SCALEFACTOR30_1 );
622 51796414 : x[4] = CL_shr( inp[12], SCALEFACTOR30_1 );
623 :
624 51796414 : x[5] = CL_shr( inp[20], SCALEFACTOR30_1 );
625 51796414 : x[6] = CL_shr( inp[8], SCALEFACTOR30_1 );
626 51796414 : x[7] = CL_shr( inp[26], SCALEFACTOR30_1 );
627 51796414 : x[8] = CL_shr( inp[14], SCALEFACTOR30_1 );
628 51796414 : x[9] = CL_shr( inp[2], SCALEFACTOR30_1 );
629 :
630 51796414 : x[10] = CL_shr( inp[10], SCALEFACTOR30_1 );
631 51796414 : x[11] = CL_shr( inp[28], SCALEFACTOR30_1 );
632 51796414 : x[12] = CL_shr( inp[16], SCALEFACTOR30_1 );
633 51796414 : x[13] = CL_shr( inp[4], SCALEFACTOR30_1 );
634 51796414 : x[14] = CL_shr( inp[22], SCALEFACTOR30_1 );
635 :
636 :
637 : /* 1. FFT5 stage */
638 51796414 : rs1 = CL_add( x[1], x[4] );
639 51796414 : rs4 = CL_sub( x[1], x[4] );
640 51796414 : rs3 = CL_add( x[2], x[3] );
641 51796414 : rs2 = CL_sub( x[2], x[3] );
642 51796414 : t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
643 51796414 : rs1 = CL_add( rs1, rs3 );
644 51796414 : y[0] = CL_add( x[0], rs1 );
645 51796414 : rs1 = CL_add( y[0], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
646 51796414 : rs3 = CL_sub( rs1, t );
647 51796414 : rs1 = CL_add( rs1, t );
648 51796414 : t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
649 51796414 : rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
650 51796414 : rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
651 :
652 : /* combination */
653 51796414 : y[1] = CL_msu_j( rs1, rs2 );
654 51796414 : y[4] = CL_mac_j( rs1, rs2 );
655 51796414 : y[2] = CL_mac_j( rs3, rs4 );
656 51796414 : y[3] = CL_msu_j( rs3, rs4 );
657 :
658 :
659 : /* 2. FFT5 stage */
660 51796414 : rs1 = CL_add( x[6], x[9] );
661 51796414 : rs4 = CL_sub( x[6], x[9] );
662 51796414 : rs3 = CL_add( x[7], x[8] );
663 51796414 : rs2 = CL_sub( x[7], x[8] );
664 51796414 : t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
665 51796414 : rs1 = CL_add( rs1, rs3 );
666 51796414 : y[5] = CL_add( x[5], rs1 );
667 51796414 : rs1 = CL_add( y[5], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
668 51796414 : rs3 = CL_sub( rs1, t );
669 51796414 : rs1 = CL_add( rs1, t );
670 51796414 : t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
671 51796414 : rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
672 51796414 : rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
673 :
674 : /* combination */
675 51796414 : y[6] = CL_msu_j( rs1, rs2 );
676 51796414 : y[9] = CL_mac_j( rs1, rs2 );
677 51796414 : y[7] = CL_mac_j( rs3, rs4 );
678 51796414 : y[8] = CL_msu_j( rs3, rs4 );
679 :
680 :
681 : /* 3. FFT5 stage */
682 51796414 : rs1 = CL_add( x[11], x[14] );
683 51796414 : rs4 = CL_sub( x[11], x[14] );
684 51796414 : rs3 = CL_add( x[12], x[13] );
685 51796414 : rs2 = CL_sub( x[12], x[13] );
686 51796414 : t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
687 51796414 : rs1 = CL_add( rs1, rs3 );
688 51796414 : y[10] = CL_add( x[10], rs1 );
689 51796414 : rs1 = CL_add( y[10], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
690 51796414 : rs3 = CL_sub( rs1, t );
691 51796414 : rs1 = CL_add( rs1, t );
692 51796414 : t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
693 51796414 : rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
694 51796414 : rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
695 :
696 : /* combination */
697 51796414 : y[11] = CL_msu_j( rs1, rs2 );
698 51796414 : y[14] = CL_mac_j( rs1, rs2 );
699 51796414 : y[12] = CL_mac_j( rs3, rs4 );
700 51796414 : y[13] = CL_msu_j( rs3, rs4 );
701 :
702 : /* 1. FFT3 stage */
703 : /* real part */
704 51796414 : rs1 = CL_add( y[5], y[10] );
705 51796414 : rs2 = CL_scale_t( CL_sub( y[5], y[10] ), C31 );
706 51796414 : z[0] = CL_add( y[0], rs1 );
707 51796414 : rs1 = CL_sub( y[0], CL_shr( rs1, 1 ) );
708 :
709 51796414 : z[10] = CL_mac_j( rs1, rs2 );
710 51796414 : z[5] = CL_msu_j( rs1, rs2 );
711 :
712 : /* 2. FFT3 stage */
713 51796414 : rs1 = CL_add( y[6], y[11] );
714 51796414 : rs2 = CL_scale_t( CL_sub( y[6], y[11] ), C31 );
715 51796414 : z[6] = CL_add( y[1], rs1 );
716 51796414 : rs1 = CL_sub( y[1], CL_shr( rs1, 1 ) );
717 :
718 51796414 : z[1] = CL_mac_j( rs1, rs2 );
719 51796414 : z[11] = CL_msu_j( rs1, rs2 );
720 :
721 :
722 : /* 3. FFT3 stage */
723 51796414 : rs1 = CL_add( y[7], y[12] );
724 51796414 : rs2 = CL_scale_t( CL_sub( y[7], y[12] ), C31 );
725 51796414 : z[12] = CL_add( y[2], rs1 );
726 51796414 : rs1 = CL_sub( y[2], CL_shr( rs1, 1 ) );
727 :
728 51796414 : z[7] = CL_mac_j( rs1, rs2 );
729 51796414 : z[2] = CL_msu_j( rs1, rs2 );
730 :
731 :
732 : /* 4. FFT3 stage */
733 51796414 : rs1 = CL_add( y[8], y[13] );
734 51796414 : rs2 = CL_scale_t( CL_sub( y[8], y[13] ), C31 );
735 51796414 : z[3] = CL_add( y[3], rs1 );
736 51796414 : rs1 = CL_sub( y[3], CL_shr( rs1, 1 ) );
737 :
738 51796414 : z[13] = CL_mac_j( rs1, rs2 );
739 51796414 : z[8] = CL_msu_j( rs1, rs2 );
740 :
741 :
742 : /* 5. FFT3 stage */
743 51796414 : rs1 = CL_add( y[9], y[14] );
744 51796414 : rs2 = CL_scale_t( CL_sub( y[9], y[14] ), C31 );
745 51796414 : z[9] = CL_add( y[4], rs1 );
746 51796414 : rs1 = CL_sub( y[4], CL_shr( rs1, 1 ) );
747 :
748 51796414 : z[4] = CL_mac_j( rs1, rs2 );
749 51796414 : z[14] = CL_msu_j( rs1, rs2 );
750 :
751 : /* 2. FFT15 stage */
752 51796414 : x[0] = CL_shr( inp[15], SCALEFACTOR30_1 );
753 51796414 : x[1] = CL_shr( inp[3], SCALEFACTOR30_1 );
754 51796414 : x[2] = CL_shr( inp[21], SCALEFACTOR30_1 );
755 51796414 : x[3] = CL_shr( inp[9], SCALEFACTOR30_1 );
756 51796414 : x[4] = CL_shr( inp[27], SCALEFACTOR30_1 );
757 :
758 51796414 : x[5] = CL_shr( inp[5], SCALEFACTOR30_1 );
759 51796414 : x[6] = CL_shr( inp[23], SCALEFACTOR30_1 );
760 51796414 : x[7] = CL_shr( inp[11], SCALEFACTOR30_1 );
761 51796414 : x[8] = CL_shr( inp[29], SCALEFACTOR30_1 );
762 51796414 : x[9] = CL_shr( inp[17], SCALEFACTOR30_1 );
763 :
764 51796414 : x[10] = CL_shr( inp[25], SCALEFACTOR30_1 );
765 51796414 : x[11] = CL_shr( inp[13], SCALEFACTOR30_1 );
766 51796414 : x[12] = CL_shr( inp[1], SCALEFACTOR30_1 );
767 51796414 : x[13] = CL_shr( inp[19], SCALEFACTOR30_1 );
768 51796414 : x[14] = CL_shr( inp[7], SCALEFACTOR30_1 );
769 :
770 : /* 1. FFT5 stage */
771 51796414 : rs1 = CL_add( x[1], x[4] );
772 51796414 : rs4 = CL_sub( x[1], x[4] );
773 51796414 : rs3 = CL_add( x[2], x[3] );
774 51796414 : rs2 = CL_sub( x[2], x[3] );
775 51796414 : t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
776 51796414 : rs1 = CL_add( rs1, rs3 );
777 51796414 : y[0] = CL_add( x[0], rs1 );
778 51796414 : rs1 = CL_add( y[0], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
779 51796414 : rs3 = CL_sub( rs1, t );
780 51796414 : rs1 = CL_add( rs1, t );
781 51796414 : t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
782 51796414 : rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
783 51796414 : rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
784 :
785 : /* combination */
786 51796414 : y[1] = CL_msu_j( rs1, rs2 );
787 51796414 : y[4] = CL_mac_j( rs1, rs2 );
788 51796414 : y[2] = CL_mac_j( rs3, rs4 );
789 51796414 : y[3] = CL_msu_j( rs3, rs4 );
790 :
791 :
792 : /* 2. FFT5 stage */
793 51796414 : rs1 = CL_add( x[6], x[9] );
794 51796414 : rs4 = CL_sub( x[6], x[9] );
795 51796414 : rs3 = CL_add( x[7], x[8] );
796 51796414 : rs2 = CL_sub( x[7], x[8] );
797 51796414 : t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
798 51796414 : rs1 = CL_add( rs1, rs3 );
799 51796414 : y[5] = CL_add( x[5], rs1 );
800 51796414 : rs1 = CL_add( y[5], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
801 51796414 : rs3 = CL_sub( rs1, t );
802 51796414 : rs1 = CL_add( rs1, t );
803 51796414 : t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
804 51796414 : rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
805 51796414 : rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
806 :
807 : /* combination */
808 51796414 : y[6] = CL_msu_j( rs1, rs2 );
809 51796414 : y[9] = CL_mac_j( rs1, rs2 );
810 51796414 : y[7] = CL_mac_j( rs3, rs4 );
811 51796414 : y[8] = CL_msu_j( rs3, rs4 );
812 :
813 :
814 : /* 3. FFT5 stage */
815 51796414 : rs1 = CL_add( x[11], x[14] );
816 51796414 : rs4 = CL_sub( x[11], x[14] );
817 51796414 : rs3 = CL_add( x[12], x[13] );
818 51796414 : rs2 = CL_sub( x[12], x[13] );
819 51796414 : t = CL_scale_t( CL_sub( rs1, rs3 ), C54 );
820 51796414 : rs1 = CL_add( rs1, rs3 );
821 51796414 : y[10] = CL_add( x[10], rs1 );
822 51796414 : rs1 = CL_add( y[10], ( CL_shl( CL_scale_t( rs1, C55 ), 1 ) ) );
823 51796414 : rs3 = CL_sub( rs1, t );
824 51796414 : rs1 = CL_add( rs1, t );
825 51796414 : t = CL_scale_t( CL_add( rs4, rs2 ), C51 );
826 51796414 : rs4 = CL_add( t, CL_shl( CL_scale_t( rs4, C52 ), 1 ) );
827 51796414 : rs2 = CL_add( t, CL_scale_t( rs2, C53 ) );
828 :
829 : /* combination */
830 51796414 : y[11] = CL_msu_j( rs1, rs2 );
831 51796414 : y[14] = CL_mac_j( rs1, rs2 );
832 51796414 : y[12] = CL_mac_j( rs3, rs4 );
833 51796414 : y[13] = CL_msu_j( rs3, rs4 );
834 :
835 : /* 1. FFT3 stage */
836 : /* real part */
837 51796414 : rs1 = CL_add( y[5], y[10] );
838 51796414 : rs2 = CL_scale_t( CL_sub( y[5], y[10] ), C31 );
839 51796414 : z[15] = CL_add( y[0], rs1 );
840 51796414 : rs1 = CL_sub( y[0], CL_shr( rs1, 1 ) );
841 :
842 51796414 : z[25] = CL_mac_j( rs1, rs2 );
843 51796414 : z[20] = CL_msu_j( rs1, rs2 );
844 :
845 : /* 2. FFT3 stage */
846 51796414 : rs1 = CL_add( y[6], y[11] );
847 51796414 : rs2 = CL_scale_t( CL_sub( y[6], y[11] ), C31 );
848 51796414 : z[21] = CL_add( y[1], rs1 );
849 51796414 : rs1 = CL_sub( y[1], CL_shr( rs1, 1 ) );
850 :
851 51796414 : z[16] = CL_mac_j( rs1, rs2 );
852 51796414 : z[26] = CL_msu_j( rs1, rs2 );
853 :
854 :
855 : /* 3. FFT3 stage */
856 51796414 : rs1 = CL_add( y[7], y[12] );
857 51796414 : rs2 = CL_scale_t( CL_sub( y[7], y[12] ), C31 );
858 51796414 : z[27] = CL_add( y[2], rs1 );
859 51796414 : rs1 = CL_sub( y[2], CL_shr( rs1, 1 ) );
860 :
861 51796414 : z[22] = CL_mac_j( rs1, rs2 );
862 51796414 : z[17] = CL_msu_j( rs1, rs2 );
863 :
864 :
865 : /* 4. FFT3 stage */
866 51796414 : rs1 = CL_add( y[8], y[13] );
867 51796414 : rs2 = CL_scale_t( CL_sub( y[8], y[13] ), C31 );
868 51796414 : z[18] = CL_add( y[3], rs1 );
869 51796414 : rs1 = CL_sub( y[3], CL_shr( rs1, 1 ) );
870 :
871 51796414 : z[28] = CL_mac_j( rs1, rs2 );
872 51796414 : z[23] = CL_msu_j( rs1, rs2 );
873 :
874 :
875 : /* 5. FFT3 stage */
876 51796414 : rs1 = CL_add( y[9], y[14] );
877 51796414 : rs2 = CL_scale_t( CL_sub( y[9], y[14] ), C31 );
878 51796414 : z[24] = CL_add( y[4], rs1 );
879 51796414 : rs1 = CL_sub( y[4], CL_shr( rs1, 1 ) );
880 :
881 51796414 : z[19] = CL_mac_j( rs1, rs2 );
882 51796414 : z[29] = CL_msu_j( rs1, rs2 );
883 :
884 : /* 1. FFT2 stage */
885 51796414 : rs1 = CL_shr( z[0], SCALEFACTOR30_2 );
886 51796414 : rs2 = CL_shr( z[15], SCALEFACTOR30_2 );
887 51796414 : *l = CL_add( rs1, rs2 );
888 51796414 : *h = CL_sub( rs1, rs2 );
889 51796414 : l += 1;
890 51796414 : h += 1;
891 :
892 : /* 2. FFT2 stage */
893 51796414 : rs1 = CL_shr( z[8], SCALEFACTOR30_2 );
894 51796414 : rs2 = CL_shr( z[23], SCALEFACTOR30_2 );
895 51796414 : *h = CL_add( rs1, rs2 );
896 51796414 : *l = CL_sub( rs1, rs2 );
897 51796414 : l += 1;
898 51796414 : h += 1;
899 :
900 : /* 3. FFT2 stage */
901 51796414 : rs1 = CL_shr( z[1], SCALEFACTOR30_2 );
902 51796414 : rs2 = CL_shr( z[16], SCALEFACTOR30_2 );
903 51796414 : *l = CL_add( rs1, rs2 );
904 51796414 : *h = CL_sub( rs1, rs2 );
905 51796414 : l += 1;
906 51796414 : h += 1;
907 :
908 : /* 4. FFT2 stage */
909 51796414 : rs1 = CL_shr( z[9], SCALEFACTOR30_2 );
910 51796414 : rs2 = CL_shr( z[24], SCALEFACTOR30_2 );
911 51796414 : *h = CL_add( rs1, rs2 );
912 51796414 : *l = CL_sub( rs1, rs2 );
913 51796414 : l += 1;
914 51796414 : h += 1;
915 :
916 : /* 5. FFT2 stage */
917 51796414 : rs1 = CL_shr( z[2], SCALEFACTOR30_2 );
918 51796414 : rs2 = CL_shr( z[17], SCALEFACTOR30_2 );
919 51796414 : *l = CL_add( rs1, rs2 );
920 51796414 : *h = CL_sub( rs1, rs2 );
921 51796414 : l += 1;
922 51796414 : h += 1;
923 :
924 : /* 6. FFT2 stage */
925 51796414 : rs1 = CL_shr( z[10], SCALEFACTOR30_2 );
926 51796414 : rs2 = CL_shr( z[25], SCALEFACTOR30_2 );
927 51796414 : *h = CL_add( rs1, rs2 );
928 51796414 : *l = CL_sub( rs1, rs2 );
929 51796414 : l += 1;
930 51796414 : h += 1;
931 :
932 : /* 7. FFT2 stage */
933 51796414 : rs1 = CL_shr( z[3], SCALEFACTOR30_2 );
934 51796414 : rs2 = CL_shr( z[18], SCALEFACTOR30_2 );
935 51796414 : *l = CL_add( rs1, rs2 );
936 51796414 : *h = CL_sub( rs1, rs2 );
937 51796414 : l += 1;
938 51796414 : h += 1;
939 :
940 : /* 8. FFT2 stage */
941 51796414 : rs1 = CL_shr( z[11], SCALEFACTOR30_2 );
942 51796414 : rs2 = CL_shr( z[26], SCALEFACTOR30_2 );
943 51796414 : *h = CL_add( rs1, rs2 );
944 51796414 : *l = CL_sub( rs1, rs2 );
945 51796414 : l += 1;
946 51796414 : h += 1;
947 :
948 : /* 9. FFT2 stage */
949 51796414 : rs1 = CL_shr( z[4], SCALEFACTOR30_2 );
950 51796414 : rs2 = CL_shr( z[19], SCALEFACTOR30_2 );
951 51796414 : *l = CL_add( rs1, rs2 );
952 51796414 : *h = CL_sub( rs1, rs2 );
953 51796414 : l += 1;
954 51796414 : h += 1;
955 :
956 : /* 10. FFT2 stage */
957 51796414 : rs1 = CL_shr( z[12], SCALEFACTOR30_2 );
958 51796414 : rs2 = CL_shr( z[27], SCALEFACTOR30_2 );
959 51796414 : *h = CL_add( rs1, rs2 );
960 51796414 : *l = CL_sub( rs1, rs2 );
961 51796414 : l += 1;
962 51796414 : h += 1;
963 :
964 : /* 11. FFT2 stage */
965 51796414 : rs1 = CL_shr( z[5], SCALEFACTOR30_2 );
966 51796414 : rs2 = CL_shr( z[20], SCALEFACTOR30_2 );
967 51796414 : *l = CL_add( rs1, rs2 );
968 51796414 : *h = CL_sub( rs1, rs2 );
969 51796414 : l += 1;
970 51796414 : h += 1;
971 :
972 : /* 12. FFT2 stage */
973 51796414 : rs1 = CL_shr( z[13], SCALEFACTOR30_2 );
974 51796414 : rs2 = CL_shr( z[28], SCALEFACTOR30_2 );
975 51796414 : *h = CL_add( rs1, rs2 );
976 51796414 : *l = CL_sub( rs1, rs2 );
977 51796414 : l += 1;
978 51796414 : h += 1;
979 :
980 : /* 13. FFT2 stage */
981 51796414 : rs1 = CL_shr( z[6], SCALEFACTOR30_2 );
982 51796414 : rs2 = CL_shr( z[21], SCALEFACTOR30_2 );
983 51796414 : *l = CL_add( rs1, rs2 );
984 51796414 : *h = CL_sub( rs1, rs2 );
985 51796414 : l += 1;
986 51796414 : h += 1;
987 :
988 : /* 14. FFT2 stage */
989 51796414 : rs1 = CL_shr( z[14], SCALEFACTOR30_2 );
990 51796414 : rs2 = CL_shr( z[29], SCALEFACTOR30_2 );
991 51796414 : *h = CL_add( rs1, rs2 );
992 51796414 : *l = CL_sub( rs1, rs2 );
993 51796414 : l += 1;
994 51796414 : h += 1;
995 :
996 : /* 15. FFT2 stage */
997 51796414 : rs1 = CL_shr( z[7], SCALEFACTOR30_2 );
998 51796414 : rs2 = CL_shr( z[22], SCALEFACTOR30_2 );
999 51796414 : *l = CL_add( rs1, rs2 );
1000 51796414 : *h = CL_sub( rs1, rs2 );
1001 51796414 : l += 1;
1002 51796414 : h += 1;
1003 :
1004 : #ifdef WMOPS
1005 : multiCounter[currCounter].CL_move += 30;
1006 : #endif
1007 51796414 : }
1008 :
1009 : /*-------------------------------------------------------------------*
1010 : * fft_cldfb_fx()
1011 : *
1012 : * Interface functions FFT subroutines
1013 : *--------------------------------------------------------------------*/
1014 86772582 : void fft_cldfb_fx(
1015 : Word32 *data, /* i/o: input/output vector Qx*/
1016 : const Word16 size /* size of fft operation */
1017 : )
1018 : {
1019 :
1020 86772582 : SWITCH( size )
1021 : {
1022 0 : case 5:
1023 0 : fft5_with_cmplx_data( (cmplx *) data );
1024 0 : BREAK;
1025 4917700 : case 8:
1026 4917700 : fft8_with_cmplx_data( (cmplx *) data );
1027 4917700 : BREAK;
1028 13174712 : case 10:
1029 13174712 : fft10_with_cmplx_data( (cmplx *) data );
1030 13174712 : BREAK;
1031 3624 : case 16:
1032 3624 : fft16_with_cmplx_data( (cmplx *) data, 0 );
1033 3624 : BREAK;
1034 16880132 : case 20:
1035 16880132 : fft20_with_cmplx_data( (cmplx *) data );
1036 16880132 : BREAK;
1037 51796414 : case 30:
1038 51796414 : fft30_with_cmplx_data( (cmplx *) data );
1039 51796414 : BREAK;
1040 :
1041 0 : default:
1042 0 : assert( 0 );
1043 : BREAK;
1044 : }
1045 :
1046 86772582 : return;
1047 : }
|