Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #include "prot_fx.h"
40 : #include "rom_com.h"
41 : #include "wmc_auto.h"
42 :
43 : /*-----------------------------------------------------------------*
44 : * Local constants
45 : *-----------------------------------------------------------------*/
46 :
47 : #define N_MAX_FFT 1024
48 : #define INV_SQR2_FX 23170 /*Q15*/
49 : #define N_MAX_SAS 256
50 : /*---------------------------------------------------------------------*
51 : * ifft_rel()
52 : *
53 : * Calculate the inverse FFT of a real signal
54 : *
55 : * Based on the FORTRAN code from the article "Real-valued Fast Fourier Transform Algorithms"
56 : * by Sorensen, ... in IEEE Trans. on ASSP, Vol. ASSP-35, No. June 6th 1987.
57 : *
58 : * Input: the io[] signal containing the spectrum in the following order :
59 : *
60 : * Re[0], Re[1], .. Re[n/2], Im[n/2-1], .. Im[1]
61 : *---------------------------------------------------------------------*/
62 :
63 31064 : void ifft_rel_fx(
64 : Word16 io[], /* Qx i/o: input/output vector */
65 : const Word16 n, /* Q0 i : vector length */
66 : const Word16 m /* Q0 i : log2 of vector length */
67 : )
68 : {
69 : Word16 i, j, k;
70 : Word16 step;
71 : Word16 n2, n4, n8, i0;
72 : Word16 is, id;
73 : Word16 *x, *xi0, *xi1, *xi2, *xi3, *xi4, *xup1, *xdn6, *xup3, *xdn8;
74 : Word16 xt;
75 : Word16 r1;
76 : Word16 t1, t2, t3, t4, t5;
77 : const Word16 *s, *c, *s3, *c3;
78 :
79 : Word16 cc1, cc3, ss1, ss3;
80 : Word16 tmp;
81 : #ifndef ISSUE_1836_replace_overflow_libcom
82 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
83 : Flag Overflow = 0;
84 : move16();
85 : #endif
86 : #endif
87 :
88 :
89 : /*-----------------------------------------------------------------*
90 : * ifft
91 : *-----------------------------------------------------------------*/
92 :
93 31064 : x = &io[-1];
94 31064 : n2 = shl( n, 1 );
95 111172 : FOR( k = 1; k < m; k++ )
96 : {
97 80108 : is = 0;
98 80108 : move16();
99 80108 : id = n2;
100 80108 : move16();
101 80108 : n2 = shr( n2, 1 );
102 80108 : n4 = shr( n2, 2 );
103 80108 : n8 = shr( n4, 1 );
104 80108 : tmp = sub( n, 1 );
105 192580 : WHILE( LT_16( is, tmp ) )
106 : {
107 112472 : xi1 = x + is + 1; /*Qx*/
108 112472 : xi2 = xi1 + n4; /*Qx*/
109 112472 : xi3 = xi2 + n4; /*Qx*/
110 112472 : xi4 = xi3 + n4; /*Qx*/
111 :
112 473068 : FOR( i = is; i < n; i += id )
113 : {
114 : #ifdef ISSUE_1836_replace_overflow_libcom
115 360596 : t1 = sub_sat( *xi1, *xi3 ); /*Qx*/
116 360596 : *xi1 = add_sat( *xi1, *xi3 ); /*Qx*/
117 360596 : move16();
118 360596 : *xi2 = shl_sat( *xi2, 1 ); /*Qx*/
119 360596 : move16();
120 360596 : *xi3 = sub_sat( t1, shl_sat( *xi4, 1 ) ); /*Qx*/
121 360596 : move16();
122 360596 : *xi4 = add_sat( t1, shl_sat( *xi4, 1 ) ); /*Qx*/
123 360596 : move16();
124 : #else
125 : t1 = sub_o( *xi1, *xi3, &Overflow ); /*Qx*/
126 : *xi1 = add_o( *xi1, *xi3, &Overflow ); /*Qx*/
127 : move16();
128 : *xi2 = shl_o( *xi2, 1, &Overflow ); /*Qx*/
129 : move16();
130 : *xi3 = sub_o( t1, shl_o( *xi4, 1, &Overflow ), &Overflow ); /*Qx*/
131 : move16();
132 : *xi4 = add_o( t1, shl_o( *xi4, 1, &Overflow ), &Overflow ); /*Qx*/
133 : move16();
134 : #endif
135 :
136 360596 : IF( NE_16( n4, 1 ) )
137 : {
138 : #ifdef ISSUE_1836_replace_overflow_libcom
139 178500 : t1 = mult_r( sub_sat( *( xi2 + n8 ), *( xi1 + n8 ) ), INV_SQR2_FX /*Q15*/ ); /*Qx*/
140 178500 : t2 = mult_r( add_sat( *( xi4 + n8 ), *( xi3 + n8 ) ), INV_SQR2_FX /*Q15*/ ); /*Qx*/
141 :
142 178500 : *( xi1 + n8 ) = add_sat( *( xi1 + n8 ), *( xi2 + n8 ) ); /*Qx*/
143 178500 : move16();
144 178500 : *( xi2 + n8 ) = sub_sat( *( xi4 + n8 ), *( xi3 + n8 ) ); /*Qx*/
145 178500 : move16();
146 178500 : *( xi3 + n8 ) = negate( shl_sat( add_sat( t2, t1 ), 1 ) ); /*Qx*/
147 178500 : move16();
148 178500 : *( xi4 + n8 ) = shl_sat( sub_sat( t1, t2 ), 1 ); /*Qx*/
149 178500 : move16();
150 : #else
151 : t1 = mult_r( sub_o( *( xi2 + n8 ), *( xi1 + n8 ), &Overflow ), INV_SQR2_FX /*Q15*/ ); /*Qx*/
152 : t2 = mult_r( add_o( *( xi4 + n8 ), *( xi3 + n8 ), &Overflow ), INV_SQR2_FX /*Q15*/ ); /*Qx*/
153 :
154 : *( xi1 + n8 ) = add_o( *( xi1 + n8 ), *( xi2 + n8 ), &Overflow ); /*Qx*/
155 : move16();
156 : *( xi2 + n8 ) = sub_o( *( xi4 + n8 ), *( xi3 + n8 ), &Overflow ); /*Qx*/
157 : move16();
158 : *( xi3 + n8 ) = negate( shl_o( add_o( t2, t1, &Overflow ), 1, &Overflow ) ); /*Qx*/
159 : move16();
160 : *( xi4 + n8 ) = shl_o( sub_o( t1, t2, &Overflow ), 1, &Overflow ); /*Qx*/
161 : move16();
162 : #endif
163 : }
164 360596 : xi1 += id;
165 360596 : xi2 += id;
166 360596 : xi3 += id;
167 360596 : xi4 += id;
168 : }
169 112472 : is = sub( shl( id, 1 ), n2 );
170 112472 : id = shl( id, 2 );
171 : }
172 : /*Can be acheived with a shr */
173 80108 : step = idiv1616( N_MAX_SAS, n2 );
174 80108 : move16();
175 :
176 80108 : s = sincos_t_fx + step; /*Q15 */
177 80108 : c = s + 64; /*Q15 */
178 80108 : s3 = sincos_t_fx + i_mult2( step, 3 ); /*Q15 */
179 80108 : c3 = s3 + 64; /*Q15 */
180 285080 : FOR( j = 2; j <= n8; j++ )
181 : {
182 204972 : cc1 = *c; /*Q15 */
183 204972 : move16();
184 204972 : ss1 = *s; /*Q15 */
185 204972 : move16();
186 204972 : cc3 = *c3; /*Q15 */
187 204972 : move16();
188 204972 : ss3 = *s3; /*Q15 */
189 204972 : move16();
190 :
191 204972 : is = 0;
192 204972 : move16();
193 204972 : id = shl( n2, 1 );
194 :
195 204972 : c += step;
196 204972 : s += step;
197 :
198 204972 : c3 += imult1616( 3, step );
199 204972 : s3 += imult1616( 3, step );
200 453096 : WHILE( LT_16( is, sub( n, 1 ) ) )
201 : {
202 248124 : xup1 = x + j + is; /*Qx*/
203 248124 : xup3 = xup1 + shl( n4, 1 ); /*Qx*/
204 248124 : xdn6 = xup3 - shl( j, 1 ) + 2; /*Qx*/
205 :
206 248124 : xdn8 = xdn6 + shl( n4, 1 ); /*Qx*/
207 :
208 582552 : FOR( i = is; i < n; i += id )
209 : {
210 : #ifdef ISSUE_1836_replace_overflow_libcom
211 334428 : t1 = sub_sat( *xup1, *xdn6 ); /*Qx*/
212 334428 : *xup1 = add_sat( *xup1, *xdn6 ); /*Qx*/
213 334428 : move16();
214 334428 : xup1 += n4;
215 334428 : xdn6 -= n4;
216 :
217 334428 : t2 = sub_sat( *xdn6, *xup1 ); /*Qx*/
218 334428 : *xdn6 = add_sat( *xup1, *xdn6 ); /*Qx*/
219 334428 : move16();
220 :
221 334428 : xdn6 += n4;
222 334428 : t3 = add_sat( *xdn8, *xup3 ); /*Qx*/
223 334428 : *xdn6 = sub_sat( *xdn8, *xup3 ); /*Qx*/
224 334428 : move16();
225 :
226 334428 : xup3 += n4;
227 334428 : xdn8 -= n4;
228 :
229 334428 : t4 = add_sat( *xup3, *xdn8 ); /*Qx*/
230 334428 : *xup1 = sub_sat( *xup3, *xdn8 ); /*Qx*/
231 334428 : move16();
232 :
233 334428 : t5 = sub_sat( t1, t4 ); /*Qx*/
234 334428 : t1 = add_sat( t1, t4 ); /*Qx*/
235 334428 : t4 = sub_sat( t2, t3 ); /*Qx*/
236 334428 : t2 = add_sat( t2, t3 ); /*Qx*/
237 334428 : *xup3 = sub_sat( mult_r( t1, cc3 ), mult_r( t2, ss3 ) ); /*Qx*/
238 334428 : move16();
239 334428 : xup3 -= n4;
240 334428 : *xup3 = add_sat( mult_r( t5, cc1 ), mult_r( t4, ss1 ) ); /*Qx*/
241 334428 : move16();
242 334428 : *xdn8 = sub_sat( mult_r( t5, ss1 ), mult_r( t4, cc1 ) ); /*Qx*/
243 334428 : move16();
244 :
245 334428 : xdn8 += n4;
246 334428 : *xdn8 = add_sat( mult_r( t2, cc3 ), mult_r( t1, ss3 ) ); /*Qx*/
247 334428 : move16();
248 : #else
249 : t1 = sub_o( *xup1, *xdn6, &Overflow ); /*Qx*/
250 : *xup1 = add_o( *xup1, *xdn6, &Overflow ); /*Qx*/
251 : move16();
252 : xup1 += n4;
253 : xdn6 -= n4;
254 :
255 : t2 = sub_o( *xdn6, *xup1, &Overflow ); /*Qx*/
256 : *xdn6 = add_o( *xup1, *xdn6, &Overflow ); /*Qx*/
257 : move16();
258 :
259 : xdn6 += n4;
260 : t3 = add_o( *xdn8, *xup3, &Overflow ); /*Qx*/
261 : *xdn6 = sub_o( *xdn8, *xup3, &Overflow ); /*Qx*/
262 : move16();
263 :
264 : xup3 += n4;
265 : xdn8 -= n4;
266 :
267 : t4 = add_o( *xup3, *xdn8, &Overflow ); /*Qx*/
268 : *xup1 = sub_o( *xup3, *xdn8, &Overflow ); /*Qx*/
269 : move16();
270 :
271 : t5 = sub_o( t1, t4, &Overflow ); /*Qx*/
272 : t1 = add_o( t1, t4, &Overflow ); /*Qx*/
273 : t4 = sub_o( t2, t3, &Overflow ); /*Qx*/
274 : t2 = add_o( t2, t3, &Overflow ); /*Qx*/
275 : *xup3 = sub_o( mult_r( t1, cc3 ), mult_r( t2, ss3 ), &Overflow ); /*Qx*/
276 : move16();
277 : xup3 -= n4;
278 : *xup3 = add_o( mult_r( t5, cc1 ), mult_r( t4, ss1 ), &Overflow ); /*Qx*/
279 : move16();
280 : *xdn8 = sub_o( mult_r( t5, ss1 ), mult_r( t4, cc1 ), &Overflow ); /*Qx*/
281 : move16();
282 :
283 : xdn8 += n4;
284 : *xdn8 = add_o( mult_r( t2, cc3 ), mult_r( t1, ss3 ), &Overflow ); /*Qx*/
285 : move16();
286 : #endif
287 :
288 334428 : xup1 -= n4;
289 334428 : xup1 += id;
290 334428 : xup3 += id;
291 334428 : xdn6 += id;
292 334428 : xdn8 += id;
293 : }
294 248124 : is = sub( shl( id, 1 ), n2 );
295 248124 : id = shl( id, 2 );
296 : }
297 : }
298 : }
299 :
300 : /*-----------------------------------------------------------------*
301 : * Length two butterflies
302 : *-----------------------------------------------------------------*/
303 :
304 31064 : is = 1;
305 31064 : move16();
306 31064 : id = 4;
307 31064 : move16();
308 100384 : WHILE( is < n )
309 : {
310 69320 : xi0 = x + is;
311 69320 : xi1 = xi0 + 1;
312 :
313 457384 : FOR( i0 = is; i0 <= n; i0 += id )
314 : {
315 388064 : r1 = *xi0;
316 388064 : move16();
317 : #ifdef ISSUE_1836_replace_overflow_libcom
318 388064 : *xi0 = add_sat( r1, *xi1 ); /*Qx*/
319 388064 : move16();
320 388064 : *xi1 = sub_sat( r1, *xi1 ); /*Qx*/
321 388064 : move16();
322 : #else
323 : *xi0 = add_o( r1, *xi1, &Overflow ); /*Qx*/
324 : move16();
325 : *xi1 = sub_o( r1, *xi1, &Overflow ); /*Qx*/
326 : move16();
327 : #endif
328 388064 : xi0 += id;
329 388064 : xi1 += id;
330 : }
331 69320 : is = sub( shl( id, 1 ), 1 );
332 69320 : id = shl( id, 2 );
333 : }
334 :
335 : /*-----------------------------------------------------------------*
336 : * Digit reverse counter
337 : *-----------------------------------------------------------------*/
338 :
339 31064 : j = 1;
340 31064 : move16();
341 1140320 : FOR( i = 1; i < n; i++ )
342 : {
343 1109256 : IF( LT_16( i, j ) )
344 : {
345 486456 : xt = x[j]; /*Qx*/
346 486456 : move16();
347 486456 : x[j] = x[i]; /*Qx*/
348 486456 : move16();
349 486456 : x[i] = xt; /*Qx*/
350 486456 : move16();
351 : }
352 1109256 : k = shr( n, 1 );
353 2107340 : WHILE( LT_16( k, j ) )
354 : {
355 998084 : j = sub( j, k );
356 998084 : k = shr( k, 1 );
357 : }
358 1109256 : j = add( j, k );
359 : }
360 :
361 : /*-----------------------------------------------------------------*
362 : * Normalization
363 : *-----------------------------------------------------------------*/
364 :
365 31064 : tmp = div_s( 1, n ); /*Q15 */
366 1171384 : FOR( i = 1; i <= n; i++ )
367 : {
368 1140320 : x[i] = mult_r( x[i], tmp ); /*Qx*/
369 1140320 : move16();
370 : }
371 :
372 31064 : return;
373 : }
374 :
375 : #define INV_SQRT_2_16 ( Word16 )( 0x5A82 ) /*Q15*/
376 34628 : void ifft_rel_fx32(
377 : Word32 io[], /* Qx i/o: input/output vector */
378 : const Word16 n, /* Q0 i : vector length */
379 : const Word16 m /* Q0 i : log2 of vector length */
380 : )
381 : {
382 : Word16 i, j, k;
383 : Word16 step;
384 : Word16 n2, n4, n8, i0;
385 : Word16 is, id;
386 : Word32 *x, *xi0, *xi1, *xi2, *xi3, *xi4, *xup1, *xdn6, *xup3, *xdn8;
387 : Word32 xt;
388 : Word32 r1;
389 : Word32 t1, t2, t3, t4, t5;
390 : Word16 cc1, cc3, ss1, ss3;
391 : const Word16 *s, *s3, *c, *c3;
392 : const Word16 *idx;
393 : Word32 temp[512];
394 34628 : Word16 n_inv = 128; /*1/256 in Q15*/
395 34628 : move16();
396 :
397 34628 : SWITCH( n )
398 : {
399 0 : case 128:
400 0 : n_inv = 256; /*1/128 in Q15*/
401 0 : BREAK;
402 244 : case 256:
403 244 : n_inv = 128; /*1/256 in Q15*/
404 244 : BREAK;
405 34384 : case 512:
406 34384 : n_inv = 64; /*1/512 in Q15*/
407 34384 : BREAK;
408 0 : default:
409 0 : assert( 0 );
410 : BREAK;
411 : }
412 34628 : move16();
413 :
414 : /*-----------------------------------------------------------------*
415 : * IFFT
416 : *-----------------------------------------------------------------*/
417 :
418 34628 : x = &io[-1];
419 34628 : n2 = shl( n, 1 ); /*Q0*/
420 311408 : FOR( k = 1; k < m; k++ )
421 : {
422 276780 : is = 0;
423 276780 : move16();
424 276780 : id = n2;
425 276780 : move16();
426 276780 : n2 = shr( n2, 1 ); /*Q0*/
427 276780 : n4 = shr( n2, 2 ); /*Q0*/
428 276780 : n8 = shr( n4, 1 ); /*Q0*/
429 968364 : WHILE( LT_16( is, sub( n, 1 ) ) )
430 : {
431 691584 : xi1 = x + is + 1; /*Qx*/
432 691584 : xi2 = xi1 + n4; /*Qx*/
433 691584 : xi3 = xi2 + n4; /*Qx*/
434 691584 : xi4 = xi3 + n4; /*Qx*/
435 :
436 6557604 : FOR( i = is; i < n; i += id )
437 : {
438 5866020 : t1 = L_sub( *xi1, *xi3 ); /*Qx*/
439 5866020 : *xi1 = L_add( *xi1, *xi3 ); /*Qx*/
440 5866020 : move32();
441 5866020 : *xi2 = L_shl( *xi2, 1 ); /*Qx*/
442 5866020 : move32();
443 5866020 : *xi3 = L_sub( t1, L_shl( *xi4, 1 ) ); /*Qx*/
444 5866020 : move32();
445 5866020 : *xi4 = L_add( t1, L_shl( *xi4, 1 ) ); /*Qx*/
446 5866020 : move32();
447 5866020 : IF( NE_16( n4, 1 ) )
448 : {
449 2932888 : t1 = Mpy_32_16_1( L_sub( *( xi2 + n8 ), *( xi1 + n8 ) ), INV_SQRT_2_16 ); /*Qx*/
450 2932888 : t2 = Mpy_32_16_1( L_add( *( xi4 + n8 ), *( xi3 + n8 ) ), INV_SQRT_2_16 ); /*Qx*/
451 :
452 2932888 : *( xi1 + n8 ) = L_add( *( xi1 + n8 ), *( xi2 + n8 ) ); /*Qx*/
453 2932888 : move32();
454 2932888 : *( xi2 + n8 ) = L_sub( *( xi4 + n8 ), *( xi3 + n8 ) ); /*Qx*/
455 2932888 : move32();
456 2932888 : *( xi3 + n8 ) = L_shl( L_negate( L_add( t2, t1 ) ), 1 ); /*Qx*/
457 2932888 : move32();
458 2932888 : *( xi4 + n8 ) = L_shl( L_sub( t1, t2 ), 1 ); /*Qx*/
459 2932888 : move32();
460 : }
461 5866020 : xi1 += id;
462 5866020 : xi2 += id;
463 5866020 : xi3 += id;
464 5866020 : xi4 += id;
465 : }
466 691584 : is = sub( shl( id, 1 ), n2 ); /*Q0*/
467 691584 : id = shl( id, 2 ); /*Q0*/
468 : }
469 : /*Can be acheived with a shr */
470 276780 : step = idiv1616( N_MAX_FFT, n2 );
471 276780 : move16();
472 :
473 276780 : s = sincos_t_ext_fx + step; /*Q15*/
474 276780 : c = s + shr( N_MAX_FFT, 2 ); /*Q15*/
475 276780 : s3 = sincos_t_ext_fx + imult1616( 3, step ); /*Q15*/
476 276780 : c3 = s3 + shr( N_MAX_FFT, 2 ); /*Q15*/
477 4416768 : FOR( j = 2; j <= n8; j++ )
478 : {
479 4139988 : cc1 = *c; /*Q15*/
480 4139988 : ss1 = *s; /*Q15*/
481 4139988 : cc3 = *c3; /*Q15*/
482 4139988 : ss3 = *s3; /*Q15*/
483 4139988 : move16();
484 4139988 : move16();
485 4139988 : move16();
486 4139988 : move16();
487 :
488 4139988 : is = 0;
489 4139988 : move16();
490 4139988 : id = shl( n2, 1 );
491 4139988 : move16();
492 :
493 4139988 : c += step;
494 4139988 : s += step;
495 :
496 4139988 : c3 += imult1616( 3, step ); /*Q15*/
497 4139988 : s3 += imult1616( 3, step ); /*Q15*/
498 9314424 : WHILE( LT_16( is, sub( n, 1 ) ) )
499 : {
500 5174436 : xup1 = x + add( j, is ); /*Qx*/
501 5174436 : xup3 = xup1 + shl( n4, 1 ); /*Qx*/
502 5174436 : xdn6 = xup3 - sub( shl( j, 1 ), 2 ); /*Qx*/
503 5174436 : xdn8 = xdn6 + shl( n4, 1 ); /*Qx*/
504 :
505 13036680 : FOR( i = is; i < n; i += id )
506 : {
507 7862244 : t1 = L_sub( *xup1, *xdn6 ); /*Qx*/
508 7862244 : *xup1 = L_add( *xup1, *xdn6 ); /*Qx*/
509 7862244 : move32();
510 7862244 : xup1 += n4;
511 7862244 : xdn6 -= n4;
512 :
513 7862244 : t2 = L_sub( *xdn6, *xup1 ); /*Qx*/
514 7862244 : *xdn6 = L_add( *xup1, *xdn6 ); /*Qx*/
515 7862244 : move32();
516 :
517 7862244 : xdn6 += n4;
518 7862244 : t3 = L_add( *xdn8, *xup3 ); /*Qx*/
519 7862244 : *xdn6 = L_sub( *xdn8, *xup3 ); /*Qx*/
520 7862244 : move32();
521 :
522 7862244 : xup3 += n4;
523 7862244 : xdn8 -= n4;
524 :
525 7862244 : t4 = L_add( *xup3, *xdn8 ); /*Qx*/
526 7862244 : *xup1 = L_sub( *xup3, *xdn8 ); /*Qx*/
527 7862244 : move32();
528 :
529 7862244 : t5 = L_sub( t1, t4 ); /*Qx*/
530 7862244 : t1 = L_add( t1, t4 ); /*Qx*/
531 7862244 : t4 = L_sub( t2, t3 ); /*Qx*/
532 7862244 : t2 = L_add( t2, t3 ); /*Qx*/
533 7862244 : *xup3 = L_sub( Mpy_32_16_1( t1, cc3 ), Mpy_32_16_1( t2, ss3 ) ); /*Qx*/
534 7862244 : move32();
535 7862244 : xup3 -= n4;
536 7862244 : *xup3 = L_add( Mpy_32_16_1( t5, cc1 ), Mpy_32_16_1( t4, ss1 ) ); /*Qx*/
537 7862244 : move32();
538 7862244 : *xdn8 = L_sub( Mpy_32_16_1( t5, ss1 ), Mpy_32_16_1( t4, cc1 ) ); /*Qx*/
539 7862244 : move32();
540 :
541 7862244 : xdn8 += n4;
542 7862244 : *xdn8 = L_add( Mpy_32_16_1( t2, cc3 ), Mpy_32_16_1( t1, ss3 ) ); /*Qx*/
543 7862244 : move32();
544 :
545 7862244 : xup1 -= n4;
546 7862244 : xup1 += id;
547 7862244 : xup3 += id;
548 7862244 : xdn6 += id;
549 7862244 : xdn8 += id;
550 : }
551 5174436 : is = sub( shl( id, 1 ), n2 ); /*Q0*/
552 5174436 : id = shl( id, 2 ); /*Q0*/
553 : }
554 : }
555 : }
556 :
557 : /*-----------------------------------------------------------------*
558 : * Length two butterflies
559 : *-----------------------------------------------------------------*/
560 :
561 34628 : is = 1;
562 34628 : move16();
563 34628 : id = 4;
564 34628 : move16();
565 207524 : WHILE( LT_16( is, n ) )
566 : {
567 172896 : xi0 = x + is;
568 172896 : xi1 = xi0 + 1;
569 :
570 6073300 : FOR( i0 = is; i0 <= n; i0 += id )
571 : {
572 5900404 : r1 = *xi0; /*Qx*/
573 5900404 : move32();
574 5900404 : *xi0 = L_add( r1, *xi1 ); /*Qx*/
575 5900404 : move32();
576 5900404 : *xi1 = L_sub( r1, *xi1 ); /*Qx*/
577 5900404 : move32();
578 5900404 : xi0 += id;
579 5900404 : xi1 += id;
580 : }
581 172896 : is = sub( shl( id, 1 ), 1 );
582 172896 : id = shl( id, 2 );
583 : }
584 :
585 : /*-----------------------------------------------------------------*
586 : * Digit reverse counter
587 : *-----------------------------------------------------------------*/
588 :
589 34628 : idx = fft256_read_indexes;
590 34628 : xi0 = &temp[0] - 1;
591 34628 : IF( EQ_16( n, 128 ) )
592 : {
593 0 : FOR( i = 0; i < n; i++ )
594 : {
595 0 : j = *idx++;
596 0 : move16();
597 0 : temp[i] = x[1 + shr( j, 1 )]; /*Qx*/
598 0 : move32();
599 : }
600 : }
601 34628 : ELSE IF( EQ_16( n, 256 ) )
602 : {
603 62708 : FOR( i = 0; i < n; i++ )
604 : {
605 62464 : j = *idx++;
606 62464 : temp[i] = x[1 + j]; /*Qx*/
607 62464 : move32();
608 : }
609 : }
610 34384 : ELSE IF( EQ_16( n, 512 ) )
611 : {
612 8836688 : FOR( i = 0; i < 256; i++ )
613 : {
614 8802304 : j = *idx++;
615 8802304 : temp[i] = x[1 + 2 * j]; /*Qx*/
616 8802304 : move32();
617 8802304 : temp[i + 256] = x[2 + 2 * j]; /*Qx*/
618 8802304 : move32();
619 : }
620 : }
621 : ELSE
622 : {
623 0 : xi0 = x;
624 0 : j = 1;
625 0 : move16();
626 0 : FOR( i = 1; i < n; i++ )
627 : {
628 0 : IF( LT_16( i, j ) )
629 : {
630 0 : xt = x[j]; /*Qx*/
631 0 : x[j] = x[i]; /*Qx*/
632 0 : x[i] = xt; /*Qx*/
633 0 : move32();
634 0 : move32();
635 0 : move32();
636 : }
637 0 : k = shr( n, 1 );
638 0 : WHILE( LT_16( k, j ) )
639 : {
640 0 : j = sub( j, k );
641 0 : k = shr( k, 1 );
642 : }
643 0 : j = add( j, k );
644 : }
645 : }
646 :
647 : /*-----------------------------------------------------------------*
648 : * Normalization
649 : *-----------------------------------------------------------------*/
650 :
651 17701700 : FOR( i = 1; i <= n; i++ )
652 : {
653 17667072 : x[i] = Mpy_32_16_1( xi0[i], n_inv ); /*Qx*/
654 17667072 : move32();
655 : }
656 :
657 34628 : return;
658 : }
|