Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #include "prot_fx.h"
40 : #include "rom_com.h"
41 : #include "wmc_auto.h"
42 :
43 : /*---------------------------------------------------------------------*
44 : * Local constants
45 : *---------------------------------------------------------------------*/
46 :
47 : #define N_MAX_FFT 1024
48 : #define N_MAX_DIV2 ( N_MAX_FFT >> 1 )
49 : #define N_MAX_DIV4 ( N_MAX_DIV2 >> 1 )
50 : #define INV_SQR2_FX 23170
51 : #define N_MAX_SAS 256
52 : /*---------------------------------------------------------------------*
53 : * fft_rel()
54 : *
55 : * Computes the split-radix FFT in place for the real-valued
56 : * signal x of length n. The algorithm has been ported from
57 : * the Fortran code of [1].
58 : *
59 : * The function needs sine and cosine tables t_sin and t_cos,
60 : * and the constant N_MAX_FFT. The table entries are defined as
61 : * sin(2*pi*i) and cos(2*pi*i) for i = 0, 1, ..., N_MAX_FFT-1. The
62 : * implementation assumes that any entry will not be needed
63 : * outside the tables. Therefore, N_MAX_FFT and n must be properly
64 : * set. The function has been tested with the values n = 16,
65 : * 32, 64, 128, 256, and N_MAX_FFT = 1280.
66 : *
67 : * References
68 : * [1] H.V. Sorensen, D.L. Jones, M.T. Heideman, C.S. Burrus,
69 : * "Real-valued fast Fourier transform algorithm," IEEE
70 : * Trans. on Signal Processing, Vol.35, No.6, pp 849-863,
71 : * 1987.
72 : *
73 : * OUTPUT
74 : * x[0:n-1] Transform coeffients in the order re[0], re[1],
75 : * ..., re[n/2], im[n/2-1], ..., im[1].
76 : *---------------------------------------------------------------------*/
77 :
78 0 : void fft_rel(
79 : float x[], /* i/o: input/output vector */
80 : const int16_t n, /* i : vector length */
81 : const int16_t m /* i : log2 of vector length */
82 : )
83 : {
84 : int16_t i, j, k, n1, n2, n4;
85 : int16_t step;
86 : float xt, t1, t2;
87 : float *x0, *x1, *x2;
88 : float *xi2, *xi3, *xi4, *xi1;
89 : const float *s, *c;
90 : const int16_t *idx;
91 :
92 : /* !!!! NMAX = 256 is hardcoded here (similar optimizations should be done for NMAX > 256) !!! */
93 :
94 : float *x2even, *x2odd;
95 : float temp[512];
96 :
97 0 : if ( n == 128 || n == 256 || n == 512 )
98 : {
99 0 : idx = fft256_read_indexes;
100 :
101 : /* Combined Digit reverse counter & Length two butterflies */
102 0 : if ( n == 128 )
103 : {
104 0 : x2 = temp;
105 0 : for ( i = 0; i < 64; i++ )
106 : {
107 0 : j = *idx++;
108 0 : k = *idx++;
109 :
110 0 : *x2++ = x[j >> 1] + x[k >> 1];
111 0 : *x2++ = x[j >> 1] - x[k >> 1];
112 : }
113 : }
114 0 : else if ( n == 256 )
115 : {
116 0 : x2 = temp;
117 0 : for ( i = 0; i < 128; i++ )
118 : {
119 0 : j = *idx++;
120 0 : k = *idx++;
121 :
122 0 : *x2++ = x[j] + x[k];
123 0 : *x2++ = x[j] - x[k];
124 : }
125 : }
126 0 : else if ( n == 512 )
127 : {
128 0 : x2even = temp;
129 0 : x2odd = temp + 256;
130 :
131 0 : for ( i = 0; i < 128; i++ )
132 : {
133 0 : j = 2 * *idx++;
134 0 : k = 2 * *idx++;
135 :
136 0 : *x2even++ = x[j] + x[k];
137 0 : *x2even++ = x[j] - x[k];
138 0 : *x2odd++ = x[++j] + x[++k];
139 0 : *x2odd++ = x[j] - x[k];
140 : }
141 : }
142 :
143 : /*-----------------------------------------------------------------*
144 : * 1st Stage Loop has been Unrolled because n4 is '1' and that
145 : * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
146 : * and the associated pointers initialization.
147 : * Also, it allows to Put the Data from 'temp' back into 'x' due
148 : * to the previous Combined Digit Reverse and Length two butterflies
149 : *-----------------------------------------------------------------*/
150 :
151 : /*for_ (k = 2; k < 3; k++)*/
152 : {
153 0 : x0 = temp;
154 0 : x1 = x0 + 2;
155 0 : x2 = x;
156 :
157 0 : for ( i = 0; i < n; i += 4 )
158 : {
159 0 : *x2++ = *x0++ + *x1; /* x[i] = xt + x[i+n2]; */
160 0 : *x2++ = *x0;
161 0 : *x2++ = *--x0 - *x1++; /* x[i+n2] = xt - x[i+n2]; */
162 0 : *x2++ = -*x1; /* x[i+n2+n4] = -x[i+n2+n4]; */
163 :
164 0 : x0 += 4;
165 0 : x1 += 3; /* x1 has already advanced */
166 : }
167 : }
168 : }
169 : else
170 : {
171 : /*-----------------------------------------------------------------*
172 : * Digit reverse counter
173 : *-----------------------------------------------------------------*/
174 :
175 0 : j = 0;
176 0 : x0 = &x[0];
177 0 : for ( i = 0; i < n - 1; i++ )
178 : {
179 0 : if ( i < j )
180 : {
181 0 : xt = x[j];
182 0 : x[j] = *x0;
183 0 : *x0 = xt;
184 : }
185 0 : x0++;
186 0 : k = n / 2;
187 0 : while ( k <= j )
188 : {
189 0 : j -= k;
190 0 : k = k >> 1;
191 : }
192 0 : j += k;
193 : }
194 :
195 : /*-----------------------------------------------------------------*
196 : * Length two butterflies
197 : *-----------------------------------------------------------------*/
198 :
199 0 : x0 = &x[0];
200 0 : x1 = &x[1];
201 0 : for ( i = 0; i < n / 2; i++ )
202 : {
203 0 : *x1 = *x0 - *x1;
204 0 : *x0 = *x0 * 2 - *x1;
205 :
206 0 : x0++;
207 0 : x0++;
208 0 : x1++;
209 0 : x1++;
210 : }
211 :
212 : /*-----------------------------------------------------------------*
213 : * 1st Stage Loop has been Unrolled because n4 is '1' and that
214 : * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
215 : * and the associated pointers initialization.
216 : *-----------------------------------------------------------------*/
217 :
218 : /* for_ (k = 2; k < 3; k++) */
219 : {
220 0 : x0 = x;
221 0 : x1 = x0 + 2;
222 :
223 0 : for ( i = 0; i < n; i += 4 )
224 : {
225 0 : *x1 = *x0 - *x1; /* x[i+n2] = xt - x[i+n2]; */
226 0 : *x0 = *x0 * 2 - *x1++; /* x[i] = xt + x[i+n2]; */
227 0 : *x1 = -*x1; /* x[i+n2+n4] = -x[i+n2+n4]; */
228 :
229 0 : x0 += 4;
230 0 : x1 += 3; /* x1 has already advanced */
231 : }
232 : }
233 : }
234 :
235 : /*-----------------------------------------------------------------*
236 : * Other butterflies
237 : *
238 : * The implementation described in [1] has been changed by using
239 : * table lookup for evaluating sine and cosine functions. The
240 : * variable ind and its increment step are needed to access table
241 : * entries. Note that this implementation assumes n4 to be so
242 : * small that ind will never exceed the table. Thus the input
243 : * argument n and the constant N_MAX_FFT must be set properly.
244 : *-----------------------------------------------------------------*/
245 :
246 0 : n4 = 1;
247 0 : n2 = 2;
248 0 : n1 = 4;
249 :
250 0 : step = N_MAX_DIV4;
251 :
252 0 : for ( k = 3; k <= m; k++ )
253 : {
254 0 : step >>= 1;
255 0 : n4 <<= 1;
256 0 : n2 <<= 1;
257 0 : n1 <<= 1;
258 :
259 0 : x0 = x;
260 0 : x1 = x0 + n2;
261 0 : x2 = x1 + n4;
262 :
263 0 : for ( i = 0; i < n; i += n1 )
264 : {
265 0 : *x1 = *x0 - *x1; /* x[i+n2] = xt - x[i+n2]; */
266 0 : *x0 = *x0 * 2 - *x1; /* x[i] = xt + x[i+n2]; */
267 0 : *x2 = -*x2; /* x[i+n2+n4] = -x[i+n2+n4]; */
268 :
269 0 : s = sincos_t_ext;
270 0 : c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 */
271 0 : xi1 = x0;
272 0 : xi3 = xi1 + n2;
273 0 : xi2 = xi3;
274 0 : x0 += n1;
275 0 : xi4 = x0;
276 :
277 0 : for ( j = 1; j < n4; j++ )
278 : {
279 0 : xi3++;
280 0 : xi1++;
281 0 : xi4--;
282 0 : xi2--;
283 0 : c += step;
284 0 : s += step; /* autoincrement by ar0 */
285 :
286 0 : t1 = *xi3 * *c + *xi4 * *s; /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); */
287 0 : t2 = *xi3 * *s - *xi4 * *c; /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); */
288 :
289 0 : *xi4 = *xi2 - t2;
290 0 : *xi2 = *xi1 - t1;
291 0 : *xi1 = *xi1 * 2 - *xi2;
292 0 : *xi3 = -2 * t2 - *xi4;
293 : }
294 :
295 0 : x1 += n1;
296 0 : x2 += n1;
297 : }
298 : }
299 :
300 0 : return;
301 : }
302 :
303 2118790 : void fft_rel_16_32fx(
304 : Word16 x[], /* i/o: input/output vector Qx */
305 : Word16 *q_x, /* extra scaling added on speech buffer*/
306 : Word16 i_subfr,
307 : const Word16 n, /* i : vector length */
308 : const Word16 m /* i : log2 of vector length */
309 : )
310 : {
311 : Word16 i, j, k, n1, n2, n4;
312 : Word16 step;
313 : Word32 xt, t1, t2;
314 : Word32 *x0, *x1, *x2;
315 : const Word16 *s, *c;
316 : Word32 *xi2, *xi3, *xi4, *xi1;
317 :
318 : Word32 fft_bff32[L_FFT];
319 2118790 : Copy_Scale_sig_16_32_no_sat( x, fft_bff32, L_FFT, 0 ); // copying x to fft_bff32 without scaling
320 :
321 : /*-----------------------------------------------------------------*
322 : * Digit reverse counter
323 : *-----------------------------------------------------------------*/
324 :
325 2118790 : j = 0;
326 2118790 : move16();
327 2118790 : x0 = &fft_bff32[0]; // Qx
328 542410240 : FOR( i = 0; i < n - 1; i++ )
329 : {
330 540291450 : IF( LT_16( i, j ) )
331 : {
332 254254800 : xt = fft_bff32[j]; // Qx
333 254254800 : move32();
334 254254800 : fft_bff32[j] = *x0; // Qx
335 254254800 : move32();
336 254254800 : *x0 = xt; // Qx
337 254254800 : move32();
338 : }
339 540291450 : x0++;
340 540291450 : k = shr( n, 1 );
341 1063632580 : WHILE( ( k <= j ) )
342 : {
343 523341130 : j = sub( j, k );
344 523341130 : k = shr( k, 1 );
345 : }
346 540291450 : j = add( j, k );
347 : }
348 :
349 : /*-----------------------------------------------------------------*
350 : * Length two butterflies
351 : *-----------------------------------------------------------------*/
352 :
353 2118790 : x0 = &fft_bff32[0];
354 2118790 : x1 = &fft_bff32[1];
355 273323910 : FOR( i = 0; i < ( n >> 1 ); i++ )
356 : {
357 271205120 : xt = *x0;
358 271205120 : move32();
359 271205120 : *x0 = L_add( xt, *x1 );
360 271205120 : move32();
361 271205120 : *x1 = L_sub( xt, *x1 );
362 271205120 : move32();
363 271205120 : x0++;
364 271205120 : x0++;
365 271205120 : x1++;
366 271205120 : x1++;
367 : }
368 :
369 : /*-----------------------------------------------------------------*
370 : * Other butterflies
371 : *
372 : * The implementation described in [1] has been changed by using
373 : * table lookup for evaluating sine and cosine functions. The
374 : * variable ind and its increment step are needed to access table
375 : * entries. Note that this implementation assumes n4 to be so
376 : * small that ind will never exceed the table. Thus the input
377 : * argument n and the constant N_MAX_SAS must be set properly.
378 : *-----------------------------------------------------------------*/
379 :
380 2118790 : n2 = 1;
381 2118790 : move16();
382 : /* step = N_MAX_SAS/4; */
383 16950320 : FOR( k = 2; k <= m; k++ )
384 : {
385 14831530 : n4 = n2;
386 14831530 : move16();
387 14831530 : n2 = shl( n4, 1 );
388 14831530 : n1 = shl( n2, 1 );
389 :
390 14831530 : step = idiv1616( N_MAX_SAS, n1 );
391 :
392 14831530 : x0 = fft_bff32;
393 14831530 : x1 = fft_bff32 + n2;
394 14831530 : x2 = fft_bff32 + add( n2, n4 );
395 283917860 : FOR( i = 0; i < n; i += n1 )
396 : {
397 269086330 : xt = *x0;
398 269086330 : move32(); /* xt = x[i]; */
399 269086330 : *x0 = L_add( xt, *x1 );
400 269086330 : move32(); /* x[i] = xt + x[i+n2]; */
401 269086330 : *x1 = L_sub( xt, *x1 );
402 269086330 : move32(); /* x[i+n2] = xt - x[i+n2]; */
403 269086330 : *x2 = L_negate( *x2 );
404 269086330 : move32(); /* x[i+n2+n4] = -x[i+n2+n4]; */
405 :
406 :
407 269086330 : s = sincos_t_fx + step; // Q15
408 269086330 : c = s + 64; // Q15
409 269086330 : xi1 = fft_bff32 + add( i, 1 );
410 269086330 : xi3 = xi1 + n2;
411 269086330 : xi2 = xi3 - 2;
412 269086330 : xi4 = xi1 + sub( n1, 2 );
413 :
414 949217920 : FOR( j = 1; j < n4; j++ )
415 : {
416 680131590 : t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx */
417 680131590 : t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx */
418 680131590 : *xi4 = L_sub( *xi2, t2 );
419 680131590 : move32();
420 680131590 : *xi3 = L_negate( L_add( *xi2, t2 ) );
421 680131590 : move32();
422 680131590 : *xi2 = L_sub( *xi1, t1 );
423 680131590 : move32();
424 680131590 : *xi1 = L_add( *xi1, t1 );
425 680131590 : move32();
426 :
427 680131590 : xi4--;
428 680131590 : xi2--;
429 680131590 : xi3++;
430 680131590 : xi1++;
431 680131590 : c += step;
432 680131590 : s += step; /* autoincrement by ar0 */
433 : }
434 :
435 269086330 : x0 += n1;
436 269086330 : x1 += n1;
437 269086330 : x2 += n1;
438 : }
439 : /* step = shr(step, 1); */
440 : }
441 2118790 : Word16 norm = L_norm_arr( fft_bff32, L_FFT );
442 2118790 : IF( i_subfr == 0 )
443 : {
444 1059395 : Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
445 1059395 : *q_x = sub( norm, 16 );
446 1059395 : move16();
447 : }
448 : ELSE
449 : {
450 1059395 : IF( LT_16( sub( norm, 16 ), *q_x ) )
451 : {
452 211597 : scale_sig( x - L_FFT, L_FFT, sub( sub( norm, 16 ), *q_x ) );
453 211597 : Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
454 211597 : *q_x = sub( norm, 16 );
455 211597 : move16();
456 : }
457 : ELSE
458 : {
459 847798 : Copy_Scale_sig32_16( fft_bff32, x, L_FFT, add( 16, *q_x ) );
460 : }
461 : }
462 :
463 2118790 : return;
464 : }
465 :
466 206951 : void fft_rel_fx(
467 : Word16 x[], /* i/o: input/output vector Qx */
468 : const Word16 n, /* i : vector length */
469 : const Word16 m /* i : log2 of vector length */
470 : )
471 : {
472 : Word16 i, j, k, n1, n2, n4;
473 : Word16 step;
474 : Word16 xt, t1, t2;
475 : Word16 *x0, *x1, *x2;
476 : const Word16 *s, *c;
477 : Word16 *xi2, *xi3, *xi4, *xi1;
478 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
479 206951 : Flag Overflow = 0;
480 206951 : move32();
481 : #endif
482 :
483 :
484 : /*-----------------------------------------------------------------*
485 : * Digit reverse counter
486 : *-----------------------------------------------------------------*/
487 :
488 206951 : j = 0;
489 206951 : move16();
490 206951 : x0 = &x[0]; // Qx
491 206951 : move16();
492 46004096 : FOR( i = 0; i < n - 1; i++ )
493 : {
494 45797145 : IF( LT_16( i, j ) )
495 : {
496 21540200 : xt = x[j]; // Qx
497 21540200 : move16();
498 21540200 : x[j] = *x0; // Qx
499 21540200 : move16();
500 21540200 : *x0 = xt; // Qx
501 21540200 : move16();
502 : }
503 45797145 : x0++;
504 45797145 : k = shr( n, 1 );
505 90104762 : WHILE( ( k <= j ) )
506 : {
507 44307617 : j = sub( j, k );
508 44307617 : k = shr( k, 1 );
509 : }
510 45797145 : j = add( j, k );
511 : }
512 :
513 : /*-----------------------------------------------------------------*
514 : * Length two butterflies
515 : *-----------------------------------------------------------------*/
516 :
517 206951 : x0 = &x[0];
518 206951 : move16();
519 206951 : x1 = &x[1];
520 206951 : move16();
521 23208999 : FOR( i = 0; i < ( n >> 1 ); i++ )
522 : {
523 23002048 : xt = *x0;
524 23002048 : move16();
525 23002048 : *x0 = add_o( xt, *x1, &Overflow );
526 23002048 : move16();
527 23002048 : *x1 = sub_o( xt, *x1, &Overflow );
528 23002048 : move16();
529 23002048 : x0++;
530 23002048 : x0++;
531 23002048 : x1++;
532 23002048 : x1++;
533 : }
534 :
535 : /*-----------------------------------------------------------------*
536 : * Other butterflies
537 : *
538 : * The implementation described in [1] has been changed by using
539 : * table lookup for evaluating sine and cosine functions. The
540 : * variable ind and its increment step are needed to access table
541 : * entries. Note that this implementation assumes n4 to be so
542 : * small that ind will never exceed the table. Thus the input
543 : * argument n and the constant N_MAX_SAS must be set properly.
544 : *-----------------------------------------------------------------*/
545 :
546 206951 : n2 = 1;
547 206951 : move16();
548 : /* step = N_MAX_SAS/4; */
549 1489528 : FOR( k = 2; k <= m; k++ )
550 : {
551 1282577 : n4 = n2;
552 1282577 : move16();
553 1282577 : n2 = shl( n4, 1 );
554 1282577 : n1 = shl( n2, 1 );
555 :
556 1282577 : step = idiv1616( N_MAX_SAS, n1 );
557 :
558 1282577 : x0 = x;
559 1282577 : x1 = x + n2;
560 1282577 : x2 = x + add( n2, n4 );
561 24077674 : FOR( i = 0; i < n; i += n1 )
562 : {
563 22795097 : xt = *x0;
564 22795097 : move16(); /* xt = x[i]; */
565 22795097 : *x0 = add_o( xt, *x1, &Overflow );
566 22795097 : move16(); /* x[i] = xt + x[i+n2]; */
567 22795097 : *x1 = sub_o( xt, *x1, &Overflow );
568 22795097 : move16(); /* x[i+n2] = xt - x[i+n2]; */
569 22795097 : *x2 = negate( *x2 );
570 22795097 : move16(); /* x[i+n2+n4] = -x[i+n2+n4]; */
571 :
572 :
573 22795097 : s = sincos_t_fx + step; // Q15
574 22795097 : c = s + 64; // Q15
575 22795097 : xi1 = x + add( i, 1 );
576 22795097 : xi3 = xi1 + n2;
577 22795097 : xi2 = xi3 - 2;
578 22795097 : xi4 = xi1 + sub( n1, 2 );
579 :
580 80341088 : FOR( j = 1; j < n4; j++ )
581 : {
582 57545991 : t1 = add_o( mult_r( *xi3, *c ), mult_r( *xi4, *s ), &Overflow ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx */
583 57545991 : t2 = sub_o( mult_r( *xi3, *s ), mult_r( *xi4, *c ), &Overflow ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx */
584 57545991 : *xi4 = sub_o( *xi2, t2, &Overflow );
585 57545991 : move16();
586 57545991 : *xi3 = negate( add_o( *xi2, t2, &Overflow ) );
587 57545991 : move16();
588 57545991 : *xi2 = sub_o( *xi1, t1, &Overflow );
589 57545991 : move16();
590 57545991 : *xi1 = add_o( *xi1, t1, &Overflow );
591 57545991 : move16();
592 :
593 57545991 : xi4--;
594 57545991 : xi2--;
595 57545991 : xi3++;
596 57545991 : xi1++;
597 57545991 : c += step;
598 57545991 : s += step; /* autoincrement by ar0 */
599 : }
600 :
601 22795097 : x0 += n1;
602 22795097 : x1 += n1;
603 22795097 : x2 += n1;
604 : }
605 : /* step = shr(step, 1); */
606 : }
607 :
608 206951 : return;
609 : }
610 :
611 44321 : void fft_rel_fx32(
612 : Word32 x[], /* i/o: input/output vector Qx */
613 : const Word16 n, /* i : vector length */
614 : const Word16 m /* i : log2 of vector length */
615 : )
616 : {
617 : Word16 i, j, k, n1, n2, n4;
618 : Word16 step;
619 : Word32 xt, t1, t2;
620 : Word32 *x0, *x1, *x2;
621 : Word32 *xi2, *xi3, *xi4, *xi1;
622 : const Word16 *s, *c;
623 : const Word16 *idx;
624 :
625 : /* !!!! NMAX = 256 is hardcoded here (similar optimizations should be done for NMAX > 256) !!! */
626 :
627 : Word32 *x2even, *x2odd;
628 : Word32 temp[512];
629 :
630 44321 : test();
631 44321 : test();
632 44321 : IF( EQ_16( n, 128 ) || EQ_16( n, 256 ) || EQ_16( n, 512 ) )
633 : {
634 44321 : idx = fft256_read_indexes;
635 :
636 : /* Combined Digit reverse counter & Length two butterflies */
637 44321 : IF( EQ_16( n, 128 ) )
638 : {
639 0 : x2 = temp;
640 0 : FOR( i = 0; i < 64; i++ )
641 : {
642 0 : j = *idx++;
643 0 : move16();
644 0 : k = *idx++;
645 0 : move16();
646 :
647 0 : *x2++ = L_add( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
648 0 : move16();
649 0 : *x2++ = L_sub( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
650 0 : move16();
651 : }
652 : }
653 44321 : ELSE IF( EQ_16( n, 256 ) )
654 : {
655 488 : x2 = temp;
656 62952 : FOR( i = 0; i < 128; i++ )
657 : {
658 62464 : j = *idx++;
659 62464 : move16();
660 62464 : k = *idx++;
661 62464 : move16();
662 :
663 62464 : *x2++ = L_add( x[j], x[k] ); // Qx
664 62464 : move16();
665 62464 : *x2++ = L_sub( x[j], x[k] ); // Qx
666 62464 : move16();
667 : }
668 : }
669 43833 : ELSE IF( EQ_16( n, 512 ) )
670 : {
671 43833 : x2even = temp;
672 43833 : x2odd = temp + 256;
673 :
674 5654457 : FOR( i = 0; i < 128; i++ )
675 : {
676 5610624 : j = shl( *idx, 1 );
677 5610624 : idx++;
678 5610624 : k = shl( *idx, 1 );
679 5610624 : idx++;
680 :
681 5610624 : *x2even++ = L_add( x[j], x[k] ); // Qx
682 5610624 : move16();
683 5610624 : *x2even++ = L_sub( x[j], x[k] ); // Qx
684 5610624 : move16();
685 5610624 : j = add( j, 1 );
686 5610624 : k = add( k, 1 );
687 5610624 : *x2odd++ = L_add( x[j], x[k] ); // Qx
688 5610624 : move16();
689 5610624 : *x2odd++ = L_sub( x[j], x[k] ); // Qx
690 5610624 : move16();
691 : }
692 : }
693 :
694 : /*-----------------------------------------------------------------*
695 : * 1st Stage Loop has been Unrolled because n4 is '1' and that
696 : * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
697 : * and the associated pointers initialization.
698 : * Also, it allows to Put the Data from 'temp' back into 'x' due
699 : * to the previous Combined Digit Reverse and Length two butterflies
700 : *-----------------------------------------------------------------*/
701 :
702 : /*for_ (k = 2; k < 3; k++)*/
703 : {
704 44321 : x0 = temp;
705 44321 : x1 = x0 + 2;
706 44321 : x2 = x; // Qx
707 :
708 5686177 : FOR( i = 0; i < n; i += 4 )
709 : {
710 5641856 : *x2++ = L_add( *x0++, *x1 ); /* x[i] = xt + x[i+n2]; */
711 5641856 : move16();
712 5641856 : *x2++ = *x0;
713 5641856 : move16();
714 5641856 : x0--;
715 5641856 : *x2++ = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; */
716 5641856 : move16();
717 5641856 : x1++;
718 5641856 : *x2++ = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4]; */
719 5641856 : move16();
720 :
721 5641856 : x0 += 4;
722 5641856 : x1 += 3; /* x1 has already advanced */
723 : }
724 : }
725 : }
726 : ELSE
727 : {
728 : /*-----------------------------------------------------------------*
729 : * Digit reverse counter
730 : *-----------------------------------------------------------------*/
731 :
732 0 : j = 0;
733 0 : move16();
734 0 : x0 = &x[0]; // Qx
735 0 : FOR( i = 0; i < ( n - 1 ); i++ )
736 : {
737 0 : IF( LT_16( i, j ) )
738 : {
739 0 : xt = x[j]; // Qx
740 0 : move32();
741 0 : x[j] = *x0;
742 0 : move32();
743 0 : *x0 = xt;
744 0 : move32();
745 : }
746 0 : x0++;
747 0 : k = shr( n, 1 );
748 0 : WHILE( ( k <= j ) )
749 : {
750 0 : j = sub( j, k );
751 0 : k = shr( k, 1 );
752 : }
753 0 : j = add( j, k );
754 : }
755 :
756 : /*-----------------------------------------------------------------*
757 : * Length two butterflies
758 : *-----------------------------------------------------------------*/
759 :
760 0 : x0 = &x[0]; // Qx
761 0 : x1 = &x[1]; // Qx
762 0 : FOR( i = 0; i < ( n >> 1 ); i++ )
763 : {
764 0 : *x1 = L_sub( *x0, *x1 ); // Qx
765 0 : move32();
766 0 : *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); //*x0 * 2 - *x1 (Qx)
767 0 : move32();
768 :
769 0 : x0++;
770 0 : x0++;
771 0 : x1++;
772 0 : x1++;
773 : }
774 :
775 : /*-----------------------------------------------------------------*
776 : * 1st Stage Loop has been Unrolled because n4 is '1' and that
777 : * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
778 : * and the associated pointers initialization.
779 : *-----------------------------------------------------------------*/
780 :
781 : /* for_ (k = 2; k < 3; k++) */
782 : {
783 0 : x0 = x; // Qx
784 0 : x1 = x0 + 2;
785 :
786 0 : FOR( i = 0; i < n; i += 4 )
787 : {
788 0 : *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; Qx*/
789 0 : move32();
790 0 : *x0 = L_sub( L_shl( *x0, 1 ), *x1++ ); /* x[i] = xt + x[i+n2]; */ /**x0 * 2 - *x1 (Qx)*/
791 0 : move32();
792 0 : *x1 = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4]; Qx*/
793 0 : move32();
794 :
795 0 : x0 += 4;
796 0 : x1 += 3; /* x1 has already advanced */
797 : }
798 : }
799 : }
800 :
801 : /*-----------------------------------------------------------------*
802 : * Other butterflies
803 : *
804 : * The implementation described in [1] has been changed by using
805 : * table lookup for evaluating sine and cosine functions. The
806 : * variable ind and its increment step are needed to access table
807 : * entries. Note that this implementation assumes n4 to be so
808 : * small that ind will never exceed the table. Thus the input
809 : * argument n and the constant N_MAX_FFT must be set properly.
810 : *-----------------------------------------------------------------*/
811 :
812 44321 : n4 = 1;
813 44321 : move16();
814 44321 : n2 = 2;
815 44321 : move16();
816 44321 : n1 = 4;
817 44321 : move16();
818 :
819 44321 : step = N_MAX_DIV4;
820 44321 : move16();
821 :
822 354080 : FOR( k = 3; k <= m; k++ )
823 : {
824 309759 : step = shr( step, 1 );
825 309759 : n4 = shl( n4, 1 );
826 309759 : n2 = shl( n2, 1 );
827 309759 : n1 = shl( n1, 1 );
828 :
829 309759 : x0 = x;
830 309759 : x1 = x0 + n2;
831 309759 : x2 = x1 + n4;
832 :
833 5907294 : FOR( i = 0; i < n; i += n1 )
834 : {
835 5597535 : *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; */
836 5597535 : move32();
837 5597535 : *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); /* x[i] = xt + x[i+n2]; */ /**x0 * 2 - *x1 (Qx)*/
838 5597535 : move32();
839 5597535 : *x2 = L_negate( *x2 ); /* x[i+n2+n4] = -x[i+n2+n4]; Qx */
840 5597535 : move32();
841 :
842 5597535 : s = sincos_t_ext_fx; // Q15
843 5597535 : c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 Q15*/
844 5597535 : xi1 = x0;
845 5597535 : xi3 = xi1 + n2;
846 5597535 : xi2 = xi3;
847 5597535 : x0 += n1;
848 5597535 : xi4 = x0;
849 :
850 39461760 : FOR( j = 1; j < n4; j++ )
851 : {
852 33864225 : xi3++;
853 33864225 : xi1++;
854 33864225 : xi4--;
855 33864225 : xi2--;
856 33864225 : c += step;
857 33864225 : s += step; /* autoincrement by ar0 */
858 :
859 33864225 : t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx*/
860 33864225 : t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx*/
861 :
862 33864225 : *xi4 = L_sub( *xi2, t2 );
863 33864225 : move32();
864 33864225 : *xi2 = L_sub( *xi1, t1 );
865 33864225 : move32();
866 33864225 : *xi1 = L_sub( L_shl( *xi1, 1 ), *xi2 ); // Qx
867 33864225 : move32();
868 33864225 : *xi3 = L_negate( L_add( L_shl( t2, 1 ), *xi4 ) ); // Qx
869 33864225 : move32();
870 : }
871 :
872 5597535 : x1 += n1;
873 5597535 : x2 += n1;
874 : }
875 : }
876 :
877 44321 : return;
878 : }
|