Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include <stdint.h>
6 : #include "options.h" /* Compilation switches */
7 : #include "prot_fx.h" /* Function prototypes */
8 : #include "rom_com.h" /* Static table prototypes */
9 : #include "stl.h"
10 : #include "stdint.h"
11 : #include "wmc_auto.h"
12 :
13 : /*---------------------------------------------------------------------*
14 : * Local constants
15 : *---------------------------------------------------------------------*/
16 :
17 : #define N_MAX_FFT 1024
18 : #define N_MAX_DIV2 ( N_MAX_FFT >> 1 )
19 : #define N_MAX_DIV4 ( N_MAX_DIV2 >> 1 )
20 : #define INV_SQR2_FX 23170
21 : #define N_MAX_SAS 256
22 :
23 : /*------------------------------------------------------------------
24 : *
25 : * This is an implementation of decimation-in-time FFT algorithm for
26 : * real sequences. The techniques used here can be found in several
27 : * books, e.g., i) Proakis and Manolakis, "Digital Signal Processing",
28 : * 2nd Edition, Chapter 9, and ii) W.H. Press et. al., "Numerical
29 : * Recipes in C", 2nd Edition, Chapter 12.
30 : *
31 : * Input - There are two inputs to this function:
32 : *
33 : * 1) An integer pointer to the input data array
34 : * 2) An integer value which should be set as +1 for FFT
35 : * and some other value, e.g., -1 for ifFT
36 : *
37 : * Output - There is no return value.
38 : * The input data are replaced with transformed data. if the
39 : * input is a real time domain sequence, it is replaced with
40 : * the complex FFT for positive frequencies. The FFT value
41 : * for DC and the foldover frequency are combined to form the
42 : * first complex number in the array. The remaining complex
43 : * numbers correspond to increasing frequencies. if the input
44 : * is a complex frequency domain sequence arranged as above,
45 : * it is replaced with the corresponding time domain sequence.
46 : *
47 : * Notes:
48 : *
49 : * 1) This function is designed to be a part of a noise supp-
50 : * ression algorithm that requires 128-point FFT of real
51 : * sequences. This is achieved here through a 64-point
52 : * complex FFT. Consequently, the FFT size information is
53 : * not transmitted explicitly. However, some flexibility
54 : * is provided in the function to change the size of the
55 : * FFT by specifying the size information through "define"
56 : * statements.
57 : *
58 : * 2) The values of the complex sinusoids used in the FFT
59 : * algorithm are computed once (i.e., the first time the
60 : * r_fft function is called) and stored in a table. To
61 : * further speed up the algorithm, these values can be
62 : * precomputed and stored in a ROM table in actual DSP
63 : * based implementations.
64 : *
65 : * 3) In the c_fft function, the FFT values are divided by
66 : * 2 after each stage of computation thus dividing the
67 : * final FFT values by 64. No multiplying factor is used
68 : * for the ifFT. This is somewhat different from the usual
69 : * definition of FFT where the factor 1/N, i.e., 1/64, is
70 : * used for the ifFT and not the FFT. No factor is used in
71 : * the r_fft function.
72 : *
73 : * 4) Much of the code for the FFT and ifFT parts in r_fft
74 : * and c_fft functions are similar and can be combined.
75 : * They are, however, kept separate here to speed up the
76 : * execution.
77 : *------------------------------------------------------------------------*/
78 : /*------------------------------------------------------------------------*
79 : * c_fft_fx:
80 : *
81 : * Computes the complex part of the split-radix FFT
82 : *------------------------------------------------------------------------*/
83 :
84 70648 : static void c_fft_fx(
85 : const Word16 *phs_tbl, /* i : Table of phases */
86 : Word16 SIZE, /* i : Size of the FFT */
87 : Word16 NUM_STAGE, /* i : Number of stages */
88 : const Word16 *in_ptr, /* i : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Qx*/
89 : Word16 *out_ptr, /* o : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Q(x - 1)*/
90 : /* in_ptr & out_ptr must not overlap! */
91 : const Word16 isign ) /* i : 1=fft, otherwise it is ifft*/
92 : {
93 : Word16 i, j, k, ii, jj, kk, ji, kj;
94 : Word32 L_tmp1, L_tmp2;
95 : Word16 tmp1, tmp2, tmp3, tmp4;
96 : const Word16 *table_ptr;
97 : const Word16 *input_ptr1, *input_ptr2, *input_ptr3, *input_ptr4;
98 70648 : Word16 shift = 0;
99 70648 : move16();
100 : /* Setup Reorder Variables */
101 70648 : table_ptr = NULL;
102 70648 : table_ptr = FFT_REORDER_1024;
103 70648 : SWITCH( SIZE )
104 : {
105 58 : case 1024:
106 58 : shift = 0;
107 58 : move16();
108 58 : BREAK;
109 384 : case 512:
110 384 : shift = 1;
111 384 : move16();
112 384 : BREAK;
113 6258 : case 256:
114 6258 : shift = 2;
115 6258 : move16();
116 6258 : BREAK;
117 150 : case 128:
118 150 : shift = 3;
119 150 : move16();
120 150 : BREAK;
121 63798 : case 64:
122 63798 : shift = 4;
123 63798 : move16();
124 63798 : BREAK;
125 : }
126 : /* The FFT part */
127 70648 : IF( isign != 0 )
128 : {
129 : /* Unrolled 1st/2nd Stage
130 : * 1) to take advantage of Table Values (0 & +/- 16384)
131 : * 2) to perform reordering of Input Values
132 : */
133 504771 : FOR( k = 0; k < SIZE; k += 8 )
134 : {
135 : /*
136 : * This loop use:
137 : * 4 Word16 (tmp1...tmp4)
138 : * 2 Word32 (L_tmp1 & L_tmp2)
139 : * 4 Pointers (table_ptr, input_ptr1, input_ptr2, input_ptr3)
140 : *
141 : * The addition of 'in_ptr' + and index value from 'reorder_ptr'
142 : * is counted as a move16()
143 : */
144 :
145 466360 : input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift ); // Qx
146 :
147 466360 : L_tmp1 = L_mult( *input_ptr1++, 16384 );
148 466360 : L_tmp2 = L_mult( *input_ptr1, 16384 );
149 :
150 466360 : input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
151 :
152 466360 : tmp1 = msu_r_sat( L_tmp1, *input_ptr1, 16384 );
153 466360 : tmp3 = mac_r_sat( L_tmp1, *input_ptr1++, 16384 );
154 466360 : input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
155 466360 : input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
156 :
157 466360 : L_tmp1 = L_mult( *input_ptr2++, 16384 );
158 466360 : tmp2 = mac_r_sat( L_tmp1, *input_ptr3, 16384 );
159 466360 : tmp4 = msu_r_sat( L_tmp1, *input_ptr3++, 16384 );
160 466360 : L_tmp1 = L_mult( tmp3, 16384 );
161 466360 : out_ptr[k] = mac_r_sat( L_tmp1, tmp2, 16384 );
162 466360 : move16();
163 466360 : out_ptr[k + 4] = msu_r_sat( L_tmp1, tmp2, 16384 );
164 466360 : move16();
165 :
166 466360 : tmp2 = mac_r_sat( L_tmp2, *input_ptr1, 16384 );
167 466360 : tmp3 = msu_r_sat( L_tmp2, *input_ptr1, 16384 );
168 466360 : L_tmp2 = L_mult( *input_ptr2, 16384 );
169 :
170 466360 : L_tmp1 = L_mult( tmp1, 16384 );
171 466360 : tmp1 = msu_r_sat( L_tmp2, *input_ptr3, 16384 );
172 466360 : out_ptr[k + 2] = mac_r_sat( L_tmp1, tmp1, 16384 );
173 466360 : move16();
174 466360 : out_ptr[k + 6] = msu_r_sat( L_tmp1, tmp1, 16384 );
175 466360 : move16();
176 466360 : L_tmp1 = L_mult( tmp2, 16384 );
177 466360 : tmp2 = mac_r_sat( L_tmp2, *input_ptr3, 16384 );
178 466360 : out_ptr[k + 1] = mac_r_sat( L_tmp1, tmp2, 16384 );
179 466360 : move16();
180 466360 : out_ptr[k + 5] = msu_r_sat( L_tmp1, tmp2, 16384 );
181 466360 : move16();
182 466360 : L_tmp1 = L_mult( tmp3, 16384 );
183 466360 : out_ptr[k + 3] = msu_r_sat( L_tmp1, tmp4, 16384 );
184 466360 : move16();
185 466360 : out_ptr[k + 7] = mac_r_sat( L_tmp1, tmp4, 16384 );
186 466360 : move16();
187 : }
188 :
189 : /* Remaining Stages */
190 166651 : FOR( i = 2; i < NUM_STAGE; i++ )
191 : {
192 : /* i is stage counter */
193 128240 : jj = shl( 2, i ); /* FFT size */
194 128240 : kk = shl( jj, 1 ); /* 2 * FFT size */
195 128240 : ii = shr( SIZE, i );
196 128240 : ji = 0;
197 128240 : move16(); /* ji is phase table index */
198 :
199 1840036 : FOR( j = 0; j < jj; j += 2 )
200 : {
201 : /* j is sample counter */
202 5374276 : FOR( k = j; k < SIZE; k += kk )
203 : {
204 : /* k is butterfly top */
205 3662480 : kj = add( k, jj ); /* kj is butterfly bottom */
206 :
207 : /* Butterfly computations */
208 3662480 : L_tmp1 = L_msu( L_mult( *( out_ptr + kj ), phs_tbl[ji] ),
209 3662480 : *( out_ptr + kj + 1 ), phs_tbl[ji + 1] );
210 3662480 : L_tmp2 = L_mac( L_mult( *( out_ptr + kj + 1 ), phs_tbl[ji] ),
211 3662480 : *( out_ptr + kj ), phs_tbl[ji + 1] );
212 :
213 3662480 : out_ptr[kj] = mac_r( L_negate( L_tmp1 ), out_ptr[k], 16384 );
214 3662480 : move16();
215 3662480 : out_ptr[kj + 1] = mac_r( L_negate( L_tmp2 ), out_ptr[k + 1], 16384 );
216 3662480 : move16();
217 3662480 : out_ptr[k] = mac_r( L_tmp1, out_ptr[k], 16384 );
218 3662480 : move16();
219 3662480 : out_ptr[k + 1] = mac_r( L_tmp2, out_ptr[k + 1], 16384 );
220 3662480 : move16();
221 : }
222 1711796 : ji = add( ji, ii );
223 : }
224 : }
225 : }
226 : ELSE /* The ifFT part */
227 : {
228 : /* Unrolled 1st/2nd Stage
229 : * 1) to take advantage of Table Values (0 & +/- 16384)
230 : * 2) to perform reordering of Input Values
231 : */
232 310917 : FOR( k = 0; k < SIZE; k += 8 )
233 : {
234 : /*
235 : * This loop use:
236 : * 4 Word16 (tmp1...tmp4)
237 : * 2 Word32 (L_tmp1 & L_tmp2)
238 : * 5 Pointers (reorder_ptr, input_ptr1...input_ptr4)
239 : *
240 : * The addition of 'in_ptr' + and index value from 'reorder_ptr'
241 : * is counted as a move16()
242 : */
243 278680 : input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
244 278680 : input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
245 :
246 278680 : input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
247 278680 : input_ptr4 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
248 :
249 :
250 278680 : tmp3 = sub( *input_ptr1, *input_ptr2 );
251 278680 : tmp4 = add( *input_ptr1++, *input_ptr2++ );
252 :
253 278680 : tmp2 = sub( input_ptr3[0], input_ptr4[0] );
254 278680 : tmp1 = sub( input_ptr3[1], input_ptr4[1] );
255 :
256 278680 : out_ptr[k + 2] = sub( tmp3, tmp1 );
257 278680 : move16();
258 278680 : out_ptr[k + 6] = add( tmp3, tmp1 );
259 278680 : move16();
260 :
261 278680 : tmp1 = sub( *input_ptr1, *input_ptr2 );
262 278680 : out_ptr[k + 3] = add( tmp1, tmp2 );
263 278680 : move16();
264 278680 : out_ptr[k + 7] = sub( tmp1, tmp2 );
265 278680 : move16();
266 :
267 278680 : tmp1 = add( input_ptr3[0], input_ptr4[0] );
268 278680 : tmp3 = add( input_ptr3[1], input_ptr4[1] );
269 :
270 278680 : out_ptr[k] = add( tmp4, tmp1 );
271 278680 : move16();
272 278680 : out_ptr[k + 4] = sub( tmp4, tmp1 );
273 278680 : move16();
274 :
275 278680 : tmp4 = add( *input_ptr1, *input_ptr2 );
276 278680 : out_ptr[k + 1] = add( tmp4, tmp3 );
277 278680 : move16();
278 278680 : out_ptr[k + 5] = sub( tmp4, tmp3 );
279 278680 : move16();
280 : }
281 :
282 32237 : table_ptr = phs_tbl + SIZE; /* access part of table that is scaled by 2 */
283 :
284 : /* Remaining Stages */
285 129991 : FOR( i = 2; i < NUM_STAGE; i++ )
286 : {
287 : /* i is stage counter */
288 97754 : jj = shl( 2, i ); /* FFT size */
289 97754 : kk = shl( jj, 1 ); /* 2 * FFT size */
290 97754 : ii = shr( SIZE, i );
291 97754 : ji = 0;
292 97754 : move16(); /* ji is phase table index */
293 :
294 1083526 : FOR( j = 0; j < jj; j += 2 )
295 : {
296 : /* j is sample counter */
297 : /* This can be computed by successive add_fxitions of ii to ji, starting from 0
298 : hence line-count it as a one-line add (still need to increment op count!!) */
299 :
300 2806204 : FOR( k = j; k < SIZE; k += kk )
301 : {
302 : /* k is butterfly top */
303 1820432 : kj = add( k, jj ); /* kj is butterfly bottom */
304 :
305 : /* Butterfly computations */
306 1820432 : tmp1 = mac_r_sat( L_mult_sat( out_ptr[kj], table_ptr[ji] ), out_ptr[kj + 1], table_ptr[ji + 1] );
307 :
308 1820432 : tmp2 = msu_r_sat( L_mult_sat( out_ptr[kj + 1], table_ptr[ji] ), out_ptr[kj], table_ptr[ji + 1] );
309 :
310 1820432 : out_ptr[kj] = sub_sat( out_ptr[k], tmp1 );
311 1820432 : move16();
312 1820432 : out_ptr[kj + 1] = sub_sat( out_ptr[k + 1], tmp2 );
313 1820432 : move16();
314 1820432 : out_ptr[k] = add_sat( out_ptr[k], tmp1 );
315 1820432 : move16();
316 1820432 : out_ptr[k + 1] = add_sat( out_ptr[k + 1], tmp2 );
317 1820432 : move16();
318 : }
319 985772 : ji = add( ji, ii );
320 : }
321 : }
322 : }
323 70648 : }
324 :
325 : /*--------------------------------------------------------------------------------*
326 : * r_fft_fx:
327 : *
328 : * Perform FFT fixed-point for real-valued sequences of length 32, 64 or 128
329 : *--------------------------------------------------------------------------------*/
330 70648 : void r_fft_fx_lc(
331 : const Word16 *phs_tbl, /* i : Table of phase */
332 : const Word16 SIZE, /* i : Size of the FFT */
333 : const Word16 SIZE2, /* i : Size / 2 */
334 : const Word16 NUM_STAGE, /* i : Number of stage */
335 : const Word16 *in_ptr, /* i : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Qx*/
336 : Word16 *out_ptr, /* o : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Q(x - 1)*/
337 : const Word16 isign /* i : 1=fft, otherwize it's ifft */
338 : )
339 : {
340 : Word16 tmp2_real, tmp2_imag;
341 : Word32 Ltmp1_real, Ltmp1_imag;
342 : Word16 i;
343 : Word32 Ltmp1;
344 : const Word16 *phstbl_ptrDn;
345 : Word16 *ptrDn;
346 : Word16 temp[1024]; /* Accommodates real input FFT size up to 1024. */
347 :
348 : /* Setup Pointers */
349 70648 : phstbl_ptrDn = &phs_tbl[SIZE - 1];
350 :
351 : /* The FFT part */
352 70648 : IF( isign != 0 )
353 : {
354 : Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
355 :
356 : /* Perform the complex FFT */
357 38411 : c_fft_fx( phs_tbl, SIZE, NUM_STAGE, in_ptr, temp, isign );
358 :
359 : /* First, handle the DC and foldover frequencies */
360 38411 : out_ptr[SIZE2] = sub( temp[0], temp[1] );
361 38411 : move16();
362 38411 : out_ptr[0] = sub( add( temp[0], temp[1] ), shr( NUM_STAGE, 1 ) );
363 38411 : move16(); /* DC have a small offset */
364 :
365 38411 : ptrDn = &temp[SIZE - 1];
366 :
367 38411 : ptImaDn = &out_ptr[SIZE - 1];
368 38411 : ptRealUp = &out_ptr[1];
369 38411 : ptImaUp = &out_ptr[SIZE2 + 1];
370 38411 : ptRealDn = &out_ptr[SIZE2 - 1];
371 :
372 : /* Now, handle the remaining positive frequencies */
373 971131 : FOR( i = 2; i <= SIZE2; i += 2 )
374 : {
375 932720 : Ltmp1_imag = L_mult( temp[i + 1], 16384 );
376 932720 : Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptrDn, 16384 );
377 932720 : tmp2_real = add_sat( temp[i + 1], *ptrDn-- );
378 :
379 932720 : Ltmp1_real = L_mult( temp[i], 16384 );
380 932720 : Ltmp1_real = L_mac_sat( Ltmp1_real, *ptrDn, 16384 );
381 932720 : tmp2_imag = sub( *ptrDn--, temp[i] );
382 :
383 :
384 932720 : *ptRealUp++ = msu_r_sat( L_mac_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
385 932720 : move16();
386 932720 : *ptImaDn-- = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
387 932720 : move16();
388 932720 : Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
389 932720 : Ltmp1_real = L_mac_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
390 932720 : *ptImaUp++ = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
391 932720 : move16();
392 932720 : *ptRealDn-- = mac_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
393 932720 : move16();
394 : }
395 : }
396 : ELSE /* The ifFT part */
397 : {
398 : const Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
399 :
400 : /* First, handle the DC and foldover frequencies */
401 32237 : Ltmp1 = L_mult( in_ptr[0], 16384 );
402 32237 : temp[0] = mac_r( Ltmp1, in_ptr[SIZE2], 16384 );
403 32237 : move16();
404 32237 : temp[1] = msu_r( Ltmp1, in_ptr[SIZE2], 16384 );
405 32237 : move16();
406 :
407 32237 : ptrDn = &temp[SIZE - 1];
408 :
409 : /* Here we cast to Word16 * from a const Word16 *. */
410 : /* This is ok because we use these pointers for */
411 : /* reading only. This is just to avoid declaring a */
412 : /* bunch of 4 other pointer with const Word16 *. */
413 32237 : ptImaDn = &in_ptr[SIZE - 1]; // Qx
414 32237 : ptRealUp = &in_ptr[1]; // Qx
415 32237 : ptImaUp = &in_ptr[SIZE2 + 1]; // Qx
416 32237 : ptRealDn = &in_ptr[SIZE2 - 1]; // Qx
417 :
418 : /* Now, handle the remaining positive frequencies */
419 589597 : FOR( i = 2; i <= SIZE2; i += 2 )
420 : {
421 557360 : Ltmp1_imag = L_mult( *ptImaDn, 16384 );
422 557360 : Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptImaUp, 16384 );
423 557360 : tmp2_real = add_sat( *ptImaDn--, *ptImaUp++ );
424 557360 : Ltmp1_real = L_mult( *ptRealUp, 16384 );
425 557360 : Ltmp1_real = L_mac_sat( Ltmp1_real, *ptRealDn, 16384 );
426 557360 : tmp2_imag = sub_sat( *ptRealUp++, *ptRealDn-- );
427 557360 : temp[i] = mac_r_sat( L_msu_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
428 557360 : move16();
429 557360 : temp[i + 1] = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
430 557360 : move16();
431 557360 : Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
432 557360 : Ltmp1_real = L_msu_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
433 557360 : *ptrDn-- = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
434 557360 : move16();
435 557360 : *ptrDn-- = msu_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
436 557360 : move16();
437 : }
438 :
439 : /* Perform the complex ifFT */
440 32237 : c_fft_fx( phs_tbl, SIZE, NUM_STAGE, temp, out_ptr, isign );
441 : }
442 70648 : }
443 :
444 :
445 : /*---------------------------------------------------------------------*
446 : * fft_rel()
447 : *
448 : * Computes the split-radix FFT in place for the real-valued
449 : * signal x of length n. The algorithm has been ported from
450 : * the Fortran code of [1].
451 : *
452 : * The function needs sine and cosine tables t_sin and t_cos,
453 : * and the constant N_MAX_FFT. The table entries are defined as
454 : * sin(2*pi*i) and cos(2*pi*i) for i = 0, 1, ..., N_MAX_FFT-1. The
455 : * implementation assumes that any entry will not be needed
456 : * outside the tables. Therefore, N_MAX_FFT and n must be properly
457 : * set. The function has been tested with the values n = 16,
458 : * 32, 64, 128, 256, and N_MAX_FFT = 1280.
459 : *
460 : * References
461 : * [1] H.V. Sorensen, D.L. Jones, M.T. Heideman, C.S. Burrus,
462 : * "Real-valued fast Fourier transform algorithm," IEEE
463 : * Trans. on Signal Processing, Vol.35, No.6, pp 849-863,
464 : * 1987.
465 : *
466 : * OUTPUT
467 : * x[0:n-1] Transform coeffients in the order re[0], re[1],
468 : * ..., re[n/2], im[n/2-1], ..., im[1].
469 : *---------------------------------------------------------------------*/
470 :
471 2186812 : void fft_rel_16_32fx(
472 : Word16 x[], /* i/o: input/output vector Qx */
473 : Word16 *q_x, /* extra scaling added on speech buffer*/
474 : Word16 i_subfr,
475 : const Word16 n, /* i : vector length */
476 : const Word16 m /* i : log2 of vector length */
477 : )
478 : {
479 : Word16 i, j, k, n1, n2, n4;
480 : Word16 step;
481 : Word32 xt, t1, t2;
482 : Word32 *x0, *x1, *x2;
483 : const Word16 *s, *c;
484 : Word32 *xi2, *xi3, *xi4, *xi1;
485 :
486 : Word32 fft_bff32[L_FFT];
487 2186812 : Copy_Scale_sig_16_32_no_sat( x, fft_bff32, L_FFT, 0 ); // copying x to fft_bff32 without scaling
488 :
489 : /*-----------------------------------------------------------------*
490 : * Digit reverse counter
491 : *-----------------------------------------------------------------*/
492 :
493 2186812 : j = 0;
494 2186812 : move16();
495 2186812 : x0 = &fft_bff32[0]; // Qx
496 559823872 : FOR( i = 0; i < n - 1; i++ )
497 : {
498 557637060 : IF( LT_16( i, j ) )
499 : {
500 262417440 : xt = fft_bff32[j]; // Qx
501 262417440 : move32();
502 262417440 : fft_bff32[j] = *x0; // Qx
503 262417440 : move32();
504 262417440 : *x0 = xt; // Qx
505 262417440 : move32();
506 : }
507 557637060 : x0++;
508 557637060 : k = shr( n, 1 );
509 1097779624 : WHILE( ( k <= j ) )
510 : {
511 540142564 : j = sub( j, k );
512 540142564 : k = shr( k, 1 );
513 : }
514 557637060 : j = add( j, k );
515 : }
516 :
517 : /*-----------------------------------------------------------------*
518 : * Length two butterflies
519 : *-----------------------------------------------------------------*/
520 :
521 2186812 : x0 = &fft_bff32[0];
522 2186812 : x1 = &fft_bff32[1];
523 282098748 : FOR( i = 0; i < ( n >> 1 ); i++ )
524 : {
525 279911936 : xt = *x0;
526 279911936 : move32();
527 279911936 : *x0 = L_add( xt, *x1 );
528 279911936 : move32();
529 279911936 : *x1 = L_sub( xt, *x1 );
530 279911936 : move32();
531 279911936 : x0++;
532 279911936 : x0++;
533 279911936 : x1++;
534 279911936 : x1++;
535 : }
536 :
537 : /*-----------------------------------------------------------------*
538 : * Other butterflies
539 : *
540 : * The implementation described in [1] has been changed by using
541 : * table lookup for evaluating sine and cosine functions. The
542 : * variable ind and its increment step are needed to access table
543 : * entries. Note that this implementation assumes n4 to be so
544 : * small that ind will never exceed the table. Thus the input
545 : * argument n and the constant N_MAX_SAS must be set properly.
546 : *-----------------------------------------------------------------*/
547 :
548 2186812 : n2 = 1;
549 2186812 : move16();
550 : /* step = N_MAX_SAS/4; */
551 17494496 : FOR( k = 2; k <= m; k++ )
552 : {
553 15307684 : n4 = n2;
554 15307684 : move16();
555 15307684 : n2 = shl( n4, 1 );
556 15307684 : n1 = shl( n2, 1 );
557 :
558 15307684 : step = idiv1616( N_MAX_SAS, n1 );
559 :
560 15307684 : x0 = fft_bff32;
561 15307684 : x1 = fft_bff32 + n2;
562 15307684 : x2 = fft_bff32 + add( n2, n4 );
563 293032808 : FOR( i = 0; i < n; i += n1 )
564 : {
565 277725124 : xt = *x0;
566 277725124 : move32(); /* xt = x[i]; */
567 277725124 : *x0 = L_add( xt, *x1 );
568 277725124 : move32(); /* x[i] = xt + x[i+n2]; */
569 277725124 : *x1 = L_sub( xt, *x1 );
570 277725124 : move32(); /* x[i+n2] = xt - x[i+n2]; */
571 277725124 : *x2 = L_negate( *x2 );
572 277725124 : move32(); /* x[i+n2+n4] = -x[i+n2+n4]; */
573 :
574 :
575 277725124 : s = sincos_t_fx + step; // Q15
576 277725124 : c = s + 64; // Q15
577 277725124 : xi1 = fft_bff32 + add( i, 1 );
578 277725124 : xi3 = xi1 + n2;
579 277725124 : xi2 = xi3 - 2;
580 277725124 : xi4 = xi1 + sub( n1, 2 );
581 :
582 979691776 : FOR( j = 1; j < n4; j++ )
583 : {
584 701966652 : t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx */
585 701966652 : t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx */
586 701966652 : *xi4 = L_sub( *xi2, t2 );
587 701966652 : move32();
588 701966652 : *xi3 = L_negate( L_add( *xi2, t2 ) );
589 701966652 : move32();
590 701966652 : *xi2 = L_sub( *xi1, t1 );
591 701966652 : move32();
592 701966652 : *xi1 = L_add( *xi1, t1 );
593 701966652 : move32();
594 :
595 701966652 : xi4--;
596 701966652 : xi2--;
597 701966652 : xi3++;
598 701966652 : xi1++;
599 701966652 : c += step;
600 701966652 : s += step; /* autoincrement by ar0 */
601 : }
602 :
603 277725124 : x0 += n1;
604 277725124 : x1 += n1;
605 277725124 : x2 += n1;
606 : }
607 : /* step = shr(step, 1); */
608 : }
609 2186812 : Word16 norm = L_norm_arr( fft_bff32, L_FFT );
610 2186812 : IF( i_subfr == 0 )
611 : {
612 1093406 : Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
613 1093406 : *q_x = sub( norm, 16 );
614 1093406 : move16();
615 : }
616 : ELSE
617 : {
618 1093406 : IF( LT_16( sub( norm, 16 ), *q_x ) )
619 : {
620 218471 : scale_sig( x - L_FFT, L_FFT, sub( sub( norm, 16 ), *q_x ) );
621 218471 : Copy_Scale_sig32_16( fft_bff32, x, L_FFT, norm );
622 218471 : *q_x = sub( norm, 16 );
623 218471 : move16();
624 : }
625 : ELSE
626 : {
627 874935 : Copy_Scale_sig32_16( fft_bff32, x, L_FFT, add( 16, *q_x ) );
628 : }
629 : }
630 :
631 2186812 : return;
632 : }
633 :
634 211030 : void fft_rel_fx(
635 : Word16 x[], /* i/o: input/output vector Qx */
636 : const Word16 n, /* i : vector length */
637 : const Word16 m /* i : log2 of vector length */
638 : )
639 : {
640 : Word16 i, j, k, n1, n2, n4;
641 : Word16 step;
642 : Word16 xt, t1, t2;
643 : Word16 *x0, *x1, *x2;
644 : const Word16 *s, *c;
645 : Word16 *xi2, *xi3, *xi4, *xi1;
646 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
647 211030 : Flag Overflow = 0;
648 211030 : move32();
649 : #endif
650 :
651 :
652 : /*-----------------------------------------------------------------*
653 : * Digit reverse counter
654 : *-----------------------------------------------------------------*/
655 :
656 211030 : j = 0;
657 211030 : move16();
658 211030 : x0 = &x[0]; // Qx
659 211030 : move16();
660 47101744 : FOR( i = 0; i < n - 1; i++ )
661 : {
662 46890714 : IF( LT_16( i, j ) )
663 : {
664 22054908 : xt = x[j]; // Qx
665 22054908 : move16();
666 22054908 : x[j] = *x0; // Qx
667 22054908 : move16();
668 22054908 : *x0 = xt; // Qx
669 22054908 : move16();
670 : }
671 46890714 : x0++;
672 46890714 : k = shr( n, 1 );
673 92257996 : WHILE( ( k <= j ) )
674 : {
675 45367282 : j = sub( j, k );
676 45367282 : k = shr( k, 1 );
677 : }
678 46890714 : j = add( j, k );
679 : }
680 :
681 : /*-----------------------------------------------------------------*
682 : * Length two butterflies
683 : *-----------------------------------------------------------------*/
684 :
685 211030 : x0 = &x[0];
686 211030 : move16();
687 211030 : x1 = &x[1];
688 211030 : move16();
689 23761902 : FOR( i = 0; i < ( n >> 1 ); i++ )
690 : {
691 23550872 : xt = *x0;
692 23550872 : move16();
693 23550872 : *x0 = add_o( xt, *x1, &Overflow );
694 23550872 : move16();
695 23550872 : *x1 = sub_o( xt, *x1, &Overflow );
696 23550872 : move16();
697 23550872 : x0++;
698 23550872 : x0++;
699 23550872 : x1++;
700 23550872 : x1++;
701 : }
702 :
703 : /*-----------------------------------------------------------------*
704 : * Other butterflies
705 : *
706 : * The implementation described in [1] has been changed by using
707 : * table lookup for evaluating sine and cosine functions. The
708 : * variable ind and its increment step are needed to access table
709 : * entries. Note that this implementation assumes n4 to be so
710 : * small that ind will never exceed the table. Thus the input
711 : * argument n and the constant N_MAX_SAS must be set properly.
712 : *-----------------------------------------------------------------*/
713 :
714 211030 : n2 = 1;
715 211030 : move16();
716 : /* step = N_MAX_SAS/4; */
717 1523432 : FOR( k = 2; k <= m; k++ )
718 : {
719 1312402 : n4 = n2;
720 1312402 : move16();
721 1312402 : n2 = shl( n4, 1 );
722 1312402 : n1 = shl( n2, 1 );
723 :
724 1312402 : step = idiv1616( N_MAX_SAS, n1 );
725 :
726 1312402 : x0 = x;
727 1312402 : x1 = x + n2;
728 1312402 : x2 = x + add( n2, n4 );
729 24652244 : FOR( i = 0; i < n; i += n1 )
730 : {
731 23339842 : xt = *x0;
732 23339842 : move16(); /* xt = x[i]; */
733 23339842 : *x0 = add_o( xt, *x1, &Overflow );
734 23339842 : move16(); /* x[i] = xt + x[i+n2]; */
735 23339842 : *x1 = sub_o( xt, *x1, &Overflow );
736 23339842 : move16(); /* x[i+n2] = xt - x[i+n2]; */
737 23339842 : *x2 = negate( *x2 );
738 23339842 : move16(); /* x[i+n2+n4] = -x[i+n2+n4]; */
739 :
740 :
741 23339842 : s = sincos_t_fx + step; // Q15
742 23339842 : c = s + 64; // Q15
743 23339842 : xi1 = x + add( i, 1 );
744 23339842 : xi3 = xi1 + n2;
745 23339842 : xi2 = xi3 - 2;
746 23339842 : xi4 = xi1 + sub( n1, 2 );
747 :
748 82263244 : FOR( j = 1; j < n4; j++ )
749 : {
750 58923402 : t1 = add_o( mult_r( *xi3, *c ), mult_r( *xi4, *s ), &Overflow ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx */
751 58923402 : t2 = sub_o( mult_r( *xi3, *s ), mult_r( *xi4, *c ), &Overflow ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx */
752 58923402 : *xi4 = sub_o( *xi2, t2, &Overflow );
753 58923402 : move16();
754 58923402 : *xi3 = negate( add_o( *xi2, t2, &Overflow ) );
755 58923402 : move16();
756 58923402 : *xi2 = sub_o( *xi1, t1, &Overflow );
757 58923402 : move16();
758 58923402 : *xi1 = add_o( *xi1, t1, &Overflow );
759 58923402 : move16();
760 :
761 58923402 : xi4--;
762 58923402 : xi2--;
763 58923402 : xi3++;
764 58923402 : xi1++;
765 58923402 : c += step;
766 58923402 : s += step; /* autoincrement by ar0 */
767 : }
768 :
769 23339842 : x0 += n1;
770 23339842 : x1 += n1;
771 23339842 : x2 += n1;
772 : }
773 : /* step = shr(step, 1); */
774 : }
775 :
776 211030 : return;
777 : }
778 :
779 45232 : void fft_rel_fx32(
780 : Word32 x[], /* i/o: input/output vector Qx */
781 : const Word16 n, /* i : vector length */
782 : const Word16 m /* i : log2 of vector length */
783 : )
784 : {
785 : Word16 i, j, k, n1, n2, n4;
786 : Word16 step;
787 : Word32 xt, t1, t2;
788 : Word32 *x0, *x1, *x2;
789 : Word32 *xi2, *xi3, *xi4, *xi1;
790 : const Word16 *s, *c;
791 : const Word16 *idx;
792 :
793 : /* !!!! NMAX = 256 is hardcoded here (similar optimizations should be done for NMAX > 256) !!! */
794 :
795 : Word32 *x2even, *x2odd;
796 : Word32 temp[512];
797 :
798 45232 : test();
799 45232 : test();
800 45232 : IF( EQ_16( n, 128 ) || EQ_16( n, 256 ) || EQ_16( n, 512 ) )
801 : {
802 45232 : idx = fft256_read_indexes;
803 :
804 : /* Combined Digit reverse counter & Length two butterflies */
805 45232 : IF( EQ_16( n, 128 ) )
806 : {
807 0 : x2 = temp;
808 0 : FOR( i = 0; i < 64; i++ )
809 : {
810 0 : j = *idx++;
811 0 : move16();
812 0 : k = *idx++;
813 0 : move16();
814 :
815 0 : *x2++ = L_add( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
816 0 : move16();
817 0 : *x2++ = L_sub( x[( j >> 1 )], x[( k >> 1 )] ); // Qx
818 0 : move16();
819 : }
820 : }
821 45232 : ELSE IF( EQ_16( n, 256 ) )
822 : {
823 488 : x2 = temp;
824 62952 : FOR( i = 0; i < 128; i++ )
825 : {
826 62464 : j = *idx++;
827 62464 : move16();
828 62464 : k = *idx++;
829 62464 : move16();
830 :
831 62464 : *x2++ = L_add( x[j], x[k] ); // Qx
832 62464 : move16();
833 62464 : *x2++ = L_sub( x[j], x[k] ); // Qx
834 62464 : move16();
835 : }
836 : }
837 44744 : ELSE IF( EQ_16( n, 512 ) )
838 : {
839 44744 : x2even = temp;
840 44744 : x2odd = temp + 256;
841 :
842 5771976 : FOR( i = 0; i < 128; i++ )
843 : {
844 5727232 : j = shl( *idx, 1 );
845 5727232 : idx++;
846 5727232 : k = shl( *idx, 1 );
847 5727232 : idx++;
848 :
849 5727232 : *x2even++ = L_add( x[j], x[k] ); // Qx
850 5727232 : move16();
851 5727232 : *x2even++ = L_sub( x[j], x[k] ); // Qx
852 5727232 : move16();
853 5727232 : j = add( j, 1 );
854 5727232 : k = add( k, 1 );
855 5727232 : *x2odd++ = L_add( x[j], x[k] ); // Qx
856 5727232 : move16();
857 5727232 : *x2odd++ = L_sub( x[j], x[k] ); // Qx
858 5727232 : move16();
859 : }
860 : }
861 :
862 : /*-----------------------------------------------------------------*
863 : * 1st Stage Loop has been Unrolled because n4 is '1' and that
864 : * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
865 : * and the associated pointers initialization.
866 : * Also, it allows to Put the Data from 'temp' back into 'x' due
867 : * to the previous Combined Digit Reverse and Length two butterflies
868 : *-----------------------------------------------------------------*/
869 :
870 : /*for_ (k = 2; k < 3; k++)*/
871 : {
872 45232 : x0 = temp;
873 45232 : x1 = x0 + 2;
874 45232 : x2 = x; // Qx
875 :
876 5803696 : FOR( i = 0; i < n; i += 4 )
877 : {
878 5758464 : *x2++ = L_add( *x0++, *x1 ); /* x[i] = xt + x[i+n2]; */
879 5758464 : move16();
880 5758464 : *x2++ = *x0;
881 5758464 : move16();
882 5758464 : x0--;
883 5758464 : *x2++ = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; */
884 5758464 : move16();
885 5758464 : x1++;
886 5758464 : *x2++ = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4]; */
887 5758464 : move16();
888 :
889 5758464 : x0 += 4;
890 5758464 : x1 += 3; /* x1 has already advanced */
891 : }
892 : }
893 : }
894 : ELSE
895 : {
896 : /*-----------------------------------------------------------------*
897 : * Digit reverse counter
898 : *-----------------------------------------------------------------*/
899 :
900 0 : j = 0;
901 0 : move16();
902 0 : x0 = &x[0]; // Qx
903 0 : FOR( i = 0; i < ( n - 1 ); i++ )
904 : {
905 0 : IF( LT_16( i, j ) )
906 : {
907 0 : xt = x[j]; // Qx
908 0 : move32();
909 0 : x[j] = *x0;
910 0 : move32();
911 0 : *x0 = xt;
912 0 : move32();
913 : }
914 0 : x0++;
915 0 : k = shr( n, 1 );
916 0 : WHILE( ( k <= j ) )
917 : {
918 0 : j = sub( j, k );
919 0 : k = shr( k, 1 );
920 : }
921 0 : j = add( j, k );
922 : }
923 :
924 : /*-----------------------------------------------------------------*
925 : * Length two butterflies
926 : *-----------------------------------------------------------------*/
927 :
928 0 : x0 = &x[0]; // Qx
929 0 : x1 = &x[1]; // Qx
930 0 : FOR( i = 0; i < ( n >> 1 ); i++ )
931 : {
932 0 : *x1 = L_sub( *x0, *x1 ); // Qx
933 0 : move32();
934 0 : *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); //*x0 * 2 - *x1 (Qx)
935 0 : move32();
936 :
937 0 : x0++;
938 0 : x0++;
939 0 : x1++;
940 0 : x1++;
941 : }
942 :
943 : /*-----------------------------------------------------------------*
944 : * 1st Stage Loop has been Unrolled because n4 is '1' and that
945 : * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
946 : * and the associated pointers initialization.
947 : *-----------------------------------------------------------------*/
948 :
949 : /* for_ (k = 2; k < 3; k++) */
950 : {
951 0 : x0 = x; // Qx
952 0 : x1 = x0 + 2;
953 :
954 0 : FOR( i = 0; i < n; i += 4 )
955 : {
956 0 : *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; Qx*/
957 0 : move32();
958 0 : *x0 = L_sub( L_shl( *x0, 1 ), *x1++ ); /* x[i] = xt + x[i+n2]; */ /**x0 * 2 - *x1 (Qx)*/
959 0 : move32();
960 0 : *x1 = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4]; Qx*/
961 0 : move32();
962 :
963 0 : x0 += 4;
964 0 : x1 += 3; /* x1 has already advanced */
965 : }
966 : }
967 : }
968 :
969 : /*-----------------------------------------------------------------*
970 : * Other butterflies
971 : *
972 : * The implementation described in [1] has been changed by using
973 : * table lookup for evaluating sine and cosine functions. The
974 : * variable ind and its increment step are needed to access table
975 : * entries. Note that this implementation assumes n4 to be so
976 : * small that ind will never exceed the table. Thus the input
977 : * argument n and the constant N_MAX_FFT must be set properly.
978 : *-----------------------------------------------------------------*/
979 :
980 45232 : n4 = 1;
981 45232 : move16();
982 45232 : n2 = 2;
983 45232 : move16();
984 45232 : n1 = 4;
985 45232 : move16();
986 :
987 45232 : step = N_MAX_DIV4;
988 45232 : move16();
989 :
990 361368 : FOR( k = 3; k <= m; k++ )
991 : {
992 316136 : step = shr( step, 1 );
993 316136 : n4 = shl( n4, 1 );
994 316136 : n2 = shl( n2, 1 );
995 316136 : n1 = shl( n1, 1 );
996 :
997 316136 : x0 = x;
998 316136 : x1 = x0 + n2;
999 316136 : x2 = x1 + n4;
1000 :
1001 6029368 : FOR( i = 0; i < n; i += n1 )
1002 : {
1003 5713232 : *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; */
1004 5713232 : move32();
1005 5713232 : *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); /* x[i] = xt + x[i+n2]; */ /**x0 * 2 - *x1 (Qx)*/
1006 5713232 : move32();
1007 5713232 : *x2 = L_negate( *x2 ); /* x[i+n2+n4] = -x[i+n2+n4]; Qx */
1008 5713232 : move32();
1009 :
1010 5713232 : s = sincos_t_ext_fx; // Q15
1011 5713232 : c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 Q15*/
1012 5713232 : xi1 = x0;
1013 5713232 : xi3 = xi1 + n2;
1014 5713232 : xi2 = xi3;
1015 5713232 : x0 += n1;
1016 5713232 : xi4 = x0;
1017 :
1018 40278016 : FOR( j = 1; j < n4; j++ )
1019 : {
1020 34564784 : xi3++;
1021 34564784 : xi1++;
1022 34564784 : xi4--;
1023 34564784 : xi2--;
1024 34564784 : c += step;
1025 34564784 : s += step; /* autoincrement by ar0 */
1026 :
1027 34564784 : t1 = L_add( Mpy_32_16_1( *xi3, *c ), Mpy_32_16_1( *xi4, *s ) ); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); Qx*/
1028 34564784 : t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); Qx*/
1029 :
1030 34564784 : *xi4 = L_sub( *xi2, t2 );
1031 34564784 : move32();
1032 34564784 : *xi2 = L_sub( *xi1, t1 );
1033 34564784 : move32();
1034 34564784 : *xi1 = L_sub( L_shl( *xi1, 1 ), *xi2 ); // Qx
1035 34564784 : move32();
1036 34564784 : *xi3 = L_negate( L_add( L_shl( t2, 1 ), *xi4 ) ); // Qx
1037 34564784 : move32();
1038 : }
1039 :
1040 5713232 : x1 += n1;
1041 5713232 : x2 += n1;
1042 : }
1043 : }
1044 :
1045 45232 : return;
1046 : }
|