Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include "options.h" /* Compilation switches */
6 : #include "prot_fx.h" /* Function prototypes */
7 : #include "rom_com.h" /* Static table prototypes */
8 : #include "stl.h"
9 : #include "stdint.h"
10 : /*------------------------------------------------------------------
11 : *
12 : * This is an implementation of decimation-in-time FFT algorithm for
13 : * real sequences. The techniques used here can be found in several
14 : * books, e.g., i) Proakis and Manolakis, "Digital Signal Processing",
15 : * 2nd Edition, Chapter 9, and ii) W.H. Press et. al., "Numerical
16 : * Recipes in C", 2nd Edition, Chapter 12.
17 : *
18 : * Input - There are two inputs to this function:
19 : *
20 : * 1) An integer pointer to the input data array
21 : * 2) An integer value which should be set as +1 for FFT
22 : * and some other value, e.g., -1 for ifFT
23 : *
24 : * Output - There is no return value.
25 : * The input data are replaced with transformed data. if the
26 : * input is a real time domain sequence, it is replaced with
27 : * the complex FFT for positive frequencies. The FFT value
28 : * for DC and the foldover frequency are combined to form the
29 : * first complex number in the array. The remaining complex
30 : * numbers correspond to increasing frequencies. if the input
31 : * is a complex frequency domain sequence arranged as above,
32 : * it is replaced with the corresponding time domain sequence.
33 : *
34 : * Notes:
35 : *
36 : * 1) This function is designed to be a part of a noise supp-
37 : * ression algorithm that requires 128-point FFT of real
38 : * sequences. This is achieved here through a 64-point
39 : * complex FFT. Consequently, the FFT size information is
40 : * not transmitted explicitly. However, some flexibility
41 : * is provided in the function to change the size of the
42 : * FFT by specifying the size information through "define"
43 : * statements.
44 : *
45 : * 2) The values of the complex sinusoids used in the FFT
46 : * algorithm are computed once (i.e., the first time the
47 : * r_fft function is called) and stored in a table. To
48 : * further speed up the algorithm, these values can be
49 : * precomputed and stored in a ROM table in actual DSP
50 : * based implementations.
51 : *
52 : * 3) In the c_fft function, the FFT values are divided by
53 : * 2 after each stage of computation thus dividing the
54 : * final FFT values by 64. No multiplying factor is used
55 : * for the ifFT. This is somewhat different from the usual
56 : * definition of FFT where the factor 1/N, i.e., 1/64, is
57 : * used for the ifFT and not the FFT. No factor is used in
58 : * the r_fft function.
59 : *
60 : * 4) Much of the code for the FFT and ifFT parts in r_fft
61 : * and c_fft functions are similar and can be combined.
62 : * They are, however, kept separate here to speed up the
63 : * execution.
64 : *------------------------------------------------------------------------*/
65 : /*------------------------------------------------------------------------*
66 : * c_fft_fx:
67 : *
68 : * Computes the complex part of the split-radix FFT
69 : *------------------------------------------------------------------------*/
70 :
71 69200 : static void c_fft_fx(
72 : const Word16 *phs_tbl, /* i : Table of phases */
73 : Word16 SIZE, /* i : Size of the FFT */
74 : Word16 NUM_STAGE, /* i : Number of stages */
75 : const Word16 *in_ptr, /* i : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Qx*/
76 : Word16 *out_ptr, /* o : coefficients in the order re[0], re[n/2], re[1], im[1], ..., re[n/2-1], im[n/2-1] Q(x - 1)*/
77 : /* in_ptr & out_ptr must not overlap! */
78 : const Word16 isign ) /* i : 1=fft, otherwise it is ifft*/
79 : {
80 : Word16 i, j, k, ii, jj, kk, ji, kj;
81 : Word32 L_tmp1, L_tmp2;
82 : Word16 tmp1, tmp2, tmp3, tmp4;
83 : const Word16 *table_ptr;
84 : const Word16 *input_ptr1, *input_ptr2, *input_ptr3, *input_ptr4;
85 69200 : Word16 shift = 0;
86 69200 : move16();
87 : /* Setup Reorder Variables */
88 69200 : table_ptr = NULL;
89 69200 : table_ptr = FFT_REORDER_1024;
90 69200 : SWITCH( SIZE )
91 : {
92 68 : case 1024:
93 68 : shift = 0;
94 68 : move16();
95 68 : BREAK;
96 420 : case 512:
97 420 : shift = 1;
98 420 : move16();
99 420 : BREAK;
100 6268 : case 256:
101 6268 : shift = 2;
102 6268 : move16();
103 6268 : BREAK;
104 186 : case 128:
105 186 : shift = 3;
106 186 : move16();
107 186 : BREAK;
108 62258 : case 64:
109 62258 : shift = 4;
110 62258 : move16();
111 62258 : BREAK;
112 : }
113 : /* The FFT part */
114 69200 : IF( isign != 0 )
115 : {
116 : /* Unrolled 1st/2nd Stage
117 : * 1) to take advantage of Table Values (0 & +/- 16384)
118 : * 2) to perform reordering of Input Values
119 : */
120 500598 : FOR( k = 0; k < SIZE; k += 8 )
121 : {
122 : /*
123 : * This loop use:
124 : * 4 Word16 (tmp1...tmp4)
125 : * 2 Word32 (L_tmp1 & L_tmp2)
126 : * 4 Pointers (table_ptr, input_ptr1, input_ptr2, input_ptr3)
127 : *
128 : * The addition of 'in_ptr' + and index value from 'reorder_ptr'
129 : * is counted as a move16()
130 : */
131 :
132 462888 : input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift ); // Qx
133 :
134 462888 : L_tmp1 = L_mult( *input_ptr1++, 16384 );
135 462888 : L_tmp2 = L_mult( *input_ptr1, 16384 );
136 :
137 462888 : input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
138 :
139 462888 : tmp1 = msu_r_sat( L_tmp1, *input_ptr1, 16384 );
140 462888 : tmp3 = mac_r_sat( L_tmp1, *input_ptr1++, 16384 );
141 462888 : input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
142 462888 : input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
143 :
144 462888 : L_tmp1 = L_mult( *input_ptr2++, 16384 );
145 462888 : tmp2 = mac_r_sat( L_tmp1, *input_ptr3, 16384 );
146 462888 : tmp4 = msu_r_sat( L_tmp1, *input_ptr3++, 16384 );
147 462888 : L_tmp1 = L_mult( tmp3, 16384 );
148 462888 : out_ptr[k] = mac_r_sat( L_tmp1, tmp2, 16384 );
149 462888 : move16();
150 462888 : out_ptr[k + 4] = msu_r_sat( L_tmp1, tmp2, 16384 );
151 462888 : move16();
152 :
153 462888 : tmp2 = mac_r_sat( L_tmp2, *input_ptr1, 16384 );
154 462888 : tmp3 = msu_r_sat( L_tmp2, *input_ptr1, 16384 );
155 462888 : L_tmp2 = L_mult( *input_ptr2, 16384 );
156 :
157 462888 : L_tmp1 = L_mult( tmp1, 16384 );
158 462888 : tmp1 = msu_r_sat( L_tmp2, *input_ptr3, 16384 );
159 462888 : out_ptr[k + 2] = mac_r_sat( L_tmp1, tmp1, 16384 );
160 462888 : move16();
161 462888 : out_ptr[k + 6] = msu_r_sat( L_tmp1, tmp1, 16384 );
162 462888 : move16();
163 462888 : L_tmp1 = L_mult( tmp2, 16384 );
164 462888 : tmp2 = mac_r_sat( L_tmp2, *input_ptr3, 16384 );
165 462888 : out_ptr[k + 1] = mac_r_sat( L_tmp1, tmp2, 16384 );
166 462888 : move16();
167 462888 : out_ptr[k + 5] = msu_r_sat( L_tmp1, tmp2, 16384 );
168 462888 : move16();
169 462888 : L_tmp1 = L_mult( tmp3, 16384 );
170 462888 : out_ptr[k + 3] = msu_r_sat( L_tmp1, tmp4, 16384 );
171 462888 : move16();
172 462888 : out_ptr[k + 7] = mac_r_sat( L_tmp1, tmp4, 16384 );
173 462888 : move16();
174 : }
175 :
176 : /* Remaining Stages */
177 163977 : FOR( i = 2; i < NUM_STAGE; i++ )
178 : {
179 : /* i is stage counter */
180 126267 : jj = shl( 2, i ); /* FFT size */
181 126267 : kk = shl( jj, 1 ); /* 2 * FFT size */
182 126267 : ii = shr( SIZE, i );
183 126267 : ji = 0;
184 126267 : move16(); /* ji is phase table index */
185 :
186 1826979 : FOR( j = 0; j < jj; j += 2 )
187 : {
188 : /* j is sample counter */
189 5356824 : FOR( k = j; k < SIZE; k += kk )
190 : {
191 : /* k is butterfly top */
192 3656112 : kj = add( k, jj ); /* kj is butterfly bottom */
193 :
194 : /* Butterfly computations */
195 3656112 : L_tmp1 = L_msu( L_mult( *( out_ptr + kj ), phs_tbl[ji] ),
196 3656112 : *( out_ptr + kj + 1 ), phs_tbl[ji + 1] );
197 3656112 : L_tmp2 = L_mac( L_mult( *( out_ptr + kj + 1 ), phs_tbl[ji] ),
198 3656112 : *( out_ptr + kj ), phs_tbl[ji + 1] );
199 :
200 3656112 : out_ptr[kj] = mac_r( L_negate( L_tmp1 ), out_ptr[k], 16384 );
201 3656112 : move16();
202 3656112 : out_ptr[kj + 1] = mac_r( L_negate( L_tmp2 ), out_ptr[k + 1], 16384 );
203 3656112 : move16();
204 3656112 : out_ptr[k] = mac_r( L_tmp1, out_ptr[k], 16384 );
205 3656112 : move16();
206 3656112 : out_ptr[k + 1] = mac_r( L_tmp2, out_ptr[k + 1], 16384 );
207 3656112 : move16();
208 : }
209 1700712 : ji = add( ji, ii );
210 : }
211 : }
212 : }
213 : ELSE /* The ifFT part */
214 : {
215 : /* Unrolled 1st/2nd Stage
216 : * 1) to take advantage of Table Values (0 & +/- 16384)
217 : * 2) to perform reordering of Input Values
218 : */
219 305802 : FOR( k = 0; k < SIZE; k += 8 )
220 : {
221 : /*
222 : * This loop use:
223 : * 4 Word16 (tmp1...tmp4)
224 : * 2 Word32 (L_tmp1 & L_tmp2)
225 : * 5 Pointers (reorder_ptr, input_ptr1...input_ptr4)
226 : *
227 : * The addition of 'in_ptr' + and index value from 'reorder_ptr'
228 : * is counted as a move16()
229 : */
230 274312 : input_ptr1 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
231 274312 : input_ptr2 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
232 :
233 274312 : input_ptr3 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
234 274312 : input_ptr4 = in_ptr + ( const Word16 )( (uintptr_t) ( *table_ptr++ ) >> (uintptr_t) shift );
235 :
236 :
237 274312 : tmp3 = sub( *input_ptr1, *input_ptr2 );
238 274312 : tmp4 = add( *input_ptr1++, *input_ptr2++ );
239 :
240 274312 : tmp2 = sub( input_ptr3[0], input_ptr4[0] );
241 274312 : tmp1 = sub( input_ptr3[1], input_ptr4[1] );
242 :
243 274312 : out_ptr[k + 2] = sub( tmp3, tmp1 );
244 274312 : move16();
245 274312 : out_ptr[k + 6] = add( tmp3, tmp1 );
246 274312 : move16();
247 :
248 274312 : tmp1 = sub( *input_ptr1, *input_ptr2 );
249 274312 : out_ptr[k + 3] = add( tmp1, tmp2 );
250 274312 : move16();
251 274312 : out_ptr[k + 7] = sub( tmp1, tmp2 );
252 274312 : move16();
253 :
254 274312 : tmp1 = add( input_ptr3[0], input_ptr4[0] );
255 274312 : tmp3 = add( input_ptr3[1], input_ptr4[1] );
256 :
257 274312 : out_ptr[k] = add( tmp4, tmp1 );
258 274312 : move16();
259 274312 : out_ptr[k + 4] = sub( tmp4, tmp1 );
260 274312 : move16();
261 :
262 274312 : tmp4 = add( *input_ptr1, *input_ptr2 );
263 274312 : out_ptr[k + 1] = add( tmp4, tmp3 );
264 274312 : move16();
265 274312 : out_ptr[k + 5] = sub( tmp4, tmp3 );
266 274312 : move16();
267 : }
268 :
269 31490 : table_ptr = phs_tbl + SIZE; /* access part of table that is scaled by 2 */
270 :
271 : /* Remaining Stages */
272 127077 : FOR( i = 2; i < NUM_STAGE; i++ )
273 : {
274 : /* i is stage counter */
275 95587 : jj = shl( 2, i ); /* FFT size */
276 95587 : kk = shl( jj, 1 ); /* 2 * FFT size */
277 95587 : ii = shr( SIZE, i );
278 95587 : ji = 0;
279 95587 : move16(); /* ji is phase table index */
280 :
281 1066875 : FOR( j = 0; j < jj; j += 2 )
282 : {
283 : /* j is sample counter */
284 : /* This can be computed by successive add_fxitions of ii to ji, starting from 0
285 : hence line-count it as a one-line add (still need to increment op count!!) */
286 :
287 2777544 : FOR( k = j; k < SIZE; k += kk )
288 : {
289 : /* k is butterfly top */
290 1806256 : kj = add( k, jj ); /* kj is butterfly bottom */
291 :
292 : /* Butterfly computations */
293 1806256 : tmp1 = mac_r_sat( L_mult_sat( out_ptr[kj], table_ptr[ji] ), out_ptr[kj + 1], table_ptr[ji + 1] );
294 :
295 1806256 : tmp2 = msu_r_sat( L_mult_sat( out_ptr[kj + 1], table_ptr[ji] ), out_ptr[kj], table_ptr[ji + 1] );
296 :
297 1806256 : out_ptr[kj] = sub_sat( out_ptr[k], tmp1 );
298 1806256 : move16();
299 1806256 : out_ptr[kj + 1] = sub_sat( out_ptr[k + 1], tmp2 );
300 1806256 : move16();
301 1806256 : out_ptr[k] = add_sat( out_ptr[k], tmp1 );
302 1806256 : move16();
303 1806256 : out_ptr[k + 1] = add_sat( out_ptr[k + 1], tmp2 );
304 1806256 : move16();
305 : }
306 971288 : ji = add( ji, ii );
307 : }
308 : }
309 : }
310 69200 : }
311 :
312 : /*--------------------------------------------------------------------------------*
313 : * r_fft_fx:
314 : *
315 : * Perform FFT fixed-point for real-valued sequences of length 32, 64 or 128
316 : *--------------------------------------------------------------------------------*/
317 69200 : void r_fft_fx_lc(
318 : const Word16 *phs_tbl, /* i : Table of phase */
319 : const Word16 SIZE, /* i : Size of the FFT */
320 : const Word16 SIZE2, /* i : Size / 2 */
321 : const Word16 NUM_STAGE, /* i : Number of stage */
322 : const Word16 *in_ptr, /* i : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Qx*/
323 : Word16 *out_ptr, /* o : coefficients in the order re[0], re[1], ... re[n/2], im[n/2-1], im[n/2-2], ..., im[1] Q(x - 1)*/
324 : const Word16 isign /* i : 1=fft, otherwize it's ifft */
325 : )
326 : {
327 : Word16 tmp2_real, tmp2_imag;
328 : Word32 Ltmp1_real, Ltmp1_imag;
329 : Word16 i;
330 : Word32 Ltmp1;
331 : const Word16 *phstbl_ptrDn;
332 : Word16 *ptrDn;
333 : Word16 temp[1024]; /* Accommodates real input FFT size up to 1024. */
334 :
335 : /* Setup Pointers */
336 69200 : phstbl_ptrDn = &phs_tbl[SIZE - 1];
337 :
338 : /* The FFT part */
339 69200 : IF( isign != 0 )
340 : {
341 : Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
342 :
343 : /* Perform the complex FFT */
344 37710 : c_fft_fx( phs_tbl, SIZE, NUM_STAGE, in_ptr, temp, isign );
345 :
346 : /* First, handle the DC and foldover frequencies */
347 37710 : out_ptr[SIZE2] = sub( temp[0], temp[1] );
348 37710 : move16();
349 37710 : out_ptr[0] = sub( add( temp[0], temp[1] ), shr( NUM_STAGE, 1 ) );
350 37710 : move16(); /* DC have a small offset */
351 :
352 37710 : ptrDn = &temp[SIZE - 1];
353 :
354 37710 : ptImaDn = &out_ptr[SIZE - 1];
355 37710 : ptRealUp = &out_ptr[1];
356 37710 : ptImaUp = &out_ptr[SIZE2 + 1];
357 37710 : ptRealDn = &out_ptr[SIZE2 - 1];
358 :
359 : /* Now, handle the remaining positive frequencies */
360 963486 : FOR( i = 2; i <= SIZE2; i += 2 )
361 : {
362 925776 : Ltmp1_imag = L_mult( temp[i + 1], 16384 );
363 925776 : Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptrDn, 16384 );
364 925776 : tmp2_real = add_sat( temp[i + 1], *ptrDn-- );
365 :
366 925776 : Ltmp1_real = L_mult( temp[i], 16384 );
367 925776 : Ltmp1_real = L_mac_sat( Ltmp1_real, *ptrDn, 16384 );
368 925776 : tmp2_imag = sub( *ptrDn--, temp[i] );
369 :
370 :
371 925776 : *ptRealUp++ = msu_r_sat( L_mac_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
372 925776 : move16();
373 925776 : *ptImaDn-- = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
374 925776 : move16();
375 925776 : Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
376 925776 : Ltmp1_real = L_mac_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
377 925776 : *ptImaUp++ = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
378 925776 : move16();
379 925776 : *ptRealDn-- = mac_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
380 925776 : move16();
381 : }
382 : }
383 : ELSE /* The ifFT part */
384 : {
385 : const Word16 *ptRealUp, *ptRealDn, *ptImaUp, *ptImaDn;
386 :
387 : /* First, handle the DC and foldover frequencies */
388 31490 : Ltmp1 = L_mult( in_ptr[0], 16384 );
389 31490 : temp[0] = mac_r( Ltmp1, in_ptr[SIZE2], 16384 );
390 31490 : move16();
391 31490 : temp[1] = msu_r( Ltmp1, in_ptr[SIZE2], 16384 );
392 31490 : move16();
393 :
394 31490 : ptrDn = &temp[SIZE - 1];
395 :
396 : /* Here we cast to Word16 * from a const Word16 *. */
397 : /* This is ok because we use these pointers for */
398 : /* reading only. This is just to avoid declaring a */
399 : /* bunch of 4 other pointer with const Word16 *. */
400 31490 : ptImaDn = &in_ptr[SIZE - 1]; // Qx
401 31490 : ptRealUp = &in_ptr[1]; // Qx
402 31490 : ptImaUp = &in_ptr[SIZE2 + 1]; // Qx
403 31490 : ptRealDn = &in_ptr[SIZE2 - 1]; // Qx
404 :
405 : /* Now, handle the remaining positive frequencies */
406 580114 : FOR( i = 2; i <= SIZE2; i += 2 )
407 : {
408 548624 : Ltmp1_imag = L_mult( *ptImaDn, 16384 );
409 548624 : Ltmp1_imag = L_msu_sat( Ltmp1_imag, *ptImaUp, 16384 );
410 548624 : tmp2_real = add_sat( *ptImaDn--, *ptImaUp++ );
411 548624 : Ltmp1_real = L_mult( *ptRealUp, 16384 );
412 548624 : Ltmp1_real = L_mac_sat( Ltmp1_real, *ptRealDn, 16384 );
413 548624 : tmp2_imag = sub_sat( *ptRealUp++, *ptRealDn-- );
414 548624 : temp[i] = mac_r_sat( L_msu_sat( Ltmp1_real, tmp2_real, phs_tbl[i] ), tmp2_imag, phs_tbl[i + 1] );
415 548624 : move16();
416 548624 : temp[i + 1] = mac_r_sat( L_mac_sat( Ltmp1_imag, tmp2_imag, phs_tbl[i] ), tmp2_real, phs_tbl[i + 1] );
417 548624 : move16();
418 548624 : Ltmp1 = L_mac_sat( L_negate( Ltmp1_imag ), tmp2_real, *phstbl_ptrDn );
419 548624 : Ltmp1_real = L_msu_sat( Ltmp1_real, tmp2_imag, *phstbl_ptrDn-- );
420 548624 : *ptrDn-- = msu_r_sat( Ltmp1, tmp2_imag, *phstbl_ptrDn );
421 548624 : move16();
422 548624 : *ptrDn-- = msu_r_sat( Ltmp1_real, tmp2_real, *phstbl_ptrDn-- );
423 548624 : move16();
424 : }
425 :
426 : /* Perform the complex ifFT */
427 31490 : c_fft_fx( phs_tbl, SIZE, NUM_STAGE, temp, out_ptr, isign );
428 : }
429 69200 : }
|