Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include "options.h" /* Compilation switches */
6 : #include "cnst.h" /* Common constants */
7 : #include "prot_fx.h" /* Function prototypes */
8 : #include "rom_com.h" /* Static table prototypes */
9 : #include "stl.h"
10 : #include <assert.h>
11 :
12 : /*-----------------------------------------------------------------*
13 : * Local functions
14 : *-----------------------------------------------------------------*/
15 :
16 : #define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */
17 : /* DCT related */
18 : #define KP559016994_16FX 1200479845 /* EDCT & EMDCT constants Q31*/
19 : #define KP951056516_16FX 2042378325 /* EDCT & EMDCT constants Q31*/
20 : #define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31*/
21 :
22 : static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
23 : static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
24 : static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
25 : static void cftmdl_16fx( Word16 n, Word16 l, Word16 *a, const Word32 *w );
26 : static void cftfsub_16fx( Word16 n, Word16 *a, const Word32 *w );
27 : static void cft1st_16fx( Word16 n, Word16 *a, const Word32 *w );
28 : static void cftmdl_16fx( Word16 n, Word16 l, Word16 *a, const Word32 *w );
29 : static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
30 : static void bitrv2_SR_16fx( Word16 n, const Word16 *ip, Word16 *a );
31 : static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
32 : static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx );
33 : static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w );
34 :
35 : #include "math_32.h"
36 :
37 : /*-----------------------------------------------------------------*
38 : * Local functions
39 : *-----------------------------------------------------------------*/
40 : static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w );
41 : static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a );
42 : static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w );
43 : static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w );
44 : static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w );
45 :
46 :
47 263 : void DoRTFTn_fx(
48 : Word32 *x, /* i/o : real part of input and output data Q(x) */
49 : Word32 *y, /* i/o : imaginary part of input and output data Q(x) */
50 : const Word16 n /* i : size of the FFT up to 1024 */
51 : )
52 : {
53 :
54 : Word16 i;
55 : Word32 z[2048], *pt;
56 :
57 263 : pt = z;
58 134471 : FOR( i = 0; i < n; i++ )
59 : {
60 134208 : *pt++ = x[i];
61 134208 : move16();
62 134208 : *pt++ = y[i];
63 134208 : move16();
64 : }
65 :
66 263 : IF( EQ_16( n, 16 ) )
67 : {
68 0 : cdftForw_fx( 2 * n, z, Ip_fft16, w_fft512_fx_evs );
69 : }
70 263 : ELSE IF( EQ_16( n, 32 ) )
71 : {
72 0 : cdftForw_fx( 2 * n, z, Ip_fft32, w_fft512_fx_evs );
73 : }
74 263 : ELSE IF( EQ_16( n, 64 ) )
75 : {
76 1 : cdftForw_fx( 2 * n, z, Ip_fft64, w_fft512_fx_evs );
77 : }
78 262 : ELSE IF( EQ_16( n, 128 ) )
79 : {
80 0 : cdftForw_fx( 2 * n, z, Ip_fft128, w_fft512_fx_evs );
81 : }
82 262 : ELSE IF( EQ_16( n, 256 ) )
83 : {
84 0 : cdftForw_fx( 2 * n, z, Ip_fft256, w_fft512_fx_evs );
85 : }
86 262 : ELSE IF( EQ_16( n, 512 ) )
87 : {
88 262 : cdftForw_fx( 2 * n, z, Ip_fft512, w_fft512_fx_evs );
89 : }
90 : ELSE
91 : {
92 0 : assert( 0 );
93 : }
94 :
95 263 : x[0] = z[0];
96 263 : move16();
97 263 : y[0] = z[1];
98 263 : move16();
99 263 : pt = &z[2];
100 134208 : FOR( i = n - 1; i >= 1; i-- )
101 : {
102 133945 : x[i] = *pt++;
103 133945 : move16();
104 133945 : y[i] = *pt++;
105 133945 : move16();
106 : }
107 :
108 263 : return;
109 : }
110 :
111 : /*-----------------------------------------------------------------*
112 : * cdftForw_fx()
113 : * Main function of Complex Discrete Fourier Transform
114 : *-----------------------------------------------------------------*/
115 263 : static void cdftForw_fx(
116 : Word16 n, /* i : data length of real and imag */
117 : Word32 *a, /* i/o : input/output data Q(q)*/
118 : const Word16 *ip, /* i : work area for bit reversal */
119 : const Word16 *w /* i : cos/sin table Q14*/
120 : )
121 : {
122 : /* bit reversal */
123 263 : bitrv2_SR_fx( n, ip + 2, a );
124 :
125 : /* Do FFT */
126 263 : cftfsub_fx( n, a, w );
127 263 : }
128 :
129 : /*-----------------------------------------------------------------*
130 : * bitrv2_SR_fx()
131 : * Bit reversal
132 : *-----------------------------------------------------------------*/
133 92248 : static void bitrv2_SR_fx(
134 : Word16 n, /* i : data length of real and imag */
135 : const Word16 *ip, /* i/o : work area for bit reversal */
136 : Word32 *a /* i/o : input/output data Q(q)*/
137 : )
138 : {
139 : Word16 j, j1, k, k1, m, m2;
140 : Word16 l;
141 : Word32 xr, xi, yr, yi;
142 :
143 92248 : l = n;
144 92248 : move16();
145 92248 : m = 1;
146 92248 : move16();
147 :
148 277268 : WHILE( ( ( m << 3 ) < l ) )
149 : {
150 185020 : l = shr( l, 1 );
151 185020 : m = shl( m, 1 );
152 : }
153 :
154 92248 : m2 = shl( m, 1 );
155 92248 : IF( EQ_16( shl( m, 3 ), l ) )
156 : {
157 5 : FOR( k = 0; k < m; k++ )
158 : {
159 10 : FOR( j = 0; j < k; j++ )
160 : {
161 6 : j1 = add( shl( j, 1 ), ip[k] );
162 6 : k1 = add( shl( k, 1 ), ip[j] );
163 6 : xr = a[j1];
164 6 : move32();
165 6 : xi = a[j1 + 1];
166 6 : move32();
167 6 : yr = a[k1];
168 6 : move32();
169 6 : yi = a[k1 + 1];
170 6 : move32();
171 6 : a[j1] = yr;
172 6 : move32();
173 6 : a[j1 + 1] = yi;
174 6 : move32();
175 6 : a[k1] = xr;
176 6 : move32();
177 6 : a[k1 + 1] = xi;
178 6 : move32();
179 6 : j1 = add( j1, m2 );
180 6 : k1 = add( k1, shl( m2, 1 ) );
181 6 : xr = a[j1];
182 6 : move32();
183 6 : xi = a[j1 + 1];
184 6 : move32();
185 6 : yr = a[k1];
186 6 : move32();
187 6 : yi = a[k1 + 1];
188 6 : move32();
189 6 : a[j1] = yr;
190 6 : move32();
191 6 : a[j1 + 1] = yi;
192 6 : move32();
193 6 : a[k1] = xr;
194 6 : move32();
195 6 : a[k1 + 1] = xi;
196 6 : move32();
197 6 : j1 = add( j1, m2 );
198 6 : k1 = sub( k1, m2 );
199 6 : xr = a[j1];
200 6 : move32();
201 6 : xi = a[j1 + 1];
202 6 : move32();
203 6 : xi = a[j1 + 1];
204 6 : move32();
205 6 : yr = a[k1];
206 6 : move32();
207 6 : yi = a[k1 + 1];
208 6 : move32();
209 6 : a[j1] = yr;
210 6 : move32();
211 6 : a[j1 + 1] = yi;
212 6 : move32();
213 6 : a[k1] = xr;
214 6 : move32();
215 6 : a[k1 + 1] = xi;
216 6 : move32();
217 6 : j1 = add( j1, m2 );
218 6 : k1 = add( k1, shl( m2, 1 ) );
219 6 : xr = a[j1];
220 6 : move32();
221 6 : xi = a[j1 + 1];
222 6 : move32();
223 6 : yr = a[k1];
224 6 : move32();
225 6 : yi = a[k1 + 1];
226 6 : move32();
227 6 : a[j1] = yr;
228 6 : move32();
229 6 : a[j1 + 1] = yi;
230 6 : move32();
231 6 : a[k1] = xr;
232 6 : move32();
233 6 : a[k1 + 1] = xi;
234 6 : move32();
235 : }
236 :
237 4 : j1 = add( add( shl( k, 1 ), m2 ), ip[k] );
238 4 : k1 = add( j1, m2 );
239 4 : xr = a[j1];
240 4 : move32();
241 4 : xi = a[j1 + 1];
242 4 : move32();
243 4 : yr = a[k1];
244 4 : move32();
245 4 : yi = a[k1 + 1];
246 4 : move32();
247 4 : a[j1] = yr;
248 4 : move32();
249 4 : a[j1 + 1] = yi;
250 4 : move32();
251 4 : a[k1] = xr;
252 4 : move32();
253 4 : a[k1 + 1] = xi;
254 4 : move32();
255 : }
256 : }
257 : ELSE
258 : {
259 372132 : FOR( k = 1; k < m; k++ )
260 : {
261 863235 : FOR( j = 0; j < k; j++ )
262 : {
263 583350 : j1 = add( shl( j, 1 ), ip[k] );
264 583350 : k1 = add( shl( k, 1 ), ip[j] );
265 583350 : xr = a[j1];
266 583350 : move32();
267 583350 : xi = a[j1 + 1];
268 583350 : move32();
269 583350 : yr = a[k1];
270 583350 : move32();
271 583350 : yi = a[k1 + 1];
272 583350 : move32();
273 583350 : a[j1] = yr;
274 583350 : move32();
275 583350 : a[j1 + 1] = yi;
276 583350 : move32();
277 583350 : a[k1] = xr;
278 583350 : move32();
279 583350 : a[k1 + 1] = xi;
280 583350 : move32();
281 583350 : j1 = add( j1, m2 );
282 583350 : k1 = add( k1, m2 );
283 583350 : xr = a[j1];
284 583350 : move32();
285 583350 : xi = a[j1 + 1];
286 583350 : move32();
287 583350 : yr = a[k1];
288 583350 : move32();
289 583350 : yi = a[k1 + 1];
290 583350 : move32();
291 583350 : a[j1] = yr;
292 583350 : move32();
293 583350 : a[j1 + 1] = yi;
294 583350 : move32();
295 583350 : a[k1] = xr;
296 583350 : move32();
297 583350 : a[k1 + 1] = xi;
298 583350 : move32();
299 : }
300 : }
301 : }
302 :
303 92248 : return;
304 : }
305 :
306 : /*-----------------------------------------------------------------*
307 : * cftfsub_fx()
308 : * Complex Discrete Fourier Transform
309 : *-----------------------------------------------------------------*/
310 59373 : static void cftfsub_fx(
311 : Word16 n, /* i : data length of real and imag */
312 : Word32 *a, /* i/o : input/output data Q(q)*/
313 : const Word16 *w /* i : cos/sin table Q14*/
314 : )
315 : {
316 : Word16 j, j1, j2, j3, l;
317 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
318 :
319 59373 : l = 2;
320 59373 : move16();
321 :
322 59373 : IF( n > 8 )
323 : {
324 59373 : cft1st_fx( n, a, w );
325 59373 : l = 8;
326 59373 : move16();
327 119270 : WHILE( ( ( l << 2 ) < n ) )
328 : {
329 59897 : cftmdl_fx( n, l, a, w );
330 59897 : l = shl( l, 2 );
331 : }
332 : }
333 59373 : IF( shl( l, 2 ) == n )
334 : {
335 17 : FOR( j = 0; j < l; j += 2 )
336 : {
337 16 : j1 = add( j, l );
338 16 : j2 = add( j1, l );
339 16 : j3 = add( j2, l );
340 16 : x0r = L_add( a[j], a[j1] );
341 16 : x0i = L_add( a[j + 1], a[j1 + 1] );
342 16 : x1r = L_sub( a[j], a[j1] );
343 16 : x1i = L_sub( a[j + 1], a[j1 + 1] );
344 16 : x2r = L_add( a[j2], a[j3] );
345 16 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
346 16 : x3r = L_sub( a[j2], a[j3] );
347 16 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
348 16 : a[j] = L_add( x0r, x2r );
349 16 : move32();
350 16 : a[j2] = L_sub( x0r, x2r );
351 16 : move32();
352 16 : a[j + 1] = L_add( x0i, x2i );
353 16 : move32();
354 16 : a[j2 + 1] = L_sub( x0i, x2i );
355 16 : move32();
356 16 : a[j1] = L_sub( x1r, x3i );
357 16 : move32();
358 16 : a[j1 + 1] = L_add( x1i, x3r );
359 16 : move32();
360 16 : a[j3] = L_add( x1r, x3i );
361 16 : move32();
362 16 : a[j3 + 1] = L_sub( x1i, x3r );
363 16 : move32();
364 : }
365 : }
366 : ELSE
367 : {
368 1072204 : FOR( j = 0; j < l; j += 2 )
369 : {
370 1012832 : j1 = add( j, l );
371 1012832 : x0r = L_sub( a[j], a[j1] );
372 1012832 : x0i = L_sub( a[j + 1], a[j1 + 1] );
373 1012832 : a[j] = L_add( a[j], a[j1] );
374 1012832 : move32();
375 1012832 : a[j + 1] = L_add( a[j + 1], a[j1 + 1] );
376 1012832 : move32();
377 1012832 : a[j1] = x0r;
378 1012832 : move32();
379 1012832 : move32();
380 1012832 : a[j1 + 1] = x0i;
381 1012832 : move32();
382 1012832 : move32();
383 : }
384 : }
385 :
386 59373 : return;
387 : }
388 :
389 : /*-----------------------------------------------------------------*
390 : * cft1st_fx()
391 : * Subfunction of Complex Discrete Fourier Transform
392 : *-----------------------------------------------------------------*/
393 92248 : static void cft1st_fx(
394 : Word16 n, /* i : data length of real and imag */
395 : Word32 *a, /* i/o : input/output data Q(q)*/
396 : const Word16 *w /* i : cos/sin table Q14*/
397 : )
398 : {
399 : Word16 j, k1, k2;
400 : Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
401 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
402 :
403 92248 : x0r = L_add( a[0], a[2] );
404 92248 : x0i = L_add( a[1], a[3] );
405 92248 : x1r = L_sub( a[0], a[2] );
406 92248 : x1i = L_sub( a[1], a[3] );
407 92248 : x2r = L_add( a[4], a[6] );
408 92248 : x2i = L_add( a[5], a[7] );
409 92248 : x3r = L_sub( a[4], a[6] );
410 92248 : x3i = L_sub( a[5], a[7] );
411 92248 : a[0] = L_add( x0r, x2r );
412 92248 : move32();
413 92248 : a[1] = L_add( x0i, x2i );
414 92248 : move32();
415 92248 : a[4] = L_sub( x0r, x2r );
416 92248 : move32();
417 92248 : a[5] = L_sub( x0i, x2i );
418 92248 : move32();
419 92248 : a[2] = L_sub( x1r, x3i );
420 92248 : move32();
421 92248 : a[3] = L_add( x1i, x3r );
422 92248 : move32();
423 92248 : a[6] = L_add( x1r, x3i );
424 92248 : move32();
425 92248 : a[7] = L_sub( x1i, x3r );
426 92248 : move32();
427 :
428 92248 : wk1r = w[2];
429 92248 : move16();
430 92248 : x0r = L_add( a[8], a[10] );
431 92248 : x0i = L_add( a[9], a[11] );
432 92248 : x1r = L_sub( a[8], a[10] );
433 92248 : x1i = L_sub( a[9], a[11] );
434 92248 : x2r = L_add( a[12], a[14] );
435 92248 : x2i = L_add( a[13], a[15] );
436 92248 : x3r = L_sub( a[12], a[14] );
437 92248 : x3i = L_sub( a[13], a[15] );
438 92248 : a[8] = L_add( x0r, x2r );
439 92248 : move32();
440 92248 : a[9] = L_add( x0i, x2i );
441 92248 : move32();
442 92248 : a[12] = L_sub( x2i, x0i );
443 92248 : move32();
444 92248 : a[13] = L_sub( x0r, x2r );
445 92248 : move32();
446 :
447 92248 : x0r = L_sub( x1r, x3i );
448 92248 : x0i = L_add( x1i, x3r );
449 92248 : a[10] = Mult_32_16( L_shl( L_sub( x0r, x0i ), 1 ), wk1r );
450 92248 : move32();
451 92248 : a[11] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
452 92248 : move32();
453 92248 : x0r = L_add( x3i, x1r );
454 92248 : x0i = L_sub( x3r, x1i );
455 92248 : a[14] = Mult_32_16( L_shl( L_sub( x0i, x0r ), 1 ), wk1r );
456 92248 : move32();
457 92248 : a[15] = Mult_32_16( L_shl( L_add( x0i, x0r ), 1 ), wk1r );
458 92248 : move32();
459 :
460 92248 : k1 = 0;
461 92248 : move16();
462 384716 : FOR( j = 16; j < n; j += 16 )
463 : {
464 292468 : k1 = add( k1, 2 );
465 292468 : k2 = shl( k1, 1 );
466 292468 : wk2r = w[k1];
467 292468 : move16();
468 292468 : wk2i = w[k1 + 1];
469 292468 : move16();
470 292468 : wk1r = w[k2];
471 292468 : move16();
472 292468 : wk1i = w[k2 + 1];
473 292468 : move16();
474 292468 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2i, wk1i ), 14 ) ) );
475 292468 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2i, wk1r ), 14 ), wk1i, 1 ) );
476 292468 : x0r = L_add( a[j], a[j + 2] );
477 292468 : x0i = L_add( a[j + 1], a[j + 3] );
478 292468 : x1r = L_sub( a[j], a[j + 2] );
479 292468 : x1i = L_sub( a[j + 1], a[j + 3] );
480 292468 : x2r = L_add( a[j + 4], a[j + 6] );
481 292468 : x2i = L_add( a[j + 5], a[j + 7] );
482 292468 : x3r = L_sub( a[j + 4], a[j + 6] );
483 292468 : x3i = L_sub( a[j + 5], a[j + 7] );
484 292468 : a[j] = L_add( x0r, x2r );
485 292468 : move32();
486 292468 : a[j + 1] = L_add( x0i, x2i );
487 292468 : move32();
488 292468 : x0r = L_sub( x0r, x2r );
489 292468 : x0i = L_sub( x0i, x2i );
490 292468 : a[j + 4] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
491 292468 : move32();
492 292468 : a[j + 5] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk2r ), Mult_32_16( L_shl( x0r, 1 ), wk2i ) );
493 292468 : move32();
494 292468 : x0r = L_sub( x1r, x3i );
495 292468 : x0i = L_add( x1i, x3r );
496 292468 : a[j + 2] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
497 292468 : move32();
498 292468 : a[j + 3] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
499 292468 : move32();
500 292468 : x0r = L_add( x1r, x3i );
501 292468 : x0i = L_sub( x1i, x3r );
502 292468 : a[j + 6] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
503 292468 : move32();
504 292468 : a[j + 7] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
505 292468 : move32();
506 :
507 292468 : wk1r = w[k2 + 2];
508 292468 : move16();
509 292468 : wk1i = w[k2 + 3];
510 292468 : move16();
511 292468 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2r, wk1i ), 14 ) ) );
512 292468 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2r, wk1r ), 14 ), wk1i, 1 ) );
513 292468 : x0r = L_add( a[j + 8], a[j + 10] );
514 292468 : x0i = L_add( a[j + 9], a[j + 11] );
515 292468 : x1r = L_sub( a[j + 8], a[j + 10] );
516 292468 : x1i = L_sub( a[j + 9], a[j + 11] );
517 292468 : x2r = L_add( a[j + 12], a[j + 14] );
518 292468 : x2i = L_add( a[j + 13], a[j + 15] );
519 292468 : x3r = L_sub( a[j + 12], a[j + 14] );
520 292468 : x3i = L_sub( a[j + 13], a[j + 15] );
521 292468 : a[j + 8] = L_add( x0r, x2r );
522 292468 : move32();
523 292468 : a[j + 9] = L_add( x0i, x2i );
524 292468 : move32();
525 292468 : x0r = L_sub( x0r, x2r );
526 292468 : x0i = L_sub( x0i, x2i );
527 292468 : a[j + 12] = L_negate( L_add( Mult_32_16( L_shl( x0r, 1 ), wk2i ), Mult_32_16( L_shl( x0i, 1 ), wk2r ) ) );
528 292468 : move32();
529 292468 : a[j + 13] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
530 292468 : move32();
531 292468 : x0r = L_sub( x1r, x3i );
532 292468 : x0i = L_add( x1i, x3r );
533 292468 : a[j + 10] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
534 292468 : move32();
535 292468 : a[j + 11] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
536 292468 : move32();
537 292468 : x0r = L_add( x1r, x3i );
538 292468 : x0i = L_sub( x1i, x3r );
539 292468 : a[j + 14] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
540 292468 : move32();
541 292468 : a[j + 15] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
542 292468 : move32();
543 : }
544 :
545 92248 : return;
546 : }
547 :
548 : /*-----------------------------------------------------------------*
549 : * cftmdl_fx()
550 : * Subfunction of Complex Discrete Fourier Transform
551 : *-----------------------------------------------------------------*/
552 92772 : static void cftmdl_fx(
553 : Word16 n, /* i : data length of real and imag */
554 : Word16 l, /* i : initial shift for processing */
555 : Word32 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
556 : const Word16 *w /* i : cos/sin table Q30*/
557 : )
558 : {
559 : Word16 j, j1, j2, j3, k, k1, k2, m, m2;
560 : Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
561 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
562 : Word16 tmp;
563 :
564 92772 : m = shl( l, 2 );
565 482724 : FOR( j = 0; j < l; j += 2 )
566 : {
567 389952 : j1 = add( j, l );
568 389952 : j2 = add( j1, l );
569 389952 : j3 = add( j2, l );
570 389952 : x0r = L_add( a[j], a[j1] );
571 389952 : x0i = L_add( a[j + 1], a[j1 + 1] );
572 389952 : x1r = L_sub( a[j], a[j1] );
573 389952 : x1i = L_sub( a[j + 1], a[j1 + 1] );
574 389952 : x2r = L_add( a[j2], a[j3] );
575 389952 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
576 389952 : x3r = L_sub( a[j2], a[j3] );
577 389952 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
578 389952 : a[j] = L_add( x0r, x2r );
579 389952 : move32();
580 389952 : a[j + 1] = L_add( x0i, x2i );
581 389952 : move32();
582 389952 : a[j2] = L_sub( x0r, x2r );
583 389952 : move32();
584 389952 : a[j2 + 1] = L_sub( x0i, x2i );
585 389952 : move32();
586 389952 : a[j1] = L_sub( x1r, x3i );
587 389952 : move32();
588 389952 : a[j1 + 1] = L_add( x1i, x3r );
589 389952 : move32();
590 389952 : a[j3] = L_add( x1r, x3i );
591 389952 : move32();
592 389952 : a[j3 + 1] = L_sub( x1i, x3r );
593 389952 : move32();
594 : }
595 :
596 92772 : wk1r = w[2];
597 92772 : move16();
598 92772 : tmp = add( l, m );
599 482724 : FOR( j = m; j < tmp; j += 2 )
600 : {
601 389952 : j1 = add( j, l );
602 389952 : j2 = add( j1, l );
603 389952 : j3 = add( j2, l );
604 389952 : x0r = L_add( a[j], a[j1] );
605 389952 : x0i = L_add( a[j + 1], a[j1 + 1] );
606 389952 : x1r = L_sub( a[j], a[j1] );
607 389952 : x1i = L_sub( a[j + 1], a[j1 + 1] );
608 389952 : x2r = L_add( a[j2], a[j3] );
609 389952 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
610 389952 : x3r = L_sub( a[j2], a[j3] );
611 389952 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
612 389952 : a[j] = L_add( x0r, x2r );
613 389952 : move32();
614 389952 : a[j + 1] = L_add( x0i, x2i );
615 389952 : move32();
616 389952 : a[j2] = L_sub( x2i, x0i );
617 389952 : move32();
618 389952 : a[j2 + 1] = L_sub( x0r, x2r );
619 389952 : move32();
620 389952 : x0r = L_sub( x1r, x3i );
621 389952 : x0i = L_add( x1i, x3r );
622 389952 : a[j1] = Mult_32_16( L_shl( L_sub( x0r, x0i ), 1 ), wk1r );
623 389952 : move32();
624 389952 : a[j1 + 1] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
625 389952 : move32();
626 389952 : x0r = L_add( x3i, x1r );
627 389952 : x0i = L_sub( x3r, x1i );
628 389952 : a[j3] = Mult_32_16( L_shl( L_sub( x0i, x0r ), 1 ), wk1r );
629 389952 : move32();
630 389952 : a[j3 + 1] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
631 389952 : move32();
632 : }
633 :
634 92772 : k1 = 0;
635 92772 : move16();
636 92772 : m2 = shl( m, 1 );
637 97489 : FOR( k = m2; k < n; k += m2 )
638 : {
639 4717 : k1 = add( k1, 2 );
640 4717 : k2 = shl( k1, 1 );
641 4717 : wk2r = w[k1];
642 4717 : move16();
643 4717 : wk2i = w[k1 + 1];
644 4717 : move16();
645 4717 : wk1r = w[k2];
646 4717 : move16();
647 4717 : wk1i = w[k2 + 1];
648 4717 : move16();
649 4717 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2i, wk1i ), 14 ) ) );
650 4717 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2i, wk1r ), 14 ), wk1i, 1 ) );
651 :
652 4717 : tmp = add( l, k );
653 33017 : FOR( j = k; j < tmp; j += 2 )
654 : {
655 28300 : j1 = add( j, l );
656 28300 : j2 = add( j1, l );
657 28300 : j3 = add( j2, l );
658 28300 : x0r = L_add( a[j], a[j1] );
659 28300 : x0i = L_add( a[j + 1], a[j1 + 1] );
660 28300 : x1r = L_sub( a[j], a[j1] );
661 28300 : x1i = L_sub( a[j + 1], a[j1 + 1] );
662 28300 : x2r = L_add( a[j2], a[j3] );
663 28300 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
664 28300 : x3r = L_sub( a[j2], a[j3] );
665 28300 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
666 28300 : a[j] = L_add( x0r, x2r );
667 28300 : move32();
668 28300 : a[j + 1] = L_add( x0i, x2i );
669 28300 : move32();
670 28300 : x0r = L_sub( x0r, x2r );
671 28300 : x0i = L_sub( x0i, x2i );
672 28300 : a[j2] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
673 28300 : move32();
674 28300 : a[j2 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk2r ), Mult_32_16( L_shl( x0r, 1 ), wk2i ) );
675 28300 : move32();
676 28300 : x0r = L_sub( x1r, x3i );
677 28300 : x0i = L_add( x1i, x3r );
678 28300 : a[j1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
679 28300 : move32();
680 28300 : a[j1 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
681 28300 : move32();
682 28300 : x0r = L_add( x1r, x3i );
683 28300 : x0i = L_sub( x1i, x3r );
684 28300 : a[j3] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
685 28300 : move32();
686 28300 : a[j3 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
687 28300 : move32();
688 : }
689 :
690 4717 : wk1r = w[k2 + 2];
691 4717 : move16();
692 4717 : wk1i = w[k2 + 3];
693 4717 : move16();
694 4717 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2r, wk1i ), 14 ) ) );
695 4717 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2r, wk1r ), 14 ), wk1i, 1 ) );
696 :
697 4717 : tmp = add( l, add( k, m ) );
698 33017 : FOR( j = k + m; j < tmp; j += 2 )
699 : {
700 28300 : j1 = add( j, l );
701 28300 : j2 = add( j1, l );
702 28300 : j3 = add( j2, l );
703 28300 : x0r = L_add( a[j], a[j1] );
704 28300 : x0i = L_add( a[j + 1], a[j1 + 1] );
705 28300 : x1r = L_sub( a[j], a[j1] );
706 28300 : x1i = L_sub( a[j + 1], a[j1 + 1] );
707 28300 : x2r = L_add( a[j2], a[j3] );
708 28300 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
709 28300 : x3r = L_sub( a[j2], a[j3] );
710 28300 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
711 28300 : a[j] = L_add( x0r, x2r );
712 28300 : move32();
713 28300 : a[j + 1] = L_add( x0i, x2i );
714 28300 : move32();
715 28300 : x0r = L_sub( x0r, x2r );
716 28300 : x0i = L_sub( x0i, x2i );
717 28300 : a[j2] = L_negate( L_add( Mult_32_16( L_shl( x0r, 1 ), wk2i ), Mult_32_16( L_shl( x0i, 1 ), wk2r ) ) );
718 28300 : move32();
719 28300 : a[j2 + 1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
720 28300 : move32();
721 28300 : x0r = L_sub( x1r, x3i );
722 28300 : x0i = L_add( x1i, x3r );
723 28300 : a[j1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
724 28300 : move32();
725 28300 : a[j1 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
726 28300 : move32();
727 28300 : x0r = L_add( x1r, x3i );
728 28300 : x0i = L_sub( x1i, x3r );
729 28300 : a[j3] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
730 28300 : move32();
731 28300 : a[j3 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
732 28300 : move32();
733 : }
734 : }
735 :
736 92772 : return;
737 : }
738 :
739 :
740 32875 : static void cftbsub_fx(
741 : Word16 n,
742 : Word32 *a, // Q(Qx+Q_edct)
743 : const Word16 *w /* i : cos/sin table Q14 */
744 : )
745 : {
746 : Word16 j, j1, j2, j3, l;
747 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
748 :
749 32875 : l = 2;
750 32875 : move16();
751 32875 : IF( GT_16( n, 8 ) )
752 : {
753 32875 : cft1st_fx( n, a, w );
754 32875 : l = 8;
755 32875 : move16();
756 :
757 65750 : WHILE( ( ( l << 2 ) < n ) )
758 : {
759 32875 : cftmdl_fx( n, l, a, w );
760 32875 : l = shl( l, 2 );
761 : }
762 : }
763 :
764 32875 : IF( EQ_16( shl( l, 2 ), n ) )
765 : {
766 0 : FOR( j = 0; j < l; j += 2 )
767 : {
768 0 : j1 = add( j, l );
769 0 : j2 = add( j1, l );
770 0 : j3 = add( j2, l );
771 0 : x0r = L_add( a[j], a[j1] );
772 0 : x0i = L_negate( L_add( a[j + 1], a[j1 + 1] ) );
773 0 : x1r = L_sub( a[j], a[j1] );
774 0 : x1i = L_sub( a[j1 + 1], a[j + 1] );
775 0 : x2r = L_add( a[j2], a[j3] );
776 0 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
777 0 : x3r = L_sub( a[j2], a[j3] );
778 0 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
779 0 : a[j] = L_add( x0r, x2r );
780 0 : move32();
781 0 : a[j + 1] = L_sub( x0i, x2i );
782 0 : move32();
783 0 : a[j2] = L_sub( x0r, x2r );
784 0 : move32();
785 0 : a[j2 + 1] = L_add( x0i, x2i );
786 0 : move32();
787 0 : a[j1] = L_sub( x1r, x3i );
788 0 : move32();
789 0 : a[j1 + 1] = L_sub( x1i, x3r );
790 0 : move32();
791 0 : a[j3] = L_add( x1r, x3i );
792 0 : move32();
793 0 : a[j3 + 1] = L_add( x1i, x3r );
794 0 : move32();
795 : }
796 : }
797 : ELSE
798 : {
799 558875 : FOR( j = 0; j < l; j += 2 )
800 : {
801 526000 : j1 = add( j, l );
802 526000 : x0r = L_sub( a[j], a[j1] );
803 526000 : x0i = L_sub( a[j1 + 1], a[j + 1] );
804 526000 : a[j] = L_add( a[j], a[j1] );
805 526000 : move32();
806 526000 : a[j + 1] = L_negate( L_add( a[j + 1], a[j1 + 1] ) );
807 526000 : move32();
808 526000 : a[j1] = x0r;
809 526000 : move32();
810 526000 : a[j1 + 1] = x0i;
811 526000 : move32();
812 : }
813 : }
814 32875 : }
815 :
816 59110 : static void rftfsub_fx(
817 : Word16 n,
818 : Word32 *a, // Qx
819 : Word16 nc,
820 : const Word16 *c /*Q14*/ )
821 : {
822 : Word16 j, k, kk, ks, m, tmp;
823 : Word32 xr, xi, yr, yi;
824 : Word16 wkr, wki;
825 :
826 59110 : m = shr( n, 1 );
827 : /*ks = 2 * nc / m; */
828 59110 : tmp = shl( nc, 1 );
829 59110 : ks = 0;
830 59110 : move16();
831 295550 : WHILE( ( tmp >= m ) )
832 : {
833 236440 : ks = add( ks, 1 );
834 236440 : tmp = sub( tmp, m );
835 : }
836 59110 : kk = 0;
837 59110 : move16();
838 945760 : FOR( j = 2; j < m; j += 2 )
839 : {
840 886650 : k = sub( n, j );
841 886650 : kk = add( kk, ks );
842 886650 : wkr = sub( 8192 /*0.5.Q14*/, c[( nc - kk )] ); // Q14
843 886650 : wki = c[kk]; // Q14
844 886650 : move16();
845 886650 : xr = L_sub( a[j], a[k] ); // Qx
846 886650 : xi = L_add( a[j + 1], a[k + 1] ); // Qx
847 886650 : yr = L_sub( Mult_32_16( L_shl( xr, 1 ), wkr ), Mult_32_16( L_shl( xi, 1 ), wki ) ); // Qx
848 886650 : yi = L_add( Mult_32_16( L_shl( xi, 1 ), wkr ), Mult_32_16( L_shl( xr, 1 ), wki ) ); // Qx
849 886650 : a[j] = L_sub( a[j], yr );
850 886650 : move32();
851 886650 : a[j + 1] = L_sub( a[j + 1], yi );
852 886650 : move32();
853 886650 : a[k] = L_add( a[k], yr );
854 886650 : move32();
855 886650 : a[k + 1] = L_sub( a[k + 1], yi );
856 886650 : move32();
857 : }
858 59110 : }
859 :
860 :
861 32875 : static void rftbsub_fx(
862 : Word16 n,
863 : Word32 *a, // Qx
864 : Word16 nc,
865 : const Word16 *c /*Q14*/ )
866 : {
867 : Word16 j, k, kk, ks, m, tmp;
868 : Word32 xr, xi, yr, yi;
869 : Word16 wkr, wki;
870 :
871 32875 : a[1] = L_negate( a[1] );
872 32875 : m = shr( n, 1 );
873 : /*ks = 2 * nc / m; */
874 32875 : tmp = shl( nc, 1 );
875 32875 : ks = 0;
876 32875 : move16();
877 164375 : WHILE( ( tmp >= m ) )
878 : {
879 131500 : ks = add( ks, 1 );
880 131500 : tmp = sub( tmp, m );
881 : }
882 32875 : kk = 0;
883 32875 : move16();
884 526000 : FOR( j = 2; j < m; j += 2 )
885 : {
886 493125 : k = sub( n, j );
887 493125 : kk = add( kk, ks );
888 493125 : wkr = sub( 8192 /*0.5.Q14*/, c[( nc - kk )] ); // Q14
889 493125 : wki = c[kk]; // Q14
890 493125 : move16();
891 493125 : xr = L_sub( a[j], a[k] ); // Qx
892 493125 : xi = L_add( a[j + 1], a[k + 1] ); // Qx
893 493125 : yr = L_add( Mult_32_16( L_shl( xr, 1 ), wkr ), Mult_32_16( L_shl( xi, 1 ), wki ) ); // Qx
894 493125 : yi = L_sub( Mult_32_16( L_shl( xi, 1 ), wkr ), Mult_32_16( L_shl( xr, 1 ), wki ) ); // Qx
895 493125 : a[j] = L_sub( a[j], yr );
896 493125 : move32();
897 493125 : a[j + 1] = L_sub( yi, a[j + 1] );
898 493125 : move32();
899 493125 : a[k] = L_add( a[k], yr );
900 493125 : move32();
901 493125 : a[k + 1] = L_sub( yi, a[k + 1] );
902 493125 : move32();
903 : }
904 32875 : a[m + 1] = L_negate( a[m + 1] );
905 32875 : move32();
906 32875 : }
907 :
908 :
909 91985 : static void dctsub_fx(
910 : Word16 n,
911 : Word32 *a, // Qx
912 : Word16 nc,
913 : const Word16 *c /*Q14*/ )
914 : {
915 : Word16 j, k, kk, ks, m, tmp;
916 : Word16 wkr, wki;
917 : Word32 xr;
918 :
919 91985 : m = shr( n, 1 );
920 : /*ks = nc / n; */
921 91985 : tmp = nc;
922 91985 : move16();
923 91985 : ks = 0;
924 91985 : move16();
925 183970 : WHILE( ( tmp >= n ) )
926 : {
927 91985 : ks = add( ks, 1 );
928 91985 : tmp = sub( tmp, n );
929 : }
930 91985 : kk = 0;
931 91985 : move16();
932 2943520 : FOR( j = 1; j < m; j++ )
933 : {
934 2851535 : k = sub( n, j );
935 2851535 : kk = add( kk, ks );
936 2851535 : wkr = sub( c[kk], c[( nc - kk )] ); // Q14
937 2851535 : wki = add( c[kk], c[( nc - kk )] ); // Q14
938 2851535 : xr = L_sub( Mult_32_16( L_shl( a[j], 1 ), wki ), Mult_32_16( L_shl( a[k], 1 ), wkr ) ); // Qx
939 2851535 : a[j] = L_add( Mult_32_16( L_shl( a[j], 1 ), wkr ), Mult_32_16( L_shl( a[k], 1 ), wki ) ); // Qx
940 2851535 : move32();
941 2851535 : a[k] = xr;
942 2851535 : move32();
943 : }
944 91985 : a[m] = Mult_32_16( L_shl( a[m], 1 ), c[0] ); // Qx
945 91985 : move16();
946 91985 : }
947 :
948 : /*-----------------------------------------------------------------*
949 : * edct2_fx()
950 : *
951 : * Transformation of the signal to DCT domain
952 : * OR Inverse EDCT-II for short frames
953 : *-----------------------------------------------------------------*/
954 :
955 91985 : void edct2_fx(
956 : Word16 n,
957 : Word16 isgn,
958 : Word16 *in, // Q(q)
959 : Word32 *a, // Qx
960 : Word16 *q,
961 : const Word16 *ip,
962 : const Word16 *w /*Q14*/ )
963 : {
964 : Word16 j, nw, nc;
965 : Word32 xr;
966 :
967 91985 : *q = Exp16Array( n, in );
968 91985 : move16();
969 91985 : *q = add( *q, 6 );
970 91985 : move16();
971 5979025 : FOR( j = 0; j < n; j++ )
972 : {
973 5887040 : a[j] = L_shl( (Word32) in[j], *q );
974 5887040 : move32();
975 : }
976 :
977 91985 : nw = ip[0];
978 91985 : move16();
979 91985 : if ( GT_16( n, shl( nw, 2 ) ) )
980 : {
981 0 : nw = shr( n, 2 );
982 : }
983 :
984 91985 : nc = ip[1];
985 91985 : move16();
986 91985 : if ( GT_16( n, nc ) )
987 : {
988 0 : nc = n;
989 0 : move16();
990 : }
991 :
992 91985 : IF( isgn < 0 )
993 : {
994 32875 : xr = a[n - 1];
995 32875 : move32();
996 1052000 : FOR( j = n - 2; j >= 2; j -= 2 )
997 : {
998 1019125 : a[j + 1] = L_sub( a[j], a[j - 1] );
999 1019125 : move32();
1000 1019125 : a[j] = L_add( a[j], a[j - 1] );
1001 1019125 : move32();
1002 : }
1003 32875 : a[1] = L_sub( a[0], xr );
1004 32875 : move32();
1005 32875 : a[0] = L_add( a[0], xr );
1006 32875 : move32();
1007 :
1008 32875 : IF( GT_16( n, 4 ) )
1009 : {
1010 32875 : rftbsub_fx( n, a, nc, w + nw );
1011 32875 : bitrv2_SR_fx( n, ip + 2, a );
1012 32875 : cftbsub_fx( n, a, w );
1013 : }
1014 0 : ELSE IF( EQ_16( n, 4 ) )
1015 : {
1016 0 : cftfsub_fx( n, a, w );
1017 : }
1018 : }
1019 :
1020 91985 : IF( isgn >= 0 )
1021 : {
1022 59110 : a[0] = L_shr( a[0], 1 );
1023 59110 : move32();
1024 : }
1025 :
1026 91985 : dctsub_fx( n, a, nc, w + nw );
1027 :
1028 91985 : IF( isgn >= 0 )
1029 : {
1030 59110 : IF( GT_16( n, 4 ) )
1031 : {
1032 59110 : bitrv2_SR_fx( n, ip + 2, a );
1033 59110 : cftfsub_fx( n, a, w );
1034 59110 : rftfsub_fx( n, a, nc, w + nw );
1035 : }
1036 0 : ELSE IF( EQ_16( n, 4 ) )
1037 : {
1038 0 : cftfsub_fx( n, a, w );
1039 : }
1040 59110 : xr = L_sub( a[0], a[1] );
1041 59110 : a[0] = L_add( a[0], a[1] );
1042 59110 : move32();
1043 1891520 : FOR( j = 2; j < n; j += 2 )
1044 : {
1045 1832410 : a[j - 1] = L_sub( a[j], a[j + 1] );
1046 1832410 : move32();
1047 1832410 : a[j] = L_add( a[j], a[j + 1] );
1048 1832410 : move32();
1049 : }
1050 59110 : a[n - 1] = xr;
1051 59110 : move32();
1052 :
1053 3842150 : FOR( j = 0; j < n; j++ )
1054 : {
1055 3783040 : a[j] = L_shr( a[j], 5 ); // a[j] / 32.0f
1056 3783040 : move32();
1057 : }
1058 : }
1059 91985 : }
1060 :
1061 :
1062 : /*-----------------------------------------------------------------*
1063 : * fft5_shift4()
1064 : * 5-point FFT with 4-point circular shift
1065 : *-----------------------------------------------------------------*/
1066 :
1067 1192064 : static void fft5_shift4_16fx(
1068 : Word16 n1, /* i : length of data */
1069 : Word16 *zRe, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1070 : Word16 *zIm, /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1071 : const Word16 *Idx /* i : pointer of the address table Q0 */
1072 : )
1073 : {
1074 : Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn, T2, T3, T4, T5, T6, T7;
1075 : Word16 i0, i1, i2, i3, i4;
1076 : Word32 L_tmp;
1077 :
1078 :
1079 1192064 : i0 = Idx[0];
1080 1192064 : move16();
1081 1192064 : i1 = Idx[n1];
1082 1192064 : move16();
1083 1192064 : i2 = Idx[n1 * 2];
1084 1192064 : move16();
1085 1192064 : i3 = Idx[n1 * 3];
1086 1192064 : move16();
1087 1192064 : i4 = Idx[n1 * 4];
1088 1192064 : move16();
1089 :
1090 1192064 : T1 = zRe[i0]; // Qx
1091 1192064 : move16();
1092 1192064 : To = zIm[i0]; // Qx
1093 1192064 : move16();
1094 :
1095 1192064 : T2 = zRe[i1];
1096 1192064 : move16();
1097 1192064 : T3 = zRe[i4];
1098 1192064 : move16();
1099 1192064 : T4 = add_sat( T2, T3 );
1100 1192064 : T5 = zRe[i2];
1101 1192064 : move16();
1102 1192064 : T6 = zRe[i3];
1103 1192064 : move16();
1104 1192064 : T7 = add_sat( T5, T6 );
1105 1192064 : T8 = add_sat( T4, T7 );
1106 1192064 : Tt = sub_sat( T5, T6 );
1107 : /* T9 = KP559016994 * (T4 - T7); */
1108 1192064 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1109 1192064 : T9 = round_fx_sat( L_tmp ); // Qx
1110 1192064 : Ts = sub_sat( T2, T3 );
1111 :
1112 1192064 : T2 = zIm[i1];
1113 1192064 : move16();
1114 1192064 : T3 = zIm[i4];
1115 1192064 : move16();
1116 1192064 : T4 = add( T2, T3 );
1117 1192064 : T5 = zIm[i2];
1118 1192064 : move16();
1119 1192064 : T6 = zIm[i3];
1120 1192064 : move16();
1121 1192064 : T7 = add_sat( T5, T6 );
1122 1192064 : Te = sub_sat( T2, T3 );
1123 1192064 : Tp = add_sat( T4, T7 );
1124 1192064 : Th = sub_sat( T5, T6 );
1125 :
1126 : /* Tn = KP559016994 * (T4 - T7); */
1127 1192064 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1128 1192064 : Tn = round_fx_sat( L_tmp ); // Qx
1129 1192064 : zRe[i0] = add_sat( T1, T8 );
1130 1192064 : move16();
1131 1192064 : zIm[i0] = add_sat( To, Tp );
1132 1192064 : move16();
1133 :
1134 : /* T2 = KP951056516*Te + KP587785252*Th; */
1135 1192064 : L_tmp = Mult_32_16( KP951056516_16FX, Te ); // Q(16 +x)
1136 1192064 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Th ); // Q(16 +x)
1137 1192064 : T2 = round_fx_sat( L_tmp ); // Qx
1138 : /*T3 = KP951056516*Th - KP587785252*Te; */
1139 1192064 : L_tmp = Mult_32_16( KP951056516_16FX, Th ); // Q(16 +x)
1140 1192064 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Te ); // Q(16 +x)
1141 1192064 : T3 = round_fx_sat( L_tmp ); // Qx
1142 1192064 : T6 = sub_sat( T1, shr_sat( T8, 2 ) );
1143 1192064 : T4 = add_sat( T9, T6 );
1144 1192064 : T5 = sub_sat( T6, T9 );
1145 1192064 : zRe[i1] = sub_sat( T4, T2 );
1146 1192064 : move16();
1147 1192064 : zRe[i2] = add_sat( T5, T3 );
1148 1192064 : move16();
1149 1192064 : zRe[i4] = add_sat( T4, T2 );
1150 1192064 : move16();
1151 1192064 : zRe[i3] = sub_sat( T5, T3 );
1152 1192064 : move16();
1153 :
1154 : /* T2 = KP951056516 * Ts + KP587785252 * Tt; */
1155 1192064 : L_tmp = Mult_32_16( KP951056516_16FX, Ts ); // Q(16 +x)
1156 1192064 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Tt ); // Q(16 +x)
1157 1192064 : T2 = round_fx_sat( L_tmp ); // Qx
1158 : /* T3 = KP951056516 * Tt - KP587785252 * Ts; */
1159 1192064 : L_tmp = Mult_32_16( KP951056516_16FX, Tt ); // Q(16 +x)
1160 1192064 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Ts ); // Q(16 +x)
1161 1192064 : T3 = round_fx_sat( L_tmp ); // Qx
1162 1192064 : T6 = sub_sat( To, shr( Tp, 2 ) ); // To - (Tp / 4)
1163 1192064 : T4 = add_sat( Tn, T6 );
1164 1192064 : T5 = sub_sat( T6, Tn );
1165 1192064 : zIm[i4] = sub_sat( T4, T2 );
1166 1192064 : move16();
1167 1192064 : zIm[i2] = sub_sat( T5, T3 );
1168 1192064 : move16();
1169 1192064 : zIm[i1] = add_sat( T2, T4 );
1170 1192064 : move16();
1171 1192064 : zIm[i3] = add_sat( T3, T5 );
1172 1192064 : move16();
1173 1192064 : return;
1174 : }
1175 :
1176 : /*-----------------------------------------------------------------*
1177 : * fft5_32()
1178 : * 5-point FFT called for 32 times
1179 : *-----------------------------------------------------------------*/
1180 2766560 : static void fft5_32_16fx(
1181 : Word16 *zRe, /* i/o : real part of input and output data Qx */
1182 : Word16 *zIm, /* i/o : imaginary part of input and output data Qx */
1183 : const Word16 *Idx /* i : pointer of the address table Q0 */
1184 : )
1185 : {
1186 : Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn, T2, T3, T4, T5, T6, T7;
1187 : Word16 i0, i1, i2, i3, i4;
1188 : Word32 L_tmp;
1189 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1190 2766560 : Flag Overflow = 0;
1191 2766560 : move32();
1192 : #endif
1193 2766560 : i0 = Idx[0];
1194 2766560 : move16();
1195 2766560 : i1 = Idx[32];
1196 2766560 : move16();
1197 2766560 : i2 = Idx[64];
1198 2766560 : move16();
1199 2766560 : i3 = Idx[96];
1200 2766560 : move16();
1201 2766560 : i4 = Idx[128];
1202 2766560 : move16();
1203 :
1204 2766560 : T1 = zRe[i0]; // Qx
1205 2766560 : move16();
1206 2766560 : To = zIm[i0]; // Qx
1207 2766560 : move16();
1208 :
1209 2766560 : T2 = zRe[i1]; // Qx
1210 2766560 : move16();
1211 2766560 : T3 = zRe[i4]; // Qx
1212 2766560 : move16();
1213 :
1214 2766560 : T4 = add_sat( T2, T3 );
1215 2766560 : T5 = zRe[i2];
1216 2766560 : move16();
1217 2766560 : T6 = zRe[i3];
1218 2766560 : move16();
1219 2766560 : T7 = add_o( T5, T6, &Overflow );
1220 2766560 : T8 = add_o( T4, T7, &Overflow );
1221 2766560 : Tt = sub_o( T5, T6, &Overflow );
1222 : /* T9 = KP559016994 * (T4 - T7); */
1223 2766560 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1224 2766560 : T9 = round_fx_sat( L_tmp ); // Qx
1225 2766560 : Ts = sub_sat( T2, T3 );
1226 :
1227 2766560 : T2 = zIm[i1];
1228 2766560 : move16();
1229 2766560 : T3 = zIm[i4];
1230 2766560 : move16();
1231 2766560 : T4 = add_sat( T2, T3 );
1232 2766560 : T5 = zIm[i2];
1233 2766560 : move16();
1234 2766560 : T6 = zIm[i3];
1235 2766560 : move16();
1236 2766560 : T7 = add_sat( T5, T6 );
1237 2766560 : Te = sub_sat( T2, T3 );
1238 2766560 : Tp = add_sat( T4, T7 );
1239 2766560 : Th = sub_sat( T5, T6 );
1240 2766560 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1241 2766560 : Tn = round_fx_sat( L_tmp ); // Qx
1242 :
1243 :
1244 2766560 : zRe[i0] = add_o( T1, T8, &Overflow );
1245 2766560 : move16();
1246 2766560 : zIm[i0] = add_o( To, Tp, &Overflow );
1247 2766560 : move32();
1248 :
1249 : /*T2 = KP951056516*Te + KP587785252*Th; */
1250 2766560 : L_tmp = Mult_32_16( KP951056516_16FX, Te ); // Q(16 +x)
1251 2766560 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Th ); // Q(16 +x)
1252 2766560 : T2 = round_fx_sat( L_tmp ); // Qx
1253 :
1254 : /*T3 = KP951056516*Th - KP587785252*Te; */
1255 2766560 : L_tmp = Mult_32_16( KP951056516_16FX, Th ); // Q(16 +x)
1256 2766560 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Te ); // Q(16 +x)
1257 2766560 : T3 = round_fx_sat( L_tmp ); // Qx
1258 :
1259 :
1260 2766560 : T6 = sub_sat( T1, shr( T8, 2 ) );
1261 2766560 : T4 = add_sat( T9, T6 );
1262 2766560 : T5 = sub_sat( T6, T9 );
1263 :
1264 2766560 : zRe[i3] = sub_o( T4, T2, &Overflow );
1265 2766560 : move32();
1266 2766560 : zRe[i1] = add_o( T5, T3, &Overflow );
1267 2766560 : move32();
1268 2766560 : zRe[i2] = add_o( T4, T2, &Overflow );
1269 2766560 : move32();
1270 2766560 : zRe[i4] = sub_o( T5, T3, &Overflow );
1271 2766560 : move32();
1272 :
1273 : /* T2 = KP951056516 * Ts + KP587785252 * Tt; */
1274 2766560 : L_tmp = Mult_32_16( KP951056516_16FX, Ts ); // Q(16 +x)
1275 2766560 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Tt ); // Q(16 +x)
1276 2766560 : T2 = round_fx_sat( L_tmp ); // Qx
1277 :
1278 : /* T3 = KP951056516 * Tt - KP587785252 * Ts; */
1279 2766560 : L_tmp = Mult_32_16( KP951056516_16FX, Tt ); // Q(16 +x)
1280 2766560 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Ts ); // Q(16 +x)
1281 :
1282 2766560 : T3 = round_fx_sat( L_tmp ); // Qx
1283 :
1284 2766560 : T6 = sub_sat( To, shr( Tp, 2 ) );
1285 2766560 : T4 = add_sat( Tn, T6 );
1286 2766560 : T5 = sub_sat( T6, Tn );
1287 2766560 : zIm[i2] = sub_sat( T4, T2 );
1288 2766560 : move16();
1289 2766560 : zIm[i1] = sub_sat( T5, T3 );
1290 2766560 : move16();
1291 2766560 : zIm[i3] = add_sat( T2, T4 );
1292 2766560 : move16();
1293 2766560 : zIm[i4] = add_sat( T3, T5 );
1294 2766560 : move16();
1295 :
1296 2766560 : return;
1297 : }
1298 :
1299 : /*-----------------------------------------------------------------*
1300 : * fft64()
1301 : * 64-point FFT
1302 : *-----------------------------------------------------------------*/
1303 93130 : static void fft64_16fx(
1304 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1305 : Word16 *y, /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1306 : const Word16 *Idx /* i : pointer of the address table Q0 */
1307 : )
1308 : {
1309 : Word16 i, id, jd;
1310 : Word16 z[128];
1311 93130 : move16(); /*penalty for 1 ptr init */
1312 6053450 : FOR( i = 0; i < 64; i++ )
1313 : {
1314 5960320 : id = Idx[i];
1315 5960320 : move16();
1316 5960320 : z[2 * i] = x[id];
1317 5960320 : move16();
1318 5960320 : z[2 * i + 1] = y[id];
1319 5960320 : move16();
1320 : }
1321 :
1322 93130 : cdftForw_16fx( 128, z, Ip_fft64, w_fft128_16fx );
1323 :
1324 93130 : move16(); /*penalty for 1 ptr init */
1325 6053450 : FOR( i = 0; i < 64; i++ )
1326 : {
1327 5960320 : jd = Odx_fft64[i];
1328 5960320 : move16();
1329 5960320 : id = Idx[jd];
1330 5960320 : move16();
1331 5960320 : x[id] = z[2 * i];
1332 5960320 : move16();
1333 5960320 : y[id] = z[2 * i + 1];
1334 5960320 : move16();
1335 : }
1336 :
1337 93130 : return;
1338 : }
1339 :
1340 :
1341 : /*-----------------------------------------------------------------*
1342 : * fft32_5()
1343 : * 32-point FFT called for 5 times
1344 : *-----------------------------------------------------------------*/
1345 432275 : static void fft32_5_16fx(
1346 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1347 : Word16 *y, /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1348 : const Word16 *Idx /* i : pointer of the address table */
1349 : )
1350 : {
1351 : Word16 i, id, jd;
1352 : Word16 z[64];
1353 :
1354 14265075 : FOR( i = 0; i < 32; i++ )
1355 : {
1356 13832800 : id = Idx[i];
1357 13832800 : move16();
1358 13832800 : z[2 * i] = x[id];
1359 13832800 : move16();
1360 13832800 : z[2 * i + 1] = y[id];
1361 13832800 : move16();
1362 : }
1363 :
1364 432275 : cdftForw_16fx( 64, z, Ip_fft32, w_fft32_16fx );
1365 :
1366 14265075 : FOR( i = 0; i < 32; i++ )
1367 : {
1368 13832800 : jd = Odx_fft32_5[i];
1369 13832800 : move16();
1370 13832800 : id = Idx[jd];
1371 13832800 : move16();
1372 13832800 : x[id] = z[2 * i];
1373 13832800 : move16();
1374 13832800 : y[id] = z[2 * i + 1];
1375 13832800 : move16();
1376 : }
1377 :
1378 432275 : return;
1379 : }
1380 :
1381 :
1382 : /*-----------------------------------------------------------------*
1383 : * DoRTFT160()
1384 : * a low complexity 2-dimensional DFT of 160 points
1385 : *-----------------------------------------------------------------*/
1386 86455 : void DoRTFT160_16fx(
1387 : Word16 x[], /* i/o : real part of input and output data Q(Qx+Q_edct) */
1388 : Word16 y[] /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1389 : )
1390 : {
1391 : Word16 j;
1392 :
1393 : /* Applying 32-point FFT for 5 times based on the address table Idx_dortft160 */
1394 518730 : FOR( j = 0; j < 5; j++ )
1395 : {
1396 432275 : fft32_5_16fx( x, y, Idx_dortft160 + shl( j, 5 ) /*32*j*/ );
1397 : }
1398 :
1399 : /* Applying 5-point FFT for 32 times based on the address table Idx_dortft160 */
1400 2853015 : FOR( j = 0; j < 32; j++ )
1401 : {
1402 2766560 : fft5_32_16fx( x, y, Idx_dortft160 + j );
1403 : }
1404 :
1405 86455 : return;
1406 : }
1407 :
1408 : /*-----------------------------------------------------------------*
1409 : * DoRTFT320()
1410 : * a low complexity 2-dimensional DFT of 320 points
1411 : *-----------------------------------------------------------------*/
1412 18626 : void DoRTFT320_16fx(
1413 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1414 : Word16 *y /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1415 : )
1416 : {
1417 : Word16 j;
1418 :
1419 : /* Applying 64-point FFT for 5 times based on the address table Idx_dortft160 */
1420 111756 : FOR( j = 0; j < 5; j++ )
1421 : {
1422 93130 : fft64_16fx( x, y, Idx_dortft320 + shl( j, 6 ) /*64*j*/ );
1423 : }
1424 :
1425 : /* Applying 5-point FFT for 64 times based on the address table Idx_dortft160 */
1426 1210690 : FOR( j = 0; j < 64; j++ )
1427 : {
1428 1192064 : fft5_shift4_16fx( 64, x, y, Idx_dortft320 + j );
1429 : }
1430 :
1431 18626 : return;
1432 : }
1433 :
1434 : /*-----------------------------------------------------------------*
1435 : * DoRTFT128()
1436 : * FFT with 128 points
1437 : *-----------------------------------------------------------------*/
1438 193294 : void DoRTFT128_16fx(
1439 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct)*/
1440 : Word16 *y /* i/o : imaginary part of input and output data Q(Qx+Q_edct)*/
1441 : )
1442 : {
1443 :
1444 : Word16 i;
1445 : Word16 z[256];
1446 :
1447 24934926 : FOR( i = 0; i < 128; i++ )
1448 : {
1449 24741632 : z[2 * i] = x[i];
1450 24741632 : move16();
1451 24741632 : z[2 * i + 1] = y[i];
1452 24741632 : move16();
1453 : }
1454 :
1455 193294 : cdftForw_16fx( 256, z, Ip_fft128, w_fft128_16fx );
1456 :
1457 193294 : x[0] = z[0];
1458 193294 : move16();
1459 193294 : y[0] = z[1];
1460 193294 : move16();
1461 24741632 : FOR( i = 1; i < 128; i++ )
1462 : {
1463 24548338 : x[128 - i] = z[2 * i];
1464 24548338 : move16();
1465 24548338 : y[128 - i] = z[2 * i + 1];
1466 24548338 : move16();
1467 : }
1468 :
1469 193294 : return;
1470 : }
1471 : /*-----------------------------------------------------------------*
1472 : * cdftForw()
1473 : * Main fuction of Complex Discrete Fourier Transform
1474 : *-----------------------------------------------------------------*/
1475 718699 : static void cdftForw_16fx(
1476 : Word16 n, /* i : data length of real and imag */
1477 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1478 : const Word16 *ip, /* i : work area for bit reversal */
1479 : const Word32 *w /* i : cos/sin table Q30*/
1480 : )
1481 : {
1482 : /* bit reversal */
1483 718699 : bitrv2_SR_16fx( n, ip + 2, a );
1484 :
1485 : /* Do FFT */
1486 718699 : cftfsub_16fx( n, a, w );
1487 718699 : }
1488 :
1489 : /*-----------------------------------------------------------------*
1490 : * bitrv2_SR()
1491 : * Bit reversal
1492 : *-----------------------------------------------------------------*/
1493 718699 : static void bitrv2_SR_16fx(
1494 : Word16 n, /* i : data length of real and imag */
1495 : const Word16 *ip, /* i/o : work area for bit reversal */
1496 : Word16 *a /* i/o : input/output data Q(Qx+Q_edct)*/
1497 : )
1498 : {
1499 : Word16 j, j1, k, k1, m, m2;
1500 : Word16 l;
1501 : Word16 xr, xi, yr, yi;
1502 :
1503 718699 : l = n;
1504 718699 : move16();
1505 718699 : m = 1;
1506 718699 : move16();
1507 :
1508 2349391 : WHILE( ( ( m << 3 ) < l ) )
1509 : {
1510 1630692 : l = shr( l, 1 );
1511 1630692 : m = shl( m, 1 );
1512 : }
1513 :
1514 718699 : m2 = shl( m, 1 );
1515 718699 : IF( EQ_16( shl( m, 3 ), l ) )
1516 : {
1517 465650 : FOR( k = 0; k < m; k++ )
1518 : {
1519 931300 : FOR( j = 0; j < k; j++ )
1520 : {
1521 558780 : j1 = add( shl( j, 1 ), ip[k] );
1522 558780 : k1 = add( shl( k, 1 ), ip[j] );
1523 558780 : xr = a[j1];
1524 558780 : move16();
1525 558780 : xi = a[j1 + 1];
1526 558780 : move16();
1527 558780 : yr = a[k1];
1528 558780 : move16();
1529 558780 : yi = a[k1 + 1];
1530 558780 : move16();
1531 558780 : a[j1] = yr;
1532 558780 : move16();
1533 558780 : a[j1 + 1] = yi;
1534 558780 : move16();
1535 558780 : a[k1] = xr;
1536 558780 : move16();
1537 558780 : a[k1 + 1] = xi;
1538 558780 : move16();
1539 558780 : j1 = add( j1, m2 );
1540 558780 : k1 = add( k1, shl( m2, 1 ) );
1541 558780 : xr = a[j1];
1542 558780 : move16();
1543 558780 : xi = a[j1 + 1];
1544 558780 : move16();
1545 558780 : yr = a[k1];
1546 558780 : move16();
1547 558780 : yi = a[k1 + 1];
1548 558780 : move16();
1549 558780 : a[j1] = yr;
1550 558780 : move16();
1551 558780 : a[j1 + 1] = yi;
1552 558780 : move16();
1553 558780 : a[k1] = xr;
1554 558780 : move16();
1555 558780 : a[k1 + 1] = xi;
1556 558780 : move16();
1557 558780 : j1 = add( j1, m2 );
1558 558780 : k1 = sub( k1, m2 );
1559 558780 : xr = a[j1];
1560 558780 : move16();
1561 558780 : xi = a[j1 + 1];
1562 558780 : move16();
1563 558780 : xi = a[j1 + 1];
1564 558780 : move16();
1565 558780 : yr = a[k1];
1566 558780 : move16();
1567 558780 : yi = a[k1 + 1];
1568 558780 : move16();
1569 558780 : a[j1] = yr;
1570 558780 : move16();
1571 558780 : a[j1 + 1] = yi;
1572 558780 : move16();
1573 558780 : a[k1] = xr;
1574 558780 : move16();
1575 558780 : a[k1 + 1] = xi;
1576 558780 : move16();
1577 558780 : j1 = add( j1, m2 );
1578 558780 : k1 = add( k1, shl( m2, 1 ) );
1579 558780 : xr = a[j1];
1580 558780 : move16();
1581 558780 : xi = a[j1 + 1];
1582 558780 : move16();
1583 558780 : yr = a[k1];
1584 558780 : move16();
1585 558780 : yi = a[k1 + 1];
1586 558780 : move16();
1587 558780 : a[j1] = yr;
1588 558780 : move16();
1589 558780 : a[j1 + 1] = yi;
1590 558780 : move16();
1591 558780 : a[k1] = xr;
1592 558780 : move16();
1593 558780 : a[k1 + 1] = xi;
1594 558780 : move16();
1595 : }
1596 :
1597 372520 : j1 = add( add( shl( k, 1 ), m2 ), ip[k] );
1598 372520 : k1 = add( j1, m2 );
1599 372520 : xr = a[j1];
1600 372520 : move16();
1601 372520 : xi = a[j1 + 1];
1602 372520 : move16();
1603 372520 : yr = a[k1];
1604 372520 : move16();
1605 372520 : yi = a[k1 + 1];
1606 372520 : move16();
1607 372520 : a[j1] = yr;
1608 372520 : move16();
1609 372520 : a[j1 + 1] = yi;
1610 372520 : move16();
1611 372520 : a[k1] = xr;
1612 372520 : move16();
1613 372520 : a[k1 + 1] = xi;
1614 372520 : move16();
1615 : }
1616 : }
1617 : ELSE
1618 : {
1619 3275452 : FOR( k = 1; k < m; k++ )
1620 : {
1621 10655765 : FOR( j = 0; j < k; j++ )
1622 : {
1623 8005882 : j1 = add( shl( j, 1 ), ip[k] );
1624 8005882 : k1 = add( shl( k, 1 ), ip[j] );
1625 8005882 : xr = a[j1];
1626 8005882 : move16();
1627 8005882 : xi = a[j1 + 1];
1628 8005882 : move16();
1629 8005882 : yr = a[k1];
1630 8005882 : move16();
1631 8005882 : yi = a[k1 + 1];
1632 8005882 : move16();
1633 8005882 : a[j1] = yr;
1634 8005882 : move16();
1635 8005882 : a[j1 + 1] = yi;
1636 8005882 : move16();
1637 8005882 : a[k1] = xr;
1638 8005882 : move16();
1639 8005882 : a[k1 + 1] = xi;
1640 8005882 : move16();
1641 8005882 : j1 = add( j1, m2 );
1642 8005882 : k1 = add( k1, m2 );
1643 8005882 : xr = a[j1];
1644 8005882 : move16();
1645 8005882 : xi = a[j1 + 1];
1646 8005882 : move16();
1647 8005882 : yr = a[k1];
1648 8005882 : move16();
1649 8005882 : yi = a[k1 + 1];
1650 8005882 : move16();
1651 8005882 : a[j1] = yr;
1652 8005882 : move16();
1653 8005882 : a[j1 + 1] = yi;
1654 8005882 : move16();
1655 8005882 : a[k1] = xr;
1656 8005882 : move16();
1657 8005882 : a[k1 + 1] = xi;
1658 8005882 : move16();
1659 : }
1660 : }
1661 : }
1662 :
1663 718699 : return;
1664 : }
1665 :
1666 : /*-----------------------------------------------------------------*
1667 : * cftfsub()
1668 : * Complex Discrete Fourier Transform
1669 : *-----------------------------------------------------------------*/
1670 718699 : static void cftfsub_16fx(
1671 : Word16 n, /* i : data length of real and imag */
1672 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1673 : const Word32 *w /* i : cos/sin table Q30*/
1674 : )
1675 : {
1676 : Word16 j, j1, j2, j3, l;
1677 : Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1678 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1679 718699 : Flag Overflow = 0;
1680 718699 : move32();
1681 : #endif
1682 :
1683 718699 : l = 2;
1684 718699 : move16();
1685 718699 : IF( GT_16( n, 8 ) )
1686 : {
1687 718699 : cft1st_16fx( n, a, w );
1688 718699 : l = 8;
1689 718699 : move16();
1690 1630692 : WHILE( ( ( l << 2 ) < n ) )
1691 : {
1692 911993 : cftmdl_16fx( n, l, a, w );
1693 911993 : l = shl( l, 2 );
1694 : }
1695 : }
1696 :
1697 718699 : IF( EQ_16( shl( l, 2 ), n ) )
1698 : {
1699 1583210 : FOR( j = 0; j < l; j += 2 )
1700 : {
1701 1490080 : j1 = add( j, l );
1702 1490080 : j2 = add( j1, l );
1703 1490080 : j3 = add( j2, l );
1704 1490080 : x0r = add( a[j], a[j1] );
1705 1490080 : x0i = add( a[j + 1], a[j1 + 1] );
1706 1490080 : x1r = sub( a[j], a[j1] );
1707 1490080 : x1i = sub( a[j + 1], a[j1 + 1] );
1708 1490080 : x2r = add( a[j2], a[j3] );
1709 1490080 : x2i = add( a[j2 + 1], a[j3 + 1] );
1710 1490080 : x3r = sub( a[j2], a[j3] );
1711 1490080 : x3i = sub( a[j2 + 1], a[j3 + 1] );
1712 1490080 : a[j] = add( x0r, x2r );
1713 1490080 : move16();
1714 1490080 : a[j + 1] = add( x0i, x2i );
1715 1490080 : move16();
1716 1490080 : a[j2] = sub( x0r, x2r );
1717 1490080 : move16();
1718 1490080 : a[j2 + 1] = sub( x0i, x2i );
1719 1490080 : move16();
1720 1490080 : a[j1] = sub( x1r, x3i );
1721 1490080 : move16();
1722 1490080 : a[j1 + 1] = add( x1i, x3r );
1723 1490080 : move16();
1724 1490080 : a[j3] = add( x1r, x3i );
1725 1490080 : move16();
1726 1490080 : a[j3 + 1] = sub( x1i, x3r );
1727 1490080 : move16();
1728 : }
1729 : }
1730 : ELSE
1731 : {
1732 19912785 : FOR( j = 0; j < l; j += 2 )
1733 : {
1734 19287216 : j1 = add_o( j, l, &Overflow );
1735 19287216 : x0r = sub_o( a[j], a[j1], &Overflow );
1736 19287216 : x0i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
1737 19287216 : a[j] = add_o( a[j], a[j1], &Overflow );
1738 19287216 : move16();
1739 19287216 : a[j + 1] = add_o( a[j + 1], a[j1 + 1], &Overflow );
1740 19287216 : move16();
1741 19287216 : a[j1] = x0r;
1742 19287216 : move16();
1743 19287216 : a[j1 + 1] = x0i;
1744 19287216 : move16();
1745 : }
1746 : }
1747 718699 : return;
1748 : }
1749 :
1750 : /*-----------------------------------------------------------------*
1751 : * cft1st()
1752 : * Subfunction of Complex Discrete Fourier Transform
1753 : *-----------------------------------------------------------------*/
1754 718699 : static void cft1st_16fx(
1755 : Word16 n, /* i : data length of real and imag */
1756 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1757 : const Word32 *w /* i : cos/sin table Q30*/
1758 : )
1759 : {
1760 : Word16 j, k1, k2;
1761 : Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1762 : Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1763 : Word16 tmp;
1764 : Word32 L_tmp;
1765 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1766 718699 : Flag Overflow = 0;
1767 718699 : move32();
1768 : #endif
1769 :
1770 718699 : x0r = add_o( a[0], a[2], &Overflow );
1771 718699 : x0i = add_o( a[1], a[3], &Overflow );
1772 718699 : x1r = sub_o( a[0], a[2], &Overflow );
1773 718699 : x1i = sub_o( a[1], a[3], &Overflow );
1774 718699 : x2r = add_o( a[4], a[6], &Overflow );
1775 718699 : x2i = add_o( a[5], a[7], &Overflow );
1776 718699 : x3r = sub_o( a[4], a[6], &Overflow );
1777 718699 : x3i = sub_o( a[5], a[7], &Overflow );
1778 718699 : a[0] = add_o( x0r, x2r, &Overflow );
1779 718699 : move16();
1780 718699 : a[1] = add_o( x0i, x2i, &Overflow );
1781 718699 : move16();
1782 718699 : a[4] = sub_o( x0r, x2r, &Overflow );
1783 718699 : move16();
1784 718699 : a[5] = sub_o( x0i, x2i, &Overflow );
1785 718699 : move16();
1786 718699 : a[2] = sub_o( x1r, x3i, &Overflow );
1787 718699 : move16();
1788 718699 : a[3] = add_o( x1i, x3r, &Overflow );
1789 718699 : move16();
1790 718699 : a[6] = add_o( x1r, x3i, &Overflow );
1791 718699 : move16();
1792 718699 : a[7] = sub_o( x1i, x3r, &Overflow );
1793 718699 : wk1r = w[2];
1794 718699 : move32();
1795 :
1796 718699 : x0r = add_o( a[8], a[10], &Overflow );
1797 718699 : x0i = add_o( a[9], a[11], &Overflow );
1798 718699 : x1r = sub_o( a[8], a[10], &Overflow );
1799 718699 : x1i = sub_o( a[9], a[11], &Overflow );
1800 718699 : x2r = add_o( a[12], a[14], &Overflow );
1801 718699 : x2i = add_o( a[13], a[15], &Overflow );
1802 718699 : x3r = sub_o( a[12], a[14], &Overflow );
1803 718699 : x3i = sub_o( a[13], a[15], &Overflow );
1804 718699 : a[8] = add_o( x0r, x2r, &Overflow );
1805 718699 : move16();
1806 718699 : a[9] = add_o( x0i, x2i, &Overflow );
1807 718699 : move16();
1808 718699 : a[12] = sub_o( x2i, x0i, &Overflow );
1809 718699 : move16();
1810 718699 : a[13] = sub_o( x0r, x2r, &Overflow );
1811 718699 : move16();
1812 :
1813 718699 : x0r = sub_o( x1r, x3i, &Overflow );
1814 718699 : x0i = add_o( x1i, x3r, &Overflow );
1815 718699 : tmp = sub_o( x0r, x0i, &Overflow );
1816 718699 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1817 :
1818 718699 : a[10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1819 718699 : move16();
1820 :
1821 718699 : tmp = add_o( x0r, x0i, &Overflow );
1822 718699 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1823 718699 : a[11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /* Q(Qx+Q_edct) */
1824 718699 : move16();
1825 :
1826 718699 : x0r = add_o( x3i, x1r, &Overflow );
1827 718699 : x0i = sub_o( x3r, x1i, &Overflow );
1828 718699 : tmp = sub_o( x0i, x0r, &Overflow );
1829 718699 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1830 718699 : a[14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1831 718699 : move16();
1832 :
1833 718699 : tmp = add_o( x0i, x0r, &Overflow );
1834 718699 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1835 718699 : a[15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1836 718699 : move16();
1837 718699 : k1 = 0;
1838 718699 : move16();
1839 :
1840 5566844 : FOR( j = 16; j < n; j += 16 )
1841 : {
1842 4848145 : k1 = add( k1, 2 );
1843 4848145 : k2 = shl( k1, 1 );
1844 :
1845 4848145 : wk2r = w[k1];
1846 4848145 : move32();
1847 4848145 : wk2i = w[k1 + 1];
1848 4848145 : move32();
1849 4848145 : wk1r = w[k2];
1850 4848145 : move32();
1851 4848145 : wk1i = w[k2 + 1];
1852 4848145 : move32();
1853 :
1854 4848145 : L_tmp = L_shl( Mult_32_32( wk2i, wk1i ), 1 ); /*Q29 */
1855 4848145 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
1856 :
1857 4848145 : L_tmp = L_shl( Mult_32_32( wk2i, wk1r ), 1 ); /*Q29 */
1858 4848145 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
1859 4848145 : x0r = add_o( a[j], a[j + 2], &Overflow );
1860 4848145 : x0i = add_o( a[j + 1], a[j + 3], &Overflow );
1861 4848145 : x1r = sub_o( a[j], a[j + 2], &Overflow );
1862 4848145 : x1i = sub_o( a[j + 1], a[j + 3], &Overflow );
1863 4848145 : x2r = add_o( a[j + 4], a[j + 6], &Overflow );
1864 4848145 : x2i = add_o( a[j + 5], a[j + 7], &Overflow );
1865 4848145 : x3r = sub_o( a[j + 4], a[j + 6], &Overflow );
1866 4848145 : x3i = sub_o( a[j + 5], a[j + 7], &Overflow );
1867 4848145 : a[j] = add_o( x0r, x2r, &Overflow );
1868 4848145 : move16();
1869 4848145 : a[j + 1] = add_o( x0i, x2i, &Overflow );
1870 4848145 : move16();
1871 :
1872 4848145 : x0r = sub_o( x0r, x2r, &Overflow );
1873 4848145 : x0i = sub_o( x0i, x2i, &Overflow );
1874 4848145 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
1875 4848145 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
1876 4848145 : a[j + 4] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1877 4848145 : move16();
1878 :
1879 4848145 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
1880 4848145 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
1881 4848145 : a[j + 5] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1882 4848145 : move16();
1883 :
1884 4848145 : x0r = sub_o( x1r, x3i, &Overflow );
1885 4848145 : x0i = add_o( x1i, x3r, &Overflow );
1886 4848145 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
1887 4848145 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
1888 4848145 : a[j + 2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1889 4848145 : move16();
1890 :
1891 4848145 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
1892 4848145 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
1893 4848145 : a[j + 3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1894 4848145 : move16();
1895 :
1896 4848145 : x0r = add_o( x1r, x3i, &Overflow );
1897 4848145 : x0i = sub_o( x1i, x3r, &Overflow );
1898 4848145 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
1899 4848145 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
1900 4848145 : a[j + 6] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1901 4848145 : move16();
1902 :
1903 4848145 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
1904 4848145 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
1905 4848145 : a[j + 7] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1906 4848145 : move16();
1907 :
1908 4848145 : wk1r = w[k2 + 2];
1909 4848145 : move32();
1910 4848145 : wk1i = w[k2 + 3];
1911 4848145 : move32();
1912 4848145 : L_tmp = L_shl( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
1913 4848145 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
1914 :
1915 4848145 : L_tmp = L_shl( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
1916 4848145 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
1917 :
1918 4848145 : x0r = add_o( a[j + 8], a[j + 10], &Overflow );
1919 4848145 : x0i = add_o( a[j + 9], a[j + 11], &Overflow );
1920 4848145 : x1r = sub_o( a[j + 8], a[j + 10], &Overflow );
1921 4848145 : x1i = sub_o( a[j + 9], a[j + 11], &Overflow );
1922 4848145 : x2r = add_o( a[j + 12], a[j + 14], &Overflow );
1923 4848145 : x2i = add_o( a[j + 13], a[j + 15], &Overflow );
1924 4848145 : x3r = sub_o( a[j + 12], a[j + 14], &Overflow );
1925 4848145 : x3i = sub_o( a[j + 13], a[j + 15], &Overflow );
1926 4848145 : a[j + 8] = add_o( x0r, x2r, &Overflow );
1927 4848145 : move16();
1928 4848145 : a[j + 9] = add_o( x0i, x2i, &Overflow );
1929 4848145 : move16();
1930 :
1931 4848145 : x0r = sub_o( x0r, x2r, &Overflow );
1932 4848145 : x0i = sub_o( x0i, x2i, &Overflow );
1933 4848145 : tmp = negate( x0r );
1934 4848145 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
1935 4848145 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
1936 4848145 : a[j + 12] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1937 4848145 : move16();
1938 :
1939 4848145 : tmp = negate( x0i );
1940 4848145 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
1941 4848145 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
1942 4848145 : a[j + 13] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1943 4848145 : move16();
1944 :
1945 4848145 : x0r = sub_o( x1r, x3i, &Overflow );
1946 4848145 : x0i = add_o( x1i, x3r, &Overflow );
1947 4848145 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
1948 4848145 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
1949 4848145 : a[j + 10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1950 4848145 : move16();
1951 :
1952 4848145 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
1953 4848145 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
1954 4848145 : a[j + 11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1955 4848145 : move16();
1956 :
1957 4848145 : x0r = add_o( x1r, x3i, &Overflow );
1958 4848145 : x0i = sub_o( x1i, x3r, &Overflow );
1959 :
1960 4848145 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
1961 4848145 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
1962 4848145 : a[j + 14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1963 4848145 : move16();
1964 :
1965 4848145 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
1966 4848145 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
1967 4848145 : a[j + 15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1968 4848145 : move16();
1969 : }
1970 :
1971 718699 : return;
1972 : }
1973 :
1974 : /*-----------------------------------------------------------------*
1975 : * cftmdl_16fx()
1976 : * Subfunction of Complex Discrete Fourier Transform
 *
 * Runs one in-place pass of 4-point butterflies over a[], which is
 * accessed as interleaved pairs (a[j] = real, a[j + 1] = imaginary).
 * Each butterfly combines the four values at j, j + l, j + 2l and
 * j + 3l.  With m = 4 * l the array is handled in blocks of m words:
 *   - block [0, m)      : butterflies without any twiddle multiply
 *   - block [m, 2m)     : butterflies scaled by the single factor w[2]
 *   - for k = 2m, 4m, ... while k < n:
 *       block [k, k + m)      uses w[k1], w[k1+1], w[k2], w[k2+1]
 *       block [k + m, k + 2m) uses w[k1], w[k1+1], w[k2+2], w[k2+3]
 *     where k1 advances by 2 per iteration and k2 = 2 * k1; a third
 *     twiddle (wk3r, wk3i) is derived from the two that are read.
 *
 * All 16-bit additions/subtractions go through the *_o basic
 * operators with the Overflow flag (a local one when
 * BASOP_NOGLOB_DECLARE_LOCAL is defined).
1977 : *-----------------------------------------------------------------*/
1978 911993 : static void cftmdl_16fx(
1979 : Word16 n, /* i : data length of real and imag (upper bound for block offset k) */
1980 : Word16 l, /* i : initial shift for processing (distance between the 4 butterfly inputs) */
1981 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1982 : const Word32 *w /* i : cos/sin table Q30*/
1983 : )
1984 : {
1985 : Word16 j, j1, j2, j3, k, k1, k2, m, m2;
1986 : Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1987 : Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1988 : Word16 tmp, tmp2;
1989 : Word32 L_tmp;
1990 : Word32 L_x0r, L_x0i;
1991 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1992 911993 : Flag Overflow = 0;
1993 911993 : move32();
1994 : #endif
1995 911993 : m = shl( l, 2 ); /* block size: 4 butterfly legs of l words each */
/* Block [0, m): sums/differences only, no twiddle multiplication. */
1996 6879493 : FOR( j = 0; j < l; j += 2 )
1997 : {
1998 5967500 : j1 = add_o( j, l, &Overflow );
1999 5967500 : j2 = add_o( j1, l, &Overflow );
2000 5967500 : j3 = add_o( j2, l, &Overflow );
2001 5967500 : x0r = add_o( a[j], a[j1], &Overflow );
2002 5967500 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2003 5967500 : x1r = sub_o( a[j], a[j1], &Overflow );
2004 5967500 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2005 5967500 : x2r = add_o( a[j2], a[j3], &Overflow );
2006 5967500 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2007 5967500 : x3r = sub_o( a[j2], a[j3], &Overflow );
2008 5967500 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2009 5967500 : a[j] = add_o( x0r, x2r, &Overflow );
2010 5967500 : move16();
2011 5967500 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2012 5967500 : move16();
2013 5967500 : a[j2] = sub_o( x0r, x2r, &Overflow );
2014 5967500 : move16();
2015 5967500 : a[j2 + 1] = sub_o( x0i, x2i, &Overflow );
2016 5967500 : move16();
2017 5967500 : a[j1] = sub_o( x1r, x3i, &Overflow );
2018 5967500 : move16();
2019 5967500 : a[j1 + 1] = add_o( x1i, x3r, &Overflow );
2020 5967500 : move16();
2021 5967500 : a[j3] = add_o( x1r, x3i, &Overflow );
2022 5967500 : move16();
2023 5967500 : a[j3 + 1] = sub_o( x1i, x3r, &Overflow );
2024 5967500 : move16();
2025 : }
2026 :
/* Block [m, 2m): the only multiplier is w[2], applied to sums and */
/* differences of the real and imaginary parts.                     */
2027 911993 : wk1r = w[2];
2028 911993 : move32();
2029 911993 : tmp2 = add( l, m ); /* loop end: m + l */
2030 6879493 : FOR( j = m; j < tmp2; j += 2 )
2031 : {
2032 5967500 : j1 = add_o( j, l, &Overflow );
2033 5967500 : j2 = add_o( j1, l, &Overflow );
2034 5967500 : j3 = add_o( j2, l, &Overflow );
2035 5967500 : x0r = add_o( a[j], a[j1], &Overflow );
2036 5967500 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2037 5967500 : x1r = sub_o( a[j], a[j1], &Overflow );
2038 5967500 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2039 5967500 : x2r = add_o( a[j2], a[j3], &Overflow );
2040 5967500 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2041 5967500 : x3r = sub_o( a[j2], a[j3], &Overflow );
2042 5967500 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2043 5967500 : a[j] = add_o( x0r, x2r, &Overflow );
2044 5967500 : move16();
2045 5967500 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2046 5967500 : move16();
2047 5967500 : a[j2] = sub_o( x2i, x0i, &Overflow ); /* real part takes the negated imaginary difference */
2048 5967500 : move16();
2049 5967500 : a[j2 + 1] = sub_o( x0r, x2r, &Overflow ); /* imaginary part takes the real difference */
2050 5967500 : move16();
2051 :
2052 5967500 : x0r = sub_o( x1r, x3i, &Overflow );
2053 5967500 : x0i = add_o( x1i, x3r, &Overflow );
2054 5967500 : tmp = sub_o( x0r, x0i, &Overflow );
2055 5967500 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2056 5967500 : a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2057 5967500 : move16();
2058 :
2059 5967500 : tmp = add_o( x0r, x0i, &Overflow );
2060 5967500 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2061 5967500 : a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2062 5967500 : move16();
2063 :
2064 5967500 : x0r = add_o( x3i, x1r, &Overflow );
2065 5967500 : x0i = sub_o( x3r, x1i, &Overflow );
2066 5967500 : tmp = sub_o( x0i, x0r, &Overflow );
2067 5967500 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2068 5967500 : a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2069 5967500 : move16();
2070 :
2071 5967500 : tmp = add_o( x0i, x0r, &Overflow );
2072 5967500 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2073 5967500 : a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2074 5967500 : move16();
2075 : }
2076 :
/* Remaining blocks, two per iteration: [k, k + m) and [k + m, k + 2m). */
2077 911993 : k1 = 0;
2078 911993 : move16();
2079 911993 : m2 = shl( m, 1 ); /* 2 * m: stride of the outer loop */
2080 1585005 : FOR( k = m2; k < n; k += m2 )
2081 : {
2082 673012 : k1 = add( k1, 2 ); /* index of the (wk2r, wk2i) pair in w[] */
2083 673012 : k2 = shl( k1, 1 ); /* index of the (wk1r, wk1i) pair in w[] */
2084 673012 : wk2r = w[k1];
2085 673012 : move32();
2086 673012 : wk2i = w[k1 + 1];
2087 673012 : move32();
2088 673012 : wk1r = w[k2];
2089 673012 : move32();
2090 673012 : wk1i = w[k2 + 1];
2091 673012 : move32();
/* Third twiddle derived from the two read above:       */
/*   wk3r = wk1r - 2 * wk2i * wk1i                       */
/*   wk3i = 2 * wk2i * wk1r - wk1i                       */
/* NOTE(review): these four operations use L_shl/L_sub,  */
/* whereas the second derivation further down uses the   */
/* L_shl_o/L_sub_o variants - confirm this is intended.  */
2092 673012 : L_tmp = L_shl( Mult_32_32( wk2i, wk1i ), 1 ); /*Q29 */
2093 673012 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
2094 :
2095 673012 : L_tmp = L_shl( Mult_32_32( wk2i, wk1r ), 1 ); /*Q29 */
2096 673012 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
2097 :
2098 673012 : tmp2 = add( l, k ); /* loop end: k + l */
2099 3365060 : FOR( j = k; j < tmp2; j += 2 )
2100 : {
2101 2692048 : j1 = add_o( j, l, &Overflow );
2102 2692048 : j2 = add_o( j1, l, &Overflow );
2103 2692048 : j3 = add_o( j2, l, &Overflow );
2104 2692048 : x0r = add_o( a[j], a[j1], &Overflow );
2105 2692048 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2106 2692048 : x1r = sub_o( a[j], a[j1], &Overflow );
2107 2692048 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2108 2692048 : x2r = add_o( a[j2], a[j3], &Overflow );
2109 2692048 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2110 2692048 : x3r = sub_o( a[j2], a[j3], &Overflow );
2111 2692048 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2112 2692048 : a[j] = add_o( x0r, x2r, &Overflow );
2113 2692048 : move16();
2114 2692048 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2115 2692048 : move16();
2116 :
/* a[j2], a[j2 + 1] = (x0 - x2) multiplied by (wk2r, wk2i) */
2117 2692048 : x0r = sub_o( x0r, x2r, &Overflow );
2118 2692048 : x0i = sub_o( x0i, x2i, &Overflow );
2119 :
2120 2692048 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2121 2692048 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
2122 2692048 : a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2123 2692048 : move16();
2124 :
2125 2692048 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2126 2692048 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
2127 2692048 : a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2128 2692048 : move16();
2129 :
/* a[j1], a[j1 + 1] = (x1r - x3i, x1i + x3r) multiplied by (wk1r, wk1i) */
2130 2692048 : x0r = sub_o( x1r, x3i, &Overflow );
2131 2692048 : x0i = add_o( x1i, x3r, &Overflow );
2132 :
2133 2692048 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2134 2692048 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2135 2692048 : a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2136 2692048 : move16();
2137 :
2138 2692048 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2139 2692048 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2140 2692048 : a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2141 2692048 : move16();
2142 :
/* a[j3], a[j3 + 1] = (x1r + x3i, x1i - x3r) multiplied by (wk3r, wk3i). */
/* NOTE(review): every sibling butterfly forms this sum/difference with  */
/* add_o/sub_o; here it is done in 32 bits and narrowed with extract_l,  */
/* which presumably does not saturate - confirm this is deliberate.      */
2143 2692048 : L_x0r = L_add( (Word32) x1r, (Word32) x3i );
2144 2692048 : L_x0i = L_sub( (Word32) x1i, (Word32) x3r );
2145 2692048 : x0r = extract_l( L_x0r );
2146 2692048 : x0i = extract_l( L_x0i );
2147 2692048 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2148 2692048 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2149 2692048 : a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2150 2692048 : move16();
2151 :
2152 2692048 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2153 2692048 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2154 2692048 : a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2155 2692048 : move16();
2156 : }
2157 :
/* Second block of the pair: reload (wk1r, wk1i) from the next table */
/* entry and derive the third twiddle from wk2r this time:           */
/*   wk3r = wk1r - 2 * wk2r * wk1i                                    */
/*   wk3i = 2 * wk2r * wk1r - wk1i                                    */
2158 673012 : wk1r = w[k2 + 2];
2159 673012 : move32();
2160 673012 : wk1i = w[k2 + 3];
2161 673012 : move32();
2162 673012 : L_tmp = L_shl_o( Mult_32_32( wk2r, wk1i ), 1, &Overflow ); /*Q29 */
2163 673012 : wk3r = L_sub_o( wk1r, L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q30 */
2164 :
2165 673012 : L_tmp = L_shl_o( Mult_32_32( wk2r, wk1r ), 1, &Overflow ); /*Q29 */
2166 673012 : wk3i = L_sub_o( L_shl_o( L_tmp, 1, &Overflow ), wk1i, &Overflow ); /*Q30 */
2167 673012 : tmp2 = add( l, add( k, m ) ); /* loop end: k + m + l */
2168 3365060 : FOR( j = add( k, m ); j < tmp2; j += 2 )
2169 : {
2170 2692048 : j1 = add_o( j, l, &Overflow );
2171 2692048 : j2 = add_o( j1, l, &Overflow );
2172 2692048 : j3 = add_o( j2, l, &Overflow );
2173 2692048 : x0r = add_o( a[j], a[j1], &Overflow );
2174 2692048 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2175 2692048 : x1r = sub_o( a[j], a[j1], &Overflow );
2176 2692048 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2177 2692048 : x2r = add_o( a[j2], a[j3], &Overflow );
2178 2692048 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2179 2692048 : x3r = sub_o( a[j2], a[j3], &Overflow );
2180 2692048 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2181 2692048 : a[j] = add_o( x0r, x2r, &Overflow );
2182 2692048 : move16();
2183 2692048 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2184 2692048 : move16();
2185 :
2186 2692048 : x0r = sub_o( x0r, x2r, &Overflow );
2187 2692048 : x0i = sub_o( x0i, x2i, &Overflow );
2188 :
/* a[j2]     = -wk2i * x0r - wk2r * x0i */
/* a[j2 + 1] = -wk2i * x0i + wk2r * x0r */
2189 2692048 : tmp = negate( x0r );
2190 2692048 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2191 2692048 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2192 2692048 : a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2193 2692048 : move16();
2194 :
2195 2692048 : tmp = negate( x0i );
2196 2692048 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2197 2692048 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2198 2692048 : a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2199 2692048 : move16();
2200 :
2201 2692048 : x0r = sub_o( x1r, x3i, &Overflow );
2202 2692048 : x0i = add_o( x1i, x3r, &Overflow );
2203 :
2204 2692048 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2205 2692048 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2206 2692048 : a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2207 2692048 : move16();
2208 :
2209 2692048 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2210 2692048 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2211 2692048 : a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2212 2692048 : move16();
2213 :
2214 2692048 : x0r = add_o( x1r, x3i, &Overflow );
2215 2692048 : x0i = sub_o( x1i, x3r, &Overflow );
2216 :
2217 2692048 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2218 2692048 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2219 2692048 : a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2220 2692048 : move16();
2221 :
2222 2692048 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2223 2692048 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2224 2692048 : a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2225 2692048 : move16();
2226 : }
2227 : }
2228 :
2229 911993 : return;
2230 : }
2231 :
/*-----------------------------------------------------------------*
 * fft3_fx()
 *
 * Transform of a length-n sequence built from three length-m
 * sub-transforms, with n = 3 * m for the two supported sizes
 * (n = 1536 -> m = 512, n = 384 -> m = 128).
 *
 * Steps:
 *   1. X[] is split into the three decimated sequences X[3i],
 *      X[3i+1], X[3i+2], stored back to back in the local buffer Z[].
 *   2. r_fft_fx_lc() is run in place on each length-m sequence.
 *   3. The three results are combined by order-3 butterflies using
 *      the sine/cosine table sincos_t_rad3_fx.
 *
 * Output layout (follows from the loop counts below): the RY pointer
 * writes n/2 + 1 values forward from Y[0], and the IY pointer writes
 * n/2 - 1 values backward from Y[n - 1].  The sub-transform results
 * are read the same way (RZx forward from index 0, IZx backward from
 * index m - 1).
 *
 * X : i   : input sequence of length n                        Qx
 * Y : o   : output of length n (see the Q annotations below)
 * n : i   : transform length; 1536 or 384, any other value is
 *           configured like 384
 *-----------------------------------------------------------------*/
2232 93 : void fft3_fx( const Word16 X[] /*Qx*/, Word16 Y[] /*Qx*/, const Word16 n )
2233 : {
2234 : Word16 Z[PH_ECU_SPEC_SIZE]; /* scratch: the three decimated sequences, m words each */
2235 : Word16 *Z0, *Z1, *Z2;
2236 : Word16 *z0, *z1, *z2;
2237 : const Word16 *x;
2238 93 : const Word16 *t_sin = sincos_t_rad3_fx; // Q15
2239 : Word16 m, mMinus1, step;
2240 : Word16 i, l;
2241 : Word16 c1_ind, s1_ind, c2_ind, s2_ind; /* t_sin[] indices: c* used for cosine terms, s* for sine terms */
2242 : Word16 c1_step, s1_step, c2_step, s2_step;
2243 : Word16 *RY, *IY, *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
2244 : Word32 acc;
2245 : Word16 mBy2, orderMinus1;
2246 : const Word16 *pPhaseTbl;
2247 :
2248 : /* Determine the order of the transform, the length of decimated */
2249 : /* transforms m, and the step for the sine and cosine tables. */
2250 93 : SWITCH( n )
2251 : {
2252 31 : case 1536:
2253 31 : orderMinus1 = 9 - 1;
2254 31 : move16();
2255 31 : m = 512;
2256 31 : move16();
2257 31 : step = 1;
2258 31 : move16();
2259 31 : pPhaseTbl = FFT_W256;
2260 31 : BREAK;
2261 62 : case 384:
2262 62 : orderMinus1 = 7 - 1;
2263 62 : move16();
2264 62 : m = 128;
2265 62 : move16();
2266 62 : step = 4;
2267 62 : move16();
2268 62 : pPhaseTbl = FFT_W64;
2269 62 : BREAK;
/* NOTE(review): any other n silently gets the 384-point setup, while */
/* n itself is still used below for &Y[n] and the n/8 loop counts.    */
/* Presumably callers only pass 1536 or 384 - confirm.                */
2270 0 : default:
2271 0 : orderMinus1 = 7 - 1;
2272 0 : move16();
2273 0 : m = 128;
2274 0 : move16();
2275 0 : step = 4;
2276 0 : move16();
2277 0 : pPhaseTbl = FFT_W64;
2278 0 : BREAK;
2279 : }
2280 :
2281 : /* Compose decimated sequences X[3i], X[3i+1],X[3i+2] */
2282 : /* compute their FFT of length m. */
2283 93 : Z0 = &Z[0];
2284 93 : z0 = &Z0[0];
2285 93 : Z1 = &Z0[m];
2286 93 : z1 = &Z1[0]; /* Z1 = &Z[ m]; */
2287 93 : Z2 = &Z1[m];
2288 93 : z2 = &Z2[0]; /* Z2 = &Z[2m]; */
2289 93 : x = &X[0]; // Qx
2290 23901 : FOR( i = 0; i < m; i++ )
2291 : {
2292 23808 : *z0++ = *x++; /* Z0[i] = X[3i]; Qx */
2293 23808 : move16();
2294 23808 : *z1++ = *x++; /* Z1[i] = X[3i+1]; Qx */
2295 23808 : move16();
2296 23808 : *z2++ = *x++; /* Z2[i] = X[3i+2]; Qx */
2297 23808 : move16();
2298 : }
2299 93 : mBy2 = shr( m, 1 );
/* In-place length-m transform of each decimated sequence. */
2300 93 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z0, Z0, 1 );
2301 93 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z1, Z1, 1 );
2302 93 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z2, Z2, 1 );
2303 :
2304 : /* Butterflies of order 3. */
2305 : /* pointer initialization */
2306 93 : mMinus1 = sub( m, 1 );
2307 93 : RY = &Y[0]; // Qx
2308 93 : IY = &Y[n]; // Qx
2309 93 : IY--; /* Decrement the address counter.*/
2310 93 : RZ0 = &Z0[0]; // Qx
2311 93 : IZ0 = &Z0[mMinus1];
2312 93 : RZ1 = &Z1[0]; // Qx
2313 93 : IZ1 = &Z1[mMinus1]; // Qx
2314 93 : RZ2 = &Z2[0]; // Qx
2315 93 : IZ2 = &Z2[mMinus1]; // Qx
2316 :
/* Table indices: the cosine indices start one step below T_SIN_PI_2 */
/* and move down, the sine indices start at one step and move up.    */
/* The index pair for Z2 moves twice as fast as the pair for Z1.     */
/* Further down, the step direction of an index pair is reversed and */
/* the MAC signs change at each section boundary.                    */
2317 93 : c1_step = negate( step );
2318 93 : s1_step = step;
2319 93 : move16();
2320 93 : c2_step = shl( c1_step, 1 );
2321 93 : s2_step = shl( s1_step, 1 );
2322 93 : c1_ind = add( T_SIN_PI_2, c1_step );
2323 93 : s1_ind = s1_step;
2324 93 : move16();
2325 93 : c2_ind = add( T_SIN_PI_2, c2_step );
2326 93 : s2_ind = s2_step;
2327 93 : move16();
2328 :
2329 : /* special case: i = 0 */
/* Sum of the three first real values, no table lookup.              */
/* NOTE(review): all later outputs are annotated "Qx - 1"; this one  */
/* is annotated "Qx" although it is also built from 0x4000 (1.Q14)   */
/* products - confirm the intended scaling.                          */
2330 93 : acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ ); // Q15 + Qx
2331 93 : acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ ); // Q15 + Qx
2332 93 : *RY++ = mac_r_sat( acc, *RZ2++, 0x4000 /*1.Q14*/ ); // Qx
2333 93 : move16();
2334 :
2335 : /* first 3/12-- from 1 to (3*m/8)-1 */
2336 93 : l = sub( shr( n, 3 ), 1 ); /* (3*m/8) - 1 = (n/8) - 1 */
2337 8928 : FOR( i = 0; i < l; i++ )
2338 : {
2339 8835 : acc = L_shl( *RZ0++, 15 ); /* Align with the following non-fractional mode so as to gain 1 more bit headroom. Q15 + Qx*/
2340 8835 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2341 8835 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2342 8835 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2343 8835 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2344 8835 : *RY++ = round_fx( acc ); /* bit growth = 1 (compensated by non-fractional mode MAC). Qx - 1*/
2345 8835 : move16();
2346 :
2347 8835 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2348 8835 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2349 8835 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2350 8835 : acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2351 8835 : acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2352 8835 : *IY-- = round_fx( acc ); // Qx - 1
2353 8835 : move16();
2354 :
2355 8835 : c1_ind = add( c1_ind, c1_step );
2356 8835 : s1_ind = add( s1_ind, s1_step );
2357 8835 : c2_ind = add( c2_ind, c2_step );
2358 8835 : s2_ind = add( s2_ind, s2_step );
2359 : }
2360 :
2361 : /* next 1/12-- from (3*m/8) to (4*m/8)-1 */
/* From here the Z2 index pair runs in the opposite direction and the Z2 cosine terms change sign. */
2362 93 : l = shr( m, 3 ); /* (4*m/8) - (3*m/8) = m/8 */
2363 3069 : FOR( i = 0; i < l; i++ )
2364 : {
2365 2976 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2366 2976 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2367 2976 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2368 2976 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2369 2976 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2370 2976 : *RY++ = round_fx( acc ); // Qx - 1
2371 2976 : move16();
2372 :
2373 2976 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2374 2976 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2375 2976 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2376 2976 : acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2377 2976 : acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2378 2976 : *IY-- = round_fx( acc ); // Qx - 1
2379 2976 : move16();
2380 :
2381 2976 : c1_ind = add( c1_ind, c1_step );
2382 2976 : s1_ind = add( s1_ind, s1_step );
2383 2976 : c2_ind = sub( c2_ind, c2_step );
2384 2976 : s2_ind = sub( s2_ind, s2_step );
2385 : }
2386 :
2387 : /* special case: i = m/2 i.e. 1/3 */
/* Only the RZx values are read here; from this point on the RZx pointers */
/* walk backward and the IZx pointers walk forward.                       */
2388 93 : acc = L_shl( *RZ0--, 15 ); // Q15 + Qx
2389 93 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
2390 93 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2391 93 : *RY++ = round_fx( acc ); // Qx - 1
2392 93 : move16();
2393 :
2394 93 : acc = 0;
2395 93 : move32();
2396 93 : acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
2397 93 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
2398 93 : *IY-- = round_fx( acc ); // Qx - 1
2399 93 : move16();
2400 93 : IZ0++; /* Just increment the address counters */
2401 93 : IZ1++;
2402 93 : IZ2++;
2403 :
2404 93 : c1_ind = add( c1_ind, c1_step );
2405 93 : s1_ind = add( s1_ind, s1_step );
2406 93 : c2_ind = sub( c2_ind, c2_step );
2407 93 : s2_ind = sub( s2_ind, s2_step );
2408 :
2409 : /* next 2/12-- from ((m/2)+1) to (6*m/8)-1 */
2410 93 : l = sub( shr( m, 2 ), 1 ); /* (6*m/8) - ((m/2)+1) = m/4 - 1 */
2411 5952 : FOR( i = 0; i < l; i++ )
2412 : {
2413 5859 : acc = L_shl( *RZ0--, 15 ); // Q15 + Qx
2414 5859 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2415 5859 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2416 5859 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2417 5859 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2418 5859 : *RY++ = round_fx( acc ); // Qx - 1
2419 5859 : move16();
2420 :
2421 5859 : acc = L_mult0( *IZ0++, -32768 ); // Q15 + Qx
2422 5859 : acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
2423 5859 : acc = L_msu0( acc, *IZ1++, t_sin[c1_ind] ); // Q15 + Qx
2424 5859 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
2425 5859 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Q15 + Qx
2426 5859 : *IY-- = round_fx( acc ); // Qx - 1
2427 5859 : move16();
2428 :
2429 5859 : c1_ind = add( c1_ind, c1_step );
2430 5859 : s1_ind = add( s1_ind, s1_step );
2431 5859 : c2_ind = sub( c2_ind, c2_step );
2432 5859 : s2_ind = sub( s2_ind, s2_step );
2433 : }
2434 :
2435 : /*--------------------------half--------------------------// */
2436 : /* next 2/12-- from (6*m/8) to (8*m/8) - 1 */
/* Both index pairs reverse direction relative to the previous section. */
2437 93 : l = shr( m, 2 );
2438 6045 : FOR( i = 0; i < l; i++ )
2439 : {
2440 5952 : acc = L_shl( *RZ0--, 15 ); // Q15 + Qx
2441 5952 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2442 5952 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2443 5952 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2444 5952 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2445 5952 : *RY++ = round_fx( acc ); // Qx - 1
2446 5952 : move16();
2447 :
2448 5952 : acc = L_mult0( *IZ0++, -32768 ); // Q15 + Qx
2449 5952 : acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
2450 5952 : acc = L_mac0( acc, *IZ1++, t_sin[c1_ind] ); // Q15 + Qx
2451 5952 : acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
2452 5952 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Q15 + Qx
2453 5952 : *IY-- = round_fx( acc ); // Qx - 1
2454 5952 : move16();
2455 :
2456 5952 : c1_ind = sub( c1_ind, c1_step );
2457 5952 : s1_ind = sub( s1_ind, s1_step );
2458 5952 : c2_ind = add( c2_ind, c2_step );
2459 5952 : s2_ind = add( s2_ind, s2_step );
2460 : }
2461 :
2462 : /* special case: i = m, i.e 2/3 */
/* Only the RZx values are read; afterwards the RZx pointers walk forward */
/* again and the IZx pointers walk backward.                              */
2463 93 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2464 93 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
2465 93 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2466 93 : *RY++ = round_fx( acc ); // Qx - 1
2467 93 : move16();
2468 :
2469 93 : acc = L_deposit_l( 0 );
2470 93 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2471 93 : acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2472 93 : *IY-- = round_fx( acc ); // Qx - 1
2473 93 : move16();
2474 93 : IZ0--; /* Just decrement the address counter */
2475 93 : IZ1--;
2476 93 : IZ2--;
2477 :
2478 93 : c1_ind = sub( c1_ind, c1_step );
2479 93 : s1_ind = sub( s1_ind, s1_step );
2480 93 : c2_ind = add( c2_ind, c2_step );
2481 93 : s2_ind = add( s2_ind, s2_step );
2482 :
2483 : /* next 1/12-- from (m + 1) to (9*m/8) - 1 */
2484 93 : l = sub( shr( m, 3 ), 1 ); /* (9*m/8) - (m +1) = m/8 - 1 */
2485 2976 : FOR( i = 0; i < l; i++ )
2486 : {
2487 2883 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2488 2883 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2489 2883 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2490 2883 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2491 2883 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2492 2883 : *RY++ = round_fx( acc ); // Qx - 1
2493 2883 : move16();
2494 :
2495 2883 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2496 2883 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2497 2883 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2498 2883 : acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2499 2883 : acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2500 2883 : *IY-- = round_fx( acc ); // Qx - 1
2501 2883 : move16();
2502 :
2503 2883 : c1_ind = sub( c1_ind, c1_step );
2504 2883 : s1_ind = sub( s1_ind, s1_step );
2505 2883 : c2_ind = add( c2_ind, c2_step );
2506 2883 : s2_ind = add( s2_ind, s2_step );
2507 : }
2508 :
2509 : /* last 3/12-- from (9*m/8) to (12*m/8) - 1 */
/* The Z2 index pair reverses direction once more for this section. */
2510 93 : l = shr( n, 3 ); /* (12*m/8) - (9*m/8) = 3*m/8 = n/8 */
2511 9021 : FOR( i = 0; i < l; i++ )
2512 : {
2513 8928 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2514 8928 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2515 8928 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2516 8928 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2517 8928 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2518 8928 : *RY++ = round_fx( acc ); // Qx - 1
2519 8928 : move16();
2520 :
2521 8928 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2522 8928 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2523 8928 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2524 8928 : acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2525 8928 : acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2526 8928 : *IY-- = round_fx( acc ); // Qx - 1
2527 8928 : move16();
2528 :
2529 8928 : c1_ind = sub( c1_ind, c1_step );
2530 8928 : s1_ind = sub( s1_ind, s1_step );
2531 8928 : c2_ind = sub( c2_ind, c2_step );
2532 8928 : s2_ind = sub( s2_ind, s2_step );
2533 : }
2534 :
2535 : /* special case: i = 3*m/2 */
/* Last real output; no matching value is written through IY. */
2536 93 : acc = L_shl( *RZ0, 15 ); // Q15 + Qx
2537 93 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
2538 93 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2539 93 : *RY = round_fx( acc ); // Qx - 1
2540 93 : move16();
2541 :
2542 93 : return;
2543 : }
2544 :
2545 :
2546 109 : void ifft3_fx( const Word16 Z[] /*Qx*/, Word16 X[] /*Qx*/, const Word16 n )
2547 : {
2548 : Word16 Y[PH_ECU_SPEC_SIZE];
2549 109 : const Word16 *t_sin = sincos_t_rad3_fx; // Q15
2550 : Word16 m, mMinus1, step, step2;
2551 : Word16 i, l;
2552 : Word16 c0_ind, s0_ind, c1_ind, s1_ind, c2_ind, s2_ind;
2553 : const Word16 *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
2554 : const Word16 *RZ00, *IZ00, *RZ10, *IZ10, *RZ20, *IZ20;
2555 : Word16 *RY0, *IY0, *RY1, *IY1, *RY2, *IY2, *y0, *y1, *y2, *pX;
2556 : Word32 acc;
2557 : Word16 mBy2, orderMinus1, nMinusMBy2;
2558 : const Word16 *pPhaseTbl;
2559 :
2560 : /* Determine the order of the transform, the length of decimated */
2561 : /* transforms m, and the step for the sine and cosine tables. */
2562 109 : SWITCH( n )
2563 : {
2564 109 : case 1536:
2565 109 : orderMinus1 = 9 - 1;
2566 109 : move16();
2567 109 : m = 512;
2568 109 : move16();
2569 109 : step = 1;
2570 109 : move16();
2571 109 : pPhaseTbl = FFT_W256;
2572 109 : BREAK;
2573 0 : case 384:
2574 0 : orderMinus1 = 7 - 1;
2575 0 : move16();
2576 0 : m = 128;
2577 0 : move16();
2578 0 : step = 4;
2579 0 : move16();
2580 0 : pPhaseTbl = FFT_W64;
2581 0 : BREAK;
2582 0 : default:
2583 0 : orderMinus1 = 7 - 1;
2584 0 : move16();
2585 0 : m = 128;
2586 0 : move16();
2587 0 : step = 4;
2588 0 : move16();
2589 0 : pPhaseTbl = FFT_W64;
2590 0 : BREAK;
2591 : }
2592 :
2593 109 : nMinusMBy2 = shr( sub( n, m ), 1 );
2594 109 : mMinus1 = sub( m, 1 );
2595 : /* pointer initialization */
2596 109 : RY0 = &Y[0]; // Qx
2597 109 : IY0 = &Y[m]; // Qx
2598 109 : RY1 = &RY0[m]; // Qx
2599 109 : IY1 = &RY1[mMinus1]; // Qx
2600 109 : RY2 = &RY1[m]; // Qx
2601 109 : IY2 = &RY2[mMinus1]; // Qx
2602 :
2603 109 : RZ00 = &Z[0]; /* The zero positions of the pointers Qx*/
2604 109 : RZ10 = &RZ00[m]; // Qx
2605 109 : RZ20 = &RZ00[nMinusMBy2]; // Qx
2606 109 : IZ00 = &Z[n]; // Qx
2607 109 : IZ10 = &IZ00[-m]; // Qx
2608 109 : IZ20 = &IZ00[-nMinusMBy2]; // Qx
2609 :
2610 109 : RZ0 = RZ00; /* Reset the pointers to zero positions. */
2611 109 : RZ1 = RZ10;
2612 109 : RZ2 = RZ20;
2613 109 : IZ0 = IZ00;
2614 109 : IZ1 = IZ10;
2615 109 : IZ2 = IZ20;
2616 :
2617 : /* Inverse butterflies of order 3. */
2618 :
2619 : /* Construction of Y0 */
2620 109 : acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ ); // Qx + Q15
2621 109 : acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ ); // Qx + Q15
2622 109 : *RY0++ = mac_r( acc, *RZ2--, 0x4000 /*1.Q14*/ ); // Qx
2623 109 : move16();
2624 109 : IZ0--;
2625 109 : IZ1--;
2626 109 : IZ2++;
2627 109 : IY0--;
2628 :
2629 109 : l = sub( shr( m, 1 ), 1 );
2630 27904 : FOR( i = 0; i < l; i++ )
2631 : {
2632 27795 : acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ ); // Qx + Q15
2633 27795 : acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ ); // Qx + Q15
2634 27795 : *RY0++ = mac_r( acc, *RZ2--, 0x4000 /*1.Q14*/ ); // Qx
2635 27795 : move16();
2636 :
2637 27795 : acc = L_mult( *IZ0--, 0x4000 /*1.Q14*/ ); // Qx + Q15
2638 27795 : acc = L_mac( acc, *IZ1--, 0x4000 /*1.Q14*/ ); // Qx + Q15
2639 27795 : *IY0-- = msu_r( acc, *IZ2++, 0x4000 /*1.Q14*/ ); // Qx
2640 27795 : move16();
2641 : }
2642 :
2643 : /* m/2 */
2644 109 : acc = L_mult( *RZ0, 0x4000 /*1.Q14*/ ); // Qx + Q15
2645 109 : acc = L_mac( acc, *RZ1, 0x4000 /*1.Q14*/ ); // Qx + Q15
2646 109 : *RY0++ = mac_r( acc, *RZ2, 0x4000 /*1.Q14*/ ); // Qx
2647 109 : move16();
2648 :
2649 :
2650 : /* Construction of Y1 */
2651 109 : c0_ind = T_SIN_PI_2;
2652 109 : s0_ind = 0;
2653 109 : c1_ind = T_SIN_PI_2 * 1 / 3;
2654 109 : s1_ind = T_SIN_PI_2 * 2 / 3;
2655 109 : c2_ind = T_SIN_PI_2 * 1 / 3;
2656 109 : s2_ind = T_SIN_PI_2 * 2 / 3;
2657 :
2658 109 : RZ0 = RZ00; /* Reset pointers to zero positions. */
2659 109 : RZ1 = RZ10;
2660 109 : RZ2 = RZ20;
2661 109 : IZ0 = IZ00;
2662 109 : IZ1 = IZ10;
2663 109 : IZ2 = IZ20;
2664 109 : acc = L_mult0( *RZ0++, t_sin[c0_ind] ); // Qx + Q15
2665 109 : acc = L_msu0( acc, *RZ1++, t_sin[c1_ind] ); // Qx + Q15
2666 109 : acc = L_msu0( acc, *RZ2--, t_sin[c2_ind] ); // Qx + Q15
2667 109 : IZ0--;
2668 109 : acc = L_msu0( acc, *IZ1--, t_sin[s1_ind] ); // Qx + Q15
2669 109 : acc = L_msu0( acc, *IZ2++, t_sin[s2_ind] ); // Qx + Q15
2670 109 : *RY1++ = round_fx( acc ); // Qx - 1
2671 109 : move16();
2672 :
2673 109 : c0_ind = sub( c0_ind, step );
2674 109 : s0_ind = add( s0_ind, step );
2675 109 : c1_ind = add( c1_ind, step );
2676 109 : s1_ind = sub( s1_ind, step );
2677 109 : c2_ind = sub( c2_ind, step );
2678 109 : s2_ind = add( s2_ind, step );
2679 :
2680 : /* From 1 to (m/4) - 1. */
2681 109 : l = sub( shr( m, 2 ), 1 );
2682 13952 : FOR( i = 0; i < l; i++ )
2683 : {
2684 13843 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
2685 13843 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
2686 13843 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
2687 13843 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
2688 13843 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
2689 13843 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
2690 13843 : *RY1++ = round_fx( acc ); // Qx - 1
2691 13843 : move16();
2692 :
2693 13843 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
2694 13843 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
2695 13843 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
2696 13843 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
2697 13843 : acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
2698 13843 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
2699 13843 : *IY1-- = round_fx( acc ); // Qx - 1
2700 13843 : move16();
2701 :
2702 13843 : c0_ind = sub( c0_ind, step );
2703 13843 : s0_ind = add( s0_ind, step );
2704 13843 : c1_ind = add( c1_ind, step );
2705 13843 : s1_ind = sub( s1_ind, step );
2706 13843 : c2_ind = sub( c2_ind, step );
2707 13843 : s2_ind = add( s2_ind, step );
2708 : }
2709 :
2710 : /* From m/4 to m/2 -1. */
2711 109 : l = shr( m, 2 ); /* m/2 - m/4 = m/4 */
2712 14061 : FOR( i = 0; i < l; i++ )
2713 : {
2714 13952 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
2715 13952 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
2716 13952 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
2717 13952 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
2718 13952 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
2719 13952 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
2720 13952 : *RY1++ = round_fx( acc ); // Qx - 1
2721 13952 : move16();
2722 :
2723 13952 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
2724 13952 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
2725 13952 : acc = L_msu0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
2726 13952 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
2727 13952 : acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
2728 13952 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
2729 13952 : *IY1-- = round_fx( acc ); // Qx - 1
2730 13952 : move16();
2731 :
2732 13952 : c0_ind = sub( c0_ind, step );
2733 13952 : s0_ind = add( s0_ind, step );
2734 13952 : c1_ind = add( c1_ind, step );
2735 13952 : s1_ind = sub( s1_ind, step );
2736 13952 : c2_ind = add( c2_ind, step );
2737 13952 : s2_ind = sub( s2_ind, step );
2738 : }
2739 :
2740 : /* m/2 */
2741 109 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
2742 109 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
2743 109 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
2744 109 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
2745 109 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
2746 109 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
2747 109 : *RY1++ = round_fx( acc ); // Qx - 1
2748 109 : move16();
2749 :
2750 : /* Construction of Y2 */
2751 109 : c0_ind = T_SIN_PI_2;
2752 109 : s0_ind = 0;
2753 109 : c1_ind = T_SIN_PI_2 * 1 / 3;
2754 109 : s1_ind = T_SIN_PI_2 * 2 / 3;
2755 109 : c2_ind = T_SIN_PI_2 * 1 / 3;
2756 109 : s2_ind = T_SIN_PI_2 * 2 / 3;
2757 109 : step2 = shl( step, 1 );
2758 :
2759 109 : RZ0 = RZ00; /* Reset pointers to zero positions. */
2760 109 : RZ1 = RZ10;
2761 109 : RZ2 = RZ20;
2762 109 : IZ0 = IZ00;
2763 109 : IZ1 = IZ10;
2764 109 : IZ2 = IZ20;
2765 109 : acc = L_mult0( *RZ0++, t_sin[c0_ind] ); // Qx + Q15
2766 109 : acc = L_msu0( acc, *RZ1++, t_sin[c1_ind] ); // Qx + Q15
2767 109 : acc = L_msu0( acc, *RZ2--, t_sin[c2_ind] ); // Qx + Q15
2768 109 : IZ0--;
2769 109 : acc = L_mac0( acc, *IZ1--, t_sin[s1_ind] ); // Qx + Q15
2770 109 : acc = L_mac0( acc, *IZ2++, t_sin[s2_ind] ); // Qx + Q15
2771 109 : *RY2++ = round_fx( acc ); // Qx - 1
2772 109 : move16();
2773 :
2774 109 : c0_ind = sub( c0_ind, step2 );
2775 109 : s0_ind = add( s0_ind, step2 );
2776 109 : c1_ind = sub( c1_ind, step2 );
2777 109 : s1_ind = add( s1_ind, step2 );
2778 109 : c2_ind = add( c2_ind, step2 );
2779 109 : s2_ind = sub( s2_ind, step2 );
2780 :
2781 : /* From 1 to (m/8) - 1. */
2782 109 : l = sub( shr( m, 3 ), 1 ); /* m/8 - 1. */
2783 6976 : FOR( i = 0; i < l; i++ )
2784 : {
2785 6867 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
2786 6867 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
2787 6867 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
2788 6867 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
2789 6867 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
2790 6867 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
2791 6867 : *RY2++ = round_fx( acc ); // Qx - 1
2792 6867 : move16();
2793 :
2794 6867 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
2795 6867 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
2796 6867 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
2797 6867 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
2798 6867 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
2799 6867 : acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
2800 6867 : *IY2-- = round_fx( acc ); // Qx - 1
2801 6867 : move16();
2802 :
2803 6867 : c0_ind = sub( c0_ind, step2 );
2804 6867 : s0_ind = add( s0_ind, step2 );
2805 6867 : c1_ind = sub( c1_ind, step2 );
2806 6867 : s1_ind = add( s1_ind, step2 );
2807 6867 : c2_ind = add( c2_ind, step2 );
2808 6867 : s2_ind = sub( s2_ind, step2 );
2809 : }
2810 :
2811 : /* From (m/8) to (m/4) - 1. */
2812 109 : l = shr( m, 3 ); /* m/4 - m/8 = m/8 */
2813 7085 : FOR( i = 0; i < l; i++ )
2814 : {
2815 6976 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
2816 6976 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
2817 6976 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
2818 6976 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
2819 6976 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
2820 6976 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
2821 6976 : *RY2++ = round_fx( acc ); // Qx - 1
2822 6976 : move16();
2823 :
2824 6976 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
2825 6976 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
2826 6976 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
2827 6976 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
2828 6976 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
2829 6976 : acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
2830 6976 : *IY2-- = round_fx( acc ); // Qx - 1
2831 6976 : move16();
2832 :
2833 6976 : c0_ind = sub( c0_ind, step2 );
2834 6976 : s0_ind = add( s0_ind, step2 );
2835 6976 : c1_ind = add( c1_ind, step2 );
2836 6976 : s1_ind = sub( s1_ind, step2 );
2837 6976 : c2_ind = add( c2_ind, step2 );
2838 6976 : s2_ind = sub( s2_ind, step2 );
2839 : }
2840 :
2841 : /* From m/4 to 3*m/8 - 1. */
2842 109 : l = shr( m, 3 ); /* 3*m/8 - m/4 = m/8 */
2843 7085 : FOR( i = 0; i < l; i++ )
2844 : {
2845 6976 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
2846 6976 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
2847 6976 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
2848 6976 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
2849 6976 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
2850 6976 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
2851 6976 : *RY2++ = round_fx( acc ); // Qx - 1
2852 6976 : move16();
2853 :
2854 6976 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
2855 6976 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
2856 6976 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
2857 6976 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
2858 6976 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
2859 6976 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
2860 6976 : *IY2-- = round_fx( acc ); // Qx - 1
2861 6976 : move16();
2862 :
2863 6976 : c0_ind = sub( c0_ind, step2 );
2864 6976 : s0_ind = add( s0_ind, step2 );
2865 6976 : c1_ind = add( c1_ind, step2 );
2866 6976 : s1_ind = sub( s1_ind, step2 );
2867 6976 : c2_ind = sub( c2_ind, step2 );
2868 6976 : s2_ind = add( s2_ind, step2 );
2869 : }
2870 :
2871 : /* From 3*m/8 to m/2 - 1*/
2872 109 : l = shr( m, 3 ); /* m/2 - 3*m/8 = m/8 */
2873 7085 : FOR( i = 0; i < l; i++ )
2874 : {
2875 6976 : acc = L_mult0( *RZ1, t_sin[c1_ind] ); // Qx + Q15
2876 6976 : acc = L_msu0( acc, *RZ0, t_sin[c0_ind] ); // Qx + Q15
2877 6976 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
2878 6976 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
2879 6976 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
2880 6976 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
2881 6976 : *RY2++ = round_fx( acc ); // Qx - 1
2882 6976 : move16();
2883 :
2884 6976 : acc = L_mult0( *IZ1--, t_sin[c1_ind] ); // Qx + Q15
2885 6976 : acc = L_msu0( acc, *IZ0--, t_sin[c0_ind] ); // Qx + Q15
2886 6976 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
2887 6976 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
2888 6976 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
2889 6976 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
2890 6976 : *IY2-- = round_fx( acc ); // Qx - 1
2891 6976 : move16();
2892 :
2893 6976 : c0_ind = add( c0_ind, step2 );
2894 6976 : s0_ind = sub( s0_ind, step2 );
2895 6976 : c1_ind = add( c1_ind, step2 );
2896 6976 : s1_ind = sub( s1_ind, step2 );
2897 6976 : c2_ind = sub( c2_ind, step2 );
2898 6976 : s2_ind = add( s2_ind, step2 );
2899 : }
2900 :
2901 : /* m/2 */
2902 109 : acc = L_mult0( *RZ1, t_sin[c1_ind] ); // Qx + Q15
2903 109 : acc = L_msu0( acc, *RZ0, t_sin[c0_ind] ); // Qx + Q15
2904 109 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
2905 109 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
2906 109 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
2907 109 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
2908 109 : *RY2++ = round_fx( acc ); // Qx - 1
2909 109 : move16();
2910 :
2911 : /* Compute the inverse FFT for all 3 blocks. */
2912 109 : RY0 = &Y[0]; /* Rewind the pointers. */
2913 109 : RY1 = &Y[m];
2914 109 : RY2 = &RY1[m];
2915 109 : mBy2 = shr( m, 1 );
2916 109 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY0, RY0, 0 ); /* inverse FFT */
2917 109 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY1, RY1, 0 ); /* inverse FFT */
2918 109 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY2, RY2, 0 ); /* inverse FFT */
2919 :
2920 109 : y0 = RY0;
2921 109 : y1 = RY1;
2922 109 : y2 = RY2;
2923 :
2924 : /* Interlacing and scaling, scale = 1/3 */
2925 109 : pX = X;
2926 55917 : FOR( i = 0; i < m; i++ )
2927 : {
2928 55808 : *pX++ = shl_sat( mult_r( *y0++, FFT3_ONE_THIRD ), 1 ); // Qx
2929 55808 : move16();
2930 55808 : *pX++ = shl_sat( mult_r( *y1++, FFT3_ONE_THIRD ), 1 ); // Qx
2931 55808 : move16();
2932 55808 : *pX++ = shl_sat( mult_r( *y2++, FFT3_ONE_THIRD ), 1 ); // Qx
2933 55808 : move16();
2934 : }
2935 :
2936 109 : return;
2937 : }
|