Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include "options.h" /* Compilation switches */
6 : #include "cnst.h" /* Common constants */
7 : #include "prot_fx.h" /* Function prototypes */
8 : #include "rom_com.h" /* Static table prototypes */
9 : #include "stl.h"
10 : #include <assert.h>
11 :
12 : /*-----------------------------------------------------------------*
13 : * Local functions
14 : *-----------------------------------------------------------------*/
15 :
16 : #define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */
17 : /* DCT related */
18 : #define KP559016994_16FX 1200479845 /* EDCT & EMDCT constants Q31*/
19 : #define KP951056516_16FX 2042378325 /* EDCT & EMDCT constants Q31*/
20 : #define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31*/
21 :
22 : static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
23 : static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
24 : static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
25 : static void cftmdl_16fx( Word16 n, Word16 l, Word16 *a, const Word32 *w );
26 : static void cftfsub_16fx( Word16 n, Word16 *a, const Word32 *w );
27 : static void cft1st_16fx( Word16 n, Word16 *a, const Word32 *w );
28 : static void cftmdl_16fx( Word16 n, Word16 l, Word16 *a, const Word32 *w );
29 : static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
30 : static void bitrv2_SR_16fx( Word16 n, const Word16 *ip, Word16 *a );
31 : static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
32 : static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx );
33 : static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w );
34 :
35 : #include "math_32.h"
36 :
37 : /*-----------------------------------------------------------------*
38 : * Local functions
39 : *-----------------------------------------------------------------*/
40 : static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w );
41 : static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a );
42 : static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w );
43 : static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w );
44 : static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w );
45 :
46 :
47 263 : void DoRTFTn_fx(
48 : Word32 *x, /* i/o : real part of input and output data Q(x) */
49 : Word32 *y, /* i/o : imaginary part of input and output data Q(x) */
50 : const Word16 n /* i : size of the FFT up to 1024 */
51 : )
52 : {
53 :
54 : Word16 i;
55 : Word32 z[2048], *pt;
56 :
57 263 : pt = z;
58 134471 : FOR( i = 0; i < n; i++ )
59 : {
60 134208 : *pt++ = x[i];
61 134208 : move16();
62 134208 : *pt++ = y[i];
63 134208 : move16();
64 : }
65 :
66 263 : IF( EQ_16( n, 16 ) )
67 : {
68 0 : cdftForw_fx( 2 * n, z, Ip_fft16, w_fft512_fx_evs );
69 : }
70 263 : ELSE IF( EQ_16( n, 32 ) )
71 : {
72 0 : cdftForw_fx( 2 * n, z, Ip_fft32, w_fft512_fx_evs );
73 : }
74 263 : ELSE IF( EQ_16( n, 64 ) )
75 : {
76 1 : cdftForw_fx( 2 * n, z, Ip_fft64, w_fft512_fx_evs );
77 : }
78 262 : ELSE IF( EQ_16( n, 128 ) )
79 : {
80 0 : cdftForw_fx( 2 * n, z, Ip_fft128, w_fft512_fx_evs );
81 : }
82 262 : ELSE IF( EQ_16( n, 256 ) )
83 : {
84 0 : cdftForw_fx( 2 * n, z, Ip_fft256, w_fft512_fx_evs );
85 : }
86 262 : ELSE IF( EQ_16( n, 512 ) )
87 : {
88 262 : cdftForw_fx( 2 * n, z, Ip_fft512, w_fft512_fx_evs );
89 : }
90 : ELSE
91 : {
92 0 : assert( 0 );
93 : }
94 :
95 263 : x[0] = z[0];
96 263 : move16();
97 263 : y[0] = z[1];
98 263 : move16();
99 263 : pt = &z[2];
100 134208 : FOR( i = n - 1; i >= 1; i-- )
101 : {
102 133945 : x[i] = *pt++;
103 133945 : move16();
104 133945 : y[i] = *pt++;
105 133945 : move16();
106 : }
107 :
108 263 : return;
109 : }
110 :
111 : /*-----------------------------------------------------------------*
112 : * cdftForw_fx()
113 : * Main fuction of Complex Discrete Fourier Transform
114 : *-----------------------------------------------------------------*/
115 263 : static void cdftForw_fx(
116 : Word16 n, /* i : data length of real and imag */
117 : Word32 *a, /* i/o : input/output data Q(q)*/
118 : const Word16 *ip, /* i : work area for bit reversal */
119 : const Word16 *w /* i : cos/sin table Q14*/
120 : )
121 : {
122 : /* bit reversal */
123 263 : bitrv2_SR_fx( n, ip + 2, a );
124 :
125 : /* Do FFT */
126 263 : cftfsub_fx( n, a, w );
127 263 : }
128 :
129 : /*-----------------------------------------------------------------*
130 : * bitrv2_SR_fx()
131 : * Bit reversal
132 : *-----------------------------------------------------------------*/
133 92863 : static void bitrv2_SR_fx(
134 : Word16 n, /* i : data length of real and imag */
135 : const Word16 *ip, /* i/o : work area for bit reversal */
136 : Word32 *a /* i/o : input/output data Q(q)*/
137 : )
138 : {
139 : Word16 j, j1, k, k1, m, m2;
140 : Word16 l;
141 : Word32 xr, xi, yr, yi;
142 :
143 92863 : l = n;
144 92863 : move16();
145 92863 : m = 1;
146 92863 : move16();
147 :
148 279113 : WHILE( ( ( m << 3 ) < l ) )
149 : {
150 186250 : l = shr( l, 1 );
151 186250 : m = shl( m, 1 );
152 : }
153 :
154 92863 : m2 = shl( m, 1 );
155 92863 : IF( EQ_16( shl( m, 3 ), l ) )
156 : {
157 5 : FOR( k = 0; k < m; k++ )
158 : {
159 10 : FOR( j = 0; j < k; j++ )
160 : {
161 6 : j1 = add( shl( j, 1 ), ip[k] );
162 6 : k1 = add( shl( k, 1 ), ip[j] );
163 6 : xr = a[j1];
164 6 : move32();
165 6 : xi = a[j1 + 1];
166 6 : move32();
167 6 : yr = a[k1];
168 6 : move32();
169 6 : yi = a[k1 + 1];
170 6 : move32();
171 6 : a[j1] = yr;
172 6 : move32();
173 6 : a[j1 + 1] = yi;
174 6 : move32();
175 6 : a[k1] = xr;
176 6 : move32();
177 6 : a[k1 + 1] = xi;
178 6 : move32();
179 6 : j1 = add( j1, m2 );
180 6 : k1 = add( k1, shl( m2, 1 ) );
181 6 : xr = a[j1];
182 6 : move32();
183 6 : xi = a[j1 + 1];
184 6 : move32();
185 6 : yr = a[k1];
186 6 : move32();
187 6 : yi = a[k1 + 1];
188 6 : move32();
189 6 : a[j1] = yr;
190 6 : move32();
191 6 : a[j1 + 1] = yi;
192 6 : move32();
193 6 : a[k1] = xr;
194 6 : move32();
195 6 : a[k1 + 1] = xi;
196 6 : move32();
197 6 : j1 = add( j1, m2 );
198 6 : k1 = sub( k1, m2 );
199 6 : xr = a[j1];
200 6 : move32();
201 6 : xi = a[j1 + 1];
202 6 : move32();
203 6 : xi = a[j1 + 1];
204 6 : move32();
205 6 : yr = a[k1];
206 6 : move32();
207 6 : yi = a[k1 + 1];
208 6 : move32();
209 6 : a[j1] = yr;
210 6 : move32();
211 6 : a[j1 + 1] = yi;
212 6 : move32();
213 6 : a[k1] = xr;
214 6 : move32();
215 6 : a[k1 + 1] = xi;
216 6 : move32();
217 6 : j1 = add( j1, m2 );
218 6 : k1 = add( k1, shl( m2, 1 ) );
219 6 : xr = a[j1];
220 6 : move32();
221 6 : xi = a[j1 + 1];
222 6 : move32();
223 6 : yr = a[k1];
224 6 : move32();
225 6 : yi = a[k1 + 1];
226 6 : move32();
227 6 : a[j1] = yr;
228 6 : move32();
229 6 : a[j1 + 1] = yi;
230 6 : move32();
231 6 : a[k1] = xr;
232 6 : move32();
233 6 : a[k1 + 1] = xi;
234 6 : move32();
235 : }
236 :
237 4 : j1 = add( add( shl( k, 1 ), m2 ), ip[k] );
238 4 : k1 = add( j1, m2 );
239 4 : xr = a[j1];
240 4 : move32();
241 4 : xi = a[j1 + 1];
242 4 : move32();
243 4 : yr = a[k1];
244 4 : move32();
245 4 : yi = a[k1 + 1];
246 4 : move32();
247 4 : a[j1] = yr;
248 4 : move32();
249 4 : a[j1 + 1] = yi;
250 4 : move32();
251 4 : a[k1] = xr;
252 4 : move32();
253 4 : a[k1 + 1] = xi;
254 4 : move32();
255 : }
256 : }
257 : ELSE
258 : {
259 374592 : FOR( k = 1; k < m; k++ )
260 : {
261 868770 : FOR( j = 0; j < k; j++ )
262 : {
263 587040 : j1 = add( shl( j, 1 ), ip[k] );
264 587040 : k1 = add( shl( k, 1 ), ip[j] );
265 587040 : xr = a[j1];
266 587040 : move32();
267 587040 : xi = a[j1 + 1];
268 587040 : move32();
269 587040 : yr = a[k1];
270 587040 : move32();
271 587040 : yi = a[k1 + 1];
272 587040 : move32();
273 587040 : a[j1] = yr;
274 587040 : move32();
275 587040 : a[j1 + 1] = yi;
276 587040 : move32();
277 587040 : a[k1] = xr;
278 587040 : move32();
279 587040 : a[k1 + 1] = xi;
280 587040 : move32();
281 587040 : j1 = add( j1, m2 );
282 587040 : k1 = add( k1, m2 );
283 587040 : xr = a[j1];
284 587040 : move32();
285 587040 : xi = a[j1 + 1];
286 587040 : move32();
287 587040 : yr = a[k1];
288 587040 : move32();
289 587040 : yi = a[k1 + 1];
290 587040 : move32();
291 587040 : a[j1] = yr;
292 587040 : move32();
293 587040 : a[j1 + 1] = yi;
294 587040 : move32();
295 587040 : a[k1] = xr;
296 587040 : move32();
297 587040 : a[k1 + 1] = xi;
298 587040 : move32();
299 : }
300 : }
301 : }
302 :
303 92863 : return;
304 : }
305 :
306 : /*-----------------------------------------------------------------*
307 : * cftfsub_fx()
308 : * Complex Discrete Fourier Transform
309 : *-----------------------------------------------------------------*/
310 60093 : static void cftfsub_fx(
311 : Word16 n, /* i : data length of real and imag */
312 : Word32 *a, /* i/o : input/output data Q(q)*/
313 : const Word16 *w /* i : cos/sin table Q14*/
314 : )
315 : {
316 : Word16 j, j1, j2, j3, l;
317 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
318 :
319 60093 : l = 2;
320 60093 : move16();
321 :
322 60093 : IF( n > 8 )
323 : {
324 60093 : cft1st_fx( n, a, w );
325 60093 : l = 8;
326 60093 : move16();
327 120710 : WHILE( ( ( l << 2 ) < n ) )
328 : {
329 60617 : cftmdl_fx( n, l, a, w );
330 60617 : l = shl( l, 2 );
331 : }
332 : }
333 60093 : IF( shl( l, 2 ) == n )
334 : {
335 17 : FOR( j = 0; j < l; j += 2 )
336 : {
337 16 : j1 = add( j, l );
338 16 : j2 = add( j1, l );
339 16 : j3 = add( j2, l );
340 16 : x0r = L_add( a[j], a[j1] );
341 16 : x0i = L_add( a[j + 1], a[j1 + 1] );
342 16 : x1r = L_sub( a[j], a[j1] );
343 16 : x1i = L_sub( a[j + 1], a[j1 + 1] );
344 16 : x2r = L_add( a[j2], a[j3] );
345 16 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
346 16 : x3r = L_sub( a[j2], a[j3] );
347 16 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
348 16 : a[j] = L_add( x0r, x2r );
349 16 : move32();
350 16 : a[j2] = L_sub( x0r, x2r );
351 16 : move32();
352 16 : a[j + 1] = L_add( x0i, x2i );
353 16 : move32();
354 16 : a[j2 + 1] = L_sub( x0i, x2i );
355 16 : move32();
356 16 : a[j1] = L_sub( x1r, x3i );
357 16 : move32();
358 16 : a[j1 + 1] = L_add( x1i, x3r );
359 16 : move32();
360 16 : a[j3] = L_add( x1r, x3i );
361 16 : move32();
362 16 : a[j3 + 1] = L_sub( x1i, x3r );
363 16 : move32();
364 : }
365 : }
366 : ELSE
367 : {
368 1084444 : FOR( j = 0; j < l; j += 2 )
369 : {
370 1024352 : j1 = add( j, l );
371 1024352 : x0r = L_sub( a[j], a[j1] );
372 1024352 : x0i = L_sub( a[j + 1], a[j1 + 1] );
373 1024352 : a[j] = L_add( a[j], a[j1] );
374 1024352 : move32();
375 1024352 : a[j + 1] = L_add( a[j + 1], a[j1 + 1] );
376 1024352 : move32();
377 1024352 : a[j1] = x0r;
378 1024352 : move32();
379 1024352 : move32();
380 1024352 : a[j1 + 1] = x0i;
381 1024352 : move32();
382 1024352 : move32();
383 : }
384 : }
385 :
386 60093 : return;
387 : }
388 :
389 : /*-----------------------------------------------------------------*
390 : * cft1st_fx()
391 : * Subfunction of Complex Discrete Fourier Transform
392 : *-----------------------------------------------------------------*/
393 92863 : static void cft1st_fx(
394 : Word16 n, /* i : data length of real and imag */
395 : Word32 *a, /* i/o : input/output data Q(q)*/
396 : const Word16 *w /* i : cos/sin table Q14*/
397 : )
398 : {
399 : Word16 j, k1, k2;
400 : Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
401 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
402 :
403 92863 : x0r = L_add( a[0], a[2] );
404 92863 : x0i = L_add( a[1], a[3] );
405 92863 : x1r = L_sub( a[0], a[2] );
406 92863 : x1i = L_sub( a[1], a[3] );
407 92863 : x2r = L_add( a[4], a[6] );
408 92863 : x2i = L_add( a[5], a[7] );
409 92863 : x3r = L_sub( a[4], a[6] );
410 92863 : x3i = L_sub( a[5], a[7] );
411 92863 : a[0] = L_add( x0r, x2r );
412 92863 : move32();
413 92863 : a[1] = L_add( x0i, x2i );
414 92863 : move32();
415 92863 : a[4] = L_sub( x0r, x2r );
416 92863 : move32();
417 92863 : a[5] = L_sub( x0i, x2i );
418 92863 : move32();
419 92863 : a[2] = L_sub( x1r, x3i );
420 92863 : move32();
421 92863 : a[3] = L_add( x1i, x3r );
422 92863 : move32();
423 92863 : a[6] = L_add( x1r, x3i );
424 92863 : move32();
425 92863 : a[7] = L_sub( x1i, x3r );
426 92863 : move32();
427 :
428 92863 : wk1r = w[2];
429 92863 : move16();
430 92863 : x0r = L_add( a[8], a[10] );
431 92863 : x0i = L_add( a[9], a[11] );
432 92863 : x1r = L_sub( a[8], a[10] );
433 92863 : x1i = L_sub( a[9], a[11] );
434 92863 : x2r = L_add( a[12], a[14] );
435 92863 : x2i = L_add( a[13], a[15] );
436 92863 : x3r = L_sub( a[12], a[14] );
437 92863 : x3i = L_sub( a[13], a[15] );
438 92863 : a[8] = L_add( x0r, x2r );
439 92863 : move32();
440 92863 : a[9] = L_add( x0i, x2i );
441 92863 : move32();
442 92863 : a[12] = L_sub( x2i, x0i );
443 92863 : move32();
444 92863 : a[13] = L_sub( x0r, x2r );
445 92863 : move32();
446 :
447 92863 : x0r = L_sub( x1r, x3i );
448 92863 : x0i = L_add( x1i, x3r );
449 92863 : a[10] = Mult_32_16( L_shl( L_sub( x0r, x0i ), 1 ), wk1r );
450 92863 : move32();
451 92863 : a[11] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
452 92863 : move32();
453 92863 : x0r = L_add( x3i, x1r );
454 92863 : x0i = L_sub( x3r, x1i );
455 92863 : a[14] = Mult_32_16( L_shl( L_sub( x0i, x0r ), 1 ), wk1r );
456 92863 : move32();
457 92863 : a[15] = Mult_32_16( L_shl( L_add( x0i, x0r ), 1 ), wk1r );
458 92863 : move32();
459 :
460 92863 : k1 = 0;
461 92863 : move16();
462 387176 : FOR( j = 16; j < n; j += 16 )
463 : {
464 294313 : k1 = add( k1, 2 );
465 294313 : k2 = shl( k1, 1 );
466 294313 : wk2r = w[k1];
467 294313 : move16();
468 294313 : wk2i = w[k1 + 1];
469 294313 : move16();
470 294313 : wk1r = w[k2];
471 294313 : move16();
472 294313 : wk1i = w[k2 + 1];
473 294313 : move16();
474 294313 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2i, wk1i ), 14 ) ) );
475 294313 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2i, wk1r ), 14 ), wk1i, 1 ) );
476 294313 : x0r = L_add( a[j], a[j + 2] );
477 294313 : x0i = L_add( a[j + 1], a[j + 3] );
478 294313 : x1r = L_sub( a[j], a[j + 2] );
479 294313 : x1i = L_sub( a[j + 1], a[j + 3] );
480 294313 : x2r = L_add( a[j + 4], a[j + 6] );
481 294313 : x2i = L_add( a[j + 5], a[j + 7] );
482 294313 : x3r = L_sub( a[j + 4], a[j + 6] );
483 294313 : x3i = L_sub( a[j + 5], a[j + 7] );
484 294313 : a[j] = L_add( x0r, x2r );
485 294313 : move32();
486 294313 : a[j + 1] = L_add( x0i, x2i );
487 294313 : move32();
488 294313 : x0r = L_sub( x0r, x2r );
489 294313 : x0i = L_sub( x0i, x2i );
490 294313 : a[j + 4] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
491 294313 : move32();
492 294313 : a[j + 5] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk2r ), Mult_32_16( L_shl( x0r, 1 ), wk2i ) );
493 294313 : move32();
494 294313 : x0r = L_sub( x1r, x3i );
495 294313 : x0i = L_add( x1i, x3r );
496 294313 : a[j + 2] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
497 294313 : move32();
498 294313 : a[j + 3] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
499 294313 : move32();
500 294313 : x0r = L_add( x1r, x3i );
501 294313 : x0i = L_sub( x1i, x3r );
502 294313 : a[j + 6] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
503 294313 : move32();
504 294313 : a[j + 7] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
505 294313 : move32();
506 :
507 294313 : wk1r = w[k2 + 2];
508 294313 : move16();
509 294313 : wk1i = w[k2 + 3];
510 294313 : move16();
511 294313 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2r, wk1i ), 14 ) ) );
512 294313 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2r, wk1r ), 14 ), wk1i, 1 ) );
513 294313 : x0r = L_add( a[j + 8], a[j + 10] );
514 294313 : x0i = L_add( a[j + 9], a[j + 11] );
515 294313 : x1r = L_sub( a[j + 8], a[j + 10] );
516 294313 : x1i = L_sub( a[j + 9], a[j + 11] );
517 294313 : x2r = L_add( a[j + 12], a[j + 14] );
518 294313 : x2i = L_add( a[j + 13], a[j + 15] );
519 294313 : x3r = L_sub( a[j + 12], a[j + 14] );
520 294313 : x3i = L_sub( a[j + 13], a[j + 15] );
521 294313 : a[j + 8] = L_add( x0r, x2r );
522 294313 : move32();
523 294313 : a[j + 9] = L_add( x0i, x2i );
524 294313 : move32();
525 294313 : x0r = L_sub( x0r, x2r );
526 294313 : x0i = L_sub( x0i, x2i );
527 294313 : a[j + 12] = L_negate( L_add( Mult_32_16( L_shl( x0r, 1 ), wk2i ), Mult_32_16( L_shl( x0i, 1 ), wk2r ) ) );
528 294313 : move32();
529 294313 : a[j + 13] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
530 294313 : move32();
531 294313 : x0r = L_sub( x1r, x3i );
532 294313 : x0i = L_add( x1i, x3r );
533 294313 : a[j + 10] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
534 294313 : move32();
535 294313 : a[j + 11] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
536 294313 : move32();
537 294313 : x0r = L_add( x1r, x3i );
538 294313 : x0i = L_sub( x1i, x3r );
539 294313 : a[j + 14] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
540 294313 : move32();
541 294313 : a[j + 15] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
542 294313 : move32();
543 : }
544 :
545 92863 : return;
546 : }
547 :
548 : /*-----------------------------------------------------------------*
549 : * cftmdl_fx()
550 : * Subfunction of Complex Discrete Fourier Transform
551 : *-----------------------------------------------------------------*/
552 93387 : static void cftmdl_fx(
553 : Word16 n, /* i : data length of real and imag */
554 : Word16 l, /* i : initial shift for processing */
555 : Word32 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
556 : const Word16 *w /* i : cos/sin table Q30*/
557 : )
558 : {
559 : Word16 j, j1, j2, j3, k, k1, k2, m, m2;
560 : Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
561 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
562 : Word16 tmp;
563 :
564 93387 : m = shl( l, 2 );
565 485799 : FOR( j = 0; j < l; j += 2 )
566 : {
567 392412 : j1 = add( j, l );
568 392412 : j2 = add( j1, l );
569 392412 : j3 = add( j2, l );
570 392412 : x0r = L_add( a[j], a[j1] );
571 392412 : x0i = L_add( a[j + 1], a[j1 + 1] );
572 392412 : x1r = L_sub( a[j], a[j1] );
573 392412 : x1i = L_sub( a[j + 1], a[j1 + 1] );
574 392412 : x2r = L_add( a[j2], a[j3] );
575 392412 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
576 392412 : x3r = L_sub( a[j2], a[j3] );
577 392412 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
578 392412 : a[j] = L_add( x0r, x2r );
579 392412 : move32();
580 392412 : a[j + 1] = L_add( x0i, x2i );
581 392412 : move32();
582 392412 : a[j2] = L_sub( x0r, x2r );
583 392412 : move32();
584 392412 : a[j2 + 1] = L_sub( x0i, x2i );
585 392412 : move32();
586 392412 : a[j1] = L_sub( x1r, x3i );
587 392412 : move32();
588 392412 : a[j1 + 1] = L_add( x1i, x3r );
589 392412 : move32();
590 392412 : a[j3] = L_add( x1r, x3i );
591 392412 : move32();
592 392412 : a[j3 + 1] = L_sub( x1i, x3r );
593 392412 : move32();
594 : }
595 :
596 93387 : wk1r = w[2];
597 93387 : move16();
598 93387 : tmp = add( l, m );
599 485799 : FOR( j = m; j < tmp; j += 2 )
600 : {
601 392412 : j1 = add( j, l );
602 392412 : j2 = add( j1, l );
603 392412 : j3 = add( j2, l );
604 392412 : x0r = L_add( a[j], a[j1] );
605 392412 : x0i = L_add( a[j + 1], a[j1 + 1] );
606 392412 : x1r = L_sub( a[j], a[j1] );
607 392412 : x1i = L_sub( a[j + 1], a[j1 + 1] );
608 392412 : x2r = L_add( a[j2], a[j3] );
609 392412 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
610 392412 : x3r = L_sub( a[j2], a[j3] );
611 392412 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
612 392412 : a[j] = L_add( x0r, x2r );
613 392412 : move32();
614 392412 : a[j + 1] = L_add( x0i, x2i );
615 392412 : move32();
616 392412 : a[j2] = L_sub( x2i, x0i );
617 392412 : move32();
618 392412 : a[j2 + 1] = L_sub( x0r, x2r );
619 392412 : move32();
620 392412 : x0r = L_sub( x1r, x3i );
621 392412 : x0i = L_add( x1i, x3r );
622 392412 : a[j1] = Mult_32_16( L_shl( L_sub( x0r, x0i ), 1 ), wk1r );
623 392412 : move32();
624 392412 : a[j1 + 1] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
625 392412 : move32();
626 392412 : x0r = L_add( x3i, x1r );
627 392412 : x0i = L_sub( x3r, x1i );
628 392412 : a[j3] = Mult_32_16( L_shl( L_sub( x0i, x0r ), 1 ), wk1r );
629 392412 : move32();
630 392412 : a[j3 + 1] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
631 392412 : move32();
632 : }
633 :
634 93387 : k1 = 0;
635 93387 : move16();
636 93387 : m2 = shl( m, 1 );
637 98104 : FOR( k = m2; k < n; k += m2 )
638 : {
639 4717 : k1 = add( k1, 2 );
640 4717 : k2 = shl( k1, 1 );
641 4717 : wk2r = w[k1];
642 4717 : move16();
643 4717 : wk2i = w[k1 + 1];
644 4717 : move16();
645 4717 : wk1r = w[k2];
646 4717 : move16();
647 4717 : wk1i = w[k2 + 1];
648 4717 : move16();
649 4717 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2i, wk1i ), 14 ) ) );
650 4717 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2i, wk1r ), 14 ), wk1i, 1 ) );
651 :
652 4717 : tmp = add( l, k );
653 33017 : FOR( j = k; j < tmp; j += 2 )
654 : {
655 28300 : j1 = add( j, l );
656 28300 : j2 = add( j1, l );
657 28300 : j3 = add( j2, l );
658 28300 : x0r = L_add( a[j], a[j1] );
659 28300 : x0i = L_add( a[j + 1], a[j1 + 1] );
660 28300 : x1r = L_sub( a[j], a[j1] );
661 28300 : x1i = L_sub( a[j + 1], a[j1 + 1] );
662 28300 : x2r = L_add( a[j2], a[j3] );
663 28300 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
664 28300 : x3r = L_sub( a[j2], a[j3] );
665 28300 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
666 28300 : a[j] = L_add( x0r, x2r );
667 28300 : move32();
668 28300 : a[j + 1] = L_add( x0i, x2i );
669 28300 : move32();
670 28300 : x0r = L_sub( x0r, x2r );
671 28300 : x0i = L_sub( x0i, x2i );
672 28300 : a[j2] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
673 28300 : move32();
674 28300 : a[j2 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk2r ), Mult_32_16( L_shl( x0r, 1 ), wk2i ) );
675 28300 : move32();
676 28300 : x0r = L_sub( x1r, x3i );
677 28300 : x0i = L_add( x1i, x3r );
678 28300 : a[j1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
679 28300 : move32();
680 28300 : a[j1 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
681 28300 : move32();
682 28300 : x0r = L_add( x1r, x3i );
683 28300 : x0i = L_sub( x1i, x3r );
684 28300 : a[j3] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
685 28300 : move32();
686 28300 : a[j3 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
687 28300 : move32();
688 : }
689 :
690 4717 : wk1r = w[k2 + 2];
691 4717 : move16();
692 4717 : wk1i = w[k2 + 3];
693 4717 : move16();
694 4717 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2r, wk1i ), 14 ) ) );
695 4717 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2r, wk1r ), 14 ), wk1i, 1 ) );
696 :
697 4717 : tmp = add( l, add( k, m ) );
698 33017 : FOR( j = k + m; j < tmp; j += 2 )
699 : {
700 28300 : j1 = add( j, l );
701 28300 : j2 = add( j1, l );
702 28300 : j3 = add( j2, l );
703 28300 : x0r = L_add( a[j], a[j1] );
704 28300 : x0i = L_add( a[j + 1], a[j1 + 1] );
705 28300 : x1r = L_sub( a[j], a[j1] );
706 28300 : x1i = L_sub( a[j + 1], a[j1 + 1] );
707 28300 : x2r = L_add( a[j2], a[j3] );
708 28300 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
709 28300 : x3r = L_sub( a[j2], a[j3] );
710 28300 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
711 28300 : a[j] = L_add( x0r, x2r );
712 28300 : move32();
713 28300 : a[j + 1] = L_add( x0i, x2i );
714 28300 : move32();
715 28300 : x0r = L_sub( x0r, x2r );
716 28300 : x0i = L_sub( x0i, x2i );
717 28300 : a[j2] = L_negate( L_add( Mult_32_16( L_shl( x0r, 1 ), wk2i ), Mult_32_16( L_shl( x0i, 1 ), wk2r ) ) );
718 28300 : move32();
719 28300 : a[j2 + 1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
720 28300 : move32();
721 28300 : x0r = L_sub( x1r, x3i );
722 28300 : x0i = L_add( x1i, x3r );
723 28300 : a[j1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
724 28300 : move32();
725 28300 : a[j1 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
726 28300 : move32();
727 28300 : x0r = L_add( x1r, x3i );
728 28300 : x0i = L_sub( x1i, x3r );
729 28300 : a[j3] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
730 28300 : move32();
731 28300 : a[j3 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
732 28300 : move32();
733 : }
734 : }
735 :
736 93387 : return;
737 : }
738 :
739 :
740 32770 : static void cftbsub_fx(
741 : Word16 n,
742 : Word32 *a, // Q(Qx+Q_edct)
743 : const Word16 *w /* i : cos/sin table Q14 */
744 : )
745 : {
746 : Word16 j, j1, j2, j3, l;
747 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
748 :
749 32770 : l = 2;
750 32770 : move16();
751 32770 : IF( GT_16( n, 8 ) )
752 : {
753 32770 : cft1st_fx( n, a, w );
754 32770 : l = 8;
755 32770 : move16();
756 :
757 65540 : WHILE( ( ( l << 2 ) < n ) )
758 : {
759 32770 : cftmdl_fx( n, l, a, w );
760 32770 : l = shl( l, 2 );
761 : }
762 : }
763 :
764 32770 : IF( EQ_16( shl( l, 2 ), n ) )
765 : {
766 0 : FOR( j = 0; j < l; j += 2 )
767 : {
768 0 : j1 = add( j, l );
769 0 : j2 = add( j1, l );
770 0 : j3 = add( j2, l );
771 0 : x0r = L_add( a[j], a[j1] );
772 0 : x0i = L_negate( L_add( a[j + 1], a[j1 + 1] ) );
773 0 : x1r = L_sub( a[j], a[j1] );
774 0 : x1i = L_sub( a[j1 + 1], a[j + 1] );
775 0 : x2r = L_add( a[j2], a[j3] );
776 0 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
777 0 : x3r = L_sub( a[j2], a[j3] );
778 0 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
779 0 : a[j] = L_add( x0r, x2r );
780 0 : move32();
781 0 : a[j + 1] = L_sub( x0i, x2i );
782 0 : move32();
783 0 : a[j2] = L_sub( x0r, x2r );
784 0 : move32();
785 0 : a[j2 + 1] = L_add( x0i, x2i );
786 0 : move32();
787 0 : a[j1] = L_sub( x1r, x3i );
788 0 : move32();
789 0 : a[j1 + 1] = L_sub( x1i, x3r );
790 0 : move32();
791 0 : a[j3] = L_add( x1r, x3i );
792 0 : move32();
793 0 : a[j3 + 1] = L_add( x1i, x3r );
794 0 : move32();
795 : }
796 : }
797 : ELSE
798 : {
799 557090 : FOR( j = 0; j < l; j += 2 )
800 : {
801 524320 : j1 = add( j, l );
802 524320 : x0r = L_sub( a[j], a[j1] );
803 524320 : x0i = L_sub( a[j1 + 1], a[j + 1] );
804 524320 : a[j] = L_add( a[j], a[j1] );
805 524320 : move32();
806 524320 : a[j + 1] = L_negate( L_add( a[j + 1], a[j1 + 1] ) );
807 524320 : move32();
808 524320 : a[j1] = x0r;
809 524320 : move32();
810 524320 : a[j1 + 1] = x0i;
811 524320 : move32();
812 : }
813 : }
814 32770 : }
815 :
816 59830 : static void rftfsub_fx(
817 : Word16 n,
818 : Word32 *a, // Qx
819 : Word16 nc,
820 : const Word16 *c /*Q14*/ )
821 : {
822 : Word16 j, k, kk, ks, m, tmp;
823 : Word32 xr, xi, yr, yi;
824 : Word16 wkr, wki;
825 :
826 59830 : m = shr( n, 1 );
827 : /*ks = 2 * nc / m; */
828 59830 : tmp = shl( nc, 1 );
829 59830 : ks = 0;
830 59830 : move16();
831 299150 : WHILE( ( tmp >= m ) )
832 : {
833 239320 : ks = add( ks, 1 );
834 239320 : tmp = sub( tmp, m );
835 : }
836 59830 : kk = 0;
837 59830 : move16();
838 957280 : FOR( j = 2; j < m; j += 2 )
839 : {
840 897450 : k = sub( n, j );
841 897450 : kk = add( kk, ks );
842 897450 : wkr = sub( 8192 /*0.5.Q14*/, c[( nc - kk )] ); // Q14
843 897450 : wki = c[kk]; // Q14
844 897450 : move16();
845 897450 : xr = L_sub( a[j], a[k] ); // Qx
846 897450 : xi = L_add( a[j + 1], a[k + 1] ); // Qx
847 897450 : yr = L_sub( Mult_32_16( L_shl( xr, 1 ), wkr ), Mult_32_16( L_shl( xi, 1 ), wki ) ); // Qx
848 897450 : yi = L_add( Mult_32_16( L_shl( xi, 1 ), wkr ), Mult_32_16( L_shl( xr, 1 ), wki ) ); // Qx
849 897450 : a[j] = L_sub( a[j], yr );
850 897450 : move32();
851 897450 : a[j + 1] = L_sub( a[j + 1], yi );
852 897450 : move32();
853 897450 : a[k] = L_add( a[k], yr );
854 897450 : move32();
855 897450 : a[k + 1] = L_sub( a[k + 1], yi );
856 897450 : move32();
857 : }
858 59830 : }
859 :
860 :
861 32770 : static void rftbsub_fx(
862 : Word16 n,
863 : Word32 *a, // Qx
864 : Word16 nc,
865 : const Word16 *c /*Q14*/ )
866 : {
867 : Word16 j, k, kk, ks, m, tmp;
868 : Word32 xr, xi, yr, yi;
869 : Word16 wkr, wki;
870 :
871 32770 : a[1] = L_negate( a[1] );
872 32770 : m = shr( n, 1 );
873 : /*ks = 2 * nc / m; */
874 32770 : tmp = shl( nc, 1 );
875 32770 : ks = 0;
876 32770 : move16();
877 163850 : WHILE( ( tmp >= m ) )
878 : {
879 131080 : ks = add( ks, 1 );
880 131080 : tmp = sub( tmp, m );
881 : }
882 32770 : kk = 0;
883 32770 : move16();
884 524320 : FOR( j = 2; j < m; j += 2 )
885 : {
886 491550 : k = sub( n, j );
887 491550 : kk = add( kk, ks );
888 491550 : wkr = sub( 8192 /*0.5.Q14*/, c[( nc - kk )] ); // Q14
889 491550 : wki = c[kk]; // Q14
890 491550 : move16();
891 491550 : xr = L_sub( a[j], a[k] ); // Qx
892 491550 : xi = L_add( a[j + 1], a[k + 1] ); // Qx
893 491550 : yr = L_add( Mult_32_16( L_shl( xr, 1 ), wkr ), Mult_32_16( L_shl( xi, 1 ), wki ) ); // Qx
894 491550 : yi = L_sub( Mult_32_16( L_shl( xi, 1 ), wkr ), Mult_32_16( L_shl( xr, 1 ), wki ) ); // Qx
895 491550 : a[j] = L_sub( a[j], yr );
896 491550 : move32();
897 491550 : a[j + 1] = L_sub( yi, a[j + 1] );
898 491550 : move32();
899 491550 : a[k] = L_add( a[k], yr );
900 491550 : move32();
901 491550 : a[k + 1] = L_sub( yi, a[k + 1] );
902 491550 : move32();
903 : }
904 32770 : a[m + 1] = L_negate( a[m + 1] );
905 32770 : move32();
906 32770 : }
907 :
908 :
909 92600 : static void dctsub_fx(
910 : Word16 n,
911 : Word32 *a, // Qx
912 : Word16 nc,
913 : const Word16 *c /*Q14*/ )
914 : {
915 : Word16 j, k, kk, ks, m, tmp;
916 : Word16 wkr, wki;
917 : Word32 xr;
918 :
919 92600 : m = shr( n, 1 );
920 : /*ks = nc / n; */
921 92600 : tmp = nc;
922 92600 : move16();
923 92600 : ks = 0;
924 92600 : move16();
925 185200 : WHILE( ( tmp >= n ) )
926 : {
927 92600 : ks = add( ks, 1 );
928 92600 : tmp = sub( tmp, n );
929 : }
930 92600 : kk = 0;
931 92600 : move16();
932 2963200 : FOR( j = 1; j < m; j++ )
933 : {
934 2870600 : k = sub( n, j );
935 2870600 : kk = add( kk, ks );
936 2870600 : wkr = sub( c[kk], c[( nc - kk )] ); // Q14
937 2870600 : wki = add( c[kk], c[( nc - kk )] ); // Q14
938 2870600 : xr = L_sub( Mult_32_16( L_shl( a[j], 1 ), wki ), Mult_32_16( L_shl( a[k], 1 ), wkr ) ); // Qx
939 2870600 : a[j] = L_add( Mult_32_16( L_shl( a[j], 1 ), wkr ), Mult_32_16( L_shl( a[k], 1 ), wki ) ); // Qx
940 2870600 : move32();
941 2870600 : a[k] = xr;
942 2870600 : move32();
943 : }
944 92600 : a[m] = Mult_32_16( L_shl( a[m], 1 ), c[0] ); // Qx
945 92600 : move16();
946 92600 : }
947 :
948 : /*-----------------------------------------------------------------*
949 : * edct2_fx()
950 : *
951 : * Transformation of the signal to DCT domain
952 : * OR Inverse EDCT-II for short frames
953 : *-----------------------------------------------------------------*/
954 :
955 92600 : void edct2_fx(
956 : Word16 n,
957 : Word16 isgn,
958 : Word16 *in, // Q(q)
959 : Word32 *a, // Qx
960 : Word16 *q,
961 : const Word16 *ip,
962 : const Word16 *w /*Q14*/ )
963 : {
964 : Word16 j, nw, nc;
965 : Word32 xr;
966 :
967 92600 : *q = Exp16Array( n, in );
968 92600 : move16();
969 92600 : *q = add( *q, 6 );
970 92600 : move16();
971 6019000 : FOR( j = 0; j < n; j++ )
972 : {
973 5926400 : a[j] = L_shl( (Word32) in[j], *q );
974 5926400 : move32();
975 : }
976 :
977 92600 : nw = ip[0];
978 92600 : move16();
979 92600 : if ( GT_16( n, shl( nw, 2 ) ) )
980 : {
981 0 : nw = shr( n, 2 );
982 : }
983 :
984 92600 : nc = ip[1];
985 92600 : move16();
986 92600 : if ( GT_16( n, nc ) )
987 : {
988 0 : nc = n;
989 0 : move16();
990 : }
991 :
992 92600 : IF( isgn < 0 )
993 : {
994 32770 : xr = a[n - 1];
995 32770 : move32();
996 1048640 : FOR( j = n - 2; j >= 2; j -= 2 )
997 : {
998 1015870 : a[j + 1] = L_sub( a[j], a[j - 1] );
999 1015870 : move32();
1000 1015870 : a[j] = L_add( a[j], a[j - 1] );
1001 1015870 : move32();
1002 : }
1003 32770 : a[1] = L_sub( a[0], xr );
1004 32770 : move32();
1005 32770 : a[0] = L_add( a[0], xr );
1006 32770 : move32();
1007 :
1008 32770 : IF( GT_16( n, 4 ) )
1009 : {
1010 32770 : rftbsub_fx( n, a, nc, w + nw );
1011 32770 : bitrv2_SR_fx( n, ip + 2, a );
1012 32770 : cftbsub_fx( n, a, w );
1013 : }
1014 0 : ELSE IF( EQ_16( n, 4 ) )
1015 : {
1016 0 : cftfsub_fx( n, a, w );
1017 : }
1018 : }
1019 :
1020 92600 : IF( isgn >= 0 )
1021 : {
1022 59830 : a[0] = L_shr( a[0], 1 );
1023 59830 : move32();
1024 : }
1025 :
1026 92600 : dctsub_fx( n, a, nc, w + nw );
1027 :
1028 92600 : IF( isgn >= 0 )
1029 : {
1030 59830 : IF( GT_16( n, 4 ) )
1031 : {
1032 59830 : bitrv2_SR_fx( n, ip + 2, a );
1033 59830 : cftfsub_fx( n, a, w );
1034 59830 : rftfsub_fx( n, a, nc, w + nw );
1035 : }
1036 0 : ELSE IF( EQ_16( n, 4 ) )
1037 : {
1038 0 : cftfsub_fx( n, a, w );
1039 : }
1040 59830 : xr = L_sub( a[0], a[1] );
1041 59830 : a[0] = L_add( a[0], a[1] );
1042 59830 : move32();
1043 1914560 : FOR( j = 2; j < n; j += 2 )
1044 : {
1045 1854730 : a[j - 1] = L_sub( a[j], a[j + 1] );
1046 1854730 : move32();
1047 1854730 : a[j] = L_add( a[j], a[j + 1] );
1048 1854730 : move32();
1049 : }
1050 59830 : a[n - 1] = xr;
1051 59830 : move32();
1052 :
1053 3888950 : FOR( j = 0; j < n; j++ )
1054 : {
1055 3829120 : a[j] = L_shr( a[j], 5 ); // a[j] / 32.0f
1056 3829120 : move32();
1057 : }
1058 : }
1059 92600 : }
1060 :
1061 :
1062 : /*-----------------------------------------------------------------*
1063 : * fft5_shift4()
1064 : * 5-point FFT with 4-point circular shift
1065 : *-----------------------------------------------------------------*/
1066 :
1067 1156736 : static void fft5_shift4_16fx(
1068 : Word16 n1, /* i : length of data */
1069 : Word16 *zRe, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1070 : Word16 *zIm, /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1071 : const Word16 *Idx /* i : pointer of the address table Q0 */
1072 : )
1073 : {
1074 : Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn, T2, T3, T4, T5, T6, T7;
1075 : Word16 i0, i1, i2, i3, i4;
1076 : Word32 L_tmp;
1077 :
1078 :
1079 1156736 : i0 = Idx[0];
1080 1156736 : move16();
1081 1156736 : i1 = Idx[n1];
1082 1156736 : move16();
1083 1156736 : i2 = Idx[n1 * 2];
1084 1156736 : move16();
1085 1156736 : i3 = Idx[n1 * 3];
1086 1156736 : move16();
1087 1156736 : i4 = Idx[n1 * 4];
1088 1156736 : move16();
1089 :
1090 1156736 : T1 = zRe[i0]; // Qx
1091 1156736 : move16();
1092 1156736 : To = zIm[i0]; // Qx
1093 1156736 : move16();
1094 :
1095 1156736 : T2 = zRe[i1];
1096 1156736 : move16();
1097 1156736 : T3 = zRe[i4];
1098 1156736 : move16();
1099 1156736 : T4 = add_sat( T2, T3 );
1100 1156736 : T5 = zRe[i2];
1101 1156736 : move16();
1102 1156736 : T6 = zRe[i3];
1103 1156736 : move16();
1104 1156736 : T7 = add_sat( T5, T6 );
1105 1156736 : T8 = add_sat( T4, T7 );
1106 1156736 : Tt = sub_sat( T5, T6 );
1107 : /* T9 = KP559016994 * (T4 - T7); */
1108 1156736 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1109 1156736 : T9 = round_fx_sat( L_tmp ); // Qx
1110 1156736 : Ts = sub_sat( T2, T3 );
1111 :
1112 1156736 : T2 = zIm[i1];
1113 1156736 : move16();
1114 1156736 : T3 = zIm[i4];
1115 1156736 : move16();
1116 1156736 : T4 = add( T2, T3 );
1117 1156736 : T5 = zIm[i2];
1118 1156736 : move16();
1119 1156736 : T6 = zIm[i3];
1120 1156736 : move16();
1121 1156736 : T7 = add_sat( T5, T6 );
1122 1156736 : Te = sub_sat( T2, T3 );
1123 1156736 : Tp = add_sat( T4, T7 );
1124 1156736 : Th = sub_sat( T5, T6 );
1125 :
1126 : /* Tn = KP559016994 * (T4 - T7); */
1127 1156736 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1128 1156736 : Tn = round_fx_sat( L_tmp ); // Qx
1129 1156736 : zRe[i0] = add_sat( T1, T8 );
1130 1156736 : move16();
1131 1156736 : zIm[i0] = add_sat( To, Tp );
1132 1156736 : move16();
1133 :
1134 : /* T2 = KP951056516*Te + KP587785252*Th; */
1135 1156736 : L_tmp = Mult_32_16( KP951056516_16FX, Te ); // Q(16 +x)
1136 1156736 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Th ); // Q(16 +x)
1137 1156736 : T2 = round_fx_sat( L_tmp ); // Qx
1138 : /*T3 = KP951056516*Th - KP587785252*Te; */
1139 1156736 : L_tmp = Mult_32_16( KP951056516_16FX, Th ); // Q(16 +x)
1140 1156736 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Te ); // Q(16 +x)
1141 1156736 : T3 = round_fx_sat( L_tmp ); // Qx
1142 1156736 : T6 = sub_sat( T1, shr_sat( T8, 2 ) );
1143 1156736 : T4 = add_sat( T9, T6 );
1144 1156736 : T5 = sub_sat( T6, T9 );
1145 1156736 : zRe[i1] = sub_sat( T4, T2 );
1146 1156736 : move16();
1147 1156736 : zRe[i2] = add_sat( T5, T3 );
1148 1156736 : move16();
1149 1156736 : zRe[i4] = add_sat( T4, T2 );
1150 1156736 : move16();
1151 1156736 : zRe[i3] = sub_sat( T5, T3 );
1152 1156736 : move16();
1153 :
1154 : /* T2 = KP951056516 * Ts + KP587785252 * Tt; */
1155 1156736 : L_tmp = Mult_32_16( KP951056516_16FX, Ts ); // Q(16 +x)
1156 1156736 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Tt ); // Q(16 +x)
1157 1156736 : T2 = round_fx_sat( L_tmp ); // Qx
1158 : /* T3 = KP951056516 * Tt - KP587785252 * Ts; */
1159 1156736 : L_tmp = Mult_32_16( KP951056516_16FX, Tt ); // Q(16 +x)
1160 1156736 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Ts ); // Q(16 +x)
1161 1156736 : T3 = round_fx_sat( L_tmp ); // Qx
1162 1156736 : T6 = sub_sat( To, shr( Tp, 2 ) ); // To - (Tp / 4)
1163 1156736 : T4 = add_sat( Tn, T6 );
1164 1156736 : T5 = sub_sat( T6, Tn );
1165 1156736 : zIm[i4] = sub_sat( T4, T2 );
1166 1156736 : move16();
1167 1156736 : zIm[i2] = sub_sat( T5, T3 );
1168 1156736 : move16();
1169 1156736 : zIm[i1] = add_sat( T2, T4 );
1170 1156736 : move16();
1171 1156736 : zIm[i3] = add_sat( T3, T5 );
1172 1156736 : move16();
1173 1156736 : return;
1174 : }
1175 :
1176 : /*-----------------------------------------------------------------*
1177 : * fft5_32()
1178 : * 5-point FFT called for 32 times
1179 : *-----------------------------------------------------------------*/
1180 2821760 : static void fft5_32_16fx(
1181 : Word16 *zRe, /* i/o : real part of input and output data Qx */
1182 : Word16 *zIm, /* i/o : imaginary part of input and output data Qx */
1183 : const Word16 *Idx /* i : pointer of the address table Q0 */
1184 : )
1185 : {
1186 : Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn, T2, T3, T4, T5, T6, T7;
1187 : Word16 i0, i1, i2, i3, i4;
1188 : Word32 L_tmp;
1189 : #ifndef ISSUE_1836_replace_overflow_libcom
1190 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1191 : Flag Overflow = 0;
1192 : move32();
1193 : #endif
1194 : #endif
1195 2821760 : i0 = Idx[0];
1196 2821760 : move16();
1197 2821760 : i1 = Idx[32];
1198 2821760 : move16();
1199 2821760 : i2 = Idx[64];
1200 2821760 : move16();
1201 2821760 : i3 = Idx[96];
1202 2821760 : move16();
1203 2821760 : i4 = Idx[128];
1204 2821760 : move16();
1205 :
1206 2821760 : T1 = zRe[i0]; // Qx
1207 2821760 : move16();
1208 2821760 : To = zIm[i0]; // Qx
1209 2821760 : move16();
1210 :
1211 2821760 : T2 = zRe[i1]; // Qx
1212 2821760 : move16();
1213 2821760 : T3 = zRe[i4]; // Qx
1214 2821760 : move16();
1215 :
1216 2821760 : T4 = add_sat( T2, T3 );
1217 2821760 : T5 = zRe[i2];
1218 2821760 : move16();
1219 2821760 : T6 = zRe[i3];
1220 2821760 : move16();
1221 : #ifdef ISSUE_1836_replace_overflow_libcom
1222 2821760 : T7 = add_sat( T5, T6 );
1223 2821760 : T8 = add_sat( T4, T7 );
1224 2821760 : Tt = sub_sat( T5, T6 );
1225 : #else
1226 : T7 = add_o( T5, T6, &Overflow );
1227 : T8 = add_o( T4, T7, &Overflow );
1228 : Tt = sub_o( T5, T6, &Overflow );
1229 : #endif
1230 : /* T9 = KP559016994 * (T4 - T7); */
1231 2821760 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1232 2821760 : T9 = round_fx_sat( L_tmp ); // Qx
1233 2821760 : Ts = sub_sat( T2, T3 );
1234 :
1235 2821760 : T2 = zIm[i1];
1236 2821760 : move16();
1237 2821760 : T3 = zIm[i4];
1238 2821760 : move16();
1239 2821760 : T4 = add_sat( T2, T3 );
1240 2821760 : T5 = zIm[i2];
1241 2821760 : move16();
1242 2821760 : T6 = zIm[i3];
1243 2821760 : move16();
1244 2821760 : T7 = add_sat( T5, T6 );
1245 2821760 : Te = sub_sat( T2, T3 );
1246 2821760 : Tp = add_sat( T4, T7 );
1247 2821760 : Th = sub_sat( T5, T6 );
1248 2821760 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1249 2821760 : Tn = round_fx_sat( L_tmp ); // Qx
1250 :
1251 : #ifdef ISSUE_1836_replace_overflow_libcom
1252 2821760 : zRe[i0] = add_sat( T1, T8 );
1253 2821760 : move16();
1254 2821760 : zIm[i0] = add_sat( To, Tp );
1255 2821760 : move32();
1256 : #else
1257 : zRe[i0] = add_o( T1, T8, &Overflow );
1258 : move16();
1259 : zIm[i0] = add_o( To, Tp, &Overflow );
1260 : move32();
1261 : #endif
1262 :
1263 : /*T2 = KP951056516*Te + KP587785252*Th; */
1264 2821760 : L_tmp = Mult_32_16( KP951056516_16FX, Te ); // Q(16 +x)
1265 2821760 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Th ); // Q(16 +x)
1266 2821760 : T2 = round_fx_sat( L_tmp ); // Qx
1267 :
1268 : /*T3 = KP951056516*Th - KP587785252*Te; */
1269 2821760 : L_tmp = Mult_32_16( KP951056516_16FX, Th ); // Q(16 +x)
1270 2821760 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Te ); // Q(16 +x)
1271 2821760 : T3 = round_fx_sat( L_tmp ); // Qx
1272 :
1273 :
1274 2821760 : T6 = sub_sat( T1, shr( T8, 2 ) );
1275 2821760 : T4 = add_sat( T9, T6 );
1276 2821760 : T5 = sub_sat( T6, T9 );
1277 :
1278 : #ifdef ISSUE_1836_replace_overflow_libcom
1279 2821760 : zRe[i3] = sub_sat( T4, T2 );
1280 2821760 : move32();
1281 2821760 : zRe[i1] = add_sat( T5, T3 );
1282 2821760 : move32();
1283 2821760 : zRe[i2] = add_sat( T4, T2 );
1284 2821760 : move32();
1285 2821760 : zRe[i4] = sub_sat( T5, T3 );
1286 2821760 : move32();
1287 : #else
1288 : zRe[i3] = sub_o( T4, T2, &Overflow );
1289 : move32();
1290 : zRe[i1] = add_o( T5, T3, &Overflow );
1291 : move32();
1292 : zRe[i2] = add_o( T4, T2, &Overflow );
1293 : move32();
1294 : zRe[i4] = sub_o( T5, T3, &Overflow );
1295 : move32();
1296 : #endif
1297 :
1298 : /* T2 = KP951056516 * Ts + KP587785252 * Tt; */
1299 2821760 : L_tmp = Mult_32_16( KP951056516_16FX, Ts ); // Q(16 +x)
1300 2821760 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Tt ); // Q(16 +x)
1301 2821760 : T2 = round_fx_sat( L_tmp ); // Qx
1302 :
1303 : /* T3 = KP951056516 * Tt - KP587785252 * Ts; */
1304 2821760 : L_tmp = Mult_32_16( KP951056516_16FX, Tt ); // Q(16 +x)
1305 2821760 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Ts ); // Q(16 +x)
1306 :
1307 2821760 : T3 = round_fx_sat( L_tmp ); // Qx
1308 :
1309 2821760 : T6 = sub_sat( To, shr( Tp, 2 ) );
1310 2821760 : T4 = add_sat( Tn, T6 );
1311 2821760 : T5 = sub_sat( T6, Tn );
1312 2821760 : zIm[i2] = sub_sat( T4, T2 );
1313 2821760 : move16();
1314 2821760 : zIm[i1] = sub_sat( T5, T3 );
1315 2821760 : move16();
1316 2821760 : zIm[i3] = add_sat( T2, T4 );
1317 2821760 : move16();
1318 2821760 : zIm[i4] = add_sat( T3, T5 );
1319 2821760 : move16();
1320 :
1321 2821760 : return;
1322 : }
1323 :
1324 : /*-----------------------------------------------------------------*
1325 : * fft64()
1326 : * 64-point FFT
1327 : *-----------------------------------------------------------------*/
1328 90370 : static void fft64_16fx(
1329 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1330 : Word16 *y, /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1331 : const Word16 *Idx /* i : pointer of the address table Q0 */
1332 : )
1333 : {
1334 : Word16 i, id, jd;
1335 : Word16 z[128];
1336 90370 : move16(); /*penalty for 1 ptr init */
1337 5874050 : FOR( i = 0; i < 64; i++ )
1338 : {
1339 5783680 : id = Idx[i];
1340 5783680 : move16();
1341 5783680 : z[2 * i] = x[id];
1342 5783680 : move16();
1343 5783680 : z[2 * i + 1] = y[id];
1344 5783680 : move16();
1345 : }
1346 :
1347 90370 : cdftForw_16fx( 128, z, Ip_fft64, w_fft128_16fx );
1348 :
1349 90370 : move16(); /*penalty for 1 ptr init */
1350 5874050 : FOR( i = 0; i < 64; i++ )
1351 : {
1352 5783680 : jd = Odx_fft64[i];
1353 5783680 : move16();
1354 5783680 : id = Idx[jd];
1355 5783680 : move16();
1356 5783680 : x[id] = z[2 * i];
1357 5783680 : move16();
1358 5783680 : y[id] = z[2 * i + 1];
1359 5783680 : move16();
1360 : }
1361 :
1362 90370 : return;
1363 : }
1364 :
1365 :
1366 : /*-----------------------------------------------------------------*
1367 : * fft32_5()
1368 : * 32-point FFT called for 5 times
1369 : *-----------------------------------------------------------------*/
1370 440900 : static void fft32_5_16fx(
1371 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1372 : Word16 *y, /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1373 : const Word16 *Idx /* i : pointer of the address table */
1374 : )
1375 : {
1376 : Word16 i, id, jd;
1377 : Word16 z[64];
1378 :
1379 14549700 : FOR( i = 0; i < 32; i++ )
1380 : {
1381 14108800 : id = Idx[i];
1382 14108800 : move16();
1383 14108800 : z[2 * i] = x[id];
1384 14108800 : move16();
1385 14108800 : z[2 * i + 1] = y[id];
1386 14108800 : move16();
1387 : }
1388 :
1389 440900 : cdftForw_16fx( 64, z, Ip_fft32, w_fft32_16fx );
1390 :
1391 14549700 : FOR( i = 0; i < 32; i++ )
1392 : {
1393 14108800 : jd = Odx_fft32_5[i];
1394 14108800 : move16();
1395 14108800 : id = Idx[jd];
1396 14108800 : move16();
1397 14108800 : x[id] = z[2 * i];
1398 14108800 : move16();
1399 14108800 : y[id] = z[2 * i + 1];
1400 14108800 : move16();
1401 : }
1402 :
1403 440900 : return;
1404 : }
1405 :
1406 :
1407 : /*-----------------------------------------------------------------*
1408 : * DoRTFT160()
1409 : * a low complexity 2-dimensional DFT of 160 points
1410 : *-----------------------------------------------------------------*/
1411 88180 : void DoRTFT160_16fx(
1412 : Word16 x[], /* i/o : real part of input and output data Q(Qx+Q_edct) */
1413 : Word16 y[] /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1414 : )
1415 : {
1416 : Word16 j;
1417 :
1418 : /* Applying 32-point FFT for 5 times based on the address table Idx_dortft160 */
1419 529080 : FOR( j = 0; j < 5; j++ )
1420 : {
1421 440900 : fft32_5_16fx( x, y, Idx_dortft160 + shl( j, 5 ) /*32*j*/ );
1422 : }
1423 :
1424 : /* Applying 5-point FFT for 32 times based on the address table Idx_dortft160 */
1425 2909940 : FOR( j = 0; j < 32; j++ )
1426 : {
1427 2821760 : fft5_32_16fx( x, y, Idx_dortft160 + j );
1428 : }
1429 :
1430 88180 : return;
1431 : }
1432 :
1433 : /*-----------------------------------------------------------------*
1434 : * DoRTFT320()
1435 : * a low complexity 2-dimensional DFT of 320 points
1436 : *-----------------------------------------------------------------*/
1437 18074 : void DoRTFT320_16fx(
1438 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1439 : Word16 *y /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1440 : )
1441 : {
1442 : Word16 j;
1443 :
1444 : /* Applying 64-point FFT for 5 times based on the address table Idx_dortft160 */
1445 108444 : FOR( j = 0; j < 5; j++ )
1446 : {
1447 90370 : fft64_16fx( x, y, Idx_dortft320 + shl( j, 6 ) /*64*j*/ );
1448 : }
1449 :
1450 : /* Applying 5-point FFT for 64 times based on the address table Idx_dortft160 */
1451 1174810 : FOR( j = 0; j < 64; j++ )
1452 : {
1453 1156736 : fft5_shift4_16fx( 64, x, y, Idx_dortft320 + j );
1454 : }
1455 :
1456 18074 : return;
1457 : }
1458 :
1459 : /*-----------------------------------------------------------------*
1460 : * DoRTFT128()
1461 : * FFT with 128 points
1462 : *-----------------------------------------------------------------*/
1463 193308 : void DoRTFT128_16fx(
1464 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct)*/
1465 : Word16 *y /* i/o : imaginary part of input and output data Q(Qx+Q_edct)*/
1466 : )
1467 : {
1468 :
1469 : Word16 i;
1470 : Word16 z[256];
1471 :
1472 24936732 : FOR( i = 0; i < 128; i++ )
1473 : {
1474 24743424 : z[2 * i] = x[i];
1475 24743424 : move16();
1476 24743424 : z[2 * i + 1] = y[i];
1477 24743424 : move16();
1478 : }
1479 :
1480 193308 : cdftForw_16fx( 256, z, Ip_fft128, w_fft128_16fx );
1481 :
1482 193308 : x[0] = z[0];
1483 193308 : move16();
1484 193308 : y[0] = z[1];
1485 193308 : move16();
1486 24743424 : FOR( i = 1; i < 128; i++ )
1487 : {
1488 24550116 : x[128 - i] = z[2 * i];
1489 24550116 : move16();
1490 24550116 : y[128 - i] = z[2 * i + 1];
1491 24550116 : move16();
1492 : }
1493 :
1494 193308 : return;
1495 : }
1496 : /*-----------------------------------------------------------------*
1497 : * cdftForw()
1498 : * Main fuction of Complex Discrete Fourier Transform
1499 : *-----------------------------------------------------------------*/
1500 724578 : static void cdftForw_16fx(
1501 : Word16 n, /* i : data length of real and imag */
1502 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1503 : const Word16 *ip, /* i : work area for bit reversal */
1504 : const Word32 *w /* i : cos/sin table Q30*/
1505 : )
1506 : {
1507 : /* bit reversal */
1508 724578 : bitrv2_SR_16fx( n, ip + 2, a );
1509 :
1510 : /* Do FFT */
1511 724578 : cftfsub_16fx( n, a, w );
1512 724578 : }
1513 :
1514 : /*-----------------------------------------------------------------*
1515 : * bitrv2_SR()
1516 : * Bit reversal
1517 : *-----------------------------------------------------------------*/
1518 724578 : static void bitrv2_SR_16fx(
1519 : Word16 n, /* i : data length of real and imag */
1520 : const Word16 *ip, /* i/o : work area for bit reversal */
1521 : Word16 *a /* i/o : input/output data Q(Qx+Q_edct)*/
1522 : )
1523 : {
1524 : Word16 j, j1, k, k1, m, m2;
1525 : Word16 l;
1526 : Word16 xr, xi, yr, yi;
1527 :
1528 724578 : l = n;
1529 724578 : move16();
1530 724578 : m = 1;
1531 724578 : move16();
1532 :
1533 2367042 : WHILE( ( ( m << 3 ) < l ) )
1534 : {
1535 1642464 : l = shr( l, 1 );
1536 1642464 : m = shl( m, 1 );
1537 : }
1538 :
1539 724578 : m2 = shl( m, 1 );
1540 724578 : IF( EQ_16( shl( m, 3 ), l ) )
1541 : {
1542 451850 : FOR( k = 0; k < m; k++ )
1543 : {
1544 903700 : FOR( j = 0; j < k; j++ )
1545 : {
1546 542220 : j1 = add( shl( j, 1 ), ip[k] );
1547 542220 : k1 = add( shl( k, 1 ), ip[j] );
1548 542220 : xr = a[j1];
1549 542220 : move16();
1550 542220 : xi = a[j1 + 1];
1551 542220 : move16();
1552 542220 : yr = a[k1];
1553 542220 : move16();
1554 542220 : yi = a[k1 + 1];
1555 542220 : move16();
1556 542220 : a[j1] = yr;
1557 542220 : move16();
1558 542220 : a[j1 + 1] = yi;
1559 542220 : move16();
1560 542220 : a[k1] = xr;
1561 542220 : move16();
1562 542220 : a[k1 + 1] = xi;
1563 542220 : move16();
1564 542220 : j1 = add( j1, m2 );
1565 542220 : k1 = add( k1, shl( m2, 1 ) );
1566 542220 : xr = a[j1];
1567 542220 : move16();
1568 542220 : xi = a[j1 + 1];
1569 542220 : move16();
1570 542220 : yr = a[k1];
1571 542220 : move16();
1572 542220 : yi = a[k1 + 1];
1573 542220 : move16();
1574 542220 : a[j1] = yr;
1575 542220 : move16();
1576 542220 : a[j1 + 1] = yi;
1577 542220 : move16();
1578 542220 : a[k1] = xr;
1579 542220 : move16();
1580 542220 : a[k1 + 1] = xi;
1581 542220 : move16();
1582 542220 : j1 = add( j1, m2 );
1583 542220 : k1 = sub( k1, m2 );
1584 542220 : xr = a[j1];
1585 542220 : move16();
1586 542220 : xi = a[j1 + 1];
1587 542220 : move16();
1588 542220 : xi = a[j1 + 1];
1589 542220 : move16();
1590 542220 : yr = a[k1];
1591 542220 : move16();
1592 542220 : yi = a[k1 + 1];
1593 542220 : move16();
1594 542220 : a[j1] = yr;
1595 542220 : move16();
1596 542220 : a[j1 + 1] = yi;
1597 542220 : move16();
1598 542220 : a[k1] = xr;
1599 542220 : move16();
1600 542220 : a[k1 + 1] = xi;
1601 542220 : move16();
1602 542220 : j1 = add( j1, m2 );
1603 542220 : k1 = add( k1, shl( m2, 1 ) );
1604 542220 : xr = a[j1];
1605 542220 : move16();
1606 542220 : xi = a[j1 + 1];
1607 542220 : move16();
1608 542220 : yr = a[k1];
1609 542220 : move16();
1610 542220 : yi = a[k1 + 1];
1611 542220 : move16();
1612 542220 : a[j1] = yr;
1613 542220 : move16();
1614 542220 : a[j1 + 1] = yi;
1615 542220 : move16();
1616 542220 : a[k1] = xr;
1617 542220 : move16();
1618 542220 : a[k1 + 1] = xi;
1619 542220 : move16();
1620 : }
1621 :
1622 361480 : j1 = add( add( shl( k, 1 ), m2 ), ip[k] );
1623 361480 : k1 = add( j1, m2 );
1624 361480 : xr = a[j1];
1625 361480 : move16();
1626 361480 : xi = a[j1 + 1];
1627 361480 : move16();
1628 361480 : yr = a[k1];
1629 361480 : move16();
1630 361480 : yi = a[k1 + 1];
1631 361480 : move16();
1632 361480 : a[j1] = yr;
1633 361480 : move16();
1634 361480 : a[j1 + 1] = yi;
1635 361480 : move16();
1636 361480 : a[k1] = xr;
1637 361480 : move16();
1638 361480 : a[k1 + 1] = xi;
1639 361480 : move16();
1640 : }
1641 : }
1642 : ELSE
1643 : {
1644 3310064 : FOR( k = 1; k < m; k++ )
1645 : {
1646 10733880 : FOR( j = 0; j < k; j++ )
1647 : {
1648 8058024 : j1 = add( shl( j, 1 ), ip[k] );
1649 8058024 : k1 = add( shl( k, 1 ), ip[j] );
1650 8058024 : xr = a[j1];
1651 8058024 : move16();
1652 8058024 : xi = a[j1 + 1];
1653 8058024 : move16();
1654 8058024 : yr = a[k1];
1655 8058024 : move16();
1656 8058024 : yi = a[k1 + 1];
1657 8058024 : move16();
1658 8058024 : a[j1] = yr;
1659 8058024 : move16();
1660 8058024 : a[j1 + 1] = yi;
1661 8058024 : move16();
1662 8058024 : a[k1] = xr;
1663 8058024 : move16();
1664 8058024 : a[k1 + 1] = xi;
1665 8058024 : move16();
1666 8058024 : j1 = add( j1, m2 );
1667 8058024 : k1 = add( k1, m2 );
1668 8058024 : xr = a[j1];
1669 8058024 : move16();
1670 8058024 : xi = a[j1 + 1];
1671 8058024 : move16();
1672 8058024 : yr = a[k1];
1673 8058024 : move16();
1674 8058024 : yi = a[k1 + 1];
1675 8058024 : move16();
1676 8058024 : a[j1] = yr;
1677 8058024 : move16();
1678 8058024 : a[j1 + 1] = yi;
1679 8058024 : move16();
1680 8058024 : a[k1] = xr;
1681 8058024 : move16();
1682 8058024 : a[k1 + 1] = xi;
1683 8058024 : move16();
1684 : }
1685 : }
1686 : }
1687 :
1688 724578 : return;
1689 : }
1690 :
1691 : /*-----------------------------------------------------------------*
1692 : * cftfsub()
1693 : * Complex Discrete Fourier Transform
1694 : *-----------------------------------------------------------------*/
1695 724578 : static void cftfsub_16fx(
1696 : Word16 n, /* i : data length of real and imag */
1697 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1698 : const Word32 *w /* i : cos/sin table Q30*/
1699 : )
1700 : {
1701 : Word16 j, j1, j2, j3, l;
1702 : Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1703 : #ifndef ISSUE_1836_replace_overflow_libcom
1704 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1705 : Flag Overflow = 0;
1706 : move32();
1707 : #endif
1708 : #endif
1709 :
1710 724578 : l = 2;
1711 724578 : move16();
1712 724578 : IF( GT_16( n, 8 ) )
1713 : {
1714 724578 : cft1st_16fx( n, a, w );
1715 724578 : l = 8;
1716 724578 : move16();
1717 1642464 : WHILE( ( ( l << 2 ) < n ) )
1718 : {
1719 917886 : cftmdl_16fx( n, l, a, w );
1720 917886 : l = shl( l, 2 );
1721 : }
1722 : }
1723 :
1724 724578 : IF( EQ_16( shl( l, 2 ), n ) )
1725 : {
1726 1536290 : FOR( j = 0; j < l; j += 2 )
1727 : {
1728 1445920 : j1 = add( j, l );
1729 1445920 : j2 = add( j1, l );
1730 1445920 : j3 = add( j2, l );
1731 1445920 : x0r = add( a[j], a[j1] );
1732 1445920 : x0i = add( a[j + 1], a[j1 + 1] );
1733 1445920 : x1r = sub( a[j], a[j1] );
1734 1445920 : x1i = sub( a[j + 1], a[j1 + 1] );
1735 1445920 : x2r = add( a[j2], a[j3] );
1736 1445920 : x2i = add( a[j2 + 1], a[j3 + 1] );
1737 1445920 : x3r = sub( a[j2], a[j3] );
1738 1445920 : x3i = sub( a[j2 + 1], a[j3 + 1] );
1739 1445920 : a[j] = add( x0r, x2r );
1740 1445920 : move16();
1741 1445920 : a[j + 1] = add( x0i, x2i );
1742 1445920 : move16();
1743 1445920 : a[j2] = sub( x0r, x2r );
1744 1445920 : move16();
1745 1445920 : a[j2 + 1] = sub( x0i, x2i );
1746 1445920 : move16();
1747 1445920 : a[j1] = sub( x1r, x3i );
1748 1445920 : move16();
1749 1445920 : a[j1 + 1] = add( x1i, x3r );
1750 1445920 : move16();
1751 1445920 : a[j3] = add( x1r, x3i );
1752 1445920 : move16();
1753 1445920 : a[j3 + 1] = sub( x1i, x3r );
1754 1445920 : move16();
1755 : }
1756 : }
1757 : ELSE
1758 : {
1759 20060320 : FOR( j = 0; j < l; j += 2 )
1760 : {
1761 : #ifdef ISSUE_1836_replace_overflow_libcom
1762 19426112 : j1 = add_sat( j, l );
1763 19426112 : x0r = sub_sat( a[j], a[j1] );
1764 19426112 : x0i = sub_sat( a[j + 1], a[j1 + 1] );
1765 19426112 : a[j] = add_sat( a[j], a[j1] );
1766 19426112 : move16();
1767 19426112 : a[j + 1] = add_sat( a[j + 1], a[j1 + 1] );
1768 19426112 : move16();
1769 : #else
1770 : j1 = add_o( j, l, &Overflow );
1771 : x0r = sub_o( a[j], a[j1], &Overflow );
1772 : x0i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
1773 : a[j] = add_o( a[j], a[j1], &Overflow );
1774 : move16();
1775 : a[j + 1] = add_o( a[j + 1], a[j1 + 1], &Overflow );
1776 : move16();
1777 : #endif
1778 19426112 : a[j1] = x0r;
1779 19426112 : move16();
1780 19426112 : a[j1 + 1] = x0i;
1781 19426112 : move16();
1782 : }
1783 : }
1784 724578 : return;
1785 : }
1786 :
1787 : /*-----------------------------------------------------------------*
1788 : * cft1st()
1789 : * Subfunction of Complex Discrete Fourier Transform
1790 : *-----------------------------------------------------------------*/
1791 724578 : static void cft1st_16fx(
1792 : Word16 n, /* i : data length of real and imag */
1793 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1794 : const Word32 *w /* i : cos/sin table Q30*/
1795 : )
1796 : {
1797 : Word16 j, k1, k2;
1798 : Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1799 : Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1800 : Word16 tmp;
1801 : Word32 L_tmp;
1802 : #ifndef ISSUE_1836_replace_overflow_libcom
1803 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1804 : Flag Overflow = 0;
1805 : move32();
1806 : #endif
1807 : #endif
1808 :
1809 : #ifdef ISSUE_1836_replace_overflow_libcom
1810 724578 : x0r = add_sat( a[0], a[2] );
1811 724578 : x0i = add_sat( a[1], a[3] );
1812 724578 : x1r = sub_sat( a[0], a[2] );
1813 724578 : x1i = sub_sat( a[1], a[3] );
1814 724578 : x2r = add_sat( a[4], a[6] );
1815 724578 : x2i = add_sat( a[5], a[7] );
1816 724578 : x3r = sub_sat( a[4], a[6] );
1817 724578 : x3i = sub_sat( a[5], a[7] );
1818 724578 : a[0] = add_sat( x0r, x2r );
1819 724578 : move16();
1820 724578 : a[1] = add_sat( x0i, x2i );
1821 724578 : move16();
1822 724578 : a[4] = sub_sat( x0r, x2r );
1823 724578 : move16();
1824 724578 : a[5] = sub_sat( x0i, x2i );
1825 724578 : move16();
1826 724578 : a[2] = sub_sat( x1r, x3i );
1827 724578 : move16();
1828 724578 : a[3] = add_sat( x1i, x3r );
1829 724578 : move16();
1830 724578 : a[6] = add_sat( x1r, x3i );
1831 724578 : move16();
1832 724578 : a[7] = sub_sat( x1i, x3r );
1833 724578 : wk1r = w[2];
1834 724578 : move32();
1835 :
1836 724578 : x0r = add_sat( a[8], a[10] );
1837 724578 : x0i = add_sat( a[9], a[11] );
1838 724578 : x1r = sub_sat( a[8], a[10] );
1839 724578 : x1i = sub_sat( a[9], a[11] );
1840 724578 : x2r = add_sat( a[12], a[14] );
1841 724578 : x2i = add_sat( a[13], a[15] );
1842 724578 : x3r = sub_sat( a[12], a[14] );
1843 724578 : x3i = sub_sat( a[13], a[15] );
1844 724578 : a[8] = add_sat( x0r, x2r );
1845 724578 : move16();
1846 724578 : a[9] = add_sat( x0i, x2i );
1847 724578 : move16();
1848 724578 : a[12] = sub_sat( x2i, x0i );
1849 724578 : move16();
1850 724578 : a[13] = sub_sat( x0r, x2r );
1851 724578 : move16();
1852 :
1853 724578 : x0r = sub_sat( x1r, x3i );
1854 724578 : x0i = add_sat( x1i, x3r );
1855 724578 : tmp = sub_sat( x0r, x0i );
1856 724578 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1857 :
1858 724578 : a[10] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1859 724578 : move16();
1860 :
1861 724578 : tmp = add_sat( x0r, x0i );
1862 724578 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1863 724578 : a[11] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /* Q(Qx+Q_edct)*/
1864 724578 : move16();
1865 :
1866 724578 : x0r = add_sat( x3i, x1r );
1867 724578 : x0i = sub_sat( x3r, x1i );
1868 724578 : tmp = sub_sat( x0i, x0r );
1869 724578 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1870 724578 : a[14] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1871 724578 : move16();
1872 :
1873 724578 : tmp = add_sat( x0i, x0r );
1874 724578 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1875 724578 : a[15] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1876 724578 : move16();
1877 724578 : k1 = 0;
1878 724578 : move16();
1879 : #else
1880 : x0r = add_o( a[0], a[2], &Overflow );
1881 : x0i = add_o( a[1], a[3], &Overflow );
1882 : x1r = sub_o( a[0], a[2], &Overflow );
1883 : x1i = sub_o( a[1], a[3], &Overflow );
1884 : x2r = add_o( a[4], a[6], &Overflow );
1885 : x2i = add_o( a[5], a[7], &Overflow );
1886 : x3r = sub_o( a[4], a[6], &Overflow );
1887 : x3i = sub_o( a[5], a[7], &Overflow );
1888 : a[0] = add_o( x0r, x2r, &Overflow );
1889 : move16();
1890 : a[1] = add_o( x0i, x2i, &Overflow );
1891 : move16();
1892 : a[4] = sub_o( x0r, x2r, &Overflow );
1893 : move16();
1894 : a[5] = sub_o( x0i, x2i, &Overflow );
1895 : move16();
1896 : a[2] = sub_o( x1r, x3i, &Overflow );
1897 : move16();
1898 : a[3] = add_o( x1i, x3r, &Overflow );
1899 : move16();
1900 : a[6] = add_o( x1r, x3i, &Overflow );
1901 : move16();
1902 : a[7] = sub_o( x1i, x3r, &Overflow );
1903 : wk1r = w[2];
1904 : move32();
1905 :
1906 : x0r = add_o( a[8], a[10], &Overflow );
1907 : x0i = add_o( a[9], a[11], &Overflow );
1908 : x1r = sub_o( a[8], a[10], &Overflow );
1909 : x1i = sub_o( a[9], a[11], &Overflow );
1910 : x2r = add_o( a[12], a[14], &Overflow );
1911 : x2i = add_o( a[13], a[15], &Overflow );
1912 : x3r = sub_o( a[12], a[14], &Overflow );
1913 : x3i = sub_o( a[13], a[15], &Overflow );
1914 : a[8] = add_o( x0r, x2r, &Overflow );
1915 : move16();
1916 : a[9] = add_o( x0i, x2i, &Overflow );
1917 : move16();
1918 : a[12] = sub_o( x2i, x0i, &Overflow );
1919 : move16();
1920 : a[13] = sub_o( x0r, x2r, &Overflow );
1921 : move16();
1922 :
1923 : x0r = sub_o( x1r, x3i, &Overflow );
1924 : x0i = add_o( x1i, x3r, &Overflow );
1925 : tmp = sub_o( x0r, x0i, &Overflow );
1926 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1927 :
1928 : a[10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1929 : move16();
1930 :
1931 : tmp = add_o( x0r, x0i, &Overflow );
1932 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1933 : a[11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /* Q(Qx+Q_edct) */
1934 : move16();
1935 :
1936 : x0r = add_o( x3i, x1r, &Overflow );
1937 : x0i = sub_o( x3r, x1i, &Overflow );
1938 : tmp = sub_o( x0i, x0r, &Overflow );
1939 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1940 : a[14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1941 : move16();
1942 :
1943 : tmp = add_o( x0i, x0r, &Overflow );
1944 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1945 : a[15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1946 : move16();
1947 : k1 = 0;
1948 : move16();
1949 : #endif
1950 :
1951 5579488 : FOR( j = 16; j < n; j += 16 )
1952 : {
1953 4854910 : k1 = add( k1, 2 );
1954 4854910 : k2 = shl( k1, 1 );
1955 :
1956 4854910 : wk2r = w[k1];
1957 4854910 : move32();
1958 4854910 : wk2i = w[k1 + 1];
1959 4854910 : move32();
1960 4854910 : wk1r = w[k2];
1961 4854910 : move32();
1962 4854910 : wk1i = w[k2 + 1];
1963 4854910 : move32();
1964 :
1965 4854910 : L_tmp = L_shl( Mult_32_32( wk2i, wk1i ), 1 ); /*Q29 */
1966 4854910 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
1967 :
1968 4854910 : L_tmp = L_shl( Mult_32_32( wk2i, wk1r ), 1 ); /*Q29 */
1969 4854910 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
1970 : #ifdef ISSUE_1836_replace_overflow_libcom
1971 4854910 : x0r = add_sat( a[j], a[j + 2] );
1972 4854910 : x0i = add_sat( a[j + 1], a[j + 3] );
1973 4854910 : x1r = sub_sat( a[j], a[j + 2] );
1974 4854910 : x1i = sub_sat( a[j + 1], a[j + 3] );
1975 4854910 : x2r = add_sat( a[j + 4], a[j + 6] );
1976 4854910 : x2i = add_sat( a[j + 5], a[j + 7] );
1977 4854910 : x3r = sub_sat( a[j + 4], a[j + 6] );
1978 4854910 : x3i = sub_sat( a[j + 5], a[j + 7] );
1979 4854910 : a[j] = add_sat( x0r, x2r );
1980 4854910 : move16();
1981 4854910 : a[j + 1] = add_sat( x0i, x2i );
1982 4854910 : move16();
1983 :
1984 4854910 : x0r = sub_sat( x0r, x2r );
1985 4854910 : x0i = sub_sat( x0i, x2i );
1986 4854910 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
1987 4854910 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
1988 4854910 : a[j + 4] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1989 4854910 : move16();
1990 :
1991 4854910 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
1992 4854910 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
1993 4854910 : a[j + 5] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1994 4854910 : move16();
1995 :
1996 4854910 : x0r = sub_sat( x1r, x3i );
1997 4854910 : x0i = add_sat( x1i, x3r );
1998 4854910 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
1999 4854910 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2000 4854910 : a[j + 2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2001 4854910 : move16();
2002 :
2003 4854910 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2004 4854910 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2005 4854910 : a[j + 3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2006 4854910 : move16();
2007 :
2008 4854910 : x0r = add_sat( x1r, x3i );
2009 4854910 : x0i = sub_sat( x1i, x3r );
2010 4854910 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2011 4854910 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2012 4854910 : a[j + 6] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2013 4854910 : move16();
2014 :
2015 4854910 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2016 4854910 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2017 4854910 : a[j + 7] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2018 4854910 : move16();
2019 :
2020 4854910 : wk1r = w[k2 + 2];
2021 4854910 : move32();
2022 4854910 : wk1i = w[k2 + 3];
2023 4854910 : move32();
2024 4854910 : L_tmp = L_shl( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
2025 4854910 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
2026 :
2027 4854910 : L_tmp = L_shl( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
2028 4854910 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
2029 :
2030 4854910 : x0r = add_sat( a[j + 8], a[j + 10] );
2031 4854910 : x0i = add_sat( a[j + 9], a[j + 11] );
2032 4854910 : x1r = sub_sat( a[j + 8], a[j + 10] );
2033 4854910 : x1i = sub_sat( a[j + 9], a[j + 11] );
2034 4854910 : x2r = add_sat( a[j + 12], a[j + 14] );
2035 4854910 : x2i = add_sat( a[j + 13], a[j + 15] );
2036 4854910 : x3r = sub_sat( a[j + 12], a[j + 14] );
2037 4854910 : x3i = sub_sat( a[j + 13], a[j + 15] );
2038 4854910 : a[j + 8] = add_sat( x0r, x2r );
2039 4854910 : move16();
2040 4854910 : a[j + 9] = add_sat( x0i, x2i );
2041 4854910 : move16();
2042 :
2043 4854910 : x0r = sub_sat( x0r, x2r );
2044 4854910 : x0i = sub_sat( x0i, x2i );
2045 4854910 : tmp = negate( x0r );
2046 4854910 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2047 4854910 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2048 4854910 : a[j + 12] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2049 4854910 : move16();
2050 :
2051 4854910 : tmp = negate( x0i );
2052 4854910 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2053 4854910 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2054 4854910 : a[j + 13] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2055 4854910 : move16();
2056 :
2057 4854910 : x0r = sub_sat( x1r, x3i );
2058 4854910 : x0i = add_sat( x1i, x3r );
2059 4854910 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2060 4854910 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2061 4854910 : a[j + 10] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2062 4854910 : move16();
2063 :
2064 4854910 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2065 4854910 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2066 4854910 : a[j + 11] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2067 4854910 : move16();
2068 :
2069 4854910 : x0r = add_sat( x1r, x3i );
2070 4854910 : x0i = sub_sat( x1i, x3r );
2071 :
2072 4854910 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2073 4854910 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2074 4854910 : a[j + 14] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2075 4854910 : move16();
2076 :
2077 4854910 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2078 4854910 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2079 4854910 : a[j + 15] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2080 4854910 : move16();
2081 : #else
2082 : x0r = add_o( a[j], a[j + 2], &Overflow );
2083 : x0i = add_o( a[j + 1], a[j + 3], &Overflow );
2084 : x1r = sub_o( a[j], a[j + 2], &Overflow );
2085 : x1i = sub_o( a[j + 1], a[j + 3], &Overflow );
2086 : x2r = add_o( a[j + 4], a[j + 6], &Overflow );
2087 : x2i = add_o( a[j + 5], a[j + 7], &Overflow );
2088 : x3r = sub_o( a[j + 4], a[j + 6], &Overflow );
2089 : x3i = sub_o( a[j + 5], a[j + 7], &Overflow );
2090 : a[j] = add_o( x0r, x2r, &Overflow );
2091 : move16();
2092 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2093 : move16();
2094 :
2095 : x0r = sub_o( x0r, x2r, &Overflow );
2096 : x0i = sub_o( x0i, x2i, &Overflow );
2097 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2098 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
2099 : a[j + 4] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2100 : move16();
2101 :
2102 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2103 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
2104 : a[j + 5] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2105 : move16();
2106 :
2107 : x0r = sub_o( x1r, x3i, &Overflow );
2108 : x0i = add_o( x1i, x3r, &Overflow );
2109 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2110 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2111 : a[j + 2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2112 : move16();
2113 :
2114 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2115 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2116 : a[j + 3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2117 : move16();
2118 :
2119 : x0r = add_o( x1r, x3i, &Overflow );
2120 : x0i = sub_o( x1i, x3r, &Overflow );
2121 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2122 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2123 : a[j + 6] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2124 : move16();
2125 :
2126 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2127 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2128 : a[j + 7] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2129 : move16();
2130 :
2131 : wk1r = w[k2 + 2];
2132 : move32();
2133 : wk1i = w[k2 + 3];
2134 : move32();
2135 : L_tmp = L_shl( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
2136 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
2137 :
2138 : L_tmp = L_shl( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
2139 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
2140 :
2141 : x0r = add_o( a[j + 8], a[j + 10], &Overflow );
2142 : x0i = add_o( a[j + 9], a[j + 11], &Overflow );
2143 : x1r = sub_o( a[j + 8], a[j + 10], &Overflow );
2144 : x1i = sub_o( a[j + 9], a[j + 11], &Overflow );
2145 : x2r = add_o( a[j + 12], a[j + 14], &Overflow );
2146 : x2i = add_o( a[j + 13], a[j + 15], &Overflow );
2147 : x3r = sub_o( a[j + 12], a[j + 14], &Overflow );
2148 : x3i = sub_o( a[j + 13], a[j + 15], &Overflow );
2149 : a[j + 8] = add_o( x0r, x2r, &Overflow );
2150 : move16();
2151 : a[j + 9] = add_o( x0i, x2i, &Overflow );
2152 : move16();
2153 :
2154 : x0r = sub_o( x0r, x2r, &Overflow );
2155 : x0i = sub_o( x0i, x2i, &Overflow );
2156 : tmp = negate( x0r );
2157 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2158 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2159 : a[j + 12] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2160 : move16();
2161 :
2162 : tmp = negate( x0i );
2163 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2164 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2165 : a[j + 13] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2166 : move16();
2167 :
2168 : x0r = sub_o( x1r, x3i, &Overflow );
2169 : x0i = add_o( x1i, x3r, &Overflow );
2170 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2171 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2172 : a[j + 10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2173 : move16();
2174 :
2175 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2176 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2177 : a[j + 11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2178 : move16();
2179 :
2180 : x0r = add_o( x1r, x3i, &Overflow );
2181 : x0i = sub_o( x1i, x3r, &Overflow );
2182 :
2183 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2184 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2185 : a[j + 14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2186 : move16();
2187 :
2188 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2189 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2190 : a[j + 15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2191 : move16();
2192 : #endif
2193 : }
2194 :
2195 724578 : return;
2196 : }
2197 :
2198 : /*-----------------------------------------------------------------*
2199 : * cftmdl_16fx()
2200 : * Subfunction of Complex Discrete Fourier Transform
     : *
     : * In-place pass of 4-input butterflies over a[], read as interleaved
     : * (real, imaginary) Word16 pairs: a[idx] feeds the x?r temporaries and
     : * a[idx + 1] the x?i ones. Each butterfly combines the pairs located at
     : * j, j + l, j + 2l and j + 3l, so one group of butterflies spans
     : * m = 4 * l elements.
     : *
     : * - group at 0      : sums/differences only, no table factor
     : * - group at m      : outputs stored at j + l and j + 3l are scaled
     : *                     by w[2]
     : * - groups from 2*m : handled two at a time (k and k + m) while k < n,
     : *                     with factors read from w[k1], w[k1 + 1] and
     : *                     w[2*k1 .. 2*k1 + 3]; wk3r/wk3i are computed from
     : *                     those values instead of being read from w[]
     : *
     : * The Mult_32_16 / Msub_32_16 / Madd_32_16 call pairs have the shape of
     : * a complex multiply (re = wr*xr - wi*xi, im = wr*xi + wi*xr), assuming
     : * the helpers multiply/accumulate as their names suggest.
     : *
     : * With ISSUE_1836_replace_overflow_libcom defined the *_sat basops are
     : * used; otherwise the *_o variants are used, each being handed
     : * &Overflow. This function never reads the Overflow flag back.
2201 : *-----------------------------------------------------------------*/
2202 917886 : static void cftmdl_16fx(
2203 : Word16 n, /* i : data length of real and imag; the paired-group loop runs while k < n */
2204 : Word16 l, /* i : initial shift for processing; distance between the four butterfly inputs */
2205 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
2206 : const Word32 *w /* i : cos/sin table Q30*/
2207 : )
2208 : {
2209 : Word16 j, j1, j2, j3, k, k1, k2, m, m2;
2210 : Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
2211 : Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2212 : Word16 tmp, tmp2;
2213 : Word32 L_tmp;
2214 : Word32 L_x0r, L_x0i;
2215 : #ifndef ISSUE_1836_replace_overflow_libcom
2216 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
2217 : Flag Overflow = 0;
2218 : move32();
2219 : #endif
2220 : #endif
2221 917886 : m = shl( l, 2 ); /* m = 4 * l: elements covered by one butterfly group */
2222 6909126 : FOR( j = 0; j < l; j += 2 ) /* group at 0: plain butterflies, no table factor */
2223 : {
2224 : #ifdef ISSUE_1836_replace_overflow_libcom
2225 5991240 : j1 = add_sat( j, l ); /* j1, j2, j3: positions of the other three butterfly inputs */
2226 5991240 : j2 = add_sat( j1, l );
2227 5991240 : j3 = add_sat( j2, l );
2228 5991240 : x0r = add_sat( a[j], a[j1] );
2229 5991240 : x0i = add_sat( a[j + 1], a[j1 + 1] );
2230 5991240 : x1r = sub_sat( a[j], a[j1] );
2231 5991240 : x1i = sub_sat( a[j + 1], a[j1 + 1] );
2232 5991240 : x2r = add_sat( a[j2], a[j3] );
2233 5991240 : x2i = add_sat( a[j2 + 1], a[j3 + 1] );
2234 5991240 : x3r = sub_sat( a[j2], a[j3] );
2235 5991240 : x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
2236 5991240 : a[j] = add_sat( x0r, x2r );
2237 5991240 : move16();
2238 5991240 : a[j + 1] = add_sat( x0i, x2i );
2239 5991240 : move16();
2240 5991240 : a[j2] = sub_sat( x0r, x2r );
2241 5991240 : move16();
2242 5991240 : a[j2 + 1] = sub_sat( x0i, x2i );
2243 5991240 : move16();
2244 5991240 : a[j1] = sub_sat( x1r, x3i );
2245 5991240 : move16();
2246 5991240 : a[j1 + 1] = add_sat( x1i, x3r );
2247 5991240 : move16();
2248 5991240 : a[j3] = add_sat( x1r, x3i );
2249 5991240 : move16();
2250 5991240 : a[j3 + 1] = sub_sat( x1i, x3r );
2251 5991240 : move16();
2252 : #else
2253 : j1 = add_o( j, l, &Overflow ); /* same butterfly as the branch above, written with the *_o basops */
2254 : j2 = add_o( j1, l, &Overflow );
2255 : j3 = add_o( j2, l, &Overflow );
2256 : x0r = add_o( a[j], a[j1], &Overflow );
2257 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2258 : x1r = sub_o( a[j], a[j1], &Overflow );
2259 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2260 : x2r = add_o( a[j2], a[j3], &Overflow );
2261 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2262 : x3r = sub_o( a[j2], a[j3], &Overflow );
2263 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2264 : a[j] = add_o( x0r, x2r, &Overflow );
2265 : move16();
2266 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2267 : move16();
2268 : a[j2] = sub_o( x0r, x2r, &Overflow );
2269 : move16();
2270 : a[j2 + 1] = sub_o( x0i, x2i, &Overflow );
2271 : move16();
2272 : a[j1] = sub_o( x1r, x3i, &Overflow );
2273 : move16();
2274 : a[j1 + 1] = add_o( x1i, x3r, &Overflow );
2275 : move16();
2276 : a[j3] = add_o( x1r, x3i, &Overflow );
2277 : move16();
2278 : a[j3 + 1] = sub_o( x1i, x3r, &Overflow );
2279 : move16();
2280 : #endif
2281 : }
2282 : /* group at m: outputs stored at j1 and j3 are scaled by the single factor wk1r = w[2] */
2283 917886 : wk1r = w[2];
2284 917886 : move32();
2285 917886 : tmp2 = add( l, m ); /* loop end: m + l */
2286 6909126 : FOR( j = m; j < tmp2; j += 2 )
2287 : {
2288 : #ifdef ISSUE_1836_replace_overflow_libcom
2289 5991240 : j1 = add_sat( j, l );
2290 5991240 : j2 = add_sat( j1, l );
2291 5991240 : j3 = add_sat( j2, l );
2292 5991240 : x0r = add_sat( a[j], a[j1] );
2293 5991240 : x0i = add_sat( a[j + 1], a[j1 + 1] );
2294 5991240 : x1r = sub_sat( a[j], a[j1] );
2295 5991240 : x1i = sub_sat( a[j + 1], a[j1 + 1] );
2296 5991240 : x2r = add_sat( a[j2], a[j3] );
2297 5991240 : x2i = add_sat( a[j2 + 1], a[j3 + 1] );
2298 5991240 : x3r = sub_sat( a[j2], a[j3] );
2299 5991240 : x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
2300 5991240 : a[j] = add_sat( x0r, x2r );
2301 5991240 : move16();
2302 5991240 : a[j + 1] = add_sat( x0i, x2i );
2303 5991240 : move16();
2304 5991240 : a[j2] = sub_sat( x2i, x0i ); /* unlike group 0, the parts are swapped here: real out = x2i - x0i */
2305 5991240 : move16();
2306 5991240 : a[j2 + 1] = sub_sat( x0r, x2r ); /* ... and imag out = x0r - x2r */
2307 5991240 : move16();
2308 :
2309 5991240 : x0r = sub_sat( x1r, x3i );
2310 5991240 : x0i = add_sat( x1i, x3r );
2311 5991240 : tmp = sub_sat( x0r, x0i ); /* real out at j1: wk1r * (x0r - x0i) */
2312 5991240 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2313 5991240 : a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2314 5991240 : move16();
2315 :
2316 5991240 : tmp = add_sat( x0r, x0i ); /* imag out at j1: wk1r * (x0r + x0i) */
2317 5991240 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2318 5991240 : a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2319 5991240 : move16();
2320 :
2321 5991240 : x0r = add_sat( x3i, x1r );
2322 5991240 : x0i = sub_sat( x3r, x1i );
2323 5991240 : tmp = sub_sat( x0i, x0r ); /* real out at j3: wk1r * (x0i - x0r) */
2324 5991240 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2325 5991240 : a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2326 5991240 : move16();
2327 :
2328 5991240 : tmp = add_sat( x0i, x0r ); /* imag out at j3: wk1r * (x0i + x0r) */
2329 5991240 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2330 5991240 : a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2331 5991240 : move16();
2332 : #else
2333 : j1 = add_o( j, l, &Overflow );
2334 : j2 = add_o( j1, l, &Overflow );
2335 : j3 = add_o( j2, l, &Overflow );
2336 : x0r = add_o( a[j], a[j1], &Overflow );
2337 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2338 : x1r = sub_o( a[j], a[j1], &Overflow );
2339 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2340 : x2r = add_o( a[j2], a[j3], &Overflow );
2341 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2342 : x3r = sub_o( a[j2], a[j3], &Overflow );
2343 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2344 : a[j] = add_o( x0r, x2r, &Overflow );
2345 : move16();
2346 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2347 : move16();
2348 : a[j2] = sub_o( x2i, x0i, &Overflow ); /* swapped parts, as in the *_sat branch */
2349 : move16();
2350 : a[j2 + 1] = sub_o( x0r, x2r, &Overflow );
2351 : move16();
2352 :
2353 : x0r = sub_o( x1r, x3i, &Overflow );
2354 : x0i = add_o( x1i, x3r, &Overflow );
2355 : tmp = sub_o( x0r, x0i, &Overflow );
2356 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2357 : a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2358 : move16();
2359 :
2360 : tmp = add_o( x0r, x0i, &Overflow );
2361 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2362 : a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2363 : move16();
2364 :
2365 : x0r = add_o( x3i, x1r, &Overflow );
2366 : x0i = sub_o( x3r, x1i, &Overflow );
2367 : tmp = sub_o( x0i, x0r, &Overflow );
2368 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2369 : a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2370 : move16();
2371 :
2372 : tmp = add_o( x0i, x0r, &Overflow );
2373 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2374 : a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2375 : move16();
2376 : #endif
2377 : }
2378 : /* remaining groups, two per outer iteration: one starting at k, one at k + m */
2379 917886 : k1 = 0;
2380 917886 : move16();
2381 917886 : m2 = shl( m, 1 ); /* m2 = 2 * m: stride of the paired-group loop */
2382 1588180 : FOR( k = m2; k < n; k += m2 )
2383 : {
2384 670294 : k1 = add( k1, 2 ); /* table index for wk2; advances by 2 per outer iteration */
2385 670294 : k2 = shl( k1, 1 ); /* table index for wk1: k2 = 2 * k1 */
2386 670294 : wk2r = w[k1];
2387 670294 : move32();
2388 670294 : wk2i = w[k1 + 1];
2389 670294 : move32();
2390 670294 : wk1r = w[k2];
2391 670294 : move32();
2392 670294 : wk1i = w[k2 + 1];
2393 670294 : move32();
2394 670294 : L_tmp = L_shl( Mult_32_32( wk2i, wk1i ), 1 ); /*Q29 */ /* wk3 for the group at k is built from wk1 and wk2i */
2395 670294 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
2396 :
2397 670294 : L_tmp = L_shl( Mult_32_32( wk2i, wk1r ), 1 ); /*Q29 */
2398 670294 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
2399 : /* first group of the pair: j in [k, k + l) */
2400 670294 : tmp2 = add( l, k );
2401 3351470 : FOR( j = k; j < tmp2; j += 2 )
2402 : {
2403 : #ifdef ISSUE_1836_replace_overflow_libcom
2404 2681176 : j1 = add_sat( j, l );
2405 2681176 : j2 = add_sat( j1, l );
2406 2681176 : j3 = add_sat( j2, l );
2407 2681176 : x0r = add_sat( a[j], a[j1] );
2408 2681176 : x0i = add_sat( a[j + 1], a[j1 + 1] );
2409 2681176 : x1r = sub_sat( a[j], a[j1] );
2410 2681176 : x1i = sub_sat( a[j + 1], a[j1 + 1] );
2411 2681176 : x2r = add_sat( a[j2], a[j3] );
2412 2681176 : x2i = add_sat( a[j2 + 1], a[j3 + 1] );
2413 2681176 : x3r = sub_sat( a[j2], a[j3] );
2414 2681176 : x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
2415 2681176 : a[j] = add_sat( x0r, x2r );
2416 2681176 : move16();
2417 2681176 : a[j + 1] = add_sat( x0i, x2i );
2418 2681176 : move16();
2419 :
2420 2681176 : x0r = sub_sat( x0r, x2r );
2421 2681176 : x0i = sub_sat( x0i, x2i );
2422 : /* output at j2: (x0r, x0i) combined with (wk2r, wk2i) */
2423 2681176 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2424 2681176 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
2425 2681176 : a[j2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2426 2681176 : move16();
2427 :
2428 2681176 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2429 2681176 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
2430 2681176 : a[j2 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2431 2681176 : move16();
2432 :
2433 2681176 : x0r = sub_sat( x1r, x3i );
2434 2681176 : x0i = add_sat( x1i, x3r );
2435 : /* output at j1: (x0r, x0i) combined with (wk1r, wk1i) */
2436 2681176 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2437 2681176 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2438 2681176 : a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2439 2681176 : move16();
2440 :
2441 2681176 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2442 2681176 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2443 2681176 : a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2444 2681176 : move16();
2445 : /* NOTE(review): every other butterfly in this function forms this sum/difference with a 16-bit add/sub basop; here it goes through 32-bit L_add/L_sub and extract_l, which presumably keeps only the low 16 bits instead of saturating. Present in both preprocessor branches - confirm it is intentional before touching it. */
2446 2681176 : L_x0r = L_add( (Word32) x1r, (Word32) x3i );
2447 2681176 : L_x0i = L_sub( (Word32) x1i, (Word32) x3r );
2448 2681176 : x0r = extract_l( L_x0r );
2449 2681176 : x0i = extract_l( L_x0i );
2450 2681176 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2451 2681176 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2452 2681176 : a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2453 2681176 : move16();
2454 :
2455 2681176 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2456 2681176 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2457 2681176 : a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2458 2681176 : move16();
2459 : #else
2460 : j1 = add_o( j, l, &Overflow );
2461 : j2 = add_o( j1, l, &Overflow );
2462 : j3 = add_o( j2, l, &Overflow );
2463 : x0r = add_o( a[j], a[j1], &Overflow );
2464 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2465 : x1r = sub_o( a[j], a[j1], &Overflow );
2466 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2467 : x2r = add_o( a[j2], a[j3], &Overflow );
2468 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2469 : x3r = sub_o( a[j2], a[j3], &Overflow );
2470 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2471 : a[j] = add_o( x0r, x2r, &Overflow );
2472 : move16();
2473 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2474 : move16();
2475 :
2476 : x0r = sub_o( x0r, x2r, &Overflow );
2477 : x0i = sub_o( x0i, x2i, &Overflow );
2478 :
2479 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2480 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
2481 : a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2482 : move16();
2483 :
2484 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2485 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
2486 : a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2487 : move16();
2488 :
2489 : x0r = sub_o( x1r, x3i, &Overflow );
2490 : x0i = add_o( x1i, x3r, &Overflow );
2491 :
2492 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2493 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2494 : a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2495 : move16();
2496 :
2497 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2498 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2499 : a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2500 : move16();
2501 : /* NOTE(review): same L_add/L_sub + extract_l path as in the *_sat branch (no add_o/sub_o here) - confirm intent */
2502 : L_x0r = L_add( (Word32) x1r, (Word32) x3i );
2503 : L_x0i = L_sub( (Word32) x1i, (Word32) x3r );
2504 : x0r = extract_l( L_x0r );
2505 : x0i = extract_l( L_x0i );
2506 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2507 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2508 : a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2509 : move16();
2510 :
2511 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2512 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2513 : a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2514 : move16();
2515 : #endif
2516 : }
2517 : /* second group of the pair: reload wk1 from w[k2 + 2], w[k2 + 3] and rebuild wk3, this time from wk2r */
2518 670294 : wk1r = w[k2 + 2];
2519 670294 : move32();
2520 670294 : wk1i = w[k2 + 3];
2521 670294 : move32();
2522 : #ifdef ISSUE_1836_replace_overflow_libcom
2523 670294 : L_tmp = L_shl_sat( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
2524 670294 : wk3r = L_sub_sat( wk1r, L_shl_sat( L_tmp, 1 ) ); /*Q30 */
2525 :
2526 670294 : L_tmp = L_shl_sat( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
2527 670294 : wk3i = L_sub_sat( L_shl_sat( L_tmp, 1 ), wk1i ); /*Q30 */
2528 670294 : tmp2 = add( l, add( k, m ) ); /* loop end: k + m + l */
2529 : #else
2530 : L_tmp = L_shl_o( Mult_32_32( wk2r, wk1i ), 1, &Overflow ); /*Q29 */
2531 : wk3r = L_sub_o( wk1r, L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q30 */
2532 :
2533 : L_tmp = L_shl_o( Mult_32_32( wk2r, wk1r ), 1, &Overflow ); /*Q29 */
2534 : wk3i = L_sub_o( L_shl_o( L_tmp, 1, &Overflow ), wk1i, &Overflow ); /*Q30 */
2535 : tmp2 = add( l, add( k, m ) );
2536 : #endif
2537 3351470 : FOR( j = add( k, m ); j < tmp2; j += 2 )
2538 : {
2539 : #ifdef ISSUE_1836_replace_overflow_libcom
2540 2681176 : j1 = add_sat( j, l );
2541 2681176 : j2 = add_sat( j1, l );
2542 2681176 : j3 = add_sat( j2, l );
2543 2681176 : x0r = add_sat( a[j], a[j1] );
2544 2681176 : x0i = add_sat( a[j + 1], a[j1 + 1] );
2545 2681176 : x1r = sub_sat( a[j], a[j1] );
2546 2681176 : x1i = sub_sat( a[j + 1], a[j1 + 1] );
2547 2681176 : x2r = add_sat( a[j2], a[j3] );
2548 2681176 : x2i = add_sat( a[j2 + 1], a[j3 + 1] );
2549 2681176 : x3r = sub_sat( a[j2], a[j3] );
2550 2681176 : x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
2551 2681176 : a[j] = add_sat( x0r, x2r );
2552 2681176 : move16();
2553 2681176 : a[j + 1] = add_sat( x0i, x2i );
2554 2681176 : move16();
2555 :
2556 2681176 : x0r = sub_sat( x0r, x2r );
2557 2681176 : x0i = sub_sat( x0i, x2i );
2558 : /* output at j2: roles of wk2r/wk2i are swapped relative to the group at k, and the wk2i operand is negated first */
2559 2681176 : tmp = negate( x0r );
2560 2681176 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2561 2681176 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2562 2681176 : a[j2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2563 2681176 : move16();
2564 :
2565 2681176 : tmp = negate( x0i );
2566 2681176 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2567 2681176 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2568 2681176 : a[j2 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2569 2681176 : move16();
2570 :
2571 2681176 : x0r = sub_sat( x1r, x3i );
2572 2681176 : x0i = add_sat( x1i, x3r );
2573 : /* output at j1: (x0r, x0i) combined with the reloaded (wk1r, wk1i) */
2574 2681176 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2575 2681176 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2576 2681176 : a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2577 2681176 : move16();
2578 :
2579 2681176 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2580 2681176 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2581 2681176 : a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2582 2681176 : move16();
2583 :
2584 2681176 : x0r = add_sat( x1r, x3i ); /* saturating 16-bit form here, unlike the L_add/extract_l path used for the group at k */
2585 2681176 : x0i = sub_sat( x1i, x3r );
2586 : /* output at j3: (x0r, x0i) combined with the rebuilt (wk3r, wk3i) */
2587 2681176 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2588 2681176 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2589 2681176 : a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2590 2681176 : move16();
2591 :
2592 2681176 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2593 2681176 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2594 2681176 : a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2595 2681176 : move16();
2596 : #else
2597 : j1 = add_o( j, l, &Overflow );
2598 : j2 = add_o( j1, l, &Overflow );
2599 : j3 = add_o( j2, l, &Overflow );
2600 : x0r = add_o( a[j], a[j1], &Overflow );
2601 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2602 : x1r = sub_o( a[j], a[j1], &Overflow );
2603 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2604 : x2r = add_o( a[j2], a[j3], &Overflow );
2605 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2606 : x3r = sub_o( a[j2], a[j3], &Overflow );
2607 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2608 : a[j] = add_o( x0r, x2r, &Overflow );
2609 : move16();
2610 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2611 : move16();
2612 :
2613 : x0r = sub_o( x0r, x2r, &Overflow );
2614 : x0i = sub_o( x0i, x2i, &Overflow );
2615 :
2616 : tmp = negate( x0r );
2617 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2618 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2619 : a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2620 : move16();
2621 :
2622 : tmp = negate( x0i );
2623 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2624 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2625 : a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2626 : move16();
2627 :
2628 : x0r = sub_o( x1r, x3i, &Overflow );
2629 : x0i = add_o( x1i, x3r, &Overflow );
2630 :
2631 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2632 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2633 : a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2634 : move16();
2635 :
2636 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2637 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2638 : a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2639 : move16();
2640 :
2641 : x0r = add_o( x1r, x3i, &Overflow );
2642 : x0i = sub_o( x1i, x3r, &Overflow );
2643 :
2644 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2645 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2646 : a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2647 : move16();
2648 :
2649 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2650 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2651 : a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2652 : move16();
2653 : #endif
2654 : }
2655 : }
2656 :
2657 917886 : return;
2658 : }
2659 :
2660 75 : void fft3_fx( const Word16 X[] /*Qx*/, Word16 Y[] /*Qx*/, const Word16 n )
2661 : {
2662 : Word16 Z[PH_ECU_SPEC_SIZE];
2663 : Word16 *Z0, *Z1, *Z2;
2664 : Word16 *z0, *z1, *z2;
2665 : const Word16 *x;
2666 75 : const Word16 *t_sin = sincos_t_rad3_fx; // Q15
2667 : Word16 m, mMinus1, step;
2668 : Word16 i, l;
2669 : Word16 c1_ind, s1_ind, c2_ind, s2_ind;
2670 : Word16 c1_step, s1_step, c2_step, s2_step;
2671 : Word16 *RY, *IY, *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
2672 : Word32 acc;
2673 : Word16 mBy2, orderMinus1;
2674 : const Word16 *pPhaseTbl;
2675 :
2676 : /* Determine the order of the transform, the length of decimated */
2677 : /* transforms m, and the step for the sine and cosine tables. */
2678 75 : SWITCH( n )
2679 : {
2680 25 : case 1536:
2681 25 : orderMinus1 = 9 - 1;
2682 25 : move16();
2683 25 : m = 512;
2684 25 : move16();
2685 25 : step = 1;
2686 25 : move16();
2687 25 : pPhaseTbl = FFT_W256;
2688 25 : BREAK;
2689 50 : case 384:
2690 50 : orderMinus1 = 7 - 1;
2691 50 : move16();
2692 50 : m = 128;
2693 50 : move16();
2694 50 : step = 4;
2695 50 : move16();
2696 50 : pPhaseTbl = FFT_W64;
2697 50 : BREAK;
2698 0 : default:
2699 0 : orderMinus1 = 7 - 1;
2700 0 : move16();
2701 0 : m = 128;
2702 0 : move16();
2703 0 : step = 4;
2704 0 : move16();
2705 0 : pPhaseTbl = FFT_W64;
2706 0 : BREAK;
2707 : }
2708 :
2709 : /* Compose decimated sequences X[3i], X[3i+1],X[3i+2] */
2710 : /* compute their FFT of length m. */
2711 75 : Z0 = &Z[0];
2712 75 : z0 = &Z0[0];
2713 75 : Z1 = &Z0[m];
2714 75 : z1 = &Z1[0]; /* Z1 = &Z[ m]; */
2715 75 : Z2 = &Z1[m];
2716 75 : z2 = &Z2[0]; /* Z2 = &Z[2m]; */
2717 75 : x = &X[0]; // Qx
2718 19275 : FOR( i = 0; i < m; i++ )
2719 : {
2720 19200 : *z0++ = *x++; /* Z0[i] = X[3i]; Qx */
2721 19200 : move16();
2722 19200 : *z1++ = *x++; /* Z1[i] = X[3i+1]; Qx */
2723 19200 : move16();
2724 19200 : *z2++ = *x++; /* Z2[i] = X[3i+2]; Qx */
2725 19200 : move16();
2726 : }
2727 75 : mBy2 = shr( m, 1 );
2728 75 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z0, Z0, 1 );
2729 75 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z1, Z1, 1 );
2730 75 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z2, Z2, 1 );
2731 :
2732 : /* Butterflies of order 3. */
2733 : /* pointer initialization */
2734 75 : mMinus1 = sub( m, 1 );
2735 75 : RY = &Y[0]; // Qx
2736 75 : IY = &Y[n]; // Qx
2737 75 : IY--; /* Decrement the address counter.*/
2738 75 : RZ0 = &Z0[0]; // Qx
2739 75 : IZ0 = &Z0[mMinus1];
2740 75 : RZ1 = &Z1[0]; // Qx
2741 75 : IZ1 = &Z1[mMinus1]; // Qx
2742 75 : RZ2 = &Z2[0]; // Qx
2743 75 : IZ2 = &Z2[mMinus1]; // Qx
2744 :
2745 75 : c1_step = negate( step );
2746 75 : s1_step = step;
2747 75 : move16();
2748 75 : c2_step = shl( c1_step, 1 );
2749 75 : s2_step = shl( s1_step, 1 );
2750 75 : c1_ind = add( T_SIN_PI_2, c1_step );
2751 75 : s1_ind = s1_step;
2752 75 : move16();
2753 75 : c2_ind = add( T_SIN_PI_2, c2_step );
2754 75 : s2_ind = s2_step;
2755 75 : move16();
2756 :
2757 : /* special case: i = 0 */
2758 75 : acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ ); // Q15 + Qx
2759 75 : acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ ); // Q15 + Qx
2760 75 : *RY++ = mac_r_sat( acc, *RZ2++, 0x4000 /*1.Q14*/ ); // Qx
2761 75 : move16();
2762 :
2763 : /* first 3/12-- from 1 to (3*m/8)-1 */
2764 75 : l = sub( shr( n, 3 ), 1 ); /* (3*m/8) - 1 = (n/8) - 1 */
2765 7200 : FOR( i = 0; i < l; i++ )
2766 : {
2767 7125 : acc = L_shl( *RZ0++, 15 ); /* Align with the following non-fractional mode so as to gain 1 more bit headroom. Q15 + Qx*/
2768 7125 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2769 7125 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2770 7125 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2771 7125 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2772 7125 : *RY++ = round_fx( acc ); /* bit growth = 1 (compensated by non-fractional mode MAC). Qx - 1*/
2773 7125 : move16();
2774 :
2775 7125 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2776 7125 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2777 7125 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2778 7125 : acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2779 7125 : acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2780 7125 : *IY-- = round_fx( acc ); // Qx - 1
2781 7125 : move16();
2782 :
2783 7125 : c1_ind = add( c1_ind, c1_step );
2784 7125 : s1_ind = add( s1_ind, s1_step );
2785 7125 : c2_ind = add( c2_ind, c2_step );
2786 7125 : s2_ind = add( s2_ind, s2_step );
2787 : }
2788 :
2789 : /* next 1/12-- from (3*m/8) to (4*m/8)-1 */
2790 75 : l = shr( m, 3 ); /* (4*m/8) - (3*m/8) = m/8 */
2791 2475 : FOR( i = 0; i < l; i++ )
2792 : {
2793 2400 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2794 2400 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2795 2400 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2796 2400 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2797 2400 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2798 2400 : *RY++ = round_fx( acc ); // Qx - 1
2799 2400 : move16();
2800 :
2801 2400 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2802 2400 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2803 2400 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2804 2400 : acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2805 2400 : acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2806 2400 : *IY-- = round_fx( acc ); // Qx - 1
2807 2400 : move16();
2808 :
2809 2400 : c1_ind = add( c1_ind, c1_step );
2810 2400 : s1_ind = add( s1_ind, s1_step );
2811 2400 : c2_ind = sub( c2_ind, c2_step );
2812 2400 : s2_ind = sub( s2_ind, s2_step );
2813 : }
2814 :
2815 : /* special case: i = m/2 i.e. 1/3 */
2816 75 : acc = L_shl( *RZ0--, 15 ); // Q15 + Qx
2817 75 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
2818 75 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2819 75 : *RY++ = round_fx( acc ); // Qx - 1
2820 75 : move16();
2821 :
2822 75 : acc = 0;
2823 75 : move32();
2824 75 : acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
2825 75 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
2826 75 : *IY-- = round_fx( acc ); // Qx - 1
2827 75 : move16();
2828 75 : IZ0++;
2829 75 : IZ1++;
2830 75 : IZ2++;
2831 :
2832 75 : c1_ind = add( c1_ind, c1_step );
2833 75 : s1_ind = add( s1_ind, s1_step );
2834 75 : c2_ind = sub( c2_ind, c2_step );
2835 75 : s2_ind = sub( s2_ind, s2_step );
2836 :
2837 : /* next 2/12-- from ((m/2)+1) to (6*m/8)-1 */
2838 75 : l = sub( shr( m, 2 ), 1 ); /* (6*m/8) - ((m/2)+1) = m/4 - 1 */
2839 4800 : FOR( i = 0; i < l; i++ )
2840 : {
2841 4725 : acc = L_shl( *RZ0--, 15 ); // Q15 + Qx
2842 4725 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2843 4725 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2844 4725 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2845 4725 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2846 4725 : *RY++ = round_fx( acc ); // Qx - 1
2847 4725 : move16();
2848 :
2849 4725 : acc = L_mult0( *IZ0++, -32768 ); // Q15 + Qx
2850 4725 : acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
2851 4725 : acc = L_msu0( acc, *IZ1++, t_sin[c1_ind] ); // Q15 + Qx
2852 4725 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
2853 4725 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Q15 + Qx
2854 4725 : *IY-- = round_fx( acc ); // Qx - 1
2855 4725 : move16();
2856 :
2857 4725 : c1_ind = add( c1_ind, c1_step );
2858 4725 : s1_ind = add( s1_ind, s1_step );
2859 4725 : c2_ind = sub( c2_ind, c2_step );
2860 4725 : s2_ind = sub( s2_ind, s2_step );
2861 : }
2862 :
2863 : /*--------------------------half--------------------------// */
2864 : /* next 2/12-- from (6*m/8) to (8*m/8) - 1 */
2865 75 : l = shr( m, 2 );
2866 4875 : FOR( i = 0; i < l; i++ )
2867 : {
2868 4800 : acc = L_shl( *RZ0--, 15 ); // Q15 + Qx
2869 4800 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2870 4800 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2871 4800 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2872 4800 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2873 4800 : *RY++ = round_fx( acc ); // Qx - 1
2874 4800 : move16();
2875 :
2876 4800 : acc = L_mult0( *IZ0++, -32768 ); // Q15 + Qx
2877 4800 : acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
2878 4800 : acc = L_mac0( acc, *IZ1++, t_sin[c1_ind] ); // Q15 + Qx
2879 4800 : acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
2880 4800 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Q15 + Qx
2881 4800 : *IY-- = round_fx( acc ); // Qx - 1
2882 4800 : move16();
2883 :
2884 4800 : c1_ind = sub( c1_ind, c1_step );
2885 4800 : s1_ind = sub( s1_ind, s1_step );
2886 4800 : c2_ind = add( c2_ind, c2_step );
2887 4800 : s2_ind = add( s2_ind, s2_step );
2888 : }
2889 :
2890 : /* special case: i = m, i.e 2/3 */
2891 75 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2892 75 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
2893 75 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2894 75 : *RY++ = round_fx( acc ); // Qx - 1
2895 75 : move16();
2896 :
2897 75 : acc = L_deposit_l( 0 );
2898 75 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2899 75 : acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2900 75 : *IY-- = round_fx( acc ); // Qx - 1
2901 75 : move16();
2902 75 : IZ0--; /* Just decrement the address counter */
2903 75 : IZ1--;
2904 75 : IZ2--;
2905 :
2906 75 : c1_ind = sub( c1_ind, c1_step );
2907 75 : s1_ind = sub( s1_ind, s1_step );
2908 75 : c2_ind = add( c2_ind, c2_step );
2909 75 : s2_ind = add( s2_ind, s2_step );
2910 :
2911 : /* next 1/12-- from (m + 1) to (9*m/8) - 1 */
2912 75 : l = sub( shr( m, 3 ), 1 ); /* (9*m/8) - (m +1) = m/8 - 1 */
2913 2400 : FOR( i = 0; i < l; i++ )
2914 : {
2915 2325 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2916 2325 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2917 2325 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2918 2325 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2919 2325 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2920 2325 : *RY++ = round_fx( acc ); // Qx - 1
2921 2325 : move16();
2922 :
2923 2325 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2924 2325 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2925 2325 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2926 2325 : acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2927 2325 : acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2928 2325 : *IY-- = round_fx( acc ); // Qx - 1
2929 2325 : move16();
2930 :
2931 2325 : c1_ind = sub( c1_ind, c1_step );
2932 2325 : s1_ind = sub( s1_ind, s1_step );
2933 2325 : c2_ind = add( c2_ind, c2_step );
2934 2325 : s2_ind = add( s2_ind, s2_step );
2935 : }
2936 :
2937 : /* last 3/12-- from (9*m/8) to (12*m/8) - 1 */
2938 75 : l = shr( n, 3 ); /* (12*m/8) - (9*m/8) = 3*m/8 = n/8 */
2939 7275 : FOR( i = 0; i < l; i++ )
2940 : {
2941 7200 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2942 7200 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2943 7200 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2944 7200 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2945 7200 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2946 7200 : *RY++ = round_fx( acc ); // Qx - 1
2947 7200 : move16();
2948 :
2949 7200 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2950 7200 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2951 7200 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2952 7200 : acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2953 7200 : acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2954 7200 : *IY-- = round_fx( acc ); // Qx - 1
2955 7200 : move16();
2956 :
2957 7200 : c1_ind = sub( c1_ind, c1_step );
2958 7200 : s1_ind = sub( s1_ind, s1_step );
2959 7200 : c2_ind = sub( c2_ind, c2_step );
2960 7200 : s2_ind = sub( s2_ind, s2_step );
2961 : }
2962 :
2963 : /* special case: i = 3*m/2 */
2964 75 : acc = L_shl( *RZ0, 15 ); // Q15 + Qx
2965 75 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
2966 75 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2967 75 : *RY = round_fx( acc ); // Qx - 1
2968 75 : move16();
2969 :
2970 75 : return;
2971 : }
2972 :
2973 :
2974 103 : void ifft3_fx( const Word16 Z[] /*Qx*/, Word16 X[] /*Qx*/, const Word16 n ) /* Length-n inverse transform (n = 3*m): order-3 inverse butterflies build three length-m blocks Y0..Y2 from Z, each block is inverse-FFT'd in place, and the results are interleaved into X. */
2975 : { /* Z: input, read-only; X: output, 3*m values are written; n: 1536 (m = 512) or 384 (m = 128). */
2976 : Word16 Y[PH_ECU_SPEC_SIZE]; /* Scratch holding Y0 | Y1 | Y2 back to back (3*m entries used). NOTE(review): PH_ECU_SPEC_SIZE is defined elsewhere - confirm it is >= 3*m for n = 1536. */
2977 103 : const Word16 *t_sin = sincos_t_rad3_fx; // Q15 table, indexed by the c*_ind / s*_ind counters below
2978 : Word16 m, mMinus1, step, step2; /* m: length of each sub-transform; step / step2: table index strides used for Y1 / Y2 */
2979 : Word16 i, l; /* loop counter and loop length */
2980 : Word16 c0_ind, s0_ind, c1_ind, s1_ind, c2_ind, s2_ind; /* table indices of the "c" and "s" factors applied to the Z0 / Z1 / Z2 streams */
2981 : const Word16 *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2; /* running read pointers into Z */
2982 : const Word16 *RZ00, *IZ00, *RZ10, *IZ10, *RZ20, *IZ20; /* start ("zero") positions the running pointers are reset to before each Yk pass */
2983 : Word16 *RY0, *IY0, *RY1, *IY1, *RY2, *IY2, *y0, *y1, *y2, *pX; /* write pointers into Y, read pointers for the interleave, and the output pointer */
2984 : Word32 acc; /* 32-bit accumulator for one output value */
2985 : Word16 mBy2, orderMinus1, nMinusMBy2; /* m/2, FFT order - 1, (n - m)/2 */
2986 : const Word16 *pPhaseTbl; /* phase table handed to r_fft_fx_lc */
2987 :
2988 : /* Determine the order of the transform, the length of decimated */
2989 : /* transforms m, and the step for the sine and cosine tables. */
2990 103 : SWITCH( n )
2991 : {
2992 103 : case 1536:
2993 103 : orderMinus1 = 9 - 1;
2994 103 : move16();
2995 103 : m = 512;
2996 103 : move16();
2997 103 : step = 1;
2998 103 : move16();
2999 103 : pPhaseTbl = FFT_W256;
3000 103 : BREAK;
3001 0 : case 384:
3002 0 : orderMinus1 = 7 - 1;
3003 0 : move16();
3004 0 : m = 128;
3005 0 : move16();
3006 0 : step = 4;
3007 0 : move16();
3008 0 : pPhaseTbl = FFT_W64;
3009 0 : BREAK;
3010 0 : default: /* NOTE(review): any n other than 1536 / 384 silently gets the n = 384 setup, while the pointer offsets below are still derived from the caller's n. */
3011 0 : orderMinus1 = 7 - 1;
3012 0 : move16();
3013 0 : m = 128;
3014 0 : move16();
3015 0 : step = 4;
3016 0 : move16();
3017 0 : pPhaseTbl = FFT_W64;
3018 0 : BREAK;
3019 : }
3020 :
3021 103 : nMinusMBy2 = shr( sub( n, m ), 1 ); /* (n - m) / 2; equals m whenever n == 3*m (true for 1536/512 and 384/128) */
3022 103 : mMinus1 = sub( m, 1 );
3023 : /* pointer initialization: each Yk block gets its "R" values written upwards from the block start and its "I" values written downwards from the block end */
3024 103 : RY0 = &Y[0]; // Qx
3025 103 : IY0 = &Y[m]; // Qx; one past block 0, decremented before the first store
3026 103 : RY1 = &RY0[m]; // Qx
3027 103 : IY1 = &RY1[mMinus1]; // Qx; last element of block 1
3028 103 : RY2 = &RY1[m]; // Qx
3029 103 : IY2 = &RY2[mMinus1]; // Qx; last element of block 2
3030 :
3031 103 : RZ00 = &Z[0]; /* The zero positions of the pointers Qx*/
3032 103 : RZ10 = &RZ00[m]; // Qx; RZ1 is read upwards (RZ1++)
3033 103 : RZ20 = &RZ00[nMinusMBy2]; // Qx; RZ2 is read downwards (RZ2--)
3034 103 : IZ00 = &Z[n]; // Qx; one past the end of Z, always decremented before it is dereferenced
3035 103 : IZ10 = &IZ00[-m]; // Qx; IZ1 is read downwards (IZ1--)
3036 103 : IZ20 = &IZ00[-nMinusMBy2]; // Qx; IZ2 is read upwards (IZ2++)
3037 :
3038 103 : RZ0 = RZ00; /* Reset the pointers to zero positions. */
3039 103 : RZ1 = RZ10;
3040 103 : RZ2 = RZ20;
3041 103 : IZ0 = IZ00;
3042 103 : IZ1 = IZ10;
3043 103 : IZ2 = IZ20;
3044 :
3045 : /* Inverse butterflies of order 3. */
3046 :
3047 : /* Construction of Y0: R value = RZ0 + RZ1 + RZ2, I value = IZ0 + IZ1 - IZ2, every term weighted by 0x4000. NOTE(review): acc is annotated Qx + Q15 and then rounded to 16 bits, so these outputs look like Qx - 1 (same as the round_fx outputs of Y1 / Y2) rather than Qx - confirm. */
3048 103 : acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ ); // Qx + Q15
3049 103 : acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ ); // Qx + Q15
3050 103 : *RY0++ = mac_r( acc, *RZ2--, 0x4000 /*1.Q14*/ ); // Qx
3051 103 : move16();
3052 103 : IZ0--; /* No I value is produced for the first bin: the I pointers are only stepped to their first usable element. */
3053 103 : IZ1--;
3054 103 : IZ2++;
3055 103 : IY0--;
3056 :
3057 103 : l = sub( shr( m, 1 ), 1 ); /* bins 1 .. m/2 - 1 */
3058 26368 : FOR( i = 0; i < l; i++ )
3059 : {
3060 26265 : acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ ); // Qx + Q15
3061 26265 : acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ ); // Qx + Q15
3062 26265 : *RY0++ = mac_r( acc, *RZ2--, 0x4000 /*1.Q14*/ ); // Qx
3063 26265 : move16();
3064 :
3065 26265 : acc = L_mult( *IZ0--, 0x4000 /*1.Q14*/ ); // Qx + Q15
3066 26265 : acc = L_mac( acc, *IZ1--, 0x4000 /*1.Q14*/ ); // Qx + Q15
3067 26265 : *IY0-- = msu_r( acc, *IZ2++, 0x4000 /*1.Q14*/ ); // Qx; IZ2 enters with a minus sign
3068 26265 : move16();
3069 : }
3070 :
3071 : /* m/2: last R value of Y0; no I value is written for this bin */
3072 103 : acc = L_mult( *RZ0, 0x4000 /*1.Q14*/ ); // Qx + Q15
3073 103 : acc = L_mac( acc, *RZ1, 0x4000 /*1.Q14*/ ); // Qx + Q15
3074 103 : *RY0++ = mac_r( acc, *RZ2, 0x4000 /*1.Q14*/ ); // Qx
3075 103 : move16();
3076 :
3077 :
3078 : /* Construction of Y1: same input walk as Y0, but every term is weighted by a t_sin entry; the table indices advance by step per bin. */
3079 103 : c0_ind = T_SIN_PI_2; /* presumably the table index of pi/2, i.e. the factor for angle 0 - verify against the table definition */
3080 103 : s0_ind = 0;
3081 103 : c1_ind = T_SIN_PI_2 * 1 / 3; /* one third of T_SIN_PI_2 (integer arithmetic) */
3082 103 : s1_ind = T_SIN_PI_2 * 2 / 3; /* two thirds of T_SIN_PI_2 (integer arithmetic) */
3083 103 : c2_ind = T_SIN_PI_2 * 1 / 3;
3084 103 : s2_ind = T_SIN_PI_2 * 2 / 3;
3085 :
3086 103 : RZ0 = RZ00; /* Reset pointers to zero positions. */
3087 103 : RZ1 = RZ10;
3088 103 : RZ2 = RZ20;
3089 103 : IZ0 = IZ00;
3090 103 : IZ1 = IZ10;
3091 103 : IZ2 = IZ20;
3092 103 : acc = L_mult0( *RZ0++, t_sin[c0_ind] ); // Qx + Q15; first bin: R value only
3093 103 : acc = L_msu0( acc, *RZ1++, t_sin[c1_ind] ); // Qx + Q15
3094 103 : acc = L_msu0( acc, *RZ2--, t_sin[c2_ind] ); // Qx + Q15
3095 103 : IZ0--; /* no IZ0 term is accumulated for the first bin; the pointer is only stepped */
3096 103 : acc = L_msu0( acc, *IZ1--, t_sin[s1_ind] ); // Qx + Q15
3097 103 : acc = L_msu0( acc, *IZ2++, t_sin[s2_ind] ); // Qx + Q15
3098 103 : *RY1++ = round_fx( acc ); // Qx - 1
3099 103 : move16();
3100 :
3101 103 : c0_ind = sub( c0_ind, step );
3102 103 : s0_ind = add( s0_ind, step );
3103 103 : c1_ind = add( c1_ind, step );
3104 103 : s1_ind = sub( s1_ind, step );
3105 103 : c2_ind = sub( c2_ind, step );
3106 103 : s2_ind = add( s2_ind, step );
3107 :
3108 : /* From 1 to (m/4) - 1. R = RZ0*c0 - RZ1*c1 - RZ2*c2 - IZ0*s0 - IZ1*s1 - IZ2*s2; I = IZ0*c0 - IZ1*c1 + IZ2*c2 + RZ0*s0 + RZ1*s1 - RZ2*s2. The R line reads without stepping; the I line steps all six input pointers. */
3109 103 : l = sub( shr( m, 2 ), 1 );
3110 13184 : FOR( i = 0; i < l; i++ )
3111 : {
3112 13081 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3113 13081 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3114 13081 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3115 13081 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3116 13081 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3117 13081 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3118 13081 : *RY1++ = round_fx( acc ); // Qx - 1
3119 13081 : move16();
3120 :
3121 13081 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3122 13081 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3123 13081 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3124 13081 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3125 13081 : acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3126 13081 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3127 13081 : *IY1-- = round_fx( acc ); // Qx - 1
3128 13081 : move16();
3129 :
3130 13081 : c0_ind = sub( c0_ind, step );
3131 13081 : s0_ind = add( s0_ind, step );
3132 13081 : c1_ind = add( c1_ind, step );
3133 13081 : s1_ind = sub( s1_ind, step );
3134 13081 : c2_ind = sub( c2_ind, step );
3135 13081 : s2_ind = add( s2_ind, step );
3136 : }
3137 :
3138 : /* From m/4 to m/2 -1. Differs from the previous loop only in the sign of the two c2 terms and in the direction in which c2_ind / s2_ind move. */
3139 103 : l = shr( m, 2 ); /* m/2 - m/4 = m/4 */
3140 13287 : FOR( i = 0; i < l; i++ )
3141 : {
3142 13184 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3143 13184 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3144 13184 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3145 13184 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3146 13184 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3147 13184 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3148 13184 : *RY1++ = round_fx( acc ); // Qx - 1
3149 13184 : move16();
3150 :
3151 13184 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3152 13184 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3153 13184 : acc = L_msu0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3154 13184 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3155 13184 : acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3156 13184 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3157 13184 : *IY1-- = round_fx( acc ); // Qx - 1
3158 13184 : move16();
3159 :
3160 13184 : c0_ind = sub( c0_ind, step );
3161 13184 : s0_ind = add( s0_ind, step );
3162 13184 : c1_ind = add( c1_ind, step );
3163 13184 : s1_ind = sub( s1_ind, step );
3164 13184 : c2_ind = add( c2_ind, step );
3165 13184 : s2_ind = sub( s2_ind, step );
3166 : }
3167 :
3168 : /* m/2: last R value of Y1, same sign pattern as the preceding loop; no I value is written */
3169 103 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3170 103 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3171 103 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3172 103 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3173 103 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3174 103 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3175 103 : *RY1++ = round_fx( acc ); // Qx - 1
3176 103 : move16();
3177 :
3178 : /* Construction of Y2: same scheme as Y1, but the table indices advance by step2 = 2*step per bin, so the range 0 .. m/2 is split into more segments. */
3179 103 : c0_ind = T_SIN_PI_2;
3180 103 : s0_ind = 0;
3181 103 : c1_ind = T_SIN_PI_2 * 1 / 3;
3182 103 : s1_ind = T_SIN_PI_2 * 2 / 3;
3183 103 : c2_ind = T_SIN_PI_2 * 1 / 3;
3184 103 : s2_ind = T_SIN_PI_2 * 2 / 3;
3185 103 : step2 = shl( step, 1 ); /* 2 * step */
3186 :
3187 103 : RZ0 = RZ00; /* Reset pointers to zero positions. */
3188 103 : RZ1 = RZ10;
3189 103 : RZ2 = RZ20;
3190 103 : IZ0 = IZ00;
3191 103 : IZ1 = IZ10;
3192 103 : IZ2 = IZ20;
3193 103 : acc = L_mult0( *RZ0++, t_sin[c0_ind] ); // Qx + Q15; first bin: R value only
3194 103 : acc = L_msu0( acc, *RZ1++, t_sin[c1_ind] ); // Qx + Q15
3195 103 : acc = L_msu0( acc, *RZ2--, t_sin[c2_ind] ); // Qx + Q15
3196 103 : IZ0--; /* no IZ0 term is accumulated for the first bin; the pointer is only stepped */
3197 103 : acc = L_mac0( acc, *IZ1--, t_sin[s1_ind] ); // Qx + Q15
3198 103 : acc = L_mac0( acc, *IZ2++, t_sin[s2_ind] ); // Qx + Q15
3199 103 : *RY2++ = round_fx( acc ); // Qx - 1
3200 103 : move16();
3201 :
3202 103 : c0_ind = sub( c0_ind, step2 );
3203 103 : s0_ind = add( s0_ind, step2 );
3204 103 : c1_ind = sub( c1_ind, step2 );
3205 103 : s1_ind = add( s1_ind, step2 );
3206 103 : c2_ind = add( c2_ind, step2 );
3207 103 : s2_ind = sub( s2_ind, step2 );
3208 :
3209 : /* From 1 to (m/8) - 1. R = RZ0*c0 - RZ1*c1 - RZ2*c2 - IZ0*s0 + IZ1*s1 + IZ2*s2; I = IZ0*c0 - IZ1*c1 + IZ2*c2 + RZ0*s0 - RZ1*s1 + RZ2*s2. */
3210 103 : l = sub( shr( m, 3 ), 1 ); /* m/8 - 1. */
3211 6592 : FOR( i = 0; i < l; i++ )
3212 : {
3213 6489 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3214 6489 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3215 6489 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3216 6489 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3217 6489 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3218 6489 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3219 6489 : *RY2++ = round_fx( acc ); // Qx - 1
3220 6489 : move16();
3221 :
3222 6489 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3223 6489 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3224 6489 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3225 6489 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3226 6489 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3227 6489 : acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3228 6489 : *IY2-- = round_fx( acc ); // Qx - 1
3229 6489 : move16();
3230 :
3231 6489 : c0_ind = sub( c0_ind, step2 );
3232 6489 : s0_ind = add( s0_ind, step2 );
3233 6489 : c1_ind = sub( c1_ind, step2 );
3234 6489 : s1_ind = add( s1_ind, step2 );
3235 6489 : c2_ind = add( c2_ind, step2 );
3236 6489 : s2_ind = sub( s2_ind, step2 );
3237 : }
3238 :
3239 : /* From (m/8) to (m/4) - 1. Differs from the previous loop only in the sign of the two c1 terms and in the direction in which c1_ind / s1_ind move. */
3240 103 : l = shr( m, 3 ); /* m/4 - m/8 = m/8 */
3241 6695 : FOR( i = 0; i < l; i++ )
3242 : {
3243 6592 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3244 6592 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3245 6592 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3246 6592 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3247 6592 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3248 6592 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3249 6592 : *RY2++ = round_fx( acc ); // Qx - 1
3250 6592 : move16();
3251 :
3252 6592 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3253 6592 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3254 6592 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3255 6592 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3256 6592 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3257 6592 : acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3258 6592 : *IY2-- = round_fx( acc ); // Qx - 1
3259 6592 : move16();
3260 :
3261 6592 : c0_ind = sub( c0_ind, step2 );
3262 6592 : s0_ind = add( s0_ind, step2 );
3263 6592 : c1_ind = add( c1_ind, step2 );
3264 6592 : s1_ind = sub( s1_ind, step2 );
3265 6592 : c2_ind = add( c2_ind, step2 );
3266 6592 : s2_ind = sub( s2_ind, step2 );
3267 : }
3268 :
3269 : /* From m/4 to 3*m/8 - 1. Differs from the previous loop only in the sign of the two s2 terms and in the direction in which c2_ind / s2_ind move. */
3270 103 : l = shr( m, 3 ); /* 3*m/8 - m/4 = m/8 */
3271 6695 : FOR( i = 0; i < l; i++ )
3272 : {
3273 6592 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3274 6592 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3275 6592 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3276 6592 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3277 6592 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3278 6592 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3279 6592 : *RY2++ = round_fx( acc ); // Qx - 1
3280 6592 : move16();
3281 :
3282 6592 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3283 6592 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3284 6592 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3285 6592 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3286 6592 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3287 6592 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3288 6592 : *IY2-- = round_fx( acc ); // Qx - 1
3289 6592 : move16();
3290 :
3291 6592 : c0_ind = sub( c0_ind, step2 );
3292 6592 : s0_ind = add( s0_ind, step2 );
3293 6592 : c1_ind = add( c1_ind, step2 );
3294 6592 : s1_ind = sub( s1_ind, step2 );
3295 6592 : c2_ind = sub( c2_ind, step2 );
3296 6592 : s2_ind = add( s2_ind, step2 );
3297 : }
3298 :
3299 : /* From 3*m/8 to m/2 - 1. Differs from the previous loop only in the sign of the two c0 terms (the accumulation now starts from the c1 term) and in the direction in which c0_ind / s0_ind move. */
3300 103 : l = shr( m, 3 ); /* m/2 - 3*m/8 = m/8 */
3301 6695 : FOR( i = 0; i < l; i++ )
3302 : {
3303 6592 : acc = L_mult0( *RZ1, t_sin[c1_ind] ); // Qx + Q15
3304 6592 : acc = L_msu0( acc, *RZ0, t_sin[c0_ind] ); // Qx + Q15
3305 6592 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3306 6592 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3307 6592 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3308 6592 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3309 6592 : *RY2++ = round_fx( acc ); // Qx - 1
3310 6592 : move16();
3311 :
3312 6592 : acc = L_mult0( *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3313 6592 : acc = L_msu0( acc, *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3314 6592 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3315 6592 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3316 6592 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3317 6592 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3318 6592 : *IY2-- = round_fx( acc ); // Qx - 1
3319 6592 : move16();
3320 :
3321 6592 : c0_ind = add( c0_ind, step2 );
3322 6592 : s0_ind = sub( s0_ind, step2 );
3323 6592 : c1_ind = add( c1_ind, step2 );
3324 6592 : s1_ind = sub( s1_ind, step2 );
3325 6592 : c2_ind = sub( c2_ind, step2 );
3326 6592 : s2_ind = add( s2_ind, step2 );
3327 : }
3328 :
3329 : /* m/2: last R value of Y2, same sign pattern as the preceding loop; no I value is written */
3330 103 : acc = L_mult0( *RZ1, t_sin[c1_ind] ); // Qx + Q15
3331 103 : acc = L_msu0( acc, *RZ0, t_sin[c0_ind] ); // Qx + Q15
3332 103 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3333 103 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3334 103 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3335 103 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3336 103 : *RY2++ = round_fx( acc ); // Qx - 1
3337 103 : move16();
3338 :
3339 : /* Compute the inverse FFT for all 3 blocks. Each call uses the same pointer for input and output, i.e. it works in place on Y. */
3340 103 : RY0 = &Y[0]; /* Rewind the pointers. */
3341 103 : RY1 = &Y[m];
3342 103 : RY2 = &RY1[m];
3343 103 : mBy2 = shr( m, 1 );
3344 103 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY0, RY0, 0 ); /* inverse FFT */
3345 103 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY1, RY1, 0 ); /* inverse FFT */
3346 103 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY2, RY2, 0 ); /* inverse FFT */
3347 :
3348 103 : y0 = RY0;
3349 103 : y1 = RY1;
3350 103 : y2 = RY2;
3351 :
3352 : /* Interlacing and scaling, scale = 1/3: X[3i] <- Y0[i], X[3i+1] <- Y1[i], X[3i+2] <- Y2[i] for i = 0 .. m-1 */
3353 103 : pX = X;
3354 52839 : FOR( i = 0; i < m; i++ )
3355 : {
3356 52736 : *pX++ = shl_sat( mult_r( *y0++, FFT3_ONE_THIRD ), 1 ); // Qx
3357 52736 : move16();
3358 52736 : *pX++ = shl_sat( mult_r( *y1++, FFT3_ONE_THIRD ), 1 ); // Qx
3359 52736 : move16();
3360 52736 : *pX++ = shl_sat( mult_r( *y2++, FFT3_ONE_THIRD ), 1 ); // Qx
3361 52736 : move16();
3362 : }
3363 :
3364 103 : return;
3365 : }
|