Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include "options.h" /* Compilation switches */
6 : #include "cnst.h" /* Common constants */
7 : #include "prot_fx.h" /* Function prototypes */
8 : #include "rom_com.h" /* Static table prototypes */
9 : #include "stl.h"
10 : #include <assert.h>
11 :
12 : /*-----------------------------------------------------------------*
13 : * Local constants and function prototypes
14 : *-----------------------------------------------------------------*/
15 :
16 : #define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */
17 : /* DCT related */
18 : #define KP559016994_16FX 1200479845 /* EDCT & EMDCT constants Q31*/
19 : #define KP951056516_16FX 2042378325 /* EDCT & EMDCT constants Q31*/
20 : #define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31*/
21 :
22 : static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
23 : static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
24 : static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
25 : static void cftmdl_16fx( Word16 n, Word16 l, Word16 *a, const Word32 *w );
26 : static void cftfsub_16fx( Word16 n, Word16 *a, const Word32 *w );
27 : static void cft1st_16fx( Word16 n, Word16 *a, const Word32 *w );
28 : static void cftmdl_16fx( Word16 n, Word16 l, Word16 *a, const Word32 *w );
29 : static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
30 : static void bitrv2_SR_16fx( Word16 n, const Word16 *ip, Word16 *a );
31 : static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
32 : static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx );
33 : static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w );
34 :
35 : #include "math_32.h"
36 :
37 : /*-----------------------------------------------------------------*
38 : * Local functions
39 : *-----------------------------------------------------------------*/
/* Forward declarations of the Word32-data transform helpers defined below.
 * All of them take the cos/sin table as Word16 values. */
40 : static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w );
41 : static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a );
42 : static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w );
43 : static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w );
44 : static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w );
45 :
46 :
47 267 : void DoRTFTn_fx(
48 : Word32 *x, /* i/o : real part of input and output data Q(x) */
49 : Word32 *y, /* i/o : imaginary part of input and output data Q(x) */
50 : const Word16 n /* i : size of the FFT up to 1024 */
51 : )
52 : {
53 :
54 : Word16 i;
55 : Word32 z[2048], *pt;
56 :
57 267 : pt = z;
58 136523 : FOR( i = 0; i < n; i++ )
59 : {
60 136256 : *pt++ = x[i];
61 136256 : move16();
62 136256 : *pt++ = y[i];
63 136256 : move16();
64 : }
65 :
66 267 : IF( EQ_16( n, 16 ) )
67 : {
68 0 : cdftForw_fx( 2 * n, z, Ip_fft16, w_fft512_fx_evs );
69 : }
70 267 : ELSE IF( EQ_16( n, 32 ) )
71 : {
72 0 : cdftForw_fx( 2 * n, z, Ip_fft32, w_fft512_fx_evs );
73 : }
74 267 : ELSE IF( EQ_16( n, 64 ) )
75 : {
76 1 : cdftForw_fx( 2 * n, z, Ip_fft64, w_fft512_fx_evs );
77 : }
78 266 : ELSE IF( EQ_16( n, 128 ) )
79 : {
80 0 : cdftForw_fx( 2 * n, z, Ip_fft128, w_fft512_fx_evs );
81 : }
82 266 : ELSE IF( EQ_16( n, 256 ) )
83 : {
84 0 : cdftForw_fx( 2 * n, z, Ip_fft256, w_fft512_fx_evs );
85 : }
86 266 : ELSE IF( EQ_16( n, 512 ) )
87 : {
88 266 : cdftForw_fx( 2 * n, z, Ip_fft512, w_fft512_fx_evs );
89 : }
90 : ELSE
91 : {
92 0 : assert( 0 );
93 : }
94 :
95 267 : x[0] = z[0];
96 267 : move16();
97 267 : y[0] = z[1];
98 267 : move16();
99 267 : pt = &z[2];
100 136256 : FOR( i = n - 1; i >= 1; i-- )
101 : {
102 135989 : x[i] = *pt++;
103 135989 : move16();
104 135989 : y[i] = *pt++;
105 135989 : move16();
106 : }
107 :
108 267 : return;
109 : }
110 :
111 : /*-----------------------------------------------------------------*
112 : * cdftForw_fx()
113 : * Main function of Complex Discrete Fourier Transform
114 : *-----------------------------------------------------------------*/
115 267 : static void cdftForw_fx(
116 : Word16 n, /* i : data length of real and imag */
117 : Word32 *a, /* i/o : input/output data Q(q)*/
118 : const Word16 *ip, /* i : work area for bit reversal */
119 : const Word16 *w /* i : cos/sin table Q14*/
120 : )
121 : {
122 : /* bit reversal */
123 267 : bitrv2_SR_fx( n, ip + 2, a );
124 :
125 : /* Do FFT */
126 267 : cftfsub_fx( n, a, w );
127 267 : }
128 :
129 : /*-----------------------------------------------------------------*
130 : * bitrv2_SR_fx()
131 : * Bit reversal
132 : *-----------------------------------------------------------------*/
133 27132 : static void bitrv2_SR_fx(
134 : Word16 n, /* i : data length of real and imag */
135 : const Word16 *ip, /* i/o : work area for bit reversal */
136 : Word32 *a /* i/o : input/output data Q(q)*/
137 : )
138 : {
139 : Word16 j, j1, k, k1, m, m2;
140 : Word16 l;
141 : Word32 xr, xi, yr, yi;
142 :
143 27132 : l = n;
144 27132 : move16();
145 27132 : m = 1;
146 27132 : move16();
147 :
148 81928 : WHILE( ( ( m << 3 ) < l ) )
149 : {
150 54796 : l = shr( l, 1 );
151 54796 : m = shl( m, 1 );
152 : }
153 :
154 27132 : m2 = shl( m, 1 );
155 27132 : IF( EQ_16( shl( m, 3 ), l ) )
156 : {
157 5 : FOR( k = 0; k < m; k++ )
158 : {
159 10 : FOR( j = 0; j < k; j++ )
160 : {
161 6 : j1 = add( shl( j, 1 ), ip[k] );
162 6 : k1 = add( shl( k, 1 ), ip[j] );
163 6 : xr = a[j1];
164 6 : move32();
165 6 : xi = a[j1 + 1];
166 6 : move32();
167 6 : yr = a[k1];
168 6 : move32();
169 6 : yi = a[k1 + 1];
170 6 : move32();
171 6 : a[j1] = yr;
172 6 : move32();
173 6 : a[j1 + 1] = yi;
174 6 : move32();
175 6 : a[k1] = xr;
176 6 : move32();
177 6 : a[k1 + 1] = xi;
178 6 : move32();
179 6 : j1 = add( j1, m2 );
180 6 : k1 = add( k1, shl( m2, 1 ) );
181 6 : xr = a[j1];
182 6 : move32();
183 6 : xi = a[j1 + 1];
184 6 : move32();
185 6 : yr = a[k1];
186 6 : move32();
187 6 : yi = a[k1 + 1];
188 6 : move32();
189 6 : a[j1] = yr;
190 6 : move32();
191 6 : a[j1 + 1] = yi;
192 6 : move32();
193 6 : a[k1] = xr;
194 6 : move32();
195 6 : a[k1 + 1] = xi;
196 6 : move32();
197 6 : j1 = add( j1, m2 );
198 6 : k1 = sub( k1, m2 );
199 6 : xr = a[j1];
200 6 : move32();
201 6 : xi = a[j1 + 1];
202 6 : move32();
203 6 : xi = a[j1 + 1];
204 6 : move32();
205 6 : yr = a[k1];
206 6 : move32();
207 6 : yi = a[k1 + 1];
208 6 : move32();
209 6 : a[j1] = yr;
210 6 : move32();
211 6 : a[j1 + 1] = yi;
212 6 : move32();
213 6 : a[k1] = xr;
214 6 : move32();
215 6 : a[k1 + 1] = xi;
216 6 : move32();
217 6 : j1 = add( j1, m2 );
218 6 : k1 = add( k1, shl( m2, 1 ) );
219 6 : xr = a[j1];
220 6 : move32();
221 6 : xi = a[j1 + 1];
222 6 : move32();
223 6 : yr = a[k1];
224 6 : move32();
225 6 : yi = a[k1 + 1];
226 6 : move32();
227 6 : a[j1] = yr;
228 6 : move32();
229 6 : a[j1 + 1] = yi;
230 6 : move32();
231 6 : a[k1] = xr;
232 6 : move32();
233 6 : a[k1 + 1] = xi;
234 6 : move32();
235 : }
236 :
237 4 : j1 = add( add( shl( k, 1 ), m2 ), ip[k] );
238 4 : k1 = add( j1, m2 );
239 4 : xr = a[j1];
240 4 : move32();
241 4 : xi = a[j1 + 1];
242 4 : move32();
243 4 : yr = a[k1];
244 4 : move32();
245 4 : yi = a[k1 + 1];
246 4 : move32();
247 4 : a[j1] = yr;
248 4 : move32();
249 4 : a[j1 + 1] = yi;
250 4 : move32();
251 4 : a[k1] = xr;
252 4 : move32();
253 4 : a[k1 + 1] = xi;
254 4 : move32();
255 : }
256 : }
257 : ELSE
258 : {
259 111716 : FOR( k = 1; k < m; k++ )
260 : {
261 277695 : FOR( j = 0; j < k; j++ )
262 : {
263 193110 : j1 = add( shl( j, 1 ), ip[k] );
264 193110 : k1 = add( shl( k, 1 ), ip[j] );
265 193110 : xr = a[j1];
266 193110 : move32();
267 193110 : xi = a[j1 + 1];
268 193110 : move32();
269 193110 : yr = a[k1];
270 193110 : move32();
271 193110 : yi = a[k1 + 1];
272 193110 : move32();
273 193110 : a[j1] = yr;
274 193110 : move32();
275 193110 : a[j1 + 1] = yi;
276 193110 : move32();
277 193110 : a[k1] = xr;
278 193110 : move32();
279 193110 : a[k1 + 1] = xi;
280 193110 : move32();
281 193110 : j1 = add( j1, m2 );
282 193110 : k1 = add( k1, m2 );
283 193110 : xr = a[j1];
284 193110 : move32();
285 193110 : xi = a[j1 + 1];
286 193110 : move32();
287 193110 : yr = a[k1];
288 193110 : move32();
289 193110 : yi = a[k1 + 1];
290 193110 : move32();
291 193110 : a[j1] = yr;
292 193110 : move32();
293 193110 : a[j1 + 1] = yi;
294 193110 : move32();
295 193110 : a[k1] = xr;
296 193110 : move32();
297 193110 : a[k1 + 1] = xi;
298 193110 : move32();
299 : }
300 : }
301 : }
302 :
303 27132 : return;
304 : }
305 :
306 : /*-----------------------------------------------------------------*
307 : * cftfsub_fx()
308 : * Complex Discrete Fourier Transform
309 : *-----------------------------------------------------------------*/
310 27132 : static void cftfsub_fx(
311 : Word16 n, /* i : data length of real and imag */
312 : Word32 *a, /* i/o : input/output data Q(q)*/
313 : const Word16 *w /* i : cos/sin table Q14*/
314 : )
315 : {
316 : Word16 j, j1, j2, j3, l;
317 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
318 :
319 27132 : l = 2;
320 27132 : move16();
321 :
322 27132 : IF( n > 8 )
323 : {
324 27132 : cft1st_fx( n, a, w );
325 27132 : l = 8;
326 27132 : move16();
327 54796 : WHILE( ( ( l << 2 ) < n ) )
328 : {
329 27664 : cftmdl_fx( n, l, a, w );
330 27664 : l = shl( l, 2 );
331 : }
332 : }
333 27132 : IF( shl( l, 2 ) == n )
334 : {
335 17 : FOR( j = 0; j < l; j += 2 )
336 : {
337 16 : j1 = add( j, l );
338 16 : j2 = add( j1, l );
339 16 : j3 = add( j2, l );
340 16 : x0r = L_add( a[j], a[j1] );
341 16 : x0i = L_add( a[j + 1], a[j1 + 1] );
342 16 : x1r = L_sub( a[j], a[j1] );
343 16 : x1i = L_sub( a[j + 1], a[j1 + 1] );
344 16 : x2r = L_add( a[j2], a[j3] );
345 16 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
346 16 : x3r = L_sub( a[j2], a[j3] );
347 16 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
348 16 : a[j] = L_add( x0r, x2r );
349 16 : move32();
350 16 : a[j2] = L_sub( x0r, x2r );
351 16 : move32();
352 16 : a[j + 1] = L_add( x0i, x2i );
353 16 : move32();
354 16 : a[j2 + 1] = L_sub( x0i, x2i );
355 16 : move32();
356 16 : a[j1] = L_sub( x1r, x3i );
357 16 : move32();
358 16 : a[j1 + 1] = L_add( x1i, x3r );
359 16 : move32();
360 16 : a[j3] = L_add( x1r, x3i );
361 16 : move32();
362 16 : a[j3 + 1] = L_sub( x1i, x3r );
363 16 : move32();
364 : }
365 : }
366 : ELSE
367 : {
368 525067 : FOR( j = 0; j < l; j += 2 )
369 : {
370 497936 : j1 = add( j, l );
371 497936 : x0r = L_sub( a[j], a[j1] );
372 497936 : x0i = L_sub( a[j + 1], a[j1 + 1] );
373 497936 : a[j] = L_add( a[j], a[j1] );
374 497936 : move32();
375 497936 : a[j + 1] = L_add( a[j + 1], a[j1 + 1] );
376 497936 : move32();
377 497936 : a[j1] = x0r;
378 497936 : move32();
379 497936 : move32();
380 497936 : a[j1 + 1] = x0i;
381 497936 : move32();
382 497936 : move32();
383 : }
384 : }
385 :
386 27132 : return;
387 : }
388 :
389 : /*-----------------------------------------------------------------*
390 : * cft1st_fx()
391 : * Subfunction of Complex Discrete Fourier Transform
392 : *-----------------------------------------------------------------*/
/* First butterfly pass over the interleaved re/im array a[0..n-1], done in place.
 * Every group of 8 consecutive Word32 values (4 complex points) goes through one
 * 4-point butterfly. The groups a[0..7] and a[8..15] are handled before the loop;
 * the loop then processes 16 values (two butterflies) per iteration with factors
 * read from the cos/sin table w[]. */
393 27132 : static void cft1st_fx(
394 : Word16 n, /* i : data length of real and imag */
395 : Word32 *a, /* i/o : input/output data Q(q)*/
396 : const Word16 *w /* i : cos/sin table Q14*/
397 : )
398 : {
399 : Word16 j, k1, k2;
400 : Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
401 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
402 :
/* Group a[0..7]: sums and differences only, no table factor is applied */
403 27132 : x0r = L_add( a[0], a[2] );
404 27132 : x0i = L_add( a[1], a[3] );
405 27132 : x1r = L_sub( a[0], a[2] );
406 27132 : x1i = L_sub( a[1], a[3] );
407 27132 : x2r = L_add( a[4], a[6] );
408 27132 : x2i = L_add( a[5], a[7] );
409 27132 : x3r = L_sub( a[4], a[6] );
410 27132 : x3i = L_sub( a[5], a[7] );
411 27132 : a[0] = L_add( x0r, x2r );
412 27132 : move32();
413 27132 : a[1] = L_add( x0i, x2i );
414 27132 : move32();
415 27132 : a[4] = L_sub( x0r, x2r );
416 27132 : move32();
417 27132 : a[5] = L_sub( x0i, x2i );
418 27132 : move32();
419 27132 : a[2] = L_sub( x1r, x3i );
420 27132 : move32();
421 27132 : a[3] = L_add( x1i, x3r );
422 27132 : move32();
423 27132 : a[6] = L_add( x1r, x3i );
424 27132 : move32();
425 27132 : a[7] = L_sub( x1i, x3r );
426 27132 : move32();
427 :
/* Group a[8..15]: w[2] is the only table value needed */
428 27132 : wk1r = w[2];
429 27132 : move16();
430 27132 : x0r = L_add( a[8], a[10] );
431 27132 : x0i = L_add( a[9], a[11] );
432 27132 : x1r = L_sub( a[8], a[10] );
433 27132 : x1i = L_sub( a[9], a[11] );
434 27132 : x2r = L_add( a[12], a[14] );
435 27132 : x2i = L_add( a[13], a[15] );
436 27132 : x3r = L_sub( a[12], a[14] );
437 27132 : x3i = L_sub( a[13], a[15] );
438 27132 : a[8] = L_add( x0r, x2r );
439 27132 : move32();
440 27132 : a[9] = L_add( x0i, x2i );
441 27132 : move32();
442 27132 : a[12] = L_sub( x2i, x0i );
443 27132 : move32();
444 27132 : a[13] = L_sub( x0r, x2r );
445 27132 : move32();
446 :
/* NOTE(review): Mult_32_16() presumably returns (x * w) >> 15, so the L_shl( .., 1 )
 * in front of it lets the Q14 factor keep the Q of a[] - confirm against its definition */
447 27132 : x0r = L_sub( x1r, x3i );
448 27132 : x0i = L_add( x1i, x3r );
449 27132 : a[10] = Mult_32_16( L_shl( L_sub( x0r, x0i ), 1 ), wk1r );
450 27132 : move32();
451 27132 : a[11] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
452 27132 : move32();
453 27132 : x0r = L_add( x3i, x1r );
454 27132 : x0i = L_sub( x3r, x1i );
455 27132 : a[14] = Mult_32_16( L_shl( L_sub( x0i, x0r ), 1 ), wk1r );
456 27132 : move32();
457 27132 : a[15] = Mult_32_16( L_shl( L_add( x0i, x0r ), 1 ), wk1r );
458 27132 : move32();
459 :
/* Remaining groups: k1 advances by two table entries per iteration, k2 = 2 * k1 */
460 27132 : k1 = 0;
461 27132 : move16();
462 124492 : FOR( j = 16; j < n; j += 16 )
463 : {
464 97360 : k1 = add( k1, 2 );
465 97360 : k2 = shl( k1, 1 );
466 97360 : wk2r = w[k1];
467 97360 : move16();
468 97360 : wk2i = w[k1 + 1];
469 97360 : move16();
470 97360 : wk1r = w[k2];
471 97360 : move16();
472 97360 : wk1i = w[k2 + 1];
473 97360 : move16();
/* third factor derived from the two read above:
 * wk3r = wk1r - 2*wk2i*wk1i, wk3i = 2*wk2i*wk1r - wk1i
 * (L_mult() already doubles the product; the >> 14 brings it back to the table scale) */
474 97360 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2i, wk1i ), 14 ) ) );
475 97360 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2i, wk1r ), 14 ), wk1i, 1 ) );
/* first butterfly of the iteration: a[j..j+7] */
476 97360 : x0r = L_add( a[j], a[j + 2] );
477 97360 : x0i = L_add( a[j + 1], a[j + 3] );
478 97360 : x1r = L_sub( a[j], a[j + 2] );
479 97360 : x1i = L_sub( a[j + 1], a[j + 3] );
480 97360 : x2r = L_add( a[j + 4], a[j + 6] );
481 97360 : x2i = L_add( a[j + 5], a[j + 7] );
482 97360 : x3r = L_sub( a[j + 4], a[j + 6] );
483 97360 : x3i = L_sub( a[j + 5], a[j + 7] );
484 97360 : a[j] = L_add( x0r, x2r );
485 97360 : move32();
486 97360 : a[j + 1] = L_add( x0i, x2i );
487 97360 : move32();
488 97360 : x0r = L_sub( x0r, x2r );
489 97360 : x0i = L_sub( x0i, x2i );
490 97360 : a[j + 4] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
491 97360 : move32();
492 97360 : a[j + 5] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk2r ), Mult_32_16( L_shl( x0r, 1 ), wk2i ) );
493 97360 : move32();
494 97360 : x0r = L_sub( x1r, x3i );
495 97360 : x0i = L_add( x1i, x3r );
496 97360 : a[j + 2] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
497 97360 : move32();
498 97360 : a[j + 3] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
499 97360 : move32();
500 97360 : x0r = L_add( x1r, x3i );
501 97360 : x0i = L_sub( x1i, x3r );
502 97360 : a[j + 6] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
503 97360 : move32();
504 97360 : a[j + 7] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
505 97360 : move32();
506 :
/* second butterfly of the iteration: a[j+8..j+15]; wk1 comes from w[k2+2], w[k2+3]
 * and wk3 is rebuilt from wk2r instead of wk2i */
507 97360 : wk1r = w[k2 + 2];
508 97360 : move16();
509 97360 : wk1i = w[k2 + 3];
510 97360 : move16();
511 97360 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2r, wk1i ), 14 ) ) );
512 97360 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2r, wk1r ), 14 ), wk1i, 1 ) );
513 97360 : x0r = L_add( a[j + 8], a[j + 10] );
514 97360 : x0i = L_add( a[j + 9], a[j + 11] );
515 97360 : x1r = L_sub( a[j + 8], a[j + 10] );
516 97360 : x1i = L_sub( a[j + 9], a[j + 11] );
517 97360 : x2r = L_add( a[j + 12], a[j + 14] );
518 97360 : x2i = L_add( a[j + 13], a[j + 15] );
519 97360 : x3r = L_sub( a[j + 12], a[j + 14] );
520 97360 : x3i = L_sub( a[j + 13], a[j + 15] );
521 97360 : a[j + 8] = L_add( x0r, x2r );
522 97360 : move32();
523 97360 : a[j + 9] = L_add( x0i, x2i );
524 97360 : move32();
525 97360 : x0r = L_sub( x0r, x2r );
526 97360 : x0i = L_sub( x0i, x2i );
527 97360 : a[j + 12] = L_negate( L_add( Mult_32_16( L_shl( x0r, 1 ), wk2i ), Mult_32_16( L_shl( x0i, 1 ), wk2r ) ) );
528 97360 : move32();
529 97360 : a[j + 13] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
530 97360 : move32();
531 97360 : x0r = L_sub( x1r, x3i );
532 97360 : x0i = L_add( x1i, x3r );
533 97360 : a[j + 10] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
534 97360 : move32();
535 97360 : a[j + 11] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
536 97360 : move32();
537 97360 : x0r = L_add( x1r, x3i );
538 97360 : x0i = L_sub( x1i, x3r );
539 97360 : a[j + 14] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
540 97360 : move32();
541 97360 : a[j + 15] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
542 97360 : move32();
543 : }
544 :
545 27132 : return;
546 : }
547 :
548 : /*-----------------------------------------------------------------*
549 : * cftmdl_fx()
550 : * Subfunction of Complex Discrete Fourier Transform
551 : *-----------------------------------------------------------------*/
/* One intermediate butterfly pass over a[0..n-1], done in place. Each 4-point
 * butterfly combines the complex values at j, j + l, j + 2l and j + 3l, so one
 * group spans m = 4 * l values. The groups at offsets 0 and m are handled before
 * the main loop; the main loop then walks the array in steps of 2 * m and
 * processes two groups (at k and k + m) per step with factors read from w[]. */
552 27664 : static void cftmdl_fx(
553 : Word16 n, /* i : data length of real and imag */
554 : Word16 l, /* i : initial shift for processing */
555 : Word32 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
556 : const Word16 *w /* i : cos/sin table Q14 (same table and scaling as in cft1st_fx) */
557 : )
558 : {
559 : Word16 j, j1, j2, j3, k, k1, k2, m, m2;
560 : Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
561 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
562 : Word16 tmp;
563 :
/* Group at offset 0: sums and differences only, no table factor is applied */
564 27664 : m = shl( l, 2 );
565 157472 : FOR( j = 0; j < l; j += 2 )
566 : {
567 129808 : j1 = add( j, l );
568 129808 : j2 = add( j1, l );
569 129808 : j3 = add( j2, l );
570 129808 : x0r = L_add( a[j], a[j1] );
571 129808 : x0i = L_add( a[j + 1], a[j1 + 1] );
572 129808 : x1r = L_sub( a[j], a[j1] );
573 129808 : x1i = L_sub( a[j + 1], a[j1 + 1] );
574 129808 : x2r = L_add( a[j2], a[j3] );
575 129808 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
576 129808 : x3r = L_sub( a[j2], a[j3] );
577 129808 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
578 129808 : a[j] = L_add( x0r, x2r );
579 129808 : move32();
580 129808 : a[j + 1] = L_add( x0i, x2i );
581 129808 : move32();
582 129808 : a[j2] = L_sub( x0r, x2r );
583 129808 : move32();
584 129808 : a[j2 + 1] = L_sub( x0i, x2i );
585 129808 : move32();
586 129808 : a[j1] = L_sub( x1r, x3i );
587 129808 : move32();
588 129808 : a[j1 + 1] = L_add( x1i, x3r );
589 129808 : move32();
590 129808 : a[j3] = L_add( x1r, x3i );
591 129808 : move32();
592 129808 : a[j3 + 1] = L_sub( x1i, x3r );
593 129808 : move32();
594 : }
595 :
/* Group at offset m: w[2] is the only table value needed.
 * NOTE(review): Mult_32_16() presumably returns (x * w) >> 15, so the L_shl( .., 1 )
 * in front of it lets the Q14 factor keep the Q of a[] - confirm against its definition */
596 27664 : wk1r = w[2];
597 27664 : move16();
598 27664 : tmp = add( l, m );
599 157472 : FOR( j = m; j < tmp; j += 2 )
600 : {
601 129808 : j1 = add( j, l );
602 129808 : j2 = add( j1, l );
603 129808 : j3 = add( j2, l );
604 129808 : x0r = L_add( a[j], a[j1] );
605 129808 : x0i = L_add( a[j + 1], a[j1 + 1] );
606 129808 : x1r = L_sub( a[j], a[j1] );
607 129808 : x1i = L_sub( a[j + 1], a[j1 + 1] );
608 129808 : x2r = L_add( a[j2], a[j3] );
609 129808 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
610 129808 : x3r = L_sub( a[j2], a[j3] );
611 129808 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
612 129808 : a[j] = L_add( x0r, x2r );
613 129808 : move32();
614 129808 : a[j + 1] = L_add( x0i, x2i );
615 129808 : move32();
616 129808 : a[j2] = L_sub( x2i, x0i );
617 129808 : move32();
618 129808 : a[j2 + 1] = L_sub( x0r, x2r );
619 129808 : move32();
620 129808 : x0r = L_sub( x1r, x3i );
621 129808 : x0i = L_add( x1i, x3r );
622 129808 : a[j1] = Mult_32_16( L_shl( L_sub( x0r, x0i ), 1 ), wk1r );
623 129808 : move32();
624 129808 : a[j1 + 1] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
625 129808 : move32();
626 129808 : x0r = L_add( x3i, x1r );
627 129808 : x0i = L_sub( x3r, x1i );
628 129808 : a[j3] = Mult_32_16( L_shl( L_sub( x0i, x0r ), 1 ), wk1r );
629 129808 : move32();
630 129808 : a[j3 + 1] = Mult_32_16( L_shl( L_add( x0r, x0i ), 1 ), wk1r );
631 129808 : move32();
632 : }
633 :
/* Remaining groups: k steps by m2 = 2 * m; k1 advances by two table entries per step, k2 = 2 * k1 */
634 27664 : k1 = 0;
635 27664 : move16();
636 27664 : m2 = shl( m, 1 );
637 32453 : FOR( k = m2; k < n; k += m2 )
638 : {
639 4789 : k1 = add( k1, 2 );
640 4789 : k2 = shl( k1, 1 );
641 4789 : wk2r = w[k1];
642 4789 : move16();
643 4789 : wk2i = w[k1 + 1];
644 4789 : move16();
645 4789 : wk1r = w[k2];
646 4789 : move16();
647 4789 : wk1i = w[k2 + 1];
648 4789 : move16();
/* third factor derived from the two read above:
 * wk3r = wk1r - 2*wk2i*wk1i, wk3i = 2*wk2i*wk1r - wk1i
 * (L_mult() already doubles the product; the >> 14 brings it back to the table scale) */
649 4789 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2i, wk1i ), 14 ) ) );
650 4789 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2i, wk1r ), 14 ), wk1i, 1 ) );
651 :
/* group starting at k */
652 4789 : tmp = add( l, k );
653 33521 : FOR( j = k; j < tmp; j += 2 )
654 : {
655 28732 : j1 = add( j, l );
656 28732 : j2 = add( j1, l );
657 28732 : j3 = add( j2, l );
658 28732 : x0r = L_add( a[j], a[j1] );
659 28732 : x0i = L_add( a[j + 1], a[j1 + 1] );
660 28732 : x1r = L_sub( a[j], a[j1] );
661 28732 : x1i = L_sub( a[j + 1], a[j1 + 1] );
662 28732 : x2r = L_add( a[j2], a[j3] );
663 28732 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
664 28732 : x3r = L_sub( a[j2], a[j3] );
665 28732 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
666 28732 : a[j] = L_add( x0r, x2r );
667 28732 : move32();
668 28732 : a[j + 1] = L_add( x0i, x2i );
669 28732 : move32();
670 28732 : x0r = L_sub( x0r, x2r );
671 28732 : x0i = L_sub( x0i, x2i );
672 28732 : a[j2] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
673 28732 : move32();
674 28732 : a[j2 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk2r ), Mult_32_16( L_shl( x0r, 1 ), wk2i ) );
675 28732 : move32();
676 28732 : x0r = L_sub( x1r, x3i );
677 28732 : x0i = L_add( x1i, x3r );
678 28732 : a[j1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
679 28732 : move32();
680 28732 : a[j1 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
681 28732 : move32();
682 28732 : x0r = L_add( x1r, x3i );
683 28732 : x0i = L_sub( x1i, x3r );
684 28732 : a[j3] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
685 28732 : move32();
686 28732 : a[j3 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
687 28732 : move32();
688 : }
689 :
/* group starting at k + m: wk1 comes from w[k2+2], w[k2+3] and wk3 is rebuilt from wk2r instead of wk2i */
690 4789 : wk1r = w[k2 + 2];
691 4789 : move16();
692 4789 : wk1i = w[k2 + 3];
693 4789 : move16();
694 4789 : wk3r = extract_l( L_sub( L_deposit_l( wk1r ), L_shr( L_mult( wk2r, wk1i ), 14 ) ) );
695 4789 : wk3i = extract_l( L_msu0( L_shr( L_mult( wk2r, wk1r ), 14 ), wk1i, 1 ) );
696 :
697 4789 : tmp = add( l, add( k, m ) );
698 33521 : FOR( j = k + m; j < tmp; j += 2 )
699 : {
700 28732 : j1 = add( j, l );
701 28732 : j2 = add( j1, l );
702 28732 : j3 = add( j2, l );
703 28732 : x0r = L_add( a[j], a[j1] );
704 28732 : x0i = L_add( a[j + 1], a[j1 + 1] );
705 28732 : x1r = L_sub( a[j], a[j1] );
706 28732 : x1i = L_sub( a[j + 1], a[j1 + 1] );
707 28732 : x2r = L_add( a[j2], a[j3] );
708 28732 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
709 28732 : x3r = L_sub( a[j2], a[j3] );
710 28732 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
711 28732 : a[j] = L_add( x0r, x2r );
712 28732 : move32();
713 28732 : a[j + 1] = L_add( x0i, x2i );
714 28732 : move32();
715 28732 : x0r = L_sub( x0r, x2r );
716 28732 : x0i = L_sub( x0i, x2i );
717 28732 : a[j2] = L_negate( L_add( Mult_32_16( L_shl( x0r, 1 ), wk2i ), Mult_32_16( L_shl( x0i, 1 ), wk2r ) ) );
718 28732 : move32();
719 28732 : a[j2 + 1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk2r ), Mult_32_16( L_shl( x0i, 1 ), wk2i ) );
720 28732 : move32();
721 28732 : x0r = L_sub( x1r, x3i );
722 28732 : x0i = L_add( x1i, x3r );
723 28732 : a[j1] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk1r ), Mult_32_16( L_shl( x0i, 1 ), wk1i ) );
724 28732 : move32();
725 28732 : a[j1 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk1r ), Mult_32_16( L_shl( x0r, 1 ), wk1i ) );
726 28732 : move32();
727 28732 : x0r = L_add( x1r, x3i );
728 28732 : x0i = L_sub( x1i, x3r );
729 28732 : a[j3] = L_sub( Mult_32_16( L_shl( x0r, 1 ), wk3r ), Mult_32_16( L_shl( x0i, 1 ), wk3i ) );
730 28732 : move32();
731 28732 : a[j3 + 1] = L_add( Mult_32_16( L_shl( x0i, 1 ), wk3r ), Mult_32_16( L_shl( x0r, 1 ), wk3i ) );
732 28732 : move32();
733 : }
734 : }
735 :
736 27664 : return;
737 : }
738 :
739 :
740 0 : static void cftbsub_fx(
741 : Word16 n,
742 : Word32 *a, // Q(Qx+Q_edct)
743 : const Word16 *w /* i : cos/sin table Q14 */
744 : )
745 : {
746 : Word16 j, j1, j2, j3, l;
747 : Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
748 :
749 0 : l = 2;
750 0 : move16();
751 0 : IF( GT_16( n, 8 ) )
752 : {
753 0 : cft1st_fx( n, a, w );
754 0 : l = 8;
755 0 : move16();
756 :
757 0 : WHILE( ( ( l << 2 ) < n ) )
758 : {
759 0 : cftmdl_fx( n, l, a, w );
760 0 : l = shl( l, 2 );
761 : }
762 : }
763 :
764 0 : IF( EQ_16( shl( l, 2 ), n ) )
765 : {
766 0 : FOR( j = 0; j < l; j += 2 )
767 : {
768 0 : j1 = add( j, l );
769 0 : j2 = add( j1, l );
770 0 : j3 = add( j2, l );
771 0 : x0r = L_add( a[j], a[j1] );
772 0 : x0i = L_negate( L_add( a[j + 1], a[j1 + 1] ) );
773 0 : x1r = L_sub( a[j], a[j1] );
774 0 : x1i = L_sub( a[j1 + 1], a[j + 1] );
775 0 : x2r = L_add( a[j2], a[j3] );
776 0 : x2i = L_add( a[j2 + 1], a[j3 + 1] );
777 0 : x3r = L_sub( a[j2], a[j3] );
778 0 : x3i = L_sub( a[j2 + 1], a[j3 + 1] );
779 0 : a[j] = L_add( x0r, x2r );
780 0 : move32();
781 0 : a[j + 1] = L_sub( x0i, x2i );
782 0 : move32();
783 0 : a[j2] = L_sub( x0r, x2r );
784 0 : move32();
785 0 : a[j2 + 1] = L_add( x0i, x2i );
786 0 : move32();
787 0 : a[j1] = L_sub( x1r, x3i );
788 0 : move32();
789 0 : a[j1 + 1] = L_sub( x1i, x3r );
790 0 : move32();
791 0 : a[j3] = L_add( x1r, x3i );
792 0 : move32();
793 0 : a[j3 + 1] = L_add( x1i, x3r );
794 0 : move32();
795 : }
796 : }
797 : ELSE
798 : {
799 0 : FOR( j = 0; j < l; j += 2 )
800 : {
801 0 : j1 = add( j, l );
802 0 : x0r = L_sub( a[j], a[j1] );
803 0 : x0i = L_sub( a[j1 + 1], a[j + 1] );
804 0 : a[j] = L_add( a[j], a[j1] );
805 0 : move32();
806 0 : a[j + 1] = L_negate( L_add( a[j + 1], a[j1 + 1] ) );
807 0 : move32();
808 0 : a[j1] = x0r;
809 0 : move32();
810 0 : a[j1 + 1] = x0i;
811 0 : move32();
812 : }
813 : }
814 0 : }
815 :
816 26865 : static void rftfsub_fx(
817 : Word16 n,
818 : Word32 *a, // Qx
819 : Word16 nc,
820 : const Word16 *c /*Q14*/ )
821 : {
822 : Word16 j, k, kk, ks, m, tmp;
823 : Word32 xr, xi, yr, yi;
824 : Word16 wkr, wki;
825 :
826 26865 : m = shr( n, 1 );
827 : /*ks = 2 * nc / m; */
828 26865 : tmp = shl( nc, 1 );
829 26865 : ks = 0;
830 26865 : move16();
831 134325 : WHILE( ( tmp >= m ) )
832 : {
833 107460 : ks = add( ks, 1 );
834 107460 : tmp = sub( tmp, m );
835 : }
836 26865 : kk = 0;
837 26865 : move16();
838 429840 : FOR( j = 2; j < m; j += 2 )
839 : {
840 402975 : k = sub( n, j );
841 402975 : kk = add( kk, ks );
842 402975 : wkr = sub( 8192 /*0.5.Q14*/, c[( nc - kk )] ); // Q14
843 402975 : wki = c[kk]; // Q14
844 402975 : move16();
845 402975 : xr = L_sub( a[j], a[k] ); // Qx
846 402975 : xi = L_add( a[j + 1], a[k + 1] ); // Qx
847 402975 : yr = L_sub( Mult_32_16( L_shl( xr, 1 ), wkr ), Mult_32_16( L_shl( xi, 1 ), wki ) ); // Qx
848 402975 : yi = L_add( Mult_32_16( L_shl( xi, 1 ), wkr ), Mult_32_16( L_shl( xr, 1 ), wki ) ); // Qx
849 402975 : a[j] = L_sub( a[j], yr );
850 402975 : move32();
851 402975 : a[j + 1] = L_sub( a[j + 1], yi );
852 402975 : move32();
853 402975 : a[k] = L_add( a[k], yr );
854 402975 : move32();
855 402975 : a[k + 1] = L_sub( a[k + 1], yi );
856 402975 : move32();
857 : }
858 26865 : }
859 :
860 :
861 0 : static void rftbsub_fx(
862 : Word16 n,
863 : Word32 *a, // Qx
864 : Word16 nc,
865 : const Word16 *c /*Q14*/ )
866 : {
867 : Word16 j, k, kk, ks, m, tmp;
868 : Word32 xr, xi, yr, yi;
869 : Word16 wkr, wki;
870 :
871 0 : a[1] = L_negate( a[1] );
872 0 : m = shr( n, 1 );
873 : /*ks = 2 * nc / m; */
874 0 : tmp = shl( nc, 1 );
875 0 : ks = 0;
876 0 : move16();
877 0 : WHILE( ( tmp >= m ) )
878 : {
879 0 : ks = add( ks, 1 );
880 0 : tmp = sub( tmp, m );
881 : }
882 0 : kk = 0;
883 0 : move16();
884 0 : FOR( j = 2; j < m; j += 2 )
885 : {
886 0 : k = sub( n, j );
887 0 : kk = add( kk, ks );
888 0 : wkr = sub( 8192 /*0.5.Q14*/, c[( nc - kk )] ); // Q14
889 0 : wki = c[kk]; // Q14
890 0 : move16();
891 0 : xr = L_sub( a[j], a[k] ); // Qx
892 0 : xi = L_add( a[j + 1], a[k + 1] ); // Qx
893 0 : yr = L_add( Mult_32_16( L_shl( xr, 1 ), wkr ), Mult_32_16( L_shl( xi, 1 ), wki ) ); // Qx
894 0 : yi = L_sub( Mult_32_16( L_shl( xi, 1 ), wkr ), Mult_32_16( L_shl( xr, 1 ), wki ) ); // Qx
895 0 : a[j] = L_sub( a[j], yr );
896 0 : move32();
897 0 : a[j + 1] = L_sub( yi, a[j + 1] );
898 0 : move32();
899 0 : a[k] = L_add( a[k], yr );
900 0 : move32();
901 0 : a[k + 1] = L_sub( yi, a[k + 1] );
902 0 : move32();
903 : }
904 0 : a[m + 1] = L_negate( a[m + 1] );
905 0 : move32();
906 0 : }
907 :
908 :
909 26865 : static void dctsub_fx(
910 : Word16 n,
911 : Word32 *a, // Qx
912 : Word16 nc,
913 : const Word16 *c /*Q14*/ )
914 : {
915 : Word16 j, k, kk, ks, m, tmp;
916 : Word16 wkr, wki;
917 : Word32 xr;
918 :
919 26865 : m = shr( n, 1 );
920 : /*ks = nc / n; */
921 26865 : tmp = nc;
922 26865 : move16();
923 26865 : ks = 0;
924 26865 : move16();
925 53730 : WHILE( ( tmp >= n ) )
926 : {
927 26865 : ks = add( ks, 1 );
928 26865 : tmp = sub( tmp, n );
929 : }
930 26865 : kk = 0;
931 26865 : move16();
932 859680 : FOR( j = 1; j < m; j++ )
933 : {
934 832815 : k = sub( n, j );
935 832815 : kk = add( kk, ks );
936 832815 : wkr = sub( c[kk], c[( nc - kk )] ); // Q14
937 832815 : wki = add( c[kk], c[( nc - kk )] ); // Q14
938 832815 : xr = L_sub( Mult_32_16( L_shl( a[j], 1 ), wki ), Mult_32_16( L_shl( a[k], 1 ), wkr ) ); // Qx
939 832815 : a[j] = L_add( Mult_32_16( L_shl( a[j], 1 ), wkr ), Mult_32_16( L_shl( a[k], 1 ), wki ) ); // Qx
940 832815 : move32();
941 832815 : a[k] = xr;
942 832815 : move32();
943 : }
944 26865 : a[m] = Mult_32_16( L_shl( a[m], 1 ), c[0] ); // Qx
945 26865 : move16();
946 26865 : }
947 :
948 : /*-----------------------------------------------------------------*
949 : * edct2_fx()
950 : *
951 : * Transformation of the signal to DCT domain
952 : * OR Inverse EDCT-II for short frames
953 : *-----------------------------------------------------------------*/
954 :
955 26865 : void edct2_fx(
956 : Word16 n,
957 : Word16 isgn,
958 : Word16 *in, // Q(q)
959 : Word32 *a, // Qx
960 : Word16 *q,
961 : const Word16 *ip,
962 : const Word16 *w /*Q14*/ )
963 : {
964 : Word16 j, nw, nc;
965 : Word32 xr;
966 :
967 26865 : *q = Exp16Array( n, in );
968 26865 : move16();
969 26865 : *q = add( *q, 6 );
970 26865 : move16();
971 1746225 : FOR( j = 0; j < n; j++ )
972 : {
973 1719360 : a[j] = L_shl( (Word32) in[j], *q );
974 1719360 : move32();
975 : }
976 :
977 26865 : nw = ip[0];
978 26865 : move16();
979 26865 : if ( GT_16( n, shl( nw, 2 ) ) )
980 : {
981 0 : nw = shr( n, 2 );
982 : }
983 :
984 26865 : nc = ip[1];
985 26865 : move16();
986 26865 : if ( GT_16( n, nc ) )
987 : {
988 0 : nc = n;
989 0 : move16();
990 : }
991 :
992 26865 : IF( isgn < 0 )
993 : {
994 0 : xr = a[n - 1];
995 0 : move32();
996 0 : FOR( j = n - 2; j >= 2; j -= 2 )
997 : {
998 0 : a[j + 1] = L_sub( a[j], a[j - 1] );
999 0 : move32();
1000 0 : a[j] = L_add( a[j], a[j - 1] );
1001 0 : move32();
1002 : }
1003 0 : a[1] = L_sub( a[0], xr );
1004 0 : move32();
1005 0 : a[0] = L_add( a[0], xr );
1006 0 : move32();
1007 :
1008 0 : IF( GT_16( n, 4 ) )
1009 : {
1010 0 : rftbsub_fx( n, a, nc, w + nw );
1011 0 : bitrv2_SR_fx( n, ip + 2, a );
1012 0 : cftbsub_fx( n, a, w );
1013 : }
1014 0 : ELSE IF( EQ_16( n, 4 ) )
1015 : {
1016 0 : cftfsub_fx( n, a, w );
1017 : }
1018 : }
1019 :
1020 26865 : IF( isgn >= 0 )
1021 : {
1022 26865 : a[0] = L_shr( a[0], 1 );
1023 26865 : move32();
1024 : }
1025 :
1026 26865 : dctsub_fx( n, a, nc, w + nw );
1027 :
1028 26865 : IF( isgn >= 0 )
1029 : {
1030 26865 : IF( GT_16( n, 4 ) )
1031 : {
1032 26865 : bitrv2_SR_fx( n, ip + 2, a );
1033 26865 : cftfsub_fx( n, a, w );
1034 26865 : rftfsub_fx( n, a, nc, w + nw );
1035 : }
1036 0 : ELSE IF( EQ_16( n, 4 ) )
1037 : {
1038 0 : cftfsub_fx( n, a, w );
1039 : }
1040 26865 : xr = L_sub( a[0], a[1] );
1041 26865 : a[0] = L_add( a[0], a[1] );
1042 26865 : move32();
1043 859680 : FOR( j = 2; j < n; j += 2 )
1044 : {
1045 832815 : a[j - 1] = L_sub( a[j], a[j + 1] );
1046 832815 : move32();
1047 832815 : a[j] = L_add( a[j], a[j + 1] );
1048 832815 : move32();
1049 : }
1050 26865 : a[n - 1] = xr;
1051 26865 : move32();
1052 :
1053 1746225 : FOR( j = 0; j < n; j++ )
1054 : {
1055 1719360 : a[j] = L_shr( a[j], 5 ); // a[j] / 32.0f
1056 1719360 : move32();
1057 : }
1058 : }
1059 26865 : }
1060 :
1061 :
1062 : /*-----------------------------------------------------------------*
1063 : * fft5_shift4()
1064 : * 5-point FFT with 4-point circular shift
1065 : *-----------------------------------------------------------------*/
1066 :
1067 1193856 : static void fft5_shift4_16fx(
1068 : Word16 n1, /* i : length of data */
1069 : Word16 *zRe, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1070 : Word16 *zIm, /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1071 : const Word16 *Idx /* i : pointer of the address table Q0 */
1072 : )
1073 : {
1074 : Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn, T2, T3, T4, T5, T6, T7;
1075 : Word16 i0, i1, i2, i3, i4;
1076 : Word32 L_tmp;
1077 :
1078 :
1079 1193856 : i0 = Idx[0];
1080 1193856 : move16();
1081 1193856 : i1 = Idx[n1];
1082 1193856 : move16();
1083 1193856 : i2 = Idx[n1 * 2];
1084 1193856 : move16();
1085 1193856 : i3 = Idx[n1 * 3];
1086 1193856 : move16();
1087 1193856 : i4 = Idx[n1 * 4];
1088 1193856 : move16();
1089 :
1090 1193856 : T1 = zRe[i0]; // Qx
1091 1193856 : move16();
1092 1193856 : To = zIm[i0]; // Qx
1093 1193856 : move16();
1094 :
1095 1193856 : T2 = zRe[i1];
1096 1193856 : move16();
1097 1193856 : T3 = zRe[i4];
1098 1193856 : move16();
1099 1193856 : T4 = add_sat( T2, T3 );
1100 1193856 : T5 = zRe[i2];
1101 1193856 : move16();
1102 1193856 : T6 = zRe[i3];
1103 1193856 : move16();
1104 1193856 : T7 = add_sat( T5, T6 );
1105 1193856 : T8 = add_sat( T4, T7 );
1106 1193856 : Tt = sub_sat( T5, T6 );
1107 : /* T9 = KP559016994 * (T4 - T7); */
1108 1193856 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1109 1193856 : T9 = round_fx_sat( L_tmp ); // Qx
1110 1193856 : Ts = sub_sat( T2, T3 );
1111 :
1112 1193856 : T2 = zIm[i1];
1113 1193856 : move16();
1114 1193856 : T3 = zIm[i4];
1115 1193856 : move16();
1116 1193856 : T4 = add( T2, T3 );
1117 1193856 : T5 = zIm[i2];
1118 1193856 : move16();
1119 1193856 : T6 = zIm[i3];
1120 1193856 : move16();
1121 1193856 : T7 = add_sat( T5, T6 );
1122 1193856 : Te = sub_sat( T2, T3 );
1123 1193856 : Tp = add_sat( T4, T7 );
1124 1193856 : Th = sub_sat( T5, T6 );
1125 :
1126 : /* Tn = KP559016994 * (T4 - T7); */
1127 1193856 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1128 1193856 : Tn = round_fx_sat( L_tmp ); // Qx
1129 1193856 : zRe[i0] = add_sat( T1, T8 );
1130 1193856 : move16();
1131 1193856 : zIm[i0] = add_sat( To, Tp );
1132 1193856 : move16();
1133 :
1134 : /* T2 = KP951056516*Te + KP587785252*Th; */
1135 1193856 : L_tmp = Mult_32_16( KP951056516_16FX, Te ); // Q(16 +x)
1136 1193856 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Th ); // Q(16 +x)
1137 1193856 : T2 = round_fx_sat( L_tmp ); // Qx
1138 : /*T3 = KP951056516*Th - KP587785252*Te; */
1139 1193856 : L_tmp = Mult_32_16( KP951056516_16FX, Th ); // Q(16 +x)
1140 1193856 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Te ); // Q(16 +x)
1141 1193856 : T3 = round_fx_sat( L_tmp ); // Qx
1142 1193856 : T6 = sub_sat( T1, shr_sat( T8, 2 ) );
1143 1193856 : T4 = add_sat( T9, T6 );
1144 1193856 : T5 = sub_sat( T6, T9 );
1145 1193856 : zRe[i1] = sub_sat( T4, T2 );
1146 1193856 : move16();
1147 1193856 : zRe[i2] = add_sat( T5, T3 );
1148 1193856 : move16();
1149 1193856 : zRe[i4] = add_sat( T4, T2 );
1150 1193856 : move16();
1151 1193856 : zRe[i3] = sub_sat( T5, T3 );
1152 1193856 : move16();
1153 :
1154 : /* T2 = KP951056516 * Ts + KP587785252 * Tt; */
1155 1193856 : L_tmp = Mult_32_16( KP951056516_16FX, Ts ); // Q(16 +x)
1156 1193856 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Tt ); // Q(16 +x)
1157 1193856 : T2 = round_fx_sat( L_tmp ); // Qx
1158 : /* T3 = KP951056516 * Tt - KP587785252 * Ts; */
1159 1193856 : L_tmp = Mult_32_16( KP951056516_16FX, Tt ); // Q(16 +x)
1160 1193856 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Ts ); // Q(16 +x)
1161 1193856 : T3 = round_fx_sat( L_tmp ); // Qx
1162 1193856 : T6 = sub_sat( To, shr( Tp, 2 ) ); // To - (Tp / 4)
1163 1193856 : T4 = add_sat( Tn, T6 );
1164 1193856 : T5 = sub_sat( T6, Tn );
1165 1193856 : zIm[i4] = sub_sat( T4, T2 );
1166 1193856 : move16();
1167 1193856 : zIm[i2] = sub_sat( T5, T3 );
1168 1193856 : move16();
1169 1193856 : zIm[i1] = add_sat( T2, T4 );
1170 1193856 : move16();
1171 1193856 : zIm[i3] = add_sat( T3, T5 );
1172 1193856 : move16();
1173 1193856 : return;
1174 : }
1175 :
1176 : /*-----------------------------------------------------------------*
1177 : * fft5_32()
1178 : * 5-point FFT called for 32 times
1179 : *-----------------------------------------------------------------*/
1180 2486656 : static void fft5_32_16fx(
1181 : Word16 *zRe, /* i/o : real part of input and output data Qx */
1182 : Word16 *zIm, /* i/o : imaginary part of input and output data Qx */
1183 : const Word16 *Idx /* i : pointer of the address table Q0 */
1184 : )
1185 : {
1186 : Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn, T2, T3, T4, T5, T6, T7;
1187 : Word16 i0, i1, i2, i3, i4;
1188 : Word32 L_tmp;
1189 : #ifndef ISSUE_1836_replace_overflow_libcom
1190 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1191 : Flag Overflow = 0;
1192 : move32();
1193 : #endif
1194 : #endif
1195 2486656 : i0 = Idx[0];
1196 2486656 : move16();
1197 2486656 : i1 = Idx[32];
1198 2486656 : move16();
1199 2486656 : i2 = Idx[64];
1200 2486656 : move16();
1201 2486656 : i3 = Idx[96];
1202 2486656 : move16();
1203 2486656 : i4 = Idx[128];
1204 2486656 : move16();
1205 :
1206 2486656 : T1 = zRe[i0]; // Qx
1207 2486656 : move16();
1208 2486656 : To = zIm[i0]; // Qx
1209 2486656 : move16();
1210 :
1211 2486656 : T2 = zRe[i1]; // Qx
1212 2486656 : move16();
1213 2486656 : T3 = zRe[i4]; // Qx
1214 2486656 : move16();
1215 :
1216 2486656 : T4 = add_sat( T2, T3 );
1217 2486656 : T5 = zRe[i2];
1218 2486656 : move16();
1219 2486656 : T6 = zRe[i3];
1220 2486656 : move16();
1221 : #ifdef ISSUE_1836_replace_overflow_libcom
1222 2486656 : T7 = add_sat( T5, T6 );
1223 2486656 : T8 = add_sat( T4, T7 );
1224 2486656 : Tt = sub_sat( T5, T6 );
1225 : #else
1226 : T7 = add_o( T5, T6, &Overflow );
1227 : T8 = add_o( T4, T7, &Overflow );
1228 : Tt = sub_o( T5, T6, &Overflow );
1229 : #endif
1230 : /* T9 = KP559016994 * (T4 - T7); */
1231 2486656 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1232 2486656 : T9 = round_fx_sat( L_tmp ); // Qx
1233 2486656 : Ts = sub_sat( T2, T3 );
1234 :
1235 2486656 : T2 = zIm[i1];
1236 2486656 : move16();
1237 2486656 : T3 = zIm[i4];
1238 2486656 : move16();
1239 2486656 : T4 = add_sat( T2, T3 );
1240 2486656 : T5 = zIm[i2];
1241 2486656 : move16();
1242 2486656 : T6 = zIm[i3];
1243 2486656 : move16();
1244 2486656 : T7 = add_sat( T5, T6 );
1245 2486656 : Te = sub_sat( T2, T3 );
1246 2486656 : Tp = add_sat( T4, T7 );
1247 2486656 : Th = sub_sat( T5, T6 );
1248 2486656 : L_tmp = Mult_32_16( KP559016994_16FX, sub_sat( T4, T7 ) ); // Q(16 +x)
1249 2486656 : Tn = round_fx_sat( L_tmp ); // Qx
1250 :
1251 : #ifdef ISSUE_1836_replace_overflow_libcom
1252 2486656 : zRe[i0] = add_sat( T1, T8 );
1253 2486656 : move16();
1254 2486656 : zIm[i0] = add_sat( To, Tp );
1255 2486656 : move32();
1256 : #else
1257 : zRe[i0] = add_o( T1, T8, &Overflow );
1258 : move16();
1259 : zIm[i0] = add_o( To, Tp, &Overflow );
1260 : move32();
1261 : #endif
1262 :
1263 : /*T2 = KP951056516*Te + KP587785252*Th; */
1264 2486656 : L_tmp = Mult_32_16( KP951056516_16FX, Te ); // Q(16 +x)
1265 2486656 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Th ); // Q(16 +x)
1266 2486656 : T2 = round_fx_sat( L_tmp ); // Qx
1267 :
1268 : /*T3 = KP951056516*Th - KP587785252*Te; */
1269 2486656 : L_tmp = Mult_32_16( KP951056516_16FX, Th ); // Q(16 +x)
1270 2486656 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Te ); // Q(16 +x)
1271 2486656 : T3 = round_fx_sat( L_tmp ); // Qx
1272 :
1273 :
1274 2486656 : T6 = sub_sat( T1, shr( T8, 2 ) );
1275 2486656 : T4 = add_sat( T9, T6 );
1276 2486656 : T5 = sub_sat( T6, T9 );
1277 :
1278 : #ifdef ISSUE_1836_replace_overflow_libcom
1279 2486656 : zRe[i3] = sub_sat( T4, T2 );
1280 2486656 : move32();
1281 2486656 : zRe[i1] = add_sat( T5, T3 );
1282 2486656 : move32();
1283 2486656 : zRe[i2] = add_sat( T4, T2 );
1284 2486656 : move32();
1285 2486656 : zRe[i4] = sub_sat( T5, T3 );
1286 2486656 : move32();
1287 : #else
1288 : zRe[i3] = sub_o( T4, T2, &Overflow );
1289 : move32();
1290 : zRe[i1] = add_o( T5, T3, &Overflow );
1291 : move32();
1292 : zRe[i2] = add_o( T4, T2, &Overflow );
1293 : move32();
1294 : zRe[i4] = sub_o( T5, T3, &Overflow );
1295 : move32();
1296 : #endif
1297 :
1298 : /* T2 = KP951056516 * Ts + KP587785252 * Tt; */
1299 2486656 : L_tmp = Mult_32_16( KP951056516_16FX, Ts ); // Q(16 +x)
1300 2486656 : L_tmp = Madd_32_16( L_tmp, KP587785252_16FX, Tt ); // Q(16 +x)
1301 2486656 : T2 = round_fx_sat( L_tmp ); // Qx
1302 :
1303 : /* T3 = KP951056516 * Tt - KP587785252 * Ts; */
1304 2486656 : L_tmp = Mult_32_16( KP951056516_16FX, Tt ); // Q(16 +x)
1305 2486656 : L_tmp = Msub_32_16( L_tmp, KP587785252_16FX, Ts ); // Q(16 +x)
1306 :
1307 2486656 : T3 = round_fx_sat( L_tmp ); // Qx
1308 :
1309 2486656 : T6 = sub_sat( To, shr( Tp, 2 ) );
1310 2486656 : T4 = add_sat( Tn, T6 );
1311 2486656 : T5 = sub_sat( T6, Tn );
1312 2486656 : zIm[i2] = sub_sat( T4, T2 );
1313 2486656 : move16();
1314 2486656 : zIm[i1] = sub_sat( T5, T3 );
1315 2486656 : move16();
1316 2486656 : zIm[i3] = add_sat( T2, T4 );
1317 2486656 : move16();
1318 2486656 : zIm[i4] = add_sat( T3, T5 );
1319 2486656 : move16();
1320 :
1321 2486656 : return;
1322 : }
1323 :
1324 : /*-----------------------------------------------------------------*
1325 : * fft64()
1326 : * 64-point FFT
1327 : *-----------------------------------------------------------------*/
1328 93270 : static void fft64_16fx(
1329 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1330 : Word16 *y, /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1331 : const Word16 *Idx /* i : pointer of the address table Q0 */
1332 : )
1333 : {
1334 : Word16 i, id, jd;
1335 : Word16 z[128];
1336 93270 : move16(); /*penalty for 1 ptr init */
1337 6062550 : FOR( i = 0; i < 64; i++ )
1338 : {
1339 5969280 : id = Idx[i];
1340 5969280 : move16();
1341 5969280 : z[2 * i] = x[id];
1342 5969280 : move16();
1343 5969280 : z[2 * i + 1] = y[id];
1344 5969280 : move16();
1345 : }
1346 :
1347 93270 : cdftForw_16fx( 128, z, Ip_fft64, w_fft128_16fx );
1348 :
1349 93270 : move16(); /*penalty for 1 ptr init */
1350 6062550 : FOR( i = 0; i < 64; i++ )
1351 : {
1352 5969280 : jd = Odx_fft64[i];
1353 5969280 : move16();
1354 5969280 : id = Idx[jd];
1355 5969280 : move16();
1356 5969280 : x[id] = z[2 * i];
1357 5969280 : move16();
1358 5969280 : y[id] = z[2 * i + 1];
1359 5969280 : move16();
1360 : }
1361 :
1362 93270 : return;
1363 : }
1364 :
1365 :
1366 : /*-----------------------------------------------------------------*
1367 : * fft32_5()
1368 : * 32-point FFT called for 5 times
1369 : *-----------------------------------------------------------------*/
1370 388540 : static void fft32_5_16fx(
1371 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1372 : Word16 *y, /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1373 : const Word16 *Idx /* i : pointer of the address table */
1374 : )
1375 : {
1376 : Word16 i, id, jd;
1377 : Word16 z[64];
1378 :
1379 12821820 : FOR( i = 0; i < 32; i++ )
1380 : {
1381 12433280 : id = Idx[i];
1382 12433280 : move16();
1383 12433280 : z[2 * i] = x[id];
1384 12433280 : move16();
1385 12433280 : z[2 * i + 1] = y[id];
1386 12433280 : move16();
1387 : }
1388 :
1389 388540 : cdftForw_16fx( 64, z, Ip_fft32, w_fft32_16fx );
1390 :
1391 12821820 : FOR( i = 0; i < 32; i++ )
1392 : {
1393 12433280 : jd = Odx_fft32_5[i];
1394 12433280 : move16();
1395 12433280 : id = Idx[jd];
1396 12433280 : move16();
1397 12433280 : x[id] = z[2 * i];
1398 12433280 : move16();
1399 12433280 : y[id] = z[2 * i + 1];
1400 12433280 : move16();
1401 : }
1402 :
1403 388540 : return;
1404 : }
1405 :
1406 :
1407 : /*-----------------------------------------------------------------*
1408 : * DoRTFT160()
1409 : * a low complexity 2-dimensional DFT of 160 points
1410 : *-----------------------------------------------------------------*/
1411 77708 : void DoRTFT160_16fx(
1412 : Word16 x[], /* i/o : real part of input and output data Q(Qx+Q_edct) */
1413 : Word16 y[] /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1414 : )
1415 : {
1416 : Word16 j;
1417 :
1418 : /* Applying 32-point FFT for 5 times based on the address table Idx_dortft160 */
1419 466248 : FOR( j = 0; j < 5; j++ )
1420 : {
1421 388540 : fft32_5_16fx( x, y, Idx_dortft160 + shl( j, 5 ) /*32*j*/ );
1422 : }
1423 :
1424 : /* Applying 5-point FFT for 32 times based on the address table Idx_dortft160 */
1425 2564364 : FOR( j = 0; j < 32; j++ )
1426 : {
1427 2486656 : fft5_32_16fx( x, y, Idx_dortft160 + j );
1428 : }
1429 :
1430 77708 : return;
1431 : }
1432 :
1433 : /*-----------------------------------------------------------------*
1434 : * DoRTFT320()
1435 : * a low complexity 2-dimensional DFT of 320 points
1436 : *-----------------------------------------------------------------*/
1437 18654 : void DoRTFT320_16fx(
1438 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct) */
1439 : Word16 *y /* i/o : imaginary part of input and output data Q(Qx+Q_edct) */
1440 : )
1441 : {
1442 : Word16 j;
1443 :
1444 : /* Applying 64-point FFT for 5 times based on the address table Idx_dortft160 */
1445 111924 : FOR( j = 0; j < 5; j++ )
1446 : {
1447 93270 : fft64_16fx( x, y, Idx_dortft320 + shl( j, 6 ) /*64*j*/ );
1448 : }
1449 :
1450 : /* Applying 5-point FFT for 64 times based on the address table Idx_dortft160 */
1451 1212510 : FOR( j = 0; j < 64; j++ )
1452 : {
1453 1193856 : fft5_shift4_16fx( 64, x, y, Idx_dortft320 + j );
1454 : }
1455 :
1456 18654 : return;
1457 : }
1458 :
1459 : /*-----------------------------------------------------------------*
1460 : * DoRTFT128()
1461 : * FFT with 128 points
1462 : *-----------------------------------------------------------------*/
1463 121189 : void DoRTFT128_16fx(
1464 : Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct)*/
1465 : Word16 *y /* i/o : imaginary part of input and output data Q(Qx+Q_edct)*/
1466 : )
1467 : {
1468 :
1469 : Word16 i;
1470 : Word16 z[256];
1471 :
1472 15633381 : FOR( i = 0; i < 128; i++ )
1473 : {
1474 15512192 : z[2 * i] = x[i];
1475 15512192 : move16();
1476 15512192 : z[2 * i + 1] = y[i];
1477 15512192 : move16();
1478 : }
1479 :
1480 121189 : cdftForw_16fx( 256, z, Ip_fft128, w_fft128_16fx );
1481 :
1482 121189 : x[0] = z[0];
1483 121189 : move16();
1484 121189 : y[0] = z[1];
1485 121189 : move16();
1486 15512192 : FOR( i = 1; i < 128; i++ )
1487 : {
1488 15391003 : x[128 - i] = z[2 * i];
1489 15391003 : move16();
1490 15391003 : y[128 - i] = z[2 * i + 1];
1491 15391003 : move16();
1492 : }
1493 :
1494 121189 : return;
1495 : }
1496 : /*-----------------------------------------------------------------*
1497 : * cdftForw()
1498 : * Main fuction of Complex Discrete Fourier Transform
1499 : *-----------------------------------------------------------------*/
1500 602999 : static void cdftForw_16fx(
1501 : Word16 n, /* i : data length of real and imag */
1502 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1503 : const Word16 *ip, /* i : work area for bit reversal */
1504 : const Word32 *w /* i : cos/sin table Q30*/
1505 : )
1506 : {
1507 : /* bit reversal */
1508 602999 : bitrv2_SR_16fx( n, ip + 2, a );
1509 :
1510 : /* Do FFT */
1511 602999 : cftfsub_16fx( n, a, w );
1512 602999 : }
1513 :
1514 : /*-----------------------------------------------------------------*
1515 : * bitrv2_SR()
1516 : * Bit reversal
1517 : *-----------------------------------------------------------------*/
1518 602999 : static void bitrv2_SR_16fx(
1519 : Word16 n, /* i : data length of real and imag */
1520 : const Word16 *ip, /* i/o : work area for bit reversal */
1521 : Word16 *a /* i/o : input/output data Q(Qx+Q_edct)*/
1522 : )
1523 : {
1524 : Word16 j, j1, k, k1, m, m2;
1525 : Word16 l;
1526 : Word16 xr, xi, yr, yi;
1527 :
1528 602999 : l = n;
1529 602999 : move16();
1530 602999 : m = 1;
1531 602999 : move16();
1532 :
1533 1930186 : WHILE( ( ( m << 3 ) < l ) )
1534 : {
1535 1327187 : l = shr( l, 1 );
1536 1327187 : m = shl( m, 1 );
1537 : }
1538 :
1539 602999 : m2 = shl( m, 1 );
1540 602999 : IF( EQ_16( shl( m, 3 ), l ) )
1541 : {
1542 466350 : FOR( k = 0; k < m; k++ )
1543 : {
1544 932700 : FOR( j = 0; j < k; j++ )
1545 : {
1546 559620 : j1 = add( shl( j, 1 ), ip[k] );
1547 559620 : k1 = add( shl( k, 1 ), ip[j] );
1548 559620 : xr = a[j1];
1549 559620 : move16();
1550 559620 : xi = a[j1 + 1];
1551 559620 : move16();
1552 559620 : yr = a[k1];
1553 559620 : move16();
1554 559620 : yi = a[k1 + 1];
1555 559620 : move16();
1556 559620 : a[j1] = yr;
1557 559620 : move16();
1558 559620 : a[j1 + 1] = yi;
1559 559620 : move16();
1560 559620 : a[k1] = xr;
1561 559620 : move16();
1562 559620 : a[k1 + 1] = xi;
1563 559620 : move16();
1564 559620 : j1 = add( j1, m2 );
1565 559620 : k1 = add( k1, shl( m2, 1 ) );
1566 559620 : xr = a[j1];
1567 559620 : move16();
1568 559620 : xi = a[j1 + 1];
1569 559620 : move16();
1570 559620 : yr = a[k1];
1571 559620 : move16();
1572 559620 : yi = a[k1 + 1];
1573 559620 : move16();
1574 559620 : a[j1] = yr;
1575 559620 : move16();
1576 559620 : a[j1 + 1] = yi;
1577 559620 : move16();
1578 559620 : a[k1] = xr;
1579 559620 : move16();
1580 559620 : a[k1 + 1] = xi;
1581 559620 : move16();
1582 559620 : j1 = add( j1, m2 );
1583 559620 : k1 = sub( k1, m2 );
1584 559620 : xr = a[j1];
1585 559620 : move16();
1586 559620 : xi = a[j1 + 1];
1587 559620 : move16();
1588 559620 : xi = a[j1 + 1];
1589 559620 : move16();
1590 559620 : yr = a[k1];
1591 559620 : move16();
1592 559620 : yi = a[k1 + 1];
1593 559620 : move16();
1594 559620 : a[j1] = yr;
1595 559620 : move16();
1596 559620 : a[j1 + 1] = yi;
1597 559620 : move16();
1598 559620 : a[k1] = xr;
1599 559620 : move16();
1600 559620 : a[k1 + 1] = xi;
1601 559620 : move16();
1602 559620 : j1 = add( j1, m2 );
1603 559620 : k1 = add( k1, shl( m2, 1 ) );
1604 559620 : xr = a[j1];
1605 559620 : move16();
1606 559620 : xi = a[j1 + 1];
1607 559620 : move16();
1608 559620 : yr = a[k1];
1609 559620 : move16();
1610 559620 : yi = a[k1 + 1];
1611 559620 : move16();
1612 559620 : a[j1] = yr;
1613 559620 : move16();
1614 559620 : a[j1 + 1] = yi;
1615 559620 : move16();
1616 559620 : a[k1] = xr;
1617 559620 : move16();
1618 559620 : a[k1 + 1] = xi;
1619 559620 : move16();
1620 : }
1621 :
1622 373080 : j1 = add( add( shl( k, 1 ), m2 ), ip[k] );
1623 373080 : k1 = add( j1, m2 );
1624 373080 : xr = a[j1];
1625 373080 : move16();
1626 373080 : xi = a[j1 + 1];
1627 373080 : move16();
1628 373080 : yr = a[k1];
1629 373080 : move16();
1630 373080 : yi = a[k1 + 1];
1631 373080 : move16();
1632 373080 : a[j1] = yr;
1633 373080 : move16();
1634 373080 : a[j1 + 1] = yi;
1635 373080 : move16();
1636 373080 : a[k1] = xr;
1637 373080 : move16();
1638 373080 : a[k1 + 1] = xi;
1639 373080 : move16();
1640 : }
1641 : }
1642 : ELSE
1643 : {
1644 2523672 : FOR( k = 1; k < m; k++ )
1645 : {
1646 7738475 : FOR( j = 0; j < k; j++ )
1647 : {
1648 5724532 : j1 = add( shl( j, 1 ), ip[k] );
1649 5724532 : k1 = add( shl( k, 1 ), ip[j] );
1650 5724532 : xr = a[j1];
1651 5724532 : move16();
1652 5724532 : xi = a[j1 + 1];
1653 5724532 : move16();
1654 5724532 : yr = a[k1];
1655 5724532 : move16();
1656 5724532 : yi = a[k1 + 1];
1657 5724532 : move16();
1658 5724532 : a[j1] = yr;
1659 5724532 : move16();
1660 5724532 : a[j1 + 1] = yi;
1661 5724532 : move16();
1662 5724532 : a[k1] = xr;
1663 5724532 : move16();
1664 5724532 : a[k1 + 1] = xi;
1665 5724532 : move16();
1666 5724532 : j1 = add( j1, m2 );
1667 5724532 : k1 = add( k1, m2 );
1668 5724532 : xr = a[j1];
1669 5724532 : move16();
1670 5724532 : xi = a[j1 + 1];
1671 5724532 : move16();
1672 5724532 : yr = a[k1];
1673 5724532 : move16();
1674 5724532 : yi = a[k1 + 1];
1675 5724532 : move16();
1676 5724532 : a[j1] = yr;
1677 5724532 : move16();
1678 5724532 : a[j1 + 1] = yi;
1679 5724532 : move16();
1680 5724532 : a[k1] = xr;
1681 5724532 : move16();
1682 5724532 : a[k1 + 1] = xi;
1683 5724532 : move16();
1684 : }
1685 : }
1686 : }
1687 :
1688 602999 : return;
1689 : }
1690 :
1691 : /*-----------------------------------------------------------------*
1692 : * cftfsub()
1693 : * Complex Discrete Fourier Transform
1694 : *-----------------------------------------------------------------*/
1695 602999 : static void cftfsub_16fx(
1696 : Word16 n, /* i : data length of real and imag */
1697 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1698 : const Word32 *w /* i : cos/sin table Q30*/
1699 : )
1700 : {
1701 : Word16 j, j1, j2, j3, l;
1702 : Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1703 : #ifndef ISSUE_1836_replace_overflow_libcom
1704 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1705 : Flag Overflow = 0;
1706 : move32();
1707 : #endif
1708 : #endif
1709 :
1710 602999 : l = 2;
1711 602999 : move16();
1712 602999 : IF( GT_16( n, 8 ) )
1713 : {
1714 602999 : cft1st_16fx( n, a, w );
1715 602999 : l = 8;
1716 602999 : move16();
1717 1327187 : WHILE( ( ( l << 2 ) < n ) )
1718 : {
1719 724188 : cftmdl_16fx( n, l, a, w );
1720 724188 : l = shl( l, 2 );
1721 : }
1722 : }
1723 :
1724 602999 : IF( EQ_16( shl( l, 2 ), n ) )
1725 : {
1726 1585590 : FOR( j = 0; j < l; j += 2 )
1727 : {
1728 1492320 : j1 = add( j, l );
1729 1492320 : j2 = add( j1, l );
1730 1492320 : j3 = add( j2, l );
1731 1492320 : x0r = add( a[j], a[j1] );
1732 1492320 : x0i = add( a[j + 1], a[j1 + 1] );
1733 1492320 : x1r = sub( a[j], a[j1] );
1734 1492320 : x1i = sub( a[j + 1], a[j1 + 1] );
1735 1492320 : x2r = add( a[j2], a[j3] );
1736 1492320 : x2i = add( a[j2 + 1], a[j3 + 1] );
1737 1492320 : x3r = sub( a[j2], a[j3] );
1738 1492320 : x3i = sub( a[j2 + 1], a[j3 + 1] );
1739 1492320 : a[j] = add( x0r, x2r );
1740 1492320 : move16();
1741 1492320 : a[j + 1] = add( x0i, x2i );
1742 1492320 : move16();
1743 1492320 : a[j2] = sub( x0r, x2r );
1744 1492320 : move16();
1745 1492320 : a[j2 + 1] = sub( x0i, x2i );
1746 1492320 : move16();
1747 1492320 : a[j1] = sub( x1r, x3i );
1748 1492320 : move16();
1749 1492320 : a[j1 + 1] = add( x1i, x3r );
1750 1492320 : move16();
1751 1492320 : a[j3] = add( x1r, x3i );
1752 1492320 : move16();
1753 1492320 : a[j3 + 1] = sub( x1i, x3r );
1754 1492320 : move16();
1755 : }
1756 : }
1757 : ELSE
1758 : {
1759 14482465 : FOR( j = 0; j < l; j += 2 )
1760 : {
1761 : #ifdef ISSUE_1836_replace_overflow_libcom
1762 13972736 : j1 = add_sat( j, l );
1763 13972736 : x0r = sub_sat( a[j], a[j1] );
1764 13972736 : x0i = sub_sat( a[j + 1], a[j1 + 1] );
1765 13972736 : a[j] = add_sat( a[j], a[j1] );
1766 13972736 : move16();
1767 13972736 : a[j + 1] = add_sat( a[j + 1], a[j1 + 1] );
1768 13972736 : move16();
1769 : #else
1770 : j1 = add_o( j, l, &Overflow );
1771 : x0r = sub_o( a[j], a[j1], &Overflow );
1772 : x0i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
1773 : a[j] = add_o( a[j], a[j1], &Overflow );
1774 : move16();
1775 : a[j + 1] = add_o( a[j + 1], a[j1 + 1], &Overflow );
1776 : move16();
1777 : #endif
1778 13972736 : a[j1] = x0r;
1779 13972736 : move16();
1780 13972736 : a[j1 + 1] = x0i;
1781 13972736 : move16();
1782 : }
1783 : }
1784 602999 : return;
1785 : }
1786 :
1787 : /*-----------------------------------------------------------------*
1788 : * cft1st()
1789 : * Subfunction of Complex Discrete Fourier Transform
1790 : *-----------------------------------------------------------------*/
1791 602999 : static void cft1st_16fx(
1792 : Word16 n, /* i : data length of real and imag */
1793 : Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
1794 : const Word32 *w /* i : cos/sin table Q30*/
1795 : )
1796 : {
1797 : Word16 j, k1, k2;
1798 : Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1799 : Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1800 : Word16 tmp;
1801 : Word32 L_tmp;
1802 : #ifndef ISSUE_1836_replace_overflow_libcom
1803 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1804 : Flag Overflow = 0;
1805 : move32();
1806 : #endif
1807 : #endif
1808 :
1809 : #ifdef ISSUE_1836_replace_overflow_libcom
1810 602999 : x0r = add_sat( a[0], a[2] );
1811 602999 : x0i = add_sat( a[1], a[3] );
1812 602999 : x1r = sub_sat( a[0], a[2] );
1813 602999 : x1i = sub_sat( a[1], a[3] );
1814 602999 : x2r = add_sat( a[4], a[6] );
1815 602999 : x2i = add_sat( a[5], a[7] );
1816 602999 : x3r = sub_sat( a[4], a[6] );
1817 602999 : x3i = sub_sat( a[5], a[7] );
1818 602999 : a[0] = add_sat( x0r, x2r );
1819 602999 : move16();
1820 602999 : a[1] = add_sat( x0i, x2i );
1821 602999 : move16();
1822 602999 : a[4] = sub_sat( x0r, x2r );
1823 602999 : move16();
1824 602999 : a[5] = sub_sat( x0i, x2i );
1825 602999 : move16();
1826 602999 : a[2] = sub_sat( x1r, x3i );
1827 602999 : move16();
1828 602999 : a[3] = add_sat( x1i, x3r );
1829 602999 : move16();
1830 602999 : a[6] = add_sat( x1r, x3i );
1831 602999 : move16();
1832 602999 : a[7] = sub_sat( x1i, x3r );
1833 602999 : wk1r = w[2];
1834 602999 : move32();
1835 :
1836 602999 : x0r = add_sat( a[8], a[10] );
1837 602999 : x0i = add_sat( a[9], a[11] );
1838 602999 : x1r = sub_sat( a[8], a[10] );
1839 602999 : x1i = sub_sat( a[9], a[11] );
1840 602999 : x2r = add_sat( a[12], a[14] );
1841 602999 : x2i = add_sat( a[13], a[15] );
1842 602999 : x3r = sub_sat( a[12], a[14] );
1843 602999 : x3i = sub_sat( a[13], a[15] );
1844 602999 : a[8] = add_sat( x0r, x2r );
1845 602999 : move16();
1846 602999 : a[9] = add_sat( x0i, x2i );
1847 602999 : move16();
1848 602999 : a[12] = sub_sat( x2i, x0i );
1849 602999 : move16();
1850 602999 : a[13] = sub_sat( x0r, x2r );
1851 602999 : move16();
1852 :
1853 602999 : x0r = sub_sat( x1r, x3i );
1854 602999 : x0i = add_sat( x1i, x3r );
1855 602999 : tmp = sub_sat( x0r, x0i );
1856 602999 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1857 :
1858 602999 : a[10] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1859 602999 : move16();
1860 :
1861 602999 : tmp = add_sat( x0r, x0i );
1862 602999 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1863 602999 : a[11] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /* Q(Qx+Q_edct)*/
1864 602999 : move16();
1865 :
1866 602999 : x0r = add_sat( x3i, x1r );
1867 602999 : x0i = sub_sat( x3r, x1i );
1868 602999 : tmp = sub_sat( x0i, x0r );
1869 602999 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1870 602999 : a[14] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1871 602999 : move16();
1872 :
1873 602999 : tmp = add_sat( x0i, x0r );
1874 602999 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1875 602999 : a[15] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1876 602999 : move16();
1877 602999 : k1 = 0;
1878 602999 : move16();
1879 : #else
1880 : x0r = add_o( a[0], a[2], &Overflow );
1881 : x0i = add_o( a[1], a[3], &Overflow );
1882 : x1r = sub_o( a[0], a[2], &Overflow );
1883 : x1i = sub_o( a[1], a[3], &Overflow );
1884 : x2r = add_o( a[4], a[6], &Overflow );
1885 : x2i = add_o( a[5], a[7], &Overflow );
1886 : x3r = sub_o( a[4], a[6], &Overflow );
1887 : x3i = sub_o( a[5], a[7], &Overflow );
1888 : a[0] = add_o( x0r, x2r, &Overflow );
1889 : move16();
1890 : a[1] = add_o( x0i, x2i, &Overflow );
1891 : move16();
1892 : a[4] = sub_o( x0r, x2r, &Overflow );
1893 : move16();
1894 : a[5] = sub_o( x0i, x2i, &Overflow );
1895 : move16();
1896 : a[2] = sub_o( x1r, x3i, &Overflow );
1897 : move16();
1898 : a[3] = add_o( x1i, x3r, &Overflow );
1899 : move16();
1900 : a[6] = add_o( x1r, x3i, &Overflow );
1901 : move16();
1902 : a[7] = sub_o( x1i, x3r, &Overflow );
1903 : wk1r = w[2];
1904 : move32();
1905 :
1906 : x0r = add_o( a[8], a[10], &Overflow );
1907 : x0i = add_o( a[9], a[11], &Overflow );
1908 : x1r = sub_o( a[8], a[10], &Overflow );
1909 : x1i = sub_o( a[9], a[11], &Overflow );
1910 : x2r = add_o( a[12], a[14], &Overflow );
1911 : x2i = add_o( a[13], a[15], &Overflow );
1912 : x3r = sub_o( a[12], a[14], &Overflow );
1913 : x3i = sub_o( a[13], a[15], &Overflow );
1914 : a[8] = add_o( x0r, x2r, &Overflow );
1915 : move16();
1916 : a[9] = add_o( x0i, x2i, &Overflow );
1917 : move16();
1918 : a[12] = sub_o( x2i, x0i, &Overflow );
1919 : move16();
1920 : a[13] = sub_o( x0r, x2r, &Overflow );
1921 : move16();
1922 :
1923 : x0r = sub_o( x1r, x3i, &Overflow );
1924 : x0i = add_o( x1i, x3r, &Overflow );
1925 : tmp = sub_o( x0r, x0i, &Overflow );
1926 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1927 :
1928 : a[10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1929 : move16();
1930 :
1931 : tmp = add_o( x0r, x0i, &Overflow );
1932 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1933 : a[11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /* Q(Qx+Q_edct) */
1934 : move16();
1935 :
1936 : x0r = add_o( x3i, x1r, &Overflow );
1937 : x0i = sub_o( x3r, x1i, &Overflow );
1938 : tmp = sub_o( x0i, x0r, &Overflow );
1939 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1940 : a[14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1941 : move16();
1942 :
1943 : tmp = add_o( x0i, x0r, &Overflow );
1944 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
1945 : a[15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
1946 : move16();
1947 : k1 = 0;
1948 : move16();
1949 : #endif
1950 :
1951 4239344 : FOR( j = 16; j < n; j += 16 )
1952 : {
1953 3636345 : k1 = add( k1, 2 );
1954 3636345 : k2 = shl( k1, 1 );
1955 :
1956 3636345 : wk2r = w[k1];
1957 3636345 : move32();
1958 3636345 : wk2i = w[k1 + 1];
1959 3636345 : move32();
1960 3636345 : wk1r = w[k2];
1961 3636345 : move32();
1962 3636345 : wk1i = w[k2 + 1];
1963 3636345 : move32();
1964 :
1965 3636345 : L_tmp = L_shl( Mult_32_32( wk2i, wk1i ), 1 ); /*Q29 */
1966 3636345 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
1967 :
1968 3636345 : L_tmp = L_shl( Mult_32_32( wk2i, wk1r ), 1 ); /*Q29 */
1969 3636345 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
1970 : #ifdef ISSUE_1836_replace_overflow_libcom
1971 3636345 : x0r = add_sat( a[j], a[j + 2] );
1972 3636345 : x0i = add_sat( a[j + 1], a[j + 3] );
1973 3636345 : x1r = sub_sat( a[j], a[j + 2] );
1974 3636345 : x1i = sub_sat( a[j + 1], a[j + 3] );
1975 3636345 : x2r = add_sat( a[j + 4], a[j + 6] );
1976 3636345 : x2i = add_sat( a[j + 5], a[j + 7] );
1977 3636345 : x3r = sub_sat( a[j + 4], a[j + 6] );
1978 3636345 : x3i = sub_sat( a[j + 5], a[j + 7] );
1979 3636345 : a[j] = add_sat( x0r, x2r );
1980 3636345 : move16();
1981 3636345 : a[j + 1] = add_sat( x0i, x2i );
1982 3636345 : move16();
1983 :
1984 3636345 : x0r = sub_sat( x0r, x2r );
1985 3636345 : x0i = sub_sat( x0i, x2i );
1986 3636345 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
1987 3636345 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
1988 3636345 : a[j + 4] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1989 3636345 : move16();
1990 :
1991 3636345 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
1992 3636345 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
1993 3636345 : a[j + 5] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
1994 3636345 : move16();
1995 :
1996 3636345 : x0r = sub_sat( x1r, x3i );
1997 3636345 : x0i = add_sat( x1i, x3r );
1998 3636345 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
1999 3636345 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2000 3636345 : a[j + 2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2001 3636345 : move16();
2002 :
2003 3636345 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2004 3636345 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2005 3636345 : a[j + 3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2006 3636345 : move16();
2007 :
2008 3636345 : x0r = add_sat( x1r, x3i );
2009 3636345 : x0i = sub_sat( x1i, x3r );
2010 3636345 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2011 3636345 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2012 3636345 : a[j + 6] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2013 3636345 : move16();
2014 :
2015 3636345 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2016 3636345 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2017 3636345 : a[j + 7] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2018 3636345 : move16();
2019 :
2020 3636345 : wk1r = w[k2 + 2];
2021 3636345 : move32();
2022 3636345 : wk1i = w[k2 + 3];
2023 3636345 : move32();
2024 3636345 : L_tmp = L_shl( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
2025 3636345 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
2026 :
2027 3636345 : L_tmp = L_shl( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
2028 3636345 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
2029 :
2030 3636345 : x0r = add_sat( a[j + 8], a[j + 10] );
2031 3636345 : x0i = add_sat( a[j + 9], a[j + 11] );
2032 3636345 : x1r = sub_sat( a[j + 8], a[j + 10] );
2033 3636345 : x1i = sub_sat( a[j + 9], a[j + 11] );
2034 3636345 : x2r = add_sat( a[j + 12], a[j + 14] );
2035 3636345 : x2i = add_sat( a[j + 13], a[j + 15] );
2036 3636345 : x3r = sub_sat( a[j + 12], a[j + 14] );
2037 3636345 : x3i = sub_sat( a[j + 13], a[j + 15] );
2038 3636345 : a[j + 8] = add_sat( x0r, x2r );
2039 3636345 : move16();
2040 3636345 : a[j + 9] = add_sat( x0i, x2i );
2041 3636345 : move16();
2042 :
2043 3636345 : x0r = sub_sat( x0r, x2r );
2044 3636345 : x0i = sub_sat( x0i, x2i );
2045 3636345 : tmp = negate( x0r );
2046 3636345 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2047 3636345 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2048 3636345 : a[j + 12] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2049 3636345 : move16();
2050 :
2051 3636345 : tmp = negate( x0i );
2052 3636345 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2053 3636345 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2054 3636345 : a[j + 13] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2055 3636345 : move16();
2056 :
2057 3636345 : x0r = sub_sat( x1r, x3i );
2058 3636345 : x0i = add_sat( x1i, x3r );
2059 3636345 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2060 3636345 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2061 3636345 : a[j + 10] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2062 3636345 : move16();
2063 :
2064 3636345 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2065 3636345 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2066 3636345 : a[j + 11] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2067 3636345 : move16();
2068 :
2069 3636345 : x0r = add_sat( x1r, x3i );
2070 3636345 : x0i = sub_sat( x1i, x3r );
2071 :
2072 3636345 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2073 3636345 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2074 3636345 : a[j + 14] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2075 3636345 : move16();
2076 :
2077 3636345 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2078 3636345 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2079 3636345 : a[j + 15] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2080 3636345 : move16();
2081 : #else
2082 : x0r = add_o( a[j], a[j + 2], &Overflow );
2083 : x0i = add_o( a[j + 1], a[j + 3], &Overflow );
2084 : x1r = sub_o( a[j], a[j + 2], &Overflow );
2085 : x1i = sub_o( a[j + 1], a[j + 3], &Overflow );
2086 : x2r = add_o( a[j + 4], a[j + 6], &Overflow );
2087 : x2i = add_o( a[j + 5], a[j + 7], &Overflow );
2088 : x3r = sub_o( a[j + 4], a[j + 6], &Overflow );
2089 : x3i = sub_o( a[j + 5], a[j + 7], &Overflow );
2090 : a[j] = add_o( x0r, x2r, &Overflow );
2091 : move16();
2092 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2093 : move16();
2094 :
2095 : x0r = sub_o( x0r, x2r, &Overflow );
2096 : x0i = sub_o( x0i, x2i, &Overflow );
2097 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2098 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
2099 : a[j + 4] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2100 : move16();
2101 :
2102 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2103 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
2104 : a[j + 5] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2105 : move16();
2106 :
2107 : x0r = sub_o( x1r, x3i, &Overflow );
2108 : x0i = add_o( x1i, x3r, &Overflow );
2109 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2110 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2111 : a[j + 2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2112 : move16();
2113 :
2114 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2115 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2116 : a[j + 3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2117 : move16();
2118 :
2119 : x0r = add_o( x1r, x3i, &Overflow );
2120 : x0i = sub_o( x1i, x3r, &Overflow );
2121 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2122 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2123 : a[j + 6] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2124 : move16();
2125 :
2126 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2127 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2128 : a[j + 7] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2129 : move16();
2130 :
2131 : wk1r = w[k2 + 2];
2132 : move32();
2133 : wk1i = w[k2 + 3];
2134 : move32();
2135 : L_tmp = L_shl( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
2136 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
2137 :
2138 : L_tmp = L_shl( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
2139 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
2140 :
2141 : x0r = add_o( a[j + 8], a[j + 10], &Overflow );
2142 : x0i = add_o( a[j + 9], a[j + 11], &Overflow );
2143 : x1r = sub_o( a[j + 8], a[j + 10], &Overflow );
2144 : x1i = sub_o( a[j + 9], a[j + 11], &Overflow );
2145 : x2r = add_o( a[j + 12], a[j + 14], &Overflow );
2146 : x2i = add_o( a[j + 13], a[j + 15], &Overflow );
2147 : x3r = sub_o( a[j + 12], a[j + 14], &Overflow );
2148 : x3i = sub_o( a[j + 13], a[j + 15], &Overflow );
2149 : a[j + 8] = add_o( x0r, x2r, &Overflow );
2150 : move16();
2151 : a[j + 9] = add_o( x0i, x2i, &Overflow );
2152 : move16();
2153 :
2154 : x0r = sub_o( x0r, x2r, &Overflow );
2155 : x0i = sub_o( x0i, x2i, &Overflow );
2156 : tmp = negate( x0r );
2157 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2158 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2159 : a[j + 12] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2160 : move16();
2161 :
2162 : tmp = negate( x0i );
2163 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2164 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2165 : a[j + 13] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2166 : move16();
2167 :
2168 : x0r = sub_o( x1r, x3i, &Overflow );
2169 : x0i = add_o( x1i, x3r, &Overflow );
2170 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2171 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2172 : a[j + 10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2173 : move16();
2174 :
2175 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2176 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2177 : a[j + 11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2178 : move16();
2179 :
2180 : x0r = add_o( x1r, x3i, &Overflow );
2181 : x0i = sub_o( x1i, x3r, &Overflow );
2182 :
2183 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2184 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2185 : a[j + 14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2186 : move16();
2187 :
2188 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2189 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2190 : a[j + 15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2191 : move16();
2192 : #endif
2193 : }
2194 :
2195 602999 : return;
2196 : }
2197 :
2198 : /*-----------------------------------------------------------------*
2199 : * cftmdl_16fx()
2200 : * Subfunction of Complex Discrete Fourier Transform: one in-place pass of 4-input butterflies over a[] (real part at an even index, imaginary part at the next index), combining elements spaced l apart and multiplying the results by twiddle factors read from w[]
2201 : *-----------------------------------------------------------------*/
2202 724188 : static void cftmdl_16fx(
2203 : Word16 n, /* i : data length of real and imag; exclusive upper bound of the outer k loop over a[] */
2204 : Word16 l, /* i : initial shift for processing: index distance between the four inputs of one butterfly */
2205 : Word16 *a, /* i/o : input/output data, overwritten in place Q(Qx+Q_edct)*/
2206 : const Word32 *w /* i : cos/sin table, read as (w[idx], w[idx + 1]) = (real, imag) pairs Q30*/
2207 : )
2208 : {
2209 : Word16 j, j1, j2, j3, k, k1, k2, m, m2; /* j..j3, k, m, m2 index a[]; k1, k2 index w[] */
2210 : Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; /* twiddle factors, real/imag parts Q30 */
2211 : Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; /* butterfly intermediates, real/imag parts */
2212 : Word16 tmp, tmp2; /* tmp: 16-bit scratch operand; tmp2: end index of the current inner loop */
2213 : Word32 L_tmp; /* 32-bit product/accumulator before rounding back to 16 bits */
2214 : Word32 L_x0r, L_x0i; /* 32-bit sum/difference used only for the wk3 butterfly in the first half of the k loop */
2215 : #ifndef ISSUE_1836_replace_overflow_libcom
2216 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
2217 : Flag Overflow = 0; /* local flag whose address is handed to the *_o operators below */
2218 : move32();
2219 : #endif /* BASOP_NOGLOB_DECLARE_LOCAL */
2220 : #endif /* !ISSUE_1836_replace_overflow_libcom */
2221 724188 : m = shl( l, 2 ); /* m = l shifted left by 2 (4 * l): one group of four l-spaced quarters */
2222 5075208 : FOR( j = 0; j < l; j += 2 ) /* Group 0 (j = 0 .. l-1): butterflies with no twiddle multiplication */
2223 : {
2224 : #ifdef ISSUE_1836_replace_overflow_libcom
2225 4351020 : j1 = add_sat( j, l ); /* j1, j2, j3: the other three butterfly inputs, each l further on */
2226 4351020 : j2 = add_sat( j1, l );
2227 4351020 : j3 = add_sat( j2, l );
2228 4351020 : x0r = add_sat( a[j], a[j1] ); /* x0 = a[j] + a[j1] */
2229 4351020 : x0i = add_sat( a[j + 1], a[j1 + 1] );
2230 4351020 : x1r = sub_sat( a[j], a[j1] ); /* x1 = a[j] - a[j1] */
2231 4351020 : x1i = sub_sat( a[j + 1], a[j1 + 1] );
2232 4351020 : x2r = add_sat( a[j2], a[j3] ); /* x2 = a[j2] + a[j3] */
2233 4351020 : x2i = add_sat( a[j2 + 1], a[j3 + 1] );
2234 4351020 : x3r = sub_sat( a[j2], a[j3] ); /* x3 = a[j2] - a[j3] */
2235 4351020 : x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
2236 4351020 : a[j] = add_sat( x0r, x2r ); /* a[j] = x0 + x2 */
2237 4351020 : move16();
2238 4351020 : a[j + 1] = add_sat( x0i, x2i );
2239 4351020 : move16();
2240 4351020 : a[j2] = sub_sat( x0r, x2r ); /* a[j2] = x0 - x2 */
2241 4351020 : move16();
2242 4351020 : a[j2 + 1] = sub_sat( x0i, x2i );
2243 4351020 : move16();
2244 4351020 : a[j1] = sub_sat( x1r, x3i ); /* a[j1] = x1 + i*x3 = (x1r - x3i, x1i + x3r) */
2245 4351020 : move16();
2246 4351020 : a[j1 + 1] = add_sat( x1i, x3r );
2247 4351020 : move16();
2248 4351020 : a[j3] = add_sat( x1r, x3i ); /* a[j3] = x1 - i*x3 = (x1r + x3i, x1i - x3r) */
2249 4351020 : move16();
2250 4351020 : a[j3 + 1] = sub_sat( x1i, x3r );
2251 4351020 : move16();
2252 : #else /* same statements written with the *_o operators and the local Overflow flag */
2253 : j1 = add_o( j, l, &Overflow );
2254 : j2 = add_o( j1, l, &Overflow );
2255 : j3 = add_o( j2, l, &Overflow );
2256 : x0r = add_o( a[j], a[j1], &Overflow );
2257 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2258 : x1r = sub_o( a[j], a[j1], &Overflow );
2259 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2260 : x2r = add_o( a[j2], a[j3], &Overflow );
2261 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2262 : x3r = sub_o( a[j2], a[j3], &Overflow );
2263 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2264 : a[j] = add_o( x0r, x2r, &Overflow );
2265 : move16();
2266 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2267 : move16();
2268 : a[j2] = sub_o( x0r, x2r, &Overflow );
2269 : move16();
2270 : a[j2 + 1] = sub_o( x0i, x2i, &Overflow );
2271 : move16();
2272 : a[j1] = sub_o( x1r, x3i, &Overflow );
2273 : move16();
2274 : a[j1 + 1] = add_o( x1i, x3r, &Overflow );
2275 : move16();
2276 : a[j3] = add_o( x1r, x3i, &Overflow );
2277 : move16();
2278 : a[j3 + 1] = sub_o( x1i, x3r, &Overflow );
2279 : move16();
2280 : #endif /* ISSUE_1836_replace_overflow_libcom */
2281 : }
2282 : /* Group 1 (j = m .. m+l-1): every product uses the single real factor wk1r = w[2] */
2283 724188 : wk1r = w[2]; /* presumably cos(pi/4) in Q30 - TODO confirm against the table generator */
2284 724188 : move32();
2285 724188 : tmp2 = add( l, m ); /* end index of group 1 */
2286 5075208 : FOR( j = m; j < tmp2; j += 2 )
2287 : {
2288 : #ifdef ISSUE_1836_replace_overflow_libcom
2289 4351020 : j1 = add_sat( j, l );
2290 4351020 : j2 = add_sat( j1, l );
2291 4351020 : j3 = add_sat( j2, l );
2292 4351020 : x0r = add_sat( a[j], a[j1] ); /* x0..x3 formed exactly as in group 0 */
2293 4351020 : x0i = add_sat( a[j + 1], a[j1 + 1] );
2294 4351020 : x1r = sub_sat( a[j], a[j1] );
2295 4351020 : x1i = sub_sat( a[j + 1], a[j1 + 1] );
2296 4351020 : x2r = add_sat( a[j2], a[j3] );
2297 4351020 : x2i = add_sat( a[j2 + 1], a[j3 + 1] );
2298 4351020 : x3r = sub_sat( a[j2], a[j3] );
2299 4351020 : x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
2300 4351020 : a[j] = add_sat( x0r, x2r ); /* a[j] = x0 + x2 */
2301 4351020 : move16();
2302 4351020 : a[j + 1] = add_sat( x0i, x2i );
2303 4351020 : move16();
2304 4351020 : a[j2] = sub_sat( x2i, x0i ); /* a[j2] = i*(x0 - x2) = (x2i - x0i, x0r - x2r) */
2305 4351020 : move16();
2306 4351020 : a[j2 + 1] = sub_sat( x0r, x2r );
2307 4351020 : move16();
2308 : /* a[j1] = wk1r * (1 + i) * (x1 + i*x3) */
2309 4351020 : x0r = sub_sat( x1r, x3i );
2310 4351020 : x0i = add_sat( x1i, x3r );
2311 4351020 : tmp = sub_sat( x0r, x0i );
2312 4351020 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2313 4351020 : a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2314 4351020 : move16();
2315 :
2316 4351020 : tmp = add_sat( x0r, x0i );
2317 4351020 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2318 4351020 : a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2319 4351020 : move16();
2320 : /* a[j3] = wk1r * (-1 + i) * (x1 - i*x3); note x0i below holds the negated imaginary part (x3r - x1i) */
2321 4351020 : x0r = add_sat( x3i, x1r );
2322 4351020 : x0i = sub_sat( x3r, x1i );
2323 4351020 : tmp = sub_sat( x0i, x0r );
2324 4351020 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2325 4351020 : a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2326 4351020 : move16();
2327 :
2328 4351020 : tmp = add_sat( x0i, x0r );
2329 4351020 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2330 4351020 : a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2331 4351020 : move16();
2332 : #else /* same statements written with the *_o operators and the local Overflow flag */
2333 : j1 = add_o( j, l, &Overflow );
2334 : j2 = add_o( j1, l, &Overflow );
2335 : j3 = add_o( j2, l, &Overflow );
2336 : x0r = add_o( a[j], a[j1], &Overflow );
2337 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2338 : x1r = sub_o( a[j], a[j1], &Overflow );
2339 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2340 : x2r = add_o( a[j2], a[j3], &Overflow );
2341 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2342 : x3r = sub_o( a[j2], a[j3], &Overflow );
2343 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2344 : a[j] = add_o( x0r, x2r, &Overflow );
2345 : move16();
2346 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2347 : move16();
2348 : a[j2] = sub_o( x2i, x0i, &Overflow );
2349 : move16();
2350 : a[j2 + 1] = sub_o( x0r, x2r, &Overflow );
2351 : move16();
2352 :
2353 : x0r = sub_o( x1r, x3i, &Overflow );
2354 : x0i = add_o( x1i, x3r, &Overflow );
2355 : tmp = sub_o( x0r, x0i, &Overflow );
2356 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2357 : a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2358 : move16();
2359 :
2360 : tmp = add_o( x0r, x0i, &Overflow );
2361 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2362 : a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2363 : move16();
2364 :
2365 : x0r = add_o( x3i, x1r, &Overflow );
2366 : x0i = sub_o( x3r, x1i, &Overflow );
2367 : tmp = sub_o( x0i, x0r, &Overflow );
2368 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2369 : a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2370 : move16();
2371 :
2372 : tmp = add_o( x0i, x0r, &Overflow );
2373 : L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */
2374 : a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2375 : move16();
2376 : #endif /* ISSUE_1836_replace_overflow_libcom */
2377 : }
2378 : /* Remaining groups: k advances by m2 = 2 * m; each step handles two groups of l (at k and at k + m) with their own twiddles */
2379 724188 : k1 = 0;
2380 724188 : move16();
2381 724188 : m2 = shl( m, 1 );
2382 1181025 : FOR( k = m2; k < n; k += m2 )
2383 : {
2384 456837 : k1 = add( k1, 2 ); /* next (real, imag) pair in w[] for wk2 */
2385 456837 : k2 = shl( k1, 1 ); /* k2 = 2 * k1: position of the wk1 pairs for this step */
2386 456837 : wk2r = w[k1];
2387 456837 : move32();
2388 456837 : wk2i = w[k1 + 1];
2389 456837 : move32();
2390 456837 : wk1r = w[k2];
2391 456837 : move32();
2392 456837 : wk1i = w[k2 + 1];
2393 456837 : move32();
2394 456837 : L_tmp = L_shl( Mult_32_32( wk2i, wk1i ), 1 ); /*Q29 */
2395 456837 : wk3r = L_sub( wk1r, L_shl( L_tmp, 1 ) ); /*Q30 */
2396 : /* wk3 is not read from w[]; it is derived here from wk1 and wk2i */
2397 456837 : L_tmp = L_shl( Mult_32_32( wk2i, wk1r ), 1 ); /*Q29 */
2398 456837 : wk3i = L_sub( L_shl( L_tmp, 1 ), wk1i ); /*Q30 */
2399 : /* First half of the step (j = k .. k+l-1): a[j2] = wk2*(x0 - x2), a[j1] = wk1*(x1 + i*x3), a[j3] = wk3*(x1 - i*x3) */
2400 456837 : tmp2 = add( l, k );
2401 2284185 : FOR( j = k; j < tmp2; j += 2 )
2402 : {
2403 : #ifdef ISSUE_1836_replace_overflow_libcom
2404 1827348 : j1 = add_sat( j, l );
2405 1827348 : j2 = add_sat( j1, l );
2406 1827348 : j3 = add_sat( j2, l );
2407 1827348 : x0r = add_sat( a[j], a[j1] ); /* x0..x3 formed exactly as in group 0 */
2408 1827348 : x0i = add_sat( a[j + 1], a[j1 + 1] );
2409 1827348 : x1r = sub_sat( a[j], a[j1] );
2410 1827348 : x1i = sub_sat( a[j + 1], a[j1 + 1] );
2411 1827348 : x2r = add_sat( a[j2], a[j3] );
2412 1827348 : x2i = add_sat( a[j2 + 1], a[j3 + 1] );
2413 1827348 : x3r = sub_sat( a[j2], a[j3] );
2414 1827348 : x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
2415 1827348 : a[j] = add_sat( x0r, x2r ); /* a[j] = x0 + x2 */
2416 1827348 : move16();
2417 1827348 : a[j + 1] = add_sat( x0i, x2i );
2418 1827348 : move16();
2419 : /* x0 is reused from here on as the operand of each complex multiplication */
2420 1827348 : x0r = sub_sat( x0r, x2r );
2421 1827348 : x0i = sub_sat( x0i, x2i );
2422 : /* complex product pattern (assuming Msub/Madd subtract/add the 32x16 product - see math_32.h): re = wr*x0r - wi*x0i, im = wr*x0i + wi*x0r */
2423 1827348 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2424 1827348 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
2425 1827348 : a[j2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2426 1827348 : move16();
2427 :
2428 1827348 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2429 1827348 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
2430 1827348 : a[j2 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2431 1827348 : move16();
2432 : /* x0 = x1 + i*x3, multiplied by wk1 */
2433 1827348 : x0r = sub_sat( x1r, x3i );
2434 1827348 : x0i = add_sat( x1i, x3r );
2435 :
2436 1827348 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2437 1827348 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2438 1827348 : a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2439 1827348 : move16();
2440 :
2441 1827348 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2442 1827348 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2443 1827348 : a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2444 1827348 : move16();
2445 : /* x0 = x1 - i*x3, multiplied by wk3. NOTE(review): unlike every other loop here (add_sat/sub_sat), this sum is formed in 32 bits and narrowed with extract_l - presumably deliberate for bit-exactness; confirm before changing */
2446 1827348 : L_x0r = L_add( (Word32) x1r, (Word32) x3i );
2447 1827348 : L_x0i = L_sub( (Word32) x1i, (Word32) x3r );
2448 1827348 : x0r = extract_l( L_x0r );
2449 1827348 : x0i = extract_l( L_x0i );
2450 1827348 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2451 1827348 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2452 1827348 : a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2453 1827348 : move16();
2454 :
2455 1827348 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2456 1827348 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2457 1827348 : a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2458 1827348 : move16();
2459 : #else /* same statements written with the *_o operators and the local Overflow flag */
2460 : j1 = add_o( j, l, &Overflow );
2461 : j2 = add_o( j1, l, &Overflow );
2462 : j3 = add_o( j2, l, &Overflow );
2463 : x0r = add_o( a[j], a[j1], &Overflow );
2464 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2465 : x1r = sub_o( a[j], a[j1], &Overflow );
2466 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2467 : x2r = add_o( a[j2], a[j3], &Overflow );
2468 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2469 : x3r = sub_o( a[j2], a[j3], &Overflow );
2470 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2471 : a[j] = add_o( x0r, x2r, &Overflow );
2472 : move16();
2473 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2474 : move16();
2475 :
2476 : x0r = sub_o( x0r, x2r, &Overflow );
2477 : x0i = sub_o( x0i, x2i, &Overflow );
2478 :
2479 : L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2480 : L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */
2481 : a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2482 : move16();
2483 :
2484 : L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2485 : L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */
2486 : a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2487 : move16();
2488 :
2489 : x0r = sub_o( x1r, x3i, &Overflow );
2490 : x0i = add_o( x1i, x3r, &Overflow );
2491 :
2492 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2493 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2494 : a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2495 : move16();
2496 :
2497 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2498 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2499 : a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2500 : move16();
2501 :
2502 : L_x0r = L_add( (Word32) x1r, (Word32) x3i );
2503 : L_x0i = L_sub( (Word32) x1i, (Word32) x3r );
2504 : x0r = extract_l( L_x0r );
2505 : x0i = extract_l( L_x0i );
2506 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2507 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2508 : a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2509 : move16();
2510 :
2511 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2512 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2513 : a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2514 : move16();
2515 : #endif /* ISSUE_1836_replace_overflow_libcom */
2516 : }
2517 : /* Second half of the step: wk1 is reloaded from the next pair in w[] and wk3 is re-derived, this time from wk2r */
2518 456837 : wk1r = w[k2 + 2];
2519 456837 : move32();
2520 456837 : wk1i = w[k2 + 3];
2521 456837 : move32();
2522 : #ifdef ISSUE_1836_replace_overflow_libcom
2523 456837 : L_tmp = L_shl_sat( Mult_32_32( wk2r, wk1i ), 1 ); /*Q29 */
2524 456837 : wk3r = L_sub_sat( wk1r, L_shl_sat( L_tmp, 1 ) ); /*Q30 */
2525 :
2526 456837 : L_tmp = L_shl_sat( Mult_32_32( wk2r, wk1r ), 1 ); /*Q29 */
2527 456837 : wk3i = L_sub_sat( L_shl_sat( L_tmp, 1 ), wk1i ); /*Q30 */
2528 456837 : tmp2 = add( l, add( k, m ) ); /* end index of the second half: k + m + l */
2529 : #else /* same statements written with the *_o operators and the local Overflow flag */
2530 : L_tmp = L_shl_o( Mult_32_32( wk2r, wk1i ), 1, &Overflow ); /*Q29 */
2531 : wk3r = L_sub_o( wk1r, L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q30 */
2532 :
2533 : L_tmp = L_shl_o( Mult_32_32( wk2r, wk1r ), 1, &Overflow ); /*Q29 */
2534 : wk3i = L_sub_o( L_shl_o( L_tmp, 1, &Overflow ), wk1i, &Overflow ); /*Q30 */
2535 : tmp2 = add( l, add( k, m ) );
2536 : #endif /* ISSUE_1836_replace_overflow_libcom */
2537 2284185 : FOR( j = add( k, m ); j < tmp2; j += 2 ) /* j = k+m .. k+m+l-1 */
2538 : {
2539 : #ifdef ISSUE_1836_replace_overflow_libcom
2540 1827348 : j1 = add_sat( j, l );
2541 1827348 : j2 = add_sat( j1, l );
2542 1827348 : j3 = add_sat( j2, l );
2543 1827348 : x0r = add_sat( a[j], a[j1] ); /* x0..x3 formed exactly as in group 0 */
2544 1827348 : x0i = add_sat( a[j + 1], a[j1 + 1] );
2545 1827348 : x1r = sub_sat( a[j], a[j1] );
2546 1827348 : x1i = sub_sat( a[j + 1], a[j1 + 1] );
2547 1827348 : x2r = add_sat( a[j2], a[j3] );
2548 1827348 : x2i = add_sat( a[j2 + 1], a[j3 + 1] );
2549 1827348 : x3r = sub_sat( a[j2], a[j3] );
2550 1827348 : x3i = sub_sat( a[j2 + 1], a[j3 + 1] );
2551 1827348 : a[j] = add_sat( x0r, x2r ); /* a[j] = x0 + x2 */
2552 1827348 : move16();
2553 1827348 : a[j + 1] = add_sat( x0i, x2i );
2554 1827348 : move16();
2555 :
2556 1827348 : x0r = sub_sat( x0r, x2r );
2557 1827348 : x0i = sub_sat( x0i, x2i );
2558 : /* a[j2] = i * wk2 * (x0 - x2): re = -(wk2i*x0r + wk2r*x0i), im = wk2r*x0r - wk2i*x0i */
2559 1827348 : tmp = negate( x0r );
2560 1827348 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2561 1827348 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2562 1827348 : a[j2] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2563 1827348 : move16();
2564 :
2565 1827348 : tmp = negate( x0i );
2566 1827348 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2567 1827348 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2568 1827348 : a[j2 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2569 1827348 : move16();
2570 : /* a[j1] = wk1 * (x1 + i*x3) */
2571 1827348 : x0r = sub_sat( x1r, x3i );
2572 1827348 : x0i = add_sat( x1i, x3r );
2573 :
2574 1827348 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2575 1827348 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2576 1827348 : a[j1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2577 1827348 : move16();
2578 :
2579 1827348 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2580 1827348 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2581 1827348 : a[j1 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2582 1827348 : move16();
2583 : /* a[j3] = wk3 * (x1 - i*x3); here the sum uses add_sat/sub_sat, unlike the first half */
2584 1827348 : x0r = add_sat( x1r, x3i );
2585 1827348 : x0i = sub_sat( x1i, x3r );
2586 :
2587 1827348 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2588 1827348 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2589 1827348 : a[j3] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2590 1827348 : move16();
2591 :
2592 1827348 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2593 1827348 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2594 1827348 : a[j3 + 1] = round_fx_sat( L_shl_sat( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */
2595 1827348 : move16();
2596 : #else /* same statements written with the *_o operators and the local Overflow flag */
2597 : j1 = add_o( j, l, &Overflow );
2598 : j2 = add_o( j1, l, &Overflow );
2599 : j3 = add_o( j2, l, &Overflow );
2600 : x0r = add_o( a[j], a[j1], &Overflow );
2601 : x0i = add_o( a[j + 1], a[j1 + 1], &Overflow );
2602 : x1r = sub_o( a[j], a[j1], &Overflow );
2603 : x1i = sub_o( a[j + 1], a[j1 + 1], &Overflow );
2604 : x2r = add_o( a[j2], a[j3], &Overflow );
2605 : x2i = add_o( a[j2 + 1], a[j3 + 1], &Overflow );
2606 : x3r = sub_o( a[j2], a[j3], &Overflow );
2607 : x3i = sub_o( a[j2 + 1], a[j3 + 1], &Overflow );
2608 : a[j] = add_o( x0r, x2r, &Overflow );
2609 : move16();
2610 : a[j + 1] = add_o( x0i, x2i, &Overflow );
2611 : move16();
2612 :
2613 : x0r = sub_o( x0r, x2r, &Overflow );
2614 : x0i = sub_o( x0i, x2i, &Overflow );
2615 :
2616 : tmp = negate( x0r );
2617 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2618 : L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */
2619 : a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2620 : move16();
2621 :
2622 : tmp = negate( x0i );
2623 : L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */
2624 : L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */
2625 : a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2626 : move16();
2627 :
2628 : x0r = sub_o( x1r, x3i, &Overflow );
2629 : x0i = add_o( x1i, x3r, &Overflow );
2630 :
2631 : L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */
2632 : L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */
2633 : a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2634 : move16();
2635 :
2636 : L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */
2637 : L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */
2638 : a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2639 : move16();
2640 :
2641 : x0r = add_o( x1r, x3i, &Overflow );
2642 : x0i = sub_o( x1i, x3r, &Overflow );
2643 :
2644 : L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */
2645 : L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */
2646 : a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2647 : move16();
2648 :
2649 : L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */
2650 : L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */
2651 : a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */
2652 : move16();
2653 : #endif /* ISSUE_1836_replace_overflow_libcom */
2654 : }
2655 : }
2656 :
2657 724188 : return;
2658 : }
2659 :
2660 93 : void fft3_fx( const Word16 X[] /*Qx*/, Word16 Y[] /*Qx*/, const Word16 n )
2661 : {
2662 : Word16 Z[PH_ECU_SPEC_SIZE];
2663 : Word16 *Z0, *Z1, *Z2;
2664 : Word16 *z0, *z1, *z2;
2665 : const Word16 *x;
2666 93 : const Word16 *t_sin = sincos_t_rad3_fx; // Q15
2667 : Word16 m, mMinus1, step;
2668 : Word16 i, l;
2669 : Word16 c1_ind, s1_ind, c2_ind, s2_ind;
2670 : Word16 c1_step, s1_step, c2_step, s2_step;
2671 : Word16 *RY, *IY, *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
2672 : Word32 acc;
2673 : Word16 mBy2, orderMinus1;
2674 : const Word16 *pPhaseTbl;
2675 :
2676 : /* Determine the order of the transform, the length of decimated */
2677 : /* transforms m, and the step for the sine and cosine tables. */
2678 93 : SWITCH( n )
2679 : {
2680 31 : case 1536:
2681 31 : orderMinus1 = 9 - 1;
2682 31 : move16();
2683 31 : m = 512;
2684 31 : move16();
2685 31 : step = 1;
2686 31 : move16();
2687 31 : pPhaseTbl = FFT_W256;
2688 31 : BREAK;
2689 62 : case 384:
2690 62 : orderMinus1 = 7 - 1;
2691 62 : move16();
2692 62 : m = 128;
2693 62 : move16();
2694 62 : step = 4;
2695 62 : move16();
2696 62 : pPhaseTbl = FFT_W64;
2697 62 : BREAK;
2698 0 : default:
2699 0 : orderMinus1 = 7 - 1;
2700 0 : move16();
2701 0 : m = 128;
2702 0 : move16();
2703 0 : step = 4;
2704 0 : move16();
2705 0 : pPhaseTbl = FFT_W64;
2706 0 : BREAK;
2707 : }
2708 :
2709 : /* Compose decimated sequences X[3i], X[3i+1],X[3i+2] */
2710 : /* compute their FFT of length m. */
2711 93 : Z0 = &Z[0];
2712 93 : z0 = &Z0[0];
2713 93 : Z1 = &Z0[m];
2714 93 : z1 = &Z1[0]; /* Z1 = &Z[ m]; */
2715 93 : Z2 = &Z1[m];
2716 93 : z2 = &Z2[0]; /* Z2 = &Z[2m]; */
2717 93 : x = &X[0]; // Qx
2718 23901 : FOR( i = 0; i < m; i++ )
2719 : {
2720 23808 : *z0++ = *x++; /* Z0[i] = X[3i]; Qx */
2721 23808 : move16();
2722 23808 : *z1++ = *x++; /* Z1[i] = X[3i+1]; Qx */
2723 23808 : move16();
2724 23808 : *z2++ = *x++; /* Z2[i] = X[3i+2]; Qx */
2725 23808 : move16();
2726 : }
2727 93 : mBy2 = shr( m, 1 );
2728 93 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z0, Z0, 1 );
2729 93 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z1, Z1, 1 );
2730 93 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, Z2, Z2, 1 );
2731 :
2732 : /* Butterflies of order 3. */
2733 : /* pointer initialization */
2734 93 : mMinus1 = sub( m, 1 );
2735 93 : RY = &Y[0]; // Qx
2736 93 : IY = &Y[n]; // Qx
2737 93 : IY--; /* Decrement the address counter.*/
2738 93 : RZ0 = &Z0[0]; // Qx
2739 93 : IZ0 = &Z0[mMinus1];
2740 93 : RZ1 = &Z1[0]; // Qx
2741 93 : IZ1 = &Z1[mMinus1]; // Qx
2742 93 : RZ2 = &Z2[0]; // Qx
2743 93 : IZ2 = &Z2[mMinus1]; // Qx
2744 :
2745 93 : c1_step = negate( step );
2746 93 : s1_step = step;
2747 93 : move16();
2748 93 : c2_step = shl( c1_step, 1 );
2749 93 : s2_step = shl( s1_step, 1 );
2750 93 : c1_ind = add( T_SIN_PI_2, c1_step );
2751 93 : s1_ind = s1_step;
2752 93 : move16();
2753 93 : c2_ind = add( T_SIN_PI_2, c2_step );
2754 93 : s2_ind = s2_step;
2755 93 : move16();
2756 :
2757 : /* special case: i = 0 */
2758 93 : acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ ); // Q15 + Qx
2759 93 : acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ ); // Q15 + Qx
2760 93 : *RY++ = mac_r_sat( acc, *RZ2++, 0x4000 /*1.Q14*/ ); // Qx
2761 93 : move16();
2762 :
2763 : /* first 3/12-- from 1 to (3*m/8)-1 */
2764 93 : l = sub( shr( n, 3 ), 1 ); /* (3*m/8) - 1 = (n/8) - 1 */
2765 8928 : FOR( i = 0; i < l; i++ )
2766 : {
2767 8835 : acc = L_shl( *RZ0++, 15 ); /* Align with the following non-fractional mode so as to gain 1 more bit headroom. Q15 + Qx*/
2768 8835 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2769 8835 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2770 8835 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2771 8835 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2772 8835 : *RY++ = round_fx( acc ); /* bit growth = 1 (compensated by non-fractional mode MAC). Qx - 1*/
2773 8835 : move16();
2774 :
2775 8835 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2776 8835 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2777 8835 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2778 8835 : acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2779 8835 : acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2780 8835 : *IY-- = round_fx( acc ); // Qx - 1
2781 8835 : move16();
2782 :
2783 8835 : c1_ind = add( c1_ind, c1_step );
2784 8835 : s1_ind = add( s1_ind, s1_step );
2785 8835 : c2_ind = add( c2_ind, c2_step );
2786 8835 : s2_ind = add( s2_ind, s2_step );
2787 : }
2788 :
2789 : /* next 1/12-- from (3*m/8) to (4*m/8)-1 */
2790 93 : l = shr( m, 3 ); /* (4*m/8) - (3*m/8) = m/8 */
2791 3069 : FOR( i = 0; i < l; i++ )
2792 : {
2793 2976 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2794 2976 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2795 2976 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2796 2976 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2797 2976 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2798 2976 : *RY++ = round_fx( acc ); // Qx - 1
2799 2976 : move16();
2800 :
2801 2976 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2802 2976 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2803 2976 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2804 2976 : acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2805 2976 : acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2806 2976 : *IY-- = round_fx( acc ); // Qx - 1
2807 2976 : move16();
2808 :
2809 2976 : c1_ind = add( c1_ind, c1_step );
2810 2976 : s1_ind = add( s1_ind, s1_step );
2811 2976 : c2_ind = sub( c2_ind, c2_step );
2812 2976 : s2_ind = sub( s2_ind, s2_step );
2813 : }
2814 :
2815 : /* special case: i = m/2 i.e. 1/3 */
2816 93 : acc = L_shl( *RZ0--, 15 ); // Q15 + Qx
2817 93 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
2818 93 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2819 93 : *RY++ = round_fx( acc ); // Qx - 1
2820 93 : move16();
2821 :
2822 93 : acc = 0;
2823 93 : move32();
2824 93 : acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
2825 93 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
2826 93 : *IY-- = round_fx( acc ); // Qx - 1
2827 93 : move16();
2828 93 : IZ0++;
2829 93 : IZ1++;
2830 93 : IZ2++;
2831 :
2832 93 : c1_ind = add( c1_ind, c1_step );
2833 93 : s1_ind = add( s1_ind, s1_step );
2834 93 : c2_ind = sub( c2_ind, c2_step );
2835 93 : s2_ind = sub( s2_ind, s2_step );
2836 :
2837 : /* next 2/12-- from ((m/2)+1) to (6*m/8)-1 */
2838 93 : l = sub( shr( m, 2 ), 1 ); /* (6*m/8) - ((m/2)+1) = m/4 - 1 */
2839 5952 : FOR( i = 0; i < l; i++ )
2840 : {
2841 5859 : acc = L_shl( *RZ0--, 15 ); // Q15 + Qx
2842 5859 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2843 5859 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2844 5859 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2845 5859 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2846 5859 : *RY++ = round_fx( acc ); // Qx - 1
2847 5859 : move16();
2848 :
2849 5859 : acc = L_mult0( *IZ0++, -32768 ); // Q15 + Qx
2850 5859 : acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
2851 5859 : acc = L_msu0( acc, *IZ1++, t_sin[c1_ind] ); // Q15 + Qx
2852 5859 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
2853 5859 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Q15 + Qx
2854 5859 : *IY-- = round_fx( acc ); // Qx - 1
2855 5859 : move16();
2856 :
2857 5859 : c1_ind = add( c1_ind, c1_step );
2858 5859 : s1_ind = add( s1_ind, s1_step );
2859 5859 : c2_ind = sub( c2_ind, c2_step );
2860 5859 : s2_ind = sub( s2_ind, s2_step );
2861 : }
2862 :
2863 : /*--------------------------half--------------------------// */
2864 : /* next 2/12-- from (6*m/8) to (8*m/8) - 1 */
2865 93 : l = shr( m, 2 );
2866 6045 : FOR( i = 0; i < l; i++ )
2867 : {
2868 5952 : acc = L_shl( *RZ0--, 15 ); // Q15 + Qx
2869 5952 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2870 5952 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2871 5952 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2872 5952 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2873 5952 : *RY++ = round_fx( acc ); // Qx - 1
2874 5952 : move16();
2875 :
2876 5952 : acc = L_mult0( *IZ0++, -32768 ); // Q15 + Qx
2877 5952 : acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); // Q15 + Qx
2878 5952 : acc = L_mac0( acc, *IZ1++, t_sin[c1_ind] ); // Q15 + Qx
2879 5952 : acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Q15 + Qx
2880 5952 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Q15 + Qx
2881 5952 : *IY-- = round_fx( acc ); // Qx - 1
2882 5952 : move16();
2883 :
2884 5952 : c1_ind = sub( c1_ind, c1_step );
2885 5952 : s1_ind = sub( s1_ind, s1_step );
2886 5952 : c2_ind = add( c2_ind, c2_step );
2887 5952 : s2_ind = add( s2_ind, s2_step );
2888 : }
2889 :
2890 : /* special case: i = m, i.e 2/3 */
2891 93 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2892 93 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
2893 93 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2894 93 : *RY++ = round_fx( acc ); // Qx - 1
2895 93 : move16();
2896 :
2897 93 : acc = L_deposit_l( 0 );
2898 93 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2899 93 : acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2900 93 : *IY-- = round_fx( acc ); // Qx - 1
2901 93 : move16();
2902 93 : IZ0--; /* Just decrement the address counter */
2903 93 : IZ1--;
2904 93 : IZ2--;
2905 :
2906 93 : c1_ind = sub( c1_ind, c1_step );
2907 93 : s1_ind = sub( s1_ind, s1_step );
2908 93 : c2_ind = add( c2_ind, c2_step );
2909 93 : s2_ind = add( s2_ind, s2_step );
2910 :
2911 : /* next 1/12-- from (m + 1) to (9*m/8) - 1 */
2912 93 : l = sub( shr( m, 3 ), 1 ); /* (9*m/8) - (m +1) = m/8 - 1 */
2913 2976 : FOR( i = 0; i < l; i++ )
2914 : {
2915 2883 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2916 2883 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2917 2883 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2918 2883 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2919 2883 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2920 2883 : *RY++ = round_fx( acc ); // Qx - 1
2921 2883 : move16();
2922 :
2923 2883 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2924 2883 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2925 2883 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2926 2883 : acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2927 2883 : acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2928 2883 : *IY-- = round_fx( acc ); // Qx - 1
2929 2883 : move16();
2930 :
2931 2883 : c1_ind = sub( c1_ind, c1_step );
2932 2883 : s1_ind = sub( s1_ind, s1_step );
2933 2883 : c2_ind = add( c2_ind, c2_step );
2934 2883 : s2_ind = add( s2_ind, s2_step );
2935 : }
2936 :
2937 : /* last 3/12-- from (9*m/8) to (12*m/8) - 1 */
2938 93 : l = shr( n, 3 ); /* (12*m/8) - (9*m/8) = 3*m/8 = n/8 */
2939 9021 : FOR( i = 0; i < l; i++ )
2940 : {
2941 8928 : acc = L_shl( *RZ0++, 15 ); // Q15 + Qx
2942 8928 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); /* Non-fractional mode gains 1 more bit headroom. Q15 + Qx*/
2943 8928 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Q15 + Qx
2944 8928 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2945 8928 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Q15 + Qx
2946 8928 : *RY++ = round_fx( acc ); // Qx - 1
2947 8928 : move16();
2948 :
2949 8928 : acc = L_shl( *IZ0--, 15 ); // Q15 + Qx
2950 8928 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Q15 + Qx
2951 8928 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Q15 + Qx
2952 8928 : acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); // Q15 + Qx
2953 8928 : acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); // Q15 + Qx
2954 8928 : *IY-- = round_fx( acc ); // Qx - 1
2955 8928 : move16();
2956 :
2957 8928 : c1_ind = sub( c1_ind, c1_step );
2958 8928 : s1_ind = sub( s1_ind, s1_step );
2959 8928 : c2_ind = sub( c2_ind, c2_step );
2960 8928 : s2_ind = sub( s2_ind, s2_step );
2961 : }
2962 :
2963 : /* special case: i = 3*m/2 */
2964 93 : acc = L_shl( *RZ0, 15 ); // Q15 + Qx
2965 93 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Q15 + Qx
2966 93 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Q15 + Qx
2967 93 : *RY = round_fx( acc ); // Qx - 1
2968 93 : move16();
2969 :
2970 93 : return;
2971 : }
2972 :
2973 :
2974 109 : void ifft3_fx( const Word16 Z[] /*Qx*/, Word16 X[] /*Qx*/, const Word16 n )
2975 : { /* Builds three length-m blocks Y0/Y1/Y2 from Z (n = 3*m for the two explicit cases) with order-3 inverse butterflies, runs r_fft_fx_lc() on each block, then interleaves them into X with the FFT3_ONE_THIRD scaling. */
2976 : Word16 Y[PH_ECU_SPEC_SIZE]; /* scratch for Y0|Y1|Y2; indices 0 .. 3*m-1 are written below -- assumes PH_ECU_SPEC_SIZE >= 3*m, TODO confirm */
2977 109 : const Word16 *t_sin = sincos_t_rad3_fx; // Q15
2978 : Word16 m, mMinus1, step, step2;
2979 : Word16 i, l;
2980 : Word16 c0_ind, s0_ind, c1_ind, s1_ind, c2_ind, s2_ind; /* indices into t_sin[] */
2981 : const Word16 *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2; /* moving read pointers into Z */
2982 : const Word16 *RZ00, *IZ00, *RZ10, *IZ10, *RZ20, *IZ20; /* fixed start positions; the read pointers are reset to these before each of the three passes */
2983 : Word16 *RY0, *IY0, *RY1, *IY1, *RY2, *IY2, *y0, *y1, *y2, *pX;
2984 : Word32 acc;
2985 : Word16 mBy2, orderMinus1, nMinusMBy2;
2986 : const Word16 *pPhaseTbl;
2987 :
2988 : /* Determine the order of the transform, the length of decimated */
2989 : /* transforms m, and the step for the sine and cosine tables. */
2990 109 : SWITCH( n )
2991 : {
2992 109 : case 1536:
2993 109 : orderMinus1 = 9 - 1;
2994 109 : move16();
2995 109 : m = 512;
2996 109 : move16();
2997 109 : step = 1;
2998 109 : move16();
2999 109 : pPhaseTbl = FFT_W256;
3000 109 : BREAK;
3001 0 : case 384:
3002 0 : orderMinus1 = 7 - 1;
3003 0 : move16();
3004 0 : m = 128;
3005 0 : move16();
3006 0 : step = 4;
3007 0 : move16();
3008 0 : pPhaseTbl = FFT_W64;
3009 0 : BREAK;
3010 0 : default: /* any other n silently uses the same settings as n = 384 */
3011 0 : orderMinus1 = 7 - 1;
3012 0 : move16();
3013 0 : m = 128;
3014 0 : move16();
3015 0 : step = 4;
3016 0 : move16();
3017 0 : pPhaseTbl = FFT_W64;
3018 0 : BREAK;
3019 : }
3020 :
3021 109 : nMinusMBy2 = shr( sub( n, m ), 1 ); /* (n - m) / 2; equals m when n = 3*m (both explicit cases) */
3022 109 : mMinus1 = sub( m, 1 );
3023 : /* pointer initialization: block k lives in Y[k*m .. k*m+m-1]; RYk is stored upwards from the block start, IYk downwards from the block end */
3024 109 : RY0 = &Y[0]; // Qx
3025 109 : IY0 = &Y[m]; // Qx; one past block 0, decremented once before its first store
3026 109 : RY1 = &RY0[m]; // Qx
3027 109 : IY1 = &RY1[mMinus1]; // Qx
3028 109 : RY2 = &RY1[m]; // Qx
3029 109 : IY2 = &RY2[mMinus1]; // Qx
3030 :
3031 109 : RZ00 = &Z[0]; /* The zero positions of the pointers Qx*/
3032 109 : RZ10 = &RZ00[m]; // Qx
3033 109 : RZ20 = &RZ00[nMinusMBy2]; // Qx; same address as RZ10 when n = 3*m, but RZ2 is walked downwards while RZ1 is walked upwards
3034 109 : IZ00 = &Z[n]; // Qx; one past the last input element, IZ0 is decremented before its first dereference in every pass
3035 109 : IZ10 = &IZ00[-m]; // Qx
3036 109 : IZ20 = &IZ00[-nMinusMBy2]; // Qx; same address as IZ10 when n = 3*m, but IZ2 is walked upwards while IZ1 is walked downwards
3037 :
3038 109 : RZ0 = RZ00; /* Reset the pointers to zero positions. */
3039 109 : RZ1 = RZ10;
3040 109 : RZ2 = RZ20;
3041 109 : IZ0 = IZ00;
3042 109 : IZ1 = IZ10;
3043 109 : IZ2 = IZ20;
3044 :
3045 : /* Inverse butterflies of order 3. */
3046 :
3047 : /* Construction of Y0: RZ0 + RZ1 + RZ2 for the RY0 outputs and IZ0 + IZ1 - IZ2 for the IY0 outputs, every term weighted by 0x4000 */
3048 109 : acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ ); // Qx + Q15
3049 109 : acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ ); // Qx + Q15
3050 109 : *RY0++ = mac_r( acc, *RZ2--, 0x4000 /*1.Q14*/ ); // Qx
3051 109 : move16();
3052 109 : IZ0--; /* first output has no IY0 store: the IZ*/IY0 pointers are only stepped, nothing is read or written */
3053 109 : IZ1--;
3054 109 : IZ2++;
3055 109 : IY0--;
3056 :
3057 109 : l = sub( shr( m, 1 ), 1 ); /* m/2 - 1 paired RY0/IY0 outputs */
3058 27904 : FOR( i = 0; i < l; i++ )
3059 : {
3060 27795 : acc = L_mult( *RZ0++, 0x4000 /*1.Q14*/ ); // Qx + Q15
3061 27795 : acc = L_mac( acc, *RZ1++, 0x4000 /*1.Q14*/ ); // Qx + Q15
3062 27795 : *RY0++ = mac_r( acc, *RZ2--, 0x4000 /*1.Q14*/ ); // Qx
3063 27795 : move16();
3064 :
3065 27795 : acc = L_mult( *IZ0--, 0x4000 /*1.Q14*/ ); // Qx + Q15
3066 27795 : acc = L_mac( acc, *IZ1--, 0x4000 /*1.Q14*/ ); // Qx + Q15
3067 27795 : *IY0-- = msu_r( acc, *IZ2++, 0x4000 /*1.Q14*/ ); // Qx
3068 27795 : move16();
3069 : }
3070 :
3071 : /* m/2: RY0 output only; the read pointers are not advanced any further */
3072 109 : acc = L_mult( *RZ0, 0x4000 /*1.Q14*/ ); // Qx + Q15
3073 109 : acc = L_mac( acc, *RZ1, 0x4000 /*1.Q14*/ ); // Qx + Q15
3074 109 : *RY0++ = mac_r( acc, *RZ2, 0x4000 /*1.Q14*/ ); // Qx
3075 109 : move16();
3076 :
3077 :
3078 : /* Construction of Y1: same input walk as Y0, but every term is weighted by a t_sin[] entry; the table indices move by +/- step per output */
3079 109 : c0_ind = T_SIN_PI_2; /* NOTE(review): c*_ind / s*_ind presumably act as cosine / sine lookups into one shared table -- confirm against sincos_t_rad3_fx */
3080 109 : s0_ind = 0;
3081 109 : c1_ind = T_SIN_PI_2 * 1 / 3;
3082 109 : s1_ind = T_SIN_PI_2 * 2 / 3;
3083 109 : c2_ind = T_SIN_PI_2 * 1 / 3;
3084 109 : s2_ind = T_SIN_PI_2 * 2 / 3;
3085 :
3086 109 : RZ0 = RZ00; /* Reset pointers to zero positions. */
3087 109 : RZ1 = RZ10;
3088 109 : RZ2 = RZ20;
3089 109 : IZ0 = IZ00;
3090 109 : IZ1 = IZ10;
3091 109 : IZ2 = IZ20;
3092 109 : acc = L_mult0( *RZ0++, t_sin[c0_ind] ); // Qx + Q15
3093 109 : acc = L_msu0( acc, *RZ1++, t_sin[c1_ind] ); // Qx + Q15
3094 109 : acc = L_msu0( acc, *RZ2--, t_sin[c2_ind] ); // Qx + Q15
3095 109 : IZ0--; /* no IZ0 term in this first output (s0_ind is 0 here); the pointer is only stepped */
3096 109 : acc = L_msu0( acc, *IZ1--, t_sin[s1_ind] ); // Qx + Q15
3097 109 : acc = L_msu0( acc, *IZ2++, t_sin[s2_ind] ); // Qx + Q15
3098 109 : *RY1++ = round_fx( acc ); // Qx - 1
3099 109 : move16();
3100 :
3101 109 : c0_ind = sub( c0_ind, step );
3102 109 : s0_ind = add( s0_ind, step );
3103 109 : c1_ind = add( c1_ind, step );
3104 109 : s1_ind = sub( s1_ind, step );
3105 109 : c2_ind = sub( c2_ind, step );
3106 109 : s2_ind = add( s2_ind, step );
3107 :
3108 : /* From 1 to (m/4) - 1. */
3109 109 : l = sub( shr( m, 2 ), 1 );
3110 13952 : FOR( i = 0; i < l; i++ )
3111 : {
3112 13843 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3113 13843 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3114 13843 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3115 13843 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3116 13843 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3117 13843 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3118 13843 : *RY1++ = round_fx( acc ); // Qx - 1
3119 13843 : move16();
3120 :
3121 13843 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15; the read pointers are advanced here, after the RY1 output above has used the same elements
3122 13843 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3123 13843 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3124 13843 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3125 13843 : acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3126 13843 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3127 13843 : *IY1-- = round_fx( acc ); // Qx - 1
3128 13843 : move16();
3129 :
3130 13843 : c0_ind = sub( c0_ind, step );
3131 13843 : s0_ind = add( s0_ind, step );
3132 13843 : c1_ind = add( c1_ind, step );
3133 13843 : s1_ind = sub( s1_ind, step );
3134 13843 : c2_ind = sub( c2_ind, step );
3135 13843 : s2_ind = add( s2_ind, step );
3136 : }
3137 :
3138 : /* From m/4 to m/2 -1. Differs from the previous loop only in the c2 terms (sign inverted) and in the direction c2_ind / s2_ind move. */
3139 109 : l = shr( m, 2 ); /* m/2 - m/4 = m/4 */
3140 14061 : FOR( i = 0; i < l; i++ )
3141 : {
3142 13952 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3143 13952 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3144 13952 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3145 13952 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3146 13952 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3147 13952 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3148 13952 : *RY1++ = round_fx( acc ); // Qx - 1
3149 13952 : move16();
3150 :
3151 13952 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3152 13952 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3153 13952 : acc = L_msu0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3154 13952 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3155 13952 : acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3156 13952 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3157 13952 : *IY1-- = round_fx( acc ); // Qx - 1
3158 13952 : move16();
3159 :
3160 13952 : c0_ind = sub( c0_ind, step );
3161 13952 : s0_ind = add( s0_ind, step );
3162 13952 : c1_ind = add( c1_ind, step );
3163 13952 : s1_ind = sub( s1_ind, step );
3164 13952 : c2_ind = add( c2_ind, step );
3165 13952 : s2_ind = sub( s2_ind, step );
3166 : }
3167 :
3168 : /* m/2: RY1 output only, same term signs as the RY1 part of the preceding loop */
3169 109 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3170 109 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3171 109 : acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3172 109 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3173 109 : acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3174 109 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3175 109 : *RY1++ = round_fx( acc ); // Qx - 1
3176 109 : move16();
3177 :
3178 : /* Construction of Y2: same start indices as Y1, but the table is walked with step2 = 2 * step and the range is split into four segments */
3179 109 : c0_ind = T_SIN_PI_2;
3180 109 : s0_ind = 0;
3181 109 : c1_ind = T_SIN_PI_2 * 1 / 3;
3182 109 : s1_ind = T_SIN_PI_2 * 2 / 3;
3183 109 : c2_ind = T_SIN_PI_2 * 1 / 3;
3184 109 : s2_ind = T_SIN_PI_2 * 2 / 3;
3185 109 : step2 = shl( step, 1 ); /* twice the table stride used for Y1 */
3186 :
3187 109 : RZ0 = RZ00; /* Reset pointers to zero positions. */
3188 109 : RZ1 = RZ10;
3189 109 : RZ2 = RZ20;
3190 109 : IZ0 = IZ00;
3191 109 : IZ1 = IZ10;
3192 109 : IZ2 = IZ20;
3193 109 : acc = L_mult0( *RZ0++, t_sin[c0_ind] ); // Qx + Q15
3194 109 : acc = L_msu0( acc, *RZ1++, t_sin[c1_ind] ); // Qx + Q15
3195 109 : acc = L_msu0( acc, *RZ2--, t_sin[c2_ind] ); // Qx + Q15
3196 109 : IZ0--; /* no IZ0 term in this first output (s0_ind is 0 here); the pointer is only stepped */
3197 109 : acc = L_mac0( acc, *IZ1--, t_sin[s1_ind] ); // Qx + Q15; s1 / s2 terms are added here, whereas the Y1 pass subtracts them
3198 109 : acc = L_mac0( acc, *IZ2++, t_sin[s2_ind] ); // Qx + Q15
3199 109 : *RY2++ = round_fx( acc ); // Qx - 1
3200 109 : move16();
3201 :
3202 109 : c0_ind = sub( c0_ind, step2 );
3203 109 : s0_ind = add( s0_ind, step2 );
3204 109 : c1_ind = sub( c1_ind, step2 );
3205 109 : s1_ind = add( s1_ind, step2 );
3206 109 : c2_ind = add( c2_ind, step2 );
3207 109 : s2_ind = sub( s2_ind, step2 );
3208 :
3209 : /* From 1 to (m/8) - 1. */
3210 109 : l = sub( shr( m, 3 ), 1 ); /* m/8 - 1. */
3211 6976 : FOR( i = 0; i < l; i++ )
3212 : {
3213 6867 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3214 6867 : acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3215 6867 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3216 6867 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3217 6867 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3218 6867 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3219 6867 : *RY2++ = round_fx( acc ); // Qx - 1
3220 6867 : move16();
3221 :
3222 6867 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3223 6867 : acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3224 6867 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3225 6867 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3226 6867 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3227 6867 : acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3228 6867 : *IY2-- = round_fx( acc ); // Qx - 1
3229 6867 : move16();
3230 :
3231 6867 : c0_ind = sub( c0_ind, step2 );
3232 6867 : s0_ind = add( s0_ind, step2 );
3233 6867 : c1_ind = sub( c1_ind, step2 );
3234 6867 : s1_ind = add( s1_ind, step2 );
3235 6867 : c2_ind = add( c2_ind, step2 );
3236 6867 : s2_ind = sub( s2_ind, step2 );
3237 : }
3238 :
3239 : /* From (m/8) to (m/4) - 1. Differs from the previous loop in the c1 terms (sign inverted) and in the direction c1_ind / s1_ind move. */
3240 109 : l = shr( m, 3 ); /* m/4 - m/8 = m/8 */
3241 7085 : FOR( i = 0; i < l; i++ )
3242 : {
3243 6976 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3244 6976 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3245 6976 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3246 6976 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3247 6976 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3248 6976 : acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3249 6976 : *RY2++ = round_fx( acc ); // Qx - 1
3250 6976 : move16();
3251 :
3252 6976 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3253 6976 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3254 6976 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3255 6976 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3256 6976 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3257 6976 : acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3258 6976 : *IY2-- = round_fx( acc ); // Qx - 1
3259 6976 : move16();
3260 :
3261 6976 : c0_ind = sub( c0_ind, step2 );
3262 6976 : s0_ind = add( s0_ind, step2 );
3263 6976 : c1_ind = add( c1_ind, step2 );
3264 6976 : s1_ind = sub( s1_ind, step2 );
3265 6976 : c2_ind = add( c2_ind, step2 );
3266 6976 : s2_ind = sub( s2_ind, step2 );
3267 : }
3268 :
3269 : /* From m/4 to 3*m/8 - 1. Differs from the previous loop in the s2 terms (sign inverted) and in the direction c2_ind / s2_ind move. */
3270 109 : l = shr( m, 3 ); /* 3*m/8 - m/4 = m/8 */
3271 7085 : FOR( i = 0; i < l; i++ )
3272 : {
3273 6976 : acc = L_mult0( *RZ0, t_sin[c0_ind] ); // Qx + Q15
3274 6976 : acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); // Qx + Q15
3275 6976 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3276 6976 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3277 6976 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3278 6976 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3279 6976 : *RY2++ = round_fx( acc ); // Qx - 1
3280 6976 : move16();
3281 :
3282 6976 : acc = L_mult0( *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3283 6976 : acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3284 6976 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3285 6976 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3286 6976 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3287 6976 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3288 6976 : *IY2-- = round_fx( acc ); // Qx - 1
3289 6976 : move16();
3290 :
3291 6976 : c0_ind = sub( c0_ind, step2 );
3292 6976 : s0_ind = add( s0_ind, step2 );
3293 6976 : c1_ind = add( c1_ind, step2 );
3294 6976 : s1_ind = sub( s1_ind, step2 );
3295 6976 : c2_ind = sub( c2_ind, step2 );
3296 6976 : s2_ind = add( s2_ind, step2 );
3297 : }
3298 :
3299 : /* From 3*m/8 to m/2 - 1. Differs from the previous loop in the c0 terms (now subtracted, so the c1 term opens each accumulation) and in the direction c0_ind / s0_ind move. */
3300 109 : l = shr( m, 3 ); /* m/2 - 3*m/8 = m/8 */
3301 7085 : FOR( i = 0; i < l; i++ )
3302 : {
3303 6976 : acc = L_mult0( *RZ1, t_sin[c1_ind] ); // Qx + Q15
3304 6976 : acc = L_msu0( acc, *RZ0, t_sin[c0_ind] ); // Qx + Q15
3305 6976 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3306 6976 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3307 6976 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3308 6976 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3309 6976 : *RY2++ = round_fx( acc ); // Qx - 1
3310 6976 : move16();
3311 :
3312 6976 : acc = L_mult0( *IZ1--, t_sin[c1_ind] ); // Qx + Q15
3313 6976 : acc = L_msu0( acc, *IZ0--, t_sin[c0_ind] ); // Qx + Q15
3314 6976 : acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); // Qx + Q15
3315 6976 : acc = L_mac0( acc, *RZ0++, t_sin[s0_ind] ); // Qx + Q15
3316 6976 : acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); // Qx + Q15
3317 6976 : acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); // Qx + Q15
3318 6976 : *IY2-- = round_fx( acc ); // Qx - 1
3319 6976 : move16();
3320 :
3321 6976 : c0_ind = add( c0_ind, step2 );
3322 6976 : s0_ind = sub( s0_ind, step2 );
3323 6976 : c1_ind = add( c1_ind, step2 );
3324 6976 : s1_ind = sub( s1_ind, step2 );
3325 6976 : c2_ind = sub( c2_ind, step2 );
3326 6976 : s2_ind = add( s2_ind, step2 );
3327 : }
3328 :
3329 : /* m/2: RY2 output only, same term signs as the RY2 part of the preceding loop */
3330 109 : acc = L_mult0( *RZ1, t_sin[c1_ind] ); // Qx + Q15
3331 109 : acc = L_msu0( acc, *RZ0, t_sin[c0_ind] ); // Qx + Q15
3332 109 : acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); // Qx + Q15
3333 109 : acc = L_msu0( acc, *IZ0, t_sin[s0_ind] ); // Qx + Q15
3334 109 : acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); // Qx + Q15
3335 109 : acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); // Qx + Q15
3336 109 : *RY2++ = round_fx( acc ); // Qx - 1
3337 109 : move16();
3338 :
3339 : /* Compute the inverse FFT for all 3 blocks. Each call passes the same buffer as input and output; the last argument is 0 here. */
3340 109 : RY0 = &Y[0]; /* Rewind the pointers. */
3341 109 : RY1 = &Y[m];
3342 109 : RY2 = &RY1[m];
3343 109 : mBy2 = shr( m, 1 );
3344 109 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY0, RY0, 0 ); /* inverse FFT */
3345 109 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY1, RY1, 0 ); /* inverse FFT */
3346 109 : r_fft_fx_lc( pPhaseTbl, m, mBy2, orderMinus1, RY2, RY2, 0 ); /* inverse FFT */
3347 :
3348 109 : y0 = RY0;
3349 109 : y1 = RY1;
3350 109 : y2 = RY2;
3351 :
3352 : /* Interlacing and scaling, scale = 1/3: X[3i] <- Y0[i], X[3i+1] <- Y1[i], X[3i+2] <- Y2[i], so X receives 3*m values */
3353 109 : pX = X;
3354 55917 : FOR( i = 0; i < m; i++ )
3355 : {
3356 55808 : *pX++ = shl_sat( mult_r( *y0++, FFT3_ONE_THIRD ), 1 ); // Qx
3357 55808 : move16();
3358 55808 : *pX++ = shl_sat( mult_r( *y1++, FFT3_ONE_THIRD ), 1 ); // Qx
3359 55808 : move16();
3360 55808 : *pX++ = shl_sat( mult_r( *y2++, FFT3_ONE_THIRD ), 1 ); // Qx
3361 55808 : move16();
3362 : }
3363 :
3364 109 : return;
3365 : }
|