Line data Source code
1 : /******************************************************************************
2 : * ETSI TS 103 634 V1.5.1 *
3 : * Low Complexity Communication Codec Plus (LC3plus) *
4 : * *
5 : * Copyright licence is solely granted through ETSI Intellectual Property *
6 : * Rights Policy, 3rd April 2019. No patent licence is granted by implication, *
7 : * estoppel or otherwise. *
8 : ******************************************************************************/
9 :
10 : #include "defines.h"
11 : #include "functions.h"
12 : #include "rom_basop_util_lc3plus.h"
13 : #include "basop_util_lc3plus.h"
14 :
15 : extern const Word32 SqrtTable_lc3plus[32];
16 : extern const Word16 SqrtDiffTable_lc3plus[32];
17 :
18 : extern const Word32 ISqrtTable_lc3plus[32];
19 : extern const Word16 ISqrtDiffTable_lc3plus[32];
20 :
21 : extern const Word32 InvTable_lc3plus[32];
22 : extern const Word16 InvDiffTable_lc3plus[32];
23 :
24 0 : Word32 BASOP_Util_Log2_lc3plus(Word32 x)
25 : {
26 : Word32 exp;
27 : Word16 exp_e;
28 : Word16 nIn;
29 : Word16 accuSqr;
30 : Word32 accuRes;
31 :
32 0 : assert(x >= 0);
33 :
34 0 : if (x == 0)
35 : {
36 :
37 0 : return ((Word32)MIN_32);
38 : }
39 :
40 : /* normalize input, calculate integer part */
41 0 : exp_e = norm_l(x);
42 0 : x = L_shl(x, exp_e);
43 0 : exp = L_deposit_l(exp_e);
44 :
45 : /* calculate (1-normalized_input) */
46 0 : nIn = extract_h(L_sub(MAX_32, x));
47 :
48 : /* approximate ln() for fractional part (nIn *c0 + nIn^2*c1 + nIn^3*c2 + ... + nIn^8 *c7) */
49 :
50 : /* iteration 1, no need for accumulation */
51 0 : accuRes = L_mult(nIn, ldCoeff_lc3plus[0]); /* nIn^i * coeff[0] */
52 0 : accuSqr = mult(nIn, nIn); /* nIn^2, nIn^3 .... */
53 :
54 : /* iteration 2 */
55 0 : accuRes = L_mac(accuRes, accuSqr, ldCoeff_lc3plus[1]); /* nIn^i * coeff[1] */
56 0 : accuSqr = mult(accuSqr, nIn); /* nIn^2, nIn^3 .... */
57 :
58 : /* iteration 3 */
59 0 : accuRes = L_mac(accuRes, accuSqr, ldCoeff_lc3plus[2]); /* nIn^i * coeff[2] */
60 0 : accuSqr = mult(accuSqr, nIn); /* nIn^2, nIn^3 .... */
61 :
62 : /* iteration 4 */
63 0 : accuRes = L_mac(accuRes, accuSqr, ldCoeff_lc3plus[3]); /* nIn^i * coeff[3] */
64 0 : accuSqr = mult(accuSqr, nIn); /* nIn^2, nIn^3 .... */
65 :
66 : /* iteration 5 */
67 0 : accuRes = L_mac(accuRes, accuSqr, ldCoeff_lc3plus[4]); /* nIn^i * coeff[4] */
68 0 : accuSqr = mult(accuSqr, nIn); /* nIn^2, nIn^3 .... */
69 :
70 : /* iteration 6 */
71 0 : accuRes = L_mac(accuRes, accuSqr, ldCoeff_lc3plus[5]); /* nIn^i * coeff[5] */
72 0 : accuSqr = mult(accuSqr, nIn); /* nIn^2, nIn^3 .... */
73 :
74 : /* iteration 7, no need to calculate accuSqr any more */
75 0 : accuRes = L_mac(accuRes, accuSqr, ldCoeff_lc3plus[6]); /* nIn^i * coeff[6] */
76 :
77 : /* ld(fractional part) = ln(fractional part)/ln(2), 1/ln(2) = (1 + 0.44269504) */
78 0 : accuRes = L_mac0(L_shr(accuRes, 1), extract_h(accuRes), 14506);
79 :
80 0 : accuRes = L_shr(accuRes, LD_DATA_SCALE - 1); /* fractional part/LD_DATA_SCALE */
81 0 : exp = L_shl(exp, (31 - LD_DATA_SCALE)); /* integer part/LD_DATA_SCALE */
82 0 : accuRes = L_sub(accuRes, exp); /* result = integer part + fractional part */
83 :
84 0 : return (accuRes);
85 : }
86 :
87 0 : Word32 BASOP_Util_InvLog2_lc3plus(Word32 x)
88 : {
89 : #ifdef ENABLE_HR_MODE
90 : /* Original code was used for negative x and hence the exp was always 0, which is assumed */
91 : Word16 exp;
92 0 : return BASOP_Util_InvLog2_pos(x, &exp);
93 : #else
94 : Word16 frac;
95 : Word16 exp;
96 : Word32 retVal;
97 : UWord32 index3;
98 : UWord32 index2;
99 : UWord32 index1;
100 : UWord32 lookup3f;
101 : UWord32 lookup12;
102 : UWord32 lookup;
103 :
104 : if (x < -1040187392l /*-31.0/64.0 Q31*/)
105 : {
106 :
107 : return 0;
108 : }
109 : test();
110 : if ((L_sub(x, 1040187392l /*31.0/64.0 Q31*/) >= 0) || (x == 0))
111 : {
112 :
113 : return 0x7FFFFFFF;
114 : }
115 :
116 : frac = extract_l(L_and(x, 0x3FF));
117 :
118 : index3 = L_and(L_shr_pos(x, 10), 0x1F);
119 : index2 = L_and(L_shr_pos(x, 15), 0x1F);
120 : index1 = L_and(L_shr_pos(x, 20), 0x1F);
121 :
122 : exp = extract_l(L_shr_pos(x, 25));
123 : if (x > 0)
124 : {
125 : exp = sub(31, exp);
126 : }
127 : if (x < 0)
128 : {
129 : exp = negate(exp);
130 : }
131 :
132 : lookup3f = L_add(exp2x_tab_long_lc3plus[index3], L_shr_pos(Mpy_32_16_lc3plus(0x0016302F, frac), 1));
133 : lookup12 = Mpy_32_32_lc3plus(exp2_tab_long_lc3plus[index1], exp2w_tab_long_lc3plus[index2]);
134 : lookup = Mpy_32_32_lc3plus(lookup12, lookup3f);
135 :
136 : retVal = L_shr(lookup, sub(exp, 3));
137 :
138 : return retVal;
139 : #endif
140 : }
141 :
142 : #ifdef ENABLE_HR_MODE
143 : /* New function which works with positive and negative exponents */
144 0 : Word32 BASOP_Util_InvLog2_pos(Word32 x, Word16 *exp)
145 : {
146 : Word16 frac;
147 : Word16 ret_exp;
148 : Word32 retVal;
149 : UWord32 index3;
150 : UWord32 index2;
151 : UWord32 index1;
152 : UWord32 lookup3f;
153 : UWord32 lookup12;
154 : UWord32 lookup;
155 :
156 : /* The usage of exp.mantissa format in LC3plus in Word32 is : floatval = mantissa / (2^(31 - exp)) */
157 0 : ret_exp = extract_l(L_shr(x, 25));
158 :
159 0 : IF (x < -1040187392l /*-31.0/64.0 Q31*/)
160 : {
161 0 : *exp = 0;
162 0 : move16();
163 0 : return 0;
164 : }
165 0 : test();
166 0 : IF ((L_sub(x, 1040187392l /*31.0/64.0 Q31*/) >= 0))
167 : {
168 0 : *exp = 31;
169 0 : move16();
170 0 : return 0x40000000;
171 : }
172 0 : ELSE IF(x == 0)
173 : {
174 0 : *exp = -1;
175 0 : move16();
176 0 : return 0x10000000;
177 : }
178 :
179 0 : frac = extract_l(L_and(x, 0x3FF));
180 :
181 0 : index3 = L_and(L_shr(x, 10), 0x1F);
182 0 : index2 = L_and(L_shr(x, 15), 0x1F);
183 0 : index1 = L_and(L_shr(x, 20), 0x1F);
184 :
185 0 : lookup3f = L_add(exp2x_tab_long_lc3plus[index3], L_shr(Mpy_32_16_lc3plus(0x0016302F, frac), 1));
186 0 : lookup12 = Mpy_32_32_lc3plus(exp2_tab_long_lc3plus[index1], exp2w_tab_long_lc3plus[index2]);
187 0 : lookup = Mpy_32_32_lc3plus(lookup12, lookup3f);
188 :
189 0 : IF (x > 0)
190 : {
191 : /* The returned exponent is the offset from 31, so the float result is
192 : retVal / 2^(31 - *exp) */
193 0 : *exp = add(ret_exp, 3);
194 0 : retVal = lookup;
195 : }
196 : ELSE
197 : {
198 : /* For negative powers provide the result in Q31 and ignore the exponent */
199 0 : *exp = 0;
200 0 : retVal = L_shr(lookup, sub(negate(ret_exp), 3));
201 : }
202 :
203 0 : return retVal;
204 : }
205 : #endif
206 :
207 : /* local function for Sqrt16_lc3plus and Sqrt16norm */
208 0 : static Word16 Sqrt16_common(Word16 m, Word16 e)
209 : {
210 : Word16 index, frac;
211 :
212 0 : assert((m >= 0x4000) || (m == 0));
213 :
214 : /* get table index (upper 6 bits minus 32) */
215 : /* index = (m >> 9) - 32; */
216 0 : index = mac_r(-32768 - (32 << 16), m, 1 << 6);
217 :
218 : /* get fractional part for interpolation (lower 9 bits) */
219 0 : frac = s_and(m, 0x1FF); /* Q9 */
220 :
221 : /* interpolate */
222 0 : if (m != 0)
223 : {
224 0 : m = mac_r(SqrtTable_lc3plus[index], SqrtDiffTable_lc3plus[index], frac);
225 : }
226 :
227 : /* handle odd exponents */
228 0 : if (s_and(e, 1) != 0)
229 0 : m = mult_r(m, 0x5a82);
230 :
231 0 : return m;
232 : }
233 :
234 : /* local function for ISqrt16 and ISqrt16norm */
235 0 : static Word16 ISqrt16_common(Word16 m, Word16 e)
236 : {
237 : Word16 index, frac;
238 :
239 0 : assert(m >= 0x4000);
240 :
241 : /* get table index (upper 6 bits minus 32) */
242 : /* index = (m >> 9) - 32; */
243 0 : index = mac_r(-32768 - (32 << 16), m, 1 << 6);
244 :
245 : /* get fractional part for interpolation (lower 9 bits) */
246 0 : frac = s_and(m, 0x1FF); /* Q9 */
247 :
248 : /* interpolate */
249 0 : m = msu_r(ISqrtTable_lc3plus[index], ISqrtDiffTable_lc3plus[index], frac);
250 :
251 : /* handle even exponents */
252 0 : if (s_and(e, 1) == 0)
253 0 : m = mult_r(m, 0x5a82);
254 :
255 0 : return m;
256 : }
257 :
258 0 : Word16 Sqrt16_lc3plus( /*!< output mantissa */
259 : Word16 mantissa, /*!< input mantissa */
260 : Word16 *exponent /*!< pointer to exponent */
261 : )
262 : {
263 : Word16 preShift, e;
264 :
265 0 : assert(mantissa >= 0);
266 :
267 : /* normalize */
268 0 : preShift = norm_s(mantissa);
269 :
270 0 : e = sub(*exponent, preShift);
271 0 : mantissa = shl(mantissa, preShift);
272 :
273 : /* calc mantissa */
274 0 : mantissa = Sqrt16_common(mantissa, e);
275 :
276 : /* e = (e + 1) >> 1 */
277 0 : *exponent = mult_r(e, 1 << 14); move16();
278 :
279 0 : return mantissa;
280 : }
281 :
282 0 : Word16 ISqrt16( /*!< output mantissa */
283 : Word16 mantissa, /*!< input mantissa */
284 : Word16 *exponent /*!< pointer to exponent */
285 : )
286 : {
287 : Word16 preShift, e;
288 :
289 0 : assert(mantissa > 0);
290 :
291 : /* normalize */
292 0 : preShift = norm_s(mantissa);
293 :
294 0 : e = sub(*exponent, preShift);
295 0 : mantissa = shl(mantissa, preShift);
296 :
297 : /* calc mantissa */
298 0 : mantissa = ISqrt16_common(mantissa, e);
299 :
300 : /* e = (2 - e) >> 1 */
301 0 : *exponent = msu_r(1L << 15, e, 1 << 14); move16();
302 :
303 0 : return mantissa;
304 : }
305 :
306 0 : Word16 Inv16_lc3plus( /*!< output mantissa */
307 : Word16 mantissa, /*!< input mantissa */
308 : Word16 *exponent /*!< pointer to exponent */
309 : )
310 : {
311 : Word16 index, frac;
312 : Word16 preShift;
313 : Word16 m, e;
314 :
315 0 : assert(mantissa != 0);
316 :
317 : /* absolute */
318 0 : m = abs_s(s_max(mantissa, MIN_16 + 1));
319 :
320 : /* normalize */
321 0 : preShift = norm_s(m);
322 :
323 0 : e = sub(*exponent, preShift);
324 0 : m = shl(m, preShift);
325 :
326 : /* get table index (upper 6 bits minus 32) */
327 : /* index = (m >> 9) - 32; */
328 0 : index = mac_r(-32768 - (32 << 16), m, 1 << 6);
329 :
330 : /* get fractional part for interpolation (lower 9 bits) */
331 0 : frac = shl(s_and(m, 0x1FF), 1); /* Q10 */
332 :
333 : /* interpolate */
334 0 : m = msu_r(InvTable_lc3plus[index], InvDiffTable_lc3plus[index], frac);
335 :
336 : /* restore sign */
337 0 : if (mantissa < 0)
338 0 : m = negate(m);
339 :
340 : /* e = 1 - e */
341 0 : *exponent = sub(1, e); move16();
342 :
343 0 : return m;
344 : }
345 :
346 : /********************************************************************/
347 : /*!
348 : \brief Calculates the scalefactor needed to normalize input array
349 :
350 : The scalefactor needed to normalize the Word16 input array is returned <br>
351 : If the input array contains only '0', a scalefactor 0 is returned <br>
352 : Scaling factor is determined wrt a normalized target x: 16384 <= x <= 32767 for positive x <br>
353 : and -32768 <= x <= -16384 for negative x
354 : */
355 :
356 0 : Word16 getScaleFactor16( /* o: measured headroom in range [0..15], 0 if all x[i] == 0 */
357 : const Word16 *x, /* i: array containing 16-bit data */
358 : const Word16 len_x) /* i: length of the array to scan */
359 : {
360 : Counter i;
361 : Word16 i_min, i_max;
362 : Word16 x_min, x_max;
363 :
364 0 : x_max = 0; move16();
365 0 : x_min = 0; move16();
366 0 : FOR (i = 0; i < len_x; i++)
367 : {
368 0 : if (x[i] >= 0)
369 0 : x_max = s_max(x_max, x[i]);
370 0 : if (x[i] < 0)
371 0 : x_min = s_min(x_min, x[i]);
372 : }
373 :
374 0 : i_max = 0x10; move16();
375 0 : i_min = 0x10; move16();
376 :
377 0 : if (x_max != 0)
378 0 : i_max = norm_s(x_max);
379 :
380 0 : if (x_min != 0)
381 0 : i_min = norm_s(x_min);
382 :
383 0 : i = s_and(s_min(i_max, i_min), 0xF);
384 :
385 0 : return i;
386 : }
387 :
388 : /********************************************************************/
389 : /*!
390 : \brief Calculates the scalefactor needed to normalize input array
391 :
392 : The scalefactor needed to normalize the Word16 input array is returned <br>
393 : If the input array contains only '0', a scalefactor 16 is returned <br>
394 : Scaling factor is determined wrt a normalized target x: 16384 <= x <= 32767 for positive x <br>
395 : and -32768 <= x <= -16384 for negative x
396 : */
397 :
398 0 : Word16 getScaleFactor16_0( /* o: measured headroom in range [0..15], 16 if all x[i] == 0 */
399 : const Word16 *x, /* i: array containing 16-bit data */
400 : const Word16 len_x) /* i: length of the array to scan */
401 : {
402 : Counter i;
403 : Word16 i_min, i_max;
404 : Word16 x_min, x_max;
405 :
406 0 : x_max = 0; move16();
407 0 : x_min = 0; move16();
408 0 : FOR (i = 0; i < len_x; i++)
409 : {
410 0 : if (x[i] >= 0)
411 0 : x_max = s_max(x_max, x[i]);
412 0 : if (x[i] < 0)
413 0 : x_min = s_min(x_min, x[i]);
414 : }
415 :
416 0 : i_max = 0x10; move16();
417 0 : i_min = 0x10; move16();
418 :
419 0 : if (x_max != 0)
420 0 : i_max = norm_s(x_max);
421 :
422 0 : if (x_min != 0)
423 0 : i_min = norm_s(x_min);
424 :
425 0 : i = s_min(i_max, i_min);
426 :
427 0 : return i;
428 : }
429 :
430 : /********************************************************************/
431 : /*!
432 : \brief Calculates the scalefactor needed to normalize input array
433 :
434 : The scalefactor needed to normalize the Word32 input array is returned <br>
435 : If the input array contains only '0', a scalefactor 0 is returned <br>
436 : Scaling factor is determined wrt a normalized target x: 1073741824 <= x <= 2147483647 for positive x <br>
437 : and -2147483648 <= x <= -1073741824 for negative x
438 : */
439 :
440 0 : Word16 getScaleFactor32_lc3plus( /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */
441 : const Word32 *x, /* i: array containing 32-bit data */
442 : const Word16 len_x) /* i: length of the array to scan */
443 : {
444 : Counter i;
445 : Word16 i_min, i_max;
446 : Word32 x_min, x_max;
447 :
448 0 : x_max = L_add(0, 0);
449 0 : x_min = L_add(0, 0);
450 0 : FOR (i = 0; i < len_x; i++)
451 : {
452 0 : if (x[i] >= 0)
453 0 : x_max = L_max(x_max, x[i]);
454 0 : if (x[i] < 0)
455 0 : x_min = L_min(x_min, x[i]);
456 : }
457 :
458 0 : i_max = 0x20; move16();
459 0 : i_min = 0x20; move16();
460 :
461 0 : if (x_max != 0)
462 0 : i_max = norm_l(x_max);
463 :
464 0 : if (x_min != 0)
465 0 : i_min = norm_l(x_min);
466 :
467 0 : i = s_and(s_min(i_max, i_min), 0x1F);
468 :
469 0 : return i;
470 : }
471 :
472 : /********************************************************************/
473 : /*!
474 : \brief Calculates the scalefactor needed to normalize input array
475 :
476 : The scalefactor needed to normalize the Word32 input array is returned <br>
477 : If the input array contains only '0', a scalefactor 32 is returned <br>
478 : Scaling factor is determined wrt a normalized target x: 1073741824 <= x <= 2147483647 for positive x <br>
479 : and -2147483648 <= x <= -1073741824 for negative x
480 : */
481 :
482 0 : Word16 getScaleFactor32_0( /* o: measured headroom in range [0..31], 32 if all x[i] == 0 */
483 : const Word32 *x, /* i: array containing 32-bit data */
484 : const Word16 len_x) /* i: length of the array to scan */
485 : {
486 : Counter i;
487 : Word16 i_min, i_max;
488 : Word32 x_min, x_max;
489 :
490 0 : x_max = L_add(0, 0);
491 0 : x_min = L_add(0, 0);
492 0 : FOR (i = 0; i < len_x; i++)
493 : {
494 0 : if (x[i] >= 0)
495 0 : x_max = L_max(x_max, x[i]);
496 0 : if (x[i] < 0)
497 0 : x_min = L_min(x_min, x[i]);
498 : }
499 :
500 0 : i_max = 0x20; move16();
501 0 : i_min = 0x20; move16();
502 :
503 0 : if (x_max != 0)
504 0 : i_max = norm_l(x_max);
505 :
506 0 : if (x_min != 0)
507 0 : i_min = norm_l(x_min);
508 :
509 0 : i = s_min(i_max, i_min);
510 :
511 0 : return i;
512 : }
513 :
514 0 : Word16 BASOP_Util_Divide3216_Scale_lc3plus( /* o: result of division x/y, not normalized */
515 : Word32 x, /* i: numerator, signed */
516 : Word16 y, /* i: denominator, signed */
517 : Word16 *s) /* o: scaling, 0, if x==0 */
518 : {
519 : Word16 z;
520 : Word16 sx;
521 : Word16 sy;
522 : Word16 sign;
523 :
524 : /*assert (x > (Word32)0);
525 : assert (y >= (Word16)0);*/
526 :
527 : /* check, if numerator equals zero, return zero then */
528 0 : IF (x == (Word32)0)
529 : {
530 0 : *s = 0; move16();
531 :
532 0 : return ((Word16)0);
533 : }
534 :
535 0 : sign = s_xor(extract_h(x), y); /* just to exor the sign bits */
536 0 : x = L_abs(L_max(x, MIN_32 + 1));
537 0 : y = abs_s(s_max(y, MIN_16 + 1));
538 0 : sx = sub(norm_l(x), 1);
539 0 : x = L_shl(x, sx);
540 0 : sy = norm_s(y);
541 0 : y = shl(y, sy);
542 0 : *s = sub(sy, sx); move16();
543 :
544 0 : z = div_s(round_fx(x), y);
545 :
546 0 : if (sign < 0) /* if sign bits differ, negate the result */
547 : {
548 0 : z = negate(z);
549 : }
550 :
551 0 : return z;
552 : }
553 :
554 0 : Word16 BASOP_Util_Divide1616_Scale_lc3plus(Word16 x, Word16 y, Word16 *s)
555 : {
556 : Word16 z;
557 : Word16 sx;
558 : Word16 sy;
559 : Word16 sign;
560 :
561 : /* assert (x >= (Word16)0); */
562 0 : assert(y != (Word16)0);
563 :
564 0 : sign = 0; move16();
565 :
566 0 : IF (x < 0)
567 : {
568 0 : x = negate(x);
569 0 : sign = s_xor(sign, 1);
570 : }
571 :
572 0 : IF (y < 0)
573 : {
574 0 : y = negate(y);
575 0 : sign = s_xor(sign, 1);
576 : }
577 :
578 0 : IF (x == (Word16)0)
579 : {
580 0 : *s = 0; move16();
581 :
582 0 : return ((Word16)0);
583 : }
584 :
585 0 : sx = norm_s(x);
586 0 : x = shl(x, sx);
587 0 : x = shr(x, 1);
588 0 : *s = sub(1, sx); move16();
589 :
590 0 : sy = norm_s(y);
591 0 : y = shl(y, sy);
592 0 : *s = add(*s, sy); move16();
593 :
594 0 : z = div_s(x, y);
595 :
596 0 : if (sign != 0)
597 : {
598 0 : z = negate(z);
599 : }
600 :
601 0 : return z;
602 : }
603 :
604 0 : Word32 Norm32Norm(const Word32 *x, const Word16 headroom, const Word16 length, Word16 *result_e)
605 : {
606 : Word32 L_tmp, L_tmp2, inc;
607 : Word16 s, shift, tmp;
608 : Counter i;
609 :
610 0 : shift = headroom; move16();
611 :
612 0 : L_tmp = L_deposit_l(0);
613 :
614 0 : FOR (i = 0; i < length; i++)
615 : {
616 0 : L_tmp2 = L_sub(L_tmp, 0x40000000);
617 0 : if (L_tmp2 >= 0)
618 0 : shift = sub(shift, 1);
619 0 : if (L_tmp2 >= 0)
620 0 : L_tmp = L_shr(L_tmp, 2);
621 :
622 0 : tmp = round_fx_sat(L_shl_sat(x[i], shift));
623 0 : L_tmp = L_mac0(L_tmp, tmp, tmp); /* exponent = (1-shift*2) , Q(30+shift*2) */
624 : }
625 :
626 : /* Consider an increase of 0xfffd per sample in case that the pre-shift factor
627 : in the acf is 1 bit higher than the shift factor estimated in this function.
628 : This prevent overflows in the acf. */
629 0 : IF (L_tmp != 0)
630 : {
631 0 : s = norm_s(length);
632 0 : inc = L_shl(Mpy_32_16_lc3plus(0x0000fffd, shl(length, s)), sub(15, s));
633 0 : L_tmp = L_add_sat(L_tmp, inc);
634 : }
635 :
636 0 : *result_e = sub(1, shl(shift, 1)); move16();
637 :
638 0 : return L_tmp;
639 : }
640 :
641 0 : void Scale_sig(Word16 x[], /* i/o: signal to scale Qx */
642 : const Word16 lg, /* i : size of x[] Q0 */
643 : const Word16 exp0 /* i : exponent: x = round(x << exp) Qx ?exp */
644 : )
645 : {
646 : Counter i;
647 : Word16 tmp;
648 0 : IF (exp0 > 0)
649 : {
650 0 : tmp = s_min(exp0, 15); move16();
651 0 : FOR (i = 0; i < lg; i++)
652 : {
653 0 : x[i] = shl(x[i], tmp); move16(); /* saturation can occur here */
654 : }
655 0 : return;
656 : }
657 0 : IF (exp0 < 0)
658 : {
659 0 : tmp = shl(-32768, s_max(exp0, -15)); /* we use negative to correctly represent 1.0 */
660 0 : FOR (i = 0; i < lg; i++)
661 : {
662 0 : x[i] = msu_r(0, x[i], tmp); move16(); /* msu instead of mac because factor is negative */
663 : }
664 0 : return;
665 : }
666 : }
667 :
668 0 : void Copy_Scale_sig(const Word16 x[], /* i : signal to scale input Qx */
669 : Word16 y[], /* o : scaled signal output Qx */
670 : const Word16 lg, /* i : size of x[] Q0 */
671 : const Word16 exp0 /* i : exponent: x = round(x << exp) Qx ?exp */
672 : )
673 : {
674 : Counter i;
675 : Word16 tmp;
676 :
677 0 : IF (exp0 == 0)
678 : {
679 0 : basop_memmove(y, x, lg * sizeof(Word16));
680 :
681 0 : return;
682 : }
683 0 : IF (exp0 < 0)
684 : {
685 0 : tmp = shl(-32768, exp0); /* we use negative to correctly represent 1.0 */
686 0 : FOR (i = 0; i < lg; i++)
687 : {
688 0 : y[i] = msu_r(0, x[i], tmp); move16();
689 : }
690 0 : return;
691 : }
692 0 : FOR (i = 0; i < lg; i++)
693 : {
694 0 : y[i] = shl_sat(x[i], exp0); move16(); /* saturation can occur here */
695 : }
696 : }
697 :
698 0 : Word32 BASOP_Util_Add_Mant32Exp_lc3plus /*!< o: normalized result mantissa */
699 : (Word32 a_m, /*!< i: Mantissa of 1st operand a */
700 : Word16 a_e, /*!< i: Exponent of 1st operand a */
701 : Word32 b_m, /*!< i: Mantissa of 2nd operand b */
702 : Word16 b_e, /*!< i: Exponent of 2nd operand b */
703 : Word16 *ptr_e) /*!< o: exponent of result */
704 : {
705 : Word32 L_tmp;
706 : Word16 shift;
707 :
708 : /* Compare exponents: the difference is limited to +/- 30
709 : The Word32 mantissa of the operand with lower exponent is shifted right by the exponent difference.
710 : Then, the unshifted mantissa of the operand with the higher exponent is added. The addition result
711 : is normalized and the result represents the mantissa to return. The returned exponent takes into
712 : account all shift operations.
713 : */
714 :
715 0 : if (!a_m)
716 0 : a_e = add(b_e, 0);
717 :
718 0 : if (!b_m)
719 0 : b_e = add(a_e, 0);
720 :
721 0 : shift = sub(a_e, b_e);
722 0 : shift = s_max(-31, shift);
723 0 : shift = s_min(31, shift);
724 0 : if (shift < 0)
725 : {
726 : /* exponent of b is greater than exponent of a, shr a_m */
727 0 : a_m = L_shl(a_m, shift);
728 : }
729 0 : if (shift > 0)
730 : {
731 : /* exponent of a is greater than exponent of b */
732 0 : b_m = L_shr(b_m, shift);
733 : }
734 0 : a_e = add(s_max(a_e, b_e), 1);
735 0 : L_tmp = L_add(L_shr(a_m, 1), L_shr(b_m, 1));
736 0 : shift = norm_l(L_tmp);
737 0 : if (shift)
738 0 : L_tmp = L_shl(L_tmp, shift);
739 0 : if (L_tmp == 0)
740 0 : a_e = add(0, 0);
741 0 : if (L_tmp != 0)
742 0 : a_e = sub(a_e, shift);
743 0 : *ptr_e = a_e;
744 :
745 0 : return (L_tmp);
746 : }
747 :
748 0 : Word16 BASOP_Util_Cmp_Mant32Exp_lc3plus /*!< o: flag: result of comparison */
749 : /* 0, if a == b */
750 : /* 1, if a > b */
751 : /* -1, if a < b */
752 : (Word32 a_m, /*!< i: Mantissa of 1st operand a */
753 : Word16 a_e, /*!< i: Exponent of 1st operand a */
754 : Word32 b_m, /*!< i: Mantissa of 2nd operand b */
755 : Word16 b_e) /*!< i: Exponent of 2nd operand b */
756 :
757 : {
758 : Word32 diff_m;
759 : Word16 diff_e, shift, result;
760 :
761 : /*
762 : This function compares two input parameters, both represented by a 32-bit mantissa and a 16-bit exponent.
763 : If both values are identical, 0 is returned.
764 : If a is greater b, 1 is returned.
765 : If a is less than b, -1 is returned.
766 : */
767 :
768 : /* Check, if both mantissa and exponents are identical, when normalized: return 0 */
769 0 : shift = norm_l(a_m);
770 0 : if (shift)
771 0 : a_m = L_shl(a_m, shift);
772 0 : if (shift)
773 0 : a_e = sub(a_e, shift);
774 :
775 0 : shift = norm_l(b_m);
776 0 : if (shift)
777 0 : b_m = L_shl(b_m, shift);
778 0 : if (shift)
779 0 : b_e = sub(b_e, shift);
780 :
781 : /* align exponent, if any mantissa is zero */
782 0 : if (!a_m)
783 0 : a_e = add(b_e, 0);
784 0 : if (!b_m)
785 0 : b_e = add(a_e, 0);
786 :
787 0 : IF (a_m > 0 && b_m < 0)
788 : {
789 0 : diff_m = 1; move16();
790 : }
791 0 : ELSE IF (a_m<0 && b_m> 0)
792 : {
793 0 : diff_m = -1; move16();
794 : }
795 : ELSE
796 : {
797 0 : diff_m = L_sub(a_m, b_m);
798 : }
799 :
800 0 : diff_e = sub(a_e, b_e);
801 :
802 0 : test();
803 0 : IF (diff_m == 0 && diff_e == 0)
804 : {
805 0 : return 0;
806 : }
807 :
808 : /* Check sign, exponent and mantissa to identify, whether a is greater b or not */
809 0 : result = sub(0, 1);
810 :
811 0 : IF (a_m >= 0)
812 : {
813 : /* a is positive */
814 0 : if (b_m < 0)
815 : {
816 0 : result = add(1, 0);
817 : }
818 :
819 0 : test(); test(); test();
820 0 : if ((b_m >= 0) && ((diff_e > 0) || (diff_e == 0 && diff_m > 0)))
821 : {
822 0 : result = add(1, 0);
823 : }
824 : }
825 : ELSE
826 : {
827 : /* a is negative */
828 0 : test(); test(); test();
829 0 : if ((b_m < 0) && ((diff_e < 0) || (diff_e == 0 && diff_m > 0)))
830 : {
831 0 : result = add(1, 0);
832 : }
833 : }
834 0 : return result;
835 : }
836 :
837 : /*----------------------------------------------------------------------------------*
838 : * Function: Isqrt
839 : *
840 : * Description:
841 : *
842 : * The function computes 1/sqrt(x).
843 : * The mantissa of the input value must be in the range of 1.0 > x >= 0.5.
844 : * The computation of the inverse square root is an approach with a lookup table
845 : * and linear interpolation.
846 : *
847 : * result = x * 2^x_e
848 : *
849 : * Parameter:
850 : *
851 : * x [i] mantissa (Q31)
852 : * x_e [i/o] pointer to exponent (Q0)
853 : *
854 : * Return value:
855 : *
856 : * mantissa (Q31)
857 : *
858 : *----------------------------------------------------------------------------------*/
859 0 : Word32 Isqrt_lc3plus(Word32 x, /* mantissa */
860 : Word16 *x_e /* pointer to exponent */
861 : )
862 : {
863 : Word16 s;
864 : Word32 y;
865 : Word32 idx;
866 : Word32 diff;
867 : Word16 fract;
868 :
869 0 : IF (x <= 0)
870 : {
871 0 : *x_e = 0; move16();
872 0 : return 0x7FFFFFFF;
873 : }
874 :
875 : /* check if exponent is odd */
876 0 : s = s_and(*x_e, 0x0001);
877 :
878 : /* get table index (upper 8 bits) */
879 0 : idx = L_and(L_shr(x, (31 - 8)), 0x00000007f);
880 :
881 : /* get fractional part for interpolation (lower 23 bits) */
882 0 : fract = extract_h(L_shl(L_and(x, 0x007FFFFF), 8));
883 :
884 : /* interpolate */
885 0 : diff = L_sub(isqrt_table[idx + 1], isqrt_table[idx]);
886 0 : y = L_add(isqrt_table[idx], Mpy_32_16_lc3plus(diff, fract));
887 :
888 : /* if exponent is odd apply sqrt(0.5) */
889 0 : if (s != 0)
890 : {
891 0 : y = Mpy_32_16_lc3plus(y, 0x5A82 /*0x5A827999*/);
892 : }
893 :
894 : /* if exponent is odd shift 1 bit left */
895 0 : if (s != 0)
896 : {
897 0 : y = L_shl(y, s);
898 : }
899 :
900 : /* change sign, shift right and add 1 to exponent (implicit exponent of isqrt_table) */
901 0 : *x_e = mac_r(32768, *x_e, -16384); move16();
902 :
903 0 : return y;
904 : }
905 :
906 0 : Word16 BASOP_Util_Log2_16(Word32 x, Word16 x_e)
907 : {
908 : Word16 shift, tmp1, tmp2;
909 : Word16 outInt, outFrac, out;
910 :
911 0 : assert(x >= 0);
912 :
913 0 : if (x == 0)
914 : {
915 0 : return (MIN_16);
916 : }
917 :
918 : /* Scale Input */
919 0 : shift = norm_l(x);
920 0 : x = L_shl(x, sub(shift, 10));
921 :
922 : /* Integer part */
923 0 : outInt = shl(sub(sub(x_e, shift), 1), 9);
924 :
925 : /* Fractional part */
926 0 : tmp1 = mac_r(x, -33, 16384);
927 0 : tmp2 = lshr(extract_l(x), 6);
928 0 : outFrac = mac_r(Log2_16_table1[tmp1], Log2_16_table2[tmp1], tmp2);
929 :
930 : /* Output */
931 0 : out = add(outInt, outFrac);
932 :
933 0 : return out;
934 : }
935 :
936 0 : Word16 BASOP_Util_InvLog2_16(Word16 x, Word16 *y_e)
937 : {
938 : Word16 tmp1, tmp2, y;
939 :
940 0 : tmp1 = shr(s_and(x, 2047), 5);
941 0 : tmp2 = shl(s_and(x, 31), 4);
942 0 : y = mac_r(InvLog2_16_table1[tmp1], InvLog2_16_table2[tmp1], tmp2);
943 0 : *y_e = add(shr_pos(x, 11), 1);
944 :
945 0 : return y;
946 : }
947 :
948 : #ifdef ENABLE_HR_MODE
949 : #define DFRACT_BITS 32 /* double precision */
950 :
951 : #define SQRT_BITS 7
952 : #define SQRT_BITS_MASK 0x7f
953 : #define SQRT_FRACT_BITS_MASK 0x007FFFFF
954 :
955 : #ifndef Word64
956 : #define Word64 long long
957 : #endif
958 :
959 0 : static __inline Word32 Mpy_32_32_noshift(Word32 x1, Word32 x2)
960 : {
961 0 : Word64 ret = 0;
962 0 : ret = ((Word64)x1 * (Word64)x2);
963 0 : return ((Word32)((ret & (Word64)0xffffffff00000000) >> 32));
964 : }
965 :
966 0 : static __inline Word32 fixmadddiv2_DD(const Word32 x, const Word32 a, const Word32 b)
967 : {
968 0 : return ((((Word64)x << 32) + (Word64)a * b) >> 32);
969 : }
970 :
971 : /**
972 : * \brief calculate 1.0/sqrt(op)
973 : * \param op_m mantissa of input value.
974 : * \param result_e pointer to return the exponent of the result
975 : * \return mantissa of the result
976 : */
977 : /*****************************************************************************
978 : delivers 1/sqrt(op) normalized to .5...1 and the shift value of the OUTPUT,
979 : i.e. the denormalized result is 1/sqrt(op) = invSqrtNorm(op) * 2^(shift)
980 : uses Newton-iteration for approximation
981 : Q(n+1) = Q(n) + Q(n) * (0.5 - 2 * V * Q(n)^2)
982 : with Q = 0.5* V ^-0.5; 0.5 <= V < 1.0
983 : *****************************************************************************/
984 0 : static __inline Word32 invSqrtNorm2(Word32 op, Word32 *shift)
985 : {
986 0 : Word32 val = op;
987 : Word32 reg1, reg2;
988 :
989 0 : if (val == 0)
990 : {
991 0 : *shift = 16;
992 0 : return ((Word32)0x7FFFFFFF); /* maximum positive value */
993 : }
994 :
995 : /* #define INVSQRTNORM2_NEWTON_ITERATE */
996 : #define INVSQRTNORM2_LINEAR_INTERPOLATE
997 : #define INVSQRTNORM2_LINEAR_INTERPOLATE_HQ
998 :
999 : /* normalize input, calculate shift value */
1000 0 : assert(val > 0);
1001 0 : *shift = norm_l(val); /* CountLeadingBits() is not necessary here since test value is always > 0 */
1002 0 : val <<= *shift; /* normalized input V */
1003 0 : *shift += 2; /* bias for exponent */
1004 :
1005 : #if defined(INVSQRTNORM2_NEWTON_ITERATE)
1006 : /* Newton iteration of 1/sqrt(V) */
1007 : reg1 = invSqrtTab[(Word32)(val >> (DFRACT_BITS - 1 - (SQRT_BITS + 1))) & SQRT_BITS_MASK];
1008 : reg2 = FL2FXCONST_DBL(0.0625f); /* 0.5 >> 3 */
1009 :
1010 : Word32 regtmp = fPow2Div2(reg1); /* a = Q^2 */
1011 : regtmp = reg2 - fMultDiv2(regtmp, val); /* b = 0.5 - 2 * V * Q^2 */
1012 : reg1 += (fMultDiv2(regtmp, reg1) << 4); /* Q = Q + Q*b */
1013 : #elif defined(INVSQRTNORM2_LINEAR_INTERPOLATE)
1014 0 : Word32 index = (Word32)(val >> (DFRACT_BITS - 1 - (SQRT_BITS + 1))) & SQRT_BITS_MASK;
1015 0 : Word32 Fract = (Word32)(((Word32)val & SQRT_FRACT_BITS_MASK) << (SQRT_BITS + 1));
1016 0 : Word32 diff = invSqrtTab[index + 1] - invSqrtTab[index];
1017 0 : reg1 = invSqrtTab[index] + (Word32)(((UWord32)(Mpy_32_32_noshift(diff, Fract)) << 1));
1018 : #if defined(INVSQRTNORM2_LINEAR_INTERPOLATE_HQ)
1019 : /* reg1 = t[i] + (t[i+1]-t[i])*fract ... already computed ...
1020 : + (1-fract)fract*(t[i+2]-t[i+1])/2 */
1021 0 : if (Fract != (Word32)0)
1022 : {
1023 : /* fract = fract * (1 - fract) */
1024 0 : Fract = Mpy_32_32_noshift(Fract, (Word32)((UWord32)0x80000000 - (Word32)Fract)) << 1;
1025 0 : diff = diff - (invSqrtTab[index + 2] - invSqrtTab[index + 1]);
1026 0 : reg1 = fixmadddiv2_DD(reg1, Fract, diff);
1027 : }
1028 : #endif /* INVSQRTNORM2_LINEAR_INTERPOLATE_HQ */
1029 : #else
1030 : #error "Either define INVSQRTNORM2_NEWTON_ITERATE or INVSQRTNORM2_LINEAR_INTERPOLATE"
1031 : #endif
1032 : /* calculate the output exponent = input exp/2 */
1033 0 : if (*shift & 0x00000001)
1034 : { /* odd shift values ? */
1035 : /* Note: Do not use rounded value 0x5A82799A to avoid overflow with shift-by-2 */
1036 0 : reg2 = (Word32)0x5A827999; /* FL2FXCONST_DBL(0.707106781186547524400844362104849f);*/ /* 1/sqrt(2); */
1037 : #ifdef __ADSP21000__
1038 : reg1 = fMult(reg1, reg2) << 1; /* compiler bug work around (CCES 2.4.0), and better precision */
1039 : #else
1040 0 : reg1 = Mpy_32_32_noshift(reg1, reg2) << 2;
1041 : #endif
1042 : }
1043 :
1044 0 : *shift = *shift >> 1;
1045 :
1046 0 : return (reg1);
1047 : }
1048 :
1049 : /**
1050 : * \brief calculate 1.0/(op_m * 2^op_e)
1051 : * \param op_m mantissa of the input value.
1052 : * \param op_e pointer into were the exponent of the input value is stored, and the result will be stored into.
1053 : * \return mantissa of the result
1054 : */
1055 0 : Word32 invFixp(Word32 op_m, Word16 *op_e)
1056 : {
1057 0 : if ((op_m == (Word32)0x00000000) || (op_m == (Word32)0x00000001))
1058 : {
1059 0 : *op_e = 31 - *op_e;
1060 0 : return ((Word32)0x7FFFFFFF);
1061 : }
1062 :
1063 : Word32 tmp_exp;
1064 0 : Word32 tmp_inv = invSqrtNorm2(op_m, &tmp_exp);
1065 :
1066 0 : *op_e = (tmp_exp << 1) - *op_e + 1;
1067 0 : return Mpy_32_32_noshift(tmp_inv, tmp_inv);
1068 : }
1069 : #endif
|