Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include <assert.h>
6 : #include <stdint.h>
7 : #include "options.h"
8 : #include "cnst.h" /* Common constants */
9 : #include "rom_com.h" /* Static table prototypes */
10 : #include "prot_fx.h"
11 : #include "basop_util.h"
12 :
13 : #define N_OPT_FX 5 /* highest dimension served by the direct (closed-form) size/offset functions; larger dimensions use the A/U offset recursion */
14 : #define TABLE_LIM_OPT_FX 96 /* largest k_val for which nm_h_prep_opt_fx() uses the tabled exact-division row recursion; above it the offsets are built by recursing over the dimension */
15 :
16 :
17 : /* local typedefs for optimized PVQ indexing; used only inside this C file to dispatch common function calls through function pointers */
18 : typedef void ( *VEC2INDFUNCM )( const Word16 *, Word16 *, UWord32 *, UWord32 * ); /* same signature as vec2mind_one_fx() / vec2mind_two_fx() */
19 : typedef UWord32 ( *NFUNCM )( Word16 );
20 : typedef UWord32 ( *H_FUNCM )( UWord32 ); /* direct offset function A(n,k) for a fixed n, e.g. a_four_fx() */
21 : typedef void ( *IND2VECFUNCM )( Word16, Word16, UWord32, Word16 * ); /* index-to-vector decoder: ( k_val, leading_sign, index, vec_out ) */
22 : typedef void ( *NDIM_FUNCM )( Word16, Word16, UWord32, Word16 * ); /* decoder for the next lower dimension, same argument order as IND2VECFUNCM */
23 :
24 : /* local constants for indexing functions */
25 : #define SIGNBIT_FX 0x80000000u /* bit 31 of a 32-bit word */
26 : #define SIGNBIT_SHRT_FX 0x8000 /* bit 15 of a 16-bit word */
27 : #define UDIVBY3_FX 2863311531U /* (2^33+1)/3, i.e. the multiplicative inverse of 3 modulo 2^32; used to divide exact multiples of 3 with a 32-bit multiply */
28 :
29 : /*-------------------------------------------------------------------*
30 : * f_odd_exact_div_opt_fx()
31 : *
32 : * find 1/(den1*2+1) * ( num1p*num2p - num3) ,
33 : * if the result is known a priori to be exactly a 32bit UWord32
34 : *-------------------------------------------------------------------*/
35 801275 : static UWord32 f_odd_exact_div_opt_fx( /* o Q0 : see Eq. */
36 : UWord32 num1p,
37 : /* i : see Eq. */ /* (2n-1) or n , i.e can be short also */
38 : UWord32 num2p, /* i : see Eq. */
39 : UWord32 num3, /* i : see Eq. */
40 : Word16 den1 /* Q0 i : see Eq. */ /*range [0..127] can be made to short */
41 : )
42 : {
43 : UWord32 UL_tmp;
44 801275 : UL_tmp = UL_Mpy_32_32( exactdivodd[den1], UL_subNsD( UL_Mpy_32_32( num1p, num2p ), num3 ) ); /*Q0*/
45 :
46 801275 : return ( UL_tmp );
47 : }
48 :
49 : /*---------------------------------------------------------------------------*
50 : * f_even_exact_div_opt_fx()
51 : *
52 : * returns (num1p*num2p - num3 )/ den1
53 : * if the result is known a priori to be exactly a 32bit unsigned integer
54 : *--------------------------------------------------------------------------*/
55 888889 : static UWord32 f_even_exact_div_opt_fx( /* o Q0 : see Eq. */
56 : UWord32 UL_num1p, /* i : see Eq. 2n-1 or n can be short input */
57 : UWord32 UL_num2p, /* i : see Eq. range should be larger than num1p */
58 : UWord32 UL_num3, /* i : see Eq. */
59 : Word16 den1 /* i : see Eq. */
60 : )
61 : {
62 : UWord32 UL_tmp, UL_oddfactor;
63 : Word16 den1_m1, even_sh;
64 : UWord32 UL_tmp_h;
65 : UWord16 sgn;
66 :
67 888889 : den1_m1 = sub( den1, 1 ); /* remove top bit */
68 888889 : even_sh = sub( 15, norm_s( s_xor( den1_m1, den1 ) ) ); /* STL signed ops ok as den1 <= 127 */
69 :
70 888889 : UL_oddfactor = exactdivodd[lshr( den1_m1, even_sh )];
71 888889 : move32();
72 888889 : even_sh = sub( even_sh, 1 );
73 :
74 :
75 888889 : Mpy_32_32_uu( UL_num1p, UL_num2p, &UL_tmp_h, &UL_tmp ); /* cost ~4 */
76 888889 : UL_tmp = UL_subNs( UL_tmp, UL_num3, &sgn ); /* may wrap for underflow */
77 888889 : if ( sgn ) /* underflow */
78 : {
79 110 : UL_tmp_h = UL_subNsD( UL_tmp_h, 1U ); /* single basicop -> if */
80 : }
81 888889 : UL_tmp = UL_or( UL_lshl( UL_tmp_h, sub( 32, even_sh ) ), UL_lshr( UL_tmp, even_sh ) );
82 : /* total cost 9-11 , old solution had 15-16*/
83 :
84 : /* now use tabled modular multiplicative inverse for the odd part division */
85 888889 : return UL_Mpy_32_32( UL_tmp, UL_oddfactor ); /*Q0*/
86 : }
87 :
88 : /* direct calculation functions for smaller dimensions to speed up indexing
89 :
90 : N_MPVQ(1,k) = 1;
91 : N_MPVQ(2,k) = k*2;
92 : N_MPVQ(3,k) = 1+2*(k^2);
93 : N_MPVQ(4,k) = k/3 * 4*(k^2+2);
94 : N_MPVQ(5,k) = 1 + 2*(k*k*(5+k*k))/3;
95 : N_MPVQ(*,k) = iterations = 1 + A(n,k) + A(n+1,k);
96 :
97 : N_PVQ(n,k) = 2*N_MPVQ(n,k);
98 :
99 :
100 : A(1,k) = 1;
101 : A(2,k) = -1 + 2k;
102 : A(3,k) = 1+2(k-1)k;
103 : A(4,k) = 1/3*(((4k-6)k+8)*k-3),
104 : A(5,k) = 1/3*(3 + k*(k*(10 + 2*(k-2)*k) - 8));
105 : A(*,k) = recursive iterations;
106 :
107 :
108 : U(n,k) = (A(n,k)-1)/2;
109 : U(1,k) = 0;
110 : U(2,k) = k-1;
111 : U(3,k) = k*(k-1)
112 : U(4,k) = (1/3)*((k - 1)*(2*k^2 - k + 3))
113 : U(5,k) = (1/3)*(k*(k - 1)*(k^2 - k + 4))
114 : U(*,k) = recursive iterations;
115 :
116 : U(n,k) = U(k,n);
117 : U(n,k) = func(n, U(n,k-1), U(n,k-2) , 1/(k-1) );
118 : U(n,k) = 1 + U(n-1,k-1) + U(n-1,k) + U(n,k-1);
119 : U(n,k) = 1 + A(n-1,k-1)>>1 + A(n-1,k)>>1 + A(n,k-1)>>1; A(n,k) is always odd if k>0
120 : */
121 :
122 : /*-------------------------------------------------------------------*
123 : * a_three_fx()
124 : *-------------------------------------------------------------------*/
125 20237 : static UWord32 a_three_fx( /* Q0 o: offset for dim 3 */
126 : UWord32 k_val /* Q0 i: nb unit pulses */
127 : ) /* k_val may be higher than 16 bit signed */
128 : {
129 20237 : IF( k_val )
130 : {
131 : /* return (ONE_U + k_val*((k_val - ONE_U) << ONE)); */
132 20025 : return UL_addNsD( 1U, UL_Mpy_32_32( k_val, UL_lshl( UL_subNsD( k_val, 1U ), 1 ) ) ); /*Q0*/
133 : }
134 : ELSE
135 : {
136 212 : return 0;
137 : }
138 : }
139 :
140 : /*-------------------------------------------------------------------*
141 : * a_four_fx()
142 : *-------------------------------------------------------------------*/
143 72928 : static UWord32 a_four_fx( /* Q0 o: offset for dim 4 */
144 : UWord32 k_val /* Q0 i: nb unit pulses */
145 : )
146 : {
147 : UWord32 UL_2k;
148 72928 : IF( k_val )
149 : {
150 : /* return UDIVBY3*((k_val<<ONE)*(4 + ((k_val<<ONE) - 3)*k_val) - 3); */
151 72744 : UL_2k = UL_lshl( k_val, 1 );
152 72744 : return UL_Mpy_32_32( UDIVBY3_FX,
153 : UL_subNsD( UL_Mpy_32_32( UL_2k, UL_addNsD( 4U, UL_Mpy_32_32( UL_subNsD( UL_2k, 3U ), k_val ) ) ), 3U ) ); /*Q0*/
154 : }
155 184 : return 0;
156 : }
157 :
158 : /*-------------------------------------------------------------------*
159 : * a_five_fx()
160 : *-------------------------------------------------------------------*/
161 2119 : static UWord32 a_five_fx( /* Q0 o: offset for dim 5 */
162 : UWord32 k_val /* Q0 i: nb unit pulses */
163 : )
164 : {
165 : /* k=uint64(256); offset = 1 + 2*idivide( (((k-2)*k + 5)*k -4) * k ,3) , log2(double(3*(double(offset)-1)/2)) */
166 :
167 2119 : IF( k_val == 0 )
168 : {
169 15 : return 0;
170 : }
171 2104 : ELSE IF( UL_subNsD( k_val, 1U ) == 0 )
172 : {
173 42 : return 1;
174 : }
175 : ELSE
176 : {
177 : /*UL_offset = ONE + (((((k_val - TWO)*k_val + 5)*k_val - 4)*k_val)*(UDIVBY3_FX))<<ONE; */
178 2062 : return UL_addNsD( 1U,
179 : UL_lshl(
180 : UL_Mpy_32_32( UL_Mpy_32_32( UL_subNsD( UL_Mpy_32_32( UL_addNsD( UL_Mpy_32_32( UL_subNsD( k_val, 2U ), k_val ), 5U ), k_val ), 4U ), k_val ), UDIVBY3_FX ), 1 ) );
181 : /* cost about ~13 cycles*/ /*Q0*/
182 : }
183 : }
184 :
185 : /*-------------------------------------------------------------------*
186 : * direct_msize_fx()
187 : * direct m_sizes for N=1..5
188 : *-------------------------------------------------------------------*/
189 20633 : static UWord32 direct_msize_fx( Word16 dim_in, Word16 k_in )
190 : {
191 : UWord32 UL_msize, k, ksq;
192 :
193 20633 : UL_msize = 1; /* k==0 or dim==1 , and base fot other dims */
194 20633 : move32();
195 20633 : IF( k_in != 0 )
196 : {
197 20633 : k = UL_deposit_l( k_in ); /*k = (UWord32) k_in;*/
198 20633 : ksq = UL_Mpy_32_32( k, k ); /*ksq= k*k; */
199 20633 : SWITCH( dim_in )
200 : {
201 662 : case ( 5 ):
202 : /* k*k = 238*238 < 2^16 , to remember for FIP*/
203 : /*UL_msize += ( ((ksq*(5 + ksq))* UDIVBY3_FX )<<ONE ); */
204 662 : UL_msize = UL_addNsD( UL_msize, UL_lshl( UL_Mpy_32_32( UL_Mpy_32_32( ksq, UL_addNsD( 5U, ksq ) ), UDIVBY3_FX ), 1 ) ); /*Q0*/
205 662 : BREAK;
206 19776 : case ( 4 ):
207 : /*UL_msize = 4*idivide(k*(k.^2+2),3);*/
208 19776 : UL_msize = UL_lshl( UL_Mpy_32_32( UL_Mpy_32_32( k, UL_addNsD( ksq, 2U ) ), UDIVBY3_FX ), 2 ); /* ((k*(ksq + 2))*UDIVBY3_FX) <<TWO; */ /*Q0*/
209 19776 : BREAK;
210 16 : case ( 3 ):
211 16 : UL_msize = UL_addNsD( UL_msize, UL_lshl( ksq, 1 ) ); /* += ((ksq)<<ONE) ;*/ /*Q0*/
212 16 : BREAK;
213 137 : case ( 2 ):
214 137 : UL_msize = UL_lshl( k, 1 ); /* k<<ONE; */ /*Q0*/
215 137 : BREAK;
216 : }
217 0 : }
218 :
219 20633 : return UL_msize; /*Q0*/
220 : }
221 :
222 : /* update h_mem[0.. k_val_in+1] , with starting offsets for A+U recursion */
223 283617 : static void initOffsets_fx( Word16 dim_in, UWord32 *h_mem, Word16 k_val_in )
224 : {
225 : UWord32 k_val_curr, k_val_prev;
226 : UWord32 k_val, UL_k_val_in;
227 :
228 :
229 283617 : h_mem[0] = UL_deposit_l( 0 ); /* A(=>0,k=0) */
230 283617 : move32();
231 283617 : h_mem[1] = UL_deposit_l( 1 ); /* A(*,k=1) */
232 283617 : move32();
233 :
234 283617 : UL_k_val_in = UL_deposit_l( k_val_in );
235 283617 : IF( EQ_16( dim_in, 2 ) )
236 : {
237 0 : FOR( k_val = 2; k_val <= UL_k_val_in; k_val++ )
238 : {
239 0 : h_mem[k_val] = UL_subNsD( UL_lshl( k_val, 1 ), 1U ); /*Q0*/
240 0 : move32(); /* A(2, 2 .. k ) */
241 : }
242 0 : h_mem[k_val] = UL_k_val_in; /*Q0*/
243 0 : move32(); /* U(2,k+1) */
244 : }
245 : ELSE
246 : {
247 283617 : k_val_prev = UL_deposit_l( 1U );
248 2054296 : FOR( k_val_curr = 2; k_val_curr <= UL_k_val_in; k_val_curr++ )
249 : {
250 1770679 : h_mem[k_val_curr] = UL_addNsD( 1U, UL_Mpy_32_32( k_val_curr, UL_lshl( k_val_prev, 1 ) ) ); /*Q0*/
251 1770679 : move32();
252 1770679 : k_val_prev = UL_addNsD( k_val_curr, 0U ); /* 1 op*/
253 : }
254 283617 : h_mem[k_val_curr] = UL_Mpy_32_32( k_val_curr, k_val_prev ); /*Q0*/
255 283617 : move32(); /* % U(3,k_val_in+1) u_three(k+1) */
256 : }
257 :
258 283617 : return;
259 : }
260 :
261 : /*-------------------------------------------------------------------*
262 : * a_fwd_fx()
263 : *
264 : * create offsets for A(n,k) from lower A(n-1,k)
265 : *-------------------------------------------------------------------*/
266 2879797 : static void a_fwd_fx(
267 : UWord32 *a_in, /* Q0 i/o: offsets */
268 : Word16 n_items /* i : items, k's */
269 : )
270 : {
271 : UWord32 a_1, a_in0;
272 : Word16 i;
273 : UWord32 *a_in_prev_ptr;
274 :
275 2879797 : a_in0 = UL_deposit_l( 1 );
276 :
277 2879797 : a_in_prev_ptr = &( a_in[-1] ); /* single loop ptr setup not counted; */
278 20381032 : FOR( i = 1; i <= n_items; i++ ) /* basic A fwd row recursion */
279 : {
280 17501235 : a_1 = UL_addNsD( a_in0, UL_addNsD( a_in_prev_ptr[i], a_in[i] ) ); /*Q0*/
281 17501235 : a_in_prev_ptr[i] = a_in0;
282 17501235 : move32();
283 17501235 : a_in0 = UL_addNsD( a_1, 0U ); /*Q0*/
284 : }
285 2879797 : a_in_prev_ptr[i] = a_in0; /*Q0*/
286 2879797 : move32();
287 2879797 : return;
288 : }
289 :
290 : /*-------------------------------------------------------------------*
291 : * a_bwd_fx()
292 : *
293 : * create offsets for A(n,k) from higher A(n+1,k)
294 : *-------------------------------------------------------------------*/
295 2486896 : static void a_bwd_fx(
296 : UWord32 *a_in, /* Q0 i/o: offsets */
297 : Word16 n_items /* i: n_items */
298 : )
299 : {
300 : UWord32 a_1, a_in0;
301 : Word16 i;
302 : UWord32 *a_in_prev_ptr;
303 :
304 2486896 : a_in0 = UL_deposit_l( 0 );
305 2486896 : a_in_prev_ptr = &( a_in[-1] );
306 :
307 15062950 : FOR( i = 1; i <= n_items; i++ ) /*basic A reverse row recursion */
308 : {
309 : /* 2x[i] ptr memory access below are treated as ptr access */
310 12576054 : a_1 = UL_subNsD( UL_subNsD( a_in[i], a_in0 ), a_in_prev_ptr[i] );
311 12576054 : a_in_prev_ptr[i] = a_in0; /*Q0*/
312 12576054 : move32();
313 12576054 : a_in0 = UL_addNsD( a_1, 0U ); /*Q0*/
314 : }
315 2486896 : a_in_prev_ptr[i] = a_in0; /*Q0*/
316 2486896 : move32();
317 2486896 : return;
318 : }
319 :
320 263108 : static UWord32 direct_row_A2U_rec_calc_fx( Word16 dim_in, Word16 k_val_in, UWord32 a_km2, UWord32 a_km1 )
321 : {
322 : /* U(n,k) = (A(n,k-2)-1)/2 + ((2*n-1)*A(n,k-1) - A(n,k-2) )/2*(k-1) */
323 : /* U(n,k) = floor(A(n,k-2)/2) + (n*A(n,k-1) - floor(A(n,k-1)/2) - floor(A(n,k-2)/2) +1 )/(k-1) */
324 : /* U(n,k) = floor(A(n,k-2)/2) + (n*A(n,k-1) - (floor(A(n,k-1)/2) + floor(A(n,k-2)/2) +1) )/(k-1) */
325 :
326 : UWord32 km2_size, UL_um2, UL_dim;
327 : Word16 divisor;
328 :
329 263108 : divisor = sub( k_val_in, 1 );
330 263108 : UL_um2 = UL_lshr( a_km2, 1U );
331 263108 : UL_dim = UL_deposit_l( dim_in );
332 263108 : km2_size = UL_addNsD( UL_addNsD( UL_lshr( a_km1, 1 ), UL_um2 ), 1U );
333 263108 : IF( s_and( divisor, 0x1 ) != 0 )
334 : {
335 : /* odd */
336 145521 : return UL_addNsD( UL_um2, f_odd_exact_div_opt_fx( UL_dim, a_km1, km2_size, shr( divisor, 1 ) ) ); /*Q0*/
337 : }
338 : ELSE
339 : {
340 : /* even divisor, */
341 117587 : return UL_addNsD( UL_um2, f_even_exact_div_opt_fx( UL_dim, a_km1, km2_size, divisor ) ); /*Q0*/
342 : }
343 : }
344 :
345 2879797 : static void a_u_fwd_fx( UWord32 *a_u_in, /*o Q0*/
346 : Word16 k_val_in, /*i */
347 : Word16 mem_size_m1 /*i */ )
348 : {
349 : UWord32 u_kp1_prev, u_kp1;
350 : UWord32 u_k_prev;
351 :
352 2879797 : u_kp1_prev = a_u_in[mem_size_m1];
353 2879797 : move32(); /* previous n U (n,k+1) value*/
354 2879797 : u_k_prev = UL_lshr( a_u_in[k_val_in], 1 ); /* previous n A(n,k) value*/ /*Q0*/
355 :
356 2879797 : a_fwd_fx( &a_u_in[1], k_val_in ); /* a_u_in[k==ZERO] = zero if n>0 */
357 :
358 : /* low dynamic last offset entry mixed recursion */
359 : /* used for size calculation */
360 : /* U(n,k+1) = 1 + U(n-1,k+1) + U(n-1,k) + U(n,k) */
361 : /* U(n,k+1) = 1 + U(n-1,k+1) + (A(n-1,k)-1)/2 + (A(n,k)-1)/2 */
362 : /* Note, A(n,k) always odd for k>0 , subtracted one always shifted out */
363 :
364 2879797 : u_kp1 = UL_lshr( a_u_in[k_val_in], 1 );
365 2879797 : a_u_in[mem_size_m1] = UL_addNsD( 1U, UL_addNsD( u_kp1_prev, UL_addNsD( u_k_prev, u_kp1 ) ) ); /*Q0*/
366 2879797 : move32();
367 2879797 : return;
368 : }
369 :
370 : /*-------------------------------------------------------------------*
371 : * nm_h_prep_opt_fx()
372 : *
373 : * find and return N_MPVQ(n,k) and also offsets A(n, 0 to k ) and U(n,k+1).
374 : *-------------------------------------------------------------------*/
375 263108 : static UWord32 nm_h_prep_opt_fx( /* o: Q0 msize for dim */
376 : Word16 dim_in, /* i: dimension */
377 : Word16 k_val_in, /* i: nb unit pulses */
378 : UWord32 *h /* o: Q0 A/U offsets array */
379 : )
380 : {
381 : Word16 mem_size_m1, k_val, tmp;
382 : Word16 dim_tmp, d_start;
383 : UWord32 h_saveA, h_saveB, u_kp1, a_k; /* registers for alternating A(n,k-1), A(n,k-2)*/
384 : UWord32 numDsub1;
385 : Word16 end_loop, add_last_odd;
386 :
387 263108 : h[0] = UL_deposit_l( 0 ); /* % A(=>0,k=0) */
388 263108 : move32();
389 263108 : h[1] = UL_deposit_l( 1 ); /* % A(*,k=1) */
390 263108 : move32();
391 :
392 263108 : mem_size_m1 = add( k_val_in, 1 );
393 :
394 263108 : assert( dim_in > N_OPT_FX ); /* code now optimized with direct functions for dim <= N_OPT_FX ) */
395 263108 : IF( ( GT_16( k_val_in, TABLE_LIM_OPT_FX ) ) )
396 : {
397 0 : d_start = 2;
398 0 : move16();
399 0 : if ( GE_16( dim_in, 3 ) )
400 : {
401 : /* start from A(3), U(3) */
402 0 : d_start = 3;
403 0 : move16(); /* single op */
404 : }
405 0 : initOffsets_fx( d_start, h, k_val_in );
406 0 : FOR( dim_tmp = d_start; dim_tmp < dim_in; dim_tmp++ )
407 : {
408 0 : a_u_fwd_fx( h, k_val_in, mem_size_m1 );
409 : }
410 0 : a_k = h[k_val_in];
411 0 : move32();
412 0 : u_kp1 = h[mem_size_m1]; /*Q0*/
413 0 : move32();
414 : }
415 : ELSE
416 : {
417 263108 : numDsub1 = UL_deposit_l( sub( shl( dim_in, 1 ), 1 ) ); /*Q0*/
418 263108 : h[2] = numDsub1; /*Q0*/
419 263108 : move32();
420 :
421 : /* interleaved " odd, even [odd]" divisor calls */
422 263108 : h_saveA = numDsub1;
423 263108 : move32();
424 263108 : h_saveB = UL_deposit_l( 1 );
425 :
426 : /* OPT: makes sure that the STL FOR loop is not broken */
427 263108 : tmp = sub( k_val_in, 3 );
428 263108 : add_last_odd = 0;
429 263108 : move16(); /*k_val_in=0 1 2*/
430 263108 : if ( tmp == 0 )
431 : {
432 20812 : add_last_odd = 1;
433 20812 : move16(); /*k_val_in =3 */
434 : }
435 263108 : k_val = 3;
436 263108 : move16();
437 263108 : IF( tmp > 0 )
438 : {
439 : /* k_val_in = 3,4, 5,6, 7 ... */
440 178157 : end_loop = mem_size_m1; /*Q0*/
441 178157 : move16();
442 178157 : add_last_odd = s_and( k_val_in, 0x1 );
443 178157 : move16(); /* odd -> 0x00100*/
444 : /* even loop limits, and odd tail exists , and */
445 178157 : if ( NE_16( add_last_odd, 0 ) )
446 : {
447 94736 : end_loop = sub( end_loop, 1 ); /* make initial loop to even number of (odd-even )pairs */ /* one basicop */
448 : }
449 833911 : FOR( k_val = 3; k_val < end_loop; k_val++ )
450 : {
451 : /* the optimized non broken loop k=(3,4)(5,6)...(odd,even)*/
452 : /* A(n,k) = A(n,k-2) + ((2*n-1)*A(n,k-1)-A(n,k-2)) /(k-1) */
453 : /* first odd k, even divisor */
454 655754 : h_saveB = UL_addNsD( h_saveB, f_even_exact_div_opt_fx( numDsub1, h_saveA, h_saveB, sub( k_val, 1 ) ) ); /*Q0*/
455 655754 : h[k_val] = h_saveB; /*Q0*/
456 655754 : move32();
457 :
458 : /* next even k, odd divisor */
459 : /*k_val++; */
460 655754 : h_saveA = UL_addNsD( h_saveA, f_odd_exact_div_opt_fx( numDsub1, h_saveB, h_saveA, shr( k_val, 1 ) ) ); /*Q0*/
461 655754 : k_val++; /* ptr incr */
462 655754 : h[k_val] = h_saveA; /*Q0*/
463 655754 : move32();
464 : }
465 : }
466 :
467 263108 : if ( add_last_odd != 0 )
468 : {
469 : /* add a last odd call as needed , not to be called if k_val_in is [0,1,2] */
470 115548 : h_saveB = UL_addNsD( h_saveB, f_even_exact_div_opt_fx( numDsub1, h_saveA, h_saveB, sub( k_val, 1 ) ) );
471 115548 : h[k_val_in] = h_saveB;
472 115548 : move32();
473 : }
474 :
475 : /* always do the last (k+1) recursion based on U(n,k+1) = func( A(n-2,k+1), A(n-1,k+1) ) */
476 263108 : a_k = h[k_val_in];
477 263108 : move32();
478 263108 : u_kp1 = direct_row_A2U_rec_calc_fx( dim_in, mem_size_m1, h[mem_size_m1 - 2], a_k ); /*Q0*/
479 263108 : h[mem_size_m1] = u_kp1; /*Q0*/
480 263108 : move32();
481 : }
482 :
483 : /* N_MPVQ(n,k) = 1 + U(n,k+1) + U(n,k) = 1 + U(n,k+1) + floor(A(n,k))/2) ; */ /* as A(n,k) always odd */
484 263108 : return ( UL_addNsD( 1U, UL_addNsD( u_kp1, UL_lshr( a_k, 1 ) ) ) ); /*Q0*/
485 : }
486 :
487 : /*
488 : find_amp_split_offset_func_mem_fx()
489 : find first offset in range 0..k_val_in that is less than ind_in
490 : using a tree search with direct function calls [ or memory iteration]
491 : */
492 10704 : static Word16 find_amp_split_offset_func_mem_fx( /* o: Q0 found k_value */
493 : UWord32 ind_in,
494 : Word16 high, /* i: k_val_in (high bound) */
495 : H_FUNCM h_func_ptr, /* i: offset function pointer */
496 : UWord32 *UL_tmp_offset ) /* o: Q0 offset found */
497 : {
498 10704 : Word16 not_ready, low, k_test = 0;
499 10704 : move16();
500 : UWord16 sgn;
501 : UWord32 UL_tmp;
502 : /* split over A(n,k) = h_mem(k), or use direct A function evaluation */
503 :
504 10704 : low = 0;
505 10704 : move16();
506 10704 : move32(); /* account for adaptive function ptr setup */
507 10704 : not_ready = 1;
508 10704 : move16();
509 :
510 75403 : WHILE( not_ready != 0 )
511 : {
512 64699 : k_test = shr( add( low, high ), 1 ); /*% split range in half */
513 64699 : *UL_tmp_offset = ( *h_func_ptr )( UL_deposit_l( k_test ) ); /* call direct A offset-function */
514 64699 : move32();
515 :
516 64699 : UL_tmp = UL_subNs( *UL_tmp_offset, ind_in, &sgn ); /*Q0*/
517 64699 : IF( sgn )
518 : {
519 : /* (*tmp_offset < ind_in) */
520 45171 : low = add( 1, k_test );
521 45171 : if ( GE_16( k_test, high ) )
522 : {
523 10600 : not_ready = 0;
524 10600 : move16(); /* single basicop */
525 : }
526 : }
527 : ELSE
528 : {
529 : /* (ind_in <= *tmp_offset ) */
530 19528 : high = sub( k_test, 1 );
531 19528 : if ( UL_tmp == 0 )
532 : {
533 : /* (*tmp_offset == ind_in) */
534 104 : not_ready = 0;
535 104 : move16(); /* single basicop */
536 : }
537 : }
538 : }
539 10704 : return k_test; /*Q0*/
540 : }
541 :
542 : /*
543 : get_lead_sign_fx()
544 : updated index and return leading sign
545 : */
546 889682 : static Word16 get_lead_sign_fx( UWord32 *ind )
547 : {
548 : Word16 leading_sign;
549 :
550 889682 : leading_sign = 1;
551 889682 : move16();
552 889682 : if ( UL_and( *ind, 1 ) != 0 )
553 : {
554 : /* leading sign stored in LSB */
555 444360 : leading_sign = -1;
556 444360 : move16();
557 : }
558 889682 : ( *ind ) = UL_lshr( *ind, 1 ); /*Q0*/
559 889682 : move32();
560 :
561 889682 : return leading_sign; /*Q0*/
562 : }
563 :
564 : /*-------------------------------------------------------------------*
565 : * mind2vec_one_fx()
566 : *-------------------------------------------------------------------*/
567 1163365 : static void mind2vec_one_fx(
568 : Word16 k_val_in, /* i: nb unit pulses */
569 : Word16 leading_sign, /* i: leading sign -1, 0, 1*/
570 : UWord32 ind,
571 : /* i: index */ /* parameter needed as it is used in a function array */
572 : Word16 *vec_out /* o: pulse train */
573 : )
574 : {
575 : /* NB input k_val_in can be zero */
576 : /* *vec_out = leading_sign*k_val_in; */
577 1163365 : *vec_out = (Word16) ind; /* dummy assignment to avoid gcc "unused parameter" warning for ind, i.e no move16() needed */
578 1163365 : move16();
579 :
580 : /* *vec_out = extract_l(L_mult0(leading_sign,k_val_in)); move16(); // 3 ops */
581 1163365 : if ( leading_sign < 0 )
582 : {
583 581191 : k_val_in = negate( k_val_in ); /* single basicop --> if */
584 : }
585 1163365 : *vec_out = k_val_in;
586 1163365 : move16(); /* 1 op */
587 1163365 : }
588 :
589 10316 : static void mind2vec_two_fx(
590 : Word16 k_val_in, /* i: nb unit pulses */
591 : Word16 leading_sign, /* i: leading sign -1,0, 1 */
592 : UWord32 ind_in, /* i: index */
593 : Word16 *vec_out /* o: Q0 pulse train */
594 : )
595 : {
596 : UWord32 UL_ind_tmp;
597 : Word16 val1;
598 :
599 10316 : IF( ind_in == 0 )
600 : {
601 : /* ind_in == 0 */
602 766 : mind2vec_one_fx( k_val_in, leading_sign, ind_in, vec_out );
603 : }
604 9550 : ELSE IF( EQ_16( (Word16) u_extract_l( ind_in ), sub( shl( k_val_in, 1 ), 1 ) ) )
605 : {
606 : /* signed ops fine as 2*KMAX << 32767) */
607 : /* (ind_in == ( (unsigned int)(k_val_in<<ONE) - ONE_U) ) */
608 546 : mind2vec_one_fx( k_val_in, leading_sign, ind_in, &( vec_out[1] ) );
609 : }
610 : ELSE
611 : {
612 9004 : UL_ind_tmp = UL_subNsD( ind_in, 1U );
613 9004 : val1 = (Word16) u_extract_l( UL_addNsD( 1U, UL_lshr( UL_ind_tmp, 1 ) ) ); /*(Word16) to avoid warning */ /*Q0*/
614 :
615 9004 : mind2vec_one_fx( sub( k_val_in, val1 ), leading_sign, ind_in, vec_out );
616 :
617 9004 : if ( UL_and( UL_ind_tmp, 1 ) != 0 )
618 : {
619 4615 : val1 = negate( val1 ); /*single basicop */
620 : }
621 9004 : vec_out[1] = val1; /*Q0*/
622 9004 : move16();
623 : }
624 10316 : }
625 :
626 2507817 : static Word16 setval_update_sign_fx( Word16 k_delta,
627 : Word16 k_max_local,
628 : Word16 *leading_sign,
629 : UWord32 *ind_in,
630 : Word16 *vec_out )
631 : {
632 2507817 : IF( k_delta != 0 )
633 : {
634 889682 : mind2vec_one_fx( k_delta, *leading_sign, *ind_in, vec_out );
635 889682 : *leading_sign = get_lead_sign_fx( ind_in ); /*Q0*/
636 889682 : move16();
637 889682 : k_max_local = sub( k_max_local, k_delta ); /*Q0*/
638 : }
639 2507817 : return k_max_local; /*Q0*/
640 : }
641 :
642 : /*-------------------------------------------------------------------*
643 : * mind2vec_three_fx()
644 : *-------------------------------------------------------------------*/
645 10387 : static void mind2vec_three_fx(
646 : Word16 k_max_local, /* i: nb unit pulses */
647 : Word16 leading_sign, /* i: leading sign */
648 : UWord32 ind_in, /* i: index */
649 : Word16 *vec_out /* Q0 o: pulse train */
650 : )
651 : {
652 : /*
653 : use direct calculation of first amplitude
654 : (to find amplitudes faster than using split or linear iteration)
655 : */
656 : Word16 k_delta;
657 : Word16 acc_val;
658 :
659 10387 : IF( ind_in != 0 )
660 : {
661 : /* acc_val=idivide(uint32(floor(real(sqrt(double(ind)*2-1))))+1, 2); % (exact_integer_sqrt((ind*2-1) +1)*2 */
662 10217 : acc_val = lshr( add( 1, getSqrtWord32( UL_subNsD( UL_lshl( ind_in, 1 ), 1U ) ) ), 1 ); /*Q0*/
663 10217 : k_delta = sub( k_max_local, acc_val ); /*Q0*/
664 10217 : ind_in = UL_subNsD( ind_in, a_three_fx( UL_deposit_l( acc_val ) ) ); /* remove amplitude offset A(3,k_acc) */
665 :
666 10217 : k_max_local = setval_update_sign_fx( k_delta, k_max_local, &leading_sign, &ind_in, vec_out );
667 :
668 10217 : mind2vec_two_fx( k_max_local, leading_sign, ind_in, &vec_out[1] );
669 : }
670 : ELSE
671 : {
672 : /* vec_out[0]= leading_sign*k_max_local; */
673 170 : mind2vec_one_fx( k_max_local, leading_sign, ind_in, vec_out );
674 : }
675 10387 : return;
676 : }
677 :
678 : /*-------------------------------------------------------------------*
679 : * mind2vec_direct_fx ,
680 : general function for direct decoding using direct funstions
681 : (no memory recursion)
682 : *-------------------------------------------------------------------*/
683 10751 : static void mind2vec_direct_fx(
684 : Word16 k_max_local, /* i: nb unit pulses */
685 : Word16 leading_sign, /* i: leading sign */
686 : UWord32 ind, /* i: index */
687 : H_FUNCM h_func_ptr, /* i : offset function */
688 : NDIM_FUNCM nd_func_ptr, /* i : next dimension function */
689 : Word16 *vec_out /* Q0 o: pulse train */
690 : )
691 :
692 : {
693 : Word16 k_delta, k_test;
694 : UWord32 UL_tmp_offset;
695 :
696 10751 : IF( ind != 0 )
697 : {
698 10704 : k_test = find_amp_split_offset_func_mem_fx( ind, k_max_local, h_func_ptr, &UL_tmp_offset );
699 10704 : k_delta = sub( k_max_local, k_test );
700 10704 : ind = UL_subNsD( ind, UL_tmp_offset ); /* % remove amplitude offset A(n,k_acc) */
701 :
702 10704 : k_max_local = setval_update_sign_fx( k_delta, k_max_local, &leading_sign, &ind, vec_out );
703 :
704 10704 : move32(); /* account for adaptive function ptr setup */
705 10704 : ( *nd_func_ptr )( k_max_local, leading_sign, ind, &vec_out[1] ); /* next lower dimension */
706 : }
707 : ELSE
708 : {
709 47 : mind2vec_one_fx( k_max_local, leading_sign, ind, vec_out );
710 : }
711 10751 : return;
712 : }
713 :
714 : /*-------------------------------------------------------------------*
715 : * mind2vec_four_fx()
716 : *-------------------------------------------------------------------*/
717 10425 : static void mind2vec_four_fx(
718 : Word16 k_val_in, /* i: nb unit pulses */
719 : Word16 leading_sign, /* i: leading sign */
720 : UWord32 ind_in, /* i: index */
721 : Word16 *vec_out /* Q0 o: pulse train */
722 : )
723 : {
724 10425 : mind2vec_direct_fx( k_val_in, leading_sign, ind_in, a_four_fx, mind2vec_three_fx, vec_out );
725 10425 : return;
726 : }
727 :
728 : /*-------------------------------------------------------------------*
729 : * mind2vec_five_fx()
730 : *-------------------------------------------------------------------*/
731 326 : static void mind2vec_five_fx(
732 : Word16 k_val_in, /* i: nb unit pulses */
733 : Word16 leading_sign, /* i: leading sign */
734 : UWord32 ind_in, /* i: index */
735 : Word16 *vec_out /* o: pulse train */
736 : )
737 : {
738 326 : mind2vec_direct_fx( k_val_in, leading_sign, ind_in, a_five_fx, mind2vec_four_fx, vec_out );
739 326 : return;
740 : }
741 :
742 : /*-------------------------------------------------------------------*
743 : * mind2vec_fx()
744 : *-------------------------------------------------------------------*/
745 263108 : static void mind2vec_fx(
746 : Word16 dim_in, /* i: dimension */
747 : Word16 k_max_local, /* i: nb unit pulses */
748 : Word16 leading_sign, /* i: leading sign */
749 : UWord32 ind, /* i: index */
750 : Word16 *vec_out, /* Q0 o: pulse train */
751 : UWord32 *h_in /* i: offset vector A=1+2U */
752 : )
753 : {
754 : Word16 pos, k_acc, k_delta;
755 : UWord32 UL_tmp_offset, UL_diff;
756 : UWord16 sgn;
757 :
758 263108 : k_acc = k_max_local;
759 263108 : move16();
760 2750004 : FOR( pos = 0; pos < dim_in; pos++ )
761 : {
762 : /* first to last position decoding */
763 :
764 2750004 : IF( ind != 0 )
765 : {
766 : /* regular linear magnitude search */
767 2486896 : k_acc = k_max_local;
768 2486896 : move16(); /* ptr init */
769 :
770 2486896 : UL_tmp_offset = UL_addNsD( h_in[k_acc], 0U ); /* memory load init */
771 :
772 2486896 : UL_diff = UL_subNs( ind, UL_tmp_offset, &sgn );
773 :
774 4005301 : WHILE( sgn /*(ind - UL_tmp_offset)<0*/ ) /* WHILE costs, 4 cycles every iter */
775 : {
776 1518405 : UL_diff = UL_subNs( ind, h_in[--k_acc], &sgn ); /* one cycle*/
777 : }
778 :
779 2486896 : ind = UL_addNsD( UL_diff, 0U ); /* save amplitude index offset A(n, k_acc) */
780 :
781 2486896 : k_delta = sub( k_max_local, k_acc ); /* amplitude decoding */
782 : }
783 : ELSE
784 : {
785 263108 : mind2vec_one_fx( k_max_local, leading_sign, ind, &vec_out[pos] );
786 263108 : BREAK; /* "fast" recursion exit*/
787 : }
788 :
789 2486896 : k_max_local = setval_update_sign_fx( k_delta, k_max_local, &leading_sign, &ind, &vec_out[pos] ); /*Q0*/
790 :
791 : /* move from A(n,kmax) to A(n-1, k_max_local), */
792 2486896 : a_bwd_fx( h_in, add( k_max_local, 1 ) ); /* [0 ... k_max_local], no need to update U(n,k_max_local+1) in index decoding */
793 : }
794 :
795 263108 : return;
796 : }
797 :
798 : /*-------------------------------------------------------------------*
799 : * get_size_mpvq_calc_offset_fx()
800 : *
801 : * unsigned int h_mem[1 + KMAX +1 ];
802 : * example using fixed size of offset vector input help variable
803 : *-------------------------------------------------------------------*/
804 273683 : PvqEntry get_size_mpvq_calc_offset_fx( /* o : size, dim, k_val */
805 : Word16 dim_in, /* i : dimension */
806 : Word16 k_val_in, /* i : nb unit pulses */
807 : UWord32 *h_mem /* Q0 o : offsets */
808 : )
809 : {
810 : PvqEntry entry;
811 :
812 273683 : entry.dim = dim_in;
813 273683 : move16();
814 273683 : entry.k_val = k_val_in;
815 273683 : move16();
816 :
817 273683 : entry.index = L_deposit_l( 0 );
818 273683 : entry.lead_sign_ind = 0;
819 273683 : move16();
820 :
821 273683 : IF( GT_16( dim_in, N_OPT_FX ) ) /* non-direct solutions, use A+U relation */
822 : {
823 263108 : entry.size = nm_h_prep_opt_fx( entry.dim, entry.k_val, h_mem ); /*Q0*/
824 263108 : move32();
825 : }
826 : ELSE
827 : {
828 10575 : entry.size = direct_msize_fx( dim_in, entry.k_val ); /*Q0*/
829 10575 : move32();
830 : }
831 :
832 273683 : return entry; /*Q0*/
833 : }
834 :
835 : /*-------------------------------------------------------------------*
836 : * mpvq_decode_vec_fx()
837 : *-------------------------------------------------------------------*/
838 273683 : void mpvq_decode_vec_fx( /* o : void */
839 : const PvqEntry *entry, /* i : sign_ind, index, dim, k_val */
840 : UWord32 *h_mem, /* i : A/U offsets */
841 : Word16 *vec_out /* Q0 o : pulse train */
842 : )
843 : {
844 : Word16 i, leading_sign;
845 273683 : IND2VECFUNCM mind2vec_f_fx[N_OPT_FX + 1] = { (IND2VECFUNCM) NULL, mind2vec_one_fx, mind2vec_two_fx, mind2vec_three_fx, mind2vec_four_fx, mind2vec_five_fx };
846 :
847 3759493 : FOR( i = 0; i < entry->dim; i++ )
848 : {
849 3485810 : vec_out[i] = 0;
850 3485810 : move16(); /* set all of short vector to zero , required for fast/early exit logic */
851 : }
852 :
853 273683 : leading_sign = 1;
854 273683 : move16();
855 273683 : if ( entry->lead_sign_ind != 0 )
856 : {
857 136831 : leading_sign = -1;
858 136831 : move16();
859 : }
860 :
861 273683 : IF( entry->k_val != 0 )
862 : {
863 273683 : IF( GT_16( entry->dim, N_OPT_FX ) ) /* N_OPT_FX */
864 : {
865 : /* generic */
866 263108 : mind2vec_fx( entry->dim, entry->k_val, leading_sign, entry->index, vec_out, h_mem );
867 : }
868 : ELSE
869 : {
870 : /* specialized functions, with direct offset calculations */
871 10575 : ( mind2vec_f_fx[entry->dim] )( entry->k_val, leading_sign, entry->index, vec_out );
872 : }
873 : }
874 :
875 273683 : return;
876 : }
877 :
878 : /*-------------------------------------------------------------------*
879 : * vec2mind_one_fx() : dim == 1 case; the index is always 0, only the sign of vec_in[0] is reported
880 : *-------------------------------------------------------------------*/
881 0 : static void vec2mind_one_fx(
882 : const Word16 *vec_in, /* i : PVQ pulse train, only vec_in[0] is read */
883 : Word16 *k_val_out_ptr,
884 : /* Q0 : number of unit pulses; read once for the dummy assignment below, never written here */ /* parameter needed as it is used in a function array */
885 : UWord32 *next_sign_ind, /* o : 1 if vec_in[0] < 0, else 0 (the incoming value is not read) */
886 : UWord32 *ind /* Q0 o: MPVQ index, always 0 on return */
887 : )
888 : {
889 0 : *ind = (Word32) ( *k_val_out_ptr ); /* dummy assignment to avoid gcc "unused parameter" warning for *k_val_out_ptr, i.e no move32 needed() */
890 0 : move32();
891 0 : *ind = UL_deposit_l( 0 ); /* overwrites the dummy value stored just above */
892 0 : move32();
893 : /* default is "non-negative"; replaced below when vec_in[0] < 0 */
894 0 : *next_sign_ind = UL_deposit_l( 0 );
895 0 : move32();
896 0 : if ( *vec_in < 0 )
897 : {
898 0 : *next_sign_ind = UL_deposit_l( 1 ); /*single basicop */ /*Q0*/
899 0 : move32();
900 : }
901 0 : return;
902 : }
903 :
904 : /*-------------------------------------------------------------------*
905 : * vec2mind_two_fx() : index the pair vec_in[0..1]; also returns its pulse count and the sign of its first non-zero element
906 : *-------------------------------------------------------------------*/
907 293675 : static void vec2mind_two_fx(
908 : const Word16 *vec_in, /* i : PVQ pulse train, vec_in[0] and vec_in[1] are read */
909 : Word16 *k_val_out_ptr, /* Q0 o : number of unit pulses, abs(vec_in[0]) + abs(vec_in[1]) */
910 : UWord32 *next_sign_ind, /* Q0 o : 1 / 0 = first non-zero element is negative / positive, or the "unset" marker when both are zero (incoming value is not read) */
911 : UWord32 *ind /* Q0 o: MPVQ index of the pair; the sign reported in *next_sign_ind is not part of it */
912 : )
913 : {
914 : UWord32 lead_sign_ind_add;
915 : Word16 abs0, abs1, abs01, sptr;
916 : /* abs01 is the pulse count of the pair */
917 293675 : abs0 = abs_s( vec_in[0] );
918 293675 : abs1 = abs_s( vec_in[1] );
919 293675 : abs01 = add( abs0, abs1 );
920 293675 : *k_val_out_ptr = abs01;
921 293675 : move16(); /* can be zero */
922 293675 : *ind = UL_deposit_l( 0 ); /* [KMAX 0 ] , and dual zeros */
923 293675 : move32();
924 :
925 293675 : *next_sign_ind = UL_deposit_h( SIGNBIT_SHRT_FX ); /* "unset" sign flag set */ /* dual zeroes can happen in a recursive encoding call */
926 293675 : move32();
927 : /* NOTE(review): the "unset" marker is presumably equal to SIGNBIT_FX, which enc_push_sign() tests for - confirm UL_deposit_h() semantics */
928 : /* summary of *ind for a pair [a b]: [a 0] -> 0 ; [0 b] -> 2*abs(b) - 1 ; both non-zero -> 2*(abs(b) - 1) + 1 when b > 0, + 2 when b < 0 */
929 293675 : IF( abs01 != 0 )
930 : {
931 165777 : sptr = 0;
932 165777 : move16(); /*used as ptr to vec0 or vec1 value */
933 165777 : *next_sign_ind = UL_deposit_l( sptr ); /*Q0*/
934 165777 : move32();
935 :
936 165777 : test();
937 165777 : IF( abs0 != 0 && abs1 != 0 )
938 : {
939 : /* likely most frequent/common case */
940 : /* [ KMAX-1 1],[ KMAX-1 -1] ... [ 1 +(KMAX-1) ],[ 1 -(KMAX-1)] */
941 : /* sign always shifted to first pos */
942 69869 : lead_sign_ind_add = UL_deposit_l( 1 );
943 69869 : if ( vec_in[1] < 0 )
944 : {
945 35059 : lead_sign_ind_add = UL_deposit_l( 2 ); /* single op */
946 : }
947 69869 : *ind = UL_addNsD( UL_deposit_l( (UWord16) lshl( sub( abs1, 1 ), 1 ) ), lead_sign_ind_add ); /*Q0*/
948 69869 : move32();
949 : }
950 : ELSE
951 : {
952 : /* one value is a zero */
953 95908 : IF( abs0 == 0 )
954 : {
955 : /* [ 0 KMAX]*/
956 46717 : *ind = UL_deposit_l( (UWord16) sub( lshl( abs1, 1 ), 1 ) ); /*Q0*/
957 46717 : move32();
958 46717 : sptr = 1;
959 46717 : move16();
960 : }
961 : }
962 : /* sptr selects the element whose sign is reported: vec_in[0], or vec_in[1] when vec_in[0] == 0 */
963 : /* *next_sign_ind= (unsigned int)(vec_in[*next_sign_ind]<0); */
964 165777 : if ( vec_in[sptr] < 0 )
965 : {
966 83158 : *next_sign_ind = UL_deposit_l( 1 ); /*single instruction */ /*Q0*/
967 83158 : move32();
968 : }
969 : }
970 293675 : return;
971 : }
972 :
973 3183782 : static void enc_push_sign( Word16 val, UWord32 *next_sign_ind, UWord32 *index ) /* val: i, current element ; *next_sign_ind: i/o, stored sign (0/1) or "none yet" marker ; *index: i/o, running index */
974 : {
975 : /* enc_push_sign(): fold the stored sign into *index when val is non-zero, then store the sign of val for the next non-zero element
976 : % Check if the leading sign 'bit' is to be added
977 : % here the leading sign bit is put in LSB as it saves about 3 cycles in sign-decoding
978 : % (one can also put it in the MSB , but then one needs to access h_mem twice and shift the sign bit into position )
979 : */
980 3183782 : test();
981 3183782 : IF( ( UL_and( *next_sign_ind, SIGNBIT_FX ) == 0 ) && ( val != 0 ) ) /* a sign is stored (SIGNBIT_FX clear) and the current element is non-zero */
982 : {
983 884979 : *index = UL_addNsD( UL_lshl( *index, 1 ), *next_sign_ind ); /* index = 2 * index + stored sign, i.e. the sign lands in the LSB */
984 884979 : move32();
985 : }
986 : /* val == 0 falls through both tests below and leaves *next_sign_ind unchanged */
987 : /* push sign to next non_zero position */
988 : /* *next_sign_ind = *next_sign_ind ;*/ /* default is to keep stored sign index */
989 3183782 : if ( val < 0 )
990 : {
991 505979 : *next_sign_ind = UL_deposit_l( 1 ); /* single basicop */
992 505979 : move32();
993 : }
994 3183782 : if ( val > 0 )
995 : {
996 506898 : *next_sign_ind = UL_deposit_l( 0 ); /* single basicop */
997 506898 : move32();
998 : }
999 3183782 : }
1000 :
1001 : /*-------------------------------------------------------------------*
1002 : * vec2mind_gen345_fx( vec_in kval, next_dim_func , offset_func,....)
1003 : * shared body of the dim 3/4/5 encoders: index vec_in[1..] with the supplied routine, then fold in vec_in[0]; generic call saves PROM ,
1004 : *-------------------------------------------------------------------*/
1005 :
1006 20368 : static void vec2mind_gen345_fx(
1007 : const Word16 *vec_in, /* i : PVQ pulse train (signed: both the sign and abs_s() of vec_in[0] are used below) */
1008 : Word16 *k_val_out_ptr, /* Q0 o : number of unit pulses: value set by the sub-call plus abs(vec_in[0]) */
1009 : UWord32 *next_sign_ind, /* i/o: next sign ind, set by the sub-call and then updated by enc_push_sign() */
1010 : UWord32 *index, /* Q0 o: MPVQ index */
1011 : VEC2INDFUNCM vec2indfunc_ptr, /* i: encoder applied to &vec_in[1]; the callers in this file pass the routine for one dimension less */
1012 : H_FUNCM a_func_ptr /*i: offset function, evaluated at the pulse count returned by the sub-call */
1013 : )
1014 : {
1015 : Word16 tmp_val;
1016 : /* step 1: index everything right of vec_in[0]; this sets *k_val_out_ptr, *next_sign_ind and *index */
1017 20368 : tmp_val = vec_in[0];
1018 20368 : move16();
1019 20368 : move32(); /* adaptive function call setup */
1020 20368 : ( *vec2indfunc_ptr )( &vec_in[1], k_val_out_ptr, next_sign_ind, index );
1021 : /* step 2: account for the sign stored so far, using the leftmost element */
1022 20368 : enc_push_sign( tmp_val, next_sign_ind, index );
1023 : /* step 3: add the offset for the pulse count accumulated BEFORE vec_in[0] is added to it */
1024 20368 : move32(); /* adaptive function call setup */
1025 20368 : *index = UL_addNsD( *index, ( *a_func_ptr )( UL_deposit_l( *k_val_out_ptr ) ) ); /*Q0*/
1026 20368 : move32();
1027 : /* step 4: only now include abs(vec_in[0]) in the reported pulse count */
1028 20368 : *k_val_out_ptr = add( *k_val_out_ptr, abs_s( tmp_val ) ); /*Q0*/
1029 20368 : move16();
1030 :
1031 20368 : return;
1032 : }
1033 :
1034 : /*-------------------------------------------------------------------*
1035 : * vec2mind_three_fx() : dim 3 encoder, vec2mind_gen345_fx() with vec2mind_two_fx as sub-encoder and a_three_fx as offset function
1036 : *-------------------------------------------------------------------*/
1037 10020 : static void vec2mind_three_fx(
1038 : const Word16 *vec_in, /* i : PVQ pulse train */
1039 : Word16 *k_val_out_ptr, /* Q0 o : number of unit pulses */
1040 : UWord32 *next_sign_ind, /* Q0 i/o: next sign ind */
1041 : UWord32 *index /* Q0 o: MPVQ index */
1042 : )
1043 : {
1044 10020 : vec2mind_gen345_fx( vec_in, k_val_out_ptr, next_sign_ind, index, vec2mind_two_fx, a_three_fx ); /* all four arguments are forwarded unchanged */
1045 10020 : return;
1046 : }
1047 :
1048 :
1049 : /*-------------------------------------------------------------------*
1050 : * vec2mind_four_fx() : dim 4 encoder, vec2mind_gen345_fx() with vec2mind_three_fx as sub-encoder and a_four_fx as offset function
1051 : *-------------------------------------------------------------------*/
1052 10012 : static void vec2mind_four_fx(
1053 : const Word16 *vec_in, /* i : PVQ pulse train */
1054 : Word16 *k_val_out_ptr, /* Q0 o : number of unit pulses */
1055 : UWord32 *next_sign_ind, /* Q0 i/o: next sign ind */
1056 : UWord32 *index /* Q0 o: MPVQ index */
1057 : )
1058 : {
1059 10012 : vec2mind_gen345_fx( vec_in, k_val_out_ptr, next_sign_ind, index, vec2mind_three_fx, a_four_fx ); /* all four arguments are forwarded unchanged */
1060 10012 : return;
1061 : }
1062 :
1063 : /*-------------------------------------------------------------------*
1064 : * vec2mind_five_fx() : dim 5 encoder, vec2mind_gen345_fx() with vec2mind_four_fx as sub-encoder and a_five_fx as offset function
1065 : *-------------------------------------------------------------------*/
1066 336 : static void vec2mind_five_fx(
1067 : const Word16 *vec_in, /* i : PVQ pulse train (signed, forwarded unchanged to vec2mind_gen345_fx) */
1068 : Word16 *k_val_out_ptr, /* Q0 o : number of unit pulses */
1069 : UWord32 *next_sign_ind, /* Q0 i/o: next sign ind */
1070 : UWord32 *index /* Q0 o: MPVQ index */
1071 : )
1072 : {
1073 336 : vec2mind_gen345_fx( vec_in, k_val_out_ptr, next_sign_ind, index, vec2mind_four_fx, a_five_fx ); /* all four arguments are forwarded unchanged */
1074 336 : return;
1075 : }
1076 :
1077 : /*-------------------------------------------------------------------*
1078 : * vec2mind_fx() : generic-dimension encoder; walks vec_in right to left, building *index, the pushed leading sign and the size *N_MPVQ_ptr
1079 : *-------------------------------------------------------------------*/
1080 283617 : static void vec2mind_fx( Word16 dim_in, /* i : dim; vec_in[dim_in-2] is read first, so dim_in >= 2 is required (the caller in this file uses dim_in > N_OPT_FX) */
1081 : Word16 k_val_in, /* i : number of unit pulses */
1082 : const Word16 *vec_in, /* i : PVQ pulse train */
1083 : UWord32 *next_sign_ind, /* Q0 o : pushed leading sign */
1084 : UWord32 *index, /* Q0 o : MPVQ index */
1085 : UWord32 *N_MPVQ_ptr, /* Q0 o : size(N_MPVQ(dim,K_val_in))*/
1086 : UWord32 *h_mem ) /* Q0 o : offsets; positions up to [k_val_in + 1] are accessed here */
1087 : {
1088 : Word16 pos, mem_size_m1, k_val_acc, tmp_val;
1089 : UWord32 tmp_h;
1090 :
1091 : /*
1092 : %% main steps
1093 : % quick encode two rightmost pos
1094 : % for every position from dim-3 to 0 (right to left)
1095 : % check if a sign is to be encoded ,
1096 : % add its offset
1097 : % check(and add) amplitude offset(for accumulated pulse sum) up to this point
1098 : % update total pulse sum
1099 : % update offset vector recursively (except for pos==0 )
1100 : % end
1101 : % calculate size
1102 : */
1103 :
1104 283617 : mem_size_m1 = add( k_val_in, 1 ); /* highest h_mem position used; read at the end as U(n, 1 + K) */
1105 283617 : *next_sign_ind = UL_deposit_h( SIGNBIT_SHRT_FX ); /* highest bit set signals no sign found yet, should always be 0 or 1 out, */ /*Q0*/
1106 283617 : move32();
1107 :
1108 283617 : pos = sub( dim_in, 2 ); /* address 2nd last sample */ /*Q0*/
1109 283617 : vec2mind_two_fx( &vec_in[pos], &k_val_acc, next_sign_ind, index ); /* sets k_val_acc, *next_sign_ind and *index from the two rightmost elements */
1110 283617 : initOffsets_fx( 3, h_mem, k_val_in ); /* start recursions at 3rd sample */
1111 :
1112 283617 : tmp_h = h_mem[k_val_acc]; /* offset for the pulse count accumulated so far */
1113 283617 : move32();
1114 3447031 : FOR( pos--; pos >= 0; pos-- )
1115 : {
1116 3163414 : tmp_val = vec_in[pos];
1117 3163414 : move16();
1118 3163414 : enc_push_sign( tmp_val, next_sign_ind, index );
1119 :
1120 : /* now add indexing offset up to this reverse (r_l) accumulated amplitude point */
1121 3163414 : *index = UL_addNsD( *index, tmp_h ); /* k_val_acc==0 ==>0 */ /*Q0*/
1122 3163414 : move32();
1123 :
1124 : /* k_val_acc = k_val_acc + vec_abs[pos];*/ /* now increase acc k value for next N */
1125 3163414 : k_val_acc = add( k_val_acc, abs_s( tmp_val ) ); /*Q0*/
1126 : /* the offsets are not advanced after the leftmost element (pos == 0) */
1127 3163414 : IF( pos != 0 )
1128 : {
1129 2879797 : a_u_fwd_fx( h_mem, k_val_in, mem_size_m1 );
1130 : /* update A(n,k=1:k_val_in) and U(n,k_val_in+1) */
1131 : /* NB: here k_val_in + 2 elements always have to be updated */
1132 : }
1133 3163414 : tmp_h = UL_addNsD( h_mem[k_val_acc], 0U ); /*Q0*/
1134 : }
1135 : /* NOTE(review): the size formula below assumes k_val_acc == k_val_in here (sum of abs(vec_in[i]) equals k_val_in), so that tmp_h is A(n, K) - confirm callers guarantee this */
1136 : /* size is needed for the subsequent arithmetic encoding/transmission of the index.
1137 : use relation N_MPVQ(n,K) = 1 + (A(n, K)-1)/2 + U(n, 1 + K)
1138 : = 1 + (A(n, K)>>1) + U(n, 1 + K) , as A(n,K) is odd */
1139 283617 : *N_MPVQ_ptr = UL_addNsD( 1U, UL_addNsD( UL_lshr( tmp_h, 1 ), h_mem[mem_size_m1] ) ); /*Q0*/
1140 283617 : move32(); /* calc total size */
1141 :
1142 283617 : return;
1143 : }
1144 :
1145 : /*--------------------------------------------------------------------------*
1146 : * mpvq_encode_vec_fx()
1147 : * dim_in > N_OPT_FX : vec2mind_fx() with a local h_mem[] ; otherwise the vec2mind_f[dim_in] routine plus direct_msize_fx()
1148 : * returns struct with lead sign index, MPVQ-index, dim, k_val and N_MPVQ size
1149 : *-------------------------------------------------------------------------*/
1150 :
1151 293675 : PvqEntry mpvq_encode_vec_fx( /* Q0 o : leading_sign_index, index, size, k_val */
1152 : const Word16 *vec_in, /* i : signed pulse train */
1153 : Word16 dim_in, /* i : dimension */
1154 : Word16 k_val_local /* i : nb unit pulses */
1155 : )
1156 : {
1157 : PvqEntry result;
1158 : UWord32 h_mem[1 + KMAX_NON_DIRECT_FX + 1]; /* now always assign max offset buffer for dim 6 ,
1159 : actually only 1+k_val_in+1 needed ) */
1160 : UWord32 lead_sign_ind;
1161 : /* table is indexed by dim_in; slot 0 is NULL. NOTE(review): dim_in == 0 would call through NULL in the ELSE branch below - presumably never requested, confirm against callers */
1162 293675 : VEC2INDFUNCM vec2mind_f[1 + N_OPT_FX] = { (VEC2INDFUNCM) NULL, vec2mind_one_fx, vec2mind_two_fx, vec2mind_three_fx, vec2mind_four_fx, vec2mind_five_fx };
1163 : /* result.k_val keeps the caller supplied value; it is stored before any sub-routine can modify k_val_local */
1164 293675 : result.k_val = k_val_local; /*Q0*/
1165 293675 : move16();
1166 293675 : result.dim = dim_in; /*Q0*/
1167 293675 : move16();
1168 : /* NB , k_val_local may be changed in some sub encoding routines */
1169 293675 : IF( GT_16( dim_in, N_OPT_FX ) )
1170 : {
1171 : /* use the generic dimension function */
1172 283617 : vec2mind_fx( dim_in, k_val_local, vec_in, &lead_sign_ind, &result.index, &result.size, h_mem );
1173 : }
1174 : ELSE /* if (dim_in<=N_OPT), h_mem not used */
1175 : {
1176 10058 : move32(); /* adaptive function ptr setup */
1177 10058 : ( vec2mind_f[dim_in] )( vec_in, &k_val_local, &lead_sign_ind, &result.index ); /* k_val_local is passed by address and may be rewritten with the pulse count found in vec_in */
1178 10058 : result.size = direct_msize_fx( dim_in, k_val_local ); /*Q0*/ /* uses the possibly updated k_val_local. NOTE(review): no move32() follows this 32-bit store, unlike the other size assignments in this file - confirm intended */
1179 : }
1180 293675 : result.lead_sign_ind = u_extract_l( lead_sign_ind ); /*Q0*/
1181 293675 : move16();
1182 :
1183 293675 : return result;
1184 : }
|