Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include <stdint.h>
6 : #include <assert.h>
7 : #include "options.h"
8 : //#include "prot_fx.h"
9 : #include "cnst.h"
10 : #include "rom_com_fx.h"
11 : #include "rom_com.h"
12 : #include "rom_enc.h"
13 : #include "basop_util.h"
14 : #include "prot_fx.h" /* Function prototypes */
15 : #include "prot_fx_enc.h" /* Function prototypes */
16 : #include "basop_proto_func.h"
17 : #include "wmc_auto.h"
18 :
19 :
20 : /*-------------------------------------------------------------------*
21 : * Local constants
22 : *-------------------------------------------------------------------*/
23 :
24 : #define kMaxC 8
25 :
26 : #define MAXINT32 2147483647
27 : #ifndef swap
28 : #define swap( x, y, type ) \
29 : { \
30 : type u__p; \
31 : u__p = x; \
32 : x = y; \
33 : y = u__p; \
34 : }
35 : #endif
36 :
37 : #define depack_4_values( cbp, val0, val1, val2, val3 ) \
38 : val0 = shr( ( cbp )[0], 4 ); \
39 : val1 = shr( ( cbp )[1], 4 ); \
40 : val2 = shr( ( cbp )[2], 4 ); \
41 : val3 = add( add( shr( lshl( ( cbp )[2], 12 ), 4 ), lshr( lshl( ( cbp )[1], 12 ), 8 ) ), s_and( ( cbp )[0], 0xF ) );
42 :
43 : /*--------------------------------------------------------------------------*
44 : * msvq_stage1_dct_search_fx()
45 : *
46 : * stage1 search in a segmentwise truncated dct N domain without weights
47 : *--------------------------------------------------------------------------*/
48 :
49 : /*! r: (p_max , best candidate sofar ) */
50 2742 : Word16 msvq_stage1_dct_search_fx(
51 : const Word32 *u_fx, /* i : target exp : u_e */
52 : const Word16 u_e, /* i : exp for target Q0 */
53 : const Word16 N, /* i : target length and IDCT synthesis length */
54 : const Word16 maxC_st1, /* i : number of final stage 1 candidates to provide */
55 : const DCTTYPE dcttype, /* e.g. DCT_T2_16_XX, DCT_T2_24_XX; */
56 : const Word16 max_dct_trunc, /* i : maximum of truncation lenghts */
57 : Word32 *invTrfMatrix_fx, /* i : IDCT synthesis matrix for dim N Q31 */
58 : const Word16 *midQ_truncQ_fx, /* i : midQ vector */
59 : const Word32 *dct_scaleF_fx, /* i : global scale factors Q10 */
60 : const Word16 n_segm, /* i : number of segments */
61 : const Word16 *cols_per_segment, /* i : remaining length per segment */
62 : const Word16 *trunc_dct_cols_per_segment, /* i : trunc length per segment */
63 : const Word16 *entries_per_segment, /* i : number of rows per segment */
64 : const Word16 *cum_entries_per_segment, /* i : number of cumulative entries */
65 : const Word8 *const W8Qx_dct_sections[], /* i : Word8(byte) segment table ptrs */
66 : const Word16 *col_syn_shift[], /* i : columnwise syn shift tables */
67 : const Word8 *segm_neighbour_fwd, /* i : circular neighbour list fwd */
68 : const Word8 *segm_neighbour_rev, /* i : circular neighbour list reverse */
69 : const Word16 npost_check, /* i : number of neigbours to check , should be even */
70 : Word32 *st1_mse_ptr_fx, /* i : dynRAM buffer for MSEs exp : u_e */
71 : Word16 *indices_st1_local, /* o : selected cand indices */
72 : Word32 *st1_syn_vec_ptr_fx, /* i/o: buffer for IDCT24 synthesis i :exp : u_e */
73 : Word32 *dist1_ptr_fx, /* o : resulting stage 1 MSEs in DCT-N domain */
74 : Word16 *dist1_ptr_e )
75 : {
76 : Word32 dct_target_fx[FDCNG_VQ_DCT_MAXTRUNC]; // Q20
77 : Word32 u_mr_fx[FDCNG_VQ_MAX_LEN];
78 : Word16 dist1_ptr_e_buf[2 * LSFMBEST_MAX];
79 : Word64 mse_trunc_segm_fx[FDCNG_VQ_DCT_NSEGM];
80 : Word32 tmp_fx, check_mse;
81 : Word16 tmp_e, check_mse_e;
82 : Word64 mse_fx; /* Word64 in BASOP */
83 :
84 : Word16 p_max, c, c2, segm, j_full, j, i;
85 : Word16 n_ana, p_mins[2], idx_min[2];
86 :
87 : Word16 st1_mse_ptr_e[128];
88 :
89 : const Word8 *cbpW8;
90 : const Word16 *dct_col_shift_tab;
91 :
92 : Word32 *st1_mse_pair_fx;
93 : Word16 *st1_mse_pair_e;
94 : Word16 *st1_idx_pair;
95 :
96 : Word32 tmp2_fx;
97 : Word16 check_ind[FDCNG_VQ_DCT_NPOST];
98 2742 : assert( ( npost_check % 2 == 0 ) && ( npost_check <= FDCNG_VQ_DCT_NPOST ) );
99 :
100 2742 : assert( n_segm <= FDCNG_VQ_DCT_NSEGM );
101 :
102 2742 : n_ana = N; /* VQ stage#1 core is currently always using stored DCT N coeffs */
103 2742 : move16();
104 2742 : assert( n_ana >= max_dct_trunc ); /* check for FDCNGVQ WB , SWB, FB operation */
105 :
106 : /* remove mid stage#1 vector, in original input domain */
107 2742 : tmp_e = s_max( 12, u_e );
108 68550 : FOR( i = 0; i < n_ana; i++ )
109 : {
110 65808 : u_mr_fx[i] = L_sub( L_shl( u_fx[i], sub( u_e, tmp_e ) ), L_shl( midQ_truncQ_fx[i], sub( Q31 - Q10, tmp_e ) ) ); // tmp_e
111 65808 : move32();
112 : }
113 :
114 2742 : dctT2_N_apply_matrix_fx( (const Word32 *) u_mr_fx, dct_target_fx, s_min( max_dct_trunc, n_ana ), n_ana, invTrfMatrix_fx, max_dct_trunc, dcttype ); // exp : tmp_e
115 :
116 : /* init search state ptr's at the top */
117 2742 : set32_fx( dist1_ptr_fx, MAX_32, maxC_st1 );
118 2742 : set16_fx( dist1_ptr_e_buf, 32, maxC_st1 );
119 2742 : st1_mse_pair_fx = &( dist1_ptr_fx[0] ); /* req. ptr post upd +=2 */ // st1_mse_pair_e
120 2742 : st1_mse_pair_e = &( dist1_ptr_e_buf[0] ); /* req. ptr post upd +=2 */
121 2742 : st1_idx_pair = &( indices_st1_local[0] ); /* req. ptr post upd +=2 */
122 2742 : set64_fx( mse_trunc_segm_fx, 0, n_segm );
123 :
124 : // set16_fx( mse_trunc_segm_e, u_e, FDCNG_VQ_DCT_NSEGM );
125 :
126 13710 : FOR( segm = 0; segm < n_segm; segm++ )
127 : { /* point to a new paired location for each segment */
128 10968 : p_max = 0; /* req. to point to one of 1 or 0, this init can potentially be omitted here,as p_max is always 1 or 0 */
129 10968 : move16();
130 :
131 : /* compute segment common trunction error in dctN domain */
132 :
133 65808 : FOR( i = 0; i < trunc_dct_cols_per_segment[segm]; i++ )
134 : {
135 54840 : mse_trunc_segm_fx[segm] = W_mac_32_32( mse_trunc_segm_fx[segm], dct_target_fx[cols_per_segment[segm] + i], dct_target_fx[cols_per_segment[segm] + i] ); // Q41
136 54840 : move64();
137 : }
138 :
139 10968 : cbpW8 = W8Qx_dct_sections[segm]; /* Word8 column variable Qx storage , table ptr init */
140 :
141 361944 : FOR( j = 0; j < entries_per_segment[segm]; j++ )
142 : {
143 : /* unweighted segmented search DCT domain loop */
144 350976 : j_full = add( j, cum_entries_per_segment[segm] ); /* or simply use j_full++ */
145 :
146 350976 : mse_fx = mse_trunc_segm_fx[segm]; /* init mse with with common mse truncation part, in BASOP a move32() */ // Q41
147 350976 : move64();
148 :
149 350976 : dct_col_shift_tab = col_syn_shift[segm]; /* ptr init */
150 :
151 5763684 : FOR( c2 = 0; c2 < cols_per_segment[segm]; c2++ )
152 : {
153 : #define WMC_TOOL_SKIP
154 5412708 : tmp_fx = L_sub( dct_target_fx[c2], Mpy_32_32( L_shl( cbpW8[c2], add( sub( Q31, tmp_e ), dct_col_shift_tab[c2] ) ), dct_scaleF_fx[1] ) ); /* note: BASOP shift left defined for signed integers */
155 : LOGIC( 1 );
156 : SHIFT( 1 );
157 : ADD( 1 ); /* in BASOP: s_and(for W8->W16), shl(), sub()*/
158 : #undef WMC_TOOL_SKIP
159 5412708 : mse_fx = W_mac_32_32( mse_fx, tmp_fx, tmp_fx ); /* L_mac or L_mac0() square Word16 -> Word32*/ // Q41
160 : }
161 350976 : Word16 L_tmp = W_norm( mse_fx );
162 350976 : st1_mse_ptr_fx[j_full] = W_extract_h( W_lshl( mse_fx, L_tmp ) ); /* save MSE in shared dynamic RAM, move32() in BASOP */ // st1_mse_ptr_e
163 350976 : move32();
164 350976 : st1_mse_ptr_e[j_full] = sub( shl( tmp_e, 1 ), L_tmp );
165 350976 : move16();
166 :
167 : #define WMC_TOOL_SKIP
168 350976 : cbpW8 += cols_per_segment[segm]; /* fixed pointer increment for each segment */
169 : #undef WMC_TOOL_SKIP
170 :
171 : /* overwrite with a new worst index at p_max */
172 :
173 : /* Note: The three inner loop if's below are not 100% properly instrumented by WMC tool */
174 : // if ( st1_mse_ptr_fx[j_full] < st1_mse_pair_fx[p_max] ) /* L_sub */
175 350976 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( st1_mse_ptr_fx[j_full], st1_mse_ptr_e[j_full], st1_mse_pair_fx[p_max], st1_mse_pair_e[p_max] ), -1 ) ) /* L_sub */
176 : {
177 154546 : st1_idx_pair[p_max] = j_full; /* move16, single BASOP */
178 154546 : move16();
179 : } /* BASOP 2 ops */
180 :
181 350976 : IF( EQ_16( st1_idx_pair[p_max], j_full ) )
182 : { /* idx updated --> also update mse */
183 154546 : st1_mse_pair_fx[p_max] = st1_mse_ptr_fx[j_full]; /* move32(), single BASOP */
184 154546 : move32();
185 154546 : st1_mse_pair_e[p_max] = st1_mse_ptr_e[j_full]; /* move32(), single BASOP */
186 154546 : move16();
187 : } /* BASOP 3 ops */
188 :
189 : /* avoid WC costly candidate list management by always updating p_max,
190 : as we have only a pair in each segment to maintain */
191 350976 : p_max = 0;
192 350976 : move16();
193 350976 : if ( EQ_16( BASOP_Util_Cmp_Mant32Exp( st1_mse_pair_fx[0], st1_mse_pair_e[0], st1_mse_pair_fx[1], st1_mse_pair_e[1] ), -1 ) ) /* L_sub()*/
194 : {
195 187546 : p_max = 1; /* move16() */
196 187546 : move16();
197 : } /* BASOP 3 ops ,Note 2 ops possible in BASOP with L_sub and L_lshr */
198 :
199 : /* Note: logical shift right not available in ANSI-C */
200 : /* p_max = (st1_mse_pair[0] - st1_mse_pair[1]) ">>>" 31; */
201 : /* in java logical shift right is available as >>> , in BASOP it is available as L_lshr */
202 :
203 : /* Cost: weighted sum with cond moves ('if') => 8 in float , 7 in BASOP with L_lshr */
204 : } /* j in section */
205 :
206 10968 : st1_mse_pair_fx += 2; /* req. ptr init */
207 10968 : st1_mse_pair_e += 2; /* req. ptr init */
208 10968 : st1_idx_pair += 2; /* req. ptr init */
209 :
210 : } /* next segment */
211 :
212 2742 : tmp_e = 0;
213 2742 : move16();
214 24678 : FOR( j = 0; j < maxC_st1; j++ )
215 : {
216 : /* compute_full mse using stored DCT24 domain MSE's */
217 : /* calculate MSE from stage1 inner using existing inner DCT domain variables */
218 21936 : tmp_e = s_max( dist1_ptr_e_buf[j], tmp_e );
219 : }
220 :
221 24678 : FOR( j = 0; j < maxC_st1; j++ )
222 : {
223 : /* compute_full mse using stored DCT24 domain MSE's */
224 : /* calculate MSE from stage1 inner using existing inner DCT domain variables */
225 21936 : dist1_ptr_fx[j] = L_shr( dist1_ptr_fx[j], sub( tmp_e, dist1_ptr_e_buf[j] ) );
226 21936 : move32();
227 21936 : *dist1_ptr_e = tmp_e;
228 21936 : move16();
229 : }
230 :
231 :
232 2742 : assert( ( maxC_st1 >= 3 ) );
233 2742 : assert( ( maxC_st1 <= 8 ) );
234 :
235 2742 : p_max = maximum_32_fx( dist1_ptr_fx, maxC_st1, NULL ); /* establish current worst candidate for MSVQ stage#2 among all maxC_st1 candidates so far */
236 :
237 2742 : p_mins[0] = minimum_32_fx( dist1_ptr_fx, maxC_st1, NULL ); /* find best entry among all maxC_pre */
238 2742 : move16();
239 2742 : tmp_fx = dist1_ptr_fx[p_mins[0]];
240 2742 : move32();
241 2742 : dist1_ptr_fx[p_mins[0]] = MAX_32; /* exclude 1st */
242 2742 : move32();
243 :
244 2742 : p_mins[1] = minimum_32_fx( dist1_ptr_fx, maxC_st1, NULL ); /* find 2nd best entry */
245 2742 : move16();
246 2742 : tmp2_fx = dist1_ptr_fx[p_mins[1]];
247 2742 : move32();
248 2742 : dist1_ptr_fx[p_mins[1]] = MAX_32; /* exclude 2nd */
249 2742 : move32();
250 :
251 2742 : dist1_ptr_fx[p_mins[0]] = tmp_fx; /* restore 1st */
252 2742 : move32();
253 2742 : dist1_ptr_fx[p_mins[1]] = tmp2_fx; /* restore 2nd */
254 2742 : move32();
255 :
256 2742 : idx_min[0] = indices_st1_local[p_mins[0]];
257 2742 : move16();
258 2742 : idx_min[1] = indices_st1_local[p_mins[1]];
259 2742 : move16();
260 :
261 :
262 : /* use global exclusion list to never reselect the two (best) global MSE values sofar */
263 2742 : st1_mse_ptr_fx[idx_min[0]] = MAX_32; /* move32() */
264 2742 : move32();
265 2742 : st1_mse_ptr_e[idx_min[0]] = MAX_16;
266 2742 : move16();
267 2742 : st1_mse_ptr_fx[idx_min[1]] = MAX_32; /* move32() */
268 2742 : move32();
269 2742 : st1_mse_ptr_e[idx_min[1]] = MAX_16;
270 2742 : move16();
271 :
272 : /* circular MSE-neigbour list in use to potentially replace some segment search candidates */
273 : /* using both 1st and 2nd best neighbours in fwd and rev directions */
274 2742 : check_ind[0] = segm_neighbour_fwd[idx_min[0]];
275 2742 : move16();
276 2742 : check_ind[1] = segm_neighbour_rev[idx_min[0]];
277 2742 : move16();
278 :
279 2742 : check_ind[2] = segm_neighbour_fwd[idx_min[1]];
280 2742 : move16();
281 2742 : check_ind[3] = segm_neighbour_rev[idx_min[1]];
282 2742 : move16();
283 :
284 2742 : check_ind[4] = segm_neighbour_fwd[check_ind[0]];
285 2742 : move16();
286 2742 : check_ind[5] = segm_neighbour_rev[check_ind[1]];
287 2742 : move16();
288 :
289 2742 : check_ind[6] = segm_neighbour_fwd[check_ind[2]];
290 2742 : move16();
291 2742 : check_ind[FDCNG_VQ_DCT_NPOST - 1] = segm_neighbour_rev[check_ind[3]];
292 2742 : move16();
293 :
294 24678 : FOR( i = 0; i < npost_check; i++ )
295 : {
296 : /* move MSE from DCT-inner loop search to input synthesis domain */
297 : /* multiplication by fdcng_dct_scaleF[2] to get the float outer loop scale correct in IDCT synthesis domain */
298 21936 : check_mse = st1_mse_ptr_fx[check_ind[i]];
299 21936 : move32();
300 21936 : check_mse_e = st1_mse_ptr_e[check_ind[i]];
301 21936 : move16();
302 :
303 21936 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( check_mse, check_mse_e, dist1_ptr_fx[p_max], *dist1_ptr_e ), -1 ) )
304 : { /* new winner , replace worst */
305 10454 : dist1_ptr_fx[p_max] = L_shl( check_mse, sub( check_mse_e, *dist1_ptr_e ) );
306 10454 : move32();
307 10454 : indices_st1_local[p_max] = check_ind[i];
308 10454 : move16();
309 10454 : st1_mse_ptr_fx[check_ind[i]] = MAX_32; /* exclude, BASOP: move32() */
310 10454 : move32();
311 10454 : st1_mse_ptr_e[check_ind[i]] = MAX_16;
312 10454 : move16();
313 10454 : p_max = maximum_32_fx( dist1_ptr_fx, maxC_st1, NULL ); /* establish a new current worst candidate among all maxC */
314 : }
315 : }
316 :
317 : /* extract the selected stage one vectors in DCT_N domain , apply IDCT_N and scale up */
318 : /* always extract full length signal(e.g. 24) to be able to update WB(e.g. N_in==21) candidate MSE values */
319 : /* in the case that only a part of the IDCT N vector is in final use */
320 :
321 : /* note: synthesis not yet fully parameterized/generalized for other IDCT lengths */
322 2742 : assert( N == 24 );
323 : {
324 24678 : FOR( c = 0; c < maxC_st1; c++ )
325 : {
326 21936 : dec_FDCNG_MSVQ_stage1_fx( indices_st1_local[c], N, invTrfMatrix_fx, dcttype + 1, &( st1_syn_vec_ptr_fx[c * N] ), NULL ); // Q11 : output
327 21936 : scale_sig32( &( st1_syn_vec_ptr_fx[c * N] ), N, sub( 11, s_max( u_e, 12 ) ) );
328 : }
329 : }
330 :
331 2742 : return p_max; /*ptr to worst performing candidate */
332 : }
333 :
334 :
335 : /*--------------------------------------------------------------------------*
336 : * msvq_stage1_dct_recalc_candidates_fdcng_wb()
337 : *
338 : * recalc MSE for fdcng WB(0..20) coeffs ,
339 : essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search,
340 : excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep the WB MSEs update for the subsequent stages
341 : *--------------------------------------------------------------------------*/
342 :
343 : /*! r: (updated p_max) */
344 725 : Word16 msvq_stage1_dct_recalc_candidates_fdcng_wb_fx(
345 : const Word32 *st1_syn_vec_ptr_fx, /* i : IDCT24 synthesis vectors st1_syn_vec_e*/
346 : const Word16 st1_syn_vec_e, /* i : exp for IDCT24 synthesis vectors */
347 : const Word32 *u_fx, /* i : target signal u_e*/
348 : const Word16 u_e, /* i : exp for target signal */
349 : const Word16 maxC_st1, /* i : number of candidates in stage1 */
350 : Word32 *dist_ptr_fx, /* i/o: updated MSE vector for stage1 */
351 : Word16 *dist_ptr_e /* i/o: exp for updated MSE vector for stage1 */
352 : )
353 : {
354 : Word16 i;
355 : Word16 p_max_local, c;
356 : const Word32 *p2_fx;
357 : Word16 tmp_e;
358 : Word32 res24_fx, high_diff_fx[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB];
359 : Word64 acc;
360 : Word16 res24_e[FD_CNG_maxC_37bits];
361 : Word16 dist_e;
362 :
363 725 : dist_e = *dist_ptr_e;
364 725 : move16();
365 6525 : FOR( c = 0; c < maxC_st1; c++ )
366 : { /* point to extended synthesis part */
367 5800 : p2_fx = (const Word32 *) &( st1_syn_vec_ptr_fx[c * FDCNG_VQ_MAX_LEN + FDCNG_VQ_MAX_LEN_WB] ); /* ptr init to synthesis candidate c */
368 5800 : tmp_e = s_max( st1_syn_vec_e, u_e );
369 5800 : tmp_e = add( tmp_e, 1 );
370 : /* for stage#1 use "u" instead of the shortened resid[0], to access the extended/extrapolated input target */
371 23200 : FOR( i = 0; i < FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB; i++ )
372 : {
373 17400 : high_diff_fx[i] = L_sub( L_shr( p2_fx[i], sub( tmp_e, st1_syn_vec_e ) ), L_shr( u_fx[FDCNG_VQ_MAX_LEN_WB + i], sub( tmp_e, u_e ) ) ); // tmp_e
374 17400 : move32();
375 : }
376 5800 : acc = 0;
377 5800 : move64();
378 23200 : FOR( i = 0; i < FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB; i++ )
379 : {
380 17400 : acc = W_mac_32_32( acc, high_diff_fx[i], high_diff_fx[i] );
381 : }
382 5800 : res24_e[c] = tmp_e;
383 5800 : move16();
384 5800 : tmp_e = W_norm( acc );
385 5800 : res24_fx = W_extract_h( W_shl( acc, tmp_e ) );
386 :
387 5800 : res24_e[c] = sub( shl( res24_e[c], 1 ), tmp_e );
388 5800 : move16();
389 :
390 5800 : dist_ptr_fx[c] = BASOP_Util_Add_Mant32Exp( dist_ptr_fx[c], *dist_ptr_e, L_negate( res24_fx ), res24_e[c], &res24_e[c] ); /* remove DCT24 high band error contribution */
391 5800 : move32();
392 5800 : dist_e = s_max( dist_e, res24_e[c] );
393 5800 : move16();
394 : }
395 :
396 :
397 6525 : FOR( c = 0; c < maxC_st1; c++ )
398 : {
399 5800 : dist_ptr_fx[c] = L_shl( dist_ptr_fx[c], sub( res24_e[c], dist_e ) );
400 5800 : move32();
401 : }
402 725 : *dist_ptr_e = dist_e;
403 725 : move16();
404 : /* finally update p_max, as it may potentially change,
405 : due to the core DCT24 search originally optimizing over the longer basis vectors than DCT21 */
406 725 : p_max_local = maximum_32_fx( dist_ptr_fx, maxC_st1, NULL );
407 :
408 725 : return p_max_local;
409 : }
410 :
411 :
412 : /*--------------------------------------------------------------------------*
413 : * depack_mul_values_fx()
414 : *
415 : *--------------------------------------------------------------------------*/
416 :
417 0 : static Word32 depack_mul_values_fx( Word16 *Tmp, const Word16 *w, const Word16 *cbp, const Word16 N )
418 : {
419 : Word16 i, val0, val1, val2, val3;
420 : Word32 en;
421 :
422 0 : en = 0;
423 0 : move32();
424 0 : FOR( i = 0; i < N; i += 4 )
425 : {
426 0 : depack_4_values( cbp + i_mult( shr( i, 2 ), 3 ), val0, val1, val2, val3 )
427 0 : Tmp[i + 0] = mult_r( shl_sat( w[i + 0], 2 ), val0 );
428 0 : move16();
429 0 : en = L_mac_sat( en, val0, Tmp[i + 0] );
430 0 : Tmp[i + 1] = mult_r( shl_sat( w[i + 1], 2 ), val1 );
431 0 : move16();
432 0 : en = L_mac_sat( en, val1, Tmp[i + 1] );
433 0 : Tmp[i + 2] = mult_r( shl_sat( w[i + 2], 2 ), val2 );
434 0 : move16();
435 0 : en = L_mac_sat( en, val2, Tmp[i + 2] );
436 0 : Tmp[i + 3] = mult_r( shl_sat( w[i + 3], 2 ), val3 );
437 0 : move16();
438 0 : en = L_mac_sat( en, val3, Tmp[i + 3] );
439 : }
440 :
441 0 : return en;
442 : }
443 :
444 :
445 : /*--------------------------------------------------------------------------*
446 : * depack_sub_values()
447 : *
448 : *--------------------------------------------------------------------------*/
449 :
450 558784 : static void depack_sub_values_fx( Word16 *pTmp, const Word16 *p1, const Word16 *cbp, const Word16 N )
451 : {
452 : Word16 j, val0, val1, val2, val3;
453 :
454 1955744 : FOR( j = 0; j < N; j += 4 )
455 : {
456 1396960 : depack_4_values( cbp + i_mult( 3, shr( j, 2 ) ), val0, val1, val2, val3 )
457 :
458 : /*pTmp[i] = (p1[i] - cbp[i]);*/
459 1396960 : pTmp[j + 0] = sub( p1[j + 0], val0 );
460 1396960 : move16(); /*3Q12*1.28*/
461 1396960 : pTmp[j + 1] = sub( p1[j + 1], val1 );
462 1396960 : move16(); /*3Q12*1.28*/
463 1396960 : pTmp[j + 2] = sub( p1[j + 2], val2 );
464 1396960 : move16(); /*3Q12*1.28*/
465 1396960 : pTmp[j + 3] = sub( p1[j + 3], val3 );
466 1396960 : move16(); /*3Q12*1.28*/
467 : }
468 558784 : }
469 :
470 :
471 1187416 : static Word64 depack_mul_values_fx64( Word32 *Tmp, const Word16 *w, const Word16 *cbp, const Word16 N )
472 : {
473 : Word16 i, val0, val1, val2, val3;
474 : Word64 en;
475 :
476 1187416 : en = 0;
477 1187416 : move32();
478 4889360 : FOR( i = 0; i < N; i += 4 )
479 : {
480 3701944 : depack_4_values( cbp + i_mult( shr( i, 2 ), 3 ), val0, val1, val2, val3 )
481 3701944 : Tmp[i + 0] = L_mult0( w[i + 0], val0 ); // Q8 * Q2.56
482 3701944 : move16();
483 3701944 : en = W_mac_32_16( en, Tmp[i + 0], val0 ); // Q8 * Q2.56 * 2.56 * Q1
484 3701944 : Tmp[i + 1] = L_mult0( w[i + 1], val1 );
485 3701944 : move16();
486 3701944 : en = W_mac_32_16( en, Tmp[i + 1], val1 );
487 3701944 : Tmp[i + 2] = L_mult0( w[i + 2], val2 );
488 3701944 : move16();
489 3701944 : en = W_mac_32_16( en, Tmp[i + 2], val2 );
490 3701944 : Tmp[i + 3] = L_mult0( w[i + 3], val3 );
491 3701944 : move16();
492 3701944 : en = W_mac_32_16( en, Tmp[i + 3], val3 );
493 : }
494 :
495 1187416 : return en; // Q8 * Q2.56 * 2.56 * Q1
496 : }
497 :
498 : /*--------------------------------------------------------------------------*
499 : * msvq_enc_find_p_max_8()
500 : *
501 : * Unroll of inner search loop for maxC == 8
502 : *--------------------------------------------------------------------------*/
503 :
504 0 : static Word16 msvq_enc_find_p_max_8_fx( Word32 dist[] )
505 : {
506 : Word16 p_max;
507 :
508 0 : p_max = 0;
509 0 : move16();
510 :
511 : BASOP_SATURATE_WARNING_OFF_EVS
512 0 : if ( GT_32( dist[1], dist[p_max] ) )
513 : {
514 0 : p_max = 1;
515 0 : move16();
516 : }
517 0 : if ( GT_32( dist[2], dist[p_max] ) )
518 : {
519 0 : p_max = 2;
520 0 : move16();
521 : }
522 0 : if ( GT_32( dist[3], dist[p_max] ) )
523 : {
524 0 : p_max = 3;
525 0 : move16();
526 : }
527 0 : if ( GT_32( dist[4], dist[p_max] ) )
528 : {
529 0 : p_max = 4;
530 0 : move16();
531 : }
532 0 : if ( GT_32( dist[5], dist[p_max] ) )
533 : {
534 0 : p_max = 5;
535 0 : move16();
536 : }
537 0 : if ( GT_32( dist[6], dist[p_max] ) )
538 : {
539 0 : p_max = 6;
540 0 : move16();
541 : }
542 0 : if ( GT_32( dist[7], dist[p_max] ) )
543 : {
544 0 : p_max = 7;
545 0 : move16();
546 : }
547 : BASOP_SATURATE_WARNING_ON_EVS
548 0 : return p_max;
549 : }
550 :
551 :
552 1484189 : static Word16 msvq_enc_find_p_max_8_fx64( Word64 dist[] )
553 : {
554 : Word16 p_max;
555 :
556 1484189 : p_max = 0;
557 1484189 : move16();
558 :
559 : BASOP_SATURATE_WARNING_OFF_EVS
560 1484189 : if ( GT_64( dist[1], dist[p_max] ) )
561 : {
562 704291 : p_max = 1;
563 704291 : move16();
564 : }
565 1484189 : if ( GT_64( dist[2], dist[p_max] ) )
566 : {
567 498072 : p_max = 2;
568 498072 : move16();
569 : }
570 1484189 : if ( GT_64( dist[3], dist[p_max] ) )
571 : {
572 395510 : p_max = 3;
573 395510 : move16();
574 : }
575 1484189 : if ( GT_64( dist[4], dist[p_max] ) )
576 : {
577 302317 : p_max = 4;
578 302317 : move16();
579 : }
580 1484189 : if ( GT_64( dist[5], dist[p_max] ) )
581 : {
582 238747 : p_max = 5;
583 238747 : move16();
584 : }
585 1484189 : if ( GT_64( dist[6], dist[p_max] ) )
586 : {
587 218491 : p_max = 6;
588 218491 : move16();
589 : }
590 1484189 : if ( GT_64( dist[7], dist[p_max] ) )
591 : {
592 186148 : p_max = 7;
593 186148 : move16();
594 : }
595 : BASOP_SATURATE_WARNING_ON_EVS
596 1484189 : return p_max;
597 : }
598 :
599 :
600 : /*--------------------------------------------------------------------------*
601 : * msvq_enc_find_p_max_6()
602 : *
603 : * Unroll of inner search loop for maxC == 6
604 : *--------------------------------------------------------------------------*/
605 :
606 0 : static Word16 msvq_enc_find_p_max_6_fx( Word32 dist[] )
607 : {
608 : Word16 p_max;
609 :
610 0 : p_max = 0;
611 0 : move16();
612 :
613 : BASOP_SATURATE_WARNING_OFF_EVS
614 0 : if ( GT_32( dist[1], dist[p_max] ) )
615 : {
616 0 : p_max = 1;
617 0 : move16();
618 : }
619 0 : if ( GT_32( dist[2], dist[p_max] ) )
620 : {
621 0 : p_max = 2;
622 0 : move16();
623 : }
624 0 : if ( GT_32( dist[3], dist[p_max] ) )
625 : {
626 0 : p_max = 3;
627 0 : move16();
628 : }
629 0 : if ( GT_32( dist[4], dist[p_max] ) )
630 : {
631 0 : p_max = 4;
632 0 : move16();
633 : }
634 0 : if ( GT_32( dist[5], dist[p_max] ) )
635 : {
636 0 : p_max = 5;
637 0 : move16();
638 : }
639 : BASOP_SATURATE_WARNING_ON_EVS
640 0 : return p_max;
641 : }
642 :
643 0 : static Word16 msvq_enc_find_p_max_6_fx64( Word64 dist[] )
644 : {
645 : Word16 p_max;
646 :
647 0 : p_max = 0;
648 0 : move16();
649 :
650 : BASOP_SATURATE_WARNING_OFF_EVS
651 0 : if ( GT_64( dist[1], dist[p_max] ) )
652 : {
653 0 : p_max = 1;
654 0 : move16();
655 : }
656 0 : if ( GT_64( dist[2], dist[p_max] ) )
657 : {
658 0 : p_max = 2;
659 0 : move16();
660 : }
661 0 : if ( GT_64( dist[3], dist[p_max] ) )
662 : {
663 0 : p_max = 3;
664 0 : move16();
665 : }
666 0 : if ( GT_64( dist[4], dist[p_max] ) )
667 : {
668 0 : p_max = 4;
669 0 : move16();
670 : }
671 0 : if ( GT_64( dist[5], dist[p_max] ) )
672 : {
673 0 : p_max = 5;
674 0 : move16();
675 : }
676 : BASOP_SATURATE_WARNING_ON_EVS
677 0 : return p_max;
678 : }
679 :
680 :
681 : /*--------------------------------------------------------------------------*
682 : * msvq_enc_fx()
683 : *
684 : * MSVQ encoder
685 : *--------------------------------------------------------------------------*/
686 :
687 0 : void msvq_enc_fx(
688 : const Word16 *const *cb, /* i : Codebook (indexed cb[*stages][levels][p]) (0Q15) */
689 : const Word16 dims[], /* i : Dimension of each codebook stage (NULL: full dim.) */
690 : const Word16 offs[], /* i : Starting dimension of each codebook stage (NULL: 0) */
691 : const Word16 u[], /* i : Vector to be encoded (prediction and mean removed)(3Q12) */
692 : const Word16 *levels, /* i : Number of levels in each stage */
693 : const Word16 maxC, /* i : Tree search size (number of candidates kept from */
694 : /* one stage to the next == M-best) */
695 : const Word16 stages, /* i : Number of stages */
696 : const Word16 w[], /* i : Weights Q8*/
697 : const Word16 N, /* i : Vector dimension */
698 : const Word16 maxN, /* i : Codebook dimension */
699 : Word16 Idx[] /* o : Indices */
700 : )
701 : {
702 : Word16 j;
703 : const Word16 *cbp;
704 : Word16 p2i;
705 : Word16 resid_buf[2 * LSFMBEST_MAX * M_MAX], *resid[2];
706 : Word16 *pTmp, *p1;
707 : Word16 *indices[2], m, s, c, c2, p_max, i, Tmp[M_MAX];
708 : Word16 idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX];
709 : Word32 dist_buf[2 * LSFMBEST_MAX], *dist[2], t1, tmp, en, ss2;
710 : Word16 ( *func_ptr )( Word32 * );
711 : Word16 N34;
712 : Word16 n, maxn, start;
713 :
714 : /*----------------------------------------------------------------*
715 : * Allocate memory for previous (parent) and current nodes.
716 : * Parent node is indexed [0], current node is indexed [1].
717 : *----------------------------------------------------------------*/
718 0 : indices[0] = idx_buf;
719 0 : indices[1] = idx_buf + maxC * stages; /*move16();*/
720 : /*vr_iset(0, idx_buf, 2*stages*maxC);*/
721 0 : set16_fx( idx_buf, 0, (Word16) ( 2 * stages * maxC ) );
722 :
723 0 : resid[0] = resid_buf;
724 0 : resid[1] = resid_buf + maxC * N; /*move16();*/
725 :
726 0 : dist[0] = dist_buf;
727 0 : dist[1] = dist_buf + maxC; /*move16();*/
728 :
729 : /*vr_iset(0, parents, maxC);*/
730 0 : set16_fx( parents, 0, maxC );
731 :
732 :
733 0 : func_ptr = msvq_enc_find_p_max_6_fx;
734 0 : move16();
735 0 : if ( EQ_16( maxC, 8 ) )
736 : {
737 0 : func_ptr = msvq_enc_find_p_max_8_fx;
738 0 : move16();
739 : }
740 :
741 : /*----------------------------------------------------------------*
742 : * LSF weights are normalized, so it is always better to multiply it first
743 : * Set up inital distance vector
744 : *----------------------------------------------------------------*/
745 : /* Q0/16 * Qw_norm/16 << 1 >> 16 => Qwnorm-15/16 * Q0/16 << 1 => Qwnorm-14/32 * 6.5536 */
746 0 : ss2 = L_mult( mult( u[0], shl( w[0], 2 ) ), u[0] );
747 0 : move16();
748 0 : FOR( j = 1; j < N; j++ )
749 : {
750 0 : ss2 = L_mac_sat( ss2, mult( u[j], shl_sat( w[j], 2 ) ), u[j] );
751 : }
752 :
753 : /* Set up inital error (residual) vectors */
754 0 : pTmp = resid[1]; /*move16();*/
755 0 : FOR( c = 0; c < maxC; c++ )
756 : {
757 0 : Copy( u, pTmp + c * N, N );
758 0 : dist[1][c] = ss2;
759 0 : move32();
760 : }
761 :
762 : /* Loop over all stages */
763 0 : m = 1;
764 0 : move16();
765 0 : FOR( s = 0; s < stages; s++ )
766 : {
767 : /* codebook pointer is set to point to first stage */
768 0 : cbp = cb[s]; /*3Q12*1.28*/
769 0 : move16();
770 :
771 : /* Set up pointers to parent and current nodes */
772 0 : swap( indices[0], indices[1], Word16 * );
773 0 : move16();
774 0 : move16();
775 0 : move16();
776 0 : swap( resid[0], resid[1], Word16 * );
777 0 : move16();
778 0 : move16();
779 0 : move16();
780 0 : swap( dist[0], dist[1], Word32 * );
781 0 : move32();
782 0 : move32();
783 0 : move32();
784 :
785 : /* p_max points to maximum distortion node (worst of best) */
786 0 : p_max = 0;
787 0 : move16();
788 :
789 0 : n = N;
790 0 : move16();
791 0 : maxn = maxN;
792 0 : move16();
793 0 : if ( dims )
794 : {
795 0 : n = dims[s];
796 0 : move16();
797 : }
798 0 : if ( dims )
799 : {
800 0 : maxn = n;
801 0 : move16();
802 : }
803 :
804 0 : assert( ( maxn % 4 ) == 0 );
805 0 : N34 = mult( maxn, 24576 /*0.75f Q15*/ );
806 :
807 0 : start = 0;
808 0 : move16();
809 0 : if ( offs )
810 : {
811 0 : start = offs[s];
812 0 : move16();
813 : }
814 :
815 0 : set16_fx( Tmp, 0, start );
816 0 : set16_fx( Tmp + start + n, 0, sub( N, add( start, n ) ) );
817 :
818 : /* Set distortions to a large value */
819 0 : FOR( j = 0; j < maxC; j++ )
820 : {
821 0 : dist[1][j] = MAXINT32;
822 0 : move32();
823 : }
824 :
825 0 : FOR( j = 0; j < levels[s]; j++ )
826 : {
827 : /* Compute weighted codebook element and its energy */
828 0 : en = depack_mul_values_fx( Tmp + start, w + start, cbp, n );
829 :
830 0 : cbp += N34; /* pointer is incremented */
831 :
832 : /* Iterate over all parent nodes */
833 0 : FOR( c = 0; c < m; c++ )
834 : {
835 0 : pTmp = &resid[0][c * N];
836 : /*tmp = (*pTmp++) * Tmp[0];*/
837 0 : t1 = L_mult( pTmp[0], Tmp[0] );
838 :
839 0 : FOR( i = 1; i < N; i++ )
840 : {
841 0 : t1 = L_mac( t1, pTmp[i], Tmp[i] );
842 : }
843 :
844 : BASOP_SATURATE_WARNING_OFF_EVS
845 : /*NOTE: as long as a shorter distance is found, saturation can be accepted.*/
846 0 : tmp = L_add_sat( dist[0][c], L_sub_sat( en, L_shl( t1, 1 ) ) );
847 0 : t1 = L_sub_sat( tmp, dist[1][p_max] );
848 : BASOP_SATURATE_WARNING_ON_EVS
849 :
850 0 : IF( t1 <= 0 )
851 : {
852 : /* Replace worst */
853 0 : dist[1][p_max] = tmp;
854 0 : move32();
855 0 : indices[1][p_max * stages + s] = j;
856 0 : move16();
857 0 : add( 0, 0 );
858 0 : mult( 0, 0 );
859 0 : parents[p_max] = c;
860 0 : move16();
861 :
862 0 : p_max = ( *func_ptr )( dist[1] );
863 :
864 : } /*IF (L_sub(tmp,dist[1][p_max]) < 0) */
865 : } /* FOR (c=0; c<m; c++) */
866 : } /* FOR (j=0; j<levels[s]; j++) */
867 :
868 : /*------------------------------------------------------------*
869 : * Compute error vectors for each node
870 : *------------------------------------------------------------*/
871 0 : pTmp = resid[1];
872 0 : FOR( c = 0; c < maxC; c++ )
873 : {
874 : /* Subtract codebook entry from residual vector of parent node and multiply with scale factor */
875 0 : p1 = resid[0] + parents[c] * N;
876 0 : p2i = indices[1][c * stages + s];
877 0 : move16();
878 :
879 0 : Copy( p1, pTmp, start );
880 0 : depack_sub_values_fx( pTmp + start, p1 + start, &cb[s][p2i * N34], n );
881 0 : Copy( p1 + start + n, pTmp + start + n, sub( N, add( start, n ) ) );
882 :
883 0 : pTmp += N;
884 :
885 : /* Get indices that were used for parent node */
886 : /*mvs2s(indices[0]+parents[c]*stages, indices[1]+c*stages, s);*/
887 0 : Copy( indices[0] + parents[c] * stages, indices[1] + c * stages, s );
888 : } /* for (c=0; c<maxC; c++) */
889 0 : m = maxC;
890 0 : move16();
891 : } /* for (m=1, s=0; s<stages; s++) */
892 :
893 : /* Find the optimum candidate */
894 0 : c2 = findIndexOfMinWord32( dist[1], maxC );
895 : /*mvi2i (indices[1]+c2*stages, Idx, stages);*/
896 0 : Copy( indices[1] + c2 * stages, Idx, stages );
897 :
898 :
899 0 : return;
900 : }
901 :
/*
 * msvq_enc_lsf_fx64()
 *
 * Multi-stage VQ encoder using an M-best tree search, with all distortion
 * values kept in 64-bit (Word64) accumulators.
 *
 * For every stage s, each of the levels[s] code vectors is tested against the
 * m surviving parent candidates (m = 1 for the first stage, maxC afterwards).
 * The candidate distortion is updated incrementally as
 *     dist(parent) + en(code vector) - 2 * <weighted code vector, parent residual>
 * and the maxC smallest values are kept together with their index paths.
 * After the last stage the index path of the smallest distortion is copied
 * to Idx[0..stages-1].
 *
 * The work buffers are local arrays dimensioned with LSFMBEST_MAX, M_MAX and
 * MAX_VQ_STAGES_USED; maxC, N and stages are not range-checked in this function.
 *
 * NOTE(review): the worst-candidate helper is only switched between the "_6_"
 * and "_8_" variants, so maxC is presumably always 6 or 8 - confirm with callers.
 * NOTE(review): LLONG_MAX is used below; <limits.h> is not among the includes
 * at the top of this file, so it presumably arrives through another header.
 */
902 34924 : void msvq_enc_lsf_fx64(
903 : const Word16 *const *cb, /* i : Codebook (indexed cb[*stages][levels][p]) (10Q5 * 1.28) */
904 : const Word16 dims[], /* i : Dimension of each codebook stage (NULL: full dim.) */
905 : const Word16 offs[], /* i : Starting dimension of each codebook stage (NULL: 0) */
906 : const Word16 u[], /* i : Vector to be encoded (prediction and mean removed)(Q14Q1*1.28) */
907 : const Word16 *levels, /* i : Number of levels in each stage */
908 : const Word16 maxC, /* i : Tree search size (number of candidates kept from */
909 : /* one stage to the next == M-best) */
910 : const Word16 stages, /* i : Number of stages */
911 : const Word16 w[], /* i : Weights Q8*/
912 : const Word16 N, /* i : Vector dimension */
913 : const Word16 maxN, /* i : Codebook dimension */
914 : Word16 Idx[] /* o : Indices */
915 : )
916 : {
917 : Word16 j;
918 : const Word16 *cbp;
919 : Word16 p2i;
920 : Word16 resid_buf[2 * LSFMBEST_MAX * M_MAX], *resid[2];
921 : Word16 *pTmp, *p1;
922 : Word16 *indices[2], m, s, c, c2, p_max, i;
923 : Word32 Tmp32[M_MAX];
924 : Word16 idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX];
925 : Word64 *dist_64[2], en64, tmp64;
926 : Word64 dist_buf_64[2 * LSFMBEST_MAX];
927 : Word16 ( *func_ptr64 )( Word64 * );
928 : Word16 N34;
929 : Word16 n, maxn, start;
930 :
931 : /*----------------------------------------------------------------*
932 : * Allocate memory for previous (parent) and current nodes.
933 : * Parent node is indexed [0], current node is indexed [1].
934 : *----------------------------------------------------------------*/
/* Each work buffer is split into two halves of maxC entries (parent / current);
   the halves are exchanged with swap() at the start of every stage. */
935 34924 : indices[0] = idx_buf;
936 34924 : indices[1] = idx_buf + maxC * stages; /*move16();*/
937 : /*vr_iset(0, idx_buf, 2*stages*maxC);*/
938 34924 : set16_fx( idx_buf, 0, (Word16) ( 2 * stages * maxC ) );
939 :
940 34924 : resid[0] = resid_buf;
941 34924 : resid[1] = resid_buf + maxC * N; /*move16();*/
942 :
943 34924 : dist_64[0] = dist_buf_64;
944 34924 : dist_64[1] = dist_buf_64 + maxC; /*move16();*/
945 :
946 : /*vr_iset(0, parents, maxC);*/
947 34924 : set16_fx( parents, 0, maxC );
948 :
949 :
/* Helper used below to obtain p_max after a replacement (see the "worst of best"
   comment in the stage loop). The 6-candidate variant is the default; the
   8-candidate variant is taken only when maxC == 8. */
950 34924 : func_ptr64 = msvq_enc_find_p_max_6_fx64;
951 34924 : move16();
952 34924 : if ( EQ_16( maxC, 8 ) )
953 : {
954 34924 : func_ptr64 = msvq_enc_find_p_max_8_fx64;
955 34924 : move16();
956 : }
957 :
958 : /*----------------------------------------------------------------*
959 : * LSF weights are normalized, so it is always better to multiply it first
960 : * Set up initial distance vector
961 : *----------------------------------------------------------------*/
962 : /* Q0/16 * Qw_norm/16 << 1 >> 16 => Qwnorm-15/16 * Q0/16 << 1 => Qwnorm-14/32 * 6.5536 */
/* ss2_64 accumulates w[j] * u[j] * u[j] over all N dimensions (weighted energy of the target) */
963 : Word64 ss2_64;
964 34924 : ss2_64 = W_mult_32_16( L_mult0( u[0], w[0] ), u[0] );
965 : // Q8 * Q2.56 * 2.56 * Q1
966 558784 : FOR( j = 1; j < N; j++ )
967 : {
968 523860 : ss2_64 = W_mac_32_16( ss2_64, L_mult0( u[j], w[j] ), u[j] );
969 : }
970 :
971 : /* Set up initial error (residual) vectors */
/* Every candidate slot starts with the target itself as residual and ss2_64 as distortion.
   This is written to half [1], which becomes the parent half [0] after the first swap. */
972 34924 : pTmp = resid[1]; /*move16();*/
973 314316 : FOR( c = 0; c < maxC; c++ )
974 : {
975 279392 : Copy( u, pTmp + c * N, N );
976 279392 : dist_64[1][c] = ss2_64;
977 279392 : move64();
978 : }
979 :
980 : /* Loop over all stages */
/* m is the number of valid parent candidates: 1 before the first stage, maxC afterwards */
981 34924 : m = 1;
982 34924 : move16();
983 104772 : FOR( s = 0; s < stages; s++ )
984 : {
985 : /* codebook pointer is set to point to the first entry of the codebook of stage s */
986 69848 : cbp = cb[s]; /*3Q12*1.28*/
987 69848 : move16();
988 :
989 : /* Set up pointers to parent and current nodes */
990 69848 : swap( indices[0], indices[1], Word16 * );
991 69848 : move16();
992 69848 : move16();
993 69848 : move16();
994 69848 : move16();
995 69848 : swap( resid[0], resid[1], Word16 * );
996 69848 : move16();
997 69848 : move16();
998 69848 : move16();
999 69848 : swap( dist_64[0], dist_64[1], Word64 * );
1000 69848 : move64();
1001 69848 : move64();
1002 69848 : move64();
1003 :
1004 : /* p_max points to maximum distortion node (worst of best) */
1005 69848 : p_max = 0;
1006 69848 : move16();
1007 :
/* n: number of dimensions covered by this stage, maxn: dimension of one codebook entry.
   Both follow dims[s] when a per-stage dimension table is supplied. */
1008 69848 : n = N;
1009 69848 : move16();
1010 69848 : maxn = maxN;
1011 69848 : move16();
1012 69848 : if ( dims )
1013 : {
1014 69848 : n = dims[s];
1015 69848 : move16();
1016 : }
1017 69848 : if ( dims )
1018 : {
1019 69848 : maxn = n;
1020 69848 : move16();
1021 : }
1022 :
/* N34 = 0.75 * maxn is the stride, in Word16 words, between consecutive codebook entries
   (cbp is advanced by N34 per entry below); the assert guarantees the product is exact. */
1023 69848 : assert( ( maxn % 4 ) == 0 );
1024 69848 : N34 = mult( maxn, 24576 /*0.75f Q15*/ );
1025 :
/* start: first dimension covered by this stage (0 when no offset table is given) */
1026 69848 : start = 0;
1027 69848 : move16();
1028 69848 : if ( offs )
1029 : {
1030 69848 : start = offs[s];
1031 69848 : move16();
1032 : }
1033 :
/* Zero Tmp32 outside [start, start + n) so that the full-length (N) dot product below
   only sees the dimensions covered by this stage. */
1034 69848 : set32_fx( Tmp32, 0, start );
1035 69848 : set32_fx( Tmp32 + start + n, 0, sub( N, add( start, n ) ) );
1036 :
1037 : /* Set distortions to a large value */
1038 628632 : FOR( j = 0; j < maxC; j++ )
1039 : {
1040 558784 : dist_64[1][j] = LLONG_MAX;
1041 558784 : move64();
1042 : }
1043 :
/* j is the code vector index within the stage; it is what gets stored in the index path */
1044 1257264 : FOR( j = 0; j < levels[s]; j++ )
1045 : {
1046 : /* Compute weighted codebook element and its energy */
1047 1187416 : en64 = depack_mul_values_fx64( Tmp32 + start, w + start, cbp, n ); // Q8
1048 : // en64: Q8 * Q2.56 * Q2.56 * q1
1049 : // Tmp: 2.56 * Q8
1050 :
1051 1187416 : cbp += N34; /* pointer is incremented */
1052 :
1053 : /* Iterate over all parent nodes */
1054 6286320 : FOR( c = 0; c < m; c++ )
1055 : {
1056 5098904 : pTmp = &resid[0][c * N]; // residual of parent c (the target itself in the first stage)
1057 : /*tmp = (*pTmp++) * Tmp[0];*/
/* t164: dot product between the weighted code vector (Tmp32) and the parent residual.
   The zero initialisation is overwritten immediately by W_mult_32_16(). */
1058 5098904 : Word64 t164 = 0;
1059 5098904 : move64();
1060 5098904 : t164 = W_mult_32_16( Tmp32[0], pTmp[0] ); // 2.56 * Q8 * Q2.56 * Q1
1061 : // Tmp32: Q8 * Q2.56
1062 81582464 : FOR( i = 1; i < N; i++ )
1063 : {
1064 76483560 : t164 = W_mac_32_16( t164, Tmp32[i], pTmp[i] ); // 2.56 * Q8 * Q2.56 * Q1
1065 : }
1066 :
/* New distortion = parent distortion + code vector energy - 2 * cross term;
   t164 is then reused for the difference to the currently worst kept candidate. */
1067 5098904 : tmp64 = W_add( dist_64[0][c], W_sub( en64, W_shl( t164, 1 ) ) );
1068 5098904 : t164 = W_sub( tmp64, dist_64[1][p_max] );
1069 5098904 : IF( t164 <= 0 )
1070 : {
1071 : /* Replace worst */
1072 1484189 : dist_64[1][p_max] = tmp64;
1073 1484189 : move64();
1074 1484189 : indices[1][p_max * stages + s] = j;
1075 1484189 : move16();
1076 1484189 : parents[p_max] = c;
1077 1484189 : move16();
1078 :
/* Look up the new worst candidate among the kept ones */
1079 1484189 : p_max = ( *func_ptr64 )( dist_64[1] );
1080 :
1081 : } /* IF( t164 <= 0 ), i.e. new distortion <= worst kept distortion */
1082 : } /* FOR (c=0; c<m; c++) */
1083 : } /* FOR (j=0; j<levels[s]; j++) */
1084 :
1085 : /*------------------------------------------------------------*
1086 : * Compute error vectors for each node
1087 : *------------------------------------------------------------*/
1088 69848 : pTmp = resid[1];
1089 628632 : FOR( c = 0; c < maxC; c++ )
1090 : {
1091 : /* Subtract codebook entry from residual vector of parent node and multiply with scale factor */
1092 558784 : p1 = resid[0] + parents[c] * N;
1093 558784 : p2i = indices[1][c * stages + s];
1094 558784 : move16();
1095 :
/* Dimensions outside [start, start + n) are copied unchanged from the parent residual;
   the n dimensions of this stage are produced by depack_sub_values_fx() from the parent
   residual and the selected codebook entry. */
1096 558784 : Copy( p1, pTmp, start );
1097 558784 : depack_sub_values_fx( pTmp + start, p1 + start, &cb[s][p2i * N34], n );
1098 558784 : Copy( p1 + start + n, pTmp + start + n, sub( N, add( start, n ) ) );
1099 :
1100 558784 : pTmp += N;
1101 :
1102 : /* Get indices that were used for parent node */
1103 : /*mvs2s(indices[0]+parents[c]*stages, indices[1]+c*stages, s);*/
/* Only the first s entries (stages 0..s-1) are inherited; entry s was written in the search above */
1104 558784 : Copy( indices[0] + parents[c] * stages, indices[1] + c * stages, s );
1105 : } /* for (c=0; c<maxC; c++) */
1106 69848 : m = maxC;
1107 69848 : move16();
1108 : } /* for (m=1, s=0; s<stages; s++) */
1109 :
1110 : /* Find the optimum candidate */
1111 34924 : c2 = findIndexOfMinWord64( dist_64[1], maxC );
1112 : /*mvi2i (indices[1]+c2*stages, Idx, stages);*/
1113 34924 : Copy( indices[1] + c2 * stages, Idx, stages );
1114 :
1115 :
1116 34924 : return;
1117 : }
1118 : /*--------------------------------------------------------------------------*
1119 : * msvq_enc_ivas_fx()
1120 : *
1121 : * MSVQ encoder
1122 : *--------------------------------------------------------------------------*/
1123 :
/*
 * Multi-stage VQ encoder (M-best tree search) for a 32-bit target u_fx[] with
 * block exponent u_e. Distortions are kept as mantissa/exponent pairs
 * (dist_fx[] / dist_e[]); an exponent of MAX_16 / 2 marks a slot that has not
 * been filled yet.
 *
 * For every stage the maxC best (parent, code vector) pairs are kept, the
 * residual of each kept candidate is recomputed and its index path is extended.
 * When applyDCT_flag is non-zero, stage 0 is searched by
 * msvq_stage1_dct_search_fx() instead of the exhaustive weighted search, and
 * the vector subtracted from the residual in stage 0 is taken from
 * st1_syn_vec_ptr_fx.
 *
 * After the last stage the index path of the candidate selected by
 * minimum_32_fx() is copied to Idx[0..stages-1].
 *
 * The work buffers are local arrays dimensioned with LSFMBEST_MAX, M_MAX and
 * MAX_VQ_STAGES_USED; maxC, N and stages are not range-checked in this function.
 */
1124 92353 : void msvq_enc_ivas_fx(
1125 : const Word16 *const *cb, /* i : Codebook (indexed cb[*stages][levels][p]) Q_cb */
1126 : const Word16 Q_cb, /* i : Codebook Q */
1127 : const Word16 dims[], /* i : Dimension of each codebook stage (NULL: full dim.) */
1128 : const Word16 offs[], /* i : Starting dimension of each codebook stage (NULL: 0) */
1129 : const Word32 u_fx[], /* i : Vector to be encoded (prediction and mean removed) (exp : u_e) */
1130 : const Word16 u_e, /* i : Exponent for Vector to be encoded */
1131 : const Word16 *levels, /* i : Number of levels in each stage */
1132 : const Word16 maxC, /* i : Tree search size (number of candidates kept from one stage to the next == M-best) */
1133 : const Word16 stages, /* i : Number of stages */
1134 : const Word16 w[], /* i : Weights Q8 */
1135 : const Word16 N, /* i : Vector dimension */
1136 : const Word16 maxN, /* i : Codebook dimension */
1137 : const Word16 applyDCT_flag, /* i : applyDCT flag */
1138 : Word32 *invTrfMatrix_fx, /* i/o: synthesis matrix Q31 */
1139 : Word16 Idx[] /* o : Indices */
1140 : )
1141 : {
1142 : Word16 j;
1143 : const Word16 *cbp, *cb_stage;
1144 : Word32 resid_buf_fx[2 * LSFMBEST_MAX * M_MAX], *resid_fx[2];
1145 : Word32 *pTmp, *p1, *p2; // pTmp_e
1146 : Word16 pTmp_e;
1147 : Word16 *indices[2], m, s, c, c2, p_max, i;
1148 : Word16 idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX];
1149 : Word32 dist_buf_fx[2 * LSFMBEST_MAX], *dist_fx[2], tmp, en, ss2, Tmp[M_MAX];
1150 : Word16 dist_buf_e[2 * LSFMBEST_MAX], *dist_e[2];
1151 : Word16 tmp_e, tmp_n, en_e;
1152 : Word16 resid_e;
1153 : Word16 n, maxn, start;
1154 : Word64 W_acc; /*64 bit accumulator*/
1155 :
/* Scratch pointers for the DCT stage-0 search. Both are placed just below the midpoint of
   resid_buf_fx, i.e. they alias the tail of its first half. They are computed on every call
   but only handed to the stage-1 DCT helpers when applyDCT_flag is set.
   NOTE(review): presumably FDCNG_VQ_MAX_LEN * maxC and levels[0] never exceed
   LSFMBEST_MAX * M_MAX, otherwise these pointers would fall before the buffer - confirm. */
1156 92353 : Word32 *st1_syn_vec_ptr_fx = &( resid_buf_fx[1 * LSFMBEST_MAX * M_MAX] ) - FDCNG_VQ_MAX_LEN * maxC;
1157 92353 : Word32 *st1_mse_ptr_fx = &( resid_buf_fx[1 * LSFMBEST_MAX * M_MAX] ) - ( levels[0] );
1158 : Word16 indices_st1_local[FDCNG_VQ_DCT_NSEGM * 2];
1159 :
1160 : /*----------------------------------------------------------------*
1161 : * Allocate memory for previous (parent) and current nodes.
1162 : * Parent node is indexed [0], current node is indexed [1].
1163 : *----------------------------------------------------------------*/
1164 92353 : indices[0] = idx_buf;
1165 92353 : indices[1] = idx_buf + maxC * stages; /*move16();*/
1166 : /*vr_iset(0, idx_buf, 2*stages*maxC);*/
1167 92353 : set16_fx( idx_buf, 0, (Word16) ( 2 * stages * maxC ) );
1168 :
1169 92353 : resid_fx[0] = resid_buf_fx;
1170 92353 : resid_fx[1] = resid_buf_fx + maxC * N; /*move16();*/
1171 :
1172 92353 : dist_fx[0] = dist_buf_fx;
1173 92353 : dist_e[0] = dist_buf_e;
1174 92353 : dist_fx[1] = dist_buf_fx + maxC;
1175 92353 : dist_e[1] = dist_buf_e + maxC;
1176 :
1177 : /*vr_iset(0, parents, maxC);*/
1178 92353 : set16_fx( parents, 0, maxC );
1179 :
1180 : /*----------------------------------------------------------------*
1181 : * LSF weights are normalized, so it is always better to multiply it first
1182 : * Set up initial distance vector
1183 : *----------------------------------------------------------------*/
/* W_acc accumulates (u_fx[j] * (w[j] << 2)) * u_fx[j] over all N dimensions */
1184 92353 : W_acc = W_mult_32_32( Mpy_32_16_1( u_fx[0], shl( w[0], 2 ) ), u_fx[0] ); // 2*Qu - 6 + 1
1185 1497409 : FOR( j = 1; j < N; j++ )
1186 : {
1187 1405056 : W_acc = W_mac_32_32( W_acc, Mpy_32_16_1( u_fx[j], shl( w[j], 2 ) ), u_fx[j] ); // 2*Qu - 6 + 1
1188 : }
1189 :
/* Normalise the 64-bit sum and keep its upper 32 bits as mantissa ss2 with exponent tmp_e */
1190 92353 : tmp_n = W_norm( W_acc );
1191 92353 : ss2 = W_extract_h( W_shl( W_acc, tmp_n ) );
1192 92353 : tmp_e = sub( add( shl( u_e, 1 ), 5 ), tmp_n );
1193 :
1194 : /* Set up initial error (residual) vectors */
/* resid_e is the exponent shared by all residual vectors: u_e, or at least 12 in DCT mode */
1195 92353 : pTmp = resid_fx[1]; /*move16();*/
1196 92353 : resid_e = u_e;
1197 92353 : move16();
1198 92353 : IF( applyDCT_flag != 0 )
1199 : {
1200 2742 : resid_e = s_max( u_e, 12 );
1201 : }
1202 383122 : FOR( c = 0; c < maxC; c++ )
1203 : {
1204 290769 : Copy32( u_fx, pTmp + c * N, N );
1205 290769 : test();
/* In DCT mode with u_e < 12 the copy is rescaled from exponent u_e to resid_e */
1206 290769 : IF( applyDCT_flag != 0 && LT_16( u_e, 12 ) )
1207 : {
1208 21448 : scale_sig32( pTmp + c * N, N, sub( u_e, resid_e ) );
1209 : }
1210 290769 : dist_fx[1][c] = ss2;
1211 290769 : move32();
1212 290769 : dist_e[1][c] = tmp_e;
1213 290769 : move16();
1214 : }
1215 :
1216 : /* Loop over all stages */
/* m is the number of valid parent candidates: 1 before the first stage, maxC afterwards */
1217 92353 : m = 1;
1218 92353 : move16();
1219 390763 : FOR( s = 0; s < stages; s++ )
1220 : {
1221 : /* codebook pointer is set to point to the first entry of the codebook of stage s */
1222 298410 : cbp = cb[s]; /*Q_cb*/
/* cb_stage keeps the start of the stage codebook; cbp is advanced during the search */
1223 298410 : cb_stage = cbp;
1224 :
1225 : /* Set up pointers to parent and current nodes */
1226 298410 : swap( indices[0], indices[1], Word16 * );
1227 298410 : move16();
1228 298410 : move16();
1229 298410 : move16();
1230 298410 : swap( resid_fx[0], resid_fx[1], Word32 * );
1231 298410 : move32();
1232 298410 : move32();
1233 298410 : move32();
1234 298410 : swap( dist_fx[0], dist_fx[1], Word32 * );
1235 298410 : swap( dist_e[0], dist_e[1], Word16 * );
1236 298410 : move32();
1237 298410 : move32();
1238 298410 : move32();
1239 298410 : move16();
1240 298410 : move16();
1241 298410 : move16();
1242 :
1243 : /* p_max points to maximum distortion node (worst of best) */
1244 298410 : p_max = 0;
1245 298410 : move16();
1246 :
/* n: number of dimensions covered by this stage, maxn: stride between codebook entries.
   Both follow dims[s] when a per-stage dimension table is supplied. */
1247 298410 : n = N;
1248 298410 : move16();
1249 298410 : maxn = maxN;
1250 298410 : move16();
1251 298410 : if ( dims )
1252 : {
1253 0 : n = dims[s];
1254 0 : move16();
1255 : }
1256 298410 : if ( dims )
1257 : {
1258 0 : maxn = n;
1259 0 : move16();
1260 : }
1261 :
1262 298410 : assert( ( maxn % 4 ) == 0 );
1263 :
/* start: first dimension covered by this stage (0 when no offset table is given) */
1264 298410 : start = 0;
1265 298410 : move16();
1266 298410 : if ( offs )
1267 : {
1268 0 : start = offs[s];
1269 0 : move16();
1270 : }
1271 :
/* Zero Tmp outside [start, start + n) so that the full-length (N) dot product in the
   search only sees the dimensions covered by this stage. */
1272 298410 : set32_fx( Tmp, 0, start );
1273 298410 : set32_fx( Tmp + start + n, 0, sub( N, add( start, n ) ) );
1274 :
1275 : /* Set distortions to a large value */
/* The exponent MAX_16 / 2 doubles as the "slot not filled yet" marker tested further down */
1276 1271740 : FOR( j = 0; j < maxC; j++ )
1277 : {
1278 973330 : dist_fx[1][j] = MAX_32;
1279 973330 : move32();
1280 973330 : dist_e[1][j] = MAX_16 / 2;
1281 973330 : move16();
1282 : }
1283 :
1284 298410 : test();
1285 298410 : IF( !s && applyDCT_flag != 0 ) /* means: m==1 */
1286 : {
1287 : /* stage 1 candidates search in truncated dct24 domain without any weights */
1288 2742 : assert( N == FDCNG_VQ_MAX_LEN || N == FDCNG_VQ_MAX_LEN_WB ); /* 21 and 24 allowed */
1289 2742 : assert( maxC == 2 * FDCNG_VQ_DCT_NSEGM );
1290 2742 : p_max = msvq_stage1_dct_search_fx( u_fx, u_e, FDCNG_VQ_MAX_LEN, maxC, DCT_T2_24_XX, FDCNG_VQ_DCT_MAXTRUNC, (Word32 *) invTrfMatrix_fx, cdk1r_tr_midQ_truncQ_fx, fdcng_dct_scaleF_fx, FDCNG_VQ_DCT_NSEGM,
1291 : cdk1_ivas_cols_per_segment, cdk1_ivas_trunc_dct_cols_per_segment, cdk1_ivas_entries_per_segment, cdk1_ivas_cum_entries_per_segment, cdk_37bits_ivas_stage1_W8Qx_dct_sections,
1292 : stage1_dct_col_syn_shift, cdk1_ivas_segm_neighbour_fwd, cdk1_ivas_segm_neighbour_rev, FDCNG_VQ_DCT_NPOST, st1_mse_ptr_fx, indices_st1_local, st1_syn_vec_ptr_fx, dist_fx[1], &dist_e[1][0] );
1293 :
1294 : /* move established stage#1 indices to the global MSVQ list structure */
/* The helper receives only &dist_e[1][0]; that exponent is replicated to all maxC slots */
1295 2742 : set16_fx( dist_e[1], dist_e[1][0], maxC );
1296 24678 : FOR( c = 0; c < maxC; c++ )
1297 : {
1298 21936 : indices[1][c * stages] = indices_st1_local[c];
1299 21936 : move16();
1300 : }
1301 : }
1302 : ELSE
1303 : {
1304 16782316 : FOR( j = 0; j < levels[s]; j++ )
1305 : {
1306 : /* Compute weighted codebook element and its energy */
1307 16486648 : en = 0;
1308 16486648 : move32();
1309 16486648 : en_e = 0;
1310 16486648 : move16();
1311 16486648 : W_acc = 0;
1312 16486648 : move64();
/* Tmp[] = (w << 2) * code vector; W_acc accumulates Tmp[] * code vector */
1313 286233912 : FOR( c2 = 0; c2 < n; c2++ )
1314 : {
1315 269747264 : Tmp[start + c2] = L_mult0( shl( w[start + c2], 2 ), cbp[c2] );
1316 269747264 : move32();
1317 269747264 : W_acc = W_mac_32_16( W_acc, Tmp[start + c2], cbp[c2] );
1318 : }
1319 :
1320 16486648 : tmp_n = W_norm( W_acc );
1321 :
/* en / en_e: normalised mantissa and exponent of the weighted code vector energy */
1322 16486648 : en = W_extract_h( W_shl( W_acc, tmp_n ) );
1323 16486648 : en_e = sub( sub( 52, shl( Q_cb, 1 ) ), tmp_n );
1324 :
1325 16486648 : cbp += maxn; /* pointer is incremented */
1326 :
1327 : /* Iterate over all parent nodes */
1328 54116128 : FOR( c = 0; c < m; c++ )
1329 : {
1330 37629480 : pTmp = &resid_fx[0][c * N];
1331 37629480 : pTmp_e = resid_e;
1332 37629480 : move16();
1333 : /*tmp = (*pTmp++) * Tmp[0];*/
/* Dot product between the parent residual and the weighted code vector, over all N dimensions */
1334 37629480 : W_acc = W_mult_32_32( pTmp[0], Tmp[0] );
1335 :
1336 649758848 : FOR( i = 1; i < N; i++ )
1337 : {
1338 612129368 : W_acc = W_mac_32_32( W_acc, pTmp[i], Tmp[i] );
1339 : }
1340 37629480 : tmp_n = W_norm( W_acc );
1341 37629480 : tmp = W_extract_h( W_shl( W_acc, tmp_n ) );
1342 37629480 : tmp_e = sub( add( pTmp_e, sub( Q31 - Q10, Q_cb ) ), tmp_n );
1343 :
1344 :
/* tmp_n is reused from here on as the common exponent:
   max( cross-term exponent, energy exponent, parent distortion exponent ) */
1345 37629480 : tmp_n = s_max( tmp_e, en_e );
1346 37629480 : tmp_n = s_max( dist_e[0][c], tmp_n );
1347 :
1348 37629480 : IF( NE_16( dist_e[0][c], MAX_16 / 2 ) )
1349 : {
/* Parent slot is filled: new distortion = en - 2 * cross term + parent distortion, with all
   three terms shifted to the common exponent (raised by 2, presumably as headroom for the sum) */
1350 37629480 : tmp_n = add( tmp_n, 2 );
1351 37629480 : tmp = L_sub( L_shl( en, sub( en_e, tmp_n ) ), L_shl( tmp, add( sub( tmp_e, tmp_n ), 1 ) ) );
1352 37629480 : tmp = L_add( tmp, L_shl( dist_fx[0][c], sub( dist_e[0][c], tmp_n ) ) );
1353 : }
1354 : ELSE
1355 : {
/* Parent slot was never filled: use a value just below the "empty" mantissa MAX_32.
   NOTE(review): tmp_n is a Word16 but its store is counted with move32() - looks like it
   should be move16(); confirm. */
1356 0 : tmp = MAX_32 - 1;
1357 0 : move32();
1358 0 : tmp_n = MAX_16 / 2;
1359 0 : move32();
1360 : }
1361 :
/* Presumably a result of -1 means "first operand is smaller", i.e. the new candidate is
   strictly better than the currently worst kept one - confirm against BASOP_Util_Cmp_Mant32Exp */
1362 37629480 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( tmp, tmp_n, dist_fx[1][p_max], dist_e[1][p_max] ), -1 ) )
1363 : {
1364 : /* Replace worst */
1365 4479936 : dist_fx[1][p_max] = tmp;
1366 4479936 : move32();
1367 4479936 : dist_e[1][p_max] = tmp_n;
1368 4479936 : move16();
1369 4479936 : indices[1][p_max * stages + s] = j;
1370 4479936 : move16();
1371 4479936 : parents[p_max] = c;
1372 4479936 : move16();
1373 :
/* Rescan all kept candidates for the new worst one.
   NOTE(review): tmp_e is reassigned at the top of the next parent iteration before it is
   read again, so the store of p_max into tmp_e below looks redundant - confirm. */
1374 4479936 : p_max = 0;
1375 4479936 : move16();
1376 4479936 : tmp_e = p_max;
1377 4479936 : move16();
1378 :
/* The same pass collects in tmp_n the largest exponent among the filled slots */
1379 4479936 : tmp_n = dist_e[1][0];
1380 4479936 : move16();
1381 16306603 : FOR( c2 = 1; c2 < maxC; c2++ )
1382 : {
1383 11826667 : if ( EQ_16( BASOP_Util_Cmp_Mant32Exp( dist_fx[1][c2], dist_e[1][c2], dist_fx[1][p_max], dist_e[1][p_max] ), 1 ) )
1384 : {
1385 4474090 : p_max = c2;
1386 4474090 : move16();
1387 : }
1388 11826667 : test();
1389 11826667 : if ( GT_16( dist_e[1][c2], tmp_n ) && NE_16( dist_e[1][c2], MAX_16 / 2 ) )
1390 : {
1391 203092 : tmp_n = dist_e[1][c2];
1392 203092 : move16();
1393 : }
1394 : }
/* Re-align every filled slot to the exponent tmp_n; unfilled slots keep their marker */
1395 20786539 : FOR( c2 = 0; c2 < maxC; c2++ )
1396 : {
1397 16306603 : IF( NE_16( dist_e[1][c2], MAX_16 / 2 ) )
1398 : {
1399 15097649 : dist_fx[1][c2] = L_shl( dist_fx[1][c2], sub( dist_e[1][c2], tmp_n ) );
1400 15097649 : move32();
1401 15097649 : dist_e[1][c2] = tmp_n;
1402 15097649 : move16();
1403 : }
1404 : }
1405 : } /* IF( new distortion compares as -1 against the worst kept distortion ) */
1406 : } /* FOR (c=0; c<m; c++) */
1407 : } /* FOR (j=0; j<levels[s]; j++) */
1408 : }
1409 :
1410 :
1411 : /*------------------------------------------------------------*
1412 : * Compute error vectors for each node
1413 : *------------------------------------------------------------*/
1414 298410 : pTmp = resid_fx[1];
1415 1271740 : FOR( c = 0; c < maxC; c++ )
1416 : {
1417 :
1418 : /* Subtract codebook entry from residual vector of parent node and multiply with scale factor */
1419 973330 : p1 = resid_fx[0] + parents[c] * N;
1420 973330 : p2 = NULL;
/* NOTE(review): if cb_stage is NULL outside the DCT stage-0 case, p2 stays NULL and is
   dereferenced in the subtraction loop below - presumably callers never do that; confirm. */
1421 973330 : IF( cb_stage != NULL )
1422 : {
1423 : // p2 = cb_stage + ( indices[1][c * stages + s] ) * maxn; /* regular ptr init */
/* Widen the selected code vector into Tmp and rescale it by ( Q31 - Q_cb - resid_e ) */
1424 951394 : Copy_Scale_sig_16_32_DEPREC( cb_stage + ( indices[1][c * stages + s] ) * maxn, Tmp, N, 0 );
1425 951394 : scale_sig32( Tmp, N, sub( sub( Q31, Q_cb ), resid_e ) );
1426 951394 : p2 = Tmp;
1427 : }
1428 973330 : test();
1429 973330 : IF( s == 0 && applyDCT_flag != 0 )
1430 : {
/* DCT stage 0: subtract the vector stored for candidate c by the stage-1 DCT search instead */
1431 21936 : p2 = (Word32 *) &( st1_syn_vec_ptr_fx[c * FDCNG_VQ_MAX_LEN] ); /*ptr init of stage 1 */
1432 : }
1433 :
/* Dimensions outside [start, start + n) are copied unchanged from the parent residual */
1434 973330 : Copy32( p1, pTmp, start );
1435 17449810 : FOR( j = 0; j < n; j++ )
1436 : {
1437 16476480 : pTmp[start + j] = L_sub( p1[start + j], p2[j] );
1438 16476480 : move32();
1439 : }
1440 973330 : Copy32( p1 + start + n, pTmp + start + n, sub( N, add( start, n ) ) );
1441 :
1442 973330 : pTmp += N;
1443 :
1444 : /* Get indices that were used for parent node */
1445 : /*mvs2s(indices[0]+parents[c]*stages, indices[1]+c*stages, s);*/
/* Only the first s entries (stages 0..s-1) are inherited; entry s was written in the search above */
1446 973330 : Copy( indices[0] + parents[c] * stages, indices[1] + c * stages, s );
1447 : } /* for (c=0; c<maxC; c++) */
1448 : /* recalc MSE for WB(0..20) coeffs ,
1449 : essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search,
1450 : excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep WB MSEs update for the subsequent stages
1451 : */
1452 : /* The recalculation is done only after stage 0 in DCT mode when n == FDCNG_VQ_MAX_LEN_WB.
1453 : The helper's return value becomes p_max and the exponent it stores in dist_e[1][0]
1454 : is replicated to all maxC slots.
1455 : NOTE(review): the condition has three terms but only one test() precedes it - confirm the complexity count. */
1456 298410 : test();
1457 298410 : IF( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB )
1458 : {
1459 725 : p_max = msvq_stage1_dct_recalc_candidates_fdcng_wb_fx( st1_syn_vec_ptr_fx, resid_e, u_fx, u_e, maxC, dist_fx[1], &dist_e[1][0] );
1460 725 : set16_fx( dist_e[1], dist_e[1][0], maxC );
1461 : }
1462 298410 : m = maxC;
1463 298410 : move16();
1464 : } /* for (m=1, s=0; s<stages; s++) */
1465 :
1466 : /* Find the optimum candidate */
/* Only the mantissas are passed to minimum_32_fx(); this presumably relies on the kept entries
   sharing one exponent (they are re-aligned after every replacement) - confirm. */
1467 92353 : c2 = minimum_32_fx( dist_fx[1], maxC, NULL );
1468 : /*mvi2i (indices[1]+c2*stages, Idx, stages);*/
1469 92353 : Copy( indices[1] + c2 * stages, Idx, stages );
1470 :
1471 :
1472 92353 : return;
1473 : }
1474 :
1475 :
1476 : /*--------------------------------------------------------------------------*
1477 : * lsf_msvq_ma_encprm_fx()
1478 : *
1479 : *
1480 : *--------------------------------------------------------------------------*/
1481 :
/*
 * Writes the LPC quantizer parameters to the bitstream and returns the number
 * of bits written.
 *
 * The first no_indices entries of param_lpc[] are pushed with
 * bits_param_lpc[i] bits each. One further entry (the one following them) is
 * pushed with MIDLSF_NBITS bits when acelp_mode is not VOICED, core is
 * ACELP_CORE and acelp_midLpc is non-zero.
 */
1482 979 : Word16 lsf_msvq_ma_encprm_fx(
1483 : BSTR_ENC_HANDLE hBstr,
1484 : Word16 *param_lpc, // Q0
1485 : Word16 core,
1486 : Word16 acelp_mode,
1487 : Word16 acelp_midLpc,
1488 : Word16 *bits_param_lpc,
1489 : Word16 no_indices )
1490 : {
1491 : Word16 i, nbits_lpc;
1492 : Word16 bits_midlpc;
1493 :
1494 979 : bits_midlpc = MIDLSF_NBITS;
1495 979 : move16();
1496 979 : nbits_lpc = 0;
1497 979 : move16();
1498 :
/* Push the regular indices; param_lpc is advanced past each written entry */
1499 4001 : FOR( i = 0; i < no_indices; i++ )
1500 : {
1501 :
1502 3022 : push_next_indice( hBstr, *param_lpc, bits_param_lpc[i] );
1503 3022 : param_lpc++;
1504 3022 : nbits_lpc = add( nbits_lpc, bits_param_lpc[i] );
1505 : }
1506 979 : IF( NE_16( acelp_mode, VOICED ) )
1507 : {
1508 731 : test();
1509 731 : IF( ( core == ACELP_CORE ) && acelp_midLpc )
1510 : {
1511 :
/* param_lpc now points at the entry after the regular indices; it is written as the extra parameter */
1512 342 : push_next_indice( hBstr, *param_lpc, bits_midlpc );
1513 342 : nbits_lpc = add( nbits_lpc, bits_midlpc );
1514 : }
1515 : }
1516 :
1517 979 : return nbits_lpc;
1518 : }
1519 :
1520 :
/*
 * Writes the LPC quantizer parameters to the bitstream and returns the number
 * of bits written. Same steps as lsf_msvq_ma_encprm_fx(), with all input
 * parameters const-qualified.
 *
 * The first no_indices entries of param_lpc[] are pushed with
 * bits_param_lpc[i] bits each. One further entry (the one following them) is
 * pushed with MIDLSF_NBITS bits when acelp_mode is not VOICED, core is
 * ACELP_CORE and acelp_midLpc is non-zero.
 */
1521 111960 : Word16 lsf_msvq_ma_encprm_ivas_fx(
1522 : BSTR_ENC_HANDLE hBstr,
1523 : const Word16 *param_lpc, // Q0
1524 : const Word16 core,
1525 : const Word16 acelp_mode,
1526 : const Word16 acelp_midLpc,
1527 : const Word16 *bits_param_lpc,
1528 : const Word16 no_indices )
1529 : {
1530 : Word16 i, nbits_lpc;
1531 : Word16 bits_midlpc;
1532 :
1533 111960 : bits_midlpc = MIDLSF_NBITS;
1534 111960 : move16();
1535 111960 : nbits_lpc = 0;
1536 111960 : move16();
1537 :
/* Push the regular indices; param_lpc is advanced past each written entry */
1538 460452 : FOR( i = 0; i < no_indices; i++ )
1539 : {
1540 :
1541 348492 : push_next_indice( hBstr, *param_lpc, bits_param_lpc[i] );
1542 348492 : param_lpc++;
1543 348492 : nbits_lpc = add( nbits_lpc, bits_param_lpc[i] );
1544 : }
1545 111960 : IF( NE_16( acelp_mode, VOICED ) )
1546 : {
1547 88256 : test();
1548 88256 : IF( ( core == ACELP_CORE ) && acelp_midLpc )
1549 : {
1550 :
/* param_lpc now points at the entry after the regular indices; it is written as the extra parameter */
1551 0 : push_next_indice( hBstr, *param_lpc, bits_midlpc );
1552 0 : nbits_lpc = add( nbits_lpc, bits_midlpc );
1553 : }
1554 : }
1555 :
1556 111960 : return nbits_lpc;
1557 : }
1558 :
1559 :
1560 : /*--------------------------------------------------------------------------*
1561 : * midlsf_enc_fx()
1562 : *
1563 : *
1564 : *--------------------------------------------------------------------------*/
1565 :
/*
 * Selects the mid-frame LSF interpolation codebook entry.
 *
 * Each of the 32 table rows holds M interpolation factors. For every row the
 * candidate qlsf[j] = (1 - ratio) * qlsf0[j] + ratio * qlsf1[j] is built
 * (0x2000 stands for 1.0 in the ratio tables), a minimum spacing of
 * LSF_GAP_MID_FX to the previous coefficient is enforced, and the weighted
 * squared error against lsf[] is accumulated. The index of the first row with
 * the smallest error is returned in *idx.
 *
 * The table is tbl_mid_unv_wb_5b_fx for UNVOICED frames and
 * tbl_mid_gen_wb_5b_fx otherwise; the weights come from Unified_weighting_fx().
 *
 * NOTE(review): the loops and the table stride use the constant M, while
 * lpcorder is only forwarded to Unified_weighting_fx() - presumably
 * lpcorder == M for all callers; confirm.
 */
1566 717 : void midlsf_enc_fx(
1567 : const Word16 qlsf0[], /* i: quantized lsf coefficients (3Q12) */
1568 : const Word16 qlsf1[], /* i: quantized lsf coefficients (3Q12) */
1569 : const Word16 lsf[], /* i: lsf coefficients (3Q12) */
1570 : Word16 *idx, /* o: codebook index */
1571 : const Word16 lpcorder, /* i: order of the lpc */
1572 : const Word32 *Bin_Ener_128_fx, // Q_ener
1573 : const Word16 Q_ener,
1574 : const Word8 narrowBand,
1575 : const Word32 sr_core,
1576 : const Word16 coder_type )
1577 : {
1578 : Word32 err, err_min, L_tmp;
1579 : Word16 k, k1, j, tmp, size, qlsf[M], wghts[M];
1580 : const Word16 *ratio;
/* Overflow is declared locally only under this switch; otherwise the &Overflow uses below
   must resolve to a declaration outside this function (not visible here). */
1581 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
1582 717 : Flag Overflow = 0;
1583 : #endif
1584 :
1585 717 : IF( EQ_16( coder_type, UNVOICED ) )
1586 : {
1587 27 : ratio = tbl_mid_unv_wb_5b_fx;
1588 : }
1589 : ELSE
1590 : {
1591 690 : ratio = tbl_mid_gen_wb_5b_fx;
1592 : }
/* Number of table rows searched */
1593 717 : size = 32;
1594 717 : move16();
1595 :
1596 : /* Weights */
1597 717 : Unified_weighting_fx(
1598 : Bin_Ener_128_fx, /* i : FFT Bin energy 128 bins in two sets Q_ener */
1599 : Q_ener,
1600 : lsf, /* i : LSF vector x2.56 */
1601 : wghts, /* o : LP weighting filter (numerator) Q8 */
1602 : narrowBand, /* i : flag for Narrowband */
1603 717 : sub( coder_type, UNVOICED ) == 0, /* i : flag for Unvoiced frame */
1604 : sr_core, /* i : sampling rate of core-coder */
1605 : lpcorder /* i : LP order */
1606 : );
/* NOTE(review): err_min is a Word32 but its store is counted with move16() - looks like it
   should be move32(); confirm. */
1607 717 : err_min = MAXINT32;
1608 717 : move16();
1609 717 : *idx = 0;
1610 717 : move16();
/* k1 is the offset of row k in the ratio table (k * M) */
1611 717 : k1 = 0;
1612 717 : move16();
1613 23661 : FOR( k = 0; k < size; k++ )
1614 : {
1615 22944 : err = L_deposit_l( 0 );
1616 :
1617 390048 : FOR( j = 0; j < M; j++ )
1618 : {
1619 : /* qlsf[j] = (1.0f - ratio[k*M+j]) * qlsf0[j] + ratio[k*M+j] * qlsf1[j]; */
1620 367104 : L_tmp = L_mult( sub( 0x2000, ratio[k1 + j] ), qlsf0[j] );
1621 367104 : L_tmp = L_mac( L_tmp, ratio[k1 + j], qlsf1[j] );
1622 367104 : qlsf[j] = round_fx( L_shl( L_tmp, 2 ) );
1623 367104 : test();
1624 367104 : test();
/* Keep at least LSF_GAP_MID_FX between consecutive interpolated coefficients.
   NOTE(review): LT_16( j, M ) is always true inside this loop (j < M). */
1625 367104 : IF( j > 0 && LT_16( j, M ) && LT_16( qlsf[j], add( qlsf[j - 1], LSF_GAP_MID_FX ) ) )
1626 : {
1627 2048 : qlsf[j] = add( qlsf[j - 1], LSF_GAP_MID_FX );
1628 : }
1629 :
1630 367104 : tmp = sub( lsf[j], qlsf[j] );
1631 : /* err += wghts[j] * ftemp * ftemp; */
1632 : /* tmp is usually very small, we can have some extra precision with very rare saturation */
1633 367104 : tmp = shl_o( tmp, 4, &Overflow );
1634 367104 : tmp = mult_ro( tmp, tmp, &Overflow );
1635 367104 : err = L_mac( err, tmp, wghts[j] );
1636 : }
1637 22944 : err = L_shl_o( err, 2, &Overflow );
1638 :
1639 : /* err = L_shl(err,Wscale); */
1640 22944 : err = Mult_32_16( err, LSF_1_OVER_256SQ );
1641 : /* err = Mult_32_16(err,Wmult); */
1642 :
/* Strict comparison: on equal errors the earlier row is kept */
1643 22944 : IF( LT_32( err, err_min ) )
1644 : {
1645 2921 : err_min = L_add( err, 0 );
1646 2921 : *idx = k;
1647 2921 : move16();
1648 : }
1649 22944 : k1 += M;
1650 22944 : move16();
1651 : }
1652 :
1653 717 : return;
1654 : }
1655 :
1656 :
1657 : /*--------------------------------------------------------------------------*
1658 : * Q_lsf_tcxlpc_fx()
1659 : *
1660 : * Returns: number of indices
1661 : *--------------------------------------------------------------------------*/
1662 :
/*
 * Quantizes an LSF vector for TCX LPC and returns the number of indices
 * written to indices[].
 *
 * Steps, as performed below:
 *  1. compute the weights with Unified_weighting_fx();
 *  2. form the prediction pred[] = lsf_means[narrowband][] + MU_MA_FX * mem_MA[]
 *     and subtract it from lsf[] in place;
 *  3. encode the prediction residual with msvq_enc_fx() (TCXLPC_NUMSTAGES
 *     stages) and decode it again with msvq_dec() into lsf_q[];
 *  4. set indices[0] from lsf_ind_is_active();
 *  5. encode a second residual with the "ind" codebooks (TCXLPC_IND_NUMSTAGES
 *     stages); its decoded contribution is added to lsf_q_ind[] and counted in
 *     the return value only when indices[0] is non-zero;
 *  6. add the prediction back to lsf_q[] and to lsf[], add the means to
 *     lsf_q_ind[], reorder both quantized vectors with reorder_lsf_fx() and,
 *     if lsp_q_ind is not NULL, convert lsf_q_ind[] into it.
 *
 * Return value: 1 + TCXLPC_NUMSTAGES, plus TCXLPC_IND_NUMSTAGES when
 * indices[0] is non-zero.
 *
 * lsf[] is modified during the call; the prediction is added back at the end
 * with add(), so it is presumably restored bit-exactly as long as none of the
 * add()/sub() calls saturate - confirm.
 * NOTE(review): lsf_rem[], lsf_rem_q_ind[] (M entries) and weights[] are indexed
 * up to lpcorder, and Unified_weighting_fx() is called with M and a fixed
 * 12800 - presumably lpcorder == M for all callers; confirm.
 */
1663 0 : Word16 Q_lsf_tcxlpc_fx(
1664 : /* const */ Word16 lsf[], /* i : original lsf 14Q1 * 1.28 */
1665 : Word16 lsf_q[], /* o : quantized lsf (14Q1*1.28)*/
1666 : Word16 lsp_q_ind[], /* o : quantized lsp (w/o MA prediction) */
1667 : Word16 indices[], /* o : VQ indices */
1668 : const Word16 lpcorder, /* i : LPC order */
1669 : const Word16 narrowband, /* i : narrowband flag */
1670 : const Word16 cdk, /* i : codebook selector */
1671 : const Word16 mem_MA[], /* i : MA memory */
1672 : const Word16 coder_type,
1673 : const Word32 *Bin_Ener, // Q_ener
1674 : const Word16 Q_ener )
1675 : {
1676 : Word16 weights[M + 1];
1677 : Word16 pred[M16k];
1678 : Word16 i;
1679 : Word16 NumIndices;
1680 : Word16 lsf_q_ind[M16k];
1681 : const Word16 *means;
1682 : Word16 lsf_rem[M];
1683 : Word16 lsf_rem_q_ind[M];
1684 :
1685 0 : Unified_weighting_fx( Bin_Ener, Q_ener, lsf, weights, narrowband, (Word16) EQ_16( coder_type, UNVOICED ), 12800, M );
1686 :
1687 0 : move16();
1688 0 : NumIndices = 0;
1689 :
1690 : /* Put disabled flag */
/* indices[0] is a placeholder here; it is overwritten after the first quantization */
1691 0 : indices[NumIndices] = 0;
1692 0 : move16();
1693 0 : NumIndices = add( NumIndices, 1 );
1694 :
1695 : /* Inter-frame prediction */
1696 :
1697 0 : means = lsf_means[narrowband]; /* 14Q1 * 1.28 */
1698 :
/* NOTE(review): several Word16 stores in this function (pred[], lsf[], lsf_rem[], lsf_q[],
   lsf_q_ind[]) are not followed by move16(), unlike the stores to indices[] - confirm the
   complexity count. */
1699 0 : FOR( i = 0; i < lpcorder; ++i )
1700 : {
1701 0 : pred[i] = add( means[i], mult_r( MU_MA_FX, mem_MA[i] ) ); /* 14Q1 * 1.28 + ( 14Q1 * 1.28 * Q15 ) = 14Q1 * 1.28*/
1702 : }
1703 :
1704 : /* Subtract prediction */
1705 :
1706 0 : FOR( i = 0; i < lpcorder; ++i )
1707 : {
1708 0 : lsf[i] = sub( lsf[i], pred[i] ); /* 14Q1 * 1.28 */
1709 : }
1710 :
1711 :
/* Quantize the prediction residual; the stage indices go to indices[1..TCXLPC_NUMSTAGES] */
1712 0 : msvq_enc_fx(
1713 0 : lsf_codebook[narrowband][cdk],
1714 : lsf_dims,
1715 : lsf_offs,
1716 : lsf,
1717 : lsf_numlevels,
1718 : kMaxC,
1719 : TCXLPC_NUMSTAGES,
1720 : weights,
1721 : lpcorder,
1722 : lpcorder,
1723 0 : indices + NumIndices );
/* Decode the indices just written to obtain the quantized residual in lsf_q[] */
1724 0 : msvq_dec(
1725 0 : lsf_codebook[narrowband][cdk],
1726 : lsf_dims,
1727 : lsf_offs,
1728 : TCXLPC_NUMSTAGES,
1729 : lpcorder,
1730 : lpcorder,
1731 0 : indices + NumIndices,
1732 : lsf_q );
1733 0 : NumIndices = add( NumIndices, TCXLPC_NUMSTAGES );
1734 :
1735 0 : FOR( i = 0; i < lpcorder; ++i )
1736 : {
1737 0 : lsf_q_ind[i] = lsf_q[i]; /*(14Q1*1.28)*/
1738 0 : move16();
1739 : }
1740 :
1741 : /* Update flag */
1742 0 : indices[0] = lsf_ind_is_active( lsf_q_ind, lsf_means[narrowband], narrowband, cdk );
1743 0 : move16();
1744 :
1745 : /* Get residual vector */
/* lsf_rem = ( pred - means ) + ( lsf - lsf_q_ind ), where lsf[] currently holds the prediction residual */
1746 0 : FOR( i = 0; i < lpcorder; ++i )
1747 : {
1748 0 : lsf_rem[i] = add( sub( pred[i], lsf_means[narrowband][i] ), sub( lsf[i], lsf_q_ind[i] ) );
1749 : }
1750 :
1751 : /* Quantize using extra stage(s) */
/* The extra-stage indices are always written at indices + NumIndices, but NumIndices is
   advanced past them only when the flag in indices[0] is set (see below) */
1752 0 : msvq_enc_fx(
1753 0 : lsf_ind_codebook[narrowband][cdk],
1754 : lsf_ind_dims,
1755 : lsf_ind_offs,
1756 : lsf_rem,
1757 : lsf_ind_numlevels,
1758 : kMaxC,
1759 : TCXLPC_IND_NUMSTAGES,
1760 : weights,
1761 : lpcorder,
1762 : lpcorder,
1763 0 : indices + NumIndices );
1764 : /* Only add contribution if flag is enabled */
1765 0 : IF( indices[0] )
1766 : {
1767 : /* Decode */
1768 0 : msvq_dec(
1769 0 : lsf_ind_codebook[narrowband][cdk],
1770 : lsf_ind_dims,
1771 : lsf_ind_offs,
1772 : TCXLPC_IND_NUMSTAGES,
1773 : lpcorder,
1774 : lpcorder,
1775 0 : indices + NumIndices,
1776 : lsf_rem_q_ind );
1777 0 : NumIndices = add( NumIndices, TCXLPC_IND_NUMSTAGES );
1778 :
1779 : /* Add to MA-removed vector */
1780 0 : FOR( i = 0; i < lpcorder; ++i )
1781 : {
1782 0 : lsf_q_ind[i] = add( lsf_q_ind[i], lsf_rem_q_ind[i] );
1783 : }
1784 : }
1785 :
1786 : /* Add inter-frame prediction */
/* The prediction is added back to the quantized residual and to the input vector lsf[] */
1787 0 : FOR( i = 0; i < lpcorder; ++i )
1788 : {
1789 0 : lsf_q[i] = add( lsf_q[i], pred[i] );
1790 0 : lsf[i] = add( lsf[i], pred[i] );
1791 : }
1792 :
1793 0 : reorder_lsf_fx( lsf_q, TCXLPC_LSF_GAP, lpcorder, INT_FS_FX );
1794 :
/* lsf_q_ind[] gets the means (not the prediction) added before it is reordered */
1795 0 : FOR( i = 0; i < lpcorder; ++i )
1796 : {
1797 0 : lsf_q_ind[i] = add( lsf_q_ind[i], lsf_means[narrowband][i] );
1798 : }
1799 0 : reorder_lsf_fx( lsf_q_ind, TCXLPC_LSF_GAP, lpcorder, INT_FS_FX );
1800 :
/* The LSP output is optional: it is skipped when lsp_q_ind is NULL */
1801 0 : IF( lsp_q_ind )
1802 : {
1803 0 : E_LPC_lsf_lsp_conversion /*lsf2lsp*/ ( lsf_q_ind, lsp_q_ind, lpcorder );
1804 : }
1805 :
1806 0 : return NumIndices;
1807 : }
1808 :
1809 :
1810 17462 : Word16 Q_lsf_tcxlpc_ivas_fx(
1811 : /* const */ Word16 lsf[], /* i : original lsf */
1812 : Word16 lsf_q[], /* o : quantized lsf */
1813 : Word16 lsp_q_ind[], /* o : quantized lsp (w/o MA prediction) */
1814 : Word16 indices[], /* o : VQ indices */
1815 : const Word16 lpcorder, /* i : LPC order */
1816 : const Word16 narrowband, /* i : narrowband flag */
1817 : const Word16 cdk, /* i : codebook selector */
1818 : const Word16 mem_MA[], /* i : MA memory */
1819 : const Word16 coder_type,
1820 : const Word32 *Bin_Ener,
1821 : const Word16 Q_ener )
1822 : {
1823 : Word16 weights[M + 1];
1824 : Word16 pred[M16k];
1825 : Word16 i;
1826 : Word16 NumIndices;
1827 : Word16 lsf_q_ind[M16k];
1828 : const Word16 *means;
1829 : Word16 lsf_rem[M];
1830 : Word16 lsf_rem_q_ind[M];
1831 :
1832 17462 : Unified_weighting_fx( &Bin_Ener[L_FFT / 2], Q_ener, lsf, weights, narrowband, (Word16) EQ_16( coder_type, UNVOICED ), 12800, M );
1833 :
1834 17462 : move16();
1835 17462 : NumIndices = 0;
1836 :
1837 : /* Put disabled flag */
1838 17462 : indices[NumIndices] = 0;
1839 17462 : move16();
1840 17462 : NumIndices = add( NumIndices, 1 );
1841 :
1842 : /* Inter-frame prediction */
1843 :
1844 17462 : means = lsf_means[narrowband]; /* 14Q1 * 1.28 */
1845 :
1846 296854 : FOR( i = 0; i < lpcorder; ++i )
1847 : {
1848 279392 : pred[i] = add( means[i], mult_r( MU_MA_FX, mem_MA[i] ) ); /* 14Q1 * 1.28 + ( 14Q1 * 1.28 * Q15 ) = 14Q1 * 1.28*/
1849 279392 : move16();
1850 : }
1851 :
1852 : /* Subtract prediction */
1853 :
1854 296854 : FOR( i = 0; i < lpcorder; ++i )
1855 : {
1856 279392 : lsf[i] = sub( lsf[i], pred[i] ); /* 14Q1 * 1.28 */
1857 279392 : move16();
1858 : }
1859 :
1860 17462 : msvq_enc_lsf_fx64(
1861 17462 : lsf_codebook[narrowband][cdk],
1862 : lsf_dims,
1863 : lsf_offs,
1864 : lsf,
1865 : lsf_numlevels,
1866 : kMaxC,
1867 : TCXLPC_NUMSTAGES,
1868 : weights,
1869 : lpcorder,
1870 : lpcorder,
1871 17462 : indices + NumIndices );
1872 17462 : msvq_dec(
1873 17462 : lsf_codebook[narrowband][cdk],
1874 : lsf_dims,
1875 : lsf_offs,
1876 : TCXLPC_NUMSTAGES,
1877 : lpcorder,
1878 : lpcorder,
1879 17462 : indices + NumIndices,
1880 : lsf_q );
1881 17462 : NumIndices = add( NumIndices, TCXLPC_NUMSTAGES );
1882 :
1883 296854 : FOR( i = 0; i < lpcorder; ++i )
1884 : {
1885 279392 : lsf_q_ind[i] = lsf_q[i];
1886 279392 : move16();
1887 : }
1888 :
1889 : /* Update flag */
1890 17462 : indices[0] = lsf_ind_is_active( lsf_q_ind, lsf_means[narrowband], narrowband, cdk );
1891 17462 : move16();
1892 :
1893 : /* Get residual vector */
1894 296854 : FOR( i = 0; i < lpcorder; ++i )
1895 : {
1896 279392 : lsf_rem[i] = add( sub( pred[i], lsf_means[narrowband][i] ), sub( lsf[i], lsf_q_ind[i] ) );
1897 279392 : move16();
1898 : }
1899 :
1900 : /* Quantize using extra stage(s) */
1901 17462 : msvq_enc_lsf_fx64(
1902 17462 : lsf_ind_codebook[narrowband][cdk],
1903 : lsf_ind_dims,
1904 : lsf_ind_offs,
1905 : lsf_rem,
1906 : lsf_ind_numlevels,
1907 : kMaxC,
1908 : TCXLPC_IND_NUMSTAGES,
1909 : weights,
1910 : lpcorder,
1911 : lpcorder,
1912 17462 : indices + NumIndices );
1913 : /* Only add contribution if flag is enabled */
1914 17462 : IF( indices[0] )
1915 : {
1916 : /* Decode */
1917 4899 : msvq_dec(
1918 4899 : lsf_ind_codebook[narrowband][cdk],
1919 : lsf_ind_dims,
1920 : lsf_ind_offs,
1921 : TCXLPC_IND_NUMSTAGES,
1922 : lpcorder,
1923 : lpcorder,
1924 4899 : indices + NumIndices,
1925 : lsf_rem_q_ind );
1926 4899 : NumIndices = add( NumIndices, TCXLPC_IND_NUMSTAGES );
1927 :
1928 : /* Add to MA-removed vector */
1929 83283 : FOR( i = 0; i < lpcorder; ++i )
1930 : {
1931 78384 : lsf_q_ind[i] = add( lsf_q_ind[i], lsf_rem_q_ind[i] );
1932 78384 : move16();
1933 : }
1934 : }
1935 :
1936 : /* Add inter-frame prediction */
1937 296854 : FOR( i = 0; i < lpcorder; ++i )
1938 : {
1939 279392 : lsf_q[i] = add( lsf_q[i], pred[i] );
1940 279392 : lsf[i] = add( lsf[i], pred[i] );
1941 279392 : move16();
1942 279392 : move16();
1943 : }
1944 :
1945 17462 : reorder_lsf_fx( lsf_q, TCXLPC_LSF_GAP, lpcorder, INT_FS_FX );
1946 :
1947 296854 : FOR( i = 0; i < lpcorder; ++i )
1948 : {
1949 279392 : lsf_q_ind[i] = add( lsf_q_ind[i], lsf_means[narrowband][i] );
1950 279392 : move16();
1951 : }
1952 17462 : reorder_lsf_fx( lsf_q_ind, TCXLPC_LSF_GAP, lpcorder, INT_FS_FX );
1953 :
1954 17462 : IF( lsp_q_ind )
1955 : {
1956 17462 : E_LPC_lsf_lsp_conversion /*lsf2lsp*/ ( lsf_q_ind, lsp_q_ind, lpcorder );
1957 : }
1958 :
1959 17462 : return NumIndices;
1960 : }
1961 :
1962 :
1963 : /*--------------------------------------------------------------------------*
1964 : * enc_lsf_tcxlpc_fx()
1965 : *
1966 : * Returns: number of bits written
1967 : *--------------------------------------------------------------------------*/
1968 :
1969 0 : Word16 enc_lsf_tcxlpc_fx(
1970 : Word16 **indices, /* i : Ptr to VQ indices */
1971 : BSTR_ENC_HANDLE hBstr /* i/o: encoder bitstream handle */
1972 : )
1973 : {
1974 : Word16 i, NumBits;
1975 :
1976 : Word16 flag;
1977 :
1978 : /* Read flag */
1979 0 : flag = ( *indices )[0];
1980 0 : move16();
1981 0 : ++*indices;
1982 :
1983 0 : NumBits = TCXLPC_NUMBITS;
1984 0 : move16();
1985 0 : FOR( i = 0; i < TCXLPC_NUMSTAGES; ++i )
1986 : {
1987 0 : push_next_indice( hBstr, **indices, lsf_numbits[i] );
1988 0 : ++*indices;
1989 : }
1990 :
1991 0 : IF( flag )
1992 : {
1993 0 : NumBits = add( NumBits, TCXLPC_IND_NUMBITS );
1994 0 : FOR( i = 0; i < TCXLPC_IND_NUMSTAGES; ++i )
1995 : {
1996 0 : push_next_indice( hBstr, **indices, lsf_ind_numbits[i] );
1997 0 : ++*indices;
1998 : }
1999 : }
2000 0 : return NumBits;
2001 : }
2002 :
2003 :
2004 17462 : Word16 enc_lsf_tcxlpc_ivas_fx(
2005 : const Word16 **indices, /* i : Ptr to VQ indices */
2006 : BSTR_ENC_HANDLE hBstr /* i/o: encoder bitstream handle */
2007 : )
2008 : {
2009 : Word16 i, NumBits;
2010 :
2011 : Word16 flag;
2012 :
2013 : /* Read flag */
2014 17462 : flag = ( *indices )[0];
2015 17462 : move16();
2016 17462 : ++*indices;
2017 :
2018 17462 : NumBits = TCXLPC_NUMBITS;
2019 17462 : move16();
2020 69848 : FOR( i = 0; i < TCXLPC_NUMSTAGES; ++i )
2021 : {
2022 52386 : push_next_indice( hBstr, **indices, lsf_numbits[i] );
2023 52386 : ++*indices;
2024 : }
2025 :
2026 17462 : IF( flag )
2027 : {
2028 4899 : NumBits = add( NumBits, TCXLPC_IND_NUMBITS );
2029 9798 : FOR( i = 0; i < TCXLPC_IND_NUMSTAGES; ++i )
2030 : {
2031 4899 : push_next_indice( hBstr, **indices, lsf_ind_numbits[i] );
2032 4899 : ++*indices;
2033 : }
2034 : }
2035 17462 : return NumBits;
2036 : }
2037 :
2038 :
2039 : /*--------------------------------------------------------------------------*
2040 : * lsf_bctcvq_encprm_fx()
2041 : *
2042 : *
2043 : *--------------------------------------------------------------------------*/
2044 :
2045 270 : Word16 lsf_bctcvq_encprm_fx(
2046 : BSTR_ENC_HANDLE hBstr,
2047 : Word16 *param_lpc, // Q0
2048 : Word16 *bits_param_lpc,
2049 : Word16 no_indices )
2050 : {
2051 : Word16 i, nbits_lpc;
2052 :
2053 270 : nbits_lpc = 0;
2054 :
2055 2970 : FOR( i = 0; i < no_indices; i++ )
2056 : {
2057 2700 : push_next_indice( hBstr, *param_lpc, bits_param_lpc[i] );
2058 2700 : param_lpc++;
2059 2700 : nbits_lpc = add( nbits_lpc, bits_param_lpc[i] );
2060 : }
2061 :
2062 270 : return nbits_lpc;
2063 : }
2064 :
2065 :
2066 0 : Word16 lsf_bctcvq_encprm_ivas_fx(
2067 : BSTR_ENC_HANDLE hBstr,
2068 : const Word16 *param_lpc, // Q0
2069 : const Word16 *bits_param_lpc,
2070 : const Word16 no_indices )
2071 : {
2072 : Word16 i, nbits_lpc;
2073 :
2074 0 : nbits_lpc = 0;
2075 0 : move16();
2076 :
2077 0 : FOR( i = 0; i < no_indices; i++ )
2078 : {
2079 0 : push_next_indice( hBstr, *param_lpc, bits_param_lpc[i] );
2080 0 : param_lpc++;
2081 0 : nbits_lpc = add( nbits_lpc, bits_param_lpc[i] );
2082 : }
2083 :
2084 0 : return nbits_lpc;
2085 : }
|