Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 :
5 : #include <stdint.h>
6 : #include <assert.h>
7 : #include "options.h"
8 : #include "cnst.h"
9 : #include "rom_com.h"
10 : #include "rom_enc.h"
11 : #include "basop_util.h"
12 : #include "prot_fx.h" /* Function prototypes */
13 : #include "prot_fx_enc.h" /* Function prototypes */
14 : #include "basop_proto_func.h"
15 : #include "wmc_auto.h"
16 :
17 :
18 : /*-------------------------------------------------------------------*
19 : * Local constants
20 : *-------------------------------------------------------------------*/
21 :
22 : #define kMaxC 8 /* NOTE(review): not referenced in the visible part of this file */
23 :
24 : #define MAXINT32 2147483647 /* largest positive 32-bit value; msvq_enc_fx() uses it as the initial "very large" distortion */
25 : #ifndef swap /* swap( x, y, type ): exchange the lvalues x and y through a temporary of the given type; expands to a braced block */
26 : #define swap( x, y, type ) \
27 : { \
28 : type u__p; \
29 : u__p = x; \
30 : x = y; \
31 : y = u__p; \
32 : }
33 : #endif
34 : /* depack_4_values( cbp, val0, val1, val2, val3 ):
      unpack four values from the three 16-bit codebook words cbp[0..2].
      val0..val2 are the words cbp[0..2] shifted right by 4 (their upper 12 bits).
      val3 is assembled from the low 4 bits of each word: cbp[2] supplies the top
      nibble (shifted back with shr), cbp[1] the middle nibble (lshr) and
      cbp[0] the lowest nibble (s_and with 0xF).
      The expansion is four plain statements ending in ';' and is not wrapped in
      do { } while ( 0 ); the call sites in this file invoke it without a trailing
      semicolon and never as the body of an unbraced if/else. */
35 : #define depack_4_values( cbp, val0, val1, val2, val3 ) \
36 : val0 = shr( ( cbp )[0], 4 ); \
37 : val1 = shr( ( cbp )[1], 4 ); \
38 : val2 = shr( ( cbp )[2], 4 ); \
39 : val3 = add( add( shr( lshl( ( cbp )[2], 12 ), 4 ), lshr( lshl( ( cbp )[1], 12 ), 8 ) ), s_and( ( cbp )[0], 0xF ) );
40 :
41 : /*--------------------------------------------------------------------------*
42 : * msvq_stage1_dct_search_fx()
43 : *
44 : * stage1 search in a segmentwise truncated dct N domain without weights
   : *
   : * Processing order, as implemented below:
   : *  1) subtract the mid vector midQ_truncQ_fx from the target u_fx, both
   : *     brought to the common exponent max( 12, u_e );
   : *  2) transform the result with dctT2_N_apply_matrix_fx() into dct_target_fx;
   : *  3) per segment: accumulate the energy of the truncated target coefficients,
   : *     then for every table entry accumulate the squared error over the
   : *     segment columns, store it as mantissa/exponent in st1_mse_ptr_fx[] /
   : *     st1_mse_ptr_e[] and keep a pair of candidates per segment;
   : *  4) align all kept candidate distortions to their largest exponent;
   : *  5) take the two best candidates, exclude them from the MSE table and test
   : *     up to npost_check entries of the fwd/rev neighbour lists against the
   : *     current worst candidate, replacing it when a neighbour is better;
   : *  6) synthesize every kept candidate with dec_FDCNG_MSVQ_stage1_fx() and
   : *     rescale it with scale_sig32().
   : *
   : * NOTE(review): step 3 advances the pair pointers by 2 per segment, so
   : * 2 * n_segm entries of dist1_ptr_fx / indices_st1_local are written;
   : * presumably maxC_st1 == 2 * n_segm - confirm with the caller.
45 : *--------------------------------------------------------------------------*/
46 :
47 : /*! r: p_max, index of the worst (largest distortion) candidate among the maxC_st1 kept */
48 2742 : Word16 msvq_stage1_dct_search_fx(
49 : const Word32 *u_fx, /* i : target exp : u_e */
50 : const Word16 u_e, /* i : exp for target Q0 */
51 : const Word16 N, /* i : target length and IDCT synthesis length */
52 : const Word16 maxC_st1, /* i : number of final stage 1 candidates to provide */
53 : const DCTTYPE dcttype, /* e.g. DCT_T2_16_XX, DCT_T2_24_XX; */
54 : const Word16 max_dct_trunc, /* i : maximum of truncation lengths */
55 : Word32 *invTrfMatrix_fx, /* i : IDCT synthesis matrix for dim N Q31 */
56 : const Word16 *midQ_truncQ_fx, /* i : midQ vector */
57 : const Word32 *dct_scaleF_fx, /* i : global scale factors Q10 */
58 : const Word16 n_segm, /* i : number of segments */
59 : const Word16 *cols_per_segment, /* i : remaining length per segment */
60 : const Word16 *trunc_dct_cols_per_segment, /* i : trunc length per segment */
61 : const Word16 *entries_per_segment, /* i : number of rows per segment */
62 : const Word16 *cum_entries_per_segment, /* i : number of cumulative entries */
63 : const Word8 *const W8Qx_dct_sections[], /* i : Word8(byte) segment table ptrs */
64 : const Word16 *col_syn_shift[], /* i : columnwise syn shift tables */
65 : const Word8 *segm_neighbour_fwd, /* i : circular neighbour list fwd */
66 : const Word8 *segm_neighbour_rev, /* i : circular neighbour list reverse */
67 : const Word16 npost_check, /* i : number of neighbours to check , should be even */
68 : Word32 *st1_mse_ptr_fx, /* i : dynRAM buffer for MSEs exp : u_e */
69 : Word16 *indices_st1_local, /* o : selected cand indices */
70 : Word32 *st1_syn_vec_ptr_fx, /* i/o: buffer for IDCT24 synthesis i :exp : u_e */
71 : Word32 *dist1_ptr_fx, /* o : resulting stage 1 MSEs in DCT-N domain */
72 : Word16 *dist1_ptr_e ) /* o : common exponent of all dist1_ptr_fx[] entries */
73 : {
74 : Word32 dct_target_fx[FDCNG_VQ_DCT_MAXTRUNC]; // Q20
75 : Word32 u_mr_fx[FDCNG_VQ_MAX_LEN];
76 : Word16 dist1_ptr_e_buf[2 * LSFMBEST_MAX];
77 : Word64 mse_trunc_segm_fx[FDCNG_VQ_DCT_NSEGM];
78 : Word32 tmp_fx, check_mse;
79 : Word16 tmp_e, check_mse_e;
80 : Word64 mse_fx; /* Word64 in BASOP */
81 :
82 : Word16 p_max, c, c2, segm, j_full, j, i;
83 : Word16 n_ana, p_mins[2], idx_min[2];
84 :
85 : Word16 st1_mse_ptr_e[128]; /* exponents paired with st1_mse_ptr_fx[]; NOTE(review): the fixed size 128 assumes every j_full index stays below 128 - confirm against the segment tables */
86 :
87 : const Word8 *cbpW8;
88 : const Word16 *dct_col_shift_tab;
89 :
90 : Word32 *st1_mse_pair_fx;
91 : Word16 *st1_mse_pair_e;
92 : Word16 *st1_idx_pair;
93 :
94 : Word32 tmp2_fx;
95 : Word16 check_ind[FDCNG_VQ_DCT_NPOST];
96 2742 : assert( ( npost_check % 2 == 0 ) && ( npost_check <= FDCNG_VQ_DCT_NPOST ) );
97 :
98 2742 : assert( n_segm <= FDCNG_VQ_DCT_NSEGM );
99 :
100 2742 : n_ana = N; /* VQ stage#1 core is currently always using stored DCT N coeffs */
101 2742 : move16();
102 2742 : assert( n_ana >= max_dct_trunc ); /* check for FDCNGVQ WB , SWB, FB operation */
103 :
104 : /* remove mid stage#1 vector, in original input domain */
105 2742 : tmp_e = s_max( 12, u_e ); /* common working exponent for the target and the mid vector */
106 68550 : FOR( i = 0; i < n_ana; i++ )
107 : {
108 65808 : u_mr_fx[i] = L_sub( L_shl( u_fx[i], sub( u_e, tmp_e ) ), L_shl( midQ_truncQ_fx[i], sub( Q31 - Q10, tmp_e ) ) ); // tmp_e
109 65808 : move32();
110 : }
111 :
112 2742 : dctT2_N_apply_matrix_fx( (const Word32 *) u_mr_fx, dct_target_fx, s_min( max_dct_trunc, n_ana ), n_ana, invTrfMatrix_fx, max_dct_trunc, dcttype ); // exp : tmp_e
113 :
114 : /* init search state ptr's at the top */
115 2742 : set32_fx( dist1_ptr_fx, MAX_32, maxC_st1 );
116 2742 : set16_fx( dist1_ptr_e_buf, 32, maxC_st1 );
117 2742 : st1_mse_pair_fx = &( dist1_ptr_fx[0] ); /* req. ptr post upd +=2 */ // st1_mse_pair_e
118 2742 : st1_mse_pair_e = &( dist1_ptr_e_buf[0] ); /* req. ptr post upd +=2 */
119 2742 : st1_idx_pair = &( indices_st1_local[0] ); /* req. ptr post upd +=2 */
120 2742 : set64_fx( mse_trunc_segm_fx, 0, n_segm );
121 :
122 : // set16_fx( mse_trunc_segm_e, u_e, FDCNG_VQ_DCT_NSEGM );
123 :
124 13710 : FOR( segm = 0; segm < n_segm; segm++ )
125 : { /* point to a new paired location for each segment */
126 10968 : p_max = 0; /* req. to point to one of 1 or 0, this init can potentially be omitted here,as p_max is always 1 or 0 */
127 10968 : move16();
128 :
129 : /* compute segment common truncation error in dctN domain */
130 :
131 65808 : FOR( i = 0; i < trunc_dct_cols_per_segment[segm]; i++ )
132 : {
133 54840 : mse_trunc_segm_fx[segm] = W_mac_32_32( mse_trunc_segm_fx[segm], dct_target_fx[cols_per_segment[segm] + i], dct_target_fx[cols_per_segment[segm] + i] ); // Q41
134 54840 : move64();
135 : }
136 :
137 10968 : cbpW8 = W8Qx_dct_sections[segm]; /* Word8 column variable Qx storage , table ptr init */
138 :
139 361944 : FOR( j = 0; j < entries_per_segment[segm]; j++ )
140 : {
141 : /* unweighted segmented search DCT domain loop */
142 350976 : j_full = add( j, cum_entries_per_segment[segm] ); /* or simply use j_full++ */
143 :
144 350976 : mse_fx = mse_trunc_segm_fx[segm]; /* init mse with the common mse truncation part, a move64() in BASOP */ // Q41
145 350976 : move64();
146 :
147 350976 : dct_col_shift_tab = col_syn_shift[segm]; /* ptr init */
148 :
149 5763684 : FOR( c2 = 0; c2 < cols_per_segment[segm]; c2++ )
150 : {
151 : #define WMC_TOOL_SKIP
152 5412708 : tmp_fx = L_sub( dct_target_fx[c2], Mpy_32_32( L_shl( cbpW8[c2], add( sub( Q31, tmp_e ), dct_col_shift_tab[c2] ) ), dct_scaleF_fx[1] ) ); /* note: BASOP shift left defined for signed integers */
153 : LOGIC( 1 );
154 : SHIFT( 1 );
155 : ADD( 1 ); /* in BASOP: s_and(for W8->W16), shl(), sub()*/
156 : #undef WMC_TOOL_SKIP
157 5412708 : mse_fx = W_mac_32_32( mse_fx, tmp_fx, tmp_fx ); /* L_mac or L_mac0() square Word16 -> Word32*/ // Q41
158 : }
159 350976 : Word16 L_tmp = W_norm( mse_fx ); /* normalisation shift of the 64-bit sum; held in a Word16 despite the L_ prefix */
160 350976 : st1_mse_ptr_fx[j_full] = W_extract_h( W_lshl( mse_fx, L_tmp ) ); /* save MSE in shared dynamic RAM, move32() in BASOP */ // st1_mse_ptr_e
161 350976 : move32();
162 350976 : st1_mse_ptr_e[j_full] = sub( shl( tmp_e, 1 ), L_tmp ); /* exponent of the squared error: twice the working exponent minus the normalisation shift */
163 350976 : move16();
164 :
165 : #define WMC_TOOL_SKIP
166 350976 : cbpW8 += cols_per_segment[segm]; /* fixed pointer increment for each segment */
167 : #undef WMC_TOOL_SKIP
168 :
169 : /* overwrite with a new worst index at p_max */
170 :
171 : /* Note: The three inner loop if's below are not 100% properly instrumented by WMC tool */
172 : // if ( st1_mse_ptr_fx[j_full] < st1_mse_pair_fx[p_max] ) /* L_sub */
173 350976 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( st1_mse_ptr_fx[j_full], st1_mse_ptr_e[j_full], st1_mse_pair_fx[p_max], st1_mse_pair_e[p_max] ), -1 ) ) /* L_sub */
174 : {
175 154528 : st1_idx_pair[p_max] = j_full; /* move16, single BASOP */
176 154528 : move16();
177 : } /* BASOP 2 ops */
178 :
179 350976 : IF( EQ_16( st1_idx_pair[p_max], j_full ) )
180 : { /* idx updated --> also update mse */
181 154528 : st1_mse_pair_fx[p_max] = st1_mse_ptr_fx[j_full]; /* move32(), single BASOP */
182 154528 : move32();
183 154528 : st1_mse_pair_e[p_max] = st1_mse_ptr_e[j_full]; /* move16(), single BASOP */
184 154528 : move16();
185 : } /* BASOP 3 ops */
186 :
187 : /* avoid WC costly candidate list management by always updating p_max,
188 : as we have only a pair in each segment to maintain */
189 350976 : p_max = 0;
190 350976 : move16();
191 350976 : if ( EQ_16( BASOP_Util_Cmp_Mant32Exp( st1_mse_pair_fx[0], st1_mse_pair_e[0], st1_mse_pair_fx[1], st1_mse_pair_e[1] ), -1 ) ) /* L_sub()*/
192 : {
193 187860 : p_max = 1; /* move16() */
194 187860 : move16();
195 : } /* BASOP 3 ops ,Note 2 ops possible in BASOP with L_sub and L_lshr */
196 :
197 : /* Note: logical shift right not available in ANSI-C */
198 : /* p_max = (st1_mse_pair[0] - st1_mse_pair[1]) ">>>" 31; */
199 : /* in java logical shift right is available as >>> , in BASOP it is available as L_lshr */
200 :
201 : /* Cost: weighted sum with cond moves ('if') => 8 in float , 7 in BASOP with L_lshr */
202 : } /* j in section */
203 :
204 10968 : st1_mse_pair_fx += 2; /* advance to the candidate pair of the next segment */
205 10968 : st1_mse_pair_e += 2; /* advance to the candidate pair of the next segment */
206 10968 : st1_idx_pair += 2; /* advance to the candidate pair of the next segment */
207 :
208 : } /* next segment */
209 :
210 2742 : tmp_e = 0; /* tmp_e is reused from here on: it collects the largest candidate exponent */
211 2742 : move16();
212 24678 : FOR( j = 0; j < maxC_st1; j++ )
213 : {
214 : /* compute_full mse using stored DCT24 domain MSE's */
215 : /* calculate MSE from stage1 inner using existing inner DCT domain variables */
216 21936 : tmp_e = s_max( dist1_ptr_e_buf[j], tmp_e );
217 : }
218 :
219 24678 : FOR( j = 0; j < maxC_st1; j++ )
220 : {
221 : /* compute_full mse using stored DCT24 domain MSE's */
222 : /* calculate MSE from stage1 inner using existing inner DCT domain variables */
223 21936 : dist1_ptr_fx[j] = L_shr( dist1_ptr_fx[j], sub( tmp_e, dist1_ptr_e_buf[j] ) ); /* align every candidate to the common exponent tmp_e */
224 21936 : move32();
225 21936 : *dist1_ptr_e = tmp_e; /* same value is written on every iteration */
226 21936 : move16();
227 : }
228 :
229 :
230 2742 : assert( ( maxC_st1 >= 3 ) ); /* NOTE(review): range of maxC_st1 is checked only here, after it was already used as a length above */
231 2742 : assert( ( maxC_st1 <= 8 ) );
232 :
233 2742 : p_max = maximum_32_fx( dist1_ptr_fx, maxC_st1, NULL ); /* establish current worst candidate for MSVQ stage#2 among all maxC_st1 candidates so far */
234 :
235 2742 : p_mins[0] = minimum_32_fx( dist1_ptr_fx, maxC_st1, NULL ); /* find best entry among all maxC_pre */
236 2742 : move16();
237 2742 : tmp_fx = dist1_ptr_fx[p_mins[0]];
238 2742 : move32();
239 2742 : dist1_ptr_fx[p_mins[0]] = MAX_32; /* exclude 1st */
240 2742 : move32();
241 :
242 2742 : p_mins[1] = minimum_32_fx( dist1_ptr_fx, maxC_st1, NULL ); /* find 2nd best entry */
243 2742 : move16();
244 2742 : tmp2_fx = dist1_ptr_fx[p_mins[1]];
245 2742 : move32();
246 2742 : dist1_ptr_fx[p_mins[1]] = MAX_32; /* exclude 2nd */
247 2742 : move32();
248 :
249 2742 : dist1_ptr_fx[p_mins[0]] = tmp_fx; /* restore 1st */
250 2742 : move32();
251 2742 : dist1_ptr_fx[p_mins[1]] = tmp2_fx; /* restore 2nd */
252 2742 : move32();
253 :
254 2742 : idx_min[0] = indices_st1_local[p_mins[0]];
255 2742 : move16();
256 2742 : idx_min[1] = indices_st1_local[p_mins[1]];
257 2742 : move16();
258 :
259 :
260 : /* use global exclusion list to never reselect the two (best) global MSE values so far */
261 2742 : st1_mse_ptr_fx[idx_min[0]] = MAX_32; /* move32() */
262 2742 : move32();
263 2742 : st1_mse_ptr_e[idx_min[0]] = MAX_16;
264 2742 : move16();
265 2742 : st1_mse_ptr_fx[idx_min[1]] = MAX_32; /* move32() */
266 2742 : move32();
267 2742 : st1_mse_ptr_e[idx_min[1]] = MAX_16;
268 2742 : move16();
269 :
270 : /* circular MSE-neighbour list in use to potentially replace some segment search candidates */
271 : /* using both 1st and 2nd best neighbours in fwd and rev directions */
272 2742 : check_ind[0] = segm_neighbour_fwd[idx_min[0]];
273 2742 : move16();
274 2742 : check_ind[1] = segm_neighbour_rev[idx_min[0]];
275 2742 : move16();
276 :
277 2742 : check_ind[2] = segm_neighbour_fwd[idx_min[1]];
278 2742 : move16();
279 2742 : check_ind[3] = segm_neighbour_rev[idx_min[1]];
280 2742 : move16();
281 :
282 2742 : check_ind[4] = segm_neighbour_fwd[check_ind[0]];
283 2742 : move16();
284 2742 : check_ind[5] = segm_neighbour_rev[check_ind[1]];
285 2742 : move16();
286 :
287 2742 : check_ind[6] = segm_neighbour_fwd[check_ind[2]];
288 2742 : move16();
289 2742 : check_ind[FDCNG_VQ_DCT_NPOST - 1] = segm_neighbour_rev[check_ind[3]]; /* NOTE(review): the entries above use literal indices 0..6; this one lines up with them only if FDCNG_VQ_DCT_NPOST is 8 - confirm */
290 2742 : move16();
291 :
292 24678 : FOR( i = 0; i < npost_check; i++ )
293 : {
294 : /* move MSE from DCT-inner loop search to input synthesis domain */
295 : /* multiplication by fdcng_dct_scaleF[2] to get the float outer loop scale correct in IDCT synthesis domain */
296 21936 : check_mse = st1_mse_ptr_fx[check_ind[i]];
297 21936 : move32();
298 21936 : check_mse_e = st1_mse_ptr_e[check_ind[i]];
299 21936 : move16();
300 :
301 21936 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( check_mse, check_mse_e, dist1_ptr_fx[p_max], *dist1_ptr_e ), -1 ) )
302 : { /* new winner , replace worst */
303 10458 : dist1_ptr_fx[p_max] = L_shl( check_mse, sub( check_mse_e, *dist1_ptr_e ) ); /* re-align the new entry to the common exponent *dist1_ptr_e */
304 10458 : move32();
305 10458 : indices_st1_local[p_max] = check_ind[i];
306 10458 : move16();
307 10458 : st1_mse_ptr_fx[check_ind[i]] = MAX_32; /* exclude, BASOP: move32() */
308 10458 : move32();
309 10458 : st1_mse_ptr_e[check_ind[i]] = MAX_16;
310 10458 : move16();
311 10458 : p_max = maximum_32_fx( dist1_ptr_fx, maxC_st1, NULL ); /* establish a new current worst candidate among all maxC */
312 : }
313 : }
314 :
315 : /* extract the selected stage one vectors in DCT_N domain , apply IDCT_N and scale up */
316 : /* always extract full length signal(e.g. 24) to be able to update WB(e.g. N_in==21) candidate MSE values */
317 : /* in the case that only a part of the IDCT N vector is in final use */
318 :
319 : /* note: synthesis not yet fully parameterized/generalized for other IDCT lengths */
320 2742 : assert( N == 24 );
321 : {
322 24678 : FOR( c = 0; c < maxC_st1; c++ )
323 : {
324 21936 : dec_FDCNG_MSVQ_stage1_fx( indices_st1_local[c], N, invTrfMatrix_fx, dcttype + 1, &( st1_syn_vec_ptr_fx[c * N] ), NULL ); // Q11 : output
325 21936 : scale_sig32( &( st1_syn_vec_ptr_fx[c * N] ), N, sub( 11, s_max( u_e, 12 ) ) );
326 : }
327 : }
328 :
329 2742 : return p_max; /* index of the worst performing candidate */
330 : }
331 :
332 :
333 : /*--------------------------------------------------------------------------*
334 : * msvq_stage1_dct_recalc_candidates_fdcng_wb_fx()
335 : *
336 : * recalc MSE for fdcng WB(0..20) coeffs ,
337 : essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search,
338 : excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep the WB MSEs update for the subsequent stages
    :
    : For every candidate c the code below forms the difference between the
    : synthesis vector and the target over the indices FDCNG_VQ_MAX_LEN_WB ..
    : FDCNG_VQ_MAX_LEN - 1, sums its squares in 64 bit, normalises the sum to a
    : 32-bit mantissa plus exponent and subtracts it from dist_ptr_fx[c] with
    : BASOP_Util_Add_Mant32Exp(). All distortions are finally shifted to one
    : common exponent, which is returned through *dist_ptr_e.
339 : *--------------------------------------------------------------------------*/
340 :
341 : /*! r: (updated p_max), index returned by maximum_32_fx() over the updated distortions */
342 751 : Word16 msvq_stage1_dct_recalc_candidates_fdcng_wb_fx(
343 : const Word32 *st1_syn_vec_ptr_fx, /* i : IDCT24 synthesis vectors st1_syn_vec_e*/
344 : const Word16 st1_syn_vec_e, /* i : exp for IDCT24 synthesis vectors */
345 : const Word32 *u_fx, /* i : target signal u_e*/
346 : const Word16 u_e, /* i : exp for target signal */
347 : const Word16 maxC_st1, /* i : number of candidates in stage1 */
348 : Word32 *dist_ptr_fx, /* i/o: updated MSE vector for stage1 */
349 : Word16 *dist_ptr_e /* i/o: exp for updated MSE vector for stage1 */
350 : )
351 : {
352 : Word16 i;
353 : Word16 p_max_local, c;
354 : const Word32 *p2_fx;
355 : Word16 tmp_e;
356 : Word32 res24_fx, high_diff_fx[FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB];
357 : Word64 acc;
358 : Word16 res24_e[FD_CNG_maxC_37bits]; /* per-candidate exponent; NOTE(review): assumes maxC_st1 <= FD_CNG_maxC_37bits - confirm with the caller */
359 : Word16 dist_e;
360 :
361 751 : dist_e = *dist_ptr_e; /* running maximum exponent, starts at the incoming common exponent */
362 751 : move16();
363 6759 : FOR( c = 0; c < maxC_st1; c++ )
364 : { /* point to extended synthesis part */
365 6008 : p2_fx = (const Word32 *) &( st1_syn_vec_ptr_fx[c * FDCNG_VQ_MAX_LEN + FDCNG_VQ_MAX_LEN_WB] ); /* ptr init to synthesis candidate c */
366 6008 : tmp_e = s_max( st1_syn_vec_e, u_e ); /* does not depend on c; recomputed because tmp_e is overwritten further down */
367 6008 : tmp_e = add( tmp_e, 1 ); /* one extra bit for the subtraction below */
368 : /* for stage#1 use "u" instead of the shortened resid[0], to access the extended/extrapolated input target */
369 24032 : FOR( i = 0; i < FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB; i++ )
370 : {
371 18024 : high_diff_fx[i] = L_sub( L_shr( p2_fx[i], sub( tmp_e, st1_syn_vec_e ) ), L_shr( u_fx[FDCNG_VQ_MAX_LEN_WB + i], sub( tmp_e, u_e ) ) ); // tmp_e
372 18024 : move32();
373 : }
374 6008 : acc = 0;
375 6008 : move64();
376 24032 : FOR( i = 0; i < FDCNG_VQ_MAX_LEN - FDCNG_VQ_MAX_LEN_WB; i++ )
377 : {
378 18024 : acc = W_mac_32_32( acc, high_diff_fx[i], high_diff_fx[i] );
379 : }
380 6008 : res24_e[c] = tmp_e; /* keep the working exponent; tmp_e is reused for the normalisation shift next */
381 6008 : move16();
382 6008 : tmp_e = W_norm( acc );
383 6008 : res24_fx = W_extract_h( W_shl( acc, tmp_e ) );
384 :
385 6008 : res24_e[c] = sub( shl( res24_e[c], 1 ), tmp_e ); /* exponent of the squared sum: twice the working exponent minus the normalisation shift */
386 6008 : move16();
387 :
388 6008 : dist_ptr_fx[c] = BASOP_Util_Add_Mant32Exp( dist_ptr_fx[c], *dist_ptr_e, L_negate( res24_fx ), res24_e[c], &res24_e[c] ); /* remove DCT24 high band error contribution */
389 6008 : move32();
390 6008 : dist_e = s_max( dist_e, res24_e[c] ); /* res24_e[c] now holds the exponent of the updated dist_ptr_fx[c] */
391 6008 : move16();
392 : }
393 :
394 :
395 6759 : FOR( c = 0; c < maxC_st1; c++ )
396 : {
397 6008 : dist_ptr_fx[c] = L_shl( dist_ptr_fx[c], sub( res24_e[c], dist_e ) ); /* shift count is <= 0 because dist_e is the maximum exponent */
398 6008 : move32();
399 : }
400 751 : *dist_ptr_e = dist_e;
401 751 : move16();
402 : /* finally update p_max, as it may potentially change,
403 : due to the core DCT24 search originally optimizing over the longer basis vectors than DCT21 */
404 751 : p_max_local = maximum_32_fx( dist_ptr_fx, maxC_st1, NULL );
405 :
406 751 : return p_max_local;
407 : }
408 :
409 :
410 : /*--------------------------------------------------------------------------*
411 : * depack_mul_values_fx()
412 : *
    : * Unpacks N codebook values from cbp (three packed words per four values,
    : * see depack_4_values), multiplies each by its weight w[i] (shifted left by 2
    : * with saturation) and stores the rounded product in Tmp[i].
    : * Returns the saturating sum of value * Tmp[i] over all N values.
    : * The loop handles four values per pass and reads indices i .. i + 3, so N
    : * is expected to be a multiple of 4.
    : * The listing shows a hit count of 0 for this function in this run.
413 : *--------------------------------------------------------------------------*/
414 :
415 0 : static Word32 depack_mul_values_fx( Word16 *Tmp, const Word16 *w, const Word16 *cbp, const Word16 N )
416 : {
417 : Word16 i, val0, val1, val2, val3;
418 : Word32 en;
419 :
420 0 : en = 0;
421 0 : move32();
422 0 : FOR( i = 0; i < N; i += 4 )
423 : {
424 0 : depack_4_values( cbp + i_mult( shr( i, 2 ), 3 ), val0, val1, val2, val3 ) /* group i / 4 starts 3 words further on; the macro supplies its own ';' */
425 0 : Tmp[i + 0] = mult_r( shl_sat( w[i + 0], 2 ), val0 );
426 0 : move16();
427 0 : en = L_mac_sat( en, val0, Tmp[i + 0] );
428 0 : Tmp[i + 1] = mult_r( shl_sat( w[i + 1], 2 ), val1 );
429 0 : move16();
430 0 : en = L_mac_sat( en, val1, Tmp[i + 1] );
431 0 : Tmp[i + 2] = mult_r( shl_sat( w[i + 2], 2 ), val2 );
432 0 : move16();
433 0 : en = L_mac_sat( en, val2, Tmp[i + 2] );
434 0 : Tmp[i + 3] = mult_r( shl_sat( w[i + 3], 2 ), val3 );
435 0 : move16();
436 0 : en = L_mac_sat( en, val3, Tmp[i + 3] );
437 : }
438 :
439 0 : return en;
440 : }
441 :
442 :
443 : /*--------------------------------------------------------------------------*
444 : * depack_sub_values_fx()
445 : *
    : * Unpacks N codebook values from cbp (three packed words per four values,
    : * see depack_4_values) and writes pTmp[j] = p1[j] - value[j].
    : * The loop handles four values per pass and accesses indices j .. j + 3, so
    : * N is expected to be a multiple of 4.
446 : *--------------------------------------------------------------------------*/
447 :
448 558208 : static void depack_sub_values_fx( Word16 *pTmp, const Word16 *p1, const Word16 *cbp, const Word16 N )
449 : {
450 : Word16 j, val0, val1, val2, val3;
451 :
452 1953728 : FOR( j = 0; j < N; j += 4 )
453 : {
454 1395520 : depack_4_values( cbp + i_mult( 3, shr( j, 2 ) ), val0, val1, val2, val3 ) /* group j / 4 starts 3 words further on; the macro supplies its own ';' */
455 :
456 : /*pTmp[i] = (p1[i] - cbp[i]);*/
457 1395520 : pTmp[j + 0] = sub( p1[j + 0], val0 );
458 1395520 : move16(); /*3Q12*1.28*/
459 1395520 : pTmp[j + 1] = sub( p1[j + 1], val1 );
460 1395520 : move16(); /*3Q12*1.28*/
461 1395520 : pTmp[j + 2] = sub( p1[j + 2], val2 );
462 1395520 : move16(); /*3Q12*1.28*/
463 1395520 : pTmp[j + 3] = sub( p1[j + 3], val3 );
464 1395520 : move16(); /*3Q12*1.28*/
465 : }
466 558208 : }
467 :
468 :
469 1186192 : static Word64 depack_mul_values_fx64( Word32 *Tmp, const Word16 *w, const Word16 *cbp, const Word16 N )
470 : { /* 64-bit accumulator variant of depack_mul_values_fx(): unpacks N values from cbp
      (three packed words per four values), stores the 32-bit product
      L_mult0( w[i], value ) in Tmp[i] and returns the 64-bit sum of
      Tmp[i] * value accumulated with W_mac_32_16(). The loop reads indices
      i .. i + 3 per pass, so N is expected to be a multiple of 4. */
471 : Word16 i, val0, val1, val2, val3;
472 : Word64 en;
473 :
474 1186192 : en = 0;
475 1186192 : move32(); /* NOTE(review): en is a Word64; other 64-bit stores in this file are counted with move64() */
476 4884320 : FOR( i = 0; i < N; i += 4 )
477 : {
478 3698128 : depack_4_values( cbp + i_mult( shr( i, 2 ), 3 ), val0, val1, val2, val3 ) /* the macro supplies its own ';' */
479 3698128 : Tmp[i + 0] = L_mult0( w[i + 0], val0 ); // Q8 * Q2.56
480 3698128 : move16(); /* NOTE(review): Tmp[] is Word32, yet each store in this loop is counted with move16() */
481 3698128 : en = W_mac_32_16( en, Tmp[i + 0], val0 ); // Q8 * Q2.56 * 2.56 * Q1
482 3698128 : Tmp[i + 1] = L_mult0( w[i + 1], val1 );
483 3698128 : move16();
484 3698128 : en = W_mac_32_16( en, Tmp[i + 1], val1 );
485 3698128 : Tmp[i + 2] = L_mult0( w[i + 2], val2 );
486 3698128 : move16();
487 3698128 : en = W_mac_32_16( en, Tmp[i + 2], val2 );
488 3698128 : Tmp[i + 3] = L_mult0( w[i + 3], val3 );
489 3698128 : move16();
490 3698128 : en = W_mac_32_16( en, Tmp[i + 3], val3 );
491 : }
492 :
493 1186192 : return en; // Q8 * Q2.56 * 2.56 * Q1
494 : }
495 :
496 : /*--------------------------------------------------------------------------*
497 : * msvq_enc_find_p_max_8()
498 : *
499 : * Unroll of inner search loop for maxC == 8
500 : *--------------------------------------------------------------------------*/
501 :
502 0 : static Word16 msvq_enc_find_p_max_8_fx( Word32 dist[] )
503 : {
504 : Word16 p_max;
505 :
506 0 : p_max = 0;
507 0 : move16();
508 :
509 : BASOP_SATURATE_WARNING_OFF_EVS
510 0 : if ( GT_32( dist[1], dist[p_max] ) )
511 : {
512 0 : p_max = 1;
513 0 : move16();
514 : }
515 0 : if ( GT_32( dist[2], dist[p_max] ) )
516 : {
517 0 : p_max = 2;
518 0 : move16();
519 : }
520 0 : if ( GT_32( dist[3], dist[p_max] ) )
521 : {
522 0 : p_max = 3;
523 0 : move16();
524 : }
525 0 : if ( GT_32( dist[4], dist[p_max] ) )
526 : {
527 0 : p_max = 4;
528 0 : move16();
529 : }
530 0 : if ( GT_32( dist[5], dist[p_max] ) )
531 : {
532 0 : p_max = 5;
533 0 : move16();
534 : }
535 0 : if ( GT_32( dist[6], dist[p_max] ) )
536 : {
537 0 : p_max = 6;
538 0 : move16();
539 : }
540 0 : if ( GT_32( dist[7], dist[p_max] ) )
541 : {
542 0 : p_max = 7;
543 0 : move16();
544 : }
545 : BASOP_SATURATE_WARNING_ON_EVS
546 0 : return p_max;
547 : }
548 :
549 :
550 1482366 : static Word16 msvq_enc_find_p_max_8_fx64( Word64 dist[] )
551 : { /* 64-bit variant of msvq_enc_find_p_max_8_fx(): returns the index (0..7) of the
      largest of dist[0..7]; comparisons are strict, so the lowest index wins on ties */
552 : Word16 p_max;
553 :
554 1482366 : p_max = 0;
555 1482366 : move16();
556 :
557 : BASOP_SATURATE_WARNING_OFF_EVS
558 1482366 : if ( GT_64( dist[1], dist[p_max] ) ) /* each step compares against the running maximum, not against dist[0] */
559 : {
560 702532 : p_max = 1;
561 702532 : move16();
562 : }
563 1482366 : if ( GT_64( dist[2], dist[p_max] ) )
564 : {
565 497344 : p_max = 2;
566 497344 : move16();
567 : }
568 1482366 : if ( GT_64( dist[3], dist[p_max] ) )
569 : {
570 394773 : p_max = 3;
571 394773 : move16();
572 : }
573 1482366 : if ( GT_64( dist[4], dist[p_max] ) )
574 : {
575 302193 : p_max = 4;
576 302193 : move16();
577 : }
578 1482366 : if ( GT_64( dist[5], dist[p_max] ) )
579 : {
580 238455 : p_max = 5;
581 238455 : move16();
582 : }
583 1482366 : if ( GT_64( dist[6], dist[p_max] ) )
584 : {
585 218136 : p_max = 6;
586 218136 : move16();
587 : }
588 1482366 : if ( GT_64( dist[7], dist[p_max] ) )
589 : {
590 186006 : p_max = 7;
591 186006 : move16();
592 : }
593 : BASOP_SATURATE_WARNING_ON_EVS
594 1482366 : return p_max;
595 : }
596 :
597 :
598 : /*--------------------------------------------------------------------------*
599 : * msvq_enc_find_p_max_6_fx()
600 : *
601 : * Unroll of inner search loop for maxC == 6
    : *
    : * Returns the index (0..5) of the largest of dist[0..5]. The comparisons are
    : * strict, so on equal values the lowest index is kept.
    : * The listing shows a hit count of 0 for this function in this run.
602 : *--------------------------------------------------------------------------*/
603 :
604 0 : static Word16 msvq_enc_find_p_max_6_fx( Word32 dist[] )
605 : {
606 : Word16 p_max;
607 :
608 0 : p_max = 0;
609 0 : move16();
610 :
611 : BASOP_SATURATE_WARNING_OFF_EVS
612 0 : if ( GT_32( dist[1], dist[p_max] ) ) /* each step compares against the running maximum, not against dist[0] */
613 : {
614 0 : p_max = 1;
615 0 : move16();
616 : }
617 0 : if ( GT_32( dist[2], dist[p_max] ) )
618 : {
619 0 : p_max = 2;
620 0 : move16();
621 : }
622 0 : if ( GT_32( dist[3], dist[p_max] ) )
623 : {
624 0 : p_max = 3;
625 0 : move16();
626 : }
627 0 : if ( GT_32( dist[4], dist[p_max] ) )
628 : {
629 0 : p_max = 4;
630 0 : move16();
631 : }
632 0 : if ( GT_32( dist[5], dist[p_max] ) )
633 : {
634 0 : p_max = 5;
635 0 : move16();
636 : }
637 : BASOP_SATURATE_WARNING_ON_EVS
638 0 : return p_max;
639 : }
640 :
641 0 : static Word16 msvq_enc_find_p_max_6_fx64( Word64 dist[] )
642 : { /* 64-bit variant of msvq_enc_find_p_max_6_fx(): returns the index (0..5) of the
      largest of dist[0..5]; comparisons are strict, so the lowest index wins on ties.
      The listing shows a hit count of 0 for this function in this run. */
643 : Word16 p_max;
644 :
645 0 : p_max = 0;
646 0 : move16();
647 :
648 : BASOP_SATURATE_WARNING_OFF_EVS
649 0 : if ( GT_64( dist[1], dist[p_max] ) ) /* each step compares against the running maximum, not against dist[0] */
650 : {
651 0 : p_max = 1;
652 0 : move16();
653 : }
654 0 : if ( GT_64( dist[2], dist[p_max] ) )
655 : {
656 0 : p_max = 2;
657 0 : move16();
658 : }
659 0 : if ( GT_64( dist[3], dist[p_max] ) )
660 : {
661 0 : p_max = 3;
662 0 : move16();
663 : }
664 0 : if ( GT_64( dist[4], dist[p_max] ) )
665 : {
666 0 : p_max = 4;
667 0 : move16();
668 : }
669 0 : if ( GT_64( dist[5], dist[p_max] ) )
670 : {
671 0 : p_max = 5;
672 0 : move16();
673 : }
674 : BASOP_SATURATE_WARNING_ON_EVS
675 0 : return p_max;
676 : }
677 :
678 :
679 : /*--------------------------------------------------------------------------*
680 : * msvq_enc_fx()
681 : *
682 : * MSVQ encoder
    : *
    : * M-best tree search over 'stages' codebook stages, as implemented below:
    : * maxC candidates are kept per stage, each with its residual vector, its
    : * index history and its accumulated weighted distortion. In every stage each
    : * codebook entry is tried against each parent candidate; when the resulting
    : * distortion is not larger than the current worst kept one, it replaces that
    : * entry and the worst entry is located again through func_ptr. After the last
    : * stage the index history of the candidate with the smallest distortion is
    : * copied to Idx[].
    : * Two buffers per quantity (index 0 = parent, 1 = current) are exchanged at
    : * the start of every stage.
    : * The listing shows a hit count of 0 for this function in this run.
683 : *--------------------------------------------------------------------------*/
684 :
685 0 : void msvq_enc_fx(
686 : const Word16 *const *cb, /* i : Codebook (indexed cb[*stages][levels][p]) (0Q15) */
687 : const Word16 dims[], /* i : Dimension of each codebook stage (NULL: full dim.) */
688 : const Word16 offs[], /* i : Starting dimension of each codebook stage (NULL: 0) */
689 : const Word16 u[], /* i : Vector to be encoded (prediction and mean removed)(3Q12) */
690 : const Word16 *levels, /* i : Number of levels in each stage */
691 : const Word16 maxC, /* i : Tree search size (number of candidates kept from */
692 : /* one stage to the next == M-best) */
693 : const Word16 stages, /* i : Number of stages */
694 : const Word16 w[], /* i : Weights Q8*/
695 : const Word16 N, /* i : Vector dimension */
696 : const Word16 maxN, /* i : Codebook dimension */
697 : Word16 Idx[] /* o : Indices */
698 : )
699 : {
700 : Word16 j;
701 : const Word16 *cbp;
702 : Word16 p2i;
703 : Word16 resid_buf[2 * LSFMBEST_MAX * M_MAX], *resid[2];
704 : Word16 *pTmp, *p1;
705 : Word16 *indices[2], m, s, c, c2, p_max, i, Tmp[M_MAX];
706 : Word16 idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX];
707 : Word32 dist_buf[2 * LSFMBEST_MAX], *dist[2], t1, tmp, en, ss2;
708 : Word16 ( *func_ptr )( Word32 * ); /* routine that returns the index of the worst kept candidate */
709 : Word16 N34;
710 : Word16 n, maxn, start;
711 :
712 : /*----------------------------------------------------------------*
713 : * Allocate memory for previous (parent) and current nodes.
714 : * Parent node is indexed [0], current node is indexed [1].
715 : *----------------------------------------------------------------*/
716 0 : indices[0] = idx_buf;
717 0 : indices[1] = idx_buf + maxC * stages; /*move16();*/
718 : /*vr_iset(0, idx_buf, 2*stages*maxC);*/
719 0 : set16_fx( idx_buf, 0, (Word16) ( 2 * stages * maxC ) );
720 :
721 0 : resid[0] = resid_buf;
722 0 : resid[1] = resid_buf + maxC * N; /*move16();*/
723 :
724 0 : dist[0] = dist_buf;
725 0 : dist[1] = dist_buf + maxC; /*move16();*/
726 :
727 : /*vr_iset(0, parents, maxC);*/
728 0 : set16_fx( parents, 0, maxC );
729 :
730 :
731 0 : func_ptr = msvq_enc_find_p_max_6_fx; /* default; NOTE(review): every maxC other than 8 uses the 6-entry search, so maxC is presumably 6 or 8 - confirm with callers */
732 0 : move16();
733 0 : if ( EQ_16( maxC, 8 ) )
734 : {
735 0 : func_ptr = msvq_enc_find_p_max_8_fx;
736 0 : move16();
737 : }
738 :
739 : /*----------------------------------------------------------------*
740 : * LSF weights are normalized, so it is always better to multiply it first
741 : * Set up initial distance vector
742 : *----------------------------------------------------------------*/
743 : /* Q0/16 * Qw_norm/16 << 1 >> 16 => Qwnorm-15/16 * Q0/16 << 1 => Qwnorm-14/32 * 6.5536 */
744 0 : ss2 = L_mult( mult( u[0], shl( w[0], 2 ) ), u[0] ); /* NOTE(review): first term uses shl()/L_mult() while the loop below uses shl_sat()/L_mac_sat() */
745 0 : move16(); /* NOTE(review): ss2 is a Word32 but the store is counted with move16() */
746 0 : FOR( j = 1; j < N; j++ )
747 : {
748 0 : ss2 = L_mac_sat( ss2, mult( u[j], shl_sat( w[j], 2 ) ), u[j] );
749 : }
750 :
751 : /* Set up initial error (residual) vectors */
752 0 : pTmp = resid[1]; /*move16();*/
753 0 : FOR( c = 0; c < maxC; c++ )
754 : {
755 0 : Copy( u, pTmp + c * N, N );
756 0 : dist[1][c] = ss2;
757 0 : move32();
758 : }
759 :
760 : /* Loop over all stages */
761 0 : m = 1; /* number of parent candidates: one before the first stage, maxC afterwards */
762 0 : move16();
763 0 : FOR( s = 0; s < stages; s++ )
764 : {
765 : /* codebook pointer is set to point to the codebook of the current stage */
766 0 : cbp = cb[s]; /*3Q12*1.28*/
767 0 : move16();
768 :
769 : /* Set up pointers to parent and current nodes */
770 0 : swap( indices[0], indices[1], Word16 * );
771 0 : move16();
772 0 : move16();
773 0 : move16();
774 0 : swap( resid[0], resid[1], Word16 * );
775 0 : move16();
776 0 : move16();
777 0 : move16();
778 0 : swap( dist[0], dist[1], Word32 * );
779 0 : move32();
780 0 : move32();
781 0 : move32();
782 :
783 : /* p_max points to maximum distortion node (worst of best) */
784 0 : p_max = 0;
785 0 : move16();
786 :
787 0 : n = N;
788 0 : move16();
789 0 : maxn = maxN;
790 0 : move16();
791 0 : if ( dims )
792 : {
793 0 : n = dims[s];
794 0 : move16();
795 : }
796 0 : if ( dims ) /* with per-stage dimensions the codebook dimension equals the stage dimension */
797 : {
798 0 : maxn = n;
799 0 : move16();
800 : }
801 :
802 0 : assert( ( maxn % 4 ) == 0 );
803 0 : N34 = mult( maxn, 24576 /*0.75f Q15*/ ); /* packed length of one codebook vector: 3 words per 4 dimensions */
804 :
805 0 : start = 0;
806 0 : move16();
807 0 : if ( offs )
808 : {
809 0 : start = offs[s];
810 0 : move16();
811 : }
812 :
813 0 : set16_fx( Tmp, 0, start ); /* zero Tmp outside [start, start + n) so the full-length correlation below ignores those positions */
814 0 : set16_fx( Tmp + start + n, 0, sub( N, add( start, n ) ) );
815 :
816 : /* Set distortions to a large value */
817 0 : FOR( j = 0; j < maxC; j++ )
818 : {
819 0 : dist[1][j] = MAXINT32;
820 0 : move32();
821 : }
822 :
823 0 : FOR( j = 0; j < levels[s]; j++ )
824 : {
825 : /* Compute weighted codebook element and its energy */
826 0 : en = depack_mul_values_fx( Tmp + start, w + start, cbp, n );
827 :
828 0 : cbp += N34; /* pointer is incremented */
829 :
830 : /* Iterate over all parent nodes */
831 0 : FOR( c = 0; c < m; c++ )
832 : {
833 0 : pTmp = &resid[0][c * N];
834 : /*tmp = (*pTmp++) * Tmp[0];*/
835 0 : t1 = L_mult( pTmp[0], Tmp[0] );
836 :
837 0 : FOR( i = 1; i < N; i++ )
838 : {
839 0 : t1 = L_mac( t1, pTmp[i], Tmp[i] );
840 : }
841 :
842 : BASOP_SATURATE_WARNING_OFF_EVS
843 : /*NOTE: as long as a shorter distance is found, saturation can be accepted.*/
844 0 : tmp = L_add_sat( dist[0][c], L_sub_sat( en, L_shl( t1, 1 ) ) ); /* parent distortion + entry energy - ( t1 << 1 ) */
845 0 : t1 = L_sub_sat( tmp, dist[1][p_max] ); /* t1 is reused as the difference to the worst kept distortion */
846 : BASOP_SATURATE_WARNING_ON_EVS
847 :
848 0 : IF( t1 <= 0 )
849 : {
850 : /* Replace worst */
851 0 : dist[1][p_max] = tmp;
852 0 : move32();
853 0 : indices[1][p_max * stages + s] = j;
854 0 : move16();
855 0 : add( 0, 0 ); /* result discarded; presumably only counts the index arithmetic above for the complexity tool - TODO confirm */
856 0 : mult( 0, 0 ); /* result discarded; see the note on add( 0, 0 ) */
857 0 : parents[p_max] = c;
858 0 : move16();
859 :
860 0 : p_max = ( *func_ptr )( dist[1] );
861 :
862 : } /*IF (L_sub(tmp,dist[1][p_max]) <= 0) */
863 : } /* FOR (c=0; c<m; c++) */
864 : } /* FOR (j=0; j<levels[s]; j++) */
865 :
866 : /*------------------------------------------------------------*
867 : * Compute error vectors for each node
868 : *------------------------------------------------------------*/
869 0 : pTmp = resid[1];
870 0 : FOR( c = 0; c < maxC; c++ )
871 : {
872 : /* Subtract codebook entry from residual vector of parent node and multiply with scale factor */
873 0 : p1 = resid[0] + parents[c] * N;
874 0 : p2i = indices[1][c * stages + s];
875 0 : move16();
876 :
877 0 : Copy( p1, pTmp, start ); /* positions before 'start' are taken over unchanged from the parent */
878 0 : depack_sub_values_fx( pTmp + start, p1 + start, &cb[s][p2i * N34], n );
879 0 : Copy( p1 + start + n, pTmp + start + n, sub( N, add( start, n ) ) ); /* positions after start + n are taken over unchanged as well */
880 :
881 0 : pTmp += N;
882 :
883 : /* Get indices that were used for parent node */
884 : /*mvs2s(indices[0]+parents[c]*stages, indices[1]+c*stages, s);*/
885 0 : Copy( indices[0] + parents[c] * stages, indices[1] + c * stages, s );
886 : } /* for (c=0; c<maxC; c++) */
887 0 : m = maxC;
888 0 : move16();
889 : } /* for (m=1, s=0; s<stages; s++) */
890 :
891 : /* Find the optimum candidate */
892 0 : c2 = findIndexOfMinWord32( dist[1], maxC );
893 : /*mvi2i (indices[1]+c2*stages, Idx, stages);*/
894 0 : Copy( indices[1] + c2 * stages, Idx, stages );
895 :
896 :
897 0 : return;
898 : }
899 :
900 34888 : void msvq_enc_lsf_fx64(
901 : const Word16 *const *cb, /* i : Codebook (indexed cb[*stages][levels][p]) (10Q5 * 1.28) */
902 : const Word16 dims[], /* i : Dimension of each codebook stage (NULL: full dim.) */
903 : const Word16 offs[], /* i : Starting dimension of each codebook stage (NULL: 0) */
904 : const Word16 u[], /* i : Vector to be encoded (prediction and mean removed)(Q14Q1*1.28) */
905 : const Word16 *levels, /* i : Number of levels in each stage */
906 : const Word16 maxC, /* i : Tree search size (number of candidates kept from */
907 : /* one stage to the next == M-best) */
908 : const Word16 stages, /* i : Number of stages */
909 : const Word16 w[], /* i : Weights Q8*/
910 : const Word16 N, /* i : Vector dimension */
911 : const Word16 maxN, /* i : Codebook dimension */
912 : Word16 Idx[] /* o : Indices */
913 : )
914 : {
915 : Word16 j;
916 : const Word16 *cbp;
917 : Word16 p2i;
918 : Word16 resid_buf[2 * LSFMBEST_MAX * M_MAX], *resid[2];
919 : Word16 *pTmp, *p1;
920 : Word16 *indices[2], m, s, c, c2, p_max, i;
921 : Word32 Tmp32[M_MAX];
922 : Word16 idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX];
923 : Word64 *dist_64[2], en64, tmp64;
924 : Word64 dist_buf_64[2 * LSFMBEST_MAX];
925 : Word16 ( *func_ptr64 )( Word64 * );
926 : Word16 N34;
927 : Word16 n, maxn, start;
928 :
929 : /*----------------------------------------------------------------*
930 : * Allocate memory for previous (parent) and current nodes.
931 : * Parent node is indexed [0], current node is indexed [1].
932 : *----------------------------------------------------------------*/
933 34888 : indices[0] = idx_buf;
934 34888 : indices[1] = idx_buf + maxC * stages; /*move16();*/
935 : /*vr_iset(0, idx_buf, 2*stages*maxC);*/
936 34888 : set16_fx( idx_buf, 0, (Word16) ( 2 * stages * maxC ) );
937 :
938 34888 : resid[0] = resid_buf;
939 34888 : resid[1] = resid_buf + maxC * N; /*move16();*/
940 :
941 34888 : dist_64[0] = dist_buf_64;
942 34888 : dist_64[1] = dist_buf_64 + maxC; /*move16();*/
943 :
944 : /*vr_iset(0, parents, maxC);*/
945 34888 : set16_fx( parents, 0, maxC );
946 :
947 :
948 34888 : func_ptr64 = msvq_enc_find_p_max_6_fx64;
949 34888 : move16();
950 34888 : if ( EQ_16( maxC, 8 ) )
951 : {
952 34888 : func_ptr64 = msvq_enc_find_p_max_8_fx64;
953 34888 : move16();
954 : }
955 :
956 : /*----------------------------------------------------------------*
957 : * LSF weights are normalized, so it is always better to multiply it first
958 : * Set up inital distance vector
959 : *----------------------------------------------------------------*/
960 : /* Q0/16 * Qw_norm/16 << 1 >> 16 => Qwnorm-15/16 * Q0/16 << 1 => Qwnorm-14/32 * 6.5536 */
961 : Word64 ss2_64;
962 34888 : ss2_64 = W_mult_32_16( L_mult0( u[0], w[0] ), u[0] );
963 : // Q8 * Q2.56 * 2.56 * Q1
964 558208 : FOR( j = 1; j < N; j++ )
965 : {
966 523320 : ss2_64 = W_mac_32_16( ss2_64, L_mult0( u[j], w[j] ), u[j] );
967 : }
968 :
969 : /* Set up inital error (residual) vectors */
970 34888 : pTmp = resid[1]; /*move16();*/
971 313992 : FOR( c = 0; c < maxC; c++ )
972 : {
973 279104 : Copy( u, pTmp + c * N, N );
974 279104 : dist_64[1][c] = ss2_64;
975 279104 : move64();
976 : }
977 :
978 : /* Loop over all stages */
979 34888 : m = 1;
980 34888 : move16();
981 104664 : FOR( s = 0; s < stages; s++ )
982 : {
983 : /* codebook pointer is set to point to first stage */
984 69776 : cbp = cb[s]; /*3Q12*1.28*/
985 69776 : move16();
986 :
987 : /* Set up pointers to parent and current nodes */
988 69776 : swap( indices[0], indices[1], Word16 * );
989 69776 : move16();
990 69776 : move16();
991 69776 : move16();
992 69776 : move16();
993 69776 : swap( resid[0], resid[1], Word16 * );
994 69776 : move16();
995 69776 : move16();
996 69776 : move16();
997 69776 : swap( dist_64[0], dist_64[1], Word64 * );
998 69776 : move64();
999 69776 : move64();
1000 69776 : move64();
1001 :
1002 : /* p_max points to maximum distortion node (worst of best) */
1003 69776 : p_max = 0;
1004 69776 : move16();
1005 :
1006 69776 : n = N;
1007 69776 : move16();
1008 69776 : maxn = maxN;
1009 69776 : move16();
1010 69776 : if ( dims )
1011 : {
1012 69776 : n = dims[s];
1013 69776 : move16();
1014 : }
1015 69776 : if ( dims )
1016 : {
1017 69776 : maxn = n;
1018 69776 : move16();
1019 : }
1020 :
1021 69776 : assert( ( maxn % 4 ) == 0 );
1022 69776 : N34 = mult( maxn, 24576 /*0.75f Q15*/ );
1023 :
1024 69776 : start = 0;
1025 69776 : move16();
1026 69776 : if ( offs )
1027 : {
1028 69776 : start = offs[s];
1029 69776 : move16();
1030 : }
1031 :
1032 69776 : set32_fx( Tmp32, 0, start );
1033 69776 : set32_fx( Tmp32 + start + n, 0, sub( N, add( start, n ) ) );
1034 :
1035 : /* Set distortions to a large value */
1036 627984 : FOR( j = 0; j < maxC; j++ )
1037 : {
1038 558208 : dist_64[1][j] = LLONG_MAX;
1039 558208 : move64();
1040 : }
1041 :
1042 1255968 : FOR( j = 0; j < levels[s]; j++ )
1043 : {
1044 : /* Compute weighted codebook element and its energy */
1045 1186192 : en64 = depack_mul_values_fx64( Tmp32 + start, w + start, cbp, n ); // Q8
1046 : // en64: Q8 * Q2.56 * Q2.56 * q1
1047 : // Tmp: 2.56 * Q8
1048 :
1049 1186192 : cbp += N34; /* pointer is incremented */
1050 :
1051 : /* Iterate over all parent nodes */
1052 6279840 : FOR( c = 0; c < m; c++ )
1053 : {
1054 5093648 : pTmp = &resid[0][c * N]; // this resid buffer is initial lsf values
1055 : /*tmp = (*pTmp++) * Tmp[0];*/
1056 5093648 : Word64 t164 = 0;
1057 5093648 : move64();
1058 5093648 : t164 = W_mult_32_16( Tmp32[0], pTmp[0] ); // 2.56 * Q8 * Q2.56 * Q1
1059 : // Tmp32: Q8 * Q2.56
1060 81498368 : FOR( i = 1; i < N; i++ )
1061 : {
1062 76404720 : t164 = W_mac_32_16( t164, Tmp32[i], pTmp[i] ); // 2.56 * Q8 * Q2.56 * Q1
1063 : }
1064 :
1065 5093648 : tmp64 = W_add( dist_64[0][c], W_sub( en64, W_shl( t164, 1 ) ) );
1066 5093648 : t164 = W_sub( tmp64, dist_64[1][p_max] );
1067 5093648 : IF( t164 <= 0 )
1068 : {
1069 : /* Replace worst */
1070 1482366 : dist_64[1][p_max] = tmp64;
1071 1482366 : move64();
1072 1482366 : indices[1][p_max * stages + s] = j;
1073 1482366 : move16();
1074 1482366 : parents[p_max] = c;
1075 1482366 : move16();
1076 :
1077 1482366 : p_max = ( *func_ptr64 )( dist_64[1] );
1078 :
1079 : } /*IF (L_sub(tmp,dist[1][p_max]) < 0) */
1080 : } /* FOR (c=0; c<m; c++) */
1081 : } /* FOR (j=0; j<levels[s]; j++) */
1082 :
1083 : /*------------------------------------------------------------*
1084 : * Compute error vectors for each node
1085 : *------------------------------------------------------------*/
1086 69776 : pTmp = resid[1];
1087 627984 : FOR( c = 0; c < maxC; c++ )
1088 : {
1089 : /* Subtract codebook entry from residual vector of parent node and multiply with scale factor */
1090 558208 : p1 = resid[0] + parents[c] * N;
1091 558208 : p2i = indices[1][c * stages + s];
1092 558208 : move16();
1093 :
1094 558208 : Copy( p1, pTmp, start );
1095 558208 : depack_sub_values_fx( pTmp + start, p1 + start, &cb[s][p2i * N34], n );
1096 558208 : Copy( p1 + start + n, pTmp + start + n, sub( N, add( start, n ) ) );
1097 :
1098 558208 : pTmp += N;
1099 :
1100 : /* Get indices that were used for parent node */
1101 : /*mvs2s(indices[0]+parents[c]*stages, indices[1]+c*stages, s);*/
1102 558208 : Copy( indices[0] + parents[c] * stages, indices[1] + c * stages, s );
1103 : } /* for (c=0; c<maxC; c++) */
1104 69776 : m = maxC;
1105 69776 : move16();
1106 : } /* for (m=1, s=0; s<stages; s++) */
1107 :
1108 : /* Find the optimum candidate */
1109 34888 : c2 = findIndexOfMinWord64( dist_64[1], maxC );
1110 : /*mvi2i (indices[1]+c2*stages, Idx, stages);*/
1111 34888 : Copy( indices[1] + c2 * stages, Idx, stages );
1112 :
1113 :
1114 34888 : return;
1115 : }
1116 : /*--------------------------------------------------------------------------*
1117 : * msvq_enc_ivas_fx()
1118 : *
1119 : * MSVQ encoder
1120 : *--------------------------------------------------------------------------*/
1121 :
1122 93935 : void msvq_enc_ivas_fx(
1123 : const Word16 *const *cb, /* i : Codebook (indexed cb[*stages][levels][p]) Q_cb */
1124 : const Word16 Q_cb, /* i : Codebook Q */
1125 : const Word16 dims[], /* i : Dimension of each codebook stage (NULL: full dim.) */
1126 : const Word16 offs[], /* i : Starting dimension of each codebook stage (NULL: 0) */
1127 : const Word32 u_fx[], /* i : Vector to be encoded (prediction and mean removed) (exp : u_e) */
1128 : const Word16 u_e, /* i : Exponent for Vector to be encoded */
1129 : const Word16 *levels, /* i : Number of levels in each stage */
1130 : const Word16 maxC, /* i : Tree search size (number of candidates kept from from one stage to the next == M-best) */
1131 : const Word16 stages, /* i : Number of stages */
1132 : const Word16 w[], /* i : Weights Q8 */
1133 : const Word16 N, /* i : Vector dimension */
1134 : const Word16 maxN, /* i : Codebook dimension */
1135 : const Word16 applyDCT_flag, /* i : applyDCT flag */
1136 : Word32 *invTrfMatrix_fx, /* i/o: synthesis matrix Q31 */
1137 : Word16 Idx[] /* o : Indices */
1138 : )
1139 : {
1140 : Word16 j;
1141 : const Word16 *cbp, *cb_stage;
1142 : Word32 resid_buf_fx[2 * LSFMBEST_MAX * M_MAX], *resid_fx[2];
1143 : Word32 *pTmp, *p1, *p2; // pTmp_e
1144 : Word16 pTmp_e;
1145 : Word16 *indices[2], m, s, c, c2, p_max, i;
1146 : Word16 idx_buf[2 * LSFMBEST_MAX * MAX_VQ_STAGES_USED], parents[LSFMBEST_MAX];
1147 : Word32 dist_buf_fx[2 * LSFMBEST_MAX], *dist_fx[2], tmp, en, ss2, Tmp[M_MAX];
1148 : Word16 dist_buf_e[2 * LSFMBEST_MAX], *dist_e[2];
1149 : Word16 tmp_e, tmp_n, en_e;
1150 : Word16 resid_e;
1151 : Word16 n, maxn, start;
1152 : Word64 W_acc; /*64 bit accumulator*/
1153 :
1154 93935 : Word32 *st1_syn_vec_ptr_fx = &( resid_buf_fx[1 * LSFMBEST_MAX * M_MAX] ) - FDCNG_VQ_MAX_LEN * maxC;
1155 93935 : Word32 *st1_mse_ptr_fx = &( resid_buf_fx[1 * LSFMBEST_MAX * M_MAX] ) - ( levels[0] );
1156 : Word16 indices_st1_local[FDCNG_VQ_DCT_NSEGM * 2];
1157 :
1158 : /*----------------------------------------------------------------*
1159 : * Allocate memory for previous (parent) and current nodes.
1160 : * Parent node is indexed [0], current node is indexed [1].
1161 : *----------------------------------------------------------------*/
1162 93935 : indices[0] = idx_buf;
1163 93935 : indices[1] = idx_buf + maxC * stages; /*move16();*/
1164 : /*vr_iset(0, idx_buf, 2*stages*maxC);*/
1165 93935 : set16_fx( idx_buf, 0, (Word16) ( 2 * stages * maxC ) );
1166 :
1167 93935 : resid_fx[0] = resid_buf_fx;
1168 93935 : resid_fx[1] = resid_buf_fx + maxC * N; /*move16();*/
1169 :
1170 93935 : dist_fx[0] = dist_buf_fx;
1171 93935 : dist_e[0] = dist_buf_e;
1172 93935 : dist_fx[1] = dist_buf_fx + maxC;
1173 93935 : dist_e[1] = dist_buf_e + maxC;
1174 :
1175 : /*vr_iset(0, parents, maxC);*/
1176 93935 : set16_fx( parents, 0, maxC );
1177 :
1178 : /*----------------------------------------------------------------*
1179 : * LSF weights are normalized, so it is always better to multiply it first
1180 : * Set up inital distance vector
1181 : *----------------------------------------------------------------*/
1182 93935 : W_acc = W_mult_32_32( Mpy_32_16_1( u_fx[0], shl( w[0], 2 ) ), u_fx[0] ); // 2*Qu - 6 + 1
1183 1522643 : FOR( j = 1; j < N; j++ )
1184 : {
1185 1428708 : W_acc = W_mac_32_32( W_acc, Mpy_32_16_1( u_fx[j], shl( w[j], 2 ) ), u_fx[j] ); // 2*Qu - 6 + 1
1186 : }
1187 :
1188 93935 : tmp_n = W_norm( W_acc );
1189 93935 : ss2 = W_extract_h( W_shl( W_acc, tmp_n ) );
1190 93935 : tmp_e = sub( add( shl( u_e, 1 ), 5 ), tmp_n );
1191 :
1192 : /* Set up inital error (residual) vectors */
1193 93935 : pTmp = resid_fx[1]; /*move16();*/
1194 93935 : resid_e = u_e;
1195 93935 : move16();
1196 93935 : IF( applyDCT_flag != 0 )
1197 : {
1198 2742 : resid_e = s_max( u_e, 12 );
1199 : }
1200 389450 : FOR( c = 0; c < maxC; c++ )
1201 : {
1202 295515 : Copy32( u_fx, pTmp + c * N, N );
1203 295515 : test();
1204 295515 : IF( applyDCT_flag != 0 && LT_16( u_e, 12 ) )
1205 : {
1206 21448 : scale_sig32( pTmp + c * N, N, sub( u_e, resid_e ) );
1207 : }
1208 295515 : dist_fx[1][c] = ss2;
1209 295515 : move32();
1210 295515 : dist_e[1][c] = tmp_e;
1211 295515 : move16();
1212 : }
1213 :
1214 : /* Loop over all stages */
1215 93935 : m = 1;
1216 93935 : move16();
1217 397513 : FOR( s = 0; s < stages; s++ )
1218 : {
1219 : /* codebook pointer is set to point to first stage */
1220 303578 : cbp = cb[s]; /*Q_cb*/
1221 303578 : cb_stage = cbp;
1222 :
1223 : /* Set up pointers to parent and current nodes */
1224 303578 : swap( indices[0], indices[1], Word16 * );
1225 303578 : move16();
1226 303578 : move16();
1227 303578 : move16();
1228 303578 : swap( resid_fx[0], resid_fx[1], Word32 * );
1229 303578 : move32();
1230 303578 : move32();
1231 303578 : move32();
1232 303578 : swap( dist_fx[0], dist_fx[1], Word32 * );
1233 303578 : swap( dist_e[0], dist_e[1], Word16 * );
1234 303578 : move32();
1235 303578 : move32();
1236 303578 : move32();
1237 303578 : move16();
1238 303578 : move16();
1239 303578 : move16();
1240 :
1241 : /* p_max points to maximum distortion node (worst of best) */
1242 303578 : p_max = 0;
1243 303578 : move16();
1244 :
1245 303578 : n = N;
1246 303578 : move16();
1247 303578 : maxn = maxN;
1248 303578 : move16();
1249 303578 : if ( dims )
1250 : {
1251 0 : n = dims[s];
1252 0 : move16();
1253 : }
1254 303578 : if ( dims )
1255 : {
1256 0 : maxn = n;
1257 0 : move16();
1258 : }
1259 :
1260 303578 : assert( ( maxn % 4 ) == 0 );
1261 :
1262 303578 : start = 0;
1263 303578 : move16();
1264 303578 : if ( offs )
1265 : {
1266 0 : start = offs[s];
1267 0 : move16();
1268 : }
1269 :
1270 303578 : set32_fx( Tmp, 0, start );
1271 303578 : set32_fx( Tmp + start + n, 0, sub( N, add( start, n ) ) );
1272 :
1273 : /* Set distortions to a large value */
1274 1292412 : FOR( j = 0; j < maxC; j++ )
1275 : {
1276 988834 : dist_fx[1][j] = MAX_32;
1277 988834 : move32();
1278 988834 : dist_e[1][j] = MAX_16 / 2;
1279 988834 : move16();
1280 : }
1281 :
1282 303578 : test();
1283 303578 : IF( !s && applyDCT_flag != 0 ) /* means: m==1 */
1284 : {
1285 : /* stage 1 candidates search in truncated dct24 domain without any weights */
1286 2742 : assert( N == FDCNG_VQ_MAX_LEN || N == FDCNG_VQ_MAX_LEN_WB ); /* 21 and 24 allowed */
1287 2742 : assert( maxC == 2 * FDCNG_VQ_DCT_NSEGM );
1288 2742 : p_max = msvq_stage1_dct_search_fx( u_fx, u_e, FDCNG_VQ_MAX_LEN, maxC, DCT_T2_24_XX, FDCNG_VQ_DCT_MAXTRUNC, (Word32 *) invTrfMatrix_fx, cdk1r_tr_midQ_truncQ_fx, fdcng_dct_scaleF_fx, FDCNG_VQ_DCT_NSEGM,
1289 : cdk1_ivas_cols_per_segment, cdk1_ivas_trunc_dct_cols_per_segment, cdk1_ivas_entries_per_segment, cdk1_ivas_cum_entries_per_segment, cdk_37bits_ivas_stage1_W8Qx_dct_sections,
1290 : stage1_dct_col_syn_shift, cdk1_ivas_segm_neighbour_fwd, cdk1_ivas_segm_neighbour_rev, FDCNG_VQ_DCT_NPOST, st1_mse_ptr_fx, indices_st1_local, st1_syn_vec_ptr_fx, dist_fx[1], &dist_e[1][0] );
1291 :
1292 : /* move established stage#1 indices to the global MSVQ list structure */
1293 2742 : set16_fx( dist_e[1], dist_e[1][0], maxC );
1294 24678 : FOR( c = 0; c < maxC; c++ )
1295 : {
1296 21936 : indices[1][c * stages] = indices_st1_local[c];
1297 21936 : move16();
1298 : }
1299 : }
1300 : ELSE
1301 : {
1302 17081348 : FOR( j = 0; j < levels[s]; j++ )
1303 : {
1304 : /* Compute weighted codebook element and its energy */
1305 16780512 : en = 0;
1306 16780512 : move32();
1307 16780512 : en_e = 0;
1308 16780512 : move16();
1309 16780512 : W_acc = 0;
1310 16780512 : move64();
1311 291209632 : FOR( c2 = 0; c2 < n; c2++ )
1312 : {
1313 274429120 : Tmp[start + c2] = L_mult0( shl( w[start + c2], 2 ), cbp[c2] );
1314 274429120 : move32();
1315 274429120 : W_acc = W_mac_32_16( W_acc, Tmp[start + c2], cbp[c2] );
1316 : }
1317 :
1318 16780512 : tmp_n = W_norm( W_acc );
1319 :
1320 16780512 : en = W_extract_h( W_shl( W_acc, tmp_n ) );
1321 16780512 : en_e = sub( sub( 52, shl( Q_cb, 1 ) ), tmp_n );
1322 :
1323 16780512 : cbp += maxn; /* pointer is incremented */
1324 :
1325 : /* Iterate over all parent nodes */
1326 54992576 : FOR( c = 0; c < m; c++ )
1327 : {
1328 38212064 : pTmp = &resid_fx[0][c * N];
1329 38212064 : pTmp_e = resid_e;
1330 38212064 : move16();
1331 : /*tmp = (*pTmp++) * Tmp[0];*/
1332 38212064 : W_acc = W_mult_32_32( pTmp[0], Tmp[0] );
1333 :
1334 658920448 : FOR( i = 1; i < N; i++ )
1335 : {
1336 620708384 : W_acc = W_mac_32_32( W_acc, pTmp[i], Tmp[i] );
1337 : }
1338 38212064 : tmp_n = W_norm( W_acc );
1339 38212064 : tmp = W_extract_h( W_shl( W_acc, tmp_n ) );
1340 38212064 : tmp_e = sub( add( pTmp_e, sub( Q31 - Q10, Q_cb ) ), tmp_n );
1341 :
1342 :
1343 38212064 : tmp_n = s_max( tmp_e, en_e );
1344 38212064 : tmp_n = s_max( dist_e[0][c], tmp_n );
1345 :
1346 38212064 : IF( NE_16( dist_e[0][c], MAX_16 / 2 ) )
1347 : {
1348 38212064 : tmp_n = add( tmp_n, 2 );
1349 38212064 : tmp = L_sub( L_shl( en, sub( en_e, tmp_n ) ), L_shl( tmp, add( sub( tmp_e, tmp_n ), 1 ) ) );
1350 38212064 : tmp = L_add( tmp, L_shl( dist_fx[0][c], sub( dist_e[0][c], tmp_n ) ) );
1351 : }
1352 : ELSE
1353 : {
1354 0 : tmp = MAX_32 - 1;
1355 0 : move32();
1356 0 : tmp_n = MAX_16 / 2;
1357 0 : move32();
1358 : }
1359 :
1360 38212064 : IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( tmp, tmp_n, dist_fx[1][p_max], dist_e[1][p_max] ), -1 ) )
1361 : {
1362 : /* Replace worst */
1363 4551620 : dist_fx[1][p_max] = tmp;
1364 4551620 : move32();
1365 4551620 : dist_e[1][p_max] = tmp_n;
1366 4551620 : move16();
1367 4551620 : indices[1][p_max * stages + s] = j;
1368 4551620 : move16();
1369 4551620 : parents[p_max] = c;
1370 4551620 : move16();
1371 :
1372 4551620 : p_max = 0;
1373 4551620 : move16();
1374 4551620 : tmp_e = p_max;
1375 4551620 : move16();
1376 :
1377 4551620 : tmp_n = dist_e[1][0];
1378 4551620 : move16();
1379 16521740 : FOR( c2 = 1; c2 < maxC; c2++ )
1380 : {
1381 11970120 : if ( EQ_16( BASOP_Util_Cmp_Mant32Exp( dist_fx[1][c2], dist_e[1][c2], dist_fx[1][p_max], dist_e[1][p_max] ), 1 ) )
1382 : {
1383 4535435 : p_max = c2;
1384 4535435 : move16();
1385 : }
1386 11970120 : test();
1387 11970120 : if ( GT_16( dist_e[1][c2], tmp_n ) && NE_16( dist_e[1][c2], MAX_16 / 2 ) )
1388 : {
1389 206775 : tmp_n = dist_e[1][c2];
1390 206775 : move16();
1391 : }
1392 : }
1393 21073360 : FOR( c2 = 0; c2 < maxC; c2++ )
1394 : {
1395 16521740 : IF( NE_16( dist_e[1][c2], MAX_16 / 2 ) )
1396 : {
1397 15297282 : dist_fx[1][c2] = L_shl( dist_fx[1][c2], sub( dist_e[1][c2], tmp_n ) );
1398 15297282 : move32();
1399 15297282 : dist_e[1][c2] = tmp_n;
1400 15297282 : move16();
1401 : }
1402 : }
1403 : } /*IF (L_sub(tmp,dist[1][p_max]) < 0) */
1404 : } /* FOR (c=0; c<m; c++) */
1405 : } /* FOR (j=0; j<levels[s]; j++) */
1406 : }
1407 :
1408 :
1409 : /*------------------------------------------------------------*
1410 : * Compute error vectors for each node
1411 : *------------------------------------------------------------*/
1412 303578 : pTmp = resid_fx[1];
1413 1292412 : FOR( c = 0; c < maxC; c++ )
1414 : {
1415 :
1416 : /* Subtract codebook entry from residual vector of parent node and multiply with scale factor */
1417 988834 : p1 = resid_fx[0] + parents[c] * N;
1418 988834 : p2 = NULL;
1419 988834 : IF( cb_stage != NULL )
1420 : {
1421 : // p2 = cb_stage + ( indices[1][c * stages + s] ) * maxn; /* regular ptr init */
1422 966898 : Copy_Scale_sig_16_32_DEPREC( cb_stage + ( indices[1][c * stages + s] ) * maxn, Tmp, N, 0 );
1423 966898 : scale_sig32( Tmp, N, sub( sub( Q31, Q_cb ), resid_e ) );
1424 966898 : p2 = Tmp;
1425 : }
1426 988834 : test();
1427 988834 : IF( s == 0 && applyDCT_flag != 0 )
1428 : {
1429 21936 : p2 = (Word32 *) &( st1_syn_vec_ptr_fx[c * FDCNG_VQ_MAX_LEN] ); /*ptr init of stage 1 */
1430 : }
1431 :
1432 988834 : Copy32( p1, pTmp, start );
1433 17710258 : FOR( j = 0; j < n; j++ )
1434 : {
1435 16721424 : pTmp[start + j] = L_sub( p1[start + j], p2[j] );
1436 16721424 : move32();
1437 : }
1438 988834 : Copy32( p1 + start + n, pTmp + start + n, sub( N, add( start, n ) ) );
1439 :
1440 988834 : pTmp += N;
1441 :
1442 : /* Get indices that were used for parent node */
1443 : /*mvs2s(indices[0]+parents[c]*stages, indices[1]+c*stages, s);*/
1444 988834 : Copy( indices[0] + parents[c] * stages, indices[1] + c * stages, s );
1445 : } /* for (c=0; c<maxC; c++) */
1446 : /* recalc MSE for WB(0..20) coeffs ,
1447 : essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search,
1448 : excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep WB MSEs update for the subsequent stages
1449 : */
1450 : /* recalc MSE for WB(0..20) coeffs ,
1451 : essentially subtract res21^2 ,res22^2, res23^2 that was included in stage1 MSE in the DCT24 domain truncated search,
1452 : excludes the waveform contributions at pos 21,22,23 to the MSE, important to keep WB MSEs update for the subsequent stages
1453 : */
1454 303578 : test();
1455 303578 : IF( s == 0 && applyDCT_flag != 0 && n == FDCNG_VQ_MAX_LEN_WB )
1456 : {
1457 751 : p_max = msvq_stage1_dct_recalc_candidates_fdcng_wb_fx( st1_syn_vec_ptr_fx, resid_e, u_fx, u_e, maxC, dist_fx[1], &dist_e[1][0] );
1458 751 : set16_fx( dist_e[1], dist_e[1][0], maxC );
1459 : }
1460 303578 : m = maxC;
1461 303578 : move16();
1462 : } /* for (m=1, s=0; s<stages; s++) */
1463 :
1464 : /* Find the optimum candidate */
1465 93935 : c2 = minimum_32_fx( dist_fx[1], maxC, NULL );
1466 : /*mvi2i (indices[1]+c2*stages, Idx, stages);*/
1467 93935 : Copy( indices[1] + c2 * stages, Idx, stages );
1468 :
1469 :
1470 93935 : return;
1471 : }
1472 :
1473 :
1474 : /*--------------------------------------------------------------------------*
1475 : * lsf_msvq_ma_encprm_fx()
1476 : *
1477 : *
1478 : *--------------------------------------------------------------------------*/
1479 :
1480 976 : Word16 lsf_msvq_ma_encprm_fx(
1481 : BSTR_ENC_HANDLE hBstr,
1482 : Word16 *param_lpc, // Q0
1483 : Word16 core,
1484 : Word16 acelp_mode,
1485 : Word16 acelp_midLpc,
1486 : Word16 *bits_param_lpc,
1487 : Word16 no_indices )
1488 : {
1489 : Word16 i, nbits_lpc;
1490 : Word16 bits_midlpc;
1491 :
1492 976 : bits_midlpc = MIDLSF_NBITS;
1493 976 : move16();
1494 976 : nbits_lpc = 0;
1495 976 : move16();
1496 :
1497 3989 : FOR( i = 0; i < no_indices; i++ )
1498 : {
1499 :
1500 3013 : push_next_indice( hBstr, *param_lpc, bits_param_lpc[i] );
1501 3013 : param_lpc++;
1502 3013 : nbits_lpc = add( nbits_lpc, bits_param_lpc[i] );
1503 : }
1504 976 : IF( NE_16( acelp_mode, VOICED ) )
1505 : {
1506 722 : test();
1507 722 : IF( ( core == ACELP_CORE ) && acelp_midLpc )
1508 : {
1509 :
1510 342 : push_next_indice( hBstr, *param_lpc, bits_midlpc );
1511 342 : nbits_lpc = add( nbits_lpc, bits_midlpc );
1512 : }
1513 : }
1514 :
1515 976 : return nbits_lpc;
1516 : }
1517 :
1518 :
1519 112129 : Word16 lsf_msvq_ma_encprm_ivas_fx(
1520 : BSTR_ENC_HANDLE hBstr,
1521 : const Word16 *param_lpc, // Q0
1522 : const Word16 core,
1523 : const Word16 acelp_mode,
1524 : const Word16 acelp_midLpc,
1525 : const Word16 *bits_param_lpc,
1526 : const Word16 no_indices )
1527 : {
1528 : Word16 i, nbits_lpc;
1529 : Word16 bits_midlpc;
1530 :
1531 112129 : bits_midlpc = MIDLSF_NBITS;
1532 112129 : move16();
1533 112129 : nbits_lpc = 0;
1534 112129 : move16();
1535 :
1536 461085 : FOR( i = 0; i < no_indices; i++ )
1537 : {
1538 :
1539 348956 : push_next_indice( hBstr, *param_lpc, bits_param_lpc[i] );
1540 348956 : param_lpc++;
1541 348956 : nbits_lpc = add( nbits_lpc, bits_param_lpc[i] );
1542 : }
1543 112129 : IF( NE_16( acelp_mode, VOICED ) )
1544 : {
1545 88354 : test();
1546 88354 : IF( ( core == ACELP_CORE ) && acelp_midLpc )
1547 : {
1548 :
1549 0 : push_next_indice( hBstr, *param_lpc, bits_midlpc );
1550 0 : nbits_lpc = add( nbits_lpc, bits_midlpc );
1551 : }
1552 : }
1553 :
1554 112129 : return nbits_lpc;
1555 : }
1556 :
1557 :
1558 : /*--------------------------------------------------------------------------*
1559 : * midlsf_enc_fx()
1560 : *
1561 : *
1562 : *--------------------------------------------------------------------------*/
1563 :
1564 708 : void midlsf_enc_fx(
1565 : const Word16 qlsf0[], /* i: quantized lsf coefficients (3Q12) */
1566 : const Word16 qlsf1[], /* i: quantized lsf coefficients (3Q12) */
1567 : const Word16 lsf[], /* i: lsf coefficients (3Q12) */
1568 : Word16 *idx, /* o: codebook index */
1569 : const Word16 lpcorder, /* i: order of the lpc */
1570 : const Word32 *Bin_Ener_128_fx, // Q_ener
1571 : const Word16 Q_ener,
1572 : const Word8 narrowBand,
1573 : const Word32 sr_core,
1574 : const Word16 coder_type )
1575 : {
1576 : Word32 err, err_min, L_tmp;
1577 : Word16 k, k1, j, tmp, size, qlsf[M], wghts[M];
1578 : const Word16 *ratio;
1579 :
1580 708 : IF( EQ_16( coder_type, UNVOICED ) )
1581 : {
1582 27 : ratio = tbl_mid_unv_wb_5b_fx;
1583 : }
1584 : ELSE
1585 : {
1586 681 : ratio = tbl_mid_gen_wb_5b_fx;
1587 : }
1588 708 : size = 32;
1589 708 : move16();
1590 :
1591 : /* Weights */
1592 708 : Unified_weighting_fx(
1593 : Bin_Ener_128_fx, /* i : FFT Bin energy 128 bins in two sets Q_ener */
1594 : Q_ener,
1595 : lsf, /* i : LSF vector x2.56 */
1596 : wghts, /* o : LP weighting filter (numerator) Q8 */
1597 : narrowBand, /* i : flag for Narrowband */
1598 708 : sub( coder_type, UNVOICED ) == 0, /* i : flag for Unvoiced frame */
1599 : sr_core, /* i : sampling rate of core-coder */
1600 : lpcorder /* i : LP order */
1601 : );
1602 708 : err_min = MAXINT32;
1603 708 : move16();
1604 708 : *idx = 0;
1605 708 : move16();
1606 708 : k1 = 0;
1607 708 : move16();
1608 23364 : FOR( k = 0; k < size; k++ )
1609 : {
1610 22656 : err = L_deposit_l( 0 );
1611 :
1612 385152 : FOR( j = 0; j < M; j++ )
1613 : {
1614 : /* qlsf[j] = (1.0f - ratio[k*M+j]) * qlsf0[j] + ratio[k*M+j] * qlsf1[j]; */
1615 362496 : L_tmp = L_mult( sub( 0x2000, ratio[k1 + j] ), qlsf0[j] );
1616 362496 : L_tmp = L_mac( L_tmp, ratio[k1 + j], qlsf1[j] );
1617 362496 : qlsf[j] = round_fx( L_shl( L_tmp, 2 ) );
1618 362496 : test();
1619 362496 : test();
1620 362496 : IF( j > 0 && LT_16( j, M ) && LT_16( qlsf[j], add( qlsf[j - 1], LSF_GAP_MID_FX ) ) )
1621 : {
1622 1949 : qlsf[j] = add( qlsf[j - 1], LSF_GAP_MID_FX );
1623 : }
1624 :
1625 362496 : tmp = sub( lsf[j], qlsf[j] );
1626 : /* err += wghts[j] * ftemp * ftemp; */
1627 : /* tmp is usually very small, we can have some extra precision with very rare saturation */
1628 362496 : tmp = shl_sat( tmp, 4 );
1629 362496 : tmp = mult_r_sat( tmp, tmp );
1630 362496 : err = L_mac( err, tmp, wghts[j] );
1631 : }
1632 22656 : err = L_shl_sat( err, 2 );
1633 :
1634 : /* err = L_shl(err,Wscale); */
1635 22656 : err = Mult_32_16( err, LSF_1_OVER_256SQ );
1636 : /* err = Mult_32_16(err,Wmult); */
1637 :
1638 22656 : IF( LT_32( err, err_min ) )
1639 : {
1640 2854 : err_min = L_add( err, 0 );
1641 2854 : *idx = k;
1642 2854 : move16();
1643 : }
1644 22656 : k1 += M;
1645 22656 : move16();
1646 : }
1647 :
1648 708 : return;
1649 : }
1650 :
1651 :
1652 : /*--------------------------------------------------------------------------*
1653 : * Q_lsf_tcxlpc_fx()
1654 : *
1655 : * Returns: number of indices
1656 : *--------------------------------------------------------------------------*/
1657 :
1658 0 : Word16 Q_lsf_tcxlpc_fx(
1659 : /* const */ Word16 lsf[], /* i : original lsf 14Q1 * 1.28 */
1660 : Word16 lsf_q[], /* o : quantized lsf (14Q1*1.28)*/
1661 : Word16 lsp_q_ind[], /* o : quantized lsp (w/o MA prediction) */
1662 : Word16 indices[], /* o : VQ indices */
1663 : const Word16 lpcorder, /* i : LPC order */
1664 : const Word16 narrowband, /* i : narrowband flag */
1665 : const Word16 cdk, /* i : codebook selector */
1666 : const Word16 mem_MA[], /* i : MA memory */
1667 : const Word16 coder_type,
1668 : const Word32 *Bin_Ener, // Q_ener
1669 : const Word16 Q_ener )
1670 : {
1671 : Word16 weights[M + 1];
1672 : Word16 pred[M16k];
1673 : Word16 i;
1674 : Word16 NumIndices;
1675 : Word16 lsf_q_ind[M16k];
1676 : const Word16 *means;
1677 : Word16 lsf_rem[M];
1678 : Word16 lsf_rem_q_ind[M];
1679 :
1680 0 : Unified_weighting_fx( Bin_Ener, Q_ener, lsf, weights, narrowband, (Word16) EQ_16( coder_type, UNVOICED ), 12800, M );
1681 :
1682 0 : move16();
1683 0 : NumIndices = 0;
1684 :
1685 : /* Put disabled flag */
1686 0 : indices[NumIndices] = 0;
1687 0 : move16();
1688 0 : NumIndices = add( NumIndices, 1 );
1689 :
1690 : /* Inter-frame prediction */
1691 :
1692 0 : means = lsf_means[narrowband]; /* 14Q1 * 1.28 */
1693 :
1694 0 : FOR( i = 0; i < lpcorder; ++i )
1695 : {
1696 0 : pred[i] = add( means[i], mult_r( MU_MA_FX, mem_MA[i] ) ); /* 14Q1 * 1.28 + ( 14Q1 * 1.28 * Q15 ) = 14Q1 * 1.28*/
1697 : }
1698 :
1699 : /* Subtract prediction */
1700 :
1701 0 : FOR( i = 0; i < lpcorder; ++i )
1702 : {
1703 0 : lsf[i] = sub( lsf[i], pred[i] ); /* 14Q1 * 1.28 */
1704 : }
1705 :
1706 :
1707 0 : msvq_enc_fx(
1708 0 : lsf_codebook[narrowband][cdk],
1709 : lsf_dims,
1710 : lsf_offs,
1711 : lsf,
1712 : lsf_numlevels,
1713 : kMaxC,
1714 : TCXLPC_NUMSTAGES,
1715 : weights,
1716 : lpcorder,
1717 : lpcorder,
1718 0 : indices + NumIndices );
1719 0 : msvq_dec(
1720 0 : lsf_codebook[narrowband][cdk],
1721 : lsf_dims,
1722 : lsf_offs,
1723 : TCXLPC_NUMSTAGES,
1724 : lpcorder,
1725 : lpcorder,
1726 0 : indices + NumIndices,
1727 : lsf_q );
1728 0 : NumIndices = add( NumIndices, TCXLPC_NUMSTAGES );
1729 :
1730 0 : FOR( i = 0; i < lpcorder; ++i )
1731 : {
1732 0 : lsf_q_ind[i] = lsf_q[i]; /*(14Q1*1.28)*/
1733 0 : move16();
1734 : }
1735 :
1736 : /* Update flag */
1737 0 : indices[0] = lsf_ind_is_active( lsf_q_ind, lsf_means[narrowband], narrowband, cdk );
1738 0 : move16();
1739 :
1740 : /* Get residual vector */
1741 0 : FOR( i = 0; i < lpcorder; ++i )
1742 : {
1743 0 : lsf_rem[i] = add( sub( pred[i], lsf_means[narrowband][i] ), sub( lsf[i], lsf_q_ind[i] ) );
1744 : }
1745 :
1746 : /* Quantize using extra stage(s) */
1747 0 : msvq_enc_fx(
1748 0 : lsf_ind_codebook[narrowband][cdk],
1749 : lsf_ind_dims,
1750 : lsf_ind_offs,
1751 : lsf_rem,
1752 : lsf_ind_numlevels,
1753 : kMaxC,
1754 : TCXLPC_IND_NUMSTAGES,
1755 : weights,
1756 : lpcorder,
1757 : lpcorder,
1758 0 : indices + NumIndices );
1759 : /* Only add contribution if flag is enabled */
1760 0 : IF( indices[0] )
1761 : {
1762 : /* Decode */
1763 0 : msvq_dec(
1764 0 : lsf_ind_codebook[narrowband][cdk],
1765 : lsf_ind_dims,
1766 : lsf_ind_offs,
1767 : TCXLPC_IND_NUMSTAGES,
1768 : lpcorder,
1769 : lpcorder,
1770 0 : indices + NumIndices,
1771 : lsf_rem_q_ind );
1772 0 : NumIndices = add( NumIndices, TCXLPC_IND_NUMSTAGES );
1773 :
1774 : /* Add to MA-removed vector */
1775 0 : FOR( i = 0; i < lpcorder; ++i )
1776 : {
1777 0 : lsf_q_ind[i] = add( lsf_q_ind[i], lsf_rem_q_ind[i] );
1778 : }
1779 : }
1780 :
1781 : /* Add inter-frame prediction */
1782 0 : FOR( i = 0; i < lpcorder; ++i )
1783 : {
1784 0 : lsf_q[i] = add( lsf_q[i], pred[i] );
1785 0 : lsf[i] = add( lsf[i], pred[i] );
1786 : }
1787 :
1788 0 : reorder_lsf_fx( lsf_q, TCXLPC_LSF_GAP, lpcorder, INT_FS_FX );
1789 :
1790 0 : FOR( i = 0; i < lpcorder; ++i )
1791 : {
1792 0 : lsf_q_ind[i] = add( lsf_q_ind[i], lsf_means[narrowband][i] );
1793 : }
1794 0 : reorder_lsf_fx( lsf_q_ind, TCXLPC_LSF_GAP, lpcorder, INT_FS_FX );
1795 :
1796 0 : IF( lsp_q_ind )
1797 : {
1798 0 : E_LPC_lsf_lsp_conversion /*lsf2lsp*/ ( lsf_q_ind, lsp_q_ind, lpcorder );
1799 : }
1800 :
1801 0 : return NumIndices;
1802 : }
1803 :
1804 :
1805 17444 : Word16 Q_lsf_tcxlpc_ivas_fx(
1806 : /* const */ Word16 lsf[], /* i : original lsf */
1807 : Word16 lsf_q[], /* o : quantized lsf */
1808 : Word16 lsp_q_ind[], /* o : quantized lsp (w/o MA prediction) */
1809 : Word16 indices[], /* o : VQ indices */
1810 : const Word16 lpcorder, /* i : LPC order */
1811 : const Word16 narrowband, /* i : narrowband flag */
1812 : const Word16 cdk, /* i : codebook selector */
1813 : const Word16 mem_MA[], /* i : MA memory */
1814 : const Word16 coder_type,
1815 : const Word32 *Bin_Ener,
1816 : const Word16 Q_ener )
1817 : {
1818 : Word16 weights[M + 1];
1819 : Word16 pred[M16k];
1820 : Word16 i;
1821 : Word16 NumIndices;
1822 : Word16 lsf_q_ind[M16k];
1823 : const Word16 *means;
1824 : Word16 lsf_rem[M];
1825 : Word16 lsf_rem_q_ind[M];
1826 :
1827 17444 : Unified_weighting_fx( &Bin_Ener[L_FFT / 2], Q_ener, lsf, weights, narrowband, (Word16) EQ_16( coder_type, UNVOICED ), 12800, M );
1828 :
1829 17444 : move16();
1830 17444 : NumIndices = 0;
1831 :
1832 : /* Put disabled flag */
1833 17444 : indices[NumIndices] = 0;
1834 17444 : move16();
1835 17444 : NumIndices = add( NumIndices, 1 );
1836 :
1837 : /* Inter-frame prediction */
1838 :
1839 17444 : means = lsf_means[narrowband]; /* 14Q1 * 1.28 */
1840 :
1841 296548 : FOR( i = 0; i < lpcorder; ++i )
1842 : {
1843 279104 : pred[i] = add( means[i], mult_r( MU_MA_FX, mem_MA[i] ) ); /* 14Q1 * 1.28 + ( 14Q1 * 1.28 * Q15 ) = 14Q1 * 1.28*/
1844 279104 : move16();
1845 : }
1846 :
1847 : /* Subtract prediction */
1848 :
1849 296548 : FOR( i = 0; i < lpcorder; ++i )
1850 : {
1851 279104 : lsf[i] = sub( lsf[i], pred[i] ); /* 14Q1 * 1.28 */
1852 279104 : move16();
1853 : }
1854 :
1855 17444 : msvq_enc_lsf_fx64(
1856 17444 : lsf_codebook[narrowband][cdk],
1857 : lsf_dims,
1858 : lsf_offs,
1859 : lsf,
1860 : lsf_numlevels,
1861 : kMaxC,
1862 : TCXLPC_NUMSTAGES,
1863 : weights,
1864 : lpcorder,
1865 : lpcorder,
1866 17444 : indices + NumIndices );
1867 17444 : msvq_dec(
1868 17444 : lsf_codebook[narrowband][cdk],
1869 : lsf_dims,
1870 : lsf_offs,
1871 : TCXLPC_NUMSTAGES,
1872 : lpcorder,
1873 : lpcorder,
1874 17444 : indices + NumIndices,
1875 : lsf_q );
1876 17444 : NumIndices = add( NumIndices, TCXLPC_NUMSTAGES );
1877 :
1878 296548 : FOR( i = 0; i < lpcorder; ++i )
1879 : {
1880 279104 : lsf_q_ind[i] = lsf_q[i];
1881 279104 : move16();
1882 : }
1883 :
1884 : /* Update flag */
1885 17444 : indices[0] = lsf_ind_is_active( lsf_q_ind, lsf_means[narrowband], narrowband, cdk );
1886 17444 : move16();
1887 :
1888 : /* Get residual vector */
1889 296548 : FOR( i = 0; i < lpcorder; ++i )
1890 : {
1891 279104 : lsf_rem[i] = add( sub( pred[i], lsf_means[narrowband][i] ), sub( lsf[i], lsf_q_ind[i] ) );
1892 279104 : move16();
1893 : }
1894 :
1895 : /* Quantize using extra stage(s) */
1896 17444 : msvq_enc_lsf_fx64(
1897 17444 : lsf_ind_codebook[narrowband][cdk],
1898 : lsf_ind_dims,
1899 : lsf_ind_offs,
1900 : lsf_rem,
1901 : lsf_ind_numlevels,
1902 : kMaxC,
1903 : TCXLPC_IND_NUMSTAGES,
1904 : weights,
1905 : lpcorder,
1906 : lpcorder,
1907 17444 : indices + NumIndices );
1908 : /* Only add contribution if flag is enabled */
1909 17444 : IF( indices[0] )
1910 : {
1911 : /* Decode */
1912 4883 : msvq_dec(
1913 4883 : lsf_ind_codebook[narrowband][cdk],
1914 : lsf_ind_dims,
1915 : lsf_ind_offs,
1916 : TCXLPC_IND_NUMSTAGES,
1917 : lpcorder,
1918 : lpcorder,
1919 4883 : indices + NumIndices,
1920 : lsf_rem_q_ind );
1921 4883 : NumIndices = add( NumIndices, TCXLPC_IND_NUMSTAGES );
1922 :
1923 : /* Add to MA-removed vector */
1924 83011 : FOR( i = 0; i < lpcorder; ++i )
1925 : {
1926 78128 : lsf_q_ind[i] = add( lsf_q_ind[i], lsf_rem_q_ind[i] );
1927 78128 : move16();
1928 : }
1929 : }
1930 :
1931 : /* Add inter-frame prediction */
1932 296548 : FOR( i = 0; i < lpcorder; ++i )
1933 : {
1934 279104 : lsf_q[i] = add( lsf_q[i], pred[i] );
1935 279104 : lsf[i] = add( lsf[i], pred[i] );
1936 279104 : move16();
1937 279104 : move16();
1938 : }
1939 :
1940 17444 : reorder_lsf_fx( lsf_q, TCXLPC_LSF_GAP, lpcorder, INT_FS_FX );
1941 :
1942 296548 : FOR( i = 0; i < lpcorder; ++i )
1943 : {
1944 279104 : lsf_q_ind[i] = add( lsf_q_ind[i], lsf_means[narrowband][i] );
1945 279104 : move16();
1946 : }
1947 17444 : reorder_lsf_fx( lsf_q_ind, TCXLPC_LSF_GAP, lpcorder, INT_FS_FX );
1948 :
1949 17444 : IF( lsp_q_ind )
1950 : {
1951 17444 : E_LPC_lsf_lsp_conversion /*lsf2lsp*/ ( lsf_q_ind, lsp_q_ind, lpcorder );
1952 : }
1953 :
1954 17444 : return NumIndices;
1955 : }
1956 :
1957 :
1958 : /*--------------------------------------------------------------------------*
1959 : * enc_lsf_tcxlpc_fx()
1960 : *
1961 : * Returns: number of bits written
1962 : *--------------------------------------------------------------------------*/
1963 :
1964 0 : Word16 enc_lsf_tcxlpc_fx(
1965 : Word16 **indices, /* i : Ptr to VQ indices */
1966 : BSTR_ENC_HANDLE hBstr /* i/o: encoder bitstream handle */
1967 : )
1968 : {
1969 : Word16 i, NumBits;
1970 :
1971 : Word16 flag;
1972 :
1973 : /* Read flag */
1974 0 : flag = ( *indices )[0];
1975 0 : move16();
1976 0 : ++*indices;
1977 :
1978 0 : NumBits = TCXLPC_NUMBITS;
1979 0 : move16();
1980 0 : FOR( i = 0; i < TCXLPC_NUMSTAGES; ++i )
1981 : {
1982 0 : push_next_indice( hBstr, **indices, lsf_numbits[i] );
1983 0 : ++*indices;
1984 : }
1985 :
1986 0 : IF( flag )
1987 : {
1988 0 : NumBits = add( NumBits, TCXLPC_IND_NUMBITS );
1989 0 : FOR( i = 0; i < TCXLPC_IND_NUMSTAGES; ++i )
1990 : {
1991 0 : push_next_indice( hBstr, **indices, lsf_ind_numbits[i] );
1992 0 : ++*indices;
1993 : }
1994 : }
1995 0 : return NumBits;
1996 : }
1997 :
1998 :
1999 17444 : Word16 enc_lsf_tcxlpc_ivas_fx(
2000 : const Word16 **indices, /* i : Ptr to VQ indices */
2001 : BSTR_ENC_HANDLE hBstr /* i/o: encoder bitstream handle */
2002 : )
2003 : {
2004 : Word16 i, NumBits;
2005 :
2006 : Word16 flag;
2007 :
2008 : /* Read flag */
2009 17444 : flag = ( *indices )[0];
2010 17444 : move16();
2011 17444 : ++*indices;
2012 :
2013 17444 : NumBits = TCXLPC_NUMBITS;
2014 17444 : move16();
2015 69776 : FOR( i = 0; i < TCXLPC_NUMSTAGES; ++i )
2016 : {
2017 52332 : push_next_indice( hBstr, **indices, lsf_numbits[i] );
2018 52332 : ++*indices;
2019 : }
2020 :
2021 17444 : IF( flag )
2022 : {
2023 4883 : NumBits = add( NumBits, TCXLPC_IND_NUMBITS );
2024 9766 : FOR( i = 0; i < TCXLPC_IND_NUMSTAGES; ++i )
2025 : {
2026 4883 : push_next_indice( hBstr, **indices, lsf_ind_numbits[i] );
2027 4883 : ++*indices;
2028 : }
2029 : }
2030 17444 : return NumBits;
2031 : }
2032 :
2033 :
2034 : /*--------------------------------------------------------------------------*
2035 : * lsf_bctcvq_encprm_fx()
2036 : *
2037 : *
2038 : *--------------------------------------------------------------------------*/
2039 :
2040 270 : Word16 lsf_bctcvq_encprm_fx(
2041 : BSTR_ENC_HANDLE hBstr,
2042 : Word16 *param_lpc, // Q0
2043 : Word16 *bits_param_lpc,
2044 : Word16 no_indices )
2045 : {
2046 : Word16 i, nbits_lpc;
2047 :
2048 270 : nbits_lpc = 0;
2049 :
2050 2970 : FOR( i = 0; i < no_indices; i++ )
2051 : {
2052 2700 : push_next_indice( hBstr, *param_lpc, bits_param_lpc[i] );
2053 2700 : param_lpc++;
2054 2700 : nbits_lpc = add( nbits_lpc, bits_param_lpc[i] );
2055 : }
2056 :
2057 270 : return nbits_lpc;
2058 : }
2059 :
2060 :
2061 0 : Word16 lsf_bctcvq_encprm_ivas_fx(
2062 : BSTR_ENC_HANDLE hBstr,
2063 : const Word16 *param_lpc, // Q0
2064 : const Word16 *bits_param_lpc,
2065 : const Word16 no_indices )
2066 : {
2067 : Word16 i, nbits_lpc;
2068 :
2069 0 : nbits_lpc = 0;
2070 0 : move16();
2071 :
2072 0 : FOR( i = 0; i < no_indices; i++ )
2073 : {
2074 0 : push_next_indice( hBstr, *param_lpc, bits_param_lpc[i] );
2075 0 : param_lpc++;
2076 0 : nbits_lpc = add( nbits_lpc, bits_param_lpc[i] );
2077 : }
2078 :
2079 0 : return nbits_lpc;
2080 : }
|