Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 : #include <stdint.h>
5 : #include "options.h" /* Compilation switches */
6 : #include "cnst.h"
7 : #include "prot_fx.h" /* Function prototypes */
8 : #include "prot_fx_enc.h" /* Function prototypes */
9 : #include "rom_com.h"
10 :
11 :
12 : /* PVQ MIXED_SEARCH_LOOP:
13 : low precision 16/32 bit search + energy-selective high precision 32/64 bit search,
14 : mixed performance, 10 dB SEGSNR better than using the low precision loop only,
15 : high precision is active if k>=128 and the accumulated energy is high enough,
16 : it comes at a controlled complexity cost, as dimensions decrease for high k's */
17 :
18 : /* o : maximum value in the input vector */
/* Scans vec[0..lvec-1] and returns the largest element, folding the values with s_max(). */
/* vec[0] is read before the loop, so the caller must pass lvec >= 1. */
19 1662 : static Word16 max_val_fx(
20 : const Word16 *vec, /* i : input vector */
21 : const Word16 lvec /* i : length of input vector */
22 : )
23 : {
24 : Word16 j, tmp;
25 :
26 1662 : tmp = vec[0]; /* seed the running maximum with the first element */
27 1662 : move16();
28 6616 : FOR( j = 1; j < lvec; j++ )
29 : {
30 4954 : tmp = s_max( vec[j], tmp ); /* fold the current element into the running maximum */
31 : }
32 1662 : return tmp;
33 : }
34 :
/* Returns 1 when Ptot > ( 8223 * L ) >> 14 and 0 otherwise (the C comparison result is returned as Word16). */
/* 8223 / 2^14 is about 0.502, so the flag is set when the pulse count exceeds roughly half the dimension */
/* (assuming L_mult0 is the plain, non-doubling 16x16 product -- TODO confirm against the BASOP definition). */
/* Both callers in this file pass ( dim, pulses ) and store the result in high_pulse_density_flag. */
35 292905 : static Word16 pyramidSearchProjInit_fx(
36 : const Word16 L, /* i : callers pass the vector dimension */
37 : const Word16 Ptot ) /* i : callers pass the number of allocated pulses */
38 : {
39 292905 : return ( sub( Ptot, extract_l( L_shr( L_mult0( 8223, (Word32) L ), 14 ) ) ) > 0 );
40 : }
41 :
42 :
43 : /* The inner search loop for one single additional unit pulse, starting from pulse_tot,
44 : with information about the required energy precision/down scaling for the dim loop in en_dn_shift,
45 : and the current max_xabs absolute value to be used for a near-optimal correlation upscaling.
46 : Returns the index of the best positioned unit pulse in imax.
47 : */
/* Adds one unit pulse to y[] at the index i in 0..dim-1 that wins the cross-multiplied comparison */
/* CorrSq(i) * EnBest > CorrSqBest * En(i), i.e. the index with the largest CorrSq/Energy ratio. */
/* Side effects: *L_xy_ptr, *L_yy_ptr, y[imax] and *pulse_tot_ptr are updated for the chosen index. */
/* Returns the chosen index imax. */
/* NOTE(review): imax starts at -1 and is only overwritten inside the dim loops; with dim <= 0 it would be used */
/* as an array index in the final update section -- callers are presumably expected to pass dim >= 1. */
48 1178512 : static Word16 one_pulse_search(
49 : const Word16 dim, /* vector dimension */
50 : const Word16 *x_abs, /* absolute vector values */
51 : Word16 *y, /* output vector */
52 : Word16 *pulse_tot_ptr, /* i/o: total pulse count, incremented by 1 on return */
53 : Word32 *L_xy_ptr, /* accumulated correlation */
54 : Word32 *L_yy_ptr, /* accumulated energy */
55 : Word16 high_prec_active, /* i : 0 selects the 16/32 bit loop, any other value the 32/64 bit loop */
56 : Word16 en_dn_shift, /* i : only consumed by the dummy assignment below */
57 : Word16 max_xabs ) /* current accumulated max amplitude for pulses */
58 : {
59 : Word16 i, corr_up_shift, corr_tmp, imax, corr_sq_tmp, en_max_den, cmax_num, en_tmp;
60 : Word32 L_tmp_en_lc, L_tmp_corr;
61 : Word32 L_tmp_en, L_en_max_den, L_corr_sq_max, L_tmp_corr_sq;
62 : Word32 L_left_h, L_right_h;
63 : UWord32 UL_left_l, UL_right_l, UL_dummy;
64 : Word32 L_tmp;
65 : UWord16 u_sgn;
66 :
67 1178512 : en_tmp = en_dn_shift; /* dummy assignment to avoid compiler warning for unused parameter */
68 :
69 : /* maximize correlation precision, prior to every unit pulse addition in the vector */
70 1178512 : corr_up_shift = norm_l( L_mac( *L_xy_ptr, 1, max_xabs ) ); /* pre analyze worst case L_xy update in the dim loop, 2 ops */
71 1178512 : imax = -1; /* not needed for search, only added to avoid compiler warning */
72 :
73 : /* clean BE code, with split out low/high precision loops */
74 : /* activate low complexity en/corr search section conditionally if resulting vector energy is within limits */
75 : /* typical case for higher dimensions */
76 :
77 1178512 : IF( high_prec_active == 0 )
78 : {
79 1176724 : en_max_den = 0; /*move16()*/
80 1176724 : move16();
81 : ; /* OPT: move saved by using high_prec_active as en_max_den */ /* 1 op */
82 1176724 : cmax_num = -1;
83 1176724 : move16(); /* req. to force a 1st update for n==0 */ /* 1 op */
84 :
85 16865317 : FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
86 : {
87 15688593 : L_tmp_corr = L_shl_sat( L_mac_sat( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /* actual in-loop target value, 2 ops */
88 15688593 : corr_tmp = round_fx_sat( L_tmp_corr ); /* 1 op */
89 15688593 : corr_sq_tmp = mult( corr_tmp, corr_tmp ); /* CorrSq, is a 16bit for low complexity cross multiplication 1 op */
90 :
91 15688593 : L_tmp_en_lc = L_mac( *L_yy_ptr, 1, y[i] ); /*Q1 result , energy may span up to ~14+1(Q1)+1(sign)=16 bits, 1 op */
92 : /* extract_l without shift can always be used for this section as energy is guaranteed to stay in the lower word, 1 op */
93 15688593 : en_tmp = extract_l( L_tmp_en_lc ); /* L_shl + round_fx could also be used but then adds an upshift cost (2-3 ops)*/
94 :
95 : /* 16/32 bit comparison WC (4 +1+1 + (1+1+1) = 9 */
96 : /* candidate wins if corr_sq_tmp * en_max_den > cmax_num * en_tmp (cross-multiplied ratio test) */
97 15688593 : IF( L_msu( L_mult( corr_sq_tmp, en_max_den ), cmax_num, en_tmp ) > 0 ) /* use L_mult and then a L_msu, 2 ops */
98 : {
99 3816073 : cmax_num = corr_sq_tmp;
100 3816073 : move16(); /* 1 op */
101 3816073 : en_max_den = en_tmp;
102 3816073 : move16(); /* 1 op */
103 3816073 : imax = i;
104 3816073 : move16(); /* 1 op */
105 : }
106 : } /* dim */
107 : }
108 : ELSE
109 : {
110 : /* High resolution section activated when vector energy is becoming high (peaky or many pulses) */
111 : /* BASOP operator Mpy32_32_ss used to allow higher resolution for both the CorrSq term and the Energy term */
112 :
113 1788 : L_en_max_den = L_deposit_l( 0 ); /* 1 op */
114 1788 : L_corr_sq_max = L_deposit_l( -1 ); /* req. to force a 1st update */ /* 1 op */
115 :
116 8784 : FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
117 : {
118 6996 : L_tmp_corr = L_shl( L_mac( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /* actual in loop WC value 2 ops */
119 6996 : Mpy_32_32_ss( L_tmp_corr, L_tmp_corr, &L_tmp_corr_sq, &UL_dummy ); /* CorrSq 32 bits, 4 ops */
120 :
121 6996 : L_tmp_en = L_mac( *L_yy_ptr, 1, y[i] ); /* Q1,energy may span up to sign+19 bits , 1 op */
122 : /* For highest accuracy use pairs of maximum upshifted 32x32 bit signed values */
123 : /* (L_tmp_corr_sq / L_tmp_en) > (L_corr_sq_max/L_en_max_den) */
124 : /* (L_tmp_corr_sq * L_en_max_den) > (L_corr_sq_max * L_tmp_en) */
125 6996 : Mpy_32_32_ss( L_en_max_den, L_tmp_corr_sq, &L_left_h, &UL_left_l ); /* 4 ops */
126 6996 : Mpy_32_32_ss( L_tmp_en, L_corr_sq_max, &L_right_h, &UL_right_l ); /* 4 ops */
127 :
128 : /* STL optimized "Lazy evaluation" of:
129 : IF( (L_left_h > L_right_h) || ( (L_left_h == L_right_h) && (UL_left_l > UL_right_l) )
130 : */
131 : /* 32/64 bit Lazy eval comparison WC cost is (1+ 1+1+1 + 4 +(2+2+1) = 13 , and average is ~12 */
132 : /* Unoptimized 32/64 bit comparison WC cost is (1+1+ 2x2 + 4 +(2+2+1) = 15 */
133 6996 : L_tmp = L_sub( L_left_h, L_right_h ); /* high signed word check 1 op */
134 6996 : u_sgn = 0;
135 6996 : move16(); /* 1 op */
136 6996 : if ( L_tmp == 0 ) /* L_tmp high Word testing is always needed */
137 : {
138 : /* The returned UL value from UL_subNs is not needed, only u_sgn is needed */
139 3249 : UL_subNs( UL_right_l, UL_left_l, &u_sgn ); /* low unsigned word check, note left/right order switch of ">" due to ">=" inside UL_subNs, 1 op */
140 : }
141 6996 : if ( u_sgn != 0 )
142 : {
143 1318 : L_tmp = L_add( L_tmp, 1 ); /* 0+1 --> 1 use wrap/sign result of low Word u_sgn check */ /* 1 op */
144 : }
145 6996 : IF( L_tmp > 0 ) /* IF 4 ops */
146 : {
147 3722 : L_corr_sq_max = L_add( L_tmp_corr_sq, 0 ); /* 1-2 ops */
148 3722 : L_en_max_den = L_add( L_tmp_en, 0 ); /* 1-2 ops */
149 3722 : imax = i;
150 3722 : move16(); /* 1 op */
151 : }
152 : } /* dim loop */
153 : }
154 : /* Complexity comparison per coeff for low precision vs. high precision
155 : low precision: pulse_tot <= 127, 16 bit: WC 2+3 +(15)*dim ops, dim=5 --> 5+15*5 = 90 ops, 18 ops/coeff
156 : high precision: pulse_tot > 127, 32 bit: WC 1+3+3 +(26-28)*dim ops, WC-band dim=5 --> 7+28*5 = 147 ops, 29 ops/coeff ~61% increase
157 : */
158 :
159 : /* finally add found unit pulse contribution to past L_xy, Lyy, for next pulse loop */
160 1178512 : *L_xy_ptr = L_mac( *L_xy_ptr, x_abs[imax], 1 ); /* Q12+1 */
161 1178512 : *L_yy_ptr = L_mac( *L_yy_ptr, 1, y[imax] ); /* uses y[imax] before it is incremented below */
162 :
163 1178512 : y[imax] = add( y[imax], 1 );
164 1178512 : move16(); /* Q0 added pulse */
165 1178512 : ( *pulse_tot_ptr ) = add( ( *pulse_tot_ptr ), 1 ); /* increment total pulse sum */
166 1178512 : move16();
167 :
168 1178512 : return imax;
169 : }
169 : /*-----------------------------------------------------------------------*
170 : * Function pvq_encode_ivas_fx() *
171 : * *
172 : *-----------------------------------------------------------------------*/
/* PVQ-encodes the vector x[0..dim-1] using `pulses` unit pulses:
   1) builds xabs[] = abs_s( x[] ), its sum L_xsum and its maximum max_xabs, and clears y[];
   2) if L_xsum == 0 or neg_gain == 0 the pulses are split between y[0] and y[dim-1] without any search;
      otherwise pulses are optionally pre-placed by projection (when pulses > PYR_OFFSET and
      pyramidSearchProjInit_fx() returns non-zero) and the remaining ones are added one at a time
      with one_pulse_search();
   3) the signs of x[] are applied to y[] and the scaled result is written to xq[] and L_xq[];
   4) y[] is indexed with mpvq_encode_vec_fx() and written with rc_enc_bits_ivas_fx() / rc_enc_uniform_ivas_fx().
   The body matches pvq_encode_fx() in this file except for the range-encoder calls and the IF vs. if
   macro used on the dim_m1 test.
   xabs[] holds PVQ_MAX_BAND_SIZE entries and is indexed up to dim-1, so dim must not exceed that size. */
173 276508 : void pvq_encode_ivas_fx(
174 : BSTR_ENC_HANDLE hBstr,
175 : PVQ_ENC_HANDLE hPVQ, /* i/o: PVQ encoder handle */
176 : const Word16 *x, /* i: vector to quantize Q15-3=>Q12 */
177 : Word16 *y, /* o: raw pulses (non-scaled short) Q0 */
178 : Word16 *xq, /* o: quantized vector Q15 */
179 : Word32 *L_xq, /* o: quantized vector Q31 for eval */
180 : const Word16 pulses, /* i: number of allocated pulses */
181 : const Word16 dim, /* i: Length of vector */
182 : const Word16 neg_gain /* i: - Gain use - negative gain in Q15 0..1 */
183 : )
184 : {
185 : Word16 i;
186 : Word16 pulse_tot;
187 : Word16 xabs[PVQ_MAX_BAND_SIZE];
188 : Word16 max_xabs;
189 : Word32 L_xsum;
190 : Word32 L_proj_fac;
191 : Word32 L_yy, L_xy;
192 : Word16 max_amp_y, imax;
193 : Word16 k, en_margin, en_dn_shift, high_prec_active;
194 :
195 : Word32 L_num, L_tmp;
196 : Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
197 :
198 : UWord16 u16_tmp;
199 : Word16 dim_m1;
200 : Word32 L_isqrt;
201 : Word16 neg_gain_norm, shift_tot;
202 : Word16 high_pulse_density_flag;
203 : PvqEntry entry;
204 :
205 276508 : L_proj_fac = 4096; /* default value; the only read below sits under the same condition as the assignment that overwrites it */
206 276508 : move32();
207 276508 : L_xsum = L_deposit_h( 0 );
208 276508 : max_xabs = -1; /* start value for the running s_max() over xabs[] */
209 276508 : move16();
210 :
211 3844364 : FOR( i = 0; i < dim; i++ )
212 : {
213 3567856 : xabs[i] = abs_s( x[i] );
214 3567856 : move16(); /* Q12 */
215 3567856 : max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient search correlation scaling */
216 3567856 : L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
217 3567856 : y[i] = 0;
218 3567856 : move16(); /* init, later only non-zero values need to be normalized */
219 : }
220 :
221 276508 : test();
222 276508 : IF( L_xsum == 0 || neg_gain == 0 )
223 : {
224 : /* no search: put pulses/2 (rounded down) in y[0] and the remainder in y[dim-1]; for dim == 1 both are y[0], which ends up holding all pulses */
225 429 : pulse_tot = pulses;
226 429 : move16();
227 429 : dim_m1 = sub( dim, 1 );
228 429 : y[dim_m1] = 0;
229 429 : move16();
230 429 : y[0] = shr( pulses, 1 );
231 429 : move16();
232 429 : y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) );
233 429 : move16();
234 429 : L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
235 429 : IF( dim_m1 != 0 ) /* NOTE(review): pvq_encode_fx() uses the lower-case `if` for this same single-basop body */
236 : {
237 429 : L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
238 : }
239 : }
240 : ELSE
241 : {
242 :
243 276079 : num = sub( pulses, PYR_OFFSET );
244 276079 : high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
245 :
246 276079 : test();
247 276079 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
248 : {
249 : /* projection factor = num / L_xsum, computed on normalized operands and brought back to Q12 */
250 118786 : shift_den = norm_l( L_xsum ); /* x_sum input Q12 */
251 118786 : den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den */
252 :
253 118786 : L_num = L_deposit_l( num );
254 118786 : shift_num = sub( norm_l( L_num ), 1 );
255 118786 : L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1 */
256 118786 : proj_fac = div_l( L_num, den ); /* L_num always has to be less than den<<16 */
257 :
258 118786 : shift_delta = sub( shift_num, shift_den );
259 118786 : L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring to a fixed Q12 */
260 : }
261 :
262 276079 : pulse_tot = 0;
263 276079 : move16();
264 276079 : L_yy = L_deposit_l( 0 );
265 276079 : L_xy = L_deposit_l( 0 );
266 276079 : test();
267 276079 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
268 : {
269 : /* pre-place pulses by projection and accumulate their count, energy and correlation */
270 1248149 : FOR( i = 0; i < dim; i++ ) /* max 64 */
271 : {
272 1129363 : Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12 +1 */
273 1129363 : y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
274 1129363 : move16(); /* Q12 *Q12 -> Q0 */
275 :
276 1129363 : pulse_tot = add( pulse_tot, y[i] ); /* Q0 */
277 1129363 : L_yy = L_mac( L_yy, y[i], y[i] ); /* Energy, result will scale up by 2 by L_mac */
278 1129363 : L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12 +1 --> Q13 */
279 : }
280 : }
281 :
282 :
283 276079 : L_yy = L_shr( L_yy, 1 ); /* the search loops work on the energy downshifted by 1; undone after the search */
284 276079 : IF( LE_16( pulses, 127 ) )
285 : {
286 : /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5 */
287 : /* ( if high energy precision is inactive, max_amp_y is not needed , no max_amp_y(k-1) update ) */
288 1371432 : FOR( k = pulse_tot; k < pulses; k++ )
289 : {
290 1096268 : L_yy = L_add( L_yy, 1 ); /* constant +1 of (y+1)^2 = y^2 + 2*y + 1; the per-position term is added inside one_pulse_search() */
291 1096268 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
292 : }
293 : }
294 : ELSE
295 : { /* HC or LC+HC inner loops */
296 915 : max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension restricted) */
297 : /* max_amp_y from projected y is needed when pulses_sum exceeds 127 */
298 :
299 : /* First section with 32 bit energy inactive, max_amp_y kept updated though */
300 945 : FOR( k = pulse_tot; k < 128; k++ )
301 : {
302 30 : L_yy = L_add( L_yy, 1 );
303 30 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
304 30 : max_amp_y = s_max( max_amp_y, y[imax] );
305 : }
306 :
307 : /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated */
308 : /* pulse_tot was advanced by one_pulse_search() in the first section, so k resumes from the current pulse count */
309 3636 : FOR( k = pulse_tot; k < pulses; k++ )
310 : {
311 2721 : L_yy = L_add( L_yy, 1 );
312 2721 : en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin, ~ 2 ops */
313 2721 : en_dn_shift = sub( 16, en_margin ); /* calc. shift to lower byte for fixed use of extract_l */
314 :
315 2721 : high_prec_active = 1;
316 2721 : move16();
317 2721 : if ( en_dn_shift <= 0 )
318 : {
319 : /* only use 32 bit energy if actually needed */
320 1895 : high_prec_active = 0;
321 1895 : move16();
322 : }
323 : /* 32 bit energy and corr adaptively active, max_amp_y kept updated */
324 2721 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
325 2721 : max_amp_y = s_max( max_amp_y, y[imax] );
326 : }
327 : }
328 276079 : L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
329 : to make energy right for unit/inverse gain calculation */
330 : }
331 :
332 : /* Apply unit energy normalization scaling, always at least one pulse so no div-by-zero check is needed */
333 276508 : L_isqrt = L_deposit_l( 0 ); /* stays 0 when neg_gain == 0, so the products below start from a zero factor */
334 276508 : IF( neg_gain != 0 )
335 : {
336 276079 : L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
337 : }
338 :
339 276508 : shift_num = norm_s( pulse_tot ); /* account for max possible pulse amplitude in y,
340 : can be used even when max_amp_y is not avail. */
341 276508 : shift_den = norm_s( neg_gain ); /* account for gain downscaling shift */
342 276508 : neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm */
343 276508 : shift_tot = sub( add( shift_num, shift_den ), 15 );
344 :
345 276508 : L_isqrt = L_negate( L_isqrt );
346 3844364 : FOR( i = 0; i < dim; i++ )
347 : {
348 3567856 : tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used in scaling */
349 3567856 : if ( x[i] < 0 )
350 : {
351 1772521 : tmp = negate( tmp ); /* apply sign */
352 : }
353 :
354 3567856 : IF( y[i] != 0 )
355 : {
356 1156437 : y[i] = shr( tmp, shift_num );
357 1156437 : move16(); /* updates sign of y[i] , ~range -512 + 512), array move */
358 : }
359 3567856 : Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) +1 */
360 3567856 : Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1 */
361 3567856 : L_tmp = L_shr_sat( L_tmp, shift_tot ); /* Q31+x */
362 3567856 : xq[i] = round_fx_sat( L_tmp ); /* Q15, array move */
363 3567856 : move16();
364 3567856 : L_xq[i] = L_tmp; /* Q31 currently unused */
365 3567856 : move32();
366 : }
367 :
368 : /* index the found PVQ vector into short codewords */
369 276508 : entry = mpvq_encode_vec_fx( y, dim, pulses );
370 :
371 : /* send the short codeword(s) to the range encoder */
372 276508 : rc_enc_bits_ivas_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
373 276508 : IF( NE_16( dim, 1 ) ) /* for dim == 1 only the leading sign bit is written */
374 : {
375 276508 : rc_enc_uniform_ivas_fx( hBstr, hPVQ, entry.index, entry.size );
376 : }
377 :
378 276508 : return;
379 : }
376 :
/* PVQ-encodes the vector x[0..dim-1] using `pulses` unit pulses:
   1) builds xabs[] = abs_s( x[] ), its sum L_xsum and its maximum max_xabs, and clears y[];
   2) if L_xsum == 0 or neg_gain == 0 the pulses are split between y[0] and y[dim-1] without any search;
      otherwise pulses are optionally pre-placed by projection (when pulses > PYR_OFFSET and
      pyramidSearchProjInit_fx() returns non-zero) and the remaining ones are added one at a time
      with one_pulse_search();
   3) the signs of x[] are applied to y[] and the scaled result is written to xq[] and L_xq[];
   4) y[] is indexed with mpvq_encode_vec_fx() and written with rc_enc_bits_fx() / rc_enc_uniform_fx().
   The body matches pvq_encode_ivas_fx() in this file except for the range-encoder calls and the
   IF vs. if macro used on two single-statement tests.
   xabs[] holds PVQ_MAX_BAND_SIZE entries and is indexed up to dim-1, so dim must not exceed that size. */
377 16843 : void pvq_encode_fx(
378 : BSTR_ENC_HANDLE hBstr,
379 : PVQ_ENC_HANDLE hPVQ, /* i/o: PVQ encoder handle */
380 : const Word16 *x, /* i: vector to quantize Q15-3=>Q12 */
381 : Word16 *y, /* o: raw pulses (non-scaled short) Q0 */
382 : Word16 *xq, /* o: quantized vector Q15 */
383 : Word32 *L_xq, /* o: quantized vector Q31 for eval */
384 : const Word16 pulses, /* i: number of allocated pulses */
385 : const Word16 dim, /* i: Length of vector */
386 : const Word16 neg_gain /* i: - Gain use - negative gain in Q15 0..1 */
387 : )
388 : {
389 : Word16 i;
390 : Word16 pulse_tot;
391 : Word16 xabs[PVQ_MAX_BAND_SIZE];
392 : Word16 max_xabs;
393 : Word32 L_xsum;
394 : Word32 L_proj_fac;
395 : Word32 L_yy, L_xy;
396 : Word16 max_amp_y, imax;
397 : Word16 k, en_margin, en_dn_shift, high_prec_active;
398 :
399 : Word32 L_num, L_tmp;
400 : Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
401 :
402 : UWord16 u16_tmp;
403 : Word16 dim_m1;
404 : Word32 L_isqrt;
405 : Word16 neg_gain_norm, shift_tot;
406 : Word16 high_pulse_density_flag;
407 : PvqEntry entry;
408 :
409 16843 : L_proj_fac = 4096; /* default value; the only read below sits under the same condition as the assignment that overwrites it */
410 16843 : move32();
411 16843 : L_xsum = L_deposit_h( 0 );
412 16843 : max_xabs = -1; /* start value for the running s_max() over xabs[] */
413 16843 : move16();
414 :
415 216203 : FOR( i = 0; i < dim; i++ )
416 : {
417 199360 : xabs[i] = abs_s( x[i] );
418 199360 : move16(); /* Q12 */
419 199360 : max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient search correlation scaling */
420 199360 : L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
421 199360 : y[i] = 0;
422 199360 : move16(); /* init, later only non-zero values need to be normalized */
423 : }
424 :
425 16843 : test();
426 16843 : IF( L_xsum == 0 || neg_gain == 0 )
427 : {
428 : /* no search: put pulses/2 (rounded down) in y[0] and the remainder in y[dim-1]; for dim == 1 both are y[0], which ends up holding all pulses */
429 17 : pulse_tot = pulses;
430 17 : move16();
431 17 : dim_m1 = sub( dim, 1 );
432 17 : y[dim_m1] = 0;
433 17 : move16();
434 17 : y[0] = shr( pulses, 1 );
435 17 : move16();
436 17 : y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) );
437 17 : move16();
438 17 : L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
439 17 : if ( dim_m1 != 0 )
440 : {
441 17 : L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
442 : }
443 : }
444 : ELSE
445 : {
446 :
447 16826 : num = sub( pulses, PYR_OFFSET );
448 16826 : high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
449 :
450 16826 : test();
451 16826 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
452 : {
453 : /* projection factor = num / L_xsum, computed on normalized operands and brought back to Q12 */
454 10933 : shift_den = norm_l( L_xsum ); /* x_sum input Q12 */
455 10933 : den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den */
456 :
457 10933 : L_num = L_deposit_l( num );
458 10933 : shift_num = sub( norm_l( L_num ), 1 );
459 10933 : L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1 */
460 10933 : proj_fac = div_l( L_num, den ); /* L_num always has to be less than den<<16 */
461 :
462 10933 : shift_delta = sub( shift_num, shift_den );
463 10933 : L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring to a fixed Q12 */
464 : }
465 :
466 16826 : pulse_tot = 0;
467 16826 : move16();
468 16826 : L_yy = L_deposit_l( 0 );
469 16826 : L_xy = L_deposit_l( 0 );
470 16826 : test();
471 16826 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
472 : {
473 : /* pre-place pulses by projection and accumulate their count, energy and correlation */
474 102537 : FOR( i = 0; i < dim; i++ ) /* max 64 */
475 : {
476 91604 : Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12 +1 */
477 91604 : y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
478 91604 : move16(); /* Q12 *Q12 -> Q0 */
479 :
480 91604 : pulse_tot = add( pulse_tot, y[i] ); /* Q0 */
481 91604 : L_yy = L_mac( L_yy, y[i], y[i] ); /* Energy, result will scale up by 2 by L_mac */
482 91604 : L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12 +1 --> Q13 */
483 : }
484 : }
485 :
486 :
487 16826 : L_yy = L_shr( L_yy, 1 ); /* the search loops work on the energy downshifted by 1; undone after the search */
488 16826 : IF( LE_16( pulses, 127 ) )
489 : {
490 : /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5 */
491 : /* ( if high energy precision is inactive, max_amp_y is not needed , no max_amp_y(k-1) update ) */
492 93339 : FOR( k = pulse_tot; k < pulses; k++ )
493 : {
494 77260 : L_yy = L_add( L_yy, 1 ); /* constant +1 of (y+1)^2 = y^2 + 2*y + 1; the per-position term is added inside one_pulse_search() */
495 77260 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
496 : }
497 : }
498 : ELSE
499 : { /* HC or LC+HC inner loops */
500 747 : max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension restricted) */
501 : /* max_amp_y from projected y is needed when pulses_sum exceeds 127 */
502 :
503 : /* First section with 32 bit energy inactive, max_amp_y kept updated though */
504 753 : FOR( k = pulse_tot; k < 128; k++ )
505 : {
506 6 : L_yy = L_add( L_yy, 1 );
507 6 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
508 6 : max_amp_y = s_max( max_amp_y, y[imax] );
509 : }
510 :
511 : /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated */
512 : /* pulse_tot was advanced by one_pulse_search() in the first section, so k resumes from the current pulse count */
513 2974 : FOR( k = pulse_tot; k < pulses; k++ )
514 : {
515 2227 : L_yy = L_add( L_yy, 1 );
516 2227 : en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin, ~ 2 ops */
517 2227 : en_dn_shift = sub( 16, en_margin ); /* calc. shift to lower byte for fixed use of extract_l */
518 :
519 2227 : high_prec_active = 1;
520 2227 : move16();
521 2227 : if ( en_dn_shift <= 0 )
522 : {
523 : /* only use 32 bit energy if actually needed */
524 1265 : high_prec_active = 0;
525 1265 : move16();
526 : }
527 : /* 32 bit energy and corr adaptively active, max_amp_y kept updated */
528 2227 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
529 2227 : max_amp_y = s_max( max_amp_y, y[imax] );
530 : }
531 : }
532 16826 : L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
533 : to make energy right for unit/inverse gain calculation */
534 : }
535 :
536 : /* Apply unit energy normalization scaling, always at least one pulse so no div-by-zero check is needed */
537 16843 : L_isqrt = L_deposit_l( 0 ); /* stays 0 when neg_gain == 0, so the products below start from a zero factor */
538 16843 : IF( neg_gain != 0 )
539 : {
540 16826 : L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
541 : }
542 :
543 16843 : shift_num = norm_s( pulse_tot ); /* account for max possible pulse amplitude in y,
544 : can be used even when max_amp_y is not avail. */
545 16843 : shift_den = norm_s( neg_gain ); /* account for gain downscaling shift */
546 16843 : neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm */
547 16843 : shift_tot = sub( add( shift_num, shift_den ), 15 );
548 :
549 16843 : L_isqrt = L_negate( L_isqrt );
550 216203 : FOR( i = 0; i < dim; i++ )
551 : {
552 199360 : tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used in scaling */
553 199360 : if ( x[i] < 0 )
554 : {
555 99448 : tmp = negate( tmp ); /* apply sign */
556 : }
557 :
558 199360 : if ( y[i] != 0 )
559 : {
560 91218 : y[i] = shr( tmp, shift_num );
561 91218 : move16(); /* updates sign of y[i] , ~range -512 + 512), array move */
562 : }
563 199360 : Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) +1 */
564 199360 : Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1 */
565 199360 : L_tmp = L_shr_sat( L_tmp, shift_tot ); /* Q31+x */
566 199360 : xq[i] = round_fx_sat( L_tmp ); /* Q15, array move */
567 199360 : move16();
568 199360 : L_xq[i] = L_tmp; /* Q31 currently unused */
569 199360 : move32();
570 : }
571 :
572 : /* index the found PVQ vector into short codewords */
573 16843 : entry = mpvq_encode_vec_fx( y, dim, pulses );
574 :
575 : /* send the short codeword(s) to the range encoder */
576 16843 : rc_enc_bits_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
577 16843 : IF( NE_16( dim, 1 ) ) /* for dim == 1 only the leading sign bit is written */
578 : {
579 16843 : rc_enc_uniform_fx( hBstr, hPVQ, entry.index, entry.size );
580 : }
581 :
582 16843 : return;
583 : }
|