Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 : #include <stdint.h>
5 : #include "options.h" /* Compilation switches */
6 : #include "cnst.h"
7 : #include "prot_fx.h" /* Function prototypes */
8 : #include "prot_fx_enc.h" /* Function prototypes */
9 : #include "rom_com_fx.h"
10 : #include "rom_com.h"
11 :
12 : /* PVQ MIXED_SEARCH_LOOP:
13 : a low precision 16/32 bit search loop combined with an energy selective high precision 32/64 bit search loop;
14 : the mixed search performs about 10 dB SEGSNR better than using the low precision loop only,
15 : the high precision loop is active if k>=128 and the accumulated energy is high enough,
16 : and it comes at a controlled complexity cost, as dimensions decrease for high k's */
17 :
18 1662 : static Word16 max_val_fx( /* o : maximum value in the input vector */
19 : const Word16 *vec, /* i : input vector */
20 : const Word16 lvec /* i : length of input vector */
21 : )
22 : {
23 : Word16 j, tmp;
24 :
25 1662 : tmp = vec[0];
26 1662 : move16();
27 6616 : FOR( j = 1; j < lvec; j++ )
28 : {
29 4954 : tmp = s_max( vec[j], tmp );
30 : }
31 1662 : return tmp;
32 : }
33 :
34 292738 : static Word16 pyramidSearchProjInit_fx( Word16 L, Word16 Ptot )
35 : {
36 292738 : return ( sub( Ptot, extract_l( L_shr( L_mult0( 8223, (Word32) L ), 14 ) ) ) > 0 );
37 : }
38 :
39 :
40 : /* one_pulse_search(): inner search loop for one single additional unit pulse, starting from *pulse_tot_ptr placed pulses.
41 : For each position i < dim a candidate correlation L_mac( *L_xy_ptr, 1, x_abs[i] ) and a candidate energy L_mac( *L_yy_ptr, 1, y[i] )
42 : are formed; the position with the largest corr^2/energy ratio wins (ratios are compared by cross multiplication, no division).
43 : max_xabs is used for a near optimal correlation upscaling (corr_up_shift); the callers in this file pass the largest x_abs[] value.
   high_prec_active == 0 runs the 16/32 bit loop, any other value runs the 32/64 bit loop.
   en_dn_shift is not used by the search itself, it is only copied into en_tmp to silence an unused parameter warning.
   On exit y[imax] and *pulse_tot_ptr have been incremented by one and *L_xy_ptr / *L_yy_ptr include the new pulse.
   Returns the index of the best positioned unit pulse, imax.
   NOTE(review): imax is initialized to -1 and only assigned inside the dim loops; if no candidate is ever accepted
   (e.g. dim <= 0) the updates after the loops index x_abs[-1] and y[-1] - confirm that callers always pass dim >= 1.
44 : */
45 1177157 : static Word16 one_pulse_search(
46 : const Word16 dim, /* i : vector dimension, number of candidate positions */
47 : const Word16 *x_abs, /* i : absolute vector values */
48 : Word16 *y, /* i/o: output pulse vector, y[imax] is incremented by one */
49 : Word16 *pulse_tot_ptr, /* i/o: total number of pulses, incremented by one */
50 : Word32 *L_xy_ptr, /* i/o: accumulated correlation */
51 : Word32 *L_yy_ptr, /* i/o: accumulated energy */
52 : Word16 high_prec_active, /* i : 0: 16/32 bit search loop, otherwise 32/64 bit search loop */
53 : Word16 en_dn_shift, /* i : only copied into en_tmp, see above */
54 : Word16 max_xabs ) /* i : max amplitude used for the worst case correlation upscaling analysis */
55 : {
56 : Word16 i, corr_up_shift, corr_tmp, imax, corr_sq_tmp, en_max_den, cmax_num, en_tmp;
57 : Word32 L_tmp_en_lc, L_tmp_corr;
58 : Word32 L_tmp_en, L_en_max_den, L_corr_sq_max, L_tmp_corr_sq;
59 : Word32 L_left_h, L_right_h;
60 : UWord32 UL_left_l, UL_right_l, UL_dummy;
61 : Word32 L_tmp;
62 : UWord16 u_sgn;
63 : #ifndef ISSUE_1867_replace_overflow_libenc
64 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
65 : Flag Overflow = 0;
66 : move16();
67 : #endif
68 : #endif
69 :
70 1177157 : en_tmp = en_dn_shift; /* dummy assignment to avoid compiler warning for unused parameter */
71 :
72 : /* maximize correlation precision, prior to every unit pulse addition in the vector */
73 1177157 : corr_up_shift = norm_l( L_mac( *L_xy_ptr, 1, max_xabs ) ); /* pre analyze worst case L_xy update in the dim loop , 2 ops */
74 1177157 : imax = -1; /* not needed for search, only added to avoid compiler warning */
75 :
76 : /* clean BE code, with split out low/high precision loops */
77 : /* activate low complexity en/corr search section conditionally if resulting vector energy is within limits */
78 : /* typical case for higher dimensions */
79 :
80 1177157 : IF( high_prec_active == 0 )
81 : {
82 1175369 : en_max_den = 0; /*move16()*/
83 1175369 : move16();
84 : ; /* OPT: move saved by using high_prec_active as en_max_den */ /* 1 op */
85 1175369 : cmax_num = -1;
86 1175369 : move16(); /* req. to force a 1st update for n==0 */ /* 1 op */
87 :
88 16846770 : FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
89 : {
90 : #ifdef ISSUE_1867_replace_overflow_libenc
91 15671401 : L_tmp_corr = L_shl_sat( L_mac_sat( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /* actual in-loop target value, 2 ops */
92 15671401 : corr_tmp = round_fx_sat( L_tmp_corr ); /* 1 op */
93 : #else
94 : L_tmp_corr = L_shl_o( L_mac_o( *L_xy_ptr, 1, x_abs[i], &Overflow ), corr_up_shift, &Overflow ); /* actual in-loop target value, 2 ops */
95 : corr_tmp = round_fx_o( L_tmp_corr, &Overflow ); /* 1 op */
96 : #endif
97 15671401 : corr_sq_tmp = mult( corr_tmp, corr_tmp ); /* CorrSq, is 16 bit for a low complexity cross multiplication, 1 op */
98 :
99 15671401 : L_tmp_en_lc = L_mac( *L_yy_ptr, 1, y[i] ); /*Q1 result , energy may span up to ~14+1(Q1)+1(sign)=16 bits, 1 op */
100 : /* extract_l without shift can always be used for this section as energy is guaranteed to stay in the lower word, 1 op */
101 15671401 : en_tmp = extract_l( L_tmp_en_lc ); /* L_shl + round_fx could also be used but then adds an upshift cost (2-3 ops)*/
102 :
103 : /* 16/32 bit comparison WC (4 +1+1 + (1+1+1) = 9 */
/* accept candidate i if corr_sq_tmp * en_max_den > cmax_num * en_tmp, i.e. its corr^2/energy ratio beats the best so far */
104 15671401 : IF( L_msu( L_mult( corr_sq_tmp, en_max_den ), cmax_num, en_tmp ) > 0 ) /* use L_mult and then a L_msu, 2 ops */
105 : {
106 3811087 : cmax_num = corr_sq_tmp;
107 3811087 : move16(); /* 1 op */
108 3811087 : en_max_den = en_tmp;
109 3811087 : move16(); /* 1 op */
110 3811087 : imax = i;
111 3811087 : move16(); /* 1 op */
112 : }
113 : } /* dim */
114 : }
115 : ELSE
116 : {
117 : /* High resolution section activated when vector energy is becoming high (peaky or many pulses) */
118 : /* BASOP operator Mpy_32_32_ss used to allow higher resolution for both the CorrSq term and the Energy term */
119 :
120 1788 : L_en_max_den = L_deposit_l( 0 ); /* 1 op */
121 1788 : L_corr_sq_max = L_deposit_l( -1 ); /* req. to force a 1st update */ /* 1 op */
122 :
123 8784 : FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
124 : {
125 6996 : L_tmp_corr = L_shl( L_mac( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /* actual in loop WC value 2 ops */
126 6996 : Mpy_32_32_ss( L_tmp_corr, L_tmp_corr, &L_tmp_corr_sq, &UL_dummy ); /* CorrSq 32 bits, low word discarded in UL_dummy, 4 ops */
127 :
128 6996 : L_tmp_en = L_mac( *L_yy_ptr, 1, y[i] ); /* Q1,energy may span up to sign+19 bits , 1 op */
129 : /* For highest accuracy use pairs of maximum upshifted 32x32 bit signed values */
130 : /* (L_tmp_corr_sq / L_tmp_en) > (L_corr_sq_max/L_en_max_den) */
131 : /* (L_tmp_corr_sq * L_en_max_den) > (L_corr_sq_max * L_tmp_en) */
132 6996 : Mpy_32_32_ss( L_en_max_den, L_tmp_corr_sq, &L_left_h, &UL_left_l ); /* 4 ops */
133 6996 : Mpy_32_32_ss( L_tmp_en, L_corr_sq_max, &L_right_h, &UL_right_l ); /* 4 ops */
134 :
135 : /* STL optimized "Lazy evaluation" of:
136 : IF( (L_left_h > L_right_h) || ( (L_left_h == L_right_h) && (UL_left_l > UL_right_l) )
137 : */
138 : /* 32/64 bit Lazy eval comparison WC cost is (1+ 1+1+1 + 4 +(2+2+1) = 13 , and average is ~12 */
139 : /* Unoptimized 32/64 bit comparison WC cost is (1+1+ 2x2 + 4 +(2+2+1) = 15 */
140 6996 : L_tmp = L_sub( L_left_h, L_right_h ); /* high signed word check 1 op */
141 6996 : u_sgn = 0;
142 6996 : move16(); /* 1 op */
143 6996 : if ( L_tmp == 0 ) /* L_tmp high Word testing is always needed */
144 : {
145 : /* The returned UL value from UL_subNs is not needed, only u_sgn is needed */
146 3249 : UL_subNs( UL_right_l, UL_left_l, &u_sgn ); /* low unsigned word check, note left/right order switch of ">" due to ">=" inside UL_subNs, 1 op */
147 : }
148 6996 : if ( u_sgn != 0 )
149 : {
150 1318 : L_tmp = L_add( L_tmp, 1 ); /* 0+1 --> 1 use wrap/sign result of low Word u_sgn check */ /* 1 op */
151 : }
152 6996 : IF( L_tmp > 0 ) /* IF 4 ops */
153 : {
154 3722 : L_corr_sq_max = L_add( L_tmp_corr_sq, 0 ); /* 1-2 ops */
155 3722 : L_en_max_den = L_add( L_tmp_en, 0 ); /* 1-2 ops */
156 3722 : imax = i;
157 3722 : move16(); /* 1 op */
158 : }
159 : } /* dim loop */
160 : }
161 : /* Complexity comparison per coeff for low precision vs. high precision
162 : low precision: pulse_tot <= 127, 16 bit: WC 2+3 +(15)*dim ops, dim=5 --> 5+15*5 = 90 ops, 18 ops/coeff
163 : high precision: pulse_tot > 127, 32 bit: WC 1+3+3 +(26-28)*dim ops, WC-band dim=5 --> 7+28*5 = 147 ops, 29 ops/coeff ~61% increase
164 : */
165 :
166 : /* finally add found unit pulse contribution to past L_xy, L_yy, for next pulse loop */
167 1177157 : *L_xy_ptr = L_mac( *L_xy_ptr, x_abs[imax], 1 ); /* Q12+1 */
168 1177157 : *L_yy_ptr = L_mac( *L_yy_ptr, 1, y[imax] );
169 :
170 1177157 : y[imax] = add( y[imax], 1 );
171 1177157 : move16(); /* Q0 added pulse */
172 1177157 : ( *pulse_tot_ptr ) = add( ( *pulse_tot_ptr ), 1 ); /* increment total pulse sum */
173 1177157 : move16();
174 :
175 1177157 : return imax;
176 : }
177 : /*-----------------------------------------------------------------------*
178 : * Function pvq_encode_ivas_fx() *
179 : * *
   * PVQ search and encoding of one vector x[] of length dim with `pulses` unit pulses.
   * Steps, in order:
   *  1) xabs[] = |x[]|, its maximum and its sum are computed, y[] is cleared
   *  2) if the sum is zero or neg_gain is zero, the pulses are split between y[0] and y[dim-1]
   *     without a search; otherwise an optional projection start is followed by
   *     one_pulse_search() calls until `pulses` pulses are placed
   *  3) y[] gets the signs of x[], and the gain scaled, energy normalized vector is
   *     written to xq[] and L_xq[]
   *  4) y[] is indexed by mpvq_encode_vec_fx(); the lead sign bit and, for dim != 1, the index
   *     are written through rc_enc_bits_ivas_fx() / rc_enc_uniform_ivas_fx()
   * NOTE(review): xabs[] holds PVQ_MAX_BAND_SIZE entries and dim is not range checked here -
   *               presumably callers guarantee 1 <= dim <= PVQ_MAX_BAND_SIZE, confirm.
180 : *-----------------------------------------------------------------------*/
181 276341 : void pvq_encode_ivas_fx(
182 : BSTR_ENC_HANDLE hBstr,
183 : PVQ_ENC_HANDLE hPVQ, /* i/o: PVQ encoder handle */
184 : const Word16 *x, /* i: vector to quantize Q15-3=>Q12 */
185 : Word16 *y, /* o: raw pulses (non-scaled short) Q0 */
186 : Word16 *xq, /* o: quantized vector Q15 */
187 : Word32 *L_xq, /* o: quantized vector Q31 for eval */
188 : const Word16 pulses, /* i: number of allocated pulses */
189 : const Word16 dim, /* i: Length of vector */
190 : const Word16 neg_gain /* i: - Gain use - negative gain in Q15 0..1 */
191 : )
192 : {
193 : Word16 i;
194 : Word16 pulse_tot;
195 : Word16 xabs[PVQ_MAX_BAND_SIZE];
196 : Word16 max_xabs;
197 : Word32 L_xsum;
198 : Word32 L_proj_fac;
199 : Word32 L_yy, L_xy;
200 : Word16 max_amp_y, imax;
201 : Word16 k, en_margin, en_dn_shift, high_prec_active;
202 :
203 : Word32 L_num, L_tmp;
204 : Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
205 :
206 : UWord16 u16_tmp;
207 : Word16 dim_m1;
208 : Word32 L_isqrt;
209 : Word16 neg_gain_norm, shift_tot;
210 : Word16 high_pulse_density_flag;
211 : PvqEntry entry;
212 : #ifndef ISSUE_1867_replace_overflow_libenc
213 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
214 : Flag Overflow = 0;
215 : move16();
216 : #endif
217 : #endif
218 :
219 276341 : L_proj_fac = 4096;
220 276341 : move32();
221 276341 : L_xsum = L_deposit_h( 0 );
222 276341 : max_xabs = -1;
223 276341 : move16();
224 :
/* absolute values of the input, their maximum and their sum; y[] is cleared */
225 3842693 : FOR( i = 0; i < dim; i++ )
226 : {
227 3566352 : xabs[i] = abs_s( x[i] );
228 3566352 : move16(); /* Q12 */
229 3566352 : max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient search correlation scaling */
230 3566352 : L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
231 3566352 : y[i] = 0;
232 3566352 : move16(); /* init, later only non-zero values need to be normalized */
233 : }
234 :
235 276341 : test();
236 276341 : IF( L_xsum == 0 || neg_gain == 0 )
237 : {
/* zero input or zero gain: no search, shr( pulses, 1 ) pulses go to y[0] and the remaining ones to y[dim-1]
   (for dim == 1 both writes hit y[0], which ends up holding all pulses) */
238 429 : pulse_tot = pulses;
239 429 : move16();
240 429 : dim_m1 = sub( dim, 1 );
241 429 : y[dim_m1] = 0;
242 429 : move16();
243 429 : y[0] = shr( pulses, 1 );
244 429 : move16();
245 429 : y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) );
246 429 : move16();
247 429 : L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
/* NOTE(review): pvq_encode_fx() uses a lowercase if for this single basop conditional - check which form is intended */
248 429 : IF( dim_m1 != 0 )
249 : {
250 429 : L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
251 : }
252 : }
253 : ELSE
254 : {
255 :
/* the projection start is only used if pulses exceeds PYR_OFFSET and the pulse density flag is set */
256 275912 : num = sub( pulses, PYR_OFFSET );
257 275912 : high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
258 :
259 275912 : test();
260 275912 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
261 : {
/* projection factor = num / L_xsum, computed on normalized operands and brought back to Q12 */
262 118606 : shift_den = norm_l( L_xsum ); /* x_sum input Q12 */
263 118606 : den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den */
264 :
265 118606 : L_num = L_deposit_l( num );
266 118606 : shift_num = sub( norm_l( L_num ), 1 );
267 118606 : L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1 */
268 118606 : proj_fac = div_l( L_num, den ); /* L_num always has to be less than den<<16 */
269 :
270 118606 : shift_delta = sub( shift_num, shift_den );
271 118606 : L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring to a fixed Q12 */
272 : }
273 :
274 275912 : pulse_tot = 0;
275 275912 : move16();
276 275912 : L_yy = L_deposit_l( 0 );
277 275912 : L_xy = L_deposit_l( 0 );
278 275912 : test();
279 275912 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
280 : {
/* projection start: y[i] is the downshifted product of L_proj_fac and xabs[i]; pulse sum, energy and correlation are accumulated */
281 1246161 : FOR( i = 0; i < dim; i++ ) /* max 64 */
282 : {
283 1127555 : Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12 +1 */
284 1127555 : y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
285 1127555 : move16(); /* Q12 *Q12 -> Q0 */
286 :
287 1127555 : pulse_tot = add( pulse_tot, y[i] ); /* Q0 */
288 1127555 : L_yy = L_mac( L_yy, y[i], y[i] ); /* Energy, result will scale up by 2 by L_mac */
289 1127555 : L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12 +1 --> Q13 */
290 : }
291 : }
292 :
293 :
294 275912 : L_yy = L_shr( L_yy, 1 ); /* the search loops below work on this downshifted energy, it is upshifted again after the search */
295 275912 : IF( LE_16( pulses, 127 ) )
296 : {
297 : /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5 */
298 : /* ( if high energy precision is inactive, max_amp_y is not needed , no max_amp_y(k-1) update ) */
299 1369910 : FOR( k = pulse_tot; k < pulses; k++ )
300 : {
301 1094913 : L_yy = L_add( L_yy, 1 ); /* position independent +1 energy term of the new unit pulse, added once outside the dim loop */
302 1094913 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
303 : }
304 : }
305 : ELSE
306 : { /* HC or LC+HC inner loops */
307 915 : max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension restricted) */
308 : /* max_amp_y from projected y is needed when pulses_sum exceeds 127 */
309 :
310 : /* First section with 32 bit energy inactive, max_amp_y kept updated though */
311 945 : FOR( k = pulse_tot; k < 128; k++ )
312 : {
313 30 : L_yy = L_add( L_yy, 1 );
314 30 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
315 30 : max_amp_y = s_max( max_amp_y, y[imax] );
316 : }
317 :
318 : /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated */
319 3636 : FOR( k = pulse_tot; k < pulses; k++ )
320 : {
321 2721 : L_yy = L_add( L_yy, 1 );
322 2721 : en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin, ~ 2 ops */
323 2721 : en_dn_shift = sub( 16, en_margin ); /* calc. shift to lower byte for fixed use of extract_l */
324 :
325 2721 : high_prec_active = 1;
326 2721 : move16();
327 2721 : if ( en_dn_shift <= 0 )
328 : {
329 : /* only use 32 bit energy if actually needed */
330 1895 : high_prec_active = 0;
331 1895 : move16();
332 : }
333 : /* 32 bit energy and corr adaptively active, max_amp_y kept updated */
334 2721 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
335 2721 : max_amp_y = s_max( max_amp_y, y[imax] );
336 : }
337 : }
338 275912 : L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
339 : to make energy right for unit/inverse gain calculation */
340 : }
341 :
342 : /* Apply unit energy normalization scaling, always at least one pulse so no div-by-zero check is needed */
343 276341 : L_isqrt = L_deposit_l( 0 );
344 276341 : IF( neg_gain != 0 )
345 : {
346 275912 : L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
347 : }
348 :
349 276341 : shift_num = norm_s( pulse_tot ); /* account for max possible pulse amplitude in y,
350 : can be used even when max_amp_y is not avail. */
351 276341 : shift_den = norm_s( neg_gain ); /* account for gain downscaling shift */
352 276341 : neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm */
353 276341 : shift_tot = sub( add( shift_num, shift_den ), 15 );
354 :
355 276341 : L_isqrt = L_negate( L_isqrt ); /* sign flip, neg_gain is documented as a negative gain */
/* per coefficient: apply the sign of x[i] to y[i], then scale by L_isqrt and the normalized gain */
356 3842693 : FOR( i = 0; i < dim; i++ )
357 : {
358 3566352 : tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used in scaling */
359 3566352 : if ( x[i] < 0 )
360 : {
361 1770957 : tmp = negate( tmp ); /* apply sign */
362 : }
363 :
364 3566352 : IF( y[i] != 0 )
365 : {
366 1155036 : y[i] = shr( tmp, shift_num );
367 1155036 : move16(); /* updates sign of y[i] , ~range -512 + 512), array move */
368 : }
369 3566352 : Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) +1 */
370 3566352 : Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1 */
371 : #ifdef ISSUE_1867_replace_overflow_libenc
372 3566352 : L_tmp = L_shr_sat( L_tmp, shift_tot ); /* Q31+x */
373 3566352 : xq[i] = round_fx_sat( L_tmp ); /* Q15, array move */
374 : #else
375 : L_tmp = L_shr_sat( L_tmp, shift_tot ); /* Q31+x */
376 : xq[i] = round_fx_o( L_tmp, &Overflow ); /* Q15, array move */
377 : #endif
378 3566352 : move16();
379 3566352 : L_xq[i] = L_tmp; /* Q31 currently unused */
380 3566352 : move32();
381 : }
382 :
383 : /* index the found PVQ vector into short codewords */
384 276341 : entry = mpvq_encode_vec_fx( y, dim, pulses );
385 :
386 : /* send the short codeword(s) to the range encoder */
387 276341 : rc_enc_bits_ivas_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
/* for dim == 1 only the lead sign bit is written, no index */
388 276341 : IF( NE_16( dim, 1 ) )
389 : {
390 276341 : rc_enc_uniform_ivas_fx( hBstr, hPVQ, entry.index, entry.size );
391 : }
392 :
393 276341 : return;
394 : }
395 :
/*-----------------------------------------------------------------------*
 * Function pvq_encode_fx()
 *
 * PVQ search and encoding of one vector x[] of length dim with `pulses` unit pulses.
 * Steps, in order:
 *  1) xabs[] = |x[]|, its maximum and its sum are computed, y[] is cleared
 *  2) if the sum is zero or neg_gain is zero, the pulses are split between y[0] and y[dim-1]
 *     without a search; otherwise an optional projection start is followed by
 *     one_pulse_search() calls until `pulses` pulses are placed
 *  3) y[] gets the signs of x[], and the gain scaled, energy normalized vector is
 *     written to xq[] and L_xq[]
 *  4) y[] is indexed by mpvq_encode_vec_fx(); the lead sign bit and, for dim != 1, the index
 *     are written through rc_enc_bits_fx() / rc_enc_uniform_fx()
 * NOTE(review): xabs[] holds PVQ_MAX_BAND_SIZE entries and dim is not range checked here -
 *               presumably callers guarantee 1 <= dim <= PVQ_MAX_BAND_SIZE, confirm.
 *-----------------------------------------------------------------------*/
396 16843 : void pvq_encode_fx(
397 : BSTR_ENC_HANDLE hBstr,
398 : PVQ_ENC_HANDLE hPVQ, /* i/o: PVQ encoder handle */
399 : const Word16 *x, /* i: vector to quantize Q15-3=>Q12 */
400 : Word16 *y, /* o: raw pulses (non-scaled short) Q0 */
401 : Word16 *xq, /* o: quantized vector Q15 */
402 : Word32 *L_xq, /* o: quantized vector Q31 for eval */
403 : const Word16 pulses, /* i: number of allocated pulses */
404 : const Word16 dim, /* i: Length of vector */
405 : const Word16 neg_gain /* i: - Gain use - negative gain in Q15 0..1 */
406 : )
407 : {
408 : Word16 i;
409 : Word16 pulse_tot;
410 : Word16 xabs[PVQ_MAX_BAND_SIZE];
411 : Word16 max_xabs;
412 : Word32 L_xsum;
413 : Word32 L_proj_fac;
414 : Word32 L_yy, L_xy;
415 : Word16 max_amp_y, imax;
416 : Word16 k, en_margin, en_dn_shift, high_prec_active;
417 :
418 : Word32 L_num, L_tmp;
419 : Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
420 :
421 : UWord16 u16_tmp;
422 : Word16 dim_m1;
423 : Word32 L_isqrt;
424 : Word16 neg_gain_norm, shift_tot;
425 : Word16 high_pulse_density_flag;
426 : PvqEntry entry;
427 : #ifndef ISSUE_1867_replace_overflow_libenc
428 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
429 : Flag Overflow = 0;
430 : move16();
431 : #endif
432 : #endif
433 :
434 16843 : L_proj_fac = 4096;
435 16843 : move32();
436 16843 : L_xsum = L_deposit_h( 0 );
437 16843 : max_xabs = -1;
438 16843 : move16();
439 :
/* absolute values of the input, their maximum and their sum; y[] is cleared */
440 216203 : FOR( i = 0; i < dim; i++ )
441 : {
442 199360 : xabs[i] = abs_s( x[i] );
443 199360 : move16(); /* Q12 */
444 199360 : max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient search correlation scaling */
445 199360 : L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
446 199360 : y[i] = 0;
447 199360 : move16(); /* init, later only non-zero values need to be normalized */
448 : }
449 :
450 16843 : test();
451 16843 : IF( L_xsum == 0 || neg_gain == 0 )
452 : {
/* zero input or zero gain: no search, shr( pulses, 1 ) pulses go to y[0] and the remaining ones to y[dim-1]
   (for dim == 1 both writes hit y[0], which ends up holding all pulses) */
453 17 : pulse_tot = pulses;
454 17 : move16();
455 17 : dim_m1 = sub( dim, 1 );
456 17 : y[dim_m1] = 0;
457 17 : move16();
458 17 : y[0] = shr( pulses, 1 );
459 17 : move16();
460 17 : y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) );
461 17 : move16();
462 17 : L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
463 17 : if ( dim_m1 != 0 )
464 : {
465 17 : L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
466 : }
467 : }
468 : ELSE
469 : {
470 :
/* the projection start is only used if pulses exceeds PYR_OFFSET and the pulse density flag is set */
471 16826 : num = sub( pulses, PYR_OFFSET );
472 16826 : high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
473 :
474 16826 : test();
475 16826 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
476 : {
/* projection factor = num / L_xsum, computed on normalized operands and brought back to Q12 */
477 10933 : shift_den = norm_l( L_xsum ); /* x_sum input Q12 */
478 10933 : den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den */
479 :
480 10933 : L_num = L_deposit_l( num );
481 10933 : shift_num = sub( norm_l( L_num ), 1 );
482 10933 : L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1 */
483 10933 : proj_fac = div_l( L_num, den ); /* L_num always has to be less than den<<16 */
484 :
485 10933 : shift_delta = sub( shift_num, shift_den );
486 10933 : L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring to a fixed Q12 */
487 : }
488 :
489 16826 : pulse_tot = 0;
490 16826 : move16();
491 16826 : L_yy = L_deposit_l( 0 );
492 16826 : L_xy = L_deposit_l( 0 );
493 16826 : test();
494 16826 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
495 : {
/* projection start: y[i] is the downshifted product of L_proj_fac and xabs[i]; pulse sum, energy and correlation are accumulated */
496 102537 : FOR( i = 0; i < dim; i++ ) /* max 64 */
497 : {
498 91604 : Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12 +1 */
499 91604 : y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
500 91604 : move16(); /* Q12 *Q12 -> Q0 */
501 :
502 91604 : pulse_tot = add( pulse_tot, y[i] ); /* Q0 */
503 91604 : L_yy = L_mac( L_yy, y[i], y[i] ); /* Energy, result will scale up by 2 by L_mac */
504 91604 : L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12 +1 --> Q13 */
505 : }
506 : }
507 :
508 :
509 16826 : L_yy = L_shr( L_yy, 1 ); /* the search loops below work on this downshifted energy, it is upshifted again after the search */
510 16826 : IF( LE_16( pulses, 127 ) )
511 : {
512 : /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5 */
513 : /* ( if high energy precision is inactive, max_amp_y is not needed , no max_amp_y(k-1) update ) */
514 93339 : FOR( k = pulse_tot; k < pulses; k++ )
515 : {
516 77260 : L_yy = L_add( L_yy, 1 ); /* position independent +1 energy term of the new unit pulse, added once outside the dim loop */
517 77260 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
518 : }
519 : }
520 : ELSE
521 : { /* HC or LC+HC inner loops */
522 747 : max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension restricted) */
523 : /* max_amp_y from projected y is needed when pulses_sum exceeds 127 */
524 :
525 : /* First section with 32 bit energy inactive, max_amp_y kept updated though */
526 753 : FOR( k = pulse_tot; k < 128; k++ )
527 : {
528 6 : L_yy = L_add( L_yy, 1 );
529 6 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
530 6 : max_amp_y = s_max( max_amp_y, y[imax] );
531 : }
532 :
533 : /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated */
534 2974 : FOR( k = pulse_tot; k < pulses; k++ )
535 : {
536 2227 : L_yy = L_add( L_yy, 1 );
537 2227 : en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin, ~ 2 ops */
538 2227 : en_dn_shift = sub( 16, en_margin ); /* calc. shift to lower byte for fixed use of extract_l */
539 :
540 2227 : high_prec_active = 1;
541 2227 : move16();
542 2227 : if ( en_dn_shift <= 0 )
543 : {
544 : /* only use 32 bit energy if actually needed */
545 1265 : high_prec_active = 0;
546 1265 : move16();
547 : }
548 : /* 32 bit energy and corr adaptively active, max_amp_y kept updated */
549 2227 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
550 2227 : max_amp_y = s_max( max_amp_y, y[imax] );
551 : }
552 : }
553 16826 : L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
554 : to make energy right for unit/inverse gain calculation */
555 : }
556 :
557 : /* Apply unit energy normalization scaling, always at least one pulse so no div-by-zero check is needed */
558 16843 : L_isqrt = L_deposit_l( 0 );
559 16843 : IF( neg_gain != 0 )
560 : {
561 16826 : L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
562 : }
563 :
564 16843 : shift_num = norm_s( pulse_tot ); /* account for max possible pulse amplitude in y,
565 : can be used even when max_amp_y is not avail. */
566 16843 : shift_den = norm_s( neg_gain ); /* account for gain downscaling shift */
567 16843 : neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm */
568 16843 : shift_tot = sub( add( shift_num, shift_den ), 15 );
569 :
570 16843 : L_isqrt = L_negate( L_isqrt ); /* sign flip, neg_gain is documented as a negative gain */
/* per coefficient: apply the sign of x[i] to y[i], then scale by L_isqrt and the normalized gain */
571 216203 : FOR( i = 0; i < dim; i++ )
572 : {
573 199360 : tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used in scaling */
574 199360 : if ( x[i] < 0 )
575 : {
576 99448 : tmp = negate( tmp ); /* apply sign */
577 : }
578 :
579 199360 : if ( y[i] != 0 )
580 : {
581 91218 : y[i] = shr( tmp, shift_num );
582 91218 : move16(); /* updates sign of y[i] , ~range -512 + 512), array move */
583 : }
584 199360 : Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) +1 */
585 199360 : Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1 */
586 : #ifdef ISSUE_1867_replace_overflow_libenc
587 199360 : L_tmp = L_shr_sat( L_tmp, shift_tot ); /* Q31+x */
588 199360 : xq[i] = round_fx_sat( L_tmp ); /* Q15, array move */
589 : #else
590 : L_tmp = L_shr_sat( L_tmp, shift_tot ); /* Q31+x */
591 : xq[i] = round_fx_o( L_tmp, &Overflow ); /* Q15, array move */
592 : #endif
593 199360 : move16();
594 199360 : L_xq[i] = L_tmp; /* Q31 currently unused */
595 199360 : move32();
596 : }
597 :
598 : /* index the found PVQ vector into short codewords */
599 16843 : entry = mpvq_encode_vec_fx( y, dim, pulses );
600 :
601 : /* send the short codeword(s) to the range encoder */
602 16843 : rc_enc_bits_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
/* for dim == 1 only the lead sign bit is written, no index */
603 16843 : IF( NE_16( dim, 1 ) )
604 : {
605 16843 : rc_enc_uniform_fx( hBstr, hPVQ, entry.index, entry.size );
606 : }
607 :
608 16843 : return;
609 : }
|