Line data Source code
1 : /*====================================================================================
2 : EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
3 : ====================================================================================*/
4 : #include <stdint.h>
5 : #include "options.h" /* Compilation switches */
6 : #include "cnst.h"
7 : #include "prot_fx.h" /* Function prototypes */
8 : #include "prot_fx_enc.h" /* Function prototypes */
9 : #include "rom_com_fx.h"
10 : #include "rom_com.h"
11 :
12 : /* PVQ MIXED_SEARCH_LOOP:
13 : low precision 16/32 + energy selective high precision 32/64,
14 : mixed perf , 10 dB SEGSNR better than the low precision loop only,
15 : active if k>=128 and accumulated energy is high enough,
16 : comes at a controlled complexity cost, as dimensions decrease for high k's*/
17 :
/* Returns the largest element of vec[0..lvec-1].
   vec[0] is read unconditionally, so the caller must pass lvec >= 1. */
18 1686 : static Word16 max_val_fx( /* o : maximum value in the input vector */
19 : const Word16 *vec, /* i : input vector */
20 : const Word16 lvec /* i : length of input vector */
21 : )
22 : {
23 : Word16 j, tmp;
24 :
25 1686 : tmp = vec[0]; /* seed the running maximum with the first element */
26 1686 : move16();
27 6713 : FOR( j = 1; j < lvec; j++ )
28 : {
29 5027 : tmp = s_max( vec[j], tmp ); /* keep the larger of the current element and the running maximum */
30 : }
31 1686 : return tmp;
32 : }
33 :
/* Returns 1 when Ptot is larger than ( 8223 * L ) >> 14, and 0 otherwise.
   The encoder functions visible in this file call it with ( dim, pulses ) and use the
   result as their high_pulse_density_flag. */
34 293228 : static Word16 pyramidSearchProjInit_fx( Word16 L, Word16 Ptot )
35 : {
36 293228 : return ( sub( Ptot, extract_l( L_shr( L_mult0( 8223, (Word32) L ), 14 ) ) ) > 0 ); /* 8223 / 2^14 is approximately 0.502 */
37 : }
38 :
39 :
40 : /* The inner search loop for one single additional unit pulse, starting from pulse_tot ,
41 : with information about required energy precision/down scaling for the dim loop in en_dn_shift,
42 : and the current max_xabs absolute value to be used for a near optimal correlation upscaling.
43 : returns the index of the best positioned unit pulse in imax
   Every position i in 0..dim-1 is tried as the place for the extra pulse; the position with the
   largest CorrSq/Energy ratio is kept, where the ratio test is done by cross multiplication.
   Side effects on return: y[imax] is incremented by one, *L_xy_ptr and *L_yy_ptr are updated
   with the contribution of the selected pulse, and *pulse_tot_ptr is incremented by one.
   high_prec_active selects between the 16/32 bit loop (== 0) and the 32/64 bit loop (!= 0).
44 : */
45 1178198 : static Word16 one_pulse_search(
46 : const Word16 dim, /* vector dimension */
47 : const Word16 *x_abs, /* absolute vector values */
48 : Word16 *y, /* output vector */
49 : Word16 *pulse_tot_ptr, /* i/o: running pulse count, incremented by one before returning */
50 : Word32 *L_xy_ptr, /* accumulated correlation */
51 : Word32 *L_yy_ptr, /* accumulated energy */
52 : Word16 high_prec_active, /* 0: 16/32 bit search loop, otherwise: 32/64 bit search loop */
53 : Word16 en_dn_shift, /* only copied into en_tmp below, not otherwise used in this function */
54 : Word16 max_xabs ) /* current accumulated max amplitude for pulses */
55 : {
56 : Word16 i, corr_up_shift, corr_tmp, imax, corr_sq_tmp, en_max_den, cmax_num, en_tmp;
57 : Word32 L_tmp_en_lc, L_tmp_corr;
58 : Word32 L_tmp_en, L_en_max_den, L_corr_sq_max, L_tmp_corr_sq;
59 : Word32 L_left_h, L_right_h;
60 : UWord32 UL_left_l, UL_right_l, UL_dummy;
61 : Word32 L_tmp;
62 : UWord16 u_sgn;
63 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
64 1178198 : Flag Overflow = 0;
65 1178198 : move16();
66 : #endif
67 :
68 1178198 : en_tmp = en_dn_shift; /* dummy assignment to avoid compiler warning for unused parameter */
69 :
70 : /* maximize correlation precision, prior to every unit pulse addition in the vector */
71 1178198 : corr_up_shift = norm_l( L_mac( *L_xy_ptr, 1, max_xabs ) ); /* pre analyze worst case L_xy update in the dim loop , 2 ops */
72 1178198 : imax = -1; /* not needed for search, only added to avoid compiler warning */
73 :
74 : /* clean BE code, with split out low/high precision loops */
75 : /* activate low complexity en/corr search section conditionally if resulting vector energy is within limits */
76 : /* typical case for higher dimensions */
77 :
78 1178198 : IF( high_prec_active == 0 )
79 : {
80 1176364 : en_max_den = 0; /*move16()*/
81 1176364 : move16();
82 : ; /* OPT: move saved by using high_prec_active as en_max_den */ /* 1 op */
83 1176364 : cmax_num = -1;
84 1176364 : move16(); /* req. to force a 1st update for n==0 */ /* 1 op */
85 :
86 16851952 : FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
87 : {
88 15675588 : L_tmp_corr = L_shl_o( L_mac_o( *L_xy_ptr, 1, x_abs[i], &Overflow ), corr_up_shift, &Overflow ); /* actual in-loop target value, 2 ops */
89 15675588 : corr_tmp = round_fx_o( L_tmp_corr, &Overflow ); /* 1 op */
90 15675588 : corr_sq_tmp = mult( corr_tmp, corr_tmp ); /* CorrSq, is a 16bit for low complexity cross multiplication 1 op */
91 :
92 15675588 : L_tmp_en_lc = L_mac( *L_yy_ptr, 1, y[i] ); /*Q1 result , energy may span up to ~14+1(Q1)+1(sign)=16 bits, 1 op */
93 : /* extract_l without shift can always be used for this section as energy is guaranteed to stay in the lower word, 1 op */
94 15675588 : en_tmp = extract_l( L_tmp_en_lc ); /* L_shl + round_fx could also be used but then adds an upshift cost (2-3 ops)*/
95 :
96 : /* 16/32 bit comparison WC (4 +1+1 + (1+1+1) = 9 */
97 15675588 : IF( L_msu( L_mult( corr_sq_tmp, en_max_den ), cmax_num, en_tmp ) > 0 ) /* use L_mult and then a L_msu, 2 ops */
98 : {
99 3812111 : cmax_num = corr_sq_tmp;
100 3812111 : move16(); /* 1 op */
101 3812111 : en_max_den = en_tmp;
102 3812111 : move16(); /* 1 op */
103 3812111 : imax = i;
104 3812111 : move16(); /* 1 op */
105 : }
106 : } /* dim */
107 : }
108 : ELSE
109 : {
110 : /* High resolution section activated when vector energy is becoming high (peaky or many pulses) */
111 : /* BASOP operator Mpy32_32_ss used to allow higher resolution for both the CorrSq term and the Energy term */
112 :
113 1834 : L_en_max_den = L_deposit_l( 0 ); /* 1 op */
114 1834 : L_corr_sq_max = L_deposit_l( -1 ); /* req. to force a 1st update */ /* 1 op */
115 :
116 9014 : FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
117 : {
118 7180 : L_tmp_corr = L_shl( L_mac( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /* actual in loop WC value 2 ops */
119 7180 : Mpy_32_32_ss( L_tmp_corr, L_tmp_corr, &L_tmp_corr_sq, &UL_dummy ); /* CorrSq 32 bits, 4 ops */
120 :
121 7180 : L_tmp_en = L_mac( *L_yy_ptr, 1, y[i] ); /* Q1,energy may span up to sign+19 bits , 1 op */
122 : /* For highest accuracy use pairs of maximum upshifted 32x32 bit signed values */
123 : /* (L_tmp_corr_sq / L_tmp_en) > (L_corr_sq_max/L_en_max_den) */
124 : /* (L_tmp_corr_sq * L_en_max_den) > (L_corr_sq_max * L_tmp_en) */
125 7180 : Mpy_32_32_ss( L_en_max_den, L_tmp_corr_sq, &L_left_h, &UL_left_l ); /* 4 ops */
126 7180 : Mpy_32_32_ss( L_tmp_en, L_corr_sq_max, &L_right_h, &UL_right_l ); /* 4 ops */
127 :
128 : /* STL optimized "Lazy evaluation" of:
129 : IF( (L_left_h > L_right_h) || ( (L_left_h == L_right_h) && (UL_left_l > UL_right_l) )
130 : */
131 : /* 32/64 bit Lazy eval comparison WC cost is (1+ 1+1+1 + 4 +(2+2+1) = 13 , and average is ~12 */
132 : /* Unoptimized 32/64 bit comparison WC cost is (1+1+ 2x2 + 4 +(2+2+1) = 15 */
133 7180 : L_tmp = L_sub( L_left_h, L_right_h ); /* high signed word check 1 op */
134 7180 : u_sgn = 0;
135 7180 : move16(); /* 1 op */
136 7180 : if ( L_tmp == 0 ) /* L_tmp high Word testing is always needed */
137 : {
138 : /* The returned UL value from UL_subNs is not needed, only u_sgn is needed */
139 3337 : UL_subNs( UL_right_l, UL_left_l, &u_sgn ); /* low unsigned word check, note left/right order switch of ">" due to ">=" inside UL_subNs, 1 op */
140 : }
141 7180 : if ( u_sgn != 0 )
142 : {
143 1359 : L_tmp = L_add( L_tmp, 1 ); /* 0+1 --> 1 use wrap/sign result of low Word u_sgn check */ /* 1 op */
144 : }
145 7180 : IF( L_tmp > 0 ) /* IF 4 ops */
146 : {
147 3815 : L_corr_sq_max = L_add( L_tmp_corr_sq, 0 ); /* 1-2 ops */
148 3815 : L_en_max_den = L_add( L_tmp_en, 0 ); /* 1-2 ops */
149 3815 : imax = i;
150 3815 : move16(); /* 1 op */
151 : }
152 : } /* dim loop */
153 : }
154 : /* Complexity comparison per coeff for low precision vs. high precision
155 : low precision: pulse_tot <= 127, 16 bit: WC 2+3 +(15)*dim ops, dim=5 --> 5+15*5 = 90 ops, 18 ops/coeff
156 : high precision: pulse_tot > 127, 32 bit: WC 1+3+3 +(26-28)*dim ops, WC-band dim=5 --> 7+28*5 = 147 ops, 29 ops/coeff ~61% increase
157 : */
158 :
159 : /* finally add found unit pulse contribution to past L_xy, Lyy, for next pulse loop */
/* NOTE(review): imax is used as an array index below; it is still -1 here if neither loop above
   selected a candidate (for example when dim <= 0), so callers are expected to pass dim >= 1 */
160 1178198 : *L_xy_ptr = L_mac( *L_xy_ptr, x_abs[imax], 1 ); /* Q12+1 */
161 1178198 : *L_yy_ptr = L_mac( *L_yy_ptr, 1, y[imax] );
162 :
163 1178198 : y[imax] = add( y[imax], 1 );
164 1178198 : move16(); /* Q0 added pulse */
165 1178198 : ( *pulse_tot_ptr ) = add( ( *pulse_tot_ptr ), 1 ); /* increment total pulse sum */
166 1178198 : move16();
167 :
168 1178198 : return imax;
169 : }
170 : /*-----------------------------------------------------------------------*
171 : * Function pvq_encode_ivas_fx() *
172 : * *
   * Quantizes the vector x (length dim) with `pulses` unit pulses, *
   * writes the signed pulse vector to y and the gain scaled result to *
   * xq / L_xq, then indexes y with mpvq_encode_vec_fx() and writes the *
   * codeword through rc_enc_bits_ivas_fx() / rc_enc_uniform_ivas_fx(). *
   * *
   * Steps: *
   * 1. build xabs[], its maximum and its sum, and clear y[] *
   * 2. if the sum or neg_gain is zero: fixed split of the pulses *
   * between y[0] and y[dim-1], no search *
   * 3. otherwise: optional projection of x onto the pyramid, followed *
   * by one_pulse_search() calls until `pulses` pulses are placed *
   * 4. apply signs to y[] and compute the normalized, gain scaled output *
   * 5. index y[] and send the codeword(s) to the range encoder *
   * *
   * The body parallels pvq_encode_fx(); this variant uses the *
   * *_ivas_fx range encoder entry points. *
   * dim is assumed to be <= PVQ_MAX_BAND_SIZE, as xabs[] is indexed up *
   * to dim-1. *
173 : *-----------------------------------------------------------------------*/
174 277318 : void pvq_encode_ivas_fx(
175 : BSTR_ENC_HANDLE hBstr,
176 : PVQ_ENC_HANDLE hPVQ, /* i/o: PVQ encoder handle */
177 : const Word16 *x, /* i: vector to quantize Q15-3=>Q12 */
178 : Word16 *y, /* o: raw pulses (non-scaled short) Q0 */
179 : Word16 *xq, /* o: quantized vector Q15 */
180 : Word32 *L_xq, /* o: quantized vector Q31 for eval */
181 : const Word16 pulses, /* i: number of allocated pulses */
182 : const Word16 dim, /* i: Length of vector */
183 : const Word16 neg_gain /* i: - Gain use - negative gain in Q15 0..1 */
184 : )
185 : {
186 : Word16 i;
187 : Word16 pulse_tot;
188 : Word16 xabs[PVQ_MAX_BAND_SIZE];
189 : Word16 max_xabs;
190 : Word32 L_xsum;
191 : Word32 L_proj_fac;
192 : Word32 L_yy, L_xy;
193 : Word16 max_amp_y, imax;
194 : Word16 k, en_margin, en_dn_shift, high_prec_active;
195 :
196 : Word32 L_num, L_tmp;
197 : Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
198 :
199 : UWord16 u16_tmp;
200 : Word16 dim_m1;
201 : Word32 L_isqrt;
202 : Word16 neg_gain_norm, shift_tot;
203 : Word16 high_pulse_density_flag;
204 : PvqEntry entry;
205 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
206 277318 : Flag Overflow = 0;
207 277318 : move16();
208 : #endif
209 :
210 277318 : L_proj_fac = 4096;
211 277318 : move32();
212 277318 : L_xsum = L_deposit_h( 0 );
213 277318 : max_xabs = -1;
214 277318 : move16();
215 :
216 3854962 : FOR( i = 0; i < dim; i++ )
217 : {
218 3577644 : xabs[i] = abs_s( x[i] );
219 3577644 : move16(); /* Q12 */
220 3577644 : max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient search correlation scaling */
221 3577644 : L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
222 3577644 : y[i] = 0;
223 3577644 : move16(); /* init, later only non-zero values need to be normalized */
224 : }
225 :
226 277318 : test();
227 277318 : IF( L_xsum == 0 || neg_gain == 0 ) /* zero input sum or zero gain: no search, use a fixed pulse split */
228 : {
229 429 : pulse_tot = pulses;
230 429 : move16();
231 429 : dim_m1 = sub( dim, 1 );
232 429 : y[dim_m1] = 0;
233 429 : move16();
234 429 : y[0] = shr( pulses, 1 ); /* pulses >> 1 go to the first coefficient */
235 429 : move16();
236 429 : y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) ); /* the rest is added to the last coefficient (the same element as y[0] when dim == 1) */
237 429 : move16();
238 429 : L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
239 429 : IF( dim_m1 != 0 )
240 : {
241 429 : L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
242 : }
243 : }
244 : ELSE
245 : {
246 :
247 276889 : num = sub( pulses, PYR_OFFSET ); /* the projection below is only used when this is positive */
248 276889 : high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
249 :
250 276889 : test();
251 276889 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
252 : {
253 119177 : shift_den = norm_l( L_xsum ); /* x_sum input Q12 */
254 119177 : den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den */
255 :
256 119177 : L_num = L_deposit_l( num );
257 119177 : shift_num = sub( norm_l( L_num ), 1 );
258 119177 : L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1 */
259 119177 : proj_fac = div_l( L_num, den ); /* L_num always has to be less than den<<16 */
260 :
261 119177 : shift_delta = sub( shift_num, shift_den );
262 119177 : L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring to a fixed Q12 */
263 : }
264 :
265 276889 : pulse_tot = 0;
266 276889 : move16();
267 276889 : L_yy = L_deposit_l( 0 );
268 276889 : L_xy = L_deposit_l( 0 );
269 276889 : test();
270 276889 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) ) /* same condition as above: place the projected pulses first */
271 : {
272 1251323 : FOR( i = 0; i < dim; i++ ) /* max 64 */
273 : {
274 1132146 : Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12 +1 */
275 1132146 : y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
276 1132146 : move16(); /* Q12 *Q12 -> Q0 */
277 :
278 1132146 : pulse_tot = add( pulse_tot, y[i] ); /* Q0 */
279 1132146 : L_yy = L_mac( L_yy, y[i], y[i] ); /* Energy, result will scale up by 2 by L_mac */
280 1132146 : L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12 +1 --> Q13 */
281 : }
282 : }
283 :
284 :
285 276889 : L_yy = L_shr( L_yy, 1 );
286 276889 : IF( LE_16( pulses, 127 ) )
287 : {
288 : /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5 */
289 : /* ( if high energy precision is inactive, max_amp_y is not needed , no max_amp_y(k-1) update ) */
290 1373786 : FOR( k = pulse_tot; k < pulses; k++ )
291 : {
292 1097844 : L_yy = L_add( L_yy, 1 );
293 1097844 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs ); /* returned index is not used in this loop */
294 : }
295 : }
296 : ELSE
297 : { /* HC or LC+HC inner loops */
298 947 : max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension restricted) */
299 : /* max_amp_y from projected y is needed when pulses_sum exceeds 127 */
300 :
301 : /* First section with 32 bit energy inactive, max_amp_y kept updated though */
302 979 : FOR( k = pulse_tot; k < 128; k++ )
303 : {
304 32 : L_yy = L_add( L_yy, 1 );
305 32 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
306 32 : max_amp_y = s_max( max_amp_y, y[imax] );
307 : }
308 :
309 : /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated */
310 3766 : FOR( k = pulse_tot; k < pulses; k++ )
311 : {
312 2819 : L_yy = L_add( L_yy, 1 );
313 2819 : en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin, ~ 2 ops */
314 2819 : en_dn_shift = sub( 16, en_margin ); /* calc. shift to lower byte for fixed use of extract_l */
315 :
316 2819 : high_prec_active = 1;
317 2819 : move16();
318 2819 : if ( en_dn_shift <= 0 )
319 : {
320 : /* only use 32 bit energy if actually needed */
321 1944 : high_prec_active = 0;
322 1944 : move16();
323 : }
324 : /* 32 bit energy and corr adaptively active, max_amp_y kept updated */
325 2819 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
326 2819 : max_amp_y = s_max( max_amp_y, y[imax] );
327 : }
328 : }
329 276889 : L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
330 : to make energy right for unit/inverse gain calculation */
331 : }
332 :
333 : /* Apply unit energy normalization scaling, always at least one pulse so no div-by-zero check is needed */
334 277318 : L_isqrt = L_deposit_l( 0 ); /* stays 0 when neg_gain == 0 */
335 277318 : IF( neg_gain != 0 )
336 : {
337 276889 : L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
338 : }
339 :
340 277318 : shift_num = norm_s( pulse_tot ); /* account for max possible pulse amplitude in y,
341 : can be used even when max_amp_y is not avail. */
342 277318 : shift_den = norm_s( neg_gain ); /* account for gain downscaling shift */
343 277318 : neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm */
344 277318 : shift_tot = sub( add( shift_num, shift_den ), 15 );
345 :
346 277318 : L_isqrt = L_negate( L_isqrt );
347 3854962 : FOR( i = 0; i < dim; i++ )
348 : {
349 3577644 : tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used in scaling */
350 3577644 : if ( x[i] < 0 )
351 : {
352 1778723 : tmp = negate( tmp ); /* apply sign */
353 : }
354 :
355 3577644 : IF( y[i] != 0 )
356 : {
357 1159303 : y[i] = shr( tmp, shift_num );
358 1159303 : move16(); /* updates sign of y[i] , ~range -512 + 512), array move */
359 : }
360 3577644 : Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) +1 */
361 3577644 : Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1 */
362 3577644 : L_tmp = L_shr_o( L_tmp, shift_tot, &Overflow ); /* Q31+x */
363 3577644 : xq[i] = round_fx_o( L_tmp, &Overflow ); /* Q15, array move */
364 3577644 : move16();
365 3577644 : L_xq[i] = L_tmp; /* Q31 currently unused */
366 3577644 : move32();
367 : }
368 :
369 : /* index the found PVQ vector into short codewords */
370 277318 : entry = mpvq_encode_vec_fx( y, dim, pulses );
371 :
372 : /* send the short codeword(s) to the range encoder */
373 277318 : rc_enc_bits_ivas_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
374 277318 : IF( NE_16( dim, 1 ) ) /* for dim == 1 only the leading sign bit above is written */
375 : {
376 277318 : rc_enc_uniform_ivas_fx( hBstr, hPVQ, entry.index, entry.size );
377 : }
378 :
379 277318 : return;
380 : }
381 :
/*-----------------------------------------------------------------------*
 * Function pvq_encode_fx()
 *
 * Quantizes the vector x (length dim) with `pulses` unit pulses, writes
 * the signed pulse vector to y and the gain scaled result to xq / L_xq,
 * then indexes y with mpvq_encode_vec_fx() and writes the codeword
 * through rc_enc_bits_fx() / rc_enc_uniform_fx().
 *
 * Steps:
 *  1. build xabs[], its maximum and its sum, and clear y[]
 *  2. if the sum or neg_gain is zero: fixed split of the pulses between
 *     y[0] and y[dim-1], no search
 *  3. otherwise: optional projection of x onto the pyramid, followed by
 *     one_pulse_search() calls until `pulses` pulses are placed
 *  4. apply signs to y[] and compute the normalized, gain scaled output
 *  5. index y[] and send the codeword(s) to the range encoder
 *
 * dim is assumed to be <= PVQ_MAX_BAND_SIZE, as xabs[] is indexed up to
 * dim-1.
 *-----------------------------------------------------------------------*/
382 16357 : void pvq_encode_fx(
383 : BSTR_ENC_HANDLE hBstr,
384 : PVQ_ENC_HANDLE hPVQ, /* i/o: PVQ encoder handle */
385 : const Word16 *x, /* i: vector to quantize Q15-3=>Q12 */
386 : Word16 *y, /* o: raw pulses (non-scaled short) Q0 */
387 : Word16 *xq, /* o: quantized vector Q15 */
388 : Word32 *L_xq, /* o: quantized vector Q31 for eval */
389 : const Word16 pulses, /* i: number of allocated pulses */
390 : const Word16 dim, /* i: Length of vector */
391 : const Word16 neg_gain /* i: - Gain use - negative gain in Q15 0..1 */
392 : )
393 : {
394 : Word16 i;
395 : Word16 pulse_tot;
396 : Word16 xabs[PVQ_MAX_BAND_SIZE];
397 : Word16 max_xabs;
398 : Word32 L_xsum;
399 : Word32 L_proj_fac;
400 : Word32 L_yy, L_xy;
401 : Word16 max_amp_y, imax;
402 : Word16 k, en_margin, en_dn_shift, high_prec_active;
403 :
404 : Word32 L_num, L_tmp;
405 : Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
406 :
407 : UWord16 u16_tmp;
408 : Word16 dim_m1;
409 : Word32 L_isqrt;
410 : Word16 neg_gain_norm, shift_tot;
411 : Word16 high_pulse_density_flag;
412 : PvqEntry entry;
413 : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
414 16357 : Flag Overflow = 0;
415 16357 : move16();
416 : #endif
417 :
418 16357 : L_proj_fac = 4096;
419 16357 : move32();
420 16357 : L_xsum = L_deposit_h( 0 );
421 16357 : max_xabs = -1;
422 16357 : move16();
423 :
424 209845 : FOR( i = 0; i < dim; i++ )
425 : {
426 193488 : xabs[i] = abs_s( x[i] );
427 193488 : move16(); /* Q12 */
428 193488 : max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient search correlation scaling */
429 193488 : L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
430 193488 : y[i] = 0;
431 193488 : move16(); /* init, later only non-zero values need to be normalized */
432 : }
433 :
434 16357 : test();
435 16357 : IF( L_xsum == 0 || neg_gain == 0 ) /* zero input sum or zero gain: no search, use a fixed pulse split */
436 : {
437 18 : pulse_tot = pulses;
438 18 : move16();
439 18 : dim_m1 = sub( dim, 1 );
440 18 : y[dim_m1] = 0;
441 18 : move16();
442 18 : y[0] = shr( pulses, 1 ); /* pulses >> 1 go to the first coefficient */
443 18 : move16();
444 18 : y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) ); /* the rest is added to the last coefficient (the same element as y[0] when dim == 1) */
445 18 : move16();
446 18 : L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
447 18 : if ( dim_m1 != 0 )
448 : {
449 18 : L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
450 : }
451 : }
452 : ELSE
453 : {
454 :
455 16339 : num = sub( pulses, PYR_OFFSET ); /* the projection below is only used when this is positive */
456 16339 : high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
457 :
458 16339 : test();
459 16339 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
460 : {
461 10806 : shift_den = norm_l( L_xsum ); /* x_sum input Q12 */
462 10806 : den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den */
463 :
464 10806 : L_num = L_deposit_l( num );
465 10806 : shift_num = sub( norm_l( L_num ), 1 );
466 10806 : L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1 */
467 10806 : proj_fac = div_l( L_num, den ); /* L_num always has to be less than den<<16 */
468 :
469 10806 : shift_delta = sub( shift_num, shift_den );
470 10806 : L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring to a fixed Q12 */
471 : }
472 :
473 16339 : pulse_tot = 0;
474 16339 : move16();
475 16339 : L_yy = L_deposit_l( 0 );
476 16339 : L_xy = L_deposit_l( 0 );
477 16339 : test();
478 16339 : IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) ) /* same condition as above: place the projected pulses first */
479 : {
480 101212 : FOR( i = 0; i < dim; i++ ) /* max 64 */
481 : {
482 90406 : Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12 +1 */
483 90406 : y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
484 90406 : move16(); /* Q12 *Q12 -> Q0 */
485 :
486 90406 : pulse_tot = add( pulse_tot, y[i] ); /* Q0 */
487 90406 : L_yy = L_mac( L_yy, y[i], y[i] ); /* Energy, result will scale up by 2 by L_mac */
488 90406 : L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12 +1 --> Q13 */
489 : }
490 : }
491 :
492 :
493 16339 : L_yy = L_shr( L_yy, 1 );
494 16339 : IF( LE_16( pulses, 127 ) )
495 : {
496 : /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5 */
497 : /* ( if high energy precision is inactive, max_amp_y is not needed , no max_amp_y(k-1) update ) */
498 90891 : FOR( k = pulse_tot; k < pulses; k++ )
499 : {
500 75291 : L_yy = L_add( L_yy, 1 );
501 75291 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs ); /* returned index is not used in this loop */
502 : }
503 : }
504 : ELSE
505 : { /* HC or LC+HC inner loops */
506 739 : max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension restricted) */
507 : /* max_amp_y from projected y is needed when pulses_sum exceeds 127 */
508 :
509 : /* First section with 32 bit energy inactive, max_amp_y kept updated though */
510 745 : FOR( k = pulse_tot; k < 128; k++ )
511 : {
512 6 : L_yy = L_add( L_yy, 1 );
513 6 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
514 6 : max_amp_y = s_max( max_amp_y, y[imax] );
515 : }
516 :
517 : /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated */
518 2945 : FOR( k = pulse_tot; k < pulses; k++ )
519 : {
520 2206 : L_yy = L_add( L_yy, 1 );
521 2206 : en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin, ~ 2 ops */
522 2206 : en_dn_shift = sub( 16, en_margin ); /* calc. shift to lower byte for fixed use of extract_l */
523 :
524 2206 : high_prec_active = 1;
525 2206 : move16();
526 2206 : if ( en_dn_shift <= 0 )
527 : {
528 : /* only use 32 bit energy if actually needed */
529 1247 : high_prec_active = 0;
530 1247 : move16();
531 : }
532 : /* 32 bit energy and corr adaptively active, max_amp_y kept updated */
533 2206 : imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
534 2206 : max_amp_y = s_max( max_amp_y, y[imax] );
535 : }
536 : }
537 16339 : L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
538 : to make energy right for unit/inverse gain calculation */
539 : }
540 :
541 : /* Apply unit energy normalization scaling, always at least one pulse so no div-by-zero check is needed */
542 16357 : L_isqrt = L_deposit_l( 0 ); /* stays 0 when neg_gain == 0 */
543 16357 : IF( neg_gain != 0 )
544 : {
545 16339 : L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
546 : }
547 :
548 16357 : shift_num = norm_s( pulse_tot ); /* account for max possible pulse amplitude in y,
549 : can be used even when max_amp_y is not avail. */
550 16357 : shift_den = norm_s( neg_gain ); /* account for gain downscaling shift */
551 16357 : neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm */
552 16357 : shift_tot = sub( add( shift_num, shift_den ), 15 );
553 :
554 16357 : L_isqrt = L_negate( L_isqrt );
555 209845 : FOR( i = 0; i < dim; i++ )
556 : {
557 193488 : tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used in scaling */
558 193488 : if ( x[i] < 0 )
559 : {
560 96576 : tmp = negate( tmp ); /* apply sign */
561 : }
562 :
563 193488 : if ( y[i] != 0 )
564 : {
565 89220 : y[i] = shr( tmp, shift_num );
566 89220 : move16(); /* updates sign of y[i] , ~range -512 + 512), array move */
567 : }
568 193488 : Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) +1 */
569 193488 : Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1 */
570 193488 : L_tmp = L_shr_o( L_tmp, shift_tot, &Overflow ); /* Q31+x */
571 193488 : xq[i] = round_fx_o( L_tmp, &Overflow ); /* Q15, array move */
572 193488 : move16();
573 193488 : L_xq[i] = L_tmp; /* Q31 currently unused */
574 193488 : move32();
575 : }
576 :
577 : /* index the found PVQ vector into short codewords */
578 16357 : entry = mpvq_encode_vec_fx( y, dim, pulses );
579 :
580 : /* send the short codeword(s) to the range encoder */
581 16357 : rc_enc_bits_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
582 16357 : IF( NE_16( dim, 1 ) ) /* for dim == 1 only the leading sign bit above is written */
583 : {
584 16357 : rc_enc_uniform_fx( hBstr, hPVQ, entry.index, entry.size );
585 : }
586 :
587 16357 : return;
588 : }
|