Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #include <math.h>
36 : #include "ivas_prot_fx.h"
37 : #include "ivas_cnst.h"
38 : #include "ivas_rom_com.h"
39 : #include "ivas_rom_enc.h"
40 : #include <assert.h>
41 : #include "prot_fx.h"
42 : #include "wmc_auto.h"
43 : /* used only for norm_s in the code_length_from_count function */
44 : #include "stl.h"
45 :
46 :
47 : /*---------------------------------------------------------------
48 : * log_base2()
49 : *
50 : *
51 : * ---------------------------------------------------------------*/
52 :
53 :
54 : /*---------------------------------------------------------------
55 : * get_sign()
56 : *
57 : *
58 : * ---------------------------------------------------------------*/
59 439660 : static Word16 get_sign_fx( Word16 n )
60 : {
61 439660 : return extract_l( L_shr( (UWord16) n, 15 ) ); /* extract the sign bit */
62 : }
63 :
64 : /*---------------------------------------------------------------
65 : * ECSQ_quantize_vector()
66 : *
67 : *
68 : * ---------------------------------------------------------------*/
69 27494 : void ECSQ_quantize_vector_fx(
70 : const Word32 *input, // Q(31-input_e)
71 : const Word16 input_e,
72 : const Word32 global_gain, // Q15
73 : const Word16 N,
74 : Word16 *output )
75 : {
76 : Word16 i;
77 : Word32 inv_global_gain;
78 : Word16 inv_global_gain_e;
79 :
80 : // inv_global_gain = 1.0f / global_gain;
81 27494 : IF( EQ_32( global_gain, ONE_IN_Q15 ) )
82 : {
83 6765 : FOR( i = 0; i < N; ++i )
84 : {
85 : // output[i] = (int16_t) round_f( input[i] * inv_global_gain );
86 6600 : output[i] = extract_l( L_shr_r( input[i], sub( 31, input_e ) ) );
87 6600 : move16();
88 : }
89 : }
90 : ELSE
91 : {
92 27329 : inv_global_gain = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q15, global_gain, &inv_global_gain_e );
93 27329 : inv_global_gain = L_shl( inv_global_gain, inv_global_gain_e ); // Q31
94 1120489 : FOR( i = 0; i < N; ++i )
95 : {
96 : // output[i] = (int16_t) round_f( input[i] * inv_global_gain );
97 1093160 : output[i] = extract_l( L_shr_r( Mpy_32_32( input[i], inv_global_gain ), sub( 31, input_e ) ) );
98 1093160 : move16();
99 : }
100 : }
101 :
102 27494 : return;
103 : }
104 :
105 : /*---------------------------------------------------------------
106 : * ECSQ_compute_optimal_gain()
107 : *
108 : * compute the optimal global gain for dequantization of output
109 : * if all the values in output are zero, it returns 0
110 : * ---------------------------------------------------------------*/
111 16533 : Word32 ECSQ_compute_optimal_gain_fx( // Q15
112 : const Word32 *input, // Q(31-input_e)
113 : const Word16 input_e,
114 : const Word16 N,
115 : const Word16 *output )
116 : {
117 : Word16 i;
118 : Word32 sum_sq_output;
119 : Word32 sum_input_output;
120 : Word16 sum_input_output_e;
121 : Word32 optimal_global_gain;
122 : Word16 optimal_global_gain_e;
123 :
124 16533 : sum_sq_output = 0;
125 16533 : move32();
126 16533 : sum_input_output = 0;
127 16533 : move32();
128 16533 : sum_input_output_e = 0;
129 16533 : move16();
130 677853 : FOR( i = 0; i < N; ++i )
131 : {
132 : // sum_sq_output += (float) output[i] * (float) output[i];
133 661320 : sum_sq_output = L_add( sum_sq_output, L_mult0( output[i], output[i] ) );
134 : // sum_input_output += input[i] * (float) output[i];
135 661320 : sum_input_output = BASOP_Util_Add_Mant32Exp( sum_input_output, sum_input_output_e, W_extract_l( W_mult0_32_32( input[i], output[i] ) ), input_e, &sum_input_output_e );
136 : }
137 :
138 16533 : optimal_global_gain = 0;
139 16533 : move32();
140 16533 : IF( sum_sq_output != 0 )
141 : {
142 : // optimal_global_gain = sum_input_output / sum_sq_output;
143 16526 : optimal_global_gain = BASOP_Util_Divide3232_Scale_newton( sum_input_output, sum_sq_output, &optimal_global_gain_e ); // 31 - ( sum_input_output_e - 31 + optimal_global_gain_e ) = 62 - ( sum_input_output_e + optimal_global_gain_e )
144 16526 : optimal_global_gain = L_shr( optimal_global_gain, sub( sub( 62, add( sum_input_output_e, optimal_global_gain_e ) ), 15 ) ); // Q15
145 : }
146 :
147 16533 : return optimal_global_gain;
148 : }
149 :
150 : /*---------------------------------------------------------------
151 : * ECSQ_quantize_gain()
152 : *
153 : * quantize global gain
154 : * ---------------------------------------------------------------*/
155 :
156 33059 : static Word16 ECSQ_quantize_gain_fx(
157 : Word32 global_gain ) // Q15
158 : {
159 : Word16 index;
160 :
161 : // global_gain = max(global_gain, 1.0f); /* because always index >= 0 anyway */
162 33059 : global_gain = L_max( global_gain, ONE_IN_Q15 );
163 :
164 : /* min gain = 1 (index 0), max gain ~= 29145 (index 126), domain range ~= 90 dB, resolution 90 / 127 ~= 0.7 dB */
165 : /* value 127 (ECSQ_GLOBAL_GAIN_INDEX_ALL_ZERO) is reserved and indicates that all values in the vector are zero */
166 :
167 : // index = (int16_t) ( ECLVQ_GLOBAL_GAIN_FACTOR * log10f( global_gain ) + 0.4898f );
168 33059 : index = extract_l( L_shr( L_add( Mpy_32_32( ECLVQ_GLOBAL_GAIN_FACTOR_Q26, BASOP_Util_Log10( global_gain, 16 ) ), 513592 /*0.4898f in Q20*/ ), 20 ) );
169 :
170 : /* for MSE quantization, the value f in (0, 1) is the middle between consecutive quantization points in the linear scale */
171 : /* 10 ^ (inv_global_gain_factor * f) - 1 = 10 ^ inv_global_gain_factor - 10 ^ (inv_global_gain_factor * f) */
172 : /* f = 0.5102, and when applying floor to convert to integer, 1 - f = 0.4898 must be used as the offset */
173 :
174 33059 : index = s_min( s_max( index, 0 ), 126 );
175 :
176 33059 : return index;
177 : }
178 :
179 : /*---------------------------------------------------------------
180 : * arith_encode_bit()
181 : *
182 : *
183 : * ---------------------------------------------------------------*/
184 460695 : static void arith_encode_bit_fx(
185 : ECSQ_instance *ecsq_inst,
186 : const Word16 bit )
187 : {
188 : RangeUniEncState *rc_st_enc;
189 460695 : rc_st_enc = (RangeUniEncState *) ecsq_inst->ac_handle;
190 :
191 :
192 460695 : ecsq_inst->bit_count_estimate = L_add( ecsq_inst->bit_count_estimate, 1024 ); /* 1024 eq 1 << 10, 22Q10 fixed-point representation */
193 460695 : move32();
194 :
195 460695 : IF( ecsq_inst->encoding_active )
196 : {
197 : /* call to the actual AC */
198 460695 : rc_uni_enc_encode_bits_fx( rc_st_enc, bit, 1 );
199 : }
200 :
201 460695 : return;
202 : }
203 : /*---------------------------------------------------------------
204 : * arith_encode_bits()
205 : *
206 : *
207 : * ---------------------------------------------------------------*/
208 66048 : static void arith_encode_bits_fx(
209 : ECSQ_instance *ecsq_inst,
210 : const UWord16 n,
211 : Word16 size )
212 : {
213 : RangeUniEncState *rc_st_enc;
214 66048 : rc_st_enc = (RangeUniEncState *) ecsq_inst->ac_handle;
215 :
216 :
217 66048 : ecsq_inst->bit_count_estimate = L_add( ecsq_inst->bit_count_estimate, L_shl( size, 10 ) ); /* 22Q10 fixed-point representation */
218 66048 : move32();
219 :
220 66048 : IF( ecsq_inst->encoding_active )
221 : {
222 66048 : rc_uni_enc_encode_bits_fx( rc_st_enc, n, size );
223 : }
224 :
225 66048 : return;
226 : }
227 :
228 : /*---------------------------------------------------------------
229 : * code_length_from_count()
230 : *
231 : *
232 : * ---------------------------------------------------------------*/
233 1364539 : static Word16 code_length_from_count_fx( // Q10
234 : const Word16 c )
235 : {
236 : /* compute the approximation of code length, 14 - log2(c), in 22Q10 fixed-point representation */
237 : /* with c in {1, ..., 2 ^ 14}, representing a probability count in 14-bit AC implementations */
238 : Word16 c_norm;
239 : Word16 res;
240 :
241 : #define WMC_TOOL_SKIP
242 1364539 : c_norm = norm_s( (Word16) c ); /* equivalent with 14 - floor(log_base2(c)) */
243 : #undef WMC_TOOL_SKIP
244 :
245 : /* compute linear approximation of log2(1 + x), for x in [0, 1], using a look-up table with 64 entries */
246 : /* normalize to {16384, ..., 32767}, subtract MSB bit, and convert to Q6 for indexing log2_1px_table */
247 1364539 : res = sub( shl( c_norm, 10 ), log2_1px_table[( ( c << c_norm ) - ( 1 << 14 ) + ( 1 << 7 ) ) >> 8] );
248 :
249 :
250 : /* |(14 - log2(c)) - res / (1 << 10)| < 0.0113, for c in {1, ..., 2 ^ 14} */
251 : /* complexity: 1 norm_s, 2 adds, 3 shifts, 1 table lookup */
252 1364539 : return res;
253 : }
254 : /*---------------------------------------------------------------
255 : * arith_encode_bit_prob()
256 : *
257 : * encoding for one bit with the probabilies prob_0 = count0 / 2 ^ ECSQ_PROB_BITS and prob_1 = 1 - prob_0
258 : * ---------------------------------------------------------------*/
259 52252 : static void arith_encode_bit_prob_fx(
260 : ECSQ_instance *ecsq_inst,
261 : const Word16 count0,
262 : const Word16 bit )
263 : {
264 : Word16 count;
265 : RangeUniEncState *rc_st_enc;
266 52252 : rc_st_enc = (RangeUniEncState *) ecsq_inst->ac_handle;
267 :
268 :
269 52252 : count = sub( ECSQ_PROB_TOTAL, count0 );
270 :
271 52252 : IF( bit == 0 )
272 : {
273 34697 : count = count0;
274 34697 : move16();
275 : }
276 :
277 52252 : ecsq_inst->bit_count_estimate = L_add( ecsq_inst->bit_count_estimate, code_length_from_count_fx( count ) ); /* 22Q10 fixed-point representation */
278 52252 : move32();
279 :
280 52252 : IF( ecsq_inst->encoding_active )
281 : {
282 : /* call to the actual AC */
283 52252 : rc_uni_enc_encode_fast_fx( rc_st_enc, u_extract_l( L_mult0( bit, count0 ) ), count, 14 );
284 : }
285 :
286 52252 : return;
287 : }
288 : /*---------------------------------------------------------------
289 : * arith_encode_prob()
290 : *
291 : *
292 : * ---------------------------------------------------------------*/
293 655062 : static void arith_encode_prob_fx(
294 : ECSQ_instance *ecsq_inst,
295 : const UWord16 table[],
296 : const Word16 table_size,
297 : const Word16 symbol )
298 : {
299 : Word16 count;
300 : RangeUniEncState *rc_st_enc;
301 :
302 655062 : rc_st_enc = (RangeUniEncState *) ecsq_inst->ac_handle;
303 :
304 655062 : count = table_size; /* just to avoid warning when DEBUGGING is deactivated */
305 655062 : move16();
306 :
307 655062 : count = sub( table[symbol], table[symbol + 1] );
308 :
309 655062 : ecsq_inst->bit_count_estimate = L_add( ecsq_inst->bit_count_estimate, code_length_from_count_fx( count ) ); /* 22Q10 fixed-point representation */
310 655062 : move16();
311 :
312 655062 : IF( ecsq_inst->encoding_active )
313 : {
314 : /* call to the actual AC */
315 655062 : rc_uni_enc_encode_fast_fx( rc_st_enc, u_extract_l( UL_subNsD( ECSQ_PROB_TOTAL, table[symbol] ) ), count, 14 );
316 : }
317 :
318 655062 : return;
319 : }
320 :
321 : /*---------------------------------------------------------------
322 : * arith_encode_elias_mod()
323 : *
324 : *
325 : * ---------------------------------------------------------------*/
326 7809 : static void arith_encode_elias_mod_fx(
327 : ECSQ_instance *ecsq_inst,
328 : const Word16 n )
329 : {
330 : Word16 i;
331 :
332 7809 : IF( LE_16( n, 1 ) )
333 : {
334 : /* code for 0 is 10 and code for 1 is 11 */
335 2477 : arith_encode_bit_fx( ecsq_inst, 1 );
336 2477 : arith_encode_bit_fx( ecsq_inst, n );
337 : }
338 : ELSE /* n >= 2 */
339 : {
340 : /* code consists of n_bits zero bits, an one bit, and n_bits data bits */
341 : Word16 n_bits;
342 :
343 : /* n_bits is floor(log_2(n)), the number of bits after the leading one bit */
344 5332 : n_bits = sub( 30, norm_l( n ) );
345 16081 : FOR( i = 0; i < n_bits; i++ )
346 : {
347 10749 : arith_encode_bit_fx( ecsq_inst, 0 );
348 : }
349 5332 : arith_encode_bit_fx( ecsq_inst, 1 );
350 :
351 : /* encode the n_bits data bits at once */
352 5332 : arith_encode_bits_fx( ecsq_inst, u_extract_l( L_sub( n, ( shl( 1, n_bits ) ) ) ), n_bits );
353 : }
354 :
355 7809 : return;
356 : }
357 :
358 : /*---------------------------------------------------------------
359 : * arith_encode_prob_escape()
360 : *
361 : *
362 : * ---------------------------------------------------------------*/
363 561168 : static void arith_encode_prob_escape_fx(
364 : ECSQ_instance *ecsq_inst,
365 : const UWord16 table[],
366 : const Word16 table_size,
367 : const Word16 symbol )
368 : {
369 561168 : IF( LT_16( symbol, sub( table_size, 1 ) ) )
370 : {
371 553359 : arith_encode_prob_fx( ecsq_inst, table, table_size, symbol );
372 : }
373 : ELSE
374 : {
375 7809 : arith_encode_prob_fx( ecsq_inst, table, table_size, sub( table_size, 1 ) ); /* escape symbol */
376 : /* encode the additional value using a modified Elias integer code */
377 7809 : arith_encode_elias_mod_fx( ecsq_inst, sub( symbol, sub( table_size, 1 ) ) );
378 : }
379 :
380 561168 : return;
381 : }
382 :
383 : /*---------------------------------------------------------------
384 : * get_best_param()
385 : *
386 : *
387 : * ---------------------------------------------------------------*/
388 219075 : static Word16 get_best_param_fx(
389 : Word16 *x,
390 : const Word16 start_offset,
391 : const Word16 stop_offset,
392 : Word32 *avg_abs_sum, // Q10
393 : Word16 *N0 )
394 : {
395 : Word16 v;
396 : Word16 val;
397 : Word32 sum_abs; // Q10
398 : Word16 count;
399 : Word16 count0;
400 : Word16 param;
401 : Word16 L_tmp_e;
402 :
403 219075 : const Word16 offset = INV_LOG_2_FX; /* offset = 1 / ln(2) and log2(offset) ~ 0.528766 */
404 :
405 219075 : sum_abs = 0;
406 219075 : move16();
407 219075 : count = add( sub( stop_offset, start_offset ), 1 );
408 219075 : count0 = 0;
409 219075 : move16();
410 :
411 : /* compute sum(abs(x[v])) and sum(x[v] == 0) */
412 1971675 : FOR( v = start_offset; v <= stop_offset; ++v )
413 : {
414 :
415 1752600 : val = x[v];
416 1752600 : move16();
417 1752600 : sum_abs = L_add( sum_abs, L_shl( abs_s( val ), 10 ) ); // Q10
418 1752600 : IF( val == 0 )
419 : {
420 499970 : count0 = add( count0, 1 );
421 : }
422 : }
423 :
424 :
425 : /* the vector has at most ECSQ_NONZERO_MAX values of +-1 and the rest are zeros */
426 219075 : test();
427 219075 : IF( LE_16( sub( count, count0 ), ECSQ_NONZERO_MAX ) && EQ_32( sum_abs, L_shl( sub( count, count0 ), 10 ) ) )
428 : {
429 : //*avg_abs_sum = ( sum_abs + 0.25f * count0 ) / count;
430 25630 : *avg_abs_sum = BASOP_Util_Divide3232_Scale_newton( L_add( sum_abs, L_shl( count0, 8 ) ), L_shl( count, 10 ), &L_tmp_e );
431 25630 : move32();
432 25630 : *avg_abs_sum = L_shr_r( *avg_abs_sum, sub( 21, L_tmp_e ) ); // Q10
433 25630 : move32();
434 25630 : *N0 = count0;
435 25630 : move16();
436 :
437 25630 : return ECSQ_ALL_ZERO_PARAM;
438 : }
439 :
440 : //*avg_abs_sum = ( sum_abs + 0.25f * count0 ) / count;
441 193445 : *avg_abs_sum = BASOP_Util_Divide3232_Scale_newton( L_add( sum_abs, L_shl( count0, 8 ) ), L_shl( count, 10 ), &L_tmp_e );
442 193445 : move32();
443 193445 : *avg_abs_sum = L_shr_r( *avg_abs_sum, sub( 21, L_tmp_e ) ); // Q10
444 193445 : move32();
445 193445 : *N0 = count0;
446 193445 : move16();
447 :
448 : /* the best Laplace integer parameter is floor(log2(avg_abs_sum) + log2(offset)) */
449 : // param = (int16_t) floor( log_base2( *avg_abs_sum * offset ) );
450 193445 : param = extract_l( L_shr( L_add( BASOP_Util_Log2( Mpy_32_16_1( *avg_abs_sum, offset ) ), 738197504 /* (31-(10+14-15)) <<25 */ ), 25 ) );
451 :
452 : /* limit param value to the available exponent range */
453 193445 : param = s_max( ECSQ_ALL_ZERO_PARAM + 1, param );
454 193445 : param = s_min( param, ECSQ_ALL_ZERO_PARAM + ECSQ_PARAM_COUNT - 1 );
455 :
456 193445 : return param;
457 : }
458 :
459 : /*---------------------------------------------------------------
460 : * get_est_size()
461 : *
462 : *
463 : * ---------------------------------------------------------------*/
464 :
465 : #define ECSQ_log2TB_FIRST_PARAM -2
466 657225 : static Word32 get_est_size_fx( // Q10
467 : const Word16 N,
468 : Word32 avg_abs_sum, // Q10
469 : const Word16 N0,
470 : Word16 param )
471 : {
472 : Word32 size; // Q10
473 : // Word32 two_to_param;
474 : // Word16 two_to_param_e;
475 :
476 : // two_to_param = (float) ( 1 << abs( param ) );
477 : // two_to_param = 1 << abs_s( param ) ;
478 : // if ( param < 0 )
479 : //{
480 : // //two_to_param = 1.0f / two_to_param;
481 : // two_to_param = BASOP_Util_Divide3232_Scale_newton( 1, two_to_param,&two_to_param_e);
482 : // }
483 :
484 657225 : IF( NE_16( param, ECSQ_ALL_ZERO_PARAM ) ) /* not all values are zeros */
485 : {
486 : Word16 index;
487 568341 : index = sub( param, ECSQ_log2TB_FIRST_PARAM );
488 568341 : index = s_min( index, ECSQ_log2TB_SIZE - 2 );
489 :
490 :
491 : /* the estimated size in bits is N * log2(2 * 2 ^ param) + */
492 : /* + N * log2(e) * (avg_abs_sum / 2 ^ param) - (N - N0) * log2(T(2 ^ param)) - */
493 : /* - N0 * log2(T(2 * 2 ^ param)) */
494 : // size = N * ( 1 + param + INV_LOG_2 * ( avg_abs_sum / two_to_param ) );
495 568341 : size = W_extract_l( W_mult0_32_32( N, L_add( L_shl( add( 1, param ), 10 ), Mpy_32_16_1( L_shr( avg_abs_sum, sub( param, 1 /*compesating for INV_LOG_2_FX*/ ) ), INV_LOG_2_FX ) ) ) ); // Q10
496 : // size -= ( N - N0 ) * log2TB[index];
497 568341 : size = L_sub( size, Mpy_32_32( L_shl( sub( N, N0 ), 10 ), log2TB_Q31[index] ) ); // Q10
498 : // size -= N0 * log2TB[index + 1];
499 568341 : size = L_sub( size, Mpy_32_32( L_shl( N0, 10 ), log2TB_Q31[index + 1] ) ); // Q10
500 : }
501 : ELSE
502 : {
503 : /* used for all zero values or for very low entropy with number of nonzeros <= ECSQ_NONZERO_MAX */
504 : Word16 nonzero;
505 : Word32 required_avg_abs_sum; // Q10
506 : Word16 required_avg_abs_sum_e;
507 :
508 88884 : nonzero = sub( N, N0 );
509 :
510 : // required_avg_abs_sum = ( nonzero + 0.25f * N0 ) / N; /* the vector must have nonzero +-1 and N0 zeros */
511 88884 : required_avg_abs_sum = BASOP_Util_Divide3232_Scale_newton( L_add( L_shl( nonzero, 10 ), L_shl( N0, 8 ) ), L_shl( N, 10 ), &required_avg_abs_sum_e ); /* the vector must have nonzero +-1 and N0 zeros */
512 88884 : required_avg_abs_sum = L_shr_r( required_avg_abs_sum, sub( 21, required_avg_abs_sum_e ) ); // Q10
513 :
514 88884 : test();
515 88884 : IF( EQ_32( avg_abs_sum, required_avg_abs_sum ) && LE_16( nonzero, ECSQ_NONZERO_MAX ) )
516 : {
517 : // size = 2.0f; /* log_base2(1 + ECSQ_NONZERO_MAX), indicate the nonzero count */
518 25630 : size = 2 << 10; /* log_base2(1 + ECSQ_NONZERO_MAX), indicate the nonzero count */
519 25630 : move32();
520 :
521 : /* the number of bits for the nonzero mask is log2(nchoosek(N, nonzero)) */
522 : // size += ECSQ_log2_fact[N] - ECSQ_log2_fact[N - nonzero] - ECSQ_log2_fact[nonzero];
523 25630 : size = L_add( size, L_sub( L_sub( ECSQ_log2_fact_Q10[N], ECSQ_log2_fact_Q10[N - nonzero] ), ECSQ_log2_fact_Q10[nonzero] ) );
524 :
525 : // size += (float) nonzero; /* indicate the signs for nonzero values */
526 25630 : size = L_add( size, L_shl( nonzero, 10 ) ); /* indicate the signs for nonzero values */
527 : }
528 : ELSE
529 : {
530 : /* the method cannot be used, return a huge value so that it will never be chosen */
531 : // size = 1.0e11f;
532 63254 : size = MAX_32;
533 63254 : move32();
534 : }
535 : }
536 :
537 657225 : return size;
538 : }
539 : /*---------------------------------------------------------------
540 : * ECSQ_encode_raw()
541 : *
542 : * encode input, which contains a concatenation of quantized RE8 integer-valued vectors;
543 : * the return value is the approximate number of bits written, expressed in 22Q10 fixed-point representation
544 : * ---------------------------------------------------------------*/
545 43815 : static Word32 ECSQ_encode_raw_fx(
546 : ECSQ_instance *ecsq_inst,
547 : Word16 *input,
548 : const Word16 N )
549 : {
550 : Word32 bit_count_estimate_initial;
551 :
552 : Word32 total_size; // Q10
553 : Word16 segment_count;
554 :
555 : Word16 seg_length, seg_start, seg_stop, segment, seg_count0;
556 : Word32 est_size;
557 :
558 : Word16 est_param, first_param, last_param, param;
559 : Word32 best_size, test_size; // Q10
560 : Word16 best_param, best_params[ECSQ_VECTOR_SIZE_MAX / ECSQ_SEGMENT_SIZE];
561 : Word16 saved_seg_count0[ECSQ_VECTOR_SIZE_MAX / ECSQ_SEGMENT_SIZE];
562 :
563 : Word32 avg_abs_sum; // Q10
564 : // const Word32 scale_Q10 = ONE_IN_Q21;// 1.0f / 1024.0f
565 : Word16 i, idx, shift, val, sym, nonzero, left0, left1, count, count0, lsbs;
566 : Word16 param_zb, best_param_zb; /* zero-based parameter index for coding */
567 : const UWord16 *tab_vals, *tab_abs_lsbs;
568 :
569 43815 : bit_count_estimate_initial = ecsq_inst->bit_count_estimate;
570 43815 : move32();
571 :
572 43815 : total_size = 0;
573 43815 : move32();
574 : // segment_count = ( N + ECSQ_SEGMENT_SIZE - 1 ) / ECSQ_SEGMENT_SIZE;
575 43815 : segment_count = shr( add( N, ECSQ_SEGMENT_SIZE - 1 ), 3 );
576 :
577 262890 : FOR( segment = 0; segment < segment_count; ++segment )
578 : {
579 219075 : seg_start = imult1616( segment, ECSQ_SEGMENT_SIZE );
580 219075 : seg_stop = sub( s_min( add( seg_start, ECSQ_SEGMENT_SIZE ), N ), 1 );
581 219075 : seg_length = add( sub( seg_stop, seg_start ), 1 );
582 :
583 219075 : est_param = get_best_param_fx( input, seg_start, seg_stop, &avg_abs_sum, &seg_count0 );
584 219075 : saved_seg_count0[segment] = seg_count0;
585 219075 : move16();
586 :
587 : /* find the best param around est_param for the current segment count */
588 219075 : best_size = MAX_32 - 1; // 1.0e10f ?
589 219075 : move32();
590 219075 : best_param = -1000;
591 219075 : move16();
592 :
593 219075 : IF( EQ_16( est_param, ECSQ_ALL_ZERO_PARAM ) ) /* all values are zero */
594 : {
595 25630 : first_param = ECSQ_ALL_ZERO_PARAM;
596 25630 : move16();
597 25630 : last_param = ECSQ_ALL_ZERO_PARAM + 2 * ECSQ_PARAM_SEARCH_RANGE;
598 25630 : move16();
599 : }
600 : ELSE
601 : {
602 193445 : first_param = s_max( ECSQ_ALL_ZERO_PARAM, sub( est_param, ECSQ_PARAM_SEARCH_RANGE ) );
603 193445 : last_param = s_min( add( est_param, ECSQ_PARAM_SEARCH_RANGE ), ECSQ_ALL_ZERO_PARAM + ECSQ_PARAM_COUNT - 1 );
604 : }
605 :
606 876300 : FOR( param = first_param; param <= last_param; ++param )
607 : {
608 657225 : param_zb = sub( param, ECSQ_ALL_ZERO_PARAM );
609 657225 : count = sub( ECSQ_tab_param[ecsq_inst->config_index][param_zb], ECSQ_tab_param[ecsq_inst->config_index][param_zb + 1] );
610 : // test_size = scale_Q10 * code_length_from_count( count );
611 657225 : test_size = code_length_from_count_fx( count ); // Q10
612 :
613 657225 : test_size = L_add_sat( test_size, get_est_size_fx( seg_length, avg_abs_sum, seg_count0, param ) ); // if test_size > best_size we don't select
614 :
615 657225 : IF( LT_32( test_size, best_size ) )
616 : {
617 291047 : best_param = param;
618 291047 : move16();
619 291047 : best_size = test_size;
620 291047 : move32();
621 : }
622 : }
623 :
624 219075 : best_params[segment] = best_param;
625 219075 : move16();
626 219075 : total_size = L_add( total_size, best_size );
627 : }
628 :
629 43815 : IF( !ecsq_inst->encoding_active ) /* only size estimation is needed */
630 : {
631 : // est_size = (int32_t) ( total_size * 1024.0 + 0.5 ); /* 22Q10 fixed-point representation */
632 27289 : est_size = total_size; /* 22Q10 fixed-point representation */
633 27289 : move32();
634 :
635 27289 : return est_size;
636 : }
637 :
638 : /* encode with the best parameters: best_params[] */
639 99156 : FOR( segment = 0; segment < segment_count; ++segment )
640 : {
641 82630 : seg_start = imult1616( segment, ECSQ_SEGMENT_SIZE );
642 82630 : seg_stop = add( seg_start, ECSQ_SEGMENT_SIZE - 1 );
643 82630 : seg_length = ECSQ_SEGMENT_SIZE;
644 82630 : move16();
645 82630 : IF( EQ_16( segment, sub( segment_count, 1 ) ) )
646 : {
647 16526 : seg_stop = sub( N, 1 );
648 16526 : seg_length = add( sub( seg_stop, seg_start ), 1 );
649 : }
650 :
651 82630 : best_param_zb = sub( best_params[segment], ECSQ_ALL_ZERO_PARAM );
652 82630 : shift = s_max( 0, sub( best_param_zb, 3 ) ); /* first nonzero shift of 1 is used for param 3 */
653 :
654 82630 : arith_encode_prob_fx( ecsq_inst, ECSQ_tab_param[ecsq_inst->config_index], ECSQ_PARAM_COUNT, best_param_zb );
655 :
656 : /* encode the actual values if not using the ECSQ_ALL_ZERO_PARAM parameter */
657 82630 : IF( best_param_zb != 0 )
658 : {
659 70146 : tab_vals = ECSQ_tab_vals[best_param_zb - 1];
660 70146 : idx = s_min( shift, 4 );
661 70146 : tab_abs_lsbs = ECSQ_tab_abs_lsbs[idx];
662 :
663 631314 : FOR( i = seg_start; i <= seg_stop; ++i )
664 : {
665 561168 : val = input[i];
666 561168 : move16();
667 561168 : sym = abs_s( val );
668 :
669 561168 : IF( shift != 0 )
670 : {
671 59496 : lsbs = s_and( sym, sub( shl( 1, shift ), 1 ) );
672 59496 : sym = shr( sym, shift );
673 :
674 59496 : arith_encode_prob_escape_fx( ecsq_inst, tab_vals, ECSQ_TAB_VALS_SIZE, sym );
675 :
676 59496 : test();
677 59496 : IF( ( sym > 0 ) || GT_16( shift, 4 ) )
678 : {
679 48232 : arith_encode_bits_fx( ecsq_inst, lsbs, shift );
680 : }
681 : ELSE /* (sym == 0) && (shift <= 4) */
682 : {
683 11264 : arith_encode_prob_fx( ecsq_inst, tab_abs_lsbs, shl( 1, shift ), lsbs );
684 : }
685 : }
686 : ELSE /* shift == 0 */
687 : {
688 501672 : arith_encode_prob_escape_fx( ecsq_inst, tab_vals, ECSQ_TAB_VALS_SIZE, sym );
689 : }
690 :
691 561168 : IF( val != 0 )
692 : {
693 419105 : arith_encode_bit_fx( ecsq_inst, get_sign_fx( val ) );
694 : }
695 : }
696 : }
697 : ELSE
698 : {
699 12484 : nonzero = sub( seg_length, saved_seg_count0[segment] );
700 :
701 :
702 12484 : arith_encode_bits_fx( ecsq_inst, nonzero, 2 ); /* log_base2(ECSQ_NONZERO_MAX + 1) == 2 */
703 :
704 12484 : left1 = nonzero;
705 12484 : move16();
706 12484 : left0 = sub( seg_length, nonzero );
707 :
708 112356 : FOR( i = seg_start; i <= seg_stop; ++i )
709 : {
710 99872 : val = input[i];
711 99872 : move16();
712 99872 : sym = abs_s( val );
713 :
714 :
715 99872 : IF( left1 == 0 )
716 : {
717 : }
718 55252 : ELSE IF( left0 == 0 )
719 : {
720 : }
721 : ELSE
722 : {
723 52252 : count0 = imult1616( left0, ECSQ_tab_inverse[left0 + left1] ); /* left0 * round(ECSQ_PROB_TOTAL / (left0 + left1)) */
724 52252 : arith_encode_bit_prob_fx( ecsq_inst, count0, sym );
725 : }
726 :
727 99872 : IF( sym != 0 )
728 : {
729 20555 : arith_encode_bit_fx( ecsq_inst, get_sign_fx( val ) );
730 20555 : --left1;
731 20555 : move16();
732 : }
733 : ELSE
734 : {
735 79317 : --left0;
736 79317 : move16();
737 : }
738 : }
739 : }
740 : }
741 :
742 :
743 16526 : return L_sub( ecsq_inst->bit_count_estimate, bit_count_estimate_initial );
744 : }
745 :
746 : /*---------------------------------------------------------------
747 : * ECSQ_encode_target_SNR()
748 : *
749 : * encode input with an approximate target of target_SNR signal-to-noise ratio, and ensure no more than max_bits are used;
750 : * the computed global gain index is returned into global_gain_index_output;
751 : * if global_gain_index_output == ECSQ_GLOBAL_GAIN_INDEX_ALL_ZERO, nothing is written and the function returns 0;
752 : * if output pointer is not NULL, it will contain the dequantized vector, as it will be available at the decoder side;
753 : * the return value is the approximate number of bits written, expressed in 22Q10 fixed-point representation
754 : * ---------------------------------------------------------------*/
755 :
756 : #define ECSQ_MAX_BITS_ITERATIONS 2
757 :
758 17408 : Word32 ECSQ_encode_target_SNR_fx(
759 : ECSQ_instance *ecsq_inst,
760 : const Word32 *input, // Q(31-input_e)
761 : const Word16 input_e,
762 : const Word16 N,
763 : const Word32 target_SNR, // Q25
764 : const Word16 max_bits,
765 : Word32 *output,
766 : Word16 *global_gain_index_output )
767 : {
768 : Word16 global_gain_index;
769 : Word16 global_gain_index_last; /* used to potentially save one call to ECSQ_quantize_vector */
770 : Word16 quantized_input[ECSQ_VECTOR_SIZE_MAX];
771 : Word16 saved_encoding_active;
772 : Word32 saved_bit_count_estimate;
773 : Word32 test_size /*Q10*/, adjust_size;
774 : Word16 i, iteration;
775 : Word32 global_gain /*Q15*/, adjust_global_gain_index;
776 : Word16 global_gain_e;
777 : Word32 max_bits_fixpt;
778 : Word32 sum_squared, target_ratio, target_sum_squared_error;
779 : Word16 sum_squared_e, target_ratio_e, target_sum_squared_error_e;
780 : // const Word32 global_gain_step = 1165018877; // powf( 10.0f, ECLVQ_INV_GLOBAL_GAIN_FACTOR ) in Q30
781 : Word32 L_tmp1;
782 : Word16 L_tmp1_e;
783 : Word64 W_tmp1;
784 : Word16 shift;
785 :
786 : // max_bits_fixpt = max_bits * 1024; /* max_bits_fixpt is in 22Q10 fixed-point representation */
787 17408 : max_bits_fixpt = L_shl( max_bits, 10 ); /* max_bits_fixpt is in 22Q10 fixed-point representation */
788 :
789 17408 : IF( LE_32( target_SNR, 0 ) )
790 : {
791 : /* a target SNR of 0.0 dB is already achieved by quantizing all values in the vector to zero */
792 0 : *global_gain_index_output = ECSQ_GLOBAL_GAIN_INDEX_ALL_ZERO;
793 0 : move16();
794 0 : IF( output != NULL )
795 : {
796 0 : set32_fx( output, 0, N );
797 : }
798 :
799 0 : return 0; /* nothing is coded when global gain index is ECSQ_GLOBAL_GAIN_INDEX_ALL_ZERO */
800 : }
801 :
802 17408 : sum_squared = 0;
803 17408 : move32();
804 17408 : sum_squared_e = 0;
805 17408 : move16();
806 :
807 713728 : FOR( i = 0; i < N; ++i )
808 : {
809 : // sum_squared += input[i] * input[i];
810 696320 : W_tmp1 = W_mult0_32_32( input[i], input[i] ); // 2(31-input_e)
811 696320 : IF( W_tmp1 == 0 )
812 : {
813 35015 : shift = 32;
814 35015 : move16();
815 : }
816 : ELSE
817 : {
818 661305 : shift = W_norm( W_tmp1 );
819 : }
820 696320 : shift = sub( 32, shift );
821 696320 : L_tmp1 = W_extract_l( W_shr( W_tmp1, shift ) ); // 2(31-input_e) - shift = 62 - 2input_e - shift
822 696320 : L_tmp1_e = sub( add( shl( input_e, 1 ), shift ), 31 ); // 31 - (62 - 2input_e - shift) = 2input_e + shift - 31
823 696320 : sum_squared = BASOP_Util_Add_Mant32Exp( sum_squared, sum_squared_e, L_tmp1, L_tmp1_e, &sum_squared_e );
824 : }
825 :
826 : // if ( sum_squared < 0.25f ) /* all the values in the input vector will always be quantized to zero */
827 17408 : IF( BASOP_Util_Cmp_Mant32Exp( sum_squared, sum_squared_e, ONE_IN_Q29, 0 ) < 0 ) /* all the values in the input vector will always be quantized to zero */
828 : {
829 : /* the condition above holds because sum(input[i] ^ 2) < 0.25 => max(abs(input[i])) < 0.5 */
830 875 : *global_gain_index_output = ECSQ_GLOBAL_GAIN_INDEX_ALL_ZERO;
831 875 : move16();
832 875 : IF( output != NULL )
833 : {
834 0 : set32_fx( output, 0, N );
835 : }
836 :
837 875 : return 0; /* nothing is coded when global gain index is ECSQ_GLOBAL_GAIN_INDEX_ALL_ZERO */
838 : }
839 :
840 : /* save internal state and activate size evaluation only */
841 16533 : saved_encoding_active = ecsq_inst->encoding_active;
842 16533 : move32();
843 16533 : saved_bit_count_estimate = ecsq_inst->bit_count_estimate;
844 16533 : move32();
845 16533 : ecsq_inst->encoding_active = 0;
846 16533 : move16();
847 :
848 : /* target_ratio is the target ratio between the sum squared values of input and sum squared values of quantization error */
849 : // target_ratio = powf( 10.0f, target_SNR / 10.0f );
850 16533 : L_tmp1 = BASOP_Util_Divide3232_Scale_newton( target_SNR, 10, &L_tmp1_e );
851 16533 : L_tmp1_e = add( L_tmp1_e, 6 - 31 );
852 16533 : target_ratio = BASOP_Util_fPow( 10 << 27, 4, L_tmp1, L_tmp1_e, &target_ratio_e );
853 : // target_sum_squared_error = sum_squared / target_ratio;
854 16533 : target_sum_squared_error = BASOP_Util_Divide3232_Scale_newton( sum_squared, target_ratio, &target_sum_squared_error_e );
855 16533 : target_sum_squared_error_e = add( target_sum_squared_error_e, sub( sum_squared_e, target_ratio_e ) );
856 :
857 : /* the mean of squared quantization error for uniform scalar quantization is 1 / 12, approximately 0.0833 */
858 : /* when including global_gain, the relationship is target_sum_squared_error ~ (0.0833 * N) * global_gain ^ 2 */
859 : /* the representable range for global_gain is from 1 (global_gain_index 0) to 29145 (global_gain_index 126) inclusive */
860 : // global_gain = sqrtf( target_sum_squared_error / ( 0.0833f * (float) N ) );
861 16533 : global_gain = BASOP_Util_Divide3232_Scale_newton( target_sum_squared_error, L_mult0( 5459 /*0.0833f in Q16*/, N ), &global_gain_e );
862 16533 : global_gain_e = add( global_gain_e, sub( target_sum_squared_error_e, 15 ) );
863 16533 : global_gain = Sqrt32( global_gain, &global_gain_e );
864 16533 : global_gain = L_shr( global_gain, sub( 16, global_gain_e ) ); // Q15
865 : /* quantize the estimated global_gain */
866 16533 : global_gain_index = ECSQ_quantize_gain_fx( global_gain );
867 :
868 16533 : iteration = 0;
869 16533 : move16();
870 :
871 : /* do the quantization with the dequantized estimated global_gain_index found */
872 16533 : global_gain = ECSQ_dequantize_gain_fx( global_gain_index ); // Q15
873 16533 : global_gain_index_last = global_gain_index;
874 16533 : move16();
875 16533 : ECSQ_quantize_vector_fx( input, input_e, global_gain, N, quantized_input );
876 :
877 16533 : test_size = ECSQ_encode_raw_fx( ecsq_inst, quantized_input, N );
878 :
879 16533 : iteration = add( iteration, 1 );
880 : Word16 tmp_e;
881 16533 : test();
882 27289 : WHILE( GT_32( test_size, max_bits_fixpt ) && LT_16( iteration, ECSQ_MAX_BITS_ITERATIONS ) )
883 : {
884 10756 : test();
885 10756 : adjust_size = L_sub( test_size, max_bits_fixpt );
886 : /* assume doubling the quantization step size will reduce the entropy with (up to) one bit */
887 : // adjust_global_gain_index = (int16_t) ceil( adjust_size / ( 1024.0f * N * log_base2( global_gain_step ) ) );
888 10756 : adjust_global_gain_index = BASOP_Util_Divide3232_Scale_newton( adjust_size, N, &tmp_e );
889 10756 : adjust_global_gain_index = Mpy_32_32( adjust_global_gain_index, 17816838 /*/ ( 1024.0f * log_base2( global_gain_step ) )*/ );
890 10756 : IF( NE_32( adjust_global_gain_index, L_shl( L_shr( adjust_global_gain_index, sub( 31, tmp_e ) ), sub( 31, tmp_e ) ) ) )
891 : {
892 10756 : adjust_global_gain_index = BASOP_Util_Add_Mant32Exp( adjust_global_gain_index, tmp_e, ONE_IN_Q30, 1, &tmp_e );
893 : }
894 :
895 10756 : adjust_global_gain_index = extract_l( L_shr( adjust_global_gain_index, sub( 31, tmp_e ) ) );
896 :
897 10756 : global_gain_index = s_min( add( global_gain_index, (Word16) adjust_global_gain_index ), 126 );
898 :
899 10756 : global_gain = ECSQ_dequantize_gain_fx( global_gain_index ); // Q15
900 10756 : global_gain_index_last = global_gain_index;
901 10756 : move16();
902 10756 : ECSQ_quantize_vector_fx( input, input_e, global_gain, N, quantized_input );
903 :
904 10756 : test_size = ECSQ_encode_raw_fx( ecsq_inst, quantized_input, N );
905 :
906 10756 : iteration = add( iteration, 1 );
907 : }
908 :
909 16533 : IF( GT_32( test_size, max_bits_fixpt ) )
910 : {
911 : /* further increase the quantization step with the smallest increment for global_gain_index */
912 205 : global_gain_index = s_min( global_gain_index + 1, 126 );
913 : }
914 :
915 : /* restore internal state */
916 16533 : ecsq_inst->encoding_active = saved_encoding_active;
917 16533 : move16();
918 16533 : ecsq_inst->bit_count_estimate = saved_bit_count_estimate;
919 16533 : move32();
920 :
921 : /* do the quantization with the dequantized final global_gain_index found */
922 16533 : global_gain = ECSQ_dequantize_gain_fx( global_gain_index ); // Q15
923 :
924 16533 : IF( NE_16( global_gain_index, global_gain_index_last ) )
925 : {
926 205 : ECSQ_quantize_vector_fx( input, input_e, global_gain, N, quantized_input );
927 : }
928 :
929 16533 : global_gain = ECSQ_compute_optimal_gain_fx( input, input_e, N, quantized_input );
930 :
931 16533 : IF( global_gain == 0 ) /* all values in quantized_input are zero */
932 : {
933 7 : *global_gain_index_output = ECSQ_GLOBAL_GAIN_INDEX_ALL_ZERO;
934 7 : move16();
935 7 : IF( output != NULL )
936 : {
937 0 : set32_fx( output, 0, N );
938 : }
939 :
940 7 : return 0; /* nothing is coded when global gain index is ECSQ_GLOBAL_GAIN_INDEX_ALL_ZERO */
941 : }
942 :
943 16526 : global_gain_index = ECSQ_quantize_gain_fx( global_gain );
944 16526 : *global_gain_index_output = global_gain_index;
945 16526 : move16();
946 :
947 :
948 16526 : IF( output != NULL )
949 : {
950 0 : global_gain = ECSQ_dequantize_gain_fx( global_gain_index );
951 0 : ECSQ_dequantize_vector_fx( quantized_input, global_gain, N, output );
952 : }
953 :
954 16526 : ECSQ_encode_raw_fx( ecsq_inst, quantized_input, N );
955 :
956 16526 : return L_sub( ecsq_inst->bit_count_estimate, saved_bit_count_estimate );
957 : }
|