LCOV - code coverage report
Current view: top level - lib_enc - pvq_encode_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main @ e95243e9e67ddeb69dddf129509de1b3d95b402e Lines: 259 259 100.0 %
Date: 2025-09-14 03:13:15 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*====================================================================================
       2             :     EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
       3             :   ====================================================================================*/
       4             : #include <stdint.h>
       5             : #include "options.h" /* Compilation switches                   */
       6             : #include "cnst.h"
       7             : #include "prot_fx.h"     /* Function prototypes                    */
       8             : #include "prot_fx_enc.h" /* Function prototypes                    */
       9             : #include "rom_com_fx.h"
      10             : #include "rom_com.h"
      11             : 
      12             : /*   PVQ MIXED_SEARCH_LOOP:
      13             :                            low precision 16/32 +  energy selective high precision 32/64,
      14             :                            mixed perf , 10 dB SEGSNR better than the low precision loop only,
      15             :                            active  if k>=128 and accumulated energy is high enough,
      16             :                            comes at a controlled complexity cost, as dimensions decrease for high k's*/
      17             : 
      18        1662 : static Word16 max_val_fx(                    /* o  : maximum value in the input vector              */
      19             :                           const Word16 *vec, /* i  : input vector, vec[0] is always read            */
      20             :                           const Word16 lvec  /* i  : length of input vector                         */
      21             : )
      22             : {
      23             :     Word16 j, tmp;
      24             : 
      25        1662 :     tmp = vec[0]; /* seed the running maximum with the first element */
      26        1662 :     move16();
      27        6616 :     FOR( j = 1; j < lvec; j++ ) /* compare against the remaining elements, if any */
      28             :     {
      29        4954 :         tmp = s_max( vec[j], tmp );
      30             :     }
      31        1662 :     return tmp;
      32             : }
      33             : 
      34      292738 : static Word16 pyramidSearchProjInit_fx( Word16 L, Word16 Ptot ) /* o: 1 if Ptot exceeds the L-dependent threshold below, else 0; callers in this file pass L = dim and Ptot = pulses */
      35             : {
      36      292738 :     return ( sub( Ptot, extract_l( L_shr( L_mult0( 8223, (Word32) L ), 14 ) ) ) > 0 ); /* threshold is ( 8223 * L ) >> 14, assuming L_mult0 is the unshifted product; 8223 / 2^14 is roughly 0.502. NOTE(review): the (Word32) cast on L looks redundant if L_mult0 takes Word16 operands - confirm against the BASOP prototype */
      37             : }
      38             : 
      39             : 
      40             : /* The inner search loop for one single additional unit pulse, starting from pulse_tot,
      41             :     with information about the required energy precision/down scaling for the dim loop in en_dn_shift,
      42             :     and the current max_xabs absolute value to be used for a near-optimal correlation upscaling.
      43             :     Returns the index of the best positioned unit pulse (imax).
      44             : */
      45     1177157 : static Word16 one_pulse_search( /* o  : index (imax) of the position that received the new unit pulse */
      46             :     const Word16 dim,    /* vector dimension       */
      47             :     const Word16 *x_abs, /* absolute vector values */
      48             :     Word16 *y,           /* i/o: pulse vector, y[imax] is incremented by 1 */
      49             :     Word16 *pulse_tot_ptr, /* i/o: total pulse count, incremented by 1 */
      50             :     Word32 *L_xy_ptr, /* i/o: accumulated correlation, updated with the selected position */
      51             :     Word32 *L_yy_ptr, /* i/o: accumulated energy, updated with the selected position      */
      52             :     Word16 high_prec_active, /* i  : 0 selects the 16/32 bit search loop, non-zero the 32/64 bit loop */
      53             :     Word16 en_dn_shift,      /* i  : only copied into en_tmp below, not otherwise used here           */
      54             :     Word16 max_xabs ) /* i  : worst-case x_abs amplitude used to pre-compute the correlation upshift; callers in this file pass the maximum of x_abs[] */
      55             : {
      56             :     Word16 i, corr_up_shift, corr_tmp, imax, corr_sq_tmp, en_max_den, cmax_num, en_tmp;
      57             :     Word32 L_tmp_en_lc, L_tmp_corr;
      58             :     Word32 L_tmp_en, L_en_max_den, L_corr_sq_max, L_tmp_corr_sq;
      59             :     Word32 L_left_h, L_right_h;
      60             :     UWord32 UL_left_l, UL_right_l, UL_dummy;
      61             :     Word32 L_tmp;
      62             :     UWord16 u_sgn;
      63             : #ifndef ISSUE_1867_replace_overflow_libenc
      64             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
      65             :     Flag Overflow = 0;
      66             :     move16();
      67             : #endif
      68             : #endif
      69             : 
      70     1177157 :     en_tmp = en_dn_shift; /* dummy assignment to avoid compiler warning for unused parameter  */
      71             : 
      72             :     /* maximize correlation precision, prior to every unit pulse addition in the vector */
      73     1177157 :     corr_up_shift = norm_l( L_mac( *L_xy_ptr, 1, max_xabs ) ); /* pre analyze worst case L_xy update in the dim  loop        , 2 ops */
      74     1177157 :     imax = -1;                                                 /* not needed for search, only added to avoid compiler warning. NOTE(review): imax stays -1 if no candidate is selected (e.g. dim <= 0) and is used as an index after the loops - callers presumably guarantee dim >= 1; confirm */
      75             : 
      76             :     /* clean BE code, with split out low/high precision loops                                                      */
      77             :     /* activate low complexity en/corr search section conditionally if resulting vector energy is within limits    */
      78             :     /* typical case for higher dimensions                                                                          */
      79             : 
      80     1177157 :     IF( high_prec_active == 0 )
      81             :     {
      82     1175369 :         en_max_den = 0; /*move16()*/
      83     1175369 :         move16();
      84             :         ; /* OPT: move saved by  using high_prec_active as en_max_den */ /*      1 op   */
      85     1175369 :         cmax_num = -1;
      86     1175369 :         move16(); /* req. to force a 1st update for n==0   */ /*      1 op   */
      87             : 
      88    16846770 :         FOR( i = 0; i < dim; i++ ) /* FOR 3 ops  */
      89             :         {
      90             : #ifdef ISSUE_1867_replace_overflow_libenc
      91    15671401 :             L_tmp_corr = L_shl_sat( L_mac_sat( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /*  actual in-loop target    value, 2 ops  */
      92    15671401 :             corr_tmp = round_fx_sat( L_tmp_corr );                                        /*     1 op   */
      93             : #else
      94             :             L_tmp_corr = L_shl_o( L_mac_o( *L_xy_ptr, 1, x_abs[i], &Overflow ), corr_up_shift, &Overflow ); /*  actual in-loop target    value, 2 ops  */
      95             :             corr_tmp = round_fx_o( L_tmp_corr, &Overflow );                                                 /*     1 op   */
      96             : #endif
      97    15671401 :             corr_sq_tmp = mult( corr_tmp, corr_tmp ); /* CorrSq, is a 16bit for low complexity cross multiplication    1 op   */
      98             : 
      99    15671401 :             L_tmp_en_lc = L_mac( *L_yy_ptr, 1, y[i] ); /*Q1 result ,  energy may span up to ~14+1(Q1)+1(sign)=16 bits,  1 op */
     100             :             /* extract_l without shift can always be used for this section as energy is guaranteed to stay in the lower word, 1 op */
     101    15671401 :             en_tmp = extract_l( L_tmp_en_lc ); /* L_shl + round_fx could also be used but then adds an upshift cost (2-3 ops)*/
     102             : 
     103             :             /* 16/32 bit comparison    WC (4 +1+1 + (1+1+1) = 9                                                                   */
     104    15671401 :             IF( L_msu( L_mult( corr_sq_tmp, en_max_den ), cmax_num, en_tmp ) > 0 ) /* use L_mult and then a L_msu,      2 ops  */
     105             :             {
     106     3811087 :                 cmax_num = corr_sq_tmp;
     107     3811087 :                 move16(); /* 1 op */
     108     3811087 :                 en_max_den = en_tmp;
     109     3811087 :                 move16(); /* 1 op */
     110     3811087 :                 imax = i;
     111     3811087 :                 move16(); /* 1 op */
     112             :             }
     113             :         } /* dim  */
     114             :     }
     115             :     ELSE
     116             :     {
     117             :         /* High resolution section activated when vector energy is becoming high  (peaky or many pulses)                    */
     118             :         /* BASOP operator Mpy32_32_ss used to allow higher resolution for both the CorrSq term and the Energy term          */
     119             : 
     120        1788 :         L_en_max_den = L_deposit_l( 0 );                                      /* 1 op  */
     121        1788 :         L_corr_sq_max = L_deposit_l( -1 ); /* req. to force a 1st update   */ /* 1 op  */
     122             : 
     123        8784 :         FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
     124             :         {
     125        6996 :             L_tmp_corr = L_shl( L_mac( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /* actual in  loop WC value 2 ops   */
     126        6996 :             Mpy_32_32_ss( L_tmp_corr, L_tmp_corr, &L_tmp_corr_sq, &UL_dummy );    /* CorrSq 32 bits,          4 ops   */
     127             : 
     128        6996 :             L_tmp_en = L_mac( *L_yy_ptr, 1, y[i] ); /* Q1,energy may span up to sign+19 bits , 1 op    */
     129             :             /* For highest accuracy use pairs of maximum upshifted 32x32 bit signed values              */
     130             :             /*  (L_tmp_corr_sq / L_tmp_en)     >  (L_corr_sq_max/L_en_max_den)                          */
     131             :             /*  (L_tmp_corr_sq * L_en_max_den) >  (L_corr_sq_max * L_tmp_en)                            */
     132        6996 :             Mpy_32_32_ss( L_en_max_den, L_tmp_corr_sq, &L_left_h, &UL_left_l ); /* 4 ops */
     133        6996 :             Mpy_32_32_ss( L_tmp_en, L_corr_sq_max, &L_right_h, &UL_right_l );   /* 4 ops */
     134             : 
     135             :             /* STL optimized "Lazy evaluation"  of:
     136             :                IF( (L_left_h > L_right_h)  ||  ( (L_left_h == L_right_h) &&  (UL_left_l > UL_right_l) )
     137             :              */
     138             :             /* 32/64 bit Lazy eval comparison WC cost is    (1+  1+1+1 + 4 +(2+2+1) = 13 ,  and average  is  ~12 */
     139             :             /* Unoptimized 32/64 bit comparison  WC cost is (1+1+ 2x2  + 4 +(2+2+1) = 15 */
     140        6996 :             L_tmp = L_sub( L_left_h, L_right_h ); /* high  signed  word check            1 op  */
     141        6996 :             u_sgn = 0;
     142        6996 :             move16();         /* 1 op  */
     143        6996 :             if ( L_tmp == 0 ) /* L_tmp high Word testing is always needed */
     144             :             {
     145             :                 /* The returned UL value from UL_subNs is not needed,  only u_sgn is needed  */
     146        3249 :                 UL_subNs( UL_right_l, UL_left_l, &u_sgn ); /* low unsigned word check, note left/right order switch of ">"  due to ">=" inside UL_subNs, 1 op */
     147             :             }
     148        6996 :             if ( u_sgn != 0 )
     149             :             {
     150        1318 :                 L_tmp = L_add( L_tmp, 1 ); /* 0+1  --> 1 use wrap/sign result of low Word u_sgn check */ /* 1 op  */
     151             :             }
     152        6996 :             IF( L_tmp > 0 ) /* IF  4 ops */
     153             :             {
     154        3722 :                 L_corr_sq_max = L_add( L_tmp_corr_sq, 0 ); /* 1-2 ops */
     155        3722 :                 L_en_max_den = L_add( L_tmp_en, 0 );       /* 1-2 ops */
     156        3722 :                 imax = i;
     157        3722 :                 move16(); /* 1 op  */
     158             :             }
     159             :         } /* dim loop */
     160             :     }
     161             :     /* Complexity comparison per coeff for low precision vs. high precision
     162             :         low  precision: pulse_tot <= 127, 16 bit:  WC  2+3 +(15)*dim    ops,            dim=5  --> 5+15*5 = 90  ops, 18 ops/coeff
     163             :         high precision: pulse_tot  > 127, 32 bit:  WC  1+3+3 +(26-28)*dim  ops, WC-band dim=5  --> 7+28*5 = 147 ops, 29 ops/coeff  ~61% increase
     164             :     */
     165             : 
     166             :     /*  finally add found unit pulse contribution to past L_xy, Lyy,  for next pulse loop    */
     167     1177157 :     *L_xy_ptr = L_mac( *L_xy_ptr, x_abs[imax], 1 ); /*      Q12+1 */
     168     1177157 :     *L_yy_ptr = L_mac( *L_yy_ptr, 1, y[imax] ); /* uses y[imax] before the increment below */
     169             : 
     170     1177157 :     y[imax] = add( y[imax], 1 );
     171     1177157 :     move16();                                          /* Q0 added pulse              */
     172     1177157 :     ( *pulse_tot_ptr ) = add( ( *pulse_tot_ptr ), 1 ); /* increment total pulse sum   */
     173     1177157 :     move16();
     174             : 
     175     1177157 :     return imax;
     176             : }
     177             : /*-----------------------------------------------------------------------*
     178             :  * Function pvq_encode_ivas_fx()                                         *
     179             :  *                                                                       *
     180             :  *-----------------------------------------------------------------------*/
     181      276341 : void pvq_encode_ivas_fx(
     182             :     BSTR_ENC_HANDLE hBstr, /* i/o: bitstream handle, forwarded to the range-encoder calls at the end */
     183             :     PVQ_ENC_HANDLE hPVQ,  /* i/o: PVQ encoder handle                 */
     184             :     const Word16 *x,      /* i:   vector to quantize             Q15-3=>Q12       */
     185             :     Word16 *y,            /* o:   raw pulses  (non-scaled short) Q0               */
     186             :     Word16 *xq,           /* o:   quantized vector               Q15              */
     187             :     Word32 *L_xq,         /* o:   quantized vector               Q31 for eval     */
     188             :     const Word16 pulses,  /* i:   number of allocated pulses                      */
     189             :     const Word16 dim,     /* i:   Length of vector                                */
     190             :     const Word16 neg_gain /* i:  - Gain       use - negative gain in  Q15  0..1   */
     191             : )
     192             : {
     193             :     Word16 i;
     194             :     Word16 pulse_tot;
     195             :     Word16 xabs[PVQ_MAX_BAND_SIZE];
     196             :     Word16 max_xabs;
     197             :     Word32 L_xsum;
     198             :     Word32 L_proj_fac;
     199             :     Word32 L_yy, L_xy;
     200             :     Word16 max_amp_y, imax;
     201             :     Word16 k, en_margin, en_dn_shift, high_prec_active;
     202             : 
     203             :     Word32 L_num, L_tmp;
     204             :     Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
     205             : 
     206             :     UWord16 u16_tmp;
     207             :     Word16 dim_m1;
     208             :     Word32 L_isqrt;
     209             :     Word16 neg_gain_norm, shift_tot;
     210             :     Word16 high_pulse_density_flag;
     211             :     PvqEntry entry;
     212             : #ifndef ISSUE_1867_replace_overflow_libenc
     213             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     214             :     Flag Overflow = 0;
     215             :     move16();
     216             : #endif
     217             : #endif
     218             : 
     219      276341 :     L_proj_fac = 4096;
     220      276341 :     move32();
     221      276341 :     L_xsum = L_deposit_h( 0 );
     222      276341 :     max_xabs = -1;
     223      276341 :     move16();
     224             : 
     225     3842693 :     FOR( i = 0; i < dim; i++ )
     226             :     {
     227     3566352 :         xabs[i] = abs_s( x[i] );
     228     3566352 :         move16();                              /* Q12 */
     229     3566352 :         max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient  search correlation scaling */
     230     3566352 :         L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
     231     3566352 :         y[i] = 0;
     232     3566352 :         move16(); /* init, later only non-zero values need to be normalized */
     233             :     }
     234             : 
     235      276341 :     test();
     236      276341 :     IF( L_xsum == 0 || neg_gain == 0 )
     237             :     { /* degenerate input (all-zero x or zero gain): no search, the pulses are split between y[0] and y[dim-1] */
     238         429 :         pulse_tot = pulses;
     239         429 :         move16();
     240         429 :         dim_m1 = sub( dim, 1 );
     241         429 :         y[dim_m1] = 0;
     242         429 :         move16();
     243         429 :         y[0] = shr( pulses, 1 );
     244         429 :         move16();
     245         429 :         y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) ); /* for dim == 1 this accumulates onto y[0], giving y[0] == pulses */
     246         429 :         move16();
     247         429 :         L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
     248         429 :         IF( dim_m1 != 0 )
     249             :         {
     250         429 :             L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
     251             :         }
     252             :     }
     253             :     ELSE
     254             :     {
     255             : 
     256      275912 :         num = sub( pulses, PYR_OFFSET );
     257      275912 :         high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
     258             : 
     259      275912 :         test();
     260      275912 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     261             :         {
     262      118606 :             shift_den = norm_l( L_xsum );                  /* x_sum input  Q12                         */
     263      118606 :             den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den                     */
     264             : 
     265      118606 :             L_num = L_deposit_l( num );
     266      118606 :             shift_num = sub( norm_l( L_num ), 1 );
     267      118606 :             L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1                  */
     268      118606 :             proj_fac = div_l( L_num, den );    /* L_num always has to be less than den<<16 */
     269             : 
     270      118606 :             shift_delta = sub( shift_num, shift_den );
     271      118606 :             L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring  to a fixed  Q12     */
     272             :         }
     273             : 
     274      275912 :         pulse_tot = 0;
     275      275912 :         move16();
     276      275912 :         L_yy = L_deposit_l( 0 );
     277      275912 :         L_xy = L_deposit_l( 0 );
     278      275912 :         test();
     279      275912 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) ) /* same condition as above: start the search from the projected y instead of from all zeros */
     280             :         {
     281     1246161 :             FOR( i = 0; i < dim; i++ ) /* max 64 */
     282             :             {
     283     1127555 :                 Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12  +1 */
     284     1127555 :                 y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
     285     1127555 :                 move16(); /* Q12 *Q12  ->  Q0 */
     286             : 
     287     1127555 :                 pulse_tot = add( pulse_tot, y[i] );  /* Q0                                         */
     288     1127555 :                 L_yy = L_mac( L_yy, y[i], y[i] );    /* Energy, result will scale up by 2 by L_mac */
     289     1127555 :                 L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12  +1 --> Q13                   */
     290             :             }
     291             :         }
     292             : 
     293             : 
     294      275912 :         L_yy = L_shr( L_yy, 1 );
     295      275912 :         IF( LE_16( pulses, 127 ) )
     296             :         {
     297             :             /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5  */
     298             :             /* ( if  high energy precision is inactive,  max_amp_y is not needed , no max_amp_y(k-1) update )           */
     299     1369910 :             FOR( k = pulse_tot; k < pulses; k++ )
     300             :             {
     301     1094913 :                 L_yy = L_add( L_yy, 1 );
     302     1094913 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     303             :             }
     304             :         }
     305             :         ELSE
     306             :         {                                     /* HC or LC+HC inner loops */
     307         915 :             max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension  restricted)     */
     308             :             /* max_amp_y from projected y is needed when pulses_sum  exceeds 127      */
     309             : 
     310             :             /* First section with 32 bit energy inactive,   max_amp_y kept updated though    */
     311         945 :             FOR( k = pulse_tot; k < 128; k++ )
     312             :             {
     313          30 :                 L_yy = L_add( L_yy, 1 );
     314          30 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     315          30 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     316             :             }
     317             : 
     318             :             /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated                */
     319        3636 :             FOR( k = pulse_tot; k < pulses; k++ )
     320             :             {
     321        2721 :                 L_yy = L_add( L_yy, 1 );
     322        2721 :                 en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin,  ~ 2 ops      */
     323        2721 :                 en_dn_shift = sub( 16, en_margin );                /* calc. shift to lower byte for fixed use of extract_l      */
     324             : 
     325        2721 :                 high_prec_active = 1;
     326        2721 :                 move16();
     327        2721 :                 if ( en_dn_shift <= 0 )
     328             :                 {
     329             :                     /* only use 32 bit energy if actually needed */
     330        1895 :                     high_prec_active = 0;
     331        1895 :                     move16();
     332             :                 }
     333             :                 /* 32 bit energy and corr adaptively active,  max_amp_y kept updated */
     334        2721 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
     335        2721 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     336             :             }
     337             :         }
     338      275912 :         L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
     339             :                                    to make energy right for unit/inverse gain calculation */
     340             :     }
     341             : 
     342             :     /* Apply unit energy normalization scaling,  always at least one pulse so no div-by-zero check is needed */
     343      276341 :     L_isqrt = L_deposit_l( 0 );
     344      276341 :     IF( neg_gain != 0 ) /* L_isqrt stays 0 when neg_gain == 0 */
     345             :     {
     346      275912 :         L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
     347             :     }
     348             : 
     349      276341 :     shift_num = norm_s( pulse_tot );            /* account for max possible pulse amplitude in y,
     350             :                                                    can be used even when max_amp_y is not avail.  */
     351      276341 :     shift_den = norm_s( neg_gain );             /* account for gain downscaling shift            */
     352      276341 :     neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm            */
     353      276341 :     shift_tot = sub( add( shift_num, shift_den ), 15 );
     354             : 
     355      276341 :     L_isqrt = L_negate( L_isqrt );
     356     3842693 :     FOR( i = 0; i < dim; i++ )
     357             :     {
     358     3566352 :         tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used  in scaling */
     359     3566352 :         if ( x[i] < 0 )
     360             :         {
     361     1770957 :             tmp = negate( tmp ); /* apply sign */
     362             :         }
     363             : 
     364     3566352 :         IF( y[i] != 0 )
     365             :         {
     366     1155036 :             y[i] = shr( tmp, shift_num );
     367     1155036 :             move16(); /* updates sign of y[i] , ~range -512 + 512),  array move */
     368             :         }
     369     3566352 :         Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp );         /* Q31*Q(0+x)  +1         */
     370     3566352 :         Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1     */
     371             : #ifdef ISSUE_1867_replace_overflow_libenc
     372     3566352 :         L_tmp = L_shr_sat( L_tmp, shift_tot ); /* Q31+x                  */
     373     3566352 :         xq[i] = round_fx_sat( L_tmp );         /* Q15, array move        */
     374             : #else
     375             :         L_tmp = L_shr_sat( L_tmp, shift_tot );                                                              /* Q31+x                  */
     376             :         xq[i] = round_fx_o( L_tmp, &Overflow );                                                             /* Q15, array move        */
     377             : #endif
     378     3566352 :         move16();
     379     3566352 :         L_xq[i] = L_tmp; /* Q31 currently  unused  */
     380     3566352 :         move32();
     381             :     }
     382             : 
     383             :     /* index the found PVQ vector into short codewords */
     384      276341 :     entry = mpvq_encode_vec_fx( y, dim, pulses );
     385             : 
     386             :     /* send the short codeword(s) to the range encoder */
     387      276341 :     rc_enc_bits_ivas_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
     388      276341 :     IF( NE_16( dim, 1 ) ) /* for dim == 1 only the leading sign bit above is sent */
     389             :     {
     390      276341 :         rc_enc_uniform_ivas_fx( hBstr, hPVQ, entry.index, entry.size );
     391             :     }
     392             : 
     393      276341 :     return;
     394             : }
     395             : 
     396       16843 : void pvq_encode_fx(
     397             :     BSTR_ENC_HANDLE hBstr,
     398             :     PVQ_ENC_HANDLE hPVQ,  /* i/o: PVQ encoder handle                 */
     399             :     const Word16 *x,      /* i:   vector to quantize             Q15-3=>Q12       */
     400             :     Word16 *y,            /* o:   raw pulses  (non-scaled short) Q0               */
     401             :     Word16 *xq,           /* o:   quantized vector               Q15              */
     402             :     Word32 *L_xq,         /* o:   quantized vector               Q31 fot eval     */
     403             :     const Word16 pulses,  /* i:   number of allocated pulses                      */
     404             :     const Word16 dim,     /* i:   Length of vector                                */
     405             :     const Word16 neg_gain /* i:  - Gain       use - negative gain in  Q15  0..1   */
     406             : )
     407             : {
     408             :     Word16 i;
     409             :     Word16 pulse_tot;
     410             :     Word16 xabs[PVQ_MAX_BAND_SIZE];
     411             :     Word16 max_xabs;
     412             :     Word32 L_xsum;
     413             :     Word32 L_proj_fac;
     414             :     Word32 L_yy, L_xy;
     415             :     Word16 max_amp_y, imax;
     416             :     Word16 k, en_margin, en_dn_shift, high_prec_active;
     417             : 
     418             :     Word32 L_num, L_tmp;
     419             :     Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
     420             : 
     421             :     UWord16 u16_tmp;
     422             :     Word16 dim_m1;
     423             :     Word32 L_isqrt;
     424             :     Word16 neg_gain_norm, shift_tot;
     425             :     Word16 high_pulse_density_flag;
     426             :     PvqEntry entry;
     427             : #ifndef ISSUE_1867_replace_overflow_libenc
     428             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     429             :     Flag Overflow = 0;
     430             :     move16();
     431             : #endif
     432             : #endif
     433             : 
     434       16843 :     L_proj_fac = 4096;
     435       16843 :     move32();
     436       16843 :     L_xsum = L_deposit_h( 0 );
     437       16843 :     max_xabs = -1;
     438       16843 :     move16();
     439             : 
     440      216203 :     FOR( i = 0; i < dim; i++ )
     441             :     {
     442      199360 :         xabs[i] = abs_s( x[i] );
     443      199360 :         move16();                              /* Q12 */
     444      199360 :         max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient  search correlation scaling */
     445      199360 :         L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
     446      199360 :         y[i] = 0;
     447      199360 :         move16(); /* init, later only non-zero values need to be normalized */
     448             :     }
     449             : 
     450       16843 :     test();
     451       16843 :     IF( L_xsum == 0 || neg_gain == 0 )
     452             :     {
     453          17 :         pulse_tot = pulses;
     454          17 :         move16();
     455          17 :         dim_m1 = sub( dim, 1 );
     456          17 :         y[dim_m1] = 0;
     457          17 :         move16();
     458          17 :         y[0] = shr( pulses, 1 );
     459          17 :         move16();
     460          17 :         y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) );
     461          17 :         move16();
     462          17 :         L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
     463          17 :         if ( dim_m1 != 0 )
     464             :         {
     465          17 :             L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
     466             :         }
     467             :     }
     468             :     ELSE
     469             :     {
     470             : 
     471       16826 :         num = sub( pulses, PYR_OFFSET );
     472       16826 :         high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
     473             : 
     474       16826 :         test();
     475       16826 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     476             :         {
     477       10933 :             shift_den = norm_l( L_xsum );                  /* x_sum input  Q12                         */
     478       10933 :             den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den                     */
     479             : 
     480       10933 :             L_num = L_deposit_l( num );
     481       10933 :             shift_num = sub( norm_l( L_num ), 1 );
     482       10933 :             L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1                  */
     483       10933 :             proj_fac = div_l( L_num, den );    /* L_num always has to be less than den<<16 */
     484             : 
     485       10933 :             shift_delta = sub( shift_num, shift_den );
     486       10933 :             L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring  to a fixed  Q12     */
     487             :         }
     488             : 
     489       16826 :         pulse_tot = 0;
     490       16826 :         move16();
     491       16826 :         L_yy = L_deposit_l( 0 );
     492       16826 :         L_xy = L_deposit_l( 0 );
     493       16826 :         test();
     494       16826 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     495             :         {
     496      102537 :             FOR( i = 0; i < dim; i++ ) /* max 64 */
     497             :             {
     498       91604 :                 Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12  +1 */
     499       91604 :                 y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
     500       91604 :                 move16(); /* Q12 *Q12  ->  Q0 */
     501             : 
     502       91604 :                 pulse_tot = add( pulse_tot, y[i] );  /* Q0                                         */
     503       91604 :                 L_yy = L_mac( L_yy, y[i], y[i] );    /* Energy, result will scale up by 2 by L_mac */
     504       91604 :                 L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12  +1 --> Q13                   */
     505             :             }
     506             :         }
     507             : 
     508             : 
     509       16826 :         L_yy = L_shr( L_yy, 1 );
     510       16826 :         IF( LE_16( pulses, 127 ) )
     511             :         {
     512             :             /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5  */
     513             :             /* ( if  high energy precision is inactive,  max_amp_y is not needed , no max_amp_y(k-1) update )           */
     514       93339 :             FOR( k = pulse_tot; k < pulses; k++ )
     515             :             {
     516       77260 :                 L_yy = L_add( L_yy, 1 );
     517       77260 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     518             :             }
     519             :         }
     520             :         ELSE
     521             :         {                                     /* HC or LC+HC inner loops */
     522         747 :             max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension  restricted)     */
     523             :             /* max_amp_y from projected y is needed when pulses_sum  exceeds 127      */
     524             : 
     525             :             /* First section with 32 bit energy inactive,   max_amp_y kept updated though    */
     526         753 :             FOR( k = pulse_tot; k < 128; k++ )
     527             :             {
     528           6 :                 L_yy = L_add( L_yy, 1 );
     529           6 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     530           6 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     531             :             }
     532             : 
     533             :             /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated                */
     534        2974 :             FOR( k = pulse_tot; k < pulses; k++ )
     535             :             {
     536        2227 :                 L_yy = L_add( L_yy, 1 );
     537        2227 :                 en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin,  ~ 2 ops      */
     538        2227 :                 en_dn_shift = sub( 16, en_margin );                /* calc. shift to lower byte for fixed use of extract_l      */
     539             : 
     540        2227 :                 high_prec_active = 1;
     541        2227 :                 move16();
     542        2227 :                 if ( en_dn_shift <= 0 )
     543             :                 {
     544             :                     /* only use 32 bit energy if actually needed */
     545        1265 :                     high_prec_active = 0;
     546        1265 :                     move16();
     547             :                 }
     548             :                 /* 32 bit energy and corr adaptively active,  max_amp_y kept updated */
     549        2227 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
     550        2227 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     551             :             }
     552             :         }
     553       16826 :         L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
     554             :                                    to make energy right for unit/inverse gain calculation */
     555             :     }
     556             : 
     557             :     /* Apply unit energy normalization scaling,  always at least one pulse so no div-by-zero check is needed */
     558       16843 :     L_isqrt = L_deposit_l( 0 );
     559       16843 :     IF( neg_gain != 0 )
     560             :     {
     561       16826 :         L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
     562             :     }
     563             : 
     564       16843 :     shift_num = norm_s( pulse_tot );            /* account for max possible pulse amplitude in y,
     565             :                                                    can be used even when max_amp_y is not avail.  */
     566       16843 :     shift_den = norm_s( neg_gain );             /* account for gain downscaling shift            */
     567       16843 :     neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm            */
     568       16843 :     shift_tot = sub( add( shift_num, shift_den ), 15 );
     569             : 
     570       16843 :     L_isqrt = L_negate( L_isqrt );
     571      216203 :     FOR( i = 0; i < dim; i++ )
     572             :     {
     573      199360 :         tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used  in scaling */
     574      199360 :         if ( x[i] < 0 )
     575             :         {
     576       99448 :             tmp = negate( tmp ); /* apply sign */
     577             :         }
     578             : 
     579      199360 :         if ( y[i] != 0 )
     580             :         {
     581       91218 :             y[i] = shr( tmp, shift_num );
     582       91218 :             move16(); /* updates sign of y[i} , ~range -512 + 512),  array move */
     583             :         }
     584      199360 :         Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp );         /* Q31*Q(0+x)  +1         */
     585      199360 :         Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1     */
     586             : #ifdef ISSUE_1867_replace_overflow_libenc
     587      199360 :         L_tmp = L_shr_sat( L_tmp, shift_tot ); /* Q31+x                  */
     588      199360 :         xq[i] = round_fx_sat( L_tmp );         /* Q15, array move        */
     589             : #else
     590             :         L_tmp = L_shr_sat( L_tmp, shift_tot );                                                              /* Q31+x                  */
     591             :         xq[i] = round_fx_o( L_tmp, &Overflow );                                                             /* Q15, array move        */
     592             : #endif
     593      199360 :         move16();
     594      199360 :         L_xq[i] = L_tmp; /* Q31 currently  unused  */
     595      199360 :         move32();
     596             :     }
     597             : 
     598             :     /* index the found PVQ vector into short codewords */
     599       16843 :     entry = mpvq_encode_vec_fx( y, dim, pulses );
     600             : 
     601             :     /* send the short codeword(s) to the range encoder */
     602       16843 :     rc_enc_bits_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
     603       16843 :     IF( NE_16( dim, 1 ) )
     604             :     {
     605       16843 :         rc_enc_uniform_fx( hBstr, hPVQ, entry.index, entry.size );
     606             :     }
     607             : 
     608       16843 :     return;
     609             : }

Generated by: LCOV version 1.14