LCOV - Coverage on main enc/dec/rend @ 574a190e3c6896c6c4ed10d7f23649709a0c4347

LCOV - code coverage report

Current view:	top level - lib_enc - pvq_encode_fx.c (source / functions)		Hit	Total	Coverage
Test:	Coverage on main enc/dec/rend @ 574a190e3c6896c6c4ed10d7f23649709a0c4347	Lines:	265	265	100.0 %
Date:	2025-06-27 02:59:36	Functions:	5	5	100.0 %

          Line data    Source code

       1             : /*====================================================================================
       2             :     EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
       3             :   ====================================================================================*/
       4             : #include <stdint.h>
       5             : #include "options.h" /* Compilation switches                   */
       6             : #include "cnst.h"
       7             : #include "prot_fx.h"     /* Function prototypes                    */
       8             : #include "prot_fx_enc.h" /* Function prototypes                    */
       9             : #include "rom_com_fx.h"
      10             : #include "rom_com.h"
      11             : 
      12             : /*   PVQ MIXED_SEARCH_LOOP:
      13             :                            low precision 16/32 +  energy selective high precision 32/64,
      14             :                            mixed perf , 10 dB SEGSNR better than the low precision loop only,
      15             :                            active  if k>=128 and accumulated energy is high enough,
      16             :                            comes at a controlled complexity cost, as dimensions decrease for high k's*/
      17             : 
      18        1654 : static Word16 max_val_fx(                    /* o  : maximum value in the input vector              */
      19             :                           const Word16 *vec, /* i  : input vector                                   */
      20             :                           const Word16 lvec  /* i  : length of input vector                         */
      21             : )
      22             : {
      23             :     Word16 j, tmp;
      24             : 
      25        1654 :     tmp = vec[0];
      26        1654 :     move16();
      27        6584 :     FOR( j = 1; j < lvec; j++ )
      28             :     {
      29        4930 :         tmp = s_max( vec[j], tmp );
      30             :     }
      31        1654 :     return tmp;
      32             : }
      33             : 
      34      292157 : static Word16 pyramidSearchProjInit_fx( Word16 L, Word16 Ptot )
      35             : {
      36      292157 :     return ( sub( Ptot, extract_l( L_shr( L_mult0( 8223, (Word32) L ), 14 ) ) ) > 0 );
      37             : }
      38             : 
      39             : 
      40             : /* The inner search loop for one single additional unit pulse, starting from pulse_tot,
      41             :     with information about required energy precision/down scaling for the dim loop in en_dn_shift,
      42             :     and the current max_xabs absolute value to be used for a near-optimal correlation upscaling.
      43             :     Returns the index of the best positioned unit pulse in imax.
      44             : */
      45     1174742 : static Word16 one_pulse_search(
      46             :     const Word16 dim,    /* vector dimension       */
      47             :     const Word16 *x_abs, /* absolute vector values */
      48             :     Word16 *y,           /* output vector          */
      49             :     Word16 *pulse_tot_ptr,
      50             :     Word32 *L_xy_ptr, /* accumulated correlation */
      51             :     Word32 *L_yy_ptr, /* accumulated energy      */
      52             :     Word16 high_prec_active,
      53             :     Word16 en_dn_shift,
      54             :     Word16 max_xabs ) /* current accumulated max amplitude for pulses */
      55             : {
      56             :     Word16 i, corr_up_shift, corr_tmp, imax, corr_sq_tmp, en_max_den, cmax_num, en_tmp;
      57             :     Word32 L_tmp_en_lc, L_tmp_corr;
      58             :     Word32 L_tmp_en, L_en_max_den, L_corr_sq_max, L_tmp_corr_sq;
      59             :     Word32 L_left_h, L_right_h;
      60             :     UWord32 UL_left_l, UL_right_l, UL_dummy;
      61             :     Word32 L_tmp;
      62             :     UWord16 u_sgn;
      63             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
      64     1174742 :     Flag Overflow = 0;
      65     1174742 :     move16();
      66             : #endif
      67             : 
      68     1174742 :     en_tmp = en_dn_shift; /* dummy assignment to avoid compiler warning for unused parameter  */
      69             : 
      70             :     /* maximize correlation precision, prior to every unit pulse addition in the vector */
      71     1174742 :     corr_up_shift = norm_l( L_mac( *L_xy_ptr, 1, max_xabs ) ); /* pre analyze worst case L_xy update in the dim  loop        , 2 ops */
      72     1174742 :     imax = -1;                                                 /* not needed for search, only added to avoid compiler warning     */
      73             : 
      74             :     /* clean BE code, with split out low/high precision loops                                                      */
      75             :     /* activate low complexity en/corr search section conditionally if resulting vector energy is within limits    */
      76             :     /* typical case for higher dimensions                                                                          */
      77             : 
      78     1174742 :     IF( high_prec_active == 0 )
      79             :     {
      80     1172957 :         en_max_den = 0; /*move16()*/
      81     1172957 :         move16();
      82             :         ; /* OPT: move saved by  using high_prec_active as en_max_den */ /*      1 op   */
      83     1172957 :         cmax_num = -1;
      84     1172957 :         move16(); /* req. to force a 1st update for n==0   */ /*      1 op   */
      85             : 
      86    16807114 :         FOR( i = 0; i < dim; i++ ) /* FOR 3 ops  */
      87             :         {
      88    15634157 :             L_tmp_corr = L_shl_o( L_mac_o( *L_xy_ptr, 1, x_abs[i], &Overflow ), corr_up_shift, &Overflow ); /*  actual in-loop target    value, 2 ops  */
      89    15634157 :             corr_tmp = round_fx_o( L_tmp_corr, &Overflow );                                                 /*     1 op   */
      90    15634157 :             corr_sq_tmp = mult( corr_tmp, corr_tmp );                                                       /* CorrSq, is a 16bit for low complexity cross multiplication    1 op   */
      91             : 
      92    15634157 :             L_tmp_en_lc = L_mac( *L_yy_ptr, 1, y[i] ); /*Q1 result ,  energy may span up to ~14+1(Q1)+1(sign)=16 bits,  1 op */
      93             :             /* extract_l without shift can always be used for this section as energy is guaranteed to stay in the lower word, 1 op */
      94    15634157 :             en_tmp = extract_l( L_tmp_en_lc ); /* L_shl + round_fx could also be used, but that adds an upshift cost (2-3 ops) */
      95             : 
      96             :             /* 16/32 bit comparison    WC (4 +1+1 + (1+1+1) = 9                                                                   */
      97    15634157 :             IF( L_msu( L_mult( corr_sq_tmp, en_max_den ), cmax_num, en_tmp ) > 0 ) /* use L_mult and then a L_msu,      2 ops  */
      98             :             {
      99     3801815 :                 cmax_num = corr_sq_tmp;
     100     3801815 :                 move16(); /* 1 op */
     101     3801815 :                 en_max_den = en_tmp;
     102     3801815 :                 move16(); /* 1 op */
     103     3801815 :                 imax = i;
     104     3801815 :                 move16(); /* 1 op */
     105             :             }
     106             :         } /* dim  */
     107             :     }
     108             :     ELSE
     109             :     {
     110             :         /* High resolution section activated when vector energy is becoming high  (peaky or many pulses)                    */
     111             :         /* BASOP operator Mpy32_32_ss used to allow higher resolution for both the CorrSq term and the Energy term          */
     112             : 
     113        1785 :         L_en_max_den = L_deposit_l( 0 );                                      /* 1 op  */
     114        1785 :         L_corr_sq_max = L_deposit_l( -1 ); /* req. to force a 1st update   */ /* 1 op  */
     115             : 
     116        8769 :         FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
     117             :         {
     118        6984 :             L_tmp_corr = L_shl( L_mac( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /* actual in  loop WC value 2 ops   */
     119        6984 :             Mpy_32_32_ss( L_tmp_corr, L_tmp_corr, &L_tmp_corr_sq, &UL_dummy );    /* CorrSq 32 bits,          4 ops   */
     120             : 
     121        6984 :             L_tmp_en = L_mac( *L_yy_ptr, 1, y[i] ); /* Q1,energy may span up to sign+19 bits , 1 op    */
     122             :             /* For highest accuracy use pairs of maximum upshifted 32x32 bit signed values              */
     123             :             /*  (L_tmp_corr_sq / L_tmp_en)     >  (L_corr_sq_max/L_en_max_den)                          */
     124             :             /*  (L_tmp_corr_sq * L_en_max_den) >  (L_corr_sq_max * L_tmp_en)                            */
     125        6984 :             Mpy_32_32_ss( L_en_max_den, L_tmp_corr_sq, &L_left_h, &UL_left_l ); /* 4 ops */
     126        6984 :             Mpy_32_32_ss( L_tmp_en, L_corr_sq_max, &L_right_h, &UL_right_l );   /* 4 ops */
     127             : 
     128             :             /* STL optimized "Lazy evaluation"  of:
     129             :                IF( (L_left_h > L_right_h)  ||  ( (L_left_h == L_right_h) &&  (UL_left_l > UL_right_l) )
     130             :              */
     131             :             /* 32/64 bit Lazy eval comparison WC cost is    (1+  1+1+1 + 4 +(2+2+1) = 13 ,  and average  is  ~12 */
     132             :             /* Unoptimized 32/64 bit comparison  WC cost is (1+1+ 2x2  + 4 +(2+2+1) = 15 */
     133        6984 :             L_tmp = L_sub( L_left_h, L_right_h ); /* high  signed  word check            1 op  */
     134        6984 :             u_sgn = 0;
     135        6984 :             move16();         /* 1 op  */
     136        6984 :             if ( L_tmp == 0 ) /* L_tmp high Word testing is always needed */
     137             :             {
     138             :                 /* The returned UL value from UL_subNs is not needed,  only u_sgn is needed  */
     139        3246 :                 UL_subNs( UL_right_l, UL_left_l, &u_sgn ); /* low unsigned word check, note left/right order switch of ">"  due to ">=" inside UL_subNs, 1 op */
     140             :             }
     141        6984 :             if ( u_sgn != 0 )
     142             :             {
     143        1319 :                 L_tmp = L_add( L_tmp, 1 ); /* 0+1  --> 1 use wrap/sign result of low Word u_sgn check */ /* 1 op  */
     144             :             }
     145        6984 :             IF( L_tmp > 0 ) /* IF  4 ops */
     146             :             {
     147        3718 :                 L_corr_sq_max = L_add( L_tmp_corr_sq, 0 ); /* 1-2 ops */
     148        3718 :                 L_en_max_den = L_add( L_tmp_en, 0 );       /* 1-2 ops */
     149        3718 :                 imax = i;
     150        3718 :                 move16(); /* 1 op  */
     151             :             }
     152             :         } /* dim loop */
     153             :     }
     154             :     /* Complexity comparison per coeff for low precision vs. high precision
     155             :         low  precision: pulse_tot <= 127, 16 bit:  WC  2+3 +(15)*dim    ops,            dim=5  --> 5+15*5 = 90  ops, 18 ops/coeff
     156             :         high precision: pulse_tot  > 127, 32 bit:  WC  1+3+3 +(26-28)*dim  ops, WC-band dim=5  --> 7+28*5 = 147 ops, 29 ops/coeff  ~61% increase
     157             :     */
     158             : 
     159             :     /*  finally add found unit pulse contribution to past L_xy, Lyy,  for next pulse loop    */
     160     1174742 :     *L_xy_ptr = L_mac( *L_xy_ptr, x_abs[imax], 1 ); /*      Q12+1 */
     161     1174742 :     *L_yy_ptr = L_mac( *L_yy_ptr, 1, y[imax] );
     162             : 
     163     1174742 :     y[imax] = add( y[imax], 1 );
     164     1174742 :     move16();                                          /* Q0 added pulse              */
     165     1174742 :     ( *pulse_tot_ptr ) = add( ( *pulse_tot_ptr ), 1 ); /* increment total pulse sum   */
     166     1174742 :     move16();
     167             : 
     168     1174742 :     return imax;
     169             : }
     170             : /*-----------------------------------------------------------------------*
     171             :  * Function pvq_encode_ivas_fx()                                         *
     172             :  *                                                                       *
     173             :  *-----------------------------------------------------------------------*/
     174      275913 : void pvq_encode_ivas_fx(
     175             :     BSTR_ENC_HANDLE hBstr,
     176             :     PVQ_ENC_HANDLE hPVQ,  /* i/o: PVQ encoder handle                 */
     177             :     const Word16 *x,      /* i:   vector to quantize             Q15-3=>Q12       */
     178             :     Word16 *y,            /* o:   raw pulses  (non-scaled short) Q0               */
     179             :     Word16 *xq,           /* o:   quantized vector               Q15              */
     180             :     Word32 *L_xq,         /* o:   quantized vector               Q31 for eval     */
     181             :     const Word16 pulses,  /* i:   number of allocated pulses                      */
     182             :     const Word16 dim,     /* i:   Length of vector                                */
     183             :     const Word16 neg_gain /* i:  - Gain       use - negative gain in  Q15  0..1   */
     184             : )
     185             : {
     186             :     Word16 i;
     187             :     Word16 pulse_tot;
     188             :     Word16 xabs[PVQ_MAX_BAND_SIZE];
     189             :     Word16 max_xabs;
     190             :     Word32 L_xsum;
     191             :     Word32 L_proj_fac;
     192             :     Word32 L_yy, L_xy;
     193             :     Word16 max_amp_y, imax;
     194             :     Word16 k, en_margin, en_dn_shift, high_prec_active;
     195             : 
     196             :     Word32 L_num, L_tmp;
     197             :     Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
     198             : 
     199             :     UWord16 u16_tmp;
     200             :     Word16 dim_m1;
     201             :     Word32 L_isqrt;
     202             :     Word16 neg_gain_norm, shift_tot;
     203             :     Word16 high_pulse_density_flag;
     204             :     PvqEntry entry;
     205             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     206      275913 :     Flag Overflow = 0;
     207      275913 :     move16();
     208             : #endif
     209             : 
     210      275913 :     L_proj_fac = 4096;
     211      275913 :     move32();
     212      275913 :     L_xsum = L_deposit_h( 0 );
     213      275913 :     max_xabs = -1;
     214      275913 :     move16();
     215             : 
     216     3836601 :     FOR( i = 0; i < dim; i++ )
     217             :     {
     218     3560688 :         xabs[i] = abs_s( x[i] );
     219     3560688 :         move16();                              /* Q12 */
     220     3560688 :         max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient  search correlation scaling */
     221     3560688 :         L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
     222     3560688 :         y[i] = 0;
     223     3560688 :         move16(); /* init, later only non-zero values need to be normalized */
     224             :     }
     225             : 
     226      275913 :     test();
     227      275913 :     IF( L_xsum == 0 || neg_gain == 0 )
     228             :     {
     229         428 :         pulse_tot = pulses;
     230         428 :         move16();
     231         428 :         dim_m1 = sub( dim, 1 );
     232         428 :         y[dim_m1] = 0;
     233         428 :         move16();
     234         428 :         y[0] = shr( pulses, 1 );
     235         428 :         move16();
     236         428 :         y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) );
     237         428 :         move16();
     238         428 :         L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
     239         428 :         IF( dim_m1 != 0 )
     240             :         {
     241         428 :             L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
     242             :         }
     243             :     }
     244             :     ELSE
     245             :     {
     246             : 
     247      275485 :         num = sub( pulses, PYR_OFFSET );
     248      275485 :         high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
     249             : 
     250      275485 :         test();
     251      275485 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     252             :         {
     253      118522 :             shift_den = norm_l( L_xsum );                  /* x_sum input  Q12                         */
     254      118522 :             den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den                     */
     255             : 
     256      118522 :             L_num = L_deposit_l( num );
     257      118522 :             shift_num = sub( norm_l( L_num ), 1 );
     258      118522 :             L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1                  */
     259      118522 :             proj_fac = div_l( L_num, den );    /* L_num always has to be less than den<<16 */
     260             : 
     261      118522 :             shift_delta = sub( shift_num, shift_den );
     262      118522 :             L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring  to a fixed  Q12     */
     263             :         }
     264             : 
     265      275485 :         pulse_tot = 0;
     266      275485 :         move16();
     267      275485 :         L_yy = L_deposit_l( 0 );
     268      275485 :         L_xy = L_deposit_l( 0 );
     269      275485 :         test();
     270      275485 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     271             :         {
     272     1245305 :             FOR( i = 0; i < dim; i++ ) /* max 64 */
     273             :             {
     274     1126783 :                 Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12  +1 */
     275     1126783 :                 y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
     276     1126783 :                 move16(); /* Q12 *Q12  ->  Q0 */
     277             : 
     278     1126783 :                 pulse_tot = add( pulse_tot, y[i] );  /* Q0                                         */
     279     1126783 :                 L_yy = L_mac( L_yy, y[i], y[i] );    /* Energy, result will scale up by 2 by L_mac */
     280     1126783 :                 L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12  +1 --> Q13                   */
     281             :             }
     282             :         }
     283             : 
     284             : 
     285      275485 :         L_yy = L_shr( L_yy, 1 );
     286      275485 :         IF( LE_16( pulses, 127 ) )
     287             :         {
     288             :             /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5  */
     289             :             /* ( if  high energy precision is inactive,  max_amp_y is not needed , no max_amp_y(k-1) update )           */
     290     1367585 :             FOR( k = pulse_tot; k < pulses; k++ )
     291             :             {
     292     1093015 :                 L_yy = L_add( L_yy, 1 );
     293     1093015 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     294             :             }
     295             :         }
     296             :         ELSE
     297             :         {                                     /* HC or LC+HC inner loops */
     298         915 :             max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension  restricted)     */
     299             :             /* max_amp_y from projected y is needed when pulses_sum  exceeds 127      */
     300             : 
     301             :             /* First section with 32 bit energy inactive,   max_amp_y kept updated though    */
     302         945 :             FOR( k = pulse_tot; k < 128; k++ )
     303             :             {
     304          30 :                 L_yy = L_add( L_yy, 1 );
     305          30 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     306          30 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     307             :             }
     308             : 
     309             :             /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated                 */
     310        3636 :             FOR( k = pulse_tot; k < pulses; k++ )
     311             :             {
     312        2721 :                 L_yy = L_add( L_yy, 1 );
     313        2721 :                 en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin,  ~ 2 ops      */
     314        2721 :                 en_dn_shift = sub( 16, en_margin );                /* calc. shift to lower byte for fixed use of extract_l      */
     315             : 
     316        2721 :                 high_prec_active = 1;
     317        2721 :                 move16();
     318        2721 :                 if ( en_dn_shift <= 0 )
     319             :                 {
     320             :                     /* only use 32 bit energy if actually needed */
     321        1895 :                     high_prec_active = 0;
     322        1895 :                     move16();
     323             :                 }
     324             :                 /* 32 bit energy and corr adaptively active,  max_amp_y kept updated */
     325        2721 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
     326        2721 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     327             :             }
     328             :         }
     329      275485 :         L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
     330             :                                    to make energy right for unit/inverse gain calculation */
     331             :     }
     332             : 
     333             :     /* Apply unit energy normalization scaling,  always at least one pulse so no div-by-zero check is needed */
     334      275913 :     L_isqrt = L_deposit_l( 0 );
     335      275913 :     IF( neg_gain != 0 )
     336             :     {
     337      275485 :         L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
     338             :     }
     339             : 
     340      275913 :     shift_num = norm_s( pulse_tot );            /* account for max possible pulse amplitude in y,
     341             :                                                    can be used even when max_amp_y is not avail.  */
     342      275913 :     shift_den = norm_s( neg_gain );             /* account for gain downscaling shift            */
     343      275913 :     neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm            */
     344      275913 :     shift_tot = sub( add( shift_num, shift_den ), 15 );
     345             : 
     346      275913 :     L_isqrt = L_negate( L_isqrt );
     347     3836601 :     FOR( i = 0; i < dim; i++ )
     348             :     {
     349     3560688 :         tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used  in scaling */
     350     3560688 :         if ( x[i] < 0 )
     351             :         {
     352     1769771 :             tmp = negate( tmp ); /* apply sign */
     353             :         }
     354             : 
     355     3560688 :         IF( y[i] != 0 )
     356             :         {
     357     1153417 :             y[i] = shr( tmp, shift_num );
     358     1153417 :             move16(); /* updates sign of y[i] (~range -512..+512), array move */
     359             :         }
     360     3560688 :         Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp );         /* Q31*Q(0+x)  +1         */
     361     3560688 :         Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1     */
     362     3560688 :         L_tmp = L_shr_o( L_tmp, shift_tot, &Overflow );         /* Q31+x                  */
     363     3560688 :         xq[i] = round_fx_o( L_tmp, &Overflow );                 /* Q15, array move        */
     364     3560688 :         move16();
     365     3560688 :         L_xq[i] = L_tmp; /* Q31 currently  unused  */
     366     3560688 :         move32();
     367             :     }
     368             : 
     369             :     /* index the found PVQ vector into short codewords */
     370      275913 :     entry = mpvq_encode_vec_fx( y, dim, pulses );
     371             : 
     372             :     /* send the short codeword(s) to the range encoder */
     373      275913 :     rc_enc_bits_ivas_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
     374      275913 :     IF( NE_16( dim, 1 ) )
     375             :     {
     376      275913 :         rc_enc_uniform_ivas_fx( hBstr, hPVQ, entry.index, entry.size );
     377             :     }
     378             : 
     379      275913 :     return;
     380             : }
     381             : 
     382       16690 : void pvq_encode_fx(
     383             :     BSTR_ENC_HANDLE hBstr,
     384             :     PVQ_ENC_HANDLE hPVQ,  /* i/o: PVQ encoder handle                 */
     385             :     const Word16 *x,      /* i:   vector to quantize             Q15-3=>Q12       */
     386             :     Word16 *y,            /* o:   raw pulses  (non-scaled short) Q0               */
     387             :     Word16 *xq,           /* o:   quantized vector               Q15              */
     388             :     Word32 *L_xq,         /* o:   quantized vector               Q31 for eval     */
     389             :     const Word16 pulses,  /* i:   number of allocated pulses                      */
     390             :     const Word16 dim,     /* i:   Length of vector                                */
     391             :     const Word16 neg_gain /* i:  - Gain       use - negative gain in  Q15  0..1   */
     392             : )
     393             : {
     394             :     Word16 i;
     395             :     Word16 pulse_tot;
     396             :     Word16 xabs[PVQ_MAX_BAND_SIZE];
     397             :     Word16 max_xabs;
     398             :     Word32 L_xsum;
     399             :     Word32 L_proj_fac;
     400             :     Word32 L_yy, L_xy;
     401             :     Word16 max_amp_y, imax;
     402             :     Word16 k, en_margin, en_dn_shift, high_prec_active;
     403             : 
     404             :     Word32 L_num, L_tmp;
     405             :     Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
     406             : 
     407             :     UWord16 u16_tmp;
     408             :     Word16 dim_m1;
     409             :     Word32 L_isqrt;
     410             :     Word16 neg_gain_norm, shift_tot;
     411             :     Word16 high_pulse_density_flag;
     412             :     PvqEntry entry;
     413             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     414       16690 :     Flag Overflow = 0;
     415       16690 :     move16();
     416             : #endif
     417             : 
     418       16690 :     L_proj_fac = 4096;
     419       16690 :     move32();
     420       16690 :     L_xsum = L_deposit_h( 0 );
     421       16690 :     max_xabs = -1;
     422       16690 :     move16();
     423             : 
     424      214066 :     FOR( i = 0; i < dim; i++ )
     425             :     {
     426      197376 :         xabs[i] = abs_s( x[i] );
     427      197376 :         move16();                              /* Q12 */
     428      197376 :         max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient  search correlation scaling */
     429      197376 :         L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
     430      197376 :         y[i] = 0;
     431      197376 :         move16(); /* init, later only non-zero values need to be normalized */
     432             :     }
     433             : 
     434       16690 :     test();
     435       16690 :     IF( L_xsum == 0 || neg_gain == 0 )
     436             :     {
     437          18 :         pulse_tot = pulses;
     438          18 :         move16();
     439          18 :         dim_m1 = sub( dim, 1 );
     440          18 :         y[dim_m1] = 0;
     441          18 :         move16();
     442          18 :         y[0] = shr( pulses, 1 );
     443          18 :         move16();
     444          18 :         y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) );
     445          18 :         move16();
     446          18 :         L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
     447          18 :         if ( dim_m1 != 0 )
     448             :         {
     449          18 :             L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
     450             :         }
     451             :     }
     452             :     ELSE
     453             :     {
     454             : 
     455       16672 :         num = sub( pulses, PYR_OFFSET );
     456       16672 :         high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
     457             : 
     458       16672 :         test();
     459       16672 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     460             :         {
     461       10938 :             shift_den = norm_l( L_xsum );                  /* x_sum input  Q12                         */
     462       10938 :             den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den                     */
     463             : 
     464       10938 :             L_num = L_deposit_l( num );
     465       10938 :             shift_num = sub( norm_l( L_num ), 1 );
     466       10938 :             L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1                  */
     467       10938 :             proj_fac = div_l( L_num, den );    /* L_num always has to be less than den<<16 */
     468             : 
     469       10938 :             shift_delta = sub( shift_num, shift_den );
     470       10938 :             L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring  to a fixed  Q12     */
     471             :         }
     472             : 
     473       16672 :         pulse_tot = 0;
     474       16672 :         move16();
     475       16672 :         L_yy = L_deposit_l( 0 );
     476       16672 :         L_xy = L_deposit_l( 0 );
     477       16672 :         test();
     478       16672 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     479             :         {
     480      102626 :             FOR( i = 0; i < dim; i++ ) /* max 64 */
     481             :             {
     482       91688 :                 Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12  +1 */
     483       91688 :                 y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
     484       91688 :                 move16(); /* Q12 *Q12  ->  Q0 */
     485             : 
     486       91688 :                 pulse_tot = add( pulse_tot, y[i] );  /* Q0                                         */
     487       91688 :                 L_yy = L_mac( L_yy, y[i], y[i] );    /* Energy, result will scale up by 2 by L_mac */
     488       91688 :                 L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12  +1 --> Q13                   */
     489             :             }
     490             :         }
     491             : 
     492             : 
     493       16672 :         L_yy = L_shr( L_yy, 1 );
     494       16672 :         IF( LE_16( pulses, 127 ) )
     495             :         {
     496             :             /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5  */
     497             :             /* ( if  high energy precision is inactive,  max_amp_y is not needed , no max_amp_y(k-1) update )           */
     498       92697 :             FOR( k = pulse_tot; k < pulses; k++ )
     499             :             {
     500       76764 :                 L_yy = L_add( L_yy, 1 );
     501       76764 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     502             :             }
     503             :         }
     504             :         ELSE
     505             :         {                                     /* HC or LC+HC inner loops */
     506         739 :             max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension  restricted)     */
     507             :             /* max_amp_y from projected y is needed when pulses_sum  exceeds 127      */
     508             : 
     509             :             /* First section with 32 bit energy inactive,   max_amp_y kept updated though    */
     510         745 :             FOR( k = pulse_tot; k < 128; k++ )
     511             :             {
     512           6 :                 L_yy = L_add( L_yy, 1 );
     513           6 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     514           6 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     515             :             }
     516             : 
     517             :             /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated                */
     518        2945 :             FOR( k = pulse_tot; k < pulses; k++ )
     519             :             {
     520        2206 :                 L_yy = L_add( L_yy, 1 );
     521        2206 :                 en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin,  ~ 2 ops      */
     522        2206 :                 en_dn_shift = sub( 16, en_margin );                /* calc. shift to lower byte for fixed use of extract_l      */
     523             : 
     524        2206 :                 high_prec_active = 1;
     525        2206 :                 move16();
     526        2206 :                 if ( en_dn_shift <= 0 )
     527             :                 {
     528             :                     /* only use 32 bit energy if actually needed */
     529        1247 :                     high_prec_active = 0;
     530        1247 :                     move16();
     531             :                 }
     532             :                 /* 32 bit energy and corr adaptively active,  max_amp_y kept updated */
     533        2206 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
     534        2206 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     535             :             }
     536             :         }
     537       16672 :         L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
     538             :                                    to make energy right for unit/inverse gain calculation */
     539             :     }
     540             : 
     541             :     /* Apply unit energy normalization scaling,  always at least one pulse so no div-by-zero check is needed */
     542       16690 :     L_isqrt = L_deposit_l( 0 );
     543       16690 :     IF( neg_gain != 0 )
     544             :     {
     545       16672 :         L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
     546             :     }
     547             : 
     548       16690 :     shift_num = norm_s( pulse_tot );            /* account for max possible pulse amplitude in y,
     549             :                                                    can be used even when max_amp_y is not avail.  */
     550       16690 :     shift_den = norm_s( neg_gain );             /* account for gain downscaling shift            */
     551       16690 :     neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm            */
     552       16690 :     shift_tot = sub( add( shift_num, shift_den ), 15 );
     553             : 
     554       16690 :     L_isqrt = L_negate( L_isqrt );
     555      214066 :     FOR( i = 0; i < dim; i++ )
     556             :     {
     557      197376 :         tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used  in scaling */
     558      197376 :         if ( x[i] < 0 )
     559             :         {
     560       98495 :             tmp = negate( tmp ); /* apply sign */
     561             :         }
     562             : 
     563      197376 :         if ( y[i] != 0 )
     564             :         {
     565       90759 :             y[i] = shr( tmp, shift_num );
     566       90759 :             move16(); /* updates sign of y[i} , ~range -512 + 512),  array move */
     567             :         }
     568      197376 :         Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp );         /* Q31*Q(0+x)  +1         */
     569      197376 :         Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1     */
     570      197376 :         L_tmp = L_shr_o( L_tmp, shift_tot, &Overflow );         /* Q31+x                  */
     571      197376 :         xq[i] = round_fx_o( L_tmp, &Overflow );                 /* Q15, array move        */
     572      197376 :         move16();
     573      197376 :         L_xq[i] = L_tmp; /* Q31 currently  unused  */
     574      197376 :         move32();
     575             :     }
     576             : 
     577             :     /* index the found PVQ vector into short codewords */
     578       16690 :     entry = mpvq_encode_vec_fx( y, dim, pulses );
     579             : 
     580             :     /* send the short codeword(s) to the range encoder */
     581       16690 :     rc_enc_bits_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
     582       16690 :     IF( NE_16( dim, 1 ) )
     583             :     {
     584       16690 :         rc_enc_uniform_fx( hBstr, hPVQ, entry.index, entry.size );
     585             :     }
     586             : 
     587       16690 :     return;
     588             : }

Generated by: LCOV version 1.14