LCOV - code coverage report
Current view: top level - lib_enc - pvq_encode_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main enc/dec/rend @ 3b2f07138c61dcf997bbf4165d0882f794b2995f Lines: 265 265 100.0 %
Date: 2025-05-03 01:55:50 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*====================================================================================
       2             :     EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
       3             :   ====================================================================================*/
       4             : #include <stdint.h>
       5             : #include "options.h" /* Compilation switches                   */
       6             : #include "cnst.h"
       7             : #include "prot_fx.h"     /* Function prototypes                    */
       8             : #include "prot_fx_enc.h" /* Function prototypes                    */
       9             : #include "rom_com_fx.h"
      10             : #include "rom_com.h"
      11             : 
      12             : /*   PVQ MIXED_SEARCH_LOOP:
      13             :                            low precision 16/32 bit search plus an energy-selective high precision 32/64 bit search;
      14             :                            the mixed search gives about 10 dB better SEGSNR than the low precision loop alone;
      15             :                            the high precision loop is active if k>=128 and the accumulated energy is high enough;
      16             :                            it comes at a controlled complexity cost, as dimensions decrease for high k's */
      17             : 
      18        1686 : static Word16 max_val_fx(                    /* o  : maximum value in the input vector              */
      19             :                           const Word16 *vec, /* i  : input vector, vec[0] is always read            */
      20             :                           const Word16 lvec  /* i  : length of input vector, must be >= 1           */
      21             : )
      22             : {
      23             :     Word16 j, tmp;
      24             : /* seed the running maximum with the first element, then fold in the rest */
      25        1686 :     tmp = vec[0];
      26        1686 :     move16();
      27        6713 :     FOR( j = 1; j < lvec; j++ ) /* visits the remaining lvec - 1 elements; no iteration when lvec <= 1 */
      28             :     {
      29        5027 :         tmp = s_max( vec[j], tmp );
      30             :     }
      31        1686 :     return tmp;
      32             : }
      33             : 
      34      293228 : static Word16 pyramidSearchProjInit_fx( Word16 L, Word16 Ptot ) /* o: 1 if Ptot > ( 8223 * L ) >> 14, otherwise 0; callers in this file pass ( dim, pulses ) */
      35             : { /* 8223 / 2^14 is about 0.502, so the flag is set when Ptot exceeds roughly half of L (assuming the usual STL semantics of L_mult0 / L_shr / sub) */
      36      293228 :     return ( sub( Ptot, extract_l( L_shr( L_mult0( 8223, (Word32) L ), 14 ) ) ) > 0 );
      37             : }
      38             : 
      39             : 
      40             : /* The inner search loop for one single additional unit pulse, starting from pulse_tot,
      41             :     with information about the required energy precision/down scaling for the dim loop in en_dn_shift,
      42             :     and the current max_xabs absolute value to be used for a near-optimal correlation upscaling.
      43             :     Returns the index of the best positioned unit pulse (imax).
      44             : */
      45     1178198 : static Word16 one_pulse_search( /* o  : index imax of the position that received the unit pulse */
      46             :     const Word16 dim,    /* i  : vector dimension, number of candidate positions searched */
      47             :     const Word16 *x_abs, /* i  : absolute vector values */
      48             :     Word16 *y,           /* i/o: pulse vector, y[imax] is incremented by one */
      49             :     Word16 *pulse_tot_ptr, /* i/o: total pulse count, incremented by one */
      50             :     Word32 *L_xy_ptr, /* i/o: accumulated correlation, updated with x_abs[imax] */
      51             :     Word32 *L_yy_ptr, /* i/o: accumulated energy, updated with y[imax] */
      52             :     Word16 high_prec_active, /* i  : 0 selects the 16/32 bit search loop, otherwise the 32/64 bit loop is used */
      53             :     Word16 en_dn_shift, /* i  : only copied into en_tmp below, not otherwise used by the search */
      54             :     Word16 max_xabs ) /* i  : value used for the worst case L_xy pre-analysis that sets corr_up_shift */
      55             : {
      56             :     Word16 i, corr_up_shift, corr_tmp, imax, corr_sq_tmp, en_max_den, cmax_num, en_tmp;
      57             :     Word32 L_tmp_en_lc, L_tmp_corr;
      58             :     Word32 L_tmp_en, L_en_max_den, L_corr_sq_max, L_tmp_corr_sq;
      59             :     Word32 L_left_h, L_right_h;
      60             :     UWord32 UL_left_l, UL_right_l, UL_dummy;
      61             :     Word32 L_tmp;
      62             :     UWord16 u_sgn;
      63             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
      64     1178198 :     Flag Overflow = 0;
      65     1178198 :     move16();
      66             : #endif
      67             : 
      68     1178198 :     en_tmp = en_dn_shift; /* dummy assignment to avoid compiler warning for unused parameter  */
      69             : 
      70             :     /* maximize correlation precision, prior to every unit pulse addition in the vector */
      71     1178198 :     corr_up_shift = norm_l( L_mac( *L_xy_ptr, 1, max_xabs ) ); /* pre analyze worst case L_xy update in the dim  loop        , 2 ops */
      72     1178198 :     imax = -1;                                                 /* not needed for search, only added to avoid compiler warning     */
      73             : /* NOTE(review): imax is used as an array index after the loops; if no iteration updates it (e.g. dim <= 0) that index is -1, so verify that callers always pass dim >= 1 */
      74             :     /* clean BE code, with split out low/high precision loops                                                      */
      75             :     /* activate low complexity en/corr search section conditionally if resulting vector energy is within limits    */
      76             :     /* typical case for higher dimensions                                                                          */
      77             : 
      78     1178198 :     IF( high_prec_active == 0 )
      79             :     {
      80     1176364 :         en_max_den = 0; /*move16()*/
      81     1176364 :         move16();
      82             :         ; /* OPT: move saved by  using high_prec_active as en_max_den */ /*      1 op   */
      83     1176364 :         cmax_num = -1;
      84     1176364 :         move16(); /* req. to force a 1st update for n==0   */ /*      1 op   */
      85             : 
      86    16851952 :         FOR( i = 0; i < dim; i++ ) /* FOR 3 ops  */
      87             :         {
      88    15675588 :             L_tmp_corr = L_shl_o( L_mac_o( *L_xy_ptr, 1, x_abs[i], &Overflow ), corr_up_shift, &Overflow ); /*  actual in-loop target    value, 2 ops  */
      89    15675588 :             corr_tmp = round_fx_o( L_tmp_corr, &Overflow );                                                 /*     1 op   */
      90    15675588 :             corr_sq_tmp = mult( corr_tmp, corr_tmp );                                                       /* CorrSq, kept in 16 bits for a low complexity cross multiplication    1 op   */
      91             : 
      92    15675588 :             L_tmp_en_lc = L_mac( *L_yy_ptr, 1, y[i] ); /*Q1 result ,  energy may span up to ~14+1(Q1)+1(sign)=16 bits,  1 op */
      93             :             /* extract_l without shift can always be used for this section as energy is guaranteed to stay in the lower word, 1 op */
      94    15675588 :             en_tmp = extract_l( L_tmp_en_lc ); /* L_shl + round_fx could also be used, but that adds an upshift cost (2-3 ops)*/
      95             : 
      96             :             /* 16/32 bit comparison    WC (4 +1+1 + (1+1+1) = 9                                                                   */
      97    15675588 :             IF( L_msu( L_mult( corr_sq_tmp, en_max_den ), cmax_num, en_tmp ) > 0 ) /* use L_mult and then a L_msu,      2 ops  */
      98             :             {
      99     3812111 :                 cmax_num = corr_sq_tmp;
     100     3812111 :                 move16(); /* 1 op */
     101     3812111 :                 en_max_den = en_tmp;
     102     3812111 :                 move16(); /* 1 op */
     103     3812111 :                 imax = i;
     104     3812111 :                 move16(); /* 1 op */
     105             :             }
     106             :         } /* dim  */
     107             :     }
     108             :     ELSE
     109             :     {
     110             :         /* High resolution section activated when vector energy is becoming high  (peaky or many pulses)                    */
     111             :         /* BASOP operator Mpy32_32_ss used to allow higher resolution for both the CorrSq term and the Energy term          */
     112             : 
     113        1834 :         L_en_max_den = L_deposit_l( 0 );                                      /* 1 op  */
     114        1834 :         L_corr_sq_max = L_deposit_l( -1 ); /* req. to force a 1st update   */ /* 1 op  */
     115             : 
     116        9014 :         FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
     117             :         {
     118        7180 :             L_tmp_corr = L_shl( L_mac( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /* actual in  loop WC value 2 ops   */
     119        7180 :             Mpy_32_32_ss( L_tmp_corr, L_tmp_corr, &L_tmp_corr_sq, &UL_dummy );    /* CorrSq 32 bits,          4 ops   */
     120             : 
     121        7180 :             L_tmp_en = L_mac( *L_yy_ptr, 1, y[i] ); /* Q1,energy may span up to sign+19 bits , 1 op    */
     122             :             /* For highest accuracy use pairs of maximum upshifted 32x32 bit signed values              */
     123             :             /*  (L_tmp_corr_sq / L_tmp_en)     >  (L_corr_sq_max/L_en_max_den)                          */
     124             :             /*  (L_tmp_corr_sq * L_en_max_den) >  (L_corr_sq_max * L_tmp_en)                            */
     125        7180 :             Mpy_32_32_ss( L_en_max_den, L_tmp_corr_sq, &L_left_h, &UL_left_l ); /* 4 ops */
     126        7180 :             Mpy_32_32_ss( L_tmp_en, L_corr_sq_max, &L_right_h, &UL_right_l );   /* 4 ops */
     127             : 
     128             :             /* STL optimized "Lazy evaluation"  of:
     129             :                IF( (L_left_h > L_right_h)  ||  ( (L_left_h == L_right_h) &&  (UL_left_l > UL_right_l) )
     130             :              */
     131             :             /* 32/64 bit Lazy eval comparison WC cost is    (1+  1+1+1 + 4 +(2+2+1) = 13 ,  and average  is  ~12 */
     132             :             /* Unoptimized 32/64 bit comparison  WC cost is (1+1+ 2x2  + 4 +(2+2+1) = 15 */
     133        7180 :             L_tmp = L_sub( L_left_h, L_right_h ); /* high  signed  word check            1 op  */
     134        7180 :             u_sgn = 0;
     135        7180 :             move16();         /* 1 op  */
     136        7180 :             if ( L_tmp == 0 ) /* L_tmp high Word testing is always needed */
     137             :             {
     138             :                 /* The returned UL value from UL_subNs is not needed,  only u_sgn is needed  */
     139        3337 :                 UL_subNs( UL_right_l, UL_left_l, &u_sgn ); /* low unsigned word check, note left/right order switch of ">"  due to ">=" inside UL_subNs, 1 op */
     140             :             }
     141        7180 :             if ( u_sgn != 0 )
     142             :             {
     143        1359 :                 L_tmp = L_add( L_tmp, 1 ); /* 0+1  --> 1 use wrap/sign result of low Word u_sgn check */ /* 1 op  */
     144             :             }
     145        7180 :             IF( L_tmp > 0 ) /* IF  4 ops */
     146             :             {
     147        3815 :                 L_corr_sq_max = L_add( L_tmp_corr_sq, 0 ); /* 1-2 ops */
     148        3815 :                 L_en_max_den = L_add( L_tmp_en, 0 );       /* 1-2 ops */
     149        3815 :                 imax = i;
     150        3815 :                 move16(); /* 1 op  */
     151             :             }
     152             :         } /* dim loop */
     153             :     }
     154             :     /* Complexity comparison per coeff for low precision vs. high precision
     155             :         low  precision: pulse_tot <= 127, 16 bit:  WC  2+3 +(15)*dim    ops,            dim=5  --> 5+15*5 = 90  ops, 18 ops/coeff
     156             :         high precision: pulse_tot  > 127, 32 bit:  WC  1+3+3 +(26-28)*dim  ops, WC-band dim=5  --> 7+28*5 = 147 ops, 29 ops/coeff  ~61% increase
     157             :     */
     158             : 
     159             :     /* finally add the found unit pulse contribution to the accumulated L_xy and L_yy for the next pulse loop; the callers in this file add 1 to L_yy before each call, here the y[imax] dependent part is added */
     160     1178198 :     *L_xy_ptr = L_mac( *L_xy_ptr, x_abs[imax], 1 ); /*      Q12+1 */
     161     1178198 :     *L_yy_ptr = L_mac( *L_yy_ptr, 1, y[imax] );
     162             : 
     163     1178198 :     y[imax] = add( y[imax], 1 );
     164     1178198 :     move16();                                          /* Q0 added pulse              */
     165     1178198 :     ( *pulse_tot_ptr ) = add( ( *pulse_tot_ptr ), 1 ); /* increment total pulse sum   */
     166     1178198 :     move16();
     167             : 
     168     1178198 :     return imax;
     169             : }
     170             : /*-----------------------------------------------------------------------*
     171             :  * Function pvq_encode_ivas_fx()                                         *
     172             :  * PVQ pulse search, unit-energy scaling and range coding of one vector  *
     173             :  *-----------------------------------------------------------------------*/
     174      277318 : void pvq_encode_ivas_fx(
     175             :     BSTR_ENC_HANDLE hBstr, /* i/o: handle forwarded to the range encoder calls     */
     176             :     PVQ_ENC_HANDLE hPVQ,  /* i/o: PVQ encoder handle                 */
     177             :     const Word16 *x,      /* i:   vector to quantize             Q15-3=>Q12       */
     178             :     Word16 *y,            /* o:   raw pulses  (non-scaled short) Q0               */
     179             :     Word16 *xq,           /* o:   quantized vector               Q15              */
     180             :     Word32 *L_xq,         /* o:   quantized vector               Q31 for eval     */
     181             :     const Word16 pulses,  /* i:   number of allocated pulses                      */
     182             :     const Word16 dim,     /* i:   Length of vector                                */
     183             :     const Word16 neg_gain /* i:  - Gain       use - negative gain in  Q15  0..1   */
     184             : )
     185             : {
     186             :     Word16 i;
     187             :     Word16 pulse_tot;
     188             :     Word16 xabs[PVQ_MAX_BAND_SIZE];
     189             :     Word16 max_xabs;
     190             :     Word32 L_xsum;
     191             :     Word32 L_proj_fac;
     192             :     Word32 L_yy, L_xy;
     193             :     Word16 max_amp_y, imax;
     194             :     Word16 k, en_margin, en_dn_shift, high_prec_active;
     195             : 
     196             :     Word32 L_num, L_tmp;
     197             :     Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
     198             : 
     199             :     UWord16 u16_tmp;
     200             :     Word16 dim_m1;
     201             :     Word32 L_isqrt;
     202             :     Word16 neg_gain_norm, shift_tot;
     203             :     Word16 high_pulse_density_flag;
     204             :     PvqEntry entry;
     205             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     206      277318 :     Flag Overflow = 0;
     207      277318 :     move16();
     208             : #endif
     209             : 
     210      277318 :     L_proj_fac = 4096; /* default value, recomputed below under the same condition that guards the projection loop */
     211      277318 :     move32();
     212      277318 :     L_xsum = L_deposit_h( 0 );
     213      277318 :     max_xabs = -1;
     214      277318 :     move16();
     215             : /* take the absolute values of x, track their maximum and sum, and clear y; xabs has PVQ_MAX_BAND_SIZE entries, so dim must not exceed that */
     216     3854962 :     FOR( i = 0; i < dim; i++ )
     217             :     {
     218     3577644 :         xabs[i] = abs_s( x[i] );
     219     3577644 :         move16();                              /* Q12 */
     220     3577644 :         max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient  search correlation scaling */
     221     3577644 :         L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
     222     3577644 :         y[i] = 0;
     223     3577644 :         move16(); /* init, later only non-zero values need to be normalized */
     224             :     }
     225             : 
     226      277318 :     test();
     227      277318 :     IF( L_xsum == 0 || neg_gain == 0 )
     228             :     { /* all-zero input or zero gain: no search, the pulses are split between y[0] and y[dim-1] */
     229         429 :         pulse_tot = pulses;
     230         429 :         move16();
     231         429 :         dim_m1 = sub( dim, 1 );
     232         429 :         y[dim_m1] = 0;
     233         429 :         move16();
     234         429 :         y[0] = shr( pulses, 1 );
     235         429 :         move16();
     236         429 :         y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) ); /* for dim == 1 this is y[0] again, which then holds all pulses */
     237         429 :         move16();
     238         429 :         L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
     239         429 :         IF( dim_m1 != 0 )
     240             :         {
     241         429 :             L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
     242             :         }
     243             :     }
     244             :     ELSE
     245             :     {
     246             : /* the projection start point is used only if pulses > PYR_OFFSET and the density flag is set; otherwise the search starts from zero pulses */
     247      276889 :         num = sub( pulses, PYR_OFFSET );
     248      276889 :         high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
     249             : 
     250      276889 :         test();
     251      276889 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     252             :         {
     253      119177 :             shift_den = norm_l( L_xsum );                  /* x_sum input  Q12                         */
     254      119177 :             den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den                     */
     255             : 
     256      119177 :             L_num = L_deposit_l( num );
     257      119177 :             shift_num = sub( norm_l( L_num ), 1 );
     258      119177 :             L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1                  */
     259      119177 :             proj_fac = div_l( L_num, den );    /* L_num always has to be less than den<<16 */
     260             : 
     261      119177 :             shift_delta = sub( shift_num, shift_den );
     262      119177 :             L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring  to a fixed  Q12     */
     263             :         }
     264             : 
     265      276889 :         pulse_tot = 0;
     266      276889 :         move16();
     267      276889 :         L_yy = L_deposit_l( 0 );
     268      276889 :         L_xy = L_deposit_l( 0 );
     269      276889 :         test();
     270      276889 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     271             :         {
     272     1251323 :             FOR( i = 0; i < dim; i++ ) /* max 64 */
     273             :             {
     274     1132146 :                 Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12  +1 */
     275     1132146 :                 y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
     276     1132146 :                 move16(); /* Q12 *Q12  ->  Q0 */
     277             : 
     278     1132146 :                 pulse_tot = add( pulse_tot, y[i] );  /* Q0                                         */
     279     1132146 :                 L_yy = L_mac( L_yy, y[i], y[i] );    /* Energy, result will scale up by 2 by L_mac */
     280     1132146 :                 L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12  +1 --> Q13                   */
     281             :             }
     282             :         }
     283             : 
     284             : /* the remaining pulses, from pulse_tot up to pulses, are placed one at a time by one_pulse_search */
     285      276889 :         L_yy = L_shr( L_yy, 1 ); /* remove the factor 2 from the L_mac accumulation above for the search loops */
     286      276889 :         IF( LE_16( pulses, 127 ) )
     287             :         {
     288             :             /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5  */
     289             :             /* ( if  high energy precision is inactive,  max_amp_y is not needed , no max_amp_y(k-1) update )           */
     290     1373786 :             FOR( k = pulse_tot; k < pulses; k++ )
     291             :             {
     292     1097844 :                 L_yy = L_add( L_yy, 1 ); /* constant part of the energy update; the y[imax] dependent part is added inside one_pulse_search */
     293     1097844 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     294             :             }
     295             :         }
     296             :         ELSE
     297             :         {                                     /* HC or LC+HC inner loops */
     298         947 :             max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension  restricted)     */
     299             :             /* max_amp_y from projected y is needed when pulses_sum  exceeds 127      */
     300             : 
     301             :             /* First section with 32 bit energy inactive,   max_amp_y kept updated though    */
     302         979 :             FOR( k = pulse_tot; k < 128; k++ )
     303             :             {
     304          32 :                 L_yy = L_add( L_yy, 1 );
     305          32 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     306          32 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     307             :             }
     308             : 
     309             :             /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated                 */
     310        3766 :             FOR( k = pulse_tot; k < pulses; k++ )
     311             :             {
     312        2819 :                 L_yy = L_add( L_yy, 1 );
     313        2819 :                 en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin,  ~ 2 ops      */
     314        2819 :                 en_dn_shift = sub( 16, en_margin );                /* calc. shift to lower byte for fixed use of extract_l      */
     315             : 
     316        2819 :                 high_prec_active = 1;
     317        2819 :                 move16();
     318        2819 :                 if ( en_dn_shift <= 0 )
     319             :                 {
     320             :                     /* only use 32 bit energy if actually needed */
     321        1944 :                     high_prec_active = 0;
     322        1944 :                     move16();
     323             :                 }
     324             :                 /* 32 bit energy and corr adaptively active,  max_amp_y kept updated */
     325        2819 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
     326        2819 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     327             :             }
     328             :         }
     329      276889 :         L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
     330             :                                    to make energy right for unit/inverse gain calculation */
     331             :     }
     332             : 
     333             :     /* Apply unit energy normalization scaling,  always at least one pulse so no div-by-zero check is needed */
     334      277318 :     L_isqrt = L_deposit_l( 0 ); /* stays zero when neg_gain == 0, which makes every xq[i] / L_xq[i] below zero */
     335      277318 :     IF( neg_gain != 0 )
     336             :     {
     337      276889 :         L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
     338             :     }
     339             : 
     340      277318 :     shift_num = norm_s( pulse_tot );            /* account for max possible pulse amplitude in y,
     341             :                                                    can be used even when max_amp_y is not avail.  */
     342      277318 :     shift_den = norm_s( neg_gain );             /* account for gain downscaling shift            */
     343      277318 :     neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm            */
     344      277318 :     shift_tot = sub( add( shift_num, shift_den ), 15 );
     345             : 
     346      277318 :     L_isqrt = L_negate( L_isqrt );
     347     3854962 :     FOR( i = 0; i < dim; i++ ) /* apply the sign of x[i] to y[i] and compute the scaled outputs xq[i] / L_xq[i] */
     348             :     {
     349     3577644 :         tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used  in scaling */
     350     3577644 :         if ( x[i] < 0 )
     351             :         {
     352     1778723 :             tmp = negate( tmp ); /* apply sign */
     353             :         }
     354             : 
     355     3577644 :         IF( y[i] != 0 )
     356             :         {
     357     1159303 :             y[i] = shr( tmp, shift_num );
     358     1159303 :             move16(); /* updates sign of y[i] , ~range -512 + 512),  array move */
     359             :         }
     360     3577644 :         Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp );         /* Q31*Q(0+x)  +1         */
     361     3577644 :         Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1     */
     362     3577644 :         L_tmp = L_shr_o( L_tmp, shift_tot, &Overflow );         /* Q31+x                  */
     363     3577644 :         xq[i] = round_fx_o( L_tmp, &Overflow );                 /* Q15, array move        */
     364     3577644 :         move16();
     365     3577644 :         L_xq[i] = L_tmp; /* Q31 currently  unused  */
     366     3577644 :         move32();
     367             :     }
     368             : 
     369             :     /* index the found PVQ vector into short codewords */
     370      277318 :     entry = mpvq_encode_vec_fx( y, dim, pulses );
     371             : 
     372             :     /* send the short codeword(s) to the range encoder */
     373      277318 :     rc_enc_bits_ivas_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
     374      277318 :     IF( NE_16( dim, 1 ) ) /* entry.index is only sent when dim != 1 */
     375             :     {
     376      277318 :         rc_enc_uniform_ivas_fx( hBstr, hPVQ, entry.index, entry.size );
     377             :     }
     378             : 
     379      277318 :     return;
     380             : }
     381             : 
     382       16357 : void pvq_encode_fx(
     383             :     BSTR_ENC_HANDLE hBstr,
     384             :     PVQ_ENC_HANDLE hPVQ,  /* i/o: PVQ encoder handle                 */
     385             :     const Word16 *x,      /* i:   vector to quantize             Q15-3=>Q12       */
     386             :     Word16 *y,            /* o:   raw pulses  (non-scaled short) Q0               */
     387             :     Word16 *xq,           /* o:   quantized vector               Q15              */
     388             :     Word32 *L_xq,         /* o:   quantized vector               Q31 fot eval     */
     389             :     const Word16 pulses,  /* i:   number of allocated pulses                      */
     390             :     const Word16 dim,     /* i:   Length of vector                                */
     391             :     const Word16 neg_gain /* i:  - Gain       use - negative gain in  Q15  0..1   */
     392             : )
     393             : {
     394             :     Word16 i;
     395             :     Word16 pulse_tot;
     396             :     Word16 xabs[PVQ_MAX_BAND_SIZE];
     397             :     Word16 max_xabs;
     398             :     Word32 L_xsum;
     399             :     Word32 L_proj_fac;
     400             :     Word32 L_yy, L_xy;
     401             :     Word16 max_amp_y, imax;
     402             :     Word16 k, en_margin, en_dn_shift, high_prec_active;
     403             : 
     404             :     Word32 L_num, L_tmp;
     405             :     Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
     406             : 
     407             :     UWord16 u16_tmp;
     408             :     Word16 dim_m1;
     409             :     Word32 L_isqrt;
     410             :     Word16 neg_gain_norm, shift_tot;
     411             :     Word16 high_pulse_density_flag;
     412             :     PvqEntry entry;
     413             : #ifdef BASOP_NOGLOB_DECLARE_LOCAL
     414       16357 :     Flag Overflow = 0;
     415       16357 :     move16();
     416             : #endif
     417             : 
     418       16357 :     L_proj_fac = 4096;
     419       16357 :     move32();
     420       16357 :     L_xsum = L_deposit_h( 0 );
     421       16357 :     max_xabs = -1;
     422       16357 :     move16();
     423             : 
     424      209845 :     FOR( i = 0; i < dim; i++ )
     425             :     {
     426      193488 :         xabs[i] = abs_s( x[i] );
     427      193488 :         move16();                              /* Q12 */
     428      193488 :         max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient  search correlation scaling */
     429      193488 :         L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
     430      193488 :         y[i] = 0;
     431      193488 :         move16(); /* init, later only non-zero values need to be normalized */
     432             :     }
     433             : 
     434       16357 :     test();
     435       16357 :     IF( L_xsum == 0 || neg_gain == 0 )
     436             :     {
     437          18 :         pulse_tot = pulses;
     438          18 :         move16();
     439          18 :         dim_m1 = sub( dim, 1 );
     440          18 :         y[dim_m1] = 0;
     441          18 :         move16();
     442          18 :         y[0] = shr( pulses, 1 );
     443          18 :         move16();
     444          18 :         y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) );
     445          18 :         move16();
     446          18 :         L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
     447          18 :         if ( dim_m1 != 0 )
     448             :         {
     449          18 :             L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
     450             :         }
     451             :     }
     452             :     ELSE
     453             :     {
     454             : 
     455       16339 :         num = sub( pulses, PYR_OFFSET );
     456       16339 :         high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
     457             : 
     458       16339 :         test();
     459       16339 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     460             :         {
     461       10806 :             shift_den = norm_l( L_xsum );                  /* x_sum input  Q12                         */
     462       10806 :             den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den                     */
     463             : 
     464       10806 :             L_num = L_deposit_l( num );
     465       10806 :             shift_num = sub( norm_l( L_num ), 1 );
     466       10806 :             L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1                  */
     467       10806 :             proj_fac = div_l( L_num, den );    /* L_num always has to be less than den<<16 */
     468             : 
     469       10806 :             shift_delta = sub( shift_num, shift_den );
     470       10806 :             L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring  to a fixed  Q12     */
     471             :         }
     472             : 
     473       16339 :         pulse_tot = 0;
     474       16339 :         move16();
     475       16339 :         L_yy = L_deposit_l( 0 );
     476       16339 :         L_xy = L_deposit_l( 0 );
     477       16339 :         test();
     478       16339 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     479             :         {
     480      101212 :             FOR( i = 0; i < dim; i++ ) /* max 64 */
     481             :             {
     482       90406 :                 Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12  +1 */
     483       90406 :                 y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
     484       90406 :                 move16(); /* Q12 *Q12  ->  Q0 */
     485             : 
     486       90406 :                 pulse_tot = add( pulse_tot, y[i] );  /* Q0                                         */
     487       90406 :                 L_yy = L_mac( L_yy, y[i], y[i] );    /* Energy, result will scale up by 2 by L_mac */
     488       90406 :                 L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12  +1 --> Q13                   */
     489             :             }
     490             :         }
     491             : 
     492             : 
     493       16339 :         L_yy = L_shr( L_yy, 1 );
     494       16339 :         IF( LE_16( pulses, 127 ) )
     495             :         {
     496             :             /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5  */
     497             :             /* ( if  high energy precision is inactive,  max_amp_y is not needed , no max_amp_y(k-1) update )           */
     498       90891 :             FOR( k = pulse_tot; k < pulses; k++ )
     499             :             {
     500       75291 :                 L_yy = L_add( L_yy, 1 );
     501       75291 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     502             :             }
     503             :         }
     504             :         ELSE
     505             :         {                                     /* HC or LC+HC inner loops */
     506         739 :             max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension  restricted)     */
     507             :             /* max_amp_y from projected y is needed when pulses_sum  exceeds 127      */
     508             : 
     509             :             /* First section with 32 bit energy inactive,   max_amp_y kept updated though    */
     510         745 :             FOR( k = pulse_tot; k < 128; k++ )
     511             :             {
     512           6 :                 L_yy = L_add( L_yy, 1 );
     513           6 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     514           6 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     515             :             }
     516             : 
     517             :             /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated                 */
     518        2945 :             FOR( k = pulse_tot; k < pulses; k++ )
     519             :             {
     520        2206 :                 L_yy = L_add( L_yy, 1 );
     521        2206 :                 en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin,  ~ 2 ops      */
     522        2206 :                 en_dn_shift = sub( 16, en_margin );                /* calc. shift to lower byte for fixed use of extract_l      */
     523             : 
     524        2206 :                 high_prec_active = 1;
     525        2206 :                 move16();
     526        2206 :                 if ( en_dn_shift <= 0 )
     527             :                 {
     528             :                     /* only use 32 bit energy if actually needed */
     529        1247 :                     high_prec_active = 0;
     530        1247 :                     move16();
     531             :                 }
     532             :                 /* 32 bit energy and corr adaptively active,  max_amp_y kept updated */
     533        2206 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
     534        2206 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     535             :             }
     536             :         }
     537       16339 :         L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
     538             :                                    to make energy right for unit/inverse gain calculation */
     539             :     }
     540             : 
     541             :     /* Apply unit energy normalization scaling,  always at least one pulse so no div-by-zero check is needed */
     542       16357 :     L_isqrt = L_deposit_l( 0 );
     543       16357 :     IF( neg_gain != 0 )
     544             :     {
     545       16339 :         L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
     546             :     }
     547             : 
     548       16357 :     shift_num = norm_s( pulse_tot );            /* account for max possible pulse amplitude in y,
     549             :                                                    can be used even when max_amp_y is not avail.  */
     550       16357 :     shift_den = norm_s( neg_gain );             /* account for gain downscaling shift            */
     551       16357 :     neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm            */
     552       16357 :     shift_tot = sub( add( shift_num, shift_den ), 15 );
     553             : 
     554       16357 :     L_isqrt = L_negate( L_isqrt );
     555      209845 :     FOR( i = 0; i < dim; i++ )
     556             :     {
     557      193488 :         tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used  in scaling */
     558      193488 :         if ( x[i] < 0 )
     559             :         {
     560       96576 :             tmp = negate( tmp ); /* apply sign */
     561             :         }
     562             : 
     563      193488 :         if ( y[i] != 0 )
     564             :         {
     565       89220 :             y[i] = shr( tmp, shift_num );
     566       89220 :             move16(); /* updates sign of y[i} , ~range -512 + 512),  array move */
     567             :         }
     568      193488 :         Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp );         /* Q31*Q(0+x)  +1         */
     569      193488 :         Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1     */
     570      193488 :         L_tmp = L_shr_o( L_tmp, shift_tot, &Overflow );         /* Q31+x                  */
     571      193488 :         xq[i] = round_fx_o( L_tmp, &Overflow );                 /* Q15, array move        */
     572      193488 :         move16();
     573      193488 :         L_xq[i] = L_tmp; /* Q31 currently  unused  */
     574      193488 :         move32();
     575             :     }
     576             : 
     577             :     /* index the found PVQ vector into short codewords */
     578       16357 :     entry = mpvq_encode_vec_fx( y, dim, pulses );
     579             : 
     580             :     /* send the short codeword(s) to the range encoder */
     581       16357 :     rc_enc_bits_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
     582       16357 :     IF( NE_16( dim, 1 ) )
     583             :     {
     584       16357 :         rc_enc_uniform_fx( hBstr, hPVQ, entry.index, entry.size );
     585             :     }
     586             : 
     587       16357 :     return;
     588             : }

Generated by: LCOV version 1.14