LCOV - code coverage report
Current view: top level - lib_enc - pvq_encode_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main @ 43b7b28dcb1471ff5d355252c4b8f37ee7ecc268 Lines: 259 259 100.0 %
Date: 2025-11-02 02:02:47 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*====================================================================================
       2             :     EVS Codec 3GPP TS26.452 Aug 12, 2021. Version 16.3.0
       3             :   ====================================================================================*/
       4             : #include <stdint.h>
       5             : #include "options.h" /* Compilation switches                   */
       6             : #include "cnst.h"
       7             : #include "prot_fx.h"     /* Function prototypes                    */
       8             : #include "prot_fx_enc.h" /* Function prototypes                    */
       9             : #include "rom_com.h"
      10             : 
      11             : 
      12             : /*   PVQ MIXED_SEARCH_LOOP:
      13             :                            low precision 16/32 + energy-selective high precision 32/64,
      14             :                            mixed performance, 10 dB SEGSNR better than the low precision loop only,
      15             :                            active if k >= 128 and the accumulated energy is high enough,
      16             :                            comes at a controlled complexity cost, as dimensions decrease for high k's */
      17             : 
      18             : /* o  : largest element of vec[0 .. lvec-1]; vec[0] alone if lvec <= 1 */
      19        1662 : static Word16 max_val_fx(
      20             :     const Word16 *vec, /* i  : input vector, must hold at least one element    */
      21             :     const Word16 lvec  /* i  : length of input vector                         */
      22             : )
      23             : {
      24             :     Word16 j, tmp;
      25             : 
      26        1662 :     tmp = vec[0]; /* seed the running maximum; read unconditionally, before any length check */
      27        1662 :     move16();
      28        6616 :     FOR( j = 1; j < lvec; j++ )
      29             :     {
      30        4954 :         tmp = s_max( vec[j], tmp ); /* keep the larger of the current element and the running maximum */
      31             :     }
      32        1662 :     return tmp;
      33             : }
      34             : 
      35      292905 : static Word16 pyramidSearchProjInit_fx( /* o  : 1 if Ptot is above the L-dependent threshold, else 0 */
      36             :     const Word16 L,      /* i  : callers in this file pass the vector dimension (dim) */
      37             :     const Word16 Ptot )  /* i  : callers in this file pass the number of pulses       */
      38             : {
      39      292905 :     return ( sub( Ptot, extract_l( L_shr( L_mult0( 8223, (Word32) L ), 14 ) ) ) > 0 ); /* threshold = ( 8223 * L ) >> 14, 8223/2^14 ~= 0.502, i.e. roughly L/2 */
      40             : }
      41             : 
      42             : 
      43             : /* The inner search loop for one single additional unit pulse, starting from pulse_tot,
      44             :     with information about required energy precision/down scaling for the dim loop in en_dn_shift
      45             :     (currently only copied into en_tmp, not otherwise used), and the current max_xabs absolute value
      46             :     used for a near-optimal correlation upscaling. Adds the pulse to y[imax], updates *L_xy_ptr,
      47             :     *L_yy_ptr and *pulse_tot_ptr, and returns the index of the best positioned unit pulse (imax) */
      48     1178512 : static Word16 one_pulse_search(
      49             :     const Word16 dim,    /* vector dimension       */
      50             :     const Word16 *x_abs, /* absolute vector values */
      51             :     Word16 *y,           /* i/o: pulse vector, y[imax] is incremented by one */
      52             :     Word16 *pulse_tot_ptr, /* i/o: total pulse count, incremented by one */
      53             :     Word32 *L_xy_ptr, /* i/o: accumulated correlation */
      54             :     Word32 *L_yy_ptr, /* i/o: accumulated energy      */
      55             :     Word16 high_prec_active, /* i  : 0 selects the 16/32 bit loop, non-zero the 32/64 bit loop */
      56             :     Word16 en_dn_shift,      /* i  : only copied into en_tmp below, not used by the search     */
      57             :     Word16 max_xabs ) /* current accumulated max amplitude for pulses */
      58             : {
      59             :     Word16 i, corr_up_shift, corr_tmp, imax, corr_sq_tmp, en_max_den, cmax_num, en_tmp;
      60             :     Word32 L_tmp_en_lc, L_tmp_corr;
      61             :     Word32 L_tmp_en, L_en_max_den, L_corr_sq_max, L_tmp_corr_sq;
      62             :     Word32 L_left_h, L_right_h;
      63             :     UWord32 UL_left_l, UL_right_l, UL_dummy;
      64             :     Word32 L_tmp;
      65             :     UWord16 u_sgn;
      66             : 
      67     1178512 :     en_tmp = en_dn_shift; /* dummy assignment to avoid compiler warning for unused parameter  */
      68             : 
      69             :     /* maximize correlation precision, prior to every unit pulse addition in the vector */
      70     1178512 :     corr_up_shift = norm_l( L_mac( *L_xy_ptr, 1, max_xabs ) ); /* pre-analyze worst case L_xy update in the dim loop, 2 ops */
      71     1178512 :     imax = -1;                                                 /* placeholder to avoid compiler warning; NOTE(review): if no loop iteration updates imax (e.g. dim <= 0) the final update below indexes x_abs[-1] / y[-1] */
      72             : 
      73             :     /* clean BE code, with split out low/high precision loops                                                      */
      74             :     /* activate low complexity en/corr search section conditionally if resulting vector energy is within limits    */
      75             :     /* typical case for higher dimensions                                                                          */
      76             : 
      77     1178512 :     IF( high_prec_active == 0 )
      78             :     {
      79     1176724 :         en_max_den = 0; /*move16()*/
      80     1176724 :         move16();
      81             :         ; /* OPT: move saved by  using high_prec_active as en_max_den */ /*      1 op   */
      82     1176724 :         cmax_num = -1;
      83     1176724 :         move16(); /* req. to force a 1st update for n==0   */ /*      1 op   */
      84             : 
      85    16865317 :         FOR( i = 0; i < dim; i++ ) /* FOR 3 ops  */
      86             :         {
      87    15688593 :             L_tmp_corr = L_shl_sat( L_mac_sat( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /*  actual in-loop target    value, 2 ops  */
      88    15688593 :             corr_tmp = round_fx_sat( L_tmp_corr );                                        /*     1 op   */
      89    15688593 :             corr_sq_tmp = mult( corr_tmp, corr_tmp );                                     /* CorrSq, is 16 bit for low complexity cross multiplication    1 op   */
      90             : 
      91    15688593 :             L_tmp_en_lc = L_mac( *L_yy_ptr, 1, y[i] ); /*Q1 result ,  energy may span up to ~14+1(Q1)+1(sign)=16 bits,  1 op */
      92             :             /* extract_l without shift can always be used for this section as energy is guaranteed to stay in the lower word, 1 op */
      93    15688593 :             en_tmp = extract_l( L_tmp_en_lc ); /* L_shl + round_fx could also be used but then adds an upshift cost (2-3 ops)*/
      94             : 
      95             :             /* 16/32 bit comparison    WC (4 +1+1 + (1+1+1) = 9                                                                   */
      96    15688593 :             IF( L_msu( L_mult( corr_sq_tmp, en_max_den ), cmax_num, en_tmp ) > 0 ) /* cross-multiplied test of corr_sq_tmp/en_tmp > cmax_num/en_max_den, L_mult and then a L_msu,      2 ops  */
      97             :             {
      98     3816073 :                 cmax_num = corr_sq_tmp;
      99     3816073 :                 move16(); /* 1 op */
     100     3816073 :                 en_max_den = en_tmp;
     101     3816073 :                 move16(); /* 1 op */
     102     3816073 :                 imax = i;
     103     3816073 :                 move16(); /* 1 op */
     104             :             }
     105             :         } /* dim  */
     106             :     }
     107             :     ELSE
     108             :     {
     109             :         /* High resolution section activated when vector energy is becoming high  (peaky or many pulses)                    */
     110             :         /* BASOP operator Mpy_32_32_ss used to allow higher resolution for both the CorrSq term and the Energy term         */
     111             : 
     112        1788 :         L_en_max_den = L_deposit_l( 0 );                                      /* 1 op  */
     113        1788 :         L_corr_sq_max = L_deposit_l( -1 ); /* req. to force a 1st update   */ /* 1 op  */
     114             : 
     115        8784 :         FOR( i = 0; i < dim; i++ ) /* FOR 3 ops */
     116             :         {
     117        6996 :             L_tmp_corr = L_shl( L_mac( *L_xy_ptr, 1, x_abs[i] ), corr_up_shift ); /* actual in  loop WC value 2 ops   */
     118        6996 :             Mpy_32_32_ss( L_tmp_corr, L_tmp_corr, &L_tmp_corr_sq, &UL_dummy );    /* CorrSq 32 bits,          4 ops   */
     119             : 
     120        6996 :             L_tmp_en = L_mac( *L_yy_ptr, 1, y[i] ); /* Q1,energy may span up to sign+19 bits , 1 op    */
     121             :             /* For highest accuracy use pairs of maximum upshifted 32x32 bit signed values              */
     122             :             /*  (L_tmp_corr_sq / L_tmp_en)     >  (L_corr_sq_max/L_en_max_den)                          */
     123             :             /*  (L_tmp_corr_sq * L_en_max_den) >  (L_corr_sq_max * L_tmp_en)                            */
     124        6996 :             Mpy_32_32_ss( L_en_max_den, L_tmp_corr_sq, &L_left_h, &UL_left_l ); /* 4 ops */
     125        6996 :             Mpy_32_32_ss( L_tmp_en, L_corr_sq_max, &L_right_h, &UL_right_l );   /* 4 ops */
     126             : 
     127             :             /* STL optimized "Lazy evaluation"  of:
     128             :                IF( (L_left_h > L_right_h)  ||  ( (L_left_h == L_right_h) &&  (UL_left_l > UL_right_l) )
     129             :              */
     130             :             /* 32/64 bit Lazy eval comparison WC cost is    (1+  1+1+1 + 4 +(2+2+1) = 13 ,  and average  is  ~12 */
     131             :             /* Unoptimized 32/64 bit comparison  WC cost is (1+1+ 2x2  + 4 +(2+2+1) = 15 */
     132        6996 :             L_tmp = L_sub( L_left_h, L_right_h ); /* high  signed  word check            1 op  */
     133        6996 :             u_sgn = 0;
     134        6996 :             move16();         /* 1 op  */
     135        6996 :             if ( L_tmp == 0 ) /* L_tmp high Word testing is always needed */
     136             :             {
     137             :                 /* The returned UL value from UL_subNs is not needed,  only u_sgn is needed  */
     138        3249 :                 UL_subNs( UL_right_l, UL_left_l, &u_sgn ); /* low unsigned word check, note left/right order switch of ">"  due to ">=" inside UL_subNs, 1 op */
     139             :             }
     140        6996 :             if ( u_sgn != 0 )
     141             :             {
     142        1318 :                 L_tmp = L_add( L_tmp, 1 ); /* 0+1  --> 1 use wrap/sign result of low Word u_sgn check */ /* 1 op  */
     143             :             }
     144        6996 :             IF( L_tmp > 0 ) /* IF  4 ops */
     145             :             {
     146        3722 :                 L_corr_sq_max = L_add( L_tmp_corr_sq, 0 ); /* 1-2 ops */
     147        3722 :                 L_en_max_den = L_add( L_tmp_en, 0 );       /* 1-2 ops */
     148        3722 :                 imax = i;
     149        3722 :                 move16(); /* 1 op  */
     150             :             }
     151             :         } /* dim loop */
     152             :     }
     153             :     /* Complexity comparison per coeff for low precision vs. high precision
     154             :         low  precision: pulse_tot <= 127, 16 bit:  WC  2+3 +(15)*dim    ops,            dim=5  --> 5+15*5 = 90  ops, 18 ops/coeff
     155             :         high precision: pulse_tot  > 127, 32 bit:  WC  1+3+3 +(26-28)*dim  ops, WC-band dim=5  --> 7+28*5 = 147 ops, 29 ops/coeff  ~61% increase
     156             :     */
     157             : 
     158             :     /*  finally add the found unit pulse contribution to past L_xy, L_yy, for the next pulse loop    */
     159     1178512 :     *L_xy_ptr = L_mac( *L_xy_ptr, x_abs[imax], 1 ); /*      Q12+1 */
     160     1178512 :     *L_yy_ptr = L_mac( *L_yy_ptr, 1, y[imax] );     /* uses y[imax] before it is incremented below */
     161             : 
     162     1178512 :     y[imax] = add( y[imax], 1 );
     163     1178512 :     move16();                                          /* Q0 added pulse              */
     164     1178512 :     ( *pulse_tot_ptr ) = add( ( *pulse_tot_ptr ), 1 ); /* increment total pulse sum   */
     165     1178512 :     move16();
     166             : 
     167     1178512 :     return imax;
     168             : }
     169             : /*-----------------------------------------------------------------------*
     170             :  * Function pvq_encode_ivas_fx()                                         *
     171             :  * Selects a PVQ pulse vector for x, scales it and range-encodes it.     *
     172             :  *-----------------------------------------------------------------------*/
     173      276508 : void pvq_encode_ivas_fx(
     174             :     BSTR_ENC_HANDLE hBstr,
     175             :     PVQ_ENC_HANDLE hPVQ,  /* i/o: PVQ encoder handle                 */
     176             :     const Word16 *x,      /* i:   vector to quantize             Q15-3=>Q12       */
     177             :     Word16 *y,            /* o:   raw pulses  (non-scaled short) Q0               */
     178             :     Word16 *xq,           /* o:   quantized vector               Q15              */
     179             :     Word32 *L_xq,         /* o:   quantized vector               Q31 for eval     */
     180             :     const Word16 pulses,  /* i:   number of allocated pulses                      */
     181             :     const Word16 dim,     /* i:   Length of vector                                */
     182             :     const Word16 neg_gain /* i:  - Gain       use - negative gain in  Q15  0..1   */
     183             : )
     184             : {
     185             :     Word16 i;
     186             :     Word16 pulse_tot;
     187             :     Word16 xabs[PVQ_MAX_BAND_SIZE];
     188             :     Word16 max_xabs;
     189             :     Word32 L_xsum;
     190             :     Word32 L_proj_fac;
     191             :     Word32 L_yy, L_xy;
     192             :     Word16 max_amp_y, imax;
     193             :     Word16 k, en_margin, en_dn_shift, high_prec_active;
     194             : 
     195             :     Word32 L_num, L_tmp;
     196             :     Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
     197             : 
     198             :     UWord16 u16_tmp;
     199             :     Word16 dim_m1;
     200             :     Word32 L_isqrt;
     201             :     Word16 neg_gain_norm, shift_tot;
     202             :     Word16 high_pulse_density_flag;
     203             :     PvqEntry entry;
     204             : 
     205      276508 :     L_proj_fac = 4096;
     206      276508 :     move32();
     207      276508 :     L_xsum = L_deposit_h( 0 );
     208      276508 :     max_xabs = -1;
     209      276508 :     move16();
     210             : 
     211     3844364 :     FOR( i = 0; i < dim; i++ )
     212             :     {
     213     3567856 :         xabs[i] = abs_s( x[i] );
     214     3567856 :         move16();                              /* Q12 */
     215     3567856 :         max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient  search correlation scaling */
     216     3567856 :         L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
     217     3567856 :         y[i] = 0;
     218     3567856 :         move16(); /* init, later only non-zero values need to be normalized */
     219             :     }
     220             : 
     221      276508 :     test();
     222      276508 :     IF( L_xsum == 0 || neg_gain == 0 ) /* all-zero input or zero gain: no search, pulses are split between y[0] and y[dim-1] */
     223             :     {
     224         429 :         pulse_tot = pulses;
     225         429 :         move16();
     226         429 :         dim_m1 = sub( dim, 1 );
     227         429 :         y[dim_m1] = 0;
     228         429 :         move16();
     229         429 :         y[0] = shr( pulses, 1 );
     230         429 :         move16();
     231         429 :         y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) ); /* for dim == 1 this accumulates onto y[0], giving y[0] == pulses */
     232         429 :         move16();
     233         429 :         L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
     234         429 :         IF( dim_m1 != 0 )
     235             :         {
     236         429 :             L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
     237             :         }
     238             :     }
     239             :     ELSE
     240             :     {
     241             : 
     242      276079 :         num = sub( pulses, PYR_OFFSET );
     243      276079 :         high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
     244             : 
     245      276079 :         test();
     246      276079 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) ) /* compute a projection factor only when pulses > PYR_OFFSET and the density flag is set */
     247             :         {
     248      118786 :             shift_den = norm_l( L_xsum );                  /* x_sum input  Q12                         */
     249      118786 :             den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den                     */
     250             : 
     251      118786 :             L_num = L_deposit_l( num );
     252      118786 :             shift_num = sub( norm_l( L_num ), 1 );
     253      118786 :             L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1                  */
     254      118786 :             proj_fac = div_l( L_num, den );    /* L_num always has to be less than den<<16 */
     255             : 
     256      118786 :             shift_delta = sub( shift_num, shift_den );
     257      118786 :             L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring  to a fixed  Q12     */
     258             :         }
     259             : 
     260      276079 :         pulse_tot = 0;
     261      276079 :         move16();
     262      276079 :         L_yy = L_deposit_l( 0 );
     263      276079 :         L_xy = L_deposit_l( 0 );
     264      276079 :         test();
     265      276079 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) ) /* same condition as above: project xabs onto an initial pulse vector */
     266             :         {
     267     1248149 :             FOR( i = 0; i < dim; i++ ) /* max 64 */
     268             :             {
     269     1129363 :                 Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12  +1 */
     270     1129363 :                 y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
     271     1129363 :                 move16(); /* Q12 *Q12  ->  Q0 */
     272             : 
     273     1129363 :                 pulse_tot = add( pulse_tot, y[i] );  /* Q0                                         */
     274     1129363 :                 L_yy = L_mac( L_yy, y[i], y[i] );    /* Energy, result will scale up by 2 by L_mac */
     275     1129363 :                 L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12  +1 --> Q13                   */
     276             :             }
     277             :         }
     278             : 
     279             : 
     280      276079 :         L_yy = L_shr( L_yy, 1 ); /* undo the L_mac doubling for the search loops; restored by the L_shl after the search */
     281      276079 :         IF( LE_16( pulses, 127 ) )
     282             :         {
     283             :             /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5  */
     284             :             /* ( if  high energy precision is inactive,  max_amp_y is not needed , no max_amp_y(k-1) update )           */
     285     1371432 :             FOR( k = pulse_tot; k < pulses; k++ )
     286             :             {
     287     1096268 :                 L_yy = L_add( L_yy, 1 );
     288     1096268 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs ); /* returned imax is not used in this loop */
     289             :             }
     290             :         }
     291             :         ELSE
     292             :         {                                     /* HC or LC+HC inner loops */
     293         915 :             max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension  restricted)     */
     294             :             /* max_amp_y from projected y is needed when pulses_sum  exceeds 127      */
     295             : 
     296             :             /* First section with 32 bit energy inactive,   max_amp_y kept updated though    */
     297         945 :             FOR( k = pulse_tot; k < 128; k++ )
     298             :             {
     299          30 :                 L_yy = L_add( L_yy, 1 );
     300          30 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     301          30 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     302             :             }
     303             : 
     304             :             /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated                 */
     305        3636 :             FOR( k = pulse_tot; k < pulses; k++ )
     306             :             {
     307        2721 :                 L_yy = L_add( L_yy, 1 );
     308        2721 :                 en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin,  ~ 2 ops      */
     309        2721 :                 en_dn_shift = sub( 16, en_margin );                /* calc. shift to lower byte for fixed use of extract_l      */
     310             : 
     311        2721 :                 high_prec_active = 1;
     312        2721 :                 move16();
     313        2721 :                 if ( en_dn_shift <= 0 )
     314             :                 {
     315             :                     /* only use 32 bit energy if actually needed */
     316        1895 :                     high_prec_active = 0;
     317        1895 :                     move16();
     318             :                 }
     319             :                 /* 32 bit energy and corr adaptively active,  max_amp_y kept updated */
     320        2721 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
     321        2721 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     322             :             }
     323             :         }
     324      276079 :         L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
     325             :                                    to make energy right for unit/inverse gain calculation */
     326             :     }
     327             : 
     328             :     /* Apply unit energy normalization scaling,  always at least one pulse so no div-by-zero check is needed */
     329      276508 :     L_isqrt = L_deposit_l( 0 );
     330      276508 :     IF( neg_gain != 0 )
     331             :     {
     332      276079 :         L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
     333             :     }
     334             : 
     335      276508 :     shift_num = norm_s( pulse_tot );            /* account for max possible pulse amplitude in y,
     336             :                                                    can be used even when max_amp_y is not avail.  */
     337      276508 :     shift_den = norm_s( neg_gain );             /* account for gain downscaling shift            */
     338      276508 :     neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm            */
     339      276508 :     shift_tot = sub( add( shift_num, shift_den ), 15 );
     340             : 
     341      276508 :     L_isqrt = L_negate( L_isqrt );
     342     3844364 :     FOR( i = 0; i < dim; i++ )
     343             :     {
     344     3567856 :         tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used  in scaling */
     345     3567856 :         if ( x[i] < 0 )
     346             :         {
     347     1772521 :             tmp = negate( tmp ); /* apply sign */
     348             :         }
     349             : 
     350     3567856 :         IF( y[i] != 0 )
     351             :         {
     352     1156437 :             y[i] = shr( tmp, shift_num );
     353     1156437 :             move16(); /* updates sign of y[i] , ~range -512 + 512),  array move */
     354             :         }
     355     3567856 :         Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp );         /* Q31*Q(0+x)  +1         */
     356     3567856 :         Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1     */
     357     3567856 :         L_tmp = L_shr_sat( L_tmp, shift_tot );                  /* Q31+x                  */
     358     3567856 :         xq[i] = round_fx_sat( L_tmp );                          /* Q15, array move        */
     359     3567856 :         move16();
     360     3567856 :         L_xq[i] = L_tmp; /* Q31 currently  unused  */
     361     3567856 :         move32();
     362             :     }
     363             : 
     364             :     /* index the found PVQ vector into short codewords */
     365      276508 :     entry = mpvq_encode_vec_fx( y, dim, pulses );
     366             : 
     367             :     /* send the short codeword(s) to the range encoder */
     368      276508 :     rc_enc_bits_ivas_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
     369      276508 :     IF( NE_16( dim, 1 ) ) /* the index codeword is only sent when dim != 1 */
     370             :     {
     371      276508 :         rc_enc_uniform_ivas_fx( hBstr, hPVQ, entry.index, entry.size );
     372             :     }
     373             : 
     374      276508 :     return;
     375             : }
     376             : 
     377       16843 : void pvq_encode_fx(
     378             :     BSTR_ENC_HANDLE hBstr,
     379             :     PVQ_ENC_HANDLE hPVQ,  /* i/o: PVQ encoder handle                 */
     380             :     const Word16 *x,      /* i:   vector to quantize             Q15-3=>Q12       */
     381             :     Word16 *y,            /* o:   raw pulses  (non-scaled short) Q0               */
     382             :     Word16 *xq,           /* o:   quantized vector               Q15              */
     383             :     Word32 *L_xq,         /* o:   quantized vector               Q31 fot eval     */
     384             :     const Word16 pulses,  /* i:   number of allocated pulses                      */
     385             :     const Word16 dim,     /* i:   Length of vector                                */
     386             :     const Word16 neg_gain /* i:  - Gain       use - negative gain in  Q15  0..1   */
     387             : )
     388             : {
     389             :     Word16 i;
     390             :     Word16 pulse_tot;
     391             :     Word16 xabs[PVQ_MAX_BAND_SIZE];
     392             :     Word16 max_xabs;
     393             :     Word32 L_xsum;
     394             :     Word32 L_proj_fac;
     395             :     Word32 L_yy, L_xy;
     396             :     Word16 max_amp_y, imax;
     397             :     Word16 k, en_margin, en_dn_shift, high_prec_active;
     398             : 
     399             :     Word32 L_num, L_tmp;
     400             :     Word16 proj_fac, tmp, shift_den, shift_num, shift_delta, num, den;
     401             : 
     402             :     UWord16 u16_tmp;
     403             :     Word16 dim_m1;
     404             :     Word32 L_isqrt;
     405             :     Word16 neg_gain_norm, shift_tot;
     406             :     Word16 high_pulse_density_flag;
     407             :     PvqEntry entry;
     408             : 
     409       16843 :     L_proj_fac = 4096;
     410       16843 :     move32();
     411       16843 :     L_xsum = L_deposit_h( 0 );
     412       16843 :     max_xabs = -1;
     413       16843 :     move16();
     414             : 
     415      216203 :     FOR( i = 0; i < dim; i++ )
     416             :     {
     417      199360 :         xabs[i] = abs_s( x[i] );
     418      199360 :         move16();                              /* Q12 */
     419      199360 :         max_xabs = s_max( max_xabs, xabs[i] ); /* for efficient  search correlation scaling */
     420      199360 :         L_xsum = L_mac0( L_xsum, 1, xabs[i] ); /* stay in Q12 */
     421      199360 :         y[i] = 0;
     422      199360 :         move16(); /* init, later only non-zero values need to be normalized */
     423             :     }
     424             : 
     425       16843 :     test();
     426       16843 :     IF( L_xsum == 0 || neg_gain == 0 )
     427             :     {
     428          17 :         pulse_tot = pulses;
     429          17 :         move16();
     430          17 :         dim_m1 = sub( dim, 1 );
     431          17 :         y[dim_m1] = 0;
     432          17 :         move16();
     433          17 :         y[0] = shr( pulses, 1 );
     434          17 :         move16();
     435          17 :         y[dim_m1] = add( y[dim_m1], sub( pulses, y[0] ) );
     436          17 :         move16();
     437          17 :         L_yy = L_mult( y[0], y[0] ); /* L_yy needed for normalization */
     438          17 :         if ( dim_m1 != 0 )
     439             :         {
     440          17 :             L_yy = L_mac( L_yy, y[dim_m1], y[dim_m1] ); /* (single basop) */
     441             :         }
     442             :     }
     443             :     ELSE
     444             :     {
     445             : 
     446       16826 :         num = sub( pulses, PYR_OFFSET );
     447       16826 :         high_pulse_density_flag = pyramidSearchProjInit_fx( dim, pulses );
     448             : 
     449       16826 :         test();
     450       16826 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     451             :         {
     452       10933 :             shift_den = norm_l( L_xsum );                  /* x_sum input  Q12                         */
     453       10933 :             den = extract_h( L_shl( L_xsum, shift_den ) ); /* now in Q12+shift_den                     */
     454             : 
     455       10933 :             L_num = L_deposit_l( num );
     456       10933 :             shift_num = sub( norm_l( L_num ), 1 );
     457       10933 :             L_num = L_shl( L_num, shift_num ); /* now in Q0 +shift_num -1                  */
     458       10933 :             proj_fac = div_l( L_num, den );    /* L_num always has to be less than den<<16 */
     459             : 
     460       10933 :             shift_delta = sub( shift_num, shift_den );
     461       10933 :             L_proj_fac = L_shl_sat( L_deposit_l( proj_fac ), sub( 9, shift_delta ) ); /* bring  to a fixed  Q12     */
     462             :         }
     463             : 
     464       16826 :         pulse_tot = 0;
     465       16826 :         move16();
     466       16826 :         L_yy = L_deposit_l( 0 );
     467       16826 :         L_xy = L_deposit_l( 0 );
     468       16826 :         test();
     469       16826 :         IF( ( num > 0 ) && ( high_pulse_density_flag != 0 ) )
     470             :         {
     471      102537 :             FOR( i = 0; i < dim; i++ ) /* max 64 */
     472             :             {
     473       91604 :                 Mpy_32_16_ss( L_proj_fac, xabs[i], &L_tmp, &u16_tmp ); /*Q12 *Q12  +1 */
     474       91604 :                 y[i] = extract_l( L_shr( L_tmp, 12 + 12 - 16 + 1 ) );
     475       91604 :                 move16(); /* Q12 *Q12  ->  Q0 */
     476             : 
     477       91604 :                 pulse_tot = add( pulse_tot, y[i] );  /* Q0                                         */
     478       91604 :                 L_yy = L_mac( L_yy, y[i], y[i] );    /* Energy, result will scale up by 2 by L_mac */
     479       91604 :                 L_xy = L_mac( L_xy, xabs[i], y[i] ); /* Corr, Q0*Q12  +1 --> Q13                   */
     480             :             }
     481             :         }
     482             : 
     483             : 
     484       16826 :         L_yy = L_shr( L_yy, 1 );
     485       16826 :         IF( LE_16( pulses, 127 ) )
     486             :         {
     487             :             /* LC inner loop, enters here always for dimensions 6 and higher, and also sometimes for dimensions 1 .. 5  */
     488             :             /* ( if  high energy precision is inactive,  max_amp_y is not needed , no max_amp_y(k-1) update )           */
     489       93339 :             FOR( k = pulse_tot; k < pulses; k++ )
     490             :             {
     491       77260 :                 L_yy = L_add( L_yy, 1 );
     492       77260 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     493             :             }
     494             :         }
     495             :         ELSE
     496             :         {                                     /* HC or LC+HC inner loops */
     497         747 :             max_amp_y = max_val_fx( y, dim ); /* this loops over max 5 values (as pulses are dimension  restricted)     */
     498             :             /* max_amp_y from projected y is needed when pulses_sum  exceeds 127      */
     499             : 
     500             :             /* First section with 32 bit energy inactive,   max_amp_y kept updated though    */
     501         753 :             FOR( k = pulse_tot; k < 128; k++ )
     502             :             {
     503           6 :                 L_yy = L_add( L_yy, 1 );
     504           6 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, 0, 0, max_xabs );
     505           6 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     506             :             }
     507             : 
     508             :             /* Second section with higher number of pulses, 32 bit energy precision adaptively selected, max_amp_y kept updated                */
     509        2974 :             FOR( k = pulse_tot; k < pulses; k++ )
     510             :             {
     511        2227 :                 L_yy = L_add( L_yy, 1 );
     512        2227 :                 en_margin = norm_l( L_mac( L_yy, 1, max_amp_y ) ); /* find max current energy "addition", margin,  ~ 2 ops      */
     513        2227 :                 en_dn_shift = sub( 16, en_margin );                /* calc. shift to lower byte for fixed use of extract_l      */
     514             : 
     515        2227 :                 high_prec_active = 1;
     516        2227 :                 move16();
     517        2227 :                 if ( en_dn_shift <= 0 )
     518             :                 {
     519             :                     /* only use 32 bit energy if actually needed */
     520        1265 :                     high_prec_active = 0;
     521        1265 :                     move16();
     522             :                 }
     523             :                 /* 32 bit energy and corr adaptively active,  max_amp_y kept updated */
     524        2227 :                 imax = one_pulse_search( dim, xabs, y, &pulse_tot, &L_xy, &L_yy, high_prec_active, en_dn_shift, max_xabs );
     525        2227 :                 max_amp_y = s_max( max_amp_y, y[imax] );
     526             :             }
     527             :         }
     528       16826 :         L_yy = L_shl( L_yy, 1 ); /* compensate search loop analysis energy downshift by 1,
     529             :                                    to make energy right for unit/inverse gain calculation */
     530             :     }
     531             : 
     532             :     /* Apply unit energy normalization scaling,  always at least one pulse so no div-by-zero check is needed */
     533       16843 :     L_isqrt = L_deposit_l( 0 );
     534       16843 :     IF( neg_gain != 0 )
     535             :     {
     536       16826 :         L_isqrt = Isqrt( L_shr( L_yy, 1 ) ); /* Note: one single gain factor as not computed */
     537             :     }
     538             : 
     539       16843 :     shift_num = norm_s( pulse_tot );            /* account for max possible pulse amplitude in y,
     540             :                                                    can be used even when max_amp_y is not avail.  */
     541       16843 :     shift_den = norm_s( neg_gain );             /* account for gain downscaling shift            */
     542       16843 :     neg_gain_norm = shl( neg_gain, shift_den ); /* up to 10 dB loss without this norm            */
     543       16843 :     shift_tot = sub( add( shift_num, shift_den ), 15 );
     544             : 
     545       16843 :     L_isqrt = L_negate( L_isqrt );
     546      216203 :     FOR( i = 0; i < dim; i++ )
     547             :     {
     548      199360 :         tmp = shl( y[i], shift_num ); /* upshifted abs(y[i]) used  in scaling */
     549      199360 :         if ( x[i] < 0 )
     550             :         {
     551       99448 :             tmp = negate( tmp ); /* apply sign */
     552             :         }
     553             : 
     554      199360 :         if ( y[i] != 0 )
     555             :         {
     556       91218 :             y[i] = shr( tmp, shift_num );
     557       91218 :             move16(); /* updates sign of y[i} , ~range -512 + 512),  array move */
     558             :         }
     559      199360 :         Mpy_32_16_ss( L_isqrt, tmp, &L_tmp, &u16_tmp );         /* Q31*Q(0+x)  +1         */
     560      199360 :         Mpy_32_16_ss( L_tmp, neg_gain_norm, &L_tmp, &u16_tmp ); /* Q31*Q(0+x) *Q15 +1     */
     561      199360 :         L_tmp = L_shr_sat( L_tmp, shift_tot );                  /* Q31+x                  */
     562      199360 :         xq[i] = round_fx_sat( L_tmp );                          /* Q15, array move        */
     563      199360 :         move16();
     564      199360 :         L_xq[i] = L_tmp; /* Q31 currently  unused  */
     565      199360 :         move32();
     566             :     }
     567             : 
     568             :     /* index the found PVQ vector into short codewords */
     569       16843 :     entry = mpvq_encode_vec_fx( y, dim, pulses );
     570             : 
     571             :     /* send the short codeword(s) to the range encoder */
     572       16843 :     rc_enc_bits_fx( hBstr, hPVQ, UL_deposit_l( entry.lead_sign_ind ), 1 ); /* 0 or 1 */
     573       16843 :     IF( NE_16( dim, 1 ) )
     574             :     {
     575       16843 :         rc_enc_uniform_fx( hBstr, hPVQ, entry.index, entry.size );
     576             :     }
     577             : 
     578       16843 :     return;
     579             : }

Generated by: LCOV version 1.14