Line data Source code
1 : /******************************************************************************
2 : * ETSI TS 103 634 V1.5.1 *
3 : * Low Complexity Communication Codec Plus (LC3plus) *
4 : * *
5 : * Copyright licence is solely granted through ETSI Intellectual Property *
6 : * Rights Policy, 3rd April 2019. No patent licence is granted by implication, *
7 : * estoppel or otherwise. *
8 : ******************************************************************************/
9 :
10 : #include "functions.h"
11 :
12 0 : static void pvq_pyr_project(const Word16 dim_proj, /* end vector dimension+1 */
13 : const Word16 *xabs, /* absolute vector values */
14 : Word32 L_xsum, /* absolute vector sum over dim */
15 : Word16 num, /* target number of pulses */
16 : Word16 * y, /* projected output vector */
17 : Word16 *pulse_tot_ptr, Word32 *L_xy_ptr, /* accumulated correlation Q(in+0+1) = Qin+1 */
18 : Word32 *L_yy_ptr /* accumulated energy Q0 */
19 : )
20 : {
21 :
22 : Dyn_Mem_Deluxe_In(
23 : Counter i;
24 : Word32 L_tmp, L_num;
25 : Word16 den, shift_num, shift_den, shift_delta, proj_fac;
26 : );
27 :
28 0 : *pulse_tot_ptr = 0; move16();
29 0 : *L_xy_ptr = L_deposit_l(0);
30 0 : *L_yy_ptr = L_deposit_l(0);
31 :
32 0 : shift_den = norm_l(L_xsum); /* x_sum input Qin */
33 0 : den = extract_h(L_shl_pos(L_xsum, shift_den)); /* now in Qin+shift_den */
34 :
35 0 : L_num = L_deposit_l(num);
36 0 : shift_num = sub(norm_l(L_num), 1);
37 0 : L_num = L_shl_pos(L_num, shift_num); /* now in Q0 +shift_num -1 */
38 0 : proj_fac = div_l(L_num, den); /* L_num always has to be less than den<<16 , norm_l-1 makes that happen */
39 :
40 0 : shift_delta = sub(shift_num, shift_den);
41 0 : FOR (i = 0; i < dim_proj; i++)
42 : {
43 0 : L_tmp = L_mult(proj_fac, xabs[i]); /* Q shift_delta + PVQ_SEARCH_QIN */
44 0 : y[i] = extract_h(L_shr(L_tmp, shift_delta)); move16(); /* to Q0 with floor , and potential sturation */
45 : ;
46 :
47 0 : *pulse_tot_ptr = add(*pulse_tot_ptr, y[i]); /* Q0 */
48 0 : *L_yy_ptr = L_mac0(*L_yy_ptr, y[i], y[i]); /* Energy, Q0 */
49 0 : *L_xy_ptr = L_mac(*L_xy_ptr, xabs[i], y[i]); /* Corr, Q0*Q12 +1 --> Q13 */
50 : }
51 :
52 : Dyn_Mem_Deluxe_Out();
53 0 : }
54 :
55 :
56 0 : static __forceinline Word16 one_pulse_search(const Word16 dim_start, /* start vector dimension */
57 : const Word16 dim_end, /* end vector dimension+1 */
58 : const Word16 *x_abs, /* absolute vector values */
59 : Word16 * y, /* output vector */
60 : Word16 * pulse_tot_ptr,
61 : Word32 * L_xy_ptr, /* accumulated correlation Q(12+0+1) = Q13 */
62 : Word32 * L_yy_ptr, /* accumulated energy Q0 */
63 : Word16 max_xabs) /* current max amplitude for target */
64 : {
65 : Dyn_Mem_Deluxe_In(
66 : Counter i;
67 : Word16 corr_tmp, corr_sq_tmp, en_max_den, cmax_num, en_tmp;
68 : Word32 L_tmp_en_lc, L_tmp_corr;
69 : Word16 corr_up_shift, imax;
70 : );
71 :
72 : /* maximize correlation precision, prior to every unit pulse addition in the vector */
73 0 : corr_up_shift = norm_l(L_mac(*L_xy_ptr, 1, max_xabs)); /* pre analyze worst case L_xy update in the dim loop */
74 0 : imax = -1; /* not needed for search, only added to avoid compiler warning */
75 : {
76 0 : en_max_den = 0; move16();
77 0 : cmax_num = -1; move16(); /* req. to force a 1st update for n==dim_start */
78 :
79 0 : FOR (i = dim_start; i < dim_end; i++)
80 : {
81 0 : L_tmp_corr = L_shl_pos(L_mac(*L_xy_ptr, 1, x_abs[i]), corr_up_shift); /* actual in-loop target value */
82 :
83 0 : corr_tmp = round_fx_sat(L_tmp_corr);
84 :
85 0 : corr_sq_tmp = mult(corr_tmp, corr_tmp); /* CorrSq_tmp for a 16bit low complexity cross multiplication */
86 :
87 0 : L_tmp_en_lc = L_mac(*L_yy_ptr, 1, y[i]); /*Q0 x^2+ 2x , "+1" added once before loop , result , energy may
88 : span up to ~14+1(Q1)+1(sign)=16 bits */
89 : /* extract_l without shift can always be used for this section as energy is guaranteed to stay in the lower
90 : * word*/
91 :
92 0 : en_tmp = extract_l(L_tmp_en_lc); /* L_shl + round_fx could also be used also but then adds an uphift cost */
93 :
94 : /* 16/32 bit comparison WC (4 +1+1 + (1+1+1) = 9 */
95 0 : IF (L_msu(L_mult(corr_sq_tmp, en_max_den), cmax_num, en_tmp) > 0) /* use L_mult and then a L_msu */
96 : {
97 0 : cmax_num = corr_sq_tmp; move16();
98 0 : en_max_den = en_tmp; move16();
99 0 : imax = i; move16();
100 : }
101 : } /* dim */
102 : }
103 :
104 :
105 : /* finally add found unit pulse contribution to past L_xy, Lyy, for next pulse loop */
106 0 : *L_xy_ptr = L_mac(*L_xy_ptr, x_abs[imax], 1); /* Qin+1 */
107 0 : *L_yy_ptr = L_mac(*L_yy_ptr, 1, y[imax]);
108 :
109 0 : y[imax] = add(y[imax], 1); move16(); /* Q0 added pulse */
110 0 : (*pulse_tot_ptr) = add((*pulse_tot_ptr), 1); /* increment total pulse sum */
111 : Dyn_Mem_Deluxe_Out();
112 0 : return imax;
113 : }
114 :
115 :
116 0 : void pvq_enc_search_fx(
117 : const Word16 *x, /* i: target vector to quantize Qin */
118 : Word16 * y_far, /* o: outl_far o, raw pulses (non-scaled short) Q0 , length dim */
119 : Word16 * y, /* o: outl_near o, raw pulses (non-scaled short) Q0 , length dim */
120 : Word16 * yA, /* o: A section raw pulses (non-scaled short) Q0 , length dimA */
121 : Word16 * yB, /* o: B section raw pulses (non-scaled short) Q0 , length dim-dimA */
122 : Word32 * L_corr, /* o: 4 un-normalized correlation sums for outl_far, outl_near, A, AB */
123 : Word32 * L_search_en, /* o: 4 energy sums for outl_far, outl_near, A, AB */
124 : Word16 * pulses_fin, /* i: number of allocated pulses to outl_far, outl_near , A, AB sections */
125 : Word16 * pulses_proj, /* i: number of projection pulses for outl_far, outl_near, A, AB */
126 :
127 : const Word16 dim, /* i: Length of outlier vector */
128 : const Word16 dimA /* i: Length of vector A section */
129 : )
130 : {
131 :
132 : Dyn_Mem_Deluxe_In(
133 : Counter i;
134 : Word16 pulse_tot_far, pulse_tot, pulse_totA, pulse_totB;
135 : Word16 xabs[PVQ_MAX_VEC_SIZE];
136 : Word16 max_xabs, max_xabsA, max_xabsB;
137 : Word32 L_xsum, L_xsumA;
138 : Word32 L_yy, L_xy;
139 : Word16 imax;
140 : Counter k;
141 : Word16 dim_m1;
142 : Word16 dimB;
143 : const Word16 *xBptr;
144 : Word16 pulses_far, pulses, pulsesA, pulsesB;
145 : );
146 :
147 : #ifdef WMOPS
148 : push_wmops("pvq_enc_search_fx");
149 : #endif
150 :
151 0 : pulses_far = pulses_fin[0]; move16();
152 0 : pulses = pulses_fin[1]; move16();
153 0 : pulsesA = pulses_fin[2]; move16();
154 0 : pulsesB = pulses_fin[3]; move16();
155 :
156 0 : FOR (i = 0; i < N_SCF_SHAPES_ST2; i++)
157 : {
158 0 : L_corr[i] = L_deposit_l(0);
159 0 : L_search_en[i] = L_deposit_l(0);
160 : }
161 :
162 0 : dimB = sub(dim, dimA);
163 :
164 0 : L_xsum = L_deposit_h(0);
165 :
166 0 : max_xabs = -1; move16();
167 0 : max_xabsA = -1; move16();
168 0 : max_xabsB = -1; move16();
169 0 : FOR (i = 0; i < dimA; i++)
170 : {
171 0 : xabs[i] = abs_s(x[i]); move16(); /* Qx */
172 0 : max_xabsA = s_max(max_xabsA, xabs[i]); /* for efficient search correlation scaling */
173 0 : L_xsum = L_mac0(L_xsum, 1, xabs[i]); /* stay in Qx */
174 : }
175 :
176 0 : basop_memset(y_far, 0, dim * sizeof(Word16));
177 0 : basop_memset(y, 0, dimA * sizeof(Word16));
178 0 : basop_memset(yA, 0, dimA * sizeof(Word16));
179 :
180 0 : L_xsumA = L_add(L_xsum, 0); /* save for section A projection */
181 :
182 0 : FOR (i = dimA; i < dim; i++)
183 : {
184 0 : xabs[i] = abs_s(x[i]); move16(); /* Qx */
185 0 : max_xabsB = s_max(max_xabsB, xabs[i]); /* for efficient search correlation scaling */
186 0 : L_xsum = L_mac0(L_xsum, 1, xabs[i]); /* stay in Qx */
187 : }
188 :
189 0 : basop_memset(&y[dimA], 0, (dim - dimA) * sizeof(Word16));
190 :
191 0 : basop_memset(yB, 0, dimB * sizeof(Word16));
192 :
193 0 : max_xabs = s_max(max_xabsA, max_xabsB); /* global max abs value */
194 :
195 0 : test();
196 0 : IF (L_xsum == 0)
197 : { /* no shape in any section, projection in outl_far, outl_near, A, AB not possible, any search meaningless */
198 :
199 0 : dim_m1 = sub(dim, 1);
200 0 : y_far[0] = shr_pos(pulses_far, 1); move16();
201 0 : y_far[dim_m1] = add(y_far[dim_m1], sub(pulses_far, y_far[0])); move16();
202 :
203 0 : dim_m1 = sub(dim, 1);
204 0 : y[0] = shr_pos(pulses, 1); move16();
205 0 : y[dim_m1] = add(y[dim_m1], sub(pulses, y[0])); move16();
206 :
207 0 : dim_m1 = sub(dimA, 1);
208 0 : yA[0] = shr_pos(pulsesA, 1); move16();
209 0 : yA[dim_m1] = add(yA[dim_m1], sub(pulsesA, yA[0])); move16();
210 :
211 0 : dim_m1 = sub(dimB, 1);
212 0 : yB[0] = shr_pos(pulsesB, 1); move16();
213 0 : yB[dim_m1] = add(yB[dim_m1], sub(pulsesB, yB[0])); move16();
214 : }
215 : ELSE
216 : {
217 0 : ASSERT(pulses_proj[0] > 0);
218 0 : ASSERT(L_xsum > 0);
219 :
220 0 : pvq_pyr_project(dim, xabs, L_xsum, pulses_proj[0], y_far, &pulse_tot_far, &L_xy,
221 : &L_yy); /* outlier submode projection */
222 :
223 0 : ASSERT(pulses_far <= 127);
224 0 : FOR (k = pulse_tot_far; k < pulses_far; k++)
225 : {
226 0 : L_yy = L_add(L_yy, 1); /* pre add 1 in Q0 in L_yyQ0 = (x^2 + 2*x + 1) */
227 0 : imax = one_pulse_search(0, dim, xabs, y_far, &pulse_tot_far, &L_xy, &L_yy, max_xabs);
228 : }
229 0 : ASSERT(pulse_tot_far == pulses_far);
230 : /* outlier far submode result vector in y_far[0...15] */
231 0 : L_corr[0] = L_shr_pos(L_xy, 1); /* to Qin*Q0 */
232 :
233 0 : basop_memmove(y, y_far, dim * sizeof(Word16)); /*y_far->y */
234 :
235 0 : pulse_tot = pulse_tot_far; move16();
236 :
237 0 : ASSERT(pulses <= 127);
238 0 : FOR (k = pulse_tot; k < pulses; k++)
239 : {
240 0 : L_yy = L_add(L_yy, 1); /* pre add 1 in Q0 in L_yyQ0 = (x^2 + 2*x + 1) */
241 0 : imax = one_pulse_search(0, dim, xabs, y, &pulse_tot, &L_xy, &L_yy, max_xabs);
242 : }
243 :
244 : /* outlier near submode result vector in y[0...15] */
245 0 : L_corr[1] = L_shr_pos(L_xy, 1); /* to Qin*Q0 */
246 :
247 0 : ASSERT(pulse_tot == pulses);
248 :
249 0 : IF (L_xsumA == 0)
250 : {
251 : /* no shape in A section, projection in A not possible, search meaningless */
252 0 : dim_m1 = sub(dimA, 1);
253 0 : yA[0] = shr_pos(pulsesA, 1); move16();
254 0 : yA[dim_m1] = add(yA[dim_m1], sub(pulsesA, yA[0])); move16();
255 : }
256 : ELSE
257 : {
258 0 : IF (pulses_proj[2] != 0) /* fixed setup if bitrate is fixed */
259 : {
260 0 : ASSERT(pulses_proj[2] > 0);
261 0 : ASSERT(L_xsumA > 0);
262 0 : pvq_pyr_project(dimA, xabs, L_xsumA, pulses_proj[2], yA, &pulse_totA, &L_xy,
263 : &L_yy); /* section A , in submode 1 projection */
264 : }
265 : ELSE
266 : {
267 : /* default, otherwise recalculate A from outlier result (to remove any section B pulses influence)
268 : */
269 0 : pulse_totA = 0; move16();
270 0 : L_xy = L_deposit_l(0);
271 0 : L_yy = L_deposit_l(0);
272 :
273 0 : basop_memmove(yA, y, dimA * sizeof(Word16));
274 0 : FOR (i = 0; i < dimA; i++)
275 : {
276 0 : pulse_totA = add(pulse_totA, yA[i]); /* Q0 */
277 0 : L_xy = L_mac(L_xy, xabs[i], yA[i]); /* Corr, Q0*Q12 +1 --> Q13 */
278 0 : L_yy = L_mac(L_yy, yA[i], yA[i]); /* Energy, Q(0+0)+1)= Q1 */
279 : }
280 0 : L_yy = L_shr_pos(L_yy, 1); /* En to Q0 */
281 : }
282 :
283 : /* search remaining pulses in regular section A */
284 0 : FOR (k = pulse_totA; k < pulsesA; k++)
285 : {
286 0 : L_yy = L_add(L_yy, 1); /* 1 added in Q0 */
287 0 : imax = one_pulse_search(0, dimA, xabs, yA, &pulse_totA, &L_xy, &L_yy, max_xabsA);
288 : }
289 0 : ASSERT(pulse_totA == pulsesA);
290 : } /* L_xsumA!=0 */
291 :
292 : /* reg Set A result vector now in yA[0...9] */
293 0 : L_corr[2] = L_shr_pos(L_xy, 1); /* to Qin*Q0 */
294 :
295 : /* search remaining pulses in regular section B, even if energy in B is zero */
296 0 : ASSERT(pulses_proj[3] == 0);
297 0 : pulse_totB = 0; move16();
298 :
299 0 : IF (sub(pulsesB, 1) == 0)
300 : { /* LC search, sufficient to find a single max, as pulses can not be stacked, when nb-pulses==1 */
301 0 : imax = 0; move16(); /* safety */
302 0 : FOR (i = dimA; i < dim; i++)
303 : {
304 0 : if (xabs[i] == max_xabsB)
305 : {
306 0 : imax = sub(i, dimA);
307 : }
308 : }
309 0 : pulse_totB = 1; move16();
310 0 : yB[imax] = 1; move16(); /* reg set B result vector in yB[0...5] */
311 0 : L_xy = L_mac(L_xy, xabs[add(imax, dimA)], 1); /* calc total corr for A+B sections */
312 0 : L_yy = L_add(L_yy, 1);
313 : }
314 : ELSE
315 : { /* more than one target pulse in section B */
316 : /* keep A pulses influence, search section B pulses influence */
317 0 : FOR (k = pulse_totB; k < pulsesB; k++)
318 : {
319 0 : L_yy = L_add(L_yy, 1); /* 1 added in Q0*/
320 0 : imax = one_pulse_search(dimA, dim, xabs, &(yB[-dimA]), &pulse_totB, &L_xy, &L_yy, max_xabsB);
321 : }
322 : }
323 :
324 0 : L_corr[3] = L_shr_pos(L_xy, 1); move32(); /* to Qin*Q0 , corr of combined A and B */
325 :
326 0 : ASSERT(pulse_totB == pulsesB);
327 : /* reg set B result vector now in yB[0...5] */
328 : } /* L_xsum != 0 */
329 :
330 : /* apply sign of (x) to first orthant result */
331 0 : FOR (i = 0; i < dim; i++)
332 : {
333 0 : if (x[i] < 0)
334 : {
335 0 : y_far[i] = negate(y_far[i]); /* apply sign for outlier far */
336 : }
337 : }
338 :
339 0 : FOR (i = 0; i < dim; i++)
340 : {
341 0 : if (x[i] < 0)
342 : {
343 0 : y[i] = negate(y[i]); /* apply sign for outliers near */
344 : }
345 : }
346 :
347 0 : xBptr = &(x[dimA]); move32(); /* ptr init to B target section */
348 0 : FOR (i = 0; i < dimA; i++)
349 : {
350 0 : if (x[i] < 0)
351 : {
352 0 : yA[i] = negate(yA[i]); /* apply sign in N_SETA */
353 : }
354 : }
355 :
356 0 : FOR (i = 0; i < (dimB); i++)
357 : {
358 0 : if (xBptr[i] < 0)
359 : {
360 0 : yB[i] = negate(yB[i]); /* apply sign in N_SETB */
361 : }
362 : }
363 :
364 : Dyn_Mem_Deluxe_Out();
365 : #ifdef WMOPS
366 : pop_wmops();
367 : #endif
368 0 : }
369 :
|