Line data Source code
1 : /******************************************************************************
2 : * ETSI TS 103 634 V1.5.1 *
3 : * Low Complexity Communication Codec Plus (LC3plus) *
4 : * *
5 : * Copyright licence is solely granted through ETSI Intellectual Property *
6 : * Rights Policy, 3rd April 2019. No patent licence is granted by implication, *
7 : * estoppel or otherwise. *
8 : ******************************************************************************/
9 :
10 : #include "defines.h"
11 : #include "functions.h"
12 :
13 : #define MAX_ACCS 3 /* first dimension of L_ce[][]: one accumulator each for sum(x.*y), sum(x.*x), sum(y.*y) */
14 : #define MAX_BLOCKS 8 /* second dimension of L_ce[][]: max number of interleaved accumulator sub_blocks */
15 : #define MAX_ACC_LEN_BITS 7 /* log2 of the long sub_block length (1 << 7 = 128 samples) */
16 : #define MIN_ACC_LEN_BITS 5 /* log2 of the short sub_block length (1 << 5 = 32 samples) */
17 : #define MAX_ACC_LEN (1 << MAX_ACC_LEN_BITS) /* longest sub_block length in samples */
18 : #define MIN_PITCH_8K 20 /* 8000* MIN_PITCH_12k8/12800 */
19 : /* Lower bound for the correlation length, indexed by fs_idx in plc_xcorr_lc_fx().
   Each entry is 2 * MIN_PITCH_8K scaled by 1, 2, 3, 4, 6, 12
   (presumably 8/16/24/32/48/96 kHz relative to 8 kHz - confirm against the fs_idx definition). */
20 : static const Word16 pitch_min_2[] = {2 * MIN_PITCH_8K , 2 * MIN_PITCH_8K * 2, 2 * MIN_PITCH_8K * 3,
21 : 2 * MIN_PITCH_8K * 4, 2 * MIN_PITCH_8K * 6, 2 * MIN_PITCH_8K * 12};
22 :
23 : /* req headroom in bits, for safe summing of block results w/o downshift */
24 : /* also the safe pre subblock acc downshift for various number of blocks */
   /* Indexed by n_blocks; for n_blocks = 1..8 the entry equals ceil(log2(n_blocks)). */
   /* Entry 0 is a placeholder: the consumer asserts n_blocks > 0. */
25 : static const Word16 tab_req_headroom[MAX_BLOCKS + 1] = {0, 0, 1, 2, 2, 3, 3, 3, 3};
26 : /* index n_blocks: (0, 1, 2, 3, 4, 5, 6, 7, 8) */
27 : /* Forward declaration; it is static, so the definition that follows has internal linkage as well. */
28 : static Word16 plc_norm_corr_blocks_fx(Word16 tot_len, Word16 l2_base_len, Word16 n_blocks, Word16 inshift,
29 : Word16 *currFrame, Word16 *predFrame);
30 :
31 0 : Word16 plc_norm_corr_blocks_fx( /* o: norm_corr range [-1 ... 1.0[ in Q15 */
32 : Word16 tot_len, /* i: total correlation length in Q0 */
33 : Word16 l2_base_len, /* i: size of subblocks in log2 (only checked in an ASSERT below) */
34 : Word16 n_blocks, /* i: number of accumulator sub_blocks, 1..MAX_BLOCKS */
35 : Word16 inshift, /* i: required inshift of curr/pred Q0; right shift (> 0) applied to every sample */
36 : Word16 *currFrame, /* i: ptr to most recent section */
37 : Word16 *predFrame) /* i: ptr to historic section */
38 : {
39 : Word16 scale0, scale1, scale2, scale_min, shift, prod_exp, acc_margin;
40 0 : Word32 L_prod, L_inv, L_tmp0 = 0, L_tmp1 = 0, L_tmp2 = 0;
41 : Word16 norm_corr, curr, pred;
42 : Counter m, b;
43 : Word32 L_ce[MAX_ACCS][MAX_BLOCKS]; /* per-block partial sums: [0] curr*pred, [1] pred*pred, [2] curr*curr */
44 :
45 : #ifdef DYNMEM_COUNT
46 : Dyn_Mem_In("plc_norm_corr_blocks_fx", sizeof(struct {
47 : Word16 scale0, scale1, scale2, scale_min, shift, prod_exp, acc_margin;
48 : Word32 L_prod, L_inv, L_tmp0, L_tmp1, L_tmp2;
49 : Word16 norm_corr, curr, pred;
50 : Counter m, b;
51 : Word32 L_ce[MAX_ACCS][MAX_BLOCKS];
52 : }));
53 : #endif
54 :
55 : #ifdef WMOPS
56 : push_wmops("plc_norm_corr_blocks_fx");
57 : #endif
58 :
59 : /* Calculate normalized correlation with added shift and block interleaving possibility.
   Steps:
   1) split the tot_len samples into n_blocks interleaved sub_blocks and accumulate, per block,
      the cross term and both energies of the down-shifted samples;
   2) sum the block results, first coarsely (pre-shifted) and, if the headroom allows,
      again without any shift;
   3) form corr * (1/sqrt(en_pred * en_curr)) with explicit exponent tracking and
      convert the result to a saturated Q15 value. */
60 0 : ASSERT(n_blocks <= MAX_BLOCKS && n_blocks > 0);
61 0 : ASSERT(((float)tot_len / (float)n_blocks) <= (float)(1 << l2_base_len)); /* no block may exceed 2^l2_base_len samples */
62 0 : ASSERT(inshift > 0);
63 : UNUSED(l2_base_len);
64 : /* Block b covers the samples b, b + n_blocks, b + 2*n_blocks, ... below tot_len. */
65 0 : FOR (b = 0; b < n_blocks; b++)
66 : { /* block loop with fixed pre_down shifting(inshift) of input signal */
   /* NOTE: sample b is read unconditionally, so the caller must provide n_blocks <= tot_len. */
67 0 : curr = shr_pos(currFrame[b], inshift);
68 0 : pred = shr_pos(predFrame[b], inshift);
69 0 : L_tmp2 = L_deposit_l(0); /* zero used as the common start value of all three accumulators */
70 0 : L_tmp0 = L_msu0(L_tmp2, curr, pred); /* acc L_tmp0 on negative side to avoid saturation for (-1*-1) */
71 0 : L_tmp1 = L_msu0(L_tmp2, pred, pred); /* acc_energy on negative side */
72 0 : L_tmp2 = L_msu0(L_tmp2, curr, curr); /* acc_energy on negative side */
73 :
74 0 : FOR (m = (b + n_blocks); m < tot_len; m += n_blocks)
75 : { /* interleaved accumulation over total length */
76 0 : curr = shr_pos(currFrame[m], inshift);
77 0 : pred = shr_pos(predFrame[m], inshift);
78 0 : L_tmp0 = L_msu0(L_tmp0, curr, pred);
79 0 : L_tmp1 = L_msu0(L_tmp1, pred, pred);
80 0 : L_tmp2 = L_msu0(L_tmp2, curr, curr);
81 : }
82 :
83 0 : L_ce[0][b] = L_add(L_tmp0, 0); /* account for moves from register to stack memory */
84 0 : L_ce[1][b] = L_add(L_tmp1, 0);
85 0 : L_ce[2][b] = L_add(L_tmp2, 0);
86 : }
87 :
88 : /* aggregate interleaved subsections */
   /* The condition below holds for every n_blocks >= 1, i.e. for all values the ASSERT above accepts. */
89 0 : IF (sub(n_blocks, 1) >= 0)
90 : { /* 100% safe non saturating L_ce with a safe acc_margin */
91 0 : acc_margin = tab_req_headroom[n_blocks]; move16();
92 : /* coarse pass: every block result is shifted right by acc_margin before it is added */
93 0 : L_tmp0 = L_shr_pos(L_ce[0][0], acc_margin);
94 0 : L_tmp1 = L_shr_pos(L_ce[1][0], acc_margin);
95 0 : L_tmp2 = L_shr_pos(L_ce[2][0], acc_margin);
96 :
97 0 : FOR (b = 1; b < n_blocks; b++)
98 : {
99 0 : L_tmp0 = L_add(L_tmp0, L_shr_pos(L_ce[0][b], acc_margin)); /* add negative values */
100 0 : L_tmp1 = L_add(L_tmp1, L_shr_pos(L_ce[1][b], acc_margin)); /* add negative values */
101 0 : L_tmp2 = L_add(L_tmp2, L_shr_pos(L_ce[2][b], acc_margin)); /* add negative values */
102 : }
103 :
104 : /* evaluate headroom margin in coarse representation */
105 0 : scale0 = norm_l(L_tmp0);
106 0 : scale1 = norm_l(L_tmp1);
107 0 : scale2 = norm_l(L_tmp2);
108 :
109 0 : scale_min = s_min(scale0, scale1);
110 0 : scale_min = s_min(scale_min, scale2);
111 : /* shift >= 0 means the smallest headroom of the coarse sums is at least acc_margin bits */
112 0 : shift = sub(scale_min, acc_margin);
113 0 : IF (shift >= 0)
114 : { /* re-accumulate blocks with highest possible precision */
115 0 : L_tmp0 = L_add(L_ce[0][0], 0); /* add negative values */
116 0 : L_tmp1 = L_add(L_ce[1][0], 0); /* add negative values */
117 0 : L_tmp2 = L_add(L_ce[2][0], 0); /* add negative values */
118 :
119 0 : FOR (b = 1; b < n_blocks; b++)
120 : {
121 0 : L_tmp0 = L_add(L_tmp0, L_ce[0][b]); /* add negative values */
122 0 : L_tmp1 = L_add(L_tmp1, L_ce[1][b]); /* add negative values */
123 0 : L_tmp2 = L_add(L_tmp2, L_ce[2][b]); /* add negative values */
124 : }
125 : }
   /* otherwise the coarse sums are kept; all three carry the same acc_margin shift */
126 : }
127 :
128 : /* quota: norm_corr = corr/sqrt(en1*en2) = negate(L_tmp0)/sqrt(-L_tmp1*-L_tmp2) */
129 0 : L_tmp1 = L_min(L_tmp1, -1); /* make sure there is negative energy */
130 0 : L_tmp2 = L_min(L_tmp2, -1); /* make sure there is negative energy */
131 :
132 0 : ASSERT(L_tmp1 < 0 && L_tmp2 < 0);
133 :
134 : /* negate correlation, due to the used safe msu0 accumulation, with a saturation pre-check ctrl */
135 0 : L_tmp0 = L_max(L_tmp0, (MIN_32 + 1)); /* keeps MIN_32 out of the negation below */
136 0 : L_tmp0 = L_negate(L_tmp0);
137 :
138 0 : scale0 = norm_l(L_tmp0);
139 0 : scale1 = norm_l(L_tmp1);
140 0 : scale2 = norm_l(L_tmp2);
141 : /* normalize both (negative) energies, multiply them and normalize the product again */
142 0 : L_tmp1 = L_shl_pos(L_tmp1, scale1);
143 0 : L_tmp2 = L_shl_pos(L_tmp2, scale2);
144 0 : L_prod = Mpy_32_32_lc3plus(L_tmp1, L_tmp2); /* neg * neg -> positive */
145 0 : shift = norm_l(L_prod);
146 0 : L_prod = L_shl_pos(L_prod, shift);
147 0 : prod_exp = sub(62, add(add(scale1, scale2), shift)); /* 62 minus all left shifts applied so far; presumably the exponent of the energy product - confirm against Mpy_32_32_lc3plus */
148 0 : L_inv = Isqrt_lc3plus(L_prod, &prod_exp); /* prod_exp is passed by address; presumably updated to the exponent of L_inv - confirm */
149 :
150 0 : L_tmp0 = L_shl_pos(L_tmp0, scale0);
151 0 : L_prod = Mpy_32_32_lc3plus(L_tmp0, L_inv); /* normalized correlation times inverse square root */
152 0 : prod_exp = add(sub(31, scale0), prod_exp); /* (31 - scale0) added to the exponent from Isqrt_lc3plus */
153 : /* Preset the result to the saturation value matching the sign of the correlation. */
154 0 : norm_corr = 32767; move16(); /* as close to 1.0 as possible in Q15 */
155 0 : if (L_tmp0 < 0) /* plain C if here, not the IF macro used elsewhere in this function */
156 : {
157 0 : norm_corr = -32768; move16(); /*-1.0*/
158 : }
159 : /* Replace the preset only if L_prod is zero or has at least prod_exp bits of headroom for the shift. */
160 0 : test();
161 0 : IF (L_prod == 0 || sub(norm_l(L_prod), prod_exp) >= 0)
162 : {
163 0 : norm_corr = round_fx_sat(L_shl_sat(L_prod, prod_exp));
164 : }
165 :
166 : #ifdef DYNMEM_COUNT
167 : Dyn_Mem_Out();
168 : #endif
169 : #ifdef WMOPS
170 : pop_wmops();
171 : #endif
172 0 : return norm_corr;
173 : }
174 :
175 0 : Word16 plc_xcorr_lc_fx( /* o: quantized output xcorr in Q15 [ 0 ..32767 ] = [0. 1.0[ */
176 : Word16 *pcmbuf_fx, /* i: NB should be an already dynamically upscaled pcm buffer with about
177 : 0...1(2) bits margin */
178 : Word16 max_len_pcm_plc, /* i: Q0 physical size of pcmbuf_fx */
179 : Word16 pitch_int, /* i: Q0 in Fs, lag value to evaluate, corresponding to the current f0 in
180 : pcm_buf_Fx */
181 : Word16 fs_idx /* i: index selecting the rectLengthTab[] and pitch_min_2[] entries (presumably the sampling rate index) */)
182 : {
183 : Word16 *range1Ptr;
184 : Word16 *range2Ptr;
185 : Word16 corr_len_fx, inshift, l2_base_len, n_blocks, norm_xcorr_est_q, pcm_max_corr_len, max_corr_len;
186 :
187 : #ifdef DYNMEM_COUNT
188 : Dyn_Mem_In("plc_xcorr_lc_fx", sizeof(struct {
189 : Word16 *range1Ptr;
190 : Word16 *range2Ptr;
191 : Word16 corr_len_fx, inshift, l2_base_len, n_blocks, norm_xcorr_est_q, pcm_max_corr_len, max_corr_len;
192 : }));
193 : #endif
194 :
195 : #ifdef WMOPS
196 : push_wmops("plc_xcorr_lc_fx");
197 : #endif
198 : /* Correlates the newest corr_len_fx samples of pcmbuf_fx with the section lying pitch_int
   samples earlier and returns the normalized correlation limited to non-negative values.
   For pitch_int <= 0 nothing is evaluated and 0 is returned. */
199 0 : norm_xcorr_est_q = 0; move16();
200 :
201 0 : IF (pitch_int > 0)
202 : {
203 0 : pcm_max_corr_len = sub(max_len_pcm_plc, pitch_int); /* samples left in the buffer after stepping back one lag */
204 :
205 0 : max_corr_len = rectLengthTab[fs_idx]; /* maximum 10 ms */
206 0 : max_corr_len = s_min(max_corr_len, pcm_max_corr_len);
207 :
208 0 : corr_len_fx = s_min(max_corr_len, pitch_int);
209 0 : corr_len_fx = s_max(corr_len_fx, pitch_min_2[fs_idx]); /* at least 5 ms (=2*pitchmin*) corr length */
210 : /* The s_max above is applied last and may lift corr_len_fx above max_corr_len; the ASSERTs below check for that. */
211 0 : ASSERT(corr_len_fx >= (pitch_min_2[fs_idx])); /* at least 2 x pitch min(fs) */
212 0 : ASSERT(corr_len_fx <= (MAX_ACC_LEN * MAX_BLOCKS));
213 0 : ASSERT(corr_len_fx <= max_corr_len);
214 0 : ASSERT( max_len_pcm_plc - corr_len_fx - pitch_int + 1 > 0 ); /* range2Ptr must not point before pcmbuf_fx[0] */
215 :
216 0 : range1Ptr = &(pcmbuf_fx[max_len_pcm_plc]) - corr_len_fx; /* ptr setup, start of head section */
217 0 : range2Ptr = range1Ptr - pitch_int; /* ptr setup, history = tail - lag distance */
218 :
219 : /* assume 32 bit acc of up to 32 values -> sum(over 32, x_up>>2 * y_up>>2) */
220 0 : inshift = 2; move16();
221 0 : l2_base_len = MIN_ACC_LEN_BITS; move16();
222 0 : n_blocks = shr(add(corr_len_fx, (1 << MIN_ACC_LEN_BITS) - 1), MIN_ACC_LEN_BITS); /* ceil(corr_len_fx / 32) */
223 :
224 0 : IF (sub(n_blocks, MAX_BLOCKS) > 0)
225 : { /* shift to 32 bit acc of up to 128 values -> sum(over 128, x_up>>3 * y_up>>3) */
226 0 : inshift = 3; move16();
227 0 : l2_base_len = MAX_ACC_LEN_BITS; move16();
228 0 : n_blocks = shr(add(corr_len_fx, ((1 << MAX_ACC_LEN_BITS) - 1)), MAX_ACC_LEN_BITS); /* ceil(corr_len_fx / 128) */
229 : }
230 :
231 0 : ASSERT(n_blocks <= MAX_BLOCKS); /* MAX_BLOCKS*(32 or 128) is max possible total corr_length */
232 0 : ASSERT(n_blocks > 0);
233 :
234 : /* subblock accumulation of corr and energies, to achieve high low level precision */
235 : norm_xcorr_est_q =
236 0 : plc_norm_corr_blocks_fx(corr_len_fx, l2_base_len, n_blocks, inshift, range1Ptr, /* curr_frame */
237 : range2Ptr); /* pred_frame = curr_frame-lag, i.e historic section */
238 :
239 0 : norm_xcorr_est_q = s_max(0, norm_xcorr_est_q); /* do not allow negative output values */
240 : }
241 : #ifdef DYNMEM_COUNT
242 : Dyn_Mem_Out();
243 : #endif
244 : #ifdef WMOPS
245 : pop_wmops();
246 : #endif
247 0 : return norm_xcorr_est_q;
248 : }
249 :
250 :
|