Line data Source code
1 : /******************************************************************************
2 : * ETSI TS 103 634 V1.5.1 *
3 : * Low Complexity Communication Codec Plus (LC3plus) *
4 : * *
5 : * Copyright licence is solely granted through ETSI Intellectual Property *
6 : * Rights Policy, 3rd April 2019. No patent licence is granted by implication, *
7 : * estoppel or otherwise. *
8 : ******************************************************************************/
9 :
10 : #include "functions.h"
11 :
/*
 * idct16_fx - inverse-DCT butterfly network over 16 Word16 values.
 *
 *   in  : input coefficients, read as in[0] .. in[15]
 *   out : output values, written as out[0] .. out[15]
 *
 * All arithmetic goes through the 16-bit basic operators add(), sub() and
 * mult_r() (presumably the saturating/rounding ITU-T STL basops - confirm in
 * the basop headers).  In the per-line comments C = cos and S = sin; the
 * multipliers applied directly to in[] carry an extra 1/sqrt(8) factor, the
 * inner rotations do not.  The constants look like Q15 values, e.g.
 * 8192 = cos(pi/4)/sqrt(8) = 0.25 and 23170 = cos(pi/4).
 *
 * The a* and b* temporaries are overwritten alternately from stage to stage,
 * so the statement order inside and between stages must not be changed.
 * Every in[] read happens before the first out[] store.
 */
12 0 : void idct16_fx(const Word16 *in, Word16 *out)
13 : {
/* Dyn_Mem_Deluxe_In/Out wrap the local declarations; project macros,
   presumably for memory accounting - TODO confirm in functions.h. */
14 : Dyn_Mem_Deluxe_In(
15 : Word16 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15;
16 : Word16 b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15;
17 : );
18 :
/* Stage 1: rotate the odd-indexed inputs in pairs (1,15) (9,7) (5,11) (13,3). */
19 0 : a8 = add(mult_r(in[1], 1136), mult_r(in[15], -11529)); /* Sπ/32/√8 -S15π/32/√8 */
20 0 : a9 = add(mult_r(in[9], 8956), mult_r(in[7], -7350)); /* S9π/32/√8 -S7π/32/√8 */
21 0 : a10 = add(mult_r(in[5], 5461), mult_r(in[11], -10217)); /* S5π/32/√8 -S11π/32/√8 */
22 0 : a11 = add(mult_r(in[13], 11086), mult_r(in[3], -3363)); /* S13π/32/√8 -S3π/32/√8 */
23 0 : a12 = add(mult_r(in[3], 11086), mult_r(in[13], 3363)); /* C3π/32/√8 C13π/32/√8 */
24 0 : a13 = add(mult_r(in[11], 5461), mult_r(in[5], 10217)); /* C11π/32/√8 C5π/32/√8 */
25 0 : a14 = add(mult_r(in[7], 8956), mult_r(in[9], 7350)); /* C7π/32/√8 C9π/32/√8 */
26 0 : a15 = add(mult_r(in[15], 1136), mult_r(in[1], 11529)); /* C15π/32/√8 Cπ/32/√8 */
27 :
/* Stage 2: rotate inputs 2, 14, 10, 6 into b4..b7 and form sums/differences
   of the stage-1 results in b8..b15. */
28 0 : b4 = add(mult_r(in[2], 2260), mult_r(in[14], -11363)); /* Sπ/16/√8 -S7π/16/√8 */
29 0 : b5 = add(mult_r(in[10], 9633), mult_r(in[6], -6436)); /* S5π/16/√8 -S3π/16/√8 */
30 0 : b6 = add(mult_r(in[6], 9633), mult_r(in[10], 6436)); /* C3π/16/√8 C5π/16/√8 */
31 0 : b7 = add(mult_r(in[14], 2260), mult_r(in[2], 11363)); /* C7π/16/√8 Cπ/16/√8 */
32 0 : b8 = add(a9, a8);
33 0 : b9 = sub(a8, a9);
34 0 : b10 = sub(a11, a10);
35 0 : b11 = add(a10, a11);
36 0 : b12 = add(a13, a12);
37 0 : b13 = sub(a12, a13);
38 0 : b14 = sub(a15, a14);
39 0 : b15 = add(a14, a15);
40 :
/* Stage 3: rotate inputs 0, 8, 4, 12 into a0..a3, butterfly b4..b7 into
   a4..a7, and rotate (b9,b14) and (b10,b13) by the pi/8 pair; b8, b11, b12
   and b15 are copied through unchanged. */
41 0 : a0 = add(mult_r(in[0], 8192), mult_r(in[8], 8192)); /* Cπ/4/√8 Cπ/4/√8 */
42 0 : a1 = add(mult_r(in[8], -8192), mult_r(in[0], 8192)); /* -Cπ/4/√8 Cπ/4/√8 */
43 0 : a2 = add(mult_r(in[4], 4433), mult_r(in[12], -10703)); /* Sπ/8/√8 -S3π/8/√8 */
44 0 : a3 = add(mult_r(in[12], 4433), mult_r(in[4], 10703)); /* C3π/8/√8 Cπ/8/√8 */
45 0 : a4 = add(b5, b4);
46 0 : a5 = sub(b4, b5);
47 0 : a6 = sub(b7, b6);
48 0 : a7 = add(b6, b7);
49 0 : a8 = b8; move16();
50 0 : a9 = add(mult_r(b9, -30274), mult_r(b14, 12540)); /* -Cπ/8 C3π/8 */
51 0 : a10 = add(mult_r(b10, -12540), mult_r(b13, -30274)); /* -Sπ/8 -S3π/8 */
52 0 : a11 = b11; move16();
53 0 : a12 = b12; move16();
54 0 : a13 = add(mult_r(b13, 12540), mult_r(b10, -30274)); /* C3π/8 -Cπ/8 */
55 0 : a14 = add(mult_r(b14, 30274), mult_r(b9, 12540)); /* S3π/8 Sπ/8 */
56 0 : a15 = b15; move16();
57 :
/* Stage 4: butterflies on a0..a3 and a8..a15, pi/4 rotation of (a5,a6);
   a4 and a7 are copied through unchanged. */
58 0 : b0 = add(a3, a0);
59 0 : b1 = add(a2, a1);
60 0 : b2 = sub(a1, a2);
61 0 : b3 = sub(a0, a3);
62 0 : b4 = a4; move16();
63 0 : b5 = add(mult_r(a5, -23170), mult_r(a6, 23170)); /* -Cπ/4 Cπ/4 */
64 0 : b6 = add(mult_r(a6, 23170), mult_r(a5, 23170)); /* Cπ/4 Cπ/4 */
65 0 : b7 = a7; move16();
66 0 : b8 = add(a11, a8);
67 0 : b9 = add(a10, a9);
68 0 : b10 = sub(a9, a10);
69 0 : b11 = sub(a8, a11);
70 0 : b12 = sub(a15, a12);
71 0 : b13 = sub(a14, a13);
72 0 : b14 = add(a13, a14);
73 0 : b15 = add(a12, a15);
74 :
/* Stage 5: butterflies on b0..b7 and pi/4 rotations of (b10,b13), (b11,b12).
   b8, b9, b14 and b15 are used directly by the output stage. */
75 0 : a0 = add(b7, b0);
76 0 : a1 = add(b6, b1);
77 0 : a2 = add(b5, b2);
78 0 : a3 = add(b4, b3);
79 0 : a4 = sub(b3, b4);
80 0 : a5 = sub(b2, b5);
81 0 : a6 = sub(b1, b6);
82 0 : a7 = sub(b0, b7);
83 0 : a10 = add(mult_r(b10, -23170), mult_r(b13, 23170)); /* -Cπ/4 Cπ/4 */
84 0 : a11 = add(mult_r(b11, -23170), mult_r(b12, 23170)); /* -Cπ/4 Cπ/4 */
85 0 : a12 = add(mult_r(b12, 23170), mult_r(b11, 23170)); /* Cπ/4 Cπ/4 */
86 0 : a13 = add(mult_r(b13, 23170), mult_r(b10, 23170)); /* Cπ/4 Cπ/4 */
87 :
/* Output stage: out[k] and out[15-k] are the sum and difference of the same
   pair of intermediate values. */
88 0 : out[0] = add(b15, a0); move16();
89 0 : out[1] = add(b14, a1); move16();
90 0 : out[2] = add(a13, a2); move16();
91 0 : out[3] = add(a12, a3); move16();
92 0 : out[4] = add(a11, a4); move16();
93 0 : out[5] = add(a10, a5); move16();
94 0 : out[6] = add(b9, a6); move16();
95 0 : out[7] = add(b8, a7); move16();
96 0 : out[8] = sub(a7, b8); move16();
97 0 : out[9] = sub(a6, b9); move16();
98 0 : out[10] = sub(a5, a10); move16();
99 0 : out[11] = sub(a4, a11); move16();
100 0 : out[12] = sub(a3, a12); move16();
101 0 : out[13] = sub(a2, a13); move16();
102 0 : out[14] = sub(a1, b14); move16();
103 0 : out[15] = sub(a0, b15); move16();
104 :
105 : Dyn_Mem_Deluxe_Out();
106 0 : }
107 :
/*
 * dct32_fx - forward-DCT butterfly network over 16 Word32 values.
 *
 *   in  : input samples, read as in[0] .. in[15]
 *   out : output coefficients, written as out[0] .. out[15]
 *
 * Data is 32 bit wide and is multiplied by 16-bit constants through
 * Mpy_32_16_lc3plus() (presumably a 32x16 fractional multiply - confirm in
 * the basop headers).  In the per-line comments C = cos and S = sin; the
 * multipliers that produce out[] carry an extra 1/sqrt(8) factor, the inner
 * rotations do not.  The constants look like Q15 values, e.g.
 * 8192 = cos(pi/4)/sqrt(8) = 0.25 and 23170 = cos(pi/4).
 *
 * Some butterflies use the explicit *_sat operators and others the plain
 * L_add/L_sub; that mix is kept exactly as it was.
 *
 * Every plain Word32 copy and every out[] store is followed by move32(),
 * the 32-bit data-move instrumentation call, because the moved values are
 * Word32.  This only affects complexity counting, not the computed output.
 *
 * The a* and b* temporaries are overwritten alternately from stage to stage,
 * so the statement order inside and between stages must not be changed.
 * Every in[] read happens before the first out[] store.
 */
108 0 : void dct32_fx(const Word32 *in, Word32 *out)
109 : {
110 : Dyn_Mem_Deluxe_In(Word32 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15;
111 : Word32 b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15;);
112 :
/* Stage 1: sums in[k] + in[15-k] for k = 0..7, differences for k = 2..5. */
113 0 : a0 = L_add(in[15], in[0]);
114 0 : a1 = L_add(in[14], in[1]);
115 0 : a2 = L_add(in[13], in[2]);
116 0 : a3 = L_add(in[12], in[3]);
117 0 : a4 = L_add(in[11], in[4]);
118 0 : a5 = L_add(in[10], in[5]);
119 0 : a6 = L_add(in[9], in[6]);
120 0 : a7 = L_add(in[8], in[7]);
121 0 : a10 = L_sub(in[5], in[10]);
122 0 : a11 = L_sub(in[4], in[11]);
123 0 : a12 = L_sub(in[3], in[12]);
124 0 : a13 = L_sub(in[2], in[13]);
125 :
/* Stage 2: butterflies on a0..a7, remaining input differences (k = 0, 1, 6, 7)
   and pi/4 rotations of (a10,a13), (a11,a12). */
126 0 : b0 = L_add(a7, a0);
127 0 : b1 = L_add(a6, a1);
128 0 : b2 = L_add(a5, a2);
129 0 : b3 = L_add(a4, a3);
130 0 : b4 = L_sub(a3, a4);
131 0 : b5 = L_sub_sat(a2, a5);
132 0 : b6 = L_sub_sat(a1, a6);
133 0 : b7 = L_sub(a0, a7);
134 0 : b8 = L_sub(in[7], in[8]);
135 0 : b9 = L_sub(in[6], in[9]);
136 0 : b10 = L_add(Mpy_32_16_lc3plus(a10, -23170), Mpy_32_16_lc3plus(a13, 23170)); /* -Cπ/4 Cπ/4 */
137 0 : b11 = L_add(Mpy_32_16_lc3plus(a11, -23170), Mpy_32_16_lc3plus(a12, 23170)); /* -Cπ/4 Cπ/4 */
138 0 : b12 = L_add(Mpy_32_16_lc3plus(a12, 23170), Mpy_32_16_lc3plus(a11, 23170)); /* Cπ/4 Cπ/4 */
139 0 : b13 = L_add(Mpy_32_16_lc3plus(a13, 23170), Mpy_32_16_lc3plus(a10, 23170)); /* Cπ/4 Cπ/4 */
140 0 : b14 = L_sub(in[1], in[14]);
141 0 : b15 = L_sub(in[0], in[15]);
142 :
/* Stage 3: butterflies on b0..b3 and b8..b15, pi/4 rotation of (b5,b6);
   b4 and b7 are copied through unchanged. */
143 0 : a0 = L_add(b3, b0);
144 0 : a1 = L_add(b2, b1);
145 0 : a2 = L_sub(b1, b2);
146 0 : a3 = L_sub_sat(b0, b3);
147 0 : a4 = b4;
148 0 : move32();
149 0 : a5 = L_add(Mpy_32_16_lc3plus(b5, -23170), Mpy_32_16_lc3plus(b6, 23170)); /* -Cπ/4 Cπ/4 */
150 0 : a6 = L_add(Mpy_32_16_lc3plus(b6, 23170), Mpy_32_16_lc3plus(b5, 23170)); /* Cπ/4 Cπ/4 */
151 0 : a7 = b7;
152 0 : move32();
153 0 : a8 = L_add(b11, b8);
154 0 : a9 = L_add(b10, b9);
155 0 : a10 = L_sub(b9, b10);
156 0 : a11 = L_sub(b8, b11);
157 0 : a12 = L_sub(b15, b12);
158 0 : a13 = L_sub(b14, b13);
159 0 : a14 = L_add(b13, b14);
160 0 : a15 = L_add(b12, b15);
161 :
/* Outputs 0, 8, 4, 12 from a0..a3.  The comment after each move32()
   describes the multiplier pair of the store on the line before it. */
162 0 : out[0] = L_add(Mpy_32_16_lc3plus(a0, 8192), Mpy_32_16_lc3plus(a1, 8192));
163 0 : move32(); /* Cπ/4/√8 Cπ/4/√8 */
164 0 : out[8] = L_add(Mpy_32_16_lc3plus(a1, -8192), Mpy_32_16_lc3plus(a0, 8192));
165 0 : move32(); /* -Cπ/4/√8 Cπ/4/√8 */
166 0 : out[4] = L_add(Mpy_32_16_lc3plus(a2, 4433), Mpy_32_16_lc3plus(a3, 10703));
167 0 : move32(); /* Sπ/8/√8 Cπ/8/√8 */
168 0 : out[12] = L_add(Mpy_32_16_lc3plus(a3, 4433), Mpy_32_16_lc3plus(a2, -10703));
169 0 : move32(); /* C3π/8/√8 -S3π/8/√8 */
/* Stage 4: butterflies on a4..a7, pi/8 rotations of (a9,a14), (a10,a13);
   a8, a11, a12 and a15 are copied through unchanged. */
170 0 : b4 = L_add(a5, a4);
171 0 : b5 = L_sub(a4, a5);
172 0 : b6 = L_sub_sat(a7, a6);
173 0 : b7 = L_add(a6, a7);
174 0 : b8 = a8;
175 0 : move32();
176 0 : b9 = L_add(Mpy_32_16_lc3plus(a9, -30274), Mpy_32_16_lc3plus(a14, 12540)); /* -Cπ/8 Sπ/8 */
177 0 : b10 = L_add(Mpy_32_16_lc3plus(a10, -12540), Mpy_32_16_lc3plus(a13, -30274)); /* -Sπ/8 -Cπ/8 */
178 0 : b11 = a11;
179 0 : move32();
180 0 : b12 = a12;
181 0 : move32();
182 0 : b13 = L_add(Mpy_32_16_lc3plus(a13, 12540), Mpy_32_16_lc3plus(a10, -30274)); /* C3π/8 -S3π/8 */
183 0 : b14 = L_add(Mpy_32_16_lc3plus(a14, 30274), Mpy_32_16_lc3plus(a9, 12540)); /* S3π/8 C3π/8 */
184 0 : b15 = a15;
185 0 : move32();
186 :
/* Outputs 2, 10, 6, 14 from b4..b7. */
187 0 : out[2] = L_add(Mpy_32_16_lc3plus(b4, 2260), Mpy_32_16_lc3plus(b7, 11363));
188 0 : move32(); /* Sπ/16/√8 Cπ/16/√8 */
189 0 : out[10] = L_add(Mpy_32_16_lc3plus(b5, 9633), Mpy_32_16_lc3plus(b6, 6436));
190 0 : move32(); /* S5π/16/√8 C5π/16/√8 */
191 0 : out[6] = L_add(Mpy_32_16_lc3plus(b6, 9633), Mpy_32_16_lc3plus(b5, -6436));
192 0 : move32(); /* C3π/16/√8 -S3π/16/√8 */
193 0 : out[14] = L_add(Mpy_32_16_lc3plus(b7, 2260), Mpy_32_16_lc3plus(b4, -11363));
194 0 : move32(); /* C7π/16/√8 -S7π/16/√8 */
195 :
/* Stage 5: saturating butterflies on b8..b15 feeding the odd outputs. */
196 0 : a8 = L_add_sat(b9, b8);
197 0 : a9 = L_sub_sat(b8, b9);
198 0 : a10 = L_sub_sat(b11, b10);
199 0 : a11 = L_add_sat(b10, b11);
200 0 : a12 = L_add_sat(b13, b12);
201 0 : a13 = L_sub_sat(b12, b13);
202 0 : a14 = L_sub_sat(b15, b14);
203 0 : a15 = L_add_sat(b14, b15);
204 :
/* Odd-indexed outputs from a8..a15. */
205 0 : out[1] = L_add(Mpy_32_16_lc3plus(a8, 1136), Mpy_32_16_lc3plus(a15, 11529));
206 0 : move32(); /* Sπ/32/√8 Cπ/32/√8 */
207 0 : out[9] = L_add(Mpy_32_16_lc3plus(a9, 8956), Mpy_32_16_lc3plus(a14, 7350));
208 0 : move32(); /* S9π/32/√8 C9π/32/√8 */
209 0 : out[5] = L_add(Mpy_32_16_lc3plus(a10, 5461), Mpy_32_16_lc3plus(a13, 10217));
210 0 : move32(); /* S5π/32/√8 C5π/32/√8 */
211 0 : out[13] = L_add(Mpy_32_16_lc3plus(a11, 11086), Mpy_32_16_lc3plus(a12, 3363));
212 0 : move32(); /* S13π/32/√8 C13π/32/√8 */
213 0 : out[3] = L_add(Mpy_32_16_lc3plus(a12, 11086), Mpy_32_16_lc3plus(a11, -3363));
214 0 : move32(); /* C3π/32/√8 -S3π/32/√8 */
215 0 : out[11] = L_add(Mpy_32_16_lc3plus(a13, 5461), Mpy_32_16_lc3plus(a10, -10217));
216 0 : move32(); /* C11π/32/√8 -S11π/32/√8 */
217 0 : out[7] = L_add(Mpy_32_16_lc3plus(a14, 8956), Mpy_32_16_lc3plus(a9, -7350));
218 0 : move32(); /* C7π/32/√8 -S7π/32/√8 */
219 0 : out[15] = L_add(Mpy_32_16_lc3plus(a15, 1136), Mpy_32_16_lc3plus(a8, -11529));
220 0 : move32(); /* C15π/32/√8 -S15π/32/√8 */
221 :
222 : Dyn_Mem_Deluxe_Out();
223 0 : }
224 :
/*
 * idct32_fx - inverse-DCT butterfly network over 16 Word32 values, using
 * 16-bit multiplier constants.
 *
 *   in  : input coefficients, read as in[0] .. in[15]
 *   out : output values, written as out[0] .. out[15]
 *
 * The network has the same stage layout and the same constants as
 * idct16_fx, with L_add/L_sub in place of add/sub and Mpy_32_16_lc3plus()
 * in place of mult_r() (presumably a 32x16 fractional multiply - confirm in
 * the basop headers).  In the per-line comments C = cos and S = sin; the
 * multipliers applied directly to in[] carry an extra 1/sqrt(8) factor, the
 * inner rotations do not.  The constants look like Q15 values.
 *
 * The a* and b* temporaries are overwritten alternately from stage to stage,
 * so the statement order inside and between stages must not be changed.
 * Every in[] read happens before the first out[] store.
 */
225 0 : void idct32_fx(const Word32 *in, Word32 *out)
226 : {
227 : Dyn_Mem_Deluxe_In(Word32 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15;
228 : Word32 b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15;);
229 :
/* Stage 1: rotate the odd-indexed inputs in pairs (1,15) (9,7) (5,11) (13,3). */
230 0 : a8 = L_add(Mpy_32_16_lc3plus(in[1], 1136), Mpy_32_16_lc3plus(in[15], -11529)); /* Sπ/32/√8 -S15π/32/√8 */
231 0 : a9 = L_add(Mpy_32_16_lc3plus(in[9], 8956), Mpy_32_16_lc3plus(in[7], -7350)); /* S9π/32/√8 -S7π/32/√8 */
232 0 : a10 = L_add(Mpy_32_16_lc3plus(in[5], 5461), Mpy_32_16_lc3plus(in[11], -10217)); /* S5π/32/√8 -S11π/32/√8 */
233 0 : a11 = L_add(Mpy_32_16_lc3plus(in[13], 11086), Mpy_32_16_lc3plus(in[3], -3363)); /* S13π/32/√8 -S3π/32/√8 */
234 0 : a12 = L_add(Mpy_32_16_lc3plus(in[3], 11086), Mpy_32_16_lc3plus(in[13], 3363)); /* C3π/32/√8 C13π/32/√8 */
235 0 : a13 = L_add(Mpy_32_16_lc3plus(in[11], 5461), Mpy_32_16_lc3plus(in[5], 10217)); /* C11π/32/√8 C5π/32/√8 */
236 0 : a14 = L_add(Mpy_32_16_lc3plus(in[7], 8956), Mpy_32_16_lc3plus(in[9], 7350)); /* C7π/32/√8 C9π/32/√8 */
237 0 : a15 = L_add(Mpy_32_16_lc3plus(in[15], 1136), Mpy_32_16_lc3plus(in[1], 11529)); /* C15π/32/√8 Cπ/32/√8 */
238 :
/* Stage 2: rotate inputs 2, 14, 10, 6 into b4..b7 and form sums/differences
   of the stage-1 results in b8..b15. */
239 0 : b4 = L_add(Mpy_32_16_lc3plus(in[2], 2260), Mpy_32_16_lc3plus(in[14], -11363)); /* Sπ/16/√8 -S7π/16/√8 */
240 0 : b5 = L_add(Mpy_32_16_lc3plus(in[10], 9633), Mpy_32_16_lc3plus(in[6], -6436)); /* S5π/16/√8 -S3π/16/√8 */
241 0 : b6 = L_add(Mpy_32_16_lc3plus(in[6], 9633), Mpy_32_16_lc3plus(in[10], 6436)); /* C3π/16/√8 C5π/16/√8 */
242 0 : b7 = L_add(Mpy_32_16_lc3plus(in[14], 2260), Mpy_32_16_lc3plus(in[2], 11363)); /* C7π/16/√8 Cπ/16/√8 */
243 0 : b8 = L_add(a9, a8);
244 0 : b9 = L_sub(a8, a9);
245 0 : b10 = L_sub(a11, a10);
246 0 : b11 = L_add(a10, a11);
247 0 : b12 = L_add(a13, a12);
248 0 : b13 = L_sub(a12, a13);
249 0 : b14 = L_sub(a15, a14);
250 0 : b15 = L_add(a14, a15);
251 :
/* Stage 3: rotate inputs 0, 8, 4, 12 into a0..a3, butterfly b4..b7 into
   a4..a7, and rotate (b9,b14) and (b10,b13) by the pi/8 pair; b8, b11, b12
   and b15 are copied through unchanged. */
252 0 : a0 = L_add(Mpy_32_16_lc3plus(in[0], 8192), Mpy_32_16_lc3plus(in[8], 8192)); /* Cπ/4/√8 Cπ/4/√8 */
253 0 : a1 = L_add(Mpy_32_16_lc3plus(in[8], -8192), Mpy_32_16_lc3plus(in[0], 8192)); /* -Cπ/4/√8 Cπ/4/√8 */
254 0 : a2 = L_add(Mpy_32_16_lc3plus(in[4], 4433), Mpy_32_16_lc3plus(in[12], -10703)); /* Sπ/8/√8 -S3π/8/√8 */
255 0 : a3 = L_add(Mpy_32_16_lc3plus(in[12], 4433), Mpy_32_16_lc3plus(in[4], 10703)); /* C3π/8/√8 Cπ/8/√8 */
256 0 : a4 = L_add(b5, b4);
257 0 : a5 = L_sub(b4, b5);
258 0 : a6 = L_sub(b7, b6);
259 0 : a7 = L_add(b6, b7);
260 0 : a8 = b8;
261 0 : move32();
262 0 : a9 = L_add(Mpy_32_16_lc3plus(b9, -30274), Mpy_32_16_lc3plus(b14, 12540)); /* -Cπ/8 C3π/8 */
263 0 : a10 = L_add(Mpy_32_16_lc3plus(b10, -12540), Mpy_32_16_lc3plus(b13, -30274)); /* -Sπ/8 -S3π/8 */
264 0 : a11 = b11;
265 0 : move32();
266 0 : a12 = b12;
267 0 : move32();
268 0 : a13 = L_add(Mpy_32_16_lc3plus(b13, 12540), Mpy_32_16_lc3plus(b10, -30274)); /* C3π/8 -Cπ/8 */
269 0 : a14 = L_add(Mpy_32_16_lc3plus(b14, 30274), Mpy_32_16_lc3plus(b9, 12540)); /* S3π/8 Sπ/8 */
270 0 : a15 = b15;
271 0 : move32();
272 :
/* Stage 4: butterflies on a0..a3 and a8..a15, pi/4 rotation of (a5,a6);
   a4 and a7 are copied through unchanged. */
273 0 : b0 = L_add(a3, a0);
274 0 : b1 = L_add(a2, a1);
275 0 : b2 = L_sub(a1, a2);
276 0 : b3 = L_sub(a0, a3);
277 0 : b4 = a4;
278 0 : move32();
279 0 : b5 = L_add(Mpy_32_16_lc3plus(a5, -23170), Mpy_32_16_lc3plus(a6, 23170)); /* -Cπ/4 Cπ/4 */
280 0 : b6 = L_add(Mpy_32_16_lc3plus(a6, 23170), Mpy_32_16_lc3plus(a5, 23170)); /* Cπ/4 Cπ/4 */
281 0 : b7 = a7;
282 0 : move32();
283 0 : b8 = L_add(a11, a8);
284 0 : b9 = L_add(a10, a9);
285 0 : b10 = L_sub(a9, a10);
286 0 : b11 = L_sub(a8, a11);
287 0 : b12 = L_sub(a15, a12);
288 0 : b13 = L_sub(a14, a13);
289 0 : b14 = L_add(a13, a14);
290 0 : b15 = L_add(a12, a15);
291 :
/* Stage 5: butterflies on b0..b7 and pi/4 rotations of (b10,b13), (b11,b12).
   b8, b9, b14 and b15 are used directly by the output stage. */
292 0 : a0 = L_add(b7, b0);
293 0 : a1 = L_add(b6, b1);
294 0 : a2 = L_add(b5, b2);
295 0 : a3 = L_add(b4, b3);
296 0 : a4 = L_sub(b3, b4);
297 0 : a5 = L_sub(b2, b5);
298 0 : a6 = L_sub(b1, b6);
299 0 : a7 = L_sub(b0, b7);
300 0 : a10 = L_add(Mpy_32_16_lc3plus(b10, -23170), Mpy_32_16_lc3plus(b13, 23170)); /* -Cπ/4 Cπ/4 */
301 0 : a11 = L_add(Mpy_32_16_lc3plus(b11, -23170), Mpy_32_16_lc3plus(b12, 23170)); /* -Cπ/4 Cπ/4 */
302 0 : a12 = L_add(Mpy_32_16_lc3plus(b12, 23170), Mpy_32_16_lc3plus(b11, 23170)); /* Cπ/4 Cπ/4 */
303 0 : a13 = L_add(Mpy_32_16_lc3plus(b13, 23170), Mpy_32_16_lc3plus(b10, 23170)); /* Cπ/4 Cπ/4 */
304 :
/* Output stage: out[k] and out[15-k] are the sum and difference of the same
   pair of intermediate values. */
305 0 : out[0] = L_add(b15, a0);
306 0 : move32();
307 0 : out[1] = L_add(b14, a1);
308 0 : move32();
309 0 : out[2] = L_add(a13, a2);
310 0 : move32();
311 0 : out[3] = L_add(a12, a3);
312 0 : move32();
313 0 : out[4] = L_add(a11, a4);
314 0 : move32();
315 0 : out[5] = L_add(a10, a5);
316 0 : move32();
317 0 : out[6] = L_add(b9, a6);
318 0 : move32();
319 0 : out[7] = L_add(b8, a7);
320 0 : move32();
321 0 : out[8] = L_sub(a7, b8);
322 0 : move32();
323 0 : out[9] = L_sub(a6, b9);
324 0 : move32();
325 0 : out[10] = L_sub(a5, a10);
326 0 : move32();
327 0 : out[11] = L_sub(a4, a11);
328 0 : move32();
329 0 : out[12] = L_sub(a3, a12);
330 0 : move32();
331 0 : out[13] = L_sub(a2, a13);
332 0 : move32();
333 0 : out[14] = L_sub(a1, b14);
334 0 : move32();
335 0 : out[15] = L_sub(a0, b15);
336 0 : move32();
337 :
338 : Dyn_Mem_Deluxe_Out();
339 0 : }
340 :
/*
 * idct32_32_fx - inverse-DCT butterfly network over 16 Word32 values, using
 * 32-bit multiplier constants.
 *
 *   in  : input coefficients, read as in[0] .. in[15]
 *   out : output values, written as out[0] .. out[15]
 *
 * The network has the same stage layout as idct32_fx, but every multiply
 * goes through Mpy_32_32_lc3plus() with a 32-bit constant (presumably a
 * 32x32 fractional multiply - confirm in the basop headers).  The constants
 * applied directly to in[] come from the two tables below (sin and cos of
 * k*pi/32, divided by sqrt(8), scaled by 2^31).  The inner rotations use
 * values without the 1/sqrt(8) factor:
 *   1984016189 ~= cos(pi/8) * 2^31
 *    821806413 ~= sin(pi/8) * 2^31
 *   1518500250 ~= cos(pi/4) * 2^31
 *
 * The a* and b* temporaries are overwritten alternately from stage to stage,
 * so the statement order inside and between stages must not be changed.
 * Every in[] read happens before the first out[] store.
 */
341 0 : void idct32_32_fx(const Word32 *in, Word32 *out)
342 : {
343 : Dyn_Mem_Deluxe_In(Word32 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15;
344 : Word32 b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15;);
345 :
346 : /*round(sin(pi*(1:16)/32)/sqrt(8)*2^31) =
347 : 74419526 148122351 220398677 290552444 357908031 421816769 481663180 536870912
348 : 586908283 631293407 669598830 701455651 726557070 744661347 755594128 759250125
349 : */
350 : /*round(cos(pi*(1:16)/32)/sqrt(8)*2^31) =
351 : 755594128 744661347 726557070 701455651 669598830 631293407 586908283 536870912
352 : 481663180 421816769 357908031 290552444 220398677 148122351 74419526 0
353 : */
354 :
/* Stage 1: rotate the odd-indexed inputs in pairs (1,15) (9,7) (5,11) (13,3). */
355 0 : a8 = L_add(Mpy_32_32_lc3plus(in[1], 74419526), Mpy_32_32_lc3plus(in[15], -755594128)); /* Sπ/32/√8 -S15π/32/√8 */
356 0 : a9 = L_add(Mpy_32_32_lc3plus(in[9], 586908283), Mpy_32_32_lc3plus(in[7], -481663180)); /* S9π/32/√8 -S7π/32/√8 */
357 0 : a10 = L_add(Mpy_32_32_lc3plus(in[5], 357908031), Mpy_32_32_lc3plus(in[11], -669598830)); /* S5π/32/√8 -S11π/32/√8 */
358 0 : a11 = L_add(Mpy_32_32_lc3plus(in[13], 726557070), Mpy_32_32_lc3plus(in[3], -220398677)); /* S13π/32/√8 -S3π/32/√8 */
359 :
360 0 : a12 = L_add(Mpy_32_32_lc3plus(in[3], 726557070), Mpy_32_32_lc3plus(in[13], 220398677)); /* C3π/32/√8 C13π/32/√8 */
361 0 : a13 = L_add(Mpy_32_32_lc3plus(in[11], 357908031), Mpy_32_32_lc3plus(in[5], 669598830)); /* C11π/32/√8 C5π/32/√8 */
362 0 : a14 = L_add(Mpy_32_32_lc3plus(in[7], 586908283), Mpy_32_32_lc3plus(in[9], 481663180)); /* C7π/32/√8 C9π/32/√8 */
363 0 : a15 = L_add(Mpy_32_32_lc3plus(in[15], 74419526), Mpy_32_32_lc3plus(in[1], 755594128)); /* C15π/32/√8 Cπ/32/√8 */
364 :
/* Stage 2: rotate inputs 2, 14, 10, 6 into b4..b7 and form sums/differences
   of the stage-1 results in b8..b15. */
365 0 : b4 = L_add(Mpy_32_32_lc3plus(in[2], 148122351), Mpy_32_32_lc3plus(in[14], -744661347)); /* Sπ/16/√8 -S7π/16/√8 */
366 0 : b5 = L_add(Mpy_32_32_lc3plus(in[10], 631293407), Mpy_32_32_lc3plus(in[6], -421816769)); /* S5π/16/√8 -S3π/16/√8 */
367 0 : b6 = L_add(Mpy_32_32_lc3plus(in[6], 631293407), Mpy_32_32_lc3plus(in[10], 421816769)); /* C3π/16/√8 C5π/16/√8 */
368 0 : b7 = L_add(Mpy_32_32_lc3plus(in[14], 148122351), Mpy_32_32_lc3plus(in[2], 744661347)); /* C7π/16/√8 Cπ/16/√8 */
369 0 : b8 = L_add(a9, a8);
370 0 : b9 = L_sub(a8, a9);
371 0 : b10 = L_sub(a11, a10);
372 0 : b11 = L_add(a10, a11);
373 0 : b12 = L_add(a13, a12);
374 0 : b13 = L_sub(a12, a13);
375 0 : b14 = L_sub(a15, a14);
376 0 : b15 = L_add(a14, a15);
377 :
/* Stage 3: rotate inputs 0, 8, 4, 12 into a0..a3, butterfly b4..b7 into
   a4..a7, and rotate (b9,b14) and (b10,b13) by the pi/8 pair; b8, b11, b12
   and b15 are copied through unchanged. */
378 0 : a0 = L_add(Mpy_32_32_lc3plus(in[0], 536870912), Mpy_32_32_lc3plus(in[8], 536870912)); /* Cπ/4/√8 Cπ/4/√8 */
379 0 : a1 = L_add(Mpy_32_32_lc3plus(in[8], -536870912), Mpy_32_32_lc3plus(in[0], 536870912)); /* -Cπ/4/√8 Cπ/4/√8 */
380 0 : a2 = L_add(Mpy_32_32_lc3plus(in[4], 290552444), Mpy_32_32_lc3plus(in[12], -701455651)); /* Sπ/8/√8 -S3π/8/√8 */
381 0 : a3 = L_add(Mpy_32_32_lc3plus(in[12], 290552444), Mpy_32_32_lc3plus(in[4], 701455651)); /* C3π/8/√8 Cπ/8/√8 */
382 0 : a4 = L_add(b5, b4);
383 0 : a5 = L_sub(b4, b5);
384 0 : a6 = L_sub(b7, b6);
385 0 : a7 = L_add(b6, b7);
386 0 : a8 = b8;
387 0 : move32();
388 0 : a9 = L_add(Mpy_32_32_lc3plus(b9, -1984016189), Mpy_32_32_lc3plus(b14, 821806413)); /* -Cπ/8 C3π/8 */
389 0 : a10 = L_add(Mpy_32_32_lc3plus(b10, -821806413), Mpy_32_32_lc3plus(b13, -1984016189)); /* -Sπ/8 -S3π/8 */
390 0 : a11 = b11;
391 0 : move32();
392 0 : a12 = b12;
393 0 : move32();
394 0 : a13 = L_add(Mpy_32_32_lc3plus(b13, 821806413), Mpy_32_32_lc3plus(b10, -1984016189)); /* C3π/8 -Cπ/8 */
395 0 : a14 = L_add(Mpy_32_32_lc3plus(b14, 1984016189), Mpy_32_32_lc3plus(b9, 821806413)); /* S3π/8 Sπ/8 */
396 0 : a15 = b15;
397 0 : move32();
398 :
/* Stage 4: butterflies on a0..a3 and a8..a15, pi/4 rotation of (a5,a6);
   a4 and a7 are copied through unchanged. */
399 0 : b0 = L_add(a3, a0);
400 0 : b1 = L_add(a2, a1);
401 0 : b2 = L_sub(a1, a2);
402 0 : b3 = L_sub(a0, a3);
403 0 : b4 = a4;
404 0 : move32();
405 0 : b5 = L_add(Mpy_32_32_lc3plus(a5, -1518500250), Mpy_32_32_lc3plus(a6, 1518500250)); /* -Cπ/4 Cπ/4 */
406 0 : b6 = L_add(Mpy_32_32_lc3plus(a6, 1518500250), Mpy_32_32_lc3plus(a5, 1518500250)); /* Cπ/4 Cπ/4 */
407 0 : b7 = a7;
408 0 : move32();
409 0 : b8 = L_add(a11, a8);
410 0 : b9 = L_add(a10, a9);
411 0 : b10 = L_sub(a9, a10);
412 0 : b11 = L_sub(a8, a11);
413 0 : b12 = L_sub(a15, a12);
414 0 : b13 = L_sub(a14, a13);
415 0 : b14 = L_add(a13, a14);
416 0 : b15 = L_add(a12, a15);
417 :
/* Stage 5: butterflies on b0..b7 and pi/4 rotations of (b10,b13), (b11,b12).
   b8, b9, b14 and b15 are used directly by the output stage. */
418 0 : a0 = L_add(b7, b0);
419 0 : a1 = L_add(b6, b1);
420 0 : a2 = L_add(b5, b2);
421 0 : a3 = L_add(b4, b3);
422 0 : a4 = L_sub(b3, b4);
423 0 : a5 = L_sub(b2, b5);
424 0 : a6 = L_sub(b1, b6);
425 0 : a7 = L_sub(b0, b7);
426 0 : a10 = L_add(Mpy_32_32_lc3plus(b10, -1518500250), Mpy_32_32_lc3plus(b13, 1518500250)); /* -Cπ/4 Cπ/4 */
427 0 : a11 = L_add(Mpy_32_32_lc3plus(b11, -1518500250), Mpy_32_32_lc3plus(b12, 1518500250)); /* -Cπ/4 Cπ/4 */
428 0 : a12 = L_add(Mpy_32_32_lc3plus(b12, 1518500250), Mpy_32_32_lc3plus(b11, 1518500250)); /* Cπ/4 Cπ/4 */
429 0 : a13 = L_add(Mpy_32_32_lc3plus(b13, 1518500250), Mpy_32_32_lc3plus(b10, 1518500250)); /* Cπ/4 Cπ/4 */
430 :
/* Output stage: out[k] and out[15-k] are the sum and difference of the same
   pair of intermediate values. */
431 0 : out[0] = L_add(b15, a0);
432 0 : move32();
433 0 : out[1] = L_add(b14, a1);
434 0 : move32();
435 0 : out[2] = L_add(a13, a2);
436 0 : move32();
437 0 : out[3] = L_add(a12, a3);
438 0 : move32();
439 0 : out[4] = L_add(a11, a4);
440 0 : move32();
441 0 : out[5] = L_add(a10, a5);
442 0 : move32();
443 0 : out[6] = L_add(b9, a6);
444 0 : move32();
445 0 : out[7] = L_add(b8, a7);
446 0 : move32();
447 0 : out[8] = L_sub(a7, b8);
448 0 : move32();
449 0 : out[9] = L_sub(a6, b9);
450 0 : move32();
451 0 : out[10] = L_sub(a5, a10);
452 0 : move32();
453 0 : out[11] = L_sub(a4, a11);
454 0 : move32();
455 0 : out[12] = L_sub(a3, a12);
456 0 : move32();
457 0 : out[13] = L_sub(a2, a13);
458 0 : move32();
459 0 : out[14] = L_sub(a1, b14);
460 0 : move32();
461 0 : out[15] = L_sub(a0, b15);
462 0 : move32();
463 :
464 : Dyn_Mem_Deluxe_Out();
465 0 : }
|