1 module mp3decoder;
2 
3 import core.stdc..string;
4 import core.stdc.stdlib;
5 import std.stdio;
6 
// Output sample type: `float` when the build defines version
// MINIMP3_FLOAT_OUTPUT, `short` otherwise.
version (MINIMP3_FLOAT_OUTPUT)
	alias mp3d_sample_t = float;
else
	alias mp3d_sample_t = short;
13 
// SIMD support; compiled only when the build defines version HAVE_SIMD.
version (HAVE_SIMD) {
	import core.simd;

	// Short alias for the vector type used by the SIMD code paths.
	// NOTE(review): `__m128` is not one of the names core.simd is documented to
	// declare (it offers `float4`); presumably it is defined elsewhere in the
	// project - confirm, otherwise this alias cannot resolve.
	alias f4 = __m128;
}
19 
// Seek-method selectors.
// NOTE(review): presumably the values stored in mp3dec_ex_t.seek_method; no
// use is visible in this part of the file - confirm.
enum MP3D_SEEK_TO_BYTE = 0;
enum MP3D_SEEK_TO_SAMPLE = 1;
enum MP3D_SEEK_TO_SAMPLE_INDEXED = 2;
// 1152 is the largest value hdr_frame_samples() returns; the factor 2 is
// presumably the channel count - confirm against the decode entry point.
enum MINIMP3_MAX_SAMPLES_PER_FRAME = 1152 * 2;

// Exponent bias; enters gain_exp in L3_decode_scalefactors() multiplied by 4.
enum BITS_DEQUANTIZER_OUT = -1;
// gain_exp for the largest global_gain (255) without the MS-stereo term: 41.
enum MAX_SCF = (255 + BITS_DEQUANTIZER_OUT * 4 - 210);
// MAX_SCF rounded up to a multiple of 4: 44.
enum MAX_SCFI = ((MAX_SCF + 3) & ~3);

enum MAX_FREE_FORMAT_FRAME_SIZE = 2304; // more than ISO spec's
enum MAX_FRAME_SYNC_MATCHES = 10;

enum MAX_L3_FRAME_PAYLOAD_BYTES = MAX_FREE_FORMAT_FRAME_SIZE; // MUST be >= 320000/8/32000*1152 = 1440

// Together with MAX_L3_FRAME_PAYLOAD_BYTES this sizes mp3dec_scratch_t.maindata.
enum MAX_BITRESERVOIR_BYTES = 511;
// block_type values; L3_read_side_info() compares against SHORT_BLOCK_TYPE.
enum SHORT_BLOCK_TYPE = 2;
enum STOP_BLOCK_TYPE = 3;
// Values of the 2-bit field returned by HDR_GET_STEREO_MODE().
enum MODE_MONO = 3;
enum MODE_JOINT_STEREO = 1;
// Header length in bytes (the HDR_* helpers index bytes 1..3).
enum HDR_SIZE = 4;
40 
// Read-only view of a byte buffer; used as the `file` member of mp3dec_ex_t.
struct mp3dec_map_info_t {
	const(ubyte)[] buffer; // slice, so it carries both the pointer and the length
}
44 
// Extended decoder context: bundles an mp3dec_t with a byte buffer view and a
// seek-method setting.
struct mp3dec_ex_t {
	mp3dec_t mp3d; // embedded decoder state
	mp3dec_map_info_t file; // input byte buffer view
	int seek_method; // presumably one of the MP3D_SEEK_TO_* values - confirm
	// `is_file` is only part of the struct when the build does NOT define
	// version MINIMP3_NO_STDIO, so the struct layout depends on that flag.
	version (MINIMP3_NO_STDIO) {
	}
	else {
		int is_file;
	}
}
55 
// Per-frame information record: five `int` fields, in this order.
// NOTE(review): nothing in this part of the file writes these fields; the
// meanings below are inferred from the names only.
struct mp3dec_frame_info_t {
	int frame_bytes; // presumably the size of the frame in bytes
	int channels; // presumably the channel count
	int hz; // presumably the sample rate
	int layer; // presumably the MPEG layer number
	int bitrate_kbps; // presumably the bitrate in kbit/s
}
59 
// Decoder state. Field order and sizes define the struct layout.
// NOTE(review): the per-field notes are inferred from names; the code that
// reads and writes these fields is outside this part of the file.
struct mp3dec_t {
	float[9 * 32][2] mdct_overlap; // two rows of 288 floats
	float[15 * 2 * 32] qmf_state; // 960 floats
	int reserv; // presumably the number of valid bytes in reserv_buf - confirm
	int free_format_bytes;
	ubyte[4] header; // four header bytes
	ubyte[511] reserv_buf; // same length as MAX_BITRESERVOIR_BYTES
}
67 
// Bit reader state over a byte buffer; set up by bs_init(), consumed by get_bits().
struct bs_t {
	const(ubyte)* buf; // first byte of the data
	int pos; // current position in BITS (get_bits uses pos >> 3 as byte index, pos & 7 as bit offset)
	int limit; // total size in BITS (bs_init stores bytes * 8)
}
73 
// Layer I/II scale information filled in by L12_read_scale_info().
struct L12_scale_info {
	float[3 * 64] scf; // three scale values per bitalloc slot (written by L12_read_scalefactors)
	ubyte total_bands; // number of bands; the per-slot arrays use 2 * total_bands entries
	ubyte stereo_bands; // bands below this index read a separate allocation code for slot 2*i + 1
	ubyte[64] bitalloc; // allocation code per slot; band i occupies slots 2*i and 2*i + 1
	ubyte[64] scfcod; // per-slot code passed on to L12_read_scalefactors (6 when the slot is unallocated)
}
81 
// One entry of a Layer I/II allocation layout: a run of consecutive bands that
// share one code table (consumed by L12_read_scale_info()).
struct L12_subband_alloc_t {
	ubyte tab_offset; // offset of the run's code table inside g_bitalloc_code_tab
	ubyte code_tab_width; // number of bits read per allocation code
	ubyte band_count; // how many consecutive bands this entry covers
}
87 
// Layer III side information for one granule/channel entry, as filled in by
// L3_read_side_info(). Field order defines the struct layout.
struct L3_gr_info_t {
	const(ubyte)* sfbtab; // row of g_scf_long / g_scf_short / g_scf_mixed
	ushort part_23_length; // 12-bit field
	ushort big_values; // 9-bit field; values above 288 are rejected
	ushort scalefac_compress; // 4-bit field when the MPEG1 bit is set, else 9-bit
	ubyte global_gain; // 8-bit field
	ubyte block_type; // 2-bit field, or 0 when the window-switching bit is clear
	ubyte mixed_block_flag; // 1-bit field, or 0 when the window-switching bit is clear
	ubyte n_long_sfb; // 22, 8, 6 or 0 depending on block type
	ubyte n_short_sfb; // 0, 30 or 39 depending on block type
	ubyte[3] table_select; // three 5-bit selectors
	ubyte[3] region_count;
	ubyte[3] subblock_gain; // three 3-bit fields (only read when window switching is on)
	ubyte preflag;
	ubyte scalefac_scale; // 1-bit field
	ubyte count1_table; // 1-bit field
	ubyte scfsi; // 4 bits taken from the shared scfsi word
}
97 
// Working buffers for decoding a frame.
// NOTE(review): the code that uses this struct is outside this part of the
// file; the per-field notes only restate the declared shapes.
struct mp3dec_scratch_t {
	bs_t bs; // bit reader state
	ubyte[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES] maindata; // room for reservoir bytes plus one frame payload
	L3_gr_info_t[4] gr_info; // L3_read_side_info() fills up to 4 entries
	float[576][2] grbuf; // two rows of 576 floats (the Layer I/II code uses a 576 offset between them)
	float[40] scf;
	float[2 * 32][18 + 15] syn; // 33 rows of 64 floats
	ubyte[39][2] ist_pos; // two rows of 39 bytes
}
107 
// Returns `b` when `a > b`, otherwise `a` (so `a` wins when the two compare equal).
auto MINIMP3_MIN(A, B)(A a, B b) {
	return a > b ? b : a;
}
111 
// Returns `b` when `a < b`, otherwise `a` (so `a` wins when the two compare equal).
auto MINIMP3_MAX(A, B)(A a, B b) {
	return a < b ? b : a;
}
115 
// True when the two top bits of header byte 3 are both set, i.e. the 2-bit
// mode field equals 3 (MODE_MONO).
auto HDR_IS_MONO(H)(H h) {
	enum int MODE_BITS = 0xC0;
	return (h[3] & MODE_BITS) == MODE_BITS;
}
119 
// True when the top three bits of header byte 3 are 011: mode field == 1
// (MODE_JOINT_STEREO) and bit 5 (the bit HDR_TEST_MS_STEREO checks) is set.
auto HDR_IS_MS_STEREO(H)(H h) {
	enum int TOP3_BITS = 0xE0;
	enum int JOINT_WITH_MS = 0x60;
	return (h[3] & TOP3_BITS) == JOINT_WITH_MS;
}
123 
// True when the high nibble of header byte 2 (the value HDR_GET_BITRATE
// returns) is zero.
auto HDR_IS_FREE_FORMAT(H)(H h) {
	enum int BITRATE_NIBBLE = 0xF0;
	return (h[2] & BITRATE_NIBBLE) == 0;
}
127 
// True when bit 0 of header byte 1 is CLEAR.
auto HDR_IS_CRC(H)(H h) {
	return !(h[1] & 0x01);
}
131 
// Isolates bit 1 of header byte 2: the result is 2 when the bit is set and 0
// otherwise (a mask value, not a bool).
auto HDR_TEST_PADDING(H)(H h) {
	enum int PADDING_BIT = 0x2;
	return h[2] & PADDING_BIT;
}
135 
// Isolates bit 3 of header byte 1: the result is 8 when the bit is set and 0
// otherwise (a mask value, not a bool).
auto HDR_TEST_MPEG1(H)(H h) {
	enum int MPEG1_BIT = 0x8;
	return h[1] & MPEG1_BIT;
}
139 
// Isolates bit 4 of header byte 1: the result is 16 when the bit is set and 0
// otherwise (a mask value, not a bool).
auto HDR_TEST_NOT_MPEG25(H)(H h) {
	enum int NOT_MPEG25_BIT = 0x10;
	return h[1] & NOT_MPEG25_BIT;
}
143 
// Isolates bit 4 of header byte 3: the result is 16 when the bit is set and 0
// otherwise (a mask value, not a bool).
auto HDR_TEST_I_STEREO(H)(H h) {
	enum int I_STEREO_BIT = 0x10;
	return h[3] & I_STEREO_BIT;
}
147 
// Isolates bit 5 of header byte 3: the result is 32 when the bit is set and 0
// otherwise (a mask value, not a bool).
auto HDR_TEST_MS_STEREO(H)(H h) {
	enum int MS_STEREO_BIT = 0x20;
	return h[3] & MS_STEREO_BIT;
}
151 
// Extracts the 2-bit field held in bits 7..6 of header byte 3 (range 0..3).
// MODE_MONO and MODE_JOINT_STEREO are values of this field.
auto HDR_GET_STEREO_MODE(H)(H h) {
	return (h[3] >> 6) & 3;
}
155 
// Extracts the 2-bit field held in bits 5..4 of header byte 3 (range 0..3).
auto HDR_GET_STEREO_MODE_EXT(H)(H h) {
	return (h[3] >> 4) & 3;
}
159 
// Extracts the 2-bit field held in bits 2..1 of header byte 1 (range 0..3).
// hdr_valid() rejects headers where this field is 0.
auto HDR_GET_LAYER(H)(H h) {
	return (h[1] >> 1) & 3;
}
163 
// Returns header byte 2 shifted right by four, i.e. its high nibble for a
// byte-sized element. hdr_valid() rejects headers where this is 15.
auto HDR_GET_BITRATE(H)(H h) {
	return h[2] >> 4;
}
167 
// Extracts the 2-bit field held in bits 3..2 of header byte 2 (range 0..3).
// hdr_valid() rejects headers where this field is 3.
auto HDR_GET_SAMPLE_RATE(H)(H h) {
	return (h[2] >> 2) & 3;
}
171 
// Combined sample-rate index: the 2-bit sample-rate field plus 3 for each of
// bit 3 and bit 4 of header byte 1 that is set (so +0, +3 or +6).
auto HDR_GET_MY_SAMPLE_RATE(H)(H h) {
	const int version_bits = ((h[1] >> 3) & 1) + ((h[1] >> 4) & 1);
	return HDR_GET_SAMPLE_RATE(h) + version_bits * 3;
}
175 
// True when bits 3..1 of header byte 1 are 001: the bit HDR_TEST_MPEG1 checks
// is clear and the layer field equals 1. hdr_frame_samples() halves 1152 to
// 576 in that case.
auto HDR_IS_FRAME_576(H)(H h) {
	enum int VERSION_AND_LAYER = 14;
	return (h[1] & VERSION_AND_LAYER) == 2;
}
179 
// True when bits 2..1 of header byte 1 are both set, i.e. the field returned
// by HDR_GET_LAYER equals 3.
auto HDR_IS_LAYER_1(H)(H h) {
	enum int LAYER_BITS = 6;
	return (h[1] & LAYER_BITS) == LAYER_BITS;
}
183 
// Points the bit reader `bs` at `data` and rewinds it.
//
// Params:
//   bs    = reader to (re)initialise; all three fields are overwritten
//   data  = first byte of the input
//   bytes = input length in bytes; stored as a bit count (bytes * 8)
void bs_init(bs_t* bs, const(ubyte)* data, int bytes) @safe {
	// Field order of bs_t is buf, pos, limit.
	*bs = bs_t(data, 0, bytes * 8);
}
189 
// Reads `n` bits from the reader, most significant bit first, and advances
// the position by `n`.
//
// The position is advanced even when the read would run past `bs.limit`; in
// that case no byte is touched and 0 is returned.
uint get_bits(bs_t* bs, int n) {
	const uint bit_offset = bs.pos & 7; // bits already consumed in the first byte
	const(ubyte)* src = bs.buf + (bs.pos >> 3);

	bs.pos += n;
	if (bs.pos > bs.limit) {
		return 0;
	}

	// Start with the unread low bits of the first byte.
	uint pending = *src++ & (255 >> bit_offset);
	uint result = 0;
	// Bits still missing once `pending` has been accounted for; a value <= 0
	// means `pending` holds -shift surplus low bits.
	int shift = n + bit_offset - 8;
	for (; shift > 0; shift -= 8) {
		result |= pending << shift;
		pending = *src++;
	}
	return result | (pending >> -shift);
}
203 
// Returns 1 when the four bytes at `h` pass the header sanity checks, else 0.
//
// Checks, in order: byte 0 is 0xFF; byte 1 has its top nibble all ones or
// equals 0xE2/0xE3; the layer field is not 0; the bitrate index is not 15;
// the sample-rate field is not 3.
int hdr_valid(const(ubyte)* h) {
	if (h[0] != 0xff) {
		return 0;
	}
	const bool sync_ok = (h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2;
	if (!sync_ok) {
		return 0;
	}
	if (HDR_GET_LAYER(h) == 0) {
		return 0;
	}
	if (HDR_GET_BITRATE(h) == 15) {
		return 0;
	}
	if (HDR_GET_SAMPLE_RATE(h) == 3) {
		return 0;
	}
	return 1;
}
207 
// Returns 1 when header `h2` is valid and agrees with `h1` on all of byte 1
// except bit 0, on the sample-rate field (bits 3..2 of byte 2), and on whether
// the bitrate index is zero; returns 0 otherwise.
int hdr_compare(const(ubyte)* h1, const(ubyte)* h2) {
	if (!hdr_valid(h2)) {
		return 0;
	}
	if (((h1[1] ^ h2[1]) & 0xFE) != 0) {
		return 0;
	}
	if (((h1[2] ^ h2[2]) & 0x0C) != 0) {
		return 0;
	}
	return HDR_IS_FREE_FORMAT(h1) == HDR_IS_FREE_FORMAT(h2) ? 1 : 0;
}
211 
// Looks up the bitrate in kbit/s for header `h`; a bitrate index of 0 yields 0.
//
// The header is expected to have passed hdr_valid(): a layer field of 0 or a
// bitrate index of 15 would index outside the table.
uint hdr_bitrate_kbps(const(ubyte)* h) {
	// Indexed [MPEG1 bit set][layer field - 1][bitrate index]. Entries hold
	// half the kbit/s value, which keeps every entry within ubyte range.
	static const ubyte[15][3][2] half_kbps = [
		[
			[0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80], [0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80],
			[0, 16, 24, 28, 32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128]
		],
		[
			[0, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160], [0, 16, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192],
			[0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224]
		],
	];
	const int version_row = HDR_TEST_MPEG1(h) ? 1 : 0;
	const int layer_row = HDR_GET_LAYER(h) - 1;
	const int rate_col = HDR_GET_BITRATE(h);
	return 2 * half_kbps[version_row][layer_row][rate_col];
}
225 
// Returns the sample rate in Hz for header `h`: a base rate chosen by the
// sample-rate field, halved once if the MPEG1 bit is clear and once more if
// the not-MPEG2.5 bit is clear.
uint hdr_sample_rate_hz(const(ubyte)* h) {
	static const uint[3] base_hz = [44100, 48000, 32000];
	uint hz = base_hz[HDR_GET_SAMPLE_RATE(h)];
	if (!HDR_TEST_MPEG1(h)) {
		hz >>= 1;
	}
	if (!HDR_TEST_NOT_MPEG25(h)) {
		hz >>= 1;
	}
	return hz;
}
230 
// Returns the per-frame sample count selected by header `h`: 384 when
// HDR_IS_LAYER_1 holds, 576 when HDR_IS_FRAME_576 holds, otherwise 1152.
uint hdr_frame_samples(const(ubyte)* h) {
	if (HDR_IS_LAYER_1(h)) {
		return 384;
	}
	return HDR_IS_FRAME_576(h) ? 576 : 1152;
}
234 
// Computes the frame size in bytes from header `h`, without the padding that
// hdr_padding() reports.
//
// Params:
//   h                = header bytes
//   free_format_size = value returned instead when the computed size is 0
//                      (which happens when the header's bitrate is 0)
int hdr_frame_bytes(const(ubyte)* h, int free_format_size) {
	const uint samples = hdr_frame_samples(h);
	const uint kbps = hdr_bitrate_kbps(h);
	const uint hz = hdr_sample_rate_hz(h);

	// kbit/s * 125 == bytes per second.
	int size = samples * kbps * 125 / hz;
	if (HDR_IS_LAYER_1(h)) {
		size &= ~3; // slot align: round down to a multiple of 4
	}
	return size != 0 ? size : free_format_size;
}
242 
// Returns the number of padding bytes signalled by header `h`: 0 when the
// padding bit is clear, otherwise 4 if HDR_IS_LAYER_1 holds and 1 if not.
int hdr_padding(const(ubyte)* h) {
	if (!HDR_TEST_PADDING(h)) {
		return 0;
	}
	return HDR_IS_LAYER_1(h) ? 4 : 1;
}
246 
247 version (MINIMP3_ONLY_MP3) {
248 }
249 else {
250 	const(L12_subband_alloc_t)* L12_subband_alloc_table(const(ubyte)* hdr, L12_scale_info* sci) {
251 		const(L12_subband_alloc_t)* alloc;
252 		int mode = HDR_GET_STEREO_MODE(hdr);
253 		int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;
254 
255 		if (HDR_IS_LAYER_1(hdr)) {
256 			static const L12_subband_alloc_t[] g_alloc_L1 = [L12_subband_alloc_t(76, 4, 32)];
257 			alloc = g_alloc_L1.ptr;
258 			nbands = 32;
259 		}
260 		else if (!HDR_TEST_MPEG1(hdr)) {
261 			static const L12_subband_alloc_t[] g_alloc_L2M2 = [L12_subband_alloc_t(60, 4, 4), L12_subband_alloc_t(44, 3, 7), L12_subband_alloc_t(44, 2, 19)];
262 			alloc = g_alloc_L2M2.ptr;
263 			nbands = 30;
264 		}
265 		else {
266 			static const L12_subband_alloc_t[] g_alloc_L2M1 = [
267 				L12_subband_alloc_t(0, 4, 3), L12_subband_alloc_t(16, 4, 8), L12_subband_alloc_t(32, 3, 12), L12_subband_alloc_t(40, 2, 7)
268 			];
269 			int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);
270 			uint kbps = hdr_bitrate_kbps(hdr) >> cast(int)(mode != MODE_MONO);
271 			if (!kbps) // free-format
272 			{
273 				kbps = 192;
274 			}
275 
276 			alloc = g_alloc_L2M1.ptr;
277 			nbands = 27;
278 			if (kbps < 56) {
279 				static const L12_subband_alloc_t[] g_alloc_L2M1_lowrate = [L12_subband_alloc_t(44, 4, 2), L12_subband_alloc_t(44, 3, 10)];
280 				alloc = g_alloc_L2M1_lowrate.ptr;
281 				nbands = sample_rate_idx == 2 ? 12 : 8;
282 			}
283 			else if (kbps >= 96 && sample_rate_idx != 1) {
284 				nbands = 30;
285 			}
286 		}
287 
288 		sci.total_bands = cast(ubyte) nbands;
289 		sci.stereo_bands = cast(ubyte) MINIMP3_MIN(stereo_bands, nbands);
290 
291 		return alloc;
292 	}
293 
294 	void L12_read_scalefactors(bs_t* bs, ubyte* pba, ubyte* scfcod, int bands, float* scf) {
295 
296 		enum DQ(int x) = [9.53674316e-07f / x, 7.56931807e-07f / x, 6.00777173e-07f / x];
297 
298 		static const float[18 * 3] g_deq_L12 = DQ!3 ~ DQ!7 ~ DQ!15 ~ DQ!31 ~ DQ!63 ~ DQ!127 ~ DQ!255 ~ DQ!511 ~ DQ!1023 ~ DQ!2047 ~ DQ!4095 ~ DQ!8191 ~ DQ!16383
299 			~ DQ!32767 ~ DQ!65535 ~ DQ!3 ~ DQ!5 ~ DQ!9;
300 
301 		int i, m;
302 		for (i = 0; i < bands; i++) {
303 			float s = 0;
304 			int ba = *pba++;
305 			int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0;
306 			for (m = 4; m; m >>= 1) {
307 				if (mask & m) {
308 					int b = get_bits(bs, 6);
309 					s = g_deq_L12[ba * 3 - 6 + b % 3] * (1 << 21 >> b / 3);
310 				}
311 				*scf++ = s;
312 			}
313 		}
314 	}
315 
316 	void L12_read_scale_info(const(ubyte)* hdr, bs_t* bs, L12_scale_info* sci) {
317 		static const ubyte[] g_bitalloc_code_tab = [
318 			0, 17, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 17, 18, 3, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 0, 17, 18, 3, 19, 4, 5, 16, 0, 17, 18, 16,
319 			0, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 17, 18, 3, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
320 			15, 16
321 		];
322 		const(L12_subband_alloc_t)* subband_alloc = L12_subband_alloc_table(hdr, sci);
323 
324 		int i, k = 0, ba_bits = 0;
325 		const(ubyte)* ba_code_tab = g_bitalloc_code_tab.ptr;
326 
327 		for (i = 0; i < sci.total_bands; i++) {
328 			ubyte ba;
329 			if (i == k) {
330 				k += subband_alloc.band_count;
331 				ba_bits = subband_alloc.code_tab_width;
332 				ba_code_tab = g_bitalloc_code_tab.ptr + subband_alloc.tab_offset;
333 				subband_alloc++;
334 			}
335 			ba = ba_code_tab[get_bits(bs, ba_bits)];
336 			sci.bitalloc[2 * i] = ba;
337 			if (i < sci.stereo_bands) {
338 				ba = ba_code_tab[get_bits(bs, ba_bits)];
339 			}
340 			sci.bitalloc[2 * i + 1] = sci.stereo_bands ? ba : 0;
341 		}
342 
343 		for (i = 0; i < 2 * sci.total_bands; i++) {
344 			sci.scfcod[i] = cast(ubyte)(sci.bitalloc[i] ? HDR_IS_LAYER_1(hdr) ? 2 : get_bits(bs, 2) : 6);
345 		}
346 
347 		L12_read_scalefactors(bs, sci.bitalloc.ptr, sci.scfcod.ptr, sci.total_bands * 2, sci.scf.ptr);
348 
349 		for (i = sci.stereo_bands; i < sci.total_bands; i++) {
350 			sci.bitalloc[2 * i + 1] = 0;
351 		}
352 	}
353 
354 	int L12_dequantize_granule(float* grbuf, bs_t* bs, L12_scale_info* sci, int group_size) {
355 		int i, j, k, choff = 576;
356 		for (j = 0; j < 4; j++) {
357 			float* dst = grbuf + group_size * j;
358 			for (i = 0; i < 2 * sci.total_bands; i++) {
359 				int ba = sci.bitalloc[i];
360 				if (ba != 0) {
361 					if (ba < 17) {
362 						int half = (1 << (ba - 1)) - 1;
363 						for (k = 0; k < group_size; k++) {
364 							dst[k] = cast(float)(cast(int) get_bits(bs, ba) - half);
365 						}
366 					}
367 					else {
368 						uint mod = (2 << (ba - 17)) + 1; // 3, 5, 9* /
369 						uint code = get_bits(bs, mod + 2 - (mod >> 3)); // 5, 7, 10* /
370 						for (k = 0; k < group_size; k++, code /= mod) {
371 							dst[k] = cast(float)(cast(int)(code % mod - mod / 2));
372 						}
373 					}
374 				}
375 				dst += choff;
376 				choff = 18 - choff;
377 			}
378 		}
379 		return group_size * 4;
380 	}
381 
382 	void L12_apply_scf_384(L12_scale_info* sci, const(float)* scf, float* dst) {
383 		int i, k;
384 		memcpy(dst + 576 + sci.stereo_bands * 18, dst + sci.stereo_bands * 18, (sci.total_bands - sci.stereo_bands) * 18 * float.sizeof);
385 		for (i = 0; i < sci.total_bands; i++, dst += 18, scf += 6) {
386 			for (k = 0; k < 12; k++) {
387 				dst[k + 0] *= scf[0];
388 				dst[k + 576] *= scf[3];
389 			}
390 		}
391 	}
392 }
393 
// Parses the Layer III side information from `bs` into consecutive entries of `gr`.
//
// Params:
//   bs  = bit reader positioned at the start of the side information
//   gr  = output array; 1, 2 or 4 entries are written (1 or 2 depending on
//         HDR_IS_MONO, doubled when the MPEG1 bit is set)
//   hdr = header bytes of the frame
// Returns: the main_data_begin field, or -1 when the data is rejected
//   (big_values above 288, window switching signalled with block_type 0, or
//   the part_23_length total not fitting in the remaining bits plus
//   main_data_begin bytes). On a -1 return some entries of `gr` may already
//   have been written.
int L3_read_side_info(bs_t* bs, L3_gr_info_t* gr, const(ubyte)* hdr) {
	// Band-width tables, one row per sample-rate index (see sr_idx below).
	// Rows of g_scf_mixed are shorter than 40; the remaining elements keep
	// their default value.
	static const ubyte[23][8] g_scf_long = [
		[6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, 0],
		[12, 12, 12, 12, 12, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 76, 90, 2, 2, 2, 2, 2, 0],
		[6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, 0],
		[6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16, 18, 22, 26, 32, 38, 46, 54, 62, 70, 76, 36, 0],
		[6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, 0],
		[4, 4, 4, 4, 4, 4, 6, 6, 8, 8, 10, 12, 16, 20, 24, 28, 34, 42, 50, 54, 76, 158, 0],
		[4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 10, 12, 16, 18, 22, 28, 34, 40, 46, 54, 54, 192, 0],
		[4, 4, 4, 4, 4, 4, 6, 6, 8, 10, 12, 16, 20, 24, 30, 38, 46, 56, 68, 84, 102, 26, 0]
	];
	static const ubyte[40][8] g_scf_short = [
		[4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14, 14, 18, 18, 18, 24, 24, 24, 30, 30, 30, 40, 40, 40, 18, 18, 18, 0],
		[8, 8, 8, 8, 8, 8, 8, 8, 8, 12, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 24, 28, 28, 28, 36, 36, 36, 2, 2, 2, 2, 2, 2, 2, 2, 2, 26, 26, 26, 0],
		[4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 8, 8, 8, 10, 10, 10, 14, 14, 14, 18, 18, 18, 26, 26, 26, 32, 32, 32, 42, 42, 42, 18, 18, 18, 0],
		[4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14, 14, 18, 18, 18, 24, 24, 24, 32, 32, 32, 44, 44, 44, 12, 12, 12, 0],
		[4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14, 14, 18, 18, 18, 24, 24, 24, 30, 30, 30, 40, 40, 40, 18, 18, 18, 0],
		[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14, 14, 18, 18, 18, 22, 22, 22, 30, 30, 30, 56, 56, 56, 0],
		[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 10, 10, 10, 12, 12, 12, 14, 14, 14, 16, 16, 16, 20, 20, 20, 26, 26, 26, 66, 66, 66, 0],
		[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 8, 8, 12, 12, 12, 16, 16, 16, 20, 20, 20, 26, 26, 26, 34, 34, 34, 42, 42, 42, 12, 12, 12, 0]
	];
	static const ubyte[40][8] g_scf_mixed = [
		[6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14, 14, 18, 18, 18, 24, 24, 24, 30, 30, 30, 40, 40, 40, 18, 18, 18, 0],
		[12, 12, 12, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 24, 28, 28, 28, 36, 36, 36, 2, 2, 2, 2, 2, 2, 2, 2, 2, 26, 26, 26, 0],
		[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 10, 10, 10, 14, 14, 14, 18, 18, 18, 26, 26, 26, 32, 32, 32, 42, 42, 42, 18, 18, 18, 0],
		[6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14, 14, 18, 18, 18, 24, 24, 24, 32, 32, 32, 44, 44, 44, 12, 12, 12, 0],
		[6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14, 14, 18, 18, 18, 24, 24, 24, 30, 30, 30, 40, 40, 40, 18, 18, 18, 0],
		[4, 4, 4, 4, 4, 4, 6, 6, 4, 4, 4, 6, 6, 6, 8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14, 14, 18, 18, 18, 22, 22, 22, 30, 30, 30, 56, 56, 56, 0],
		[4, 4, 4, 4, 4, 4, 6, 6, 4, 4, 4, 6, 6, 6, 6, 6, 6, 10, 10, 10, 12, 12, 12, 14, 14, 14, 16, 16, 16, 20, 20, 20, 26, 26, 26, 66, 66, 66, 0],
		[4, 4, 4, 4, 4, 4, 6, 6, 4, 4, 4, 6, 6, 6, 8, 8, 8, 12, 12, 12, 16, 16, 16, 20, 20, 20, 26, 26, 26, 34, 34, 34, 42, 42, 42, 12, 12, 12, 0]
	];

	uint tables, scfsi = 0;
	int main_data_begin, part_23_sum = 0;
	// Table row: the combined sample-rate index, shifted down by one unless it
	// is already 0 (so indices 0 and 1 share row 0).
	int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr);
	sr_idx -= (sr_idx != 0);
	int gr_count = HDR_IS_MONO(hdr) ? 1 : 2;

	if (HDR_TEST_MPEG1(hdr)) {
		gr_count *= 2;
		main_data_begin = get_bits(bs, 9);
		// 7 + gr_count bits are read in one go; only the low bits are used as
		// scfsi flags further down.
		scfsi = get_bits(bs, 7 + gr_count);
	}
	else {
		// The low gr_count bits of this read are discarded by the shift.
		main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count;
	}

	// One iteration per output entry; the order of get_bits calls below is the
	// bitstream layout and must not be changed.
	do {
		if (HDR_IS_MONO(hdr)) {
			// Extra shift for mono so the per-entry extraction at the end of
			// the loop ("(scfsi >> 12) & 15") picks up this entry's flags.
			scfsi <<= 4;
		}
		gr.part_23_length = cast(ushort) get_bits(bs, 12);
		part_23_sum += gr.part_23_length;
		gr.big_values = cast(ushort) get_bits(bs, 9);
		if (gr.big_values > 288) {
			return -1;
		}
		gr.global_gain = cast(ubyte) get_bits(bs, 8);
		gr.scalefac_compress = cast(ushort) get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9);
		// Defaults for long blocks; overridden below for short/mixed blocks.
		gr.sfbtab = g_scf_long[sr_idx].ptr;
		gr.n_long_sfb = 22;
		gr.n_short_sfb = 0;
		if (get_bits(bs, 1)) {
			// Window-switching bit set: block_type must be non-zero.
			gr.block_type = cast(ubyte) get_bits(bs, 2);
			if (!gr.block_type) {
				return -1;
			}
			gr.mixed_block_flag = cast(ubyte) get_bits(bs, 1);
			gr.region_count[0] = 7;
			gr.region_count[1] = 255;
			if (gr.block_type == SHORT_BLOCK_TYPE) {
				scfsi &= 0x0F0F;
				if (!gr.mixed_block_flag) {
					gr.region_count[0] = 8;
					gr.sfbtab = g_scf_short[sr_idx].ptr;
					gr.n_long_sfb = 0;
					gr.n_short_sfb = 39;
				}
				else {
					gr.sfbtab = g_scf_mixed[sr_idx].ptr;
					gr.n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6;
					gr.n_short_sfb = 30;
				}
			}
			// Only two 5-bit table selectors are present here; shifting by 5
			// leaves the third selector as 0.
			tables = get_bits(bs, 10);
			tables <<= 5;
			gr.subblock_gain[0] = cast(ubyte) get_bits(bs, 3);
			gr.subblock_gain[1] = cast(ubyte) get_bits(bs, 3);
			gr.subblock_gain[2] = cast(ubyte) get_bits(bs, 3);
			// Note: region_count[2] is not assigned on this path.
		}
		else {
			gr.block_type = 0;
			gr.mixed_block_flag = 0;
			// Three 5-bit table selectors.
			tables = get_bits(bs, 15);
			gr.region_count[0] = cast(ubyte) get_bits(bs, 4);
			gr.region_count[1] = cast(ubyte) get_bits(bs, 3);
			gr.region_count[2] = 255;
		}
		gr.table_select[0] = cast(ubyte)(tables >> 10);
		gr.table_select[1] = cast(ubyte)((tables >> 5) & 31);
		gr.table_select[2] = cast(ubyte)((tables) & 31);
		// The preflag bit is only present when the MPEG1 bit is set; otherwise
		// it is derived from scalefac_compress.
		gr.preflag = cast(ubyte)(HDR_TEST_MPEG1(hdr) ? get_bits(bs, 1) : (gr.scalefac_compress >= 500));
		gr.scalefac_scale = cast(ubyte) get_bits(bs, 1);
		gr.count1_table = cast(ubyte) get_bits(bs, 1);
		gr.scfsi = cast(ubyte)((scfsi >> 12) & 15);
		scfsi <<= 4;
		gr++;
	}
	while (--gr_count);

	// The part_23_length total must fit in what is left of this buffer plus
	// main_data_begin bytes.
	if (part_23_sum + bs.pos > bs.limit + main_data_begin * 8) {
		return -1;
	}

	return main_data_begin;
}
510 
// Reads up to four partitions of scale factors into `scf` and `ist_pos`.
//
// Params:
//   scf       = output scale factors; three extra zero bytes are written after
//               the last partition
//   ist_pos   = second output array; also the source when a partition is reused
//   scf_size  = bit width of the values in each partition (0 means "all zero")
//   scf_count = number of values in each partition; a 0 entry ends the list
//   bitbuf    = bit reader
//   scfsi     = flag word, doubled after every partition: bit 3 set means
//               "copy this partition from ist_pos instead of reading it"; a
//               negative value makes the all-ones code store 255 in ist_pos
void L3_read_scalefactors(ubyte* scf, ubyte* ist_pos, const(ubyte)* scf_size, const(ubyte)* scf_count, bs_t* bitbuf, int scfsi) {
	for (int part = 0; part < 4 && scf_count[part] != 0; part++, scfsi *= 2) {
		const int count = scf_count[part];
		if (scfsi & 8) {
			memcpy(scf, ist_pos, count);
		}
		else {
			const int width = scf_size[part];
			if (width != 0) {
				// -1 can never equal a value returned by get_bits here, which
				// disables the substitution when scfsi is not negative.
				const int all_ones = (scfsi < 0) ? (1 << width) - 1 : -1;
				foreach (n; 0 .. count) {
					const int value = get_bits(bitbuf, width);
					ist_pos[n] = cast(ubyte)(value == all_ones ? -1 : value);
					scf[n] = cast(ubyte) value;
				}
			}
			else {
				memset(scf, 0, count);
				memset(ist_pos, 0, count);
			}
		}
		ist_pos += count;
		scf += count;
	}
	scf[0] = 0;
	scf[1] = 0;
	scf[2] = 0;
}
538 
// Scales `y` down by a power of two given in quarter steps, i.e. the result
// is y * 2^(-exp_q2 / 4).
//
// The exponent is consumed in chunks of at most 120 quarter steps. The loop
// body always runs at least once, so `exp_q2 == 0` multiplies by
// g_expfrac[0] * 2^30.
float L3_ldexp_q2(float y, int exp_q2) {
	// Element i is 2^(-30 - i/4); element 0 cancels the 2^30 factor below.
	static const float[4] g_expfrac = [9.31322575e-10f, 7.83145814e-10f, 6.58544508e-10f, 5.53767716e-10f];
	for (;;) {
		const int chunk = MINIMP3_MIN(30 * 4, exp_q2);
		y *= g_expfrac[chunk & 3] * (1 << 30 >> (chunk >> 2));
		exp_q2 -= chunk;
		if (exp_q2 <= 0) {
			break;
		}
	}
	return y;
}
549 
// Decodes the scale factors of one Layer III entry and converts them to
// floating-point gains.
//
// Params:
//   hdr     = header bytes of the frame
//   ist_pos = second output of L3_read_scalefactors (passed straight through)
//   bs      = bit reader positioned at the scale factor data
//   gr      = side information of this entry
//   scf     = output: n_long_sfb + n_short_sfb gain values
//   ch      = channel index; only tested for non-zero, together with the
//             intensity-stereo header bit, when the MPEG1 bit is clear
void L3_decode_scalefactors(const(ubyte)* hdr, ubyte* ist_pos, bs_t* bs, const(L3_gr_info_t)* gr, float* scf, int ch) {
	// Partition sizes. Row 0: long blocks only, row 1: mixed (both counts
	// non-zero), row 2: short blocks only. Each row holds groups of four counts.
	static const ubyte[28][3] g_scf_partitions = [
		[6, 5, 5, 5, 6, 5, 5, 5, 6, 5, 7, 3, 11, 10, 0, 0, 7, 7, 7, 0, 6, 6, 6, 3, 8, 8, 5, 0],
		[8, 9, 6, 12, 6, 9, 9, 9, 6, 9, 12, 6, 15, 18, 0, 0, 6, 15, 12, 0, 6, 12, 9, 6, 6, 18, 9, 0],
		[9, 9, 6, 12, 9, 9, 9, 9, 9, 9, 12, 6, 18, 18, 0, 0, 12, 12, 12, 0, 12, 9, 9, 6, 15, 12, 9, 0]
	];
	const(ubyte)* scf_partition = g_scf_partitions[!!gr.n_short_sfb + !gr.n_long_sfb].ptr;
	ubyte[4] scf_size;
	ubyte[40] iscf;
	int i, scf_shift = gr.scalefac_scale + 1, gain_exp, scfsi = gr.scfsi;
	float gain;

	if (HDR_TEST_MPEG1(hdr)) {
		// Each entry packs two bit widths: (part >> 2) for partitions 0 and 1,
		// (part & 3) for partitions 2 and 3. scalefac_compress is a 4-bit
		// field on this path, so the index stays within the 16 entries.
		static const ubyte[16] g_scfc_decode = [0, 1, 2, 3, 12, 5, 6, 7, 9, 10, 11, 13, 14, 15, 18, 19];
		int part = g_scfc_decode[gr.scalefac_compress];
		scf_size[1] = scf_size[0] = cast(ubyte)(part >> 2);
		scf_size[3] = scf_size[2] = cast(ubyte)(part & 3);
	}
	else {
		// Six rows of four radices; the second three rows are used when `ist`
		// is 1 (intensity-stereo bit set and ch non-zero).
		static const ubyte[6 * 4] g_mod = [5, 5, 4, 4, 5, 5, 4, 1, 4, 3, 1, 1, 5, 6, 6, 1, 4, 4, 4, 1, 4, 3, 1, 1];
		int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch;
		sfc = gr.scalefac_compress >> ist;
		// Mixed-radix decode: split sfc into four digits using the radices of
		// row k, then subtract the row's capacity (modprod) and move to the
		// next row while the remainder is still non-negative. The sizes kept
		// are those from the last row tried.
		for (k = ist * 3 * 4; sfc >= 0; sfc -= modprod, k += 4) {
			for (modprod = 1, i = 3; i >= 0; i--) {
				scf_size[i] = cast(ubyte)(sfc / modprod % g_mod[k + i]);
				modprod *= g_mod[k + i];
			}
		}
		// k has advanced by 4 past the row that produced scf_size; it selects
		// the matching group of four counts in the partition row.
		scf_partition += k;
		// Negative and with bit 3 clear: L3_read_scalefactors never takes its
		// copy path and enables its all-ones substitution for ist_pos.
		scfsi = -16;
	}
	L3_read_scalefactors(iscf.ptr, ist_pos, scf_size.ptr, scf_partition, bs, scfsi);

	if (gr.n_short_sfb) {
		// Add the per-window subblock gain to the short-block factors, three
		// consecutive entries at a time.
		int sh = 3 - scf_shift;
		for (i = 0; i < gr.n_short_sfb; i += 3) {
			iscf[gr.n_long_sfb + i + 0] += gr.subblock_gain[0] << sh;
			iscf[gr.n_long_sfb + i + 1] += gr.subblock_gain[1] << sh;
			iscf[gr.n_long_sfb + i + 2] += gr.subblock_gain[2] << sh;
		}
	}
	else if (gr.preflag) {
		// Fixed offsets added to factors 11..20 when preflag is set.
		static const ubyte[10] g_preamp = [1, 1, 1, 1, 2, 2, 3, 3, 3, 2];
		for (i = 0; i < 10; i++) {
			iscf[11 + i] += g_preamp[i];
		}
	}

	// Base gain from global_gain; MAX_SCFI - gain_exp is never negative
	// because gain_exp is at most MAX_SCF.
	gain_exp = gr.global_gain + BITS_DEQUANTIZER_OUT * 4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0);
	gain = L3_ldexp_q2(1 << (MAX_SCFI / 4), MAX_SCFI - gain_exp);
	for (i = 0; i < cast(int)(gr.n_long_sfb + gr.n_short_sfb); i++) {
		scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift);
	}
}
604 
// Lookup table of signed n^(4/3) values, consumed by L3_pow_43() as
// g_pow43[16 + n].
// Elements 0..15 hold the negated values for n = 0..15; elements 16..144 hold
// the positive values for n = 0..128 (e.g. n = 8 -> 16.0, n = 27 -> 81.0).
static const float[129 + 16] g_pow43 = [
	0, -1, -2.519842f, -4.326749f, -6.349604f, -8.549880f, -10.902724f, -13.390518f, -16.000000f, -18.720754f, -21.544347f, -24.463781f, -27.473142f,
	-30.567351f, -33.741992f, -36.993181f, 0, 1, 2.519842f, 4.326749f, 6.349604f, 8.549880f, 10.902724f, 13.390518f, 16.000000f, 18.720754f, 21.544347f, 24.463781f,
	27.473142f, 30.567351f, 33.741992f, 36.993181f, 40.317474f, 43.711787f, 47.173345f, 50.699631f, 54.288352f, 57.937408f, 61.644865f, 65.408941f, 69.227979f,
	73.100443f, 77.024898f, 81.000000f, 85.024491f, 89.097188f, 93.216975f, 97.382800f, 101.593667f, 105.848633f, 110.146801f, 114.487321f, 118.869381f, 123.292209f,
	127.755065f, 132.257246f, 136.798076f, 141.376907f, 145.993119f, 150.646117f, 155.335327f, 160.060199f, 164.820202f, 169.614826f, 174.443577f, 179.305980f,
	184.201575f, 189.129918f, 194.090580f, 199.083145f, 204.107210f, 209.162385f, 214.248292f, 219.364564f, 224.510845f, 229.686789f, 234.892058f, 240.126328f,
	245.389280f, 250.680604f, 256.000000f, 261.347174f, 266.721841f, 272.123723f, 277.552547f, 283.008049f, 288.489971f, 293.998060f, 299.532071f, 305.091761f,
	310.676898f, 316.287249f, 321.922592f, 327.582707f, 333.267377f, 338.976394f, 344.709550f, 350.466646f, 356.247482f, 362.051866f, 367.879608f, 373.730522f,
	379.604427f, 385.501143f, 391.420496f, 397.362314f, 403.326427f, 409.312672f, 415.320884f, 421.350905f, 427.402579f, 433.475750f, 439.570269f, 445.685987f,
	451.822757f, 457.980436f, 464.158883f, 470.357960f, 476.577530f, 482.817459f, 489.077615f, 495.357868f, 501.658090f, 507.978156f, 514.317941f, 520.677324f,
	527.056184f, 533.454404f, 539.871867f, 546.308458f, 552.764065f, 559.238575f, 565.731879f, 572.243870f, 578.774440f, 585.323483f, 591.890898f, 598.476581f,
	605.080431f, 611.702349f, 618.342238f, 625.000000f, 631.675540f, 638.368763f, 645.079578f
];
619 
// Approximates the signed 4/3 power of `x` using g_pow43.
//
// Values below 129 are read straight from the table (which also carries
// entries for -16..-1). Larger values are reduced to a table entry `base`
// near x/64 (x is first multiplied by 8 when it is below 1024), corrected
// with a second-order polynomial in the relative remainder, and rescaled by
// 16 (= 8^(4/3)) or 256 (= 64^(4/3)).
float L3_pow_43(int x) {
	if (x < 129) {
		return g_pow43[16 + x];
	}

	int rescale = 256;
	if (x < 1024) {
		rescale = 16;
		x <<= 3;
	}

	// 64 when the low six bits are 32 or more, so that `base` rounds to the
	// nearest multiple of 64 and the remainder becomes negative; else 0.
	const int round_up = (2 * x) & 64;
	const float frac = cast(float)((x & 63) - round_up) / ((x & ~63) + round_up);
	return g_pow43[16 + ((x + round_up) >> 6)] * (1.0f + frac * ((4.0f / 3) + frac * (2.0f / 9))) * rescale;
}
637 
638 void L3_huffman(float* dst, bs_t* bs, const(L3_gr_info_t)* gr_info, const(float)* scf, int layer3gr_limit) {
639 	static const short[] tabs = [
640 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 785, 785, 785, 785, 784, 784, 784, 784, 513, 513, 513, 513, 513,
641 		513, 513, 513, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, -255, 1313, 1298, 1282, 785, 785, 785, 785, 784, 784, 784, 784,
642 		769, 769, 769, 769, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 290, 288, -255, 1313, 1298, 1282, 769, 769, 769, 769, 529,
643 		529, 529, 529, 529, 529, 529, 529, 528, 528, 528, 528, 528, 528, 528, 528, 512, 512, 512, 512, 512, 512, 512, 512, 290, 288, -253, -318, -351, -367, 785,
644 		785, 785, 785, 784, 784, 784, 784, 769, 769, 769, 769, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 819, 818, 547, 547, 275,
645 		275, 275, 275, 561, 560, 515, 546, 289, 274, 288, 258, -254, -287, 1329, 1299, 1314, 1312, 1057, 1057, 1042, 1042, 1026, 1026, 784, 784, 784, 784, 529, 529,
646 		529, 529, 529, 529, 529, 529, 769, 769, 769, 769, 768, 768, 768, 768, 563, 560, 306, 306, 291, 259, -252, -413, -477, -542, 1298, -575, 1041, 1041, 784, 784,
647 		784, 784, 769, 769, 769, 769, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, -383, -399, 1107, 1092, 1106, 1061, 849, 849,
648 		789, 789, 1104, 1091, 773, 773, 1076, 1075, 341, 340, 325, 309, 834, 804, 577, 577, 532, 532, 516, 516, 832, 818, 803, 816, 561, 561, 531, 531, 515, 546, 289,
649 		289, 288, 258, -252, -429, -493, -559, 1057, 1057, 1042, 1042, 529, 529, 529, 529, 529, 529, 529, 529, 784, 784, 784, 784, 769, 769, 769, 769, 512, 512, 512,
650 		512, 512, 512, 512, 512, -382, 1077, -415, 1106, 1061, 1104, 849, 849, 789, 789, 1091, 1076, 1029, 1075, 834, 834, 597, 581, 340, 340, 339, 324, 804, 833, 532,
651 		532, 832, 772, 818, 803, 817, 787, 816, 771, 290, 290, 290, 290, 288, 258, -253, -349, -414, -447, -463, 1329, 1299, -479, 1314, 1312, 1057, 1057, 1042, 1042,
652 		1026, 1026, 785, 785, 785, 785, 784, 784, 784, 784, 769, 769, 769, 769, 768, 768, 768, 768, -319, 851, 821, -335, 836, 850, 805, 849, 341, 340, 325, 336, 533,
653 		533, 579, 579, 564, 564, 773, 832, 578, 548, 563, 516, 321, 276, 306, 291, 304, 259, -251, -572, -733, -830, -863, -879, 1041, 1041, 784, 784, 784, 784,
654 		769, 769, 769, 769, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, -511, -527, -543, 1396, 1351, 1381, 1366, 1395, 1335,
655 		1380, -559, 1334, 1138, 1138, 1063, 1063, 1350, 1392, 1031, 1031, 1062, 1062, 1364, 1363, 1120, 1120, 1333, 1348, 881, 881, 881, 881, 375, 374, 359, 373, 343,
656 		358, 341, 325, 791, 791, 1123, 1122, -703, 1105, 1045, -719, 865, 865, 790, 790, 774, 774, 1104, 1029, 338, 293, 323, 308, -799, -815, 833, 788, 772, 818,
657 		803, 816, 322, 292, 307, 320, 561, 531, 515, 546, 289, 274, 288, 258, -251, -525, -605, -685, -765, -831, -846, 1298, 1057, 1057, 1312, 1282, 785, 785, 785,
658 		785, 784, 784, 784, 784, 769, 769, 769, 769, 512, 512, 512, 512, 512, 512, 512, 512, 1399, 1398, 1383, 1367, 1382, 1396, 1351, -511, 1381, 1366, 1139, 1139,
659 		1079, 1079, 1124, 1124, 1364, 1349, 1363, 1333, 882, 882, 882, 882, 807, 807, 807, 807, 1094, 1094, 1136, 1136, 373, 341, 535, 535, 881, 775, 867, 822, 774,
660 		-591, 324, 338, -671, 849, 550, 550, 866, 864, 609, 609, 293, 336, 534, 534, 789, 835, 773, -751, 834, 804, 308, 307, 833, 788, 832, 772, 562, 562, 547, 547,
661 		305, 275, 560, 515, 290, 290, -252, -397, -477, -557, -622, -653, -719, -735, -750, 1329, 1299, 1314, 1057, 1057, 1042, 1042, 1312, 1282, 1024, 1024, 785, 785,
662 		785, 785, 784, 784, 784, 784, 769, 769, 769, 769, -383, 1127, 1141, 1111, 1126, 1140, 1095, 1110, 869, 869, 883, 883, 1079, 1109, 882, 882, 375, 374, 807, 868,
663 		838, 881, 791, -463, 867, 822, 368, 263, 852, 837, 836, -543, 610, 610, 550, 550, 352, 336, 534, 534, 865, 774, 851, 821, 850, 805, 593, 533, 579, 564, 773,
664 		832, 578, 578, 548, 548, 577, 577, 307, 276, 306, 291, 516, 560, 259, 259, -250, -2107, -2507, -2764, -2909, -2974, -3007, -3023, 1041, 1041, 1040, 1040,
665 		769, 769, 769, 769, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, -767, -1052, -1213, -1277, -1358, -1405, -1469,
666 		-1535, -1550, -1582, -1614, -1647, -1662, -1694, -1726, -1759, -1774, -1807, -1822, -1854, -1886, 1565, -1919, -1935, -1951, -1967, 1731, 1730, 1580, 1717,
667 		-1983, 1729, 1564, -1999, 1548, -2015, -2031, 1715, 1595, -2047, 1714, -2063, 1610, -2079, 1609, -2095, 1323, 1323, 1457, 1457, 1307, 1307, 1712, 1547, 1641,
668 		1700, 1699, 1594, 1685, 1625, 1442, 1442, 1322, 1322, -780, -973, -910, 1279, 1278, 1277, 1262, 1276, 1261, 1275, 1215, 1260, 1229, -959, 974, 974, 989, 989,
669 		-943, 735, 478, 478, 495, 463, 506, 414, -1039, 1003, 958, 1017, 927, 942, 987, 957, 431, 476, 1272, 1167, 1228, -1183, 1256, -1199, 895, 895, 941, 941, 1242,
670 		1227, 1212, 1135, 1014, 1014, 490, 489, 503, 487, 910, 1013, 985, 925, 863, 894, 970, 955, 1012, 847, -1343, 831, 755, 755, 984, 909, 428, 366, 754, 559,
671 		-1391, 752, 486, 457, 924, 997, 698, 698, 983, 893, 740, 740, 908, 877, 739, 739, 667, 667, 953, 938, 497, 287, 271, 271, 683, 606, 590, 712, 726, 574, 302,
672 		302, 738, 736, 481, 286, 526, 725, 605, 711, 636, 724, 696, 651, 589, 681, 666, 710, 364, 467, 573, 695, 466, 466, 301, 465, 379, 379, 709, 604, 665, 679, 316,
673 		316, 634, 633, 436, 436, 464, 269, 424, 394, 452, 332, 438, 363, 347, 408, 393, 448, 331, 422, 362, 407, 392, 421, 346, 406, 391, 376, 375, 359, 1441, 1306,
674 		-2367, 1290, -2383, 1337, -2399, -2415, 1426, 1321, -2431, 1411, 1336, -2447, -2463, -2479, 1169, 1169, 1049, 1049, 1424, 1289, 1412, 1352, 1319, -2495, 1154,
675 		1154, 1064, 1064, 1153, 1153, 416, 390, 360, 404, 403, 389, 344, 374, 373, 343, 358, 372, 327, 357, 342, 311, 356, 326, 1395, 1394, 1137, 1137, 1047, 1047, 1365,
676 		1392, 1287, 1379, 1334, 1364, 1349, 1378, 1318, 1363, 792, 792, 792, 792, 1152, 1152, 1032, 1032, 1121, 1121, 1046, 1046, 1120, 1120, 1030, 1030, -2895, 1106,
677 		1061, 1104, 849, 849, 789, 789, 1091, 1076, 1029, 1090, 1060, 1075, 833, 833, 309, 324, 532, 532, 832, 772, 818, 803, 561, 561, 531, 560, 515, 546, 289, 274,
678 		288, 258, -250, -1179, -1579, -1836, -1996, -2124, -2253, -2333, -2413, -2477, -2542, -2574, -2607, -2622, -2655, 1314, 1313, 1298, 1312, 1282, 785, 785, 785,
679 		785, 1040, 1040, 1025, 1025, 768, 768, 768, 768, -766, -798, -830, -862, -895, -911, -927, -943, -959, -975, -991, -1007, -1023, -1039, -1055, -1070,
680 		1724, 1647, -1103, -1119, 1631, 1767, 1662, 1738, 1708, 1723, -1135, 1780, 1615, 1779, 1599, 1677, 1646, 1778, 1583, -1151, 1777, 1567, 1737, 1692, 1765, 1722,
681 		1707, 1630, 1751, 1661, 1764, 1614, 1736, 1676, 1763, 1750, 1645, 1598, 1721, 1691, 1762, 1706, 1582, 1761, 1566, -1167, 1749, 1629, 767, 766, 751, 765, 494,
682 		494, 735, 764, 719, 749, 734, 763, 447, 447, 748, 718, 477, 506, 431, 491, 446, 476, 461, 505, 415, 430, 475, 445, 504, 399, 460, 489, 414, 503, 383, 474, 429,
683 		459, 502, 502, 746, 752, 488, 398, 501, 473, 413, 472, 486, 271, 480, 270, -1439, -1455, 1357, -1471, -1487, -1503, 1341, 1325, -1519, 1489, 1463, 1403,
684 		1309, -1535, 1372, 1448, 1418, 1476, 1356, 1462, 1387, -1551, 1475, 1340, 1447, 1402, 1386, -1567, 1068, 1068, 1474, 1461, 455, 380, 468, 440, 395, 425, 410,
685 		454, 364, 467, 466, 464, 453, 269, 409, 448, 268, 432, 1371, 1473, 1432, 1417, 1308, 1460, 1355, 1446, 1459, 1431, 1083, 1083, 1401, 1416, 1458, 1445, 1067,
686 		1067, 1370, 1457, 1051, 1051, 1291, 1430, 1385, 1444, 1354, 1415, 1400, 1443, 1082, 1082, 1173, 1113, 1186, 1066, 1185, 1050, -1967, 1158, 1128, 1172, 1097,
687 		1171, 1081, -1983, 1157, 1112, 416, 266, 375, 400, 1170, 1142, 1127, 1065, 793, 793, 1169, 1033, 1156, 1096, 1141, 1111, 1155, 1080, 1126, 1140, 898, 898, 808,
688 		808, 897, 897, 792, 792, 1095, 1152, 1032, 1125, 1110, 1139, 1079, 1124, 882, 807, 838, 881, 853, 791, -2319, 867, 368, 263, 822, 852, 837, 866, 806, 865,
689 		-2399, 851, 352, 262, 534, 534, 821, 836, 594, 594, 549, 549, 593, 593, 533, 533, 848, 773, 579, 579, 564, 578, 548, 563, 276, 276, 577, 576, 306, 291, 516,
690 		560, 305, 305, 275, 259, -251, -892, -2058, -2620, -2828, -2957, -3023, -3039, 1041, 1041, 1040, 1040, 769, 769, 769, 769, 256, 256, 256, 256, 256, 256, 256,
691 		256, 256, 256, 256, 256, 256, 256, 256, 256, -511, -527, -543, -559, 1530, -575, -591, 1528, 1527, 1407, 1526, 1391, 1023, 1023, 1023, 1023, 1525, 1375, 1268,
692 		1268, 1103, 1103, 1087, 1087, 1039, 1039, 1523, -604, 815, 815, 815, 815, 510, 495, 509, 479, 508, 463, 507, 447, 431, 505, 415, 399, -734, -782, 1262, -815,
693 		1259, 1244, -831, 1258, 1228, -847, -863, 1196, -879, 1253, 987, 987, 748, -767, 493, 493, 462, 477, 414, 414, 686, 669, 478, 446, 461, 445, 474, 429, 487,
694 		458, 412, 471, 1266, 1264, 1009, 1009, 799, 799, -1019, -1276, -1452, -1581, -1677, -1757, -1821, -1886, -1933, -1997, 1257, 1257, 1483, 1468, 1512, 1422,
695 		1497, 1406, 1467, 1496, 1421, 1510, 1134, 1134, 1225, 1225, 1466, 1451, 1374, 1405, 1252, 1252, 1358, 1480, 1164, 1164, 1251, 1251, 1238, 1238, 1389, 1465,
696 		-1407, 1054, 1101, -1423, 1207, -1439, 830, 830, 1248, 1038, 1237, 1117, 1223, 1148, 1236, 1208, 411, 426, 395, 410, 379, 269, 1193, 1222, 1132, 1235, 1221,
697 		1116, 976, 976, 1192, 1162, 1177, 1220, 1131, 1191, 963, 963, -1647, 961, 780, -1663, 558, 558, 994, 993, 437, 408, 393, 407, 829, 978, 813, 797, 947, -1743,
698 		721, 721, 377, 392, 844, 950, 828, 890, 706, 706, 812, 859, 796, 960, 948, 843, 934, 874, 571, 571, -1919, 690, 555, 689, 421, 346, 539, 539, 944, 779, 918, 873,
699 		932, 842, 903, 888, 570, 570, 931, 917, 674, 674, -2575, 1562, -2591, 1609, -2607, 1654, 1322, 1322, 1441, 1441, 1696, 1546, 1683, 1593, 1669, 1624, 1426, 1426,
700 		1321, 1321, 1639, 1680, 1425, 1425, 1305, 1305, 1545, 1668, 1608, 1623, 1667, 1592, 1638, 1666, 1320, 1320, 1652, 1607, 1409, 1409, 1304, 1304, 1288, 1288, 1664,
701 		1637, 1395, 1395, 1335, 1335, 1622, 1636, 1394, 1394, 1319, 1319, 1606, 1621, 1392, 1392, 1137, 1137, 1137, 1137, 345, 390, 360, 375, 404, 373, 1047, -2751,
702 		-2767, -2783, 1062, 1121, 1046, -2799, 1077, -2815, 1106, 1061, 789, 789, 1105, 1104, 263, 355, 310, 340, 325, 354, 352, 262, 339, 324, 1091, 1076, 1029, 1090,
703 		1060, 1075, 833, 833, 788, 788, 1088, 1028, 818, 818, 803, 803, 561, 561, 531, 531, 816, 771, 546, 546, 289, 274, 288, 258, -253, -317, -381, -446, -478,
704 		-509, 1279, 1279, -811, -1179, -1451, -1756, -1900, -2028, -2189, -2253, -2333, -2414, -2445, -2511, -2526, 1313, 1298, -2559, 1041, 1041, 1040, 1040, 1025,
705 		1025, 1024, 1024, 1022, 1007, 1021, 991, 1020, 975, 1019, 959, 687, 687, 1018, 1017, 671, 671, 655, 655, 1016, 1015, 639, 639, 758, 758, 623, 623, 757, 607,
706 		756, 591, 755, 575, 754, 559, 543, 543, 1009, 783, -575, -621, -685, -749, 496, -590, 750, 749, 734, 748, 974, 989, 1003, 958, 988, 973, 1002, 942, 987, 957,
707 		972, 1001, 926, 986, 941, 971, 956, 1000, 910, 985, 925, 999, 894, 970, -1071, -1087, -1102, 1390, -1135, 1436, 1509, 1451, 1374, -1151, 1405, 1358, 1480,
708 		1420, -1167, 1507, 1494, 1389, 1342, 1465, 1435, 1450, 1326, 1505, 1310, 1493, 1373, 1479, 1404, 1492, 1464, 1419, 428, 443, 472, 397, 736, 526, 464, 464, 486,
709 		457, 442, 471, 484, 482, 1357, 1449, 1434, 1478, 1388, 1491, 1341, 1490, 1325, 1489, 1463, 1403, 1309, 1477, 1372, 1448, 1418, 1433, 1476, 1356, 1462, 1387,
710 		-1439, 1475, 1340, 1447, 1402, 1474, 1324, 1461, 1371, 1473, 269, 448, 1432, 1417, 1308, 1460, -1711, 1459, -1727, 1441, 1099, 1099, 1446, 1386, 1431, 1401,
711 		-1743, 1289, 1083, 1083, 1160, 1160, 1458, 1445, 1067, 1067, 1370, 1457, 1307, 1430, 1129, 1129, 1098, 1098, 268, 432, 267, 416, 266, 400, -1887, 1144, 1187,
712 		1082, 1173, 1113, 1186, 1066, 1050, 1158, 1128, 1143, 1172, 1097, 1171, 1081, 420, 391, 1157, 1112, 1170, 1142, 1127, 1065, 1169, 1049, 1156, 1096, 1141, 1111,
713 		1155, 1080, 1126, 1154, 1064, 1153, 1140, 1095, 1048, -2159, 1125, 1110, 1137, -2175, 823, 823, 1139, 1138, 807, 807, 384, 264, 368, 263, 868, 838, 853, 791,
714 		867, 822, 852, 837, 866, 806, 865, 790, -2319, 851, 821, 836, 352, 262, 850, 805, 849, -2399, 533, 533, 835, 820, 336, 261, 578, 548, 563, 577, 532, 532, 832,
715 		772, 562, 562, 547, 547, 305, 275, 560, 515, 290, 290, 288, 258
716 	];
717 	static const ubyte[] tab32 = [130, 162, 193, 209, 44, 28, 76, 140, 9, 9, 9, 9, 9, 9, 9, 9, 190, 254, 222, 238, 126, 94, 157, 157, 109, 61, 173, 205];
718 	static const ubyte[] tab33 = [252, 236, 220, 204, 188, 172, 156, 140, 124, 108, 92, 76, 60, 44, 28, 12];
719 	static const short[2 * 16] tabindex = [
720 		0, 32, 64, 98, 0, 132, 180, 218, 292, 364, 426, 538, 648, 746, 0, 1126, 1460, 1460, 1460, 1460, 1460, 1460, 1460, 1460, 1842, 1842, 1842, 1842, 1842, 1842, 1842,
721 		1842
722 	];
723 	static const ubyte[] g_linbits = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 6, 8, 10, 13, 4, 5, 6, 7, 8, 9, 11, 13];
724 
725 	float one = 0.0f;
726 	int ireg = 0, big_val_cnt = gr_info.big_values;
727 	const(ubyte)* sfb = gr_info.sfbtab;
728 	const(ubyte)* bs_next_ptr = bs.buf + bs.pos / 8;
729 	uint bs_cache = (((bs_next_ptr[0] * 256u + bs_next_ptr[1]) * 256u + bs_next_ptr[2]) * 256u + bs_next_ptr[3]) << (bs.pos & 7);
730 	int pairs_to_decode, np, bs_sh = (bs.pos & 7) - 8;
731 	bs_next_ptr += 4;
732 
733 	auto PEEK_BITS(N)(N n) {
734 		return (bs_cache >> (32 - n));
735 	}
736 
737 	void FLUSH_BITS(N)(N n) {
738 		bs_cache <<= (n);
739 		bs_sh += (n);
740 	}
741 
742 	auto CHECK_BITS() {
743 		while (bs_sh >= 0) {
744 			bs_cache |= (cast(uint)*bs_next_ptr++ << bs_sh);
745 			bs_sh -= 8;
746 		}
747 	}
748 
749 	auto BSPOS() {
750 		return ((bs_next_ptr - bs.buf) * 8 - 24 + bs_sh);
751 	}
752 
753 	while (big_val_cnt > 0) {
754 		int tab_num = gr_info.table_select[ireg];
755 		int sfb_cnt = gr_info.region_count[ireg++];
756 		const(short)* codebook = tabs.ptr + tabindex[tab_num];
757 		int linbits = g_linbits[tab_num];
758 		do {
759 			np = *sfb++ / 2;
760 			pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
761 			one = *scf++;
762 			do {
763 				int j, w = 5;
764 				int leaf = codebook[PEEK_BITS(w)];
765 				while (leaf < 0) {
766 					FLUSH_BITS(w);
767 					w = leaf & 7;
768 					leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
769 				}
770 				FLUSH_BITS(leaf >> 8);
771 
772 				for (j = 0; j < 2; j++, dst++, leaf >>= 4) {
773 					int lsb = leaf & 0x0F;
774 					if (lsb == 15 && linbits) {
775 						lsb += PEEK_BITS(linbits);
776 						FLUSH_BITS(linbits);
777 						CHECK_BITS;
778 						*dst = one * L3_pow_43(lsb) * (cast(int) bs_cache < 0 ? -1 : 1);
779 					}
780 					else {
781 						*dst = g_pow43[16 + lsb - 16 * (bs_cache >> 31)] * one;
782 					}
783 					FLUSH_BITS(lsb ? 1 : 0);
784 				}
785 				CHECK_BITS;
786 			}
787 			while (--pairs_to_decode);
788 		}
789 		while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
790 	}
791 
792 	for (np = 1 - big_val_cnt;; dst += 4) {
793 		const(ubyte)* codebook_count1 = (gr_info.count1_table) ? tab33.ptr : tab32.ptr;
794 		int leaf = codebook_count1[PEEK_BITS(4)];
795 		if (!(leaf & 8)) {
796 			leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
797 		}
798 		FLUSH_BITS(leaf & 7);
799 		if (BSPOS > layer3gr_limit) {
800 			break;
801 		}
802 
803 		bool RELOAD_SCALEFACTOR() {
804 			if (!--np) {
805 				np = *sfb++ / 2;
806 				if (!np)
807 					return true;
808 				one = *scf++;
809 			}
810 			return false;
811 		}
812 
813 		auto DEQ_COUNT1(S)(S s) {
814 			if (leaf & (128 >> s)) {
815 				dst[s] = (cast(int) bs_cache < 0) ? -one : one;
816 				FLUSH_BITS(1);
817 			}
818 		}
819 
820 		if (RELOAD_SCALEFACTOR())
821 			break;
822 		DEQ_COUNT1(0);
823 		DEQ_COUNT1(1);
824 		if (RELOAD_SCALEFACTOR())
825 			break;
826 		DEQ_COUNT1(2);
827 		DEQ_COUNT1(3);
828 		CHECK_BITS;
829 	}
830 
831 	bs.pos = layer3gr_limit;
832 }
833 
/**
 * Replaces, in place, each pair (left[i], left[576 + i]) for i in [0, n) with
 * their sum (stored at left[i]) and their difference (stored at left[576 + i]).
 *
 * Params:
 *   left = first channel; the second channel is addressed 576 floats further on
 *   n    = number of sample pairs to process
 */
void L3_midside_stereo(float* left, int n) {
	int idx = 0;
	float* right = left + 576;
	version (HAVE_SIMD) {
		// Vector path handles four pairs at a time; the scalar loop below finishes the rest.
		if (have_simd())
			for (; idx < n - 3; idx += 4) {
				f4 vl = VLD(left + idx);
				f4 vr = VLD(right + idx);
				VSTORE(left + idx, VADD(vl, vr));
				VSTORE(right + idx, VSUB(vl, vr));
			}
	}
	while (idx < n) {
		const float first = left[idx];
		const float second = right[idx];
		left[idx] = first + second;
		right[idx] = first - second;
		idx++;
	}
}
853 
/**
 * Derives two scaled copies of the first `n` samples at `left`: the copy
 * multiplied by `kr` is written 576 floats further on, and the copy multiplied
 * by `kl` overwrites the source samples.
 */
void L3_intensity_stereo_band(float* left, int n, float kl, float kr) {
	float* right = left + 576;
	foreach (k; 0 .. n) {
		const float sample = left[k];
		right[k] = sample * kr;
		left[k] = sample * kl;
	}
}
861 
/**
 * Scans `nbands` consecutive bands of `right` and records, for each residue
 * class `band % 3`, the highest band index that contains a non-zero coefficient.
 *
 * Params:
 *   right    = coefficients, stored band after band
 *   sfb      = width (number of coefficients) of each band
 *   nbands   = number of bands to scan
 *   max_band = receives the result; an entry stays -1 when no band of its
 *              residue class holds a non-zero coefficient
 *
 * `max_band` is taken by `ref` on purpose: a D static array is a value type, so
 * a by-value `int[3]` parameter would only update a private copy and the caller
 * would never observe the result.
 */
void L3_stereo_top_band(const(float)* right, const(ubyte)* sfb, int nbands, ref int[3] max_band) {
	max_band[] = -1;

	foreach (band; 0 .. nbands) {
		const int width = sfb[band];
		// Coefficients are examined two at a time; the first non-zero pair marks the band.
		for (int k = 0; k < width; k += 2) {
			if (right[k] != 0 || right[k + 1] != 0) {
				max_band[band % 3] = band;
				break;
			}
		}
		right += width;
	}
}
877 
/**
 * Walks the zero-terminated band-width table `sfb` and, band by band, applies
 * either L3_intensity_stereo_band or L3_midside_stereo to the samples at `left`.
 *
 * A band gets the intensity treatment when its index lies above
 * `max_band[band % 3]` and its `ist_pos` entry is below the limit (7 when
 * HDR_TEST_MPEG1(hdr) holds, 64 otherwise). Any other band is passed to
 * L3_midside_stereo if HDR_TEST_MS_STEREO(hdr) holds, and left untouched if not.
 *
 * Params:
 *   left      = samples, advanced by each band's width
 *   ist_pos   = per-band position values
 *   sfb       = band widths, terminated by a zero entry
 *   hdr       = frame header bytes queried through the HDR_TEST_* helpers
 *   max_band  = per-residue-class threshold band indices
 *   mpeg2_sh  = extra left shift applied to the exponent in the non-MPEG1 branch
 */
void L3_stereo_process(float* left, const(ubyte)* ist_pos, const(ubyte)* sfb, const(ubyte)* hdr, int[3] max_band, int mpeg2_sh) {
	static const float[7 * 2] g_pan = [0, 1, 0.21132487f, 0.78867513f, 0.36602540f, 0.63397460f, 0.5f, 0.5f, 0.63397460f, 0.36602540f, 0.78867513f, 0.21132487f, 1, 0];
	const uint max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64;

	for (uint band = 0; sfb[band]; band++) {
		const uint ipos = ist_pos[band];
		const bool use_intensity = cast(int) band > max_band[band % 3] && ipos < max_pos;

		if (!use_intensity) {
			if (HDR_TEST_MS_STEREO(hdr)) {
				L3_midside_stereo(left, sfb[band]);
			}
			left += sfb[band];
			continue;
		}

		const float gain = HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1;
		float kl, kr;
		if (HDR_TEST_MPEG1(hdr)) {
			// Table holds (kl, kr) pairs indexed by position.
			kl = g_pan[2 * ipos];
			kr = g_pan[2 * ipos + 1];
		}
		else {
			const float scaled = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
			// Odd positions put the scaled factor on kl, even ones on kr.
			if (ipos & 1) {
				kl = scaled;
				kr = 1;
			}
			else {
				kl = 1;
				kr = scaled;
			}
		}
		L3_intensity_stereo_band(left, sfb[band], kl * gain, kr * gain);
		left += sfb[band];
	}
}
906 
/**
 * Prepares the per-band thresholds and the trailing `ist_pos` entries, then
 * hands the granule to L3_stereo_process.
 *
 * Steps, in order:
 *  1. L3_stereo_top_band is called on the samples 576 floats after `left`.
 *  2. If the granule has long bands (`gr.n_long_sfb != 0`), all three
 *     thresholds are replaced by their maximum.
 *  3. The last `max_blocks` entries of `ist_pos` (3 when short bands exist,
 *     otherwise 1) are filled with either a default position (3 when
 *     HDR_TEST_MPEG1(hdr) holds, else 0) or the value `max_blocks` entries earlier.
 *  4. L3_stereo_process runs with bit 0 of `gr[1].scalefac_compress` as its shift.
 */
void L3_intensity_stereo(float* left, ubyte* ist_pos, const(L3_gr_info_t)* gr, const(ubyte)* hdr) {
	int[3] max_band;
	const int n_sfb = gr.n_long_sfb + gr.n_short_sfb;
	const int max_blocks = gr.n_short_sfb ? 3 : 1;

	L3_stereo_top_band(left + 576, gr.sfbtab, n_sfb, max_band);
	if (gr.n_long_sfb) {
		const int highest = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]);
		max_band[] = highest;
	}

	foreach (blk; 0 .. max_blocks) {
		const int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;
		const int itop = n_sfb - max_blocks + blk;
		const int prev = itop - max_blocks;
		if (max_band[blk] >= prev) {
			ist_pos[itop] = cast(ubyte) default_pos;
		}
		else {
			ist_pos[itop] = ist_pos[prev];
		}
	}
	L3_stereo_process(left, ist_pos, gr.sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
}
924 
/**
 * Interleaves groups of three equally sized runs of samples.
 *
 * `sfb` is read with a stride of three entries until a zero width is found.
 * For each width `len`, the three consecutive runs src[0 .. len],
 * src[len .. 2*len] and src[2*len .. 3*len] are written to `scratch` as
 * triples (run0[k], run1[k], run2[k]). The interleaved data is finally copied
 * back over the start of `grbuf`.
 */
void L3_reorder(float* grbuf, float* scratch, const(ubyte)* sfb) {
	float* src = grbuf;
	float* dst = scratch;

	while (*sfb != 0) {
		const int len = *sfb;
		foreach (k; 0 .. len) {
			dst[0] = src[k];
			dst[1] = src[k + len];
			dst[2] = src[k + 2 * len];
			dst += 3;
		}
		src += 3 * len;
		sfb += 3;
	}
	memcpy(grbuf, scratch, (dst - scratch) * float.sizeof);
}
939 
/**
 * Applies an in-place 8-point butterfly across `nbands` boundaries between
 * adjacent 18-sample bands.
 *
 * For every boundary and every i in [0, 8), the pair
 * (grbuf[18 + i], grbuf[17 - i]) = (u, d) becomes
 * (u*g_aa[0][i] - d*g_aa[1][i], u*g_aa[1][i] + d*g_aa[0][i]).
 * Nothing happens when `nbands` is zero or negative.
 */
void L3_antialias(float* grbuf, int nbands) {
	static const float[8][2] g_aa = [
		[0.85749293f, 0.88174200f, 0.94962865f, 0.98331459f, 0.99551782f, 0.99916056f, 0.99989920f, 0.99999316f],
		[0.51449576f, 0.47173197f, 0.31337745f, 0.18191320f, 0.09457419f, 0.04096558f, 0.01419856f, 0.00369997f]
	];

	while (nbands > 0) {
		int i = 0;
		version (HAVE_SIMD) {
			// NOTE(review): `g_aa[0] + i` adds an int to a static array; this probably
			// needs `.ptr` before it compiles with HAVE_SIMD set - confirm.
			if (have_simd())
				for (; i < 8; i += 4) {
					f4 vu = VLD(grbuf + 18 + i);
					f4 vd = VLD(grbuf + 14 - i);
					f4 vc0 = VLD(g_aa[0] + i);
					f4 vc1 = VLD(g_aa[1] + i);
					vd = VREV(vd);
					VSTORE(grbuf + 18 + i, VSUB(VMUL(vu, vc0), VMUL(vd, vc1)));
					vd = VADD(VMUL(vu, vc1), VMUL(vd, vc0));
					VSTORE(grbuf + 14 - i, VREV(vd));
				}
		}
		version (MINIMP3_ONLY_SIMD) {
		}
		else {
			// Scalar path: also finishes whatever the vector loop left over.
			for (; i < 8; i++) {
				float* upper = grbuf + 18 + i;
				float* lower = grbuf + 17 - i;
				const float u = *upper;
				const float d = *lower;
				*upper = u * g_aa[0][i] - d * g_aa[1][i];
				*lower = u * g_aa[1][i] + d * g_aa[0][i];
			}
		}
		nbands--;
		grbuf += 18;
	}
}
973 
/**
 * In-place 9-point transform of y[0 .. 9].
 *
 * The even-indexed and odd-indexed inputs are reduced separately to four
 * intermediate values each (plus y[4], which depends on even inputs only);
 * the outputs are sums and differences of one even and one odd intermediate.
 */
void L3_dct3_9(float* y) {
	// Even-indexed inputs.
	const float e0 = y[0], e2 = y[2], e4 = y[4], e6 = y[6], e8 = y[8];
	const float base = e0 + e6 * 0.5f;
	const float diff06 = e0 - e6;
	const float p4 = (e4 + e2) * 0.93969262f;
	const float p2 = (e8 + e2) * 0.76604444f;
	const float p6 = (e4 - e8) * 0.17364818f;
	const float mix = e4 + (e8 - e2);

	const float evenA = diff06 - mix * 0.5f;
	y[4] = mix + diff06;
	const float evenB = base - p2 + p6;
	const float evenC = base - p4 + p2;
	const float evenD = base + p4 - p6;

	// Odd-indexed inputs.
	const float o1 = y[1], o5 = y[5], o7 = y[7];
	const float o3 = y[3] * 0.86602540f;
	const float q0 = (o5 + o1) * 0.98480775f;
	const float q4 = (o5 - o7) * 0.34202014f;
	const float q2 = (o1 + o7) * 0.64278761f;
	const float oddA = (o1 - o5 - o7) * 0.86602540f;

	const float oddB = q0 - o3 - q2;
	const float oddD = q4 - o3 - q0;
	const float oddC = q4 + o3 - q2;

	// Combine: mirrored outputs share one even and one odd term.
	y[0] = evenD - oddD;
	y[8] = evenD + oddD;
	y[1] = evenA + oddA;
	y[7] = evenA - oddA;
	y[2] = evenC - oddC;
	y[6] = evenC + oddC;
	y[3] = evenB + oddB;
	y[5] = evenB - oddB;
}
1019 
/**
 * Processes `nbands` consecutive bands of 18 samples in place, combining each
 * with 9 values carried in `overlap`.
 *
 * Per band: the 18 inputs are folded into two 9-element vectors (`co`, `si`),
 * each is passed through L3_dct3_9, the odd entries of `si` are negated, and
 * the results are rotated by the `g_twid9` factors. The new `overlap` values
 * are stored, while the band's output is formed from the previous overlap and
 * the rotated sum, weighted by `window[0 .. 9]` and `window[9 .. 18]`.
 *
 * Params:
 *   grbuf   = band samples, 18 per band, overwritten with the output
 *   overlap = carried state, 9 floats per band, read and then replaced
 *   window  = 18 weights
 *   nbands  = number of bands to process
 */
void L3_imdct36(float* grbuf, float* overlap, const(float)* window, int nbands) {
	int i;
	static const float[18] g_twid9 = [
		0.73727734f, 0.79335334f, 0.84339145f, 0.88701083f, 0.92387953f, 0.95371695f, 0.97629601f, 0.99144486f, 0.99904822f, 0.67559021f, 0.60876143f, 0.53729961f,
		0.46174861f, 0.38268343f, 0.30070580f, 0.21643961f, 0.13052619f, 0.04361938f
	];

	foreach (band; 0 .. nbands) {
		float[9] co;
		float[9] si;

		co[0] = -grbuf[0];
		si[0] = grbuf[17];
		foreach (q; 0 .. 4) {
			const(float)* g = grbuf + 4 * q;
			si[8 - 2 * q] = g[1] - g[2];
			co[1 + 2 * q] = g[1] + g[2];
			si[7 - 2 * q] = g[4] - g[3];
			co[2 + 2 * q] = -(g[3] + g[4]);
		}
		L3_dct3_9(co.ptr);
		L3_dct3_9(si.ptr);

		// Flip the sign of the odd-indexed sine terms.
		for (int k = 1; k < 8; k += 2) {
			si[k] = -si[k];
		}

		i = 0;

		version (HAVE_SIMD) {
			// NOTE(review): `co + i`, `si + i` and `g_twid9 + i` add an int to static
			// arrays; these probably need `.ptr` before this compiles with HAVE_SIMD - confirm.
			if (have_simd())
				for (; i < 8; i += 4) {
					f4 vovl = VLD(overlap + i);
					f4 vc = VLD(co + i);
					f4 vs = VLD(si + i);
					f4 vr0 = VLD(g_twid9 + i);
					f4 vr1 = VLD(g_twid9 + 9 + i);
					f4 vw0 = VLD(window + i);
					f4 vw1 = VLD(window + 9 + i);
					f4 vsum = VADD(VMUL(vc, vr1), VMUL(vs, vr0));
					VSTORE(overlap + i, VSUB(VMUL(vc, vr0), VMUL(vs, vr1)));
					VSTORE(grbuf + i, VSUB(VMUL(vovl, vw0), VMUL(vsum, vw1)));
					vsum = VADD(VMUL(vovl, vw1), VMUL(vsum, vw0));
					VSTORE(grbuf + 14 - i, VREV(vsum));
				}
		}
		for (; i < 9; i++) {
			const float tw_lo = g_twid9[0 + i];
			const float tw_hi = g_twid9[9 + i];
			const float win_lo = window[0 + i];
			const float win_hi = window[9 + i];
			const float ovl = overlap[i];
			const float sum = co[i] * tw_hi + si[i] * tw_lo;
			overlap[i] = co[i] * tw_lo - si[i] * tw_hi;
			grbuf[i] = ovl * win_lo - sum * win_hi;
			grbuf[17 - i] = ovl * win_hi + sum * win_lo;
		}

		grbuf += 18;
		overlap += 9;
	}
}
1074 
/**
 * Three-point transform: writes
 *   dst[0] = (x0 - x2/2) + x1*0.86602540,
 *   dst[1] =  x0 + x2,
 *   dst[2] = (x0 - x2/2) - x1*0.86602540.
 */
void L3_idct3(float x0, float x1, float x2, float* dst) {
	const float odd = x1 * 0.86602540f;
	const float even = x0 - x2 * 0.5f;
	dst[0] = even + odd;
	dst[1] = x0 + x2;
	dst[2] = even - odd;
}
1082 
/**
 * Transforms six inputs taken from `x` with a stride of 3 (x[0], x[3], ..., x[15])
 * into six outputs in `dst`, combining them with three carried values.
 *
 * Params:
 *   x       = input samples; indices 0, 3, 6, 9, 12 and 15 are read
 *   dst     = receives six output values
 *   overlap = three carried values, read first and then replaced
 */
void L3_imdct12(float* x, float* dst, float* overlap) {
	static const float[6] g_twid3 = [0.79335334f, 0.92387953f, 0.99144486f, 0.60876143f, 0.38268343f, 0.13052619f];
	float[3] co;
	float[3] si;

	L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co.ptr);
	L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si.ptr);
	si[1] = -si[1];

	foreach (k; 0 .. 3) {
		const float rot_lo = g_twid3[0 + k];
		const float rot_hi = g_twid3[3 + k];
		const float carried = overlap[k];
		const float sum = co[k] * rot_hi + si[k] * rot_lo;
		overlap[k] = co[k] * rot_lo - si[k] * rot_hi;
		// The same table, read in reverse, supplies the output weights.
		dst[k] = carried * g_twid3[2 - k] - sum * g_twid3[5 - k];
		dst[5 - k] = carried * g_twid3[5 - k] + sum * g_twid3[2 - k];
	}
}
1101 
/**
 * Processes `nbands` bands of 18 samples with three L3_imdct12 passes each.
 *
 * Per band: the input is copied aside, the first six outputs are taken
 * directly from `overlap[0 .. 6]`, and the three passes (starting at input
 * offsets 0, 1 and 2) write to grbuf[6 .. 12], grbuf[12 .. 18] and
 * overlap[0 .. 6], all sharing overlap[6 .. 9] as carried state.
 */
void L3_imdct_short(float* grbuf, float* overlap, int nbands) {
	while (nbands > 0) {
		float[18] tmp;
		tmp[] = grbuf[0 .. 18];
		memcpy(grbuf, overlap, 6 * float.sizeof);

		float* carry = overlap + 6;
		L3_imdct12(tmp.ptr, grbuf + 6, carry);
		L3_imdct12(tmp.ptr + 1, grbuf + 12, carry);
		L3_imdct12(tmp.ptr + 2, overlap, carry);

		nbands--;
		overlap += 9;
		grbuf += 18;
	}
}
1112 
/**
 * Negates the odd-indexed samples of every odd-numbered 18-sample band
 * (bands 1, 3, ..., 31) of a 32-band buffer, in place.
 */
void L3_change_sign(float* grbuf) {
	float* band = grbuf + 18;
	foreach (pair; 0 .. 16) {
		for (int k = 1; k < 18; k += 2) {
			band[k] = -band[k];
		}
		band += 36;
	}
}
1119 
/**
 * Runs the per-band transform over all 32 bands of a granule.
 *
 * The first `n_long_bands` bands always go through L3_imdct36 with window 0.
 * The remaining bands go through L3_imdct_short when `block_type` equals
 * SHORT_BLOCK_TYPE; otherwise through L3_imdct36, using window 1 when
 * `block_type` equals STOP_BLOCK_TYPE and window 0 in every other case.
 */
void L3_imdct_gr(float* grbuf, float* overlap, uint block_type, uint n_long_bands) {
	static const float[18][2] g_mdct_window = [
		[
			0.99904822f, 0.99144486f, 0.97629601f, 0.95371695f, 0.92387953f, 0.88701083f, 0.84339145f, 0.79335334f, 0.73727734f, 0.04361938f, 0.13052619f, 0.21643961f,
			0.30070580f, 0.38268343f, 0.46174861f, 0.53729961f, 0.60876143f, 0.67559021f
		], [1, 1, 1, 1, 1, 1, 0.99144486f, 0.92387953f, 0.79335334f, 0, 0, 0, 0, 0, 0, 0.13052619f, 0.38268343f, 0.60876143f]
	];

	if (n_long_bands) {
		L3_imdct36(grbuf, overlap, g_mdct_window[0].ptr, n_long_bands);
		grbuf += 18 * n_long_bands;
		overlap += 9 * n_long_bands;
	}

	const uint remaining = 32 - n_long_bands;
	if (block_type == SHORT_BLOCK_TYPE) {
		L3_imdct_short(grbuf, overlap, remaining);
		return;
	}

	const(float)* window = g_mdct_window[block_type == STOP_BLOCK_TYPE ? 1 : 0].ptr;
	L3_imdct36(grbuf, overlap, window, remaining);
}
1137 
/**
 * Copies the bytes of `s.maindata` that lie between the current bit position
 * (rounded up to a whole byte) and the bit limit into `h.reserv_buf`, keeping
 * at most MAX_BITRESERVOIR_BYTES of them - the last ones when there are more.
 * `h.reserv` is set to the number of bytes kept (which may be zero or negative
 * when nothing is left, in which case no copy is made).
 */
void L3_save_reservoir(ref mp3dec_t h, mp3dec_scratch_t* s) {
	const int byte_pos = (s.bs.pos + 7) / 8u;
	const int available = s.bs.limit / 8u - byte_pos;
	const int kept = available > MAX_BITRESERVOIR_BYTES ? MAX_BITRESERVOIR_BYTES : available;
	// Skip the oldest bytes when more are available than the reservoir can hold.
	const int start = byte_pos + (available - kept);

	if (kept > 0) {
		memmove(h.reserv_buf.ptr, s.maindata.ptr + start, kept);
	}
	h.reserv = kept;
}
1150 
/**
 * Builds the working buffer `s.maindata` from the tail of the saved reservoir
 * followed by the unread bytes of the current frame, and initialises `s.bs`
 * over it.
 *
 * Params:
 *   h               = decoder state holding `reserv_buf` and its fill level `reserv`
 *   bs              = bit reader positioned at the frame's remaining data
 *   s               = scratch area receiving the assembled bytes
 *   main_data_begin = number of reservoir bytes requested
 *
 * Returns: non-zero when the reservoir held at least `main_data_begin` bytes.
 */
int L3_restore_reservoir(ref mp3dec_t h, bs_t* bs, mp3dec_scratch_t* s, int main_data_begin) {
	const int frame_bytes = (bs.limit - bs.pos) / 8;
	const int bytes_have = MINIMP3_MIN(h.reserv, main_data_begin);
	const int reserv_start = MINIMP3_MAX(0, h.reserv - main_data_begin);

	// Reservoir tail first, then the frame payload right behind it.
	memcpy(s.maindata.ptr, h.reserv_buf.ptr + reserv_start, MINIMP3_MIN(h.reserv, main_data_begin));
	memcpy(s.maindata.ptr + bytes_have, bs.buf + bs.pos / 8, frame_bytes);
	bs_init(&s.bs, s.maindata.ptr, bytes_have + frame_bytes);

	return h.reserv >= main_data_begin;
}
1159 
/**
 * Decodes one granule for `nch` channels into `s.grbuf`.
 *
 * Stages, in order:
 *  1. per channel: L3_decode_scalefactors, then L3_huffman limited to
 *     `part_23_length` bits counted from the bit position before the
 *     scalefactors were read;
 *  2. stereo processing: L3_intensity_stereo when HDR_TEST_I_STEREO holds,
 *     otherwise L3_midside_stereo over 576 samples when HDR_IS_MS_STEREO holds;
 *  3. per channel: optional L3_reorder (when the granule has short bands),
 *     L3_antialias, L3_imdct_gr and L3_change_sign.
 */
void L3_decode(ref mp3dec_t h, mp3dec_scratch_t* s, L3_gr_info_t* gr_info, int nch) {
	foreach (ch; 0 .. nch) {
		L3_gr_info_t* gr = gr_info + ch;
		int layer3gr_limit = s.bs.pos + gr.part_23_length;
		L3_decode_scalefactors(h.header.ptr, s.ist_pos[ch].ptr, &s.bs, gr, s.scf.ptr, ch);
		L3_huffman(s.grbuf[ch].ptr, &s.bs, gr, s.scf.ptr, layer3gr_limit);
	}

	if (HDR_TEST_I_STEREO(h.header)) {
		L3_intensity_stereo(s.grbuf[0].ptr, s.ist_pos[1].ptr, gr_info, h.header.ptr);
	}
	else if (HDR_IS_MS_STEREO(h.header)) {
		L3_midside_stereo(s.grbuf[0].ptr, 576);
	}

	foreach (ch; 0 .. nch) {
		L3_gr_info_t* gr = gr_info + ch;
		auto samples = s.grbuf[ch].ptr;
		const int n_long_bands = (gr.mixed_block_flag ? 2 : 0) << cast(int)(HDR_GET_MY_SAMPLE_RATE(h.header) == 2);
		int aa_bands = 31;

		if (gr.n_short_sfb) {
			// With short bands only the boundaries inside the long part are anti-aliased.
			aa_bands = n_long_bands - 1;
			L3_reorder(samples + n_long_bands * 18, s.syn[0].ptr, gr.sfbtab + gr.n_long_sfb);
		}

		L3_antialias(samples, aa_bands);
		L3_imdct_gr(samples, h.mdct_overlap[ch].ptr, gr.block_type, n_long_bands);
		L3_change_sign(samples);
	}
}
1190 
/**
 * In-place 32-point transform applied to `n` columns of `grbuf`.
 *
 * Column `k` consists of the 32 values grbuf[k + 18 * row], row = 0 .. 31,
 * i.e. the rows are 18 floats apart. Each column is processed in three steps:
 * a first butterfly stage that folds the 32 inputs into four groups of eight
 * (scaled by `g_sec`), an 8-point stage applied to each group, and a final
 * recombination that writes the 32 results back with the same stride.
 *
 * With HAVE_SIMD and a positive have_simd() the columns are processed four at
 * a time using vector helpers; otherwise (unless MINIMP3_ONLY_SIMD is set) the
 * scalar implementation in doNonSimdStuff handles one column per iteration.
 *
 * Params:
 *   grbuf = sample matrix, 32 rows with a row stride of 18 floats
 *   n     = number of columns to transform
 */
void mp3d_DCT_II(float* grbuf, int n) {
	// Three scale factors per first-stage index i (g_sec[3 * i + 0 .. 3]).
	static const(float)[24] g_sec = [
		10.19000816f, 0.50060302f, 0.50241929f, 3.40760851f, 0.50547093f, 0.52249861f, 2.05778098f, 0.51544732f, 0.56694406f, 1.48416460f, 0.53104258f, 0.64682180f,
		1.16943991f, 0.55310392f, 0.78815460f, 0.97256821f, 0.58293498f, 1.06067765f, 0.83934963f, 0.62250412f, 1.72244716f, 0.74453628f, 0.67480832f, 5.10114861f
	];
	int i, k = 0;

	// Scalar implementation; continues from the current value of `k`.
	auto doNonSimdStuff() {
		version (MINIMP3_ONLY_SIMD) {
		}
		else { /* MINIMP3_ONLY_SIMD */
			for (; k < n; k++) {
				float[8][4] t;
				float* x;
				float* y = grbuf + k;

				// Stage 1: fold rows i, 15 - i, 16 + i and 31 - i into t[0..4][i].
				// `x` walks t[0]; offsets 8, 16 and 24 reach the same column of t[1], t[2] and t[3].
				for (x = t[0].ptr, i = 0; i < 8; i++, x++) {
					float x0 = y[i * 18];
					float x1 = y[(15 - i) * 18];
					float x2 = y[(16 + i) * 18];
					float x3 = y[(31 - i) * 18];
					float t0 = x0 + x3;
					float t1 = x1 + x2;
					float t2 = (x1 - x2) * g_sec[3 * i + 0];
					float t3 = (x0 - x3) * g_sec[3 * i + 1];
					x[0] = t0 + t1;
					x[8] = (t0 - t1) * g_sec[3 * i + 2];
					x[16] = t3 + t2;
					x[24] = (t3 - t2) * g_sec[3 * i + 2];
				}
				// Stage 2: 8-point transform of each of the four rows of `t`, in place.
				for (x = t[0].ptr, i = 0; i < 4; i++, x += 8) {
					float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
					xt = x0 - x7;
					x0 += x7;
					x7 = x1 - x6;
					x1 += x6;
					x6 = x2 - x5;
					x2 += x5;
					x5 = x3 - x4;
					x3 += x4;
					x4 = x0 - x3;
					x0 += x3;
					x3 = x1 - x2;
					x1 += x2;
					x[0] = x0 + x1;
					x[4] = (x0 - x1) * 0.70710677f;
					x5 = x5 + x6;
					x6 = (x6 + x7) * 0.70710677f;
					x7 = x7 + xt;
					x3 = (x3 + x4) * 0.70710677f;
					x5 -= x7 * 0.198912367f; /* rotate by PI/8 */
					x7 += x5 * 0.382683432f;
					x5 -= x7 * 0.198912367f;
					x0 = xt - x6;
					xt += x6;
					x[1] = (xt + x7) * 0.50979561f;
					x[2] = (x4 + x3) * 0.54119611f;
					x[3] = (x0 - x5) * 0.60134488f;
					x[5] = (x0 + x5) * 0.89997619f;
					x[6] = (x4 - x3) * 1.30656302f;
					x[7] = (xt - x7) * 2.56291556f;

				}
				// Stage 3: recombine and write back, four output rows per step.
				for (i = 0; i < 7; i++, y += 4 * 18) {
					y[0 * 18] = t[0][i];
					y[1 * 18] = t[2][i] + t[3][i] + t[3][i + 1];
					y[2 * 18] = t[1][i] + t[1][i + 1];
					y[3 * 18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
				}
				// Last group of four rows: there is no element i + 1 to add.
				y[0 * 18] = t[0][7];
				y[1 * 18] = t[2][7] + t[3][7];
				y[2 * 18] = t[1][7];
				y[3 * 18] = t[3][7];
			}
		} /* MINIMP3_ONLY_SIMD */
	}

	version (HAVE_SIMD) {
		// Vector path: same three stages as the scalar code, four columns per iteration.
		if (have_simd())
			for (; k < n; k += 4) {
				f4[8][4] t;
				f4* x;
				float* y = grbuf + k;

				for (x = t[0].ptr, i = 0; i < 8; i++, x++) {
					f4 x0 = VLD(&y[i * 18]);
					f4 x1 = VLD(&y[(15 - i) * 18]);
					f4 x2 = VLD(&y[(16 + i) * 18]);
					f4 x3 = VLD(&y[(31 - i) * 18]);
					f4 t0 = VADD(x0, x3);
					f4 t1 = VADD(x1, x2);
					f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3 * i + 0]);
					f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3 * i + 1]);
					x[0] = VADD(t0, t1);
					x[8] = VMUL_S(VSUB(t0, t1), g_sec[3 * i + 2]);
					x[16] = VADD(t3, t2);
					x[24] = VMUL_S(VSUB(t3, t2), g_sec[3 * i + 2]);
				}
				for (x = t[0].ptr, i = 0; i < 4; i++, x += 8) {
					f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
					xt = VSUB(x0, x7);
					x0 = VADD(x0, x7);
					x7 = VSUB(x1, x6);
					x1 = VADD(x1, x6);
					x6 = VSUB(x2, x5);
					x2 = VADD(x2, x5);
					x5 = VSUB(x3, x4);
					x3 = VADD(x3, x4);
					x4 = VSUB(x0, x3);
					x0 = VADD(x0, x3);
					x3 = VSUB(x1, x2);
					x1 = VADD(x1, x2);
					x[0] = VADD(x0, x1);
					x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);
					x5 = VADD(x5, x6);
					x6 = VMUL_S(VADD(x6, x7), 0.70710677f);
					x7 = VADD(x7, xt);
					x3 = VMUL_S(VADD(x3, x4), 0.70710677f);
					x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
					x7 = VADD(x7, VMUL_S(x5, 0.382683432f));
					x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));
					x0 = VSUB(xt, x6);
					xt = VADD(xt, x6);
					x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);
					x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);
					x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);
					x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);
					x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);
					x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);
				}

				// Near the end of the column range a narrower store is used;
				// presumably VSAVE2 writes only the low two lanes so that columns
				// beyond `n` are not overwritten - confirm against the intrinsics.
				if (k > n - 3) {
					version (HAVE_SSE) {
						auto VSAVE2(V)(int i, ref V v) {
							_mm_storel_pi(cast(__m64*) cast(void*)&y[i * 18], v);
						}
					}
					else { /* HAVE_SSE */
						auto VSAVE2(V)(int i, ref V v) {
							vst1_f32(cast(float32_t*)&y[i * 18], vget_low_f32(v));
						}
					} /* HAVE_SSE */

					for (i = 0; i < 7; i++, y += 4 * 18) {
						f4 s = VADD(t[3][i], t[3][i + 1]);
						VSAVE2(0, t[0][i]);
						VSAVE2(1, VADD(t[2][i], s));
						VSAVE2(2, VADD(t[1][i], t[1][i + 1]));
						VSAVE2(3, VADD(t[2][1 + i], s));
					}
					VSAVE2(0, t[0][7]);
					VSAVE2(1, VADD(t[2][7], t[3][7]));
					VSAVE2(2, t[1][7]);
					VSAVE2(3, t[3][7]);
				}
				else {
					// Full-width store of all four columns.
					auto VSAVE4(V)(int i, ref V v) {
						VSTORE(&y[i * 18], v);
					}

					for (i = 0; i < 7; i++, y += 4 * 18) {
						f4 s = VADD(t[3][i], t[3][i + 1]);
						VSAVE4(0, t[0][i]);
						VSAVE4(1, VADD(t[2][i], s));
						VSAVE4(2, VADD(t[1][i], t[1][i + 1]));
						VSAVE4(3, VADD(t[2][1 + i], s));
					}
					VSAVE4(0, t[0][7]);
					VSAVE4(1, VADD(t[2][7], t[3][7]));
					VSAVE4(2, t[1][7]);
					VSAVE4(3, t[3][7]);
				}
			}
		else {
			// SIMD compiled in but not available at run time.
			doNonSimdStuff();
		}
	}
	else { /* HAVE_SIMD */
		doNonSimdStuff();
	} /* HAVE_SIMD */
}
1372 
version (MINIMP3_FLOAT_OUTPUT) {
	/// Float-output build: multiplies the sample by 1/32768.
	float mp3d_scale_pcm(float sample) {
		enum float scale = 1.0f / 32768.0f;
		return sample * scale;
	}

	/**
	 * Converts `num_samples` floats to 16-bit integers.
	 *
	 * Each input is multiplied by 32768. In the scalar loop, values at or above
	 * 32766.5 become 32767, values at or below -32767.5 become -32768, and
	 * everything else is converted by adding 0.5, truncating, and subtracting
	 * one more when the truncated value is negative. Nothing is done when
	 * `num_samples` is not positive.
	 */
	void mp3dec_f32_to_s16(const(float)* in_, short* out_, int num_samples) {
		if (num_samples <= 0) {
			return;
		}

		int i = 0;
		version (HAVE_SIMD) {
			// Vector path converts blocks of eight samples; the scalar loop handles the rest.
			int aligned_count = num_samples & ~7;

			for (; i < aligned_count; i += 8) {
				static const f4 g_scale = {32768.0f, 32768.0f, 32768.0f, 32768.0f};
				f4 a = VMUL(VLD(&in_[i]), g_scale);
				f4 b = VMUL(VLD(&in_[i + 4]), g_scale);
				version (HAVE_SSE) {
					static const f4 g_max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};
					static const f4 g_min = {-32768.0f, -32768.0f, -32768.0f, -32768.0f};
					__m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)), _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
					out_[i] = _mm_extract_epi16(pcm8, 0);
					out_[i + 1] = _mm_extract_epi16(pcm8, 1);
					out_[i + 2] = _mm_extract_epi16(pcm8, 2);
					out_[i + 3] = _mm_extract_epi16(pcm8, 3);
					out_[i + 4] = _mm_extract_epi16(pcm8, 4);
					out_[i + 5] = _mm_extract_epi16(pcm8, 5);
					out_[i + 6] = _mm_extract_epi16(pcm8, 6);
					out_[i + 7] = _mm_extract_epi16(pcm8, 7);
				}
				else { /* HAVE_SSE */
					int16x4_t pcma, pcmb;
					a = VADD(a, VSET(0.5f));
					b = VADD(b, VSET(0.5f));
					pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
					pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
					vst1_lane_s16(out_ + i, pcma, 0);
					vst1_lane_s16(out_ + i + 1, pcma, 1);
					vst1_lane_s16(out_ + i + 2, pcma, 2);
					vst1_lane_s16(out_ + i + 3, pcma, 3);
					vst1_lane_s16(out_ + i + 4, pcmb, 0);
					vst1_lane_s16(out_ + i + 5, pcmb, 1);
					vst1_lane_s16(out_ + i + 6, pcmb, 2);
					vst1_lane_s16(out_ + i + 7, pcmb, 3);
				} /* HAVE_SSE */
			}
		} /* HAVE_SIMD */

		while (i < num_samples) {
			const float scaled = in_[i] * 32768.0f;
			short pcm;
			if (scaled >= 32766.5) {
				pcm = cast(short) 32767;
			}
			else if (scaled <= -32767.5) {
				pcm = cast(short)-32768;
			}
			else {
				pcm = cast(short)(scaled + .5f);
				if (pcm < 0) {
					pcm--; /* away from zero, to be compliant */
				}
			}
			out_[i] = pcm;
			i++;
		}
	}
}
else { /* MINIMP3_FLOAT_OUTPUT */
	/**
	 * Integer-output build: converts a sample to a 16-bit value.
	 *
	 * Values at or above 32766.5 return 32767 and values at or below -32767.5
	 * return -32768. Anything else is converted by adding 0.5, truncating,
	 * and subtracting one more when the truncated value is negative.
	 */
	short mp3d_scale_pcm(float sample) {
		short pcm;
		if (sample >= 32766.5) {
			pcm = cast(short) 32767;
		}
		else if (sample <= -32767.5) {
			pcm = cast(short)-32768;
		}
		else {
			pcm = cast(short)(sample + .5f);
			if (pcm < 0) {
				pcm--; /* away from zero, to be compliant */
			}
		}
		return pcm;
	}
} /* MINIMP3_FLOAT_OUTPUT */
1444 
/**
 * Produces two output samples from fixed-weight sums over `z`.
 *
 * The first sum combines pairs of values taken 64 floats apart starting at
 * `z` and is stored in pcm[0]; the second uses the even multiples of 64
 * starting two floats later and is stored in pcm[16 * nch]. Both sums pass
 * through mp3d_scale_pcm before being stored.
 */
void mp3d_synth_pair(mp3d_sample_t* pcm, int nch, const(float)* z) {
	enum row = 64;

	float acc = (z[14 * row] - z[0]) * 29;
	acc += (z[1 * row] + z[13 * row]) * 213;
	acc += (z[12 * row] - z[2 * row]) * 459;
	acc += (z[3 * row] + z[11 * row]) * 2037;
	acc += (z[10 * row] - z[4 * row]) * 5153;
	acc += (z[5 * row] + z[9 * row]) * 6574;
	acc += (z[8 * row] - z[6 * row]) * 37489;
	acc += z[7 * row] * 75038;
	pcm[0] = mp3d_scale_pcm(acc);

	const(float)* shifted = z + 2;
	acc = shifted[14 * row] * 104;
	acc += shifted[12 * row] * 1567;
	acc += shifted[10 * row] * 9727;
	acc += shifted[8 * row] * 64019;
	acc += shifted[6 * row] * -9975;
	acc += shifted[4 * row] * -45;
	acc += shifted[2 * row] * 146;
	acc += shifted[0 * row] * -5;
	pcm[16 * nch] = mp3d_scale_pcm(acc);
}
1468 
/**
 * Produces windowed PCM output for one pair of input rows.
 *
 * The function first stores a handful of input samples into the work buffer
 * and lets mp3d_synth_pair() emit four output samples, then runs 15
 * iterations (i = 14 .. 0) that each store eight more input samples into the
 * work buffer, accumulate eight window taps from g_win, and write eight
 * output samples.
 *
 * Params:
 *   xl   = input samples of the first channel; read at offsets 0, 1,
 *          18 * k and 1 + 18 * k. The second channel is read from
 *          xl + 576 * (nch - 1), so for nch == 1 both channels alias.
 *   dstl = output samples of the first channel, written with a stride of
 *          nch; the second channel is written through dstl + (nch - 1).
 *   nch  = number of channels (used as output stride and channel offset).
 *   lins = work buffer; accessed relative to lins + 15 * 64, both below and
 *          above that position.
 */
void mp3d_synth(float* xl, mp3d_sample_t* dstl, int nch, float* lins) {
	int i;
	// With nch == 1 these alias xl / dstl, so every "right" access hits the same data.
	float* xr = xl + 576 * (nch - 1);
	mp3d_sample_t* dstr = dstl + (nch - 1);

	// Window coefficients, consumed strictly in order through `w` (two per S*/V* call).
	static const(float)[] g_win = [
		-1, 26, -31, 208, 218, 401, -519, 2063, 2000, 4788, -5517, 7134, 5959, 35640, -39336, 74992, -1, 24, -35, 202, 222, 347, -581, 2080, 1952, 4425, -5879,
		7640, 5288, 33791, -41176, 74856, -1, 21, -38, 196, 225, 294, -645, 2087, 1893, 4063, -6237, 8092, 4561, 31947, -43006, 74630, -1, 19, -41, 190, 227, 244,
		-711, 2085, 1822, 3705, -6589, 8492, 3776, 30112, -44821, 74313, -1, 17, -45, 183, 228, 197, -779, 2075, 1739, 3351, -6935, 8840, 2935, 28289, -46617, 73908,
		-1, 16, -49, 176, 228, 153, -848, 2057, 1644, 3004, -7271, 9139, 2037, 26482, -48390, 73415, -2, 14, -53, 169, 227, 111, -919, 2032, 1535, 2663, -7597,
		9389, 1082, 24694, -50137, 72835, -2, 13, -58, 161, 224, 72, -991, 2001, 1414, 2330, -7910, 9592, 70, 22929, -51853, 72169, -2, 11, -63, 154, 221, 36, -1064,
		1962, 1280, 2006, -8209, 9750, -998, 21189, -53534, 71420, -2, 10, -68, 147, 215, 2, -1137, 1919, 1131, 1692, -8491, 9863, -2122, 19478, -55178, 70590, -3,
		9, -73, 139, 208, -29, -1210, 1870, 970, 1388, -8755, 9935, -3300, 17799, -56778, 69679, -3, 8, -79, 132, 200, -57, -1283, 1817, 794, 1095, -8998, 9966,
		-4533, 16155, -58333, 68692, -4, 7, -85, 125, 189, -83, -1356, 1759, 605, 814, -9219, 9959, -5818, 14548, -59838, 67629, -4, 7, -91, 117, 177, -106, -1428,
		1698, 402, 545, -9416, 9916, -7154, 12980, -61289, 66494, -5, 6, -97, 111, 163, -127, -1498, 1634, 185, 288, -9585, 9838, -8540, 11455, -62684, 65290
	];
	// All buffer accesses below are relative to this position inside `lins`.
	float* zlin = lins + 15 * 64;
	const(float)* w = g_win.ptr;

	// Seed the two 4-float groups (left/right interleaved) at slots 15 and 31.
	zlin[4 * 15] = xl[18 * 16];
	zlin[4 * 15 + 1] = xr[18 * 16];
	zlin[4 * 15 + 2] = xl[0];
	zlin[4 * 15 + 3] = xr[0];

	zlin[4 * 31] = xl[1 + 18 * 16];
	zlin[4 * 31 + 1] = xr[1 + 18 * 16];
	zlin[4 * 31 + 2] = xl[1];
	zlin[4 * 31 + 3] = xr[1];

	// Right channel is handled first; when nch == 1 the left calls overwrite the same outputs.
	mp3d_synth_pair(dstr, nch, lins + 4 * 15 + 1);
	mp3d_synth_pair(dstr + 32 * nch, nch, lins + 4 * 15 + 64 + 1);
	mp3d_synth_pair(dstl, nch, lins + 4 * 15);
	mp3d_synth_pair(dstl + 32 * nch, nch, lins + 4 * 15 + 64);

	// Scalar implementation; compiled to an empty body when MINIMP3_ONLY_SIMD is set.
	void doEverythingNonSimd() {
		version (MINIMP3_ONLY_SIMD) {
		}
		else { /* MINIMP3_ONLY_SIMD */
			for (i = 14; i >= 0; i--) {
				// Four lanes each: [0] = left, [1] = right, [2] = left, [3] = right
				// (see the stores into dstl/dstr at the end of the iteration).
				float[4] a;
				float[4] b;
				// S0 starts the accumulation: overwrites a and b with the first tap.
				// Every S* call advances `w` by two coefficients.
				void S0(int k) {
					int j;
					float w0 = *w++;
					float w1 = *w++;
					float* vz = &zlin[4 * i - k * 64];
					float* vy = &zlin[4 * i - (15 - k) * 64];
					for (j = 0; j < 4; j++) {
						b[j] = vz[j] * w1 + vy[j] * w0;
						a[j] = vz[j] * w0 - vy[j] * w1;
					}
				}

				// S1 adds a tap with the same signs as S0.
				void S1(int k) {
					int j;
					float w0 = *w++;
					float w1 = *w++;
					float* vz = &zlin[4 * i - k * 64];
					float* vy = &zlin[4 * i - (15 - k) * 64];
					for (j = 0; j < 4; j++) {
						b[j] += vz[j] * w1 + vy[j] * w0;
						a[j] += vz[j] * w0 - vy[j] * w1;
					}
				}

				// S2 adds a tap with the contribution to `a` negated relative to S1.
				void S2(int k) {
					int j;
					float w0 = *w++;
					float w1 = *w++;
					float* vz = &zlin[4 * i - k * 64];
					float* vy = &zlin[4 * i - (15 - k) * 64];
					for (j = 0; j < 4; j++) {
						b[j] += vz[j] * w1 + vy[j] * w0;
						a[j] += vy[j] * w1 - vz[j] * w0;
					}
				}

				// Store this iteration's inputs into the work buffer before the taps read it.
				zlin[4 * i] = xl[18 * (31 - i)];
				zlin[4 * i + 1] = xr[18 * (31 - i)];
				zlin[4 * i + 2] = xl[1 + 18 * (31 - i)];
				zlin[4 * i + 3] = xr[1 + 18 * (31 - i)];
				zlin[4 * (i + 16)] = xl[1 + 18 * (1 + i)];
				zlin[4 * (i + 16) + 1] = xr[1 + 18 * (1 + i)];
				zlin[4 * (i - 16) + 2] = xl[18 * (1 + i)];
				zlin[4 * (i - 16) + 3] = xr[18 * (1 + i)];

				// Eight taps, 16 consecutive g_win entries; the call order must not change
				// because each call consumes the next two coefficients from `w`.
				S0(0);
				S2(1);
				S1(2);
				S2(3);
				S1(4);
				S2(5);
				S1(6);
				S2(7);

				// Right-channel stores come first; with nch == 1 the left stores overwrite them.
				dstr[(15 - i) * nch] = mp3d_scale_pcm(a[1]);
				dstr[(17 + i) * nch] = mp3d_scale_pcm(b[1]);
				dstl[(15 - i) * nch] = mp3d_scale_pcm(a[0]);
				dstl[(17 + i) * nch] = mp3d_scale_pcm(b[0]);
				dstr[(47 - i) * nch] = mp3d_scale_pcm(a[3]);
				dstr[(49 + i) * nch] = mp3d_scale_pcm(b[3]);
				dstl[(47 - i) * nch] = mp3d_scale_pcm(a[2]);
				dstl[(49 + i) * nch] = mp3d_scale_pcm(b[2]);
			}
		} /* MINIMP3_ONLY_SIMD */
	}

	// NOTE(review): the HAVE_SIMD branch below looks like an unfinished translation of
	// C macros: VLOAD/V0/V1/V2 take an untyped parameter `k`, V0..V2 use w0/w1/vz/vy
	// which are locals of VLOAD, and `a`/`b` are declared after the helpers that
	// assign them. Confirm whether this branch compiles before enabling HAVE_SIMD.
	version (HAVE_SIMD) {
		if (have_simd()) {
			for (i = 14; i >= 0; i--) {
				// Intended to load two window coefficients and the two 4-float groups for tap k.
				auto VLOAD(k) {
					f4 w0 = VSET(*w++);
					f4 w1 = VSET(*w++);
					f4 vz = VLD(&zlin[4 * i - 64 * k]);
					f4 vy = VLD(&zlin[4 * i - 64 * (15 - k)]);
				}

				// Vector counterpart of S0: first tap, overwrites a and b.
				auto V0(k) {
					VLOAD(k);
					b = VADD(VMUL(vz, w1), VMUL(vy, w0));
					a = VSUB(VMUL(vz, w0), VMUL(vy, w1));
				}

				// Vector counterpart of S1: accumulate with the same signs as V0.
				auto V1(k) {
					VLOAD(k);
					b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0)));
					a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1)));
				}

				// Vector counterpart of S2: accumulate with the `a` term negated.
				auto V2(k) {
					VLOAD(k);
					b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0)));
					a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0)));
				}

				f4 a, b;
				// Same work-buffer stores as the scalar path (4 * i + 64 == 4 * (i + 16)).
				zlin[4 * i] = xl[18 * (31 - i)];
				zlin[4 * i + 1] = xr[18 * (31 - i)];
				zlin[4 * i + 2] = xl[1 + 18 * (31 - i)];
				zlin[4 * i + 3] = xr[1 + 18 * (31 - i)];
				zlin[4 * i + 64] = xl[1 + 18 * (1 + i)];
				zlin[4 * i + 64 + 1] = xr[1 + 18 * (1 + i)];
				zlin[4 * i - 64 + 2] = xl[18 * (1 + i)];
				zlin[4 * i - 64 + 3] = xr[18 * (1 + i)];

				// Same tap order as the scalar path.
				V0(0);
				V2(1);
				V1(2);
				V2(3);
				V1(4);
				V2(5);
				V1(6);
				V2(7);

				{
					version (MINIMP3_FLOAT_OUTPUT) {
						// Float output: scale by 1/32768 and store individual lanes.
						// NOTE(review): brace initialisers on f4 are C syntax — confirm this is valid here.
						static const f4 g_scale = {1.0f / 32768.0f, 1.0f / 32768.0f, 1.0f / 32768.0f, 1.0f / 32768.0f};
						a = VMUL(a, g_scale);
						b = VMUL(b, g_scale);
						version (HAVE_SSE) {
							_mm_store_ss(dstr + (15 - i) * nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
							_mm_store_ss(dstr + (17 + i) * nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
							_mm_store_ss(dstl + (15 - i) * nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
							_mm_store_ss(dstl + (17 + i) * nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
							_mm_store_ss(dstr + (47 - i) * nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
							_mm_store_ss(dstr + (49 + i) * nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
							_mm_store_ss(dstl + (47 - i) * nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
							_mm_store_ss(dstl + (49 + i) * nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
						}
						else { /* HAVE_SSE */
							vst1q_lane_f32(dstr + (15 - i) * nch, a, 1);
							vst1q_lane_f32(dstr + (17 + i) * nch, b, 1);
							vst1q_lane_f32(dstl + (15 - i) * nch, a, 0);
							vst1q_lane_f32(dstl + (17 + i) * nch, b, 0);
							vst1q_lane_f32(dstr + (47 - i) * nch, a, 3);
							vst1q_lane_f32(dstr + (49 + i) * nch, b, 3);
							vst1q_lane_f32(dstl + (47 - i) * nch, a, 2);
							vst1q_lane_f32(dstl + (49 + i) * nch, b, 2);
						} /* HAVE_SSE */
					}
					else {
						version (HAVE_SSE) {
							// 16-bit output: clamp to [-32768, 32767], convert, and pack a (lanes 0-3)
							// and b (lanes 4-7) into one vector of eight 16-bit values.
							static const f4 g_max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};
							static const f4 g_min = {-32768.0f, -32768.0f, -32768.0f, -32768.0f};
							__m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)), _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
							dstr[(15 - i) * nch] = _mm_extract_epi16(pcm8, 1);
							dstr[(17 + i) * nch] = _mm_extract_epi16(pcm8, 5);
							dstl[(15 - i) * nch] = _mm_extract_epi16(pcm8, 0);
							dstl[(17 + i) * nch] = _mm_extract_epi16(pcm8, 4);
							dstr[(47 - i) * nch] = _mm_extract_epi16(pcm8, 3);
							dstr[(49 + i) * nch] = _mm_extract_epi16(pcm8, 7);
							dstl[(47 - i) * nch] = _mm_extract_epi16(pcm8, 2);
							dstl[(49 + i) * nch] = _mm_extract_epi16(pcm8, 6);
						}
						else { /* HAVE_SSE */
							// 16-bit output without SSE: add 0.5, convert to integer, add the
							// "value < 0" comparison mask, then narrow with saturation.
							int16x4_t pcma, pcmb;
							a = VADD(a, VSET(0.5f));
							b = VADD(b, VSET(0.5f));
							pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
							pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
							vst1_lane_s16(dstr + (15 - i) * nch, pcma, 1);
							vst1_lane_s16(dstr + (17 + i) * nch, pcmb, 1);
							vst1_lane_s16(dstl + (15 - i) * nch, pcma, 0);
							vst1_lane_s16(dstl + (17 + i) * nch, pcmb, 0);
							vst1_lane_s16(dstr + (47 - i) * nch, pcma, 3);
							vst1_lane_s16(dstr + (49 + i) * nch, pcmb, 3);
							vst1_lane_s16(dstl + (47 - i) * nch, pcma, 2);
							vst1_lane_s16(dstl + (49 + i) * nch, pcmb, 2);
						} /* HAVE_SSE */
					}
				}
			}
		}
		else {
			// SIMD compiled in but not available at run time.
			doEverythingNonSimd();
		}
	}
	else {
		doEverythingNonSimd();
	}
}
1690 
/**
 * Runs the DCT and the synthesis step over one granule and updates the
 * filter history kept in `qmf_state`.
 *
 * Params:
 *   qmf_state = 15 * 64 floats of history; copied to the start of `lins`
 *               before synthesis and refreshed from `lins` afterwards.
 *   grbuf     = granule samples; channel c starts at grbuf + 576 * c.
 *   nbands    = number of rows to synthesise; processed two at a time.
 *   nch       = channel count.
 *   pcm       = output samples; each processed pair advances by 64 * nch.
 *   lins      = work buffer; must hold the history plus nbands * 64 floats.
 */
void mp3d_synth_granule(float* qmf_state, float* grbuf, int nbands, int nch, mp3d_sample_t* pcm, float* lins) {
	enum int history_floats = 15 * 64;
	enum size_t history_bytes = float.sizeof * history_floats;

	// Unless MINIMP3_NONSTANDARD_BUT_LOGICAL is set, mono streams only
	// refresh every second history entry.
	version (MINIMP3_NONSTANDARD_BUT_LOGICAL) {
		enum bool sparse_mono_history = false;
	}
	else {
		enum bool sparse_mono_history = true;
	}

	for (int ch = 0; ch < nch; ch++) {
		mp3d_DCT_II(grbuf + 576 * ch, nbands);
	}

	// Prime the work buffer with the history left by the previous granule.
	memcpy(lins, qmf_state, history_bytes);

	for (int band = 0; band < nbands; band += 2) {
		mp3d_synth(grbuf + band, pcm + 32 * nch * band, nch, lins + band * 64);
	}

	// The history for the next granule starts nbands * 64 floats into the work buffer.
	float* next_history = lins + nbands * 64;
	if (sparse_mono_history && nch == 1) {
		for (int k = 0; k < history_floats; k += 2) {
			qmf_state[k] = next_history[k];
		}
	}
	else {
		memcpy(qmf_state, next_history, history_bytes);
	}
}
1717 
/**
 * Checks that the frame starting at `hdr` is followed by further headers
 * that compare equal to it (per hdr_compare).
 *
 * Params:
 *   hdr         = first candidate header.
 *   mp3_bytes   = bytes available starting at `hdr`.
 *   frame_bytes = frame size forwarded to hdr_frame_bytes().
 *
 * Returns: 1 when MAX_FRAME_SYNC_MATCHES consecutive headers match, or when
 *          the data ends after at least one match; 0 on the first mismatch
 *          or when not even one following header fits in the buffer.
 */
int mp3d_match_frame(const(ubyte)* hdr, int mp3_bytes, int frame_bytes) {
	int offset = 0;
	int matched = 0;

	while (matched < MAX_FRAME_SYNC_MATCHES) {
		const(ubyte)* current = hdr + offset;
		offset += hdr_frame_bytes(current, frame_bytes) + hdr_padding(current);

		// Not enough data left for another header: accept only if something matched already.
		if (offset + HDR_SIZE > mp3_bytes) {
			return matched > 0 ? 1 : 0;
		}
		if (!hdr_compare(hdr, hdr + offset)) {
			return 0;
		}
		matched++;
	}
	return 1;
}
1729 
/**
 * Scans `mp3` for the first position holding a frame header that is
 * confirmed by the headers following it.
 *
 * Params:
 *   mp3               = input bytes.
 *   mp3_bytes         = number of bytes available.
 *   free_format_bytes = in/out frame size passed to hdr_frame_bytes(); set
 *                       when a size is detected by probing for the next
 *                       matching header, and cleared to 0 whenever a
 *                       candidate is rejected.
 *   ptr_frame_bytes   = receives frame size including padding, or 0 when
 *                       nothing was found.
 *
 * Returns: byte offset of the accepted frame; when none is found, the
 *          position at which scanning stopped.
 */
int mp3d_find_frame(const(ubyte)* mp3, int mp3_bytes, int* free_format_bytes, int* ptr_frame_bytes) {
	const int scan_end = mp3_bytes - HDR_SIZE;
	int pos = 0;

	while (pos < scan_end) {
		if (hdr_valid(mp3)) {
			int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes);
			int frame_and_padding = frame_bytes + hdr_padding(mp3);

			// Size unknown: probe increasing distances for a matching header and
			// require a second match one frame further on before trusting it.
			for (int gap = HDR_SIZE; frame_bytes == 0 && gap < MAX_FREE_FORMAT_FRAME_SIZE && pos + 2 * gap < scan_end; gap++) {
				if (!hdr_compare(mp3, mp3 + gap)) {
					continue;
				}
				const int fb = gap - hdr_padding(mp3);
				const int nextfb = fb + hdr_padding(mp3 + gap);
				const bool confirmed = pos + gap + nextfb + HDR_SIZE <= mp3_bytes && hdr_compare(mp3, mp3 + gap + nextfb);
				if (confirmed) {
					frame_and_padding = gap;
					frame_bytes = fb;
					*free_format_bytes = fb;
				}
			}

			bool accepted = frame_bytes && pos + frame_and_padding <= mp3_bytes && mp3d_match_frame(mp3, mp3_bytes - pos, frame_bytes);
			if (!accepted) {
				// A single frame that fills the whole buffer exactly is accepted as well.
				accepted = pos == 0 && frame_and_padding == mp3_bytes;
			}
			if (accepted) {
				*ptr_frame_bytes = frame_and_padding;
				return pos;
			}
			*free_format_bytes = 0;
		}
		pos++;
		mp3++;
	}

	*ptr_frame_bytes = 0;
	return pos;
}
1758 
/**
 * Resets a decoder to its all-zero state.
 *
 * The fill is done byte-wise on purpose: D's default initialiser would set
 * the float members to NaN rather than 0.
 */
void mp3dec_init(ref mp3dec_t dec) {
	ubyte* raw = cast(ubyte*)&dec;
	raw[0 .. mp3dec_t.sizeof] = 0;
	// Already zero after the fill; kept to spell out that no header is remembered.
	dec.header[0] = 0;
}
1763 
/**
 * Locates and decodes one frame from `mp3`.
 *
 * Params:
 *   dec       = decoder state, updated in place; zeroed when a new frame
 *               search is needed or when the frame data is rejected.
 *   mp3       = input bytes.
 *   mp3_bytes = number of input bytes.
 *   pcm       = output samples; when null only `info` is filled in.
 *   info      = receives frame_bytes (bytes consumed, including any skipped
 *               prefix), channels, hz, layer and bitrate_kbps.
 *
 * Returns: the value of hdr_frame_samples() for the frame, or 0 when no
 *          frame was found or decoding did not succeed.
 */
int mp3dec_decode_frame(ref mp3dec_t dec, const(ubyte)* mp3, int mp3_bytes, mp3d_sample_t* pcm, mp3dec_frame_info_t* info) {
	enum size_t granule_bytes = 576 * 2 * float.sizeof;

	int skipped = 0;
	int frame_size = 0;
	int success = 1;
	bs_t bits;
	bs_t* bs = &bits;
	mp3dec_scratch_t scratch;

	// Fast path: the input starts with a header matching the remembered one.
	const bool header_known = mp3_bytes > 4 && dec.header[0] == 0xff && hdr_compare(dec.header.ptr, mp3);
	if (header_known) {
		frame_size = hdr_frame_bytes(mp3, dec.free_format_bytes) + hdr_padding(mp3);
		if (frame_size != mp3_bytes) {
			// Unless the frame fills the buffer exactly, a matching header must follow it.
			const bool next_header_ok = frame_size + HDR_SIZE <= mp3_bytes && hdr_compare(mp3, mp3 + frame_size);
			if (!next_header_ok) {
				frame_size = 0;
			}
		}
	}

	// Slow path: drop all state and search the buffer for a frame.
	if (frame_size == 0) {
		memset(&dec, 0, mp3dec_t.sizeof);
		skipped = mp3d_find_frame(mp3, mp3_bytes, &dec.free_format_bytes, &frame_size);
		if (frame_size == 0 || skipped + frame_size > mp3_bytes) {
			info.frame_bytes = skipped;
			return 0;
		}
	}

	const(ubyte)* hdr = mp3 + skipped;
	memcpy(dec.header.ptr, hdr, HDR_SIZE);
	info.frame_bytes = skipped + frame_size;
	info.channels = HDR_IS_MONO(hdr) ? 1 : 2;
	info.hz = hdr_sample_rate_hz(hdr);
	info.layer = 4 - HDR_GET_LAYER(hdr);
	info.bitrate_kbps = hdr_bitrate_kbps(hdr);

	if (pcm is null) {
		return hdr_frame_samples(hdr);
	}

	bs_init(bs, hdr + HDR_SIZE, frame_size - HDR_SIZE);
	if (HDR_IS_CRC(hdr)) {
		// Skip 16 bits when the header's CRC flag is set.
		get_bits(bs, 16);
	}

	auto granule = scratch.grbuf[0].ptr;

	if (info.layer == 3) {
		const int main_data_begin = L3_read_side_info(bs, scratch.gr_info.ptr, hdr);
		if (main_data_begin < 0 || bs.pos > bs.limit) {
			mp3dec_init(dec);
			return 0;
		}
		success = L3_restore_reservoir(dec, bs, &scratch, main_data_begin);
		if (success) {
			const int granule_count = HDR_TEST_MPEG1(hdr) ? 2 : 1;
			for (int gr = 0; gr < granule_count; gr++) {
				memset(granule, 0, granule_bytes);
				L3_decode(dec, &scratch, scratch.gr_info.ptr + gr * info.channels, info.channels);
				mp3d_synth_granule(dec.qmf_state.ptr, granule, 18, info.channels, pcm, scratch.syn[0].ptr);
				pcm += 576 * info.channels;
			}
		}
		// The reservoir is saved even when restoring it failed.
		L3_save_reservoir(dec, &scratch);
	}
	else {
		version (MINIMP3_ONLY_MP3) {
			return 0;
		}
		else { /* MINIMP3_ONLY_MP3 */
			L12_scale_info sci;
			L12_read_scale_info(hdr, bs, &sci);

			memset(granule, 0, granule_bytes);
			// `filled` accumulates the return values of L12_dequantize_granule;
			// each time it reaches 12 the buffer is synthesised and cleared.
			int filled = 0;
			for (int part = 0; part < 3; part++) {
				filled += L12_dequantize_granule(granule + filled, bs, &sci, info.layer | 1);
				if (filled == 12) {
					filled = 0;
					L12_apply_scf_384(&sci, sci.scf.ptr + part, granule);
					mp3d_synth_granule(dec.qmf_state.ptr, granule, 12, info.channels, pcm, scratch.syn[0].ptr);
					memset(granule, 0, granule_bytes);
					pcm += 384 * info.channels;
				}
				if (bs.pos > bs.limit) {
					mp3dec_init(dec);
					return 0;
				}
			}
		} /* MINIMP3_ONLY_MP3 */
	}
	return success * hdr_frame_samples(dec.header.ptr);
}