-
Notifications
You must be signed in to change notification settings - Fork 4
/
libslac.c
609 lines (476 loc) · 22.5 KB
/
libslac.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
////////////////////////////////////////////////////////////////////////////
// ****** SLAC ****** //
// Simple Lossless Audio Compressor //
// Copyright (c) 2019 - 2022 David Bryant //
// All Rights Reserved. //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
// libslac.c
// This module provides functions to compress and decompress PCM integer
// audio. The audio is presented in 32-bit integers, but can be arbitrary
// bitdepths (redundant bits at either side of the words are handled). Mono
// and stereo streams are supported and up to 24 significant bits per sample
// (anywhere in the 32-bit word).
//
// No allocation is used; everything is done in place using the passed input
// and output buffers.
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "bitstream.h"
#include "libslac.h"
// If your platform has hardware support for counting leading zeros in 32-bit
// integers, define it here (these are for gcc and friends).
#if INT_MAX == 32767
#define count_leading_zeros __builtin_clzl
#else
#define count_leading_zeros __builtin_clz
#endif
#ifdef NO_BUILTIN_CLZ
// If your platform does not support __builtin_clz() or a variant, define this macro
// to instantiate a native C implementation. It is a hybrid of the Hacker's Delight
// binary search method and a table-driven technique.
static inline int count_leading_zeros (uint32_t x)
{
static const char nbits_table [] = {
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, // 0 - 15
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, // 16 - 31
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 32 - 47
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 48 - 63
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 64 - 79
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 80 - 95
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 96 - 111
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 112 - 127
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 128 - 143
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 144 - 159
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 160 - 175
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 176 - 191
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 192 - 207
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 208 - 223
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 224 - 239
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 // 240 - 255
};
uint32_t y;
int n = 32;
if ((y = x >> 16)) {
n -= 16;
x = y;
}
if ((y = x >> 8)) {
n -= 8;
x = y;
}
return n - nbits_table [x];
}
#endif
// These complementary macros convert from signed integers into the non-negative
// values that slac uses for analysis and entropy encoding (with the sign moved
// to the LSB), and back.
#define signed_to_non_negative(x) (((uint32_t)(x)<<1)^((int32_t)(x)>>31))
#define non_negative_to_signed(x) ((int32_t)(-(int32_t)((x)&1)^(x))>>1)
#define MAX_DECORR 6
// Complementary decorrelate / correlate functions used by both encoding and decoding
static int decorrelate (int32_t *audio_samples, int sample_count, int stride);
static int correlate (int32_t *audio_samples, int sample_count, int stride);
static int32_t decorrs [8], rice_ks [32], shifts [32]; // used for statistics only
/******************************* COMPRESSION *********************************/
static void entropy_encode (Bitstream *bs, int32_t *audio_samples, int sample_count, int stride);
static int redundant_bits (int32_t *audio_samples, int sample_count, int stride);
static int best_decorr (int32_t *audio_samples, int sample_count, int stride);
static int channels_identical (int32_t *audio_samples, int sample_count);
static void lr_to_ms (int32_t *audio_samples, int sample_count);
// Compress an array of audio samples (in either 1 or 2 interleaved channels)
// into the provided buffer (which should be big enough for the data). The
// return value is the number of bytes used (rounded up to the next word) or
// -1 indicating an error. The stereo_mode only applies if num_chans is 2
// (obviously) and can be either MID_SIDE or LEFT_RIGHT (see libslac.h). Note
// that selecting the better of left-right and mid-side encoding is not done
// here, but can easily be done at the next higher level by simply performing
// this operation for both methods and choosing the smaller.
// Note: the audio samples are processed in place, they are not "const" !!
int compress_audio_block (int32_t *audio_samples, int sample_count, int num_chans, int stereo_mode, char *outbuffer, int outbufsize)
{
int sent_chans = num_chans, chan;
Bitstream bs;
// this is the processing unique to stereo...from here on it's just 1 or 2 mono channels
if (num_chans == 2) {
if (channels_identical (audio_samples, sample_count)) {
stereo_mode = DUAL_MONO;
sent_chans = 1;
}
else if (stereo_mode == MID_SIDE)
lr_to_ms (audio_samples, sample_count);
else
stereo_mode = LEFT_RIGHT;
}
else
stereo_mode = MONO_MODE;
// open the bitstream for writing and store the stereo mode in the first two bits
bs_open_write (&bs, outbuffer, outbuffer + outbufsize);
putbits (stereo_mode, 2, &bs);
// the channels (1 or 2) are processed completely independently and sequentially here
for (chan = 0; chan < sent_chans; ++chan) {
int shift, decorr;
// check whether there are LSB zeros in every sample that can be shifted out
shift = redundant_bits (audio_samples + chan, sample_count, num_chans);
shifts [shift]++;
// find the best decorrelator (from -1 to 6) and write that to the stream in 3 bits
decorr = best_decorr (audio_samples + chan, sample_count, num_chans);
putbits (decorr + 1, 3, &bs);
decorrs [decorr + 1]++;
// since the shift amount is generally small, store that in unary (1's followed by 0)
while (shift--)
putbit_1 (&bs);
putbit_0 (&bs);
// because the magnitude of the first few samples can be very different than the rest
// of the data (because they're not as decorrelated), we store those using their own
// Rice parameter; this only costs 5 bits per channel, but can make a huge improvement
if (decorr && abs (decorr) < sample_count) {
entropy_encode (&bs, audio_samples + chan, abs (decorr), num_chans);
entropy_encode (&bs, audio_samples + abs (decorr) * num_chans + chan, sample_count - abs (decorr), num_chans);
}
else
entropy_encode (&bs, audio_samples + chan, sample_count, num_chans);
}
return bs_close_write (&bs); // close the bitstream and return the number of bytes written
}
static int best_rice_k (int32_t *audio_samples, int sample_count, int stride);
// Encode the supplied array of samples into a bitstream using Rice encoding.
// If stereo data is being compressed the channels must be processed in two
// separate passes and the stride parameter should be 2 and the pointer set to
// the first sample of the desired channel.
static void entropy_encode (Bitstream *bs, int32_t *audio_samples, int sample_count, int stride)
{
int rice_k = best_rice_k (audio_samples, sample_count, stride);
if (sample_count > MAX_DECORR) // only count the "real" blocks in the statistics
rice_ks [rice_k + 1]++;
putbits (rice_k + 1, 5, bs); // the Rice k parameter (offset 1) is stored in the first 5 bits
if (rice_k > 0) {
uint32_t mask = (1UL << rice_k) - 1;
while (sample_count--) {
uint32_t avalue = signed_to_non_negative (*audio_samples);
int modulo = avalue >> rice_k;
while (modulo--)
putbit_1 (bs);
putbit_0 (bs);
avalue &= mask;
putbits (avalue, rice_k, bs);
audio_samples += stride;
}
}
else if (rice_k == 0)
while (sample_count--) {
uint32_t avalue = signed_to_non_negative (*audio_samples);
while (avalue--)
putbit_1 (bs);
putbit_0 (bs);
audio_samples += stride;
}
}
// Calculate the optimum Rice k parameter (i.e. number of bits sent literally
// for each sample) for the supplied sample data. We calculate the sum of all
// the (converted to non-negative) sample values which requires more than just
// 32 bits of magnitude. To avoid requiring a 64-bit integer type (which some
// platforms don't have) or a float (which can be slow on some platforms) this
// is done with a 32-bit unsigned integer and another integer for overflows.
// Just the final calculation is done in floats. If there are only zero samples
// this function returns -1 so that silent blocks can be efficiently handled.
static int best_rice_k (int32_t *audio_samples, int sample_count, int stride)
{
int count = sample_count, upper32 = 0, rice_k = 0;
float float_sum = 0, min_bits;
int32_t *dptr = audio_samples;
uint32_t lower32 = 0;
while (count--) {
uint32_t temp = lower32;
if ((lower32 += signed_to_non_negative (*dptr)) < temp)
upper32++;
dptr += stride;
}
if (!upper32 && !lower32) // return -1 for complete silence
return -1;
float_sum = upper32 * 4294967296.0 + lower32;
min_bits = float_sum + sample_count;
// min_bits this is now the exact total number of bits used for encoding the
// samples with k=0; next we'll increase k and estimate the resulting bit
// count as long as it continues to improve
while (1) {
float bits = (float_sum /= 2) + (++rice_k * sample_count) + sample_count - (sample_count / 6);
if (bits < min_bits)
min_bits = bits;
else
return rice_k - 1;
}
}
// Scan sample array for redundant LSB's (zeros) and remove them through a
// shift. The number of bits removed is returned so that the samples can be
// correctly reconstructed on decode. This should not normally be time-
// consuming because we break out as soon as we see the LSB set (which would
// normally be very early, unless we are really going to have success).
static int redundant_bits (int32_t *audio_samples, int sample_count, int stride)
{
int count = sample_count, redundant_bits = 0;
int32_t *dptr = audio_samples;
int32_t ordata = 0;
while (count--)
if ((ordata |= *dptr) & 1)
break;
else
dptr += stride;
if (ordata)
while (!(ordata & 1)) {
redundant_bits++;
ordata >>= 1;
}
if (redundant_bits)
while (sample_count--) {
*audio_samples >>= redundant_bits;
audio_samples += stride;
}
return redundant_bits;
}
// Scan a buffer of sample data and return an estimated "magnitude" value
// based on the most significant 1 bit of the absolute value of each sample.
// There is no real significance to this number because it is simply used to
// compare arrays with different decorrelation processing. This should match
// the value returned by decorrelate() and correlate(). We add 1 because
// __builtin_clz() is undefined for zero input.
static int magnitude_bits (int32_t *audio_samples, int sample_count, int stride)
{
int32_t *dptr, *eptr = audio_samples + sample_count * stride;
int bits = sample_count * 31;
for (dptr = audio_samples; dptr < eptr; dptr += stride)
bits -= count_leading_zeros (signed_to_non_negative (*dptr) + 1);
return bits;
}
// Given an array of samples, determine the best decorrelation level (or count)
// that produces the smallest output, and leave the data decorrelated to that
// level so that it can be immediately entropy encoded. If even the first level
// of decorrelation shows degradation, then we try correlation() to see if that
// helps instead. Negative correlation would not normally be seen in regular
// audio, but might be seen in test signals, and it kind of comes free here.
static int best_decorr (int32_t *audio_samples, int sample_count, int stride)
{
int best_magnitude = magnitude_bits (audio_samples, sample_count, stride);
int decorr_index = 0, best_decorr_index = 0;
// first, continue to call decorrelate() until the result gets worse, then back up 1
while (decorr_index < MAX_DECORR) {
int magnitude = decorrelate (audio_samples, sample_count, stride); decorr_index++;
if (magnitude < best_magnitude) {
best_decorr_index = decorr_index;
best_magnitude = magnitude;
}
else {
correlate (audio_samples, sample_count, stride); decorr_index--;
break;
}
}
// if that didn't work at all, maybe negative decorrelation (i.e. correlation) will
if (decorr_index == 0) {
int magnitude = correlate (audio_samples, sample_count, stride); decorr_index--;
if (magnitude < best_magnitude) {
best_decorr_index = decorr_index;
best_magnitude = magnitude;
}
else {
decorrelate (audio_samples, sample_count, stride); decorr_index++;
}
}
return best_decorr_index;
}
// Scans the provided stereo samples to see if they are identical and can
// therefore be encoded as a "dual-mono" block. To avoid delays, we break out
// of the loop as soon as we se a difference (which would normally be the
// first or second sample).
static int channels_identical (int32_t *audio_samples, int sample_count)
{
while (sample_count--)
if (audio_samples [0] == audio_samples [1])
audio_samples += 2;
else
return 0;
return 1;
}
// Converts the supplied buffer of stereo samples from left-right encoding to
// mid-side encoding. Note that since L+R and L-R have identical LSBs, we
// don't really need to store the LSB of both, which is why it's "mid" instead
// of "sum".
static void lr_to_ms (int32_t *audio_samples, int sample_count)
{
while (sample_count--) {
audio_samples [1] += ((audio_samples [0] -= audio_samples [1]) >> 1);
audio_samples += 2;
}
}
/******************************** STATISTICS *********************************/
// Display some encoder statistics to the specified stream.
void dump_compression_stats (FILE *file)
{
int maxnz = -1, i;
char string [256];
fprintf (file, "decorr: (%ld) %ld %ld %ld %ld %ld %ld %ld\n",
(long) decorrs[0], (long) decorrs[1], (long) decorrs[2], (long) decorrs[3],
(long) decorrs[4], (long) decorrs[5], (long) decorrs[6], (long) decorrs[7]);
for (i = 0; i < 32; ++i) if (rice_ks [i]) maxnz = i;
for (string [0] = i = 0; i <= maxnz; ++i) sprintf (string + strlen(string), i ? " %ld" : " (%ld)", (long) rice_ks [i]);
fprintf (file, "rice-k:%s (max = %d)\n", string, maxnz - 1);
for (i = 0; i < 32; ++i) if (shifts [i]) maxnz = i;
if (maxnz) {
for (string [0] = i = 0; i <= maxnz; ++i) sprintf (string + strlen(string), " %ld", (long) shifts [i]);
fprintf (file, "shifts:%s (max = %d)\n", string, maxnz);
}
memset (decorrs, 0, sizeof (decorrs));
memset (rice_ks, 0, sizeof (rice_ks));
memset (shifts, 0, sizeof (shifts));
}
/****************************** DECOMPRESSION ********************************/
static void entropy_decode (Bitstream *bs, int32_t *audio_samples, int sample_count, int stride);
static void leftshift_bits (int32_t *audio_samples, int sample_count, int stride, int shift);
static void ms_to_lr (int32_t *audio_samples, int sample_count);
static void dm_to_lr (int32_t *audio_samples, int sample_count);
// Decompress the supplied compressed audio data into the original samples.
// Note that the number of samples and channels must be passed in, so these
// must be stored with the block somehow (although checking that both of the
// 2 LSB's of the first compressed data byte are zero can be used to determine
// if the block is mono, although this is kind of cheating). All the compressed
// data provided by compress_audio_block() must be present, or -1 will be
// returned indicating an error.
int decompress_audio_block (int32_t *audio_samples, int sample_count, int num_chans, char *inbuffer, int inbufsize)
{
int read_chans = num_chans, res = 0, stereo_mode, chan;
Bitstream bs;
// open the passed buffer as a bitstream and get the first 2 bits (stereo mode)
bs_open_read (&bs, inbuffer, inbuffer + inbufsize);
getbits (&stereo_mode, 2, &bs);
stereo_mode &= 0x3;
if (stereo_mode == DUAL_MONO) // for dual-mono, we only read one channel of data from the bitstream
read_chans = 1;
for (chan = 0; chan < read_chans && !bs.error; ++chan) {
int shift = 0, decorr;
getbits (&decorr, 3, &bs); // the first 3 bits for each channel is the decorrelation info
decorr = (decorr & 0x7) - 1;
while (getbit (&bs)) // the number of 1's that follow is the final left-shift amount
shift++;
// as with encoding, we read the first few still correlated samples in a separate Rice operation
if (decorr && abs (decorr) < sample_count) {
entropy_decode (&bs, audio_samples + chan, abs (decorr), num_chans);
entropy_decode (&bs, audio_samples + abs (decorr) * num_chans + chan, sample_count - abs (decorr), num_chans);
}
else
entropy_decode (&bs, audio_samples + chan, sample_count, num_chans);
// next undo the decorrelation step, and finally the shift
if (decorr < 0)
decorrelate (audio_samples + chan, sample_count, num_chans);
else while (decorr--)
correlate (audio_samples + chan, sample_count, num_chans);
if (shift)
leftshift_bits (audio_samples + chan, sample_count, num_chans, shift);
}
// if stereo and not simply stored left-right, we might have one more step
if (stereo_mode == MID_SIDE)
ms_to_lr (audio_samples, sample_count);
else if (stereo_mode == DUAL_MONO)
dm_to_lr (audio_samples, sample_count);
res = bs.error; // if we went past the end of the buffer, flag an error
bs_close_read (&bs);
return res;
}
// Decode the supplied array of samples from the bitstream using Rice encoding.
// If stereo data is being decompressed the channels are processed sequentially
// and the stride parameter should be 2 and the pointer set to the first sample
// of the desired channel. The first 5 bits are the Rice "k" value (offset 1)
// which specifies how many bits are sent literally, with 0 signalling silence.
static void entropy_decode (Bitstream *bs, int32_t *audio_samples, int sample_count, int stride)
{
int rice_k;
getbits (&rice_k, 5, bs); // Rice K is offset by 1 (so we send -1 for silence)
rice_k = (rice_k & 0x1f) - 1;
if (rice_k < 0) // if silence, just clear the samples and get out
while (sample_count--) {
*audio_samples = 0;
audio_samples += stride;
}
else if (rice_k > 0) { // normal case for k > 0
uint32_t mask = (1UL << rice_k) - 1, rice_k_bits;
while (sample_count--) {
uint32_t avalue = 0;
while (getbit (bs))
avalue++;
avalue <<= rice_k;
getbits (&rice_k_bits, rice_k, bs);
avalue |= (rice_k_bits & mask);
*audio_samples = non_negative_to_signed (avalue);
audio_samples += stride;
}
}
else // optimized case for k == 0
while (sample_count--) {
uint32_t avalue = 0;
while (getbit (bs))
avalue++;
*audio_samples = non_negative_to_signed (avalue);
audio_samples += stride;
}
}
// Leftshift specified number of zeros into the audio samples. This is to undo
// the redundant zeros detected and rightshifted by redundant_bits().
static void leftshift_bits (int32_t *audio_samples, int sample_count, int stride, int shift)
{
while (sample_count--) {
* (uint32_t *) audio_samples <<= shift;
audio_samples += stride;
}
}
// Duplicate the single left stereo channel into both channels for decoding
// "dual-mono" mode.
static void dm_to_lr (int32_t *audio_samples, int sample_count)
{
while (sample_count--) {
audio_samples [1] = audio_samples [0];
audio_samples += 2;
}
}
// Converts the supplied buffer of stereo samples from mid-side encoding back
// to left-right encoding.
static void ms_to_lr (int32_t *audio_samples, int sample_count)
{
while (sample_count--) {
audio_samples [0] += (audio_samples [1] -= (audio_samples [0] >> 1));
audio_samples += 2;
}
}
/********************* COMMON CORRELATION / DECORRELATION ********************/
// These two complementary functions are at the heart of the compressor and
// are probably the simplest implementation of decorrelation that exists.
// Essentially, for decorrelation, each sample is replaced by the difference
// between that sample and the previous one. In the reverse operation,
// called correlation, each sample is replaced by the sum of itself and all
// the previous samples. The first sample is never changed. Each function
// exactly undoes the operation of the other, which is why they can be used
// for lossless compression. So that they can be used during the encoder side
// analysis, they also calculate and return a "magnitude" value identical to
// the value returned by magnitude_bits() that can be used to determine if
// the operation reduced the average magnitude of the samples.
static int decorrelate (int32_t *audio_samples, int sample_count, int stride)
{
int bits = sample_count * 31;
uint32_t temp = 0;
while (sample_count--) {
temp += *audio_samples -= temp;
bits -= count_leading_zeros (signed_to_non_negative (*audio_samples) + 1);
audio_samples += stride;
}
return bits;
}
static int correlate (int32_t *audio_samples, int sample_count, int stride)
{
int bits = sample_count * 31;
uint32_t value = 0;
while (sample_count--) {
value = *audio_samples += value;
bits -= count_leading_zeros (signed_to_non_negative (value) + 1);
audio_samples += stride;
}
return bits;
}