-
Notifications
You must be signed in to change notification settings - Fork 34
/
delta_encoding_with_double-elias_coding.pl
115 lines (83 loc) · 2.55 KB
/
delta_encoding_with_double-elias_coding.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/perl
# Author: Trizen
# Date: 14 June 2023
# https://github.com/trizen
# Implementation of the Delta encoding scheme, combined with Elias gamma encoding, optimized for very large deltas.
# Reference:
# Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction
# https://youtube.com/watch?v=-3H_eDbWNEU
use 5.036;
sub read_bit ($fh, $bitstring) {
if (($$bitstring // '') eq '') {
$$bitstring = unpack('b*', getc($fh) // return undef);
}
chop($$bitstring);
}
sub delta_encode ($integers) {
my @deltas;
my $prev = 0;
unshift(@$integers, scalar(@$integers));
while (@$integers) {
my $curr = shift(@$integers);
push @deltas, $curr - $prev;
$prev = $curr;
}
my $bitstring = '';
foreach my $d (@deltas) {
if ($d == 0) {
$bitstring .= '0';
}
else {
my $t = sprintf('%b', abs($d) + 1);
my $l = sprintf('%b', length($t));
$bitstring .= '1' . (($d < 0) ? '0' : '1') . ('1' x (length($l) - 1)) . '0' . substr($l, 1) . substr($t, 1);
}
}
pack('B*', $bitstring);
}
sub delta_decode ($str) {
open my $fh, '<:raw', \$str;
my @deltas;
my $buffer = '';
my $len = 0;
for (my $k = 0 ; $k <= $len ; ++$k) {
my $bit = read_bit($fh, \$buffer);
if ($bit eq '0') {
push @deltas, 0;
}
else {
my $bit = read_bit($fh, \$buffer);
my $bl = 0;
++$bl while (read_bit($fh, \$buffer) eq '1');
my $bl2 = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. $bl));
my $int = oct('0b1' . join('', map { read_bit($fh, \$buffer) } 1 .. ($bl2 - 1)));
push @deltas, ($bit eq '1' ? 1 : -1) * ($int - 1);
}
if ($k == 0) {
$len = pop(@deltas);
}
}
my @acc;
my $prev = $len;
foreach my $d (@deltas) {
$prev += $d;
push @acc, $prev;
}
return \@acc;
}
my @integers = map { int(rand($_)) } 1 .. 1000;
my $str = delta_encode([@integers]);
say "Encoded length: ", length($str);
say "Rawdata length: ", length(join(' ', @integers));
my $decoded = delta_decode($str);
join(' ', @integers) eq join(' ', @$decoded) or die "Decoding error";
{
open my $fh, '<:raw', __FILE__;
my $str = do { local $/; <$fh> };
my $encoded = delta_encode([unpack('C*', $str)]);
my $decoded = delta_decode($encoded);
$str eq pack('C*', @$decoded) or die "error";
}
__END__
Encoded length: 1763
Rawdata length: 3615