-
Notifications
You must be signed in to change notification settings - Fork 34
/
adaptive_huffman_coding.pl
92 lines (68 loc) · 2.18 KB
/
adaptive_huffman_coding.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/perl
# Implementation of the Adaptive Huffman Coding.
# See also:
# https://rosettacode.org/wiki/huffman_coding
use 5.036;
use List::Util qw(uniq);
# produce encode and decode dictionary from a tree
sub walk ($node, $code, $h, $rev_h) {
my $c = $node->[0] // return ($h, $rev_h);
if (ref $c) { walk($c->[$_], $code . $_, $h, $rev_h) for ('0', '1') }
else { $h->{$c} = $code; $rev_h->{$code} = $c }
return ($h, $rev_h);
}
# make a tree, and return resulting dictionaries
sub mktree_from_freq ($freq) {
my @nodes = map { [$_, $freq->{$_}] } sort { $a <=> $b } keys %$freq;
do { # poor man's priority queue
@nodes = sort { $a->[1] <=> $b->[1] } @nodes;
my ($x, $y) = splice(@nodes, 0, 2);
if (defined($x)) {
if (defined($y)) {
push @nodes, [[$x, $y], $x->[1] + $y->[1]];
}
else {
push @nodes, [[$x], $x->[1]];
}
}
} while (@nodes > 1);
walk($nodes[0], '', {}, {});
}
sub encode ($bytes, $alphabet) {
my %freq;
++$freq{$_} for @$alphabet;
my @enc;
foreach my $byte (@$bytes) {
my ($h, $rev_h) = mktree_from_freq(\%freq);
++$freq{$byte};
push @enc, $h->{$byte};
}
return join('', @enc);
}
sub decode ($enc, $alphabet) {
my @out;
my $prefix = '';
my %freq;
++$freq{$_} for @$alphabet;
my ($h, $rev_h) = mktree_from_freq(\%freq);
foreach my $bit (split(//, $enc)) {
$prefix .= $bit;
if (exists $rev_h->{$prefix}) {
push @out, $rev_h->{$prefix};
++$freq{$rev_h->{$prefix}};
($h, $rev_h) = mktree_from_freq(\%freq);
$prefix = '';
}
}
return \@out;
}
my $text = "this is an example for huffman encoding";
my @bytes = unpack('C*', $text);
my @alphabet = uniq(@bytes);
my $enc = encode(\@bytes, \@alphabet);
my $dec = decode($enc, \@alphabet);
say $enc;
say pack('C*', @$dec);
__END__
1010000100010111110101010101010001010011011000101100010010010111110001011011111000011100111101111100111010110111011100111100011011100010001101100010011100000100010110001010
this is an example for huffman encoding