forked from rui314/mold
-
Notifications
You must be signed in to change notification settings - Fork 0
/
compress.cc
193 lines (160 loc) · 5.99 KB
/
compress.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
// This file implements a multi-threaded zlib compression routine.
//
// Multiple pieces of raw compressed data in zlib-format can be merged
// just by concatenation as long as each zlib stream is flushed with
// Z_SYNC_FLUSH. In this file, we split input data into multiple
// shards, compress them individually and concatenate them. We then
// append a header, a trailer and a checksum so that the concatenated
// data is valid zlib-format data.
//
// Using threads to compress data has a downside. Since the dictionary
// is reset on boundaries of shards, compression ratio is sacrificed
// a little bit. However, if a shard size is large enough, that loss
// is negligible in practice.
#include "mold.h"
#include <tbb/parallel_for_each.h>
#include <zlib.h>
#define CHECK(fn) do { int r = (fn); assert(r == Z_OK); } while (0)
namespace mold {
static constexpr i64 SHARD_SIZE = 1024 * 1024;
static std::vector<std::string_view> split(std::string_view input) {
std::vector<std::string_view> shards;
while (input.size() >= SHARD_SIZE) {
shards.push_back(input.substr(0, SHARD_SIZE));
input = input.substr(SHARD_SIZE);
}
if (!input.empty())
shards.push_back(input);
return shards;
}
static std::vector<u8> do_compress(std::string_view input) {
// Initialize zlib stream. Since debug info is generally compressed
// pretty well with lower compression levels, we chose compression
// level 1.
z_stream strm;
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
CHECK(deflateInit2(&strm, 1, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY));
// Set an input buffer
strm.avail_in = input.size();
strm.next_in = (u8 *)input.data();
// Set an output buffer. deflateBound() returns an upper bound
// on the compression size. +16 for Z_SYNC_FLUSH.
std::vector<u8> buf(deflateBound(&strm, strm.avail_in) + 16);
// Compress data. It writes all compressed bytes except the last
// partial byte, so up to 7 bits can be held to be written to the
// buffer.
strm.avail_out = buf.size();
strm.next_out = buf.data();
CHECK(deflate(&strm, Z_BLOCK));
// This is a workaround for libbacktrace before 2022-04-06.
//
// Zlib is a bit stream, and what Z_SYNC_FLUSH does is to write a
// three bit value indicating the start of an uncompressed data
// block followed by four byte data 00 00 ff ff which indicates that
// the length of the block is zero. libbacktrace uses its own zlib
// inflate routine, and it had a bug that if that particular three
// bit value happens to end at a byte boundary, it accidentally
// skipped the next byte.
//
// In order to avoid triggering that bug, we should avoid calling
// deflate() with Z_SYNC_FLUSH if the current bit position is 5.
// If it's 5, we insert an empty block consisting of 10 bits so
// that the bit position is 7 in the next byte.
//
// https://github.com/ianlancetaylor/libbacktrace/pull/87
int nbits;
deflatePending(&strm, Z_NULL, &nbits);
if (nbits == 5)
CHECK(deflatePrime(&strm, 10, 2));
CHECK(deflate(&strm, Z_SYNC_FLUSH));
assert(strm.avail_out > 0);
buf.resize(buf.size() - strm.avail_out);
deflateEnd(&strm);
return buf;
}
ZlibCompressor::ZlibCompressor(u8 *buf, i64 size) {
std::string_view input{(char *)buf, (size_t)size};
std::vector<std::string_view> inputs = split(input);
std::vector<u64> adlers(inputs.size());
shards.resize(inputs.size());
// Compress each shard
tbb::parallel_for((i64)0, (i64)inputs.size(), [&](i64 i) {
adlers[i] = adler32(1, (u8 *)inputs[i].data(), inputs[i].size());
shards[i] = do_compress(inputs[i]);
});
// Combine checksums
checksum = adlers[0];
for (i64 i = 1; i < inputs.size(); i++)
checksum = adler32_combine(checksum, adlers[i], inputs[i].size());
}
i64 ZlibCompressor::size() const {
i64 size = 2; // +2 for header
for (const std::vector<u8> &shard : shards)
size += shard.size();
return size + 6; // +6 for trailer and checksum
}
void ZlibCompressor::write_to(u8 *buf) {
// Write a zlib-format header
buf[0] = 0x78;
buf[1] = 0x9c;
// Copy compressed data
std::vector<i64> offsets(shards.size());
offsets[0] = 2; // +2 for header
for (i64 i = 1; i < shards.size(); i++)
offsets[i] = offsets[i - 1] + shards[i - 1].size();
tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
memcpy(&buf[offsets[i]], shards[i].data(), shards[i].size());
});
// Write a trailer
u8 *end = buf + size();
end[-6] = 3;
end[-5] = 0;
// Write a checksum
*(ub32 *)(end - 4) = checksum;
}
GzipCompressor::GzipCompressor(std::string_view input) {
std::vector<std::string_view> inputs = split(input);
std::vector<u32> crc(inputs.size());
shards.resize(inputs.size());
// Compress each shard
tbb::parallel_for((i64)0, (i64)inputs.size(), [&](i64 i) {
crc[i] = crc32(0, (u8 *)inputs[i].data(), inputs[i].size());
shards[i] = do_compress(inputs[i]);
});
// Combine checksums
checksum = crc[0];
for (i64 i = 1; i < inputs.size(); i++)
checksum = crc32_combine(checksum, crc[i], inputs[i].size());
uncompressed_size = input.size();
}
i64 GzipCompressor::size() const {
i64 size = 10; // +10 for header
for (const std::vector<u8> &shard : shards)
size += shard.size();
return size + 10; // +10 for trailer and checksum
}
void GzipCompressor::write_to(u8 *buf) {
// Write a zlib-format header
memset(buf, 0, 10);
buf[0] = 0x1f; // magic
buf[1] = 0x8b; // magic
buf[2] = 0x08; // compression method is zlib
buf[9] = 0xff; // made on unknown OS
// Copy compressed data
std::vector<i64> offsets(shards.size());
offsets[0] = 10; // +10 for header
for (i64 i = 1; i < shards.size(); i++)
offsets[i] = offsets[i - 1] + shards[i - 1].size();
tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
memcpy(&buf[offsets[i]], shards[i].data(), shards[i].size());
});
// Write a trailer
u8 *end = buf + size();
end[-10] = 3; // two-byte zlib stream terminator
end[-9] = 0;
*(ul32 *)(end - 8) = checksum;
*(ul32 *)(end - 4) = uncompressed_size;
}
} // namespace mold