-
Notifications
You must be signed in to change notification settings - Fork 35
/
opencl_sboxes.h
345 lines (320 loc) · 16.9 KB
/
opencl_sboxes.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
#include "opencl_misc.h"
#if HAVE_LUT3
/*
* Bitslice DES S-boxes with LOP3.LUT instructions
* For NVIDIA Maxwell architecture and CUDA 7.5 RC
* by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
*
* Gate counts: 25 24 25 18 25 24 24 23
* Average: 23.5
* Depth: 8 7 7 6 8 10 10 8
* Average: 8
*
* These Boolean expressions corresponding to DES S-boxes were
* discovered by <deeplearningjohndoe at gmail.com>
*
* Copyright (c) 2012-2015 Sayantan Datta <[email protected]>
* Copyright (c) 2015 <deeplearningjohndoe at gmail.com>
* Copyright (c) 2015 magnum
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* The underlying mathematical formulas are NOT copyrighted.
*/
/*
 * DES S-box 1, LUT3 variant.
 *
 * a1..a6  the six S-box inputs
 * out     private array that accumulates the result
 * c1..c4  positions in out[] for outputs 1..4
 *
 * The four outputs are XORed into out[] (not assigned), so whatever the
 * caller already stored at those positions is combined with the result.
 *
 * The body is a fixed netlist of 25 lut3() operations, which matches the
 * gate count given for S1 in the file header.  Do not reorder operands or
 * change the 8-bit constants: each constant belongs to its operand order.
 *
 * NOTE(review): lut3() is not defined in this file; it is presumably a
 * three-input bitwise lookup with the last argument as truth table -
 * confirm in opencl_misc.h.
 * NOTE(review): intermediate names look like the 64-bit truth table of the
 * value they hold; they are labels only and are not checked by anything.
 * NOTE(review): c1..c4 are declared vtype but used as array indices, which
 * assumes vtype is a scalar integer type in this build - confirm.
 */
inline void
s1(vtype a1, vtype a2, vtype a3, vtype a4, vtype a5, vtype a6,
__private vtype *out, vtype c1, vtype c2, vtype c3, vtype c4)
{
/* Shared intermediates, then output 3 (final gate takes a5) */
vtype xAA55AA5500550055 = lut3(a1, a4, a6, 0xC1);
vtype xA55AA55AF0F5F0F5 = lut3(a3, a6, xAA55AA5500550055, 0x9E);
vtype x5F5F5F5FA5A5A5A5 = lut3(a1, a3, a6, 0xD6);
vtype xF5A0F5A0A55AA55A = lut3(a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56);
vtype x947A947AD1E7D1E7 = lut3(a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C);
vtype x5FFF5FFFFFFAFFFA = lut3(a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B);
vtype xB96CB96C69936993 = lut3(a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6);
vtype x3 = lut3(a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A);
/* Output 2 (final gate takes a5) */
vtype x55EE55EE55EE55EE = lut3(a1, a2, a4, 0x7A);
vtype x084C084CB77BB77B = lut3(a2, a6, xF5A0F5A0A55AA55A, 0xC9);
vtype x9C329C32E295E295 = lut3(x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72);
vtype xA51EA51E50E050E0 = lut3(a3, a6, x55EE55EE55EE55EE, 0x29);
vtype x4AD34AD3BE3CBE3C = lut3(a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95);
vtype x2 = lut3(a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6);
/* Output 4 (final gate takes a5) */
vtype xD955D95595D195D1 = lut3(a1, a2, x9C329C32E295E295, 0xD2);
vtype x8058805811621162 = lut3(x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90);
vtype x7D0F7D0FC4B3C4B3 = lut3(xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76);
vtype x0805080500010001 = lut3(a3, xAA55AA5500550055, xD955D95595D195D1, 0x80);
vtype x4A964A96962D962D = lut3(xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6);
vtype x4 = lut3(a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6);
/* Output 1 (final gate takes a5) */
vtype x148014807B087B08 = lut3(a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21);
vtype x94D894D86B686B68 = lut3(xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A);
vtype x5555555540044004 = lut3(a1, a6, x084C084CB77BB77B, 0x70);
vtype xAFB4AFB4BF5BBF5B = lut3(x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97);
vtype x1 = lut3(a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C);
/* Accumulate the four outputs into the caller's array */
out[c1] ^= x1;
out[c2] ^= x2;
out[c3] ^= x3;
out[c4] ^= x4;
}
/*
 * DES S-box 2, LUT3 variant.
 *
 * a1..a6  the six S-box inputs
 * out     private array that accumulates the result
 * c1..c4  positions in out[] for outputs 1..4
 *
 * The four outputs are XORed into out[] (not assigned).
 *
 * The body is a fixed netlist of 24 lut3() operations, which matches the
 * gate count given for S2 in the file header.  Each 8-bit constant belongs
 * to the operand order it is written with.
 *
 * NOTE(review): lut3() is not defined in this file; it is presumably a
 * three-input bitwise lookup with the last argument as truth table -
 * confirm in opencl_misc.h.
 * NOTE(review): c1..c4 are declared vtype but used as array indices, which
 * assumes vtype is a scalar integer type in this build - confirm.
 */
inline void
s2(vtype a1, vtype a2, vtype a3, vtype a4, vtype a5, vtype a6,
__private vtype *out, vtype c1, vtype c2, vtype c3, vtype c4)
{
/* Shared intermediates, then output 3 (final gate takes a4) */
vtype xEEEEEEEE99999999 = lut3(a1, a2, a6, 0x97);
vtype xFFFFEEEE66666666 = lut3(a5, a6, xEEEEEEEE99999999, 0x67);
vtype x5555FFFFFFFF0000 = lut3(a1, a5, a6, 0x76);
vtype x6666DDDD5555AAAA = lut3(a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69);
vtype x6969D3D35353ACAC = lut3(a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A);
vtype xCFCF3030CFCF3030 = lut3(a2, a3, a5, 0x65);
vtype xE4E4EEEE9999F0F0 = lut3(a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D);
vtype xE5E5BABACDCDB0B0 = lut3(a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA);
vtype x3 = lut3(a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6);
/* Output 2 (final gate takes a4) */
vtype x3333CCCC00000000 = lut3(a2, a5, a6, 0x14);
vtype xCCCCDDDDFFFF0F0F = lut3(a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5);
vtype x00000101F0F0F0F0 = lut3(a3, a6, xFFFFEEEE66666666, 0x1C);
vtype x9A9A64646A6A9595 = lut3(a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96);
vtype x2 = lut3(a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A);
/* Output 1 (final gate takes a4) */
vtype x3333BBBB3333FFFF = lut3(a1, a2, x6666DDDD5555AAAA, 0xDE);
vtype x1414141441410000 = lut3(a1, a3, xE4E4EEEE9999F0F0, 0x90);
vtype x7F7FF3F3F5F53939 = lut3(x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79);
vtype x9494E3E34B4B3939 = lut3(a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29);
vtype x1 = lut3(a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6);
/* Output 4 (final gate takes a4) */
/* a1 is passed twice in the next gate, so it is effectively a two-input function */
vtype xB1B1BBBBCCCCA5A5 = lut3(a1, a1, xE4E4EEEE9999F0F0, 0x4A);
vtype xFFFFECECEEEEDDDD = lut3(a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF);
vtype xB1B1A9A9DCDC8787 = lut3(xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D);
vtype xFFFFCCCCEEEE4444 = lut3(a2, a5, xFFFFEEEE66666666, 0x2B);
vtype x4 = lut3(a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C);
/* Accumulate the four outputs into the caller's array */
out[c1] ^= x1;
out[c2] ^= x2;
out[c3] ^= x3;
out[c4] ^= x4;
}
/*
 * DES S-box 3, LUT3 variant.
 *
 * a1..a6  the six S-box inputs
 * out     private array that accumulates the result
 * c1..c4  positions in out[] for outputs 1..4
 *
 * The four outputs are XORed into out[] (not assigned).
 *
 * The body is a fixed netlist of 25 lut3() operations, which matches the
 * gate count given for S3 in the file header.  Each 8-bit constant belongs
 * to the operand order it is written with.
 *
 * NOTE(review): lut3() is not defined in this file; it is presumably a
 * three-input bitwise lookup with the last argument as truth table -
 * confirm in opencl_misc.h.
 * NOTE(review): c1..c4 are declared vtype but used as array indices, which
 * assumes vtype is a scalar integer type in this build - confirm.
 */
inline void
s3(vtype a1, vtype a2, vtype a3, vtype a4, vtype a5, vtype a6,
__private vtype *out, vtype c1, vtype c2, vtype c3, vtype c4)
{
/* Shared intermediates, then output 1 (final gate takes a2) */
vtype xA50FA50FA50FA50F = lut3(a1, a3, a4, 0xC9);
vtype xF0F00F0FF0F0F0F0 = lut3(a3, a5, a6, 0x4B);
vtype xAF0FA0AAAF0FAF0F = lut3(a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D);
vtype x5AA5A55A5AA55AA5 = lut3(a1, a4, xF0F00F0FF0F0F0F0, 0x69);
vtype xAA005FFFAA005FFF = lut3(a3, a5, xA50FA50FA50FA50F, 0xD6);
vtype x5AA5A55A0F5AFAA5 = lut3(a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C);
vtype x1 = lut3(a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6);
/* Output 4 (final gate takes a2) */
vtype xAA55AA5500AA00AA = lut3(a1, a4, a6, 0x49);
vtype xFAFAA50FFAFAA50F = lut3(a1, a5, xA50FA50FA50FA50F, 0x9B);
vtype x50AF0F5AFA50A5A5 = lut3(a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66);
vtype xAFAFAFAFFAFAFAFA = lut3(a1, a3, a6, 0x6F);
vtype xAFAFFFFFFFFAFAFF = lut3(a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB);
vtype x4 = lut3(a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C);
/* Output 3 (final gate takes a2) */
vtype x500F500F500F500F = lut3(a1, a3, a4, 0x98);
vtype xF0505A0505A5050F = lut3(x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D);
vtype xF0505A05AA55AAFF = lut3(a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A);
vtype xFF005F55FF005F55 = lut3(a1, a4, xAA005FFFAA005FFF, 0xB2);
vtype xA55F5AF0A55F5AF0 = lut3(a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D);
vtype x5A5F05A5A55F5AF0 = lut3(a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6);
vtype x3 = lut3(a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6);
/* Output 2 (final gate takes a2) */
vtype x0F0F0F0FA5A5A5A5 = lut3(a1, a3, a6, 0xC6);
vtype x5FFFFF5FFFA0FFA0 = lut3(x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB);
vtype xF5555AF500A05FFF = lut3(a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9);
vtype x05A5AAF55AFA55A5 = lut3(xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B);
vtype x2 = lut3(a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6);
/* Accumulate the four outputs into the caller's array */
out[c1] ^= x1;
out[c2] ^= x2;
out[c3] ^= x3;
out[c4] ^= x4;
}
#if 1
/* Roman Rusakov's s4 */
/*
 * DES S-box 4, LUT3 variant (the implementation selected by the
 * surrounding "#if 1").
 *
 * a1..a6  the six S-box inputs
 * out     private array that accumulates the result
 * c1..c4  positions in out[] for outputs 1..4
 *
 * The four outputs are XORed into out[] (not assigned).
 *
 * This version uses 15 lut3() operations plus 2 plain XORs.  a6 only
 * enters in the four final gates: outputs 3 and 4 are produced from the
 * same two intermediates with different constants, and likewise outputs
 * 1 and 2.
 *
 * NOTE(review): lut3() is not defined in this file; it is presumably a
 * three-input bitwise lookup with the last argument as truth table -
 * confirm in opencl_misc.h.
 * NOTE(review): the 8-hex-digit names presumably label the intermediate's
 * truth table over a1..a5 - labels only, not checked by anything.
 * NOTE(review): c1..c4 are declared vtype but used as array indices, which
 * assumes vtype is a scalar integer type in this build - confirm.
 */
inline void
s4(vtype a1, vtype a2, vtype a3, vtype a4, vtype a5, vtype a6,
__private vtype *out, vtype c1, vtype c2, vtype c3, vtype c4)
{
/* Intermediates built from a1..a5 only */
vtype x55AAFF00=lut3(a1, a4, a5, 0x36);
vtype x00F00F00=lut3(a3, a4, a5, 0x24);
vtype x1926330C=lut3(a2, a3, x55AAFF00, 0xA4);
vtype x4CA36B59=lut3(x00F00F00, a1, x1926330C, 0xB6);
vtype x00FF55AA=lut3(a1, a4, a5, 0x6C);
vtype x3FCC6E9D=lut3(a2, a3, x00FF55AA, 0x5E);
vtype x6A7935C8=lut3(a1, x00F00F00, x3FCC6E9D, 0xD6);
vtype x5D016B55=lut3(a1, x4CA36B59, x00FF55AA, 0xD4);
vtype x07AE9F5A=lut3(a3, x55AAFF00, x5D016B55, 0xD6);
vtype x61C8F93C=lut3(a1, a2, x07AE9F5A, 0x96);
/* Outputs 3 and 4: same operands, different truth-table constants */
vtype x3=lut3(a6, x4CA36B59, x61C8F93C, 0xC9);
vtype x4=lut3(a6, x4CA36B59, x61C8F93C, 0x93);
/* Outputs 3 and 4 are accumulated before outputs 1 and 2 are computed */
out[c3]^=x3;
out[c4]^=x4;
vtype x26DA5E91=x4CA36B59^x6A7935C8;
vtype x37217F22=lut3(a2, a4, x26DA5E91, 0x72);
vtype x56E9861E=x37217F22^x61C8F93C;
/* Outputs 1 and 2: same operands, different truth-table constants */
vtype x1=lut3(a6, x56E9861E, x6A7935C8, 0x5C);
vtype x2=lut3(a6, x56E9861E, x6A7935C8, 0x35);
out[c1]^=x1;
out[c2]^=x2;
}
#else
/* DeepLearningJohnDoe's s4 */
/*
 * DES S-box 4, alternative LUT3 implementation.
 *
 * This definition sits in the "#else" branch of a literal "#if 1", so it is
 * not compiled as the file stands; it is kept as a switchable alternative.
 *
 * a1..a6  the six S-box inputs
 * out     private array that accumulates the result
 * c1..c4  positions in out[] for outputs 1..4
 *
 * The four outputs are XORed into out[] (not assigned).
 *
 * The body is a fixed netlist of 18 lut3() operations, which matches the
 * gate count given for S4 in the file header.  a6 only enters in the four
 * final gates; outputs 4/3 share one operand pair and outputs 2/1 share
 * another, differing only in the constant.
 *
 * NOTE(review): lut3() is not defined in this file; it is presumably a
 * three-input bitwise lookup with the last argument as truth table -
 * confirm in opencl_misc.h.
 * NOTE(review): c1..c4 are declared vtype but used as array indices, which
 * assumes vtype is a scalar integer type in this build - confirm.
 */
inline void
s4(vtype a1, vtype a2, vtype a3, vtype a4, vtype a5, vtype a6,
__private vtype *out, vtype c1, vtype c2, vtype c3, vtype c4)
{
/* Intermediates feeding outputs 4 and 3 */
vtype x55F055F055F055F0 = lut3(a1, a3, a4, 0x72);
vtype xA500F5F0A500F5F0 = lut3(a3, a5, x55F055F055F055F0, 0xAD);
vtype xF50AF50AF50AF50A = lut3(a1, a3, a4, 0x59);
vtype xF5FA0FFFF5FA0FFF = lut3(a3, a5, xF50AF50AF50AF50A, 0xE7);
vtype x61C8F93C61C8F93C = lut3(a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6);
vtype x9999666699996666 = lut3(a1, a2, a5, 0x69);
vtype x22C022C022C022C0 = lut3(a2, a4, x55F055F055F055F0, 0x18);
vtype xB35C94A6B35C94A6 = lut3(xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63);
vtype x4 = lut3(a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A);
/* Intermediates feeding outputs 2 and 1 */
vtype x4848484848484848 = lut3(a1, a2, a3, 0x12);
vtype x55500AAA55500AAA = lut3(a1, a5, xF5FA0FFFF5FA0FFF, 0x28);
vtype x3C90B3D63C90B3D6 = lut3(x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E);
vtype x8484333384843333 = lut3(a1, x9999666699996666, x4848484848484848, 0x14);
vtype x4452F1AC4452F1AC = lut3(xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78);
vtype x9586CA379586CA37 = lut3(x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6);
vtype x2 = lut3(a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A);
/* Outputs 1 and 3 reuse the operand pairs of outputs 2 and 4 respectively */
vtype x1 = lut3(a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9);
vtype x3 = lut3(a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56);
/* Accumulate the four outputs into the caller's array */
out[c1] ^= x1;
out[c2] ^= x2;
out[c3] ^= x3;
out[c4] ^= x4;
}
#endif
/*
 * DES S-box 5, LUT3 variant.
 *
 * a1..a6  the six S-box inputs
 * out     private array that accumulates the result
 * c1..c4  positions in out[] for outputs 1..4
 *
 * The four outputs are XORed into out[] (not assigned).
 *
 * The body is a fixed netlist of 25 lut3() operations, which matches the
 * gate count given for S5 in the file header.  Each 8-bit constant belongs
 * to the operand order it is written with.
 *
 * NOTE(review): lut3() is not defined in this file; it is presumably a
 * three-input bitwise lookup with the last argument as truth table -
 * confirm in opencl_misc.h.
 * NOTE(review): c1..c4 are declared vtype but used as array indices, which
 * assumes vtype is a scalar integer type in this build - confirm.
 */
inline void
s5(vtype a1, vtype a2, vtype a3, vtype a4, vtype a5, vtype a6,
__private vtype *out, vtype c1, vtype c2, vtype c3, vtype c4)
{
/* Shared intermediates, then output 3 (final gate takes a4) */
vtype xA0A0A0A0FFFFFFFF = lut3(a1, a3, a6, 0xAB);
vtype xFFFF00005555FFFF = lut3(a1, a5, a6, 0xB9);
vtype xB3B320207777FFFF = lut3(a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8);
vtype x50505A5A5A5A5050 = lut3(a1, a3, xFFFF00005555FFFF, 0x34);
vtype xA2A2FFFF2222FFFF = lut3(a1, a5, xB3B320207777FFFF, 0xCE);
vtype x2E2E6969A4A46363 = lut3(a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29);
vtype x3 = lut3(a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6);
/* Output 2 (final gate takes a4) */
vtype xA5A50A0AA5A50A0A = lut3(a1, a3, a5, 0x49);
vtype x969639396969C6C6 = lut3(a2, a6, xA5A50A0AA5A50A0A, 0x96);
vtype x1B1B1B1B1B1B1B1B = lut3(a1, a2, a3, 0xCA);
vtype xBFBFBFBFF6F6F9F9 = lut3(a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E);
vtype x5B5BA4A4B8B81D1D = lut3(xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96);
vtype x2 = lut3(a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA);
/* Output 1 (final gate takes a4) */
vtype x5555BBBBFFFF5555 = lut3(a1, a2, xFFFF00005555FFFF, 0xE5);
vtype x6D6D9C9C95956969 = lut3(x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97);
vtype x1A1A67676A6AB4B4 = lut3(xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47);
vtype xA0A0FFFFAAAA0000 = lut3(a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B);
vtype x36369C9CC1C1D6D6 = lut3(x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9);
vtype x1 = lut3(a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA);
/* Output 4 (final gate takes a4) */
vtype x5555F0F0F5F55555 = lut3(a1, a3, xFFFF00005555FFFF, 0xB1);
vtype x79790202DCDC0808 = lut3(xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47);
vtype x6C6CF2F229295D5D = lut3(xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E);
vtype xA3A3505010101A1A = lut3(a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94);
vtype x7676C7C74F4FC7C7 = lut3(a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9);
vtype x4 = lut3(a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6);
/* Accumulate the four outputs into the caller's array */
out[c1] ^= x1;
out[c2] ^= x2;
out[c3] ^= x3;
out[c4] ^= x4;
}
/*
 * DES S-box 6, LUT3 variant.
 *
 * a1..a6  the six S-box inputs
 * out     private array that accumulates the result
 * c1..c4  positions in out[] for outputs 1..4
 *
 * The four outputs are XORed into out[] (not assigned).
 *
 * The body is a fixed netlist of 24 lut3() operations, which matches the
 * gate count given for S6 in the file header.  Note that x4 is both an
 * output and an input to a later gate, so the statement order matters.
 *
 * NOTE(review): lut3() is not defined in this file; it is presumably a
 * three-input bitwise lookup with the last argument as truth table -
 * confirm in opencl_misc.h.
 * NOTE(review): c1..c4 are declared vtype but used as array indices, which
 * assumes vtype is a scalar integer type in this build - confirm.
 */
inline void
s6(vtype a1, vtype a2, vtype a3, vtype a4, vtype a5, vtype a6,
__private vtype *out, vtype c1, vtype c2, vtype c3, vtype c4)
{
/* Shared intermediates, then output 4 (final gate takes a6) */
vtype x5050F5F55050F5F5 = lut3(a1, a3, a5, 0xB2);
vtype x6363C6C66363C6C6 = lut3(a1, a2, x5050F5F55050F5F5, 0x66);
/* a1 is passed twice in the next gate, so it is effectively a two-input function */
vtype xAAAA5555AAAA5555 = lut3(a1, a1, a5, 0xA9);
vtype x3A3A65653A3A6565 = lut3(a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9);
vtype x5963A3C65963A3C6 = lut3(a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6);
vtype xE7E76565E7E76565 = lut3(a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD);
vtype x455D45DF455D45DF = lut3(a1, a4, xE7E76565E7E76565, 0xE4);
vtype x4 = lut3(a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C);
/* Output 3 (final gate takes a6) */
vtype x1101220211012202 = lut3(a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20);
vtype xF00F0FF0F00F0FF0 = lut3(a3, a4, a5, 0x69);
vtype x16E94A9716E94A97 = lut3(xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E);
vtype x2992922929929229 = lut3(a1, a2, xF00F0FF0F00F0FF0, 0x49);
vtype xAFAF9823AFAF9823 = lut3(a5, x5050F5F55050F5F5, x2992922929929229, 0x93);
vtype x3 = lut3(a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C);
/* Output 1 (final gate takes a6) */
vtype x4801810248018102 = lut3(a4, x5963A3C65963A3C6, x1101220211012202, 0xA4);
vtype x5EE8FFFD5EE8FFFD = lut3(a5, x16E94A9716E94A97, x4801810248018102, 0x76);
vtype xF0FF00FFF0FF00FF = lut3(a3, a4, a5, 0xCD);
vtype x942D9A67942D9A67 = lut3(x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86);
vtype x1 = lut3(a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6);
/* Output 2 (final gate takes a6); the first gate here consumes output x4 */
vtype x6A40D4ED6F4DD4EE = lut3(a2, x4, xAFAF9823AFAF9823, 0x2D);
vtype x6CA89C7869A49C79 = lut3(x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26);
vtype xD6DE73F9D6DE73F9 = lut3(a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B);
vtype x925E63E1965A63E1 = lut3(x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2);
vtype x2 = lut3(a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA);
/* Accumulate the four outputs into the caller's array */
out[c1] ^= x1;
out[c2] ^= x2;
out[c3] ^= x3;
out[c4] ^= x4;
}
/*
 * DES S-box 7, LUT3 variant.
 *
 * a1..a6  the six S-box inputs
 * out     private array that accumulates the result
 * c1..c4  positions in out[] for outputs 1..4
 *
 * The four outputs are XORed into out[] (not assigned).
 *
 * The body is a fixed netlist of 24 lut3() operations, which matches the
 * gate count given for S7 in the file header.  Each 8-bit constant belongs
 * to the operand order it is written with.
 *
 * NOTE(review): lut3() is not defined in this file; it is presumably a
 * three-input bitwise lookup with the last argument as truth table -
 * confirm in opencl_misc.h.
 * NOTE(review): c1..c4 are declared vtype but used as array indices, which
 * assumes vtype is a scalar integer type in this build - confirm.
 */
inline void
s7(vtype a1, vtype a2, vtype a3, vtype a4, vtype a5, vtype a6,
__private vtype *out, vtype c1, vtype c2, vtype c3, vtype c4)
{
/* Shared intermediates, then output 1 (final gate takes a6) */
vtype x88AA88AA88AA88AA = lut3(a1, a2, a4, 0x0B);
vtype xAAAAFF00AAAAFF00 = lut3(a1, a4, a5, 0x27);
vtype xADAFF8A5ADAFF8A5 = lut3(a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E);
vtype x0A0AF5F50A0AF5F5 = lut3(a1, a3, a5, 0xA6);
vtype x6B69C5DC6B69C5DC = lut3(a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B);
vtype x1C69B2DC1C69B2DC = lut3(a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9);
vtype x1 = lut3(a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A);
/* Output 3 (final gate takes a6) */
vtype x9C9C9C9C9C9C9C9C = lut3(a1, a2, a3, 0x63);
vtype xE6E63BFDE6E63BFD = lut3(a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7);
vtype x6385639E6385639E = lut3(a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93);
vtype x5959C4CE5959C4CE = lut3(a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D);
vtype x5B53F53B5B53F53B = lut3(a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E);
vtype x3 = lut3(a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6);
/* Output 4 (final gate takes a6) */
vtype xFAF505FAFAF505FA = lut3(a3, a4, x0A0AF5F50A0AF5F5, 0x6D);
vtype x6A65956A6A65956A = lut3(a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6);
vtype x8888CCCC8888CCCC = lut3(a1, a2, a5, 0x23);
vtype x94E97A9494E97A94 = lut3(x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72);
vtype x4 = lut3(a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC);
/* Output 2 (final gate takes a6) */
vtype xA050A050A050A050 = lut3(a1, a3, a4, 0x21);
vtype xC1B87A2BC1B87A2B = lut3(xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4);
vtype xE96016B7E96016B7 = lut3(x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96);
vtype xE3CF1FD5E3CF1FD5 = lut3(x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E);
vtype x6776675B6776675B = lut3(xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B);
vtype x2 = lut3(a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6);
/* Accumulate the four outputs into the caller's array */
out[c1] ^= x1;
out[c2] ^= x2;
out[c3] ^= x3;
out[c4] ^= x4;
}
/*
 * DES S-box 8, LUT3 variant.
 *
 * a1..a6  the six S-box inputs
 * out     private array that accumulates the result
 * c1..c4  positions in out[] for outputs 1..4
 *
 * The four outputs are XORed into out[] (not assigned).
 *
 * The body is a fixed netlist of 23 lut3() operations, which matches the
 * gate count given for S8 in the file header.  Each 8-bit constant belongs
 * to the operand order it is written with.
 *
 * NOTE(review): lut3() is not defined in this file; it is presumably a
 * three-input bitwise lookup with the last argument as truth table -
 * confirm in opencl_misc.h.
 * NOTE(review): c1..c4 are declared vtype but used as array indices, which
 * assumes vtype is a scalar integer type in this build - confirm.
 */
inline void
s8(vtype a1, vtype a2, vtype a3, vtype a4, vtype a5, vtype a6,
__private vtype *out, vtype c1, vtype c2, vtype c3, vtype c4)
{
/* Shared intermediates, then output 4 (final gate takes a6) */
vtype xEEEE3333EEEE3333 = lut3(a1, a2, a5, 0x9D);
/* a1 is passed twice in the next gate, so it is effectively a two-input function */
vtype xBBBBBBBBBBBBBBBB = lut3(a1, a1, a2, 0x83);
vtype xDDDDAAAADDDDAAAA = lut3(a1, a2, a5, 0x5B);
vtype x29295A5A29295A5A = lut3(a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85);
vtype xC729695AC729695A = lut3(a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6);
vtype x3BF77B7B3BF77B7B = lut3(a2, a5, xC729695AC729695A, 0xF9);
vtype x2900FF002900FF00 = lut3(a4, a5, x29295A5A29295A5A, 0x0E);
vtype x56B3803F56B3803F = lut3(xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61);
vtype x4 = lut3(a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C);
/* Output 2 (final gate takes a6) */
vtype xFBFBFBFBFBFBFBFB = lut3(a1, a2, a3, 0xDF);
vtype x3012B7B73012B7B7 = lut3(a2, a5, xC729695AC729695A, 0xD4);
vtype x34E9B34C34E9B34C = lut3(a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69);
vtype xBFEAEBBEBFEAEBBE = lut3(a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F);
vtype xFFAEAFFEFFAEAFFE = lut3(a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9);
vtype x2 = lut3(a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6);
/* Output 3 (final gate takes a6) */
vtype xCFDE88BBCFDE88BB = lut3(a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C);
vtype x3055574530555745 = lut3(a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71);
vtype x99DDEEEE99DDEEEE = lut3(a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9);
vtype x693CD926693CD926 = lut3(x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69);
vtype x3 = lut3(a6, x3055574530555745, x693CD926693CD926, 0x6A);
/* Output 1 (final gate takes a6) */
vtype x9955EE559955EE55 = lut3(a1, a4, x99DDEEEE99DDEEEE, 0xE2);
vtype x9D48FA949D48FA94 = lut3(x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C);
vtype x1 = lut3(a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39);
/* Accumulate the four outputs into the caller's array */
out[c1] ^= x1;
out[c2] ^= x2;
out[c3] ^= x3;
out[c4] ^= x4;
}
#else
/*
 * HAVE_LUT3 is false: none of the lut3-based functions above are compiled.
 * Instead andn is redefined to 0 and opencl_nonstd.h is included.
 * NOTE(review): presumably that header provides s1..s8 and uses andn to
 * choose between and-not and plain-gate variants - confirm there.
 */
#undef andn
#define andn 0
#include "opencl_nonstd.h"
#endif /* HAVE_LUT3 */