-
Notifications
You must be signed in to change notification settings - Fork 0
/
twofish.c
1690 lines (1515 loc) · 61.6 KB
/
twofish.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Fast, portable, and easy-to-use Twofish implementation,
* Version 0.3.
* Copyright (c) 2002 by Niels Ferguson.
* (See further down for the almost-unrestricted licensing terms.)
*
* --------------------------------------------------------------------------
* There are two files for this implementation:
* - twofish.h, the header file.
* - twofish.c, the code file.
*
* To incorporate this code into your program you should:
* - Check the licensing terms further down in this comment.
* - Fix the two type definitions in twofish.h to suit your platform.
* - Fix a few definitions in twofish.c in the section marked
* PLATFORM FIXES. There is one important ones that affects
* functionality, and then a few definitions that you can optimise
* for efficiency but those have no effect on the functionality.
* Don't change anything else.
* - Put the code in your project and compile it.
*
* To use this library you should:
* - Call Twofish_initialise() in your program before any other function in
* this library.
* - Use Twofish_prepare_key(...) to convert a key to internal form.
* - Use Twofish_encrypt(...) and Twofish_decrypt(...) to encrypt and decrypt
* data.
* See the comments in the header file for details on these functions.
* --------------------------------------------------------------------------
*
* There are many Twofish implementation available for free on the web.
* Most of them are hard to integrate into your own program.
* As we like people to use our cipher, I thought I would make it easier.
* Here is a free and easy-to-integrate Twofish implementation in C.
* The latest version is always available from my personal home page at
* http://niels.ferguson.net/
*
* Integrating library code into a project is difficult because the library
* header files interfere with the project's header files and code.
* And of course the project's header files interfere with the library code.
* I've tried to resolve these problems here.
* The header file of this implementation is very light-weight.
* It contains two typedefs, a structure, and a few function declarations.
* All names it defines start with "Twofish_".
* The header file is therefore unlikely to cause problems in your project.
* The code file of this implementation doesn't need to include the header
* files of the project. There is thus no danger of the project interfering
* with all the definitions and macros of the Twofish code.
* In most situations, all you need to do is fill in a few platform-specific
* definitions in the header file and code file,
* and you should be able to run the Twofish code in your project.
* I estimate it should take you less than an hour to integrate this code
* into your project, most of it spent reading the comments telling you what
* to do.
*
* For people using C++: it is very easy to wrap this library into a
* TwofishKey class. One of the big advantages is that you can automate the
* wiping of the key material in the destructor. I have not provided a C++
* class because the interface depends too much on the abstract base class
* you use for block ciphers in your program, which I don't know about.
*
* This implementation is designed for use on PC-class machines. It uses the
* Twofish 'full' keying option which uses large tables. Total table size is
* around 5-6 kB for static tables plus 4.5 kB for each pre-processed key.
* If you need an implementation that uses less memory,
* take a look at Brian Gladman's code on his web site:
* http://fp.gladman.plus.com/cryptography_technology/aes/
* He has code for all AES candidates.
* His Twofish code has lots of options trading off table size vs. speed.
* You can also take a look at the optimised code by Doug Whiting on the
* Twofish web site
* http://www.counterpane.com/twofish.html
* which has loads of options.
* I believe these existing implementations are harder to re-use because they
* are not clean libraries and they impose requirements on the environment.
* This implementation is very careful to minimise those,
* and should be easier to integrate into any larger program.
*
* The default mode of this implementation is fully portable as it uses no
* behaviour not defined in the C standard. (This is harder than you think.)
* If you have any problems porting the default mode, please let me know
* so that I can fix the problem. (But only if this code is at fault, I
* don't fix compilers.)
* Most of the platform fixes are related to non-portable but faster ways
* of implementing certain functions.
*
* In general I've tried to make the code as fast as possible, at the expense
* of memory and code size. However, C does impose limits, and this
* implementation will be slower than an optimised assembler implementation.
* But beware of assembler implementations: a good Pentium implementation
* uses completely different code than a good Pentium II implementation.
* You basically have to re-write the assembly code for every generation of
* processor. Unless you are severely pressed for speed, stick with C.
*
* The initialisation routine of this implementation contains a self-test.
* If initialisation succeeds without calling the fatal routine, then
* the implementation works. I don't think you can break the implementation
* in such a way that it still passes the tests, unless you are malicious.
* In other words: if the initialisation routine returns,
* you have successfully ported the implementation.
* (Or not implemented the fatal routine properly, but that is your problem.)
*
* I'm indebted to many people who helped me in one way or another to write
* this code. During the design of Twofish and the AES process I had very
* extensive discussions of all implementation issues with various people.
* Doug Whiting in particular provided a wealth of information. The Twofish
* team spent untold hours discussion various cipher features, and their
* implementation. Brian Gladman implemented all AES candidates in C,
* and we had some fruitful discussions on how to implement Twofish in C.
* Jan Nieuwenhuizen tested this code on Linux using GCC.
*
* Now for the license:
* The author hereby grants a perpetual license to everybody to
* use this code for any purpose as long as the copyright message is included
* in the source code of this or any derived work.
*
* Yes, this means that you, your company, your club, and anyone else
* can use this code anywhere you want. You can change it and distribute it
* under the GPL, include it in your commercial product without releasing
* the source code, put it on the web, etc.
* The only thing you cannot do is remove my copyright message,
* or distribute any source code based on this implementation that does not
* include my copyright message.
*
* I appreciate a mention in the documentation or credits,
* but I understand if that is difficult to do.
* I also appreciate it if you tell me where and why you used my code.
*
* Please send any questions or comments to [email protected]
*
* Have Fun!
*
* Niels
*/
/*
* DISCLAIMER: As I'm giving away my work for free, I'm of course not going
* to accept any liability of any form. This code, or the Twofish cipher,
* might very well be flawed; you have been warned.
* This software is provided as-is, without any kind of warrenty or
* guarantee. And that is really all you can expect when you download
* code for free from the Internet.
*
* I think it is really sad that disclaimers like this seem to be necessary.
* If people only had a little bit more common sense, and didn't come
* whining like little children every time something happens....
*/
/*
* Version history:
* Version 0.0, 2002-08-30
* First written.
* Version 0.1, 2002-09-03
* Added disclaimer. Improved self-tests.
* Version 0.2, 2002-09-09
* Removed last non-portabilities. Default now works completely within
* the C standard. UInt32 can be larger than 32 bits without problems.
* Version 0.3, 2002-09-28
* Bugfix: use <string.h> instead of <memory.h> to adhere to ANSI/ISO.
* Rename BIG_ENDIAN macro to CPU_IS_BIG_ENDIAN. The gcc library
* header <string.h> already defines BIG_ENDIAN, even though it is not
* supposed to.
*/
/*
* Minimum set of include files.
* You should not need any application-specific include files for this code.
* In fact, adding you own header files could break one of the many macros or
* functions in this file. Be very careful.
* Standard include files will probably be ok.
*/
#include <string.h> /* for memset(), memcpy(), and memcmp() */
#include "twofish.h"
/*
* PLATFORM FIXES
* ==============
*
* Fix the type definitions in twofish.h first!
*
* The following definitions have to be fixed for each particular platform
* you work on. If you have a multi-platform program, you no doubt have
* portable definitions that you can substitute here without changing the
* rest of the code.
*/
/*
* Function called if something is fatally wrong with the implementation.
* This fatal function is called when a coding error is detected in the
* Twofish implementation, or when somebody passes an obviously erroneous
* parameter to this implementation. There is not much you can do when
* the code contains bugs, so we just stop.
*
* The argument is a string. Ideally the fatal function prints this string
* as an error message. Whatever else this function does, it should never
* return. A typical implementation would stop the program completely after
* printing the error message.
*
* This default implementation is not very useful,
* but does not assume anything about your environment.
* It will at least let you know something is wrong....
* I didn't want to include any libraries to print and error or so,
* as this makes the code much harder to integrate in a project.
*
* Note that the Twofish_fatal function may not return to the caller.
* Unfortunately this is not something the self-test can test for,
* so you have to make sure of this yourself.
*
* If you want to call an external function, be careful about including
* your own header files here. This code uses a lot of macros, and your
* header file could easily break it. Maybe the best solution is to use
* a separate extern statement for your fatal function.
*/
#define Twofish_fatal( msg ) {for(;;);}
/*
* The rest of the settings are not important for the functionality
* of this Twofish implementation. That is, their default settings
* work on all platforms. You can change them to improve the
* speed of the implementation on your platform. Erroneous settings
* will result in erroneous implementations, but the self-test should
* catch those.
*/
/*
* Macros to rotate a Twofish_UInt32 value left or right by the
* specified number of bits. This should be a 32-bit rotation,
* and not rotation of, say, 64-bit values.
*
* Every encryption or decryption operation uses 32 of these rotations,
* so it is a good idea to make these macros efficient.
*
* This fully portable definition has one piece of tricky stuff.
* The UInt32 might be larger than 32 bits, so we have to mask
* any higher bits off. The simplest way to do this is to 'and' the
* value first with 0xffffffff and then shift it right. An optimising
* compiler that has a 32-bit type can optimise this 'and' away.
*
* Unfortunately there is no portable way of writing the constant
* 0xffffffff. You don't know which suffix to use (U, or UL?)
* The UINT32_MASK definition uses a bit of trickery. Shift-left
* is only defined if the shift amount is strictly less than the size
* of the UInt32, so we can't use (1<<32). The answer it to take the value
* 2, cast it to a UInt32, shift it left 31 positions, and subtract one.
* Another example of how to make something very simple extremely difficult.
* I hate C.
*
* The rotation macros are straightforward.
* They are only applied to UInt32 values, which are _unsigned_
* so the >> operator must do a logical shift that brings in zeroes.
* On most platforms you will only need to optimise the ROL32 macro; the
* ROR32 macro is not inefficient on an optimising compiler as all rotation
* amounts in this code are known at compile time.
*
* On many platforms there is a faster solution.
* For example, MS compilers have the __rotl and __rotr functions
* that generate x86 rotation instructions.
*/
#define UINT32_MASK ( (((UInt32)2)<<31) - 1 )
#define ROL32( x, n ) ( (x)<<(n) | ((x) & UINT32_MASK) >> (32-(n)) )
#define ROR32( x, n ) ROL32( (x), 32-(n) )
/*
* Select data type for q-table entries.
*
* Larger entry types cost more memory (1.5 kB), and might be faster
* or slower depending on the CPU and compiler details.
*
* This choice only affects the static data size and the key setup speed.
* Functionality, expanded key size, or encryption speed are not affected.
* Define to 1 to get large q-table entries.
*/
#define LARGE_Q_TABLE 0 /* default = 0 */
/*
* Method to select a single byte from a UInt32.
* WARNING: non-portable code if set; might not work on all platforms.
*
* Inside the inner loop of Twofish it is necessary to access the 4
* individual bytes of a UInt32. This can be done using either shifts
* and masks, or memory accesses.
*
* Set to 0 to use shift and mask operations for the byte selection.
* This is more ALU intensive. It is also fully portable.
*
* Set to 1 to use memory accesses. The UInt32 is stored in memory and
* the individual bytes are read from memory one at a time.
* This solution is more memory-intensive, and not fully portable.
* It might be faster on your platform, or not. If you use this option,
* make sure you set the CPU_IS_BIG_ENDIAN flag appropriately.
*
* This macro does not affect the conversion of the inputs and outputs
* of the cipher. See the CONVERT_USING_CASTS macro for that.
*/
#define SELECT_BYTE_FROM_UINT32_IN_MEMORY 0 /* default = 0 */
/*
* Method used to read the input and write the output.
* WARNING: non-portable code if set; might not work on all platforms.
*
* Twofish operates on 32-bit words. The input to the cipher is
* a byte array, as is the output. The portable method of doing the
* conversion is a bunch of rotate and mask operations, but on many
* platforms it can be done faster using a cast.
* This only works if your CPU allows UInt32 accesses to arbitrary Byte
* addresses.
*
* Set to 0 to use the shift and mask operations. This is fully
* portable. .
*
* Set to 1 to use a cast. The Byte * is cast to a UInt32 *, and a
* UInt32 is read. If necessary (as indicated by the CPU_IS_BIG_ENDIAN
* macro) the byte order in the UInt32 is swapped. The reverse is done
* to write the output of the encryption/decryption. Make sure you set
* the CPU_IS_BIG_ENDIAN flag appropriately.
* This option does not work unless a UInt32 is exactly 32 bits.
*
* This macro only changes the reading/writing of the plaintext/ciphertext.
* See the SELECT_BYTE_FROM_UINT32_IN_MEMORY to affect the way in which
* a UInt32 is split into 4 bytes for the S-box selection.
*/
#define CONVERT_USING_CASTS 0 /* default = 0 */
/*
* Endianness switch.
* Only relevant if SELECT_BYTE_FROM_UINT32_IN_MEMORY or
* CONVERT_USING_CASTS is set.
*
* Set to 1 on a big-endian machine, and to 0 on a little-endian machine.
* Twofish uses the little-endian convention (least significant byte first)
* and big-endian machines (using most significant byte first)
* have to do a few conversions.
*
* CAUTION: This code has never been tested on a big-endian machine,
* because I don't have access to one. Feedback appreciated.
*/
#define CPU_IS_BIG_ENDIAN 0
/*
* Macro to reverse the order of the bytes in a UInt32.
* Used to convert to little-endian on big-endian machines.
* This macro is always tested, but only used in the encryption and
* decryption if CONVERT_USING_CASTS, and CPU_IS_BIG_ENDIAN
* are both set. In other words: this macro is only speed-critical if
* both these flags have been set.
*
* This default definition of SWAP works, but on many platforms there is a
* more efficient implementation.
*/
#define BSWAP(x) (ROL32((x),8)&0x00ff00ff | ROR32((x),8) & 0xff00ff00)
/*
* END OF PLATFORM FIXES
* =====================
*
* You should not have to touch the rest of this file.
*/
/*
* Convert the external type names to some that are easier to use inside
* this file. I didn't want to use the names Byte and UInt32 in the
* header file, because many programs already define them and using two
* conventions at once can be very difficult.
* Don't change these definitions! Change the originals
* in twofish.h instead.
*/
/* A Byte must be an unsigned integer, 8 bits long. */
typedef Twofish_Byte Byte;
/* A UInt32 must be an unsigned integer at least 32 bits long. */
typedef Twofish_UInt32 UInt32;
/*
* Define a macro ENDIAN_CONVERT.
*
* We define a macro ENDIAN_CONVERT that performs a BSWAP on big-endian
* machines, and is the identity function on little-endian machines.
* The code then uses this macro without considering the endianness.
*/
#if CPU_IS_BIG_ENDIAN
#define ENDIAN_CONVERT(x) BSWAP(x)
#else
#define ENDIAN_CONVERT(x) (x)
#endif
/*
* Compute byte offset within a UInt32 stored in memory.
*
* This is only used when SELECT_BYTE_FROM_UINT32_IN_MEMORY is set.
*
* The input is the byte number 0..3, 0 for least significant.
* Note the use of sizeof() to support UInt32 types that are larger
* than 4 bytes.
*/
#if CPU_IS_BIG_ENDIAN
#define BYTE_OFFSET( n ) (sizeof(UInt32) - 1 - (n) )
#else
#define BYTE_OFFSET( n ) (n)
#endif
/*
* Macro to get Byte no. b from UInt32 value X.
* We use two different definition, depending on the settings.
*/
#if SELECT_BYTE_FROM_UINT32_IN_MEMORY
/* Pick the byte from the memory in which X is stored. */
#define SELECT_BYTE( X, b ) (((Byte *)(&(X)))[BYTE_OFFSET(b)])
#else
/* Portable solution: Pick the byte directly from the X value. */
#define SELECT_BYTE( X, b ) (((X) >> 8*(b)) & 0xff)
#endif
/* Some shorthands because we use byte selection in large formulae. */
#define b0(X) SELECT_BYTE((X),0)
#define b1(X) SELECT_BYTE((X),1)
#define b2(X) SELECT_BYTE((X),2)
#define b3(X) SELECT_BYTE((X),3)
/*
* We need macros to load and store UInt32 from/to byte arrays
* using the least-significant-byte-first convention.
*
* GET32( p ) gets a UInt32 in lsb-first form from four bytes pointed to
* by p.
* PUT32( v, p ) writes the UInt32 value v at address p in lsb-first form.
*/
#if CONVERT_USING_CASTS
/* Get UInt32 from four bytes pointed to by p. */
#define GET32( p ) ENDIAN_CONVERT( *((UInt32 *)(p)) )
/* Put UInt32 into four bytes pointed to by p */
#define PUT32( v, p ) *((UInt32 *)(p)) = ENDIAN_CONVERT(v)
#else
/* Get UInt32 from four bytes pointed to by p. */
#define GET32( p ) \
( \
(UInt32)((p)[0]) \
| (UInt32)((p)[1])<< 8\
| (UInt32)((p)[2])<<16\
| (UInt32)((p)[3])<<24\
)
/* Put UInt32 into four bytes pointed to by p */
#define PUT32( v, p ) \
(p)[0] = (Byte)(((v) ) & 0xff);\
(p)[1] = (Byte)(((v) >> 8) & 0xff);\
(p)[2] = (Byte)(((v) >> 16) & 0xff);\
(p)[3] = (Byte)(((v) >> 24) & 0xff)
#endif
/*
* Test the platform-specific macros.
* This function tests the macros defined so far to make sure the
* definitions are appropriate for this platform.
* If you make any mistake in the platform configuration, this should detect
* that and inform you what went wrong.
* Somewhere, someday, this is going to save somebody a lot of time,
* because misbehaving macros are hard to debug.
*/
static void test_platform()
{
/* Buffer with test values. */
Byte buf[] = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0};
UInt32 C;
UInt32 x,y;
int i;
/*
* Some sanity checks on the types that can't be done in compile time.
* A smart compiler will just optimise these tests away.
* The pre-processor doesn't understand different types, so we cannot
* do these checks in compile-time.
*
* I hate C.
*
* The first check in each case is to make sure the size is correct.
* The second check is to ensure that it is an unsigned type.
*/
if( ((UInt32)((UInt32)1 << 31) == 0) | ((UInt32)-1 < 0) )
{
Twofish_fatal( "Twofish code: Twofish_UInt32 type not suitable" );
}
if( sizeof( Byte ) != 1 | (Byte)-1 < 0 )
{
Twofish_fatal( "Twofish code: Twofish_Byte type not suitable" );
}
/*
* Sanity-check the endianness conversions.
* This is just an aid to find problems. If you do the endianness
* conversion macros wrong you will fail the full cipher test,
* but that does not help you find the error.
* Always make it easy to find the bugs!
*
* Detail: There is no fully portable way of writing UInt32 constants,
* as you don't know whether to use the U or UL suffix. Using only U you
* might only be allowed 16-bit constants. Using UL you might get 64-bit
* constants which cannot be stored in a UInt32 without warnings, and
* which generally behave subtly different from a true UInt32.
* As long as we're just comparing with the constant,
* we can always use the UL suffix and at worst lose some efficiency.
* I use a separate '32-bit constant' macro in most of my other code.
*
* I hate C.
*
* Start with testing GET32. We test it on all positions modulo 4
* to make sure we can handly any position of inputs. (Some CPUs
* do not allow non-aligned accesses which we would do if you used
* the CONVERT_USING_CASTS option.
*/
if( GET32( buf ) != 0x78563412UL || GET32(buf+1) != 0x9a785634UL
|| GET32( buf+2 ) != 0xbc9a7856UL || GET32(buf+3) != 0xdebc9a78UL )
{
Twofish_fatal( "Twofish code: GET32 not implemented properly" );
}
/*
* We can now use GET32 to test PUT32.
* We don't test the shifted versions. If GET32 can do that then
* so should PUT32.
*/
C = GET32( buf );
PUT32( 3*C, buf );
if( GET32( buf ) != 0x69029c36UL )
{
Twofish_fatal( "Twofish code: PUT32 not implemented properly" );
}
/* Test ROL and ROR */
for( i=1; i<32; i++ )
{
/* Just a simple test. */
x = ROR32( C, i );
y = ROL32( C, i );
x ^= (C>>i) ^ (C<<(32-i));
y ^= (C<<i) ^ (C>>(32-i));
x |= y;
/*
* Now all we check is that x is zero in the least significant
* 32 bits. Using the UL suffix is safe here, as it doesn't matter
* if we get a larger type.
*/
if( (x & 0xffffffffUL) != 0 )
{
Twofish_fatal( "Twofish ROL or ROR not properly defined." );
}
}
/* Test the BSWAP macro */
if( BSWAP(C) != 0x12345678UL )
{
/*
* The BSWAP macro should always work, even if you are not using it.
* A smart optimising compiler will just remove this entire test.
*/
Twofish_fatal( "BSWAP not properly defined." );
}
/* And we can test the b<i> macros which use SELECT_BYTE. */
if( b0(C)!=0x12 | b1(C) != 0x34 | b2(C) != 0x56 | b3(C) != 0x78 )
{
/*
* There are many reasons why this could fail.
* Most likely is that CPU_IS_BIG_ENDIAN has the wrong value.
*/
Twofish_fatal( "Twofish code: SELECT_BYTE not implemented properly" );
}
}
/*
* Finally, we can start on the Twofish-related code.
* You really need the Twofish specifications to understand this code. The
* best source is the Twofish book:
* "The Twofish Encryption Algorithm", by Bruce Schneier, John Kelsey,
* Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson.
* you can also use the AES submission document of Twofish, which is
* available from my list of publications on my personal web site at
* http://niels.ferguson.net/.
*
* The first thing we do is write the testing routines. This is what the
* implementation has to satisfy in the end. We only test the external
* behaviour of the implementation of course.
*/
/*
* Perform a single self test on a (plaintext,ciphertext,key) triple.
* Arguments:
* key array of key bytes
* key_len length of key in bytes
* p plaintext
* c ciphertext
*/
static void test_vector( Byte key[], int key_len, Byte p[16], Byte c[16] )
{
Byte tmp[16]; /* scratch pad. */
Twofish_key xkey; /* The expanded key */
int i;
/* Prepare the key */
Twofish_prepare_key( key, key_len, &xkey );
/*
* We run the test twice to ensure that the xkey structure
* is not damaged by the first encryption.
* Those are hideous bugs to find if you get them in an application.
*/
for( i=0; i<2; i++ )
{
/* Encrypt and test */
Twofish_encrypt( &xkey, p, tmp );
if( memcmp( c, tmp, 16 ) != 0 )
{
Twofish_fatal( "Twofish encryption failure" );
}
/* Decrypt and test */
Twofish_decrypt( &xkey, c, tmp );
if( memcmp( p, tmp, 16 ) != 0 )
{
Twofish_fatal( "Twofish decryption failure" );
}
}
/* The test keys are not secret, so we don't need to wipe xkey. */
}
/*
* Check implementation using three (key,plaintext,ciphertext)
* test vectors, one for each major key length.
*
* This is an absolutely minimal self-test.
* This routine does not test odd-sized keys.
*/
static void test_vectors()
{
/*
* We run three tests, one for each major key length.
* These test vectors come from the Twofish specification.
* One encryption and one decryption using randomish data and key
* will detect almost any error, especially since we generate the
* tables ourselves, so we don't have the problem of a single
* damaged table entry in the source.
*/
/* 128-bit test is the I=3 case of section B.2 of the Twofish book. */
static Byte k128[] = {
0x9F, 0x58, 0x9F, 0x5C, 0xF6, 0x12, 0x2C, 0x32,
0xB6, 0xBF, 0xEC, 0x2F, 0x2A, 0xE8, 0xC3, 0x5A,
};
static Byte p128[] = {
0xD4, 0x91, 0xDB, 0x16, 0xE7, 0xB1, 0xC3, 0x9E,
0x86, 0xCB, 0x08, 0x6B, 0x78, 0x9F, 0x54, 0x19
};
static Byte c128[] = {
0x01, 0x9F, 0x98, 0x09, 0xDE, 0x17, 0x11, 0x85,
0x8F, 0xAA, 0xC3, 0xA3, 0xBA, 0x20, 0xFB, 0xC3
};
/* 192-bit test is the I=4 case of section B.2 of the Twofish book. */
static Byte k192[] = {
0x88, 0xB2, 0xB2, 0x70, 0x6B, 0x10, 0x5E, 0x36,
0xB4, 0x46, 0xBB, 0x6D, 0x73, 0x1A, 0x1E, 0x88,
0xEF, 0xA7, 0x1F, 0x78, 0x89, 0x65, 0xBD, 0x44
};
static Byte p192[] = {
0x39, 0xDA, 0x69, 0xD6, 0xBA, 0x49, 0x97, 0xD5,
0x85, 0xB6, 0xDC, 0x07, 0x3C, 0xA3, 0x41, 0xB2
};
static Byte c192[] = {
0x18, 0x2B, 0x02, 0xD8, 0x14, 0x97, 0xEA, 0x45,
0xF9, 0xDA, 0xAC, 0xDC, 0x29, 0x19, 0x3A, 0x65
};
/* 256-bit test is the I=4 case of section B.2 of the Twofish book. */
static Byte k256[] = {
0xD4, 0x3B, 0xB7, 0x55, 0x6E, 0xA3, 0x2E, 0x46,
0xF2, 0xA2, 0x82, 0xB7, 0xD4, 0x5B, 0x4E, 0x0D,
0x57, 0xFF, 0x73, 0x9D, 0x4D, 0xC9, 0x2C, 0x1B,
0xD7, 0xFC, 0x01, 0x70, 0x0C, 0xC8, 0x21, 0x6F
};
static Byte p256[] = {
0x90, 0xAF, 0xE9, 0x1B, 0xB2, 0x88, 0x54, 0x4F,
0x2C, 0x32, 0xDC, 0x23, 0x9B, 0x26, 0x35, 0xE6
};
static Byte c256[] = {
0x6C, 0xB4, 0x56, 0x1C, 0x40, 0xBF, 0x0A, 0x97,
0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA
};
/* Run the actual tests. */
test_vector( k128, 16, p128, c128 );
test_vector( k192, 24, p192, c192 );
test_vector( k256, 32, p256, c256 );
}
/*
* Perform extensive test for a single key size.
*
* Test a single key size against the test vectors from section
* B.2 in the Twofish book. This is a sequence of 49 encryptions
* and decryptions. Each plaintext is equal to the ciphertext of
* the previous encryption. The key is made up from the ciphertext
* two and three encryptions ago. Both plaintext and key start
* at the zero value.
* We should have designed a cleaner recurrence relation for
* these tests, but it is too late for that now. At least we learned
* how to do it better next time.
* For details see appendix B of the book.
*
* Arguments:
* key_len Number of bytes of key
* final_value Final plaintext value after 49 iterations
*/
static void test_sequence( int key_len, Byte final_value[] )
{
Byte buf[ (50+3)*16 ]; /* Buffer to hold our computation values. */
Byte tmp[16]; /* Temp for testing the decryption. */
Twofish_key xkey; /* The expanded key */
int i;
Byte * p;
/* Wipe the buffer */
memset( buf, 0, sizeof( buf ) );
/*
* Because the recurrence relation is done in an inconvenient manner
* we end up looping backwards over the buffer.
*/
/* Pointer in buffer points to current plaintext. */
p = &buf[50*16];
for( i=1; i<50; i++ )
{
/*
* Prepare a key.
* This automatically checks that key_len is valid.
*/
Twofish_prepare_key( p+16, key_len, &xkey );
/* Compute the next 16 bytes in the buffer */
Twofish_encrypt( &xkey, p, p-16 );
/* Check that the decryption is correct. */
Twofish_decrypt( &xkey, p-16, tmp );
if( memcmp( tmp, p, 16 ) != 0 )
{
Twofish_fatal( "Twofish decryption failure in sequence" );
}
/* Move on to next 16 bytes in the buffer. */
p -= 16;
}
/* And check the final value. */
if( memcmp( p, final_value, 16 ) != 0 )
{
Twofish_fatal( "Twofish encryption failure in sequence" );
}
/* None of the data was secret, so there is no need to wipe anything. */
}
/*
* Run all three sequence tests from the Twofish test vectors.
*
* This checks the most extensive test vectors currently available
* for Twofish. The data is from the Twofish book, appendix B.2.
*/
static void test_sequences()
{
static Byte r128[] = {
0x5D, 0x9D, 0x4E, 0xEF, 0xFA, 0x91, 0x51, 0x57,
0x55, 0x24, 0xF1, 0x15, 0x81, 0x5A, 0x12, 0xE0
};
static Byte r192[] = {
0xE7, 0x54, 0x49, 0x21, 0x2B, 0xEE, 0xF9, 0xF4,
0xA3, 0x90, 0xBD, 0x86, 0x0A, 0x64, 0x09, 0x41
};
static Byte r256[] = {
0x37, 0xFE, 0x26, 0xFF, 0x1C, 0xF6, 0x61, 0x75,
0xF5, 0xDD, 0xF4, 0xC3, 0x3B, 0x97, 0xA2, 0x05
};
/* Run the three sequence test vectors */
test_sequence( 16, r128 );
test_sequence( 24, r192 );
test_sequence( 32, r256 );
}
/*
* Test the odd-sized keys.
*
* Every odd-sized key is equivalent to a one of 128, 192, or 256 bits.
* The equivalent key is found by padding at the end with zero bytes
* until a regular key size is reached.
*
* We just test that the key expansion routine behaves properly.
* If the expanded keys are identical, then the encryptions and decryptions
* will behave the same.
*/
static void test_odd_sized_keys()
{
Byte buf[32];
Twofish_key xkey;
Twofish_key xkey_two;
int i;
/*
* We first create an all-zero key to use as PRNG key.
* Normally we would not have to fill the buffer with zeroes, as we could
* just pass a zero key length to the Twofish_prepare_key function.
* However, this relies on using odd-sized keys, and those are just the
* ones we are testing here. We can't use an untested function to test
* itself.
*/
memset( buf, 0, sizeof( buf ) );
Twofish_prepare_key( buf, 16, &xkey );
/* Fill buffer with pseudo-random data derived from two encryptions */
Twofish_encrypt( &xkey, buf, buf );
Twofish_encrypt( &xkey, buf, buf+16 );
/* Create all possible shorter keys that are prefixes of the buffer. */
for( i=31; i>=0; i-- )
{
/* Set a byte to zero. This is the new padding byte */
buf[i] = 0;
/* Expand the key with only i bytes of length */
Twofish_prepare_key( buf, i, &xkey );
/* Expand the corresponding padded key of regular length */
Twofish_prepare_key( buf, i<=16 ? 16 : i<= 24 ? 24 : 32, &xkey_two );
/* Compare the two */
if( memcmp( &xkey, &xkey_two, sizeof( xkey ) ) != 0 )
{
Twofish_fatal( "Odd sized keys do not expand properly" );
}
}
/* None of the key values are secret, so we don't need to wipe them. */
}
/*
* Test the Twofish implementation.
*
* This routine runs all the self tests, in order of importance.
* It is called by the Twofish_initialise routine.
*
* In almost all applications the cost of running the self tests during
* initialisation is insignificant, especially
* compared to the time it takes to load the application from disk.
* If you are very pressed for initialisation performance,
* you could remove some of the tests. Make sure you did run them
* once in the software and hardware configuration you are using.
*/
static void self_test()
{
/* The three test vectors form an absolute minimal test set. */
test_vectors();
/*
* If at all possible you should run these tests too. They take
* more time, but provide a more thorough coverage.
*/
test_sequences();
/* Test the odd-sized keys. */
test_odd_sized_keys();
}
/*
* And now, the actual Twofish implementation.
*
* This implementation generates all the tables during initialisation.
* I don't like large tables in the code, especially since they are easily
* damaged in the source without anyone noticing it. You need code to
* generate them anyway, and this way all the code is close together.
* Generating them in the application leads to a smaller executable
* (the code is smaller than the tables it generates) and a
* larger static memory footprint.
*
* Twofish can be implemented in many ways. I have chosen to
* use large tables with a relatively long key setup time.
* If you encrypt more than a few blocks of data it pays to pre-compute
* as much as possible. This implementation is relatively inefficient for
* applications that need to re-key every block or so.
*/
/*
* We start with the t-tables, directly from the Twofish definition.
* These are nibble-tables, but merging them and putting them two nibbles
* in one byte is more work than it is worth.
*/
static Byte t_table[2][4][16] = {
{
{0x8,0x1,0x7,0xD,0x6,0xF,0x3,0x2,0x0,0xB,0x5,0x9,0xE,0xC,0xA,0x4},
{0xE,0xC,0xB,0x8,0x1,0x2,0x3,0x5,0xF,0x4,0xA,0x6,0x7,0x0,0x9,0xD},
{0xB,0xA,0x5,0xE,0x6,0xD,0x9,0x0,0xC,0x8,0xF,0x3,0x2,0x4,0x7,0x1},
{0xD,0x7,0xF,0x4,0x1,0x2,0x6,0xE,0x9,0xB,0x3,0x0,0x8,0x5,0xC,0xA}
},
{
{0x2,0x8,0xB,0xD,0xF,0x7,0x6,0xE,0x3,0x1,0x9,0x4,0x0,0xA,0xC,0x5},
{0x1,0xE,0x2,0xB,0x4,0xC,0x3,0x7,0x6,0xD,0xA,0x5,0xF,0x9,0x0,0x8},
{0x4,0xC,0x7,0x5,0x1,0x6,0x9,0xA,0x0,0xE,0xD,0x8,0x2,0xB,0x3,0xF},
{0xB,0x9,0x5,0x1,0xC,0x3,0xD,0xE,0x6,0x4,0x7,0xF,0x2,0x0,0x8,0xA}
}
};
/* A 1-bit rotation of 4-bit values. Input must be in range 0..15 */
#define ROR4BY1( x ) (((x)>>1) | (((x)<<3) & 0x8) )
/*
* The q-boxes are only used during the key schedule computations.
* These are 8->8 bit lookup tables. Some CPUs prefer to have 8->32 bit
* lookup tables as it is faster to load a 32-bit value than to load an
* 8-bit value and zero the rest of the register.
* The LARGE_Q_TABLE switch allows you to choose 32-bit entries in
* the q-tables. Here we just define the Qtype which is used to store
* the entries of the q-tables.
*/
#if LARGE_Q_TABLE
typedef UInt32 Qtype;
#else
typedef Byte Qtype;
#endif
/*
* The actual q-box tables.
* There are two q-boxes, each having 256 entries.
*/
static Qtype q_table[2][256];
/*
* Now the function that converts a single t-table into a q-table.
*
* Arguments:
* t[4][16] : four 4->4bit lookup tables that define the q-box
* q[256] : output parameter: the resulting q-box as a lookup table.
*/
static void make_q_table( Byte t[4][16], Qtype q[256] )
{
int ae,be,ao,bo; /* Some temporaries. */
int i;
/* Loop over all input values and compute the q-box result. */
for( i=0; i<256; i++ ) {
/*
* This is straight from the Twofish specifications.
*
* The ae variable is used for the a_i values from the specs
* with even i, and ao for the odd i's. Similarly for the b's.
*/
ae = i>>4; be = i&0xf;
ao = ae ^ be; bo = ae ^ ROR4BY1(be) ^ ((ae<<3)&8);
ae = t[0][ao]; be = t[1][bo];
ao = ae ^ be; bo = ae ^ ROR4BY1(be) ^ ((ae<<3)&8);
ae = t[2][ao]; be = t[3][bo];
/* Store the result in the q-box table, the cast avoids a warning. */
q[i] = (Qtype) ((be<<4) | ae);
}
}
/*
* Initialise both q-box tables.
*/
static void initialise_q_boxes() {
/* Initialise each of the q-boxes using the t-tables */