-
Notifications
You must be signed in to change notification settings - Fork 20
/
learn_detector_orig.cxx
2075 lines (1634 loc) · 54.1 KB
/
learn_detector_orig.cxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
@mainpage
\section Introduction
This program created a corner detector by optimizing a decision tree using simulated annealing.
The tree is optimized to maximize the repeatability of the corner detector.
\section Instructions
To make on any unix-like environment, do:
<code>./configure @@ make</code>
There is no install option. To set the parameters, examine <code>learn_detector.cfg</code>.
In order to run this program, you will need a repeatability dataset, such as the one from
http://mi.eng.cam.ac.uk/~er258/work/datasets.html
The running program will generate a very extensive logfile on the standard
output, which will include the learned detector and the results of its
repeatability evaluation. Running <code>get_block_detector</code> on the output
will generate source code for the detector. Note that this source code will not
yet have been optimized for speed, only repeatability.
The complete sequence of operations is as follows
<ol>
<li> Make the executable:
<code> ./configure && make </code>
<li> Set up <code>learn_detector.cfg</code>. The default parameters are good,
except you will need to set up <code>datadir</code> to point to the
repeatability dataset.
<li> Run the corner detector learning program
<code>./learn_detector > logfile</code>
If you run it more than once, you will probably want to alter the random
seed in the configuration file.
<li> Extract a detector from the logfile
<code>./get_block_detector logfile > learned-faster-detector.h</code>
<li> make <code>extract-fast2.cxx</code>
Note that if you have changed the list of available offsets in
<code>learn_detector.cfg</code> then you will need to change
<code>offset_list</code> in <code>extract-fast2.cxx</code>. The list of
offsets will be in the logfile.
<li> Use <code>extract-fast2.cxx</code> to extract features from a set of
training images. Any reasonable images can be used, including the
training images used earlier. Do:
<code>./extract-fast2<code><i>imagefile imagefile2 ...<i><code>> features</code>
</ol>
*/
/**
@defgroup gRepeatability Measuring the repeatability of a detector
Functions to load a repeatability dataset, and compute the repeatability of
a list of detected points.
\section data The dataset
The dataset consists of a number of registered images. The images are stored in
<code>frames/frame_X.pgm</code> where X is an integer counting from zero. The
frames must all be the same size. The warps are stored in
<code>waprs/warp_Y_Z.warp</code>. The file <code>warp_Y_Z.warp</code> contains
one line for every pixel in image Y (pixels arranged in raster-scan order). The
line is the position that the pixel warps to in image Z. If location of -1, -1
indicates that this pixel does not appear in image Z.
*/
/**
@defgroup gTree Tree representation.
*/
/**
@defgroup gFastTree Compiled tree representations
*/
/**
@defgroup gUtility Utility functions.
*/
/**
@defgroup gOptimize Optimization routines
*/
#include <iostream>
#include <iomanip>
#include <fstream>
#include <climits>
#include <float.h>
#include <cstring>
#include <cerrno>
#include <cmath>
#include <vector>
#include <utility>
#include <algorithm>
#include <set>
#include <cvd/image.h>
#include <cvd/image_io.h>
#include <cvd/byte.h>
#include <cvd/random.h>
#include <cvd/fast_corner.h>
#include <cvd/vector_image_ref.h>
#include <cvd/timer.h>
#include <cvd/cpu_hacks.h>
#include <tag/tuple.h>
#include <tag/stdpp.h>
#include <tag/fn.h>
#include <tag/printf.h>
#include <sys/mman.h>
#include <TooN/TooN.h>
#include <TooN/helpers.h>
#include "svector.h"
#include <gvars3/serialize.h>
namespace GVars3{ namespace serialize {
/**GVars serialization for containers. FIXME: should use existing serialization
to load types, rather than >>
@ingroup gUtility
*/
template<class C, template<class> class D >std::string to_string(const D<C>& s)
{
std::ostringstream o;
typename D<C>::const_iterator i;
for(i=s.begin();i != s.end(); i++)
{
if(i != s.begin())
o << " ";
o << *i;
}
return o.str();
}
template<class C, template<class> class D> int from_string(const std::string& s, D<C>& o)
{
std::istringstream i(s);
using namespace tag;
while(1)
{
C c;
i >> c;
if(i) //No data lost:
o.insert(o.end(), c);
else //Stream finished for some reason (either bad or b0rked)
return check_stream(i);
}
}
}}
#include <gvars3/instances.h>
using namespace std;
using namespace CVD;
using namespace tag;
using namespace GVars3;
using namespace TooN;
///Actual x,y offset of the offset numbers in the different available orientations.
///@ingroup gTree
vector<vector<ImageRef> > offsets;
///The number of possible offsets. Equivalent to <code>offsets[x].size()</code>
///@ingroup gTree
int num_offsets;
///Bounding box for offsets in all orientations. This is therefore a bounding box for the detector.
///@ingroup gTree
pair<ImageRef, ImageRef> offsets_bbox;
////////////////////////////////////////////////////////////////////////////////
//
// Utility functions
//
///Square a number
///@param d Number to square
///@return $d^2$
///@ingroup gUtility
double sq(double d)
{
return d*d;
}
///Populate a std::vector with the numbers 0,1,...,num
///@param num Size if the range
///@return the populated vector.
///@ingroup Utility
vector<int> range(int num)
{
vector<int> r;
for(int i=0; i < num; i++)
r.push_back(i);
return r;
}
////////////////////////////////////////////////////////////////////////////////
//
// Functions related to repeatability
//
///Generate a disc of ImageRefs.
///@param radius Radius of the disc
///@return the disc of ImageRefs
///@ingroup gRepeatability
vector<ImageRef> generate_disc(int radius)
{
vector<ImageRef> ret;
ImageRef p;
for(p.y = -radius; p.y <= radius; p.y++)
for(p.x = -radius; p.x <= radius; p.x++)
if((int)p.mag_squared() <= radius)
ret.push_back(p);
return ret;
}
///Paint shapes (a vector<ImageRef>) safely in to an image
///This is used to paint discs at corner locations in order to
///perform rapid proximity checking.
///
/// @param corners Locations to paint shapes
/// @param circle Shape to paint
/// @param size Image size to be painted in to
/// @return Image with shapes painted in to it.
///@ingroup gRepeatability
Image<bool> paint_circles(const vector<ImageRef>& corners, const vector<ImageRef>& circle, ImageRef size)
{
Image<bool> im(size, 0);
for(unsigned int i=0; i < corners.size(); i++)
for(unsigned int j=0; j < circle.size(); j++)
if(im.in_image(corners[i] + circle[j]))
im[corners[i] + circle[j]] = 1;
return im;
}
///Computes repeatability the slow way to avoid rounding errors, by comparing the warped
///corner position to every detected corner.
///
/// @param warps Every warping where warps[i][j] specifies warp from image i to image j.
/// @param corners Detected corners
/// @param r A corner must be as close as this to be considered repeated
/// @return The repeatability
/// @ingroup gRepeatability
double compute_repeatability_exact(const vector<vector<Image<Vector<2> > > >& warps, const vector<vector<ImageRef> >& corners, double r)
{
unsigned int n = corners.size();
int corners_tested = 0;
int good_corners = 0;
r *= r;
for(unsigned int i=0; i < n; i++)
for(unsigned int j=0; j < n; j++)
{
if(i==j)
continue;
for(unsigned int k=0; k < corners[i].size(); k++)
{
Vector<2> p = warps[i][j][corners[i][k]];
if(p[0] != -1) //pixel does not warp to inside image j
{
corners_tested++;
for(unsigned int l=0; l < corners[j].size(); l++)
{
Vector<2> d = p - vec(corners[j][l]);
if(d*d < r)
{
good_corners++;
break;
}
}
}
}
}
return 1.0 * good_corners / (corners_tested + DBL_EPSILON);
}
///Computes repeatability the quick way, by caching, but has small rounding errors. This
///function paints a disc of <code>true</code> around each detected corner in to an image.
///If a corner warps to a pixel which has the value <code>true</code> then it is a repeat.
///
/// @param warps Every warping where warps[i][j] specifies warp from image i to image j.
/// @param corners Detected corners
/// @param r A corner must be as close as this to be considered repeated
/// @param size Size of the region for cacheing. All images must be this size.
/// @return The repeatability.
/// @ingroup gRepeatability
float compute_repeatability(const vector<vector<Image<Vector<2> > > >& warps, const vector<vector<ImageRef> >& corners, int r, ImageRef size)
{
cvd_timer tmr;
unsigned int n = corners.size();
vector<ImageRef> disc = generate_disc(r);
vector<Image<bool> > detected;
for(unsigned int i=0; i < n; i++)
detected.push_back(paint_circles(corners[i], disc, size));
cout << "time_rep_paint " << tmr.get_time() << endl;
tmr.reset();
int corners_tested = 0;
int good_corners = 0;
for(unsigned int i=0; i < n; i++)
for(unsigned int j=0; j < n; j++)
{
if(i==j)
continue;
for(unsigned int k=0; k < corners[i].size(); k++)
{
ImageRef dest = ir_rounded(warps[i][j][corners[i][k]]);
if(dest.x != -1)
{
corners_tested++;
if(detected[j][dest])
good_corners++;
}
}
}
cout << "time_rep_rep " << tmr.get_time() << endl;
return 1.0 * good_corners / (DBL_EPSILON + corners_tested);
}
/**Load warps from a repeatability dataset.
The dataset contains warps which round to outside the image by one pixel in the max direction.
Fast repeatability testing founds values to integers, which causes errors, so these points need
to be pruned from the dataset. Slow repeatability testing does not do this, and these values must
be left so that the same data _exactly_ are used for the testing as int FAST-9 paper.
@param stub The directory of the whole dataset.
@param num The image numbers for which warps will be loaded.
@param size The size of the corresponding images.
@param prune Whether to prune points which warp to outside the images.
@return <code>return_value[i][j][y][x]</code> is where pixel x, y in image i warps to in image j.
@ingroup gRepeatability
*/
vector<vector<Image<Vector<2> > > > load_warps(string stub, const vector<int>& num, ImageRef size, bool prune=1)
{
stub += "/warps/";
vector<vector<Image<Vector<2> > > > ret(num.size(), vector<Image<Vector<2> > >(num.size()));
BasicImage<byte> tester(NULL, size);
Vector<2> outside = (make_Vector, -1, -1);
for(unsigned int from = 0; from < num.size(); from ++)
for(unsigned int to = 0; to < num.size(); to ++)
if(from != to)
{
Image<Vector<2> > w(size, (make_Vector, -1, -1));
int n = size.x * size.y;
Image<Vector<2> >::iterator p = w.begin();
ifstream f;
string fname = stub + sPrintf("warp_%i_%i.warp", num[from], num[to]);
f.open(fname.c_str());
if(!f.good())
{
cerr << "Error: " << fname << ": " << strerror(errno) << endl;
exit(1);
}
for(int i=0; i < n; ++i, ++p)
{
f >> *p;
if(prune && !tester.in_image(ir_rounded(*p)))
*p = outside;
}
if(!f.good())
{
cerr << "Error: " << fname << " went bad" << endl;
exit(1);
}
cerr << "Loaded " << fname << endl;
ret[from][to] = w;
}
return ret;
}
///Load images from a dataset
///@param stub The directory of the whole dataset.
///@param num The image numbers for which warps will be loaded.
///@return The loaded images.
///@ingroup gRepeatability
vector<Image<byte> > load_images(string stub, const vector<int>& num)
{
stub += "/frames/";
vector<Image<byte> > r;
for(unsigned int i=0; i < num.size(); i++)
r.push_back(img_load(stub + sPrintf("frame_%i.pgm", num[i])));
return r;
}
////////////////////////////////////////////////////////////////////////////////
//
// Fast implementations of the detector
//
/// This struct contains a byte code compiled version of the detector.
///
///
/// @ingroup gFastTree
struct fast_detector
{
/// This is a bytecode element for the bytecode-compiled
/// detector. The bytecode consists of a number of fixed length
/// blocks representing a 3 way branch. Special values of
/// of a block indicate the result that a pixel is a corner or
/// non-corner.
struct fast_detector_bit
{
int offset; ///< Memory offset from centre pixel to examine.
//Root node is 0. If lt == 0, then this is a leaf.
//gt holds the class.
int lt; ///<Position in bytecode to branch to if offset pixel is much darker than the centre pixel. If this
///is zero, then gt stores the result.
int gt; ///<Position in bytecode to branch to if offset pixel is much brighter than the centre pixel. If lt==0
///is a result block, then this stores the result, 0 for a non corner, 1 for a corner.
int eq; ///<Position in bytecode to branch to otherwise.
};
vector<fast_detector_bit> d; ///<This contains the compiled bytecode.
///Detects a corner at a given pointer, without the book keeping required to compute the score.
///This is quite a lot faster than @ref detect.
///
///@param imp Pointer at which to detect corner
///@param b FAST barrier
///@return is a corner or not
inline bool detect_no_score(const byte* imp, int b) const
{
int n=0;
int cb = *imp + b;
int c_b = *imp - b;
int p;
while(d[n].lt)
{
p = imp[d[n].offset];
if(p > cb)
n = d[n].gt;
else if(p < c_b)
n = d[n].lt;
else
n = d[n].eq;
}
return d[n].gt;
}
///Detects a corner at a given pointer, with book-keeping required for score computation
///
///@param imp Pointer at which to detect corner
///@param b FAST barrier
///@return 0 for non-corner, minimum increment required to make detector go down different branch, if it is a corner.
inline int detect(const byte* imp, int b) const
{
int n=0;
int m = INT_MAX;
int cb = *imp + b;
int c_b = *imp - b;
int p;
while(d[n].lt)
{
p = imp[d[n].offset];
if(p > cb)
{
if(p-cb < m)
m = p-cb;
n = d[n].gt;
}
else if(p < c_b)
{
if(c_b - p < m)
m = c_b - p;
n = d[n].lt;
}
else
n = d[n].eq;
}
if(d[n].gt)
return m;
else
return 0;
}
///Serialize the detector to an ostream. The serialized detector a number of lines
///of the form:
///@code
///Block N [X Y] G E L
///@endcode
///or:
///@code
///Block N corner
///@endcode
///or:
///@code
///Block N non_corner
///@endcode
///The first block type represents the code:
///@code
///if Image[current_pixel + (x, y)] > Image[current_pixel] + threshold
/// goto block G
///elseif Image[current_pixel + (x, y)] < Image[current_pixel] -threshold
/// goto block L
///else
/// goto block E
///endif
///@endcode
///@param o ostream for output
///@param width width the detector was created at, required to back out the offsets correctly.
void print(ostream& o, int width) const
{
for(unsigned int i=0; i < d.size(); i++)
{
if(d[i].lt == 0)
o << tag::print << "Block" << i << (d[i].gt?"corner":"non_corner");
else
{
int a = abs(d[i].offset) + width / 2;
if(d[i].offset < 0)
a = -a;
int y = a / width;
int x = d[i].offset - y * width;
o << tag::print << "Block" << i << ImageRef(x , y) << d[i].gt << d[i].eq << d[i].lt;
}
}
}
};
///This struct contains a x86 machine-code compiled version of the detector. The detector
///operates on a single row and inserts offset from the beginning of the image in to a
///std::vector.
///@ingroup gFastTree
class jit_detector
{
public:
///Run the compiled detector on a row of an image.
///@param im The image.
///@param row The row to detect corners in.
///@param xmin The starting position.
///@param xmax The ending position.
///@param corners The detected corners as offsets from image.data().
///@param threshold The corner detector threshold.
void detect_in_row(const Image<byte>& im, int row, int xmin, int xmax, vector<int>& corners, int threshold)
{
const byte* p = im[row] + xmin;
const int n = xmax - xmin;
void* cs = &corners;
const void* im_data = im.data();
/* r/m usage, at entry to machine code
In use:
%ecx Num remaining
%edi threshold
%ebp Detect in row machine code procedure address
%ebx cb
%edx c_b
%esi data
%eax Scratch
4%esp %esi: produced automatically by call
8%esp image.data()
12%esp &vector<int>
16%esp vector_inserter: simple function for calling member of std::vector
Input:
0 num remaining
1 data pointer
2 threshold
3 proc
4 push_back_proc
5 vector<int>
6 image.data()
*/
__asm__ __volatile__(
//Save all registers
" pusha \n"
//Load operands in to correct places
" pushl %4 \n"
" pushl %5 \n"
" pushl %6 \n"
" movl %0, %%ecx \n"
" movl %1, %%esi \n"
" movl %2, %%edi \n"
" movl %3, %%ebp \n" //%? uses ebp, so trash ebp last
//Start the loop
" cmp $0, %%ecx \n"
" je 1 \n"
" call *%%ebp \n"
"1: \n"
//Unload operands
" popl %%eax \n"
" popl %%eax \n"
" popl %%eax \n"
//Restore all registers
" popa \n"
:
: "m"(n), "m"(p), "m"(threshold), "m"(proc), "i"(&vector_inserter), "m"(cs), "m"(im_data)
);
}
///Create a compiled detector from the bytecode.
///@param v Bytecode.
jit_detector(const vector<fast_detector::fast_detector_bit>& v)
{
//blocksize
const int bs=28;
length = bs * (v.size() + 2); //Add head and tail block
/* The original assembler code looked like this
This is now done in machine code, with the whole tree in
place of line 0x804e0c1.
804e0b3: 83 f9 00 cmp $0x0,%ecx
804e0b6: 74 1b je 804e0d3 <finished>
0804e0b8 <loop>:
804e0b8: 0f b6 16 movzbl (%esi),%edx
804e0bb: 89 d3 mov %edx,%ebx
804e0bd: 29 fa sub %edi,%edx
804e0bf: 01 fb add %edi,%ebx
804e0c1: ff d5 call *%ebp
804e0c3: a8 ff test $0xff,%al
804e0c5: 74 08 je 804e0cf <nocorner>
804e0c7: 56 push %esi
804e0c8: 51 push %ecx
804e0c9: ff 54 24 10 call *0x10(%esp)
804e0cd: 59 pop %ecx
804e0ce: 58 pop %eax
0804e0cf <nocorner>:
804e0cf: 46 inc %esi
804e0d0: 49 dec %ecx
804e0d1: 75 e5 jne 804e0b8 <loop> //jne == jnz
Unused spaces are filled in with int $3, (instruction 0xcc), which
causes a debug trap. Makes catching errors easier.
The consists of fixed sized blocks pasted together. The size is determined by the
largest block, which is a tree node. This makes jump computation trivial, but
it also means that short jumps are never used, and the code is therefore larger
than necessary.
The rest have 0xcc filled in in the spare places.
The blocks are templates and have the relevant parts filled in prior to
copying.
Each tree node (including leaves are represented by an entire block)
Detectod corners are inserted in to a vector<int> as the integer offset of the corner
pixel from the beginning of the image
*/
const unsigned char loop_head[bs] =
{
0xEB, 0x11, //jmp + 17
0xcc,0xcc,0xcc,0xcc,0xcc,0xcc, //dead space
0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,
0xcc,0xcc,0xcc,0xcc,0xcc,
0x0f, 0xb6, 0x16, //movzbl (%esi),%edx Load data
0x89, 0xd3, //mov %edx,%ebx
0x29, 0xfa, //sub %edi,%edx Compute c_b
0x01, 0xfb, //add %edi,%ebx Compute cb
};
const int loop_head_start=19; //Jump to here to continue loop
unsigned char loop_tail[bs] =
{
0x56, //push %esi Functions seem to trash this otherwise
0x51, //push %ecx Functions seem to trash this otherwise
0xFF, 0x54, 0x24, 0x14, //call *16(%esp) Other arguments on the stack already
0x59, //pop %ecx Clean stack
0x58, //pop %eax ...
0x46, //inc %esi
0x49, //dec %ecx
0x0F, 0x85, 0xcc, 0xcc, 0xcc, 0xcc, //jnz <back to first block>
0xc3, //ret
0xcc,0xcc,0xcc,0xcc, //dead space
0xcc,0xcc,0xcc,0xcc,
0xcc,0xcc,0xcc,
};
const int loop_tail_address_offset = 12; //fill in the jump <back to first block> address here
const int loop_tail_jump_delta = 16; //Jump block_size*depth + this, to loop.
const int loop_tail_entry = 8; //jump to here to avoid inserting current point as corner
unsigned char cont_or_goto[bs] =
{
0xE9,0xcc, 0xcc, 0xcc, 0xcc, //Jump to end of loop
0xcc,0xcc,0xcc,0xcc,0xcc,0xcc, //dead space
0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,
0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,
0xcc,0xcc,0xcc,0xcc,0xcc
};
const int cont_jmp_addr = 1; //Jump address filled in here
const int cont_delta = 5; //This much in addition to block delta
unsigned char branch[bs] =
{
0x0f, 0xB6, 0x86, 0xcc, 0xcc, 0xcc, 0xcc, //movzbl OOOO(%esi),%eax
0x39, 0xd8, //cmp %ebx, %eax (eax - ebx) = (data[##]-cb
0x0F, 0x8F, 0xcc, 0xcc, 0xcc, 0xcc, //jg XXXX jmp by XXXX if eax > ebx
0x39, 0xC2, //cmp %eax, %edx (edx - eax) = c_b - data[##]
0x0F, 0x8F, 0xcc, 0xcc, 0xcc, 0xcc, //jg YYYY jmp by YYYY if ecx > ebx
0xE9, 0xcc, 0xcc, 0xcc, 0xcc, //jmp ZZZZ Unconditional jump to ZZZZ
};
const int block_off_off = 3;
const int block_gt_off = 11;
const int block_lt_off = 19;
const int block_eq_off = 24;
//mmap a writable, executable block of memory for JITted code
proc = (unsigned char*) mmap(0, length, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
if(proc == MAP_FAILED)
{
cerr << "mmap failed with error: " << strerror(errno) << endl;
exit(1);
}
//Copy in the loop head: no parts to be filled in.
memcpy(proc, loop_head, bs);
for(int i=0; i < (int)v.size(); i++)
{
if(v[i].lt == 0) // leaf
{
if(v[i].gt == 0) //Fill in jump to continue part
{
*(int*)(cont_or_goto + cont_jmp_addr) = bs * (v.size()- i) - cont_delta + loop_tail_entry;
}
else //fill in jump to insert part
{
*(int*)(cont_or_goto + cont_jmp_addr) = bs * (v.size() - i) - cont_delta;
}
memcpy(proc + (i+1)*bs, cont_or_goto, bs);
}
else
{
*(int*)(branch+block_off_off) = v[i].offset;
//Optimization: leaf nodes have a non-conditional goto in them
//so goto the right place directly, rather than the leaf node.
//This has a 5% effect or so, so bigger gains elsewhere.
//Removed for simplicity.
*(int*)(branch+block_gt_off) = (v[i].gt -i) * bs - (block_gt_off + 4);
*(int*)(branch+block_lt_off) = (v[i].lt -i) * bs - (block_lt_off + 4);
*(int*)(branch+block_eq_off) = (v[i].eq -i) * bs - (block_eq_off + 4);
memcpy(proc + (i+1)*bs, branch, bs);
}
}
//Insert the correct backwards jump for looping
*(int*)(loop_tail+loop_tail_address_offset) = -bs * (1+v.size()) - loop_tail_jump_delta + loop_head_start;
memcpy(proc + bs * (v.size() + 1), loop_tail, bs);
}
~jit_detector()
{
munmap(proc, length);
}
private:
//Not copyable
void operator=(const jit_detector&);
jit_detector(const jit_detector&);
unsigned char* proc; ///< The machine code is stored in this mmap() allocated data which allows code execution.
int length; ///< Number of mmap() allocated bytes.
///Callback function to allow insertion in to std::vector. The execution of this function
///relies on the stack having the following layout (stack head on the left):
///@code
///return_address first_arg second_arg etc...
///@endcode
///so that the arguemnts directly reflect the stack layout.
///For speed, and in order to minimize stack handling, the argument list spans two call instructions worth of stack.
///
///@param ecx_dummy Pushed by the machine code, since the ABI allows ecx to be trashed
///@param p The pointer to the current pixel. Pushed by the machine code.
///@param esp_return_dummy Location to return to on a return from the machine code. Generated by the assembler call in to the machine code.
///@param im_data Pointer to the first image pixel. Pushed by the assembler caller.
///@param i Pointer to the std::vector<int> which stores the data. Pushed by the assembler caller.
static void vector_inserter(int ecx_dummy, const byte* p, const void* esp_return_dummy, const byte* im_data, vector<int>* i)
{
i->push_back(p-im_data);
}
};
/// This struct represents a node of the tree, and has pointers to other
/// structs, thereby representing a branch or the entire tree.
///
/// @ingroup gTree
class tree_element
{
public:
tree_element *lt; ///<Branch of the tree to take if the offset pixel is much darker than the centre.
tree_element *eq; ///<Branch of the tree to take if the offset pixel is much brighter than the centre.
tree_element *gt; ///<Branch of the tree to take otherwise.
bool is_corner; ///<If the node is a leaf, then this is its attribute.
int offset_index; ///<Offset number of the pixel to examine. This indexes offsets[x]
/// This returns the bounding box of the detector
pair<ImageRef, ImageRef> bbox() const
{
return offsets_bbox;
}
/// This returns the number of nodes in the tree
int num_nodes() const
{
if(eq == NULL)
return 1;
else
return 1 + lt->num_nodes() + eq->num_nodes() + gt->num_nodes();
}
///Is the node a leaf?
bool is_leaf() const
{
return eq == NULL;
}
///Return a given numbered element of the tree. Elements are numbered by depth-first traversal.
///
///@param t Element number to return
///@return pointer to the t'th element, and a flag indicating whether it's the direct child of an eq branch.
pair<tree_element*,bool> nth_element(int t)
{
//The root node can not be a corner.
//Otherwise the strength would be inf.
int n=0;
return nth_element(t, n, true);
}
///Compile the detector to bytecode. The bytecode is not a tree, but a graph. This is
///because the detector is applied in all orientations: offsets are integers which are
///indices in to a list of (x,y) offsets and there are multiple lists of offsets. The
///tree is also applied with intensity inversion.
///
///@param xsize The width of the image.
///@return The bytecode compiled detector.
///@ingroup gFastTree
fast_detector make_fast_detector(int xsize) const
{
vector<fast_detector::fast_detector_bit> f;
for(int invert=0; invert < 2; invert++)
for(unsigned int i=0; i < offsets.size(); i++)
{
//Make a FAST detector at a certain orientation
vector<fast_detector::fast_detector_bit> tmp(1);
make_fast_detector_o(tmp, 0, xsize, i, invert);
int endpos = f.size() + tmp.size();
int startpos = f.size();
//Append tmp on to f, filling in the non-corners (jumps to endpos)
//and correcting the intermediate jumps destinations
for(unsigned int i=0 ; i < tmp.size(); i++)
{
f.push_back(tmp[i]);
if(f.back().eq == -1)
f.back().eq = endpos;
else if(f.back().eq > 0)
f.back().eq += startpos;
if(f.back().gt == -1)
f.back().gt = endpos;
else if(f.back().gt > 0)
f.back().gt += startpos;
if(f.back().lt == -1)
f.back().lt = endpos;
else if(f.back().lt > 0)
f.back().lt += startpos;
}
}
//We need a final endpoint for non-corners
f.resize(f.size() + 1);
f.back().offset = 0;
f.back().lt = 0;
f.back().gt = 0;
f.back().eq = 0;
//Now we need an extra endpoint for corners
for(unsigned int i=0; i < f.size(); i++)
{
//EQ is always non-corner