-
Notifications
You must be signed in to change notification settings - Fork 4
/
fexp.f90
506 lines (404 loc) · 16 KB
/
fexp.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
module fexp
use fexp_debug
implicit none
character(len=10), parameter :: numbers = '1234567890'
character(len=26), parameter :: lowercase = 'abcdefghijklmnopqrstuvwxyz'
character(len=26), parameter :: uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
character(len=2), parameter :: otherwords = '-_'
integer, save :: matchstart, matchlength
contains
logical function match(regexp, text, debug)
character(len=*), intent(in) :: regexp
character(len=*), intent(in) :: text
logical, optional, intent(in) :: debug
integer :: pos
matchstart = 0
matchlength = 0
if (present(debug)) then
if (debug) then
call debug_set(.true., 'fexp.debug', 99)
else
call debug_set(.false.)
endif
else
call debug_set(.false.)
endif
call proc_start('match', regexp, text)
if (regexp(1:1).eq."^") then
match = matchhere(regexp(2:len(regexp)), text)
matchstart = 1
else
do pos = 1, len(text)
matchlength = 0
if ( matchhere(regexp, text(pos:len(text))) ) then
match = .true.
matchstart = pos
exit
endif
match = .false.
enddo
endif
call proc_end('match')
call debug_stop
end function match
logical recursive function matchhere(regexp, text) result(res)
character(len=*), intent(in) :: regexp
character(len=*), intent(in) :: text
integer :: classend
call proc_start('matchhere', regexp, text)
if (len(regexp).eq.0) then
! Assuming empty input is protected by caller
! this is needed to simplify the other recursive functions.
res = .true.
elseif (len(regexp).eq.1) then
! Almost at the end of the match - just need to check if we are at the end
! and watch out for '$' terminating condition.
if (text(1:1).eq.regexp(1:1)) then
matchlength = matchlength + 1
res = .true.
elseif (regexp(1:1).eq.".") then
matchlength = matchlength + 1
res = .true.
elseif ((regexp(1:1).eq."$") &
& .and.(len(text).eq.0)) then
res = .true.
else
res = .false.
endif
elseif (regexp(1:1).eq.'[') then
! This is the start of a character class... so find the end.
classend = index(regexp(2:len(regexp)), ']')
if (classend.eq.0) stop("No terminating char class")
! Check for modifiers at the end.
if (regexp(classend+2:classend+2).eq.'*') then
res = starcharclar(regexp(2:classend), regexp(classend+3:len(regexp)), text)
elseif (regexp(classend+2:classend+2).eq.'?') then
res = onecharclass(regexp(2:classend), regexp(classend+3:len(regexp)), text)
elseif (regexp(classend+2:classend+2).eq.'+') then
res = matchhere('['//regexp(2:classend)//'][' &
& //regexp(2:classend)//']*' &
& //regexp(classend+3:len(regexp)), text)
else
res = charclass(regexp(2:classend), regexp(classend+2:len(regexp)), text)
endif
elseif (regexp(1:1).eq.'\') then
! This is the start of a shortcut or escaped character.
! But before worrying about what it is - check for *, ? and + after it.
if (regexp(3:3).eq.'*') then
! Escaped something followed by a star
if (regexp(2:2).eq.'w') then
res = starcharclar(numbers//lowercase//uppercase//otherwords, &
& regexp(4:len(regexp)), text)
elseif (regexp(2:2).eq.'d') then
res = starcharclar(numbers, regexp(4:len(regexp)), text)
elseif (regexp(2:2).eq.'W') then
res = starcharclar("^"//numbers//lowercase//uppercase//otherwords, &
& regexp(4:len(regexp)), text)
elseif (regexp(2:2).eq.'D') then
res = starcharclar("^"//numbers, regexp(4:len(regexp)), text)
else
res = litmatch(regexp(2:len(regexp)), text)
endif
elseif (regexp(3:3).eq.'?') then
! Escaped something followed by a question mark
if (regexp(2:2).eq.'w') then
res = onecharclass(numbers//lowercase//uppercase//otherwords, &
& regexp(4:len(regexp)), text)
elseif (regexp(2:2).eq.'d') then
res = onecharclass(numbers, regexp(4:len(regexp)), text)
elseif (regexp(2:2).eq.'W') then
res = onecharclass("^"//numbers//lowercase//uppercase//otherwords, &
& regexp(4:len(regexp)), text)
elseif (regexp(2:2).eq.'D') then
res = onecharclass("^"//numbers, regexp(4:len(regexp)), text)
else
res = litmatch(regexp(2:len(regexp)), text)
endif
elseif (regexp(3:3).eq.'+') then
! Escaped something followed by a plus
if (regexp(2:2).eq.'w') then
res = matchhere('\w\w*'//regexp(4:len(regexp)), text)
elseif (regexp(2:2).eq.'d') then
res = matchhere('\d\d*'//regexp(4:len(regexp)), text)
elseif (regexp(2:2).eq.'W') then
res = matchhere('\W\W*'//regexp(4:len(regexp)), text)
elseif (regexp(2:2).eq.'D') then
res = matchhere('\D\D*'//regexp(4:len(regexp)), text)
else
res = litmatch(regexp(2:len(regexp)), text)
endif
else
! We just have to worry about the escaped char
if (regexp(2:2).eq.'w') then
res = charclass(numbers//lowercase//uppercase//otherwords, &
& regexp(3:len(regexp)), text)
elseif (regexp(2:2).eq.'d') then
res = charclass(numbers, regexp(3:len(regexp)), text)
elseif (regexp(2:2).eq.'W') then
res = charclass("^"//numbers//lowercase//uppercase//otherwords, &
& regexp(3:len(regexp)), text)
elseif (regexp(2:2).eq.'D') then
res = charclass("^"//numbers, regexp(3:len(regexp)), text)
else
res = litmatch(regexp(2:len(regexp)), text)
endif
endif
! Minimum length quantifiers
elseif (regexp(2:3).eq."??") then
res = onelazymatch(regexp(1:1), &
& regexp(4:len(regexp)), text)
elseif (regexp(2:3).eq."*?") then
res = matchlazystar(regexp(1:1), &
& regexp(4:len(regexp)), text)
elseif (regexp(2:3).eq."+?") then
res = matchhere(regexp(1:1)//regexp(1:1)// &
& '*?'//regexp(4:len(regexp)), text)
! Maximum length quantifiers
elseif (regexp(2:2).eq."?") then
res = onematch(regexp(1:1), &
& regexp(3:len(regexp)), text)
elseif (regexp(2:2).eq."*") then
res = matchstar(regexp(1:1), &
& regexp(3:len(regexp)), text)
elseif (regexp(2:2).eq."+") then
res = matchhere(regexp(1:1)//regexp(1:1)// &
& '*'//regexp(3:len(regexp)), text)
! Now check the character (should call litmatch I suppose.
elseif (text(1:1).eq.regexp(1:1)) then
matchlength = matchlength + 1
res = matchhere( regexp(2:len(regexp)), &
& text(2:len(text)) )
! And a dot is always OK
elseif (regexp(1:1).eq.".") then
matchlength = matchlength + 1
res = matchhere( regexp(2:len(regexp)), &
& text(2:len(text)) )
! Otherwise it does not match
else
res = .false.
endif
call proc_end('matchhere')
end function matchhere
logical recursive function litmatch(regexp, text)
character(len=*), intent(in) :: regexp
character(len=*), intent(in) :: text
! FIXME - handle cases of *, ? and + -- without breaking the lit match.
if (text(1:1).eq.regexp(1:1)) then
matchlength = matchlength + 1
litmatch = matchhere( regexp(2:len(regexp)), &
& text(2:len(text)) )
else
litmatch = .false.
endif
end function litmatch
logical recursive function onematch(onechar, regexp, text)
character(len=1), intent(in) :: onechar
character(len=*), intent(in) :: regexp
character(len=*), intent(in) :: text
if ((text(1:1).eq.onechar).or.(onechar.eq.'.')) then
if (matchhere(regexp, text(2:len(text)))) then
! One of the char and the rest of the expression
matchlength = matchlength + 1
onematch = .true.
else
! Backtrack to zero of the char and the expression
! No char to add here.
onematch = matchhere(regexp, text)
endif
else
! Just the rest of the expression also OK
onematch = matchhere(regexp, text)
endif
end function onematch
logical recursive function matchstar(starchar, regexp, text)
character(len=1), intent(in) :: starchar
character(len=*), intent(in) :: regexp
character(len=*), intent(in) :: text
! integer :: poss, pose
integer :: pos
call proc_start('matchstar', starchar, regexp, text)
if (starchar.eq.'.') then
pos = 0 ! Because anything will match the dot.
else
pos = verify(text, starchar) ! This is the last character not
! matching the star.
endif
! I'm sure this if block is not needed, and there are bugs lurking. But
! bunging it in makes everything pass at present so I'll go with this
! for now. TODO - check and validate properly!
if (pos.eq.0) then
pos = len(text)
do
if (matchhere(regexp, text(pos:len(text)))) then
matchstar = .true.
matchlength = matchlength + pos -1
exit
elseif (pos.ge.1) then
pos = pos -1
else
matchstar = .false.
exit
endif
enddo
else
do
if (matchhere(regexp, text(pos:len(text)))) then
matchstar = .true.
matchlength = matchlength + pos -1
exit
elseif (pos.ge.1) then
pos = pos -1
else
matchstar = .false.
exit
endif
enddo
endif
call proc_end('starchar')
end function matchstar
logical recursive function onelazymatch(onechar, regexp, text)
character(len=1), intent(in) :: onechar
character(len=*), intent(in) :: regexp
character(len=*), intent(in) :: text
if ((text(1:1).eq.onechar).or.(onechar.eq.'.')) then
if (matchhere(regexp, text(2:len(text)))) then
! One of the char and the rest of the expression
matchlength = matchlength + 1
onelazymatch = .true.
else
! Backtrack to zero of the char and the expression
! No char to add here.
onelazymatch = matchhere(regexp, text)
endif
else
! Just the rest of the expression also OK
onelazymatch = matchhere(regexp, text)
endif
end function onelazymatch
logical recursive function matchlazystar(starchar, regexp, text)
character(len=1), intent(in) :: starchar
character(len=*), intent(in) :: regexp
character(len=*), intent(in) :: text
integer :: pos
pos = 1
do
if (matchhere(regexp, text(pos:len(text)))) then
matchlazystar = .true.
matchlength = matchlength + pos - 1
exit
elseif ((pos.le.len(text)) &
& .and. ( &
& (text(pos:pos).eq.starchar) &
& .or.(starchar.eq.".") &
& )) then
pos = pos + 1
else
matchlazystar = .false.
exit
endif
enddo
end function matchlazystar
logical recursive function charclass(class, regexp, text)
character(len=*), intent(in) :: class
character(len=*), intent(in) :: regexp ! after the class
character(len=*), intent(in) :: text
logical :: negate
negate = .false.
if (class(1:1).eq.'^') then
negate = .true.
endif
if ((len(class).lt.1).and..not.negate) stop ("Empty char class!")
if ((len(class).lt.2).and.negate) stop ("Empty char class!")
if (.not.negate) then
if (scan(text(1:1),class).eq.1) then
matchlength = matchlength + 1
charclass = matchhere(regexp, text(2:len(text)))
else
charclass = .false.
endif
else
if (scan(text(1:1),class(2:len(class))).eq.1) then
charclass = .false.
else
matchlength = matchlength + 1
charclass = matchhere(regexp, text(2:len(text)))
endif
endif
end function charclass
logical recursive function onecharclass(class, regexp, text)
character(len=*), intent(in) :: class
character(len=*), intent(in) :: regexp ! after the class
character(len=*), intent(in) :: text
logical :: negate
negate = .false.
if (class(1:1).eq.'^') then
negate = .true.
endif
if ((len(class).lt.1).and..not.negate) stop ("Empty char class!")
if ((len(class).lt.2).and.negate) stop ("Empty char class!")
if (.not.negate) then
! not negated
if (scan(text(1:1),class).eq.1) then
if (matchhere(regexp, text(2:len(text)))) then
! One of the char and the rest of the expression
matchlength = matchlength + 1
onecharclass = .true.
else
! Backtrack to zero of the char and the expression
onecharclass = matchhere(regexp, text)
endif
else
! Just the rest of the expression also OK
onecharclass = matchhere(regexp, text)
endif
else
! Negated
if (scan(text(1:1),class(2:len(class))).eq.0) then
! OK if the whole of the rest of the regexp matched everything...
matchlength = matchlength + 1
onecharclass = matchhere(regexp, text)
else
! Ok if we can ignore the charclass and stull match all the text.
onecharclass = matchhere(regexp, text)
endif
endif
end function onecharclass
logical recursive function starcharclar(class, regexp, text)
character(len=*), intent(in) :: class
character(len=*), intent(in) :: regexp ! after the class
character(len=*), intent(in) :: text
logical :: negate
integer :: pos
call proc_start('starcharclar', class, regexp, text)
negate = .false.
if (class(1:1).eq.'^') then
negate = .true.
endif
if ((len(class).lt.1).and..not.negate) stop ("Empty char class!")
if ((len(class).lt.2).and.negate) stop ("Empty char class!")
!FIXME: Is this doing lazy evaluation - should it do greedy evaluation? Can
! I reuse matchstar with a bit of effort? Do I need lazy evaluation?
pos = 1
do
if (matchhere(regexp, text(pos:len(text)))) then
starcharclar = .true.
matchlength = matchlength + pos - 1
exit
elseif ((pos.le.len(text)) &
& .and.(.not.negate).and. ( &
& (scan(text(1:1),class).eq.1) &
& )) then
pos = pos+1
elseif ((pos.le.len(text)) &
& .and.negate.and. ( &
& (scan(text(1:1),class(2:len(class))).eq.0) &
& )) then
pos = pos+1
else
starcharclar = .false.
exit
endif
enddo
call proc_end('starcharclar')
end function starcharclar
end module fexp