-
Notifications
You must be signed in to change notification settings - Fork 3
/
nfa.cpp
345 lines (294 loc) · 14.2 KB
/
nfa.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
// Created by omar_swidan on 23/03/20.
//
#define EPSILON "\\L"
#include "nfa.h"
#include "regular_expression.h"
vector<pair<NFAState *, NFAState *>> combined_nfa_states;
vector<pair<NFAState *, NFAState *>> global_states;
NFA::NFA() {
}
/**
Takes a a vector of Regular Expressions and an unordered set of strings representing the input_table
Converts the input regex to nfa in 4 steps;
1-)Converts the infix of the regex to postfix
2-)Converts eact postfixed output to NFA
3-)Resolves the input table by removing all backslashes from all transitions
4-)Combines all the nfas of all regex
@return an NFAState pointer representing the start state of the combined resulting NFA
*/
NFAState *NFA::regex_to_nfa(std::unordered_set<std::string> input_table, std::vector<RegularExpression> regex) {
string postfix;
unordered_set<string> resolved_input_table;
//iterates over the size of the vector of regular expression
for (int i = 0; i < regex.size(); i++) {
//Convert each infix to postfix
postfix = regex[i].infix_to_postfix(input_table);
//Convert each postfix to an NFA
this->postfix_to_NFA(postfix, input_table, regex[i].getName(),i);
}
NFAState *start_state = new NFANormalState();
//Resolves the input table by removing all backslashes
this->set_input_table(this->resolve_input_table(input_table));
//Combining the nfastates
for (vector<pair<NFAState *, NFAState *> >::const_iterator it = combined_nfa_states.begin();
it != combined_nfa_states.end(); it++) {
start_state->add_neighbour(EPSILON, it->first);
}
global_states.clear();
return start_state;
}
/**
Takes a transition an constructs an nfa state with a start and ending states using thompson rule
@return an NFAState ponter representing the start state a single transition
*/
NFAState *
NFA::construct_one_transition_state(string transition, unordered_map<NFAState *, NFAState *> *start_to_acceptance_map,
bool final_finish_state) {
NFAState *start_state = new NFANormalState();
NFAState *finish_state = this->acceptance_state_generator(final_finish_state);
transition = resolve_backslash(transition);
start_state->add_neighbour(transition, finish_state);
start_to_acceptance_map->insert(pair<NFAState *, NFAState *>(start_state, finish_state));
global_states.push_back(pair<NFAState *, NFAState *>(start_state, finish_state));
return start_state;
}
/**
Converts the postfix to NFA by iterating over the chracters of the postfix and distinguishing whether
it is an operand or an operator and based on that it constructs the nfa state for each operator using thompson rules
@return an NFAState ponter representing the start state an NFA of a regex
*/
NFAState *NFA::postfix_to_NFA(string postfix, unordered_set<string> input_table, string regex_name,int priority) {
stack<NFAState *> nfa_state_stack;
unordered_map<NFAState *, NFAState *> start_to_acceptance_map;
bool input_acceptor = false;
bool final_finish_state = false;
string input_identifier = "";
// Scan all characters one by one
int i = 0;
do {
// If the scanned character is an operand (number here),
// push it to the stack.
if (!isOperator(postfix[i])) {
while (input_acceptor == false) {
input_identifier += postfix[i];
for (const auto &element: input_table) {
if (input_identifier.compare(element) == 0) {
nfa_state_stack.push(
this->construct_one_transition_state(input_identifier, &start_to_acceptance_map,
final_finish_state));
input_acceptor = true;
break;
}
}
i++;
}
i--;
input_acceptor = false;
input_identifier = "";
}
// If the scanned character is an operator, pop two
// elements from stack apply the operator
else if (isOperator(postfix[i])) {
switch (postfix[i]) {
case POSITIVE_CLOSURE_OPERATOR: {
NFAState *plus_nfa_state = this->kleene_and_plus(nfa_state_stack.top(), &start_to_acceptance_map,
false, final_finish_state);
nfa_state_stack.pop();
nfa_state_stack.push(plus_nfa_state);
break;
}
case KLEENE_CLOSURE_OPERATOR: {
NFAState *kleene_nfa_state = this->kleene_and_plus(nfa_state_stack.top(), &start_to_acceptance_map,
true, final_finish_state);
nfa_state_stack.pop();
nfa_state_stack.push(kleene_nfa_state);
break;
}
case UNION_OPERATOR: {
NFAState *second_operand_union_state = nfa_state_stack.top();
nfa_state_stack.pop();
NFAState *first_operand_union_state = nfa_state_stack.top();
nfa_state_stack.pop();
nfa_state_stack.push(this->or_combiner(first_operand_union_state, second_operand_union_state,
&start_to_acceptance_map, final_finish_state));
break;
}
case CONCAT_OPERATOR: {
NFAState *second_operand_union_state = nfa_state_stack.top();
nfa_state_stack.pop();
NFAState *first_operand_union_state = nfa_state_stack.top();
nfa_state_stack.pop();
nfa_state_stack.push(this->concat(first_operand_union_state, second_operand_union_state,
&start_to_acceptance_map, final_finish_state));
break;
}
}
}
i++;
} while (nfa_state_stack.size() >= 1 && i < postfix.size());
//Adding the start and acceptance states of the final nfa to the global combined nfa map
NFAState *acceptance = new NFAAcceptanceState();
NFAAcceptanceState *acceptance_state = dynamic_cast<NFAAcceptanceState *>(acceptance);
global_states[global_states.size() - 1].second->add_neighbour(EPSILON, acceptance_state);
acceptance_state->set_token(regex_name);
acceptance_state->set_priority(priority);
combined_nfa_states.push_back(pair<NFAState *, NFAState *>(nfa_state_stack.top(), acceptance_state));
return nfa_state_stack.top();
}
/**
Takes a transition and 2 NFAStates representing the start of each state and constructs an nfa state with a start and ending states using thompson rule
@return an NFAState ponter representing the start state the uniuond transition
*/
NFAState *NFA::or_combiner(NFAState *first_nfa_state, NFAState *second_nfa_state,
unordered_map<NFAState *, NFAState *> *start_to_acceptance_map, bool final_finish_state) {
//Initilaizing a start and acceptance node for the union thompson rule
NFAState *start_state = new NFANormalState();
NFAState *finish_state = this->acceptance_state_generator(final_finish_state);
//Adding 2 neighbours from the start node with a transition epsilon where the neighbours are the 2 start nodes of the unioned nfa's
start_state->add_neighbour(EPSILON, first_nfa_state);
start_state->add_neighbour(EPSILON, second_nfa_state);
//Getting the finish states of the unioned nfa's and adding the finsih state of the resulting nfa as their neigbhours with a transition epsilon
unordered_map<NFAState *, NFAState *>::iterator it = start_to_acceptance_map->begin();
for (std::pair<NFAState *, NFAState *> element : *start_to_acceptance_map) {
if (element.first == first_nfa_state || element.first == second_nfa_state) {
(element.second)->add_neighbour(EPSILON, finish_state);
}
}
// for (vector<pair<NFAState *, NFAState *> >::const_iterator it = start_to_acceptance_map->begin();
// it != start_to_acceptance_map->end(); it++) {
// if (it->first == first_nfa_state || it->first == second_nfa_state) {
// (it->second)->add_neighbour(EPSILON, finish_state);
// }
// }
//Adding the first and finish states to the map
start_to_acceptance_map->insert(pair<NFAState *, NFAState *>(start_state, finish_state));
global_states.push_back(pair<NFAState *, NFAState *>(start_state, finish_state));
return start_state;
}
/**
Takes a transition and 2 NFAStates representing the start of each state and constructs an nfa state with a start and ending states using thompson rule
Based on the kleene boolean it constucts a different state in case it is a plus opeartor
@return an NFAState pointer representing the start state the kleene or plus operation
*/
NFAState *
NFA::kleene_and_plus(NFAState *nfa_state, unordered_map<NFAState *, NFAState *> *start_to_acceptance_map, bool kleene,
bool final_finish_state) {
NFAState *start_state = new NFANormalState();
NFAState *finish_state = this->acceptance_state_generator(final_finish_state);
//Adding an epsilon transition from the new start state to 1-) the start state of the old nfa 2-)the finish state of the new nfa
start_state->add_neighbour(EPSILON, nfa_state);
//In case it is a kleene operator add an epsilon transition from the start state to the accept state
//In case it is a plus operator do nothing
if (kleene == true) {
start_state->add_neighbour(EPSILON, finish_state);
}
//getting the start and finish states of the old nfa and making 2 transitions
//1-)epsilon from the finsih state of the old to the start state of the old
//2-)epsilon from the finish state of the old to the finish state of the the new
unordered_map<NFAState *, NFAState *>::iterator it = start_to_acceptance_map->begin();
for (std::pair<NFAState *, NFAState *> element : *start_to_acceptance_map) {
if (element.first == nfa_state) {
(element.second)->add_neighbour(EPSILON, finish_state);
(element.second)->add_neighbour(EPSILON, element.first);
break;
}
}
//Adding the first and finish states of the new nfa to the map
start_to_acceptance_map->insert(pair<NFAState *, NFAState *>(start_state, finish_state));
global_states.push_back(pair<NFAState *, NFAState *>(start_state, finish_state));
return start_state;
}
/**
Takes a transition and 2 NFAStates representing the start of each state and constructs an nfa state with a start and ending states using thompson rule
@return an NFAState ponter representing the start state the concated operation
*/
NFAState *NFA::concat(NFAState *first_nfa_state, NFAState *second_nfa_state,
unordered_map<NFAState *, NFAState *> *start_to_acceptance_map, bool final_finish_state) {
NFAState *first_nfa_acceptance_state;
vector<pair<string, NFAState *>> temp_vector;
int iterator = 0;
unordered_map<NFAState *, NFAState *>::iterator it = start_to_acceptance_map->begin();
for (std::pair<NFAState *, NFAState *> element : *start_to_acceptance_map) {
//Save the address of the acceptance state of the first concatinated inorder to add the neigbhours of the start state of the second concatinated nfa
if (first_nfa_state == element.first) {
for (std::pair<NFAState *, NFAState *> element1 : *start_to_acceptance_map) {
if ((element1.first) == second_nfa_state) {
start_to_acceptance_map->erase(first_nfa_state);
start_to_acceptance_map->insert(pair<NFAState *, NFAState *>(first_nfa_state, element1.second));
element.second->add_neighbour(EPSILON, second_nfa_state);
global_states.push_back(pair<NFAState *, NFAState *>(first_nfa_state, element1.second));
iterator++;
}
}
iterator++;
}
//Get the neighbours of the start state of the second concatinated nfa inorder to add the neighbours to the acceptance state of the first concatinted nfa
if (iterator == 2) {
break;
}
}
return first_nfa_state;
}
/**
Check if a transition has a backslash,if yes then trancate the backslash from the string else return the string without change
@return string with no backslash
*/
std::string NFA::resolve_backslash(std::string transition) {
if (transition.find(ESCAPE_CHARACTER) != string::npos) {
if (transition[1] != 'L')
transition.erase(0, 1);
}
return transition;
}
/**
Handling the input table by removing all the backslashes from the input table inorder to sync with the transitioned data between states
@return string with no backslash
*/
unordered_set<string> NFA::resolve_input_table(unordered_set<string> input_table) {
unordered_set<string> editted_input_table;
for (const auto &element: input_table) {
if(element==EPSILON){
continue;
}
else{
editted_input_table.insert(this->resolve_backslash(element));
}
}
return editted_input_table;
}
/**
Checks the final boolean state and returns an NFAAcceptanceState if boolean is true,else it returns an NFANormalState
@return NFAState pointer of type Normal State or Acceptance State
*/
NFAState *NFA::acceptance_state_generator(bool final_finish_state) {
if (!final_finish_state) {
return new NFANormalState();
} else {
return new NFAAcceptanceState();
}
}
/**
Checks if the character is an operator or not
@return a boolean indicating if it is a character or not
*/
bool NFA::isOperator(char character) {
if (character == UNION_OPERATOR || character == KLEENE_CLOSURE_OPERATOR || character == POSITIVE_CLOSURE_OPERATOR ||
character == CONCAT_OPERATOR) {
return true;
}
return false;
}
/**
Getter for the input_table
@return unordered_set of type string
*/
unordered_set<string> NFA::get_input_table() {
return this->input_table;
}
/**
Sets the input table
@return void
*/
void NFA::set_input_table(unordered_set<string> input_table) {
this->input_table = input_table;
}