-
Notifications
You must be signed in to change notification settings - Fork 0
/
regex_functions.py
177 lines (164 loc) · 5.99 KB
/
regex_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""
# Copyright Nick Cheng, Brian Harrington, Danny Heap, Yining Wang, 2013, 2014,
2015, 2016
# Distributed under the terms of the GNU General Public License.
#
# This file is part of Assignment 2, CSCA48, Winter 2016
#
# This is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file. If not, see <http://www.gnu.org/licenses/>.
"""
# Do not change this import statement, or add any of your own!
from regextree import RegexTree, StarTree, DotTree, BarTree, Leaf
# Do not change anything above this comment except for the copyright
# statement
# Student code below this comment.
def is_regex(s):
'''(str) -> bool
return whether it is a valid regex
>>> is_regex('0')
True
>>> is_regex('21asjdncv')
False
>>> is_regex('e*')
True
>>> is_regex('1**')
True
>>> is_regex('*1')
False
>>> is_regex('((1.(0|2)*).0)')
True
>>> is_regex('((1.(0|2))*).0)')
False
'''
degree = 0
if len(s) == 1:
# if length is 1, s can only be one of these to be a valid regex
return (s in ['0', '1', '2', 'e'])
elif s != "" and s[-1] == '*':
# if s is end with "*", cut the star and check the rest of s
return is_regex(s[:-1])
# if s is in brackets
elif s != "" and s[0] == "(" and s[-1] == ")":
# find index of the dot or bar that's in the first brackets
for i in range(len(s)):
if s[i] == "(":
degree += 1
elif s[i] == ")":
degree -= 1
if degree == 1 and (s[i] == "." or s[i] == '|'):
I = i
# recursion: remove the two brackets
# divide to 2 parts at the dot or bar found
return is_regex(s[1:I]) and is_regex(s[I+1:-1])
# if s is not in any of the form above return False
return False
def all_regex_permutations(s):
'''(str) -> set
returns a set of strings that are possible permutation of the given string
and are valid regexes
>>>all_regex_permutations('(0|2)')
set(['(2|0)', '(0|2)'])
>>>all_regex_permutations("")
set([])
'''
result = set()
# loop through every permutation of the string
for item in perms(s):
if is_regex(item):
# add the permutation into result set iff it is a valid regex
result.add(item)
return result
def perms(s):
'''(str) -> set
returns all permutations of the given string
>>>perms("abc")
set(['acb', 'cba', 'bca', 'abc', 'bac', 'cab'])
>>>perms("a")
set(['a'])
>>>perms("")
set([''])
'''
# if length is 0 or 1 return a set including only itself
if len(s) < 2:
return {s}
res = set()
# find all the permutation of the string with the first character removed
for permutation in perms(s[1:]):
for i in range(len(s)):
# insert the first character in every place between characters
res.add(permutation[:i] + s[0:1] + permutation[i:])
return res
def regex_match(r, s):
'''(RegexTree, str) -> bool
returns True iff s matches the regex tree whose root is r
>>>Tree = RegexTree
>>>regex_match(build_regex_tree("(1.2)*"), "")
True
>>>regex_match(build_regex_tree("(1.2)*"), "1212")
True
>>>regex_match(build_regex_tree("(1.2)*"), "21")
False
>>>regex_match(build_regex_tree("(0|2)"), "2")
True
>>>regex_match(build_regex_tree("(0|2)"), "1")
False
'''
# if the root is a Leaf
if type(r) == Leaf:
if symbol == "e":
# "e" means ""
return s == ""
else:
return s == symbol
# if the root is star
elif type(r) == StarTree:
if s == "":
return True
def build_regex_tree(regex):
'''(str) -> RegexTree
takes a string regex, build a regex tree, and returns the root of the tree
>>>build_regex_tree("(1.2)") == RegexTree('.', [Leaf('1'), Leaf('2')])
True
>>>build_regex_tree("(1*|e)") == RegexTree('|', [StarTree(Leaf('1')), Leaf(
'e')])
True
>>>build_regex_tree("(((e.1).2*)|((0***.e).(1|2*)))") == RegexTree('|', [
RegexTree('.', [RegexTree('.', [Leaf('e'), Leaf('1')]), StarTree(Leaf(
'2'))]), RegexTree('.', [RegexTree('.', [StarTree(StarTree(StarTree(Leaf(
'0')))), Leaf('e')]), RegexTree('|', [Leaf('1'), StarTree(Leaf('2'))])])])
True
'''
degree = 0
# base case if length is 1,regex is a leaf since it does not have any child
if len(regex) == 1:
return Leaf(regex)
# if it ends with a "*"
elif regex != "" and regex[-1] == '*':
# set the star to be the parent of a tree built with the rest of regex
return StarTree(build_regex_tree(regex[:-1]))
# if it is in brackets
elif regex != "" and regex[0] == "(" and regex[-1] == ")":
# loop and index of the dot or bar that's in the first brackets
for i in range(len(regex)):
if regex[i] == "(":
degree += 1
elif regex[i] == ")":
degree -= 1
if degree == 1 and (regex[i] == "." or regex[i] == '|'):
I = i
# recursion: remove the two brackets
# divide to 2 parts at the dot or bar found and build trees
# for each of the parts
return RegexTree(regex[I], [build_regex_tree(
regex[1:I]), build_regex_tree(regex[I+1:-1])])