-
Notifications
You must be signed in to change notification settings - Fork 2
/
er.rb
173 lines (130 loc) · 7.23 KB
/
er.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# i Ignore case when matching text.
# m The pattern is to be matched against multiline text, so treat newline as an ordinary character: allow . to match newlines.
# x Extended syntax: allow whitespace and comments in regexp.
# o Perform #{} interpolations only once, the first time the regexp literal is evaluated.
# u,e,s,n Interpret the regexp as Unicode (UTF-8), EUC, SJIS, or ASCII. If none of these modifiers is specified, the regular expression is assumed to use the source encoding.
%r|/| # Matches a single slash character, no escape required
p '</opa>'.match %r[</(.*)>]i # Flag characters are allowed with this syntax, too
money = /[$\u20AC\u{a3}\u{a5}]/ # match dollar,euro,pound, or yen sign
pattern = "[a-z]+" # One or more letters
suffix = Regexp.escape("()") # Treat these characters literally
r = Regexp.new(pattern + suffix) # /[a-z]+\(\)/
# Literal characters
/ruby/ # Match "ruby". Most characters simply match themselves.
/¥/ # Matches Yen sign. Multibyte characters are suported
# in Ruby 1.9 and Ruby 1.8.
# Character classes
/[Rr]uby/ # Match "Ruby" or "ruby"
/rub[ye]/ # Match "ruby" or "rube"
/[aeiou]/ # Match any one lowercase vowel
/[0-9]/ # Match any digit; same as /[0123456789]/
/[a-z]/ # Match any lowercase ASCII letter
/[A-Z]/ # Match any uppercase ASCII letter
/[a-zA-Z0-9]/ # Match any of the above
/[^aeiou]/ # Match anything other than a lowercase vowel
/[^0-9]/ # Match anything other than a digit
# Special character classes
/./ # Match any character except newline
/./m # In multiline mode . matches newline, too
/\d/ # Match a digit /[0-9]/
/\D/ # Match a nondigit: /[^0-9]/
/\s/ # Match a whitespace character: /[ \t\r\n\f]/
/\S/ # Match nonwhitespace: /[^ \t\r\n\f]/
/\w/ # Match a single word character: /[A-Za-z0-9_]/
/\W/ # Match a nonword character: /[^A-Za-z0-9_]/
# Repetition
/ruby?/ # Match "rub" or "ruby": the y is optional
/ruby*/ # Match "rub" plus 0 or more ys
/ruby+/ # Match "rub" plus 1 or more ys
/\d{3}/ # Match exactly 3 digits
/\d{3,}/ # Match 3 or more digits
/\d{3,5}/ # Match 3, 4, or 5 digits
# Nongreedy repetition: match the smallest number of repetitions
/<.*>/ # Greedy repetition: matches "<ruby>perl>"
/<.*?>/ # Nongreedy: matches "<ruby>" in "<ruby>perl>"
# Also nongreedy: ??, +?, and {n,m}?
# Grouping with parentheses
/\D\d+/ # No group: + repeats \d
/(\D\d)+/ # Grouped: + repeats \D\d pair
/([Rr]uby(, )?)+/ # Match "Ruby", "Ruby, ruby, ruby", etc.
# Backreferences: matching a previously matched group again
/([Rr])uby&\1ails/ # Match ruby&rails or Ruby&Rails
/(['"])[^\1]*\1/ # Single or double-quoted string
# \1 matches whatever the 1st group matched
# \2 matches whatever the 2nd group matched, etc.
# Alternatives
/ruby|rube/ # Match "ruby" or "rube"
/rub(y|le)/ # Match "ruby" or "ruble"
/ruby(!+|\?)/ # "ruby" followed by one or more ! or one ?
# Anchors: specifying match position
/^Ruby/ # Match "Ruby" at the start of a string or internal line
/Ruby$/ # Match "Ruby" at the end of a string or line
/\ARuby/ # Match "Ruby" at the start of a string
/Ruby\Z/ # Match "Ruby" at the end of a string
/\bRuby\b/ # Match "Ruby" at a word boundary
/\brub\B/ # \B is nonword boundary:
# match "rub" in "rube" and "ruby" but not alone
/Ruby(?=!)/ # Match "Ruby", if followed by an exclamation point
/Ruby(?!!)/ # Match "Ruby", if not followed by an exclamation point
# Special syntax with parentheses
/R(?#comment)/ # Matches "R". All the rest is a comment
/R(?i)uby/ # Case-insensitive while matching "uby"
/R(?i:uby)/ # Same thing
/rub(?:y|le)/ # Group only without creating \1 backreference
# The x option allows comments and ignores whitespace
/ # This is not a Ruby comment. It is a literal part
# of the regular expression, but is ignored.
R # Match a single letter R
(uby)+ # Followed by one or more "uby"s
\ # Use backslash for a nonignored space
/x # Closing delimiter. Don't forget the x option!
p '------------------------'
pattern = /Ruby?/i # Match "Rub" or "Ruby", case-insensitive
p pattern =~ "backrub" # Returns 4.
p "rub ruby" =~ pattern # 0
p pattern =~ "r" # nil
"hello" =~ /e\w{2}/ # 1: Match an e followed by 2 word characters
$~.string # "hello": the complete string
$~.to_s # "ell": the portion that matched
$~.pre_match # "h": the portion before the match
$~.post_match # "o": the portion after the match
p Regexp.last_match == $~ ## TRUE
# This is a pattern with three subpatterns
pattern = /(Ruby|Perl)(\s+)(rocks|sucks)!/
text = "Ruby\trocks!" # Text that matches the pattern
pattern =~ text # => 0: pattern matches at the first character
data = Regexp.last_match # => Get match details
data.size # => 4: MatchData objects behave like arrays
data[0] # => "Ruby\trocks!": the complete matched text
data[1] # => "Ruby": text matching first subpattern
data[2] # => "\t": text matching second subpattern
data[3] # => "rocks": text matching third subpattern
data[1,2] # => ["Ruby", "\t"]
data[1..3] # => ["Ruby", "\t", "rocks"]
data.values_at(1,3) # => ["Ruby", "rocks"]: only selected indexes
data.captures # => ["Ruby", "\t", "rocks"]: only subpatterns
Regexp.last_match(3) # => "rocks": same as Regexp.last_match[3]
# Start and end positions of matches
data.begin(0) # => 0: start index of entire match
data.begin(2) # => 4: start index of second subpattern
data.end(2) # => 5: end index of second subpattern
data.offset(3) # => [5,10]: start and end of third subpattern
# $~ $LAST_MATCH_INFO Regexp.last_match
# $& $MATCH Regexp.last_match[0]
# $` $PREMATCH Regexp.last_match.pre_match
# $' $POSTMATCH Regexp.last_match.post_match
# $1 none Regexp.last_match[1]
# $2, etc. none Regexp.last_match[2], etc.
# $+ $LAST_PAREN_MATCH Regexp.last_match[-1]
"ruby123"[/\d+/] # "123"
"ruby123"[/([a-z]+)(\d+)/, 1] # "ruby"
"ruby123"[/([a-z]+)(\d+)/, 2] # "123"
"one, two, three".split(/\s*,\s*/) # ["one","two","three"]: space is optional around comma
phone = "3213 #sd" # Read a phone number
phone.sub!(/#.*$/, "") # Delete Ruby-style comments %% JUST ONE
phone.gsub!(/\D/, "") # Remove anything other than digits %% ALL
text.gsub(/\bruby\b/i, "<b>#{$&}</b>")
text = "RUBY Java perl PyThOn % Isso FiCa CoMo TaH" # Text to modify
lang = /ruby|java|perl|python/i # Pattern to match
text.gsub!(lang) {|l| l.capitalize } # Fix capitalization
p text