-
Notifications
You must be signed in to change notification settings - Fork 17
/
restructure.go
229 lines (193 loc) · 5.85 KB
/
restructure.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
package restructure
import (
"fmt"
"reflect"
"regexp/syntax"
"github.com/alexflint/go-restructure/regex"
)
// Style selects the regular-expression syntax mode used when compiling a
// struct: Perl, POSIX, or custom (caller-supplied syntax flags).
type Style int
// The available styles. CompileType switches on the style to decide which
// syntax flags are stored in Options.SyntaxFlags.
const (
// Perl is the zero value of Style, so a zero Options selects it.
// CompileType sets Options.SyntaxFlags to syntax.Perl for this style.
Perl Style = iota
// POSIX makes CompileType set Options.SyntaxFlags to syntax.POSIX.
POSIX
// CustomStyle has no case in CompileType's switch, so the caller's
// Options.SyntaxFlags value is left as provided.
CustomStyle
)
// Options represents optional parameters for compilation. The zero value
// selects the Perl style.
type Options struct {
// Style can be set to Perl, POSIX, or CustomStyle.
Style Style // Style can be set to Perl, POSIX, or CustomStyle
// SyntaxFlags holds the regexp/syntax flags used for compilation.
// CompileType overwrites this field with syntax.Perl or syntax.POSIX when
// Style is Perl or POSIX, so a caller-supplied value is only kept when
// Style is CustomStyle.
SyntaxFlags syntax.Flags
}
// subcapture is the half-open byte range of one capture group within the
// input, expressed as a pair of indices. Either index is -1 when the group
// did not participate in the match.
type subcapture struct {
	begin, end int
}

// wasMatched reports whether this capture group participated in the match,
// i.e. whether neither of its indices is -1.
func (r subcapture) wasMatched() bool {
	return r.begin != -1 && r.end != -1
}

// match bundles the input that was searched with the capture ranges found in
// it, one subcapture per index pair.
type match struct {
	input    []byte
	captures []subcapture
}

// matchFromIndices converts a flat list of index pairs (in the layout
// produced by FindSubmatchIndex: begin0, end0, begin1, end1, ...) into a
// match over input. indices is expected to have even length; an odd length
// panics when the missing end index is read.
func matchFromIndices(indices []int, input []byte) *match {
	// The number of captures is known up front, so size the slice once
	// instead of growing it through repeated appends. The local is named m
	// so that it does not shadow the match type.
	m := &match{
		input:    input,
		captures: make([]subcapture, 0, len(indices)/2),
	}
	for i := 0; i < len(indices); i += 2 {
		m.captures = append(m.captures, subcapture{begin: indices[i], end: indices[i+1]})
	}
	return m
}
// Pos represents a position within a matched region. If a matched struct
// contains a field of type Pos, that field is assigned a position in the
// input string, where the position corresponds to the index of the Pos field
// within the struct.
type Pos int

// Submatch represents a matched region. It is used to determine the begin and
// end position of the match corresponding to a field. This library treats
// fields of type `Submatch` just like `string` or `[]byte` fields, except
// that the matched bytes are stored in `Submatch.Bytes` and the begin and end
// positions are stored in `Submatch.Begin` and `Submatch.End`.
type Submatch struct {
	Begin Pos
	End   Pos
	Bytes []byte
}

// String returns the matched region as a string, i.e. a string copy of Bytes.
func (r *Submatch) String() string {
	matched := r.Bytes
	return string(matched)
}
// Regexp is a regular expression that captures submatches into struct fields.
// Values are constructed by Compile, CompileType and their Must variants.
type Regexp struct {
// st is the structure description the builder produced for t; it is passed
// to inflateStruct when copying submatches into a destination.
st *Struct
// re is the regular expression compiled from the builder's syntax tree.
re *regex.Regexp
// t is the type the expression was compiled from, with one level of
// pointer indirection removed by CompileType.
t reflect.Type
// opts holds the options as they stood after CompileType applied the style.
opts Options
}
// Find attempts to match the regular expression against the input string. It
// returns true if there was a match, and also populates the fields of the
// provided struct with the contents of each submatch. When there is no match
// it returns false without touching dest.
//
// dest must be a pointer to the struct type this Regexp was compiled from.
// Find panics with a descriptive error if dest has any other type (including
// a nil interface value), and panics with the error from inflateStruct if
// the match cannot be copied into the struct.
func (r *Regexp) Find(dest interface{}, s string) bool {
	// Check the type. reflect.ValueOf(nil) is the zero Value, and calling
	// Type on it panics inside reflect, so test validity first to report the
	// same descriptive error for a nil dest as for any other wrong type.
	v := reflect.ValueOf(dest)
	if !v.IsValid() || v.Type() != reflect.PtrTo(r.t) {
		panic(fmt.Errorf("expected destination to be *%s but got %T", r.t.String(), dest))
	}

	// Execute the regular expression
	input := []byte(s)
	indices := r.re.FindSubmatchIndex(input)
	if indices == nil {
		return false
	}

	// Inflate matches into original struct
	if err := inflateStruct(v, matchFromIndices(indices, input), r.st); err != nil {
		panic(err)
	}
	return true
}
// FindAll matches the regular expression against the input string and stores
// one populated element per match in the slice that dest points to. The
// slice is replaced with a new slice whose length is the number of matches,
// so it is empty when nothing matched. FindAll has no return value; inspect
// the length of the slice to find out whether there were any matches.
//
// dest must be a non-nil pointer to a slice whose element type is either the
// struct type this Regexp was compiled from or a pointer to it. limit is
// passed through unchanged to FindAllSubmatchIndex (presumably with the same
// meaning as in the standard regexp package, where a negative value means
// "no limit" -- confirm against the regex package).
//
// FindAll panics with a descriptive error if dest has the wrong type or is
// nil, and panics with the error from inflateStruct if a match cannot be
// copied into its element.
func (r *Regexp) FindAll(dest interface{}, s string, limit int) {
	// Check the type. A nil interface gives the zero Value, on which Type
	// panics inside reflect, so validity is tested before anything else.
	v := reflect.ValueOf(dest)
	if !v.IsValid() || v.Kind() != reflect.Ptr || v.Type().Elem().Kind() != reflect.Slice {
		panic(fmt.Errorf("parameter to FindAll should be a pointer to a slice but got %T", dest))
	}
	if v.IsNil() {
		// Without this check the failure would surface later as an opaque
		// reflect panic when the result slice is stored through the pointer.
		panic(fmt.Errorf("parameter to FindAll should be a non-nil pointer to a slice but got a nil %T", dest))
	}

	sliceType := v.Type().Elem()
	itemType := sliceType.Elem()
	if itemType != r.t && itemType != reflect.PtrTo(r.t) {
		// Report the element type that was actually found, not the type of
		// dest itself.
		panic(fmt.Errorf("expected the slice element to be %s or *%s but it was %s", r.t, r.t, itemType))
	}
	itemIsPtr := itemType.Kind() == reflect.Ptr

	// Execute the regular expression
	input := []byte(s)
	matches := r.re.FindAllSubmatchIndex(input, limit)

	// Allocate a slice with the desired length
	slice := v.Elem()
	slice.Set(reflect.MakeSlice(sliceType, len(matches), len(matches)))

	// Inflate the matches into the slice elements
	for i, indices := range matches {
		// inflateStruct is always handed a pointer-typed value: the element
		// itself when the slice holds pointers, otherwise its address.
		// NOTE(review): for pointer elements the freshly made slice holds
		// nil pointers; allocation is presumably done by inflateStruct --
		// confirm.
		destItem := slice.Index(i)
		if !itemIsPtr {
			destItem = destItem.Addr()
		}

		if err := inflateStruct(destItem, matchFromIndices(indices, input), r.st); err != nil {
			panic(err)
		}
	}
}
// String returns the textual form of the regular expression, as reported by
// the underlying compiled expression's own String method.
func (r *Regexp) String() string {
	pattern := r.re.String()
	return pattern
}
// Compile builds a Regexp from the struct fields of proto. Only the dynamic
// type of proto is used: it is looked up with reflection and handed to
// CompileType together with opts.
func Compile(proto interface{}, opts Options) (*Regexp, error) {
	protoType := reflect.TypeOf(proto)
	return CompileType(protoType, opts)
}
// CompileType is like Compile but takes a reflect.Type instead.
//
// If t is a pointer type, the type it points to is compiled. A nil t (which
// is what reflect.TypeOf yields for a nil interface value, e.g. via
// Compile(nil, ...)) is reported as an error. Errors from traversing the
// struct and from compiling the resulting expression are returned unchanged.
func CompileType(t reflect.Type, opts Options) (*Regexp, error) {
	// Calling a method on a nil reflect.Type would panic with a nil
	// dereference; this function has an error return, so use it.
	if t == nil {
		return nil, fmt.Errorf("cannot compile a regular expression from a nil type")
	}

	// We do this so that the zero value for Options gives us Perl mode,
	// which is also the default used by the standard library regexp package.
	// CustomStyle deliberately has no case: the caller's flags are kept.
	switch opts.Style {
	case Perl:
		opts.SyntaxFlags = syntax.Perl
	case POSIX:
		opts.SyntaxFlags = syntax.POSIX
	}

	if t.Kind() == reflect.Ptr {
		t = t.Elem()
	}

	// Traverse the struct
	b := newBuilder(opts)
	st, expr, err := b.structure(t)
	if err != nil {
		return nil, err
	}

	// Compile regular expression
	re, err := regex.CompileSyntax(expr)
	if err != nil {
		return nil, err
	}

	return &Regexp{
		st:   st,
		re:   re,
		t:    t,
		opts: opts,
	}, nil
}
// MustCompile wraps Compile for callers that treat a compilation failure as
// fatal: it returns the compiled Regexp, or panics with the error that
// Compile reported.
func MustCompile(proto interface{}, opts Options) *Regexp {
	compiled, err := Compile(proto, opts)
	if err == nil {
		return compiled
	}
	panic(err)
}
// MustCompileType wraps CompileType for callers that treat a compilation
// failure as fatal: it returns the compiled Regexp, or panics with the error
// that CompileType reported.
func MustCompileType(t reflect.Type, opts Options) *Regexp {
	compiled, err := CompileType(t, opts)
	if err == nil {
		return compiled
	}
	panic(err)
}
// Find is a one-shot convenience: it compiles a regular expression from the
// struct that dest points to, using the zero Options, and runs it against s,
// placing submatches into the fields of the struct. The first parameter must
// be a non-nil struct pointer. The boolean result reports whether the match
// succeeded. The only errors returned are compilation errors, in which case
// the boolean is false. The expression is compiled afresh on every call.
func Find(dest interface{}, s string) (bool, error) {
	compiled, err := Compile(dest, Options{})
	if err == nil {
		return compiled.Find(dest, s), nil
	}
	return false, err
}