forked from antlr4-go/antlr
-
Notifications
You must be signed in to change notification settings - Fork 1
/
statistics.go
280 lines (245 loc) · 9.82 KB
/
statistics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
//go:build antlr.stats
package antlr
import (
"fmt"
"log"
"os"
"path/filepath"
"sort"
"strconv"
)
// This file allows the user to collect statistics about the runtime of the ANTLR runtime. It is not enabled by default
// and so incurs no time penalty. To enable it, you must build the runtime with the antlr.stats build tag.
//
// Tells various components to collect statistics - because it is only true when this file is included, it will
// allow the compiler to completely eliminate all the code that is only used when collecting statistics.
const collectStats = true
// goRunStats is a collection of all the various data the ANTLR runtime has collected about a particular run.
// It is exported so that it can be used by others to look for things that are not already looked for in the
// runtime statistics.
type goRunStats struct {
// jStats is a slice of all the [JStatRec] records that have been created, which is one for EVERY collection created
// during a run. It is exported so that it can be used by others to look for things that are not already looked for
// within this package.
//
jStats []*JStatRec
jStatsLock RWMutex
topN int
topNByMax []*JStatRec
topNByUsed []*JStatRec
unusedCollections map[CollectionSource]int
counts map[CollectionSource]int
}
const (
collectionsFile = "collections"
)
var (
Statistics = &goRunStats{
topN: 10,
}
)
type statsOption func(*goRunStats) error
// Configure allows the statistics system to be configured as the user wants and override the defaults
func (s *goRunStats) Configure(options ...statsOption) error {
for _, option := range options {
err := option(s)
if err != nil {
return err
}
}
return nil
}
// WithTopN sets the number of things to list in the report when we are concerned with the top N things.
//
// For example, if you want to see the top 20 collections by size, you can do:
//
// antlr.Statistics.Configure(antlr.WithTopN(20))
func WithTopN(topN int) statsOption {
return func(s *goRunStats) error {
s.topN = topN
return nil
}
}
// Analyze looks through all the statistical records and computes all the outputs that might be useful to the user.
//
// The function gathers and analyzes a number of statistics about any particular run of
// an ANTLR generated recognizer. In the vast majority of cases, the statistics are only
// useful to maintainers of ANTLR itself, but they can be useful to users as well. They may be
// especially useful in tracking down bugs or performance problems when an ANTLR user could
// supply the output from this package, but cannot supply the grammar file(s) they are using, even
// privately to the maintainers.
//
// The statistics are gathered by the runtime itself, and are not gathered by the parser or lexer, but the user
// must call this function their selves to analyze the statistics. This is because none of the infrastructure is
// extant unless the calling program is built with the antlr.stats tag like so:
//
// go build -tags antlr.stats .
//
// When a program is built with the antlr.stats tag, the Statistics object is created and available outside
// the package. The user can then call the [Statistics.Analyze] function to analyze the statistics and then call the
// [Statistics.Report] function to report the statistics.
//
// Please forward any questions about this package to the ANTLR discussion groups on GitHub or send to them to
// me [Jim Idle] directly at [email protected]
//
// [Jim Idle]: https:://github.com/jim-idle
func (s *goRunStats) Analyze() {
// Look for anything that looks strange and record it in our local maps etc for the report to present it
//
s.CollectionAnomalies()
s.TopNCollections()
}
// TopNCollections looks through all the statistical records and gathers the top ten collections by size.
func (s *goRunStats) TopNCollections() {
// Let's sort the stat records by MaxSize
//
sort.Slice(s.jStats, func(i, j int) bool {
return s.jStats[i].MaxSize > s.jStats[j].MaxSize
})
for i := 0; i < len(s.jStats) && i < s.topN; i++ {
s.topNByMax = append(s.topNByMax, s.jStats[i])
}
// Sort by the number of times used
//
sort.Slice(s.jStats, func(i, j int) bool {
return s.jStats[i].Gets+s.jStats[i].Puts > s.jStats[j].Gets+s.jStats[j].Puts
})
for i := 0; i < len(s.jStats) && i < s.topN; i++ {
s.topNByUsed = append(s.topNByUsed, s.jStats[i])
}
}
// Report dumps a markdown formatted report of all the statistics collected during a run to the given dir output
// path, which should represent a directory. Generated files will be prefixed with the given prefix and will be
// given a type name such as `anomalies` and a time stamp such as `2021-09-01T12:34:56` and a .md suffix.
func (s *goRunStats) Report(dir string, prefix string) error {
isDir, err := isDirectory(dir)
switch {
case err != nil:
return err
case !isDir:
return fmt.Errorf("output directory `%s` is not a directory", dir)
}
s.reportCollections(dir, prefix)
// Clean out any old data in case the user forgets
//
s.Reset()
return nil
}
func (s *goRunStats) Reset() {
s.jStats = nil
s.topNByUsed = nil
s.topNByMax = nil
}
func (s *goRunStats) reportCollections(dir, prefix string) {
cname := filepath.Join(dir, ".asciidoctor")
// If the file doesn't exist, create it, or append to the file
f, err := os.OpenFile(cname, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Fatal(err)
}
_, _ = f.WriteString(`// .asciidoctorconfig
++++
<style>
body {
font-family: "Quicksand", "Montserrat", "Helvetica";
background-color: black;
}
</style>
++++`)
_ = f.Close()
fname := filepath.Join(dir, prefix+"_"+"_"+collectionsFile+"_"+".adoc")
// If the file doesn't exist, create it, or append to the file
f, err = os.OpenFile(fname, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Fatal(err)
}
defer func(f *os.File) {
err := f.Close()
if err != nil {
log.Fatal(err)
}
}(f)
_, _ = f.WriteString("= Collections for " + prefix + "\n\n")
_, _ = f.WriteString("== Summary\n")
if s.unusedCollections != nil {
_, _ = f.WriteString("=== Unused Collections\n")
_, _ = f.WriteString("Unused collections incur a penalty for allocation that makes them a candidate for either\n")
_, _ = f.WriteString(" removal or optimization. If you are using a collection that is not used, you should\n")
_, _ = f.WriteString(" consider removing it. If you are using a collection that is used, but not very often,\n")
_, _ = f.WriteString(" you should consider using lazy initialization to defer the allocation until it is\n")
_, _ = f.WriteString(" actually needed.\n\n")
_, _ = f.WriteString("\n.Unused collections\n")
_, _ = f.WriteString(`[cols="<3,>1"]` + "\n\n")
_, _ = f.WriteString("|===\n")
_, _ = f.WriteString("| Type | Count\n")
for k, v := range s.unusedCollections {
_, _ = f.WriteString("| " + CollectionDescriptors[k].SybolicName + " | " + strconv.Itoa(v) + "\n")
}
f.WriteString("|===\n\n")
}
_, _ = f.WriteString("\n.Summary of Collections\n")
_, _ = f.WriteString(`[cols="<3,>1"]` + "\n\n")
_, _ = f.WriteString("|===\n")
_, _ = f.WriteString("| Type | Count\n")
for k, v := range s.counts {
_, _ = f.WriteString("| " + CollectionDescriptors[k].SybolicName + " | " + strconv.Itoa(v) + "\n")
}
_, _ = f.WriteString("| Total | " + strconv.Itoa(len(s.jStats)) + "\n")
_, _ = f.WriteString("|===\n\n")
_, _ = f.WriteString("\n.Summary of Top " + strconv.Itoa(s.topN) + " Collections by MaxSize\n")
_, _ = f.WriteString(`[cols="<1,<3,>1,>1,>1,>1"]` + "\n\n")
_, _ = f.WriteString("|===\n")
_, _ = f.WriteString("| Source | Description | MaxSize | EndSize | Puts | Gets\n")
for _, c := range s.topNByMax {
_, _ = f.WriteString("| " + CollectionDescriptors[c.Source].SybolicName + "\n")
_, _ = f.WriteString("| " + c.Description + "\n")
_, _ = f.WriteString("| " + strconv.Itoa(c.MaxSize) + "\n")
_, _ = f.WriteString("| " + strconv.Itoa(c.CurSize) + "\n")
_, _ = f.WriteString("| " + strconv.Itoa(c.Puts) + "\n")
_, _ = f.WriteString("| " + strconv.Itoa(c.Gets) + "\n")
_, _ = f.WriteString("\n")
}
_, _ = f.WriteString("|===\n\n")
_, _ = f.WriteString("\n.Summary of Top " + strconv.Itoa(s.topN) + " Collections by Access\n")
_, _ = f.WriteString(`[cols="<1,<3,>1,>1,>1,>1,>1"]` + "\n\n")
_, _ = f.WriteString("|===\n")
_, _ = f.WriteString("| Source | Description | MaxSize | EndSize | Puts | Gets | P+G\n")
for _, c := range s.topNByUsed {
_, _ = f.WriteString("| " + CollectionDescriptors[c.Source].SybolicName + "\n")
_, _ = f.WriteString("| " + c.Description + "\n")
_, _ = f.WriteString("| " + strconv.Itoa(c.MaxSize) + "\n")
_, _ = f.WriteString("| " + strconv.Itoa(c.CurSize) + "\n")
_, _ = f.WriteString("| " + strconv.Itoa(c.Puts) + "\n")
_, _ = f.WriteString("| " + strconv.Itoa(c.Gets) + "\n")
_, _ = f.WriteString("| " + strconv.Itoa(c.Gets+c.Puts) + "\n")
_, _ = f.WriteString("\n")
}
_, _ = f.WriteString("|===\n\n")
}
// AddJStatRec adds a [JStatRec] record to the [goRunStats] collection when build runtimeConfig antlr.stats is enabled.
func (s *goRunStats) AddJStatRec(rec *JStatRec) {
s.jStatsLock.Lock()
defer s.jStatsLock.Unlock()
s.jStats = append(s.jStats, rec)
}
// CollectionAnomalies looks through all the statistical records and gathers any anomalies that have been found.
func (s *goRunStats) CollectionAnomalies() {
s.jStatsLock.RLock()
defer s.jStatsLock.RUnlock()
s.counts = make(map[CollectionSource]int, len(s.jStats))
for _, c := range s.jStats {
// Accumlate raw counts
//
s.counts[c.Source]++
// Look for allocated but unused collections and count them
if c.MaxSize == 0 && c.Puts == 0 {
if s.unusedCollections == nil {
s.unusedCollections = make(map[CollectionSource]int)
}
s.unusedCollections[c.Source]++
}
if c.MaxSize > 6000 {
fmt.Println("Collection ", c.Description, "accumulated a max size of ", c.MaxSize, " - this is probably too large and indicates a poorly formed grammar")
}
}
}