This repository has been archived by the owner on May 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
601 lines (487 loc) · 13.3 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
package main
import (
"bufio"
"bytes"
"crypto/md5"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"sort"
"strings"
"github.com/eidolon/wordwrap"
"github.com/houseabsolute/omegasort/internal/sorters"
"golang.org/x/term"
"golang.org/x/text/language"
kingpin "gopkg.in/alecthomas/kingpin.v2"
)
// version is the omegasort release string handed to kingpin's Version() when
// the command-line application is built in new.
var version = "0.0.6"
// omegasort holds the state for a single invocation of the tool.
type omegasort struct {
	// opts are the parsed command-line options.
	opts *opts
	// app is the kingpin application used to parse the command line.
	app *kingpin.Application
	// sort is the entry of sorters.AvailableSorts whose Name matches the
	// --sort flag.
	sort sorters.Approach
	// locale is the language tag parsed from --locale by validateArgs. It is
	// left at its zero value when no locale was given.
	locale language.Tag
	// lineEnding is the line terminator chosen by determineLineEnding. It is
	// used to split the input into lines and is written after every output
	// line.
	lineEnding []byte
}
// opts is the set of command-line options after parsing. Every field is
// copied from the corresponding kingpin flag or argument in new.
type opts struct {
	// sort is the name of the sorting method (--sort / -s).
	sort string
	// locale is the raw --locale / -l value; empty means no locale was given.
	locale string
	// unique is --unique / -u: drop repeated lines, or with --check verify
	// that there are none.
	unique bool
	// caseInsensitive is --case-insensitive / -c.
	caseInsensitive bool
	// reverse is --reverse / -r.
	reverse bool
	// windows is --windows: treat paths as Windows paths when path sorting.
	windows bool
	// inPlace is --in-place / -i: do not keep a ".bak" copy of the original.
	inPlace bool
	// toStdout is --stdout: print the result instead of rewriting the file.
	toStdout bool
	// check is --check: only verify that the file is already sorted.
	check bool
	// debug is --debug: print the parsed options.
	debug bool
	// file is the positional argument naming the file to sort.
	file string
}
// errNotSorted is returned by run in --check mode when the file's lines are
// not already in sorted order. main reports it and exits with status 1.
var errNotSorted = errors.New("file is not sorted")
// main parses the command line, runs the sort (or check), and maps the
// outcome to an exit status:
//
//   - 0 on success,
//   - 1 when --check finds the file unsorted or not unique,
//   - 2 for any other failure, including invalid command-line arguments.
func main() {
	o, err := new()
	if err != nil {
		// Bad flags or arguments are a user error, so report them as a plain
		// message instead of panicking with a stack trace.
		exitWithMessage(2, "omegasort: %s\n", err)
	}

	err = o.run()
	if err == nil {
		os.Exit(0)
	}

	var nuErr notUniqueError
	switch {
	case errors.Is(err, errNotSorted):
		exitWithMessage(1, "The %s file is not sorted\n", o.opts.file)
	case errors.As(err, &nuErr):
		exitWithMessage(1, "The %s file is not unique: %s\n", o.opts.file, nuErr)
	default:
		exitWithMessage(2, "error when sorting %s: %s\n", o.opts.file, err)
	}
}

// exitWithMessage writes the formatted message to stderr and terminates the
// process with the given exit code. It panics if stderr cannot be written to,
// since there is then no other channel left to report the problem on.
func exitWithMessage(code int, format string, args ...interface{}) {
	if _, err := fmt.Fprintf(os.Stderr, format, args...); err != nil {
		panic(err)
	}
	os.Exit(code)
}
// new builds the kingpin command-line definition, parses os.Args, and returns
// an omegasort populated from the parsed flags.
//
// If parsing fails, the omegasort is returned with empty options together
// with the parse error. If --docs was given, the extended documentation is
// printed and the process exits with status 0. Otherwise the returned error
// is whatever validateArgs reports.
func new() (*omegasort, error) {
	cli := kingpin.New("omegasort", "The last text file sorting tool you'll ever need.").
		Author("Dave Rolsky <[email protected]>").
		Version(version).
		UsageWriter(os.Stdout).
		UsageTemplate(kingpin.DefaultUsageTemplate + sortDocs())
	cli.HelpFlag.Short('h')

	sortNames := make([]string, 0, len(sorters.AvailableSorts))
	for _, approach := range sorters.AvailableSorts {
		sortNames = append(sortNames, approach.Name)
	}

	// None of the flags below (nor the file argument) can be marked as
	// required, because --docs has to work on its own. The missing-value
	// checks are done by validateArgs instead.
	sortFlag := cli.
		Flag("sort", "The type of sorting to use. See below for options.").
		Short('s').
		HintOptions(sortNames...).
		Enum(sortNames...)

	localeFlag := cli.
		Flag("locale", "The locale to use for sorting. If this is not specified the sorting is in codepoint order.").
		Short('l').
		Default("").
		String()

	uniqueFlag := cli.
		Flag("unique", "Make the file contents unique, or check that they're unique when used with --check.").
		Short('u').
		Default("false").
		Bool()

	caseInsensitiveFlag := cli.
		Flag("case-insensitive", "Sort case-insensitively. Note that many locales always do this so if you specify a locale you may get case-insensitive output regardless of this flag.").
		Short('c').
		Default("false").
		Bool()

	reverseFlag := cli.
		Flag("reverse", "Sort in reverse order.").
		Short('r').
		Default("false").
		Bool()

	windowsFlag := cli.
		Flag("windows", "Parse paths as Windows paths for path sort.").
		Default("false").
		Bool()

	inPlaceFlag := cli.
		Flag("in-place", "Modify the file in place instead of making a backup.").
		Short('i').
		Default("false").
		Bool()

	stdoutFlag := cli.
		Flag("stdout", "Print the sorted output to stdout instead of making a new file.").
		Default("false").
		Bool()

	checkFlag := cli.
		Flag("check", "Check that the file is sorted instead of sorting it. If it is not sorted (or not unique if --unique is given) the exit status will be 1.").
		Default("false").
		Bool()

	debugFlag := cli.
		Flag("debug", "Print out debugging info while running.").
		Default("false").
		Bool()

	docsFlag := cli.
		Flag("docs", "Print out extended sorting documentation.").
		Default("false").
		Bool()

	fileArg := cli.
		Arg("file", "The file to sort.").
		ExistingFile()

	parsed := &opts{}
	o := &omegasort{
		opts: parsed,
		app:  cli,
	}

	if _, err := cli.Parse(os.Args[1:]); err != nil {
		return o, err
	}

	if docsFlag != nil && *docsFlag {
		printExtendedDocs()
		os.Exit(0)
	}

	*parsed = opts{
		sort:            *sortFlag,
		locale:          *localeFlag,
		unique:          *uniqueFlag,
		caseInsensitive: *caseInsensitiveFlag,
		reverse:         *reverseFlag,
		windows:         *windowsFlag,
		inPlace:         *inPlaceFlag,
		toStdout:        *stdoutFlag,
		check:           *checkFlag,
		debug:           *debugFlag,
		file:            *fileArg,
	}

	// Resolve the chosen sort name to its sorters.Approach. When --sort was
	// not given nothing matches and o.sort stays at its zero value.
	for _, approach := range sorters.AvailableSorts {
		if approach.Name == parsed.sort {
			o.sort = approach
			break
		}
	}

	if parsed.debug {
		fmt.Printf("opts = %+v\n", parsed)
	}

	return o, o.validateArgs()
}
func sortDocs() string {
docs := "Sorting Options:\n\n"
width := getWidth()
width -= 4 // length of indent
wrapper := wordwrap.Wrapper(width, false)
for _, as := range sorters.AvailableSorts {
docs += fmt.Sprintf("## %s\n", as.Name)
docs += wordwrap.Indent(wrapper(as.Description), " ", true)
docs += "\n\n"
}
return docs
}
// validateArgs checks the parsed options for missing values and for
// combinations that do not make sense, returning an error describing the
// first problem found. When a locale was given it is parsed and stored in
// o.locale; a parse failure is returned wrapped so callers can inspect the
// underlying error.
func (o *omegasort) validateArgs() error {
	// The cases are evaluated top to bottom, so the first failing check
	// decides which message the user sees.
	switch {
	case o.opts.sort == "":
		return errors.New("you must set a --sort method")
	case o.opts.file == "":
		return errors.New("you must pass a file to sort as the final argument")
	case o.opts.locale != "" && !o.sort.SupportsLocale:
		return fmt.Errorf("you cannot set a locale when sorting by %s", o.sort.Name)
	case o.opts.toStdout && o.opts.inPlace:
		return errors.New("you cannot set both --stdout and --in-place")
	case o.opts.toStdout && o.opts.check:
		return errors.New("you cannot set both --stdout and --check")
	case o.opts.inPlace && o.opts.check:
		return errors.New("you cannot set both --in-place and --check")
	case o.opts.windows && !o.sort.SupportsPathType:
		return fmt.Errorf("you cannot pass the --windows flag when sorting by %s", o.sort.Name)
	}

	if o.opts.locale == "" {
		return nil
	}

	tag, err := language.Parse(o.opts.locale)
	if err != nil {
		return fmt.Errorf("could not find a locale matching %s: %w", o.opts.locale, err)
	}
	o.locale = tag

	return nil
}
// extendedSortDocs is the long-form description of every sorting method. It
// is printed, word-wrapped line by line, by printExtendedDocs when the --docs
// flag is given. The text is runtime output, so edits here change what users
// see.
// nolint: lll
var extendedSortDocs = `There are a number of different sorting methods available.
## Text
This sorts each line of the file as text without any special parsing. The exact sorting is determined by the --locale, --case-insensitive, and --reverse flags. See below for details on how locales work.
## Numbered Text
This assumes that each line of the file starts with a numeric value, optionally followed by non-numeric text.
Lines should not have any leading space before the number. The number can either be an integer (including 0) or a simple float (no scientific notation).
The lines will be sorted numerically first. If two lines have the same number they will be sorted by text as above.
Lines without numbers always sort after lines with numbers.
This sorting method accepts the --locale, --case-insensitive, and --reverse flags.
## Path Sort
Each line is treated as a path.
The paths are sorted by the following rules:
* Absolute paths come before relative.
* Paths are sorted by depth before sorting by the path content, so /z comes before /a/a.
* If you pass the --windows flag, then paths with drive letters are sorted based on the drive letter first. Paths with drive letters sort before paths without them.
This sorting method accepts the --locale, --case-insensitive, and --reverse flags in addition to the --windows flag.
## Datetime Sort
This sorting method assumes that each line starts with a date or datetime, without any space in it. That means datetimes need to be in a format like "2019-08-27T19:13:16".
Lines should not have any leading space before the datetime.
This sorting method accepts the --locale, --case-insensitive, and --reverse flags.
## IP Sort
This method assumes that each line is an IPv4 or IPv6 address (not a network).
The sorting method is the same as if each line were the corresponding integer for the address.
This sorting method accepts the --reverse flag.
## Network Sort
This method assumes that each line is an IPv4 or IPv6 network in CIDR notation.
If there are two networks with the same base address they are sorted with the larger network first (so 1.1.1.0/24 comes before 1.1.1.0/28).
This sorting method accepts the --reverse flag.
`
func printExtendedDocs() {
width := getWidth()
wrapper := wordwrap.Wrapper(width, false)
lines := strings.Split(extendedSortDocs, "\n")
for _, l := range lines {
var err error
_, err = os.Stdout.WriteString(wrapper(l) + "\n")
if err != nil {
panic(err)
}
}
}
// maxWidth is the widest column count getWidth will ever report, however
// wide the terminal is.
const maxWidth = 90

// getWidth returns the column width to wrap help and documentation text to.
// It asks for the size of the terminal attached to stderr, caps the answer at
// maxWidth, and falls back to 80 columns when the size cannot be determined.
func getWidth() int {
	cols, _, err := term.GetSize(int(os.Stderr.Fd()))

	switch {
	case err != nil:
		return 80
	case cols > maxWidth:
		return maxWidth
	default:
		return cols
	}
}
// firstChunk is the number of bytes determineLineEnding reads from the start
// of the file when working out which line ending the file uses.
const firstChunk = 2048
// run performs the work selected on the command line.
//
// In --check mode it only verifies that the file is already sorted (and, with
// --unique, free of repeated lines), returning errNotSorted or a
// notUniqueError when it is not.
//
// Otherwise it sorts the lines, optionally removes repeats, and writes the
// result: to stdout when --stdout is set, or via a temporary file that
// updateFiles copies over the original. The original file is only rewritten
// when the content actually changed.
func (o *omegasort) run() error {
	params := sorters.SortParams{
		Locale:          o.locale,
		CaseInsensitive: o.opts.caseInsensitive,
		Reverse:         o.opts.reverse,
	}
	if o.opts.windows {
		params.PathType = sorters.WindowsPaths
	}

	lines, err := o.readLines()
	if err != nil {
		return err
	}

	// The comparison function reports failures through errRef rather than a
	// return value, so errRef has to be inspected after every use of sorter.
	sorter, errRef := o.sort.MakeSortFunc(&lines, params)

	if o.opts.check {
		ok := sort.SliceIsSorted(lines, sorter)
		if *errRef != nil {
			return *errRef
		}
		if !ok {
			return errNotSorted
		}
		if o.opts.unique {
			return o.checkUnique(lines)
		}
		return nil
	}

	origHash, err := o.hashLines(lines)
	if err != nil {
		return err
	}

	sort.SliceStable(lines, sorter)
	if *errRef != nil {
		return *errRef
	}

	if o.opts.unique {
		lines = o.uniquify(lines)
	}

	newHash, err := o.hashLines(lines)
	if err != nil {
		return err
	}

	changed := origHash != newHash
	if !changed && !o.opts.toStdout {
		// Already sorted and nothing was asked to be printed.
		return nil
	}

	out, err := o.outputFile()
	if err != nil {
		return err
	}

	if err := writeLines(out, lines, o.lineEnding); err != nil {
		if !o.opts.toStdout {
			// Best-effort cleanup so a failed write does not leave an open,
			// half-written temporary file behind. The write error is the one
			// worth reporting, so cleanup failures are deliberately ignored.
			_ = out.Close()
			_ = os.Remove(out.Name())
		}
		return err
	}

	if !changed {
		return nil
	}

	// The file has to be closed before it can be removed on Windows, so it
	// is closed on every platform.
	if err := out.Close(); err != nil {
		return err
	}

	if o.opts.toStdout {
		return nil
	}

	return o.updateFiles(out.Name())
}

// writeLines writes every line followed by lineEnding to w. Output goes
// through a buffer, which is flushed before returning, so that large files do
// not cost two write calls per line.
func writeLines(w io.Writer, lines []string, lineEnding []byte) error {
	bw := bufio.NewWriter(w)
	for _, l := range lines {
		if _, err := bw.WriteString(l); err != nil {
			return err
		}
		if _, err := bw.Write(lineEnding); err != nil {
			return err
		}
	}
	return bw.Flush()
}
// readLines detects the file's line ending and then returns the file's
// content split into lines, with the line endings removed.
func (o *omegasort) readLines() ([]string, error) {
	if err := o.determineLineEnding(); err != nil {
		return nil, err
	}

	file, err := os.Open(o.opts.file)
	if err != nil {
		return nil, err
	}
	// The handle is read-only, so a failed Close cannot lose data and its
	// error is ignored. Without this the descriptor stayed open for the rest
	// of the run.
	// nolint:errcheck
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Split(o.splitFunc())

	lines := []string{}
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	return lines, nil
}
// Line endings recognized by determineLineEnding.
var (
	// crlf is the Windows line ending: carriage return followed by line feed.
	crlf = []byte{'\r', '\n'}
	// cr is a lone carriage return.
	cr = []byte{'\r'}
	// nl is a lone line feed.
	nl = []byte{'\n'}
)
// determineLineEnding reads up to firstChunk bytes from the start of the file
// and sets o.lineEnding to the first of crlf, cr, or nl (checked in that
// order) found in that data. It returns an error when the file is empty,
// cannot be read, or contains none of the three in the bytes examined.
func (o *omegasort) determineLineEnding() error {
	file, err := os.Open(o.opts.file)
	if err != nil {
		return err
	}
	// The handle is read-only, so a failed Close cannot lose data and its
	// error is ignored.
	// nolint:errcheck
	defer file.Close()

	buf := make([]byte, firstChunk)
	n, err := io.ReadAtLeast(file, buf, firstChunk)
	switch {
	case err == nil, errors.Is(err, io.ErrUnexpectedEOF):
		// ErrUnexpectedEOF only means the file is smaller than firstChunk,
		// which is fine.
	case errors.Is(err, io.EOF):
		return fmt.Errorf("could not read any data from %s", o.opts.file)
	default:
		return fmt.Errorf("error trying to read data from %s: %w", o.opts.file, err)
	}

	// Only look at the bytes that were actually read.
	head := buf[:n]

	switch {
	case bytes.Contains(head, crlf):
		o.lineEnding = crlf
	case bytes.Contains(head, cr):
		o.lineEnding = cr
	case bytes.Contains(head, nl):
		o.lineEnding = nl
	default:
		return fmt.Errorf("could not determine line ending from reading first %d bytes of %s", firstChunk, o.opts.file)
	}

	return nil
}
// splitFunc returns a bufio.SplitFunc that tokenizes input on o.lineEnding.
// Each token is one line without its terminator. Data left over at end of
// input that has no terminator is returned as a final line.
func (o *omegasort) splitFunc() bufio.SplitFunc {
	return func(data []byte, atEOF bool) (int, []byte, error) {
		sep := o.lineEnding
		end := bytes.Index(data, sep)

		switch {
		case atEOF && len(data) == 0:
			// Nothing left to hand out.
			return 0, nil, nil
		case end >= 0:
			// A complete line: consume it together with its terminator.
			return end + len(sep), data[:end], nil
		case atEOF:
			// Unterminated final line.
			return len(data), data, nil
		default:
			// No terminator in the buffer yet; ask the scanner for more data.
			return 0, nil, nil
		}
	}
}
// notUniqueError is the error checkUnique returns when it finds a line that
// has already appeared earlier in the file.
type notUniqueError struct {
	// line is the 1-based number of the repeated line.
	line int
	// content is the text of the repeated line.
	content string
}

// Error implements the error interface, naming the repeated line's number
// and its content.
func (e notUniqueError) Error() string {
	return "line " + fmt.Sprint(e.line) + " is a repeat - " + e.content
}
// checkUnique reports whether every line is distinct. It returns nil when
// there are no repeats; otherwise it returns a notUniqueError identifying the
// first line (1-based) whose text already appeared earlier in lines.
func (o *omegasort) checkUnique(lines []string) error {
	known := make(map[string]struct{}, len(lines))

	for idx := range lines {
		text := lines[idx]
		if _, repeated := known[text]; repeated {
			return notUniqueError{line: idx + 1, content: text}
		}
		known[text] = struct{}{}
	}

	return nil
}
// uniquify returns a new slice holding the first occurrence of every distinct
// line, in the order those occurrences appear in lines. The input slice is
// not modified.
func (o *omegasort) uniquify(lines []string) []string {
	kept := make([]string, 0, len(lines))
	known := make(map[string]struct{}, len(lines))

	for _, text := range lines {
		if _, repeated := known[text]; !repeated {
			known[text] = struct{}{}
			kept = append(kept, text)
		}
	}

	return kept
}
// hashLines returns a hex-encoded MD5 digest of lines. run compares the
// digest taken before and after sorting to decide whether the file content
// changed; the digest is not used for anything security-related.
//
// Each line is hashed with a length prefix so that the digest depends on
// where the line boundaries are. Hashing the bare concatenation would give
// ["aa", "a"] and ["a", "aa"] the same digest, and an unsorted file would be
// mistaken for one that needs no rewriting.
func (o *omegasort) hashLines(lines []string) (string, error) {
	h := md5.New()
	for _, l := range lines {
		if _, err := fmt.Fprintf(h, "%d:%s", len(l), l); err != nil {
			return "", err
		}
	}

	return fmt.Sprintf("%x", h.Sum(nil)), nil
}
// outputFile returns the file the sorted lines should be written to: os.Stdout
// when --stdout was given, otherwise a newly created temporary file whose
// name starts with "omegasort".
func (o *omegasort) outputFile() (*os.File, error) {
	if !o.opts.toStdout {
		return ioutil.TempFile("", "omegasort")
	}

	return os.Stdout, nil
}
// updateFiles replaces the file being sorted with the content of from (the
// temporary file holding the sorted output) and then deletes from. Unless
// --in-place was given, the original is first copied to "<file>.bak".
func (o *omegasort) updateFiles(from string) error {
	target := o.opts.file

	if keepBackup := !o.opts.inPlace; keepBackup {
		backup := target + ".bak"
		if err := copy(target, backup); err != nil {
			return fmt.Errorf("error copying %s to %s: %w", target, backup, err)
		}
	}

	err := copy(from, target)
	if err != nil {
		return fmt.Errorf("error copying %s to %s: %w", from, target, err)
	}

	err = os.Remove(from)
	if err != nil {
		return fmt.Errorf("error deleting %s: %w", from, err)
	}

	return nil
}
// copy writes the content of the file named from into the file named to,
// creating to if it does not exist and truncating it if it does. Note that
// within this package the name shadows the built-in copy function.
func copy(from, to string) error {
	src, openErr := os.Open(from)
	if openErr != nil {
		return fmt.Errorf("error opening %s: %w", from, openErr)
	}
	// nolint:errcheck
	defer src.Close()

	dst, createErr := os.Create(to)
	if createErr != nil {
		return fmt.Errorf("error opening %s: %w", to, createErr)
	}
	// Safety net for the early return below; on the success path the
	// explicit Close at the end is the one whose error is reported.
	// nolint:errcheck
	defer dst.Close()

	if _, copyErr := io.Copy(dst, src); copyErr != nil {
		return copyErr
	}

	return dst.Close()
}