From aa2c8bdf94e2a9ab94d7d393de932d53701698dc Mon Sep 17 00:00:00 2001 From: Rahul Zhade Date: Thu, 8 Sep 2022 11:02:34 -0400 Subject: [PATCH 1/4] Refactor matching logic --- main.go | 88 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/main.go b/main.go index 3a832f9..58d6e4d 100644 --- a/main.go +++ b/main.go @@ -212,54 +212,60 @@ func searchFile(fileLocation string, keyword string, outfile string) { panic(err) } defer f.Close() + + var matcher func([]byte) bool if strings.HasPrefix(keyword, "regex") { - regexValue := strings.Split(keyword, " ")[1] - r, err := regexp.Compile(regexValue) - if err != nil { - color.Red("Invalid Regex!") - return - } - for scanner.Scan() { - if r.MatchString(scanner.Text()) { - textToWrite := strings.Split(scanner.Text(), "|")[1] - if _, err := f.WriteString(textToWrite + "\n"); err != nil { - panic(err) - } - } - } + matcher, err = regexMatch(keyword) + } else if strings.Contains(keyword, ",") { + matcher, err = multiKeywordMatcher(keyword) } else { - if strings.Contains(keyword, ",") { - keywords := strings.Split(keyword, ",") - for scanner.Scan() { - foundFlag := true - for i := 0; i < len(keywords); i++ { - if bytes.Contains(scanner.Bytes(), []byte(keywords[i])) { - continue - } else { - foundFlag = false - } - } - if foundFlag { - textToWrite := strings.Split(scanner.Text(), "|")[1] - if _, err := f.WriteString(textToWrite + "\n"); err != nil { - panic(err) - } - } - } + matcher, err = stringMatch(keyword) + } + if err != nil { + color.Red(err.Error()) + return + } - } else { - toFind := []byte(keyword) - for scanner.Scan() { - if bytes.Contains(scanner.Bytes(), toFind) { - textToWrite := strings.Split(scanner.Text(), "|")[1] - if _, err := f.WriteString(textToWrite + "\n"); err != nil { - panic(err) - } - } + for scanner.Scan() { + if matcher(scanner.Bytes()) { + textToWrite := strings.Split(scanner.Text(), "|")[1] + if _, err := f.WriteString(textToWrite + "\n"); err != nil { + panic(err) } } } +} + +func regexMatch(keyword string) (func([]byte) bool, error) { + regexValue := strings.Split(keyword, " ")[1] + r, err := regexp.Compile(regexValue) + return func(b []byte) bool { + s := string(b) + return r.MatchString(s) + }, err +} + +func multiKeywordMatcher(keyword string) (func([]byte) bool, error) { + keywords := strings.Split(keyword, ",") + bytes_keywords := make([][]byte, len(keywords)) + for i, k := range keywords { + bytes_keywords[i] = []byte(k) + } + return func(text []byte) bool { + for _, k := range bytes_keywords { + if !bytes.Contains(text, k) { + return false + } + } + return true + }, nil +} +func stringMatch(keyword string) (func([]byte) bool, error) { + bytes_keyword := []byte(keyword) + return func(b []byte) bool { + return bytes.Contains(b, bytes_keyword) + }, nil } func ifArchiveExists(fullname string) bool { From d6c69ba7a9bd1134294e3f00c7326cc3132951fe Mon Sep 17 00:00:00 2001 From: Rahul Zhade Date: Thu, 8 Sep 2022 11:11:02 -0400 Subject: [PATCH 2/4] Also log short URL --- go.mod | 1 + go.sum | 6 +++++- main.go | 14 ++++++++++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 3e5f6a4..19ff896 100644 --- a/go.mod +++ b/go.mod @@ -4,5 +4,6 @@ go 1.15 require ( github.com/fatih/color v1.10.0 + github.com/rzhade3/beaconspec v0.0.0-20220908152424-9ee117a49aef github.com/schollz/progressbar/v3 v3.7.1 ) diff --git a/go.sum b/go.sum index 62e32a8..292651a 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,5 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= @@ -11,11 +12,14 @@ github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/Qd github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/schollz/progressbar v1.0.0 h1:gbyFReLHDkZo8mxy/dLWMr+Mpb1MokGJ1FqCiqacjZM= +github.com/rzhade3/beaconspec v0.0.0-20220908152424-9ee117a49aef h1:pUdPMuB/3D1DhN+z+aKJH2HFe65tO14vYUkshaSL29g= +github.com/rzhade3/beaconspec v0.0.0-20220908152424-9ee117a49aef/go.mod h1:iTcJ+0KrnJXKBZvYH/Q6GKLhFuiXzD3z2PRae7xWqpY= github.com/schollz/progressbar/v3 v3.7.1 h1:aQR/t6d+1nURSdoMn6c7n0vJi5xQ3KndpF0n7R5wrik= github.com/schollz/progressbar/v3 v3.7.1/go.mod h1:CG/f0JmacksUc6TkZToO7tVq4t03zIQSQUtTd7F9GR4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9 h1:umElSU9WZirRdgu2yFHY0ayQkEnKiOC1TtM3fWXFnoU= diff --git a/main.go b/main.go index 58d6e4d..a015334 100644 --- a/main.go +++ b/main.go @@ -19,6 +19,7 @@ import ( "time" "github.com/fatih/color" + "github.com/rzhade3/beaconspec" "github.com/schollz/progressbar/v3" ) @@ -212,6 +213,10 @@ func searchFile(fileLocation string, keyword string, outfile string) { panic(err) } defer f.Close() + metadata, err := beaconspec.ReadMetadata(fileLocation) + if err != nil { + panic(err) + } var matcher func([]byte) bool if strings.HasPrefix(keyword, "regex") { @@ -228,8 +233,13 @@ func searchFile(fileLocation string, keyword string, outfile string) { for scanner.Scan() { if matcher(scanner.Bytes()) { - textToWrite := strings.Split(scanner.Text(), "|")[1] - if _, err := f.WriteString(textToWrite + "\n"); err != nil { + + line, err := beaconspec.ParseLine(scanner.Text(), &metadata) + if err != nil { + panic(err) + } + textToWrite := fmt.Sprintf("%s,%s\n", line.Source, line.Target) + if _, err := f.WriteString(textToWrite); err != nil { panic(err) } } From 1465b824a01f8c8a5bf6f999bf27a40dad5b85b9 Mon Sep 17 00:00:00 2001 From: Rahul Zhade Date: Thu, 8 Sep 2022 16:28:12 -0400 Subject: [PATCH 3/4] Update parseLine call --- go.mod | 2 +- go.sum | 4 ++-- main.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 19ff896..10c7a2a 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,6 @@ go 1.15 require ( github.com/fatih/color v1.10.0 - github.com/rzhade3/beaconspec v0.0.0-20220908152424-9ee117a49aef + github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3 github.com/schollz/progressbar/v3 v3.7.1 ) diff --git a/go.sum b/go.sum index 292651a..3c370b6 100644 --- a/go.sum +++ b/go.sum @@ -14,8 +14,8 @@ github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2Em github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rzhade3/beaconspec v0.0.0-20220908152424-9ee117a49aef h1:pUdPMuB/3D1DhN+z+aKJH2HFe65tO14vYUkshaSL29g= -github.com/rzhade3/beaconspec v0.0.0-20220908152424-9ee117a49aef/go.mod h1:iTcJ+0KrnJXKBZvYH/Q6GKLhFuiXzD3z2PRae7xWqpY= +github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3 h1:2YkbhM98YoshI0K0BD95IoCFx+KNN1L/G0P5WzY2kac= +github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3/go.mod h1:iTcJ+0KrnJXKBZvYH/Q6GKLhFuiXzD3z2PRae7xWqpY= github.com/schollz/progressbar/v3 v3.7.1 h1:aQR/t6d+1nURSdoMn6c7n0vJi5xQ3KndpF0n7R5wrik= github.com/schollz/progressbar/v3 v3.7.1/go.mod h1:CG/f0JmacksUc6TkZToO7tVq4t03zIQSQUtTd7F9GR4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= diff --git a/main.go b/main.go index a015334..4d558d0 100644 --- a/main.go +++ b/main.go @@ -234,7 +234,7 @@ func searchFile(fileLocation string, keyword string, outfile string) { for scanner.Scan() { if matcher(scanner.Bytes()) { - line, err := beaconspec.ParseLine(scanner.Text(), &metadata) + line, err := beaconspec.ParseLine(scanner.Text(), metadata) if err != nil { panic(err) } From d2afdc36af3d6afbb9466758ca722b951ab7065f Mon Sep 17 00:00:00 2001 From: Rahul Zhade Date: Fri, 23 Sep 2022 10:28:08 -0700 Subject: [PATCH 4/4] Other supplementary improvements --- README.md | 2 +- main.go | 16 ---------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/README.md b/README.md index f37cc46..4d7b585 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ You can download the pre-built binaries from the [releases](https://github.com/u urlhunter requires 3 parameters to run: `-keywords`, `-date` and `-o`. -For example: `urlhunter -keywords keywords.txt -date 2020-11-20 -o out.txt` +For example: `urlhunter --keywords keywords.txt --date 2020-11-20 --o out.txt` ### --keywords diff --git a/main.go b/main.go index 0a8a0d5..ac11892 100644 --- a/main.go +++ b/main.go @@ -63,7 +63,6 @@ var err error var archivesPath string func main() { - var keywordFile string var dateParam string var outFile string @@ -227,7 +226,6 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) { } func searchFile(fileLocation string, keyword string, outfile string) { - var path string if strings.HasPrefix(fileLocation, "archives") { @@ -384,20 +382,6 @@ func downloadFile(url string) { color.Green("Download Finished!") } -func ByteCountSI(b int64) string { - const unit = 1000 - if b < unit { - return fmt.Sprintf("%d B", b) - } - div, exp := int64(unit), 0 - for n := b / unit; n >= unit; n /= unit { - div *= unit - exp++ - } - return fmt.Sprintf("%.1f %cB", - float64(b)/float64(div), "kMGTPE"[exp]) -} - func Unzip(src string, dest string) ([]string, error) { var filenames []string r, err := zip.OpenReader(src)