diff --git a/pkg/crawler/crawler.go b/pkg/crawler/crawler.go index 09a8a355..f1523742 100644 --- a/pkg/crawler/crawler.go +++ b/pkg/crawler/crawler.go @@ -342,10 +342,20 @@ func isExternalLink(sourceURL, linkURL string) bool { // Takes the substring that correspond to the 1st and 2nd level domain (e.g., google.com) // regardless the number of subdomains + var srcDomainName string srcFqdnArr := strings.Split(sourceParsed.Hostname(), ".") - srcDomainName := strings.Join(srcFqdnArr[len(srcFqdnArr)-2:], ".") + if len(srcFqdnArr) < 3 { + srcDomainName = strings.Join(srcFqdnArr, ".") + } else { + srcDomainName = strings.Join(srcFqdnArr[len(srcFqdnArr)-2:], ".") + } linkFqdnArr := strings.Split(linkParsed.Hostname(), ".") - linkDomainName := strings.Join(linkFqdnArr[len(linkFqdnArr)-2:], ".") + var linkDomainName string + if len(linkFqdnArr) < 3 { + linkDomainName = strings.Join(linkFqdnArr, ".") + } else { + linkDomainName = strings.Join(linkFqdnArr[len(linkFqdnArr)-2:], ".") + } // Compare hostnames return srcDomainName != linkDomainName diff --git a/pkg/crawler/crawler_test.go b/pkg/crawler/crawler_test.go index 4bb17e7a..b6c8e782 100644 --- a/pkg/crawler/crawler_test.go +++ b/pkg/crawler/crawler_test.go @@ -41,7 +41,7 @@ func TestExtractLinks(t *testing.T) { } } -func Test_isExternalLink(t *testing.T) { +func TestIsExternalLink(t *testing.T) { type args struct { sourceURL string linkURL string @@ -57,6 +57,7 @@ func Test_isExternalLink(t *testing.T) { {"test3", args{"https://www.google.com", "https://www.google.com/test/test"}, false}, {"test4", args{"https://www.example.com", "https://www.google.com/test/test/test"}, true}, {"test5", args{"https://data.example.com", "https://www.example.com"}, false}, + {"test6", args{"www.apps.com", "javascript:void(0)"}, false}, } for _, tt := range tests {