From b3ea7fbefa2783dd83882b12ed9f39b46fa55802 Mon Sep 17 00:00:00 2001
From: Paolo Fabio Zaino
Date: Mon, 22 Jan 2024 17:53:27 +0000
Subject: [PATCH] crawler: fix isExternalLink panic on hostnames with fewer than two labels (bug found during integration tests)
---
pkg/crawler/crawler.go | 14 ++++++++++++--
pkg/crawler/crawler_test.go | 3 ++-
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/pkg/crawler/crawler.go b/pkg/crawler/crawler.go
index 09a8a355..f1523742 100644
--- a/pkg/crawler/crawler.go
+++ b/pkg/crawler/crawler.go
@@ -342,10 +342,20 @@ func isExternalLink(sourceURL, linkURL string) bool {
// Takes the substring that correspond to the 1st and 2nd level domain (e.g., google.com)
// regardless the number of subdomains
+ var srcDomainName string
srcFqdnArr := strings.Split(sourceParsed.Hostname(), ".")
- srcDomainName := strings.Join(srcFqdnArr[len(srcFqdnArr)-2:], ".")
+ if len(srcFqdnArr) < 3 {
+ srcDomainName = strings.Join(srcFqdnArr, ".")
+ } else {
+ srcDomainName = strings.Join(srcFqdnArr[len(srcFqdnArr)-2:], ".")
+ }
linkFqdnArr := strings.Split(linkParsed.Hostname(), ".")
- linkDomainName := strings.Join(linkFqdnArr[len(linkFqdnArr)-2:], ".")
+ var linkDomainName string
+ if len(linkFqdnArr) < 3 {
+ linkDomainName = strings.Join(linkFqdnArr, ".")
+ } else {
+ linkDomainName = strings.Join(linkFqdnArr[len(linkFqdnArr)-2:], ".")
+ }
// Compare hostnames
return srcDomainName != linkDomainName
diff --git a/pkg/crawler/crawler_test.go b/pkg/crawler/crawler_test.go
index 4bb17e7a..b6c8e782 100644
--- a/pkg/crawler/crawler_test.go
+++ b/pkg/crawler/crawler_test.go
@@ -41,7 +41,7 @@ func TestExtractLinks(t *testing.T) {
}
}
-func Test_isExternalLink(t *testing.T) {
+func TestIsExternalLink(t *testing.T) {
type args struct {
sourceURL string
linkURL string
@@ -57,6 +57,7 @@ func Test_isExternalLink(t *testing.T) {
{"test3", args{"https://www.google.com", "https://www.google.com/test/test"}, false},
{"test4", args{"https://www.example.com", "https://www.google.com/test/test/test"}, true},
{"test5", args{"https://data.example.com", "https://www.example.com"}, false},
+ {"test6", args{"www.apps.com", "javascript:void(0)"}, false},
}
for _, tt := range tests {