Skip to content

Commit

Permalink
feat: domain names are no longer duplicated. Add support of ABP parsi…
Browse files Browse the repository at this point in the history
…ng. Add goroutines for to parse blocklists faster.
  • Loading branch information
WIttyJudge committed Nov 19, 2024
1 parent f35fac2 commit e22e3f5
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 34 deletions.
134 changes: 100 additions & 34 deletions internal/hostsfile/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"barrier/internal/http"
"fmt"
"strings"
"sync"

"github.com/rs/zerolog/log"
)
Expand All @@ -13,6 +14,7 @@ const localhost = "127.0.0.1"

// Processor is a structure that is responsible for processing blocklists.
type Processor struct {
wg *sync.WaitGroup
config *config.Config
httpClient *http.HTTP
}
Expand All @@ -22,15 +24,15 @@ type Result struct {
startTag string
endTag string
descriptionComment string
parsedBlocklists []ParsedBlocklist
domainsBlocklist map[string]LineContent
}

// ParsedBlocklist represents a completed result of blocklist
// BlocklistResult represents a parsed result of blocklist
// that is ready to be appended into hosts file.
type ParsedBlocklist struct {
type BlocklistResult struct {
DomainsCount int

linesContent []LineContent
linesContent map[string]LineContent
}

type LineContent struct {
Expand All @@ -43,6 +45,7 @@ func NewProcessor(config *config.Config) *Processor {
httpClient := http.New()

return &Processor{
wg: &sync.WaitGroup{},
config: config,
httpClient: httpClient,
}
Expand All @@ -51,91 +54,154 @@ func NewProcessor(config *config.Config) *Processor {
// Process processes blocklists and returns a finished result
// that is ready to save to hosts file.
func (p *Processor) Process() (Result, error) {
parsedBlocklists := make([]ParsedBlocklist, 0, len(p.config.Blocklists))
blocklistsResult := make([]BlocklistResult, len(p.config.Blocklists))
totalDomainsCount := 0

for _, blocklist := range p.config.Blocklists {
for i, blocklist := range p.config.Blocklists {
i := i
target := blocklist.Target

log.Info().Str("target", target).Msg("processing blocklist..")
p.wg.Add(1)

fileContent, err := p.httpClient.Get(target)
if err != nil {
log.Error().Err(err).Str("target", target).Msg("failed to process blocklist")
continue
}
go func() {
defer p.wg.Done()

parsedBlocklist := p.processBlocklist(fileContent)
log.Info().Str("target", target).Msgf("number of %d domains parsed", parsedBlocklist.DomainsCount)
blocklistResult, err := p.processBlocklist(target)
if err != nil {
log.Error().Err(err).Str("target", target).Msg("failed to process blocklist")
return
}

parsedBlocklists = append(parsedBlocklists, parsedBlocklist)
blocklistsResult[i] = blocklistResult
totalDomainsCount += blocklistResult.DomainsCount
}()
}

p.wg.Wait()

domainsBlocklist := p.domainsBlocklist(blocklistsResult)

result := Result{
startTag: StartTag,
endTag: EndTag,
descriptionComment: DescriptionComment,
parsedBlocklists: parsedBlocklists,
domainsBlocklist: domainsBlocklist,
}

log.Info().Msgf("total number of uniq domains: %d", len(domainsBlocklist))

return result, nil
}

func (p *Processor) processBlocklist(content string) ParsedBlocklist {
lines := strings.Split(content, "\n")
func (p *Processor) processBlocklist(target string) (BlocklistResult, error) {
log.Info().Str("target", target).Msg("processing blocklist..")

fileContent, err := p.httpClient.Get(target)
if err != nil {
return BlocklistResult{}, err
}

linesContent := make([]LineContent, 0, len(lines))
linesContent := p.processContent(fileContent)

blocklistResult := BlocklistResult{
DomainsCount: len(linesContent),
linesContent: linesContent,
}

log.Info().Str("target", target).Msgf("number of domains: %d", blocklistResult.DomainsCount)

return blocklistResult, nil
}

func (p *Processor) processContent(content string) map[string]LineContent {
lines := strings.Split(content, "\n")
linesContent := make(map[string]LineContent)

for _, line := range lines {
// remove empty spaces
line := strings.TrimSpace(line)

// skip empty lines and comments
if line == "" || p.isLineComment(line) {
// skip empty lines, comments, ABP comments and ABP headers
if line == "" || p.isLineComment(line) || p.isABPComment(line) || p.isABPHeader(line) {
continue
}

// remove a comment in the middle of line
line = p.removeInLineComment(line)

lineContent := LineContent{
ipAddress: localhost,
// Convert all characters to lowercase
line = strings.ToLower(line)

if p.isABPDomain(line) {
line = p.parseABPDomain(line)
}

var domainName string
parts := strings.Fields(line)
if len(parts) == 1 {
lineContent.domainName = parts[0]
domainName = parts[0]
} else {
lineContent.domainName = parts[1]
domainName = parts[1]
}

linesContent = append(linesContent, lineContent)
lineContent := LineContent{
ipAddress: localhost,
domainName: domainName,
}

linesContent[domainName] = lineContent
}

parsedBlocklist := ParsedBlocklist{
linesContent: linesContent,
DomainsCount: len(linesContent),
return linesContent
}

func (p *Processor) domainsBlocklist(blocklistsResult []BlocklistResult) map[string]LineContent {
domainsBlocklist := make(map[string]LineContent)

for _, result := range blocklistsResult {
for _, line := range result.linesContent {
domainsBlocklist[line.domainName] = line
}
}

return parsedBlocklist
return domainsBlocklist
}

func (p *Processor) isLineComment(line string) bool {
return strings.HasPrefix(line, "#")
}

// Example: https://raw.githubusercontent.com/hagezi/dns-blocklists/main/adblock/light.txt
func (p *Processor) isABPComment(line string) bool {
return strings.HasPrefix(line, "!")
}

func (p *Processor) isABPHeader(line string) bool {
return strings.HasPrefix(line, "[")
}

func (p *Processor) removeInLineComment(line string) string {
return strings.Split(line, "#")[0]
}

func (p *Processor) isABPDomain(line string) bool {
return strings.HasPrefix(line, "||") && strings.HasSuffix(line, "^")
}

func (p *Processor) parseABPDomain(line string) string {
line = strings.TrimPrefix(line, "||")
line = strings.TrimSuffix(line, "^")
return line
}

func (r Result) FormatToHostsfile() string {
var builder strings.Builder

builder.WriteString(r.startTag)
builder.WriteString(r.descriptionComment)

for _, parsedBlocklist := range r.parsedBlocklists {
for _, lineContent := range parsedBlocklist.linesContent {
builder.WriteString(lineContent.Format())
}
for _, domain := range r.domainsBlocklist {
builder.WriteString(domain.Format())
}

builder.WriteString(r.endTag)
Expand Down
5 changes: 5 additions & 0 deletions internal/hostsfile/processor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package hostsfile

import "testing"

func TestIsLineComment(t *testing.T) {}

0 comments on commit e22e3f5

Please sign in to comment.