Skip to content

Commit

Permalink
feat: Allow passing headers
Browse files Browse the repository at this point in the history
  • Loading branch information
marcotuna committed Jul 18, 2024
1 parent 7eae728 commit 0cb0cef
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 22 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build
FROM golang:1.20-alpine AS build
FROM golang:1.22-alpine AS build

# Install dependencies
RUN apk update && apk upgrade && apk add --no-cache \
Expand All @@ -12,7 +12,7 @@ COPY . .
RUN make build-linux

# Final container
FROM alpine:3.18
FROM alpine:3.20

WORKDIR /app

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.minimal
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM alpine:3.18
FROM alpine:3.20

RUN apk update && \
apk add --no-cache curl ca-certificates && \
Expand Down
9 changes: 8 additions & 1 deletion cmd/wp-go-static/commands/scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ func init() {
ScrapeCmd.PersistentFlags().Bool("images", true, "Download images")
ScrapeCmd.PersistentFlags().Bool("check-head", true, "Checks head")
// ScrapeCmd.MarkPersistentFlagRequired("url")
// Allow passing additional headers as map[string]string
ScrapeCmd.PersistentFlags().StringToString("headers", map[string]string{}, "Additional headers")

ScrapeCmd.PersistentFlags().VisitAll(func(flag *pflag.Flag) {
bindFlag := fmt.Sprintf("%s.%s", bindFlagScrapePrefix, flag.Name)
Expand Down Expand Up @@ -95,7 +97,7 @@ func scrapeCmdF(command *cobra.Command, args []string) error {
scrape.hostname = parsedURL.Hostname()

// Visit only pages that are part of the website
scrape.c.AllowedDomains = []string{scrape.hostname}
scrape.c.AllowedDomains = []string{parsedURL.Host}

for _, extraPage := range scrape.config.Scrape.ExtraPages {
log.Println("Visiting Extra Page:", extraPage)
Expand Down Expand Up @@ -142,6 +144,11 @@ func scrapeCmdF(command *cobra.Command, args []string) error {

// Before making a request print "Visiting ..."
scrape.c.OnRequest(func(r *colly.Request) {
// Set headers
for headerName, headerValue := range scrape.config.Scrape.Headers {
r.Headers.Set(headerName, headerValue)
}

switch r.Method {
case http.MethodGet:
log.Printf("Visiting: %s\n", r.URL.String())
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module wp-go-static

go 1.20
go 1.22

require (
github.com/gocolly/colly v1.2.0
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,15 @@ github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po=
github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
Expand Down Expand Up @@ -123,6 +125,7 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
Expand Down Expand Up @@ -157,9 +160,11 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
Expand All @@ -170,9 +175,11 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sagikazarmark/locafero v0.3.0 h1:zT7VEGWC2DTflmccN/5T1etyKvxSxpHsjb9cJvm4SvQ=
github.com/sagikazarmark/locafero v0.3.0/go.mod h1:w+v7UsPNFwzF1cHuOajOOzoq4U7v/ig1mpRjqV+Bu1U=
Expand Down Expand Up @@ -540,6 +547,7 @@ google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
Expand Down
37 changes: 20 additions & 17 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,30 @@ type Config struct {
}

// SitemapConfig groups the sitemap settings. Each field's mapstructure tag
// names the configuration key it is decoded from.
//
// NOTE(review): this is a diff rendering with the +/- markers stripped; the
// first four fields are the pre-change lines and the following five are the
// post-change lines. The only field added by the commit is Headers.
type SitemapConfig struct {
Dir string `mapstructure:"dir"`
URL string `mapstructure:"url"`
ReplaceURL string `mapstructure:"replace-url"`
File string `mapstructure:"file"`
Dir string `mapstructure:"dir"`
URL string `mapstructure:"url"`
ReplaceURL string `mapstructure:"replace-url"`
File string `mapstructure:"file"`
// Headers maps a header name to its value, decoded from the "headers" key.
// NOTE(review): where these are applied to sitemap requests is not visible
// in this diff — confirm against the sitemap command.
Headers map[string]string `mapstructure:"headers"`
}

// ScrapeConfig groups the scrape settings. Each field's mapstructure tag
// names the configuration key it is decoded from.
//
// NOTE(review): this is a diff rendering with the +/- markers stripped; the
// first nine fields are the pre-change lines and the following ten are the
// post-change lines. The only field added by the commit is Headers.
type ScrapeConfig struct {
Dir string `mapstructure:"dir"`
URL string `mapstructure:"url"`
Cache string `mapstructure:"cache"`
ReplaceURL string `mapstructure:"replace-url"`
Replace bool `mapstructure:"replace"`
Parallel bool `mapstructure:"parallel"`
Images bool `mapstructure:"images"`
CheckHead bool `mapstructure:"check-head"`
ExtraPages []string `mapstructure:"extra-pages"`
Dir string `mapstructure:"dir"`
URL string `mapstructure:"url"`
Cache string `mapstructure:"cache"`
ReplaceURL string `mapstructure:"replace-url"`
Replace bool `mapstructure:"replace"`
Parallel bool `mapstructure:"parallel"`
Images bool `mapstructure:"images"`
CheckHead bool `mapstructure:"check-head"`
ExtraPages []string `mapstructure:"extra-pages"`
// Headers maps a header name to its value, decoded from the "headers" key.
// Presumably this is the map read as scrape.config.Scrape.Headers in the
// scrape command, where every entry is set on each request inside the
// collector's OnRequest callback — verify that config.Scrape is this type.
Headers map[string]string `mapstructure:"headers"`
}

// RobotsConfig groups the robots settings. Each field's mapstructure tag
// names the configuration key it is decoded from.
//
// NOTE(review): this is a diff rendering with the +/- markers stripped; the
// first four fields are the pre-change lines and the following five are the
// post-change lines. The only field added by the commit is Headers.
type RobotsConfig struct {
Dir string `mapstructure:"dir"`
URL string `mapstructure:"url"`
ReplaceURL string `mapstructure:"replace-url"`
File string `mapstructure:"file"`
Dir string `mapstructure:"dir"`
URL string `mapstructure:"url"`
ReplaceURL string `mapstructure:"replace-url"`
File string `mapstructure:"file"`
// Headers maps a header name to its value, decoded from the "headers" key.
// NOTE(review): where these are applied to robots requests is not visible
// in this diff — confirm against the robots command.
Headers map[string]string `mapstructure:"headers"`
}

0 comments on commit 0cb0cef

Please sign in to comment.