Skip to content

Commit

Permalink
Refactoring HTTP client for better consistency and maintainability (#401
Browse files Browse the repository at this point in the history
)

* Refactor http client

* Sets rivet client

* Always specify client timeout

* Remove client timeout

* Use proxy instead

* Use proxy address from configuration

* Do not change default client

* Fix linter

* Remove rivet client

* Move client to ingress module

* Correct package name

* Allow access to proxy.Dialer

* Return when NewUTLSRoundTripper got error

* Support for more socks specification scheme

* Timeout for default client

* Format error

* Minor fix

* Bump dependency

* Upgrade dependencies

* Run tests on Go 1.21 and 1.22

* Fix data race

* Assign http transport

* testing: do not check context

* ci: allow packages read and statuses write permission for linter workflow

* Perform integration testing with parallel=1

* update permission for linter workflow

* Perform testing with `CHROMEDP_NO_SANDBOX` environment

* update permission for linter workflow

* Specify the waiting time for reading the WebSocket URL

* ci: ignore install.sh for super linter workflow

* Place proxy for `wabarc/screenshot`
  • Loading branch information
waybackarchiver authored Feb 16, 2024
1 parent da77ae3 commit ceafbdf
Show file tree
Hide file tree
Showing 21 changed files with 390 additions and 555 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/linter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ jobs:
super-linter:
name: Super Linter
uses: wabarc/.github/.github/workflows/reusable-super-linter.yml@main
with:
filter-regex-exclude: 'install.sh'
permissions:
contents: read
packages: read
statuses: write

golangci:
name: golangci-lint
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
fail-fast: false
matrix:
os: [ ubuntu-latest, macos-latest, windows-latest ]
go: [ "1.18", "1.19", "1.20" ]
go: [ "1.19", "1.20", "1.21", "1.22" ]
include:
# only update test coverage stats with the most recent go version on linux
- go: 1.x
Expand Down Expand Up @@ -192,6 +192,9 @@ jobs:
make test
make test-cover
shell: bash
env:
CHROMEDP_NO_SANDBOX: true
CHROMEDP_WSURLREADTIMEOUT: 60

- name: Upload artifact
uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
Expand All @@ -205,3 +208,6 @@ jobs:

- name: Run integration test
run: make test-integration
env:
CHROMEDP_NO_SANDBOX: true
CHROMEDP_WSURLREADTIMEOUT: 60
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,7 @@ test: ## Run testing

test-integration: ## Run integration testing
@echo 'mode: atomic' > coverage.out
@go list ./... | xargs -n1 -I{} sh -c 'CGO_ENABLED=1 go test -race -tags=integration -covermode=atomic -coverprofile=coverage.tmp -coverpkg $(go list ./... | tr "\n" ",") {} && tail -n +2 coverage.tmp >> coverage.out || exit 255'
@rm coverage.tmp
@CGO_ENABLED=1 go test -race -tags=integration -covermode=atomic -parallel=1 -coverprofile=coverage.out ./...

test-cover: ## Collect code coverage
@echo "-> Running go tool cover"
Expand Down
6 changes: 5 additions & 1 deletion cmd/wayback/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ import (
"github.com/spf13/cobra"
"github.com/wabarc/logger"
"github.com/wabarc/wayback/config"
"github.com/wabarc/wayback/ingress"
"github.com/wabarc/wayback/pooling"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/service"
"github.com/wabarc/wayback/storage"
"github.com/wabarc/wayback/systemd"

_ "github.com/wabarc/wayback/ingress"
_ "github.com/wabarc/wayback/ingress/register"
)

// Create channel to listen for signals.
Expand All @@ -42,6 +43,9 @@ func serve(_ *cobra.Command, opts *config.Options, _ []string) {
pool := pooling.New(ctx, cfg...)
go pool.Roll()

// Ingress initialize
ingress.Init(opts)

pub := publish.New(ctx, opts)
go pub.Start()

Expand Down
48 changes: 48 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3070,3 +3070,51 @@ func TestEnableServices(t *testing.T) {
})
}
}

// TestProxy verifies that Options.Proxy returns the address supplied through
// the WAYBACK_PROXY environment variable, and that an empty variable yields
// defProxy.
//
// The test intentionally does not call t.Parallel: it rewrites the
// process-wide environment, which would race with any test running
// concurrently, and t.Setenv must not be used in parallel tests.
func TestProxy(t *testing.T) {
	tests := []struct {
		address  string
		expected string
	}{
		{
			address:  "",
			expected: defProxy,
		},
		{
			address:  "http://127.0.0.1",
			expected: `http://127.0.0.1`,
		},
		{
			address:  "http://127.0.0.1:1080",
			expected: `http://127.0.0.1:1080`,
		},
		{
			address:  "https://127.0.0.1:1080",
			expected: `https://127.0.0.1:1080`,
		},
		{
			address:  "socks5://127.0.0.1:1080",
			expected: `socks5://127.0.0.1:1080`,
		},
	}

	for i, test := range tests {
		t.Run(strconv.Itoa(i), func(t *testing.T) {
			// Start from an empty environment so that WAYBACK_PROXY is the
			// only variable visible to the parser.
			os.Clearenv()
			// t.Setenv fails the test if the variable cannot be set and
			// restores the previous state when the subtest finishes.
			t.Setenv("WAYBACK_PROXY", test.address)

			parser := NewParser()
			opts, err := parser.ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			got := opts.Proxy()
			if got != test.expected {
				t.Fatalf(`Unexpected get proxy, got %s instead of %s`, got, test.expected)
			}
		})
	}
}
7 changes: 7 additions & 0 deletions config/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ const (
defWaybackMaxRetries = 2
defWaybackUserAgent = "WaybackArchiver/1.0"
defWaybackFallback = false
defProxy = ""

defWaybackMeiliEndpoint = ""
defWaybackMeiliIndexing = "capsules"
Expand Down Expand Up @@ -143,6 +144,7 @@ type Options struct {
poolingSize int
storageDir string
maxMediaSize string
proxy string
waybackTimeout int
waybackMaxRetries int
waybackUserAgent string
Expand Down Expand Up @@ -910,3 +912,8 @@ func (o *Options) EnabledMeilisearch() bool {
func (o *Options) HTTPdEnabled() bool {
return o.isEnabled(ServiceHTTPd)
}

// Proxy reports the proxy server address stored in the options, for example
// `socks5://127.0.0.1:1080`. The value is populated from the WAYBACK_PROXY
// setting and is the empty string when none was provided.
func (o *Options) Proxy() string {
	addr := o.proxy
	return addr
}
2 changes: 2 additions & 0 deletions config/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
case "CHROME_REMOTE_ADDR":
p.opts.enabledChromeRemote = hasValue(val, defEnabledChromeRemote)
p.opts.chromeRemoteAddr = parseString(val, defChromeRemoteAddr)
case "WAYBACK_PROXY":
p.opts.proxy = parseString(val, defProxy)
case "WAYBACK_IPFS_HOST":
p.opts.ipfs.host = parseString(val, defIPFSHost)
case "WAYBACK_IPFS_PORT":
Expand Down
1 change: 1 addition & 0 deletions docs/environment.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Use the `-c` / `--config` option to specify the build definition file to use.
| - | `WAYBACK_LISTEN_ADDR` | `0.0.0.0:8964` | The listen address for the HTTP server |
| - | `CHROME_BIN` | - | Preferred path to the Chrome executable |
| - | `CHROME_REMOTE_ADDR` | - | Chrome/Chromium remote debugging address, for screenshot, format: `host:port`, `wss://domain.tld` |
| - | `WAYBACK_PROXY` | - | Proxy address, e.g. `socks5://127.0.0.1:1080` |
| - | `WAYBACK_POOLING_SIZE` | `3` | Number of worker pool for wayback at once |
| - | `WAYBACK_BOLT_PATH` | `./wayback.db` | File path of bolt database |
| - | `WAYBACK_STORAGE_DIR` | - | Directory to store binary file, e.g. PDF, html file |
Expand Down
106 changes: 53 additions & 53 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
module github.com/wabarc/wayback

// +heroku goVersion go1.19
// +heroku goVersion go1.20

go 1.18
go 1.20

require (
github.com/PuerkitoBio/goquery v1.8.1
Expand All @@ -15,11 +15,11 @@ require (
github.com/dustin/go-humanize v1.0.0
github.com/gabriel-vasile/mimetype v1.4.2
github.com/go-shiori/go-readability v0.0.0-20220215145315-dd6828d2f09b
github.com/go-shiori/obelisk v0.0.0-20221119111008-23c015a8fad7
github.com/go-shiori/obelisk v0.0.0-20230316095823-42f6a2f99d9d
github.com/google/go-github/v40 v40.0.0
github.com/gookit/color v1.5.3
github.com/gorilla/mux v1.8.0
github.com/gorilla/websocket v1.5.0
github.com/gorilla/websocket v1.5.1
github.com/hashicorp/go-version v1.6.0
github.com/iawia002/lux v0.17.0
github.com/ipsn/go-libtor v1.0.380
Expand All @@ -28,28 +28,29 @@ require (
github.com/nbd-wtf/go-nostr v0.17.1-0.20230426111250-32ca737acf77
github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/common v0.37.0
github.com/prometheus/client_golang v1.18.0
github.com/prometheus/common v0.46.0
github.com/rs/xid v1.4.0
github.com/slack-go/slack v0.11.2
github.com/spf13/cobra v1.6.1
github.com/thoj/go-ircevent v0.0.0-20190807115034-8e7ce4b5a1eb
github.com/wabarc/archive.is v1.3.0
github.com/wabarc/archive.is v1.4.0
github.com/wabarc/archive.org v1.2.1-0.20210708220121-cb9b83ff9896
github.com/wabarc/go-anonfile v0.1.0
github.com/wabarc/go-catbox v0.1.0
github.com/wabarc/helper v0.0.0-20230418130954-be7440352bcb
github.com/wabarc/imgbb v1.0.0
github.com/wabarc/ipfs-pinner v1.1.1-0.20220126131044-16299c0dd43d
github.com/wabarc/ipfs-pinner v1.1.1-0.20230502052510-dc378f9e202b
github.com/wabarc/logger v0.0.0-20210730133522-86bd3f31e792
github.com/wabarc/playback v0.0.0-20220715111526-90d0327d3f04
github.com/wabarc/rivet v0.1.4-0.20221226142645-ebc8a29d914f
github.com/wabarc/screenshot v1.6.1-0.20230315004517-7587f8bc14e0
github.com/wabarc/playback v0.0.0-20230331122619-84484ab4d599
github.com/wabarc/proxier v0.0.0-20230610135141-b55fe1536465
github.com/wabarc/rivet v0.1.4-0.20230505152228-2c5c81b4bd10
github.com/wabarc/screenshot v1.6.1-0.20240214000834-3820163034f4
github.com/wabarc/telegra.ph v0.0.0-20230318134541-a0922e1ace3a
github.com/wabarc/warcraft v0.3.1-0.20230308125707-3daa5592ba52
go.etcd.io/bbolt v1.3.6
golang.org/x/net v0.9.0
golang.org/x/sync v0.1.0
golang.org/x/net v0.21.0
golang.org/x/sync v0.6.0
gopkg.in/telebot.v3 v3.0.0-20220130115853-f0291132d3c3
maunium.net/go/mautrix v0.12.0
mellium.im/sasl v0.3.1
Expand All @@ -61,84 +62,83 @@ require (
github.com/MercuryEngineering/CookieMonster v0.0.0-20180304172713-1584578b3403 // indirect
github.com/SaveTheRbtz/generic-sync-map-go v0.0.0-20230201052002-6c5833b989be // indirect
github.com/VividCortex/ewma v1.2.0 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/benbjohnson/clock v1.3.0 // indirect
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/benbjohnson/clock v1.3.5 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bitly/go-simplejson v0.5.0 // indirect
github.com/btcsuite/btcd/btcec/v2 v2.3.2 // indirect
github.com/btcsuite/btcd/btcutil v1.1.3 // indirect
github.com/btcsuite/btcd/chaincfg/chainhash v1.0.2 // indirect
github.com/cenkalti/backoff v2.2.1+incompatible // indirect
github.com/cenkalti/backoff/v4 v4.2.0 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cheggaaa/pb/v3 v3.0.8 // indirect
github.com/chromedp/cdproto v0.0.0-20230310204135-a6d692f2c96d // indirect
github.com/chromedp/chromedp v0.9.1 // indirect
github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732 // indirect
github.com/chromedp/chromedp v0.9.5 // indirect
github.com/chromedp/sysutil v1.0.0 // indirect
github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 // indirect
github.com/decred/dcrd/crypto/blake256 v1.0.1 // indirect
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect
github.com/dghubble/sling v1.3.0 // indirect
github.com/dlclark/regexp2 v1.7.0 // indirect
github.com/dop251/goja v0.0.0-20221115122301-6c0d9883792e // indirect
github.com/fatih/color v1.15.0 // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/fortytw2/leaktest v1.3.0 // indirect
github.com/gaukas/godicttls v0.0.3 // indirect
github.com/go-shiori/dom v0.0.0-20210627111528-4e4722cd0d65 // indirect
github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
github.com/gobwas/httphead v0.1.0 // indirect
github.com/gobwas/pool v0.2.1 // indirect
github.com/gobwas/ws v1.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/gobwas/ws v1.3.2 // indirect
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/go-querystring v1.1.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/iawia002/lia v0.0.0-20221116085912-1f653221be4b // indirect
github.com/inconshreveable/mousetrap v1.0.1 // indirect
github.com/ipfs/go-cid v0.3.2 // indirect
github.com/ipfs/go-ipfs-api v0.3.0 // indirect
github.com/ipfs/go-ipfs-files v0.2.0 // indirect
github.com/ipfs/boxo v0.8.1 // indirect
github.com/ipfs/go-cid v0.4.1 // indirect
github.com/ipfs/go-ipfs-api v0.6.0 // indirect
github.com/itchyny/gojq v0.12.7 // indirect
github.com/itchyny/timefmt-go v0.1.3 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kallydev/telegraph-go v1.0.1-0.20230318133700-df034d9eed50 // indirect
github.com/kennygrant/sanitize v1.2.4 // indirect
github.com/kkdai/youtube/v2 v2.7.18 // indirect
github.com/klauspost/cpuid/v2 v2.2.2 // indirect
github.com/kr/pretty v0.3.0 // indirect
github.com/klauspost/compress v1.17.6 // indirect
github.com/klauspost/cpuid/v2 v2.2.6 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/libp2p/go-buffer-pool v0.1.0 // indirect
github.com/libp2p/go-flow-metrics v0.1.0 // indirect
github.com/libp2p/go-libp2p v0.24.1 // indirect
github.com/libp2p/go-libp2p-core v0.20.1 // indirect
github.com/libp2p/go-openssl v0.1.0 // indirect
github.com/libp2p/go-libp2p v0.32.2 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.18 // indirect
github.com/mattn/go-pointer v0.0.1 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/minio/sha256-simd v1.0.0 // indirect
github.com/minio/sha256-simd v1.0.1 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mr-tron/base58 v1.2.0 // indirect
github.com/multiformats/go-base32 v0.1.0 // indirect
github.com/multiformats/go-base36 v0.2.0 // indirect
github.com/multiformats/go-multiaddr v0.8.0 // indirect
github.com/multiformats/go-multibase v0.1.1 // indirect
github.com/multiformats/go-multicodec v0.7.0 // indirect
github.com/multiformats/go-multihash v0.2.1 // indirect
github.com/multiformats/go-multiaddr v0.12.2 // indirect
github.com/multiformats/go-multibase v0.2.0 // indirect
github.com/multiformats/go-multicodec v0.9.0 // indirect
github.com/multiformats/go-multihash v0.2.3 // indirect
github.com/multiformats/go-multistream v0.5.0 // indirect
github.com/multiformats/go-varint v0.0.7 // indirect
github.com/oliamb/cutter v0.2.2 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/refraction-networking/utls v1.3.2 // indirect
github.com/rivo/uniseg v0.4.3 // indirect
github.com/robertkrimen/otto v0.0.0-20211024170158-b87d35c0b86f // indirect
github.com/rogpeppe/go-internal v1.9.0 // indirect
github.com/rogpeppe/go-internal v1.10.0 // indirect
github.com/sirupsen/logrus v1.9.0 // indirect
github.com/spacemonkeygo/spacelog v0.0.0-20180420211403-2296661a0572 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/tdewolff/parse/v2 v2.6.5 // indirect
Expand All @@ -147,20 +147,20 @@ require (
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tidwall/sjson v1.2.4 // indirect
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 // indirect
github.com/wabarc/memento v0.0.0-20210703205719-adc2f8ab8bae // indirect
github.com/wabarc/memento v0.0.0-20221023035729-e42d0fe1cefb // indirect
github.com/whyrusleeping/tar-utils v0.0.0-20201201191210-20a61371de5b // indirect
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect
github.com/ybbus/httpretry v1.0.1 // indirect
golang.org/x/crypto v0.5.0 // indirect
golang.org/x/exp v0.0.0-20230425010034-47ecfdc1ba53 // indirect
golang.org/x/mod v0.8.0 // indirect
golang.org/x/sys v0.7.0 // indirect
golang.org/x/text v0.9.0 // indirect
golang.org/x/tools v0.6.0 // indirect
google.golang.org/protobuf v1.28.1 // indirect
github.com/ybbus/httpretry v1.0.2 // indirect
golang.org/x/crypto v0.19.0 // indirect
golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3 // indirect
golang.org/x/mod v0.15.0 // indirect
golang.org/x/sys v0.17.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/tools v0.17.0 // indirect
google.golang.org/protobuf v1.32.0 // indirect
gopkg.in/sourcemap.v1 v1.0.5 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
lukechampine.com/blake3 v1.1.7 // indirect
lukechampine.com/blake3 v1.2.1 // indirect
mellium.im/reader v0.1.0 // indirect
mvdan.cc/xurls/v2 v2.4.0 // indirect
mvdan.cc/xurls/v2 v2.5.0 // indirect
)
Loading

0 comments on commit ceafbdf

Please sign in to comment.