Skip to content

Commit

Permalink
update with new endpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
CorentinB committed Nov 6, 2024
1 parent 7af1997 commit db2e701
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 49 deletions.
10 changes: 6 additions & 4 deletions discovered.go → add.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"net/http"
)

func (c *Client) Discovered(URLs []URL, URLType string, bypassSeencheck bool, seencheckOnly bool) (discoveredResponse *DiscoveredResponse, err error) {
func (c *Client) Add(URLs []URL, bypassSeencheck bool) (discoveredResponse *DiscoveredResponse, err error) {
expectedStatusCode := 201
discoveredResponse = new(DiscoveredResponse)

Expand All @@ -18,8 +18,6 @@ func (c *Client) Discovered(URLs []URL, URLType string, bypassSeencheck bool, se

payload := DiscoveredPayload{
BypassSeencheck: bypassSeencheck,
SeencheckOnly: seencheckOnly,
Type: URLType,
URLs: URLsPayload,
}

Expand All @@ -29,7 +27,7 @@ func (c *Client) Discovered(URLs []URL, URLType string, bypassSeencheck bool, se
}

// build request
req, err := http.NewRequest("POST", c.DiscoveredEndpoint.String(), bytes.NewReader(jsonPayload))
req, err := http.NewRequest("POST", c.URLsEndpoint.String(), bytes.NewReader(jsonPayload))
if err != nil {
return discoveredResponse, err
}
Expand All @@ -38,6 +36,10 @@ func (c *Client) Discovered(URLs []URL, URLType string, bypassSeencheck bool, se
req.Header.Add("X-Auth-Secret", c.Secret)
req.Header.Add("User-Agent", "gocrawlhq/"+Version)

if c.Identifier != "" {
req.Header.Add("X-Identifier", c.Identifier)
}

// execute request
resp, err := c.HTTPClient.Do(req)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions finished.go → delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"net/http"
)

func (c *Client) Finished(URLs []URL, localCrawls int) (finishedResponse *FinishedResponse, err error) {
func (c *Client) Delete(URLs []URL, localCrawls int) (finishedResponse *FinishedResponse, err error) {
expectedStatusCode := 200
finishedResponse = new(FinishedResponse)

Expand All @@ -23,7 +23,7 @@ func (c *Client) Finished(URLs []URL, localCrawls int) (finishedResponse *Finish
}

// build request
req, err := http.NewRequest("POST", c.FinishedEndpoint.String(), bytes.NewReader(jsonPayload))
req, err := http.NewRequest("DELETE", c.URLsEndpoint.String(), bytes.NewReader(jsonPayload))
if err != nil {
return finishedResponse, err
}
Expand Down
19 changes: 9 additions & 10 deletions feed.go → get.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@ import (
"strconv"
)

func (c *Client) Feed(size int, strategy string) (feedResponse *FeedResponse, err error) {
func (c *Client) Feed(size int, strategy string) (URLs []URL, err error) {
expectedStatusCode := 200
emptyStatusCode := 204
feedResponse = new(FeedResponse)

// build request
req, err := http.NewRequest("GET", c.FeedEndpoint.String(), nil)
req, err := http.NewRequest("GET", c.URLsEndpoint.String(), nil)
if err != nil {
return feedResponse, err
return URLs, err
}

q := req.URL.Query()
Expand All @@ -35,25 +34,25 @@ func (c *Client) Feed(size int, strategy string) (feedResponse *FeedResponse, er
// execute request
resp, err := c.HTTPClient.Do(req)
if err != nil {
return feedResponse, err
return URLs, err
}
defer resp.Body.Close()

// check response status code for 'empty' or 204
if resp.StatusCode == emptyStatusCode {
return feedResponse, errors.New("gocrawlhq: feed is empty")
return URLs, errors.New("gocrawlhq: feed is empty")
}

// check response status code for 200
if resp.StatusCode != expectedStatusCode {
return feedResponse, fmt.Errorf("non-%d status code: %d", expectedStatusCode, resp.StatusCode)
return URLs, fmt.Errorf("non-%d status code: %d", expectedStatusCode, resp.StatusCode)
}

// decode response body
err = json.NewDecoder(resp.Body).Decode(feedResponse)
err = json.NewDecoder(resp.Body).Decode(URLs)
if err != nil {
return feedResponse, err
return URLs, err
}

return feedResponse, err
return URLs, err
}
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
module github.com/internetarchive/gocrawlhq

go 1.22
go 1.23.2

require github.com/gobwas/ws v1.4.0

require (
github.com/gobwas/httphead v0.1.0 // indirect
github.com/gobwas/pool v0.2.1 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/sys v0.26.0 // indirect
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakr
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
18 changes: 6 additions & 12 deletions gocrawlhq.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
)

var (
Version = "1.2.13"
Version = "1.2.15"
)

func Init(key, secret, project, address, identifier string) (c *Client, err error) {
Expand Down Expand Up @@ -42,17 +42,12 @@ func Init(key, secret, project, address, identifier string) (c *Client, err erro
}

// Initialize the endpoints
c.DiscoveredEndpoint, err = url.Parse(c.HQAddress)
c.URLsEndpoint, err = url.Parse(c.HQAddress)
if err != nil {
return c, err
}

c.FinishedEndpoint, err = url.Parse(c.HQAddress)
if err != nil {
return c, err
}

c.FeedEndpoint, err = url.Parse(c.HQAddress)
c.SeencheckEndpoint, err = url.Parse(c.HQAddress)
if err != nil {
return c, err
}
Expand All @@ -62,10 +57,9 @@ func Init(key, secret, project, address, identifier string) (c *Client, err erro
return c, err
}

c.DiscoveredEndpoint.Path = path.Join(c.DiscoveredEndpoint.Path, "api", "project", c.Project, "discovered")
c.FinishedEndpoint.Path = path.Join(c.FinishedEndpoint.Path, "api", "project", c.Project, "finished")
c.FeedEndpoint.Path = path.Join(c.FeedEndpoint.Path, "api", "project", c.Project, "feed")
c.ResetEndpoint.Path = path.Join(c.ResetEndpoint.Path, "api", "project", c.Project, "reset")
c.URLsEndpoint.Path = path.Join(c.URLsEndpoint.Path, "api", "projects", c.Project, "urls")
c.SeencheckEndpoint.Path = path.Join(c.SeencheckEndpoint.Path, "api", "projects", c.Project, "seencheck")
c.ResetEndpoint.Path = path.Join(c.ResetEndpoint.Path, "api", "projects", c.Project, "reset")

return c, nil
}
41 changes: 22 additions & 19 deletions models.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,29 @@ import (
)

type Client struct {
Key string
Secret string
Project string
HQAddress string
Identifier string
DiscoveredEndpoint *url.URL
FinishedEndpoint *url.URL
FeedEndpoint *url.URL
ResetEndpoint *url.URL
HTTPClient *http.Client
WebsocketConn *net.Conn
Key string
Secret string
Project string
HQAddress string
Identifier string
URLsEndpoint *url.URL
SeencheckEndpoint *url.URL
ResetEndpoint *url.URL
HTTPClient *http.Client
WebsocketConn *net.Conn
}

type URL struct {
ID string `json:"id,omitempty"`
Value string `json:"value"`
Path string `json:"path"`
Via string `json:"via,omitempty"`
ID string `json:"id" db:"id"`
Value string `json:"value" db:"value"`
Via string `json:"via,omitempty" db:"via"`
Host string `json:"host,omitempty" db:"host"`
Path string `json:"path,omitempty" db:"path"`
Type string `json:"type,omitempty" db:"type"`
Crawler string `json:"crawler,omitempty" db:"crawler"`
Status string `json:"status" db:"status"`
LiftOff int64 `json:"lift_off" db:"lift_off"`
Timestamp int64 `json:"timestamp" db:"timestamp"`
}

type FeedResponse struct {
Expand All @@ -43,10 +48,8 @@ type FinishedResponse struct {
}

type DiscoveredPayload struct {
Type string `json:"type"`
URLs []URL `json:"urls"`
BypassSeencheck bool `json:"bypassSeencheck"`
SeencheckOnly bool `json:"seencheckOnly"`
URLs []URL `json:"urls"`
BypassSeencheck bool `json:"bypassSeencheck"`
}

type FinishedPayload struct {
Expand Down
48 changes: 48 additions & 0 deletions seencheck.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package gocrawlhq

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Seencheck POSTs URLs, JSON-encoded as an array, to c.SeencheckEndpoint and
// returns the URL list the server sends back.
//
// The request carries the X-Auth-Key, X-Auth-Secret and User-Agent headers,
// plus X-Identifier when c.Identifier is non-empty.
//
// Response handling:
//   - 200: the body is decoded as a JSON array of URL and returned.
//   - 204: there is no body to decode; a nil slice and a nil error are returned.
//   - any other status: an error naming the status code is returned.
//
// NOTE(review): the returned slice is presumably the subset of URLs the server
// has not seen yet — confirm against the HQ API.
//
// On every error path the input URLs slice is returned unchanged alongside the
// error; callers must not interpret it as a seencheck result.
func (c *Client) Seencheck(URLs []URL) (outputURLs []URL, err error) {
	jsonPayload, err := json.Marshal(URLs)
	if err != nil {
		return URLs, err
	}

	// build request
	req, err := http.NewRequest("POST", c.SeencheckEndpoint.String(), bytes.NewReader(jsonPayload))
	if err != nil {
		return URLs, err
	}

	req.Header.Add("X-Auth-Key", c.Key)
	req.Header.Add("X-Auth-Secret", c.Secret)
	req.Header.Add("User-Agent", "gocrawlhq/"+Version)

	if c.Identifier != "" {
		req.Header.Add("X-Identifier", c.Identifier)
	}

	// execute request
	resp, err := c.HTTPClient.Do(req)
	if err != nil {
		return URLs, err
	}
	defer resp.Body.Close()

	// The status code must be inspected before touching the body: a 204 has
	// no body (decoding it would yield io.EOF), and an error status must never
	// be reported as success just because its body happens to be valid JSON.
	switch resp.StatusCode {
	case http.StatusOK:
		if err := json.NewDecoder(resp.Body).Decode(&outputURLs); err != nil {
			return URLs, err
		}
		return outputURLs, nil
	case http.StatusNoContent:
		return nil, nil
	default:
		return URLs, fmt.Errorf("non-200/204 status code: %d", resp.StatusCode)
	}
}

0 comments on commit db2e701

Please sign in to comment.