Skip to content

Latest commit

 

History

History
474 lines (349 loc) · 9.79 KB

01-09.md

File metadata and controls

474 lines (349 loc) · 9.79 KB

golang-learning-9.png 10.png

大家好,我叫谢伟,是一名程序员。

我们已经研究了:

今天的主题:Go 语言语法练习。

我们需要达到一个什么样的目的:目标网站 gocn,抓取所有信息,以我们需要的形式返回信息。

具体结果是这样的:http://localhost:8080/gocn/api/v1/tenants/contents/{page}

json.png

项目虽然小,但是有几个要点:

  • 项目的组织
  • go 爬虫
  • restful api 设计

尤其是 restful api 设计:在微服务领域,经常要设计一个组件来提供某些服务,各服务之间通过接口相互访问;对组件的编程语言、架构都没有要求,只需对外暴露接口即可,调用方通过接口访问,组件内部进行相应的处理。

这里我们不细讲,下次专门讲下 restful api 的设计:包括 HTTP 路由设计、状态码设计、返回值设计、错误信息设计。

1. 项目组织

workspace
   domain
       objects.go
   download
      download.go
   engine
      engine.go
  infra
     util_string.go
  main
    main.go
  parse
    gocn
        gocn_parse.go
  ui
    api-server
       api-server.go
       route_function.go

2. 定义字段

domain
   objects.go

定义抓取的字段:

// Content is one scraped topic entry from a gocn list page, together with
// the body text of the topic page it links to.
type Content struct {
	// URL is the link to the topic (question) page.
	URL          string
	// Title is the topic title.
	Title        string
	// Tag is the board/category the topic belongs to.
	Tag          string
	// Reporter is the topic author, derived from the author's home-page link.
	Reporter     string
	// ReporterHome is the link to the author's home page.
	ReporterHome string
	// Followers is the number of followers of the topic.
	Followers    int
	// Answers is the number of replies.
	Answers      int
	// Answerer is the text of the second link in the entry's info line —
	// presumably the most recent replier; confirm against the page markup.
	Answerer     string
	// See is the number of views.
	See          int
	// Passage is the body text of the topic page.
	Passage      string
}

field.png

问题链接、标题、所属板块、楼主、楼主主页、关注数、回复数、最后回复者、浏览数、正文

3. 下载器

   download
      download.go
package download

import (
	"net/http"

	"errors"

	"github.com/PuerkitoBio/goquery"
)

// Sentinel errors returned by Download.
var (
	// ErrorHttpRequest reports that the HTTP request could not be built.
	ErrorHttpRequest  = errors.New("http request error")
	// ErrorHttpResponse reports that no usable HTTP response was obtained.
	ErrorHttpResponse = errors.New("http response error")
)

// Download fetches url with an HTTP GET and parses the response body into a
// goquery document.
//
// A browser-like User-Agent header is sent with the request. It returns
// ErrorHttpRequest when the request cannot be built, ErrorHttpResponse when
// the request fails or the server answers with a non-2xx status, and the
// error reported by goquery when the body cannot be parsed. The document is
// nil whenever the error is non-nil.
func Download(url string) (*goquery.Document, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, ErrorHttpRequest
	}
	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, ErrorHttpResponse
	}
	defer resp.Body.Close()

	// Client.Do reports no error for 4xx/5xx replies, so reject them here
	// instead of parsing an error page as if it were real content.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, ErrorHttpResponse
	}

	return goquery.NewDocumentFromReader(resp.Body)
}

  • 使用了内置的 net/http 库
  • 使用了第三方库 goquery,主要是因为它能很好地解析网页

4. 解析器

  parse
    gocn
        gocn_parse.go
package gocn

import (
	"errors"

	"go-example-for-live/nine_learning/infra"

	"go-example-for-live/nine_learning/download"

	"go-example-for-live/nine_learning/domain"

	"github.com/PuerkitoBio/goquery"
)

var (
	// ErrorParse is this package's sentinel error for parse failures.
	ErrorParse = errors.New("parse error")
)

// TitleParse extracts one domain.Content per topic entry
// ("div.aw-common-list div.aw-item") of a list page.
//
// For every entry it also downloads the linked topic page and stores its body
// text in Passage. Fetching the body is best effort: when the entry has no
// link, or the topic page cannot be downloaded or parsed, Passage is left
// empty and the remaining fields are still returned. Attributes missing from
// the markup are reported as the string "None".
//
// It returns nil when document is nil or contains no matching entries.
func TitleParse(document *goquery.Document) domain.Contents {
	if document == nil {
		return nil
	}

	var contents domain.Contents

	document.Find("div.aw-common-list div.aw-item").Each(func(_ int, item *goquery.Selection) {
		// The first link of an entry points at the reporter's home page; the
		// reporter name is derived from that same href.
		userHome := item.Find("a").Eq(0).AttrOr("href", "None")

		url, hasURL := item.Find("div h4 a").Attr("href")
		var passage string
		if hasURL {
			passage = fetchPassage(url)
		} else {
			// Keep the placeholder in the output, but do not try to fetch it.
			url = "None"
		}

		infoLinks := item.Find("p a")
		followers, answers, views := infra.StringSplitByDot(item.Find("p span").Eq(0).Text())

		contents = append(contents, domain.Content{
			URL:          url,
			Title:        infra.StringTrim(item.Find("div h4").Text()),
			Tag:          infoLinks.Eq(0).Text(),
			Reporter:     infra.StringSplit(userHome),
			ReporterHome: userHome,
			Followers:    followers,
			Answers:      answers,
			Answerer:     infoLinks.Eq(1).Text(),
			See:          views,
			Passage:      passage,
		})
	})

	return contents
}

// fetchPassage downloads the topic page at url and returns its body text, or
// "" when the page cannot be downloaded or parsed.
func fetchPassage(url string) string {
	doc, err := download.Download(url)
	if err != nil {
		// Download returns a nil document on failure; parsing it would panic.
		return ""
	}
	passage, err := PassageParse(doc)
	if err != nil {
		return ""
	}
	return passage
}

// PassageParse returns the body text of a topic page, taken from the
// "div.content.markitup-box" element, after passing it through
// infra.StringTrim and infra.StringsReplace.
//
// It returns ErrorParse when document is nil (which is what a failed download
// yields) instead of dereferencing it.
func PassageParse(document *goquery.Document) (string, error) {
	if document == nil {
		return "", ErrorParse
	}
	body := document.Find("div.content.markitup-box").Text()
	return infra.StringsReplace(infra.StringTrim(body)), nil
}

  • PassageParse 获取正文
  • TitleParse 解析其他字段

5. 调度引擎

   engine
      engine.go
package engine

import (
	"errors"
	"go-example-for-live/nine_learning/download"

	"fmt"

	"go-example-for-live/nine_learning/parse/gocn"

	"go-example-for-live/nine_learning/domain"

	"github.com/PuerkitoBio/goquery"
)

var (
	// ErrorRun is returned by Run when the requested page cannot be downloaded.
	ErrorRun = errors.New("engine error")
)

var (
	// ContentsNil is the empty, non-nil result Run returns alongside ErrorRun.
	ContentsNil = domain.Contents{}
)

// Run downloads the page at request.Url and parses it with gocn.TitleParse.
//
// When the download fails, the underlying error is printed to stdout and
// Run returns ContentsNil together with ErrorRun.
//
// NOTE(review): request.ParseFunc is not consulted here; the parser is always
// gocn.TitleParse. Confirm whether that is intended.
func Run(request domain.Request) (domain.Contents, error) {
	var page *goquery.Document

	page, err := download.Download(request.Url)
	if err != nil {
		fmt.Println(err)
		return ContentsNil, ErrorRun
	}

	contents := gocn.TitleParse(page)
	return contents, nil
}

6. api 服务

  ui
    api-server
       api-server.go
       route_function.go
package apiserver

import (
	"log"
	"net/http"

	"github.com/emicklei/go-restful"
)

// APIServer wraps the go-restful container that holds the service's routes
// and is used as the HTTP handler.
type APIServer struct {
	container *restful.Container
}

// APIServers is a slice of APIServer values.
type APIServers []APIServer

// init creates the server's go-restful container with content encoding
// enabled and the curly-brace router selected.
func (a *APIServer) init() {
	a.container = restful.NewContainer()
	a.container.EnableContentEncoding(true)
	a.container.Router(restful.CurlyRouter{})
}

// register builds the JSON web service rooted at /gocn, attaches the
// contents route to it and adds the service to the server's container.
func (a *APIServer) register() {
	service := &restful.WebService{}
	service.
		Consumes(restful.MIME_JSON).
		Produces(restful.MIME_JSON).
		Path("/gocn")

	contentsRoute := service.GET("/api/v1/tenants/contents/{page}").To(GetContents)
	service.Route(contentsRoute)

	a.container.Add(service)
}

// Start serves the registered routes on port 8080. It only returns by way of
// log.Fatal, which logs the listener's error and exits the process.
func (a *APIServer) Start() {
	err := http.ListenAndServe(":8080", a.container)
	log.Fatal(err)
}

// New returns an APIServer whose container has been created and whose routes
// have been registered, ready for Start.
func New() *APIServer {
	srv := new(APIServer)
	srv.init()
	srv.register()
	return srv
}

package apiserver

import (
	"go-example-for-live/nine_learning/domain"
	"go-example-for-live/nine_learning/engine"
	"go-example-for-live/nine_learning/parse/gocn"

	"fmt"

	"github.com/emicklei/go-restful"
)

var (
	// startURL is the listing URL used when no page is requested.
	startURL = "https://gocn.io/sort_type-new__day-0__is_recommend-0"
	// rootURL is the format string for a specific listing page; %s receives
	// the "page" path parameter.
	rootURL  = "https://gocn.io/sort_type-new__day-0__is_recommend-0__page-%s"
)

// GetContents is the handler for the contents route.
//
// It crawls the listing page selected by the "page" path parameter (the
// first page when the parameter is empty) and writes the scraped contents as
// the response entity. When the crawl fails it answers 502 with the engine's
// error instead of reporting success with an empty payload.
func GetContents(request *restful.Request, response *restful.Response) {
	// Literal status codes: net/http is not imported by this file.
	const (
		statusInternalServerError = 500
		statusBadGateway          = 502
	)

	url := startURL
	if page := request.PathParameter("page"); page != "" {
		url = fmt.Sprintf(rootURL, page)
	}

	contents, err := engine.Run(domain.Request{
		Url:       url,
		ParseFunc: gocn.TitleParse,
	})
	if err != nil {
		// The upstream site could not be fetched; surface that to the client.
		response.WriteError(statusBadGateway, err)
		return
	}

	if err := response.WriteEntity(contents); err != nil {
		response.WriteError(statusInternalServerError, err)
	}
}

  • 使用了go-restful 库
  • http 主要包含这么几个东西
1. http 方法
2. request 
3. response

7. 主函数入口

main
   main.go
package main

import "go-example-for-live/nine_learning/ui/api-server"

// main builds the API server and starts serving requests.
func main() {
	server := apiserver.New()
	server.Start()
}

启动http 服务,监听8080 端口。

8. 访问

http://localhost:8080/gocn/api/v1/tenants/contents/{page}

访问源代码:https://github.com/wuxiaoxiaoshen/go-example-for-live

通过本节,希望你能知道,如何组织项目结构,go 常用的用法:变量的用法、错误处理机制、结构体定义、结构体方法、第三方库的使用。

再会,我是谢伟。

package main

import (
	"bytes"
	"fmt"
	"io/ioutil"
)

// xieWei 2018.05.29

// main runs one producer goroutine per output file; producer k endlessly
// offers the number k+1 on its own channel. The numbers are interleaved into
// rotated sequences (A: 1234..., B: 2341..., C: 3412..., D: 4123...), each 20
// digits long, which are printed to stdout and written to ./<name>.txt.
func main() {
	const rounds = 20

	names := []string{"A", "B", "C", "D"}

	// Closing done lets the producers exit instead of blocking on a send
	// nobody will ever receive.
	done := make(chan struct{})
	defer close(done)

	// One producer per output file, so the two can never get out of step.
	producers := make([]chan int, len(names))
	for i := range producers {
		producers[i] = make(chan int)
		go func(out chan<- int, value int) {
			for {
				select {
				case out <- value:
				case <-done:
					return
				}
			}
		}(producers[i], i+1)
	}

	buffers := make([]bytes.Buffer, len(names))
	for round := 0; round < rounds; round++ {
		for j := range buffers {
			// Buffer j starts at producer j and moves on by one every round.
			fmt.Fprintf(&buffers[j], "%d", <-producers[(round+j)%len(producers)])
		}
	}

	for i, name := range names {
		fmt.Println(buffers[i].String())
		if err := ioutil.WriteFile("./"+name+".txt", buffers[i].Bytes(), 0644); err != nil {
			// Report the failure but still try the remaining files.
			fmt.Println("write "+name+".txt:", err)
		}
	}
}