Skip to content

Commit

Permalink
fix: 修复报错
Browse files (browse the repository at this point in the history)
- 增加 pdf 首页长度, 避免被吞文本
- 仅实际发生微博详情抓取时, 才需要休眠 1s
  • Loading branch information
yangmingming committed Oct 26, 2024
1 parent 783a8f4 commit 07437bf
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 12 deletions.
43 changes: 32 additions & 11 deletions src/command/fetch/customer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,10 @@ class FetchCustomer extends Base {
mblog
})
this.log(`第${rawMblogFetchIndex}/${rawMBlogRes.recordList.length}条微博详情请求完毕, 休眠1s`)
await Util.asyncSleep(1000)
if (hydrateBlogRes.hasFetch) {
// 仅发生抓取时, 需要额外休眠1s
await Util.asyncSleep(1000)
}
if (rawMblogFetchIndex > 1 && rawMblogFetchIndex % 10 === 0) {
// 避免频繁请求导致被封ip
this.log(`累计抓取${rawMblogFetchIndex}条微博, 额外休眠${Const_Retry_Wait_Seconds}s`)
Expand Down Expand Up @@ -337,7 +340,10 @@ class FetchCustomer extends Base {
// 处理完毕, 将数据存入数据库中
await this.asyncReplaceMblogIntoDb(hydrateBlogRes.record)
this.log(`第${refetchMblogIndex}/${res.recordList.length}条微博详情请求完毕, 休眠1s`)
await Util.asyncSleep(1000)
if (hydrateBlogRes.hasFetch) {
// 仅发生抓取时, 需要额外休眠1s
await Util.asyncSleep(1000)
}
if (refetchMblogIndex > 1 && refetchMblogIndex % 10 === 0) {
// 避免频繁请求导致被封ip
this.log(`累计抓取${refetchMblogIndex}条微博, 额外休眠${Const_Retry_Wait_Seconds}s`)
Expand Down Expand Up @@ -382,8 +388,10 @@ class FetchCustomer extends Base {
mblog
})
this.log(`第${retryMblogConfigIndex}/${retryMblogConfigList.length}条微博详情请求完毕, 休眠1s`)
await Util.asyncSleep(1000)
await Util.asyncSleep(1000)
if (hydrateBlogRes.hasFetch) {
// 仅发生抓取时, 需要额外休眠1s
await Util.asyncSleep(1000)
}
if (retryMblogConfigIndex > 1 && retryMblogConfigIndex % 10 === 0) {
// 避免频繁请求导致被封ip
this.log(`累计抓取${retryMblogConfigIndex}条微博, 额外休眠${Const_Retry_Wait_Seconds}s`)
Expand Down Expand Up @@ -519,16 +527,19 @@ class FetchCustomer extends Base {
}): Promise<{
isSuccess: boolean,
record: TypeWeibo.TypeMblog
hasFetch: boolean
}> {
// 最多重试5次
const maxRetryCount = 5
if (_.isEmpty(mblog)) {
return {
isSuccess: false,
record: mblog
record: mblog,
hasFetch: false,
}
}

let hasFetch = false
const asyncGetLongTextWeibo = async ({ bid }: { bid: string }) => {
let retryCount = 0
let isSuccess = false
Expand Down Expand Up @@ -628,24 +639,28 @@ class FetchCustomer extends Base {

// 检查是否是长微博
if (mblog.isLongText === true) {
hasFetch = true
// 长微博需要调取api重新获得微博内容
let bid = mblog.bid
let realMblog = <TypeWeibo.TypeMblog>await asyncGetLongTextWeibo({ bid })
if (realMblog === undefined) {
// 获取失败, 自动返回
return {
isSuccess: false,
record: mblog
record: mblog,
hasFetch
}
}
return {
isSuccess: true,
record: realMblog
record: realMblog,
hasFetch,
}
}

if (_.isEmpty(mblog.retweeted_status) == false && mblog.retweeted_status !== undefined) {
if (mblog.retweeted_status.isLongText === true) {
hasFetch = true
// 转发微博属于长微博
let bid = mblog.retweeted_status.bid
let realRetweetMblog: TypeWeibo.TypeMblog | undefined = undefined
Expand All @@ -654,7 +669,8 @@ class FetchCustomer extends Base {
// 获取失败, 自动返回
return {
isSuccess: false,
record: mblog
record: mblog,
hasFetch
}
}
mblog.retweeted_status = realRetweetMblog
Expand All @@ -667,6 +683,7 @@ class FetchCustomer extends Base {
// 转发的是微博文章
let pageInfo = mblog.retweeted_status.page_info
let articleId = this.getArticleId(pageInfo.page_url)
hasFetch = true
let articleRecord = await asyncGetArticle({
articleId,
page_url: pageInfo.page_url
Expand All @@ -675,7 +692,8 @@ class FetchCustomer extends Base {
// 文章详情获取失败, 不储存该记录
return {
isSuccess: false,
record: mblog
record: mblog,
hasFetch
}
}
mblog.retweeted_status.article = articleRecord
Expand All @@ -685,6 +703,7 @@ class FetchCustomer extends Base {
// 文章类型为微博文章
let pageInfo = mblog.page_info
let articleId = this.getArticleId(pageInfo.page_url)
hasFetch = true
let articleRecord = await asyncGetArticle({
articleId,
page_url: pageInfo.page_url
Expand All @@ -693,14 +712,16 @@ class FetchCustomer extends Base {
// 文章详情获取失败, 不储存该记录
return {
isSuccess: false,
record: mblog
record: mblog,
hasFetch
}
}
mblog.article = articleRecord
}
return {
isSuccess: true,
record: mblog
record: mblog,
hasFetch
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/command/generate/customer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,7 @@ class GenerateCustomer extends Base {

let doc = new jsPDF({
unit: 'px',
format: [Const_Default_Webview_Width, 500],
format: [Const_Default_Webview_Width, 700],
orientation: "landscape"
})
// let fontUri = path.resolve(__dirname, '../../public/font/mi_sans_normal_thin.ttf')
Expand Down

0 comments on commit 07437bf

Please sign in to comment.