-
Notifications
You must be signed in to change notification settings - Fork 0
/
AlbumHandler.py
119 lines (101 loc) · 4.37 KB
/
AlbumHandler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# coding:utf-8
import os
import json
import time
import requests
import Constants
# Number of photos requested per listing page.
PAGE_NUM = 30
# Images are saved in a 'downloadImgs' folder that sits next to this module.
IMG_DOWNLOAD_PATH = os.sep.join(
    [os.path.dirname(os.path.realpath(__file__)), 'downloadImgs'])
# Text file, inside the download folder, listing the URLs that could not be fetched.
DOWNLOAD_FAILED_IMG_PATH = os.sep.join(
    [IMG_DOWNLOAD_PATH, 'downloadFailedImg.txt'])
class AlbumHandler(object):
    '''
    Download the photos of a user's albums, album by album, page by page.

    Album and photo listings are fetched through the session given to the
    constructor; the image files themselves are fetched with requests.get.
    Images are stored under IMG_DOWNLOAD_PATH/<u_id>/<album id>/<lloc>.jpg.
    '''

    # Total number of attempts imgDownload makes for one URL before giving up.
    MAX_DOWNLOAD_ATTEMPTS = 3

    def __init__(self, session, g_tk):
        '''
        Prepare the download folder and remember the request credentials.

        session -- object whose get(url, headers=...) performs the listing requests
        g_tk    -- value substituted for '{g_tk}' in the listing URLs

        Side effects: creates IMG_DOWNLOAD_PATH when it does not exist and
        deletes the failed-download log left behind by an earlier run.
        '''
        if not os.path.exists(IMG_DOWNLOAD_PATH):
            os.mkdir(IMG_DOWNLOAD_PATH)
        if os.path.exists(DOWNLOAD_FAILED_IMG_PATH):
            os.remove(DOWNLOAD_FAILED_IMG_PATH)
        self.session = session
        self.g_tk = g_tk

    @staticmethod
    def _parse_jsonp(payload):
        '''
        Strip the 'callback( ... )' wrapper from a JSONP response and decode it.

        payload -- response body, either bytes or text
        Returns the decoded JSON value.
        Raises ValueError when no wrapper is found or the JSON is invalid.
        '''
        if isinstance(payload, bytes):
            payload = payload.decode('utf-8')
        start = payload.find('(')
        # The JSON itself may contain ')' (for example in an album name), so
        # the wrapper ends at the LAST closing parenthesis, not the first.
        end = payload.rfind(')')
        if start == -1 or end < start:
            raise ValueError('response is not JSONP: %r' % (payload[:80],))
        return json.loads(payload[start + 1:end])

    @staticmethod
    def _page_plan(total, page_size):
        '''
        Split ``total`` items into listing requests.

        Returns a list of (pageStart, pageNum) tuples: the zero-based offset of
        each page and the number of items to ask for. An empty album yields no
        pages, and a total that is an exact multiple of ``page_size`` does not
        produce a trailing request for zero items.
        '''
        return [(offset, min(page_size, total - offset))
                for offset in range(0, total, page_size)]

    def start(self, u_id):
        '''
        Download every album of user ``u_id``.

        u_id -- user id as a string; it is used in the request URLs and as the
                name of the per-user folder.
        '''
        # Create the per-user download folder.
        uidImgPath = IMG_DOWNLOAD_PATH + os.sep + u_id
        if not os.path.exists(uidImgPath):
            os.mkdir(uidImgPath)

        # Request the album list.
        album_url = (Constants.LIST_ALBUM
                     .replace('{g_tk}', str(self.g_tk))
                     .replace('{u_id}', u_id))
        raw = self.session.get(album_url,
                               headers=Constants.REQUEST_HEADER).content
        # Tolerate a null list so a user without albums is not an error.
        albums = self._parse_jsonp(raw)['data']['albumListModeSort'] or []

        for album in albums:
            self._download_album(u_id, album, uidImgPath)

    def _download_album(self, u_id, album, uidImgPath):
        '''Download all photos of one album entry into its own sub-folder.'''
        topicId = album['id']
        # album['name'] is not used for the folder name: the original author
        # noted that Chinese names came out garbled, so the id is used instead.
        total = album['total']  # number of photos in this album

        albumPath = uidImgPath + os.sep + topicId
        if not os.path.exists(albumPath):
            os.mkdir(albumPath)

        plan = self._page_plan(total, PAGE_NUM)
        page = len(plan)
        print('-----------album %s start download----------------------' % (topicId,))

        currPhoto = 1
        for currPage, (pageStart, pageNum) in enumerate(plan, 1):
            # Request one page of the album's photo list.
            listing_url = (Constants.LIST_PHOTO
                           .replace('{g_tk}', str(self.g_tk))
                           .replace('{u_id}', u_id)
                           .replace('{topicId}', topicId)
                           .replace('{pageStart}', str(pageStart))
                           .replace('{pageNum}', str(pageNum)))
            raw = self.session.get(listing_url,
                                   headers=Constants.REQUEST_HEADER).content
            time.sleep(1)  # pause between listing requests
            photos = self._parse_jsonp(raw)['data']['photoList'] or []

            for photo in photos:
                pid = photo['lloc']
                purl = photo['url']
                imgPath = albumPath + os.sep + str(pid) + '.jpg'
                if not os.path.exists(imgPath):
                    if self.imgDownload(purl, imgPath, 0):
                        print('[%s/%s page]: [%s / %s] downloaded: %s'
                              % (currPage, page, currPhoto, total, purl))
                else:
                    print('photo %s exists, continue.' % (pid,))
                currPhoto += 1

        print('-----------album %s end download----------------------' % (topicId,))

    def imgDownload(self, url, imgPath, times):
        '''
        Download one image to ``imgPath``, retrying on failure.

        url     -- address of the image
        imgPath -- destination file path
        times   -- number of attempts already made (callers pass 0)

        Returns True once the file has been written. Returns False after
        MAX_DOWNLOAD_ATTEMPTS attempts have failed; the URL is then appended
        to DOWNLOAD_FAILED_IMG_PATH.
        '''
        while True:
            # Count the attempt before it can fail, so that a request which
            # raises still moves towards the retry limit.
            times += 1
            error = None
            try:
                resp = requests.get(url, headers=Constants.REQUEST_HEADER)
                if resp.status_code != 200:
                    resp.raise_for_status()
                    # raise_for_status() only rejects 4xx/5xx; any other
                    # non-200 answer used to be dropped without a trace.
                    raise IOError('unexpected HTTP status %s' % (resp.status_code,))
                if len(resp.content) < 20000:
                    # Unusually small body: report its size rather than
                    # dumping raw bytes to the console.
                    print('warning: only %d bytes received from %s'
                          % (len(resp.content), url))
                with open(imgPath, 'wb') as f:
                    f.write(resp.content)
            except Exception as e:
                error = e
            time.sleep(2)  # throttle after every attempt
            if error is None:
                return True
            if times >= self.MAX_DOWNLOAD_ATTEMPTS:
                print('%s redownloading failed, giving up...' % (error,))
                with open(DOWNLOAD_FAILED_IMG_PATH, 'a') as f:
                    f.write(url + "\n")
                return False
            print('try %d times download %s failed.Redownloading..' % (times, url))