Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add mgstage film actor's thumb from seesaawiki_av_neme #20

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions AVDC_Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,7 +1013,10 @@ def thumbDownload(self, json_data, path, naming_rule, Config, filepath, failed_f
return
i = 1
while i <= int(Config['proxy']['retry']):
self.DownloadFileWithFilename(json_data['cover'], thumb_name, path, Config, filepath,
download_url = json_data['cover']
if "largeImage" in json_data.keys() and len(json_data['largeImage']) > 0:
download_url = json_data['largeImage'] # mgstage 封面下载大图
self.DownloadFileWithFilename(download_url, thumb_name, path, Config, filepath,
failed_folder)
if not check_pic(path + '/' + thumb_name):
print('[!]Image Download Failed! Trying again. ' + str(i) + '/' + Config['proxy']['retry'])
Expand All @@ -1024,7 +1027,28 @@ def thumbDownload(self, json_data, path, naming_rule, Config, filepath, failed_f
self.add_text_main('[+]Thumb Downloaded! ' + thumb_name)
else:
os.remove(path + '/' + thumb_name)
raise Exception("The Size of Thumb is Error! Deleted " + thumb_name + '!')
raise Exception("The p of Thumb is Error! Deleted " + thumb_name + '!')
# ========================================================================Download the poster thumbnail
def posterDownload(self, json_data, path, naming_rule, Config, filepath, failed_folder):
    """Download the poster image for a title into *path*.

    The file is saved as ``<naming_rule>-poster.jpg``. If it already exists
    nothing is downloaded. Otherwise ``json_data['cover']`` is fetched up to
    ``Config['proxy']['retry']`` times until ``check_pic`` accepts the file.

    Raises:
        Exception: when no attempt produced a file that ``check_pic`` accepts.
    """
    poster_name = naming_rule + '-poster.jpg'
    poster_path = path + '/' + poster_name
    if os.path.exists(poster_path):
        self.add_text_main('[+]Poster Existed! ' + poster_name)
        return

    download_url = json_data['cover']
    retry_limit = str(Config['proxy']['retry'])
    for attempt in range(1, int(retry_limit) + 1):
        self.DownloadFileWithFilename(download_url, poster_name, path, Config, filepath,
                                      failed_folder)
        if check_pic(poster_path):
            self.add_text_main('[+]Poster Downloaded! ' + poster_name)
            return
        print('[!]Image Download Failed! Trying again. ' + str(attempt) + '/' + retry_limit)

    # The download may never have created the file; removing a missing path
    # would raise FileNotFoundError and hide the real failure below.
    if os.path.exists(poster_path):
        os.remove(poster_path)
    raise Exception("The Size of Poster is Error! Deleted " + poster_name + '!')

def deletethumb(self,path, naming_rule):
try:
Expand Down Expand Up @@ -1539,6 +1563,8 @@ def Core_Main(self, filepath, number, mode, count):
# imagecut 0 判断人脸位置裁剪缩略图为封面,1 裁剪右半面,3 下载小封面
self.thumbDownload(json_data, path, naming_rule, Config, filepath, failed_folder)
if self.Ui.checkBox_download_poster.isChecked():
if "largeImage" in json_data.keys() and len(json_data['largeImage']) > 0: ## 如果自带大封面了,直接下载小封面
self.posterDownload(json_data, path, naming_rule, Config, filepath, failed_folder)
if self.smallCoverDownload(path, naming_rule, json_data, Config, filepath,
failed_folder) == 'small_cover_error': # 下载小封面
json_data['imagecut'] = 0
Expand Down
4 changes: 4 additions & 0 deletions Function/Function.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
number = json_data['number']
actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表
release = json_data['release']
outline = json_data['outline']
try:
cover_small = json_data['cover_small']
except:
Expand All @@ -249,6 +250,8 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
title = title.replace(' ', '.')
title = title.replace('【', '')
title = title.replace('】', '')
title = title.replace('&', '')
outline = outline.replace('&', '')
release = release.replace('/', '-')
tmpArr = cover_small.split(',')
if len(tmpArr) > 0:
Expand All @@ -272,6 +275,7 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
json_data['naming_media'] = naming_media
json_data['naming_file'] = naming_file
json_data['folder_name'] = folder_name
json_data['outline'] = outline
return json_data


Expand Down
6 changes: 3 additions & 3 deletions Function/getHtml.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def get_config():


# ========================================================================Web page requests
def get_html(url, cookies=None):
def get_html(url, cookies=None,encode='utf-8'):
config = get_config()
retry_count = 0
proxy = ''
Expand All @@ -37,14 +37,14 @@ def get_html(url, cookies=None):
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/60.0.3100.0 Safari/537.36'}
getweb = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies)
getweb.encoding = 'utf-8'
getweb.encoding = encode
return getweb.text
else:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/68.0.3440.106 Safari/537.36'}
getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
getweb.encoding = 'utf-8'
getweb.encoding = encode
return getweb.text
except Exception as error_info:
i += 1
Expand Down
7 changes: 6 additions & 1 deletion Getter/jav321.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from lxml import etree
import json
from Function.getHtml import post_html

from Getter.seesaawiki_av_neme import getActorFromSeesaawiki

def getActorPhoto(actor):
data = {}
Expand Down Expand Up @@ -132,6 +132,11 @@ def main(number, isuncensored=False):
'website': getWebsite(detail_page),
'source': 'jav321.py',
}
wikiActor = getActorFromSeesaawiki(number, dic)
acotrList = dic['actor'].split(',')
acotrList.extend(wikiActor['actor'])
dic['actor'] = ','.join(acotrList)
dic['actor_photo'] ={**dic['actor_photo'],**wikiActor['actor_photo']}
except TimeoutError:
dic = {
'title': '',
Expand Down
15 changes: 13 additions & 2 deletions Getter/mgstage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from lxml import etree
import json
from Function.getHtml import get_html

from Getter.seesaawiki_av_neme import getActorFromSeesaawiki

def getTitle(htmlcode):
try:
Expand Down Expand Up @@ -107,6 +107,11 @@ def getOutline(htmlcode):
def getScore(htmlcode):
    """Return the rating found in *htmlcode*, or '' when none is present.

    The rating is the non-whitespace text between '5点満点中 ' and '点'.
    Only the first occurrence is used, so a page that repeats the rating
    does not produce a joined list representation.
    """
    match = re.search(r'5点満点中 (\S+)点', htmlcode)
    return match.group(1) if match else ''

def getLargeImage(htmlcode):
    """Return the href of the element with id "EnlargeImage" in *htmlcode*.

    The XPath result list is converted to its string form and the
    surrounding brackets, quotes and spaces are stripped, so an empty
    result becomes ''.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    hrefs = tree.xpath('//*[@id="EnlargeImage"]/@href')
    return str(hrefs).strip(" ['']")


def main(number):
try:
Expand All @@ -128,7 +133,7 @@ def main(number):
'release': getRelease(htmlcode).strip(','),
'number': getNum(htmlcode).strip(','),
'cover': getCover(htmlcode).strip(','),
'extrafanart': getExtraFanart(htmlcode).strip(','),
'extrafanart': getExtraFanart(htmlcode),
'imagecut': 0,
'tag': getTag(htmlcode).strip(','),
'series': getSeries(htmlcode).strip(','),
Expand All @@ -137,7 +142,13 @@ def main(number):
'director': '',
'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
'source': 'mgstage.py',
'largeImage':getLargeImage(htmlcode).strip(',')
}
wikiActor = getActorFromSeesaawiki(number, dic)
acotrList = dic['actor'].split(',')
acotrList.extend(wikiActor['actor'])
dic['actor'] = ','.join(acotrList)
dic['actor_photo'] ={**dic['actor_photo'],**wikiActor['actor_photo']}
except TimeoutError:
dic = {
'title': '',
Expand Down
47 changes: 47 additions & 0 deletions Getter/seesaawiki_av_neme.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import re
from lxml import etree
import json
from Function.getHtml import get_html

def getActorFromSeesaawiki(number, dic):
    """Return the getActor() result for *number*.

    A shallow copy of *dic* is stored in the module-level ``localDic``
    first, because filterActor reads it while getActor filters candidates.
    """
    global localDic
    localDic = dic.copy()
    actor_info = getActor(number)
    return actor_info

def getActor(number):
    """Search seesaawiki av_neme for *number* and collect actor names and photos.

    Every link inside an ``h3`` under ``#page-body-inner`` that passes
    filterActor is treated as an actor entry; the link target is handed to
    getActorPhoto to resolve the photo URL.

    Returns:
        dict: ``{'actor': [name, ...], 'actor_photo': {name: photo_url}}``.
    """
    search_url = 'https://seesaawiki.jp/av_neme/search?keywords=' + str(number)
    # "EUC-JP" is passed as get_html's third argument, presumably the page encoding.
    wiki_html = get_html(search_url, "", "EUC-JP")
    html = etree.fromstring(wiki_html, etree.HTMLParser())
    anchors = html.xpath('//*[@id="page-body-inner"]//h3//a')

    actors = []
    actorPhotoDic = {}
    for actor in filter(filterActor, anchors):
        href = actor.get('href')
        if not href:
            # An anchor without a target has no page to read a photo from.
            continue
        actors.append(actor.text)
        actorPhotoDic[actor.text] = getActorPhoto(actor.text, href)

    return {
        'actor': actors,
        'actor_photo': actorPhotoDic,
    }

def getActorPhoto(actor, href):
    """Fetch the page at *href* and return the image src found in its first content block.

    *actor* is accepted but not used. The XPath result list is converted
    to its string form and stripped of brackets, quotes and spaces, so a
    page without a matching image yields ''.
    """
    print(href)
    page = etree.fromstring(get_html(href, "", "EUC-JP"), etree.HTMLParser())
    sources = page.xpath('//*[@id="content_block_1-body"]/a/img/@src')
    return str(sources).strip(" ['']")

def stripActor(actor):
    """Return *actor* without surrounding whitespace and then without surrounding single quotes."""
    return actor.strip().strip("'")
def filterActor(actor):
    """Return True when the link element *actor* should be kept as an actor entry.

    A link is rejected when it has no text, when its text contains "年"
    (presumably a dated heading rather than a name -- confirm), or when its
    text equals the ``series`` value of the module-level ``localDic``.
    A missing ``localDic`` or ``series`` key is treated as "no series".
    """
    text = actor.text
    if not text:
        # Links that wrap only child elements have text None.
        return False
    if "年" in text:
        return False
    series = globals().get('localDic', {}).get('series')
    return text != series