-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
82 lines (65 loc) · 3.26 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import asyncio
import re
import requests
from bs4 import BeautifulSoup
import subchap_process
async def process_chapters(html_content):
soup = BeautifulSoup(html_content, 'html.parser')
chapters = soup.find_all('div', class_='0 p20 btdwh btdwh1 fix')
tasks = [] # 用于存储所有子章节处理任务
for chapter in chapters:
chapter_title = chapter.find('div', class_='chapterText').text.strip()
print(f"章节名称: {chapter_title}") # 添加章节输出
subsections = chapter.find('ul').find_all('li')
for subsection in subsections:
subsection_index = subsection.find('div', class_='chapter_index').text.strip()
subsection_title = subsection.find('div', class_='chapterText').text.strip()
section_link = subsection.find('a', class_='wh wh1').get('href') # 使用get()方法,避免KeyError
if section_link is None: # 检查链接是否存在
print(f"小节 {subsection_title} 没有链接")
continue
print(f'小节名称: {subsection_title}')
pattern = r"courseId=(\d+)&knowledgeId=(\d+)"
match = re.search(pattern, section_link)
if match:
course_id = match.group(1)
knowledge_id = match.group(2)
print(f"courseId: {course_id}, knowledgeId: {knowledge_id}")
# 创建异步任务,并添加到tasks列表中
tasks.append(subchap_process.process_sub_chapter(course_id, knowledge_id))
else:
print(f"小节 {subsection_title} 的链接没有匹配到courseId和knowledgeId") # 改进错误信息,显示哪个小节出错
print(f"链接是: {section_link}") # 打印出错误的链接,方便调试
if tasks: # 只有当tasks不为空时才执行
await asyncio.gather(*tasks) # 并发执行所有子章节处理任务
else:
print("[x]没有需要处理的章节。")
print("[+]尝试使用越权漏洞提取内容")
await subchap_process.get_data_from_html(html_content)
async def main():
url = "https://mooc1.chaoxing.com/mooc-ans/course/这里是你的课程ID.html"
headers = {
'Accept': 'application/json, text/javascript, */*; q=0.01',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Cookie':'自己写',
'Pragma': 'no-cache',
'Referer': 'https://i.chaoxing.com/base?t=173408114514',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
'X-Requested-With': 'XMLHttpRequest',
'sec-ch-ua': '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Linux"'
}
res = requests.get(url, headers=headers) # 获取网页内容
if res.status_code == 200: # 检查请求是否成功
html_content = res.text
await process_chapters(html_content)
else:
print(f"请求失败,状态码:{res.status_code}")
if __name__ == "__main__":
asyncio.run(main())