Skip to content

Commit

Permalink
fix #13
Browse files Browse the repository at this point in the history
  • Loading branch information
[email protected] authored and [email protected] committed Oct 18, 2020
1 parent 24a0d78 commit a83a619
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 47 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
openFull用于获取预览起始页,getNextPage用于获取后面的页。通过这两个函数就可以获取到一个文档的全部预览图片的地址。

# 4. Bug Fix
* todo
* 2020/10/18

ppt的下载
book118的下载逻辑变更,当前已修复支持;除核心解析、下载功能,其余功能由其他小伙伴提供,我没有测试,所以不保证能用。

* 2019/03/13

Expand Down
16 changes: 16 additions & 0 deletions http/request.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
### 情况一
### 文档详情页
GET https://max.book118.com/html/2017/0504/104201745.shtm

### 预览页
GET https://max.book118.com/index.php?g=Home&m=NewView&a=index&aid=5032121100002141

### 预览图片地址 每次最多6页
GET https://openapi.book118.com/getPreview.html?project_id=1&aid=246251736&view_token=zRvl7wn2IwANNnujrBTm6lAionOFBWTy&aid_encode=5032121100002141&page=7

### 情况二 PPT 使用原来的方式
### 预览页
GET https://max.book118.com/index.php?g=Home&m=NewView&a=index&aid=8067013053002102

### 预览图片
GET https://openapi.book118.com/getPreview.html?project_id=1&aid=266435511&view_token=x4b_euYB7s52V7qX4qUngljg22LDMglS&aid_encode=8067013053002102&page=1
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>me.rainking</groupId>
<artifactId>book118Downloader</artifactId>
<version>V2019</version>
<version>V2020</version>
<packaging>jar</packaging>

<properties>
Expand Down
15 changes: 6 additions & 9 deletions src/main/java/me/rainking/BookDownloader.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public class BookDownloader {
public static String getKey(String sTitle, String sErrorMessage, Function<String, Boolean> op, Scanner pSc) {

String sLine = "";
Boolean nFlag = true;
boolean nFlag = true;

// 输出提示信息
System.out.print(sTitle);
Expand All @@ -48,7 +48,7 @@ public static void main(String[] args) {
DocumentBrowser browser = new DocumentBrowser();
Scanner pSc = new Scanner(System.in, "UTF8");

System.out.println("Ver.20190629 latest: https://github.com/wxynihao/book118-downloader");
System.out.println("Ver.20201018 latest: https://github.com/wxynihao/book118-downloader");
List<String> pDocumentIDList = browser.readTaskList();

// 判断是否需要执行预设任务/遗留任务
Expand All @@ -59,7 +59,7 @@ public static void main(String[] args) {
if (sKey.toUpperCase().equals("N")) { pDocumentIDList.clear(); }
}

Boolean nFlag = true;
boolean nFlag = true;
while (nFlag) {
// 执行任务
if (pDocumentIDList.size() > 0) {
Expand All @@ -74,14 +74,11 @@ public static void main(String[] args) {
List<String> pLists = new ArrayList<>(Arrays.asList(new String[pDocumentIDList.size()]));
Collections.copy(pLists, pDocumentIDList);

System.out.println(String.format("下载文档: %s", sDocumentID));
System.out.printf("下载文档: %s%n", sDocumentID);
try {
browser.downloadWholeDocument(sDocumentID);
System.out.println("生成" + sDocumentID + "完成, 请到out文件夹查看。\n");
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (DocumentException e) {
} catch (IOException | DocumentException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
Expand Down Expand Up @@ -118,7 +115,7 @@ public static void main(String[] args) {
sCmd = pMatcher.group(1);
if (sCmd == null) { sCmd = pMatcher.group(2); }
pDocumentIDList.add(sCmd);
System.out.println(String.format("下载任务 %s 已填加", sCmd));
System.out.printf("下载任务 %s 已填加%n", sCmd);
}
}
// 写入任务列表
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/me/rainking/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ class Constants {

static final String OPEN_FULL_URL = "https://max.book118.com/index.php?g=Home&m=View&a=viewUrl&flag=1&cid=";

static final String PREVIEW_URL = "https://max.book118.com/index.php?g=Home&m=NewView&a=index&aid=";

static final String PIC_LINK_URL = "https://openapi.book118.com/getPreview.html?project_id={}&aid={}&view_token={}&aid_encode={}&page=";

static final String FILE_NOT_EXIST = "文件不存在";

static final List<String> TAG_OF_END = CollUtil.newArrayList("!", "Over", "Error", "Response", "ReadLimit");
Expand Down
134 changes: 99 additions & 35 deletions src/main/java/me/rainking/DocumentBrowser.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import cn.hutool.core.io.file.FileReader;
import cn.hutool.core.io.file.FileWriter;
import cn.hutool.json.JSONObject;
import com.itextpdf.text.DocumentException;

import cn.hutool.core.bean.BeanUtil;
Expand Down Expand Up @@ -61,7 +62,7 @@ private int readDownloadedPage(String sDocumentId) {
if (FileUtil.exist(filePath)) {
FileReader fileReader = new FileReader(filePath);
String sPage = fileReader.readString();
nPage = Integer.valueOf(sPage);
nPage = Integer.parseInt(sPage);
}
return nPage;
}
Expand All @@ -83,7 +84,7 @@ private String moveToNextPage(PdfInfo pInfo) {
}

/**
* 下载文档的全部图片
* 下载文档的全部图片
*
* @param documentId 文档编号
* @throws IOException pdf创建错误
Expand All @@ -94,50 +95,107 @@ void downloadWholeDocument(String documentId) throws IOException, DocumentExcept
FileUtil.mkdir(new File(srcPath));
FileUtil.mkdir(new File(DES_PATH));

int page = 1, nDownloadedPage;
// 断点下载
nDownloadedPage = readDownloadedPage(documentId);
if (nDownloadedPage != 1) {
System.out.println(String.format("下载继续,当前已完成 %d 页", nDownloadedPage));
nDownloadedPage ++;
}
StaticLog.info("\n开始解析...");
String url = getPreviewData(documentId);
Map<String, String> pageAndUrl = getPicUrl(url);
StaticLog.info("\n解析完成,共{}页", pageAndUrl.size());

StringBuilder currentDownPage = new StringBuilder();
PdfInfo pdfInfo = getPdfInfo(documentId);
String imgUrl;
StaticLog.info("\n开始下载...");
while (pdfInfo != null) {
String nextPage = moveToNextPage(pdfInfo);
if (!Constants.TAG_OF_END.contains(nextPage)) {
//跳过已下载的文件
if (page < nDownloadedPage) {
System.out.print(String.format("\r当前页码: [%d] 已跳过", page));
page ++; continue;
}
imgUrl = (pdfInfo.getHost() + Constants.IMG_PREFIX_URL + nextPage);
downloadFile(imgUrl, srcPath + "/" + autoGenericCode(page, Constants.MAX_BIT_OF_PAGE) + ".gif");
currentDownPage.append("\r").append(String.format("已下载页数:[%d] 页", page));
System.out.print(currentDownPage);
// 保存当前下载完成页码
writeDownloadedPage(documentId, page);
page++;
} else {
break;
}
int i = 0;
for (Map.Entry<String, String> entry : pageAndUrl.entrySet()) {
downloadFile("http:" + entry.getValue(), srcPath + "/" + autoGenericCode(Integer.parseInt(entry.getKey())) + ".gif");
currentDownPage.append("\r").append(String.format("已下载页数:[%s] 页", ++i));
System.out.print(currentDownPage);
}

StaticLog.info("\n开始生成...");
PdfGenerator.creatPDF(srcPath, DES_PATH + "/" + documentId + ".pdf", "gif");
FileUtil.del(new File(srcPath));
StaticLog.info("\n生成完成");
}

/**
* 将数字字符串的左边补充0,使其长度达到指定长度
*
* @param number 需要处理的数字
* @param width 补充后字符串长度
* @return 通过填充0达到长度的数字字符串
*/
private String autoGenericCode(int number, int width) {
return String.format("%0" + width + "d", number);
private String autoGenericCode(int number) {
return String.format("%0" + Constants.MAX_BIT_OF_PAGE + "d", number);
}

/**
 * Collects the preview image URL of every previewable page.
 *
 * <p>The first request (page 1) supplies the first batch of image URLs and the
 * "pages" object from which the preview page count is read. Further batches are
 * requested in strides of {@code step} pages until the preview limit is covered.
 * When the entry for the requested page comes back as an empty string, the same
 * page is requested again after the pause, until it is available.
 *
 * <p>If the thread is interrupted while pausing, the interrupt flag is restored
 * and the pages collected so far are returned.
 *
 * @param baseUrl request URL to which the page number is appended
 * @return map from page number (as keyed in the response) to image URL
 */
private Map<String, String> getPicUrl(String baseUrl) {
    Map<String, String> pageNumAndUrl = new HashMap<>();
    // First request: also used to read the number of previewable pages.
    int page = 1;
    int step = 6;
    String firstGet = HttpUtil.get(baseUrl + page);
    JSONObject data = getJson(firstGet, "data");
    data.forEach((k, v) -> pageNumAndUrl.put(k, v.toString()));
    JSONObject pages = getJson(firstGet, "pages");
    int limit = getPreviewLimit(pages);
    StaticLog.info("\n共需解析{}页", limit);
    // "<=" (not "<"): a batch that starts exactly on the last previewable page
    // must still be requested, otherwise that page is silently dropped.
    for (int i = page + step; i <= limit; ) {
        StaticLog.info("\n解析至第{}页", i);
        // The pause is required; without it no result is returned.
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Restore the flag for callers and stop; looping on would make
            // every later sleep fail immediately.
            Thread.currentThread().interrupt();
            StaticLog.error("解析被中断");
            break;
        }
        String getContent = HttpUtil.get(baseUrl + i);
        JSONObject dataContent = getJson(getContent, "data");
        // Empty entry for page i means "not ready yet": retry the same page.
        if (!"".equals(dataContent.getStr(String.valueOf(i)))) {
            i += step;
            dataContent.forEach((k, v) -> pageNumAndUrl.put(k, v.toString()));
        }
    }
    return pageNumAndUrl;
}

// "pages": {"preview": "50", "actual": "796", "filetype": "pdf"}
/**
 * Reads the preview page count from the "pages" object of a preview response.
 *
 * @param pages parsed "pages" object
 * @return the value of its "preview" field, parsed as an int
 */
private int getPreviewLimit(JSONObject pages) {
    return Integer.parseInt(pages.getStr("preview"));
}

/**
 * Extracts the JSON object that follows the first occurrence of {@code key}.
 *
 * <p>The object is cut from the first "{" after the key up to the first "}" after
 * that brace, so only flat (non-nested) objects are handled.
 *
 * @param all full response text to search
 * @param key text marking where the wanted object begins
 * @return the parsed object
 * @throws IllegalArgumentException if the key, or a "{" ... "}" pair after it,
 *                                  cannot be found in {@code all}
 */
private JSONObject getJson(String all, String key) {
    int pos = all.indexOf(key);
    if (pos < 0) {
        // Without this check the search would restart at index 0 and pick up
        // an unrelated brace.
        throw new IllegalArgumentException("key '" + key + "' not found in response");
    }
    int start = all.indexOf("{", pos + 1);
    int end = start < 0 ? -1 : all.indexOf("}", start + 1);
    if (start < 0 || end < 0) {
        throw new IllegalArgumentException("no JSON object found after key '" + key + "'");
    }
    return JSONUtil.parseObj(all.substring(start, end + 1));
}

/**
 * Builds the image-link request URL for a document.
 *
 * <p>Fetches the preview page for {@code documentId}, then reads project_id, aid,
 * view_token and aid_encode from the text starting at the "PREVIEW_DATA" marker
 * and substitutes them into {@link Constants#PIC_LINK_URL}.
 *
 * @param documentId document id appended to the preview page URL
 * @return the formatted image-link URL
 */
private String getPreviewData(String documentId) {
    String pageHtml = HttpUtil.get(Constants.PREVIEW_URL + documentId);
    int anchor = pageHtml.indexOf("PREVIEW_DATA");
    return StrUtil.format(
            Constants.PIC_LINK_URL,
            parseProjectId(pageHtml, anchor),
            parseStrInPreviewData(pageHtml, "aid", anchor),
            parseStrInPreviewData(pageHtml, "view_token", anchor),
            parseStrInPreviewData(pageHtml, "aid_encode", anchor));
}

/**
 * Reads the numeric project_id value from the preview page text.
 *
 * <p>The value is the text between the first ":" after "project_id" and the next
 * ",", with surrounding whitespace removed.
 *
 * @param previewDataFull full preview page text
 * @param pos             index from which to start looking for "project_id"
 * @return the parsed project id
 */
private int parseProjectId(final String previewDataFull, int pos) {
    final int keyAt = previewDataFull.indexOf("project_id", pos);
    final int colonAt = previewDataFull.indexOf(':', keyAt + 1);
    final int commaAt = previewDataFull.indexOf(',', colonAt + 1);
    final String rawValue = previewDataFull.substring(colonAt + 1, commaAt);
    return Integer.parseInt(rawValue.trim());
}

/**
 * Reads a single-quoted string value that follows {@code key} in the preview page text.
 *
 * @param previewDataFull full preview page text
 * @param key             field name to look for
 * @param pos             index from which to start looking for the key
 * @return the text between the first pair of single quotes after the key,
 *         or {@code null} when the key does not occur at or after {@code pos}
 */
private String parseStrInPreviewData(final String previewDataFull, String key, int pos) {
    final int keyAt = previewDataFull.indexOf(key, pos);
    if (keyAt < 0) {
        return null;
    }
    final int openQuote = previewDataFull.indexOf('\'', keyAt + 1);
    final int closeQuote = previewDataFull.indexOf('\'', openQuote + 1);
    return previewDataFull.substring(openQuote + 1, closeQuote);
}

/**
Expand All @@ -162,9 +220,9 @@ private PdfInfo getPdfInfo(String documentId) {
String redirectPage = HttpUtil.get(pdfPageUrlStr);
String href = ReUtil.get(Constants.HREF_PATTERN, redirectPage, 1);
String fullUrl;
if(href != null){
fullUrl = viewHost.substring(0, viewHost.length()-1) + HtmlUtil.unescape(href);
}else {
if (href != null) {
fullUrl = viewHost.substring(0, viewHost.length() - 1) + HtmlUtil.unescape(href);
} else {
fullUrl = pdfPageUrlStr;
}

Expand Down Expand Up @@ -215,4 +273,10 @@ private void downloadFile(String url, String localPath) {
StaticLog.error(e.getMessage());
}
}

/**
 * Manual check: resolves the image-link URL for a sample document id and prints
 * the page-to-URL map obtained from it.
 *
 * @param args unused
 */
public static void main(String[] args) {
    DocumentBrowser browser = new DocumentBrowser();
    System.out.println(browser.getPicUrl(browser.getPreviewData("5032121100002141")));
}
}

0 comments on commit a83a619

Please sign in to comment.