fix #13

wxynihao · Oct 18, 2020 · a83a619 · a83a619
1 parent 24a0d78
commit a83a619
Show file tree

Hide file tree

Showing 6 changed files with 128 additions and 47 deletions.
diff --git a/README.md b/README.md
@@ -32,9 +32,9 @@
 openFull用于获取预览起始页，getNextPage用于获取后面的页。通过这两个函数就可以获取到一个文档的全部预览图片的地址。
 
 # 4. Bug Fix
-* todo
+* 2020/10/18
 
-ppt的下载
+book118的下载逻辑变更，当前已修复支持；除核心解析、下载功能外，其余功能由其他小伙伴提供，我没有测试，所以不保证能用。
 
 * 2019/03/13
 

diff --git a/http/request.http b/http/request.http
@@ -0,0 +1,16 @@
+### 情况一
+### 文档详情页
+GET https://max.book118.com/html/2017/0504/104201745.shtm
+
+### 预览页
+GET https://max.book118.com/index.php?g=Home&m=NewView&a=index&aid=5032121100002141
+
+### 预览图片地址 每次最多6页
+GET https://openapi.book118.com/getPreview.html?project_id=1&aid=246251736&view_token=zRvl7wn2IwANNnujrBTm6lAionOFBWTy&aid_encode=5032121100002141&page=7
+
+### 情况二 PPT 使用原来的方式
+### 预览页
+GET https://max.book118.com/index.php?g=Home&m=NewView&a=index&aid=8067013053002102
+
+### 预览图片
+GET https://openapi.book118.com/getPreview.html?project_id=1&aid=266435511&view_token=x4b_euYB7s52V7qX4qUngljg22LDMglS&aid_encode=8067013053002102&page=1
diff --git a/pom.xml b/pom.xml
@@ -6,7 +6,7 @@
 
   <groupId>me.rainking</groupId>
   <artifactId>book118Downloader</artifactId>
-  <version>V2019</version>
+  <version>V2020</version>
   <packaging>jar</packaging>
 
   <properties>

diff --git a/src/main/java/me/rainking/BookDownloader.java b/src/main/java/me/rainking/BookDownloader.java
@@ -26,7 +26,7 @@ public class BookDownloader {
     public static String getKey(String sTitle, String sErrorMessage, Function<String, Boolean> op, Scanner pSc) {
 
         String sLine = "";
-        Boolean nFlag = true;
+        boolean nFlag = true;
 
         // 输出提示信息
         System.out.print(sTitle);
@@ -48,7 +48,7 @@ public static void main(String[] args) {
         DocumentBrowser browser = new DocumentBrowser();
         Scanner pSc = new Scanner(System.in, "UTF8");
 
-        System.out.println("Ver.20190629 latest: https://github.com/wxynihao/book118-downloader");
+        System.out.println("Ver.20201018 latest: https://github.com/wxynihao/book118-downloader");
         List<String> pDocumentIDList = browser.readTaskList();
 
         // 判断是否需要执行预设任务/遗留任务
@@ -59,7 +59,7 @@ public static void main(String[] args) {
             if (sKey.toUpperCase().equals("N")) { pDocumentIDList.clear(); }
         }
 
-        Boolean nFlag = true;
+        boolean nFlag = true;
         while (nFlag) {
             // 执行任务
             if (pDocumentIDList.size() > 0) {
@@ -74,14 +74,11 @@ public static void main(String[] args) {
                     List<String> pLists = new ArrayList<>(Arrays.asList(new String[pDocumentIDList.size()]));
                     Collections.copy(pLists, pDocumentIDList);
 
-                    System.out.println(String.format("下载文档： %s", sDocumentID));
+                    System.out.printf("下载文档： %s%n", sDocumentID);
                     try {
                         browser.downloadWholeDocument(sDocumentID);
                         System.out.println("生成" + sDocumentID + "完成, 请到out文件夹查看。\n");
-                    } catch (IOException e) {
-                        // TODO Auto-generated catch block
-                        e.printStackTrace();
-                    } catch (DocumentException e) {
+                    } catch (IOException | DocumentException e) {
                         // TODO Auto-generated catch block
                         e.printStackTrace();
                     }
@@ -118,7 +115,7 @@ public static void main(String[] args) {
                     sCmd = pMatcher.group(1);
                     if (sCmd == null) { sCmd = pMatcher.group(2); }
                     pDocumentIDList.add(sCmd);
-                    System.out.println(String.format("下载任务 %s 已填加", sCmd));
+                    System.out.printf("下载任务 %s 已填加%n", sCmd);
                 }
             }
             // 写入任务列表

diff --git a/src/main/java/me/rainking/Constants.java b/src/main/java/me/rainking/Constants.java
@@ -20,6 +20,10 @@ class Constants {
 
     static final String OPEN_FULL_URL = "https://max.book118.com/index.php?g=Home&m=View&a=viewUrl&flag=1&cid=";
 
+    static final String PREVIEW_URL = "https://max.book118.com/index.php?g=Home&m=NewView&a=index&aid=";
+
+    static final String PIC_LINK_URL = "https://openapi.book118.com/getPreview.html?project_id={}&aid={}&view_token={}&aid_encode={}&page=";
+
     static final String FILE_NOT_EXIST = "文件不存在";
 
     static final List<String> TAG_OF_END = CollUtil.newArrayList("!", "Over", "Error", "Response", "ReadLimit");

diff --git a/src/main/java/me/rainking/DocumentBrowser.java b/src/main/java/me/rainking/DocumentBrowser.java
@@ -13,6 +13,7 @@
 
 import cn.hutool.core.io.file.FileReader;
 import cn.hutool.core.io.file.FileWriter;
+import cn.hutool.json.JSONObject;
 import com.itextpdf.text.DocumentException;
 
 import cn.hutool.core.bean.BeanUtil;
@@ -61,7 +62,7 @@ private int readDownloadedPage(String sDocumentId) {
         if (FileUtil.exist(filePath)) {
             FileReader fileReader = new FileReader(filePath);
             String sPage = fileReader.readString();
-            nPage = Integer.valueOf(sPage);
+            nPage = Integer.parseInt(sPage);
         }
         return nPage;
     }
@@ -83,7 +84,7 @@ private String moveToNextPage(PdfInfo pInfo) {
     }
 
     /**
-     *  下载文档的全部图片
+     * 下载文档的全部图片
      *
      * @param documentId 文档编号
      * @throws IOException       pdf创建错误
@@ -94,50 +95,107 @@ void downloadWholeDocument(String documentId) throws IOException, DocumentExcept
         FileUtil.mkdir(new File(srcPath));
         FileUtil.mkdir(new File(DES_PATH));
 
-        int page = 1, nDownloadedPage;
-        // 断点下载
-        nDownloadedPage = readDownloadedPage(documentId);
-        if (nDownloadedPage != 1) {
-            System.out.println(String.format("下载继续，当前已完成 %d 页", nDownloadedPage));
-            nDownloadedPage ++;
-        }
+        StaticLog.info("\n开始解析...");
+        String url = getPreviewData(documentId);
+        Map<String, String> pageAndUrl = getPicUrl(url);
+        StaticLog.info("\n解析完成，共{}页", pageAndUrl.size());
+
         StringBuilder currentDownPage = new StringBuilder();
-        PdfInfo pdfInfo = getPdfInfo(documentId);
-        String imgUrl;
         StaticLog.info("\n开始下载...");
-        while (pdfInfo != null) {
-            String nextPage = moveToNextPage(pdfInfo);
-            if (!Constants.TAG_OF_END.contains(nextPage)) {
-                //跳过已下载的文件
-                if (page < nDownloadedPage) {
-                    System.out.print(String.format("\r当前页码: [%d]  已跳过", page));
-                    page ++; continue;
-                }
-                imgUrl = (pdfInfo.getHost() + Constants.IMG_PREFIX_URL + nextPage);
-                downloadFile(imgUrl, srcPath + "/" + autoGenericCode(page, Constants.MAX_BIT_OF_PAGE) + ".gif");
-                currentDownPage.append("\r").append(String.format("已下载页数：[%d] 页", page));
-                System.out.print(currentDownPage);
-                // 保存当前下载完成页码
-                writeDownloadedPage(documentId, page);
-                page++;
-            } else {
-                break;
-            }
+        int i = 0;
+        for (Map.Entry<String, String> entry : pageAndUrl.entrySet()) {
+            downloadFile("http:" + entry.getValue(), srcPath + "/" + autoGenericCode(Integer.parseInt(entry.getKey())) + ".gif");
+            currentDownPage.append("\r").append(String.format("已下载页数：[%s] 页", ++i));
+            System.out.print(currentDownPage);
         }
+
         StaticLog.info("\n开始生成...");
         PdfGenerator.creatPDF(srcPath, DES_PATH + "/" + documentId + ".pdf", "gif");
         FileUtil.del(new File(srcPath));
+        StaticLog.info("\n生成完成");
     }
 
     /**
      * 将数字字符串的左边补充0，使其长度达到指定长度
      *
      * @param number 需要处理的数字
-     * @param width  补充后字符串长度
      * @return 通过填充0达到长度的数字字符串
      */
-    private String autoGenericCode(int number, int width) {
-        return String.format("%0" + width + "d", number);
+    private String autoGenericCode(int number) {
+        return String.format("%0" + Constants.MAX_BIT_OF_PAGE + "d", number);
+    }
+
+    private Map<String, String> getPicUrl(String baseUrl) {
+        Map<String, String> pageNumAndUrl = new HashMap<>();
+        // 第一次获取，解析总页数
+        int page = 1;
+        int step = 6;
+        String firstGet = HttpUtil.get(baseUrl + page);
+        JSONObject data = getJson(firstGet, "data");
+        data.forEach((k, v) -> pageNumAndUrl.put(k, v.toString()));
+        JSONObject pages = getJson(firstGet, "pages");
+        int limit = getPreviewLimit(pages);
+        StaticLog.info("\n共需解析{}页", limit);
+        if (limit > step) {
+            for (int i = page + step; i < limit; ) {
+                StaticLog.info("\n解析至第{}页", i);
+                // 必须休眠，否则获取不到结果
+                try {
+                    Thread.sleep(1000);
+                } catch (InterruptedException e) {
+                    e.printStackTrace();
+                }
+                String getContent = HttpUtil.get(baseUrl + i);
+                JSONObject dataContent = getJson(getContent, "data");
+                // 未获取到则进行重试，直至获取到
+                if (!"".equals(dataContent.getStr(String.valueOf(i)))) {
+                    i += step;
+                    dataContent.forEach((k, v) -> pageNumAndUrl.put(k, v.toString()));
+                }
+            }
+        }
+        return pageNumAndUrl;
+    }
+
+    /**
+     * Reads the number of previewable pages from the "pages" object of a preview response,
+     * for example {@code "pages": {"preview": "50", "actual": "796", "filetype": "pdf"}}.
+     *
+     * @param pages the parsed "pages" object
+     * @return the "preview" entry parsed as an int
+     */
+    private int getPreviewLimit(JSONObject pages) {
+        return Integer.parseInt(pages.getStr("preview"));
+    }
+
+    /**
+     * Parses the brace-delimited text that follows the first occurrence of {@code key}.
+     * <p>
+     * The object text runs from the first "{" after the key to the first "}" after that brace,
+     * so an object containing nested braces is cut at the inner closing brace.
+     *
+     * @param all the full response text
+     * @param key the name to search for before the opening brace
+     * @return the parsed object
+     */
+    private JSONObject getJson(String all, String key) {
+        int keyIndex = all.indexOf(key);
+        int openBrace = all.indexOf('{', keyIndex + 1);
+        int closeBrace = all.indexOf('}', openBrace + 1);
+        String objectText = all.substring(openBrace, closeBrace + 1);
+        return JSONUtil.parseObj(objectText);
+    }
+
+    /**
+     * Fetches the preview page of a document and builds the image-address request URL from the
+     * values found after the "PREVIEW_DATA" marker in that page.
+     *
+     * @param documentId document id appended to {@code Constants.PREVIEW_URL}
+     * @return {@code Constants.PIC_LINK_URL} with project id, aid, view token and encoded aid
+     *         filled in; the page number still has to be appended by the caller
+     */
+    private String getPreviewData(String documentId) {
+        String previewDataFull = HttpUtil.get(Constants.PREVIEW_URL + documentId);
+        // All values are searched from the marker onwards.
+        int pos = previewDataFull.indexOf("PREVIEW_DATA");
+        return StrUtil.format(
+                Constants.PIC_LINK_URL,
+                parseProjectId(previewDataFull, pos),
+                parseStrInPreviewData(previewDataFull, "aid", pos),
+                parseStrInPreviewData(previewDataFull, "view_token", pos),
+                parseStrInPreviewData(previewDataFull, "aid_encode", pos));
+    }
+
+    /**
+     * Parses the numeric value that follows "project_id" in the preview page text.
+     * <p>
+     * The value is the text between the first ":" after the key and the next ",", trimmed.
+     *
+     * @param previewDataFull the full preview page text
+     * @param pos             index from which the key is searched
+     * @return the project id
+     */
+    private int parseProjectId(final String previewDataFull, int pos) {
+        int keyIndex = previewDataFull.indexOf("project_id", pos);
+        int colonIndex = previewDataFull.indexOf(':', keyIndex + 1);
+        int commaIndex = previewDataFull.indexOf(',', colonIndex + 1);
+        String rawValue = previewDataFull.substring(colonIndex + 1, commaIndex);
+        return Integer.parseInt(rawValue.trim());
+    }
+
+    /**
+     * Extracts the single-quoted value that follows {@code key} in the preview page text.
+     *
+     * @param previewDataFull the full preview page text
+     * @param key             name to search for
+     * @param pos             index from which the key is searched
+     * @return the text between the first pair of single quotes after the key,
+     *         or {@code null} when the key is not found
+     */
+    private String parseStrInPreviewData(final String previewDataFull, String key, int pos) {
+        int keyIndex = previewDataFull.indexOf(key, pos);
+        if (keyIndex < 0) {
+            return null;
+        }
+        int openQuote = previewDataFull.indexOf('\'', keyIndex + 1);
+        int closeQuote = previewDataFull.indexOf('\'', openQuote + 1);
+        return previewDataFull.substring(openQuote + 1, closeQuote);
+    }
 
     /**
@@ -162,9 +220,9 @@ private PdfInfo getPdfInfo(String documentId) {
         String redirectPage = HttpUtil.get(pdfPageUrlStr);
         String href = ReUtil.get(Constants.HREF_PATTERN, redirectPage, 1);
         String fullUrl;
-        if(href != null){
-            fullUrl = viewHost.substring(0, viewHost.length()-1) + HtmlUtil.unescape(href);
-        }else {
+        if (href != null) {
+            fullUrl = viewHost.substring(0, viewHost.length() - 1) + HtmlUtil.unescape(href);
+        } else {
             fullUrl = pdfPageUrlStr;
         }
 
@@ -215,4 +273,10 @@ private void downloadFile(String url, String localPath) {
             StaticLog.error(e.getMessage());
         }
     }
+
+    /**
+     * Manual check: resolves the preview request URL of a sample document and prints the
+     * page-to-image-address map. Performs real network requests.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        DocumentBrowser browser = new DocumentBrowser();
+        String requestUrl = browser.getPreviewData("5032121100002141");
+        Map<String, String> pageAndUrl = browser.getPicUrl(requestUrl);
+        System.out.println(pageAndUrl);
+    }
 }