diff --git a/pom.xml b/pom.xml
index d78692e..e420326 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,7 +5,7 @@
4.0.0
info.llort
torrent-web-parser
- 1.3
+ 1.4
Torrent web parser
diff --git a/src/main/java/info/llort/torrent/Config.java b/src/main/java/info/llort/torrent/Config.java
index d3a88cf..98c3d67 100644
--- a/src/main/java/info/llort/torrent/Config.java
+++ b/src/main/java/info/llort/torrent/Config.java
@@ -1,7 +1,7 @@
package info.llort.torrent;
public abstract class Config {
- public static final String URL_WEB_TO_PARSE = "https://atomixhq.top";
+ public static final String URL_WEB_TO_PARSE = "https://atomixhq.art";
public static final String FIREFOX_DRIVER_PATH = "/home/jllort/git/torrentwebparser/geckodriver";
public static final String FILESYSTEM_DOWNLOAD_PATH = "/home/jllort/Descargas";
public static long FILE_DOWNLOAD_TIMEOUT = 2;
diff --git a/src/main/java/info/llort/torrent/Main.java b/src/main/java/info/llort/torrent/Main.java
index 9055b72..3da71f3 100644
--- a/src/main/java/info/llort/torrent/Main.java
+++ b/src/main/java/info/llort/torrent/Main.java
@@ -1,7 +1,7 @@
package info.llort.torrent;
import info.llort.torrent.util.Console;
-import info.llort.torrent.util.PctmixWebParserV2;
+import info.llort.torrent.util.PctmixWebParserV3;
import net.lightbody.bmp.BrowserMobProxy;
import net.lightbody.bmp.BrowserMobProxyServer;
import net.lightbody.bmp.client.ClientUtil;
@@ -108,7 +108,7 @@ public static void main(String[] args) {
WebDriver driver = new FirefoxDriver(firefoxOptions);
- PctmixWebParserV2.capture(urlWebToParse, geckoDriverPath, filters, downloadTimeOut, driver, proxy);
+ PctmixWebParserV3.capture(urlWebToParse, geckoDriverPath, filters, downloadTimeOut, driver, proxy);
// closing the driver
driver.close();
diff --git a/src/main/java/info/llort/torrent/util/PctmixWebParserV3.java b/src/main/java/info/llort/torrent/util/PctmixWebParserV3.java
new file mode 100644
index 0000000..61ec1f2
--- /dev/null
+++ b/src/main/java/info/llort/torrent/util/PctmixWebParserV3.java
@@ -0,0 +1,156 @@
+package info.llort.torrent.util;
+
+import com.google.common.net.HttpHeaders;
+import info.llort.torrent.bean.PageLinkInfo;
+import net.lightbody.bmp.BrowserMobProxy;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.openqa.selenium.JavascriptExecutor;
+import org.openqa.selenium.WebDriver;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.fusesource.jansi.Ansi.Color.*;
+
+/**
+ * Scrapes the target site through a Selenium-driven browser in three stages:
+ * <ol>
+ *   <li>{@link #findMainPageLinks} collects the detail-page links of the landing page that match
+ *       the user filters;</li>
+ *   <li>{@link #findPageTorrentLinks} extracts the {@code window.open("...")} targets of each
+ *       detail page;</li>
+ *   <li>{@link #downloadTorrentLinks} resolves the final {@code .torrent} URL of each target and
+ *       {@link #downloadTorrentFile} navigates to it.</li>
+ * </ol>
+ * From stage two on, the {@code Referer} header is set on the proxy before every navigation.
+ */
+public class PctmixWebParserV3 {
+    /** Captures the first argument of {@code window.open("...")}; reluctant, so one match per call. */
+    private static final Pattern WINDOW_OPEN_PATTERN = Pattern.compile("window\\.open\\(\"(.*?)\"\\)");
+
+    /**
+     * Captures the quoted value of {@code parseInt("...");}. The quantifier is reluctant so the
+     * capture stops at the first closing {@code ");} even when several statements share a line.
+     */
+    private static final Pattern TORRENT_ID_PATTERN = Pattern.compile("parseInt\\(\"(.*?)\"\\);");
+
+    /**
+     * Synchronous POST of the torrent id to {@code to.php}. The id is passed as a script argument
+     * ({@code arguments[0]}) instead of being concatenated into the script, so a value scraped from
+     * the page can never alter the executed JavaScript.
+     */
+    private static final String RESOLVE_TORRENT_SCRIPT =
+            "var xhr = new XMLHttpRequest();\n"
+            + "xhr.open('POST', 'https://atomtt.com/to.php', false);\n"
+            + "xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded');\n"
+            + "xhr.send('t=' + encodeURIComponent(arguments[0]));\n"
+            + "return xhr.response;\n";
+
+    /**
+     * Prefix of the final torrent file URL.
+     * NOTE(review): same host as Config.URL_WEB_TO_PARSE; consider deriving it from there.
+     */
+    private static final String TORRENT_DOWNLOAD_BASE_URL = "https://atomixhq.art/t_download/";
+
+    /**
+     * Runs the three stages in order, printing the links found by each stage and finally
+     * navigating to every resolved torrent file link.
+     *
+     * @param urlWebToParse   landing page to scan
+     * @param geckoDriverPath forwarded to {@link #findMainPageLinks}, which does not use it
+     * @param filters         case-insensitive substrings; a link is kept when it contains any of them
+     * @param downloadTimeOut timeout in seconds applied to the driver before each file download
+     * @param driver          browser used for every navigation
+     * @param proxy           proxy on which the {@code Referer} header is set
+     * @throws IOException          declared for callers; propagated from the stage methods
+     * @throws InterruptedException declared for callers; propagated from {@link #downloadTorrentFile}
+     */
+    public static void capture(String urlWebToParse, String geckoDriverPath, List<String> filters, long downloadTimeOut, WebDriver driver, BrowserMobProxy proxy) throws IOException, InterruptedException {
+        Set<PageLinkInfo> mainPageLinks = findMainPageLinks(urlWebToParse, geckoDriverPath, filters, driver);
+        for (PageLinkInfo pli : mainPageLinks) {
+            Console.println("Main page link: " + pli.getUrl(), WHITE);
+        }
+
+        Set<PageLinkInfo> torrentPageLinks = findPageTorrentLinks(mainPageLinks, driver, proxy);
+        for (PageLinkInfo pli : torrentPageLinks) {
+            Console.println("Torrent page link: " + pli.getUrl(), WHITE);
+        }
+
+        Set<PageLinkInfo> downloadTorrentLinks = downloadTorrentLinks(torrentPageLinks, driver, proxy);
+        for (PageLinkInfo pli : downloadTorrentLinks) {
+            Console.println("Download link: " + pli.getUrl(), WHITE);
+            downloadTorrentFile(pli, driver, downloadTimeOut, proxy);
+        }
+    }
+
+    /**
+     * Loads {@code url} and returns one entry per anchor whose {@code href} is an https link to a
+     * supported category and contains at least one filter. The entry's referer is the original
+     * href; its url is the href with {@code /descargar/} replaced by {@code /descargar/torrent/}.
+     *
+     * @param url             page to load
+     * @param geckoDriverPath unused; kept so existing callers keep compiling
+     * @param filters         case-insensitive substrings to look for in each href
+     * @param driver          browser used to load the page
+     * @return the matching links, possibly empty
+     * @throws IOException declared for callers; not thrown by this implementation
+     */
+    public static Set<PageLinkInfo> findMainPageLinks(String url, String geckoDriverPath, List<String> filters, WebDriver driver) throws IOException {
+        // Inspired by https://www.javatpoint.com/selenium-webdriver-running-test-on-firefox-browser-gecko-driver
+        driver.get(url);
+
+        Set<PageLinkInfo> links = new HashSet<>();
+        Document doc = Jsoup.parse(driver.getPageSource());
+        for (Element anchor : doc.select("a[href]")) {
+            String href = anchor.attr("href");
+            if (!href.startsWith("https:") || !isCandidateLink(href) || !matchesAnyFilter(href, filters)) {
+                continue;
+            }
+            PageLinkInfo pli = new PageLinkInfo();
+            pli.setReferer(href);
+            // /descargar/ must be replaced by /descargar/torrent/
+            pli.setUrl(href.replace("/descargar/", "/descargar/torrent/"));
+            links.add(pli);
+        }
+
+        return links;
+    }
+
+    /** Tells whether the href points to one of the supported movie or series categories. */
+    private static boolean isCandidateLink(String href) {
+        return (href.contains("/descargar/peliculas-castellano/") && href.contains("blurayrip"))
+                || (href.contains("/descargar/peliculas-x264-mkv/") && href.contains("bluray"))
+                || (href.contains("/descargar/cine-alta-definicion-hd/") && href.contains("bluray"))
+                || href.contains("/descargar/serie/")
+                || href.contains("/descargar/serie-en-hd/")
+                || href.contains("/descargar/serie-4k/");
+    }
+
+    /** Tells whether the href contains any filter, ignoring case (locale-independent). */
+    private static boolean matchesAnyFilter(String href, List<String> filters) {
+        String lowerHref = href.toLowerCase(java.util.Locale.ROOT);
+        for (String filter : filters) {
+            if (lowerHref.contains(filter.toLowerCase(java.util.Locale.ROOT))) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Visits every page link (with its referer set on the proxy) and returns one entry per
+     * {@code window.open("...")} target found in the page source. The new entry's referer is the
+     * visited page url.
+     *
+     * @param pageLinks links produced by {@link #findMainPageLinks}
+     * @param driver    browser used to load each page
+     * @param proxy     proxy on which the {@code Referer} header is set
+     * @return the extracted links, possibly empty
+     * @throws IOException declared for callers; not thrown by this implementation
+     */
+    public static Set<PageLinkInfo> findPageTorrentLinks(Set<PageLinkInfo> pageLinks, WebDriver driver, BrowserMobProxy proxy) throws IOException {
+        Set<PageLinkInfo> links = new HashSet<>();
+        for (PageLinkInfo pli : pageLinks) {
+            // Setting referer before jump to the page
+            proxy.addHeader(HttpHeaders.REFERER, pli.getReferer());
+            driver.get(pli.getUrl());
+
+            Matcher matcher = WINDOW_OPEN_PATTERN.matcher(driver.getPageSource());
+            while (matcher.find()) {
+                String tLink = matcher.group(1);
+                // The page uses scheme-less links; only add the scheme when it is really missing
+                // so an absolute link is not turned into "https:https://...".
+                if (!tLink.startsWith("http")) {
+                    tLink = "https:" + tLink;
+                }
+                PageLinkInfo newPli = new PageLinkInfo();
+                newPli.setUrl(tLink);
+                newPli.setReferer(pli.getUrl());
+                links.add(newPli);
+            }
+        }
+        return links;
+    }
+
+    /**
+     * Visits every torrent page, reads the id from its {@code parseInt("...");} statement, posts
+     * it to {@code to.php} from inside the browser and builds the final {@code .torrent} link from
+     * the response. Pages without an id, or whose request returns nothing, are logged and skipped.
+     *
+     * @param pageLinks links produced by {@link #findPageTorrentLinks}
+     * @param driver    browser used to load each page; must also be a {@link JavascriptExecutor}
+     * @param proxy     proxy on which the {@code Referer} header is set
+     * @return the resolved torrent file links, possibly empty
+     * @throws IOException declared for callers; not thrown by this implementation
+     */
+    public static Set<PageLinkInfo> downloadTorrentLinks(Set<PageLinkInfo> pageLinks, WebDriver driver, BrowserMobProxy proxy) throws IOException {
+        Set<PageLinkInfo> links = new HashSet<>();
+        for (PageLinkInfo pli : pageLinks) {
+            // Setting referer before jump to the page
+            proxy.addHeader(HttpHeaders.REFERER, pli.getReferer());
+            driver.get(pli.getUrl());
+
+            // Capture int value
+            Matcher matcher = TORRENT_ID_PATTERN.matcher(driver.getPageSource());
+            if (!matcher.find()) {
+                Console.println("intValue NOT found for page: " + pli.getUrl(), RED);
+                continue;
+            }
+            String intValue = matcher.group(1);
+            Console.println("intValue found: " + intValue, WHITE);
+
+            if (!(driver instanceof JavascriptExecutor)) {
+                Console.println("Driver cannot execute javascript, skipping page: " + pli.getUrl(), RED);
+                continue;
+            }
+
+            // Referer must be atomtt, i.e. the page that was just loaded
+            proxy.addHeader(HttpHeaders.REFERER, pli.getUrl());
+            Console.println("executing javascript: " + RESOLVE_TORRENT_SCRIPT, YELLOW);
+            Object result = ((JavascriptExecutor) driver).executeScript(RESOLVE_TORRENT_SCRIPT, intValue);
+            Console.println("Javascript result: " + result, GREEN);
+            if (result == null || result.toString().trim().isEmpty()) {
+                // Without a response there is no file name to build the link from.
+                Console.println("Empty javascript result for page: " + pli.getUrl(), RED);
+                continue;
+            }
+
+            String torrentFileLinkValue = TORRENT_DOWNLOAD_BASE_URL + result + ".torrent";
+            Console.println("torrentFileLinkValue: " + torrentFileLinkValue, GREEN);
+            PageLinkInfo newPli = new PageLinkInfo();
+            newPli.setUrl(torrentFileLinkValue);
+            newPli.setReferer(pli.getUrl());
+            links.add(newPli);
+        }
+        return links;
+    }
+
+    /**
+     * Navigates to the torrent file link after setting its referer and applying {@code timeOut}
+     * (seconds) as script, page-load and implicit-wait timeout. Failures never propagate: a
+     * timeout is ignored, any other exception is logged.
+     *
+     * @param pli     link to fetch together with the referer to send
+     * @param driver  browser used for the navigation
+     * @param timeOut timeout in seconds
+     * @param proxy   proxy on which the {@code Referer} header is set
+     * @throws InterruptedException declared for callers; not thrown by this implementation
+     */
+    public static void downloadTorrentFile(PageLinkInfo pli, WebDriver driver, long timeOut, BrowserMobProxy proxy) throws InterruptedException {
+        try {
+            // Referer must be atomtt
+            proxy.addHeader(HttpHeaders.REFERER, pli.getReferer());
+            // Set timeout otherwise the driver lock
+            driver.manage().timeouts().scriptTimeout(Duration.ofSeconds(timeOut));
+            driver.manage().timeouts().pageLoadTimeout(Duration.ofSeconds(timeOut));
+            driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(timeOut));
+            driver.navigate().to(pli.getUrl());
+        } catch (org.openqa.selenium.TimeoutException ignored) {
+            // NOTE(review): presumably expected, since the navigation starts a file download
+            // rather than loading a page and so runs into the timeout set above; confirm.
+        } catch (Exception e) {
+            // Best effort: keep processing the remaining links, but leave a trace of the failure.
+            Console.println("Download failed for " + pli.getUrl() + ": " + e, RED);
+        }
+    }
+}
diff --git a/src/test/java/info/jllort/torrent/ParserCheckIssue27.java b/src/test/java/info/jllort/torrent/ParserCheckIssue27.java
new file mode 100644
index 0000000..b9f95b1
--- /dev/null
+++ b/src/test/java/info/jllort/torrent/ParserCheckIssue27.java
@@ -0,0 +1,108 @@
+package info.jllort.torrent;
+
+import com.google.common.net.HttpHeaders;
+import info.llort.torrent.Config;
+import net.lightbody.bmp.BrowserMobProxy;
+import net.lightbody.bmp.BrowserMobProxyServer;
+import net.lightbody.bmp.client.ClientUtil;
+import net.lightbody.bmp.proxy.CaptureType;
+import org.openqa.selenium.JavascriptExecutor;
+import org.openqa.selenium.Proxy;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.firefox.FirefoxDriver;
+import org.openqa.selenium.firefox.FirefoxOptions;
+import org.openqa.selenium.firefox.FirefoxProfile;
+import org.openqa.selenium.remote.CapabilityType;
+
+import java.net.Inet4Address;
+import java.net.UnknownHostException;
+import java.time.Duration;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Manual check, run through {@link #main}, that walks one hard-coded torrent page through the
+ * download flow: load the page behind a referer-setting proxy, read the id from its
+ * {@code parseInt("...");} statement, post it to {@code to.php} from inside the browser and
+ * navigate to the resulting {@code .torrent} link.
+ *
+ * <p>NOTE(review): neither the driver nor the proxy is shut down here; presumably so the browser
+ * can finish the download. Stop them by hand after the run.
+ */
+public class ParserCheckIssue27 {
+    /**
+     * Runs the check against a real Firefox instance configured from {@link Config}.
+     *
+     * @param args ignored
+     * @throws UnknownHostException if the local host address cannot be resolved
+     * @throws InterruptedException declared for callers; not thrown by this implementation
+     */
+    public static void main(String[] args) throws UnknownHostException, InterruptedException {
+        String geckoDriverPath = Config.FIREFOX_DRIVER_PATH;
+        String dstPath = Config.FILESYSTEM_DOWNLOAD_PATH;
+        long timeOut = Config.FILE_DOWNLOAD_TIMEOUT;
+
+        String referer = "https://atomixhq.art/descargar/torrent/peliculas-castellano/un-segundo-one-second--2022-/blurayrip-ac3-5-1/";
+        String link = "https://atomtt.com/t_download/167641/un-segundo--one-second---2022-/";
+
+        // Creating proxy
+        BrowserMobProxy proxy = new BrowserMobProxyServer();
+        proxy.start(8080);
+        Proxy seleniumProxy = ClientUtil.createSeleniumProxy(proxy);
+
+        String hostIp = Inet4Address.getLocalHost().getHostAddress();
+        seleniumProxy.setHttpProxy(hostIp + ":" + proxy.getPort());
+        seleniumProxy.setSslProxy(hostIp + ":" + proxy.getPort());
+        proxy.enableHarCaptureTypes(CaptureType.REQUEST_CONTENT, CaptureType.RESPONSE_CONTENT);
+
+        // Referer sent while loading the torrent page
+        proxy.addHeader(HttpHeaders.REFERER, referer);
+
+        // Creating the driver
+        System.setProperty("webdriver.gecko.driver", geckoDriverPath);
+        // firefox profile to autosave
+        FirefoxOptions firefoxOptions = new FirefoxOptions();
+        FirefoxProfile fxProfile = new FirefoxProfile();
+        fxProfile.setPreference("browser.download.folderList", 2);
+        fxProfile.setPreference("browser.download.dir", dstPath);
+        fxProfile.setPreference("browser.helperApps.neverAsk.saveToDisk","application/octet-stream");
+        fxProfile.setPreference("pdfjs.enabledCache.state",false);
+        firefoxOptions.setProfile(fxProfile);
+        // Setting the proxy
+        firefoxOptions.setCapability(CapabilityType.PROXY, seleniumProxy);
+        firefoxOptions.setCapability(CapabilityType.ACCEPT_SSL_CERTS, true);
+
+        WebDriver driver = new FirefoxDriver(firefoxOptions);
+
+        // Page with link
+        driver.get(link);
+        String bodySource = driver.getPageSource();
+        // Reluctant quantifier: stop at the first closing "); on the line.
+        String linkRegex = "parseInt\\(\"(.*?)\"\\);";
+        Pattern pattern = Pattern.compile(linkRegex);
+        Matcher matcher = pattern.matcher(bodySource);
+        if (matcher.find()) {
+            String intValue = matcher.group(1);
+            System.out.println("intValue found: " + intValue);
+            if (driver instanceof JavascriptExecutor) {
+                // Referer must be atomtt: the page that was just loaded, not an unrelated torrent.
+                // The same referer stays in place for the final navigation to the torrent file.
+                proxy.addHeader(HttpHeaders.REFERER, link);
+                // Javascript request to be executed by selenium
+                String js = "var xhr = new XMLHttpRequest();\n";
+                js += "xhr.open('POST', 'https://atomtt.com/to.php', false);\n";
+                js += "xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded');\n";
+                js += "xhr.send('t=" + intValue + "');\n";
+                js += "return xhr.response;\n";
+                System.out.println(js);
+                Object result = ((JavascriptExecutor) driver).executeScript(js);
+                System.out.println("Result value of cal to top: " + result);
+                String torrentFileLinkValue = "https://atomixhq.art/t_download/" + result + ".torrent";
+                System.out.println(torrentFileLinkValue);
+                // Short timeouts so the driver does not block on the download navigation
+                driver.manage().timeouts().scriptTimeout(Duration.ofSeconds(timeOut));
+                driver.manage().timeouts().pageLoadTimeout(Duration.ofSeconds(timeOut));
+                driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(timeOut));
+                driver.navigate().to(torrentFileLinkValue);
+            }
+        }
+    }
+}
diff --git a/src/test/java/info/jllort/torrent/RegexTest.java b/src/test/java/info/jllort/torrent/RegexTest.java
new file mode 100644
index 0000000..1b08e1a
--- /dev/null
+++ b/src/test/java/info/jllort/torrent/RegexTest.java
@@ -0,0 +1,35 @@
+package info.jllort.torrent;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Stand-alone regex check: finds the quoted linkonclick.com URL inside a hard-coded JavaScript
+ * snippet and prints it without the surrounding quotes.
+ */
+public class RegexTest {
+    /** Hard-coded JavaScript snippet used as the text to search. */
+    private static final String PAGE_SCRIPT = "function openTorrent(u)\n" +
+            "{\n" +
+            "\n" +
+            "\t \n" +
+            "\n" +
+            "\tvar link = \"https://www.linkonclick.com/jump/next.php?r=5302219\";\n" +
+            "\n" +
+            "\twindow.open(link);\n" +
+            "\n" +
+            "\t\n" +
+            "\twindow.location.href = u;\t\n" +
+            "\n" +
+            "}\t\n" +
+            "\n" +
+            "\n" +
+            "var tid = parseInt(\"167641\");\n" +
+            "\n" +
+            "var btn = document.getElementById(\"btntor\");\n" +
+            "btn.addEventListener(\"click\", function() ";
+
+    /**
+     * Prints the first quoted URL matched in the snippet, or nothing when there is no match.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        Matcher quotedUrl = Pattern
+                .compile("([\"]https://www.linkonclick.com/jump/next.*[\"])")
+                .matcher(PAGE_SCRIPT);
+        if (!quotedUrl.find()) {
+            return;
+        }
+        // The match still carries its double quotes; strip them before printing.
+        String url = quotedUrl.group(0).replace("\"", "");
+        System.out.println(url);
+    }
+}
diff --git a/src/test/java/info/jllort/torrent/RegexTest2.java b/src/test/java/info/jllort/torrent/RegexTest2.java
new file mode 100644
index 0000000..fc55571
--- /dev/null
+++ b/src/test/java/info/jllort/torrent/RegexTest2.java
@@ -0,0 +1,25 @@
+package info.jllort.torrent;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Stand-alone regex check: extracts the quoted argument of {@code parseInt("...");} from a
+ * hard-coded JavaScript snippet and prints it.
+ */
+public class RegexTest2 {
+    /** Hard-coded JavaScript snippet used as the text to search. */
+    private static final String PAGE_SCRIPT = "function openTorrent(u)\n" +
+            "}\t\n" +
+            "\n" +
+            "\n" +
+            "var tid = parseInt(\"167632\");\n" +
+            "\n" +
+            "var btn = document.getElementById(\"btntor\");\n" +
+            "btn.addEventListener(\"click\", function() ";
+
+    /**
+     * Prints the value captured by the inner group, or nothing when the snippet does not match.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        Matcher parseIntCall = Pattern
+                .compile("(parseInt\\(\"(.*)\"\\);)")
+                .matcher(PAGE_SCRIPT);
+        if (!parseIntCall.find()) {
+            return;
+        }
+        // Group 1 is the whole statement; group 2 is the text between the quotes.
+        System.out.println(parseIntCall.group(2));
+    }
+}