Skip to content

Commit

Permalink
Merge pull request #28 from darkman97i/issue/27
Browse files Browse the repository at this point in the history
Changes in the link of the torrent
  • Loading branch information
darkman97i committed Feb 5, 2022
2 parents 0f9a730 + ef24303 commit 132f388
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>info.llort</groupId>
<artifactId>torrent-web-parser</artifactId>
<version>1.3</version>
<version>1.4</version>
<name>Torrent web parser</name>

<developers>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/info/llort/torrent/Config.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package info.llort.torrent;

public abstract class Config {
public static final String URL_WEB_TO_PARSE = "https://atomixhq.top";
public static final String URL_WEB_TO_PARSE = "https://atomixhq.art";
public static final String FIREFOX_DRIVER_PATH = "/home/jllort/git/torrentwebparser/geckodriver";
public static final String FILESYSTEM_DOWNLOAD_PATH = "/home/jllort/Descargas";
public static long FILE_DOWNLOAD_TIMEOUT = 2;
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/info/llort/torrent/Main.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package info.llort.torrent;

import info.llort.torrent.util.Console;
import info.llort.torrent.util.PctmixWebParserV2;
import info.llort.torrent.util.PctmixWebParserV3;
import net.lightbody.bmp.BrowserMobProxy;
import net.lightbody.bmp.BrowserMobProxyServer;
import net.lightbody.bmp.client.ClientUtil;
Expand Down Expand Up @@ -108,7 +108,7 @@ public static void main(String[] args) {

WebDriver driver = new FirefoxDriver(firefoxOptions);

PctmixWebParserV2.capture(urlWebToParse, geckoDriverPath, filters, downloadTimeOut, driver, proxy);
PctmixWebParserV3.capture(urlWebToParse, geckoDriverPath, filters, downloadTimeOut, driver, proxy);

// closing the driver
driver.close();
Expand Down
156 changes: 156 additions & 0 deletions src/main/java/info/llort/torrent/util/PctmixWebParserV3.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package info.llort.torrent.util;

import com.google.common.net.HttpHeaders;
import info.llort.torrent.bean.PageLinkInfo;
import net.lightbody.bmp.BrowserMobProxy;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;

import java.io.IOException;
import java.time.Duration;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.fusesource.jansi.Ansi.Color.*;

/**
 * Selenium-driven scraper that walks from the site's main page to the torrent
 * files in three stages: main page links, intermediate torrent page links, and
 * final {@code .torrent} download links.
 *
 * <p>Every navigation goes through the supplied {@link BrowserMobProxy} so the
 * {@code Referer} header can be set before each request.
 */
public class PctmixWebParserV3 {
    /** Matches {@code window.open("...")}; group 1 is the quoted argument. */
    private static final Pattern WINDOW_OPEN_PATTERN = Pattern.compile("window\\.open\\(\"(.*?)\"\\)");

    /**
     * Matches {@code parseInt("<digits>");}; group 1 is the numeric id.
     * Only digits are accepted because the value is spliced into a script that
     * is executed in the browser: a greedy "anything" capture could both
     * over-match and inject arbitrary JavaScript.
     */
    private static final Pattern TORRENT_ID_PATTERN = Pattern.compile("parseInt\\(\"(\\d+)\"\\);");

    /** Endpoint that receives the id via POST and answers with the torrent file token. */
    private static final String TORRENT_TOKEN_ENDPOINT = "https://atomtt.com/to.php";

    // NOTE(review): this host is hard-coded while the site URL itself is configurable;
    // confirm whether it should follow the configured URL when the domain changes.
    private static final String TORRENT_DOWNLOAD_BASE_URL = "https://atomixhq.art/t_download/";

    /**
     * Runs the three scraping stages, logging every link found, and finally
     * navigates to each download link so the browser saves the file.
     *
     * @param urlWebToParse   main page of the site
     * @param geckoDriverPath passed through to {@link #findMainPageLinks}; not used there
     * @param filters         case-insensitive substrings a main page link must contain
     * @param downloadTimeOut timeout in seconds applied to the driver before each download
     * @param driver          browser driver used for every navigation
     * @param proxy           proxy used to set the Referer header
     */
    public static void capture(String urlWebToParse, String geckoDriverPath, List<String> filters, long downloadTimeOut, WebDriver driver, BrowserMobProxy proxy) throws IOException, InterruptedException {
        Set<PageLinkInfo> mainPageLinks = findMainPageLinks(urlWebToParse, geckoDriverPath, filters, driver);
        for (PageLinkInfo pli : mainPageLinks) {
            Console.println("Main page link: " + pli.getUrl(), WHITE);
        }

        Set<PageLinkInfo> torrentPageLinks = findPageTorrentLinks(mainPageLinks, driver, proxy);
        for (PageLinkInfo pli : torrentPageLinks) {
            Console.println("Torrent page link: " + pli.getUrl(), WHITE);
        }

        Set<PageLinkInfo> downloadTorrentLinks = downloadTorrentLinks(torrentPageLinks, driver, proxy);
        for (PageLinkInfo pli : downloadTorrentLinks) {
            Console.println("Download link: " + pli.getUrl(), WHITE);
            downloadTorrentFile(pli, driver, downloadTimeOut, proxy);
        }
    }

    /**
     * Loads {@code url} and collects the anchors that point to a wanted download
     * category and contain at least one of the filters.
     *
     * <p>For each hit the original href is stored as the referer and the url is the
     * href with {@code /descargar/} replaced by {@code /descargar/torrent/}.
     *
     * @param url             page to load
     * @param geckoDriverPath unused; kept for signature compatibility
     * @param filters         case-insensitive substrings, any of which selects a link
     * @param driver          browser driver used to load the page
     * @return the selected links (empty when nothing matches)
     */
    public static Set<PageLinkInfo> findMainPageLinks(String url, String geckoDriverPath, List<String> filters, WebDriver driver) throws IOException {
        // Inspired by https://www.javatpoint.com/selenium-webdriver-running-test-on-firefox-browser-gecko-driver
        driver.get(url);

        Set<PageLinkInfo> links = new HashSet<>();
        Document doc = Jsoup.parse(driver.getPageSource());
        Elements anchors = doc.select("a[href]");
        for (Element anchor : anchors) {
            String href = anchor.attr("href");
            if (!href.startsWith("https:") || !isWantedCategory(href) || !matchesAnyFilter(href, filters)) {
                continue;
            }
            PageLinkInfo pli = new PageLinkInfo();
            pli.setReferer(href);
            // /descargar/ must be replaced by /descargar/torrent/
            pli.setUrl(href.replace("/descargar/", "/descargar/torrent/"));
            links.add(pli);
        }

        return links;
    }

    /** Tells whether the href belongs to one of the movie/series download categories of interest. */
    private static boolean isWantedCategory(String href) {
        return (href.contains("/descargar/peliculas-castellano/") && href.contains("blurayrip"))
                || (href.contains("/descargar/peliculas-x264-mkv/") && href.contains("bluray"))
                || (href.contains("/descargar/cine-alta-definicion-hd/") && href.contains("bluray"))
                || href.contains("/descargar/serie/")
                || href.contains("/descargar/serie-en-hd/")
                || href.contains("/descargar/serie-4k/");
    }

    /** Tells whether the href contains any filter, ignoring case. */
    private static boolean matchesAnyFilter(String href, List<String> filters) {
        // Locale.ROOT keeps the comparison independent of the JVM default locale.
        String lowerHref = href.toLowerCase(java.util.Locale.ROOT);
        for (String filter : filters) {
            if (lowerHref.contains(filter.toLowerCase(java.util.Locale.ROOT))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Visits every page in {@code pageLinks} and extracts the targets of the
     * {@code window.open("...")} calls found in its source.
     *
     * @param pageLinks pages to visit; each one's referer is sent when loading its url
     * @param driver    browser driver used to load the pages
     * @param proxy     proxy used to set the Referer header
     * @return one entry per extracted target, with the visited page as referer
     */
    public static Set<PageLinkInfo> findPageTorrentLinks(Set<PageLinkInfo> pageLinks, WebDriver driver, BrowserMobProxy proxy) throws IOException {
        Set<PageLinkInfo> links = new HashSet<>();
        for (PageLinkInfo pli : pageLinks) {
            // Setting referer before jump to the page
            proxy.addHeader(HttpHeaders.REFERER, pli.getReferer());
            driver.get(pli.getUrl());

            Matcher matcher = WINDOW_OPEN_PATTERN.matcher(driver.getPageSource());
            while (matcher.find()) {
                String tLink = matcher.group(1);
                // The page emits the link without a scheme; add https unless one is already there,
                // otherwise the result would be a doubled "https:https://..." prefix.
                if (!tLink.startsWith("https:") && !tLink.startsWith("http:")) {
                    tLink = "https:" + tLink;
                }
                PageLinkInfo newPli = new PageLinkInfo();
                newPli.setUrl(tLink);
                newPli.setReferer(pli.getUrl());
                links.add(newPli);
            }
        }
        return links;
    }

    /**
     * Visits every torrent page, reads the numeric id passed to {@code parseInt("...")}
     * in its source, posts it to the token endpoint from inside the browser and builds
     * the final {@code .torrent} url from the answer.
     *
     * <p>Pages without an id, drivers that cannot run JavaScript and empty answers are
     * logged and skipped.
     *
     * @param pageLinks torrent pages to visit
     * @param driver    browser driver; must also be a {@link JavascriptExecutor} to produce links
     * @param proxy     proxy used to set the Referer header
     * @return the download links, each with the visited torrent page as referer
     */
    public static Set<PageLinkInfo> downloadTorrentLinks(Set<PageLinkInfo> pageLinks, WebDriver driver, BrowserMobProxy proxy) throws IOException {
        Set<PageLinkInfo> links = new HashSet<>();
        for (PageLinkInfo pli : pageLinks) {
            // Setting referer before jump to the page
            proxy.addHeader(HttpHeaders.REFERER, pli.getReferer());
            driver.get(pli.getUrl());

            // Capture int value
            Matcher matcher = TORRENT_ID_PATTERN.matcher(driver.getPageSource());
            if (!matcher.find()) {
                Console.println("intValue NOT found for page: " + pli.getUrl(), RED);
                continue;
            }
            String intValue = matcher.group(1);
            Console.println("intValue found: " + intValue, WHITE);

            if (!(driver instanceof JavascriptExecutor)) {
                Console.println("Driver can not execute javascript for page: " + pli.getUrl(), RED);
                continue;
            }

            // Referer must be atomtt
            proxy.addHeader(HttpHeaders.REFERER, pli.getUrl()); // atomtt referer !!!
            String js = buildTokenRequestScript(intValue);
            Console.println("executing javascript: " + js, YELLOW);
            Object result = ((JavascriptExecutor) driver).executeScript(js);
            Console.println("Javascript result: " + result, GREEN);

            String token = result == null ? "" : result.toString().trim();
            if (token.isEmpty()) {
                // Without a token the link would be ".../null.torrent"; nothing useful to download.
                Console.println("Empty javascript result for page: " + pli.getUrl(), RED);
                continue;
            }

            String torrentFileLinkValue = TORRENT_DOWNLOAD_BASE_URL + token + ".torrent";
            Console.println("torrentFileLinkValue: " + torrentFileLinkValue, GREEN);
            PageLinkInfo newPli = new PageLinkInfo();
            newPli.setUrl(torrentFileLinkValue);
            newPli.setReferer(pli.getUrl());
            links.add(newPli);
        }
        return links;
    }

    /** Builds the synchronous XHR script that posts {@code t=<id>} and returns the response body. */
    private static String buildTokenRequestScript(String torrentId) {
        // Javascript request to be executed by selenium
        String js = "var xhr = new XMLHttpRequest();\n";
        js += "xhr.open('POST', '" + TORRENT_TOKEN_ENDPOINT + "', false);\n";
        js += "xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded');\n";
        js += "xhr.send('t=" + torrentId + "');\n";
        js += "return xhr.response;\n";
        return js;
    }

    /**
     * Navigates to the download link so the browser fetches the file.
     *
     * <p>The script, page-load and implicit-wait timeouts of the driver are set to
     * {@code timeOut} seconds first; they stay in effect on the driver afterwards.
     *
     * @param pli     link to download; its referer is sent with the request
     * @param driver  browser driver used to navigate
     * @param timeOut timeout in seconds
     * @param proxy   proxy used to set the Referer header
     */
    public static void downloadTorrentFile(PageLinkInfo pli, WebDriver driver, long timeOut, BrowserMobProxy proxy) throws InterruptedException {
        try {
            // Referer must be atomtt
            proxy.addHeader(HttpHeaders.REFERER, pli.getReferer());
            // Set timeout otherwise the driver lock
            Duration limit = Duration.ofSeconds(timeOut);
            driver.manage().timeouts().scriptTimeout(limit);
            driver.manage().timeouts().pageLoadTimeout(limit);
            driver.manage().timeouts().implicitlyWait(limit);
            driver.navigate().to(pli.getUrl());
        } catch (Exception ignored) {
            // Deliberately ignored: the timeouts above exist so the driver does not lock
            // on the download, and a failure on one link must not stop the remaining ones.
        }
    }
}
108 changes: 108 additions & 0 deletions src/test/java/info/jllort/torrent/ParserCheckIssue27.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package info.jllort.torrent;

import com.google.common.net.HttpHeaders;
import info.llort.torrent.Config;
import net.lightbody.bmp.BrowserMobProxy;
import net.lightbody.bmp.BrowserMobProxyServer;
import net.lightbody.bmp.client.ClientUtil;
import net.lightbody.bmp.proxy.CaptureType;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.Proxy;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.FirefoxOptions;
import org.openqa.selenium.firefox.FirefoxProfile;
import org.openqa.selenium.remote.CapabilityType;

import java.net.Inet4Address;
import java.net.UnknownHostException;
import java.time.Duration;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Manual check for issue 27: opens one known torrent page in Firefox through a
 * BrowserMobProxy, reads the id passed to {@code parseInt("...")}, posts it to
 * {@code to.php} from inside the browser and navigates to the resulting
 * {@code .torrent} url.
 *
 * <p>Needs a local Firefox/geckodriver and network access, so it is a runnable
 * program rather than an automated test.
 */
public class ParserCheckIssue27 {
    public static void main(String[] args) throws UnknownHostException, InterruptedException {
        String geckoDriverPath = Config.FIREFOX_DRIVER_PATH;
        String dstPath = Config.FILESYSTEM_DOWNLOAD_PATH;
        long timeOut = Config.FILE_DOWNLOAD_TIMEOUT;

        String referer = "https://atomixhq.art/descargar/torrent/peliculas-castellano/un-segundo-one-second--2022-/blurayrip-ac3-5-1/";
        String link = "https://atomtt.com/t_download/167641/un-segundo--one-second---2022-/";

        // Creating proxy
        BrowserMobProxy proxy = new BrowserMobProxyServer();
        proxy.start(8080);
        WebDriver driver = null;
        try {
            Proxy seleniumProxy = ClientUtil.createSeleniumProxy(proxy);

            String hostIp = Inet4Address.getLocalHost().getHostAddress();
            seleniumProxy.setHttpProxy(hostIp + ":" + proxy.getPort());
            seleniumProxy.setSslProxy(hostIp + ":" + proxy.getPort());
            proxy.enableHarCaptureTypes(CaptureType.REQUEST_CONTENT, CaptureType.RESPONSE_CONTENT);

            // The Referer is set with addHeader; a request filter (proxy.addRequestFilter)
            // adding the header to each request was the alternative considered here.
            proxy.addHeader(HttpHeaders.REFERER, referer);

            // Creating the driver
            System.setProperty("webdriver.gecko.driver", geckoDriverPath);
            // firefox profile to autosave
            FirefoxOptions firefoxOptions = new FirefoxOptions();
            FirefoxProfile fxProfile = new FirefoxProfile();
            fxProfile.setPreference("browser.download.folderList", 2);
            fxProfile.setPreference("browser.download.dir", dstPath);
            fxProfile.setPreference("browser.helperApps.neverAsk.saveToDisk","application/octet-stream");
            fxProfile.setPreference("pdfjs.enabledCache.state",false);
            firefoxOptions.setProfile(fxProfile);
            // Setting the proxy
            firefoxOptions.setCapability(CapabilityType.PROXY, seleniumProxy);
            firefoxOptions.setCapability(CapabilityType.ACCEPT_SSL_CERTS, true);

            driver = new FirefoxDriver(firefoxOptions);

            // Page with link
            driver.get(link);
            String bodySource = driver.getPageSource();
            // Digits only: the captured value is spliced into the script executed below,
            // and a greedy "anything" capture could swallow text up to a later '");'.
            Pattern pattern = Pattern.compile("parseInt\\(\"(\\d+)\"\\);");
            Matcher matcher = pattern.matcher(bodySource);
            if (matcher.find()) {
                String intValue = matcher.group(1);
                System.out.println("intValue found: " + intValue);
                if (driver instanceof JavascriptExecutor) {
                    // Referer must be atomtt: use the page that was just loaded, as the parser does.
                    // It stays in place for the download request below.
                    proxy.addHeader(HttpHeaders.REFERER, link);
                    // Javascript request to be executed by selenium
                    String js = "var xhr = new XMLHttpRequest();\n";
                    js += "xhr.open('POST', 'https://atomtt.com/to.php', false);\n";
                    js += "xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded');\n";
                    js += "xhr.send('t=" + intValue + "');\n";
                    js += "return xhr.response;\n";
                    System.out.println(js);
                    Object result = ((JavascriptExecutor) driver).executeScript(js);
                    System.out.println("Result value of cal to top: " + result);
                    String torrentFileLinkValue = "https://atomixhq.art/t_download/" + result + ".torrent";
                    System.out.println(torrentFileLinkValue);
                    driver.manage().timeouts().scriptTimeout(Duration.ofSeconds(timeOut));
                    driver.manage().timeouts().pageLoadTimeout(Duration.ofSeconds(timeOut));
                    driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(timeOut));
                    try {
                        driver.navigate().to(torrentFileLinkValue);
                    } catch (Exception e) {
                        // The short timeouts above can end the navigation with an exception;
                        // report it instead of aborting before the cleanup message is visible.
                        System.out.println("Navigation to torrent file ended with: " + e);
                    }
                }
            }
        } finally {
            // Always release the browser and the proxy port, even when a step above fails.
            if (driver != null) {
                driver.quit();
            }
            proxy.stop();
        }
    }
}
35 changes: 35 additions & 0 deletions src/test/java/info/jllort/torrent/RegexTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package info.jllort.torrent;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small runnable check of the regular expression that pulls the quoted
 * linkonclick jump url out of a sample of the page's inline script.
 */
public class RegexTest {
    /** Sample script body, one entry per line of the original page fragment. */
    private static final String SAMPLE_SCRIPT = String.join("\n",
            "function openTorrent(u)",
            "{",
            "",
            "\t ",
            "",
            "\tvar link = \"https://www.linkonclick.com/jump/next.php?r=5302219\";",
            "",
            "\twindow.open(link);",
            "",
            "\t",
            "\twindow.location.href = u;\t",
            "",
            "}\t",
            "",
            "",
            "var tid = parseInt(\"167641\");",
            "",
            "var btn = document.getElementById(\"btntor\");",
            "btn.addEventListener(\"click\", function() ");

    /** Prints the first matched url with its surrounding quotes stripped; prints nothing when there is no match. */
    public static void main(String[] args) {
        Matcher linkMatcher = Pattern
                .compile("([\"]https://www.linkonclick.com/jump/next.*[\"])")
                .matcher(SAMPLE_SCRIPT);
        if (!linkMatcher.find()) {
            return;
        }
        String quotedLink = linkMatcher.group();
        String bareLink = quotedLink.replace("\"", "");
        System.out.println(bareLink);
    }
}
25 changes: 25 additions & 0 deletions src/test/java/info/jllort/torrent/RegexTest2.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package info.jllort.torrent;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small runnable check of the regular expression that reads the torrent id
 * passed to {@code parseInt("...")} in the page's inline script.
 */
public class RegexTest2 {
    private static final String bodySample = "function openTorrent(u)\n" +
            "}\t\n" +
            "\n" +
            "\n" +
            "var tid = parseInt(\"167632\");\n" +
            "\n" +
            "var btn = document.getElementById(\"btntor\");\n" +
            "btn.addEventListener(\"click\", function() ";

    /**
     * Matches {@code parseInt("<digits>");}; group 1 is the id.
     * Digits only, because a greedy {@code (.*)} would run on to the last
     * {@code ");} on the same line and capture unrelated script text.
     */
    private static final Pattern TID_PATTERN = Pattern.compile("parseInt\\(\"(\\d+)\"\\);");

    /**
     * Extracts the first torrent id found in {@code body}.
     *
     * @param body script or page source to search
     * @return the digits of the id, or {@code null} when no id is present
     */
    static String extractTid(String body) {
        Matcher matcher = TID_PATTERN.matcher(body);
        return matcher.find() ? matcher.group(1) : null;
    }

    /** Prints the id found in the sample; prints nothing when there is none. */
    public static void main(String[] args) {
        String value = extractTid(bodySample);
        if (value != null) {
            System.out.println(value);
        }
    }
}

0 comments on commit 132f388

Please sign in to comment.