-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #28 from darkman97i/issue/27
Changes in the link of the torrent
- Loading branch information
Showing
6 changed files
with
327 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
156 changes: 156 additions & 0 deletions
156
src/main/java/info/llort/torrent/util/PctmixWebParserV3.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
package info.llort.torrent.util; | ||
|
||
import com.google.common.net.HttpHeaders; | ||
import info.llort.torrent.bean.PageLinkInfo; | ||
import net.lightbody.bmp.BrowserMobProxy; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
import org.openqa.selenium.JavascriptExecutor; | ||
import org.openqa.selenium.WebDriver; | ||
|
||
import java.io.IOException; | ||
import java.time.Duration; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Set; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
import static org.fusesource.jansi.Ansi.Color.*; | ||
|
||
public class PctmixWebParserV3 { | ||
public static void capture(String urlWebToParse, String geckoDriverPath, List<String> filters, long downloadTimeOut, WebDriver driver, BrowserMobProxy proxy) throws IOException, InterruptedException { | ||
Set<PageLinkInfo> mainPageLinks = findMainPageLinks(urlWebToParse, geckoDriverPath, filters, driver); | ||
for (PageLinkInfo pli : mainPageLinks) { | ||
Console.println("Main page link: " + pli.getUrl(), WHITE); | ||
} | ||
|
||
Set<PageLinkInfo> torrentPageLinks = findPageTorrentLinks(mainPageLinks, driver, proxy); | ||
for (PageLinkInfo pli : torrentPageLinks) { | ||
Console.println("Torrent page link: " + pli.getUrl(), WHITE); | ||
} | ||
|
||
Set<PageLinkInfo> downloadTorrentLinks = downloadTorrentLinks(torrentPageLinks, driver, proxy); | ||
for (PageLinkInfo pli : downloadTorrentLinks) { | ||
Console.println("Download link: " + pli.getUrl(), WHITE); | ||
downloadTorrentFile(pli, driver, downloadTimeOut, proxy); | ||
} | ||
} | ||
|
||
public static Set<PageLinkInfo> findMainPageLinks(String url, String geckoDriverPath, List<String> filters, WebDriver driver) throws IOException { | ||
// Inspired by https://www.javatpoint.com/selenium-webdriver-running-test-on-firefox-browser-gecko-driver | ||
driver.get(url); | ||
|
||
Set<PageLinkInfo> links = new HashSet<>(); | ||
Document doc = Jsoup.parse(driver.getPageSource()); | ||
Elements elements = doc.select("a[href]"); | ||
for (Element element : elements) { | ||
String value = element.attr("href"); | ||
if (value.startsWith("https:")) { | ||
if ((value.contains("/descargar/peliculas-castellano/") && value.contains("blurayrip")) || | ||
(value.contains("/descargar/peliculas-x264-mkv/") && value.contains("bluray")) || | ||
(value.contains("/descargar/cine-alta-definicion-hd/") && value.contains("bluray")) || | ||
value.contains("/descargar/serie/") || | ||
value.contains("/descargar/serie-en-hd/") || | ||
value.contains("/descargar/serie-4k/")) { | ||
for (String filter : filters) { | ||
if (value.toLowerCase().contains(filter.toLowerCase())) { | ||
// /descargar/ must be replaced by /descargar/torrent/ | ||
PageLinkInfo pli = new PageLinkInfo(); | ||
pli.setReferer(value); | ||
value = value.replace("/descargar/", "/descargar/torrent/"); | ||
pli.setUrl(value); | ||
links.add(pli); | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return links; | ||
} | ||
|
||
public static Set<PageLinkInfo> findPageTorrentLinks(Set<PageLinkInfo> pageLinks, WebDriver driver, BrowserMobProxy proxy) throws IOException { | ||
Set<PageLinkInfo> links = new HashSet<>(); | ||
for (PageLinkInfo pli : pageLinks) { | ||
// Setting referer before jump to the page | ||
proxy.addHeader(HttpHeaders.REFERER, pli.getReferer()); | ||
driver.get(pli.getUrl()); | ||
|
||
String htmlContent = driver.getPageSource(); | ||
//System.out.println(htmlContent); | ||
String regex = "window.open\\(\"(.*?)\"\\)"; | ||
Pattern pattern = Pattern.compile(regex); | ||
Matcher matcher = pattern.matcher(htmlContent); | ||
while (matcher.find()) { | ||
String tLink = matcher.group(1); | ||
tLink = "https:" + tLink; // add https at the begining | ||
PageLinkInfo newPli = new PageLinkInfo(); | ||
newPli.setUrl(tLink); | ||
newPli.setReferer(pli.getUrl()); | ||
links.add(newPli); | ||
} | ||
} | ||
return links; | ||
} | ||
|
||
public static Set<PageLinkInfo> downloadTorrentLinks(Set<PageLinkInfo> pageLinks, WebDriver driver, BrowserMobProxy proxy) throws IOException { | ||
Set<PageLinkInfo> links = new HashSet<>(); | ||
for (PageLinkInfo pli : pageLinks) { | ||
// Setting referer before jump to the page | ||
proxy.addHeader(HttpHeaders.REFERER, pli.getReferer()); | ||
driver.get(pli.getUrl()); | ||
|
||
// Capture int value | ||
String bodySource = driver.getPageSource(); | ||
String linkRegex = "(parseInt\\(\"(.*)\"\\);)"; | ||
Pattern pattern = Pattern.compile(linkRegex); | ||
Matcher matcher = pattern.matcher(bodySource); | ||
if (matcher.find()) { | ||
String intValue = matcher.group(2); | ||
Console.println("intValue found: " + intValue, WHITE); | ||
if (driver instanceof JavascriptExecutor) { | ||
// Referer must be atomtt | ||
proxy.addHeader(HttpHeaders.REFERER, pli.getUrl()); // atomtt referer !!! | ||
// Javascript request to be executed by selenium | ||
String js = "var values = {'t':'167632'};\n"; // the variable | ||
js += "var xhr = new XMLHttpRequest();\n"; | ||
js += "xhr.open('POST', 'https://atomtt.com/to.php', false);\n"; | ||
js += "xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded');\n"; | ||
js += "xhr.send('t=" + intValue + "');\n"; | ||
js += "return xhr.response;\n"; | ||
Console.println("executing javascript: " + js, YELLOW); | ||
Object result = ((JavascriptExecutor) driver).executeScript(js); | ||
Console.println("Javascript result: " + js, GREEN); | ||
String torrentFileLinkValue = "https://atomixhq.art/t_download/" + result + ".torrent"; | ||
Console.println("torrentFileLinkValue: " + torrentFileLinkValue, GREEN); | ||
PageLinkInfo newPli = new PageLinkInfo(); | ||
newPli.setUrl(torrentFileLinkValue); | ||
newPli.setReferer(pli.getUrl()); | ||
links.add(newPli); | ||
} | ||
|
||
} else { | ||
Console.println("intValue NOT found for page: " + pli.getUrl(), RED); | ||
} | ||
} | ||
return links; | ||
} | ||
|
||
public static void downloadTorrentFile(PageLinkInfo pli, WebDriver driver, long timeOut, BrowserMobProxy proxy) throws InterruptedException { | ||
try { | ||
// Referer must be atomtt | ||
proxy.addHeader(HttpHeaders.REFERER, pli.getReferer()); | ||
// Set timeout otherwise the driver lock | ||
driver.manage().timeouts().scriptTimeout(Duration.ofSeconds(timeOut)); | ||
driver.manage().timeouts().pageLoadTimeout(Duration.ofSeconds(timeOut)); | ||
driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(timeOut)); | ||
driver.navigate().to(pli.getUrl()); | ||
} catch (Exception e) { | ||
// Silent error | ||
} | ||
} | ||
} |
108 changes: 108 additions & 0 deletions
108
src/test/java/info/jllort/torrent/ParserCheckIssue27.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
package info.jllort.torrent; | ||
|
||
import com.google.common.net.HttpHeaders; | ||
import info.llort.torrent.Config; | ||
import net.lightbody.bmp.BrowserMobProxy; | ||
import net.lightbody.bmp.BrowserMobProxyServer; | ||
import net.lightbody.bmp.client.ClientUtil; | ||
import net.lightbody.bmp.proxy.CaptureType; | ||
import org.openqa.selenium.JavascriptExecutor; | ||
import org.openqa.selenium.Proxy; | ||
import org.openqa.selenium.WebDriver; | ||
import org.openqa.selenium.firefox.FirefoxDriver; | ||
import org.openqa.selenium.firefox.FirefoxOptions; | ||
import org.openqa.selenium.firefox.FirefoxProfile; | ||
import org.openqa.selenium.remote.CapabilityType; | ||
|
||
import java.net.Inet4Address; | ||
import java.net.UnknownHostException; | ||
import java.time.Duration; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
public class ParserCheckIssue27 { | ||
public static void main(String[] args) throws UnknownHostException, InterruptedException { | ||
String geckoDriverPath = Config.FIREFOX_DRIVER_PATH; | ||
String dstPath = Config.FILESYSTEM_DOWNLOAD_PATH; | ||
long timeOut = Config.FILE_DOWNLOAD_TIMEOUT; | ||
|
||
String referer = "https://atomixhq.art/descargar/torrent/peliculas-castellano/un-segundo-one-second--2022-/blurayrip-ac3-5-1/"; | ||
String link = "https://atomtt.com/t_download/167641/un-segundo--one-second---2022-/"; | ||
|
||
// Creating proxy | ||
BrowserMobProxy proxy = new BrowserMobProxyServer(); | ||
proxy.start(8080); | ||
Proxy seleniumProxy = ClientUtil.createSeleniumProxy(proxy); | ||
|
||
String hostIp = Inet4Address.getLocalHost().getHostAddress(); | ||
seleniumProxy.setHttpProxy(hostIp + ":" + proxy.getPort()); | ||
seleniumProxy.setSslProxy(hostIp + ":" + proxy.getPort()); | ||
proxy.enableHarCaptureTypes(CaptureType.REQUEST_CONTENT, CaptureType.RESPONSE_CONTENT); | ||
|
||
// RequestFilter requestFilter = new RequestFilter() { | ||
// @Override | ||
// public HttpResponse filterRequest(HttpRequest httpRequest, HttpMessageContents httpMessageContents, HttpMessageInfo httpMessageInfo) { | ||
// return null; | ||
// } | ||
// }; | ||
// | ||
// // put our custom header to each request | ||
// proxy.addRequestFilter((request, contents, messageInfo)-> { | ||
// request.headers().add(HttpHeaders.REFERER, referer); | ||
// System.out.println(request.headers().entries().toString()); | ||
// return null; | ||
// }); | ||
|
||
proxy.addHeader(HttpHeaders.REFERER, referer); | ||
|
||
// Creating the driver | ||
System.setProperty("webdriver.gecko.driver", geckoDriverPath); | ||
// firefox profile to autosave | ||
FirefoxOptions firefoxOptions = new FirefoxOptions(); | ||
FirefoxProfile fxProfile = new FirefoxProfile(); | ||
fxProfile.setPreference("browser.download.folderList", 2); | ||
fxProfile.setPreference("browser.download.dir", dstPath); | ||
fxProfile.setPreference("browser.helperApps.neverAsk.saveToDisk","application/octet-stream"); | ||
fxProfile.setPreference("pdfjs.enabledCache.state",false); | ||
firefoxOptions.setProfile(fxProfile); | ||
// Setting the proxy | ||
firefoxOptions.setCapability(CapabilityType.PROXY, seleniumProxy); | ||
firefoxOptions.setCapability(CapabilityType.ACCEPT_SSL_CERTS, true); | ||
|
||
WebDriver driver = new FirefoxDriver(firefoxOptions); | ||
|
||
// Page with link | ||
driver.get(link); | ||
String bodySource = driver.getPageSource(); | ||
String linkRegex = "(parseInt\\(\"(.*)\"\\);)"; | ||
Pattern pattern = Pattern.compile(linkRegex); | ||
Matcher matcher = pattern.matcher(bodySource); | ||
if (matcher.find()) { | ||
String intValue = matcher.group(2); | ||
System.out.println("intValue found: " + intValue); | ||
if (driver instanceof JavascriptExecutor) { | ||
// Referer must be atomtt | ||
proxy.addHeader(HttpHeaders.REFERER, "https://atomtt.com/t_download/167632/sharkwater--the-requin---2022-/"); | ||
// Javascript request to be executed by selenium | ||
String js = "var values = {'t':'167632'};\n"; // the variable | ||
js += "var xhr = new XMLHttpRequest();\n"; | ||
js += "xhr.open('POST', 'https://atomtt.com/to.php', false);\n"; | ||
js += "xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded');\n"; | ||
js += "xhr.send('t=" + intValue + "');\n"; | ||
js += "return xhr.response;\n"; | ||
System.out.println(js); | ||
Object result = ((JavascriptExecutor) driver).executeScript(js); | ||
System.out.println("Result value of cal to top: " + result); | ||
// result = ((JavascriptExecutor) driver).executeScript("alert('Welcome to Guru99');"); | ||
String torrentFileLinkValue = "https://atomixhq.art/t_download/" + result + ".torrent"; | ||
System.out.println(torrentFileLinkValue); | ||
driver.manage().timeouts().scriptTimeout(Duration.ofSeconds(timeOut)); | ||
driver.manage().timeouts().pageLoadTimeout(Duration.ofSeconds(timeOut)); | ||
driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(timeOut)); | ||
// Other referer | ||
proxy.addHeader(HttpHeaders.REFERER, link); // Referer is the page what contains the torrent link //atomtt etc... | ||
driver.navigate().to(torrentFileLinkValue); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
package info.jllort.torrent; | ||
|
||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/**
 * Scratch check for the regular expression that extracts the quoted linkonclick jump URL
 * from a sample of the page's inline script.
 */
public class RegexTest {
    /** Fragment of the inline script as it appears in the page source. */
    private static final String BODY_SAMPLE = "function openTorrent(u)\n" +
            "{\n" +
            "\n" +
            "\t \n" +
            "\n" +
            "\tvar link = \"https://www.linkonclick.com/jump/next.php?r=5302219\";\n" +
            "\n" +
            "\twindow.open(link);\n" +
            "\n" +
            "\t\n" +
            "\twindow.location.href = u;\t\n" +
            "\n" +
            "}\t\n" +
            "\n" +
            "\n" +
            "var tid = parseInt(\"167641\");\n" +
            "\n" +
            "var btn = document.getElementById(\"btntor\");\n" +
            "btn.addEventListener(\"click\", function() ";

    /**
     * Group 1 is the URL between the quotes. The dots of the host name are escaped so they only
     * match a literal dot, and {@code [^"]*} stops at the closing quote instead of running to
     * the last quote on the line.
     */
    private static final Pattern LINK_PATTERN =
            Pattern.compile("\"(https://www\\.linkonclick\\.com/jump/next[^\"]*)\"");

    /**
     * Prints the first jump URL found in the sample, without the surrounding quotes.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Matcher matcher = LINK_PATTERN.matcher(BODY_SAMPLE);
        if (matcher.find()) {
            System.out.println(matcher.group(1));
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package info.jllort.torrent; | ||
|
||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/**
 * Scratch check for the regular expression that extracts the numeric id passed to
 * {@code parseInt("...")} in a sample of the page's inline script.
 */
public class RegexTest2 {
    /** Fragment of the inline script as it appears in the page source. */
    private static final String BODY_SAMPLE = "function openTorrent(u)\n" +
            "}\t\n" +
            "\n" +
            "\n" +
            "var tid = parseInt(\"167632\");\n" +
            "\n" +
            "var btn = document.getElementById(\"btntor\");\n" +
            "btn.addEventListener(\"click\", function() ";

    /**
     * Group 1 is the id. Capturing digits only keeps the match from running past the closing
     * quote when another {@code ");} follows on the same line.
     */
    private static final Pattern TORRENT_ID_PATTERN = Pattern.compile("parseInt\\(\"(\\d+)\"\\);");

    /**
     * Prints the id found in the sample.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Matcher matcher = TORRENT_ID_PATTERN.matcher(BODY_SAMPLE);
        if (matcher.find()) {
            System.out.println(matcher.group(1));
        }
    }
}