Skip to content

Commit

Permalink
Testing ui4j lib
Browse files Browse the repository at this point in the history
  • Loading branch information
Hronom committed Nov 19, 2015
1 parent 4b76524 commit cfdc44c
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 23 deletions.
23 changes: 23 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,39 @@
<maven>3.3.1</maven>
</prerequisites>

<!--
  Project-local Maven repository: artifacts are resolved from the "libs"
  directory located under the project base directory (file: URL below).
  NOTE(review): presumably this holds jars that are not published to a remote
  repository; confirm which dependencies are expected to live here.
-->
<repositories>
<repository>
<id>project.local</id>
<name>project</name>
<url>file:${project.basedir}/libs</url>
</repository>
</repositories>

<dependencies>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<!-- Ui4j -->
<dependency>
<groupId>com.ui4j</groupId>
<artifactId>ui4j-all</artifactId>
<version>2.1.0</version>
</dependency>
<!-- Jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>
<!-- HtmlUnit -->
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>2.19</version>
</dependency>
<!-- Log4j -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
Expand All @@ -50,6 +67,12 @@
<artifactId>log4j-jcl</artifactId>
<version>2.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.4.1</version>
</dependency>
<!-- JUnit -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.ui4j.api.browser.BrowserEngine;
import com.ui4j.api.browser.BrowserFactory;
import com.ui4j.api.browser.Page;

import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.IOException;
import java.net.URL;
import java.util.concurrent.Executors;

Expand All @@ -26,25 +29,9 @@ public class ScrapeButtonController {

private final ScrapeView scrapeView;

private final WebClient webClient;

public ScrapeButtonController(ScrapeView scrapeViewArg) {
scrapeView = scrapeViewArg;
scrapeView.addScrapeButtonActionListener(createScrapeButtonActionListener());

webClient = new WebClient(BrowserVersion.FIREFOX_38);
webClient.getOptions().setCssEnabled(true);
webClient.getOptions().setJavaScriptEnabled(true);
webClient.getOptions().setPopupBlockerEnabled(false);
webClient.getOptions().setRedirectEnabled(true);
webClient.getOptions().setThrowExceptionOnScriptError(false);
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
webClient.setAjaxController(new AjaxController() {
@Override
public boolean processSynchron(HtmlPage page, WebRequest request, boolean async) {
return true;
}
});
scrapeView.addScrapeButtonActionListener(createScrapeButtonActionListener2());
}

public ActionListener createScrapeButtonActionListener() {
Expand All @@ -69,18 +56,17 @@ public void run() {
!scrapeView.getSelector().isEmpty()) {
logger.info("Input parameters: \"" +
scrapeView.getWebsiteUrl() + "\", \"" +
scrapeView.getSelector() + "\"");
scrapeView.getSelector() + "\", \"");
}

// Process.
try {
try (WebClient webClient = createWebClient()) {
URL url = new URL(scrapeView.getWebsiteUrl());
scrapeView.setProgressBarTaskText("requesting page");
logger.info("Requesting page...");
HtmlPage page = webClient.getPage(url);
logger.info("Requesting of page completed.");


scrapeView.setProgressBarTaskText("viewing page as XML");
logger.info("View page as XML");
String xml = page.asXml();
Expand Down Expand Up @@ -118,8 +104,88 @@ public void run() {
logger.error(e);
}

logger.info("Close WebClient.");
webClient.close();
long endTime = System.currentTimeMillis();
logger.info("Process time: " + (endTime - beginTime) + " ms.");
logger.info("Processing complete.");

// Enable fields in view.
scrapeView.setWorkInProgress(false);
scrapeView.setScrapeButtonEnabled(true);
scrapeView.setSelectorTextFieldEnabled(true);
scrapeView.setWebsiteUrlTextFieldEnabled(true);
}
});
}
};
}

public ActionListener createScrapeButtonActionListener2() {
return new ActionListener() {
@Override
public void actionPerformed(ActionEvent event) {
Executors.newSingleThreadExecutor().submit(new Runnable() {
public void run() {
// Disable fields in view.
scrapeView.setWebsiteUrlTextFieldEnabled(false);
scrapeView.setSelectorTextFieldEnabled(false);
scrapeView.setScrapeButtonEnabled(false);
scrapeView.setWorkInProgress(true);
scrapeView.setOutput("");

scrapeView.setProgressBarTaskText("initializing");
logger.info("Start processing...");
long beginTime = System.currentTimeMillis();

// Output input parameters.
if (!scrapeView.getWebsiteUrl().isEmpty() &&
!scrapeView.getSelector().isEmpty()) {
logger.info("Input parameters: \"" +
scrapeView.getWebsiteUrl() + "\", \"" +
scrapeView.getSelector() + "\", \"");
}

// Process.
BrowserEngine browserEngine = createBrowserEngine();

// Navigate to the website URL entered in the view.
scrapeView.setProgressBarTaskText("requesting page");
logger.info("Requesting page...");
Page page = browserEngine.navigate(scrapeView.getWebsiteUrl());
logger.info("Requesting of page completed.");

scrapeView.setProgressBarTaskText("viewing page as XML");
logger.info("View page as XML");
String html = page.getDocument().getBody().getInnerHTML();;

// Unescape html.
scrapeView.setProgressBarTaskText("unescaping HTML");
logger.info("Unescape html");
html = StringEscapeUtils.unescapeHtml4(html);

logger.info("Get selector");
String selector = scrapeView.getSelector();
if (!html.isEmpty() && !selector.isEmpty()) {
scrapeView.setProgressBarTaskText("parsing HTML");
logger.info("Parse HTML");
Document doc = Jsoup.parse(html);

scrapeView.setProgressBarTaskText("selecting elements in HTML");
logger.info("select elements in HTML");
Elements selectedElements = doc.select(selector);

if (!selectedElements.isEmpty()) {
scrapeView.setProgressBarTaskText("parsing selected elements");
logger.info("Parse extracted elements");
StringBuilder sb = new StringBuilder();
for (Element element : selectedElements) {
String body = element.html();
sb.append(body);
sb.append("\n");
sb.append("\n");
}
scrapeView.setOutput(sb.toString());
}
}

long endTime = System.currentTimeMillis();
logger.info("Process time: " + (endTime - beginTime) + " ms.");
Expand All @@ -135,4 +201,42 @@ public void run() {
}
};
}

/**
 * Creates and configures a new HtmlUnit {@link WebClient} emulating Firefox 38.
 *
 * <p>A fresh client is built on every call; the caller owns the returned
 * instance and is responsible for closing it (the scrape listener in this
 * class does so with try-with-resources).
 *
 * <p>NOTE(review): {@code setUseInsecureSSL(true)} presumably turns off SSL
 * certificate validation - confirm this is acceptable for the sites being
 * scraped.
 *
 * @return a newly configured web client, never shared between calls
 */
private WebClient createWebClient() {
    final WebClient client = new WebClient(BrowserVersion.FIREFOX_38);

    // Page rendering and navigation features.
    client.getOptions().setCssEnabled(true);
    client.getOptions().setJavaScriptEnabled(true);
    client.getOptions().setPopupBlockerEnabled(false);
    client.getOptions().setRedirectEnabled(true);

    // Embedded content and transport settings.
    client.getOptions().setActiveXNative(true);
    client.getOptions().setAppletEnabled(true);
    client.getOptions().setUseInsecureSSL(true);

    // Do not abort the scrape on script errors or failing HTTP status codes.
    client.getOptions().setThrowExceptionOnScriptError(false);
    client.getOptions().setThrowExceptionOnFailingStatusCode(false);

    // Controller that answers "true" for every request, whatever the arguments.
    // Presumably this makes HtmlUnit process AJAX calls synchronously - confirm
    // against the HtmlUnit AjaxController documentation.
    final AjaxController alwaysSynchronous = new AjaxController() {
        @Override
        public boolean processSynchron(HtmlPage htmlPage, WebRequest webRequest, boolean isAsync) {
            return true;
        }
    };
    client.setAjaxController(alwaysSynchronous);

    return client;
}

/**
 * Obtains the ui4j WebKit browser engine from {@link BrowserFactory}.
 *
 * <p>No page is opened here; callers navigate on the returned engine
 * themselves. Earlier experiments (navigating to {@code about:blank},
 * showing the page, setting the {@code ui4j.headless} system property and
 * appending markup to the body) were only ever present as commented-out
 * code and are not performed.
 *
 * @return the WebKit-backed browser engine returned by the factory
 */
private BrowserEngine createBrowserEngine() {
    return BrowserFactory.getWebKit();
}
}

0 comments on commit cfdc44c

Please sign in to comment.