This repository has been archived by the owner on Nov 14, 2019. It is now read-only.

add RiverConfigManager
marevol committed Feb 12, 2016
1 parent 96f56d0 commit 0c54ffd
Showing 7 changed files with 63 additions and 20 deletions.
7 changes: 5 additions & 2 deletions src/main/java/org/codelibs/riverweb/RiverWeb.java
@@ -34,7 +34,8 @@
import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl;
import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine;
import org.codelibs.riverweb.app.service.ScriptService;
import org.codelibs.riverweb.entity.RiverConfig;
import org.codelibs.riverweb.config.RiverConfig;
import org.codelibs.riverweb.config.RiverConfigManager;
import org.codelibs.riverweb.interval.WebRiverIntervalController;
import org.codelibs.riverweb.util.ConfigProperties;
import org.codelibs.riverweb.util.SettingsUtils;
@@ -98,7 +99,7 @@ public class RiverWeb {
protected ScriptService scriptService;

@Resource
protected RiverConfig riverConfig;
protected RiverConfigManager riverConfigManager;

@Resource
protected String defaultUserAgent;
@@ -236,6 +237,7 @@ private int crawl(Crawler crawler, String configId, String sessionId) {
vars.put("client", esClient);
vars.put("sessionId", sessionId);

final RiverConfig riverConfig = riverConfigManager.get(sessionId);
try {
// invoke execute event script
executeScript(crawlSettings, vars, "execute");
@@ -452,6 +454,7 @@ private int crawl(Crawler crawler, String configId, String sessionId) {
} finally {
// invoke finish event script
executeScript(crawlSettings, vars, "finish");
riverConfigManager.remove(sessionId);

if (cleanup) {
crawler.cleanup(sessionId);
src/main/java/org/codelibs/riverweb/{entity → config}/RiverConfig.java
@@ -1,10 +1,11 @@
package org.codelibs.riverweb.entity;
package org.codelibs.riverweb.config;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.riverweb.entity.ScrapingRule;

public class RiverConfig {

25 changes: 25 additions & 0 deletions src/main/java/org/codelibs/riverweb/config/RiverConfigManager.java
@@ -0,0 +1,25 @@
package org.codelibs.riverweb.config;

import java.util.HashMap;
import java.util.Map;

public class RiverConfigManager {
    protected Map<String, RiverConfig> configMap = new HashMap<>();

    public RiverConfig get(final String sessionId) {
        synchronized (configMap) {
            if (configMap.containsKey(sessionId)) {
                return configMap.get(sessionId);
            }
            RiverConfig config = new RiverConfig();
            configMap.put(sessionId, config);
            return config;
        }
    }

    public RiverConfig remove(final String sessionId) {
        synchronized (configMap) {
            return configMap.remove(sessionId);
        }
    }
}
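
Read together with the RiverWeb.java hunk above, the intended lifecycle is: each crawl obtains its own RiverConfig from the manager by session ID before the work starts and discards it in the finally block once the crawl ends. A minimal sketch of that usage, with a placeholder session ID and crawl body (not taken from the project):

import org.codelibs.riverweb.config.RiverConfig;
import org.codelibs.riverweb.config.RiverConfigManager;

public class RiverConfigLifecycleSketch {
    public static void main(final String[] args) {
        final RiverConfigManager riverConfigManager = new RiverConfigManager();
        final String sessionId = "example-session"; // hypothetical session ID

        // get() lazily creates and caches the config for this session.
        final RiverConfig riverConfig = riverConfigManager.get(sessionId);
        try {
            // ... populate riverConfig and run the crawl for this session ...
        } finally {
            // Mirrors RiverWeb.crawl(): always drop the per-session config.
            riverConfigManager.remove(sessionId);
        }
    }
}

Since both get() and remove() synchronize on configMap, concurrent crawler threads resolving different sessions see a consistent map; a ConcurrentHashMap with computeIfAbsent would be a lock-free alternative with the same semantics.
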
RwCrawlerThread.java
@@ -6,7 +6,8 @@
import org.codelibs.fess.crawler.client.CrawlerClient;
import org.codelibs.fess.crawler.client.EsClient;
import org.codelibs.fess.crawler.entity.UrlQueue;
import org.codelibs.riverweb.entity.RiverConfig;
import org.codelibs.riverweb.config.RiverConfig;
import org.codelibs.riverweb.config.RiverConfigManager;
import org.codelibs.riverweb.util.ConversionUtil;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.QueryBuilders;
@@ -22,7 +23,8 @@ public class RwCrawlerThread {

@Override
protected boolean isContentUpdated(final CrawlerClient client, final UrlQueue<?> urlQueue) {
final RiverConfig riverConfig = SingletonLaContainer.getComponent(RiverConfig.class);
final RiverConfigManager riverConfigManager = SingletonLaContainer.getComponent(RiverConfigManager.class);
final RiverConfig riverConfig = riverConfigManager.get(crawlerContext.getSessionId());
if (riverConfig.isIncremental()) {
final EsClient esClient = SingletonLaContainer.getComponent(EsClient.class);
try {
ScrapingTransformer.java
@@ -43,7 +43,8 @@
import org.codelibs.fess.crawler.transformer.impl.HtmlTransformer;
import org.codelibs.riverweb.WebRiverConstants;
import org.codelibs.riverweb.app.service.ScriptService;
import org.codelibs.riverweb.entity.RiverConfig;
import org.codelibs.riverweb.config.RiverConfig;
import org.codelibs.riverweb.config.RiverConfigManager;
import org.codelibs.riverweb.entity.ScrapingRule;
import org.codelibs.riverweb.util.SettingsUtils;
import org.elasticsearch.index.query.QueryBuilders;
@@ -90,31 +91,38 @@ public class ScrapingTransformer extends HtmlTransformer {
public String[] copiedResonseDataFields = new String[] { "url", "parentUrl", "httpStatusCode", "method", "charSet", "contentLength",
"mimeType", "executionTime", "lastModified" };

protected RiverConfig riverConfig;

private EsClient esClient;

protected RiverConfigManager riverConfigManager;

protected ThreadLocal<Set<String>> childUrlSetLocal = new ThreadLocal<Set<String>>();

protected ThreadLocal<RiverConfig> riverConfigLocal = new ThreadLocal<>();


@PostConstruct
public void init() {
riverConfig = SingletonLaContainer.getComponent(RiverConfig.class);
esClient = SingletonLaContainer.getComponent(EsClient.class);
riverConfigManager = SingletonLaContainer.getComponent(RiverConfigManager.class);
}

@Override
public ResultData transform(final ResponseData responseData) {
final RiverConfig riverConfig = riverConfigManager.get(responseData.getSessionId());

try {
riverConfigLocal.set(riverConfig);
return super.transform(responseData);
} finally {
riverConfigLocal.remove();
childUrlSetLocal.remove();
}
}

@Override
protected void updateCharset(final ResponseData responseData) {
int preloadSize = preloadSizeForCharset;
final ScrapingRule scrapingRule = riverConfig.getScrapingRule(responseData);
final ScrapingRule scrapingRule = riverConfigLocal.get().getScrapingRule(responseData);
if (scrapingRule != null) {
final Integer s = scrapingRule.getSetting("preloadSizeForCharset", Integer.valueOf(0));
if (s.intValue() > 0) {
@@ -164,14 +172,14 @@ protected String loadCharset(final InputStream inputStream, final int preloadSiz

@Override
protected void storeData(final ResponseData responseData, final ResultData resultData) {
final ScrapingRule scrapingRule = riverConfig.getScrapingRule(responseData);
if (scrapingRule == null) {
logger.info("Skip Scraping: " + responseData.getUrl());
return;
}

File file = null;
try {
final ScrapingRule scrapingRule = riverConfigLocal.get().getScrapingRule(responseData);
if (scrapingRule == null) {
logger.info("Skip Scraping: " + responseData.getUrl());
return;
}

file = File.createTempFile("river-web-", ".tmp");
CopyUtil.copy(responseData.getResponseBody(), file);
processData(scrapingRule, file, responseData, resultData);
@@ -540,6 +548,7 @@ protected void addPropertyData(final Map<String, Object> dataMap, final String k

protected void storeIndex(final ResponseData responseData, final Map<String, Object> dataMap) {
final String sessionId = responseData.getSessionId();
final RiverConfig riverConfig = riverConfigLocal.get();
final String indexName = riverConfig.getIndex();
final String typeName = riverConfig.getType();
final boolean overwrite = riverConfig.isOverwrite();
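
ScrapingTransformer now resolves the config per response (by responseData.getSessionId()) and publishes it through a ThreadLocal so that helpers further down the call chain (updateCharset, storeData, storeIndex) can read it without changing their signatures, clearing it in the finally block so state does not leak across reused crawler threads. A small, generic sketch of that set-in-try / remove-in-finally pattern (class and names are illustrative, not from the project):

import java.util.function.Supplier;

public final class ScopedValueSketch {
    // One value per thread, valid only while runWith() is executing.
    private static final ThreadLocal<String> CURRENT = new ThreadLocal<>();

    public static <T> T runWith(final String value, final Supplier<T> body) {
        CURRENT.set(value);
        try {
            return body.get();   // nested helpers can call current()
        } finally {
            CURRENT.remove();    // always clear: crawler threads are reused
        }
    }

    public static String current() {
        return CURRENT.get();
    }

    public static void main(final String[] args) {
        System.out.println(runWith("session-123", () -> "config for " + current()));
    }
}
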
2 changes: 1 addition & 1 deletion src/main/resources/config.xml
@@ -7,6 +7,6 @@
</arg>
</component>

<component name="riverConfig" class="org.codelibs.riverweb.entity.RiverConfig">
<component name="riverConfigManager" class="org.codelibs.riverweb.config.RiverConfigManager">
</component>
</components>
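
The component registered above is what the SingletonLaContainer lookups in RwCrawlerThread and ScrapingTransformer.init() resolve. A brief sketch of that lookup, assuming LastaDi's SingletonLaContainer (import path assumed) and a container already booted from config.xml:

import org.codelibs.riverweb.config.RiverConfig;
import org.codelibs.riverweb.config.RiverConfigManager;
import org.lastaflute.di.core.SingletonLaContainer; // assumed LastaDi import

public class RiverConfigLookupSketch {
    // Resolves the singleton declared in config.xml and returns the
    // per-session config, creating it on first access.
    public RiverConfig lookup(final String sessionId) {
        final RiverConfigManager manager = SingletonLaContainer.getComponent(RiverConfigManager.class);
        return manager.get(sessionId);
    }
}
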
ScrapingTransformerTest.java
@@ -12,13 +12,14 @@
import org.codelibs.core.io.ResourceUtil;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.ResultData;
import org.codelibs.riverweb.entity.RiverConfig;
import org.codelibs.riverweb.config.RiverConfig;
import org.codelibs.riverweb.config.RiverConfigManager;
import org.junit.Test;

public class ScrapingTransformerTest {
@Test
public void fess_codelibs_org() {
final RiverConfig riverConfig = new RiverConfig();
final RiverConfigManager riverConfigManager = new RiverConfigManager();
final ScrapingTransformer transformer = new ScrapingTransformer() {
@SuppressWarnings("unchecked")
@Override
@@ -31,10 +32,12 @@ protected void storeIndex(final ResponseData responseData, final Map<String, Obj
assertThat(((List<String>) ((Map<String, Object>) dataMap.get("section2")).get("body")).size(), is(12));
}
};
transformer.riverConfig = riverConfig;
transformer.riverConfigManager = riverConfigManager;

final String sessionId = "test";
final String url = "http://fess.codelibs.org/";
final RiverConfig riverConfig = riverConfigManager.get(sessionId);
transformer.riverConfigLocal.set(riverConfig);

final Map<String, Map<String, Object>> scrapingRuleMap = new HashMap<String, Map<String, Object>>();
addScrapingRuleMap(scrapingRuleMap, "text", "nav.sideMenus", "div.sidebar-nav ul li", Boolean.TRUE, Boolean.TRUE);
