Skip to content

Commit

Permalink
CST-5249 dspace.cfg conflicts fix
Browse files Browse the repository at this point in the history
  • Loading branch information
frabacche committed Nov 13, 2023
2 parents c440a9f + 1a5bac7 commit 19c3f6a
Show file tree
Hide file tree
Showing 73 changed files with 2,869 additions and 548 deletions.
296 changes: 104 additions & 192 deletions dspace-api/src/main/java/org/dspace/app/sitemap/GenerateSitemaps.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,10 @@
*/
package org.dspace.app.sitemap;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.cli.CommandLine;
Expand All @@ -29,12 +21,8 @@
import org.apache.commons.cli.ParseException;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.Item;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.CommunityService;
Expand All @@ -43,6 +31,7 @@
import org.dspace.core.LogHelper;
import org.dspace.discovery.DiscoverQuery;
import org.dspace.discovery.DiscoverResult;
import org.dspace.discovery.IndexableObject;
import org.dspace.discovery.SearchService;
import org.dspace.discovery.SearchServiceException;
import org.dspace.discovery.SearchUtils;
Expand All @@ -68,6 +57,7 @@ public class GenerateSitemaps {
private static final ConfigurationService configurationService =
DSpaceServicesFactory.getInstance().getConfigurationService();
private static final SearchService searchService = SearchUtils.getSearchService();
private static final int PAGE_SIZE = 100;

/**
* Default constructor
Expand All @@ -87,11 +77,6 @@ public static void main(String[] args) throws Exception {
"do not generate sitemaps.org protocol sitemap");
options.addOption("b", "no_htmlmap", false,
"do not generate a basic HTML sitemap");
options.addOption("a", "ping_all", false,
"ping configured search engines");
options
.addOption("p", "ping", true,
"ping specified search engine URL");
options
.addOption("d", "delete", false,
"delete sitemaps dir and its contents");
Expand All @@ -116,14 +101,13 @@ public static void main(String[] args) throws Exception {
}

/*
* Sanity check -- if no sitemap generation or pinging to do, or deletion, print usage
* Sanity check -- if no sitemap generation or deletion, print usage
*/
if (line.getArgs().length != 0 || line.hasOption('d') || line.hasOption('b')
&& line.hasOption('s') && !line.hasOption('g')
&& !line.hasOption('m') && !line.hasOption('y')
&& !line.hasOption('p')) {
&& !line.hasOption('m') && !line.hasOption('y')) {
System.err
.println("Nothing to do (no sitemap to generate, no search engines to ping)");
.println("Nothing to do (no sitemap to generate)");
hf.printHelp(usage, options);
System.exit(1);
}
Expand All @@ -137,20 +121,6 @@ public static void main(String[] args) throws Exception {
deleteSitemaps();
}

if (line.hasOption('a')) {
pingConfiguredSearchEngines();
}

if (line.hasOption('p')) {
try {
pingSearchEngine(line.getOptionValue('p'));
} catch (MalformedURLException me) {
System.err
.println("Bad search engine URL (include all except sitemap URL)");
System.exit(1);
}
}

System.exit(0);
}

Expand Down Expand Up @@ -211,171 +181,113 @@ public static void generateSitemaps(boolean makeHTMLMap, boolean makeSitemapOrg)
}

Context c = new Context(Context.Mode.READ_ONLY);
int offset = 0;
long commsCount = 0;
long collsCount = 0;
long itemsCount = 0;

List<Community> comms = communityService.findAll(c);

for (Community comm : comms) {
String url = uiURLStem + "communities/" + comm.getID();

if (makeHTMLMap) {
html.addURL(url, null);
}
if (makeSitemapOrg) {
sitemapsOrg.addURL(url, null);
}

c.uncacheEntity(comm);
}

List<Collection> colls = collectionService.findAll(c);

for (Collection coll : colls) {
String url = uiURLStem + "collections/" + coll.getID();

if (makeHTMLMap) {
html.addURL(url, null);
}
if (makeSitemapOrg) {
sitemapsOrg.addURL(url, null);
}

c.uncacheEntity(coll);
}

Iterator<Item> allItems = itemService.findAll(c);
int itemCount = 0;

while (allItems.hasNext()) {
Item i = allItems.next();

DiscoverQuery entityQuery = new DiscoverQuery();
entityQuery.setQuery("search.uniqueid:\"Item-" + i.getID() + "\" and entityType:*");
entityQuery.addSearchField("entityType");

try {
DiscoverResult discoverResult = searchService.search(c, entityQuery);

String url;
if (CollectionUtils.isNotEmpty(discoverResult.getIndexableObjects())
&& CollectionUtils.isNotEmpty(discoverResult.getSearchDocument(
discoverResult.getIndexableObjects().get(0)).get(0).getSearchFieldValues("entityType"))
&& StringUtils.isNotBlank(discoverResult.getSearchDocument(
discoverResult.getIndexableObjects().get(0)).get(0).getSearchFieldValues("entityType").get(0))
) {
url = uiURLStem + "entities/" + StringUtils.lowerCase(discoverResult.getSearchDocument(
discoverResult.getIndexableObjects().get(0))
.get(0).getSearchFieldValues("entityType").get(0)) + "/" + i.getID();
} else {
url = uiURLStem + "items/" + i.getID();
try {
DiscoverQuery discoveryQuery = new DiscoverQuery();
discoveryQuery.setMaxResults(PAGE_SIZE);
discoveryQuery.setQuery("search.resourcetype:Community");
do {
discoveryQuery.setStart(offset);
DiscoverResult discoverResult = searchService.search(c, discoveryQuery);
List<IndexableObject> docs = discoverResult.getIndexableObjects();
commsCount = discoverResult.getTotalSearchResults();

for (IndexableObject doc : docs) {
String url = uiURLStem + "communities/" + doc.getID();
c.uncacheEntity(doc.getIndexedObject());

if (makeHTMLMap) {
html.addURL(url, null);
}
if (makeSitemapOrg) {
sitemapsOrg.addURL(url, null);
}
}
Date lastMod = i.getLastModified();

if (makeHTMLMap) {
html.addURL(url, lastMod);
offset += PAGE_SIZE;
} while (offset < commsCount);

offset = 0;
discoveryQuery = new DiscoverQuery();
discoveryQuery.setMaxResults(PAGE_SIZE);
discoveryQuery.setQuery("search.resourcetype:Collection");
do {
discoveryQuery.setStart(offset);
DiscoverResult discoverResult = searchService.search(c, discoveryQuery);
List<IndexableObject> docs = discoverResult.getIndexableObjects();
collsCount = discoverResult.getTotalSearchResults();

for (IndexableObject doc : docs) {
String url = uiURLStem + "collections/" + doc.getID();
c.uncacheEntity(doc.getIndexedObject());

if (makeHTMLMap) {
html.addURL(url, null);
}
if (makeSitemapOrg) {
sitemapsOrg.addURL(url, null);
}
}
if (makeSitemapOrg) {
sitemapsOrg.addURL(url, lastMod);
offset += PAGE_SIZE;
} while (offset < collsCount);

offset = 0;
discoveryQuery = new DiscoverQuery();
discoveryQuery.setMaxResults(PAGE_SIZE);
discoveryQuery.setQuery("search.resourcetype:Item");
discoveryQuery.addSearchField("search.entitytype");
do {

discoveryQuery.setStart(offset);
DiscoverResult discoverResult = searchService.search(c, discoveryQuery);
List<IndexableObject> docs = discoverResult.getIndexableObjects();
itemsCount = discoverResult.getTotalSearchResults();

for (IndexableObject doc : docs) {
String url;
List<String> entityTypeFieldValues = discoverResult.getSearchDocument(doc).get(0)
.getSearchFieldValues("search.entitytype");
if (CollectionUtils.isNotEmpty(entityTypeFieldValues)) {
url = uiURLStem + "entities/" + StringUtils.lowerCase(entityTypeFieldValues.get(0)) + "/"
+ doc.getID();
} else {
url = uiURLStem + "items/" + doc.getID();
}
Date lastMod = doc.getLastModified();
c.uncacheEntity(doc.getIndexedObject());

if (makeHTMLMap) {
html.addURL(url, null);
}
if (makeSitemapOrg) {
sitemapsOrg.addURL(url, null);
}
}
} catch (SearchServiceException e) {
log.error("Failed getting entitytype through solr for item " + i.getID() + ": " + e.getMessage());
}

c.uncacheEntity(i);

itemCount++;
}
offset += PAGE_SIZE;
} while (offset < itemsCount);

if (makeHTMLMap) {
int files = html.finish();
log.info(LogHelper.getHeader(c, "write_sitemap",
"type=html,num_files=" + files + ",communities="
+ comms.size() + ",collections=" + colls.size()
+ ",items=" + itemCount));
}

if (makeSitemapOrg) {
int files = sitemapsOrg.finish();
log.info(LogHelper.getHeader(c, "write_sitemap",
"type=html,num_files=" + files + ",communities="
+ comms.size() + ",collections=" + colls.size()
+ ",items=" + itemCount));
}

c.abort();
}

/**
 * Notifies every search engine listed under the {@code sitemap.engineurls}
 * configuration property by calling {@link #pingSearchEngine(String)} once per entry.
 * When the property holds no values a warning is logged and nothing is pinged; an
 * entry rejected as a malformed URL is logged and skipped so the remaining entries
 * are still processed.
 *
 * @throws UnsupportedEncodingException propagated from {@link #pingSearchEngine(String)}
 */
public static void pingConfiguredSearchEngines()
        throws UnsupportedEncodingException {
    String[] configuredEngines = configurationService.getArrayProperty("sitemap.engineurls");

    // Nothing configured: report it and stop early.
    if (ArrayUtils.isEmpty(configuredEngines)) {
        log.warn("No search engine URLs configured to ping");
        return;
    }

    for (String engine : configuredEngines) {
        try {
            pingSearchEngine(engine);
        } catch (MalformedURLException badUrl) {
            // A single bad entry must not prevent the others from being pinged.
            log.warn("Bad search engine URL in configuration: " + engine);
        }
    }
}

/**
* Ping the given search engine.
*
* @param engineURL Search engine URL minus protocol etc, e.g.
* {@code www.google.com}
* @throws MalformedURLException if the passed in URL is malformed
* @throws UnsupportedEncodingException theoretically should never happen
*/
public static void pingSearchEngine(String engineURL)
throws MalformedURLException, UnsupportedEncodingException {
// Set up HTTP proxy
if ((StringUtils.isNotBlank(configurationService.getProperty("http.proxy.host")))
&& (StringUtils.isNotBlank(configurationService.getProperty("http.proxy.port")))) {
System.setProperty("proxySet", "true");
System.setProperty("proxyHost", configurationService
.getProperty("http.proxy.host"));
System.getProperty("proxyPort", configurationService
.getProperty("http.proxy.port"));
}

String sitemapURL = configurationService.getProperty("dspace.ui.url")
+ "/sitemap";

URL url = new URL(engineURL + URLEncoder.encode(sitemapURL, "UTF-8"));

try {
HttpURLConnection connection = (HttpURLConnection) url
.openConnection();

BufferedReader in = new BufferedReader(new InputStreamReader(
connection.getInputStream()));

String inputLine;
StringBuffer resp = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
resp.append(inputLine).append("\n");
if (makeHTMLMap) {
int files = html.finish();
log.info(LogHelper.getHeader(c, "write_sitemap",
"type=html,num_files=" + files + ",communities="
+ commsCount + ",collections=" + collsCount
+ ",items=" + itemsCount));
}
in.close();

if (connection.getResponseCode() == 200) {
log.info("Pinged " + url.toString() + " successfully");
} else {
log.warn("Error response pinging " + url.toString() + ":\n"
+ resp);
if (makeSitemapOrg) {
int files = sitemapsOrg.finish();
log.info(LogHelper.getHeader(c, "write_sitemap",
"type=html,num_files=" + files + ",communities="
+ commsCount + ",collections=" + collsCount
+ ",items=" + itemsCount));
}
} catch (IOException e) {
log.warn("Error pinging " + url.toString(), e);
} catch (SearchServiceException e) {
throw new RuntimeException(e);
} finally {
c.abort();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.dspace.content.MetadataSchemaEnum;
import org.dspace.core.Utils;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.submit.factory.SubmissionServiceFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
Expand Down Expand Up @@ -158,7 +159,8 @@ public List<DCInputSet> getInputsByCollectionHandle(String collectionHandle)
throws DCInputsReaderException {
SubmissionConfig config;
try {
config = new SubmissionConfigReader().getSubmissionConfigByCollection(collectionHandle);
config = SubmissionServiceFactory.getInstance().getSubmissionConfigService()
.getSubmissionConfigByCollection(collectionHandle);
String formName = config.getSubmissionName();
if (formName == null) {
throw new DCInputsReaderException("No form designated as default");
Expand All @@ -180,7 +182,8 @@ public List<DCInputSet> getInputsBySubmissionName(String name)
throws DCInputsReaderException {
SubmissionConfig config;
try {
config = new SubmissionConfigReader().getSubmissionConfigByName(name);
config = SubmissionServiceFactory.getInstance().getSubmissionConfigService()
.getSubmissionConfigByName(name);
String formName = config.getSubmissionName();
if (formName == null) {
throw new DCInputsReaderException("No form designated as default");
Expand Down
Loading

0 comments on commit 19c3f6a

Please sign in to comment.