Skip to content

Commit

Permalink
Merge pull request DSpace#9273 from abollini/DURACOM-225
Browse files Browse the repository at this point in the history
MediaFilter over large repository result in downtime
  • Loading branch information
tdonohue authored Feb 29, 2024
2 parents 529c495 + 1966bfb commit 5b76b17
Show file tree
Hide file tree
Showing 7 changed files with 403 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ public Options getOptions() {
options.addOption("v", "verbose", false, "print all extracted text and other details to STDOUT");
options.addOption("q", "quiet", false, "do not print anything except in the event of errors.");
options.addOption("f", "force", false, "force all bitstreams to be processed");
options.addOption("i", "identifier", true, "ONLY process bitstreams belonging to identifier");
options.addOption("i", "identifier", true,
"ONLY process bitstreams belonging to the provided handle identifier");
options.addOption("m", "maximum", true, "process no more than maximum items");
options.addOption("h", "help", false, "help");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,18 @@ public void applyFiltersAllItems(Context context) throws Exception {
@Override
public void applyFiltersCommunity(Context context, Community community)
throws Exception { //only apply filters if community not in skip-list
// ensure that the community is attached to the current hibernate session
// as we are committing after each item (handles, sub-communities and
// collections are lazy attributes)
community = context.reloadEntity(community);
if (!inSkipList(community.getHandle())) {
List<Community> subcommunities = community.getSubcommunities();
for (Community subcommunity : subcommunities) {
applyFiltersCommunity(context, subcommunity);
}

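// re-attach the community to the current hibernate session before reading
// its collections: we are committing after each item, and the sub-community
// recursion above has run since the community was last reloaded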
community = context.reloadEntity(community);
List<Collection> collections = community.getCollections();
for (Collection collection : collections) {
applyFiltersCollection(context, collection);
Expand All @@ -148,6 +154,9 @@ public void applyFiltersCommunity(Context context, Community community)
@Override
public void applyFiltersCollection(Context context, Collection collection)
throws Exception {
// ensure that the collection is attached to the current hibernate session
// as we are committing after each item (handles are lazy attributes)
collection = context.reloadEntity(collection);
//only apply filters if collection not in skip-list
if (!inSkipList(collection.getHandle())) {
Iterator<Item> itemIterator = itemService.findAllByCollection(context, collection);
Expand All @@ -171,6 +180,8 @@ public void applyFiltersItem(Context c, Item item) throws Exception {
}
// clear item objects from context cache and internal cache
c.uncacheEntity(currentItem);
// commit after each item to release DB resources
c.commit();
currentItem = null;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import javax.persistence.Query;
import javax.persistence.criteria.CriteriaBuilder;
import javax.persistence.criteria.CriteriaQuery;
Expand All @@ -26,6 +27,7 @@
import org.dspace.core.AbstractHibernateDSODAO;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.UUIDIterator;

/**
* Hibernate implementation of the Database Access Object interface class for the Bitstream object.
Expand Down Expand Up @@ -76,48 +78,53 @@ public List<Bitstream> findBitstreamsWithNoRecentChecksum(Context context) throw

@Override
public Iterator<Bitstream> findByCommunity(Context context, Community community) throws SQLException {
Query query = createQuery(context, "select b from Bitstream b " +
Query query = createQuery(context, "select b.id from Bitstream b " +
"join b.bundles bitBundles " +
"join bitBundles.items item " +
"join item.collections itemColl " +
"join itemColl.communities community " +
"WHERE :community IN community");

query.setParameter("community", community);

return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Bitstream>(context, uuids, Bitstream.class, this);
}

@Override
public Iterator<Bitstream> findByCollection(Context context, Collection collection) throws SQLException {
Query query = createQuery(context, "select b from Bitstream b " +
Query query = createQuery(context, "select b.id from Bitstream b " +
"join b.bundles bitBundles " +
"join bitBundles.items item " +
"join item.collections c " +
"WHERE :collection IN c");

query.setParameter("collection", collection);

return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Bitstream>(context, uuids, Bitstream.class, this);
}

@Override
public Iterator<Bitstream> findByItem(Context context, Item item) throws SQLException {
Query query = createQuery(context, "select b from Bitstream b " +
Query query = createQuery(context, "select b.id from Bitstream b " +
"join b.bundles bitBundles " +
"join bitBundles.items item " +
"WHERE :item IN item");

query.setParameter("item", item);

return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Bitstream>(context, uuids, Bitstream.class, this);
}

@Override
public Iterator<Bitstream> findByStoreNumber(Context context, Integer storeNumber) throws SQLException {
Query query = createQuery(context, "select b from Bitstream b where b.storeNumber = :storeNumber");
Query query = createQuery(context, "select b.id from Bitstream b where b.storeNumber = :storeNumber");
query.setParameter("storeNumber", storeNumber);
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Bitstream>(context, uuids, Bitstream.class, this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.dspace.contentreport.QueryPredicate;
import org.dspace.core.AbstractHibernateDSODAO;
import org.dspace.core.Context;
import org.dspace.core.UUIDIterator;
import org.dspace.eperson.EPerson;
import org.dspace.util.JpaCriteriaBuilderKit;

Expand All @@ -54,28 +55,34 @@ protected ItemDAOImpl() {

@Override
public Iterator<Item> findAll(Context context, boolean archived) throws SQLException {
Query query = createQuery(context, "FROM Item WHERE inArchive=:in_archive ORDER BY id");
Query query = createQuery(context, "SELECT i.id FROM Item i WHERE inArchive=:in_archive ORDER BY id");
query.setParameter("in_archive", archived);
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
public Iterator<Item> findAll(Context context, boolean archived, int limit, int offset) throws SQLException {
Query query = createQuery(context, "FROM Item WHERE inArchive=:in_archive ORDER BY id");
Query query = createQuery(context, "SELECT i.id FROM Item i WHERE inArchive=:in_archive ORDER BY id");
query.setParameter("in_archive", archived);
query.setFirstResult(offset);
query.setMaxResults(limit);
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}


@Override
public Iterator<Item> findAll(Context context, boolean archived, boolean withdrawn) throws SQLException {
Query query = createQuery(context,
"FROM Item WHERE inArchive=:in_archive or withdrawn=:withdrawn ORDER BY id");
"SELECT i.id FROM Item i WHERE inArchive=:in_archive or withdrawn=:withdrawn ORDER BY id");
query.setParameter("in_archive", archived);
query.setParameter("withdrawn", withdrawn);
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
Expand All @@ -84,20 +91,22 @@ public Iterator<Item> findAllRegularItems(Context context) throws SQLException {
// It does not include workspace, workflow or template items.
Query query = createQuery(
context,
"SELECT i FROM Item as i " +
"SELECT i.id FROM Item as i " +
"LEFT JOIN Version as v ON i = v.item " +
"WHERE i.inArchive=true or i.withdrawn=true or (i.inArchive=false and v.id IS NOT NULL) " +
"ORDER BY i.id"
);
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
public Iterator<Item> findAll(Context context, boolean archived,
boolean withdrawn, boolean discoverable, Date lastModified)
throws SQLException {
StringBuilder queryStr = new StringBuilder();
queryStr.append("SELECT i FROM Item i");
queryStr.append("SELECT i.id FROM Item i");
queryStr.append(" WHERE (inArchive = :in_archive OR withdrawn = :withdrawn)");
queryStr.append(" AND discoverable = :discoverable");

Expand All @@ -113,16 +122,20 @@ public Iterator<Item> findAll(Context context, boolean archived,
if (lastModified != null) {
query.setParameter("last_modified", lastModified, TemporalType.TIMESTAMP);
}
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
public Iterator<Item> findBySubmitter(Context context, EPerson eperson) throws SQLException {
Query query = createQuery(context,
"FROM Item WHERE inArchive=:in_archive and submitter=:submitter ORDER BY id");
"SELECT i.id FROM Item i WHERE inArchive=:in_archive and submitter=:submitter ORDER BY id");
query.setParameter("in_archive", true);
query.setParameter("submitter", eperson);
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
Expand All @@ -131,16 +144,18 @@ public Iterator<Item> findBySubmitter(Context context, EPerson eperson, boolean
if (!retrieveAllItems) {
return findBySubmitter(context, eperson);
}
Query query = createQuery(context, "FROM Item WHERE submitter=:submitter ORDER BY id");
Query query = createQuery(context, "SELECT i.id FROM Item i WHERE submitter=:submitter ORDER BY id");
query.setParameter("submitter", eperson);
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
public Iterator<Item> findBySubmitter(Context context, EPerson eperson, MetadataField metadataField, int limit)
throws SQLException {
StringBuilder query = new StringBuilder();
query.append("SELECT item FROM Item as item ");
query.append("SELECT item.id FROM Item as item ");
addMetadataLeftJoin(query, Item.class.getSimpleName().toLowerCase(), Collections.singletonList(metadataField));
query.append(" WHERE item.inArchive = :in_archive");
query.append(" AND item.submitter =:submitter");
Expand All @@ -152,13 +167,15 @@ public Iterator<Item> findBySubmitter(Context context, EPerson eperson, Metadata
hibernateQuery.setParameter("in_archive", true);
hibernateQuery.setParameter("submitter", eperson);
hibernateQuery.setMaxResults(limit);
return iterate(hibernateQuery);
@SuppressWarnings("unchecked")
List<UUID> uuids = hibernateQuery.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
public Iterator<Item> findByMetadataField(Context context, MetadataField metadataField, String value,
boolean inArchive) throws SQLException {
String hqlQueryString = "SELECT item FROM Item as item join item.metadata metadatavalue " +
String hqlQueryString = "SELECT item.id FROM Item as item join item.metadata metadatavalue " +
"WHERE item.inArchive=:in_archive AND metadatavalue.metadataField = :metadata_field";
if (value != null) {
hqlQueryString += " AND STR(metadatavalue.value) = :text_value";
Expand All @@ -170,7 +187,9 @@ public Iterator<Item> findByMetadataField(Context context, MetadataField metadat
if (value != null) {
query.setParameter("text_value", value);
}
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
Expand Down Expand Up @@ -276,20 +295,22 @@ private <T> List<Predicate> toPredicates(CriteriaBuilder criteriaBuilder, Criter
public Iterator<Item> findByAuthorityValue(Context context, MetadataField metadataField, String authority,
boolean inArchive) throws SQLException {
Query query = createQuery(context,
"SELECT item FROM Item as item join item.metadata metadatavalue " +
"SELECT item.id FROM Item as item join item.metadata metadatavalue " +
"WHERE item.inArchive=:in_archive AND metadatavalue.metadataField = :metadata_field AND " +
"metadatavalue.authority = :authority ORDER BY item.id");
query.setParameter("in_archive", inArchive);
query.setParameter("metadata_field", metadataField);
query.setParameter("authority", authority);
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
public Iterator<Item> findArchivedByCollection(Context context, Collection collection, Integer limit,
Integer offset) throws SQLException {
Query query = createQuery(context,
"select i from Item i join i.collections c " +
"select i.id from Item i join i.collections c " +
"WHERE :collection IN c AND i.inArchive=:in_archive ORDER BY i.id");
query.setParameter("collection", collection);
query.setParameter("in_archive", true);
Expand All @@ -299,7 +320,9 @@ public Iterator<Item> findArchivedByCollection(Context context, Collection colle
if (limit != null) {
query.setMaxResults(limit);
}
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
Expand Down Expand Up @@ -334,17 +357,18 @@ public int countArchivedByCollectionExcludingOwning(Context context, Collection
@Override
public Iterator<Item> findAllByCollection(Context context, Collection collection) throws SQLException {
Query query = createQuery(context,
"select i from Item i join i.collections c WHERE :collection IN c ORDER BY i.id");
"select i.id from Item i join i.collections c WHERE :collection IN c ORDER BY i.id");
query.setParameter("collection", collection);

return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
public Iterator<Item> findAllByCollection(Context context, Collection collection, Integer limit, Integer offset)
throws SQLException {
Query query = createQuery(context,
"select i from Item i join i.collections c WHERE :collection IN c ORDER BY i.id");
"select i.id from Item i join i.collections c WHERE :collection IN c ORDER BY i.id");
query.setParameter("collection", collection);

if (offset != null) {
Expand All @@ -353,8 +377,9 @@ public Iterator<Item> findAllByCollection(Context context, Collection collection
if (limit != null) {
query.setMaxResults(limit);
}

return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
Expand Down Expand Up @@ -390,9 +415,11 @@ public int countItems(Context context, List<Collection> collections, boolean inc
public Iterator<Item> findByLastModifiedSince(Context context, Date since)
throws SQLException {
Query query = createQuery(context,
"SELECT i FROM Item i WHERE last_modified > :last_modified ORDER BY id");
"SELECT i.id FROM Item i WHERE last_modified > :last_modified ORDER BY id");
query.setParameter("last_modified", since, TemporalType.TIMESTAMP);
return iterate(query);
@SuppressWarnings("unchecked")
List<UUID> uuids = query.getResultList();
return new UUIDIterator<Item>(context, uuids, Item.class, this);
}

@Override
Expand Down
Loading

0 comments on commit 5b76b17

Please sign in to comment.