Skip to content

Commit

Permalink
Merge pull request #11097 from IQSS/11027-extend-datasets-files-from-…
Browse files Browse the repository at this point in the history
…search-api

extended search API
  • Loading branch information
ofahimIQSS authored Dec 19, 2024
2 parents bee3cdf + 1e47153 commit 77caada
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 4 deletions.
4 changes: 4 additions & 0 deletions conf/solr/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@
<field name="fileNameWithoutExtension" type="text_en" stored="true" indexed="true" multiValued="true"/>
<field name="variableName" type="text_en" stored="true" indexed="true" multiValued="true"/>
<field name="variableLabel" type="text_en" stored="true" indexed="true" multiValued="true"/>
<field name="variableCount" type="plong" stored="true" indexed="false" multiValued="false"/>
<field name="observations" type="plong" stored="true" indexed="false" multiValued="false"/>

<field name="literalQuestion" type="text_en" stored="true" indexed="true" multiValued="true"/>
<field name="interviewInstructions" type="text_en" stored="true" indexed="true" multiValued="true"/>
Expand Down Expand Up @@ -201,6 +203,8 @@
<field name="fileChecksumValue" type="string" stored="true" indexed="true" multiValued="false"/>
<field name="fileContentType" type="string" stored="true" indexed="true" multiValued="false"/>
<field name="deaccessionReason" type="string" stored="true" indexed="false" multiValued="false"/>
<field name="fileRestricted" type="boolean" stored="true" indexed="false" multiValued="false"/>
<field name="canDownloadFile" type="boolean" stored="true" indexed="false" multiValued="false"/>

<!-- Added for Dataverse 4.0 alpha 1. This is a required field so we don't have to go to the database to get the database id of the entity. On cards we use the id in links -->
<field name="entityId" type="plong" stored="true" indexed="true" multiValued="false"/>
Expand Down
22 changes: 22 additions & 0 deletions doc/release-notes/11027-extend-datasets-files-from-search-api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
### Feature to extend Search API for SPA

New fields have been added to Search API results for files (items whose `type` is `file`).

For files:

- restricted: boolean
- canDownloadFile: boolean (based on the requesting user's permission to download the file)
- categories: array of strings, similar to "categories" in the metadata API

For tabular files:

- tabularTags: array of strings, for example `{"tabularTags": ["Event", "Genomics", "Geospatial"]}`
- variables: number (integer), the number of variables in the tabular file
- observations: number (integer), the number of observations in the tabular file



New fields added to the Solr schema.xml (note: the upgrade instructions will need to include instructions for updating schema.xml):

```xml
<field name="fileRestricted" type="boolean" stored="true" indexed="false" multiValued="false"/>
<field name="canDownloadFile" type="boolean" stored="true" indexed="false" multiValued="false"/>
<field name="variableCount" type="plong" stored="true" indexed="false" multiValued="false"/>
<field name="observations" type="plong" stored="true" indexed="false" multiValued="false"/>
```

See https://github.com/IQSS/dataverse/issues/11027
Original file line number Diff line number Diff line change
Expand Up @@ -1580,6 +1580,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
}
datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_TYPE, fileMetadata.getDataFile().getChecksumType().toString());
datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_VALUE, fileMetadata.getDataFile().getChecksumValue());
datafileSolrInputDocument.addField(SearchFields.FILE_RESTRICTED, fileMetadata.getDataFile().isRestricted());
datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription());
datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription());
GlobalId filePid = fileMetadata.getDataFile().getGlobalId();
Expand All @@ -1602,6 +1603,9 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
// names and labels:
if (fileMetadata.getDataFile().isTabularData()) {
List<DataVariable> variables = fileMetadata.getDataFile().getDataTable().getDataVariables();
Long observations = fileMetadata.getDataFile().getDataTable().getCaseQuantity();
datafileSolrInputDocument.addField(SearchFields.OBSERVATIONS, observations);
datafileSolrInputDocument.addField(SearchFields.VARIABLE_COUNT, variables.size());

Map<Long, VariableMetadata> variableMap = null;
List<VariableMetadata> variablesByMetadata = variableService.findVarMetByFileMetaId(fileMetadata.getId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ public class SearchFields {
public static final String FILE_CHECKSUM_TYPE = "fileChecksumType";
public static final String FILE_CHECKSUM_VALUE = "fileChecksumValue";
public static final String FILENAME_WITHOUT_EXTENSION = "fileNameWithoutExtension";
public static final String FILE_RESTRICTED = "fileRestricted";
/**
* Indexed as a string so we can facet on it.
*/
Expand Down Expand Up @@ -270,6 +271,8 @@ more targeted results for just datasets. The format is YYYY (i.e.
*/
public static final String DATASET_TYPE = "datasetType";

public static final String OBSERVATIONS = "observations";
public static final String VARIABLE_COUNT = "variableCount";
public static final String VARIABLE_NAME = "variableName";
public static final String VARIABLE_LABEL = "variableLabel";
public static final String LITERAL_QUESTION = "literalQuestion";
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package edu.harvard.iq.dataverse.search;

import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.groups.Group;
import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
Expand All @@ -18,6 +19,7 @@
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
Expand Down Expand Up @@ -75,6 +77,8 @@ public class SearchServiceBean {
SystemConfig systemConfig;
@EJB
SolrClientService solrClientService;
@EJB
PermissionServiceBean permissionService;
@Inject
ThumbnailServiceWrapper thumbnailServiceWrapper;

Expand Down Expand Up @@ -677,6 +681,15 @@ public SolrQueryResponse search(
logger.info("Exception setting setFileChecksumType: " + ex);
}
solrSearchResult.setFileChecksumValue((String) solrDocument.getFieldValue(SearchFields.FILE_CHECKSUM_VALUE));

if (solrDocument.getFieldValue(SearchFields.FILE_RESTRICTED) != null) {
solrSearchResult.setFileRestricted((Boolean) solrDocument.getFieldValue(SearchFields.FILE_RESTRICTED));
}

if (solrSearchResult.getEntity() != null) {
solrSearchResult.setCanDownloadFile(permissionService.hasPermissionsFor(dataverseRequest, solrSearchResult.getEntity(), EnumSet.of(Permission.DownloadFile)));
}

solrSearchResult.setUnf((String) solrDocument.getFieldValue(SearchFields.UNF));
solrSearchResult.setDatasetVersionId(datasetVersionId);
List<String> fileCategories = (List) solrDocument.getFieldValues(SearchFields.FILE_TAG);
Expand All @@ -688,6 +701,10 @@ public SolrQueryResponse search(
Collections.sort(tabularDataTags);
solrSearchResult.setTabularDataTags(tabularDataTags);
}
Long observations = (Long) solrDocument.getFieldValue(SearchFields.OBSERVATIONS);
solrSearchResult.setObservations(observations);
Long tabCount = (Long) solrDocument.getFieldValue(SearchFields.VARIABLE_COUNT);
solrSearchResult.setTabularDataCount(tabCount);
String filePID = (String) solrDocument.getFieldValue(SearchFields.FILE_PERSISTENT_ID);
// Only set the file persistent ID when it is present, non-empty, and not the literal string "null".
if (null != filePID && !"".equals(filePID) && !"null".equals(filePID)) {
solrSearchResult.setFilePersistentId(filePID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ public class SolrSearchResult {
private String fileMd5;
private DataFile.ChecksumType fileChecksumType;
private String fileChecksumValue;
private Boolean fileRestricted;
private Boolean canDownloadFile;
private String dataverseAlias;
private String dataverseParentAlias;
private String dataverseParentName;
Expand All @@ -122,6 +124,8 @@ public class SolrSearchResult {
private String harvestingDescription = null;
private List<String> fileCategories = null;
private List<String> tabularDataTags = null;
private Long tabularDataCount;
private Long observations;

private String identifierOfDataverse = null;
private String nameOfDataverse = null;
Expand Down Expand Up @@ -565,7 +569,12 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool
.add("citationHtml", this.citationHtml)
.add("identifier_of_dataverse", this.identifierOfDataverse)
.add("name_of_dataverse", this.nameOfDataverse)
.add("citation", this.citation);
.add("citation", this.citation)
.add("restricted", this.fileRestricted)
.add("variables", this.tabularDataCount)
.add("observations", this.observations)
.add("canDownloadFile", this.canDownloadFile);

// Now that nullSafeJsonBuilder has been instantiated, check for null before adding to it!
if (showRelevance) {
nullSafeJsonBuilder.add("matches", getRelevance());
Expand All @@ -579,6 +588,12 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool
if (!getPublicationStatuses().isEmpty()) {
nullSafeJsonBuilder.add("publicationStatuses", getPublicationStatusesAsJSON());
}
if (this.fileCategories != null && !this.fileCategories.isEmpty()) {
nullSafeJsonBuilder.add("categories", JsonPrinter.asJsonArray(this.fileCategories));
}
if (this.tabularDataTags != null && !this.tabularDataTags.isEmpty()) {
nullSafeJsonBuilder.add("tabularTags", JsonPrinter.asJsonArray(this.tabularDataTags));
}

if (this.entity == null) {

Expand Down Expand Up @@ -956,6 +971,18 @@ public List<String> getTabularDataTags() {
public void setTabularDataTags(List<String> tabularDataTags) {
this.tabularDataTags = tabularDataTags;
}
/**
 * Sets the number of variables of a tabular file search result.
 * SearchServiceBean passes the value of the Solr field
 * {@code SearchFields.VARIABLE_COUNT}; json() emits it under the key "variables".
 *
 * @param tabularDataCount the variable count; may be {@code null}
 */
public void setTabularDataCount(Long tabularDataCount) {
this.tabularDataCount = tabularDataCount;
}
/**
 * Returns the number of variables of a tabular file search result.
 *
 * @return the stored variable count, or {@code null} if it was never set
 */
public Long getTabularDataCount() {
return tabularDataCount;
}
/**
 * Returns the number of observations of a tabular file search result.
 *
 * @return the stored observation count, or {@code null} if it was never set
 */
public Long getObservations() {
return observations;
}
/**
 * Sets the number of observations of a tabular file search result.
 * SearchServiceBean passes the value of the Solr field
 * {@code SearchFields.OBSERVATIONS}; json() emits it under the key "observations".
 *
 * @param observations the observation count; may be {@code null}
 */
public void setObservations(Long observations) {
this.observations = observations;
}

public Map<String, String> getParent() {
return parent;
Expand Down Expand Up @@ -1078,6 +1105,21 @@ public void setFileChecksumValue(String fileChecksumValue) {
this.fileChecksumValue = fileChecksumValue;
}

/**
 * Returns whether the file of this search result is restricted.
 *
 * @return the stored flag, or {@code null} if it was never set
 */
public Boolean getFileRestricted() {
return fileRestricted;
}

/**
 * Sets whether the file of this search result is restricted.
 * SearchServiceBean calls this only when the Solr field
 * {@code SearchFields.FILE_RESTRICTED} is present; json() emits the value under
 * the key "restricted".
 *
 * @param fileRestricted the restricted flag
 */
public void setFileRestricted(Boolean fileRestricted) {
this.fileRestricted = fileRestricted;
}
/**
 * Returns whether the requesting user may download the file of this search result.
 *
 * @return the stored flag, or {@code null} if it was never set
 */
public Boolean getCanDownloadFile() {
return canDownloadFile;
}

/**
 * Sets whether the requesting user may download the file of this search result.
 * SearchServiceBean computes the value at search time from a
 * {@code Permission.DownloadFile} check on the result's entity, and only when
 * that entity is non-null; json() emits it under the key "canDownloadFile".
 *
 * @param canDownloadFile the download-permission flag
 */
public void setCanDownloadFile(Boolean canDownloadFile) {
this.canDownloadFile = canDownloadFile;
}

public String getNameSort() {
return nameSort;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,10 @@ public NullSafeJsonBuilder add(String name, boolean value) {
delegate.add(name, value);
return this;
}

/**
 * Adds a boxed boolean to the builder, silently skipping {@code null} values.
 *
 * @param name  the JSON key
 * @param value the value to add; when {@code null} nothing is added
 * @return this builder, for chaining
 */
public NullSafeJsonBuilder add(String name, Boolean value) {
    if (value == null) {
        // Nothing to add: leave the key out of the resulting object.
        return this;
    }
    return add(name, value.booleanValue());
}

@Override
public NullSafeJsonBuilder addNull(String name) {
delegate.addNull(name);
Expand Down
48 changes: 46 additions & 2 deletions src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import io.restassured.path.json.JsonPath;
import io.restassured.response.Response;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;

import java.util.List;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import jakarta.json.Json;
Expand All @@ -29,6 +32,7 @@
import jakarta.json.JsonObjectBuilder;

import static jakarta.ws.rs.core.Response.Status.*;
import static java.lang.Thread.sleep;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
Expand Down Expand Up @@ -1284,7 +1288,7 @@ public static void cleanup() {
}

@Test
public void testSearchFilesAndUrlImages() {
public void testSearchFilesAndUrlImages() throws InterruptedException {
Response createUser = UtilIT.createRandomUser();
createUser.prettyPrint();
String username = UtilIT.getUsernameFromResponse(createUser);
Expand All @@ -1300,8 +1304,12 @@ public void testSearchFilesAndUrlImages() {
System.out.println("id: " + datasetId);
String datasetPid = JsonPath.from(createDatasetResponse.getBody().asString()).getString("data.persistentId");
System.out.println("datasetPid: " + datasetPid);

String pathToFile = "src/main/webapp/resources/images/dataverseproject.png";
Response logoResponse = UtilIT.uploadDatasetLogo(datasetPid, pathToFile, apiToken);
logoResponse.prettyPrint();
logoResponse.then().assertThat()
.statusCode(200);

Response uploadImage = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
uploadImage.prettyPrint();
uploadImage.then().assertThat()
Expand All @@ -1311,6 +1319,23 @@ public void testSearchFilesAndUrlImages() {
uploadFile.prettyPrint();
uploadFile.then().assertThat()
.statusCode(200);
pathToFile = "src/test/resources/tab/test.tab";
String searchableUniqueId = "testtab"+ UUID.randomUUID().toString().substring(0, 8); // so the search only returns 1 file
JsonObjectBuilder json = Json.createObjectBuilder()
.add("description", searchableUniqueId)
.add("restrict", "true")
.add("categories", Json.createArrayBuilder().add("Data"));
Response uploadTabFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, json.build(), apiToken);
uploadTabFile.prettyPrint();
uploadTabFile.then().assertThat()
.statusCode(200);
// Ensure tabular file is ingested
sleep(2000);
// Set tabular tags
String tabularFileId = uploadTabFile.getBody().jsonPath().getString("data.files[0].dataFile.id");
List<String> testTabularTags = List.of("Survey", "Genomics");
Response setFileTabularTagsResponse = UtilIT.setFileTabularTags(tabularFileId, apiToken, testTabularTags);
setFileTabularTagsResponse.then().assertThat().statusCode(OK.getStatusCode());

Response publishDataverse = UtilIT.publishDataverseViaSword(dataverseAlias, apiToken);
publishDataverse.prettyPrint();
Expand Down Expand Up @@ -1339,13 +1364,32 @@ public void testSearchFilesAndUrlImages() {
.body("data.items[0].url", CoreMatchers.containsString("/dataverse/"))
.body("data.items[0]", CoreMatchers.not(CoreMatchers.hasItem("image_url")));

searchResp = UtilIT.search(datasetPid, apiToken);
searchResp.prettyPrint();
searchResp.then().assertThat()
.statusCode(OK.getStatusCode())
.body("data.items[0].type", CoreMatchers.is("dataset"))
.body("data.items[0].image_url", CoreMatchers.containsString("/logo"));

searchResp = UtilIT.search("mydata", apiToken);
searchResp.prettyPrint();
searchResp.then().assertThat()
.statusCode(OK.getStatusCode())
.body("data.items[0].type", CoreMatchers.is("file"))
.body("data.items[0].url", CoreMatchers.containsString("/datafile/"))
.body("data.items[0]", CoreMatchers.not(CoreMatchers.hasItem("image_url")));
searchResp = UtilIT.search(searchableUniqueId, apiToken);
searchResp.prettyPrint();
searchResp.then().assertThat()
.statusCode(OK.getStatusCode())
.body("data.items[0].type", CoreMatchers.is("file"))
.body("data.items[0].url", CoreMatchers.containsString("/datafile/"))
.body("data.items[0].variables", CoreMatchers.is(3))
.body("data.items[0].observations", CoreMatchers.is(10))
.body("data.items[0].restricted", CoreMatchers.is(true))
.body("data.items[0].canDownloadFile", CoreMatchers.is(true))
.body("data.items[0].tabularTags", CoreMatchers.hasItem("Genomics"))
.body("data.items[0]", CoreMatchers.not(CoreMatchers.hasItem("image_url")));
}

@Test
Expand Down

0 comments on commit 77caada

Please sign in to comment.