Skip to content

Commit

Permalink
Merge pull request #10696 from erodde/10688_whitespace_trimming
Browse files Browse the repository at this point in the history
Added whitespace trimming to uploaded custom metadata TSV files
  • Loading branch information
ofahimIQSS authored Dec 3, 2024
2 parents f5f5cd2 + 59789b8 commit 1b5a1ea
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 5 deletions.
6 changes: 6 additions & 0 deletions doc/release-notes/10688_whitespace_trimming.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
### Added whitespace trimming to uploaded custom metadata TSV files

When loading custom metadata blocks using the `api/admin/datasetfield/load` API, whitespace can be introduced into field names.
This change trims whitespace at the beginning and end of all values read into the API before persisting them.

For more information, see #10688.
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public Response getByName(@PathParam("name") String name) {
String solrFieldSearchable = dsf.getSolrField().getNameSearchable();
String solrFieldFacetable = dsf.getSolrField().getNameFacetable();
String metadataBlock = dsf.getMetadataBlock().getName();
String uri=dsf.getUri();
String uri = dsf.getUri();
boolean hasParent = dsf.isHasParent();
boolean allowsMultiples = dsf.isAllowMultiples();
boolean isRequired = dsf.isRequired();
Expand Down Expand Up @@ -243,7 +243,9 @@ public Response loadDatasetFields(File file) {
br = new BufferedReader(new FileReader("/" + file));
while ((line = br.readLine()) != null) {
lineNumber++;
values = line.split(splitBy);
values = Arrays.stream(line.split(splitBy))
.map(String::trim)
.toArray(String[]::new);
if (values[0].startsWith("#")) { // Header row
switch (values[0]) {
case "#metadataBlock":
Expand Down Expand Up @@ -326,17 +328,17 @@ public Response loadDatasetFields(File file) {
*/
public String getGeneralErrorMessage(HeaderType header, int lineNumber, String message) {
List<String> arguments = new ArrayList<>();
arguments.add(header.name());
arguments.add(header != null ? header.name() : "unknown");
arguments.add(String.valueOf(lineNumber));
arguments.add(message);
return BundleUtil.getStringFromBundle("api.admin.datasetfield.load.GeneralErrorMessage", arguments);
}

/**
* Turn ArrayIndexOutOfBoundsException into an informative error message
* @param lineNumber
* @param header
* @param e
* @param lineNumber
* @param wrongIndex
* @return
*/
public String getArrayIndexOutOfBoundMessage(HeaderType header,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,61 @@
package edu.harvard.iq.dataverse.api;

import edu.harvard.iq.dataverse.ControlledVocabularyValueServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DataverseServiceBean;
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import jakarta.json.Json;
import jakarta.json.JsonObject;
import jakarta.json.JsonReader;
import jakarta.ws.rs.core.Response;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.File;
import java.io.StringReader;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;

@ExtendWith(MockitoExtension.class)
public class DatasetFieldServiceApiTest {

@Mock
private ActionLogServiceBean actionLogSvc;

@Mock
private MetadataBlockServiceBean metadataBlockService;

@Mock
private DataverseServiceBean dataverseService;

@Mock
private DatasetFieldServiceBean datasetFieldService;

@Mock
private ControlledVocabularyValueServiceBean controlledVocabularyValueService;

private DatasetFieldServiceApi api;

@BeforeEach
public void setup(){
api = new DatasetFieldServiceApi();
api.actionLogSvc = actionLogSvc;
api.metadataBlockService = metadataBlockService;
api.dataverseService = dataverseService;
api.datasetFieldService = datasetFieldService;
api.controlledVocabularyValueService = controlledVocabularyValueService;
}

@Test
public void testArrayIndexOutOfBoundMessageBundle() {
List<String> arguments = new ArrayList<>();
Expand Down Expand Up @@ -59,4 +105,41 @@ public void testGetGeneralErrorMessage() {
message
);
}

@Test
public void testGetGeneralErrorMessageEmptyHeader() {
DatasetFieldServiceApi api = new DatasetFieldServiceApi();
String message = api.getGeneralErrorMessage(null, 5, "some error");
assertEquals(
"Error parsing metadata block in unknown part, line #5: some error",
message
);
}

@Test
public void testLoadDatasetFieldsWhitespaceTrimming() {

Path resourceDirectory = Paths.get("src/test/resources/tsv/whitespace-test.tsv");
File testfile = new File(resourceDirectory.toFile().getAbsolutePath());
JsonReader jsonReader;
try (Response response = api.loadDatasetFields(testfile)) {
assertEquals(200, response.getStatus());
jsonReader = Json.createReader(new StringReader(response.getEntity().toString()));
}
JsonObject jsonObject = jsonReader.readObject();

final List<String> metadataNames = jsonObject.getJsonObject("data").getJsonArray("added")
.getValuesAs(e -> e.asJsonObject().getString("name"));
assertThat(metadataNames).contains("whitespaceDemo")
.contains("whitespaceDemoOne")
.contains("whitespaceDemoTwo")
.contains("whitespaceDemoThree")
.contains("CV1")
.contains("CV2")
.contains("CV3");
assertThat(metadataNames).doesNotContain(" whitespaceDemo")
.doesNotContain("whitespaceDemoOne ")
.doesNotContain("CV1 ")
.doesNotContain(" CV2");
}
}
10 changes: 10 additions & 0 deletions src/test/resources/tsv/whitespace-test.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#metadataBlock name dataverseAlias displayName
whitespaceDemo Whitespace Demo
#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id
whitespaceDemoOne One Trailing Space text 0 TRUE TRUE TRUE FALSE TRUE FALSE whitespaceDemo
whitespaceDemoTwo Two Leading Space text 1 TRUE TRUE TRUE FALSE TRUE FALSE whitespaceDemo
whitespaceDemoThree Three CV with errors text 2 TRUE TRUE TRUE FALSE TRUE FALSE whitespaceDemo
#controlledVocabulary DatasetField Value identifier displayOrder
whitespaceDemoThree CV1 0
whitespaceDemoThree CV2 1
whitespaceDemoThree CV3 2

0 comments on commit 1b5a1ea

Please sign in to comment.