Skip to content

Commit

Permalink
Code lists batch import; allow importing zip files containing Excel f…
Browse files Browse the repository at this point in the history
…iles (#164)

Co-authored-by: Stefano Ricci <[email protected]>
  • Loading branch information
SteRiccio and SteRiccio authored Nov 9, 2023
1 parent 39428ea commit 2b6465e
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 5 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [4.0.88] - 2023-11-09
### Fixed
- Code lists batch import: allow importing zip files generated with macOS;
### Added
- Code lists batch import: allow importing zip files containing Excel files;

## [4.0.87] - 2023-07-30
### Fixed
- Cannot add more rows in tables where taxon attribute is defined as key;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

import java.io.File;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;

import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;
Expand All @@ -26,6 +28,10 @@ public class CodeListBatchImportJob extends Job {

// Extension required for the uploaded archive itself (checked in validateInput).
private static final String ZIP = "zip";
// Extensions accepted for the entries contained in the archive.
private static final String CSV = "csv";
private static final String XLS = "xls";
private static final String XLSX = "xlsx";
// Entries whose name starts with this prefix are ignored (see canSkipEntry);
// presumably the metadata folder added by macOS archive tools.
private static final String MACOSX_HIDDEN_ENTRY_PREFIX = "__MACOSX/";
// All extensions an archive entry may have to be treated as a code list file.
private static final List<String> VALID_EXTENSIONS = Arrays.<String>asList(CSV, XLS, XLSX);

//input
private CodeListManager codeListManager;
Expand All @@ -47,7 +53,7 @@ protected void createInternalVariables() throws Throwable {
@Override
protected void validateInput() throws Throwable {
super.validateInput();
if (!validateExtension(file.getName(), ZIP)) {
if (!hasExtension(file.getName(), ZIP)) {
throw new IllegalArgumentException("survey.code_list.import_data.error.invalid_extension");
}
ZipFile zipFile = null;
Expand All @@ -60,7 +66,9 @@ protected void validateInput() throws Throwable {
while (entries.hasMoreElements()) {
ZipArchiveEntry entry = (ZipArchiveEntry) entries.nextElement();
String entryName = entry.getName();
if (!validateExtension(entryName, CSV)) {
if (canSkipEntry(entryName)) {
// ignore it
} else if (!hasValidCodeListExtension(entryName)) {
throw new IllegalArgumentException("survey.code_list.import_data.error.invalid_extension");
} else if (!FilenameUtils.getBaseName(entryName).matches(Survey.INTERNAL_NAME_REGEX) ) {
throw new IllegalArgumentException("survey.code_list.import_data.error.invalid_filename");
Expand All @@ -72,8 +80,21 @@ protected void validateInput() throws Throwable {
}
}

private boolean validateExtension(String fileName, String expectedExtension) {
return expectedExtension.equals(FilenameUtils.getExtension(fileName));
/**
 * Tells whether a zip entry can be ignored by the import.
 * An entry is skippable when its name begins with
 * {@code MACOSX_HIDDEN_ENTRY_PREFIX} ("__MACOSX/").
 *
 * @param entryName full name of the zip entry; must not be null
 * @return {@code true} if the entry name starts with the hidden-entry prefix
 */
private boolean canSkipEntry(String entryName) {
    final boolean hiddenMacEntry = entryName.startsWith(MACOSX_HIDDEN_ENTRY_PREFIX);
    return hiddenMacEntry;
}

/**
 * Checks the extension of a file name against an expected one, ignoring case.
 *
 * @param fileName name (or path) whose extension is extracted with
 *        {@code FilenameUtils.getExtension}
 * @param expectedExtension extension to compare with, without the dot; must not be null
 * @return {@code true} if the extracted extension equals the expected one ignoring case
 */
private boolean hasExtension(String fileName, String expectedExtension) {
    final String actualExtension = FilenameUtils.getExtension(fileName);
    return expectedExtension.equalsIgnoreCase(actualExtension);
}

/**
 * Checks whether a file name carries one of the extensions listed in
 * {@code VALID_EXTENSIONS}; the comparison ignores case.
 *
 * @param fileName name (or path) of the file to check
 * @return {@code true} if the extension matches any of the valid ones,
 *         {@code false} otherwise
 */
private boolean hasValidCodeListExtension(String fileName) {
    // The extension is the same for every candidate, so extract it once.
    final String actualExtension = FilenameUtils.getExtension(fileName);
    boolean supported = false;
    // Stop scanning as soon as one candidate matches.
    for (int i = 0; !supported && i < VALID_EXTENSIONS.size(); i++) {
        supported = VALID_EXTENSIONS.get(i).equalsIgnoreCase(actualExtension);
    }
    return supported;
}

@Override
Expand All @@ -82,7 +103,7 @@ protected void buildTasks() throws Throwable {
while (entries.hasMoreElements()) {
ZipArchiveEntry entry = (ZipArchiveEntry) entries.nextElement();
String entryName = entry.getName();
if (CSV.equalsIgnoreCase(FilenameUtils.getExtension(entryName))) {
if (!canSkipEntry(entryName) && hasValidCodeListExtension(entryName)) {
addCodeListImportTask(FilenameUtils.getBaseName(entryName));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1188,3 +1188,6 @@ workflow.phase.cleansing=Data Cleansing
workflow.data_entry_type.entry=Copying paper forms
workflow.data_entry_type.cleansing=Mobile collection / Data Cleansing
codeListImport.parsingError.missing_required_columns.message=At least one "code" column must be defined; if importing a CSV file, check that the separator is comma.
codeListImport.parsingError.language_code_not_defined=Language code not defined among survey languages

0 comments on commit 2b6465e

Please sign in to comment.