diff --git a/src/main/java/cz/cvut/kbss/termit/exception/UnsupportedTextAnalysisLanguageException.java b/src/main/java/cz/cvut/kbss/termit/exception/UnsupportedTextAnalysisLanguageException.java new file mode 100644 index 000000000..3ddb95c60 --- /dev/null +++ b/src/main/java/cz/cvut/kbss/termit/exception/UnsupportedTextAnalysisLanguageException.java @@ -0,0 +1,14 @@ +package cz.cvut.kbss.termit.exception; + +import cz.cvut.kbss.termit.model.Asset; +import cz.cvut.kbss.termit.model.resource.File; + +/** + * Indicates that a language is not supported by the text analysis service. + */ +public class UnsupportedTextAnalysisLanguageException extends TermItException { + + public UnsupportedTextAnalysisLanguageException(String message, Asset asset) { + super(message, asset instanceof File ? "error.annotation.file.unsupportedLanguage" : "error.annotation.term.unsupportedLanguage"); + } +} diff --git a/src/main/java/cz/cvut/kbss/termit/model/resource/File.java b/src/main/java/cz/cvut/kbss/termit/model/resource/File.java index 26b45f940..c16d62a2a 100644 --- a/src/main/java/cz/cvut/kbss/termit/model/resource/File.java +++ b/src/main/java/cz/cvut/kbss/termit/model/resource/File.java @@ -21,16 +21,16 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import cz.cvut.kbss.jopa.model.annotations.FetchType; import cz.cvut.kbss.jopa.model.annotations.Inferred; +import cz.cvut.kbss.jopa.model.annotations.OWLAnnotationProperty; import cz.cvut.kbss.jopa.model.annotations.OWLClass; import cz.cvut.kbss.jopa.model.annotations.OWLObjectProperty; import cz.cvut.kbss.jopa.model.annotations.Types; +import cz.cvut.kbss.jopa.vocabulary.DC; import cz.cvut.kbss.jsonld.annotation.JsonLdAttributeOrder; -import cz.cvut.kbss.termit.exception.TermItException; import cz.cvut.kbss.termit.model.util.SupportsStorage; import cz.cvut.kbss.termit.service.IdentifierResolver; import cz.cvut.kbss.termit.util.Vocabulary; -import java.lang.reflect.Field; import java.util.Objects; import java.util.Set; @@ 
-43,6 +43,9 @@ public class File extends Resource implements SupportsStorage { @OWLObjectProperty(iri = Vocabulary.s_p_je_casti_dokumentu, fetch = FetchType.EAGER) private Document document; + @OWLAnnotationProperty(iri = DC.Terms.LANGUAGE, simpleLiteral = true) + private String language; + @Types private Set types; @@ -54,6 +57,14 @@ public void setDocument(Document document) { this.document = document; } + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } + public Set getTypes() { return types; } @@ -73,15 +84,11 @@ public boolean equals(Object o) { return Objects.equals(getUri(), file.getUri()); } - @Override - public int hashCode() { - return Objects.hash(getUri()); - } - @Override public String toString() { return "File{" + - super.toString() + (document != null ? "document=<" + document.getUri() + ">" : "") + '}'; + super.toString() + (language != null ? "@" + language : "") + + (document != null ? "document=<" + document.getUri() + ">" : "") + '}'; } /** @@ -109,12 +116,4 @@ public String getDirectoryName() { return IdentifierResolver.normalizeToAscii(labelPart) + '_' + getUri().hashCode(); } } - - public static Field getDocumentField() { - try { - return File.class.getDeclaredField("document"); - } catch (NoSuchFieldException e) { - throw new TermItException("Fatal error! Unable to retrieve \"document\" field.", e); - } - } } diff --git a/src/main/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDao.java b/src/main/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDao.java index 02230ea73..9a9a7d734 100644 --- a/src/main/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDao.java +++ b/src/main/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDao.java @@ -219,10 +219,13 @@ public Vocabulary update(Vocabulary entity) { /** * Forcefully removes the specified vocabulary. *

- * This deletes the whole graph of the vocabulary, all terms in the vocabulary's glossary and then removes the vocabulary itself. Extreme caution - * should be exercised when using this method. All relevant data, including documents and files, will be dropped. + * This deletes the whole graph of the vocabulary, all terms in the vocabulary's glossary and then removes the + * vocabulary itself. Extreme caution should be exercised when using this method. All relevant data, including + * documents and files, will be dropped. *

- * Publishes {@link VocabularyWillBeRemovedEvent} before the actual removal to allow other services to clean up related resources (e.g., delete the document). + * Publishes {@link VocabularyWillBeRemovedEvent} before the actual removal to allow other services to clean up + * related resources (e.g., delete the document). + * * @param entity The vocabulary to delete */ @ModifiesData @@ -238,9 +241,9 @@ public void remove(Vocabulary entity) { *

* Forcefully removes the specified vocabulary. *

- * This deletes all terms in the vocabulary's glossary and then removes the vocabulary itself. - * Extreme caution should be exercised when using this method, - * as it does not check for any references or usage and just drops all the relevant data. + * This deletes all terms in the vocabulary's glossary and then removes the vocabulary itself. Extreme caution + * should be exercised when using this method, as it does not check for any references or usage and just drops all + * the relevant data. *

* The document is not removed. */ @@ -250,19 +253,19 @@ public void removeVocabularyKeepDocument(Vocabulary entity) { /** *

- * Does not publish the {@link VocabularyWillBeRemovedEvent}.
- * You should use {@link #remove(Vocabulary)} instead. + * Does not publish the {@link VocabularyWillBeRemovedEvent}.
You should use {@link #remove(Vocabulary)} + * instead. *

* Forcefully removes the specified vocabulary. *

* This deletes all terms in the vocabulary's glossary and then removes the vocabulary itself. Extreme caution * should be exercised when using this method, as it does not check for any references or usage and just drops all * the relevant data. - * @param entity The vocabulary to delete - * @param dropGraph if false, - * executes {@code src/main/resources/query/remove/removeGlossaryTerms.ru} removing terms, - * their relations, model, glossary and vocabulary itself, keeps the document. - * When true, the whole vocabulary graph is dropped. + * + * @param entity The vocabulary to delete + * @param dropGraph if false, executes {@code src/main/resources/query/remove/removeGlossaryTerms.ru} removing + * terms, their relations, model, glossary and vocabulary itself, keeps the document. When true, + * the whole vocabulary graph is dropped. */ private void removeVocabulary(Vocabulary entity, boolean dropGraph) { Objects.requireNonNull(entity); @@ -270,7 +273,7 @@ private void removeVocabulary(Vocabulary entity, boolean dropGraph) { try { final URI vocabularyContext = contextMapper.getVocabularyContext(entity.getUri()); - if(dropGraph) { + if (dropGraph) { // drops whole named graph em.createNativeQuery("DROP GRAPH ?context") .setParameter("context", vocabularyContext) @@ -319,8 +322,8 @@ public Optional findGlossary(URI uri) { } /** - * Checks whether terms from the {@code subjectVocabulary} reference (as parent terms) any terms from the {@code - * targetVocabulary}. + * Checks whether terms from the {@code subjectVocabulary} reference (as parent terms) any terms from the + * {@code targetVocabulary}. * * @param subjectVocabulary Subject vocabulary identifier * @param targetVocabulary Target vocabulary identifier @@ -399,7 +402,7 @@ public List getChangesOfContent(Vocabulary vocabulary) { * Gets content change records of the specified vocabulary. 
* * @param vocabulary Vocabulary whose content changes to get - * @param pageReq Specification of the size and number of the page to return + * @param pageReq Specification of the size and number of the page to return * @return List of change records, ordered by date in descending order */ public List getDetailedHistoryOfContent(Vocabulary vocabulary, Pageable pageReq) { @@ -407,25 +410,27 @@ public List getDetailedHistoryOfContent(Vocabulary vocabul return createDetailedContentChangesQuery(vocabulary, pageReq).getResultList(); } - private TypedQuery createDetailedContentChangesQuery(Vocabulary vocabulary, Pageable pageReq) { + private TypedQuery createDetailedContentChangesQuery(Vocabulary vocabulary, + Pageable pageReq) { return em.createNativeQuery(""" - SELECT ?record WHERE { - ?term ?inVocabulary ?vocabulary ; - a ?termType . - ?record a ?changeRecord ; - ?relatesTo ?term ; - ?hasTime ?timestamp . - OPTIONAL { ?record ?hasChangedAttribute ?attribute . } - } ORDER BY DESC(?timestamp) ?attribute - """, AbstractChangeRecord.class) + SELECT ?record WHERE { + ?term ?inVocabulary ?vocabulary ; + a ?termType . + ?record a ?changeRecord ; + ?relatesTo ?term ; + ?hasTime ?timestamp . + OPTIONAL { ?record ?hasChangedAttribute ?attribute . 
} + } ORDER BY DESC(?timestamp) ?attribute + """, AbstractChangeRecord.class) .setParameter("inVocabulary", - URI.create(cz.cvut.kbss.termit.util.Vocabulary.s_p_je_pojmem_ze_slovniku)) + URI.create(cz.cvut.kbss.termit.util.Vocabulary.s_p_je_pojmem_ze_slovniku)) .setParameter("vocabulary", vocabulary) - .setParameter("termType", URI.create(SKOS.CONCEPT)) + .setParameter("termType", URI.create(SKOS.CONCEPT)) .setParameter("changeRecord", URI.create(cz.cvut.kbss.termit.util.Vocabulary.s_c_zmena)) .setParameter("relatesTo", URI.create(cz.cvut.kbss.termit.util.Vocabulary.s_p_ma_zmenenou_entitu)) .setParameter("hasTime", URI.create(cz.cvut.kbss.termit.util.Vocabulary.s_p_ma_datum_a_cas_modifikace)) - .setParameter("hasChangedAttribute", URI.create(cz.cvut.kbss.termit.util.Vocabulary.s_p_ma_zmeneny_atribut)) + .setParameter("hasChangedAttribute", + URI.create(cz.cvut.kbss.termit.util.Vocabulary.s_p_ma_zmeneny_atribut)) .setFirstResult((int) pageReq.getOffset()) .setMaxResults(pageReq.getPageSize()); } @@ -580,16 +585,17 @@ public List getVocabularyRelations(Vocabulary vocabulary, Collect try { return em.createNativeQuery(""" - SELECT DISTINCT ?object ?relation ?subject { - ?object a ?vocabularyType ; - ?relation ?subject . - FILTER(?object != ?subject) . - FILTER(?relation NOT IN (?excluded)) . - } ORDER BY ?object ?relation - """, "RDFStatement") + SELECT DISTINCT ?object ?relation ?subject { + ?object a ?vocabularyType ; + ?relation ?subject . + FILTER(?object != ?subject) . + FILTER(?relation NOT IN (?excluded)) . 
+ } ORDER BY ?object ?relation + """, "RDFStatement") .setParameter("subject", vocabularyUri) - .setParameter("excluded", excludedRelations) - .setParameter("vocabularyType", URI.create(EntityToOwlClassMapper.getOwlClassForEntity(Vocabulary.class))) + .setParameter("excluded", excludedRelations) + .setParameter("vocabularyType", + URI.create(EntityToOwlClassMapper.getOwlClassForEntity(Vocabulary.class))) .getResultList(); } catch (RuntimeException e) { throw new PersistenceException(e); @@ -607,31 +613,31 @@ public List getTermRelations(Vocabulary vocabulary) { try { return em.createNativeQuery(""" - SELECT DISTINCT ?object ?relation ?subject WHERE { - ?term a ?termType; - ?inVocabulary ?vocabulary . - - { - ?term ?relation ?secondTerm . - ?secondTerm a ?termType; - ?inVocabulary ?secondVocabulary . - - BIND(?term as ?object) - BIND(?secondTerm as ?subject) - } UNION { - ?secondTerm ?relation ?term . - ?secondTerm a ?termType; - ?inVocabulary ?secondVocabulary . - - BIND(?secondTerm as ?object) - BIND(?term as ?subject) - } - - FILTER(?relation IN (?deniedRelations)) - FILTER(?object != ?subject) - FILTER(?secondVocabulary != ?vocabulary) - } ORDER by ?object ?relation ?subject - """, "RDFStatement" + SELECT DISTINCT ?object ?relation ?subject WHERE { + ?term a ?termType; + ?inVocabulary ?vocabulary . + + { + ?term ?relation ?secondTerm . + ?secondTerm a ?termType; + ?inVocabulary ?secondVocabulary . + + BIND(?term as ?object) + BIND(?secondTerm as ?subject) + } UNION { + ?secondTerm ?relation ?term . + ?secondTerm a ?termType; + ?inVocabulary ?secondVocabulary . 
+ + BIND(?secondTerm as ?object) + BIND(?term as ?subject) + } + + FILTER(?relation IN (?deniedRelations)) + FILTER(?object != ?subject) + FILTER(?secondVocabulary != ?vocabulary) + } ORDER by ?object ?relation ?subject + """, "RDFStatement" ).setMaxResults(DEFAULT_PAGE_SIZE) .setParameter("termType", termType) .setParameter("inVocabulary", inVocabulary) @@ -642,4 +648,32 @@ public List getTermRelations(Vocabulary vocabulary) { throw new PersistenceException(e); } } + + /** + * Returns the list of all distinct languages (language tags) used by terms in the specified vocabulary. + * + * @param vocabularyUri Vocabulary identifier + * @return List of distinct languages + */ + public List getLanguages(URI vocabularyUri) { + Objects.requireNonNull(vocabularyUri); + try { + return em.createNativeQuery(""" + SELECT DISTINCT ?lang WHERE { + ?x a ?type ; + ?inVocabulary ?vocabulary ; + ?labelProp ?label . + BIND (LANG(?label) as ?lang) + } + """, String.class) + .setParameter("type", URI.create(SKOS.CONCEPT)) + .setParameter("inVocabulary", + URI.create(cz.cvut.kbss.termit.util.Vocabulary.s_p_je_pojmem_ze_slovniku)) + .setParameter("vocabulary", vocabularyUri) + .setParameter("labelProp", URI.create(SKOS.PREF_LABEL)) + .getResultList(); + } catch (RuntimeException e) { + throw new PersistenceException(e); + } + } } diff --git a/src/main/java/cz/cvut/kbss/termit/rest/VocabularyController.java b/src/main/java/cz/cvut/kbss/termit/rest/VocabularyController.java index e8cd5afb4..2f4d4d1a9 100644 --- a/src/main/java/cz/cvut/kbss/termit/rest/VocabularyController.java +++ b/src/main/java/cz/cvut/kbss/termit/rest/VocabularyController.java @@ -311,6 +311,22 @@ public List getDetailedHistoryOfContent( return vocabularyService.getDetailedHistoryOfContent(vocabulary, pageReq); } + @Operation(security = {@SecurityRequirement(name = "bearer-key")}, + description = "Gets a list of languages used in the vocabulary.") + @ApiResponses({ + @ApiResponse(responseCode = "200", description = "List 
of languages.") + }) + @GetMapping(value = "/{localName}/languages", produces = {MediaType.APPLICATION_JSON_VALUE, JsonLd.MEDIA_TYPE}) + public List getLanguages( + @Parameter(description = ApiDoc.ID_LOCAL_NAME_DESCRIPTION, + example = ApiDoc.ID_LOCAL_NAME_EXAMPLE) @PathVariable String localName, + @Parameter(description = ApiDoc.ID_NAMESPACE_DESCRIPTION, + example = ApiDoc.ID_NAMESPACE_EXAMPLE) @RequestParam(name = QueryParams.NAMESPACE, + required = false) Optional namespace) { + final URI vocabularyUri = resolveVocabularyUri(localName, namespace); + return vocabularyService.getLanguages(vocabularyUri); + } + @Operation(security = {@SecurityRequirement(name = "bearer-key")}, description = "Updates metadata of vocabulary with the specified identifier.") @ApiResponses({ diff --git a/src/main/java/cz/cvut/kbss/termit/rest/handler/RestExceptionHandler.java b/src/main/java/cz/cvut/kbss/termit/rest/handler/RestExceptionHandler.java index 0ea71c47c..53ba971a6 100644 --- a/src/main/java/cz/cvut/kbss/termit/rest/handler/RestExceptionHandler.java +++ b/src/main/java/cz/cvut/kbss/termit/rest/handler/RestExceptionHandler.java @@ -36,6 +36,7 @@ import cz.cvut.kbss.termit.exception.TermItException; import cz.cvut.kbss.termit.exception.UnsupportedOperationException; import cz.cvut.kbss.termit.exception.UnsupportedSearchFacetException; +import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException; import cz.cvut.kbss.termit.exception.ValidationException; import cz.cvut.kbss.termit.exception.WebServiceIntegrationException; import cz.cvut.kbss.termit.exception.importing.UnsupportedImportMediaTypeException; @@ -99,7 +100,8 @@ private static ErrorInfo errorInfo(HttpServletRequest request, Throwable e) { } private static ErrorInfo errorInfo(HttpServletRequest request, TermItException e) { - return ErrorInfo.createParametrizedWithMessage(e.getMessage(), e.getMessageId(), request.getRequestURI(), e.getParameters()); + return 
ErrorInfo.createParametrizedWithMessage(e.getMessage(), e.getMessageId(), request.getRequestURI(), + e.getParameters()); } @ExceptionHandler(PersistenceException.class) @@ -290,4 +292,11 @@ public ResponseEntity uriSyntaxException(HttpServletRequest request, .addParameter("char", Character.toString(e.getInput().charAt(e.getIndex()))); return new ResponseEntity<>(errorInfo(request, exception), HttpStatus.CONFLICT); } + + @ExceptionHandler + public ResponseEntity unsupportedTextAnalysisLanguageException(HttpServletRequest request, + UnsupportedTextAnalysisLanguageException e) { + logException(e, request); + return new ResponseEntity<>(errorInfo(request, e), HttpStatus.CONFLICT); + } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/business/AccessControlListService.java b/src/main/java/cz/cvut/kbss/termit/service/business/AccessControlListService.java index 4b6cdc889..c2b5772af 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/business/AccessControlListService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/business/AccessControlListService.java @@ -32,7 +32,7 @@ /** * Service for managing {@link AccessControlList}s (ACLs). *

- * Note that only management of ACLs is supported by this service. Access control itself is handled by TODO. + * Note that only management of ACLs is supported by this service. Access control itself is handled by {@link cz.cvut.kbss.termit.service.security.authorization.acl.AccessControlListBasedAuthorizationService}. */ public interface AccessControlListService { diff --git a/src/main/java/cz/cvut/kbss/termit/service/business/ResourceService.java b/src/main/java/cz/cvut/kbss/termit/service/business/ResourceService.java index f8d8f87a3..0156a8738 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/business/ResourceService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/business/ResourceService.java @@ -24,6 +24,7 @@ import cz.cvut.kbss.termit.exception.InvalidParameterException; import cz.cvut.kbss.termit.exception.NotFoundException; import cz.cvut.kbss.termit.exception.UnsupportedAssetOperationException; +import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException; import cz.cvut.kbss.termit.model.TextAnalysisRecord; import cz.cvut.kbss.termit.model.Vocabulary; import cz.cvut.kbss.termit.model.changetracking.AbstractChangeRecord; @@ -37,6 +38,7 @@ import cz.cvut.kbss.termit.service.document.html.UnconfirmedTermOccurrenceRemover; import cz.cvut.kbss.termit.service.repository.ChangeRecordService; import cz.cvut.kbss.termit.service.repository.ResourceRepositoryService; +import cz.cvut.kbss.termit.util.Configuration; import cz.cvut.kbss.termit.util.TypeAwareResource; import jakarta.annotation.Nonnull; import org.slf4j.Logger; @@ -80,22 +82,26 @@ public class ResourceService private final ChangeRecordService changeRecordService; + private final Configuration config; + private ApplicationEventPublisher eventPublisher; @Autowired public ResourceService(ResourceRepositoryService repositoryService, DocumentManager documentManager, TextAnalysisService textAnalysisService, VocabularyService vocabularyService, - ChangeRecordService 
changeRecordService) { + ChangeRecordService changeRecordService, Configuration config) { this.repositoryService = repositoryService; this.documentManager = documentManager; this.textAnalysisService = textAnalysisService; this.vocabularyService = vocabularyService; this.changeRecordService = changeRecordService; + this.config = config; } /** * Ensures that document gets removed during Vocabulary removal */ + @Transactional @EventListener public void onVocabularyRemoval(VocabularyWillBeRemovedEvent event) { vocabularyService.find(event.getVocabularyIri()).ifPresent(vocabulary -> { @@ -239,6 +245,9 @@ public void addFileToDocument(Resource document, File file) { throw new UnsupportedAssetOperationException("Cannot add file to the specified resource " + document); } doc.addFile(file); + if (file.getLanguage() == null) { + file.setLanguage(config.getPersistence().getLanguage()); + } if (doc.getVocabulary() != null) { final Vocabulary vocabulary = vocabularyService.getReference(doc.getVocabulary()); repositoryService.persist(file, vocabulary); @@ -292,6 +301,7 @@ public void runTextAnalysis(Resource resource, Set vocabularies) { verifyFileOperationPossible(resource, "Text analysis"); LOG.trace("Invoking text analysis on resource {}.", resource); final File file = (File) resource; + verifyLanguageSupported(file); if (vocabularies.isEmpty()) { if (file.getDocument() == null || file.getDocument().getVocabulary() == null) { throw new UnsupportedAssetOperationException( @@ -305,6 +315,12 @@ public void runTextAnalysis(Resource resource, Set vocabularies) { } } + private void verifyLanguageSupported(File file) { + if (!textAnalysisService.supportsLanguage(file)) { + throw new UnsupportedTextAnalysisLanguageException("Text analysis service does not support language " + file.getLanguage(), file); + } + } + private Set includeImportedVocabularies(Set providedVocabularies) { final Set result = new HashSet<>(providedVocabularies); providedVocabularies.forEach(uri -> { diff --git 
a/src/main/java/cz/cvut/kbss/termit/service/business/VocabularyService.java b/src/main/java/cz/cvut/kbss/termit/service/business/VocabularyService.java index fe6d9b20a..6f265656c 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/business/VocabularyService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/business/VocabularyService.java @@ -316,7 +316,7 @@ public List getChangesOfContent(Vocabulary vocabulary) { * Gets content change records of the specified vocabulary. * * @param vocabulary Vocabulary whose content changes to get - * @param pageReq Specification of the size and number of the page to return + * @param pageReq Specification of the size and number of the page to return * @return List of change records, ordered by date in descending order */ public List getDetailedHistoryOfContent(Vocabulary vocabulary, Pageable pageReq) { @@ -522,6 +522,17 @@ public AccessLevel getAccessLevel(Vocabulary vocabulary) { return authorizationService.getAccessLevel(vocabulary); } + /** + * Gets the list of languages used in the specified vocabulary. 
+ * + * @param vocabularyUri Vocabulary identifier + * @return List of languages + */ + @PreAuthorize("@vocabularyAuthorizationService.canRead(#vocabularyUri)") + public List getLanguages(URI vocabularyUri) { + return repositoryService.getLanguages(vocabularyUri); + } + @Override public void setApplicationEventPublisher(@Nonnull ApplicationEventPublisher eventPublisher) { this.eventPublisher = eventPublisher; diff --git a/src/main/java/cz/cvut/kbss/termit/service/document/TextAnalysisService.java b/src/main/java/cz/cvut/kbss/termit/service/document/TextAnalysisService.java index adc9dfdae..18da62044 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/document/TextAnalysisService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/document/TextAnalysisService.java @@ -20,11 +20,15 @@ import cz.cvut.kbss.termit.dto.TextAnalysisInput; import cz.cvut.kbss.termit.event.FileTextAnalysisFinishedEvent; import cz.cvut.kbss.termit.event.TermDefinitionTextAnalysisFinishedEvent; +import cz.cvut.kbss.termit.exception.TermItException; +import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException; import cz.cvut.kbss.termit.exception.WebServiceIntegrationException; import cz.cvut.kbss.termit.model.AbstractTerm; +import cz.cvut.kbss.termit.model.Asset; import cz.cvut.kbss.termit.model.TextAnalysisRecord; import cz.cvut.kbss.termit.model.resource.File; import cz.cvut.kbss.termit.persistence.dao.TextAnalysisRecordDao; +import cz.cvut.kbss.termit.rest.handler.ErrorInfo; import cz.cvut.kbss.termit.util.Configuration; import cz.cvut.kbss.termit.util.Utils; import cz.cvut.kbss.termit.util.throttle.Throttle; @@ -32,20 +36,24 @@ import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationEventPublisher; +import org.springframework.core.ParameterizedTypeReference; import org.springframework.core.io.Resource; import org.springframework.http.HttpEntity; import 
org.springframework.http.HttpHeaders; import org.springframework.http.HttpMethod; +import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import org.springframework.web.client.HttpClientErrorException; import org.springframework.web.client.RestTemplate; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.util.HashSet; +import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.Set; @@ -67,6 +75,8 @@ public class TextAnalysisService { private final ApplicationEventPublisher eventPublisher; + private Set supportedLanguages; + @Autowired public TextAnalysisService(RestTemplate restClient, Configuration config, DocumentManager documentManager, AnnotationGenerator annotationGenerator, TextAnalysisRecordDao recordDao, @@ -107,7 +117,7 @@ private TextAnalysisInput createAnalysisInput(File file) { publicUrl.isEmpty() || publicUrl.get().isEmpty() ? config.getRepository().getUrl() : publicUrl.get() ); input.setVocabularyRepository(repositoryUrl); - input.setLanguage(config.getPersistence().getLanguage()); + input.setLanguage(file.getLanguage() != null ? 
file.getLanguage() : config.getPersistence().getLanguage()); input.setVocabularyRepositoryUserName(config.getRepository().getUsername()); input.setVocabularyRepositoryPassword(config.getRepository().getPassword()); return input; @@ -126,6 +136,8 @@ private void invokeTextAnalysisOnFile(File file, TextAnalysisInput input) { storeTextAnalysisRecord(file, input); } catch (WebServiceIntegrationException e) { throw e; + } catch (HttpClientErrorException e) { + throw handleTextAnalysisInvocationClientException(e, file); } catch (RuntimeException e) { throw new WebServiceIntegrationException("Text analysis invocation failed.", e); } catch (IOException e) { @@ -140,11 +152,10 @@ private Optional invokeTextAnalysisService(TextAnalysisInput input) { return Optional.empty(); } final HttpHeaders headers = new HttpHeaders(); - headers.add(HttpHeaders.ACCEPT, MediaType.APPLICATION_XML_VALUE); - LOG.debug("Invoking text analysis service at '{}' on input: {}", config.getTextAnalysis().getUrl(), input); - final ResponseEntity resp = restClient - .exchange(config.getTextAnalysis().getUrl(), HttpMethod.POST, - new HttpEntity<>(input, headers), Resource.class); + headers.addAll(HttpHeaders.ACCEPT, List.of(MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_XML_VALUE)); + LOG.debug("Invoking text analysis service at '{}' on input: {}", taUrl, input); + final ResponseEntity resp = restClient.exchange(taUrl, HttpMethod.POST, + new HttpEntity<>(input, headers), Resource.class); if (!resp.hasBody()) { throw new WebServiceIntegrationException("Text analysis service returned empty response."); } @@ -161,6 +172,16 @@ private void storeTextAnalysisRecord(File file, TextAnalysisInput config) { recordDao.persist(record); } + private TermItException handleTextAnalysisInvocationClientException(HttpClientErrorException ex, Asset asset) { + if (ex.getStatusCode() == HttpStatus.CONFLICT) { + final ErrorInfo errorInfo = ex.getResponseBodyAs(ErrorInfo.class); + if (errorInfo != null && 
errorInfo.getMessage().contains("language")) { + throw new UnsupportedTextAnalysisLanguageException(errorInfo.getMessage(),asset); + } + } + throw new WebServiceIntegrationException("Text analysis invocation failed.", ex); + } + /** * Gets the latest {@link TextAnalysisRecord} for the specified Resource. * @@ -205,10 +226,56 @@ private void invokeTextAnalysisOnTerm(AbstractTerm term, TextAnalysisInput input } } catch (WebServiceIntegrationException e) { throw e; + } catch (HttpClientErrorException e) { + throw handleTextAnalysisInvocationClientException(e, term); } catch (RuntimeException e) { throw new WebServiceIntegrationException("Text analysis invocation failed.", e); } catch (IOException e) { throw new WebServiceIntegrationException("Unable to read text analysis result from response.", e); } } + + /** + * Checks whether the text analysis service supports the language of the specified file. + *

+ * If the text analysis service does not provide endpoint for getting supported languages (or it is not configured), + * it is assumed that any language is supported. + *

+ * If the file does not have language set, it is assumed that it is supported as well. + * + * @param file File to be analyzed + * @return {@code true} if the file language is supported, {@code false} otherwise + */ + public boolean supportsLanguage(File file) { + Objects.requireNonNull(file); + return file.getLanguage() == null || getSupportedLanguages().isEmpty() || getSupportedLanguages().contains( + file.getLanguage()); + } + + private synchronized Set getSupportedLanguages() { + if (supportedLanguages != null) { + return supportedLanguages; + } + final String languagesEndpointUrl = config.getTextAnalysis().getLanguagesUrl(); + if (languagesEndpointUrl == null || languagesEndpointUrl.isBlank()) { + LOG.warn( + "Text analysis service languages endpoint URL not configured. Assuming any language is supported."); + this.supportedLanguages = Set.of(); + } else { + try { + LOG.debug("Getting list of supported languages from text analysis service at '{}'.", + languagesEndpointUrl); + ResponseEntity> response = restClient.exchange(languagesEndpointUrl, HttpMethod.GET, null, + new ParameterizedTypeReference<>() { + }); + this.supportedLanguages = response.getBody(); + LOG.trace("Text analysis supported languages: {}", supportedLanguages); + } catch (RuntimeException e) { + LOG.error("Unable to get list of supported languages from text analysis service at '{}'.", + languagesEndpointUrl, e); + this.supportedLanguages = Set.of(); + } + } + return supportedLanguages; + } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/repository/VocabularyRepositoryService.java b/src/main/java/cz/cvut/kbss/termit/service/repository/VocabularyRepositoryService.java index 6be0b86d4..6cffad957 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/repository/VocabularyRepositoryService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/repository/VocabularyRepositoryService.java @@ -372,4 +372,15 @@ public Vocabulary findVersionValidAt(Vocabulary vocabulary, Instant at) { public 
PrefixDeclaration resolvePrefix(URI vocabularyUri) { return vocabularyDao.resolvePrefix(vocabularyUri); } + + /** + * Returns the list of all distinct languages (language tags) used by terms in the specified vocabulary. + * + * @param vocabularyUri Vocabulary identifier + * @return List of distinct languages + */ + @Transactional(readOnly = true) + public List getLanguages(URI vocabularyUri) { + return vocabularyDao.getLanguages(vocabularyUri); + } } diff --git a/src/main/java/cz/cvut/kbss/termit/util/Configuration.java b/src/main/java/cz/cvut/kbss/termit/util/Configuration.java index 8a655df59..4785f9eb6 100644 --- a/src/main/java/cz/cvut/kbss/termit/util/Configuration.java +++ b/src/main/java/cz/cvut/kbss/termit/util/Configuration.java @@ -673,6 +673,11 @@ public static class TextAnalysis { */ private String url; + /** + * URL of the endpoint providing list of languages supported by the text analysis service. + */ + private String languagesUrl; + /** * Score threshold for a term occurrence for it to be saved into the repository. 
*/ @@ -693,6 +698,14 @@ public void setUrl(String url) { this.url = url; } + public String getLanguagesUrl() { + return languagesUrl; + } + + public void setLanguagesUrl(String languagesUrl) { + this.languagesUrl = languagesUrl; + } + public String getTermOccurrenceMinScore() { return termOccurrenceMinScore; } diff --git a/src/main/java/cz/cvut/kbss/termit/websocket/handler/WebSocketExceptionHandler.java b/src/main/java/cz/cvut/kbss/termit/websocket/handler/WebSocketExceptionHandler.java index c5869701b..c6042bb9a 100644 --- a/src/main/java/cz/cvut/kbss/termit/websocket/handler/WebSocketExceptionHandler.java +++ b/src/main/java/cz/cvut/kbss/termit/websocket/handler/WebSocketExceptionHandler.java @@ -19,6 +19,7 @@ import cz.cvut.kbss.termit.exception.TermItException; import cz.cvut.kbss.termit.exception.UnsupportedOperationException; import cz.cvut.kbss.termit.exception.UnsupportedSearchFacetException; +import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException; import cz.cvut.kbss.termit.exception.ValidationException; import cz.cvut.kbss.termit.exception.WebServiceIntegrationException; import cz.cvut.kbss.termit.exception.importing.UnsupportedImportMediaTypeException; @@ -87,7 +88,8 @@ private static ErrorInfo errorInfo(Message message, Throwable e) { } private static ErrorInfo errorInfo(Message message, TermItException e) { - return ErrorInfo.createParametrizedWithMessage(e.getMessage(), e.getMessageId(), destination(message), e.getParameters()); + return ErrorInfo.createParametrizedWithMessage(e.getMessage(), e.getMessageId(), destination(message), + e.getParameters()); } @MessageExceptionHandler @@ -95,7 +97,7 @@ public void messageDeliveryException(Message message, MessageDeliveryExceptio // messages without destination will be logged only on trace (hasDestination(message) ? 
LOG.atError() : LOG.atTrace()) .setMessage("Failed to send message with destination {}: {}") - .addArgument(()-> destination(message)) + .addArgument(() -> destination(message)) .addArgument(e.getMessage()) .setCause(e.getCause()) .log(); @@ -226,7 +228,8 @@ public ErrorInfo invalidParameter(Message message, InvalidParameterException @MessageExceptionHandler public ErrorInfo maxUploadSizeExceededException(Message message, MaxUploadSizeExceededException e) { logException(e, message); - return ErrorInfo.createWithMessageAndMessageId(e.getMessage(), "error.file.maxUploadSizeExceeded", destination(message)); + return ErrorInfo.createWithMessageAndMessageId(e.getMessage(), "error.file.maxUploadSizeExceeded", + destination(message)); } @MessageExceptionHandler @@ -271,4 +274,11 @@ public ErrorInfo uriSyntaxException(Message message, URISyntaxException e) { logException(e, message); return errorInfo(message, e); } + + @MessageExceptionHandler + public ErrorInfo unsupportedTextAnalysisLanguageException(Message message, + UnsupportedTextAnalysisLanguageException e) { + logException(e, message); + return errorInfo(message, e); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/persistence/context/DescriptorFactoryTest.java b/src/test/java/cz/cvut/kbss/termit/persistence/context/DescriptorFactoryTest.java index c22fc8a49..621c8d823 100644 --- a/src/test/java/cz/cvut/kbss/termit/persistence/context/DescriptorFactoryTest.java +++ b/src/test/java/cz/cvut/kbss/termit/persistence/context/DescriptorFactoryTest.java @@ -98,7 +98,7 @@ void termDescriptorCreatesDescriptorWithExactMatchesContextSetToDefaultToAllowEx } @Test - void fileDescriptorContainsAlsoDescriptorForDocument() { + void fileDescriptorContainsAlsoDescriptorForDocument() throws Exception { final File file = Generator.generateFileWithId("test.html"); final Document doc = Generator.generateDocumentWithId(); doc.addFile(file); @@ -106,7 +106,7 @@ void fileDescriptorContainsAlsoDescriptorForDocument() { 
doc.setVocabulary(Generator.generateUri()); final Descriptor result = sut.fileDescriptor(doc.getVocabulary()); final FieldSpecification docFieldSpec = mock(FieldSpecification.class); - when(docFieldSpec.getJavaField()).thenReturn(File.getDocumentField()); + when(docFieldSpec.getJavaField()).thenReturn(File.class.getDeclaredField("document")); final Descriptor docDescriptor = result.getAttributeDescriptor(docFieldSpec); assertNotNull(docDescriptor); } diff --git a/src/test/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDaoTest.java b/src/test/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDaoTest.java index 02af22e4c..68235b279 100644 --- a/src/test/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDaoTest.java +++ b/src/test/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDaoTest.java @@ -84,6 +84,7 @@ import static cz.cvut.kbss.termit.environment.util.ContainsSameEntities.containsSameEntities; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasItems; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -932,4 +933,23 @@ void getAnyExternalRelationsReturnsTermsWithBothRelations(URI termRelation) { } }); } + + @Test + void getLanguagesReturnsDistinctLanguagesUsedByVocabularyTerms() { + final Vocabulary vocabulary = Generator.generateVocabularyWithId(); + final Term term = Generator.generateTermWithId(vocabulary.getUri()); + final Term term2 = Generator.generateTermWithId(vocabulary.getUri()); + term2.getLabel().set("cs", "Název v češtině"); + transactional(() -> { + em.persist(vocabulary, descriptorFor(vocabulary)); + em.persist(term, descriptorFactory.termDescriptor(term)); + em.persist(term2, descriptorFactory.termDescriptor(term2)); + Generator.addTermInVocabularyRelationship(term, vocabulary.getUri(), em); + 
Generator.addTermInVocabularyRelationship(term2, vocabulary.getUri(), em); + }); + + final List<String> languages = sut.getLanguages(vocabulary.getUri()); + assertEquals(2, languages.size()); + assertThat(languages, hasItems(Environment.LANGUAGE, "cs")); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/rest/VocabularyControllerTest.java b/src/test/java/cz/cvut/kbss/termit/rest/VocabularyControllerTest.java index 0d1c7444d..32d3aa47c 100644 --- a/src/test/java/cz/cvut/kbss/termit/rest/VocabularyControllerTest.java +++ b/src/test/java/cz/cvut/kbss/termit/rest/VocabularyControllerTest.java @@ -642,4 +642,16 @@ void getExcelTemplateFileReturnsExcelTemplateFileRetrievedFromServiceAsAttachmen assertThat(mvcResult.getResponse().getHeader(HttpHeaders.CONTENT_DISPOSITION), containsString("filename=\"termit-import.xlsx\"")); } + + @Test + void getLanguagesRetrievesAndReturnsListOfLanguagesUsedInVocabulary() throws Exception { + when(idResolverMock.resolveIdentifier(NAMESPACE, FRAGMENT)).thenReturn(VOCABULARY_URI); + final List<String> languages = List.of(Environment.LANGUAGE, "cs", "de"); + when(serviceMock.getLanguages(VOCABULARY_URI)).thenReturn(languages); + + final MvcResult mvcResult = mockMvc.perform(get(PATH + "/" + FRAGMENT + "/languages").queryParam(QueryParams.NAMESPACE, NAMESPACE)).andReturn(); + final List<String> result = readValue(mvcResult, new TypeReference<List<String>>() {}); + assertEquals(languages, result); + verify(serviceMock).getLanguages(VOCABULARY_URI); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/service/business/ResourceServiceTest.java b/src/test/java/cz/cvut/kbss/termit/service/business/ResourceServiceTest.java index 6119b0f90..cae57e7e8 100644 --- a/src/test/java/cz/cvut/kbss/termit/service/business/ResourceServiceTest.java +++ b/src/test/java/cz/cvut/kbss/termit/service/business/ResourceServiceTest.java @@ -24,6 +24,7 @@ import cz.cvut.kbss.termit.exception.NotFoundException; import cz.cvut.kbss.termit.exception.TermItException; import 
cz.cvut.kbss.termit.exception.UnsupportedAssetOperationException; +import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException; import cz.cvut.kbss.termit.model.TextAnalysisRecord; import cz.cvut.kbss.termit.model.Vocabulary; import cz.cvut.kbss.termit.model.changetracking.AbstractChangeRecord; @@ -35,6 +36,8 @@ import cz.cvut.kbss.termit.service.document.TextAnalysisService; import cz.cvut.kbss.termit.service.repository.ChangeRecordService; import cz.cvut.kbss.termit.service.repository.ResourceRepositoryService; +import cz.cvut.kbss.termit.util.Configuration; +import cz.cvut.kbss.termit.util.Constants; import cz.cvut.kbss.termit.util.TypeAwareByteArrayResource; import cz.cvut.kbss.termit.util.TypeAwareResource; import cz.cvut.kbss.termit.util.Utils; @@ -47,6 +50,7 @@ import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.Mockito; +import org.mockito.Spy; import org.mockito.junit.jupiter.MockitoExtension; import org.springframework.context.ApplicationEventPublisher; import org.springframework.http.MediaType; @@ -96,6 +100,9 @@ class ResourceServiceTest { @Mock private ApplicationEventPublisher eventPublisher; + @Spy + private Configuration config = new Configuration(); + @InjectMocks private ResourceService sut; @@ -197,6 +204,7 @@ void runTextAnalysisInvokesTextAnalysisWithVocabularyRelatedToFilesDocument() { file.setDocument(Generator.generateDocumentWithId()); final Vocabulary vocabulary = Generator.generateVocabularyWithId(); file.getDocument().setVocabulary(vocabulary.getUri()); + when(textAnalysisService.supportsLanguage(file)).thenReturn(true); sut.runTextAnalysis(file, Collections.emptySet()); verify(textAnalysisService).analyzeFile(file, Collections.singleton(vocabulary.getUri())); } @@ -212,6 +220,7 @@ void runTextAnalysisThrowsUnsupportedAssetOperationWhenResourceIsNotFile() { @Test void runTextAnalysisThrowsUnsupportedAssetOperationWhenFileHasNoVocabularyAndNoVocabulariesAreSpecifiedEither() { final File file = 
Generator.generateFileWithId("test.html"); + when(textAnalysisService.supportsLanguage(file)).thenReturn(true); assertThrows(UnsupportedAssetOperationException.class, () -> sut.runTextAnalysis(file, Collections.emptySet())); verify(textAnalysisService, never()).analyzeFile(any(), anySet()); @@ -221,6 +230,7 @@ void runTextAnalysisThrowsUnsupportedAssetOperationWhenFileHasNoVocabularyAndNoV void runTextAnalysisInvokesAnalysisWithCustomVocabulariesWhenSpecified() { final File file = Generator.generateFileWithId("test.html"); final Set vocabularies = new HashSet<>(Arrays.asList(Generator.generateUri(), Generator.generateUri())); + when(textAnalysisService.supportsLanguage(file)).thenReturn(true); sut.runTextAnalysis(file, vocabularies); verify(textAnalysisService).analyzeFile(file, vocabularies); } @@ -234,6 +244,7 @@ void runTextAnalysisInvokesAnalysisAlsoWithImportedVocabulariesOfVocabularyRElat final Set imported = new HashSet<>(Arrays.asList(Generator.generateUri(), Generator.generateUri())); when(vocabularyService.getReference(vocabulary.getUri())).thenReturn(vocabulary); when(vocabularyService.getTransitivelyImportedVocabularies(vocabulary)).thenReturn(imported); + when(textAnalysisService.supportsLanguage(file)).thenReturn(true); sut.runTextAnalysis(file, Collections.emptySet()); final Set expected = new HashSet<>(imported); @@ -253,6 +264,7 @@ void runTextAnalysisInvokesAnalysisWithProvidedVocabulariesAndTheirImports() { when(vocabularyService.getTransitivelyImportedVocabularies(vOne)).thenReturn(vOneImports); when(vocabularyService.getReference(vTwo.getUri())).thenReturn(vTwo); when(vocabularyService.getTransitivelyImportedVocabularies(vTwo)).thenReturn(vTwoImports); + when(textAnalysisService.supportsLanguage(file)).thenReturn(true); sut.runTextAnalysis(file, new HashSet<>(Arrays.asList(vOne.getUri(), vTwo.getUri()))); final Set expected = new HashSet<>(vOneImports); @@ -515,4 +527,49 @@ void 
getContentWithoutUnconfirmedOccurrencesRemovesUnconfirmedOccurrencesFromFil final org.jsoup.nodes.Document doc = Jsoup.parse(result.getInputStream(), StandardCharsets.UTF_8.name(), ""); assertTrue(doc.select("span[score]").isEmpty()); } + + @Test + void addFileToDocumentSetsFileLanguageToDefaultConfiguredWhenNotProvided() { + config.getPersistence().setLanguage(Constants.DEFAULT_LANGUAGE); + final Vocabulary vocabulary = Generator.generateVocabularyWithId(); + final Document document = Generator.generateDocumentWithId(); + document.setVocabulary(vocabulary.getUri()); + final File file = Generator.generateFileWithId("test.html"); + when(resourceRepositoryService.exists(document.getUri())).thenReturn(true); + when(resourceRepositoryService.findRequired(document.getUri())).thenReturn(document); + when(vocabularyService.getReference(vocabulary.getUri())).thenReturn(vocabulary); + + sut.addFileToDocument(document, file); + verify(resourceRepositoryService).persist(file, vocabulary); + assertEquals(config.getPersistence().getLanguage(), file.getLanguage()); + } + + @Test + void addFileToDocumentDoesNotModifyLanguageWhenItIsAlreadySet() { + config.getPersistence().setLanguage(Constants.DEFAULT_LANGUAGE); + final Vocabulary vocabulary = Generator.generateVocabularyWithId(); + final Document document = Generator.generateDocumentWithId(); + document.setVocabulary(vocabulary.getUri()); + final File file = Generator.generateFileWithId("test.html"); + file.setLanguage("cs"); + when(resourceRepositoryService.exists(document.getUri())).thenReturn(true); + when(resourceRepositoryService.findRequired(document.getUri())).thenReturn(document); + when(vocabularyService.getReference(vocabulary.getUri())).thenReturn(vocabulary); + + sut.addFileToDocument(document, file); + verify(resourceRepositoryService).persist(file, vocabulary); + assertEquals("cs", file.getLanguage()); + } + + @Test + void 
runTextAnalysisThrowsUnsupportedTextAnalysisExceptionWhenTextAnalysisServiceDoesNotSupportFileLanguage() { + final File file = Generator.generateFileWithId("test.html"); + file.setDocument(Generator.generateDocumentWithId()); + final Vocabulary vocabulary = Generator.generateVocabularyWithId(); + file.getDocument().setVocabulary(vocabulary.getUri()); + file.setLanguage("sk"); + when(textAnalysisService.supportsLanguage(file)).thenReturn(false); + assertThrows(UnsupportedTextAnalysisLanguageException.class, () -> sut.runTextAnalysis(file, Set.of(vocabulary.getUri()))); + verify(textAnalysisService).supportsLanguage(file); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/service/document/TextAnalysisServiceTest.java b/src/test/java/cz/cvut/kbss/termit/service/document/TextAnalysisServiceTest.java index aa431671e..794753204 100644 --- a/src/test/java/cz/cvut/kbss/termit/service/document/TextAnalysisServiceTest.java +++ b/src/test/java/cz/cvut/kbss/termit/service/document/TextAnalysisServiceTest.java @@ -27,14 +27,17 @@ import cz.cvut.kbss.termit.event.FileTextAnalysisFinishedEvent; import cz.cvut.kbss.termit.event.TermDefinitionTextAnalysisFinishedEvent; import cz.cvut.kbss.termit.exception.NotFoundException; +import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException; import cz.cvut.kbss.termit.exception.WebServiceIntegrationException; import cz.cvut.kbss.termit.model.Term; import cz.cvut.kbss.termit.model.TextAnalysisRecord; import cz.cvut.kbss.termit.model.Vocabulary; import cz.cvut.kbss.termit.model.resource.File; import cz.cvut.kbss.termit.persistence.dao.TextAnalysisRecordDao; +import cz.cvut.kbss.termit.rest.handler.ErrorInfo; import cz.cvut.kbss.termit.service.BaseServiceTestRunner; import cz.cvut.kbss.termit.util.Configuration; +import cz.cvut.kbss.termit.util.Constants; import cz.cvut.kbss.termit.util.Utils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -70,6 +73,7 @@ import static 
org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsString; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -84,8 +88,10 @@ import static org.mockito.Mockito.when; import static org.springframework.test.web.client.match.MockRestRequestMatchers.content; import static org.springframework.test.web.client.match.MockRestRequestMatchers.header; +import static org.springframework.test.web.client.match.MockRestRequestMatchers.jsonPath; import static org.springframework.test.web.client.match.MockRestRequestMatchers.method; import static org.springframework.test.web.client.match.MockRestRequestMatchers.requestTo; +import static org.springframework.test.web.client.response.MockRestResponseCreators.withRequestConflict; import static org.springframework.test.web.client.response.MockRestResponseCreators.withServerError; import static org.springframework.test.web.client.response.MockRestResponseCreators.withSuccess; @@ -143,14 +149,14 @@ void setUp() throws Exception { doCallRealMethod().when(documentManagerSpy).loadFileContent(any()); doNothing().when(documentManagerSpy).createBackup(any()); this.sut = new TextAnalysisService(restTemplate, config, documentManagerSpy, annotationGeneratorMock, - textAnalysisRecordDao, eventPublisher); + textAnalysisRecordDao, eventPublisher); } @Test void analyzeFileInvokesTextAnalysisServiceWithDocumentContent() { mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) - .andExpect(method(HttpMethod.POST)).andExpect(content().string(containsString(CONTENT))) - .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); + .andExpect(method(HttpMethod.POST)).andExpect(content().string(containsString(CONTENT))) + .andRespond(withSuccess(CONTENT, 
MediaType.APPLICATION_XML)); sut.analyzeFile(file, Collections.singleton(vocabulary.getUri())); mockServer.verify(); } @@ -159,7 +165,8 @@ private void generateFile() throws IOException { final java.io.File dir = Files.createTempDirectory("termit").toFile(); dir.deleteOnExit(); config.getFile().setStorage(dir.getAbsolutePath()); - final java.io.File docDir = new java.io.File(dir.getAbsolutePath() + java.io.File.separator + file.getDirectoryName()); + final java.io.File docDir = new java.io.File( + dir.getAbsolutePath() + java.io.File.separator + file.getDirectoryName()); Files.createDirectory(docDir.toPath()); docDir.deleteOnExit(); final java.io.File content = new java.io.File( @@ -172,9 +179,9 @@ private void generateFile() throws IOException { void analyzeFilePassesRepositoryAndVocabularyContextToService() throws Exception { final TextAnalysisInput input = textAnalysisInput(); mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) - .andExpect(method(HttpMethod.POST)) - .andExpect(content().string(objectMapper.writeValueAsString(input))) - .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); + .andExpect(method(HttpMethod.POST)) + .andExpect(content().string(objectMapper.writeValueAsString(input))) + .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); sut.analyzeFile(file, Collections.singleton(vocabulary.getUri())); mockServer.verify(); } @@ -184,8 +191,8 @@ private TextAnalysisInput textAnalysisInput() { input.setContent(CONTENT); input.addVocabularyContext(vocabulary.getUri()); URI repositoryUrl = URI.create( - config.getRepository().getPublicUrl() - .orElse(config.getRepository().getUrl()) + config.getRepository().getPublicUrl() + .orElse(config.getRepository().getUrl()) ); input.setVocabularyRepository(repositoryUrl); input.setLanguage(config.getPersistence().getLanguage()); @@ -198,11 +205,11 @@ private TextAnalysisInput textAnalysisInput() { void analyzeFilePassesContentTypeAndAcceptHeadersToService() throws Exception { final 
TextAnalysisInput input = textAnalysisInput(); mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) - .andExpect(method(HttpMethod.POST)) - .andExpect(content().string(objectMapper.writeValueAsString(input))) - .andExpect(header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)) - .andExpect(header(HttpHeaders.ACCEPT, MediaType.APPLICATION_XML_VALUE)) - .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); + .andExpect(method(HttpMethod.POST)) + .andExpect(content().string(objectMapper.writeValueAsString(input))) + .andExpect(header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)) + .andExpect(header(HttpHeaders.ACCEPT,MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_XML_VALUE)) + .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); sut.analyzeFile(file, Collections.singleton(vocabulary.getUri())); mockServer.verify(); } @@ -228,11 +235,11 @@ void analyzeFilePassesRepositoryUsernameAndPasswordToServiceWhenProvided() throw void analyzeFileThrowsWebServiceIntegrationExceptionOnError() throws Exception { final TextAnalysisInput input = textAnalysisInput(); mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) - .andExpect(method(HttpMethod.POST)) - .andExpect(content().string(objectMapper.writeValueAsString(input))) - .andRespond(withServerError()); + .andExpect(method(HttpMethod.POST)) + .andExpect(content().string(objectMapper.writeValueAsString(input))) + .andRespond(withServerError()); assertThrows(WebServiceIntegrationException.class, - () -> sut.analyzeFile(file, Collections.singleton(vocabulary.getUri()))); + () -> sut.analyzeFile(file, Collections.singleton(vocabulary.getUri()))); mockServer.verify(); } @@ -256,7 +263,8 @@ void analyzeFileInvokesAnnotationGeneratorWithResultFromTextAnalysisService() th void analyzeFileThrowsNotFoundExceptionWhenFileCannotBeFound() { file.setLabel("unknown.html"); final NotFoundException result = assertThrows(NotFoundException.class, - () -> 
sut.analyzeFile(file, Collections.singleton(vocabulary.getUri()))); + () -> sut.analyzeFile(file, Collections.singleton( + vocabulary.getUri()))); assertThat(result.getMessage(), containsString("not found on file system")); } @@ -264,11 +272,12 @@ void analyzeFileThrowsNotFoundExceptionWhenFileCannotBeFound() { void analyzeFileThrowsWebServiceIntegrationExceptionWhenRemoteServiceReturnsEmptyBody() throws Exception { final TextAnalysisInput input = textAnalysisInput(); mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) - .andExpect(method(HttpMethod.POST)) - .andExpect(content().string(objectMapper.writeValueAsString(input))) - .andRespond(withSuccess()); + .andExpect(method(HttpMethod.POST)) + .andExpect(content().string(objectMapper.writeValueAsString(input))) + .andRespond(withSuccess()); final WebServiceIntegrationException result = assertThrows(WebServiceIntegrationException.class, - () -> sut.analyzeFile(file, Collections.singleton(vocabulary.getUri()))); + () -> sut.analyzeFile(file, Collections.singleton( + vocabulary.getUri()))); assertThat(result.getMessage(), containsString("empty response")); mockServer.verify(); } @@ -290,13 +299,13 @@ void analyzeFileCreatesFileBackupBeforeInvokingAnnotationGenerator() throws Exce @Test void analyzeFilePassesRepositoryAndSpecifiedVocabularyContextsToService() throws Exception { final Set vocabs = IntStream.range(0, 5).mapToObj(i -> Generator.generateUri()) - .collect(Collectors.toSet()); + .collect(Collectors.toSet()); final TextAnalysisInput expected = textAnalysisInput(); expected.setVocabularyContexts(vocabs); mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) - .andExpect(method(HttpMethod.POST)) - .andExpect(content().string(objectMapper.writeValueAsString(expected))) - .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); + .andExpect(method(HttpMethod.POST)) + .andExpect(content().string(objectMapper.writeValueAsString(expected))) + .andRespond(withSuccess(CONTENT, 
MediaType.APPLICATION_XML)); sut.analyzeFile(file, vocabs); mockServer.verify(); } @@ -305,9 +314,9 @@ void analyzeFilePassesRepositoryAndSpecifiedVocabularyContextsToService() throws void analyzeFileBacksUpFileContentBeforeSavingNewAnalyzedContent() throws Exception { final TextAnalysisInput input = textAnalysisInput(); mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) - .andExpect(method(HttpMethod.POST)) - .andExpect(content().string(objectMapper.writeValueAsString(input))) - .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); + .andExpect(method(HttpMethod.POST)) + .andExpect(content().string(objectMapper.writeValueAsString(input))) + .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); sut.analyzeFile(file, Collections.singleton(vocabulary.getUri())); mockServer.verify(); final InOrder inOrder = Mockito.inOrder(documentManagerSpy, annotationGeneratorMock); @@ -318,8 +327,8 @@ void analyzeFileBacksUpFileContentBeforeSavingNewAnalyzedContent() throws Except @Test void analyzeFileCreatesTextAnalysisRecord() { mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) - .andExpect(method(HttpMethod.POST)).andExpect(content().string(containsString(CONTENT))) - .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); + .andExpect(method(HttpMethod.POST)).andExpect(content().string(containsString(CONTENT))) + .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); sut.analyzeFile(file, Collections.singleton(vocabulary.getUri())); final ArgumentCaptor captor = ArgumentCaptor.forClass(TextAnalysisRecord.class); verify(textAnalysisRecordDao).persist(captor.capture()); @@ -424,7 +433,8 @@ void analyzeFilePublishesAnalysisFinishedEvent() { .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); sut.analyzeFile(file, Collections.singleton(vocabulary.getUri())); - ArgumentCaptor eventCaptor = ArgumentCaptor.forClass(FileTextAnalysisFinishedEvent.class); + ArgumentCaptor eventCaptor = ArgumentCaptor.forClass( + 
FileTextAnalysisFinishedEvent.class); verify(eventPublisher).publishEvent(eventCaptor.capture()); assertNotNull(eventCaptor.getValue()); assertEquals(file.getUri(), eventCaptor.getValue().getFileUri()); @@ -444,10 +454,80 @@ void analyzeTermDefinitionPublishesAnalysisFinishedEvent() throws JsonProcessing sut.analyzeTermDefinition(term, vocabulary.getUri()); - ArgumentCaptor eventCaptor = ArgumentCaptor.forClass(TermDefinitionTextAnalysisFinishedEvent.class); + ArgumentCaptor eventCaptor = ArgumentCaptor.forClass( + TermDefinitionTextAnalysisFinishedEvent.class); verify(eventPublisher).publishEvent(eventCaptor.capture()); assertNotNull(eventCaptor.getValue()); assertEquals(term.getUri(), eventCaptor.getValue().getTermUri()); assertEquals(vocabulary.getUri(), eventCaptor.getValue().getVocabularyIri()); } + + @Test + void analyzeFileSetsFileLanguageInTextAnalysisInvocationInput() { + file.setLanguage("cs"); + mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) + .andExpect(method(HttpMethod.POST)) + .andExpect(jsonPath("$.language").value("cs")) + .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); + sut.analyzeFile(file, Collections.singleton(vocabulary.getUri())); + mockServer.verify(); + } + + @Test + void analyzeFileUsesConfiguredPersistenceLanguageInTextAnalysisInvocationInputWhenFileLanguageIsNotSet() { + file.setLanguage(null); + mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) + .andExpect(method(HttpMethod.POST)) + .andExpect(jsonPath("$.language").value(Environment.LANGUAGE)) + .andRespond(withSuccess(CONTENT, MediaType.APPLICATION_XML)); + sut.analyzeFile(file, Collections.singleton(vocabulary.getUri())); + mockServer.verify(); + } + + @Test + void analyzeFileThrowsUnsupportedLanguageExceptionWhenTextAnalysisInvocationReturnsConflictWithUnsupportedLanguageError() + throws Exception { + file.setLanguage("de"); + final ErrorInfo respBody = ErrorInfo.createWithMessage("No taggers for language 'de' available.", + 
"/annotace/annotate"); + mockServer.expect(requestTo(config.getTextAnalysis().getUrl())) + .andExpect(method(HttpMethod.POST)) + .andRespond(withRequestConflict().body(objectMapper.writeValueAsString(respBody)) + .contentType(MediaType.APPLICATION_JSON)); + + final UnsupportedTextAnalysisLanguageException ex = assertThrows(UnsupportedTextAnalysisLanguageException.class, + () -> sut.analyzeFile(file, + Collections.singleton( + vocabulary.getUri()))); + assertEquals("error.annotation.file.unsupportedLanguage", ex.getMessageId()); + } + + @Test + void supportsLanguageGetsListOfSupportedLanguagesFromTextAnalysisServiceAndChecksIfFileLanguageIsAmongThem() { + file.setLanguage("cs"); + mockServer.expect(requestTo(config.getTextAnalysis().getLanguagesUrl())) + .andExpect(method(HttpMethod.GET)) + .andRespond(withSuccess("[\"cs\", \"en\"]", MediaType.APPLICATION_JSON)); + assertTrue(sut.supportsLanguage(file)); + mockServer.verify(); + + file.setLanguage("de"); + assertFalse(sut.supportsLanguage(file)); + } + + @Test + void supportsLanguageReturnsTrueWhenTextAnalysisServiceLanguagesEndpointUrlIsNotConfigured() { + String endpointUrl = config.getTextAnalysis().getLanguagesUrl(); + file.setLanguage(Constants.DEFAULT_LANGUAGE); + config.getTextAnalysis().setLanguagesUrl(null); + assertTrue(sut.supportsLanguage(file)); + // Reset configuration state + config.getTextAnalysis().setLanguagesUrl(endpointUrl); + } + + @Test + void supportsLanguageReturnsTrueWhenFileHasNoLanguageSet() { + file.setLanguage(null); + assertTrue(sut.supportsLanguage(file)); + } } diff --git a/src/test/java/cz/cvut/kbss/termit/util/VocabularyTest.java b/src/test/java/cz/cvut/kbss/termit/util/VocabularyTest.java index a35fd6534..7c4d6aac9 100644 --- a/src/test/java/cz/cvut/kbss/termit/util/VocabularyTest.java +++ b/src/test/java/cz/cvut/kbss/termit/util/VocabularyTest.java @@ -23,7 +23,6 @@ public class VocabularyTest { @Test - // @todo until https://github.com/kbss-cvut/jopa/issues/85 is resolved public 
void ensureContentHasCorrectUrl() { Assert.equals("http://rdfs.org/sioc/ns#content", Vocabulary.s_p_sioc_content); } diff --git a/src/test/resources/application.yml b/src/test/resources/application.yml index 56f473bd6..258bfa622 100644 --- a/src/test/resources/application.yml +++ b/src/test/resources/application.yml @@ -1,3 +1,4 @@ +application.version: DEV spring: servlet: multipart: @@ -29,7 +30,8 @@ termit: file: storage: /tmp/termit textAnalysis: - url: http://localhost/annotace + url: http://localhost/annotace/annotate + languagesUrl: http://localhost/annotace/languages termOccurrenceMinScore: 0.49 comments: context: http://onto.fel.cvut.cz/ontologies/komentare