Skip to content

Commit

Permalink
[kbss-cvut/termit-ui#553] Check if text analysis service supports fil…
Browse files Browse the repository at this point in the history
…e language before annotation.
  • Loading branch information
ledsoft committed Nov 19, 2024
1 parent 4db5145 commit f44f7b0
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 12 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package cz.cvut.kbss.termit.exception;

import cz.cvut.kbss.termit.model.Asset;
import cz.cvut.kbss.termit.model.resource.File;

/**
 * Indicates that a language is not supported by the text analysis service.
 * <p>
 * The message identifier handed to the parent exception depends on the kind of asset: one identifier is used for
 * {@link File} instances, another one for any other asset.
 */
public class UnsupportedTextAnalysisLanguageException extends TermItException {

    /** Message identifier used when the analyzed asset is a file. */
    private static final String FILE_MESSAGE_ID = "error.annotation.file.unsupportedLanguage";

    /** Message identifier used for any asset that is not a file. */
    private static final String TERM_MESSAGE_ID = "error.annotation.term.unsupportedLanguage";

    /**
     * Creates the exception for the specified asset.
     *
     * @param message Description of the problem
     * @param asset   Asset whose language is not supported; determines the message identifier
     */
    public UnsupportedTextAnalysisLanguageException(String message, Asset<?> asset) {
        super(message, resolveMessageId(asset));
    }

    /**
     * Selects the message identifier matching the type of the specified asset.
     */
    private static String resolveMessageId(Asset<?> asset) {
        if (asset instanceof File) {
            return FILE_MESSAGE_ID;
        }
        return TERM_MESSAGE_ID;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
/**
* Service for managing {@link AccessControlList}s (ACLs).
* <p>
* Note that only management of ACLs is supported by this service. Access control itself is handled by TODO.
* Note that only management of ACLs is supported by this service. Access control itself is handled by {@link cz.cvut.kbss.termit.service.security.authorization.acl.AccessControlListBasedAuthorizationService}.
*/
public interface AccessControlListService {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import cz.cvut.kbss.termit.exception.InvalidParameterException;
import cz.cvut.kbss.termit.exception.NotFoundException;
import cz.cvut.kbss.termit.exception.UnsupportedAssetOperationException;
import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException;
import cz.cvut.kbss.termit.model.TextAnalysisRecord;
import cz.cvut.kbss.termit.model.Vocabulary;
import cz.cvut.kbss.termit.model.changetracking.AbstractChangeRecord;
Expand Down Expand Up @@ -300,6 +301,7 @@ public void runTextAnalysis(Resource resource, Set<URI> vocabularies) {
verifyFileOperationPossible(resource, "Text analysis");
LOG.trace("Invoking text analysis on resource {}.", resource);
final File file = (File) resource;
verifyLanguageSupported(file);
if (vocabularies.isEmpty()) {
if (file.getDocument() == null || file.getDocument().getVocabulary() == null) {
throw new UnsupportedAssetOperationException(
Expand All @@ -313,6 +315,12 @@ public void runTextAnalysis(Resource resource, Set<URI> vocabularies) {
}
}

/**
 * Ensures that the text analysis service is able to process the language of the specified file.
 *
 * @param file File about to be analyzed
 * @throws UnsupportedTextAnalysisLanguageException When the text analysis service reports the file language as
 *                                                  unsupported
 */
private void verifyLanguageSupported(File file) {
    if (textAnalysisService.supportsLanguage(file)) {
        return;
    }
    final String message = "Text analysis service does not support language " + file.getLanguage();
    throw new UnsupportedTextAnalysisLanguageException(message, file);
}

private Set<URI> includeImportedVocabularies(Set<URI> providedVocabularies) {
final Set<URI> result = new HashSet<>(providedVocabularies);
providedVocabularies.forEach(uri -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,40 @@
import cz.cvut.kbss.termit.dto.TextAnalysisInput;
import cz.cvut.kbss.termit.event.FileTextAnalysisFinishedEvent;
import cz.cvut.kbss.termit.event.TermDefinitionTextAnalysisFinishedEvent;
import cz.cvut.kbss.termit.exception.TermItException;
import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException;
import cz.cvut.kbss.termit.exception.WebServiceIntegrationException;
import cz.cvut.kbss.termit.model.AbstractTerm;
import cz.cvut.kbss.termit.model.Asset;
import cz.cvut.kbss.termit.model.TextAnalysisRecord;
import cz.cvut.kbss.termit.model.resource.File;
import cz.cvut.kbss.termit.persistence.dao.TextAnalysisRecordDao;
import cz.cvut.kbss.termit.rest.handler.ErrorInfo;
import cz.cvut.kbss.termit.util.Configuration;
import cz.cvut.kbss.termit.util.Utils;
import cz.cvut.kbss.termit.util.throttle.Throttle;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.core.ParameterizedTypeReference;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.client.HttpClientErrorException;
import org.springframework.web.client.RestTemplate;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
Expand All @@ -67,6 +75,8 @@ public class TextAnalysisService {

private final ApplicationEventPublisher eventPublisher;

private Set<String> supportedLanguages;

@Autowired
public TextAnalysisService(RestTemplate restClient, Configuration config, DocumentManager documentManager,
AnnotationGenerator annotationGenerator, TextAnalysisRecordDao recordDao,
Expand Down Expand Up @@ -126,6 +136,8 @@ private void invokeTextAnalysisOnFile(File file, TextAnalysisInput input) {
storeTextAnalysisRecord(file, input);
} catch (WebServiceIntegrationException e) {
throw e;
} catch (HttpClientErrorException e) {
throw handleTextAnalysisInvocationClientException(e, file);
} catch (RuntimeException e) {
throw new WebServiceIntegrationException("Text analysis invocation failed.", e);
} catch (IOException e) {
Expand All @@ -140,11 +152,10 @@ private Optional<Resource> invokeTextAnalysisService(TextAnalysisInput input) {
return Optional.empty();
}
final HttpHeaders headers = new HttpHeaders();
headers.add(HttpHeaders.ACCEPT, MediaType.APPLICATION_XML_VALUE);
LOG.debug("Invoking text analysis service at '{}' on input: {}", config.getTextAnalysis().getUrl(), input);
final ResponseEntity<Resource> resp = restClient
.exchange(config.getTextAnalysis().getUrl(), HttpMethod.POST,
new HttpEntity<>(input, headers), Resource.class);
headers.addAll(HttpHeaders.ACCEPT, List.of(MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_XML_VALUE));
LOG.debug("Invoking text analysis service at '{}' on input: {}", taUrl, input);
final ResponseEntity<Resource> resp = restClient.exchange(taUrl, HttpMethod.POST,
new HttpEntity<>(input, headers), Resource.class);
if (!resp.hasBody()) {
throw new WebServiceIntegrationException("Text analysis service returned empty response.");
}
Expand All @@ -161,6 +172,16 @@ private void storeTextAnalysisRecord(File file, TextAnalysisInput config) {
recordDao.persist(record);
}

/**
 * Translates a client error (HTTP 4xx) received from the text analysis service into an application exception.
 * <p>
 * A {@code 409 Conflict} response whose error message mentions "language" is reported as
 * {@link UnsupportedTextAnalysisLanguageException}; everything else becomes a
 * {@link WebServiceIntegrationException} wrapping the original error.
 * <p>
 * Note that this method always throws and never returns normally. The declared return type only exists so that
 * callers can write {@code throw handleTextAnalysisInvocationClientException(...)}.
 *
 * @param ex    Client error raised by the REST client
 * @param asset Asset that was being analyzed
 * @return Never returns
 * @throws UnsupportedTextAnalysisLanguageException When the service rejected the language of the analyzed text
 * @throws WebServiceIntegrationException           For any other client error
 */
private TermItException handleTextAnalysisInvocationClientException(HttpClientErrorException ex, Asset<?> asset) {
    if (ex.getStatusCode() == HttpStatus.CONFLICT) {
        final ErrorInfo errorInfo = ex.getResponseBodyAs(ErrorInfo.class);
        // The body may lack a message; guard against it so that a NullPointerException does not escape instead of
        // the intended integration exception
        final String serviceMessage = errorInfo != null ? errorInfo.getMessage() : null;
        if (serviceMessage != null && serviceMessage.contains("language")) {
            throw new UnsupportedTextAnalysisLanguageException(serviceMessage, asset);
        }
    }
    throw new WebServiceIntegrationException("Text analysis invocation failed.", ex);
}

/**
* Gets the latest {@link TextAnalysisRecord} for the specified Resource.
*
Expand Down Expand Up @@ -205,10 +226,56 @@ private void invokeTextAnalysisOnTerm(AbstractTerm term, TextAnalysisInput input
}
} catch (WebServiceIntegrationException e) {
throw e;
} catch (HttpClientErrorException e) {
throw handleTextAnalysisInvocationClientException(e, term);
} catch (RuntimeException e) {
throw new WebServiceIntegrationException("Text analysis invocation failed.", e);
} catch (IOException e) {
throw new WebServiceIntegrationException("Unable to read text analysis result from response.", e);
}
}

/**
 * Checks whether the text analysis service supports the language of the specified file.
 * <p>
 * If the text analysis service does not provide an endpoint for getting supported languages (or it is not
 * configured), it is assumed that any language is supported.
 * <p>
 * If the file does not have a language set, it is assumed that it is supported as well.
 *
 * @param file File to be analyzed
 * @return {@code true} if the file language is supported, {@code false} otherwise
 * @throws NullPointerException If {@code file} is {@code null}
 */
public boolean supportsLanguage(File file) {
    Objects.requireNonNull(file);
    final String language = file.getLanguage();
    if (language == null) {
        return true;
    }
    // Resolve the language set once - the getter is synchronized and may trigger a remote call
    final Set<String> languages = getSupportedLanguages();
    // A missing or empty set means the supported languages are unknown, so any language is accepted
    return languages == null || languages.isEmpty() || languages.contains(language);
}

/**
 * Gets the languages supported by the text analysis service, loading them from the configured languages endpoint
 * on first use and caching the result in {@code supportedLanguages}.
 * <p>
 * An empty set is cached (meaning "assume any language is supported") when the endpoint URL is not configured,
 * when the request fails, or when the service responds without a body. Because the result is cached, a failed
 * retrieval is not retried by subsequent calls.
 *
 * @return Cached set of supported languages, never {@code null}
 */
private synchronized Set<String> getSupportedLanguages() {
    if (supportedLanguages != null) {
        return supportedLanguages;
    }
    final String languagesEndpointUrl = config.getTextAnalysis().getLanguagesUrl();
    if (languagesEndpointUrl == null || languagesEndpointUrl.isBlank()) {
        LOG.warn(
                "Text analysis service languages endpoint URL not configured. Assuming any language is supported.");
        this.supportedLanguages = Set.of();
    } else {
        try {
            LOG.debug("Getting list of supported languages from text analysis service at '{}'.",
                      languagesEndpointUrl);
            ResponseEntity<Set<String>> response = restClient.exchange(languagesEndpointUrl, HttpMethod.GET, null,
                                                                       new ParameterizedTypeReference<>() {
                                                                       });
            // The response body may be absent; fall back to an empty set so that the cache is never left null
            // (which would break callers and re-trigger the request on every call)
            final Set<String> body = response.getBody();
            this.supportedLanguages = body != null ? body : Set.of();
            LOG.trace("Text analysis supported languages: {}", supportedLanguages);
        } catch (RuntimeException e) {
            LOG.error("Unable to get list of supported languages from text analysis service at '{}'.",
                      languagesEndpointUrl, e);
            this.supportedLanguages = Set.of();
        }
    }
    return supportedLanguages;
}
}
13 changes: 13 additions & 0 deletions src/main/java/cz/cvut/kbss/termit/util/Configuration.java
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,11 @@ public static class TextAnalysis {
*/
private String url;

/**
* URL of the endpoint providing list of languages supported by the text analysis service.
*/
private String languagesUrl;

/**
* Score threshold for a term occurrence for it to be saved into the repository.
*/
Expand All @@ -693,6 +698,14 @@ public void setUrl(String url) {
this.url = url;
}

/**
 * Gets the configured URL of the endpoint providing the list of languages supported by the text analysis service.
 *
 * @return Configured URL, possibly {@code null} when not set
 */
public String getLanguagesUrl() {
    return this.languagesUrl;
}

/**
 * Sets the URL of the endpoint providing the list of languages supported by the text analysis service.
 *
 * @param languagesUrl URL to use
 */
public void setLanguagesUrl(final String languagesUrl) {
    this.languagesUrl = languagesUrl;
}

/**
 * Gets the configured value of {@code termOccurrenceMinScore}.
 *
 * @return Configured value as a string
 */
public String getTermOccurrenceMinScore() {
    return this.termOccurrenceMinScore;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import cz.cvut.kbss.termit.exception.TermItException;
import cz.cvut.kbss.termit.exception.UnsupportedOperationException;
import cz.cvut.kbss.termit.exception.UnsupportedSearchFacetException;
import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException;
import cz.cvut.kbss.termit.exception.ValidationException;
import cz.cvut.kbss.termit.exception.WebServiceIntegrationException;
import cz.cvut.kbss.termit.exception.importing.UnsupportedImportMediaTypeException;
Expand Down Expand Up @@ -87,15 +88,16 @@ private static ErrorInfo errorInfo(Message<?> message, Throwable e) {
}

private static ErrorInfo errorInfo(Message<?> message, TermItException e) {
return ErrorInfo.createParametrizedWithMessage(e.getMessage(), e.getMessageId(), destination(message), e.getParameters());
return ErrorInfo.createParametrizedWithMessage(e.getMessage(), e.getMessageId(), destination(message),
e.getParameters());
}

@MessageExceptionHandler
public void messageDeliveryException(Message<?> message, MessageDeliveryException e) {
// messages without destination will be logged only on trace
(hasDestination(message) ? LOG.atError() : LOG.atTrace())
.setMessage("Failed to send message with destination {}: {}")
.addArgument(()-> destination(message))
.addArgument(() -> destination(message))
.addArgument(e.getMessage())
.setCause(e.getCause())
.log();
Expand Down Expand Up @@ -226,7 +228,8 @@ public ErrorInfo invalidParameter(Message<?> message, InvalidParameterException
@MessageExceptionHandler
public ErrorInfo maxUploadSizeExceededException(Message<?> message, MaxUploadSizeExceededException e) {
logException(e, message);
return ErrorInfo.createWithMessageAndMessageId(e.getMessage(), "error.file.maxUploadSizeExceeded", destination(message));
return ErrorInfo.createWithMessageAndMessageId(e.getMessage(), "error.file.maxUploadSizeExceeded",
destination(message));
}

@MessageExceptionHandler
Expand Down Expand Up @@ -271,4 +274,11 @@ public ErrorInfo uriSyntaxException(Message<?> message, URISyntaxException e) {
logException(e, message);
return errorInfo(message, e);
}

/**
 * Handles {@link UnsupportedTextAnalysisLanguageException} raised while processing a message: the exception is
 * logged and converted to an {@link ErrorInfo} that is returned to the caller.
 *
 * @param message Message whose processing failed
 * @param e       Exception to handle
 * @return Error info describing the exception
 */
@MessageExceptionHandler
public ErrorInfo unsupportedTextAnalysisLanguageException(Message<?> message,
                                                          UnsupportedTextAnalysisLanguageException e) {
    logException(e, message);
    final ErrorInfo result = errorInfo(message, e);
    return result;
}
}
1 change: 1 addition & 0 deletions src/main/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ termit:
storage: /tmp/termit
textAnalysis:
url: http://localhost:8081/annotace/annotate
languagesUrl: http://localhost:8081/annotace/languages
changetracking:
context:
extension: /zmeny
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import cz.cvut.kbss.termit.exception.NotFoundException;
import cz.cvut.kbss.termit.exception.TermItException;
import cz.cvut.kbss.termit.exception.UnsupportedAssetOperationException;
import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException;
import cz.cvut.kbss.termit.model.TextAnalysisRecord;
import cz.cvut.kbss.termit.model.Vocabulary;
import cz.cvut.kbss.termit.model.changetracking.AbstractChangeRecord;
Expand Down Expand Up @@ -203,6 +204,7 @@ void runTextAnalysisInvokesTextAnalysisWithVocabularyRelatedToFilesDocument() {
file.setDocument(Generator.generateDocumentWithId());
final Vocabulary vocabulary = Generator.generateVocabularyWithId();
file.getDocument().setVocabulary(vocabulary.getUri());
when(textAnalysisService.supportsLanguage(file)).thenReturn(true);
sut.runTextAnalysis(file, Collections.emptySet());
verify(textAnalysisService).analyzeFile(file, Collections.singleton(vocabulary.getUri()));
}
Expand All @@ -218,6 +220,7 @@ void runTextAnalysisThrowsUnsupportedAssetOperationWhenResourceIsNotFile() {
@Test
void runTextAnalysisThrowsUnsupportedAssetOperationWhenFileHasNoVocabularyAndNoVocabulariesAreSpecifiedEither() {
final File file = Generator.generateFileWithId("test.html");
when(textAnalysisService.supportsLanguage(file)).thenReturn(true);
assertThrows(UnsupportedAssetOperationException.class,
() -> sut.runTextAnalysis(file, Collections.emptySet()));
verify(textAnalysisService, never()).analyzeFile(any(), anySet());
Expand All @@ -227,6 +230,7 @@ void runTextAnalysisThrowsUnsupportedAssetOperationWhenFileHasNoVocabularyAndNoV
void runTextAnalysisInvokesAnalysisWithCustomVocabulariesWhenSpecified() {
final File file = Generator.generateFileWithId("test.html");
final Set<URI> vocabularies = new HashSet<>(Arrays.asList(Generator.generateUri(), Generator.generateUri()));
when(textAnalysisService.supportsLanguage(file)).thenReturn(true);
sut.runTextAnalysis(file, vocabularies);
verify(textAnalysisService).analyzeFile(file, vocabularies);
}
Expand All @@ -240,6 +244,7 @@ void runTextAnalysisInvokesAnalysisAlsoWithImportedVocabulariesOfVocabularyRElat
final Set<URI> imported = new HashSet<>(Arrays.asList(Generator.generateUri(), Generator.generateUri()));
when(vocabularyService.getReference(vocabulary.getUri())).thenReturn(vocabulary);
when(vocabularyService.getTransitivelyImportedVocabularies(vocabulary)).thenReturn(imported);
when(textAnalysisService.supportsLanguage(file)).thenReturn(true);

sut.runTextAnalysis(file, Collections.emptySet());
final Set<URI> expected = new HashSet<>(imported);
Expand All @@ -259,6 +264,7 @@ void runTextAnalysisInvokesAnalysisWithProvidedVocabulariesAndTheirImports() {
when(vocabularyService.getTransitivelyImportedVocabularies(vOne)).thenReturn(vOneImports);
when(vocabularyService.getReference(vTwo.getUri())).thenReturn(vTwo);
when(vocabularyService.getTransitivelyImportedVocabularies(vTwo)).thenReturn(vTwoImports);
when(textAnalysisService.supportsLanguage(file)).thenReturn(true);

sut.runTextAnalysis(file, new HashSet<>(Arrays.asList(vOne.getUri(), vTwo.getUri())));
final Set<URI> expected = new HashSet<>(vOneImports);
Expand Down Expand Up @@ -554,4 +560,16 @@ void addFileToDocumentDoesNotModifyLanguageWhenItIsAlreadySet() {
verify(resourceRepositoryService).persist(file, vocabulary);
assertEquals("cs", file.getLanguage());
}

/**
 * When the text analysis service reports the file language as unsupported, running text analysis must fail with
 * {@link UnsupportedTextAnalysisLanguageException} and the analysis itself must not be invoked.
 */
@Test
void runTextAnalysisThrowsUnsupportedTextAnalysisExceptionWhenTextAnalysisServiceDoesNotSupportFileLanguage() {
    final File file = Generator.generateFileWithId("test.html");
    file.setDocument(Generator.generateDocumentWithId());
    final Vocabulary vocabulary = Generator.generateVocabularyWithId();
    file.getDocument().setVocabulary(vocabulary.getUri());
    file.setLanguage("sk");
    when(textAnalysisService.supportsLanguage(file)).thenReturn(false);
    assertThrows(UnsupportedTextAnalysisLanguageException.class,
                 () -> sut.runTextAnalysis(file, Set.of(vocabulary.getUri())));
    verify(textAnalysisService).supportsLanguage(file);
    // The language check must short-circuit the operation before any analysis is requested
    verify(textAnalysisService, never()).analyzeFile(any(), anySet());
}
}
Loading

0 comments on commit f44f7b0

Please sign in to comment.