From b0ffb998467216c8a8b3e01fdcb5724043a9e579 Mon Sep 17 00:00:00 2001
From: heinpa
Date: Tue, 20 Aug 2024 00:53:38 +0200
Subject: [PATCH] add common functionality for language components

---
Review notes (text in this section is ignored by `git am`):

* language_queries.py called get_question_text_from_uri(), which is not
  defined anywhere; it now imports and calls the helper this patch adds to
  qanary_queries.py, get_text_question_from_uri().
* The language code is now inserted into both FILTERs as a quoted SPARQL
  string literal ("{lang}"); a bare `= en` is not valid SPARQL.
* PREFIX IRIs and the rdf:type object were empty in the received copy and
  are restored with the usual Qanary/W3C namespace IRIs -- TODO confirm
  against the repository before applying.
* Line breaks and indentation were reconstructed; docstrings were added and
  hunk headers/diffstat recomputed. The blob hashes on the `index` lines
  predate these edits.

 qanary_helpers/language_queries.py | 88 ++++++++++++++++++++++++++++++
 qanary_helpers/qanary_queries.py   | 32 +++++++----
 setup.py                           |  2 +-
 3 files changed, 110 insertions(+), 12 deletions(-)
 create mode 100644 qanary_helpers/language_queries.py

diff --git a/qanary_helpers/language_queries.py b/qanary_helpers/language_queries.py
new file mode 100644
index 0000000..c9a5346
--- /dev/null
+++ b/qanary_helpers/language_queries.py
@@ -0,0 +1,88 @@
+from qanary_helpers.qanary_queries import get_text_question_from_uri, select_from_triplestore
+
+
+class question_text_with_language:
+    """A question text together with its question URI and language."""
+
+    def __init__(self, uri: str, text: str, lang: str):
+        self.uri = uri
+        self.text = text
+        self.lang = lang
+
+    def get_uri(self):
+        return self.uri
+
+    def get_text(self):
+        return self.text
+
+    def get_language(self):
+        return self.lang
+
+
+def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]:
+    """Return the questions in graph_uri whose detected language equals lang.
+
+    Selects qa:AnnotationOfQuestionLanguage annotations whose body is lang
+    and fetches the text of each annotated question from its URI.
+    """
+    source_texts = list()
+    sparql_find_ld = """
+        PREFIX qa: <http://www.wdaqua.eu/qa#>
+        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
+        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+
+        SELECT *
+        FROM <{graph}>
+        WHERE {{
+            ?annotationId a qa:AnnotationOfQuestionLanguage .
+            ?annotationId oa:hasTarget ?hasTarget ;
+                oa:hasBody ?hasBody ;
+                oa:annotatedBy ?annotatedBy ;
+                oa:annotatedAt ?annotatedAt .
+            FILTER(STR(?hasBody) = "{lang}")
+        }}
+    """.format(
+        graph = graph_uri,
+        lang=lang
+    )
+    results = select_from_triplestore(triplestore_endpoint, sparql_find_ld)
+    for result in results["results"]["bindings"]:
+        question_uri = result["hasTarget"]["value"]
+        question_text = get_text_question_from_uri(question_uri, triplestore_endpoint)
+        source_texts.append(question_text_with_language(uri=question_uri, text=question_text, lang=lang))
+
+    return source_texts
+
+
+def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]:
+    """Return the translations stored in graph_uri that are tagged with lang.
+
+    Selects qa:AnnotationOfQuestionTranslation annotations whose body has the
+    language tag lang; the body value itself is used as the text.
+    """
+    source_texts = list()
+    sparql_find_ld = """
+        PREFIX qa: <http://www.wdaqua.eu/qa#>
+        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
+
+        SELECT *
+        FROM <{graph}>
+        WHERE {{
+            ?annotationId a qa:AnnotationOfQuestionTranslation .
+            ?annotationId oa:hasTarget ?hasTarget ;
+                oa:hasBody ?hasBody ;
+                oa:annotatedBy ?annotatedBy ;
+                oa:annotatedAt ?annotatedAt .
+            FILTER(lang(?hasBody) = "{lang}").
+        }}
+    """.format(
+        graph = graph_uri,
+        lang=lang
+    )
+    results = select_from_triplestore(triplestore_endpoint, sparql_find_ld)
+    for result in results["results"]["bindings"]:
+        question_uri = result["hasTarget"]["value"]
+        question_text = result["hasBody"]["value"]
+        source_texts.append(question_text_with_language(question_uri, question_text, lang))
+
+    return source_texts
diff --git a/qanary_helpers/qanary_queries.py b/qanary_helpers/qanary_queries.py
index 5ceac7b..686cf75 100644
--- a/qanary_helpers/qanary_queries.py
+++ b/qanary_helpers/qanary_queries.py
@@ -5,6 +5,23 @@ import re
 
 
+def get_text_question_from_uri(question_uri, triplestore_endpoint):
+    """Fetch the raw question text served at ``<question_uri>/raw``.
+
+    Any "localhost" in that URL is replaced with the hostname of
+    triplestore_endpoint before the HTTP GET is sent.
+    """
+    question_raw = question_uri + "/raw"
+    logging.info("found: questionURI={0} questionURIraw={1}".format(
+        question_uri,
+        question_raw
+    ))
+    question_text = requests.get(question_raw.replace(
+        "localhost", urlparse(triplestore_endpoint).hostname)
+    )
+    return question_text.text
+
+
 def get_text_question_in_graph(triplestore_endpoint, graph):
     """
     Retrieves the questions from the triplestore
     returns an array
@@ -17,7 +34,7 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
     query = """
         PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
         SELECT DISTINCT ?questionURI
-        FROM <{uri}> 
+        FROM <{uri}>
         WHERE {{
             ?questionURI rdf:type <http://www.wdaqua.eu/qa#Question> .
         }}
@@ -26,16 +43,9 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
     results = select_from_triplestore(triplestore_endpoint, query)
     for result in results["results"]["bindings"]:
         question_uri = result['questionURI']['value']
-        question_raw = question_uri + "/raw"
-        logging.info("found: questionURI={0} questionURIraw={1}".format(
-            question_uri,
-            question_raw
-        ))
-        question_text = requests.get(question_raw.replace(
-            "localhost", urlparse(triplestore_endpoint).hostname)
-        )
-        logging.info("found question: \"{0}\"".format(question_text.text))
-        questions.append({"uri": question_uri, "text": question_text.text})
+        question_text = get_text_question_from_uri(question_uri, triplestore_endpoint)
+        logging.info("found question: \"{0}\"".format(question_text))
+        questions.append({"uri": question_uri, "text": question_text})
 
     return questions
 
diff --git a/setup.py b/setup.py
index 8cfefe6..a813e63 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ def read_requirements():
 
 setuptools.setup(
     name="qanary-helpers",
-    version="0.2.2",
+    version="0.3.0",
     author="Andreas Both, Aleksandr Perevalov",
     author_email="andreas.both@htwk-leipzig.de, aleksandr.perevalov@hs-anhalt.de",
     description="A package that helps to build Python components for the Qanary Question Answering framework",