Skip to content

Commit

Permalink
add common functionality for language components
Browse files Browse the repository at this point in the history
  • Loading branch information
heinpa committed Aug 19, 2024
1 parent f080512 commit b0ffb99
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 12 deletions.
77 changes: 77 additions & 0 deletions qanary_helpers/language_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from qanary_helpers.qanary_queries import (
    get_text_question_from_uri,
    select_from_triplestore,
)


class question_text_with_language:
    """Value holder pairing a question's URI with a text and a language tag.

    The three values are stored as plain public attributes (``uri``, ``text``,
    ``lang``); the ``get_*`` accessors return those attributes unchanged and
    are kept for callers that rely on them.
    """

    def __init__(self, uri: str, text: str, lang: str):
        """Store the given URI, text and language identifier as-is.

        Args:
            uri: URI identifying the question.
            text: Text associated with the question.
            lang: Language identifier of ``text``.
        """
        self.uri = uri
        self.text = text
        self.lang = lang

    def __repr__(self) -> str:
        """Return a constructor-like representation for logs and debugging."""
        return (
            f"{type(self).__name__}("
            f"uri={self.uri!r}, text={self.text!r}, lang={self.lang!r})"
        )

    def get_uri(self) -> str:
        """Return the stored question URI."""
        return self.uri

    def get_text(self) -> str:
        """Return the stored text."""
        return self.text

    def get_language(self) -> str:
        """Return the stored language identifier."""
        return self.lang


def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]:
    """Collect the questions annotated with the detected language ``lang``.

    Selects every ``qa:AnnotationOfQuestionLanguage`` in ``graph_uri`` whose
    body, converted with ``STR()``, equals ``lang``. For each match the
    annotation target is taken as the question URI and its text is fetched
    with ``get_text_question_from_uri``.

    Args:
        triplestore_endpoint: Endpoint passed to ``select_from_triplestore``
            and to ``get_text_question_from_uri``.
        graph_uri: URI of the graph named in the query's ``FROM`` clause.
        lang: Language identifier to match, given without surrounding quotes.

    Returns:
        One ``question_text_with_language`` per matching annotation; an empty
        list when the query yields no bindings.
    """
    # The value must reach the query as a quoted string literal; backslashes
    # and double quotes are escaped so it cannot terminate the literal early.
    lang_literal = '"{0}"'.format(
        lang.replace("\\", "\\\\").replace('"', '\\"')
    )
    sparql_find_ld = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        SELECT *
        FROM <{graph}>
        WHERE {{
            ?annotationId a qa:AnnotationOfQuestionLanguage .
            ?annotationId oa:hasTarget ?hasTarget ;
                          oa:hasBody ?hasBody ;
                          oa:annotatedBy ?annotatedBy ;
                          oa:annotatedAt ?annotatedAt .
            FILTER(STR(?hasBody) = {lang})
        }}
    """.format(
        graph=graph_uri,
        lang=lang_literal,
    )
    results = select_from_triplestore(triplestore_endpoint, sparql_find_ld)

    source_texts = []
    for result in results["results"]["bindings"]:
        question_uri = result["hasTarget"]["value"]
        # The annotation only references the question, so the text itself is
        # retrieved separately through the shared helper.
        question_text = get_text_question_from_uri(question_uri, triplestore_endpoint)
        source_texts.append(
            question_text_with_language(uri=question_uri, text=question_text, lang=lang)
        )

    return source_texts


def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]:
    """Collect the translation annotations whose body is tagged with ``lang``.

    Selects every ``qa:AnnotationOfQuestionTranslation`` in ``graph_uri``
    whose body has the language tag ``lang``. The annotation target is used
    as the question URI and the annotation body as the text.

    Args:
        triplestore_endpoint: Endpoint passed to ``select_from_triplestore``.
        graph_uri: URI of the graph named in the query's ``FROM`` clause.
        lang: Language tag to match, given without surrounding quotes.

    Returns:
        One ``question_text_with_language`` per matching annotation; an empty
        list when the query yields no bindings.
    """
    # lang() yields a plain string, so the comparison operand has to be a
    # quoted string literal; escape characters that could end it early.
    lang_literal = '"{0}"'.format(
        lang.replace("\\", "\\\\").replace('"', '\\"')
    )
    sparql_find_translations = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        SELECT *
        FROM <{graph}>
        WHERE {{
            ?annotationId a qa:AnnotationOfQuestionTranslation .
            ?annotationId oa:hasTarget ?hasTarget ;
                          oa:hasBody ?hasBody ;
                          oa:annotatedBy ?annotatedBy ;
                          oa:annotatedAt ?annotatedAt .
            FILTER(lang(?hasBody) = {lang}).
        }}
    """.format(
        graph=graph_uri,
        lang=lang_literal,
    )
    results = select_from_triplestore(triplestore_endpoint, sparql_find_translations)

    return [
        question_text_with_language(
            binding["hasTarget"]["value"],
            binding["hasBody"]["value"],
            lang,
        )
        for binding in results["results"]["bindings"]
    ]
27 changes: 16 additions & 11 deletions qanary_helpers/qanary_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@
import re


def get_text_question_from_uri(question_uri, triplestore_endpoint):
    """
    Fetches the text of a question by requesting its "/raw" resource.

    :param question_uri: URI of the question; "/raw" is appended to it
    :param triplestore_endpoint: endpoint URL whose hostname replaces
        "localhost" in the request URL
    :return: the body of the HTTP response as text
    """
    raw_uri = question_uri + "/raw"
    logging.info(
        "found: questionURI={0} questionURIraw={1}".format(question_uri, raw_uri)
    )

    # Every "localhost" occurrence in the raw URI is swapped for the hostname
    # of the triplestore endpoint (presumably so the question stays reachable
    # when this code runs on a different host -- confirm with the deployment).
    endpoint_host = urlparse(triplestore_endpoint).hostname
    response = requests.get(raw_uri.replace("localhost", endpoint_host))
    return response.text


def get_text_question_in_graph(triplestore_endpoint, graph):
"""
Retrieves the questions from the triplestore returns an array
Expand All @@ -17,7 +29,7 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT DISTINCT ?questionURI
FROM <{uri}>
FROM <{uri}>
WHERE {{
?questionURI rdf:type <http://www.wdaqua.eu/qa#Question> .
}}
Expand All @@ -26,16 +38,9 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
results = select_from_triplestore(triplestore_endpoint, query)
for result in results["results"]["bindings"]:
question_uri = result['questionURI']['value']
question_raw = question_uri + "/raw"
logging.info("found: questionURI={0} questionURIraw={1}".format(
question_uri,
question_raw
))
question_text = requests.get(question_raw.replace(
"localhost", urlparse(triplestore_endpoint).hostname)
)
logging.info("found question: \"{0}\"".format(question_text.text))
questions.append({"uri": question_uri, "text": question_text.text})
question_text = get_text_question_from_uri(question_uri, triplestore_endpoint)
logging.info("found question: \"{0}\"".format(question_text))
questions.append({"uri": question_uri, "text": question_text})

return questions

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def read_requirements():

setuptools.setup(
name="qanary-helpers",
version="0.2.2",
version="0.3.0",
author="Andreas Both, Aleksandr Perevalov",
author_email="[email protected], [email protected]",
description="A package that helps to build Python components for the Qanary Question Answering framework",
Expand Down

0 comments on commit b0ffb99

Please sign in to comment.