Skip to content

Commit

Permalink
Extract multiple organizers (comma-separated)
Browse files Browse the repository at this point in the history
Also include any ID/URL in parentheses

ElixirTeSS/TeSS#914
  • Loading branch information
fbacall committed Feb 6, 2024
1 parent 9f266a9 commit 6501def
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 2 deletions.
2 changes: 1 addition & 1 deletion lib/tess/rdf/event_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def extract_params
params[:online] = extract_online
params.merge!(extract_location)

params[:organizer] = extract_names_or_values(RDF::Vocab::SCHEMA.organizer).first
params[:organizer] = extract_names_or_ids(RDF::Vocab::SCHEMA.organizer).join(', ')

params[:capacity] = extract_value(RDF::Vocab::SCHEMA.maximumAttendeeCapacity)

Expand Down
19 changes: 19 additions & 0 deletions lib/tess/rdf/extraction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,25 @@ def extract_mentions(subject: resource)
[:mention, RDF::Vocab::SCHEMA.url, :url, { optional: true }]).map { |a| { title: a[:name], url: a[:url] } }.compact
end

# Builds a display label for every object of `predicate` on `subject`.
#
# Each matched object contributes:
# * its identifier alone when it has no schema:name,
# * "name (identifier)" when it has a name and an identifier, or failing that a schema:url,
# * the bare name when neither an identifier nor a URL is available.
#
# Objects that yield nothing (no name and no identifier) are dropped; the
# remaining labels are de-duplicated and returned in sorted order.
#
# NOTE(review): the URL fallback only triggers when `query` yields nil for
# :thing (presumably blank nodes) - confirm against `query`.
#
# @param predicate the RDF predicate whose objects should be described
# @param subject the node to start from (defaults to the extractor's `resource`)
# @return [Array] sorted, unique labels
def extract_names_or_ids(predicate, subject: resource)
  patterns = [
    [subject, predicate, :thing],
    [:thing, RDF::Vocab::SCHEMA.name, :name, { optional: true }],
    [:thing, RDF::Vocab::SCHEMA.url, :url, { optional: true }]
  ]

  labels = query(*patterns).map do |row|
    name = row[:name]
    # Nameless objects are represented by their identifier (possibly nil, removed below).
    next row[:thing] if name.nil?

    # Prefer the node's own identifier; fall back to its schema:url.
    reference = row[:thing] || row[:url]
    reference ? name + " (#{reference})" : name
  end

  labels.compact.uniq.sort
end


def parse_value(value)
# Using 'value.class.name' instead of just 'value' here or things like RDF::Literal::DateTime fall into the RDF::Literal block
# Not using 'value.class' because 'case' uses '===' for comparison and RDF::URI === RDF::URI is false!
Expand Down
16 changes: 15 additions & 1 deletion test/extraction_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ class ExtractionTest < Test::Unit::TestCase
assert_equal '2022-08-22', params[:start]
assert_equal '2022-08-25', params[:end]
assert_equal ['training', 'biostatistics', 'raphael gottardo group'].sort, params[:keywords].sort
assert_equal 'Patricia Palagi', params[:organizer]
assert_equal 'Patricia Palagi (https://orcid.org/0000-0001-9062-6303), SIB Swiss Institute of Bioinformatics (https://ror.org/002n09z45)', params[:organizer]
assert params[:node_names].include?('Switzerland')
end

Expand Down Expand Up @@ -311,4 +311,18 @@ class ExtractionTest < Test::Unit::TestCase
assert_equal ['Bioinformatics', 'Genomics', 'Long-read', 'Metabarcoding', 'Metagenomics'], params[:scientific_topic_names]
assert_equal ['Edinburgh Genomics'], params[:host_institutions]
end

test 'extract multiple organizers as comma-separated string' do
  # The IFB fixture lists its four organizers by @id only (no names), so each
  # one is expected to appear as its identifier in the joined string.
  source_uri = 'https://catalogue.france-bioinformatique.fr/api/event/591/?format=json-ld'
  fixture = fixture_file('ifb-multi-organizers.json')

  extracted = Tess::Rdf::CourseInstanceExtractor.new(fixture.read, :jsonld, base_uri: source_uri).extract
  assert_equal 1, extracted.count

  expected_organizers = [
    'https://catalogue.france-bioinformatique.fr/api/organisation/CIRAD/?format=json-ld',
    'https://catalogue.france-bioinformatique.fr/api/organisation/INRAE/?format=json-ld',
    'https://catalogue.france-bioinformatique.fr/api/organisation/IRD/?format=json-ld',
    'https://catalogue.france-bioinformatique.fr/api/team/South%20Green/?format=json-ld'
  ].join(', ')

  assert_equal expected_organizers, extracted.first[:organizer]
end
end
32 changes: 32 additions & 0 deletions test/field_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,38 @@ class FieldTest < Test::Unit::TestCase
{ title: 'European Genome-phenome Archive', url: 'https://www.ebi.ac.uk/ega/home' }], learning_resource_extractor(json).send(:extract_mentions)
end

test 'extract multiple organizers' do
# Exercises extract_names_or_ids against four organizer shapes:
#   1. name + @id          -> "name (@id)"
#   2. name only           -> "name"
#   3. name + url, no @id  -> "name (url)"
#   4. @id only            -> "@id"
json = %(
[{
"@context": "http://schema.org",
"@type": "CourseInstance",
"http://purl.org/dc/terms/conformsTo": {
"@id": "https://bioschemas.org/profiles/TrainingMaterial/1.0-RELEASE",
"@type": "CreativeWork"
},
"organizer": [
{
"@type": "Person",
"@id": "https://orcid.org/0000-0001-9062-6303",
"name": "Patricia Palagi"
},
{
"@type": "Person",
"name": "Someone"
},
{
"@type": "Organization",
"name": "SIB Swiss Institute of Bioinformatics",
"url": "https://www.sib.swiss/"
},
{
"@id" : "https://cool.guys"
}
]
}])
# The method returns its labels sorted, so the expected order here differs
# from the order in which the organizers are declared in the JSON above.
assert_equal 'Patricia Palagi (https://orcid.org/0000-0001-9062-6303), SIB Swiss Institute of Bioinformatics (https://www.sib.swiss/), Someone, https://cool.guys',
course_instance_extractor(json).send(:extract_names_or_ids, RDF::Vocab::SCHEMA.organizer).join(', ')
end
private

def course_extractor(fixture, format: :jsonld, base_uri: 'https://example.com/my.json')
Expand Down
95 changes: 95 additions & 0 deletions test/fixtures/ifb-multi-organizers.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
[
{
"@id": "https://catalogue.france-bioinformatique.fr/api/event/591/?format=json-ld",
"@type": [
"https://schema.org/CourseInstance"
],
"https://schema.org/description": [
{
"@type": "https://schema.org/Text",
"@value": "Ce module vise à fournir une expérience d’analyse de données de génomique.\r\nLes technologies Next Generation Sequencing (NGS) ont conduit à une production massive de\r\ndonnées « Omiques » pour les plantes cultivées majeures, ce qui demande de nouvelles\r\napproches d’analyses haut débit. La connaissance de ces approches et des outils qui en\r\ndécoulent pour analyser la séquence et la structure des génomes, les annoter et caractériser\r\nleur diversité et leurs profils d’expression permet d’aborder des questions de recherche\r\nbiologique avancée sur la diversité et l’adaptation des plantes. Les espèces prises en\r\nconsidération sont des espèces phares des instituts de recherche agronomique de Montpellier\r\net font partie des cultures les plus importantes pour l’agriculture mondiale. Des plateformes\r\nd’outils bioinformatiques récents reposant sur des centres de calcul et de stockage haute\r\ncapacité, sont en place pour analyser des jeux de données originales permettant de mieux\r\ncomprendre comment les génomes de plantes évoluent et s’expriment. L’ensemble de ces\r\nconnaissances Findable, Accessible, Interoperable, Reusable car intégré dans des systèmes\r\nd’information peut soutenir l'identification de gènes responsables de caractères adaptatifs ou\r\nde production. La mobilisation de jeunes chercheurs sur ces sujets est primordiale tant la\r\ndemande est importante.\r\nLe module est structuré sous la forme de cours et de travaux tutorés avec la rencontre de\r\ngénéticiens et de bioinformaticiens permettant d’appréhender les formes variées des progrès\r\nen bioanalyse génomique. Il permet d’acquérir les lignes directrices pour l’accès, l'utilisation\r\net l'analyse de différents types de données omique (e.g. 
(épi)génomique, transcriptomique,\r\nprotéique, métabolique) en vue d’accélérer les recherches en génomique fonctionnelle et\r\nbiotechnologie des plantes.\r\nL’évaluation sera faite sur la base de la participation et de la qualité du projet proposé par\r\nl’étudiant en fin de module, individuellement ou en binôme, suivant les consignes détaillées en\r\ndébut de module"
}
],
"https://schema.org/endDate": [
{
"@type": "https://schema.org/Date",
"@value": "2024-03-08"
}
],
"https://schema.org/location": [
{
"@id": "_:N6703f4fcf2b04f2d8b90e5b0341f8f09"
}
],
"https://schema.org/maximumAttendeeCapacity": [
{
"@type": "https://schema.org/Integer",
"@value": "50"
}
],
"https://schema.org/name": [
{
"@type": "https://schema.org/Text",
"@value": "BIGomics, Génomique Comparative Biopolis"
}
],
"https://schema.org/offers": [
{
"@type": "https://schema.org/Demand",
"@value": "Free to academics"
}
],
"https://schema.org/organizer": [
{
"@id": "https://catalogue.france-bioinformatique.fr/api/organisation/CIRAD/?format=json-ld"
},
{
"@id": "https://catalogue.france-bioinformatique.fr/api/organisation/INRAE/?format=json-ld"
},
{
"@id": "https://catalogue.france-bioinformatique.fr/api/organisation/IRD/?format=json-ld"
},
{
"@id": "https://catalogue.france-bioinformatique.fr/api/team/South%20Green/?format=json-ld"
}
],
"https://schema.org/startDate": [
{
"@type": "https://schema.org/Date",
"@value": "2024-03-04"
}
],
"https://schema.org/url": [
{
"@type": "https://schema.org/URL",
"@value": "https://cibio.up.pt/en/events/bigomics-high-throughput-genetic-diversity-analyses-of-tropical-crops/"
}
]
},
{
"@id": "_:N6703f4fcf2b04f2d8b90e5b0341f8f09",
"@type": [
"https://schema.org/PostalAddress"
],
"https://schema.org/addressCountry": [
{
"@value": "France"
}
],
"https://schema.org/addressLocality": [
{
"@value": "Montpellier"
}
],
"https://schema.org/postalCode": [
{
"@value": ""
}
],
"https://schema.org/streetAddress": [
{
"@value": "Salle 159 Batiment 3 CIRAD Lavalette, Avenue Agropolis"
}
]
}
]

0 comments on commit 6501def

Please sign in to comment.