Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract multiple organizers (comma-separated) #3

Merged
merged 1 commit into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/tess/rdf/event_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def extract_params
params[:online] = extract_online
params.merge!(extract_location)

params[:organizer] = extract_names_or_values(RDF::Vocab::SCHEMA.organizer).first
params[:organizer] = extract_names_or_ids(RDF::Vocab::SCHEMA.organizer).join(', ')

params[:capacity] = extract_value(RDF::Vocab::SCHEMA.maximumAttendeeCapacity)

Expand Down
19 changes: 19 additions & 0 deletions lib/tess/rdf/extraction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,25 @@ def extract_mentions(subject: resource)
[:mention, RDF::Vocab::SCHEMA.url, :url, { optional: true }]).map { |a| { title: a[:name], url: a[:url] } }.compact
end

# Builds a sorted, de-duplicated list of labels for every object of
# `predicate` on `subject`.
#
# For each result row returned by `query`:
#   * no schema:name  -> the row's :thing value is used as-is
#   * name and :thing -> "name (thing)"
#   * name and :url   -> "name (url)"   (only when :thing is falsy)
#   * name only       -> the bare name
#
# Rows that end up as nil are discarded before `uniq` and `sort`.
#
# NOTE(review): whether :thing can actually be nil (e.g. for blank nodes)
# depends on what `query` returns — confirm against its implementation.
def extract_names_or_ids(predicate, subject: resource)
  rows = query([subject, predicate, :thing],
               [:thing, RDF::Vocab::SCHEMA.name, :name, { optional: true }],
               [:thing, RDF::Vocab::SCHEMA.url, :url, { optional: true }])

  labels = rows.map do |row|
    label = row[:name]
    # Unnamed things are represented by their identifier alone.
    next row[:thing] if label.nil?

    # Prefer the node identifier over the schema:url as the parenthesised reference.
    reference = row[:thing] || row[:url]
    reference ? label + " (#{reference})" : label
  end

  labels.compact.uniq.sort
end


def parse_value(value)
# Using 'value.class.name' instead of just 'value' here or things like RDF::Literal::DateTime fall into the RDF::Literal block
# Not using 'value.class' because 'case' uses '===' for comparison and RDF::URI === RDF::URI is false!
Expand Down
16 changes: 15 additions & 1 deletion test/extraction_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ class ExtractionTest < Test::Unit::TestCase
assert_equal '2022-08-22', params[:start]
assert_equal '2022-08-25', params[:end]
assert_equal ['training', 'biostatistics', 'raphael gottardo group'].sort, params[:keywords].sort
assert_equal 'Patricia Palagi', params[:organizer]
assert_equal 'Patricia Palagi (https://orcid.org/0000-0001-9062-6303), SIB Swiss Institute of Bioinformatics (https://ror.org/002n09z45)', params[:organizer]
assert params[:node_names].include?('Switzerland')
end

Expand Down Expand Up @@ -311,4 +311,18 @@ class ExtractionTest < Test::Unit::TestCase
assert_equal ['Bioinformatics', 'Genomics', 'Long-read', 'Metabarcoding', 'Metagenomics'], params[:scientific_topic_names]
assert_equal ['Edinburgh Genomics'], params[:host_institutions]
end

# Organizers given only as @id references (no names) should come out as a
# single comma-separated string of those ids.
test 'extract multiple organizers as comma-separated string' do
  event_uri = 'https://catalogue.france-bioinformatique.fr/api/event/591/?format=json-ld'
  json_ld = fixture_file('ifb-multi-organizers.json').read

  extracted = Tess::Rdf::CourseInstanceExtractor.new(json_ld, :jsonld, base_uri: event_uri).extract
  assert_equal 1, extracted.count

  # The four organizer ids from the fixture, joined with ', '.
  expected_organizers = "https://catalogue.france-bioinformatique.fr/api/organisation/CIRAD/?format=json-ld, https://catalogue.france-bioinformatique.fr/api/organisation/INRAE/?format=json-ld, https://catalogue.france-bioinformatique.fr/api/organisation/IRD/?format=json-ld, https://catalogue.france-bioinformatique.fr/api/team/South%20Green/?format=json-ld"
  assert_equal expected_organizers, extracted.first[:organizer]
end
end
32 changes: 32 additions & 0 deletions test/field_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,38 @@ class FieldTest < Test::Unit::TestCase
{ title: 'European Genome-phenome Archive', url: 'https://www.ebi.ac.uk/ega/home' }], learning_resource_extractor(json).send(:extract_mentions)
end

# Exercises extract_names_or_ids against the four organizer shapes in the
# inline JSON-LD below:
#   1. @id and name          -> "name (@id)"
#   2. name only             -> "name"
#   3. name and url, no @id  -> "name (url)"
#   4. @id only              -> "@id"
# NOTE(review): conformsTo points at the TrainingMaterial profile although
# @type is CourseInstance — confirm this is intentional for the test.
test 'extract multiple organizers' do
json = %(
[{
"@context": "http://schema.org",
"@type": "CourseInstance",
"http://purl.org/dc/terms/conformsTo": {
"@id": "https://bioschemas.org/profiles/TrainingMaterial/1.0-RELEASE",
"@type": "CreativeWork"
},
"organizer": [
{
"@type": "Person",
"@id": "https://orcid.org/0000-0001-9062-6303",
"name": "Patricia Palagi"
},
{
"@type": "Person",
"name": "Someone"
},
{
"@type": "Organization",
"name": "SIB Swiss Institute of Bioinformatics",
"url": "https://www.sib.swiss/"
},
{
"@id" : "https://cool.guys"
}
]
}])
# The method is invoked via `send`; the expected order differs from the
# input order because the returned list is sorted before being joined here.
assert_equal 'Patricia Palagi (https://orcid.org/0000-0001-9062-6303), SIB Swiss Institute of Bioinformatics (https://www.sib.swiss/), Someone, https://cool.guys',
course_instance_extractor(json).send(:extract_names_or_ids, RDF::Vocab::SCHEMA.organizer).join(', ')
end
private

def course_extractor(fixture, format: :jsonld, base_uri: 'https://example.com/my.json')
Expand Down
95 changes: 95 additions & 0 deletions test/fixtures/ifb-multi-organizers.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
[
{
"@id": "https://catalogue.france-bioinformatique.fr/api/event/591/?format=json-ld",
"@type": [
"https://schema.org/CourseInstance"
],
"https://schema.org/description": [
{
"@type": "https://schema.org/Text",
"@value": "Ce module vise à fournir une expérience d’analyse de données de génomique.\r\nLes technologies Next Generation Sequencing (NGS) ont conduit à une production massive de\r\ndonnées « Omiques » pour les plantes cultivées majeures, ce qui demande de nouvelles\r\napproches d’analyses haut débit. La connaissance de ces approches et des outils qui en\r\ndécoulent pour analyser la séquence et la structure des génomes, les annoter et caractériser\r\nleur diversité et leurs profils d’expression permet d’aborder des questions de recherche\r\nbiologique avancée sur la diversité et l’adaptation des plantes. Les espèces prises en\r\nconsidération sont des espèces phares des instituts de recherche agronomique de Montpellier\r\net font partie des cultures les plus importantes pour l’agriculture mondiale. Des plateformes\r\nd’outils bioinformatiques récents reposant sur des centres de calcul et de stockage haute\r\ncapacité, sont en place pour analyser des jeux de données originales permettant de mieux\r\ncomprendre comment les génomes de plantes évoluent et s’expriment. L’ensemble de ces\r\nconnaissances Findable, Accessible, Interoperable, Reusable car intégré dans des systèmes\r\nd’information peut soutenir l'identification de gènes responsables de caractères adaptatifs ou\r\nde production. La mobilisation de jeunes chercheurs sur ces sujets est primordiale tant la\r\ndemande est importante.\r\nLe module est structuré sous la forme de cours et de travaux tutorés avec la rencontre de\r\ngénéticiens et de bioinformaticiens permettant d’appréhender les formes variées des progrès\r\nen bioanalyse génomique. Il permet d’acquérir les lignes directrices pour l’accès, l'utilisation\r\net l'analyse de différents types de données omique (e.g. (épi)génomique, transcriptomique,\r\nprotéique, métabolique) en vue d’accélérer les recherches en génomique fonctionnelle et\r\nbiotechnologie des plantes.\r\nL’évaluation sera faite sur la base de la participation et de la qualité du projet proposé par\r\nl’étudiant en fin de module, individuellement ou en binôme, suivant les consignes détaillées en\r\ndébut de module"
}
],
"https://schema.org/endDate": [
{
"@type": "https://schema.org/Date",
"@value": "2024-03-08"
}
],
"https://schema.org/location": [
{
"@id": "_:N6703f4fcf2b04f2d8b90e5b0341f8f09"
}
],
"https://schema.org/maximumAttendeeCapacity": [
{
"@type": "https://schema.org/Integer",
"@value": "50"
}
],
"https://schema.org/name": [
{
"@type": "https://schema.org/Text",
"@value": "BIGomics, Génomique Comparative Biopolis"
}
],
"https://schema.org/offers": [
{
"@type": "https://schema.org/Demand",
"@value": "Free to academics"
}
],
"https://schema.org/organizer": [
{
"@id": "https://catalogue.france-bioinformatique.fr/api/organisation/CIRAD/?format=json-ld"
},
{
"@id": "https://catalogue.france-bioinformatique.fr/api/organisation/INRAE/?format=json-ld"
},
{
"@id": "https://catalogue.france-bioinformatique.fr/api/organisation/IRD/?format=json-ld"
},
{
"@id": "https://catalogue.france-bioinformatique.fr/api/team/South%20Green/?format=json-ld"
}
],
"https://schema.org/startDate": [
{
"@type": "https://schema.org/Date",
"@value": "2024-03-04"
}
],
"https://schema.org/url": [
{
"@type": "https://schema.org/URL",
"@value": "https://cibio.up.pt/en/events/bigomics-high-throughput-genetic-diversity-analyses-of-tropical-crops/"
}
]
},
{
"@id": "_:N6703f4fcf2b04f2d8b90e5b0341f8f09",
"@type": [
"https://schema.org/PostalAddress"
],
"https://schema.org/addressCountry": [
{
"@value": "France"
}
],
"https://schema.org/addressLocality": [
{
"@value": "Montpellier"
}
],
"https://schema.org/postalCode": [
{
"@value": ""
}
],
"https://schema.org/streetAddress": [
{
"@value": "Salle 159 Batiment 3 CIRAD Lavalette, Avenue Agropolis"
}
]
}
]
Loading