From 7d2942394360d2dff0e06f3fc6ca245ac3e458aa Mon Sep 17 00:00:00 2001 From: lopierra Date: Tue, 2 Apr 2024 12:26:50 -0700 Subject: [PATCH 01/15] add 'including dictionaries' to expectedNumberOfFiles (Dataset) --- src/linkml/include_study.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkml/include_study.yaml b/src/linkml/include_study.yaml index 3e0ad24..90d00a5 100644 --- a/src/linkml/include_study.yaml +++ b/src/linkml/include_study.yaml @@ -251,7 +251,7 @@ slots: required: true expectedNumberOfFiles: definition_uri: include:expectedNumberOfFiles - description: Expected number of files associated with this dataset + description: Expected number of files associated with this dataset, including dictionaries title: Expected Number of Files range: integer required: false #update to true when this is figured out From ddce62d00a6967bc60c8d17a5d3814da57239c4f Mon Sep 17 00:00:00 2001 From: lopierra Date: Tue, 2 Apr 2024 19:32:20 -0700 Subject: [PATCH 02/15] add Dataset Description slot --- src/linkml/include_study.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/linkml/include_study.yaml b/src/linkml/include_study.yaml index 90d00a5..fb4e910 100644 --- a/src/linkml/include_study.yaml +++ b/src/linkml/include_study.yaml @@ -63,6 +63,7 @@ classes: - hasStudy #add hasDataset to Assay>DataFile? - datasetName + - datasetDescription - datasetGlobalId - datasetExternalId - datasetExpectedNumberOfParticipants @@ -225,6 +226,11 @@ slots: title: Dataset Name range: string required: true + datasetDescription: + definition_uri: include:datasetDescription + description: Brief additional notes about the dataset (1-3 sentences) that are not already captured in the other fields + title: Dataset Description + range: string datasetGlobalId: definition_uri: include:datasetGlobalId description: Unique Global ID for dataset, generated by DCC From 1f6d849b8285836af68c674c3f5cb68e2334a501 Mon Sep 17 00:00:00 2001 From: lopierra Date: Fri, 5 Apr 2024 07:52:45 -0700 Subject: [PATCH 03/15] changed Trio+ to Trio Plus --- src/linkml/include_participant.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkml/include_participant.yaml b/src/linkml/include_participant.yaml index f65b4f0..2cdaac8 100644 --- a/src/linkml/include_participant.yaml +++ b/src/linkml/include_participant.yaml @@ -416,7 +416,7 @@ enums: description: Proband + two parents trio_plus: text: trio_plus - title: Trio+ + title: Trio Plus description: Proband + two parents + other relatives enum_race: name: enum_race From bff674e60ca7c7544ceb7d96e35826bb0ec0da82 Mon Sep 17 00:00:00 2001 From: lopierra Date: Wed, 17 Apr 2024 16:44:56 -0400 Subject: [PATCH 04/15] add min/max to age at engagement/vital status/condition --- src/linkml/include_participant.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/linkml/include_participant.yaml b/src/linkml/include_participant.yaml index 2cdaac8..f11b856 100644 --- a/src/linkml/include_participant.yaml +++ b/src/linkml/include_participant.yaml @@ -178,6 +178,8 @@ slots: description: Age in days of Participant at first recorded study event (enrollment, visit, observation, sample collection, survey completion, etc.). Age at enrollment is preferred, if available. title: Age at First Patient Engagement range: integer + minimum_value: 0 + maximum_value: 33000 required: true firstPatientEngagementEvent: definition_uri: include:firstPatientEngagementEvent @@ -194,6 +196,8 @@ slots: description: Age in days when participant's vital status was last recorded title: Age at Last Vital Status range: integer + minimum_value: 0 + maximum_value: 33000 # hasCondition: # definition_uri: include:hasCondition # description: Link to a Condition @@ -221,6 +225,8 @@ slots: description: Age in days at which Condition or Measure was observed, recorded, or diagnosed title: Age At Condition or Measure Observation range: integer + minimum_value: 0 + maximum_value: 33000 conditionInterpretation: definition_uri: include:conditionInterpretation description: Whether Condition was observed or not From 650247de570a478fcc76517eb798d8c028784de4 Mon Sep 17 00:00:00 2001 From: lopierra Date: Wed, 17 Apr 2024 16:58:47 -0400 Subject: [PATCH 05/15] added range:string to Condition slots that didn't have range; changed range to string for study/dataset fields that need explanations --- src/linkml/include_participant.yaml | 8 ++++++-- src/linkml/include_study.yaml | 12 +++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/linkml/include_participant.yaml b/src/linkml/include_participant.yaml index f11b856..563ab41 100644 --- a/src/linkml/include_participant.yaml +++ b/src/linkml/include_participant.yaml @@ -186,6 +186,7 @@ slots: description: Event for which Age at First Patient Engagement is given (e.g. enrollment, visit, observation, sample collection, survey completion, etc.). Age at enrollment is preferred, if available. title: First Patient Engagement Event required: true + range: string outcomesVitalStatus: definition_uri: include:outcomesVitalStatus description: Whether participant is alive or dead @@ -212,14 +213,17 @@ slots: definition_uri: include:eventId description: Identifier for event (Visit, Survey completion, Sample collection, etc.) to which the Condition data are linked, if applicable. There may be multiple events linked to a Participant. title: Event ID + range: string eventType: definition_uri: include:eventType description: Type of event for which Event ID is given (Visit, Survey completion, Sample collection, etc.) title: Event Type + range: string conditionMeasureSourceText: definition_uri: include:conditionMeasureSourceText description: Co-occurring Condition (phenotype or diagnosis) or Measure (observation with numeric value), as described by data contributor. The Down Syndrome Genetic Diagnosis will be rolled into this field. title: Condition or Measure Source Text + range: string ageAtConditionMeasureObservation: definition_uri: include:ageAtConditionMeasureObservation description: Age in days at which Condition or Measure was observed, recorded, or diagnosed @@ -305,7 +309,7 @@ enums: text: not_observed title: Not Observed description: Participant was specifically examined or medical record queried for condition and found to be negative - enum_conditionDataSource: + enum_conditionDataSource: #replace with enum_clinicalDataSourceType & re-harmonize data name: enum_conditionDataSource permissible_values: clinical: @@ -422,7 +426,7 @@ enums: description: Proband + two parents trio_plus: text: trio_plus - title: Trio Plus + title: Trio Plus #need to reharmonize data description: Proband + two parents + other relatives enum_race: name: enum_race diff --git a/src/linkml/include_study.yaml b/src/linkml/include_study.yaml index fb4e910..44f7a1b 100644 --- a/src/linkml/include_study.yaml +++ b/src/linkml/include_study.yaml @@ -206,7 +206,7 @@ slots: definition_uri: include:studyExpectedNumberOfParticipants description: Expected number of participants in this study title: Study Expected Number of Participants - range: integer + range: string required: true guidType: definition_uri: include:guidType @@ -253,13 +253,13 @@ slots: definition_uri: include:datasetExpectedNumberOfParticipants description: Expected number of participants in this dataset title: Dataset Expected Number of Participants - range: integer + range: string required: true expectedNumberOfFiles: definition_uri: include:expectedNumberOfFiles description: Expected number of files associated with this dataset, including dictionaries title: Expected Number of Files - range: integer + range: string required: false #update to true when this is figured out # participantIdList: # definition_uri: include:participantIdList @@ -272,13 +272,15 @@ slots: definition_uri: include:dataCollectionStartYear description: Year that data collection started title: Data Collection Start Year - pattern: "^1|2\\d(3)$|nan" + range: string + #pattern: "^1|2\\d(3)$|nan" required: false #change to true when we have for all studies dataCollectionEndYear: definition_uri: include:dataCollectionEndYear description: Year that data collection ended title: Data Collection End Year - pattern: "^1|2\\d(3)$|nan" + range: string + #pattern: "^1|2\\d(3)$|nan" required: false datasetDataCategory: definition_uri: include:dataCategory From e070c15768065c81a4798af8e0d2232f60dd8e8c Mon Sep 17 00:00:00 2001 From: lopierra Date: Thu, 2 May 2024 17:55:58 -0700 Subject: [PATCH 06/15] add Physical Fitness to enum_researchDomain; add Fitness and Physical Activity to enum_dataCategory; lowercase d in enum_dataCategory --- src/linkml/include_study.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/linkml/include_study.yaml b/src/linkml/include_study.yaml index 44f7a1b..cd1c703 100644 --- a/src/linkml/include_study.yaml +++ b/src/linkml/include_study.yaml @@ -462,6 +462,10 @@ enums: text: all_co_occurring_conditions title: All Co-occurring Conditions meaning: mesh:D013568 + physical_fitness: + text: physical_fitness + title: Physical Fitness + meaning: mesh:D010809 other: text: other title: Other @@ -541,6 +545,12 @@ enums: microbiome: text: microbiome title: Microbiome + fitness: + text: fitness + title: Fitness + physical_activity: + text: physical_activity + title: Physical Activity other: text: other title: Other From 19280885802cd5eabcbbd114ae91b1838f9ba052 Mon Sep 17 00:00:00 2001 From: lopierra Date: Thu, 2 May 2024 18:00:33 -0700 Subject: [PATCH 07/15] change Expected Number of Files/Participants back to integer; update descriptions to say that additional explanation should go in Description --- src/linkml/include_study.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/linkml/include_study.yaml b/src/linkml/include_study.yaml index cd1c703..2925c35 100644 --- a/src/linkml/include_study.yaml +++ b/src/linkml/include_study.yaml @@ -188,7 +188,7 @@ slots: definition_uri: include:expectedDataCategory description: Categories of data expected to be collected in this study title: Expected Data Category - range: enum_DataCategory + range: enum_dataCategory required: true multivalued: true studyWebsite: @@ -204,7 +204,7 @@ slots: multivalued: true studyExpectedNumberOfParticipants: definition_uri: include:studyExpectedNumberOfParticipants - description: Expected number of participants in this study + description: Expected number of participants in this study. If additional explanation is needed, please add to Study Description field. title: Study Expected Number of Participants range: string required: true @@ -251,15 +251,15 @@ slots: # required: false #should be derived from dataFile if omics datasetExpectedNumberOfParticipants: definition_uri: include:datasetExpectedNumberOfParticipants - description: Expected number of participants in this dataset + description: Expected number of participants in this dataset. If additional explanation is needed, please add to Dataset Description field. title: Dataset Expected Number of Participants - range: string + range: integer required: true expectedNumberOfFiles: definition_uri: include:expectedNumberOfFiles - description: Expected number of files associated with this dataset, including dictionaries + description: Expected number of files associated with this dataset, including dictionaries. If additional explanation is needed, please add to Dataset Description field. title: Expected Number of Files - range: string + range: integer required: false #update to true when this is figured out # participantIdList: # definition_uri: include:participantIdList @@ -287,7 +287,7 @@ slots: description: General category of data in dataset; pipe-separated if multiple title: Dataset Data Category required: true - range: enum_DataCategory + range: enum_dataCategory datasetDataType: definition_uri: include:dataType description: Specific type of data contained in dataset; pipe-separated if multiple (e.g. Preprocessed metabolite relative abundance, Absolute protein concentration, Aligned reads, Simple nucleotide variations, GVCF, Gene expression quantifications, Gene fusions, Somatic copy number variations, Somatic structural variations) @@ -511,9 +511,9 @@ enums: unknown: text: unknown title: Unknown - enum_DataCategory: - name: enum_DataCategory - definition_uri: include:enum_DataCategory + enum_dataCategory: + name: enum_dataCategory + definition_uri: include:enum_dataCategory permissible_values: unharmonized_demographic_clinical_data: text: unharmonized_demographic_clinical_data From 50333acf64365048f30bbd70de2ed1160daed1d4 Mon Sep 17 00:00:00 2001 From: lopierra Date: Thu, 2 May 2024 18:14:51 -0700 Subject: [PATCH 08/15] add acknowledgments and citationStatement to Study --- src/linkml/include_study.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/linkml/include_study.yaml b/src/linkml/include_study.yaml index 2925c35..2850e59 100644 --- a/src/linkml/include_study.yaml +++ b/src/linkml/include_study.yaml @@ -49,6 +49,8 @@ classes: - studyPublication - studyExpectedNumberOfParticipants - guidType + - acknowledgments + - citationStatement Dataset: name: Dataset definition_uri: include:Dataset @@ -220,6 +222,19 @@ slots: title: Study dbGaP range: string multivalued: true + acknowledgments: + definition_uri: include:acknowledgments + description: Funding statement and acknowledgments for this study + title: Acknowledgments + range: string + multivalued: true + studyDbgap: + definition_uri: include:studyDbgap + description: dbGaP "phs" accession code(s) associated with this Study, either for access or informational purposes (pipe-separated if multiple) + title: Study dbGaP + range: string + multivalued: true + - citationStatement datasetName: definition_uri: include:datasetName description: Full name of the dataset, provided by contributor From 5be39a27f67446ca9ec66ac15f08e5c7d72598c9 Mon Sep 17 00:00:00 2001 From: lopierra Date: Fri, 3 May 2024 14:24:59 -0700 Subject: [PATCH 09/15] replaced hasStudy, hasParticipant etc. with studyCode, participantExternalId, etc --- src/linkml/include_assay.yaml | 45 +++++------------------------ src/linkml/include_participant.yaml | 17 ++--------- src/linkml/include_study.yaml | 11 ++++--- 3 files changed, 16 insertions(+), 57 deletions(-) diff --git a/src/linkml/include_assay.yaml b/src/linkml/include_assay.yaml index f858471..5296a23 100644 --- a/src/linkml/include_assay.yaml +++ b/src/linkml/include_assay.yaml @@ -45,8 +45,9 @@ classes: name: Biospecimen title: Biospecimen slots: - - hasStudy - - hasParticipant + - studyCode + - participantGlobalId + - participantExternalId - sampleGlobalId - sampleExternalId - sampleType @@ -67,9 +68,7 @@ classes: - biospecimenStorage - sampleAvailability - containerAvailability - # - hasDataFile - # - hasAliquot DataFile: annotations: required: @@ -84,9 +83,11 @@ classes: name: DataFile title: Data File slots: - - hasStudy - - hasParticipant - - hasBiospecimen + - studyCode + - participantGlobalId + - participantExternalId + - sampleGlobalId + - sampleExternalId - fileGlobalId - fileExternalId - fileName @@ -215,11 +216,6 @@ slots: the Virtual Biorepository title: Container Availability range: enum_containerAvailability - hasParticipant: - definition_uri: include:hasParticipant - description: Link to a Participant - title: Has Participant - range: Participant # this should link to Participant External ID for now, then INCLUDE ID when we have them fileGlobalId: definition_uri: include:fileGlobalId description: INCLUDE global file identifier, assigned by DCC @@ -276,11 +272,6 @@ slots: title: File Format required: true range: string - # has_aliquot: - # definition_uri: include:has_aliquot - # description: An aliquot of a sample - # title: Has Aliquot - # range: Aliquot fileSize: definition_uri: include:fileSize description: Size of file, if known (mainly important if large) @@ -301,26 +292,6 @@ slots: description: DOI of publication associated with this file, if published title: Publication DOI range: string # is there a doi range? - hasBiospecimen: - definition_uri: include:hasBiospecimen - description: Link to a Biospecimen - title: Has Biospecimen - range: Biospecimen - # hasDataFile: - # definition_uri: include:hasDataFile - # description: Link to a Data File - # title: Has Data File - # range: DataFile - # has_output: - # definition_uri: include:has_output - # description: The DataFile Output of an Assay - # title: Has Output - # range: DataFile - # uses_biospecimen: - # definition_uri: include:uses_biospecimen - # description: The Biospecimen an Assay is performed on - # title: Uses Biospecimen - # range: Biospecimen enums: enum_dataAccess: definition_uri: include:enum_dataAccess diff --git a/src/linkml/include_participant.yaml b/src/linkml/include_participant.yaml index 563ab41..37f0ce2 100644 --- a/src/linkml/include_participant.yaml +++ b/src/linkml/include_participant.yaml @@ -35,7 +35,7 @@ classes: title: Participant is_a: Thing slots: - - hasStudy + - studyCode - participantGlobalId - participantExternalId - familyId @@ -53,7 +53,6 @@ classes: - firstPatientEngagementEvent - outcomesVitalStatus - ageAtLastVitalStatus -# - hasCondition # FamilyGroup: # name: FamilyGroup # definition_uri: include:FamilyGroup @@ -80,8 +79,8 @@ classes: is_a: Thing description: Co-occurring conditions and other observations for the participant slots: - - hasStudy - - hasParticipant + - studyCode + - participantExternalId - eventId - eventType - conditionMeasureSourceText # will include DS Genetic Diagnosis @@ -199,16 +198,6 @@ slots: range: integer minimum_value: 0 maximum_value: 33000 -# hasCondition: -# definition_uri: include:hasCondition -# description: Link to a Condition -# title: Has Condition -# range: Condition - hasStudy: - definition_uri: include:hasStudy - description: Link to a Study - title: Has Study - range: Study eventId: definition_uri: include:eventId description: Identifier for event (Visit, Survey completion, Sample collection, etc.) to which the Condition data are linked, if applicable. There may be multiple events linked to a Participant. diff --git a/src/linkml/include_study.yaml b/src/linkml/include_study.yaml index 2850e59..e80dec1 100644 --- a/src/linkml/include_study.yaml +++ b/src/linkml/include_study.yaml @@ -62,7 +62,7 @@ classes: title: Dataset is_a: Thing slots: - - hasStudy + - studyCode #add hasDataset to Assay>DataFile? - datasetName - datasetDescription @@ -228,13 +228,12 @@ slots: title: Acknowledgments range: string multivalued: true - studyDbgap: - definition_uri: include:studyDbgap - description: dbGaP "phs" accession code(s) associated with this Study, either for access or informational purposes (pipe-separated if multiple) - title: Study dbGaP + citationStatement: + definition_uri: include:citationStatement + description: Statement that secondary data users should use to acknowledge use of this dataset. E.g., "The results analyzed and here are based in whole or in part upon data generated by the INCLUDE (INvestigation of Co-occurring conditions across the Lifespan to Understand Down syndromE) Project , and were accessed from the INCLUDE Data Hub and ." + title: Citation Statement range: string multivalued: true - - citationStatement datasetName: definition_uri: include:datasetName description: Full name of the dataset, provided by contributor From 625c14daafad92264eb7f332d2c3a6b7385c8556 Mon Sep 17 00:00:00 2001 From: lopierra Date: Tue, 7 May 2024 15:03:13 -0700 Subject: [PATCH 10/15] corrected description of studyDescription --- src/linkml/include_study.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkml/include_study.yaml b/src/linkml/include_study.yaml index e80dec1..f1f4ae9 100644 --- a/src/linkml/include_study.yaml +++ b/src/linkml/include_study.yaml @@ -106,7 +106,7 @@ slots: multivalued: true studyDescription: definition_uri: include:studyDescription - description: Brief description of the study (2-4 sentences). Should match description in https://includedcc.org/studies. + description: Brief description of the study (2-4 sentences) title: Study Description required: true range: string From 3d9735f9ca25500968e997c36310d96bc19f4dd9 Mon Sep 17 00:00:00 2001 From: madanucd Date: Fri, 10 May 2024 14:53:56 -0400 Subject: [PATCH 11/15] data category enum reference --- src/linkml/include_assay.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkml/include_assay.yaml b/src/linkml/include_assay.yaml index 5296a23..a2a57d9 100644 --- a/src/linkml/include_assay.yaml +++ b/src/linkml/include_assay.yaml @@ -250,7 +250,7 @@ slots: Metabolomics, Immune profiling, Transcriptomics) title: Data Category required: true - range: enum_DataCategory + range: include:enum_DataCategory dataType: definition_uri: include:dataType description: Specific type of data contained in file (e.g. Preprocessed metabolite relative abundance, Absolute protein concentration, Aligned reads, Simple nucleotide variations, GVCF, Gene expression quantifications, Gene fusions, Somatic copy number variations, Somatic structural variations) From d833710a45a610c01123273568291db3ca780012 Mon Sep 17 00:00:00 2001 From: madanucd Date: Fri, 10 May 2024 15:13:01 -0400 Subject: [PATCH 12/15] exclude prefix in data category enum --- src/linkml/include_assay.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkml/include_assay.yaml b/src/linkml/include_assay.yaml index a2a57d9..5296a23 100644 --- a/src/linkml/include_assay.yaml +++ b/src/linkml/include_assay.yaml @@ -250,7 +250,7 @@ slots: Metabolomics, Immune profiling, Transcriptomics) title: Data Category required: true - range: include:enum_DataCategory + range: enum_DataCategory dataType: definition_uri: include:dataType description: Specific type of data contained in file (e.g. Preprocessed metabolite relative abundance, Absolute protein concentration, Aligned reads, Simple nucleotide variations, GVCF, Gene expression quantifications, Gene fusions, Somatic copy number variations, Somatic structural variations) From 1c98dd5fb6d4646194e4b08562a030ded67c8aef Mon Sep 17 00:00:00 2001 From: madanucd Date: Fri, 10 May 2024 16:16:27 -0400 Subject: [PATCH 13/15] Convert dataCategory enum to camelCase --- src/linkml/include_assay.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkml/include_assay.yaml b/src/linkml/include_assay.yaml index 5296a23..889d1ad 100644 --- a/src/linkml/include_assay.yaml +++ b/src/linkml/include_assay.yaml @@ -250,7 +250,7 @@ slots: Metabolomics, Immune profiling, Transcriptomics) title: Data Category required: true - range: enum_DataCategory + range: enum_dataCategory dataType: definition_uri: include:dataType description: Specific type of data contained in file (e.g. Preprocessed metabolite relative abundance, Absolute protein concentration, Aligned reads, Simple nucleotide variations, GVCF, Gene expression quantifications, Gene fusions, Somatic copy number variations, Somatic structural variations) From ae5f050c3628c1146717b25cf8c23a38ab727f0b Mon Sep 17 00:00:00 2001 From: lopierra Date: Fri, 10 May 2024 15:31:07 -0700 Subject: [PATCH 14/15] changed studyExpectedNumberOfParticipants from string back to integer --- src/linkml/include_study.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkml/include_study.yaml b/src/linkml/include_study.yaml index f1f4ae9..17158a6 100644 --- a/src/linkml/include_study.yaml +++ b/src/linkml/include_study.yaml @@ -208,7 +208,7 @@ slots: definition_uri: include:studyExpectedNumberOfParticipants description: Expected number of participants in this study. If additional explanation is needed, please add to Study Description field. title: Study Expected Number of Participants - range: string + range: integer required: true guidType: definition_uri: include:guidType From 486e87a3f46aff9f038eefa2e6e57bd1e6b02c39 Mon Sep 17 00:00:00 2001 From: lopierra Date: Fri, 10 May 2024 15:44:22 -0700 Subject: [PATCH 15/15] add participantGlobalId to Condition --- src/linkml/include_participant.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/linkml/include_participant.yaml b/src/linkml/include_participant.yaml index 37f0ce2..3ed82e8 100644 --- a/src/linkml/include_participant.yaml +++ b/src/linkml/include_participant.yaml @@ -80,6 +80,7 @@ classes: description: Co-occurring conditions and other observations for the participant slots: - studyCode + - participantGlobalId - participantExternalId - eventId - eventType