diff --git a/src/ARCTokenization/ARCMock.fs b/src/ARCTokenization/ARCMock.fs index 644bbca..43bf16f 100644 --- a/src/ARCTokenization/ARCMock.fs +++ b/src/ARCTokenization/ARCMock.fs @@ -199,8 +199,7 @@ type ARCMock = ["" ; yield! Study_Person_Roles_Term_Source_REF |> Option.defaultValue Seq.empty] ] - Terms.InvestigationMetadata.nonObsoleteCvTerms - |> List.skip 1 //(ignore root term) + Terms.InvestigationMetadata.nonObsoleteNonRootCvTerms |> List.filter (fun t -> (not (t.Name.StartsWith("Comment"))) || (t.Name.Equals("Comment[ORCID]"))) // ignore all comments except non-obsolete orcid |> List.zip valueRows |> List.map (fun (values,term) -> @@ -343,8 +342,7 @@ type ARCMock = [""; yield! Study_Person_Roles_Term_Source_REF |> Option.defaultValue Seq.empty ] ] - Terms.StudyMetadata.nonObsoleteCvTerms - |> List.skip 1 //(ignore root term) + Terms.StudyMetadata.nonObsoleteNonRootCvTerms |> List.filter (fun t -> not (t.Name.StartsWith("Comment"))) // ignore all comments |> List.zip valueRows |> List.map (fun (values,term) -> @@ -407,8 +405,7 @@ type ARCMock = [""; yield! Assay_Performer_Roles_Term_Source_REF |> Option.defaultValue Seq.empty ] ] - Terms.AssayMetadata.nonObsoleteCvTerms - |> List.skip 1 //(ignore root term) + Terms.AssayMetadata.nonObsoleteNonRootCvTerms |> List.filter (fun t -> not (t.Name.StartsWith("Comment"))) // ignore all comments |> List.zip valueRows |> List.map (fun (values,term) -> diff --git a/src/ARCTokenization/MetadataSheet.fs b/src/ARCTokenization/MetadataSheet.fs index e615c64..61108eb 100644 --- a/src/ARCTokenization/MetadataSheet.fs +++ b/src/ARCTokenization/MetadataSheet.fs @@ -34,8 +34,8 @@ module MetadataSheet = | UnMatchable name -> fun (pv) -> UserParam(name, pv, attributes) - let rec parseAssayKey = parseKeyWithTerms AssayMetadata.cvTerms + let rec parseAssayKey = parseKeyWithTerms AssayMetadata.nonRootCvTerms - let rec parseStudyKey = parseKeyWithTerms StudyMetadata.cvTerms + let rec parseStudyKey = parseKeyWithTerms StudyMetadata.nonRootCvTerms - let rec parseInvestigationKey = parseKeyWithTerms InvestigationMetadata.cvTerms \ No newline at end of file + let rec parseInvestigationKey = parseKeyWithTerms InvestigationMetadata.nonRootCvTerms \ No newline at end of file diff --git a/src/ARCTokenization/Terms.fs b/src/ARCTokenization/Terms.fs index 3397b0b..b0b8d2b 100644 --- a/src/ARCTokenization/Terms.fs +++ b/src/ARCTokenization/Terms.fs @@ -22,58 +22,123 @@ module InvestigationMetadata = let ontology = OboOntology.fromLines true obo - let nonObsoleteCvTerms = + let nonRootOboTerms = ontology.Terms - |> List.filter (fun t -> not t.IsObsolete) - |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) + |> List.skip 1 - let obsoleteCvTerms = + let nonObsoleteOboTerms = ontology.Terms + |> List.filter (fun t -> not t.IsObsolete) + + let nonObsoleteNonRootOboTerms = + nonRootOboTerms + |> List.filter (fun t -> not t.IsObsolete) + + let obsoleteOboTerms = + nonRootOboTerms |> List.filter (fun t -> t.IsObsolete) - |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) let cvTerms = ontology.Terms |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) + let nonRootCvTerms = + nonRootOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) + + let nonObsoleteCvTerms = + nonObsoleteOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) + + let nonObsoleteNonRootCvTerms = + nonObsoleteNonRootOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) + + let obsoleteCvTerms = + obsoleteOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) + module StudyMetadata = let internal obo = (EmbeddedResource.load "structural_ontologies.study_metadata_structural_ontology.obo").Replace("\r\n", "\n").Split('\n') let ontology = OboOntology.fromLines true obo - let nonObsoleteCvTerms = + let nonRootOboTerms = ontology.Terms - |> List.filter (fun t -> not t.IsObsolete) - |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) + |> List.skip 1 - let obsoleteCvTerms = + let nonObsoleteOboTerms = ontology.Terms + |> List.filter (fun t -> not t.IsObsolete) + + let nonObsoleteNonRootOboTerms = + nonRootOboTerms + |> List.filter (fun t -> not t.IsObsolete) + + let obsoleteOboTerms = + nonRootOboTerms |> List.filter (fun t -> t.IsObsolete) - |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) let cvTerms = ontology.Terms - |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) + let nonRootCvTerms = + nonRootOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) + + let nonObsoleteCvTerms = + nonObsoleteOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) + + let nonObsoleteNonRootCvTerms = + nonObsoleteNonRootOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) + + let obsoleteCvTerms = + obsoleteOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) module AssayMetadata = let internal obo = (EmbeddedResource.load "structural_ontologies.assay_metadata_structural_ontology.obo").Replace("\r\n", "\n").Split('\n') let ontology = OboOntology.fromLines true obo - let nonObsoleteCvTerms = + let nonRootOboTerms = + ontology.Terms + |> List.skip 1 + + let nonObsoleteOboTerms = ontology.Terms |> List.filter (fun t -> not t.IsObsolete) - |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) + + let nonObsoleteNonRootOboTerms = + nonRootOboTerms + |> List.filter (fun t -> not t.IsObsolete) + + let obsoleteOboTerms = + nonRootOboTerms + |> List.filter (fun t -> t.IsObsolete) let cvTerms = ontology.Terms |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "ASSMSO")) - let obsoleteCvTerms = - ontology.Terms - |> List.filter (fun t -> t.IsObsolete) + let nonRootCvTerms = + nonRootOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "ASSMSO")) + + let nonObsoleteCvTerms = + nonObsoleteOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "ASSMSO")) + + let nonObsoleteNonRootCvTerms = + nonObsoleteNonRootOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "ASSMSO")) + + let obsoleteCvTerms = + obsoleteOboTerms |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "ASSMSO")) diff --git a/src/ARCTokenization/structural_ontologies/arc_file_structure_ontology.obo b/src/ARCTokenization/structural_ontologies/arc_file_structure_ontology.obo index eaa86c1..f15987a 100644 --- a/src/ARCTokenization/structural_ontologies/arc_file_structure_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/arc_file_structure_ontology.obo @@ -1,4 +1,4 @@ -!This file was auto generated on 2024-01-03. Do not edit it. All manual changes will be overwritten by the next generator run eventually. +!This file was auto generated on 2024-01-05. Do not edit it. All manual changes will be overwritten by the next generator run eventually. format-version: 1.2 data-version: init/2023-10-26 saved-by: Kevin Schneider diff --git a/src/ARCTokenization/structural_ontologies/assay_metadata_structural_ontology.obo b/src/ARCTokenization/structural_ontologies/assay_metadata_structural_ontology.obo index b567d37..960f869 100644 --- a/src/ARCTokenization/structural_ontologies/assay_metadata_structural_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/assay_metadata_structural_ontology.obo @@ -1,4 +1,4 @@ -!This file was auto generated on 2024-01-03. Do not edit it. All manual changes will be overwritten by the next generator run eventually. +!This file was auto generated on 2024-01-05. Do not edit it. All manual changes will be overwritten by the next generator run eventually. format-version: 1.2 data-version: init/2023-07-27 saved-by: Kevin Schneider diff --git a/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo b/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo index 94052f6..8a37d85 100644 --- a/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo @@ -1,4 +1,4 @@ -!This file was auto generated on 2024-01-03. Do not edit it. All manual changes will be overwritten by the next generator run eventually. +!This file was auto generated on 2024-01-05. Do not edit it. All manual changes will be overwritten by the next generator run eventually. format-version: 1.2 data-version: init/2023-07-20 saved-by: Kevin Schneider diff --git a/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo b/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo index 78dda8b..dc4129e 100644 --- a/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo @@ -1,4 +1,4 @@ -!This file was auto generated on 2024-01-03. Do not edit it. All manual changes will be overwritten by the next generator run eventually. +!This file was auto generated on 2024-01-05. Do not edit it. All manual changes will be overwritten by the next generator run eventually. format-version: 1.2 data-version: init/2023-07-27 saved-by: Kevin Schneider diff --git a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj index aa4ddec..5725122 100644 --- a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj +++ b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj @@ -11,7 +11,7 @@ - + diff --git a/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs index 6569c36..f2482ad 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs @@ -11,6 +11,5 @@ module AssayMetadata = open TestUtils let allExpectedMetadataTermsEmpty = - Terms.AssayMetadata.cvTerms - |> List.skip 1 //(ignore root term) + Terms.AssayMetadata.nonObsoleteNonRootCvTerms |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs index b77a78a..6fb8120 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs @@ -14,10 +14,8 @@ module InvestigationMetadata = let parsedInvestigationMetadataSimple = Investigation.parseMetadataSheetFromFile "Fixtures/correct/investigation_simple.xlsx" let allExpectedMetadataTermsEmpty = - Terms.InvestigationMetadata.cvTerms - |> List.skip 1 //(ignore root term) + Terms.InvestigationMetadata.nonObsoleteNonRootCvTerms |> List.filter (fun t -> not (t.Name.StartsWith("Comment"))) // ignore orcids - |> List.filter (fun t -> not (List.contains t Terms.InvestigationMetadata.obsoleteCvTerms)) // ignore obsolete terms |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) [] diff --git a/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs index 7f7958b..287cbf7 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs @@ -11,6 +11,5 @@ module StudyMetadata = open TestUtils let allExpectedMetadataTermsEmpty = - Terms.StudyMetadata.cvTerms - |> List.skip 1 //(ignore root term) + Terms.StudyMetadata.nonObsoleteNonRootCvTerms |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/ReferenceObjects.fs b/tests/ARCTokenization.Tests/ReferenceObjects.fs index 7852c03..d6e9942 100644 --- a/tests/ARCTokenization.Tests/ReferenceObjects.fs +++ b/tests/ARCTokenization.Tests/ReferenceObjects.fs @@ -4,6 +4,16 @@ open ControlledVocabulary open ARCTokenization open FsSpreadsheet +module Terms = + + let referenceInvestigationMetadataOntologyName = "INVMSO" + let referenceStudyMetadataOntologyName = "INVMSO" + let referenceAssayMetadataOntologyName = "INVMSO" + + let referenceInvestigationMetadataOntologyRootTerm = CvTerm.create(accession = "INVMSO:00000001", name = "Investigation Metadata", ref = "INVMSO") + let referenceStudyMetadataOntologyRootTerm = CvTerm.create(accession = "STDMSO:00000001", name = "Study Metadata", ref = "STDMSO") + let referenceAssayMetadataOntologyRootTerm = CvTerm.create(accession = "ASSMSO:00000001", name = "Assay Metadata", ref = "ASSMSO") + module Tokenization = module KeyParser = @@ -224,8 +234,7 @@ module MockAPI = // equivalent to a metadatasheet with only the first column that contains metadata section keys let empty = - Terms.InvestigationMetadata.nonObsoleteCvTerms - |> List.skip 1 //(ignore root term) + Terms.InvestigationMetadata.nonObsoleteNonRootCvTerms |> List.filter (fun t -> (not (t.Name.StartsWith("Comment"))) || (t.Name.Equals("Comment[ORCID]"))) // ignore all comments except non-obsolete orcid |> List.map (fun cvTerm -> CvParam(cvTerm, ParamValue.CvValue Terms.StructuralTerms.metadataSectionKey, [])) @@ -233,8 +242,7 @@ module MockAPI = // equivalent to a metadatasheet with only the first column that contains metadata section keys let empty = - Terms.StudyMetadata.nonObsoleteCvTerms - |> List.skip 1 //(ignore root term) + Terms.StudyMetadata.nonObsoleteNonRootCvTerms |> List.filter (fun t -> not (t.Name.StartsWith("Comment")) ) // ignore all comments |> List.map (fun cvTerm -> CvParam(cvTerm, ParamValue.CvValue Terms.StructuralTerms.metadataSectionKey, [])) @@ -242,7 +250,6 @@ module MockAPI = // equivalent to a metadatasheet with only the first column that contains metadata section keys let empty = - Terms.AssayMetadata.nonObsoleteCvTerms - |> List.skip 1 //(ignore root term) + Terms.AssayMetadata.nonObsoleteNonRootCvTerms |> List.filter (fun t -> not (t.Name.StartsWith("Comment")) ) // ignore all comments |> List.map (fun cvTerm -> CvParam(cvTerm, ParamValue.CvValue Terms.StructuralTerms.metadataSectionKey, [])) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/StructuralOntologytests.fs b/tests/ARCTokenization.Tests/StructuralOntologytests.fs index 7b00433..df66808 100644 --- a/tests/ARCTokenization.Tests/StructuralOntologytests.fs +++ b/tests/ARCTokenization.Tests/StructuralOntologytests.fs @@ -5,6 +5,8 @@ open ARCTokenization open ARCTokenization.Terms open Xunit +open TestUtils + module InvestigationMetadata = [] @@ -21,6 +23,18 @@ module InvestigationMetadata = (fun (e,a) -> Assert.Equal(e,a)) ) + [] + let ``no root term in non root terms`` () = + Assert.All( + InvestigationMetadata.nonRootCvTerms, + (fun t -> + Assert.True( + t.Name <> ReferenceObjects.Terms.referenceInvestigationMetadataOntologyRootTerm.Name + && t.Accession <> ReferenceObjects.Terms.referenceInvestigationMetadataOntologyRootTerm.Accession + ) + ) + ) + module StudyMetadata = [] @@ -37,6 +51,18 @@ module StudyMetadata = (fun (e,a) -> Assert.Equal(e,a)) ) + [] + let ``no root term in non root terms`` () = + Assert.All( + StudyMetadata.nonRootCvTerms, + (fun t -> + Assert.True( + t.Name <> ReferenceObjects.Terms.referenceStudyMetadataOntologyRootTerm.Name + && t.Accession <> ReferenceObjects.Terms.referenceStudyMetadataOntologyRootTerm.Accession + ) + ) + ) + [] let ``"STUDY METADATA" has correct ID`` () = let smTerm = @@ -100,4 +126,16 @@ module AssayMetadata = Assert.All( List.zip expected actual, (fun (e,a) -> Assert.Equal(e,a)) + ) + + [] + let ``no root term in non root terms`` () = + Assert.All( + AssayMetadata.nonRootCvTerms, + (fun t -> + Assert.True( + t.Name <> ReferenceObjects.Terms.referenceAssayMetadataOntologyRootTerm.Name + && t.Accession <> ReferenceObjects.Terms.referenceAssayMetadataOntologyRootTerm.Accession + ) + ) ) \ No newline at end of file