diff --git a/build/ProjectInfo.fs b/build/ProjectInfo.fs index 9f90359..7d7d9bf 100644 --- a/build/ProjectInfo.fs +++ b/build/ProjectInfo.fs @@ -76,6 +76,7 @@ let structuralOntologySources = [ "src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.yml" "src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.yml" "src/ARCTokenization/structural_ontologies/assay_metadata_structural_ontology.yml" + "src/ARCTokenization/structural_ontologies/arc_process_graph_structural_ontology.yml" "src/ARCTokenization/structural_ontologies/arc_file_structure_ontology.yml" ] diff --git a/src/ARCTokenization/ARCMock.fs b/src/ARCTokenization/ARCMock.fs index a47c67c..7f2176b 100644 --- a/src/ARCTokenization/ARCMock.fs +++ b/src/ARCTokenization/ARCMock.fs @@ -1,6 +1,8 @@ namespace ARCTokenization open ControlledVocabulary +open ARCtrl +open ARCtrl.ISA type ARCMock = @@ -418,4 +420,26 @@ type ARCMock = else CvParam(term, ParamValue.Value v, []) ) - ) \ No newline at end of file + ) + + static member ProcessGraphColumn( + header: ARCtrl.ISA.CompositeHeader, + cells: seq + ) = + CompositeColumn.create(header, cells |> Array.ofSeq) + |> Tokenization.ARCtrl.CompositeColumn.tokenize + + static member ProcessGraph( + columns: seq> + ) = + let table = ArcTable.create("", new ResizeArray<_>(), new System.Collections.Generic.Dictionary<_,_>()) + + columns + |> Seq.map (fun (headerTerm, cells) -> + CompositeColumn.create(headerTerm, cells |> Array.ofSeq) + ) + |> Array.ofSeq + |> table.AddColumns + + table + |> Tokenization.ARCtrl.ARCTable.tokenizeColumns \ No newline at end of file diff --git a/src/ARCTokenization/ARCTokenization.fsproj b/src/ARCTokenization/ARCTokenization.fsproj index f2daf83..8810615 100644 --- a/src/ARCTokenization/ARCTokenization.fsproj +++ b/src/ARCTokenization/ARCTokenization.fsproj @@ -19,14 +19,17 @@ + + + @@ -60,9 +63,10 @@ - - - + + + + diff --git a/src/ARCTokenization/AnnotationTable.fs 
b/src/ARCTokenization/AnnotationTable.fs index 2f63ac1..1a92b70 100644 --- a/src/ARCTokenization/AnnotationTable.fs +++ b/src/ARCTokenization/AnnotationTable.fs @@ -5,298 +5,37 @@ open FSharpAux open FsSpreadsheet open System +open ARCtrl +open ARCtrl.ISA module AnnotationTable = -module internal List = // remove as soon as this is available in next F#Aux NuGet release - let map4 (mapping : 'T -> 'T -> 'T -> 'T -> 'U) (list1 : 'T list) (list2 : 'T list) (list3 : 'T list) (list4 : 'T list) = - if list1.Length <> list2.Length || list1.Length <> list3.Length || list1.Length <> list4.Length then - failwithf "The input lists have different lengths.\n\tlist1.Length = %i; list2.Length = %i; list3.Length = %i; list4.Length = %i" list1.Length list2.Length list3.Length list4.Length - let rec loop acc nl1 nl2 nl3 nl4 = - match nl1, nl2, nl3, nl4 with - | h1 :: t1, h2 :: t2, h3 :: t3, h4 :: t4 -> - loop (mapping h1 h2 h3 h4 :: acc) t1 t2 t3 t4 - | _ -> List.rev acc - loop [] list1 list2 list3 list4 + let a ws = + let t = ARCtrl.ISA.Spreadsheet.ArcTable.tryFromFsWorksheet ws - let inline transposeOrdinary (lists : seq<'T list>) = - if lists |> Seq.forall (fun t -> t.Length <> (Seq.head lists).Length) then - failwith "Input lists have different lengths." - List.init (Seq.head lists).Length ( - fun i -> - List.init (Seq.length lists) ( - fun j -> (Seq.item j lists)[i] - ) - ) - - -/// -/// Functions to work with FsTables into graph-based models. -/// -module AnnotationTable = - - // ----- - // TYPES - // ----- - - type TokenizedAnnotationTable = { - IOColumns : CvParam list list - TermRelatedBuildingBlocks : CvParam list list - } with - static member create io terms = - { - IOColumns = io - TermRelatedBuildingBlocks = terms - } - - /// - /// Modelling of the different types of nodes / Building Blocks. 
- /// - [] - type NodeType = - | Source - | Sink - | ProtocolRef - - - // ------ - // VALUES - // ------ - - /// - /// The header names of the columns containing Node-related Building Blocks. - /// - [] - let nodeColumnNames = [ - "Source Name" - "Sample Name" - "Raw Data File" - "Derived Data File" - "Protocol Type" - "Protocol REF" - ] + t.Value.Columns + |> Array.map (fun c -> + match c.Header with + | ISA.CompositeHeader.Input i | ISA.CompositeHeader.Output i -> + + 1 - // --------- - // FUNCTIONS - // --------- + | ISA.CompositeHeader.Characteristic headerOntology + | ISA.CompositeHeader.Factor headerOntology + | ISA.CompositeHeader.Parameter headerOntology -> + 2 - /// - /// Takes an FsWorkbook and returns all Annotation Tables it contains. - /// - let getAnnotationTables workbook = - let tables = FsWorkbook.getTables workbook |> List.ofArray - tables |> List.filter (fun t -> String.contains "annotationTable" t.Name) + | ISA.CompositeHeader.FreeText s -> + 3 - /// - /// Returns all header cells from a given FsCellsCollection and a given FsTable. - /// - let getHeaderCellsOf (fcc : FsCellsCollection) (table : FsTable) = - table.Cells fcc - |> Seq.filter (fun c -> c.RowNumber = table.RangeAddress.FirstAddress.RowNumber) + | ISA.CompositeHeader.Component a -> 4 - /// - /// Returns all data cells from a given header cell by using a given FsCellsCollection and a given FsTable. - /// - let getDataCellsOf (fcc : FsCellsCollection) (table : FsTable) (headerCell : FsCell) = - let headerColIndex = - fcc.GetCells() - |> Seq.find (fun t -> t.Value = headerCell.Value) - |> fun c -> c.Address.ColumnNumber - table.GetDataCellsOfColumnAt(fcc, headerColIndex) - //FsCellsCollection.getCellsInColumn headerCell.ColumnNumber fcc - |> Seq.toList - //|> List.skip 1 - - /// - /// Takes a list of header cells and splits them into a tuple of IO column header cells and Term-related Building Block header cells by using a given FsCellsCollection and a given FsTable. 
- /// - let splitColumns (table : FsTable) (fcc : FsCellsCollection) (cl : FsCell list) = - cl - |> List.partition (fun c -> List.contains c.Value nodeColumnNames) - - /// - /// Takes a list of header cells from Term-related Building Blocks and groups them into a list of Building Block units. - /// - /// (1 inner list = 1 Building Block unit) - /// - let groupTermRelatedBuildingBlocks (table : FsTable) (fcc : FsCellsCollection) (cl : FsCell list) = - cl - |> Seq.groupWhen (fun h -> String.contains "[" h.Value) - |> List.ofSeq - |> List.map List.ofSeq - - /// - /// Takes a list of header cells and an FsCellsCollections and returns a list of CvParams according to the information from the FsCells. - /// If `crStart` is true, it is assumed that the first header cell is a Term containing header cell of a Building Block. - /// - /// This function should only be used for parsing Term-related Building Blocks. - /// - let parseTermRelatedBuildingBlocks crStart (table : FsTable) (fcc : FsCellsCollection) (cl : FsCell list) = - //let empty() = FsCell.createEmpty () - //let getTableFieldOf (table : FsTable) (cell : FsCell) = - // table.Fields(fcc) |> ignore - - let rec loop roundOne (cells : FsCell list) = - [ - match cells with - // Case: Correct Quadruplet of headers Name, Unit, TSR, TAN - | a :: b :: c :: d :: rest when roundOne && (String.startsWith "Unit" b.Value) -> - // a = Value/Name header, b = Unit header, c = TermSourceRef header, d = TermAccessionNumber header - //let tfa = FsTableField(a.Value, a.ColumnNumber, ) - //FsTableField.getDataCells fcc true - let dataCellsVal = getDataCellsOf fcc table a - let dataCellsUnt = getDataCellsOf fcc table b - let dataCellsTsr = getDataCellsOf fcc table c - let dataCellsTan = getDataCellsOf fcc table d - let cvPars = - List.map4 ( - fun (vl : FsCell) unt tan tsr -> - let valTerm = CvUnit.create(accession = tan.Value, name = vl.Value, ref = tsr.Value) - CvParam(d.Value, a.Value, c.Value, WithCvUnitAccession (unt.Value, valTerm)) 
- ) dataCellsVal dataCellsUnt dataCellsTan dataCellsTsr - yield! cvPars - yield! loop false rest - // Case: Correct Triplet of headers Name, Unit, TSR, TAN - | a :: b :: c :: rest when roundOne -> - // a = Value/Name header, b = TermSourceRef header, c = TermAccessionNumber header - let dataCellsVal = getDataCellsOf fcc table a - let dataCellsTsr = getDataCellsOf fcc table b - let dataCellsTan = getDataCellsOf fcc table c - let cvPars = - (dataCellsVal, dataCellsTsr, dataCellsTan) - |||> List.map3 ( - fun vl tsr tan -> - let valTerm = CvTerm.create(accession = tan.Value, name = vl.Value, ref = tsr.Value) - CvParam(c.Value, a.Value, b.Value, CvValue valTerm) - ) - yield! cvPars - yield! loop false rest - // Case: Incorrect Duplet of headers Name, TSR/TAN, or headers TSR, TAN - | a :: b :: rest -> - match roundOne with - | true -> - // a = Value/Name header, b = TermSourceRef header (assumed, could also be TermAccessionNumber header if TSR column is missing) - let dataCellsVal = getDataCellsOf fcc table a - let dataCellsTsr = getDataCellsOf fcc table b - let cvPars = - (dataCellsVal, dataCellsTsr) - ||> List.map2 ( - fun vl tsr -> - let valTerm = CvTerm.create (accession = "(n/a)", name = vl.Value, ref = tsr.Value) - CvParam("n/a", a.Value, b.Value, CvValue valTerm) - ) - yield! cvPars - yield! loop false rest - | false -> - // a = TermSourceRef header, b = TermAccessionNumber header - let dataCellsTsr = getDataCellsOf fcc table a - let dataCellsTan = getDataCellsOf fcc table b - let cvPars = - (dataCellsTsr, dataCellsTan) - ||> List.map2 ( - fun tsr tan -> - let valTerm = CvTerm.create(accession = tan.Value, name = "n/a", ref = tsr.Value) - CvParam(b.Value, "(n/a)", a.Value, CvValue valTerm) - ) - yield! cvPars - yield! 
loop false rest - // Case: Incorrect Singlet of only header Name or header TSR/TAN - | a :: [] -> - match roundOne with - | true -> - // a = Value/Name header - let dataCellsVal = getDataCellsOf fcc table a - let cvPars = - dataCellsVal - |> List.map ( - fun vl -> - // use this if ParamValue shall be CvValue instead of mere Value - //let valTerm = CvTerm("(n/a)", vl.Value, "(n/a)") - CvParam("(n/a)", a.Value, "(n/a)", Value vl.Value) - ) - yield! cvPars - | false -> - // a = TermSourceRef header (assumed, could also be TermAccessionNumber header if TSR column is missing) - let dataCellsTsr = getDataCellsOf fcc table a - let cvPars = - dataCellsTsr - |> List.map ( - fun tsr -> - CvParam("(n/a)", "(n/a)", tsr.Value, Value "(n/a)") - ) - yield! cvPars - | [] -> () - ] - loop crStart cl - - /// - /// Takes a header cells and an FsCellsCollections and returns a list of CvParams according to the information from the FsCells. - /// - /// This function should only be used for parsing input-, output-, and featured columns. - /// - let parseIOColumns fcc table headerCell = - let dataCellsVal = getDataCellsOf fcc table headerCell - dataCellsVal - |> List.map ( - fun dc -> - //UserParam(headerCell.Value, ParamValue.Value dc.Value) - CvParam("(n/a)", headerCell.Value, "(n/a)", ParamValue.Value dc.Value) - ) - - /// - /// Takes an FsWorkbook and returns a list of worksheet names and their respective IO columns as CvParam lists and Term-related Building Blocks as CvParam lists. 
- /// - /// (inner CvParam list: CvParams of a column, outer CvParam list: all columns in a worksheet's AnnotationTable) - /// - let parseWorkbook wb = - let tables = getAnnotationTables wb - let worksheets = wb.GetWorksheets() - // get worksheet and its AnnotationTable as tuple - let worksheetsAndTables = - tables - |> Seq.map ( - fun t -> - let associatedWs = - worksheets - |> Seq.find ( - fun ws -> - ws.Tables - |> Seq.exists (fun t2 -> t2.Name = t.Name) - ) - associatedWs, t - ) - worksheetsAndTables - |> Seq.map ( - fun (ws,t) -> - let ioHeaderCells, termRelatedBuildingBlockHeaderCells = - getHeaderCellsOf ws.CellCollection t - |> List.ofSeq - |> splitColumns t ws.CellCollection - let ioColumns = - ioHeaderCells - |> List.map (parseIOColumns ws.CellCollection t) - let termRelatedBuildingBlocks = - termRelatedBuildingBlockHeaderCells - |> groupTermRelatedBuildingBlocks t ws.CellCollection - |> List.map (parseTermRelatedBuildingBlocks true t ws.CellCollection) - ws.Name, - TokenizedAnnotationTable.create ioColumns termRelatedBuildingBlocks + | ISA.CompositeHeader.ProtocolDescription -> 5 + | ISA.CompositeHeader.ProtocolREF -> 6 + | ISA.CompositeHeader.ProtocolUri -> 7 + | ISA.CompositeHeader.ProtocolVersion -> 8 + | ISA.CompositeHeader.ProtocolType -> 9 + | ISA.CompositeHeader.Performer -> 10 + | ISA.CompositeHeader.Date -> 11 ) - |> List.ofSeq - - /// - /// Takes a CvParam and returns the type of Node it contains. - /// - [] - let getNodeType (cvPar : #IParamBase) = - //let castedCvPar = cvPar :?> CvParam // debatable approach - //let v = Param.getCvName castedCvPar - let v = CvBase.getCvName cvPar - match v with - | "Source Name" -> Source - | "Sample Name" - | "Raw Data File" - | "Derived Data File" -> Sink - | "Protocol REF" - | "Protocol Type" -> ProtocolRef - | _ -> failwith $"HeaderCell {v} cannot be parsed to any NodeType." 
\ No newline at end of file diff --git a/src/ARCTokenization/RELEASE_NOTES.md b/src/ARCTokenization/RELEASE_NOTES.md index 91e442e..f46d5a1 100644 --- a/src/ARCTokenization/RELEASE_NOTES.md +++ b/src/ARCTokenization/RELEASE_NOTES.md @@ -1,3 +1,12 @@ +### 3.0.0 - (Released TBD) + +- Additions: + - [Add basic process graph tokenization](https://github.com/nfdi4plants/ARCTokenization/pull/48/commits/e6cd1775575aaac5aca3d2a48ff26fd31b136038): + - Based on ARCtrl ARCTable + - add ARC Process Graph Structural ontology (APGSO) + + + ### 2.0.0 - (Released 2024-01-08) Changes in existing ontology terms will now ways result in a new major version of the package. diff --git a/src/ARCTokenization/Terms.fs b/src/ARCTokenization/Terms.fs index b0b8d2b..b435c01 100644 --- a/src/ARCTokenization/Terms.fs +++ b/src/ARCTokenization/Terms.fs @@ -1,6 +1,6 @@ namespace ARCTokenization.Terms -open FsOboParser +open OBO.NET open System.IO open ControlledVocabulary @@ -99,6 +99,7 @@ module StudyMetadata = let obsoleteCvTerms = obsoleteOboTerms |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) + module AssayMetadata = let internal obo = (EmbeddedResource.load "structural_ontologies.assay_metadata_structural_ontology.obo").Replace("\r\n", "\n").Split('\n') @@ -138,9 +139,57 @@ module AssayMetadata = |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "ASSMSO")) let obsoleteCvTerms = - obsoleteOboTerms + obsoleteOboTerms |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "ASSMSO")) +// equivalents of composite header / cell types in ARCtrl +module ProcessGraph = + + let internal obo = (EmbeddedResource.load "structural_ontologies.arc_process_graph_structural_ontology.obo").Replace("\r\n", "\n").Split('\n') + + let ontology = OboOntology.fromLines true obo + + let nonRootOboTerms = + ontology.Terms + |> List.filter (fun t -> + t.Name <> "Process Graph Header" + && t.Id <> "APGSO:00000001" + && t.Name <> "IOType" 
+ && t.Id <> "APGSO:00000015" + ) + + let nonObsoleteOboTerms = + ontology.Terms + |> List.filter (fun t -> not t.IsObsolete) + + let nonObsoleteNonRootOboTerms = + nonRootOboTerms + |> List.filter (fun t -> not t.IsObsolete) + + let obsoleteOboTerms = + nonRootOboTerms + |> List.filter (fun t -> t.IsObsolete) + + let cvTerms = + ontology.Terms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "APGSO")) + + let nonRootCvTerms = + nonRootOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "APGSO")) + + let nonObsoleteCvTerms = + nonObsoleteOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "APGSO")) + + let nonObsoleteNonRootCvTerms = + nonObsoleteNonRootOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "APGSO")) + + let obsoleteCvTerms = + obsoleteOboTerms + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "APGSO")) + module StructuralTerms = diff --git a/src/ARCTokenization/Tokenization.fs b/src/ARCTokenization/Tokenization.fs index 9b855c1..ef690e4 100644 --- a/src/ARCTokenization/Tokenization.fs +++ b/src/ARCTokenization/Tokenization.fs @@ -4,6 +4,7 @@ open ControlledVocabulary open FsSpreadsheet open MetadataSheet open ARCTokenization.Terms +open ARCtrl.ISA module Tokenization = @@ -11,7 +12,7 @@ module Tokenization = match line |> Seq.toList with | [] -> failwith "Cannot convert nothin" | key :: [] -> - let f = keyParser [] key.Value + let f = keyParser [] (key.ValueAsString()) let keyTerm = let tmp = f (ParamValue.CvValue Terms.StructuralTerms.metadataSectionKey) @@ -22,7 +23,7 @@ module Tokenization = [keyTerm] | key :: cells -> - let f = keyParser [] key.Value + let f = keyParser [] (key.ValueAsString()) let keyTerm = let tmp = f (ParamValue.CvValue Terms.StructuralTerms.metadataSectionKey) @@ -33,10 +34,127 @@ module Tokenization = let cellTerms = cells |> List.map (fun c -> - let param = f 
(ParamValue.Value c.Value) + let param = f (ParamValue.Value (c.ValueAsString())) CvAttributeCollection.tryAddAttribute (Address.createRowParam(c.RowNumber)) param |> ignore CvAttributeCollection.tryAddAttribute (Address.createColumnParam(c.ColumnNumber)) param |> ignore param ) - keyTerm :: cellTerms \ No newline at end of file + keyTerm :: cellTerms + + module ARCtrl = + + module OntologyAnnotation = + + let asCvTerm (oa: OntologyAnnotation) = + CvTerm.create( + accession = oa.TermAccessionString, + name = oa.NameText, + ref = oa.TermSourceREFString + ) + + module IOType = + + let asCvTerm (io: IOType) = + match io with + | IOType.Source -> StructuralOntology.APGSO.IOType.Source + | IOType.Sample -> StructuralOntology.APGSO.IOType.Sample + | IOType.RawDataFile -> StructuralOntology.APGSO.IOType.RawDataFile + | IOType.DerivedDataFile -> StructuralOntology.APGSO.IOType.DerivedDataFile + | IOType.ImageFile -> StructuralOntology.APGSO.IOType.ImageFile + | IOType.Material -> StructuralOntology.APGSO.IOType.Material + | IOType.FreeText s -> CvTerm.create (accession = "", name = s, ref = "") + + module CompositeHeader = + + let toCvTerm(ch: CompositeHeader) = + match ch with + | CompositeHeader.Characteristic _ -> StructuralOntology.APGSO.``Process Graph Header``.Characteristic + | CompositeHeader.Factor _ -> StructuralOntology.APGSO.``Process Graph Header``.Factor + | CompositeHeader.Parameter _ -> StructuralOntology.APGSO.``Process Graph Header``.Parameter + | CompositeHeader.Component _ -> StructuralOntology.APGSO.``Process Graph Header``.Component + | CompositeHeader.ProtocolType -> StructuralOntology.APGSO.``Process Graph Header``.ProtocolType + | CompositeHeader.ProtocolDescription -> StructuralOntology.APGSO.``Process Graph Header``.ProtocolDescription + | CompositeHeader.ProtocolUri -> StructuralOntology.APGSO.``Process Graph Header``.ProtocolUri + | CompositeHeader.ProtocolVersion -> StructuralOntology.APGSO.``Process Graph Header``.ProtocolVersion + | 
CompositeHeader.ProtocolREF -> StructuralOntology.APGSO.``Process Graph Header``.ProtocolREF + | CompositeHeader.Performer -> StructuralOntology.APGSO.``Process Graph Header``.Performer + | CompositeHeader.Date -> StructuralOntology.APGSO.``Process Graph Header``.Date + | CompositeHeader.Input _ -> StructuralOntology.APGSO.``Process Graph Header``.Input + | CompositeHeader.Output _ -> StructuralOntology.APGSO.``Process Graph Header``.Output + | CompositeHeader.FreeText _ -> StructuralOntology.APGSO.FreeText + + let toHeaderParam (ch: CompositeHeader) : IParam = + match ch with + | CompositeHeader.Characteristic term -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.Characteristic, ParamValue.CvValue (OntologyAnnotation.asCvTerm term)) + + | CompositeHeader.Factor term -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.Factor, ParamValue.CvValue (OntologyAnnotation.asCvTerm term)) + + | CompositeHeader.Parameter term -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.Parameter, ParamValue.CvValue (OntologyAnnotation.asCvTerm term)) + + | CompositeHeader.Component term -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.Component, ParamValue.CvValue (OntologyAnnotation.asCvTerm term)) + + | CompositeHeader.ProtocolType -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.ProtocolType, ParamValue.Value "") + + | CompositeHeader.ProtocolDescription -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.ProtocolDescription, ParamValue.Value "") + + | CompositeHeader.ProtocolUri -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.ProtocolUri, ParamValue.Value "") + + | CompositeHeader.ProtocolVersion -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.ProtocolVersion, ParamValue.Value "") + + | CompositeHeader.ProtocolREF -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.ProtocolREF, ParamValue.Value "") + + | CompositeHeader.Performer -> + 
CvParam(StructuralOntology.APGSO.``Process Graph Header``.Performer, ParamValue.Value "") + + | CompositeHeader.Date -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.Date, ParamValue.Value "") + + | CompositeHeader.Input io -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.Input, ParamValue.CvValue (IOType.asCvTerm io)) + + | CompositeHeader.Output io -> + CvParam(StructuralOntology.APGSO.``Process Graph Header``.Output, ParamValue.CvValue (IOType.asCvTerm io)) + + | CompositeHeader.FreeText f -> + UserParam(f, ParamValue.CvValue StructuralOntology.APGSO.FreeText) + + module CompositeCell = + + let toCvParam (ch: CompositeHeader) (cc: CompositeCell) : IParam = + + let headerTerm = CompositeHeader.toCvTerm ch + + match cc with + | CompositeCell.FreeText t -> + CvParam(headerTerm, ParamValue.Value t) + + | CompositeCell.Term term -> + CvParam(headerTerm, ParamValue.CvValue (OntologyAnnotation.asCvTerm term)) + + | CompositeCell.Unitized (v, unit) -> + CvParam(headerTerm, ParamValue.WithCvUnitAccession (v, OntologyAnnotation.asCvTerm unit)) + + module CompositeColumn = + + let tokenize (cc: CompositeColumn) : IParam list= + [ + CompositeHeader.toHeaderParam cc.Header + yield! (Array.map (CompositeCell.toCvParam cc.Header) cc.Cells) + ] + + module ARCTable = + + let tokenizeColumns (at: ArcTable) = + at.Columns + |> Array.map CompositeColumn.tokenize + |> List.ofArray diff --git a/src/ARCTokenization/TopLevelParsers.fs b/src/ARCTokenization/TopLevelParsers.fs index b73e421..9638df1 100644 --- a/src/ARCTokenization/TopLevelParsers.fs +++ b/src/ARCTokenization/TopLevelParsers.fs @@ -118,12 +118,25 @@ type Study = |> List.concat /// - /// Parses all annotation tables from an ISA Study XLSX file as a list of `TokenizedAnnotationTable`s, a type that contains IO columns separated from the other columns. 
+ /// Parses all annotation tables from an ISA Study XLSX file as a + /// Map of string * `IParam` 2D List representing the individual parts of the Process graph, + /// where the string is the name of the worksheet that contained the table, + /// and the 2D lists represent a single table in which the inner 1D lists represent a single column. /// he path to the study xlsx file - static member parseAnnotationTablesFromFile (path: string) = - FsWorkbook.fromXlsxFile path - |> AnnotationTable.parseWorkbook + static member parseProcessGraphColumnsFromFile (path: string) = + (FsWorkbook.fromXlsxFile path) + .GetWorksheets() + |> Seq.choose (fun ws -> + ws + |> ARCtrl.ISA.Spreadsheet.ArcTable.tryFromFsWorksheet + |> Option.map (fun t -> + ws.Name, + t + |> Tokenization.ARCtrl.ARCTable.tokenizeColumns + ) + ) + |> Map.ofSeq type Assay = @@ -159,9 +172,22 @@ type Assay = |> List.concat /// - /// Parses all annotation tables from an ISA Assay XLSX file as a list of `TokenizedAnnotationTable`s, a type that contains IO columns separated from the other columns. + /// Parses all annotation tables from an ISA Assay XLSX file as a + /// Map of string * `IParam` 2D List representing the individual parts of the Process graph, + /// where the string is the name of the worksheet that contained the table, + /// and the 2D lists represent a single table in which the inner 1D lists represent a single column. 
/// - /// he path to the assay xlsx file - static member parseAnnotationTablesFromFile (path: string) = - FsWorkbook.fromXlsxFile path - |> AnnotationTable.parseWorkbook \ No newline at end of file + /// he path to the assay xlsx file + static member parseProcessGraphColumnsFromFile (path: string) = + (FsWorkbook.fromXlsxFile path) + .GetWorksheets() + |> Seq.choose (fun ws -> + ws + |> ARCtrl.ISA.Spreadsheet.ArcTable.tryFromFsWorksheet + |> Option.map (fun t -> + ws.Name, + t + |> Tokenization.ARCtrl.ARCTable.tokenizeColumns + ) + ) + |> Map.ofSeq \ No newline at end of file diff --git a/src/ARCTokenization/packages.lock.json b/src/ARCTokenization/packages.lock.json index b1d1696..b5c47e8 100644 --- a/src/ARCTokenization/packages.lock.json +++ b/src/ARCTokenization/packages.lock.json @@ -2,6 +2,23 @@ "version": 1, "dependencies": { ".NETStandard,Version=v2.0": { + "ARCtrl": { + "type": "Direct", + "requested": "[1.0.1, 1.0.1]", + "resolved": "1.0.1", + "contentHash": "HwGWJ8I9NMavBMgLeQt6YSSQ7FHX8/Qt/5LRbNVxh0HxpY4fQR/BL3q8n+XWOUT62F5CznvjBYtSfwXS6WaCIw==", + "dependencies": { + "ARCtrl.CWL": "1.0.1", + "ARCtrl.Contract": "1.0.1", + "ARCtrl.FileSystem": "1.0.1", + "ARCtrl.ISA": "1.0.1", + "ARCtrl.ISA.Json": "1.0.1", + "ARCtrl.ISA.Spreadsheet": "1.0.1", + "FSharp.Core": "6.0.7", + "Fable.Fetch": "2.6.0", + "Fable.SimpleHttp": "3.5.0" + } + }, "FSharp.Core": { "type": "Direct", "requested": "[8.0.100, )", @@ -17,21 +34,11 @@ "FSharp.Core": "6.0.7" } }, - "FsOboParser": { - "type": "Direct", - "requested": "[0.3.0, 0.3.0]", - "resolved": "0.3.0", - "contentHash": "vLXAWcQVwRTVZN0pBwujorxPRt3UTl3OFyBOR2a2SGEiyUPkzV2kdc1QbXynR4FxwmQyf0F1VxR0auS9qSxL3A==", - "dependencies": { - "FSharp.Core": "6.0.7", - "ISADotNet": "0.6.1" - } - }, "FsSpreadsheet": { "type": "Direct", - "requested": "[4.1.0, 4.1.0]", - "resolved": "4.1.0", - "contentHash": "le3gCV13dNtOYnIN5e8GblSFnwaAHeIKlNdMTi9Luh5CqfFUrNiTT2kwCaf3pC6blCwmOIJXgNGaH7TZdXCSTw==", + "requested": "[5.0.2, 5.0.2]", 
"resolved": "5.0.2", + "contentHash": "3ttBXigOzJ0SaQru8YkH3sOxkObRKZ20QHE3pbbzebilB+7RzWk9aYCEroozXf3+4bL72R2bzwCphDUdhGRFqg==", "dependencies": { "FSharp.Core": "6.0.7", "Fable.Core": "4.0.0" @@ -39,13 +46,13 @@ }, "FsSpreadsheet.ExcelIO": { "type": "Direct", - "requested": "[4.1.0, 4.1.0]", - "resolved": "4.1.0", - "contentHash": "jcSb5sX3BU32bvt2wUt07fB4K4172dVp8beS/QTAL+ze84+3ur3SC+d++QYObQmHF9g8vmyGnni+c1wkAjtkWg==", + "requested": "[5.0.2, 5.0.2]", + "resolved": "5.0.2", + "contentHash": "xQtaTbXHx6gbpEF13w0dufMwayx2+KRP/BKdhlWsbhxTZkXOcO/CMXrk8rg9/en/G9RHMTa1lp0Ga7+iTsAGXg==", "dependencies": { "DocumentFormat.OpenXml": "[2.12.3]", "FSharp.Core": "6.0.7", - "FsSpreadsheet": "[4.0.0, 5.0.0)" + "FsSpreadsheet": "[5.0.0, 6.0.0)" } }, "Microsoft.SourceLink.GitHub": { @@ -67,6 +74,75 @@ "Microsoft.NETCore.Platforms": "1.1.0" } }, + "OBO.NET": { + "type": "Direct", + "requested": "[0.4.2, 0.4.2]", + "resolved": "0.4.2", + "contentHash": "NMa0/diYAbtSoBXKeutu6fXz1TNRdfQcMLWgY57BaAGzjbJgq/0lCJMlG8aaamGA0dUigZD2K1qG2myX5BiQLw==", + "dependencies": { + "ARCtrl.ISA": "1.0.0-beta.7", + "FSharp.Core": "7.0.401", + "FSharpAux": "[2.0.0]" + } + }, + "ARCtrl.Contract": { + "type": "Transitive", + "resolved": "1.0.1", + "contentHash": "tM44ZViozBmyhD/L0XJ/HPUXFqQ2BRk6FNIHfhv3Xo26oOKw0vbmggTemYjd+9HsjBBqV+0cOJrYhoZQkHqmBw==", + "dependencies": { + "ARCtrl.ISA": "1.0.1", + "FSharp.Core": "6.0.7" + } + }, + "ARCtrl.CWL": { + "type": "Transitive", + "resolved": "1.0.1", + "contentHash": "mLXMabcxKKEl7OKs8ikBZUokMHypld+3Cpa3um4gVOxPIFjAxCc08qLuEtxpeb/NS1t51ZwHMgeIcd6vPQTH/Q==", + "dependencies": { + "FSharp.Core": "6.0.7" + } + }, + "ARCtrl.FileSystem": { + "type": "Transitive", + "resolved": "1.0.1", + "contentHash": "4sya36b5oijVszp7w7nJOBEsTrS5wS1QrQ0pfQaRj9oA67I2Trud6ROHGQhOMYBdQfydcmoFpNSWITjohA1bfg==", + "dependencies": { + "FSharp.Core": "6.0.7", + "Fable.Core": "4.2.0" + } + }, + "ARCtrl.ISA": { + "type": "Transitive", + "resolved": "1.0.1", + "contentHash": 
"JNqAApJEoQclG4UDAcVViH4MfTkPV4AnrSbIREe4ULh5nfGrHlnFKqHeFjkCZVI8ee9P9shhkbCPKFyNzf599Q==", + "dependencies": { + "ARCtrl.FileSystem": "1.0.1", + "FSharp.Core": "6.0.7" + } + }, + "ARCtrl.ISA.Json": { + "type": "Transitive", + "resolved": "1.0.1", + "contentHash": "FUGA4RA39BBgWl4mmLv4mQHV81G0r7Ua6PLmM06vApORhbqwfqVV5cMtQHRGnlB+4m4PEmC5KJXM2Z1cwM822A==", + "dependencies": { + "ARCtrl.ISA": "1.0.1", + "FSharp.Core": "6.0.7", + "NJsonSchema": "10.8.0", + "Thoth.Json": "10.1.0", + "Thoth.Json.Net": "11.0.0" + } + }, + "ARCtrl.ISA.Spreadsheet": { + "type": "Transitive", + "resolved": "1.0.1", + "contentHash": "Hev33Td607esBbfluDzURSaSYnD6IdYCMGX9BSlcgbesxabUr7VZOZ4dmrVnAJzz3aBkwZ7Wt1Vv70cevdqivA==", + "dependencies": { + "ARCtrl.FileSystem": "1.0.1", + "ARCtrl.ISA": "1.0.1", + "FSharp.Core": "6.0.7", + "FsSpreadsheet": "5.0.1" + } + }, "DocumentFormat.OpenXml": { "type": "Transitive", "resolved": "2.12.3", @@ -75,37 +151,111 @@ "System.IO.Packaging": "4.7.0" } }, + "Fable.Browser.Blob": { + "type": "Transitive", + "resolved": "1.2.0", + "contentHash": "bM4zbtIeycTFFCH7o4WuN28W70dTxNTMZiMvR70XUTYrBnbz7GpS5XxzUy5caDB4l7s2l7wiuVDhh52t7NXxDg==", + "dependencies": { + "FSharp.Core": "4.7.2", + "Fable.Core": "3.0.0" + } + }, + "Fable.Browser.Dom": { + "type": "Transitive", + "resolved": "1.0.0", + "contentHash": "ZodpIKYuKnUnqN71Fi103mh0joFYrRPGwpYOrpbZ149PkVAW7DNKXgxad5lsi9df7vAe5+sBhhO71TPREZaWlw==", + "dependencies": { + "FSharp.Core": "4.5.2", + "Fable.Browser.Blob": "1.0.0", + "Fable.Browser.Event": "1.0.0", + "Fable.Browser.WebStorage": "1.0.0", + "Fable.Core": "3.0.0" + } + }, + "Fable.Browser.Event": { + "type": "Transitive", + "resolved": "1.5.0", + "contentHash": "Bx2AOOASIG1Eq1Pe8869H8baMePte6STmKGccGuOYMT2p6nWVS8G6ZBZb5encQ0tAL2/0vhA4KJOl4bYwUaQqg==", + "dependencies": { + "FSharp.Core": "4.7.2", + "Fable.Browser.Gamepad": "1.1.0", + "Fable.Core": "3.0.0" + } + }, + "Fable.Browser.Gamepad": { + "type": "Transitive", + "resolved": "1.1.0", + "contentHash": 
"8m/Ae/mrH2Hb2ue435rTPEeVb2FhfWsRJJLpCxMvk+5EUOO2+IIjIkLq4thUfRL98uQVt9V5cQd14h2aBf2XJA==", + "dependencies": { + "FSharp.Core": "4.7.2", + "Fable.Core": "3.0.0" + } + }, + "Fable.Browser.WebStorage": { + "type": "Transitive", + "resolved": "1.0.0", + "contentHash": "ZqnZKYkhPO+wmJPxQqtiwSc3zCC/mB37SPlVi4ZLiHoPFnra7SQ3qaRn4/ENYTeaVtVq92eVaYbTyAOnFn+GPA==", + "dependencies": { + "FSharp.Core": "4.5.2", + "Fable.Browser.Event": "1.0.0", + "Fable.Core": "3.0.0" + } + }, + "Fable.Browser.XMLHttpRequest": { + "type": "Transitive", + "resolved": "1.1.0", + "contentHash": "27p/F8781NrnV9vQ23RhX10ww9MDkX+Yi3yTiV9s8U8Bufi/VCCjS4swX0LXvgKQANN3k87CwaNeiO75r2U7gw==", + "dependencies": { + "FSharp.Core": "4.6.2", + "Fable.Browser.Blob": "1.1.0", + "Fable.Browser.Event": "1.0.0", + "Fable.Core": "3.0.0" + } + }, "Fable.Core": { "type": "Transitive", - "resolved": "4.0.0", - "contentHash": "crDLkyN4xADgnCqIneMUGVDo2oS92W4oufbIH/jwc8Fk7nIH6pS/Ds1rBB4R1jUfU1WM1k9GEPa4Up8cNj5VYw==" + "resolved": "4.2.0", + "contentHash": "ikacgHRLZpNVS33oBzl4uDHXZJDH660SaNPYCDGvFb7Hhm4WCXq0qTik8bpPJAhCnLDVrXLCAIoGKsKQ5pfx2A==" }, - "FSharp.SystemTextJson": { + "Fable.Fetch": { "type": "Transitive", - "resolved": "0.19.13", - "contentHash": "q6KzrtNR2d8w9UzMyII3w6Kz91i9gomhUPRqSxm9nzezd26uiAowDnv4J+QVhGPDkJ7oGW4bpbbWTYEZ/OF+Kw==", + "resolved": "2.6.0", + "contentHash": "zhCl95EYeuKcc7bk2jGHLSuLhkPqvRcrlwC91GqgX51BlQ7WJF2IQ7mUxW2n1mg74M1D2VOwEKqQpTAZDCVa8Q==", "dependencies": { - "FSharp.Core": "4.7.0", - "System.Text.Json": "6.0.0" + "FSharp.Core": "4.7.2", + "Fable.Browser.Blob": "1.2.0", + "Fable.Browser.Event": "1.5.0", + "Fable.Core": "3.7.1", + "Fable.Promise": "2.2.2" } }, - "ISADotNet": { + "Fable.Promise": { "type": "Transitive", - "resolved": "0.6.1", - "contentHash": "1uv4LRgqN1s8fVUMh7Lrlq43ajvOMAjuVvP2lsNDkMVgnjk2z4Jc0HzfZ5PcNvAwX+JyQ+QJQjFsUmurPeQNcg==", + "resolved": "2.2.2", + "contentHash": "yHFSo7GCY0l/Wjskh/HESuFoGzXIoRM22UlrARA5ewnX736Y1wM27kcqCWeGcIzaEsgJnZcDkp093M0gQyMcWA==", 
"dependencies": { - "FSharp.Core": "6.0.5", - "FSharp.SystemTextJson": "0.19.13", - "System.Text.Encodings.Web": "6.0.0", - "System.Text.Json": "6.0.5" + "FSharp.Core": "4.7.2", + "Fable.Core": "3.1.5" } }, - "Microsoft.Bcl.AsyncInterfaces": { + "Fable.SimpleHttp": { "type": "Transitive", - "resolved": "6.0.0", - "contentHash": "UcSjPsst+DfAdJGVDsu346FX0ci0ah+lw3WRtn18NUwEqRt70HaOQ7lI72vy3+1LxtqI3T5GWwV39rQSrCzAeg==", + "resolved": "3.5.0", + "contentHash": "SWYshvAI90JcdGLsUmTWBG9eaazY6ihdIk/uehrEz/VqMx9qX+e7+PzYaw31DMwGYSva9/mpq9s69T/z8Ubl5Q==", "dependencies": { - "System.Threading.Tasks.Extensions": "4.5.4" + "FSharp.Core": "4.6.2", + "Fable.Browser.Dom": "1.0.0", + "Fable.Browser.XMLHttpRequest": "1.1.0", + "Fable.Core": "3.0.0" + } + }, + "FSharpAux": { + "type": "Transitive", + "resolved": "2.0.0", + "contentHash": "2gFFDzIVheYO/glZmzyRaol/P60SWPzH7zM7QFZzfkR+rB3F0IdyVQpo3h9eOdsnOVTIYJhiywOaBAtsW0ZFzA==", + "dependencies": { + "FSharp.Core": "6.0.7", + "FSharpAux.Core": "2.0.0" } }, "Microsoft.Build.Tasks.Git": { @@ -113,76 +263,357 @@ "resolved": "1.1.1", "contentHash": "AT3HlgTjsqHnWpBHSNeR0KxbLZD7bztlZVj7I8vgeYG9SYqbeFGh0TM/KVtC6fg53nrWHl3VfZFvb5BiQFcY6Q==" }, + "Microsoft.CSharp": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "P+MBhIM0YX+JqROuf7i306ZLJEjQYA9uUyRDE+OqwUI5sh41e2ZbPQV3LfAPh+29cmceE1pUffXsGfR4eMY3KA==", + "dependencies": { + "System.Collections": "4.3.0", + "System.Diagnostics.Debug": "4.3.0", + "System.Dynamic.Runtime": "4.3.0", + "System.Globalization": "4.3.0", + "System.Linq": "4.3.0", + "System.Linq.Expressions": "4.3.0", + "System.ObjectModel": "4.3.0", + "System.Reflection": "4.3.0", + "System.Reflection.Extensions": "4.3.0", + "System.Reflection.Primitives": "4.3.0", + "System.Reflection.TypeExtensions": "4.3.0", + "System.Resources.ResourceManager": "4.3.0", + "System.Runtime": "4.3.0", + "System.Runtime.Extensions": "4.3.0", + "System.Runtime.InteropServices": "4.3.0", + "System.Threading": "4.3.0" + } + }, 
"Microsoft.NETCore.Platforms": { "type": "Transitive", "resolved": "1.1.0", "contentHash": "kz0PEW2lhqygehI/d6XsPCQzD7ff7gUJaVGPVETX611eadGsA3A877GdSlU0LRVMCTH/+P3o2iDTak+S08V2+A==" }, + "Microsoft.NETCore.Targets": { + "type": "Transitive", + "resolved": "1.1.0", + "contentHash": "aOZA3BWfz9RXjpzt0sRJJMjAscAUm3Hoa4UWAfceV9UTYxgwZ1lZt5nO2myFf+/jetYQo4uTP7zS8sJY67BBxg==" + }, "Microsoft.SourceLink.Common": { "type": "Transitive", "resolved": "1.1.1", "contentHash": "WMcGpWKrmJmzrNeuaEb23bEMnbtR/vLmvZtkAP5qWu7vQsY59GqfRJd65sFpBszbd2k/bQ8cs8eWawQKAabkVg==" }, - "System.Buffers": { + "Namotion.Reflection": { "type": "Transitive", - "resolved": "4.5.1", - "contentHash": "Rw7ijyl1qqRS0YQD/WycNst8hUUMgrMH4FCn1nNm27M4VxchZ1js3fVjQaANHO5f3sN4isvP4a+Met9Y4YomAg==" + "resolved": "2.1.0", + "contentHash": "9t63RauDp+CWzMCcCRAGXLRqEVIw0djYisGaDWhgHuXSaz/Djjpp9gpumCWVLpuDHLNf4HUmYWJeBt4AUyJSWA==", + "dependencies": { + "Microsoft.CSharp": "4.3.0" + } + }, + "Newtonsoft.Json": { + "type": "Transitive", + "resolved": "11.0.2", + "contentHash": "IvJe1pj7JHEsP8B8J8DwlMEx8UInrs/x+9oVY+oCD13jpLu4JbJU2WCIsMRn5C4yW9+DgkaO8uiVE5VHKjpmdQ==" + }, + "NJsonSchema": { + "type": "Transitive", + "resolved": "10.8.0", + "contentHash": "lChjsLWaxyvElh4WJjVhdIiCtx7rimYGFTxtSi2pAkZf0ZnKaXYIX484HCVyzbDDHejDZPgOrcfAJ3kqNSTONw==", + "dependencies": { + "Namotion.Reflection": "2.1.0", + "Newtonsoft.Json": "9.0.1" + } + }, + "System.Collections": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "3Dcj85/TBdVpL5Zr+gEEBUuFe2icOnLalmEh9hfck1PTYbbyWuZgh4fmm2ysCLTrqLQw6t3TgTyJ+VLp+Qb+Lw==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Runtime": "4.3.0" + } + }, + "System.Diagnostics.Debug": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "ZUhUOdqmaG5Jk3Xdb8xi5kIyQYAA4PnTNlHx1mu9ZY3qv4ELIdKbnL/akbGaKi2RnNUWaZsAs31rvzFdewTj2g==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + 
"Microsoft.NETCore.Targets": "1.1.0", + "System.Runtime": "4.3.0" + } + }, + "System.Dynamic.Runtime": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "SNVi1E/vfWUAs/WYKhE9+qlS6KqK0YVhnlT0HQtr8pMIA8YX3lwy3uPMownDwdYISBdmAF/2holEIldVp85Wag==", + "dependencies": { + "System.Collections": "4.3.0", + "System.Diagnostics.Debug": "4.3.0", + "System.Linq": "4.3.0", + "System.Linq.Expressions": "4.3.0", + "System.ObjectModel": "4.3.0", + "System.Reflection": "4.3.0", + "System.Reflection.Emit": "4.3.0", + "System.Reflection.Emit.ILGeneration": "4.3.0", + "System.Reflection.Primitives": "4.3.0", + "System.Reflection.TypeExtensions": "4.3.0", + "System.Resources.ResourceManager": "4.3.0", + "System.Runtime": "4.3.0", + "System.Runtime.Extensions": "4.3.0", + "System.Threading": "4.3.0" + } + }, + "System.Globalization": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "kYdVd2f2PAdFGblzFswE4hkNANJBKRmsfa2X5LG2AcWE1c7/4t0pYae1L8vfZ5xvE2nK/R9JprtToA61OSHWIg==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Runtime": "4.3.0" + } + }, + "System.IO": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "3qjaHvxQPDpSOYICjUoTsmoq5u6QJAFRUITgeT/4gqkF1bajbSmb1kwSxEA8AHlofqgcKJcM8udgieRNhaJ5Cg==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Runtime": "4.3.0", + "System.Text.Encoding": "4.3.0", + "System.Threading.Tasks": "4.3.0" + } }, "System.IO.Packaging": { "type": "Transitive", "resolved": "4.7.0", "contentHash": "9VV4KAbgRQZ79iEoG40KIeZy38O30oWwewScAST879+oki8g/Wa2HXZQgrhDDxQM4GkP1PnRJll05NMiVPbYAw==" }, - "System.Memory": { + "System.Linq": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "5DbqIUpsDp0dFftytzuMmc0oeMdQwjcP/EWxsksIz/w1TcFRkZ3yKKz0PqiYFMmEwPSWw+qNVqD7PJ889JzHbw==", + "dependencies": { + "System.Collections": "4.3.0", + "System.Diagnostics.Debug": "4.3.0", + 
"System.Resources.ResourceManager": "4.3.0", + "System.Runtime": "4.3.0", + "System.Runtime.Extensions": "4.3.0" + } + }, + "System.Linq.Expressions": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "PGKkrd2khG4CnlyJwxwwaWWiSiWFNBGlgXvJpeO0xCXrZ89ODrQ6tjEWS/kOqZ8GwEOUATtKtzp1eRgmYNfclg==", + "dependencies": { + "System.Collections": "4.3.0", + "System.Diagnostics.Debug": "4.3.0", + "System.Globalization": "4.3.0", + "System.IO": "4.3.0", + "System.Linq": "4.3.0", + "System.ObjectModel": "4.3.0", + "System.Reflection": "4.3.0", + "System.Reflection.Emit": "4.3.0", + "System.Reflection.Emit.ILGeneration": "4.3.0", + "System.Reflection.Emit.Lightweight": "4.3.0", + "System.Reflection.Extensions": "4.3.0", + "System.Reflection.Primitives": "4.3.0", + "System.Reflection.TypeExtensions": "4.3.0", + "System.Resources.ResourceManager": "4.3.0", + "System.Runtime": "4.3.0", + "System.Runtime.Extensions": "4.3.0", + "System.Threading": "4.3.0" + } + }, + "System.ObjectModel": { "type": "Transitive", - "resolved": "4.5.4", - "contentHash": "1MbJTHS1lZ4bS4FmsJjnuGJOu88ZzTT2rLvrhW7Ygic+pC0NWA+3hgAen0HRdsocuQXCkUTdFn9yHJJhsijDXw==", + "resolved": "4.3.0", + "contentHash": "bdX+80eKv9bN6K4N+d77OankKHGn6CH711a6fcOpMQu2Fckp/Ft4L/kW9WznHpyR0NRAvJutzOMHNNlBGvxQzQ==", "dependencies": { - "System.Buffers": "4.5.1", - "System.Numerics.Vectors": "4.4.0", - "System.Runtime.CompilerServices.Unsafe": "4.5.3" + "System.Collections": "4.3.0", + "System.Diagnostics.Debug": "4.3.0", + "System.Resources.ResourceManager": "4.3.0", + "System.Runtime": "4.3.0", + "System.Threading": "4.3.0" } }, - "System.Numerics.Vectors": { + "System.Reflection": { "type": "Transitive", - "resolved": "4.5.0", - "contentHash": "QQTlPTl06J/iiDbJCiepZ4H//BVraReU4O4EoRw1U02H5TLUIT7xn3GnDp9AXPSlJUDyFs4uWjWafNX6WrAojQ==" + "resolved": "4.3.0", + "contentHash": "KMiAFoW7MfJGa9nDFNcfu+FpEdiHpWgTcS2HdMpDvt9saK3y/G4GwprPyzqjFH9NTaGPQeWNHU+iDlDILj96aQ==", + "dependencies": { + 
"Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.IO": "4.3.0", + "System.Reflection.Primitives": "4.3.0", + "System.Runtime": "4.3.0" + } + }, + "System.Reflection.Emit": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "228FG0jLcIwTVJyz8CLFKueVqQK36ANazUManGaJHkO0icjiIypKW7YLWLIWahyIkdh5M7mV2dJepllLyA1SKg==", + "dependencies": { + "System.IO": "4.3.0", + "System.Reflection": "4.3.0", + "System.Reflection.Emit.ILGeneration": "4.3.0", + "System.Reflection.Primitives": "4.3.0", + "System.Runtime": "4.3.0" + } + }, + "System.Reflection.Emit.ILGeneration": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "59tBslAk9733NXLrUJrwNZEzbMAcu8k344OYo+wfSVygcgZ9lgBdGIzH/nrg3LYhXceynyvTc8t5/GD4Ri0/ng==", + "dependencies": { + "System.Reflection": "4.3.0", + "System.Reflection.Primitives": "4.3.0", + "System.Runtime": "4.3.0" + } }, - "System.Runtime.CompilerServices.Unsafe": { + "System.Reflection.Emit.Lightweight": { "type": "Transitive", - "resolved": "6.0.0", - "contentHash": "/iUeP3tq1S0XdNNoMz5C9twLSrM/TH+qElHkXWaPvuNOt+99G75NrV0OS2EqHx5wMN7popYjpc8oTjC1y16DLg==" + "resolved": "4.3.0", + "contentHash": "oadVHGSMsTmZsAF864QYN1t1QzZjIcuKU3l2S9cZOwDdDueNTrqq1yRj7koFfIGEnKpt6NjpL3rOzRhs4ryOgA==", + "dependencies": { + "System.Reflection": "4.3.0", + "System.Reflection.Emit.ILGeneration": "4.3.0", + "System.Reflection.Primitives": "4.3.0", + "System.Runtime": "4.3.0" + } + }, + "System.Reflection.Extensions": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "rJkrJD3kBI5B712aRu4DpSIiHRtr6QlfZSQsb0hYHrDCZORXCFjQfoipo2LaMUHoT9i1B7j7MnfaEKWDFmFQNQ==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Reflection": "4.3.0", + "System.Runtime": "4.3.0" + } + }, + "System.Reflection.Primitives": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": 
"5RXItQz5As4xN2/YUDxdpsEkMhvw3e6aNveFXUn4Hl/udNTCNhnKp8lT9fnc3MhvGKh1baak5CovpuQUXHAlIA==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Runtime": "4.3.0" + } + }, + "System.Reflection.TypeExtensions": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "7u6ulLcZbyxB5Gq0nMkQttcdBTx57ibzw+4IOXEfR+sXYQoHvjW5LTLyNr8O22UIMrqYbchJQJnos4eooYzYJA==", + "dependencies": { + "System.Reflection": "4.3.0", + "System.Runtime": "4.3.0" + } + }, + "System.Resources.ResourceManager": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "/zrcPkkWdZmI4F92gL/TPumP98AVDu/Wxr3CSJGQQ+XN6wbRZcyfSKVoPo17ilb3iOr0cCRqJInGwNMolqhS8A==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Globalization": "4.3.0", + "System.Reflection": "4.3.0", + "System.Runtime": "4.3.0" + } + }, + "System.Runtime": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "JufQi0vPQ0xGnAczR13AUFglDyVYt4Kqnz1AZaiKZ5+GICq0/1MH/mO/eAJHt/mHW1zjKBJd7kV26SrxddAhiw==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0" + } + }, + "System.Runtime.Extensions": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "guW0uK0fn5fcJJ1tJVXYd7/1h5F+pea1r7FLSOz/f8vPEqbR2ZAknuRDvTQ8PzAilDveOxNjSfr0CHfIQfFk8g==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Runtime": "4.3.0" + } + }, + "System.Runtime.Handles": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "OKiSUN7DmTWeYb3l51A7EYaeNMnvxwE249YtZz7yooT4gOZhmTjIn48KgSsw2k2lYdLgTKNJw/ZIfSElwDRVgg==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Runtime": "4.3.0" + } + }, + "System.Runtime.InteropServices": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": 
"uv1ynXqiMK8mp1GM3jDqPCFN66eJ5w5XNomaK2XD+TuCroNTLFGeZ+WCmBMcBDyTFKou3P6cR6J/QsaqDp7fGQ==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Reflection": "4.3.0", + "System.Reflection.Primitives": "4.3.0", + "System.Runtime": "4.3.0", + "System.Runtime.Handles": "4.3.0" + } + }, + "System.Text.Encoding": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "BiIg+KWaSDOITze6jGQynxg64naAPtqGHBwDrLaCtixsa5bKiR8dpPOHA7ge3C0JJQizJE+sfkz1wV+BAKAYZw==", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Runtime": "4.3.0" + } + }, + "System.Threading": { + "type": "Transitive", + "resolved": "4.3.0", + "contentHash": "VkUS0kOBcUf3Wwm0TSbrevDDZ6BlM+b/HRiapRFWjM5O0NS0LviG0glKmFK+hhPDd1XFeSdU1GmlLhb2CoVpIw==", + "dependencies": { + "System.Runtime": "4.3.0", + "System.Threading.Tasks": "4.3.0" + } }, - "System.Text.Encodings.Web": { + "System.Threading.Tasks": { "type": "Transitive", - "resolved": "6.0.0", - "contentHash": "Vg8eB5Tawm1IFqj4TVK1czJX89rhFxJo9ELqc/Eiq0eXy13RK00eubyU6TJE6y+GQXjyV5gSfiewDUZjQgSE0w==", + "resolved": "4.3.0", + "contentHash": "LbSxKEdOUhVe8BezB/9uOGGppt+nZf6e1VFyw6v3DN6lqitm0OSn2uXMOdtP0M3W4iMcqcivm2J6UgqiwwnXiA==", "dependencies": { - "System.Buffers": "4.5.1", - "System.Memory": "4.5.4", - "System.Runtime.CompilerServices.Unsafe": "6.0.0" + "Microsoft.NETCore.Platforms": "1.1.0", + "Microsoft.NETCore.Targets": "1.1.0", + "System.Runtime": "4.3.0" } }, - "System.Text.Json": { + "Thoth.Json": { "type": "Transitive", - "resolved": "6.0.5", - "contentHash": "SSH+YYrMpvLcy7Orzb5K1tSyffnFacWahyxCCjYH1PbSHdAF4dekmIetBurFKgtTHDmwEe/J2Csi/7niRH6d/g==", + "resolved": "10.1.0", + "contentHash": "2KpHPR0646KzD4WBlBgbLL4rgkoQ6yXKzqHCuSETlklb2dax/YzuLeUHc7axEBY9wHYiIMfpO0mtXmTu0XPh5w==", "dependencies": { - "Microsoft.Bcl.AsyncInterfaces": "6.0.0", - "System.Buffers": "4.5.1", - "System.Memory": "4.5.4", - 
"System.Numerics.Vectors": "4.5.0", - "System.Runtime.CompilerServices.Unsafe": "6.0.0", - "System.Text.Encodings.Web": "6.0.0", - "System.Threading.Tasks.Extensions": "4.5.4" + "FSharp.Core": "4.7.2", + "Fable.Core": "3.1.6" } }, - "System.Threading.Tasks.Extensions": { + "Thoth.Json.Net": { "type": "Transitive", - "resolved": "4.5.4", - "contentHash": "zteT+G8xuGu6mS+mzDzYXbzS7rd3K6Fjb9RiZlYlJPam2/hU7JCBZBVEcywNuR+oZ1ncTvc/cq0faRr3P01OVg==", + "resolved": "11.0.0", + "contentHash": "ugheFKMHRO3ReobCENha5J6uexPrp+Bn2d+WEcFbXaA77sNBWtTlx2StB+7lX8prMqdvO5uqlPeHlg+9dSpkNg==", "dependencies": { - "System.Runtime.CompilerServices.Unsafe": "4.5.3" + "FSharp.Core": "4.7.2", + "Fable.Core": "3.1.6", + "Newtonsoft.Json": "11.0.2" } }, "controlledvocabulary": { diff --git a/src/ARCTokenization/structural_ontologies/APGSO.fs b/src/ARCTokenization/structural_ontologies/APGSO.fs new file mode 100644 index 0000000..06702a7 --- /dev/null +++ b/src/ARCTokenization/structural_ontologies/APGSO.fs @@ -0,0 +1,42 @@ +// This file should eventually be auto-generated from the respective obo files, to have a safe way of updating it from the same source. +// For now, it is manually created and updated. 
It is not complete, just a collectAFSOn of terms needed for baseline WIP validatAFSOn + +namespace ARCTokenization.StructuralOntology + +open ControlledVocabulary + +module APGSO = + + let FreeText = CvTerm.create(accession = "APGSO:00000022", name = "FreeText", ref = "APGSO") + + module ``Process Graph Header`` = + + let key = CvTerm.create(accession = "APGSO:00000001", name = "Process Graph Header", ref = "APGSO") + + let Characteristic = CvTerm.create(accession = "APGSO:00000002", name = "Characteristic", ref = "APGSO") + let Factor = CvTerm.create(accession = "APGSO:00000003", name = "Factor", ref = "APGSO") + let Parameter = CvTerm.create(accession = "APGSO:00000004", name = "Parameter", ref = "APGSO") + let Component = CvTerm.create(accession = "APGSO:00000005", name = "Component", ref = "APGSO") + let ProtocolType = CvTerm.create(accession = "APGSO:00000006", name = "ProtocolType", ref = "APGSO") + let ProtocolDescription = CvTerm.create(accession = "APGSO:00000007", name = "ProtocolDescription", ref = "APGSO") + let ProtocolUri = CvTerm.create(accession = "APGSO:00000008", name = "ProtocolUri", ref = "APGSO") + let ProtocolVersion = CvTerm.create(accession = "APGSO:00000009", name = "ProtocolVersion", ref = "APGSO") + let ProtocolREF = CvTerm.create(accession = "APGSO:00000010", name = "ProtocolREF", ref = "APGSO") + let Performer = CvTerm.create(accession = "APGSO:00000011", name = "Performer", ref = "APGSO") + let Date = CvTerm.create(accession = "APGSO:00000012", name = "Date", ref = "APGSO") + let Input = CvTerm.create(accession = "APGSO:00000013", name = "Input", ref = "APGSO") + let Output = CvTerm.create(accession = "APGSO:00000014", name = "Output", ref = "APGSO") + + module IOType = + + let key = CvTerm.create(accession = "APGSO:00000016", name = "IOType", ref = "APGSO") + + let Source = CvTerm.create(accession = "APGSO:00000016", name = "Source", ref = "APGSO") + let Sample = CvTerm.create(accession = "APGSO:00000017", name = "Sample", ref = 
"APGSO") + let RawDataFile = CvTerm.create(accession = "APGSO:00000018", name = "RawDataFile", ref = "APGSO") + let DerivedDataFile = CvTerm.create(accession = "APGSO:00000019", name = "DerivedDataFile", ref = "APGSO") + let ImageFile = CvTerm.create(accession = "APGSO:00000020", name = "ImageFile", ref = "APGSO") + let Material = CvTerm.create(accession = "APGSO:00000021", name = "Material", ref = "APGSO") + + + diff --git a/src/ARCTokenization/structural_ontologies/arc_file_structure_ontology.obo b/src/ARCTokenization/structural_ontologies/arc_file_structure_ontology.obo index ea5c0e9..704337a 100644 --- a/src/ARCTokenization/structural_ontologies/arc_file_structure_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/arc_file_structure_ontology.obo @@ -1,4 +1,4 @@ -!This file was auto generated on 1/8/2024. Do not edit it. All manual changes will be overwritten by the next generator run eventually. +!This file was auto generated on 2024-01-09. Do not edit it. All manual changes will be overwritten by the next generator run eventually. format-version: 1.2 data-version: init/2023-10-26 saved-by: Kevin Schneider diff --git a/src/ARCTokenization/structural_ontologies/arc_process_graph_structural_ontology.obo b/src/ARCTokenization/structural_ontologies/arc_process_graph_structural_ontology.obo new file mode 100644 index 0000000..c0b95b9 --- /dev/null +++ b/src/ARCTokenization/structural_ontologies/arc_process_graph_structural_ontology.obo @@ -0,0 +1,129 @@ +!This file was auto generated on 2024-01-09. Do not edit it. All manual changes will be overwritten by the next generator run eventually. +format-version: 1.2 +data-version: init/2024-01-09 +saved-by: Kevin Schneider +default-namespace: apgso +ontology: APGSO + +[Term] +id: APGSO:00000001 +name: Process Graph Header +def: "" + +[Term] +id: APGSO:00000002 +name: Characteristic +def: "" +relationship: is_a APGSO:00000001 ! 
Process Graph Header + +[Term] +id: APGSO:00000003 +name: Factor +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000004 +name: Parameter +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000005 +name: Component +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000006 +name: ProtocolType +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000007 +name: ProtocolDescription +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000008 +name: ProtocolUri +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000009 +name: ProtocolVersion +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000010 +name: ProtocolREF +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000011 +name: Performer +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000012 +name: Date +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000013 +name: Input +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000014 +name: Output +def: "" +relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000015 +name: IOType +def: "" + +[Term] +id: APGSO:00000016 +name: Source +relationship: is_a APGSO:00000014 ! IOType + +[Term] +id: APGSO:00000017 +name: Sample +relationship: is_a APGSO:00000014 ! IOType + +[Term] +id: APGSO:00000018 +name: RawDataFile +relationship: is_a APGSO:00000014 ! IOType + +[Term] +id: APGSO:00000019 +name: DerivedDataFile +relationship: is_a APGSO:00000014 ! IOType + +[Term] +id: APGSO:00000020 +name: ImageFile +relationship: is_a APGSO:00000014 ! IOType + +[Term] +id: APGSO:00000021 +name: Material +relationship: is_a APGSO:00000014 ! 
IOType + +[Term] +id: APGSO:00000022 +name: FreeText +def: "" diff --git a/src/ARCTokenization/structural_ontologies/arc_process_graph_structural_ontology.yml b/src/ARCTokenization/structural_ontologies/arc_process_graph_structural_ontology.yml new file mode 100644 index 0000000..83e7891 --- /dev/null +++ b/src/ARCTokenization/structural_ontologies/arc_process_graph_structural_ontology.yml @@ -0,0 +1,128 @@ +format-version: 1.2 +data-version: init/2024-01-09 +saved-by: Kevin Schneider +default-namespace: apgso +ontology: APGSO + +[Term] +id: APGSO:00000001 +name: Process Graph Header +def: "" + + [Term] + id: APGSO:00000002 + name: Characteristic + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000003 + name: Factor + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000004 + name: Parameter + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000005 + name: Component + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000006 + name: ProtocolType + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000007 + name: ProtocolDescription + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000008 + name: ProtocolUri + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000009 + name: ProtocolVersion + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000010 + name: ProtocolREF + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000011 + name: Performer + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000012 + name: Date + def: "" + relationship: is_a APGSO:00000001 ! 
Process Graph Header + + [Term] + id: APGSO:00000013 + name: Input + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + + [Term] + id: APGSO:00000014 + name: Output + def: "" + relationship: is_a APGSO:00000001 ! Process Graph Header + +[Term] +id: APGSO:00000015 +name: IOType +def: "" + + [Term] + id: APGSO:00000016 + name: Source + relationship: is_a APGSO:00000014 ! IOType + + [Term] + id: APGSO:00000017 + name: Sample + relationship: is_a APGSO:00000014 ! IOType + + [Term] + id: APGSO:00000018 + name: RawDataFile + relationship: is_a APGSO:00000014 ! IOType + + [Term] + id: APGSO:00000019 + name: DerivedDataFile + relationship: is_a APGSO:00000014 ! IOType + + [Term] + id: APGSO:00000020 + name: ImageFile + relationship: is_a APGSO:00000014 ! IOType + + [Term] + id: APGSO:00000021 + name: Material + relationship: is_a APGSO:00000014 ! IOType + +[Term] +id: APGSO:00000022 +name: FreeText +def: "" \ No newline at end of file diff --git a/src/ARCTokenization/structural_ontologies/assay_metadata_structural_ontology.obo b/src/ARCTokenization/structural_ontologies/assay_metadata_structural_ontology.obo index 0cfeedb..3fb2b41 100644 --- a/src/ARCTokenization/structural_ontologies/assay_metadata_structural_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/assay_metadata_structural_ontology.obo @@ -1,4 +1,4 @@ -!This file was auto generated on 1/8/2024. Do not edit it. All manual changes will be overwritten by the next generator run eventually. +!This file was auto generated on 2024-01-09. Do not edit it. All manual changes will be overwritten by the next generator run eventually. 
format-version: 1.2 data-version: init/2023-07-27 saved-by: Kevin Schneider diff --git a/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo b/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo index e2f5848..dac71f7 100644 --- a/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo @@ -1,4 +1,4 @@ -!This file was auto generated on 1/8/2024. Do not edit it. All manual changes will be overwritten by the next generator run eventually. +!This file was auto generated on 2024-01-09. Do not edit it. All manual changes will be overwritten by the next generator run eventually. format-version: 1.2 data-version: init/2023-07-20 saved-by: Kevin Schneider diff --git a/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo b/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo index ed62060..4b7c425 100644 --- a/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo @@ -1,4 +1,4 @@ -!This file was auto generated on 1/8/2024. Do not edit it. All manual changes will be overwritten by the next generator run eventually. +!This file was auto generated on 2024-01-09. Do not edit it. All manual changes will be overwritten by the next generator run eventually. 
format-version: 1.2 data-version: init/2023-07-27 saved-by: Kevin Schneider diff --git a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj index 5725122..bd72fdb 100644 --- a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj +++ b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 false false @@ -12,27 +12,29 @@ - + + + + - + - + - - - - + + + runtime; build; native; contentfiles; analyzers; buildtransitive all - + runtime; build; native; contentfiles; analyzers; buildtransitive all diff --git a/tests/ARCTokenization.Tests/Fixtures/correct/study_with_source_characteristics_sample.xlsx b/tests/ARCTokenization.Tests/Fixtures/correct/study_with_source_characteristics_sample.xlsx new file mode 100644 index 0000000..35c4d4d Binary files /dev/null and b/tests/ARCTokenization.Tests/Fixtures/correct/study_with_source_characteristics_sample.xlsx differ diff --git a/tests/ARCTokenization.Tests/IntegrationTests/AssayAnnotationTable.fs b/tests/ARCTokenization.Tests/IntegrationTests/AssayAnnotationTable.fs deleted file mode 100644 index d3b444f..0000000 --- a/tests/ARCTokenization.Tests/IntegrationTests/AssayAnnotationTable.fs +++ /dev/null @@ -1,130 +0,0 @@ -namespace IntegrationTests - -open ControlledVocabulary -open FsSpreadsheet -open FsSpreadsheet.ExcelIO -open ARCTokenization -open Xunit - -open TestUtils -open TestObjects.Integration - -module Assay = - module Correct = - - module ``Assay with only source and sample column`` = - - let assay = AssayAnnotationTable.Correct.``assay with only source and sample column`` - - [] - let ``AnnotationTable count`` () = - Assert.Equal(assay.Length, 1) - - let table = assay.[0] |> snd - - [] - let ``IOColumns count`` () = - TokenizedAnnotationTable.hasIOColumnAmount 2 table - - [] - let ``TermRelatedBuildingBlocks count`` () = - TokenizedAnnotationTable.hasTermRelatedBuildingBlockAmount 0 table - - let expectedIOColumns 
= - [ - [ - CvParam( - id = "(n/a)", - name = "Source Name", - ref = "(n/a)", - pv = (ParamValue.Value "Source A"), - attributes = [] - ) - ] - [ - CvParam( - id = "(n/a)", - name = "Sample Name", - ref = "(n/a)", - pv = (ParamValue.Value "Sample A"), - attributes = [] - ) - ] - ] - - let expectedTermRelatedBuildingBlocks: CvParam list list = [] - - [] - let ``IOColumns CvParams`` () = - table - |> TokenizedAnnotationTable.IOColumnsEqual expectedIOColumns - - - [] - let ``TermRelatedBuildingBlocks CvParams`` () = - table - |> TokenizedAnnotationTable.termRelatedBuildingBlocksEqual expectedTermRelatedBuildingBlocks - - - module ``Assay with single characteristics`` = - - let assay = AssayAnnotationTable.Correct.``assay with single characteristics`` - - [] - let ``AnnotationTable count`` () = - Assert.Equal(assay.Length, 1) - - let table = assay.[0] |> snd - - [] - let ``IOColumns count`` () = - TokenizedAnnotationTable.hasIOColumnAmount 2 table - - [] - let ``TermRelatedBuildingBlocks count`` () = - TokenizedAnnotationTable.hasTermRelatedBuildingBlockAmount 1 table - - let expectedIOColumns = - [ - [ - CvParam( - id = "(n/a)", - name = "Source Name", - ref = "(n/a)", - pv = (ParamValue.Value "Source A"), - attributes = [] - ) - ] - [ - CvParam( - id = "(n/a)", - name = "Sample Name", - ref = "(n/a)", - pv = (ParamValue.Value "Sample A"), - attributes = [] - ) - ] - ] - - let expectedTermRelatedBuildingBlocks = - [ - [ - CvParam( - id = "Term Accession Number (OBI:0100026)", - name = "Characteristic [organism]", - ref = "Term Source REF (OBI:0100026)", - pv = (ParamValue.CvValue (CvTerm.create(accession = "http://purl.obolibrary.org/obo/NCBITaxon_3702", name = "Arabidopsis thaliana", ref = "NCBITaxon"))), - attributes = [] - ) - ] - ] - - [] - let ``IOColumns CvParams`` () = - table - |> TokenizedAnnotationTable.IOColumnsEqual expectedIOColumns - - [] - let ``TermRelatedBuildingBlocks CvParams`` () = - table - |> 
TokenizedAnnotationTable.termRelatedBuildingBlocksEqual expectedTermRelatedBuildingBlocks \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/IntegrationTests/AssayProcessGraph.fs b/tests/ARCTokenization.Tests/IntegrationTests/AssayProcessGraph.fs new file mode 100644 index 0000000..229b395 --- /dev/null +++ b/tests/ARCTokenization.Tests/IntegrationTests/AssayProcessGraph.fs @@ -0,0 +1,4 @@ +namespace IntegrationTests + +module AssayProcessGraph = () + diff --git a/tests/ARCTokenization.Tests/IntegrationTests/StudyAnnotationTable.fs b/tests/ARCTokenization.Tests/IntegrationTests/StudyAnnotationTable.fs deleted file mode 100644 index 58b34cb..0000000 --- a/tests/ARCTokenization.Tests/IntegrationTests/StudyAnnotationTable.fs +++ /dev/null @@ -1,2 +0,0 @@ -module StudyAnnotationTable - diff --git a/tests/ARCTokenization.Tests/IntegrationTests/StudyProcessgraph.fs b/tests/ARCTokenization.Tests/IntegrationTests/StudyProcessgraph.fs new file mode 100644 index 0000000..9dac078 --- /dev/null +++ b/tests/ARCTokenization.Tests/IntegrationTests/StudyProcessgraph.fs @@ -0,0 +1,61 @@ +namespace IntegrationTests + +module StudyProcessGraph = + + open ControlledVocabulary + open FsSpreadsheet + open FsSpreadsheet.ExcelIO + open ARCTokenization + open Xunit + + open TestUtils + + let parsedStudyProcessGraphSimple = Study.parseProcessGraphColumnsFromFile "Fixtures/correct/study_with_source_characteristics_sample.xlsx" + + let allExpectedProcessGraphTerms = + Map ([ + "process_sheet_1", + [ + // Input [Source Name] + [ + CvParam(CvTerm.create("APGSO:00000013", "Input", "APGSO"), ParamValue.CvValue(CvTerm.create("APGSO:00000016", "Source", "APGSO"))) + CvParam(CvTerm.create("APGSO:00000013", "Input", "APGSO"), ParamValue.Value "Source_1") + CvParam(CvTerm.create("APGSO:00000013", "Input", "APGSO"), ParamValue.Value "Source_1") + ] + // Characteristic [organism] | Term Source REF (OBI:0100026) | Term Accession Number (OBI:0100026) + [ + 
CvParam(CvTerm.create("APGSO:00000002", "Characteristic", "APGSO"), ParamValue.CvValue(CvTerm.create("OBI:0100026","organism","OBI"))) + CvParam(CvTerm.create("APGSO:00000002", "Characteristic", "APGSO"), ParamValue.CvValue(CvTerm.create("http://purl.obolibrary.org/obo/NCBITaxon_3702","Arabidopsis thaliana","NCBITaxon"))) + CvParam(CvTerm.create("APGSO:00000002", "Characteristic", "APGSO"), ParamValue.CvValue(CvTerm.create("http://purl.obolibrary.org/obo/NCBITaxon_3702","Arabidopsis thaliana","NCBITaxon"))) + ] + // Output [Sample Name] + [ + CvParam(CvTerm.create("APGSO:00000014", "Output", "APGSO"), ParamValue.CvValue(CvTerm.create("APGSO:00000017", "Sample", "APGSO"))) + CvParam(CvTerm.create("APGSO:00000014", "Output", "APGSO"), ParamValue.Value "Sample_1") + CvParam(CvTerm.create("APGSO:00000014", "Output", "APGSO"), ParamValue.Value "Sample_2") + ] + ] + ]) + + [] + let ``Simple study process graph has corrrect sheet keys`` () = + Assert.Equal>( + (allExpectedProcessGraphTerms |> Map.keys), + (parsedStudyProcessGraphSimple |> Map.keys) + ) + + [] + let ``Simple study process graph is tokenized correctly`` () = + Assert.All( + List.zip + allExpectedProcessGraphTerms["process_sheet_1"] + parsedStudyProcessGraphSimple["process_sheet_1"] + , + (fun (expected,actual) -> + Assert.All( + List.zip expected actual, + (fun (expected,actual) -> Param.typedStructuralEquality expected actual) + ) + ) + ) + diff --git a/tests/ARCTokenization.Tests/MockAPITests/MockProcessGraphTokens.fs b/tests/ARCTokenization.Tests/MockAPITests/MockProcessGraphTokens.fs new file mode 100644 index 0000000..ec461df --- /dev/null +++ b/tests/ARCTokenization.Tests/MockAPITests/MockProcessGraphTokens.fs @@ -0,0 +1,54 @@ +namespace MockAPITests + +open ARCTokenization +open Xunit +open TestUtils + +module MockProcessGraphTokens = + + module ProcessGraphColumn = + + [] + let ``Input column mock tokens are correct`` () = + let expected = 
ReferenceObjects.MockAPI.ProcessGraphTokens.referenceInputColumn + let actual = TestObjects.MockAPI.ProcessGraphTokens.inputColumn + Assert.All( + + List.zip expected actual, + (fun (expected,actual) -> Param.typedStructuralEquality (expected) (actual)) + ) + + [] + let ``Characteristics column mock tokens are correct`` () = + let expected = ReferenceObjects.MockAPI.ProcessGraphTokens.referenceCharacteristicsColumn + let actual = TestObjects.MockAPI.ProcessGraphTokens.characteristicsColumn + Assert.All( + + List.zip expected actual, + (fun (expected,actual) -> Param.typedStructuralEquality (expected) (actual)) + ) + + [] + let ``Output column mock tokens are correct`` () = + let expected = ReferenceObjects.MockAPI.ProcessGraphTokens.referenceOutputColumn + let actual = TestObjects.MockAPI.ProcessGraphTokens.outputColumn + Assert.All( + + List.zip expected actual, + (fun (expected,actual) -> Param.typedStructuralEquality (expected) (actual)) + ) + + module ProcessGraph = + + [] + let ``Simple study process graph mock tokens are correct`` () = + let expected = ReferenceObjects.MockAPI.ProcessGraph.referenceStudyProcessGraphTable + let actual = TestObjects.MockAPI.ProcessGraphTokens.simpleStudy + Assert.All( + List.zip expected actual, + (fun (expected,actual) -> + Assert.All( + List.zip expected actual, + (fun (expected,actual) -> Param.typedStructuralEquality (expected) (actual))) + ) + ) diff --git a/tests/ARCTokenization.Tests/ReferenceObjects.fs b/tests/ARCTokenization.Tests/ReferenceObjects.fs index 100a413..8808c17 100644 --- a/tests/ARCTokenization.Tests/ReferenceObjects.fs +++ b/tests/ARCTokenization.Tests/ReferenceObjects.fs @@ -175,7 +175,6 @@ module Terms = CvTerm.create("STDMSO:00000060", "Study Person Roles Term Accession Number", "STDMSO") CvTerm.create("STDMSO:00000061", "Study Person Roles Term Source REF", "STDMSO") ] - module AssayMetadata = @@ -208,8 +207,32 @@ module Terms = ] - - + module ProcessGraph = + + let referenceOntologyName = "APGSO" + + 
let expectedNonObsoleteNonRootTerms = [ + CvTerm.create(accession = "APGSO:00000002", name = "Characteristic", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000003", name = "Factor", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000004", name = "Parameter", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000005", name = "Component", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000006", name = "ProtocolType", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000007", name = "ProtocolDescription", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000008", name = "ProtocolUri", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000009", name = "ProtocolVersion", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000010", name = "ProtocolREF", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000011", name = "Performer", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000012", name = "Date", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000013", name = "Input", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000014", name = "Output", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000016", name = "Source", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000017", name = "Sample", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000018", name = "RawDataFile", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000019", name = "DerivedDataFile", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000020", name = "ImageFile", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000021", name = "Material", ref = "APGSO") + CvTerm.create(accession = "APGSO:00000022", name = "FreeText", ref = "APGSO") + ] module Tokenization = @@ -425,6 +448,10 @@ module Tokenization = ) |> List.sortBy (fun cvp -> cvp.Value |> ParamValue.getValueAsString) + module ARCtrl = + () + + module MockAPI = module InvestigationMetadataTokens = @@ -449,4 +476,33 @@ module MockAPI = let empty = Terms.AssayMetadata.nonObsoleteNonRootCvTerms |> 
List.filter (fun t -> not (t.Name.StartsWith("Comment")) ) // ignore all comments - |> List.map (fun cvTerm -> CvParam(cvTerm, ParamValue.CvValue Terms.StructuralTerms.metadataSectionKey, [])) \ No newline at end of file + |> List.map (fun cvTerm -> CvParam(cvTerm, ParamValue.CvValue Terms.StructuralTerms.metadataSectionKey, [])) + + module ProcessGraphTokens = + + let referenceInputColumn = [ + CvParam(CvTerm.create("APGSO:00000013", "Input", "APGSO"), ParamValue.CvValue(CvTerm.create("APGSO:00000016", "Source", "APGSO"))) + CvParam(CvTerm.create("APGSO:00000013", "Input", "APGSO"), ParamValue.Value "Source_1") + CvParam(CvTerm.create("APGSO:00000013", "Input", "APGSO"), ParamValue.Value "Source_1") + ] + + let referenceCharacteristicsColumn = [ + CvParam(CvTerm.create("APGSO:00000002", "Characteristic", "APGSO"), ParamValue.CvValue(CvTerm.create("OBI:0100026","organism","OBI"))) + CvParam(CvTerm.create("APGSO:00000002", "Characteristic", "APGSO"), ParamValue.CvValue(CvTerm.create("http://purl.obolibrary.org/obo/NCBITaxon_3702","Arabidopsis thaliana","NCBITaxon"))) + CvParam(CvTerm.create("APGSO:00000002", "Characteristic", "APGSO"), ParamValue.CvValue(CvTerm.create("http://purl.obolibrary.org/obo/NCBITaxon_3702","Arabidopsis thaliana","NCBITaxon"))) + ] + + let referenceOutputColumn = [ + CvParam(CvTerm.create("APGSO:00000014", "Output", "APGSO"), ParamValue.CvValue(CvTerm.create("APGSO:00000017", "Sample", "APGSO"))) + CvParam(CvTerm.create("APGSO:00000014", "Output", "APGSO"), ParamValue.Value "Sample_1") + CvParam(CvTerm.create("APGSO:00000014", "Output", "APGSO"), ParamValue.Value "Sample_2") + ] + + module ProcessGraph = + + let referenceStudyProcessGraphTable = + [ + ProcessGraphTokens.referenceInputColumn + ProcessGraphTokens.referenceCharacteristicsColumn + ProcessGraphTokens.referenceOutputColumn + ] \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/StructuralOntologyTests.fs b/tests/ARCTokenization.Tests/StructuralOntologyTests.fs 
index fb7b85b..dcdd862 100644 --- a/tests/ARCTokenization.Tests/StructuralOntologyTests.fs +++ b/tests/ARCTokenization.Tests/StructuralOntologyTests.fs @@ -1,6 +1,6 @@ namespace StructuralOntologyTests -open FsOboParser +open OBO.NET open ARCTokenization open ARCTokenization.Terms open Xunit @@ -210,4 +210,44 @@ module AssayMetadata = && (actual.RefUri = expected.RefUri) ) ) + ) + +module ProcessGraph = + + [] + let ``no duplicate term ids`` () = + let expected = [1 .. ProcessGraph.ontology.Terms.Length] + let actual = + ProcessGraph.ontology.Terms + |> List.map (fun t -> + t.Id.Replace("APGSO:","") |> int + ) + |> List.sort + Assert.All( + List.zip expected actual, + (fun (e,a) -> Assert.Equal(e,a)) + ) + + [] + let ``all ontology names correct`` () = + Assert.All( + ProcessGraph.cvTerms, + (fun t -> Assert.True(t.RefUri = ReferenceObjects.Terms.ProcessGraph.referenceOntologyName)) + ) + + [] + let ``all non root non obsolete CvTerms are correct`` () = + Assert.All( + ( + List.zip + ProcessGraph.nonObsoleteNonRootCvTerms + ReferenceObjects.Terms.ProcessGraph.expectedNonObsoleteNonRootTerms + ), + (fun (actual, expected) -> + Assert.True( + (actual.Name = expected.Name) + && (actual.Accession = expected.Accession) + && (actual.RefUri = expected.RefUri) + ) + ) ) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/TestObjects.fs b/tests/ARCTokenization.Tests/TestObjects.fs index 9b3a141..4c6500f 100644 --- a/tests/ARCTokenization.Tests/TestObjects.fs +++ b/tests/ARCTokenization.Tests/TestObjects.fs @@ -2,24 +2,8 @@ open ARCTokenization open FsSpreadsheet - -module Tokenization = - - module InvestigationMetadata = - - () - -module Integration = - - module AssayAnnotationTable = - - module Correct = - - let ``assay with only source and sample column`` = - Assay.parseAnnotationTablesFromFile "Fixtures/correct/assay_with_only_source_and_sample_column.xlsx" - - let ``assay with single characteristics`` = - Assay.parseAnnotationTablesFromFile 
"Fixtures/correct/assay_with_single_characteristics.xlsx" +open ARCtrl +open ARCtrl.ISA module MockAPI = @@ -42,4 +26,53 @@ module MockAPI = // equivalent to a metadatasheet with only the first column that contains metadata section keys let empty = ARCMock.AssayMetadataTokens() - |> List.concat // use flat list \ No newline at end of file + |> List.concat // use flat list + + module ProcessGraphTokens = + + let inputColumn = + ARCMock.ProcessGraphColumn( + header = CompositeHeader.Input IOType.Source, + cells = [ + CompositeCell.FreeText "Source_1" + CompositeCell.FreeText "Source_1" + ] + ) + + let characteristicsColumn = + ARCMock.ProcessGraphColumn( + header = CompositeHeader.Characteristic (OntologyAnnotation.create(TermAccessionNumber = "OBI:0100026", Name = AnnotationValue.Text "organism", TermSourceREF = "OBI")), + cells = [ + CompositeCell.Term (OntologyAnnotation.create(TermAccessionNumber = "http://purl.obolibrary.org/obo/NCBITaxon_3702", Name = AnnotationValue.Text "Arabidopsis thaliana", TermSourceREF = "NCBITaxon")) + CompositeCell.Term (OntologyAnnotation.create(TermAccessionNumber = "http://purl.obolibrary.org/obo/NCBITaxon_3702", Name = AnnotationValue.Text "Arabidopsis thaliana", TermSourceREF = "NCBITaxon")) + ] + ) + + let outputColumn = + ARCMock.ProcessGraphColumn( + header = CompositeHeader.Output IOType.Sample, + cells = [ + CompositeCell.FreeText "Sample_1" + CompositeCell.FreeText "Sample_2" + ] + ) + + let simpleStudy = + ARCMock.ProcessGraph( + [ + CompositeHeader.Input IOType.Source, [ + CompositeCell.FreeText "Source_1" + CompositeCell.FreeText "Source_1" + ] + + CompositeHeader.Characteristic (OntologyAnnotation.create(TermAccessionNumber = "OBI:0100026", Name = AnnotationValue.Text "organism", TermSourceREF = "OBI")), [ + CompositeCell.Term (OntologyAnnotation.create(TermAccessionNumber = "http://purl.obolibrary.org/obo/NCBITaxon_3702", Name = AnnotationValue.Text "Arabidopsis thaliana", TermSourceREF = "NCBITaxon")) + 
CompositeCell.Term (OntologyAnnotation.create(TermAccessionNumber = "http://purl.obolibrary.org/obo/NCBITaxon_3702", Name = AnnotationValue.Text "Arabidopsis thaliana", TermSourceREF = "NCBITaxon")) + ] + + CompositeHeader.Output IOType.Sample, [ + CompositeCell.FreeText "Sample_1" + CompositeCell.FreeText "Sample_2" + ] + ] + ) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/TestUtils.fs b/tests/ARCTokenization.Tests/TestUtils.fs index c1c6857..e940b6b 100644 --- a/tests/ARCTokenization.Tests/TestUtils.fs +++ b/tests/ARCTokenization.Tests/TestUtils.fs @@ -3,13 +3,12 @@ open Xunit open ControlledVocabulary open ARCTokenization -open ARCTokenization.AnnotationTable module CvParam = - let termNamesEqual (cvpExpectec : CvParam) (cvpActual : CvParam) = + let termNamesEqual (cvpExpected : CvParam) (cvpActual : CvParam) = Assert.Equal( - (CvBase.getCvName cvpExpectec), + (CvBase.getCvName cvpExpected), (CvBase.getCvName cvpActual) ) @@ -19,9 +18,9 @@ module CvParam = (CvBase.getCvName cvpActual) ) - let accessionsEqual (cvpExpectec : CvParam) (cvpActual : CvParam) = + let accessionsEqual (cvpExpected : CvParam) (cvpActual : CvParam) = Assert.Equal( - (CvBase.getCvAccession cvpExpectec), + (CvBase.getCvAccession cvpExpected), (CvBase.getCvAccession cvpActual) ) @@ -31,9 +30,9 @@ module CvParam = (CvBase.getCvAccession cvpActual) ) - let refUrisEqual (cvpExpectec : CvParam) (cvpActual : CvParam) = + let refUrisEqual (cvpExpected : CvParam) (cvpActual : CvParam) = Assert.Equal( - (CvBase.getCvRef cvpExpectec), + (CvBase.getCvRef cvpExpected), (CvBase.getCvRef cvpActual) ) @@ -43,9 +42,9 @@ module CvParam = (CvBase.getCvRef cvpActual) ) - let valuesEqual (cvpExpectec : CvParam) (cvpActual : CvParam) = + let valuesEqual (cvpExpected : CvParam) (cvpActual : CvParam) = Assert.Equal( - (Param.getParamValue cvpExpectec), + (Param.getParamValue cvpExpected), (Param.getParamValue cvpActual) ) @@ -55,48 +54,140 @@ module CvParam = (Param.getParamValue cvpActual) 
) - let structuralEquality (cvpExpectec : CvParam) (cvpActual : CvParam) = - termNamesEqual cvpExpectec cvpActual - accessionsEqual cvpExpectec cvpActual - refUrisEqual cvpExpectec cvpActual - valuesEqual cvpExpectec cvpActual + let structuralEquality (cvpExpected : CvParam) (cvpActual : CvParam) = + termNamesEqual cvpExpected cvpActual + accessionsEqual cvpExpected cvpActual + refUrisEqual cvpExpected cvpActual + valuesEqual cvpExpected cvpActual module UserParam = open ARCTokenization - let termNamesEqual (upActual : UserParam) (upExpectec : UserParam) = + let termNamesEqual (upExpected : UserParam) (upActual : UserParam) = Assert.Equal( - (CvBase.getCvName upActual), - (CvBase.getCvName upExpectec) + (CvBase.getCvName upExpected), + (CvBase.getCvName upActual) ) -module TokenizedAnnotationTable = + let hasTermValue (expectedValue : string) (upActual : UserParam) = + Assert.Equal( + expectedValue, + (CvBase.getCvName upActual) + ) + + let accessionsEqual (upExpected : UserParam) (upActual : UserParam) = + Assert.Equal( + (CvBase.getCvAccession upExpected), + (CvBase.getCvAccession upActual) + ) + + let hasAccession (expectedID : string) (upActual : UserParam) = + Assert.Equal( + expectedID, + (CvBase.getCvAccession upActual) + ) + + let refUrisEqual (upExpected : UserParam) (upActual : UserParam) = + Assert.Equal( + (CvBase.getCvRef upExpected), + (CvBase.getCvRef upActual) + ) + + let hasRefUri (expectedRefUri : string) (upActual : UserParam) = + Assert.Equal( + expectedRefUri, + (CvBase.getCvRef upActual) + ) + + let valuesEqual (upExpected : UserParam) (upActual : UserParam) = + Assert.Equal( + (Param.getParamValue upExpected), + (Param.getParamValue upActual) + ) + + let hasValue (expectedValue : ParamValue) (upActual : UserParam) = + Assert.Equal( + expectedValue, + (Param.getParamValue upActual) + ) + + let structuralEquality (upActual : UserParam) (upExpected : UserParam) = + termNamesEqual upExpected upActual + accessionsEqual upExpected upActual + 
refUrisEqual upExpected upActual + valuesEqual upExpected upActual + +module Param = - let IOColumnsEqual (expectedIOColumns : CvParam list list) (table : TokenizedAnnotationTable) = - (expectedIOColumns, table.IOColumns) - ||> List.iter2 (fun expectedGroup actualGroup -> - (expectedGroup, actualGroup) - ||> List.iter2 (fun expectedParam actualParam -> - CvParam.structuralEquality expectedParam actualParam - ) + open ARCTokenization + + let termNamesEqual (ipExpected : IParam) (ipActual : IParam) = + Assert.Equal( + (CvBase.getCvName ipExpected), + (CvBase.getCvName ipActual) + ) + + let hasTermValue (expectedValue : string) (ipActual : IParam) = + Assert.Equal( + expectedValue, + (CvBase.getCvName ipActual) ) - let hasIOColumnAmount (expectedIOColumnAmount : int) (table : TokenizedAnnotationTable) = - Assert.Equal(expectedIOColumnAmount, table.IOColumns.Length) + let accessionsEqual (ipExpected : IParam) (ipActual : IParam) = + Assert.Equal( + (CvBase.getCvAccession ipExpected), + (CvBase.getCvAccession ipActual) + ) + let hasAccession (expectedID : string) (ipActual : IParam) = + Assert.Equal( + expectedID, + (CvBase.getCvAccession ipActual) + ) - let termRelatedBuildingBlocksEqual (expectedTermRelatedBuildingBlocks : CvParam list list) (table : TokenizedAnnotationTable) = - (expectedTermRelatedBuildingBlocks, table.TermRelatedBuildingBlocks) - ||> List.iter2 (fun expectedGroup actualGroup -> - (expectedGroup, actualGroup) - ||> List.iter2 (fun expectedParam actualParam -> - CvParam.structuralEquality expectedParam actualParam - ) + let refUrisEqual (ipExpected : IParam) (ipActual : IParam) = + Assert.Equal( + (CvBase.getCvRef ipExpected), + (CvBase.getCvRef ipActual) ) - let hasTermRelatedBuildingBlockAmount (expectedTermRelatedBuildingBlockAmount : int) (table : TokenizedAnnotationTable) = - Assert.Equal(expectedTermRelatedBuildingBlockAmount, table.TermRelatedBuildingBlocks.Length) + let hasRefUri (expectedRefUri : string) (ipActual : IParam) = + Assert.Equal( 
+ expectedRefUri, + (CvBase.getCvRef ipActual) + ) + + let valuesEqual (ipExpected : IParam) (ipActual : IParam) = + Assert.Equal( + (Param.getParamValue ipExpected), + (Param.getParamValue ipActual) + ) + + let hasValue (expectedValue : ParamValue) (ipActual : IParam) = + Assert.Equal( + expectedValue, + (Param.getParamValue ipActual) + ) + + let structuralEquality (ipActual : IParam) (ipExpected : IParam) = + termNamesEqual ipExpected ipActual + accessionsEqual ipExpected ipActual + refUrisEqual ipExpected ipActual + valuesEqual ipExpected ipActual + + let typedStructuralEquality (ipExpected : IParam) (ipActual : IParam) = + if Param.is ipExpected && Param.is ipActual then + structuralEquality ipExpected ipActual + elif + Param.is ipExpected && Param.is ipActual then + structuralEquality ipExpected ipActual + else + Assert.True(false, "Expected and actual parameters are not of the same param subtype") + +module TokenizedAnnotationTable = + + () diff --git a/tests/ARCTokenization.Tests/TokenizationTests/FileSystemTokenization.fs b/tests/ARCTokenization.Tests/TokenizationTests/FileSystemTokenization.fs new file mode 100644 index 0000000..90e2630 --- /dev/null +++ b/tests/ARCTokenization.Tests/TokenizationTests/FileSystemTokenization.fs @@ -0,0 +1,54 @@ +namespace TokenizationTests + +open ControlledVocabulary +open ARCTokenization +open Xunit + +module FileSystem = + + open ReferenceObjects.Tokenization.FileSystem + open System.IO + + let parsedRelativeDirectoryPaths = FS.tokenizeRelativeDirectoryPaths (Path.GetFullPath("Fixtures/testPaths/")) |> List.ofSeq |> List.sortBy (fun cvp -> cvp.Value |> ParamValue.getValueAsString) + + [] + let ``Relative directory paths are tokenized correctly`` () = + let actual = parsedRelativeDirectoryPaths + let expected = referenceRelativeDirectoryPaths + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(e.Equals(a)) + ) + + let parsedRelativeFilePaths = FS.tokenizeRelativeFilePaths 
(Path.GetFullPath("Fixtures/testPaths/")) |> List.ofSeq |> List.sortBy (fun cvp -> cvp.Value |> ParamValue.getValueAsString) + + [] + let ``Relative file paths are tokenized correctly`` () = + let actual = parsedRelativeFilePaths + let expected = referenceRelativeFilePaths + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(e.Equals(a)) + ) + + let parsedAbsoluteDirectoryPaths = FS.tokenizeAbsoluteDirectoryPaths (Path.GetFullPath("Fixtures/testPaths/")) |> List.ofSeq |> List.sortBy (fun cvp -> cvp.Value |> ParamValue.getValueAsString) + + [] + let ``Absolute directory paths are tokenized correctly`` () = + let actual = parsedAbsoluteDirectoryPaths + let expected = referenceAbsoluteDirectoryPaths(Path.Combine(System.Environment.CurrentDirectory, "Fixtures/testPaths/")) + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(e.Equals(a)) + ) + + let parsedAbsoluteFilePaths = FS.tokenizeAbsoluteFilePaths (Path.GetFullPath("Fixtures/testPaths/")) |> List.ofSeq |> List.sortBy (fun cvp -> cvp.Value |> ParamValue.getValueAsString) + + [] + let ``Absolute file paths are tokenized correctly`` () = + let actual = parsedAbsoluteFilePaths + let expected = referenceAbsoluteFilePaths(Path.Combine(System.Environment.CurrentDirectory, "Fixtures/testPaths/")) + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(e.Equals(a)) + ) diff --git a/tests/ARCTokenization.Tests/TokenizationTests/MetadataTokenization.fs b/tests/ARCTokenization.Tests/TokenizationTests/MetadataTokenization.fs new file mode 100644 index 0000000..da9aa5e --- /dev/null +++ b/tests/ARCTokenization.Tests/TokenizationTests/MetadataTokenization.fs @@ -0,0 +1,153 @@ +namespace TokenizationTests + +open ControlledVocabulary +open ARCTokenization +open Xunit + +module Metadata = + + module ParseKeyWithTerms = + + open ReferenceObjects.Tokenization.KeyParser + + let tokenizer = MetadataSheet.parseKeyWithTerms referenceTerms [] + + let parsedCvParams = + List.zip 
referenceKeys referenceParamValues + |> List.map (fun (key, pv) -> tokenizer key pv) + + [] + let ``CvTerms are matched and parsed as CvParams`` () = + let expected = referenceCvParams + let actual = parsedCvParams + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(Param.equals e a) + ) + + let parsedComments = + List.zip referenceCommentKeys referenceParamValues + |> List.map (fun (key, pv) -> tokenizer key pv) + + [] + let ``Comments are matched and parsed as CvParams`` () = + let expected = referenceCommentCvParams + let actual = parsedComments + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(Param.equals e a) + ) + + let parsedIgnoreLines = + List.zip referenceIgnoreLineKeys referenceParamValues + |> List.map (fun (key, pv) -> tokenizer key pv) + + [] + let ``IgnoreLines are matched and parsed as CvParams`` () = + let expected = referenceIgnoreLineCvParams + let actual = parsedIgnoreLines + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(Param.equals e a) + ) + + let parsedUserParams = + List.zip referenceUserParamKeys referenceParamValues + |> List.map (fun (key, pv) -> tokenizer key pv) + + [] + let ``UserParams are matched and parsed as UserParams`` () = + let expected = referenceUserParams + let actual = parsedUserParams + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(Param.equals e a) + ) + + let parsedMixedParams = + List.zip referenceMixedKeys referenceMixedParamValues + |> List.map (fun (key, pv) -> tokenizer key pv) + + [] + let ``Mixed keys are matched and parsed to correct Params`` () = + let expected = referenceMixedParams + let actual = parsedMixedParams + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(Param.equals e a) + ) + + module ConvertMetadataTokens = + + open ReferenceObjects.Tokenization.ConvertMetadataTokens + open FsSpreadsheet + + let tokenizer : FsCell seq -> IParam list = Tokenization.convertMetadataTokens 
(MetadataSheet.parseKeyWithTerms referenceTerms) + + let parsedCvParams = tokenizer referenceRow + + [] + let ``Row with CvTerm as section key is tokenized as CvParams`` () = + let actual = parsedCvParams + let expected = referenceCvParams + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(Param.equals e a) + ) + + [] + let ``CvTerm row has metadata section key as value of first token`` () = + let actual = parsedCvParams.[0] |> Param.getValueAsTerm + let expected = Terms.StructuralTerms.metadataSectionKey + Assert.Equal(expected, actual) + + let parsedComments = tokenizer referenceCommentRow + + [] + let ``Row with Comment as section key is tokenized as CvParams`` () = + let actual = parsedComments + let expected = referenceCommentCvParams + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(Param.equals e a) + ) + + [] + let ``Comment row has metadata section key as value of first token`` () = + let actual = parsedComments.[0] |> Param.getValueAsTerm + let expected = Terms.StructuralTerms.metadataSectionKey + Assert.Equal(expected, actual) + + let parsedIgnoreLines = tokenizer referenceIgnoreLineRow + + [] + let ``Row with IgnoreLine as section key is tokenized as CvParams`` () = + let actual = parsedIgnoreLines + let expected = referenceIgnoreLineCvParams + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(Param.equals e a) + ) + + [] + let ``IgnoreLine row has metadata section key as value of first token`` () = + let actual = parsedIgnoreLines.[0] |> Param.getValueAsTerm + let expected = Terms.StructuralTerms.metadataSectionKey + Assert.Equal(expected, actual) + + let parsedUserParams = tokenizer referenceUserParamRow + + [] + let ``Row with UserParam as section key is tokenized as UserParams`` () = + let actual = parsedUserParams + let expected = referenceUserParams + Assert.All( + List.zip expected actual, + fun (e, a) -> Assert.True(Param.equals e a) + ) + + [] + let ``UserParam row has metadata section 
key as value of first token`` () = + let actual = parsedUserParams.[0] |> Param.getValueAsTerm + let expected = Terms.StructuralTerms.metadataSectionKey + Assert.Equal(expected, actual) diff --git a/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs b/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs deleted file mode 100644 index 1ff6860..0000000 --- a/tests/ARCTokenization.Tests/TokenizationTests/ParserFunctions.fs +++ /dev/null @@ -1,200 +0,0 @@ -namespace TokenizationTests - -open ControlledVocabulary -open ARCTokenization -open Xunit - -module FileSystem = - - open ReferenceObjects.Tokenization.FileSystem - open System.IO - - let parsedRelativeDirectoryPaths = FS.tokenizeRelativeDirectoryPaths (Path.GetFullPath("Fixtures/testPaths/")) |> List.ofSeq |> List.sortBy (fun cvp -> cvp.Value |> ParamValue.getValueAsString) - - [] - let ``Relative directory paths are tokenized correctly`` () = - let actual = parsedRelativeDirectoryPaths - let expected = referenceRelativeDirectoryPaths - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(e.Equals(a)) - ) - - let parsedRelativeFilePaths = FS.tokenizeRelativeFilePaths (Path.GetFullPath("Fixtures/testPaths/")) |> List.ofSeq |> List.sortBy (fun cvp -> cvp.Value |> ParamValue.getValueAsString) - - [] - let ``Relative file paths are tokenized correctly`` () = - let actual = parsedRelativeFilePaths - let expected = referenceRelativeFilePaths - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(e.Equals(a)) - ) - - let parsedAbsoluteDirectoryPaths = FS.tokenizeAbsoluteDirectoryPaths (Path.GetFullPath("Fixtures/testPaths/")) |> List.ofSeq |> List.sortBy (fun cvp -> cvp.Value |> ParamValue.getValueAsString) - - [] - let ``Absolute directory paths are tokenized correctly`` () = - let actual = parsedAbsoluteDirectoryPaths - let expected = referenceAbsoluteDirectoryPaths(Path.Combine(System.Environment.CurrentDirectory, "Fixtures/testPaths/")) - Assert.All( - List.zip 
expected actual, - fun (e, a) -> Assert.True(e.Equals(a)) - ) - - let parsedAbsoluteFilePaths = FS.tokenizeAbsoluteFilePaths (Path.GetFullPath("Fixtures/testPaths/")) |> List.ofSeq |> List.sortBy (fun cvp -> cvp.Value |> ParamValue.getValueAsString) - - [] - let ``Absolute file paths are tokenized correctly`` () = - let actual = parsedAbsoluteFilePaths - let expected = referenceAbsoluteFilePaths(Path.Combine(System.Environment.CurrentDirectory, "Fixtures/testPaths/")) - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(e.Equals(a)) - ) - -module ParseKeyWithTerms = - - open ReferenceObjects.Tokenization.KeyParser - - let tokenizer = MetadataSheet.parseKeyWithTerms referenceTerms [] - - let parsedCvParams = - List.zip referenceKeys referenceParamValues - |> List.map (fun (key, pv) -> tokenizer key pv) - - [] - let ``CvTerms are matched and parsed as CvParams`` () = - let expected = referenceCvParams - let actual = parsedCvParams - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(Param.equals e a) - ) - - let parsedComments = - List.zip referenceCommentKeys referenceParamValues - |> List.map (fun (key, pv) -> tokenizer key pv) - - [] - let ``Comments are matched and parsed as CvParams`` () = - let expected = referenceCommentCvParams - let actual = parsedComments - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(Param.equals e a) - ) - - let parsedIgnoreLines = - List.zip referenceIgnoreLineKeys referenceParamValues - |> List.map (fun (key, pv) -> tokenizer key pv) - - [] - let ``IgnoreLines are matched and parsed as CvParams`` () = - let expected = referenceIgnoreLineCvParams - let actual = parsedIgnoreLines - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(Param.equals e a) - ) - - let parsedUserParams = - List.zip referenceUserParamKeys referenceParamValues - |> List.map (fun (key, pv) -> tokenizer key pv) - - [] - let ``UserParams are matched and parsed as UserParams`` () = - let expected 
= referenceUserParams - let actual = parsedUserParams - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(Param.equals e a) - ) - - let parsedMixedParams = - List.zip referenceMixedKeys referenceMixedParamValues - |> List.map (fun (key, pv) -> tokenizer key pv) - - [] - let ``Mixed keys are matched and parsed to correct Params`` () = - let expected = referenceMixedParams - let actual = parsedMixedParams - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(Param.equals e a) - ) - -module ConvertMetadataTokens = - - open ReferenceObjects.Tokenization.ConvertMetadataTokens - open FsSpreadsheet - - let tokenizer : FsCell seq -> IParam list = Tokenization.convertMetadataTokens (MetadataSheet.parseKeyWithTerms referenceTerms) - - let parsedCvParams = tokenizer referenceRow - - [] - let ``Row with CvTerm as section key is tokenized as CvParams`` () = - let actual = parsedCvParams - let expected = referenceCvParams - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(Param.equals e a) - ) - - [] - let ``CvTerm row has metadata section key as value of first token`` () = - let actual = parsedCvParams.[0] |> Param.getValueAsTerm - let expected = Terms.StructuralTerms.metadataSectionKey - Assert.Equal(expected, actual) - - let parsedComments = tokenizer referenceCommentRow - - [] - let ``Row with Comment as section key is tokenized as CvParams`` () = - let actual = parsedComments - let expected = referenceCommentCvParams - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(Param.equals e a) - ) - - [] - let ``Comment row has metadata section key as value of first token`` () = - let actual = parsedComments.[0] |> Param.getValueAsTerm - let expected = Terms.StructuralTerms.metadataSectionKey - Assert.Equal(expected, actual) - - let parsedIgnoreLines = tokenizer referenceIgnoreLineRow - - [] - let ``Row with IgnoreLine as section key is tokenized as CvParams`` () = - let actual = parsedIgnoreLines - let expected 
= referenceIgnoreLineCvParams - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(Param.equals e a) - ) - - [] - let ``IgnoreLine row has metadata section key as value of first token`` () = - let actual = parsedIgnoreLines.[0] |> Param.getValueAsTerm - let expected = Terms.StructuralTerms.metadataSectionKey - Assert.Equal(expected, actual) - - let parsedUserParams = tokenizer referenceUserParamRow - - [] - let ``Row with UserParam as section key is tokenized as UserParams`` () = - let actual = parsedUserParams - let expected = referenceUserParams - Assert.All( - List.zip expected actual, - fun (e, a) -> Assert.True(Param.equals e a) - ) - - [] - let ``UserParam row has metadata section key as value of first token`` () = - let actual = parsedUserParams.[0] |> Param.getValueAsTerm - let expected = Terms.StructuralTerms.metadataSectionKey - Assert.Equal(expected, actual) diff --git a/tests/ARCTokenization.Tests/TokenizationTests/ProcessGraphTokenization.fs b/tests/ARCTokenization.Tests/TokenizationTests/ProcessGraphTokenization.fs new file mode 100644 index 0000000..8339e6e --- /dev/null +++ b/tests/ARCTokenization.Tests/TokenizationTests/ProcessGraphTokenization.fs @@ -0,0 +1,6 @@ +namespace TokenizationTests.ProcessGraph + +open ControlledVocabulary +open ARCTokenization +open Xunit + diff --git a/tests/ControlledVocabulary.Tests/ControlledVocabulary.Tests.fsproj b/tests/ControlledVocabulary.Tests/ControlledVocabulary.Tests.fsproj index 3c313ae..3f16e63 100644 --- a/tests/ControlledVocabulary.Tests/ControlledVocabulary.Tests.fsproj +++ b/tests/ControlledVocabulary.Tests/ControlledVocabulary.Tests.fsproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 false false @@ -16,13 +16,13 @@ - - - + + + runtime; build; native; contentfiles; analyzers; buildtransitive all - + runtime; build; native; contentfiles; analyzers; buildtransitive all @@ -32,8 +32,4 @@ - - - -