Skip to content

Commit

Permalink
Merge branch 'main' into feature-codeGeneration-#55
Browse files Browse the repository at this point in the history
  • Loading branch information
omaus authored Mar 13, 2024
2 parents 8c49196 + e5280af commit acc94c0
Show file tree
Hide file tree
Showing 24 changed files with 2,068 additions and 1,728 deletions.
60 changes: 32 additions & 28 deletions playground.fsx
Original file line number Diff line number Diff line change
@@ -1,34 +1,8 @@
//open System.IO
//open System.Collections.Generic

//let dllBasePath = @"c:/repos/csbiology/fsspreadsheet/src"
//File.Copy(dllBasePath + "/FsSpreadsheet/bin/Debug/netstandard2.0/FsSpreadsheet.dll", dllBasePath + "/FsSpreadsheet/bin/Debug/netstandard2.0/FsSpreadsheet_Copy.dll", true)
//File.Copy(dllBasePath + "/FsSpreadsheet.CsvIO/bin/Debug/netstandard2.0/FsSpreadsheet.CsvIO.dll", dllBasePath + "/FsSpreadsheet.CsvIO/bin/Debug/netstandard2.0/FsSpreadsheet.CsvIO_Copy.dll", true)
//File.Copy(dllBasePath + "/FsSpreadsheet.ExcelIO/bin/Debug/netstandard2.0/FsSpreadsheet.ExcelIO.dll", dllBasePath + "/FsSpreadsheet.ExcelIO/bin/Debug/netstandard2.0/FsSpreadsheet.ExcelIO_Copy.dll", true)
//File.Copy(@"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\net6.0\ArcGraphModel.dll", @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\net6.0\ArcGraphModel_Copy.dll", true)

#r "nuget: FSharpAux"
//#r "nuget: FsOboParser"
#r "nuget: OBO.NET"
#r "nuget: FsSpreadsheet.ExcelIO, 4.1.0"
#r "nuget: FSharp.FGL.ArrayAdjacencyGraph"

open DocumentFormat.OpenXml
open FSharpAux
open FSharp.FGL
open FSharp.FGL.ArrayAdjacencyGraph
open System.Collections.Generic


//#r "c:/repos/csbiology/fsspreadsheet/src/FsSpreadsheet/bin/Debug/netstandard2.0/FsSpreadsheet.dll"
//#r "c:/repos/csbiology/fsspreadsheet/src/FsSpreadsheet.CsvIO/bin/Debug/netstandard2.0/FsSpreadsheet.CsvIO.dll"
//#r "c:/repos/csbiology/fsspreadsheet/src/FsSpreadsheet.ExcelIO/bin/Debug/netstandard2.0/FsSpreadsheet.ExcelIO.dll"
//#r @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\net6.0\ArcGraphModel.dll"
//#r @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\netstandard2.0\ArcGraphModel.dll"
//#r @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel.IO\bin\Debug\netstandard2.0\ArcGraphModel.IO.dll"
//#r @"C:/Users/olive/.nuget/packages/fsharpaux/1.1.0/lib/net5.0/FSharpAux.dll"
//#r "src/ArcGraphModel/bin/Release/netstandard2.0/ArcGraphModel.dll"
//#r "src/ControlledVocabulary/bin/Release/netstandard2.0/ControlledVocabulary.dll"

#I "src/ControlledVocabulary/bin/Debug/netstandard2.0"
#I "src/ControlledVocabulary/bin/Release/netstandard2.0"
#r "ControlledVocabulary.dll"
Expand All @@ -45,6 +19,36 @@ open ControlledVocabulary
open type ControlledVocabulary.ParamBase
open ARCTokenization
open ARCTokenization.StructuralOntology
open System.IO


// Local checkout of the ARC prototype repository used as playground input.
let arcProt = @"C:\Repos\git.nfdi4plants.org\ArcPrototype"

// Tokens returned by `FileSystem.parseAbsoluteFilePaths` for that root
// (presumably one token per file below it - TODO confirm against FileSystem.fs).
let afts = FileSystem.parseAbsoluteFilePaths arcProt

// Print the string value of every token, one per line.
for aft in afts do
    printfn "%s" (Param.getValueAsString aft)

/// Tries to parse a metadata sheet from the file referenced by a token.
///
/// `isaFileName`    - file name (without directory) the token's path must have.
/// `isaMdsParsingF` - parser invoked with the token's string value.
/// `absFileToken`   - token whose string value is treated as a file path.
///
/// Returns `Some tokens` when the file name matches and the parser succeeds,
/// otherwise `None` (also when the parser raises any exception).
let tryParseMetadataSheetFromToken (isaFileName: string) (isaMdsParsingF: string -> IParam list) (absFileToken: IParam) =
    let cvpStr = Param.getValueAsString absFileToken
    printfn $"cvpStr: {cvpStr}"
    // Compare the bare file name exactly instead of testing whether the
    // whole path merely contains `isaFileName`.
    match Path.GetFileName cvpStr with
    | fileName when fileName = isaFileName ->
        // Best effort: every parser failure is reported as "nothing parsed".
        try
            cvpStr |> isaMdsParsingF |> Some
        with _ ->
            None
    | _ -> None

// Dry run over all tokens: print each token value and try to parse it as an
// investigation metadata sheet. `Seq.map` is lazy, so `Seq.length` is what
// actually enumerates the sequence and triggers the prints and parse attempts.
afts
|> Seq.map (fun cvp ->
    printfn $"{Param.getValueAsString cvp}"
    cvp |> tryParseMetadataSheetFromToken "isa.investigation.xlsx" (Investigation.parseMetadataSheetFromFile()))
|> Seq.length

// Same token set handed to the library's own token-based entry point.
let its = Investigation.parseMetadataSheetsFromTokens() afts


// First term of the investigation metadata ontology (throws if the list is empty).
let ot = Terms.InvestigationMetadata.ontology.Terms |> List.head
Expand Down Expand Up @@ -129,7 +133,7 @@ let expectedTermValuesSimple =
[""]
[""]
[""]
[""; "Maus"; "Keider"; "müller"; "oih"]
[""; "Maus"; "Keider"; "müller"; "oih"]
[""; "Oliver"; "andreas"]
[""; "L. I."; "C."]
[""; "[email protected]"]
Expand Down
30 changes: 29 additions & 1 deletion src/ARCTokenization/FileSystem.fs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ open ARCTokenization.StructuralOntology
open System.IO
open System
open ControlledVocabulary
open Tokenization

module internal FS =

Expand Down Expand Up @@ -51,4 +52,31 @@ module internal FS =
cvTerm = AFSO.``File Path``,
v = file.Replace("\\","/")
)
}
}


/// Normalises a path to forward slashes by replacing every backslash with '/'.
let internal normalisePath (path:string) =
    path.Replace('\\', '/')

/// Tokenizes every directory and file below `rootPath` (searched recursively).
///
/// Each enumerated path is normalised to forward slashes, tagged as directory
/// or file, and passed to `ArcFileSystem.getArcFileSystemTokens` together with
/// the normalised root. The result is a lazy sequence that yields all
/// directory tokens first, followed by all file tokens.
let tokenizeARCFileSystem (rootPath:string) =
    let rootPathNormalised = normalisePath rootPath

    // Pairs each raw path with its entity type and normalises its separators.
    let tagAs (pType: Tokenization.ArcFileSystem.PType) (paths: seq<string>) =
        paths |> Seq.map (fun p -> pType, normalisePath p)

    let directories =
        Directory.EnumerateDirectories(rootPath, "*", SearchOption.AllDirectories)
        |> tagAs Tokenization.ArcFileSystem.PType.Directory

    let files =
        Directory.EnumerateFiles(rootPath, "*", SearchOption.AllDirectories)
        |> tagAs Tokenization.ArcFileSystem.PType.File

    Seq.append directories files
    |> Seq.map (fun (pType, p) -> ArcFileSystem.getArcFileSystemTokens rootPathNormalised pType p)


7 changes: 6 additions & 1 deletion src/ARCTokenization/RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
### 3.0.0 - (Released 2024-01-0)
### 4.0.0 - (Released 2024-03-02)

- Additions:
- [Add enhanced Tokenization for Specific Folders and Files](https://github.com/nfdi4plants/ARCTokenization/pull/53)

### 3.0.0 - (Released 2024-01-10)

- Additions:
- [Add basic process graph tokenization](https://github.com/nfdi4plants/ARCTokenization/pull/48/commits/e6cd1775575aaac5aca3d2a48ff26fd31b136038):
Expand Down
38 changes: 38 additions & 0 deletions src/ARCTokenization/Tokenization.fs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ open ControlledVocabulary
open FsSpreadsheet
open MetadataSheet
open ARCTokenization.Terms
open ARCtrl
open ARCtrl.ISA

module Tokenization =
Expand Down Expand Up @@ -158,3 +159,40 @@ module Tokenization =
at.Columns
|> Array.map CompositeColumn.tokenize
|> List.ofArray

module ArcFileSystem =

    /// Represents the type of file system entity (Directory or File)
    type PType =
        | File
        | Directory

    /// Creates a CvParam for a path relative to the ARC root, choosing the
    /// ontology term from the entity type and the shape of the path.
    ///
    /// Directories are classified by their '/'-separated segments: a single
    /// segment equal to one of the `Path.*FolderName` constants (studies,
    /// assays, runs, workflows) yields the matching top-level directory term,
    /// two segments starting with such a constant yield the matching
    /// single-entity directory term, anything else yields ``Directory Path``.
    ///
    /// Files are classified by suffix (ISA investigation/assay/dataset/study
    /// workbooks, `.yml`, `.cwl`); anything else yields ``File Path``.
    ///
    /// `relativePath` is expected to use '/' as separator and is stored
    /// unchanged as the value of the returned CvParam.
    let convertRelativePath (pType:PType) (relativePath: string) =
        // Ordinal comparison keeps the classification independent of the
        // current culture.
        let endsWith (suffix: string) =
            relativePath.EndsWith(suffix, System.StringComparison.Ordinal)
        let term =
            match pType with
            | PType.Directory ->
                match relativePath.Split '/' with
                | [|Path.StudiesFolderName|]        -> StructuralOntology.AFSO.``Studies Directory``
                | [|Path.StudiesFolderName; _|]     -> StructuralOntology.AFSO.``Study Directory``
                | [|Path.AssaysFolderName|]         -> StructuralOntology.AFSO.``Assays Directory``
                | [|Path.AssaysFolderName; _|]      -> StructuralOntology.AFSO.``Assay Directory``
                | [|Path.RunsFolderName|]           -> StructuralOntology.AFSO.``Runs Directory``
                | [|Path.RunsFolderName; _|]        -> StructuralOntology.AFSO.``Run Directory``
                | [|Path.WorkflowsFolderName|]      -> StructuralOntology.AFSO.``Workflows Directory``
                | [|Path.WorkflowsFolderName; _|]   -> StructuralOntology.AFSO.``Workflow Directory``
                | _                                 -> StructuralOntology.AFSO.``Directory Path``
            | PType.File ->
                match relativePath with
                | _ when endsWith "isa.investigation.xlsx"  -> StructuralOntology.AFSO.``Investigation File``
                | _ when endsWith "isa.assay.xlsx"          -> StructuralOntology.AFSO.``Assay File``
                | _ when endsWith "isa.dataset.xlsx"        -> StructuralOntology.AFSO.``Dataset File``
                | _ when endsWith "isa.study.xlsx"          -> StructuralOntology.AFSO.``Study File``
                | _ when endsWith ".yml"                    -> StructuralOntology.AFSO.``YML File``
                | _ when endsWith ".cwl"                    -> StructuralOntology.AFSO.``CWL File``
                | _                                         -> StructuralOntology.AFSO.``File Path``
        CvParam(term, relativePath)

    /// Gets the CvParam for `path` relative to `rootPath`.
    ///
    /// `rootPath` - root of the ARC; must use the same separator style as `path`.
    /// `pType`    - whether `path` denotes a directory or a file.
    /// `path`     - full path of the entity.
    ///
    /// The root is removed only when it is a leading prefix of `path`; paths
    /// that do not start with `rootPath` are classified as given. Leading '/'
    /// characters of the remainder are trimmed before classification.
    let getArcFileSystemTokens (rootPath:string) (pType:PType) (path:string) =
        // Strip the root as a prefix only. A plain `path.Replace(rootPath, "")`
        // would also delete later occurrences of the root string inside the
        // path (e.g. root "arc" in "arc/assays/arc-data") and would throw for
        // an empty root.
        let withoutRoot =
            if path.StartsWith(rootPath, System.StringComparison.Ordinal) then
                path.Substring rootPath.Length
            else
                path
        convertRelativePath pType (withoutRoot.TrimStart '/')

Loading

0 comments on commit acc94c0

Please sign in to comment.