From 05c38c8d5905f6aac6b322a3e2aa0d44d4bc43e3 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 12 Dec 2023 15:17:37 +0100 Subject: [PATCH 1/2] Add v3 terms, and switch to native Hugo templating --- Makefile | 15 +- content/terms.Rmd | 47 - content/terms.html | 860 ------------------- content/terms.md | 23 + data/.gitignore | 2 + data/properties_description/.gitkeep | 0 layouts/shortcodes/properties-description.md | 41 + scripts/properties_to_json.py | 102 +++ 8 files changed, 182 insertions(+), 908 deletions(-) delete mode 100644 content/terms.Rmd delete mode 100644 content/terms.html create mode 100644 content/terms.md create mode 100644 data/properties_description/.gitkeep create mode 100644 layouts/shortcodes/properties-description.md create mode 100644 scripts/properties_to_json.py diff --git a/Makefile b/Makefile index 366d4e1..a17ebe5 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all: data/crosswalk.json +all: data/crosswalk.json data/properties_description.json # Download the latest crosswalk data/crosswalk.csv: @@ -7,3 +7,16 @@ data/crosswalk.csv: # Convert crosswalk.csv to crosswalk.json so Hugo can parse it data/crosswalk.json: data/crosswalk.csv python3 scripts/crosswalk_to_json.py + +# properties_description.csv file was only split off from crosswalks.csv starting with +# v2.1, so we can't download v2.0 itself. There were no major changes between the two, +# anyway. 
+data/properties_description/v2.0.csv: + wget https://github.com/codemeta/codemeta/raw/2.1/properties_description.csv -O $@ + +# Download properties descriptions for other versions +data/properties_description/v%.csv: + wget https://github.com/codemeta/codemeta/raw/$*/properties_description.csv -O $@ + +data/properties_description.json: data/properties_description/v2.0.csv data/properties_description/v3.0.csv + python3 scripts/properties_to_json.py diff --git a/content/terms.Rmd b/content/terms.Rmd deleted file mode 100644 index f0c672f..0000000 --- a/content/terms.Rmd +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: Codemeta Terms - ---- - -```{r echo=FALSE, include = FALSE, message = FALSE} -knitr::opts_chunk$set(echo=FALSE, message = FALSE, warning = FALSE) -library("readr") -library("dplyr") -``` - -## Terms from Schema.org - -Recognized properties for CodeMeta `SoftwareSourceCode` and `SoftwareApplication` includes the following terms from . These terms are part of the CodeMeta specification and can be used without any prefix. - -```{r} -crosswalk <- "https://github.com/codemeta/codemeta/raw/master/crosswalk.csv" -cw <- read_csv(crosswalk) -cw %>% - filter(grepl("schema:(SoftwareSourceCode|SoftwareApplication|CreativeWork|Thing)", `Parent Type`)) %>% - select(Property, Type, Description) %>% -knitr::kable("html", table.attr="class=\"table table-striped\"") -``` - -These terms are all recognized properties of or Types. Note that while most properties take basic data types as values (`Text`, `URL`), several take other node types, such as `Person` or `Organization`. Recommended fields for these node types in CodeMeta documents are given below. 
- -```{r} -cw %>% - filter(grepl("schema:(Person|Thing)", `Parent Type`)) %>% - select(Property, Type, Description) %>% -knitr::kable("html", table.attr="class=\"table table-striped\"") -``` - -## Codemeta terms - -The CodeMeta project also introduces the following additional properties, which lack clear equivalents in but can play an important role in software metadata records covered by the CodeMeta crosswalk. - -```{r} -cw %>% - filter(grepl("codemeta:", `Parent Type`)) %>% - select(Property, Type, Description) %>% -knitr::kable("html", table.attr="class=\"table table-striped\"") - -``` - - -Please suggest additional terms or adjustments to this representation in the [codemeta issues](https://github.com/codemeta/codemeta/issues) diff --git a/content/terms.html b/content/terms.html deleted file mode 100644 index 641089d..0000000 --- a/content/terms.html +++ /dev/null @@ -1,860 +0,0 @@ ---- -title: Codemeta Terms - ---- - - - -
-

Terms from Schema.org

-

Recognized properties for CodeMeta Code includes the following terms from https://schema.org. These terms are part of the CodeMeta specification and can be used without any prefix.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-Property - -Type - -Description -
-codeRepository - -URL - -Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). -
-programmingLanguage - -ComputerLanguage or Text - -The computer programming language. -
-runtimePlatform - -Text - -Runtime platform or script interpreter dependencies (Example - Java v1, Python2.3, .Net Framework 3.0). Supersedes runtime. -
-targetProduct - -SoftwareApplication - -Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. -
-applicationCategory - -Text or URL - -Type of software application, e.g. ‘Game, Multimedia’. -
-applicationSubCategory - -Text or URL - -Subcategory of the application, e.g. ‘Arcade Game’. -
-downloadUrl - -URL - -If the file can be downloaded, URL to download the binary. -
-fileSize - -Text - -Size of the application / package (e.g. 18MB). In the absence of a unit (MB, KB etc.), KB will be assumed. -
-installUrl - -URL - -URL at which the app may be installed, if different from the URL of the item. -
-memoryRequirements - -Text or URL - -Minimum memory requirements. -
-operatingSystem - -Text - -Operating systems supported (Windows 7, OSX 10.6, Android 1.6). -
-permissions - -Text - -Permission(s) required to run the app (for example, a mobile app may require full internet access or may run only on wifi). -
-processorRequirements - -Text - -Processor architecture required to run the application (e.g. IA64). -
-releaseNotes - -Text or URL - -Description of what changed in this version. -
-softwareHelp - -CreativeWork - -Software application help. -
-softwareRequirements - -SoftwareSourceCode - -Required software dependencies -
-softwareVersion - -Text - -Version of the software instance. -
-storageRequirements - -Text or URL - -Storage requirements (free space required). -
-supportingData - -DataFeed - -Supporting data for a SoftwareApplication. -
-author - -Organization or Person - -The author of this content or rating. Please note that author is special in that HTML 5 provides a special mechanism for indicating authorship via the rel tag. That is equivalent to this and may be used interchangeably. -
-citation - -CreativeWork or URL - -A citation or reference to another creative work, such as another publication, web page, scholarly article, etc. -
-contributor - -Organization or Person - -A secondary contributor to the CreativeWork or Event. -
-copyrightHolder - -Organization or Person - -The party holding the legal copyright to the CreativeWork. -
-copyrightYear - -Number - -The year during which the claimed copyright for the CreativeWork was first asserted. -
-creator - -Organization or Person - -The creator/author of this CreativeWork. This is the same as the Author property for CreativeWork. -
-dateCreated - -Date - -The date on which the CreativeWork was created or the item was added to a DataFeed. -
-dateModified - -Date - -The date on which the CreativeWork was most recently modified or when the item’s entry was modified within a DataFeed. -
-datePublished - -Date - -Date of first broadcast/publication. -
-editor - -Person - -Specifies the Person who edited the CreativeWork. -
-encoding - -MediaObject - -A media object that encodes this CreativeWork. This property is a synonym for associatedMedia. Supersedes encodings. -
-fileFormat - -Text or URL - -Media type, typically MIME format (see IANA site) of the content e.g. application/zip of a SoftwareApplication binary. In cases where a CreativeWork has several media type representations, ‘encoding’ can be used to indicate each MediaObject alongside particular fileFormat information. Unregistered or niche file formats can be indicated instead via the most appropriate URL, e.g. defining Web page or a Wikipedia entry. -
-funder - -Organization or Person - -A person or organization that supports (sponsors) something through some kind of financial contribution. -
-keywords - -Text - -Keywords or tags used to describe this content. Multiple entries in a keywords list are typically delimited by commas. -
-license - -CreativeWork or URL - -A license document that applies to this content, typically indicated by URL. -
-producer - -Organization or Person - -The person or organization who produced the work (e.g. music album, movie, tv/radio series etc.). -
-provider - -Organization or Person - -The service provider, service operator, or service performer; the goods producer. Another party (a seller) may offer those services or goods on behalf of the provider. A provider may also serve as the seller. Supersedes carrier. -
-publisher - -Organization or Person - -The publisher of the creative work. -
-sponsor - -Organization or Person - -A person or organization that supports a thing through a pledge, promise, or financial contribution. e.g. a sponsor of a Medical Study or a corporate sponsor of an event. -
-version - -Number or Text - -The version of the CreativeWork embodied by a specified resource. -
-isAccessibleForFree - -Boolean - -A flag to signal that the publication is accessible for free. -
-isPartOf - -CreativeWork - -Indicates a CreativeWork that this CreativeWork is (in some sense) part of. Reverse property hasPart -
-hasPart - -CreativeWork - -Indicates a CreativeWork that is (in some sense) a part of this CreativeWork. Reverse property isPartOf -
-position - -Integer or Text - -The position of an item in a series or sequence of items. (While schema.org considers this a property of CreativeWork, it is also the way to indicate ordering in any list (e.g. the Authors list). By default arrays are unordered in JSON-LD -
-description - -Text - -A description of the item. -
-identifier - -PropertyValue or URL - -The identifier property represents any kind of identifier for any kind of Thing, such as ISBNs, GTIN codes, UUIDs etc. Schema.org provides dedicated properties for representing many of these, either as textual strings or as URL (URI) links. See background notes for more details. -
-name - -Text - -The name of the item (software, Organization) -
-sameAs - -URL - -URL of a reference Web page that unambiguously indicates the item’s identity. E.g. the URL of the item’s Wikipedia page, Wikidata entry, or official website. -
-url - -URL - -URL of the item. -
-relatedLink - -URL - -A link related to this object, e.g. related web pages -
-

These terms are all recognized properties of https://schema.org/SoftwareSourceCode or https://schema.org/SoftwareApplication Types. Note that while most properties take basic data types as values (Text, URL), several take other node types, such as Person or Organization. Recommended fields for these node types in CodeMeta documents are given below:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-Property - -Type - -Description -
-description - -Text - -A description of the item. -
-identifier - -PropertyValue or URL - -The identifier property represents any kind of identifier for any kind of Thing, such as ISBNs, GTIN codes, UUIDs etc. Schema.org provides dedicated properties for representing many of these, either as textual strings or as URL (URI) links. See background notes for more details. -
-name - -Text - -The name of the item (software, Organization) -
-sameAs - -URL - -URL of a reference Web page that unambiguously indicates the item’s identity. E.g. the URL of the item’s Wikipedia page, Wikidata entry, or official website. -
-url - -URL - -URL of the item. -
-relatedLink - -URL - -A link related to this object, e.g. related web pages -
-givenName - -Text - -Given name. In the U.S., the first name of a Person. This can be used along with familyName instead of the name property -
-familyName - -Text - -Family name. In the U.S., the last name of an Person. This can be used along with givenName instead of the name property. -
-email - -Text - -Email address -
-affiliation - -Organization - -An organization that this person is affiliated with. For example, a school/university -
-identifier - -URL - -URL identifier, ideally an ORCID ID for individuals, a FundRef ID for funders -
-name - -Text - -The name of an Organization, or if separate given and family names cannot be resolved for a Person -
-address - -PostalAddress or Text - -Physical address of the item. -
-
-
-

Codemeta terms

-

The CodeMeta project also introduces the following additional properties, which lack clear equivalents in https://schema.org but can play an important role in software metadata records covered by the CodeMeta crosswalk.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-Property - -Type - -Description -
-softwareSuggestions - -SoftwareSourceCode - -Optional dependencies , e.g. for optional features, code development, etc. -
-maintainer - -Person - -Individual responsible for maintaining the software (usually includes an email contact address) -
-contIntegration - -URL - -link to continuous integration service -
-buildInstructions - -URL - -link to installation instructions/documentation -
-developmentStatus - -Text - -Description of development status, e.g. Active, inactive, suspended. See repostatus.org -
-embargoDate - -Date - -Software may be embargoed from public access until a specified date (e.g. pending publication, 1 year from publication) -
-funding - -Text - -Funding source (e.g. specific grant) -
-issueTracker - -URL - -link to software bug reporting or issue tracking system -
-referencePublication - -ScholarlyArticle - -An academic publication related to the software. -
-readme - -URL - -link to software Readme file -
-

Please suggest additional terms or adjustments to this representation in the codemeta issues

-
diff --git a/content/terms.md b/content/terms.md new file mode 100644 index 0000000..a8b0e0b --- /dev/null +++ b/content/terms.md @@ -0,0 +1,23 @@ +--- +title: Codemeta Terms + +--- + +## Terms from Schema.org + +Recognized properties for CodeMeta `SoftwareSourceCode` and `SoftwareApplication` include the following terms from <https://schema.org>. These terms are part of the CodeMeta specification and can be used without any prefix. + +{{< properties-description matchParentType="schema:(SoftwareSourceCode|SoftwareApplication|CreativeWork|Thing)">}} + +These terms are all recognized properties of <https://schema.org/SoftwareSourceCode> or <https://schema.org/SoftwareApplication> Types. Note that while most properties take basic data types as values (`Text`, `URL`), several take other node types, such as `Person`, `Organization`, `Review`, or `Role`. Recommended fields for these node types in CodeMeta documents are given below. + +{{< properties-description matchParentType="schema:(Person|Thing|Review|Role)">}} + +## Codemeta terms + +The CodeMeta project also introduces the following additional properties, which lack clear equivalents in <https://schema.org> but can play an important role in software metadata records covered by the CodeMeta crosswalk. 
+ +{{< properties-description matchParentType="codemeta:">}} + + +Please suggest additional terms or adjustments to this representation in the [codemeta issues](https://github.com/codemeta/codemeta/issues) diff --git a/data/.gitignore b/data/.gitignore index 985a6a7..4fe5f8c 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -1,2 +1,4 @@ crosswalk.csv crosswalk.json +properties_description.json +properties_description/*.csv diff --git a/data/properties_description/.gitkeep b/data/properties_description/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/layouts/shortcodes/properties-description.md b/layouts/shortcodes/properties-description.md new file mode 100644 index 0000000..230e1ac --- /dev/null +++ b/layouts/shortcodes/properties-description.md @@ -0,0 +1,41 @@ +{{ $matchParentType := .Params.matchParentType }} + + + + + + + + + + + +{{ range $property := .Site.Data.properties_description }} +{{ $parentType := index $property "Parent Type" }} +{{ if findRE $matchParentType $parentType }} + + + + + + +{{ end }} +{{ end }} + +
+Property + +Type + +Versions + +Description +
+{{ index $property "Property" }} + +{{ index $property "Type" }} + +{{ replaceRE "(\\.0)*" "" (delimit (sort (index $property "versions")) ", ") }} + +{{ index $property "Description" }} +
diff --git a/scripts/properties_to_json.py b/scripts/properties_to_json.py
new file mode 100644
index 0000000..2ccdcfd
--- /dev/null
+++ b/scripts/properties_to_json.py
@@ -0,0 +1,131 @@
+"""Transform ``properties_description.csv`` files from the main Codemeta repo into
+JSON processable by Hugo.
+
+Every ``.csv`` file in :file:`data/properties_description/` is read, newest version
+first. Rows that are identical across versions are merged into a single item whose
+``versions`` key lists every version containing that row. The items, sorted by
+property name, are written to :file:`data/properties_description.json`.
+
+For example, this turns this :file:`v3.0.csv`:
+
+.. code-block:: csv
+
+    Parent Type,Property,Type,Description
+    schema:CreativeWork,author,Organization or Person,The author of this content
+    codemeta:SoftwareSourceCode,readme,URL,link to software Readme file
+    codemeta:SoftwareSourceCode,embargoEndDate,Date,Software may be embargoed from public access until a specified date
+
+and this :file:`v2.0.csv`:
+
+.. code-block:: csv
+
+    Parent Type,Property,Type,Description
+    schema:CreativeWork,author,Organization or Person,The author of this content
+    codemeta:SoftwareSourceCode,readme,URL,link to software Readme file
+    codemeta:SoftwareSourceCode,embargoDate,Date,Software may be embargoed from public access until a specified date
+
+into:
+
+.. code-block:: json
+
+    [
+        {
+            "Parent Type": "schema:CreativeWork",
+            "Property": "author",
+            "Type": "Organization or Person",
+            "Description": "The author of this content",
+            "versions": ["v3.0", "v2.0"]
+        },
+        {
+            "Parent Type": "codemeta:SoftwareSourceCode",
+            "Property": "embargoDate",
+            "Type": "Date",
+            "Description": "Software may be embargoed from public access until a specified date",
+            "versions": ["v2.0"]
+        },
+        {
+            "Parent Type": "codemeta:SoftwareSourceCode",
+            "Property": "embargoEndDate",
+            "Type": "Date",
+            "Description": "Software may be embargoed from public access until a specified date",
+            "versions": ["v3.0"]
+        },
+        {
+            "Parent Type": "codemeta:SoftwareSourceCode",
+            "Property": "readme",
+            "Type": "URL",
+            "Description": "link to software Readme file",
+            "versions": ["v3.0", "v2.0"]
+        }
+    ]
+"""
+
+import csv
+import json
+import pathlib
+
+DIR = pathlib.Path(__file__).parent.parent
+CSV_PATH = DIR / "data/properties_description/"
+JSON_PATH = DIR / "data/properties_description.json"
+
+json_items = []
+
+
+def _version_key(path):
+    """Return the version in a ``v2.0.csv``-style file name as a tuple of ints.
+
+    Comparing tuples of ints, unlike floats, orders v2.10 after v2.9 and accepts
+    versions with more than two components.
+    """
+    return tuple(int(part) for part in path.stem.lstrip("v").split("."))
+
+
+# List .csv files newest version first: items are appended in that order and the final
+# sort is stable, so when a property differs between versions its newest variant
+# comes first.
+paths = sorted(CSV_PATH.glob("*.csv"), key=_version_key, reverse=True)
+
+for csv_path in paths:
+    version = csv_path.stem
+
+    # The csv module asks for files opened with newline=""; the explicit encoding
+    # avoids depending on the locale. The context manager closes the file once read.
+    with csv_path.open(newline="", encoding="utf-8") as csv_file:
+        csv_rows = list(csv.reader(csv_file))
+
+    if not csv_rows:
+        # e.g. a failed download that left an empty file behind
+        raise ValueError(f"{csv_path} is empty")
+
+    # header = ["Parent Type", "Property", "Type", "Description"]
+    (header, *rows) = csv_rows
+
+    for row in rows:
+        item = dict(zip(header, row))
+
+        # Blank lines are parsed as [], so "Property" may be missing altogether
+        if not item.get("Property"):
+            continue  # skip empty rows
+
+        # Look for a similar existing item from a newer Codemeta version; the superset
+        # test ignores the extra "versions" key of existing items
+        for existing_item in json_items:
+            if existing_item.items() >= item.items():
+                # We found an existing item, add this version to its list
+                if version in existing_item["versions"]:
+                    # Raised explicitly rather than asserted, so the check on the
+                    # input data survives ``python -O``
+                    raise ValueError(
+                        f"Codemeta {version} has duplicated property {item}"
+                    )
+                existing_item["versions"].append(version)
+                break
+        else:
+            # No similar item, create a new one
+            item["versions"] = [version]
+            json_items.append(item)
+
+# Sort properties by their name
+json_items.sort(key=lambda item: item["Property"])
+
+JSON_PATH.write_text(json.dumps(json_items, indent=" "))
From be4ffc6da438ff288fcd9e5e4800ef013efbc0d5 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Thu, 14 Dec 2023 11:49:17 +0100 Subject: [PATCH 2/2] Makefile: Rebuild when .py scripts are changed --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a17ebe5..e8c1dca 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ data/crosswalk.csv: wget https://github.com/codemeta/codemeta/raw/master/crosswalk.csv -O data/crosswalk.csv # Convert crosswalk.csv to crosswalk.json so Hugo can parse it -data/crosswalk.json: data/crosswalk.csv +data/crosswalk.json: scripts/crosswalk_to_json.py data/crosswalk.csv python3 scripts/crosswalk_to_json.py # properties_description.csv file was only split off from crosswalks.csv starting with @@ -18,5 +18,5 @@ data/properties_description/v2.0.csv: data/properties_description/v%.csv: wget https://github.com/codemeta/codemeta/raw/$*/properties_description.csv -O $@ -data/properties_description.json: 
data/properties_description/v2.0.csv data/properties_description/v3.0.csv +data/properties_description.json: scripts/properties_to_json.py data/properties_description/v2.0.csv data/properties_description/v3.0.csv python3 scripts/properties_to_json.py