Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DO NOT MERGE: MAM 20241031 overlap, for post-google sheet value set, but based on feature branch #255

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ install:
.PHONY: install

# ---
# Project Syncronization
# Project Synchronization
# ---
#
# check we are up to date
Expand All @@ -95,7 +95,7 @@ site: clean schema-clean src/nmdc_submission_schema/schema/nmdc_submission_schem
gen-project gendoc project/json/nmdc_submission_schema.json

%.yaml: gen-project
# make deploy has been depricated by an updated .github/workflows/deploy-docs.yaml
# make deploy has been deprecated by an updated .github/workflows/deploy-docs.yaml
#deploy: all mkd-gh-deploy

# generates all project files
Expand Down
65 changes: 37 additions & 28 deletions project.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,6 @@ schema-clean: modifications-clean sheets_and_friends-clean examples-clean post-c
cp placeholder.md local
mkdir -p examples/output

# todo: fewer enums
# todo: use booleans for yes/no enumerations
# todo: some numbers appear as strings in the schema (just examples? check for minimum value etc)
# todo maximum value for pH has to be an int?

# Delete the with_shuttles.yaml artifact under sheets_and_friends/yaml_out.
# NOTE(review): the rules visible in this file consume local/with_shuttles.yaml,
# not this path -- confirm that this location is still written by anything.
sheets_and_friends-clean:
	rm -r -f sheets_and_friends/yaml_out/with_shuttles.yaml

Expand All @@ -54,7 +49,6 @@ local/with_shuttles_yq.yaml: local/with_shuttles.yaml
# ControlledTermValue: what about multivalued CTVs? don't see any besides chem_administration above at this time
# for water, can depth be a point, a range, or both?


# globally replace structured ranges with strings.
# undoes some of the range alterations that nmdc-schema makes when importing MIxS terms
# future versions of the nmdc-schema might just use strings, too
Expand Down Expand Up @@ -141,14 +135,12 @@ local/with_shuttles_yq.yaml: local/with_shuttles.yaml

yq -i 'del(.slots.[] | select(.name == "was_informed_by"))' $@


# Delete the with_modifications.yaml artifact under sheets_and_friends/yaml_out.
# NOTE(review): the rule visible in this file builds local/with_modifications.yaml,
# not this path -- confirm that this location is still written by anything.
modifications-clean:
	rm -r -f sheets_and_friends/yaml_out/with_modifications.yaml

# Fetch the materialized-patterns nmdc-schema YAML for the pinned release tag (v11.0.1).
# wget -O creates its output file even when the download fails, which would leave
# an empty target that Make considers up to date. Download to a temporary name and
# rename only on success.
local/nmdc.yaml:
	mkdir -p $(@D)
	wget -O $@.tmp https://raw.githubusercontent.com/microbiomedata/nmdc-schema/v11.0.1/nmdc_schema/nmdc_materialized_patterns.yaml \
		&& mv -f $@.tmp $@

# sheets-for-nmdc-submission-schema_validation_converter_empty.tsv
local/with_modifications.yaml: local/with_shuttles_yq.yaml \
sheets_and_friends/tsv_in/modifications_long.tsv \
sheets_and_friends/tsv_in/validation_converter.tsv \
Expand Down Expand Up @@ -196,9 +188,6 @@ src/nmdc_submission_schema/schema/nmdc_submission_schema.yaml: local/with_modifi
yq -i '(.slots.[] | select(.range == "TextValue") | .range) = "string"' $@
yq -i '(.slots.[] | select(.range == "TimestampValue") | .range) = "string"' $@

# yq -i '(.slots.[] | select(has("range") | not ) | .range ) = "string"' $@
# yq -i '(.classes.[].slot_usage.[] | select(has("range") | not ) | .range ) = "string"' $@

yq -i '(.slots.[] | select(.name == "sample_link") | .range ) = "string"' $@

yq -i '(.slots.[] | select(.range == "string") | .multivalued ) = false' $@
Expand All @@ -208,9 +197,6 @@ src/nmdc_submission_schema/schema/nmdc_submission_schema.yaml: local/with_modifi
yq -i 'del(.classes.[].slot_usage.[] | select(.multivalued == "false").inlined)' $@
yq -i 'del(.classes.[].slot_usage.[] | select(.multivalued == "false").inlined_as_list)' $@

# yq -i '(.slots.[] | select(.name == "dna_dnase") | .range) = "boolean"' $@
# yq -i '(.classes.[].slot_usage.[] | select(.name == "dna_dnase") | .range) = "boolean"' $@

yq -i '(.slots.[] | select(.name == "oxy_stat_samp") | .range) = "OxyStatSampEnum"' $@
yq -i '(.classes.[].slot_usage.[] | select(.name == "oxy_stat_samp") | .range) = "OxyStatSampEnum"' $@

Expand All @@ -230,8 +216,6 @@ examples-clean:
examples/output/README.md: src/nmdc_submission_schema/schema/nmdc_submission_schema.yaml \
src/data/invalid src/data/valid
mkdir -p $(dir $@)
# RDF/TTL generation is failing
# https://github.com/microbiomedata/submission-schema/issues/13
$(RUN) linkml-run-examples \
--output-formats json \
--output-formats yaml \
Expand All @@ -256,20 +240,45 @@ dh-build: project/json/nmdc_submission_schema.json

###

## todo frozen content in src/data/data_harmonizer_io has been removed
## todo find a better home for the se scripts if they are still of any use
#src/data/data_harmonizer_io/soil_data.json: src/data/data_harmonizer_io/soil_for_linkml.json
# $(RUN) linkml-json2dh \
# --input-file $< \
# --output-dir $(dir $@)

local/usage_template.tsv: src/nmdc_submission_schema/schema/nmdc_submission_schema.yaml
mkdir -p $(@D)
$(RUN) generate_and_populate_template \
--base-class slot_definition \
--columns-to-insert class \
--columns-to-insert slot \
--destination-template $@ \
--meta-model-excel-file local/meta.xlsx \
--meta-path https://raw.githubusercontent.com/linkml/linkml-model/main/linkml_model/model/schema/meta.yaml \
--source-schema-path $<
## depends on an old version of schemasheets?
## could be replaced with https://github.com/linkml/schemasheets/blob/bdde85d74637ae116fb5fd64a2e47999a1aebdfb/pyproject.toml#L36C1-L36C20
#local/usage_template.tsv: src/nmdc_submission_schema/schema/nmdc_submission_schema.yaml
# mkdir -p $(@D)
# $(RUN) generate_and_populate_template \
# --base-class slot_definition \
# --columns-to-insert class \
# --columns-to-insert slot \
# --destination-template $@ \
# --meta-model-excel-file local/meta.xlsx \
# --meta-path https://raw.githubusercontent.com/linkml/linkml-model/main/linkml_model/model/schema/meta.yaml \
# --source-schema-path $<

# List the ENVO descendants of "terrestrial biome", following only the `i`
# (is-a) predicate, using the OAK sqlite:obo:envo adapter.
# Presumably the candidate value set for soil env_broad_scale -- confirm.
# Output goes to a temporary file first so a failed runoak call cannot leave a
# truncated target that Make would treat as up to date.
local/soil-env-broad-scale-oak-only.txt:
	mkdir -p $(@D)
	$(RUN) runoak --input sqlite:obo:envo descendants --predicates i "terrestrial biome" > $@.tmp \
		&& mv -f $@.tmp $@

# Report ENVO terms that are is-a (`p=i`) descendants of "soil", excluding the
# is-a descendants of "enriched soil".
# TODO: also subtract soils whose differentia could appear in the env_local_scale slot.
# Query syntax reference:
# https://incatools.github.io/ontology-access-kit/howtos/use-oak-expression-language.html
# Output goes to a temporary file first so a failed runoak call cannot leave a
# truncated target that Make would treat as up to date.
local/soil-env-medium-oak-only.txt:
	mkdir -p $(@D)
	$(RUN) runoak --input sqlite:obo:envo info .desc//p=i "soil" .not .desc//p=i "enriched soil" > $@.tmp \
		&& mv -f $@.tmp $@

# Dump the relationships of the same term set used for the env_medium list:
# is-a (`p=i`) descendants of "soil" minus is-a descendants of "enriched soil".
# Output goes to a temporary file first so a failed runoak call cannot leave a
# truncated target that Make would treat as up to date.
local/soil-env-medium-oak-only-relations.tsv:
	mkdir -p $(@D)
	$(RUN) runoak --input sqlite:obo:envo relationships .desc//p=i "soil" .not .desc//p=i "enriched soil" > $@.tmp \
		&& mv -f $@.tmp $@

# Keep only the rows of the relationships table whose third tab-separated
# column is not "rdfs:subClassOf" ("no-sco" = no subClassOf).
local/soil-env-medium-oak-only-relations-no-sco.tsv: local/soil-env-medium-oak-only-relations.tsv
	awk 'BEGIN { FS = "\t" } $$3 != "rdfs:subClassOf" { print }' $< > $@

# Run tsv_text_diff.py over the non-subClassOf relationships, comparing the
# "subject_label" column against the "object_label" column.
# The script is listed as a prerequisite so that editing it rebuilds the
# output. It comes second, so $< still names the input TSV.
local/soil-env-medium-oak-only-relations-no-sco-with-dist.tsv: local/soil-env-medium-oak-only-relations-no-sco.tsv tsv_text_diff.py
	$(RUN) python tsv_text_diff.py \
		--input-file $< \
		--output-file $@ \
		--left-column "subject_label" \
		--right-column "object_label"

# Tally the distinct values of the fourth tab-separated column of the
# relationships table (presumably the predicate label), most frequent first.
# NOTE(review): if the input carries a header row, it is counted as a value too.
local/soil-env-medium-oak-only-relation-label-counts.txt: local/soil-env-medium-oak-only-relations.tsv
	awk 'BEGIN { FS = "\t" } { print $$4 }' $< | sort | uniq -c | sort -n -r > $@

# Tally the distinct values of the third tab-separated column of the
# relationships table (presumably the predicate id), most frequent first.
# NOTE(review): if the input carries a header row, it is counted as a value too.
local/soil-env-medium-oak-only-relation-id-counts.txt: local/soil-env-medium-oak-only-relations.tsv
	awk 'BEGIN { FS = "\t" } { print $$3 }' $< | sort | uniq -c | sort -n -r > $@
9 changes: 2 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@ include = [
[tool.poetry.dependencies]
python = "^3.9"
linkml-runtime = "^1.6.2"
textdistance = "^4.6.3"

[tool.poetry.group.dev.dependencies]
exhaustion-check = "^0.1.3"
linkml = "^1.7.10"
mkdocs-material = "^9.1.2"
mkdocs-mermaid2-plugin = "^0.6.0"
oaklib = "^0.5.28" # https://pypi.org/project/oaklib/0.5.6/
oaklib = "^0.5.28"
rdflib = "^6.2.0"
sheets-and-friends = "^0.5.4"

Expand All @@ -53,12 +54,6 @@ patterns = [
"(^version:\\s*['\\\"]?)[^'\\\"]*?(['\\\"]?)$"
]


## /Users/MAM/Documents/gitrepos/sheets_and_friends
## /home/mark/gitrepos/sheets_and_friends
#sheets-and-friends = { path = "/home/mark/gitrepos/sheets_and_friends", develop = true }


[build-system]
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]
build-backend = "poetry_dynamic_versioning.backend"
Expand Down
Loading