Skip to content

Commit

Permalink
Merge pull request #23 from pepkit/dev
Browse files Browse the repository at this point in the history
v0.1.3
  • Loading branch information
stolarczyk authored Oct 7, 2020
2 parents 45092ac + 3372416 commit d63aa77
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 20 deletions.
5 changes: 5 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.1.3] - 2020-10-07
### Changed
- `validate_inputs` function now returns a dictionary with validation data, i.e. `missing`, `required_inputs`, `all_inputs` and `input_file_size`, rather than a list of missing files
- `validate_inputs` function does not modify `Sample` objects

## [0.1.2] - 2020-08-06
### Added
- license in the package source distribution
Expand Down
2 changes: 1 addition & 1 deletion eido/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.2"
__version__ = "0.1.3"
14 changes: 14 additions & 0 deletions eido/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,17 @@
PROP_KEY = "properties"
REQUIRED_FILES_KEY = "required_files"
FILES_KEY = "files"

# Key names of the dictionary holding sample input validation results;
# looper relies on these exact values to refer to the dict values.
MISSING_KEY = "missing"
REQUIRED_INPUTS_KEY = "required_inputs"
ALL_INPUTS_KEY = "all_inputs"
INPUT_FILE_SIZE_KEY = "input_file_size"

# Groups of constant names; concatenated below to build ``__all__``.
GENERAL = [
    "LOGGING_LEVEL",
    "PKG_NAME",
    "INSPECT_CMD",
    "VALIDATE_CMD",
    "SUBPARSER_MSGS",
]
SCHEMA_SECTIONS = ["PROP_KEY", "REQUIRED_FILES_KEY", "FILES_KEY"]
SCHEMA_VALIDATION_KEYS = [
    "MISSING_KEY",
    "REQUIRED_INPUTS_KEY",
    "ALL_INPUTS_KEY",
    "INPUT_FILE_SIZE_KEY",
]
# Backward-compatible alias for the original, misspelled group name.
SCHEMA_VALIDAION_KEYS = SCHEMA_VALIDATION_KEYS

__all__ = GENERAL + SCHEMA_SECTIONS + SCHEMA_VALIDATION_KEYS
54 changes: 38 additions & 16 deletions eido/eido.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
from copy import deepcopy as dpcpy
from warnings import catch_warnings as cw
from pandas.core.common import flatten

from logmuse import init_logger
from ubiquerg import VersionInHelpParser, size
Expand Down Expand Up @@ -234,29 +235,50 @@ def validate_inputs(sample, schema):
:param peppy.Sample sample: sample to investigate
:param dict schema: schema dict to validate against
:return list[str]: list of missing required file paths,
empty if all exist.
:return dict: dictionary with validation data, i.e missing,
required_inputs, all_inputs, input_file_size
"""
sample.all_inputs = set()
sample.required_inputs = set()
def _get_attr_values(obj, attrlist):
    """
    Collect the values of the named attributes of an object in a flat list.

    :param object obj: object to read the attributes from (they are
        accessed with ``getattr``)
    :param str | Iterable[str] attrlist: name, or collection of names, of
        the attributes to retrieve values for
    :return list: value of each named attribute, in iteration order, with
        nested iterables flattened; an attribute the object lacks
        contributes an empty string; the list is empty if no attribute
        names were given
    """
    # No attribute names means no values. Return an empty list rather than
    # None: the result is passed straight to set() / set.update(), and both
    # raise TypeError on None.
    if not attrlist:
        return []
    # A single name is treated as a one-element collection. Testing for str
    # (rather than for list) lets tuples and other iterables of names through
    # instead of passing the whole collection to getattr as one name.
    if isinstance(attrlist, str):
        attrlist = [attrlist]
    # A missing attribute contributes "" rather than None, so the values
    # collected here can be handled as strings later on.
    return list(flatten(getattr(obj, attr, "") for attr in attrlist))

all_inputs = set()
required_inputs = set()
schema = schema[-1] # use only first schema, in case there are imports
sample_schema_dict = schema["properties"]["samples"]["items"]
if FILES_KEY in sample_schema_dict:
sample[FILES_KEY] = sample_schema_dict[FILES_KEY]
sample.all_inputs.update(sample.get_attr_values(sample[FILES_KEY]))
all_inputs.update(
_get_attr_values(sample, sample_schema_dict[FILES_KEY]))
if REQUIRED_FILES_KEY in sample_schema_dict:
sample[REQUIRED_FILES_KEY] = sample_schema_dict[REQUIRED_FILES_KEY]
sample.required_inputs = \
sample.get_attr_values(sample[REQUIRED_FILES_KEY])
sample.all_inputs.update(sample.required_inputs)
required_inputs = set(_get_attr_values(
sample, sample_schema_dict[REQUIRED_FILES_KEY]))
all_inputs.update(required_inputs)
with cw(record=True) as w:
sample.input_file_size = \
sum([size(f, size_str=False) or 0.0
for f in sample.all_inputs if f != ""])/(1024 ** 3)
input_file_size = sum([size(f, size_str=False) or 0.0
for f in all_inputs if f != ""])/(1024 ** 3)
if w:
_LOGGER.warning("{} input files missing, job input size was not"
" calculated accurately".format(len(w)))
return [i for i in sample.required_inputs if not os.path.exists(i)]
_LOGGER.warning(f"{len(w)} input files missing, job input size was "
f"not calculated accurately")

return {MISSING_KEY: [i for i in required_inputs if not os.path.exists(i)],
REQUIRED_INPUTS_KEY: required_inputs, ALL_INPUTS_KEY: all_inputs,
INPUT_FILE_SIZE_KEY: input_file_size}


def inspect_project(p, sample_names=None, max_attr=10):
Expand Down
3 changes: 2 additions & 1 deletion requirements/requirements-all.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ubiquerg>=0.5.2
logmuse>=0.2.5
jsonschema>=3.0.1
peppy>=0.30.1
peppy>=0.31.0
yacman>=0.6.7
pandas
2 changes: 0 additions & 2 deletions requirements/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
mock>=2.0.0
pytest>=3.0.7
1 change: 1 addition & 0 deletions requirements/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mock>=2.0.0
pytest>=4.6.9
pytest-cov>=2.8.1
coveralls

0 comments on commit d63aa77

Please sign in to comment.