Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

numpydoc style(signac/contrib/schema.py) #318

Merged
merged 13 commits into from
Apr 22, 2020
4 changes: 1 addition & 3 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,9 @@ def __getattr__(cls, name):
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.coverage',
#'sphinx.ext.viewcode',
'sphinx.ext.intersphinx',
'sphinx.ext.mathjax',
'nbsphinx',
'sphinx.ext.napoleon',
'IPython.sphinxext.ipython_console_highlighting',
'sphinx.ext.autosummary',
]
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ max-line-length = 100
exclude = configobj,passlib,cite.py,conf.py

[pydocstyle]
match = jsondict.py
ignore = D105, D107, D203, D213
match = (jsondict|schema).py
ignore = D105, D107, D203, D204, D213

[coverage:run]
source = signac
Expand Down
2 changes: 1 addition & 1 deletion signac/contrib/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _make_schema_based_path_function(jobs, exclude_keys=None, delimiter_nested='
return lambda job, sep=None: ''

index = [{'_id': job._id, 'statepoint': job.sp()} for job in jobs]
jsi = _build_job_statepoint_index(jobs=jobs, exclude_const=True, index=index)
jsi = _build_job_statepoint_index(exclude_const=True, index=index)
sp_index = OrderedDict(jsi)

paths = dict()
Expand Down
6 changes: 2 additions & 4 deletions signac/contrib/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,7 @@ def build_job_statepoint_index(self, exclude_const=False, index=None):
from .schema import _build_job_statepoint_index
if index is None:
index = [{'_id': job._id, 'statepoint': job.sp()} for job in self]
for x, y in _build_job_statepoint_index(
jobs=self, exclude_const=exclude_const, index=index):
for x, y in _build_job_statepoint_index(exclude_const=exclude_const, index=index):
yield tuple(x.split('.')), y

def detect_schema(self, exclude_const=False, subset=None, index=None):
Expand All @@ -578,8 +577,7 @@ def detect_schema(self, exclude_const=False, subset=None, index=None):
if subset is not None:
subset = {str(s) for s in subset}
index = [doc for doc in index if doc['_id'] in subset]
statepoint_index = _build_job_statepoint_index(
jobs=self, exclude_const=exclude_const, index=index)
statepoint_index = _build_job_statepoint_index(exclude_const=exclude_const, index=index)
return ProjectSchema.detect(statepoint_index)

@deprecated(deprecated_in="1.3", removed_in="2.0", current_version=__version__,
Expand Down
186 changes: 165 additions & 21 deletions signac/contrib/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Copyright (c) 2017 The Regents of the University of Michigan
# All rights reserved.
# This software is licensed under the BSD 3-Clause License.
"""Project Schema."""
ac-optimus marked this conversation as resolved.
Show resolved Hide resolved

from pprint import pformat
from collections import defaultdict as ddict
from numbers import Number
Expand All @@ -9,19 +11,55 @@


class _Vividict(dict):
"""A dict that returns an empty _Vividict for keys that are missing.

Useful for automatically nesting keys with ``vividict['a']['b']['c'] = 'd'``.

"""
def __missing__(self, key):
value = self[key] = type(self)()
return value


def _collect_by_type(values):
"""Construct a mapping of types to a set of elements drawn from the input values.

Parameters
----------
values :
An iterable of values.

Returns
-------
defaultdict(set)
A mapping of types to a set of input values of that type.

"""
values_by_type = ddict(set)
for v in values:
values_by_type[type(v)].add(v)
return values_by_type


def _build_job_statepoint_index(jobs, exclude_const, index):
def _build_job_statepoint_index(exclude_const, index):
"""Build index for job state points.

Parameters
----------
exclude_const : bool
Excludes state point keys whose values are constant across the index.
index :
An iterable of state points.

Yields
------
statepoint_key : str
State point key.
statepoint_values : dict
Dictionary mapping from a state point value to the set of job ids
with that state point value.

"""
from .collection import Collection
from .collection import _DictPlaceholder
from .utility import _nested_dicts_to_dotted_keys
Expand All @@ -36,58 +74,122 @@ def _build_job_statepoint_index(jobs, exclude_const, index):
def strip_prefix(key): return k[len('statepoint.'):]

def remove_dict_placeholder(x):
"""Remove _DictPlaceholder elements from a mapping.

Parameters
----------
x :
Dictionary from which ``_DictPlaceholder`` values will be removed.

Returns
-------
dict
Dictionary with ``_DictPlaceholder`` keys removed.

ac-optimus marked this conversation as resolved.
Show resolved Hide resolved
"""
return {k: v for k, v in x.items() if k is not _DictPlaceholder}

for k in sorted(tmp, key=lambda k: (len(tmp[k]), k)):
if exclude_const and len(tmp[k]) == 1 \
and len(tmp[k][list(tmp[k].keys())[0]]) == len(collection):
continue
yield strip_prefix(k), remove_dict_placeholder(tmp[k])
statepoint_key = strip_prefix(k)
statepoint_values = remove_dict_placeholder(tmp[k])
yield statepoint_key, statepoint_values


class ProjectSchema(object):
"A description of a project's state point schema."
"""A description of a project's state point schema.

Parameters
----------
schema : dict
Project schema.

"""
def __init__(self, schema=None):
if schema is None:
schema = dict()
self._schema = schema

@classmethod
def detect(cls, statepoint_index):
"""Detect Project's state point schema.

Parameters
----------
statepoint_index :
State point index.

Returns
-------
:class:`~ProjectSchema`
The detected project schema.

"""
return cls({k: _collect_by_type(v) for k, v in statepoint_index})

def format(self, depth=None, precision=None, max_num_range=None):
"""Format the schema for printing.

:param depth:
Parameters
----------
depth : int
A non-zero value will return a nested formatting up to the specified depth,
defaults to 0.
:type depth:
int
:param precision:
Round numerical values up the give precision, defaults to unlimited precision.
:type precision:
int
:param max_num_range:
precision : int
Round numerical values to the given precision, defaults to unlimited precision.
max_num_range : int
The maximum number of entries shown for a value range, defaults to 5.
:type max_num_range:
int
:returns:

Returns
-------
str
A formatted representation of the project schema.

"""
if depth is None:
depth = 0
if max_num_range is None:
max_num_range = 5

def _fmt_value(x):
"""Convert a value to a string, rounded to a given precision.

Parameters
----------
x :
Value to convert to string.

Returns
-------
str
Formatted value.

ac-optimus marked this conversation as resolved.
Show resolved Hide resolved
"""
if precision is not None and isinstance(x, Number):
return str(round(x, precision))
else:
return str(x)

def _fmt_range(type_, values):
"""Convert sequence of values into a comma-separated string.

Inserts an ellipsis (...) if the number of values exceeds ``max_num_range``.

Parameters
----------
type_ : type
Type of values.
values :
An iterable of values.

Returns
-------
str
Comma-separated string of the input values.

"""
try:
sorted_values = sorted(values)
except TypeError:
Expand All @@ -104,6 +206,19 @@ def _fmt_range(type_, values):
type_name=type_.__name__, values_string=values_string, length=len(values))

def _fmt_values(values):
"""Convert values into a single string.

Parameters
----------
values :
An iterable of values.

Returns
-------
str
Comma-separated string of the input values.

"""
return ', '.join(_fmt_range(*v) for v in values.items())

if depth > 0:
Expand Down Expand Up @@ -151,35 +266,64 @@ def __iter__(self):
return iter(self._schema)

def keys(self):
"""Return schema keys."""
return self._schema.keys()

def values(self):
"""Return schema values."""
return self._schema.values()

def items(self):
"""Return schema items."""
return self._schema.items()

def __eq__(self, other):
"""Check if two schemas are the same.

Returns
-------
bool
True if both schemas have the same keys and values.

"""
return self._schema == other._schema

def difference(self, other, ignore_values=False):
"""Determine the difference between this and another project schema.

:param ignore_values:
Ignore if the value (range) of a specific keys differ, only return missing keys.
:type ignore_values:
bool
:returns:
Parameters
----------
ignore_values : bool
Ignore if the value (range) of a specific keys differ,
only return missing keys (Default value = False).
other : :class:`~ProjectSchema`
Other project schema.

Returns
-------
set
A set of key tuples that are either missing or different in the other schema.
"""

"""
ret = set(self.keys()).difference(other.keys())
if not ignore_values:
ret.update({k for k, v in self.items() if k in other and other[k] != v})
return ret

def __call__(self, jobs_or_statepoints):
"Evaluate the schema for the given state points."
"""Evaluate the schema for the given state points.

Parameters
----------
jobs_or_statepoints :
An iterable of jobs or state points.

Returns
-------
:class:`~ProjectSchema`
Schema of the project.

"""
s = dict()
iterators = itertools.tee(jobs_or_statepoints, len(self))
for key, it in zip(self, iterators):
Expand Down