From dfac32e28508f75e375223a2952b320ee4456754 Mon Sep 17 00:00:00 2001 From: Will Dumm Date: Thu, 1 Dec 2022 14:24:00 -0800 Subject: [PATCH] docs improvements (#49) * fix references and api index * try again * address comments from #48 --- docs/api.rst | 40 +++++++++++++++++++++++++++-- docs/conf.py | 1 + historydag/__init__.py | 1 + historydag/dag.py | 58 ++++++++++++++++++++++-------------------- historydag/utils.py | 7 +++-- 5 files changed, 76 insertions(+), 31 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 16ceb02..69564d2 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,16 +1,47 @@ .. currentmodule:: historydag -Python API for the ``historydag`` package +Description +----------- + +This page documents the API for the ``historydag`` package. +The fundamental data structure implemented here is the :class:`HistoryDag`. +This data structure provides efficient storage for collections of trees with +internal node labels. In this package, we refer to such a tree as a *history*. +A history can be represented as a tree-shaped :class:`HistoryDag` object. + +This package provides functions for: + +* creating histories from tree data, +* merging histories together to create history DAGs, +* doing efficient computation on collections of histories stored in history DAGs, and +* accessing histories contained in a history DAG, and exporting them to other + tree formats. Classes ------- -Top level classes. +Top level classes, promoted from the ``dag`` module. .. autosummary:: :toctree: stubs HistoryDag + HistoryDagNode + +Functions +--------- + +Top level functions, promoted from the ``dag`` module. + +.. autosummary:: + :toctree: stubs + + from_tree + empty_node + from_newick + history_dag_from_newicks + history_dag_from_etes + history_dag_from_histories Modules ------- @@ -18,5 +49,10 @@ Modules .. 
autosummary:: :toctree: stubs + dag + sequence_dag + mutation_annotated_dag utils + parsimony + compact_genome counterops diff --git a/docs/conf.py b/docs/conf.py index 3d5d33c..8636d50 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -69,6 +69,7 @@ False # Remove 'view source code' from top of page (for html, not python) ) autodoc_inherit_docstrings = True # If no class summary, inherit base class summary +autodoc_preserve_defaults = True # default arguments are not evaluated, but rendered as in code autodoc_default_options = { "members": True, diff --git a/historydag/__init__.py b/historydag/__init__.py index 9027aae..db18e9b 100644 --- a/historydag/__init__.py +++ b/historydag/__init__.py @@ -1,3 +1,4 @@ +# Remember to add any additional functions/modules to docs/api.rst from .dag import ( # noqa HistoryDag, HistoryDagNode, diff --git a/historydag/dag.py b/historydag/dag.py index 2a20a79..75678b4 100644 --- a/historydag/dag.py +++ b/historydag/dag.py @@ -409,14 +409,15 @@ def from_history_dag( Returns: The converted HistoryDag object, carrying the type from which this static method was called. After conversion to the new HistoryDag subclass ``to_cls``, the following will be true about node labels: - * If passed ``label_fields`` is None, then existing label fields will be preserved, except that missing - required label fields will be recovered if possible, and the existing label fields used to recover - them will be omitted. Recovered label fields will appear before the existing label fields. - * If passed ``label_fields`` is not None, then it must include all fields expected in node labels - in the converted history DAG object, otherwise an exception will be raised. - * Converted node label field order will match the order of passed ``label_fields``. - * All label fields passed in ``label_fields`` will be included - in converted node labels, if possible. Otherwise, an exception will be raised. 
+ + * If passed ``label_fields`` is None, then existing label fields will be preserved, except that missing + required label fields will be recovered if possible, and the existing label fields used to recover + them will be omitted. Recovered label fields will appear before the existing label fields. + * If passed ``label_fields`` is not None, then it must include all fields expected in node labels + in the converted history DAG object, otherwise an exception will be raised. + * Converted node label field order will match the order of passed ``label_fields``. + * All label fields passed in ``label_fields`` will be included + in converted node labels, if possible. Otherwise, an exception will be raised. """ if label_fields is not None: label_fields = list(label_fields) @@ -526,8 +527,8 @@ def __eq__(self, other: object) -> bool: raise NotImplementedError def __getitem__(self, key) -> "HistoryDag": - r"""Returns the sub-history below the current history dag corresponding - to the given index.""" + r"""Returns the history (tree-shaped sub-history DAG) in the current + history dag corresponding to the given index.""" length = self.count_histories() if key < 0: key = length + key @@ -803,10 +804,10 @@ def serialize(self) -> bytes: return pickle.dumps(self.__getstate__()) def get_histories(self) -> Generator["HistoryDag", None, None]: - """Return a generator containing all internally labeled trees in the - history DAG. + """Return a generator containing all histories in the history DAG. - Note that each history is a history DAG, containing a UA node. + Note that each history is a tree-shaped history DAG, containing a UA node, + which exists as a subgraph of the history DAG. The order of these histories does not necessarily match the order of indexing. 
That is, ``dag.get_histories()`` and ``history for history in @@ -1060,7 +1061,7 @@ def merge(self, trees: Union["HistoryDag", Sequence["HistoryDag"]]): pnode.add_edge(nodedict[child], weight=weight) def add_all_allowed_edges(self, *args, **kwargs) -> int: - """Provided as a deprecated synonym for :meth:``make_complete``.""" + """Provided as a deprecated synonym for :meth:`make_complete`.""" return self.make_complete(*args, **kwargs) def make_complete( @@ -1539,7 +1540,7 @@ def default_accum_above_edge(subtree_weight, edge_weight): return self.dagroot._dp_data def postorder_cladetree_accum(self, *args, **kwargs) -> Weight: - """Deprecated name for :meth:`postorder_history_accum`""" + """Deprecated name for :meth:`HistoryDag.postorder_history_accum`""" return self.postorder_history_accum(*args, **kwargs) def optimal_weight_annotate( @@ -1632,7 +1633,7 @@ def count_topologies(self, collapse_leaves: bool = False) -> int: :meth:`count_histories` gives the total number of unique trees in the DAG, taking into account internal node labels. - For large DAGs, this method is prohibitively slow. Use :meth:``count_topologies_fast`` instead. + For large DAGs, this method is prohibitively slow. Use :meth:`count_topologies_fast` instead. Args: collapse_leaves: By default, topologies are counted as-is in the DAG. However, @@ -1870,9 +1871,10 @@ def weight_counts_with_ambiguities( expand_func: Callable[[Label], Iterable[Label]] = utils.sequence_resolutions, ): r"""Template method for counting tree weights in the DAG, with exploded - labels. Like :meth:`weight_counts`, but creates dictionaries of Counter - objects at each node, keyed by possible sequences at that node. - Analogous to :meth:`count_histories` with `expand_func` provided. + labels. Like :meth:`HistoryDag.weight_count`, but creates dictionaries + of Counter objects at each node, keyed by possible sequences at that + node. Analogous to :meth:`HistoryDag.count_histories` with + `expand_func` provided. 
Weights must be hashable. @@ -1954,7 +1956,7 @@ def optimal_rf_distance( The given history must be on the same taxa as all trees in the DAG. Since computing reference splits is expensive, it is better to use - :meth:``optimal_weight_annotate`` and :meth:``utils.make_rfdistance_countfuncs`` + :meth:`optimal_weight_annotate` and :meth:`utils.make_rfdistance_countfuncs` instead of making multiple calls to this method with the same reference history. """ @@ -1967,7 +1969,7 @@ def count_rf_distances(self, history: "HistoryDag", rooted: bool = False): The given history must be on the same taxa as all trees in the DAG. Since computing reference splits is expensive, it is better to use - :meth:``weight_count`` and :meth:``utils.make_rfdistance_countfuncs`` + :meth:`weight_count` and :meth:`utils.make_rfdistance_countfuncs` instead of making multiple calls to this method with the same reference history. """ @@ -2234,15 +2236,17 @@ def insert_node( [HistoryDagNode, HistoryDagNode], Weight ] = utils.wrapped_hamming_distance, ): - """Inserts a sequence into the DAG as a child of the dagnode(s) - realizing the minimum overall distance between sequences, and then adds - the same new sequence to the dag as a child of other nodes in such a - way as to guarantee that every tree in the DAG now contains the new - sequence. + """Inserts a sequence into the DAG. + + Sequence will be inserted as a child of the dagnode(s) + realizing the minimum overall distance between sequences, and also added + to the dag as a child of other nodes in such a way as to guarantee + that every tree in the DAG now contains the new sequence. The choice of other nodes is computed by looking at the set of nodes that are `incompatible` with the first minimizing node. - For a full description of this, see :meth: `incompatible`. + For a full description of this, see the docstring for the method-local + function ``incompatible``. 
""" # all nodes in the dag except for the UA postorder = list(self.postorder())[:-1] diff --git a/historydag/utils.py b/historydag/utils.py index c77d94e..c27cb16 100644 --- a/historydag/utils.py +++ b/historydag/utils.py @@ -421,9 +421,12 @@ def wrapper(weighttuplelist: List[Weight]): def make_rfdistance_countfuncs(ref_tree: "HistoryDag", rooted: bool = False): - """Provides functions to compute RF distances of trees in a DAG, relative + """Provides functions to compute Robinson-Foulds (RF) distances of trees in a DAG, relative to a fixed reference tree. + We use :meth:`ete3.TreeNode.robinson_foulds` as the reference implementation for both + rooted and unrooted RF distance. + Args: ref_tree: A tree with respect to which Robinson-Foulds distance will be computed. rooted: If False, use edges' splits for RF distance computation. Otherwise, use @@ -434,7 +437,7 @@ def make_rfdistance_countfuncs(ref_tree: "HistoryDag", rooted: bool = False): This calculation relies on the observation that the symmetric distance between the splits A in a tree in the DAG, and the splits B in the reference tree, can be computed as: - |A ^ B| = |A U B| - |A n B| = |A - B| + |B| - |A n B| + ``|A ^ B| = |A U B| - |A n B| = |A - B| + |B| - |A n B|`` As long as tree edges are in bijection with splits, this can be computed without constructing the set A by considering each edge's split independently.