diff --git a/.binder/environment.yml b/.binder/environment.yml index 748806a8..420d73a4 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -1,4 +1,17 @@ channels: - conda-forge dependencies: -- pyiron_base +- coveralls +- coverage +- bidict =0.22.1 +- cloudpickle =2.2.1 +- graphviz =8.1.0 +- maggma =0.57.1 +- matplotlib =3.8.0 +- numpy =1.26.0 +- pyiron_atomistics =0.3.4 +- pyiron_base =0.6.7 +- python-graphviz =0.20.1 +- toposort =1.10 +- typeguard =4.1.5 +- lammps diff --git a/.binder/postBuild b/.binder/postBuild index aa97083d..c63d4551 100644 --- a/.binder/postBuild +++ b/.binder/postBuild @@ -1,20 +1,20 @@ # pip install master pip install --no-deps . -# Note: This produces build and pyiron_module_template.egg-info directories +# Note: This produces build and pyiron_workflow.egg-info directories # clean up if [ -d "notebooks" ]; then mv notebooks/* . fi -if [ -d "${HOME}/pyiron_module_template" ]; then +if [ -d "${HOME}/pyiron_workflow" ]; then rm -r ${HOME}/.binder \ ${HOME}/.ci_support \ ${HOME}/.github \ ${HOME}/build \ ${HOME}/docs \ ${HOME}/notebooks \ - ${HOME}/pyiron_module_template \ - ${HOME}/pyiron_module_template.egg-info \ + ${HOME}/pyiron_workflow \ + ${HOME}/pyiron_workflow.egg-info \ ${HOME}/tests \ ${HOME}/.gitattributes \ ${HOME}/.gitignore \ diff --git a/.ci_support/environment-notebooks.yml b/.ci_support/environment-notebooks.yml new file mode 100644 index 00000000..4530a97b --- /dev/null +++ b/.ci_support/environment-notebooks.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge +dependencies: + - lammps \ No newline at end of file diff --git a/.ci_support/environment.yml b/.ci_support/environment.yml index cf17d456..2923d03a 100644 --- a/.ci_support/environment.yml +++ b/.ci_support/environment.yml @@ -1,5 +1,16 @@ channels: - conda-forge dependencies: - - pyiron_base - + - coveralls + - coverage + - bidict =0.22.1 + - cloudpickle =2.2.1 + - graphviz =8.1.0 + - maggma =0.57.1 + - matplotlib =3.8.0 + - numpy =1.26.0 + - pyiron_atomistics =0.3.4 + - pyiron_base =0.6.7 + - python-graphviz =0.20.1 + - toposort =1.10 + - typeguard =4.1.5 diff --git a/.coveragerc b/.coveragerc index 0176c43b..49adb486 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,5 @@ # .coveragerc to control coverage.py [run] -source = pyiron_module_template -omit = pyiron_module_template/_version.py +source = pyiron_workflow +omit = pyiron_workflow/_version.py concurrency = multiprocessing \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 84b63df1..73877b21 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -pyiron_module_template/_version.py export-subst +pyiron_workflow/_version.py export-subst diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 3a29cab1..1ac44be0 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,6 +1,6 @@ --- name: Bug report -about: Create a bug report to help us eliminate issues and improve pyiron_module_template +about: Create a bug report to help us eliminate issues and improve pyiron_workflow title: '' labels: bug assignees: '' diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index f96aa7b6..de4fd986 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,6 +1,6 @@ --- name: Feature request -about: Make a suggestion for a new feature or a change to pyiron_module_template +about: Make a suggestion for a new feature or a change 
to pyiron_workflow title: '' labels: enhancement assignees: '' @@ -13,7 +13,7 @@ assignees: '' **Detailed Description** - + **Further Information, Files, and Links** diff --git a/.github/delete-merged-branch-config.yml b/.github/delete-merged-branch-config.yml index 8a1c49b7..76898c70 100644 --- a/.github/delete-merged-branch-config.yml +++ b/.github/delete-merged-branch-config.yml @@ -1,3 +1,3 @@ exclude: - - master + - main delete_closed_pr: false diff --git a/.github/workflows/push-pull-main.yml b/.github/workflows/push-pull.yml similarity index 58% rename from .github/workflows/push-pull-main.yml rename to .github/workflows/push-pull.yml index 0ce15dab..9c3da711 100644 --- a/.github/workflows/push-pull-main.yml +++ b/.github/workflows/push-pull.yml @@ -1,14 +1,15 @@ # This runs jobs which pyiron modules should run on pushes or PRs to main -name: Push-Pull-main +name: Push-main-Pull-all on: push: branches: [ main ] pull_request: - branches: [ main ] jobs: pyiron: uses: pyiron/actions/.github/workflows/push-pull-main.yml@main - secrets: inherit \ No newline at end of file + secrets: inherit + with: + notebooks-env-files: .ci_support/environment.yml .ci_support/environment-notebooks.yml \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 71f7e8d9..3c96ce48 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ include versioneer.py -include pyiron_module_template/_version.py +include pyiron_workflow/_version.py include LICENSE diff --git a/README.md b/README.md index c7ee7082..01cdfb62 100644 --- a/README.md +++ b/README.md @@ -1,88 +1,31 @@ -# pyiron_module_template +# pyiron_workflow -## Overview - -This repository is a template for new pyiron modules similar to the existing modules of the -pyiron framework, e.g. -[pyiron_base](https://github.com/pyiron/pyiron_base), -[pyiron_atomistics](https://github.com/pyiron/pyiron_atomistics), -and -[pyiron_contrib](https://github.com/pyiron/pyiron_contrib). - -Within this repository, the new module is called `pyiron_module_template` which should be renamed to `pyiron_IntendedModuleName`. -This can be easily achieved by modifying and running the update_module_name.sh script. - -The licence is free to choose, but as a default the BSD3 licence packed here. -## Continuous Integration +## Overview -We collect all files relevant for the continuous integration (CI) pipelines in `.ci_support`, -while the actual CI workflows are handled by GitHub and stored in `.github`. -If you are cloning this template *inside* the pyiron GitHub organization, the full CI should work out-of-the-box by calling reusable workflows from [pyiron/actions](github.com/pyiron/actions) and inheriting organization-wide secrets. -Otherwise, you will either need to modify the CI workflow files, or give your repository the following secrets: -- `DEPENDABOT_WORKFLOW_TOKEN` (GitHub token for an account that has permissions to your repository -- needs to differ from the default `github_token` already available though! In pyiron we have a special [@pyiron_runner account](https://github.com/pyiron-runner) for this purpose.) -- `PYPI_PASSWORD` (Token generated on PyPi to give access to your account there) -- `CODACY_PROJECT_TOKEN` (Token generated on Codacy to give access to your account there) +This repository is home to the pyiron code for structuring workflows as graph objects, with different computational elements as nodes and data and execution signals travelling along edges. It is currently in an alpha state, changing quickly, and not yet feature-complete. 
-The default CI setup from [pyiron/actions](github.com/pyiron/actions) makes some assumptions about your directory structure. -The most important one is that your environment should be specified in `.ci_support/environment.yml`. -There is a base environment there already, giving dependence on `pyiron_base`. -The CI will automatically keep environment files read by readthedocs (which will look at `.readthedocs.yml`) and MyBinder (which looks in `.binder`) up-to-date based on this environment file. +## The absolute basics -In case you need extra environment files for some setups, you can modify the workflows in `.github/workflows`, which accept input variables for the docs, tests, and notebooks environments. -For example, it's typically good to not make your project depend on the `lammps` package, since this is not available for windows. -However, you might want to give some demo notebooks that run on MyBinder (a linux environment) and use LAMMPS calculations. -In this case, you could add a new file `.ci_support/environment-notebooks.yml`, and then edit `.github/workflows/push-pull-main.yml` so that instead of reading +`pyiron_workflow` offers a single-point-of-entry in the form of the `Workflow` object, and uses decorators to make it easy to turn regular python functions into "nodes" that can be put in a computation graph: -```yaml -jobs: - pyiron: - uses: pyiron/actions/.github/workflows/push-pull-main.yml@main - secrets: inherit - # All the environment files variables point to .ci_support/environment.yml by default -``` +```python +from pyiron_workflow import Workflow -It instead reads +@Workflow.wrap_as.function_node("sum") +def x_plus_y(x: int = 0, y: int = 0) -> int: + return x + y -```yaml -jobs: - pyiron: - uses: pyiron/actions/.github/workflows/push-pull-main.yml@main - secrets: inherit - with: - notebooks-env-files: .ci_support/environment.yml .ci_support/environment-notebooks.yml -``` +wf = Workflow("my_workflow") +wf.a1 = x_plus_y() +wf.a2 = x_plus_y() +wf.b = x_plus_y(x=wf.a1.outputs.sum, y=wf.a2.outputs.sum) -Where `.ci_support/environment-notebooks.yml` looks like: +out = wf(a1__x=0, a1__y=1, a2__x=2, a2__y=3) +out.b__sum +>>> 6 -```yaml -channels: - - conda-forge -dependencies: - - lammps +wf.draw() ``` -### Label-based CI - -Some CI triggers when labels get applied to a PR. -In a new repository, you will need to define these labels: -- `format_black`: Runs black analyis and creates a bot-generated commit to fix any format violations -- `run_CodeQL`: Runs the external CodeQL analysis (expensive, only do at the end) -- `run_coverage`: Run all the tests in `tests` and use coveralls to generate a coverage report (also expensive, only run near the end of your PR) - -## Documentation - -You should modify this README to reflect the purpose of your new package. -You can look at the other pyiron modules to get a hint for what sort of information to include, and how to link badges at the head of your README file. - -At a minimum, we suggest creating a meaningful example notebook in the `notebooks/` directory and creating a MyBinder badge so that people can quickly and easily explore your work. - -You can also edit the docs for your package by modifying `docs/index.rst`. -By default, only a simple API section is included. 
- -## Publishing your package - -If you are inside the pyiron organization or have your own `PYPI_PASSWORD` secret configured, your package will be published on PyPI automatically when you make a new "release" on GitHub -- *as long as* that tag matches the pattern specified in `setup.cfg`; by default any tag that `pyiron_module_template-`, where `pyiron_module_template` is replaced with the name of your module. We recommend using semantic versioning so that your first release looks like `pyiron_module_template-0.0.1`. - -Releasing your package on Conda-Forge is slightly more involved, but not too hard (at least for pure python packages). -See [conda-forge/staged-recipes](https://github.com/conda-forge/staged-recipes) for how to publish it there. +![](docs/_static/demo.png) \ No newline at end of file diff --git a/docs/_static/demo.png b/docs/_static/demo.png new file mode 100644 index 00000000..60cf5223 Binary files /dev/null and b/docs/_static/demo.png differ diff --git a/docs/conf.py b/docs/conf.py index fbf341e3..4968c2dd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -275,10 +275,10 @@ # dir menu entry, description, category) texinfo_documents = [ ('index', - 'pyiron_module_template', + 'pyiron_workflow', u'pyiron Documentation', u'Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department', - 'pyiron_module_template', + 'pyiron_workflow', 'One line description of project.', 'Miscellaneous'), ] @@ -295,7 +295,7 @@ # If true, do not generate a @detailmenu in the "Top" node's menu. # texinfo_no_detailmenu = False -main(['-e', '-o', 'apidoc', '../pyiron_module_template', '--force']) +main(['-e', '-o', 'apidoc', '../pyiron_workflow', '--force']) curdir = os.path.dirname(os.path.abspath(__file__)) if os.path.exists(os.path.join(curdir, 'source/notebooks')): diff --git a/docs/environment.yml b/docs/environment.yml index 5e6bde44..2f7d13e2 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -3,4 +3,16 @@ channels: dependencies: - ipykernel - nbsphinx -- pyiron_base +- coveralls +- coverage +- bidict =0.22.1 +- cloudpickle =2.2.1 +- graphviz =8.1.0 +- maggma =0.57.1 +- matplotlib =3.8.0 +- numpy =1.26.0 +- pyiron_atomistics =0.3.4 +- pyiron_base =0.6.7 +- python-graphviz =0.20.1 +- toposort =1.10 +- typeguard =4.1.5 diff --git a/docs/index.rst b/docs/index.rst index 1170c406..05b6a5f6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,11 +1,11 @@ -.. pyiron_module_template documentation master file +.. pyiron_workflow documentation master file .. _index: -====================== -pyiron_module_template -====================== +=============== +pyiron_workflow +=============== .. 
toctree:: :hidden: diff --git a/notebooks/version.ipynb b/notebooks/version.ipynb deleted file mode 100644 index f5956d83..00000000 --- a/notebooks/version.ipynb +++ /dev/null @@ -1,36 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "52a8cfcc", - "metadata": {}, - "outputs": [], - "source": [ - "import pyiron_module_template\n", - "print(pyiron_module_template.__version__)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/workflow_example.ipynb b/notebooks/workflow_example.ipynb new file mode 100644 index 00000000..8d21f40c --- /dev/null +++ b/notebooks/workflow_example.ipynb @@ -0,0 +1,3106 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5edfe456-c5b8-4347-a74f-1fb19fdff91b", + "metadata": {}, + "source": [ + "# Pyiron workflows: Introduction and Syntax\n", + "\n", + "Here we will highlight:\n", + "- How to instantiate a node\n", + "- How to make reusable node classes\n", + "- How to connect node inputs and outputs together\n", + "- SingleValue nodes and syntactic sugar\n", + "- Workflows: keeping your computational graphs organized\n", + "- Using pre-defined nodes \n", + "- Macro nodes" + ] + }, + { + "cell_type": "markdown", + "id": "f4e75528-3ea7-4feb-8167-533d439f798d", + "metadata": {}, + "source": [ + "## Instantiating a node\n", + "\n", + "Simple nodes can be defined on-the-fly by passing any callable to the `Function(Node)` class. This transforms the function into a node instance which has input and output, can be connected to other nodes in a workflow, and can run the function it stores.\n", + "\n", + "Input and output channels are _automatically_ extracted from the signature and return value(s) of the function. 
(Note: \"Nodized\" functions must have _at most_ one `return` expression!)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8aca3b9b-9ba6-497a-ba9e-abdb15a6a5df", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "89ec887909114967be06c171de9e83c6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pyiron_workflow.function import Function" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2502bc6b-d961-43d1-b2d9-66b20e2740d7", + "metadata": {}, + "outputs": [], + "source": [ + "def plus_minus_one(x):\n", + " return x+1, x-1\n", + "\n", + "pm_node = Function(plus_minus_one)" + ] + }, + { + "cell_type": "markdown", + "id": "5d15f0c2-b36d-4960-86b3-40d769f78528", + "metadata": {}, + "source": [ + "This has automatically created a node with input and output data channels whose labels are gathered by inspecting the function:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "840f4c07-4b21-4bcc-b15c-9847c6c1b048", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['x'] ['x+1', 'x-1']\n" + ] + } + ], + "source": [ + "print(pm_node.inputs.labels, pm_node.outputs.labels)" + ] + }, + { + "cell_type": "markdown", + "id": "22ee2a49-47d1-4cec-bb25-8441ea01faf7", + "metadata": {}, + "source": [ + "The output is still empty (`NotData`) because we haven't `run()` the node:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "613a90fa-66ed-49f8-ba8c-2f83a54253cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'x+1': , 'x-1': }\n" + ] + } + ], + "source": [ + "print(pm_node.outputs.to_value_dict())" + ] + }, + { + "cell_type": "markdown", + "id": "0374e277-55ab-45d2-8058-b06365bd07af", + "metadata": {}, + "source": [ + "If we try that now though, we'll just get a type error because the input is not set! " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "05196cd8-97c7-4f08-ae3a-ad6a076512f7", + "metadata": {}, + "outputs": [], + "source": [ + "# pm_node.run()\n", + "pm_node.update()" + ] + }, + { + "cell_type": "markdown", + "id": "48b0db5a-548e-4195-8361-76763ddf0474", + "metadata": {}, + "source": [ + "Using the softer `update()` call checks to make sure the input is `ready` before moving on to `run()`, avoiding this error. 
In this case, `update()` sees we have no input an aborts by returning `None`.\n", + "\n", + "(Note: If you _do_ swap `update()` to `run()` in this cell, not only will you get the expected error, but `pm_node` will also set its `failed` attribute to `True` -- this will prevent it from being `ready` again until you manually reset `pm_node.failed = False`.)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b6c00a4e-0c39-4283-ac00-53c3d07f7f10", + "metadata": {}, + "outputs": [], + "source": [ + "# pm_node.failed = False" + ] + }, + { + "cell_type": "markdown", + "id": "84af4b04-79b4-4944-a4c9-131af915d254", + "metadata": {}, + "source": [ + "If we update the input, we'll give the node enough data to work with:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b1500a40-f4f2-4c06-ad78-aaebcf3e9a50", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'x+1': 6, 'x-1': 4}\n" + ] + } + ], + "source": [ + "pm_node.inputs.x = 5\n", + "pm_node.run()\n", + "print(pm_node.outputs.to_value_dict())" + ] + }, + { + "cell_type": "markdown", + "id": "c54a691e-a075-4d41-bc0f-3a990857a27a", + "metadata": {}, + "source": [ + "Alternatively, the `run()` command (and `update()` when it proceeds to execution) just return the function's return value:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e845843c-61f4-4e5c-ac1a-d005787c2841", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(6, 4)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = pm_node.run()\n", + "out" + ] + }, + { + "cell_type": "markdown", + "id": "df4520d7-856e-4bc8-817f-5b2e22c1ddce", + "metadata": {}, + "source": [ + "We can give our function defaults so that it's ready to go from the beginning. Let's also take the opportunity to give our output channel a better name so we can get it by dot-access." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ab1ac28a-6e69-491f-882f-da4a43162dd7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def adder(x: int = 0, y: int = 1) -> int:\n", + " sum_ = x + y\n", + " return sum_\n", + "\n", + "adder_node = Function(adder)\n", + "adder_node.run()\n", + "adder_node.outputs.sum_.value # We use `value` to see the data the channel holds" + ] + }, + { + "cell_type": "markdown", + "id": "58ed9b25-6dde-488d-9582-d49d405793c6", + "metadata": {}, + "source": [ + "This node also exploits type hinting! 
`run()` will always force the execution, but `update()` will not only check if the data is there, but also if it is the right type:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ac0fe993-6c82-48c8-a780-cbd0c97fc386", + "metadata": {}, + "outputs": [], + "source": [ + "adder_node.inputs.x = \"not an integer\"\n", + "adder_node.inputs.x.type_hint, type(adder_node.inputs.x.value)\n", + "adder_node.update()\n", + "# No error because the update doesn't trigger a run since the type hint is not satisfied" + ] + }, + { + "cell_type": "markdown", + "id": "2737de39-6e75-44e1-b751-6315afe5c676", + "metadata": {}, + "source": [ + "Since the execution never happened, the output is unchanged" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "bcbd17f1-a3e4-44f0-bde1-cbddc51c5d73", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adder_node.outputs.sum_.value" + ] + }, + { + "cell_type": "markdown", + "id": "263f5b24-113f-45d9-82cc-0475c59da587", + "metadata": {}, + "source": [ + "Note that assigning data to channels with `=` is actually just syntactic sugar for calling the `update` method of the underlying channel:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "15742a49-4c23-4d4a-84d9-9bf19677544c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adder_node.inputs.x.update(2)\n", + "adder_node.update()" + ] + }, + { + "cell_type": "markdown", + "id": "416ba898-21ee-4638-820f-0f04a98a6706", + "metadata": {}, + "source": [ + "We can also set new input as any valid combination of kwargs and/or args at both instantiation or on call:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0c8f09a7-67c4-4c6c-a021-e3fea1a16576", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "30" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adder_node(10, y=20)\n", + "adder_node.run()" + ] + }, + { + "cell_type": "markdown", + "id": "c0997630-c053-42bb-8c0d-332f8bc26216", + "metadata": {}, + "source": [ + "Finally, we can update input and then `run` together by calling an already-instantiated node. Just like at node instantiation, the input for `Function` nodes can be set by positional and/or keyword argument. Here we'll use two positional args:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "69b59737-9e09-4b4b-a0e2-76a09de02c08", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "31" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adder_node(15, 16)" + ] + }, + { + "cell_type": "markdown", + "id": "f233f3f7-9576-4400-8e92-a1f6109d7f9b", + "metadata": {}, + "source": [ + "Note for advanced users: when the node has an executor set, running returns a futures object for the calculation, whose `.result()` will eventually be the function output." 
+ ] + }, + { + "cell_type": "markdown", + "id": "07a22cee-e340-4551-bb81-07d8be1d152b", + "metadata": {}, + "source": [ + "## Reusable node classes\n", + "\n", + "If we're going to use a node many times, we may want to define a new sub-class of `Function` to handle this.\n", + "\n", + "The can be done directly by inheriting from `Function` and overriding it's `__init__` function so that the core functionality of the node (i.e. the node function and output labels) are set in stone, but even easier is to use the `function_node` decorator to do this for you! \n", + "\n", + "The decorator also lets us explicitly choose the names of our output channels by passing the `output_labels` argument to the decorator -- as a string to create a single channel for the returned values, or as a list of strings equal to the number of returned values in a returned tuple." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "61b43a9b-8dad-48b7-9194-2045e465793b", + "metadata": {}, + "outputs": [], + "source": [ + "from pyiron_workflow.function import function_node" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "647360a9-c971-4272-995c-aa01e5f5bb83", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "class name = SubtractNode\n", + "label = subtract_node\n", + "default output = 1\n" + ] + } + ], + "source": [ + "@function_node(\"diff\")\n", + "def subtract_node(x: int | float = 2, y: int | float = 1) -> int | float:\n", + " return x - y\n", + "\n", + "sn = subtract_node()\n", + "print(\"class name =\", sn.__class__.__name__)\n", + "print(\"label =\", sn.label)\n", + "\n", + "sn() # Runs without updating input data, but we have defaults so that's fine\n", + "print(\"default output =\", sn.outputs.diff.value)" + ] + }, + { + "cell_type": "markdown", + "id": "9b9220b0-833d-4c6a-9929-5dfa60a47d14", + "metadata": { + "tags": [] + }, + "source": [ + "# Connecting nodes and controlling flow\n", + "\n", + "Multiple nodes can be used together to build a computational graph, with each node performing a particular operation in the overall workflow:\n", + "\n", + "The input and output of nodes can be chained together by connecting their data channels. When a node runs, its output channels will push their new value to each input node to whom they are connected. In this way, data propagates forwards\n", + "\n", + "In addition to input and output data channels, nodes also have \"signal\" channels available. 
Input signals are bound to a callback function (typically one of its node's methods), and output signals trigger the callbacks for all the input signal channels they're connected to.\n", + "\n", + "Standard nodes have a `run` input signal (which is, unsurprisingly, bound to the `run` method), and a `ran` output signal (which, again, hopefully with no great surprise, is triggered at the end of the `run` method.)\n", + "\n", + "In the example below we see how this works for a super-simple toy graph:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "2e418abf-7059-4e1e-9b9f-b3dc0a4b5e35", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 2\n" + ] + } + ], + "source": [ + "@function_node()\n", + "def linear(x):\n", + " return x\n", + "\n", + "@function_node(\"double\")\n", + "def times_two(x):\n", + " return 2 * x\n", + "\n", + "l = linear()\n", + "t2 = times_two()\n", + "\n", + "l.inputs.x = 1\n", + "t2.inputs.x = l.outputs.x\n", + "t2.signals.input.run = l.signals.output.ran\n", + "\n", + "l.run()\n", + "print(t2.inputs.x, t2.outputs.double)" + ] + }, + { + "cell_type": "markdown", + "id": "5da1ecfc-7145-4fb2-b5c0-417f050c5de4", + "metadata": {}, + "source": [ + "We can use a couple pieces of syntactic sugar to make this faster.\n", + "\n", + "First: data connections can be made with keyword arguments just like other input data definitions.\n", + "\n", + "Second: the `>` is a shortcut for creating connections between the left-hand node's `signals.output.ran` channel and the right-hand node's `signals.input.run` channel.\n", + "\n", + "With both of these together, we can write:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "59c29856-c77e-48a1-9f17-15d4c58be588", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 20\n" + ] + } + ], + "source": [ + "l = linear(x=10)\n", + "t2 = times_two(x=l.outputs.x)\n", + "l > t2\n", + "l.run()\n", + "print(t2.inputs.x, t2.outputs.double)" + ] + }, + { + "cell_type": "markdown", + "id": "e5c531a3-77e4-48ad-a189-fed619e79baa", + "metadata": {}, + "source": [ + "## Single Value nodes\n", + "\n", + "Many functions return just a single value. 
In this case, we can take advantage of the `SingleValue` node class which employs a bunch of syntactic tricks to make our lives easier.\n", + "\n", + "The main difference between this and it's parent the `Function` class is that attribute and item access fall back to looking for attributes and items of this single output value.\n", + "\n", + "Let's look at a use case:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "1a4e9693-0980-4435-aecc-3331d8b608dd", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from pyiron_workflow.function import SingleValue" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7c4d314b-33bb-4a67-bfb9-ed77fba3949c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[0.02040816 0.04081633 0.06122449]\n", + "0.5\n" + ] + } + ], + "source": [ + "def linspace_node(\n", + " start: int | float = 0, stop: int | float = 1, num: int = 50\n", + "):\n", + " linspace = np.linspace(start=start, stop=stop, num=num)\n", + " return linspace\n", + "\n", + "lin = SingleValue(linspace_node)\n", + "lin.run()\n", + "\n", + "print(type(lin.outputs.linspace.value)) # Output is just what we expect\n", + "print(lin[1:4]) # Gets items from the output\n", + "print(lin.mean()) # Finds the method on the output -- a special feature of SingleValueNode" + ] + }, + { + "cell_type": "markdown", + "id": "eef23cb0-6192-4fe6-b9cc-007e261e347a", + "metadata": {}, + "source": [ + "The other advantage is that single value nodes can also be connected directly to input, since there is only one possible data connection. Of course it has a construction decorator just like `Function`, so let's replace `@function_node` with `@single_value_node` in one of our examples above to see how it tightens up the syntax a bit:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "61ae572f-197b-4a60-8d3e-e19c1b9cc6e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "times_two (TimesTwo) output single-value: 4\n" + ] + } + ], + "source": [ + "from pyiron_workflow.function import single_value_node\n", + "\n", + "@single_value_node()\n", + "def linear(x):\n", + " return x\n", + "\n", + "@single_value_node(\"double\")\n", + "def times_two(x):\n", + " return 2 * x\n", + "\n", + "l = linear(x=2)\n", + "t2 = times_two(x=l) # Just takes the whole `l` node!\n", + "l > t2\n", + "l.run()\n", + "print(t2)" + ] + }, + { + "cell_type": "markdown", + "id": "b2e56a64-d053-4127-bb8c-069777c1c6b5", + "metadata": {}, + "source": [ + "Nodes can take input from multiple sources, and we can chain together these execution orders:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "6569014a-815b-46dd-8b47-4e1cd4584b3b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.45174171, 0.42157923, 0.505547 , 0.47028098, 0.43732173,\n", + " 0.50225988, 0.9376775 , 0.61550209, 0.81934053, 0.32220586])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAiMAAAGdCAYAAADAAnMpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAmG0lEQVR4nO3df1Dc1b3/8dcCwqKFtSQNbAQppjUFabXAECHmdqqGJHrpzZ12pLUxxmrnktobk9w4Jjf3Ssl0htH2WmsV1Bp0bFLN1B/9JnMpV2ZaI0m0aUjoFEkbb4JCzCIDuS5YCzHL+f6RCzcrYPgs7B6WfT5mPn/s2fNh33tcP/vK53w+Z13GGCMAAABL4mwXAAAAYhthBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVCbYLmIzh4WGdOnVKKSkpcrlctssBAACTYIzRwMCA5s+fr7i4ic9/REUYOXXqlLKysmyXAQAAQtDV1aXMzMwJn4+KMJKSkiLp3JtJTU21XA0AAJiM/v5+ZWVljX6PTyQqwsjI1ExqaiphBACAKHOhSyy4gBUAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgVVQsegaEW2DY6GDHafUMDGpeilvFOWmKj+N3kAAgEhyfGXnttddUXl6u+fPny+Vy6de//vUF99m7d68KCwvldrt1xRVX6PHHHw+lViAsGtt8uu6B3+pbP39D9zzfqm/9/A1d98Bv1djms10aAMQEx2Hkr3/9q66++mo9+uijk+rf0dGhm266SUuWLNGRI0f0r//6r1q3bp1efPFFx8UC062xzae1Ow7L5x8Mau/2D2rtjsMEEgCIAMfTNCtWrNCKFSsm3f/xxx/X5ZdfrocffliSlJubq0OHDunHP/6xvv71rzt9eWDaBIaNqve0y4zznJHkklS9p11L8zKYsgGAMAr7Bayvv/66ysrKgtqWLVumQ4cO6aOPPhp3n6GhIfX39wdtwHQ72HF6zBmR8xlJPv+gDnacjlxRABCDwh5Guru7lZ6eHtSWnp6us2fPqre3d9x9ampq5PF4RresrKxwl4kY1DMwcRAJpR8AIDQRubX34z8dbIwZt33Eli1b5Pf7R7eurq6w14jYMy/FPa39AAChCfutvRkZGeru7g5q6+npUUJCgubMmTPuPklJSUpKSgp3aYhxxTlp8nrc6vYPjnvdiEtShufcbb4AgPAJ+5mRkpISNTU1BbW98sorKioq0kUXXRTulwcmFB/nUlV5nqRzweN8I4+ryvO4eBUAwsxxGPnggw/U2tqq1tZWSedu3W1tbVVnZ6ekc1Msq1evHu1fWVmpd955Rxs3btTRo0dVX1+v7du3a9OmTdPzDoApWJ7vVd2qAmV4gqdiMjxu1a0q0PJ8r6XKACB2uMzIBRyT9Oqrr+qrX/3qmPbbb79dzzzzjNasWaO3335br7766uhze/fu1YYNG/Tmm29q/vz5uu+++1RZWTnp1+zv75fH45Hf71dqaqqTcoFJYQVWAJh+k/3+dhxGbCCMAAAQfSb7/c0P5QEAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMCqkMJIbW2tcnJy5Ha7VVhYqObm5k/sv3PnTl199dW6+OKL5fV6dccdd6ivry+kggEAwOziOIzs2rVL69ev19atW3XkyBEtWbJEK1asUGdn57j99+3bp9WrV+vOO+/Um2++qV/96lf6wx/+oLvuumvKxQMAgOjnOIw89NBDuvPOO3XXXXcpNzdXDz/8sLKyslRXVzdu/zfeeEOf/exntW7dOuXk5Oi6667TP/3TP+nQoUNTLh4AAEQ/R2HkzJkzamlpUVlZWVB7WVmZDhw4MO4+paWlOnnypBoaGmSM0XvvvacXXnhBN99884SvMzQ0pP7+/qANAADMTo7CSG9vrwKBgNLT04Pa09PT1d3dPe4+paWl2rlzpyoqKpSYmKiMjAxdeuml+tnPfjbh69TU1Mjj8YxuWVlZTsoEAABRJKQLWF0uV9BjY8yYthHt7e1at26d7r//frW0tKixsVEdHR2qrKyc8O9v2bJFfr9/dOvq6gqlTAAAEAUSnHSeO3eu4uPjx5wF6enpGXO2ZERNTY0WL16se++9V5L0pS99SZdccomWLFmiH/7wh/J6vWP2SUpKUlJSkpPSAABAlHJ0ZiQxMVGFhYVqamoKam9qalJpaem4+3z44YeKiwt+mfj4eEnnzqgAAIDY5niaZuPGjXrqqadUX1+vo0ePasOGDers7ByddtmyZYtWr1492r+8vFwvvfSS6urqdOLECe3fv1/r1q1TcXGx5s+fP33vBAAARCVH0zSSVFFRob6+Pm3btk0+n0/5+flqaGhQdna2JMnn8wWtObJmzRoNDAzo0Ucf1b/8y7/o0ksv1fXXX68HHnhg+t4FAACIWi4TBXMl/f398ng88vv9Sk1NtV0OAACYhMl+f/PbNAAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKxKsF0AAACwIzBsdLDjtHoGBjUvxa3inDTFx7kiXgdhBACAGNTY5lP1nnb5/IOjbV6PW1XleVqe741oLUzTAAAQYxrbfFq743BQEJGkbv+g1u44rMY2X0TrIYwAABBDAsNG1XvaZcZ5bqStek+7AsPj9QgPwggAADHkYMfpMWdEzmck+fyDOthxOmI1EUYAAIghPQMTB5FQ+k0HwggAADFkXop7WvtNB8IIAAAxpDgnTV6PWxPdwOvSubtqinPSIlYTYQQAgBgSH+dSVXmeJI0JJCOPq8rzIrreCGEEAIAYszzfq7pVBcrwBE/FZHjcqltVEPF1Rlj0DACAGLQ836uleRmswAoAAOyJj3OpZMEc22UwTQMAAOwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKziV3sRcYFhMy
N+shoAMDMQRhBRjW0+Ve9pl88/ONrm9bhVVZ6n5flei5UBAGxhmgYR09jm09odh4OCiCR1+we1dsdhNbb5LFUGALCJMIKICAwbVe9plxnnuZG26j3tCgyP1wMAMJsRRhARBztOjzkjcj4jyecf1MGO05ErCgAwIxBGEBE9AxMHkVD6AQBmD8IIImJeinta+wEAZg/CCCKiOCdNXo9bE93A69K5u2qKc9IiWRYAYAYgjCAi4uNcqirPk6QxgWTkcVV5HuuNAEAMCimM1NbWKicnR263W4WFhWpubv7E/kNDQ9q6dauys7OVlJSkBQsWqL6+PqSCEb2W53tVt6pAGZ7gqZgMj1t1qwpYZwQAYpTjRc927dql9evXq7a2VosXL9YTTzyhFStWqL29XZdffvm4+9xyyy167733tH37dn3uc59TT0+Pzp49O+XiEX2W53u1NC+DFVgBAKNcxhhHCzssWrRIBQUFqqurG23Lzc3VypUrVVNTM6Z/Y2OjvvnNb+rEiRNKSwvteoD+/n55PB75/X6lpqaG9DcAAEBkTfb729E0zZkzZ9TS0qKysrKg9rKyMh04cGDcfXbv3q2ioiI9+OCDuuyyy3TllVdq06ZN+tvf/ubkpQEAwCzlaJqmt7dXgUBA6enpQe3p6enq7u4ed58TJ05o3759crvdevnll9Xb26vvfe97On369ITXjQwNDWloaGj0cX9/v5MyAQBAFAnpAlaXK3h+3xgzpm3E8PCwXC6Xdu7cqeLiYt1000166KGH9Mwzz0x4dqSmpkYej2d0y8rKCqVMAAAQBRyFkblz5yo+Pn7MWZCenp4xZ0tGeL1eXXbZZfJ4PKNtubm5Msbo5MmT4+6zZcsW+f3+0a2rq8tJmQAAIIo4CiOJiYkqLCxUU1NTUHtTU5NKS0vH3Wfx4sU6deqUPvjgg9G2Y8eOKS4uTpmZmePuk5SUpNTU1KANAADMTo6naTZu3KinnnpK9fX1Onr0qDZs2KDOzk5VVlZKOndWY/Xq1aP9b731Vs2ZM0d33HGH2tvb9dprr+nee+/Vd77zHSUnJ0/fOwEAAFHJ8TojFRUV6uvr07Zt2+Tz+ZSfn6+GhgZlZ2dLknw+nzo7O0f7f+pTn1JTU5P++Z//WUVFRZozZ45uueUW/fCHP5y+dwEAAKKW43VGbGCdEQAAok9Y1hkBAACYboQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWOl4NH7AgMGx3sOK2egUHNS3GrOCdN8XEu22UBAGYZwgjG1djmU/Wedvn8g6NtXo9bVeV5Wp7vtVgZAGC2YZoGYzS2+bR2x+GgICJJ3f5Brd1xWI1tPkuVAQBmI8IIggSGjar3tGu8X08caave067A8Iz/fUUAQJQgjCDIwY7TY86InM9I8vkHdbDjdOSKAgDMaoQRBOkZmDiIhNIPAIALIYwgyLwU97T2AwDgQggjCFKckyavx62JbuB16dxdNcU5aZEsCwAwixFGECQ+zqWq8jxJGhNIRh5Xleex3ggAYNoQRjDG8nyv6lYVKMMTPBWT4XGrblUB64wAAKYVi55hXMvzvVqal8EKrACAsCOMYELxcS6VLJhjuwwAwCzHNA0AALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALAqpDBSW1urnJwcud1uFRYWqrm5eVL77d+/XwkJCbrmmmtCeVkAADALOQ4ju3bt0vr167V161YdOXJES5Ys0YoVK9TZ2fmJ+/n9fq1evVo33HBDyMUCAIDZx2WMMU52WLRokQoKClRXVzfalpubq5UrV6qmpmbC/b75zW/q85//vOLj4/XrX/9ara2tk37N/v5+eTwe+f1+paamOikXAABYMtnvb0dnRs6cOaOWlhaVlZUFtZeVlenAgQMT7vf000/r+PHjqqqqmtTrDA0Nqb+/P2gDAACzk6Mw0tvbq0AgoPT09KD29PR0dXd3j7vPW2+9pc2bN2vnzp1KSEiY1OvU1NTI4/GMbllZWU7KBAAAUSSkC1hdLlfQY2PMmDZJCgQCuvXWW1VdXa0rr7xy0n9/y5Yt8vv9o1tXV1coZQIAgCgwuVMV/2vu3LmKj48fcxakp6dnzNkSSRoYGNChQ4d05MgRff/735ckDQ8PyxijhIQEvfLKK7r++uvH7JeUlKSkpCQnpQEAgCjl6MxIYmKiCgsL1dTUFNTe1NSk0tLSMf1TU1P1pz/9Sa2traNbZWWlFi5cqNbWVi1atGhq1QMAgKjn6MyIJG3cuFG33XabioqKVFJSoieffFKdnZ2qrKyUdG6K5d1339Wzzz6ruLg45efnB+0/b948ud3uMe0AACA2OQ4jFRUV6uvr07Zt2+Tz+ZSfn6+GhgZlZ2dLknw+3wXXHAEAABjheJ0RG1hnBACA6BOWdUYAAACmG2EEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGBVgu0CAMwugWGjgx2n1TMwqHkpbhXnpCk+zmW7LAAzGGEEwLRpbPOpek+7fP7B0Tavx62q8jwtz/darAzATMY0DYBp0djm09odh4OCiCR1+we1dsdhNbb5LFUGYKYjjACYssCwUfWedplxnhtpq97TrsDweD0AxDrCCIApO9hxeswZkfMZST7/oA52nI5cUQCiBmEEwJT1DEwcRELpByC2EEYATNm8FPe09gMQWwgjAKasOCdNXo9bE93A69K5u2qKc9IiWRaAKEEYATBl8XEuVZXnSdKYQDLyuKo8j/VGAIyLMAJgWizP96puVYEyPMFTMRket+pWFbDOCIAJsegZgGmzPN+rpXkZrMAKwBHCCIBpFR/nUsmCObbLABBFmKYBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFaFFEZqa2uVk5Mjt9utwsJCNTc3T9j3pZde0tKlS/WZz3xGqampKikp0X/913+FXDAAAJhdHIeRXbt2af369dq6dauOHDmiJUuWaMWKFers7By3/2uvvaalS5eqoaFBLS0t+upXv6ry8nIdOXJky
sUDAIDo5zLGGCc7LFq0SAUFBaqrqxtty83N1cqVK1VTUzOpv3HVVVepoqJC999//6T69/f3y+PxyO/3KzU11Um5AADAksl+fzs6M3LmzBm1tLSorKwsqL2srEwHDhyY1N8YHh7WwMCA0tIm/sGsoaEh9ff3B20AAGB2chRGent7FQgElJ6eHtSenp6u7u7uSf2N//iP/9Bf//pX3XLLLRP2qampkcfjGd2ysrKclAkAAKJISBewulzBvzNhjBnTNp7nnntOP/jBD7Rr1y7Nmzdvwn5btmyR3+8f3bq6ukIpEwAARAFHv00zd+5cxcfHjzkL0tPTM+Zsycft2rVLd955p371q1/pxhtv/MS+SUlJSkpKclIaAACIUo7OjCQmJqqwsFBNTU1B7U1NTSotLZ1wv+eee05r1qzRL3/5S918882hVQoAAGYlx7/au3HjRt12220qKipSSUmJnnzySXV2dqqyslLSuSmWd999V88++6ykc0Fk9erV+ulPf6prr7129KxKcnKyPB7PNL4VAAAQjRyHkYqKCvX19Wnbtm3y+XzKz89XQ0ODsrOzJUk+ny9ozZEnnnhCZ8+e1d1336277757tP3222/XM888M/V3AAAAoprjdUZsYJ0RAACiT1jWGQEAAJhuhBEAAGAVYQQAAFhFGAEAAFY5vpsGAIBICwwbHew4rZ6BQc1Lcas4J03xcRde+RvRgTCCqMHBCIhNjW0+Ve9pl88/ONrm9bhVVZ6n5flei5VhuhBGEBU4GAGxqbHNp7U7Duvja1B0+we1dsdh1a0q4BgwC3DNCGa8kYPR+UFE+r+DUWObz1JlAMIpMGxUvad9TBCRNNpWvaddgeEZv1wWLoAwghmNgxEQuw52nB7zj5DzGUk+/6AOdpyOXFEIC8IIZjQORkDs6hmY+P/9UPph5iKMYEbjYATErnkp7mnth5mLMIIZjYMRELuKc9Lk9bg10T1zLp27kL04Jy2SZSEMCCOY0WwcjALDRq8f79P/a31Xrx/v43oUwJL4OJeqyvMkacwxYORxVXket/jPAtzaixlt5GC0dsdhuaSgC1nDcTDiFmJgZlme71XdqoIx/19m8P/lrOIyxsz4f/ZN9ieIMXtFIiRMtJ7BSMxhPQPAHhY9jE6T/f4mjCBqhPNgFBg2uu6B3054545L5/4ltu++6zkAAsAkTfb7m2kaRI34OJdKFswJy992cgtxuGoAgFjFBayAuIUYAGwijADiFmIAsIkwAoj1DADAJsIIINYzAACbCCPA/xpZzyDDEzwVk+Fxc1svAIQRd9MA51me79XSvAzWMwCACCKMAB8TzluIAQBjMU0DAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwKqYXWckMGxY2AoAgBkgJsNIY5tP1Xva5fP/38/Bez1uVZXnseQ3AAARFnPTNI1tPq3dcTgoiEhSt39Qa3ccVmObz1JlAADEppgKI4Fho+o97TLjPDfSVr2nXYHh8XoAAIBwiKkwcrDj9JgzIuczknz+QR3sOB25ogAAiHExFUZ6BiYOIqH0AwAAUxdTYWReinta+wEAgKmLqTBSnJMmr8etiW7gdencXTXFOWmRLAsAgJgWU2EkPs6lqvI8SRoTSEYeV5Xnsd4IAAARFFNhRJKW53tVt6pAGZ7gqZgMj1t1qwpYZwQAgAiLyUXPlud7tTQvgxVYAQCYAWIyjEjnpmxKFsyxXQYAADEv5qZpAADAzEIYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWxeytvZh+gWHD2i0AAMcII5gWjW0+Ve9pl8//f7947PW4VVWex6q2AIBPxDQNpqyxzae1Ow4HBRFJ6vYPau2Ow2ps81mqDAAQDQgjmJLAsFH1nnaZcZ4baave067A8Hg9AAAgjGCKDnacHnNG5HxGks8/qIMdpyNXFAAgqhBGMCU9AxMHkVD6AQBiD2EEUzIvxT2t/QAAsYcwgikpzkmT1+PWRDfwunTurprinLRIlgUAiCKEEUxJfJxLVeV5kjQmkIw8rirPY70RAMCEQgojtbW1ysnJkdvtVmFhoZqbmz+x/969e1VYWCi3260rrrhCjz/+eEjFYmZanu9V3aoCZXiCp2IyPG7VrSpgnREAwCdyvOjZrl27tH79etXW1mrx4sV64okntGLFCrW3t+vyyy8f07+jo0M33XSTvvvd72rHjh3av3+/vve97+kzn/mMvv71r0/Lm4B9y/O9WpqXwQqsAADHXMYYRwtALFq0SAUFBaqrqxtty83N1cqVK1VTUzOm/3333afdu3fr6NGjo22VlZX64x//qNdff31Sr9nf3y+PxyO/36/U1FQn5QIAAEsm+/3taJrmzJkzamlpUVlZWVB7WVmZDhw4MO4+r7/++pj+y5Yt06FDh/TRRx+Nu8/Q0JD6+/uDNgAAMDs5CiO9vb0KBAJKT08Pak9PT1d3d/e4+3R3d4/b/+zZs+rt7R13n5qaGnk8ntEtKyvLSZkAACCKhHQBq8sVfB2AMWZM24X6j9c+YsuWLfL7/aNbV1dXKGUCAIAo4OgC1rlz5yo+Pn7MWZCenp4xZz9GZGRkjNs/ISFBc+bMGXefpKQkJSUlOSkNAABEKUdnRhITE1VYWKimpqag9qamJpWWlo67T0lJyZj+r7zyioqKinTRRRc5LBcAAMw2jqdpNm7cqKeeekr19fU6evSoNmzYoM7OTlVWVko6N8WyevXq0f6VlZV65513tHHjRh09elT19fXavn27Nm3aNH3vAgAARC3H64xUVFSor69P27Ztk8/nU35+vhoaGpSdnS1J8vl86uzsHO2fk5OjhoYGbdiwQY899pjmz5+vRx55hDVGAACApBDWGbGBdUYAAIg+YVlnBAAAYLo5nqaxYeTkDYufAQAQPUa+ty80CRMVYWRgYECSWPwMAIAoNDAwII/HM+HzUXHNyPDwsE6dOqWUlJSghdL6+/uVlZWlrq4uriWZAGM0OYzThTFGF8YYXRhjNDmzZZyMMRoYGND8+fMVFzfxlSFRcWYkLi5OmZmZEz6fmpoa1f+xIoExmhzG6cIYowtjjC6MMZqc2TBOn3RGZAQXsAIAAKsIIwAAwKqoDiNJSUmqqqrid2w+AWM0OYzThTFGF8YYXRhjNDmxNk5RcQErAACYvaL6zAgAAIh+hBEAAGAVYQQAAFhFGAEAAFbN+DBSW1urnJwcud1uFRYWqrm5ecK++/bt0+LFizVnzhwlJyfrC1/4gn7yk59EsFo7nIzR+fbv36+EhARdc8014S1wBnAyRq+++qpcLteY7c9//nMEK7bD6WdpaGhIW7duVXZ2tpKSkrRgwQLV19dHqFo7nIzRmjVrxv0sXXXVVRGsOPKcfo527typq6++WhdffLG8Xq/uuOMO9fX1Rahae5yO02OPPabc3FwlJydr4cKFevbZZyNUaQSYGez55583F110kfn5z39u2tvbzT333GMuueQS884774zb//Dhw+aXv/ylaWtrMx0dHeYXv/iFufjii80TTzwR4cojx+kY
jXj//ffNFVdcYcrKyszVV18dmWItcTpGv/vd74wk85e//MX4fL7R7ezZsxGuPLJC+Sx97WtfM4sWLTJNTU2mo6PD/P73vzf79++PYNWR5XSM3n///aDPUFdXl0lLSzNVVVWRLTyCnI5Rc3OziYuLMz/96U/NiRMnTHNzs7nqqqvMypUrI1x5ZDkdp9raWpOSkmKef/55c/z4cfPcc8+ZT33qU2b37t0Rrjw8ZnQYKS4uNpWVlUFtX/jCF8zmzZsn/Tf+8R//0axatWq6S5sxQh2jiooK82//9m+mqqpq1ocRp2M0Ekb+53/+JwLVzRxOx+k3v/mN8Xg8pq+vLxLlzQhTPSa9/PLLxuVymbfffjsc5c0ITsfoRz/6kbniiiuC2h555BGTmZkZthpnAqfjVFJSYjZt2hTUds8995jFixeHrcZImrHTNGfOnFFLS4vKysqC2svKynTgwIFJ/Y0jR47owIED+spXvhKOEq0LdYyefvppHT9+XFVVVeEu0bqpfI6+/OUvy+v16oYbbtDvfve7cJZpXSjjtHv3bhUVFenBBx/UZZddpiuvvFKbNm3S3/72t0iUHHHTcUzavn27brzxRmVnZ4ejROtCGaPS0lKdPHlSDQ0NMsbovffe0wsvvKCbb745EiVbEco4DQ0Nye12B7UlJyfr4MGD+uijj8JWa6TM2DDS29urQCCg9PT0oPb09HR1d3d/4r6ZmZlKSkpSUVGR7r77bt11113hLNWaUMborbfe0ubNm7Vz504lJETF7yROSShj5PV69eSTT+rFF1/USy+9pIULF+qGG27Qa6+9FomSrQhlnE6cOKF9+/apra1NL7/8sh5++GG98MILuvvuuyNRcsRN5ZgkST6fT7/5zW9m7fFICm2MSktLtXPnTlVUVCgxMVEZGRm69NJL9bOf/SwSJVsRyjgtW7ZMTz31lFpaWmSM0aFDh1RfX6+PPvpIvb29kSg7rGb8t5HL5Qp6bIwZ0/Zxzc3N+uCDD/TGG29o8+bN+tznPqdvfetb4SzTqsmOUSAQ0K233qrq6mpdeeWVkSpvRnDyOVq4cKEWLlw4+rikpERdXV368Y9/rL/7u78La522ORmn4eFhuVwu7dy5c/RXOR966CF94xvf0GOPPabk5OSw12tDKMckSXrmmWd06aWXauXKlWGqbOZwMkbt7e1at26d7r//fi1btkw+n0/33nuvKisrtX379kiUa42Tcfr3f/93dXd369prr5UxRunp6VqzZo0efPBBxcfHR6LcsJqxZ0bmzp2r+Pj4MSmxp6dnTJr8uJycHH3xi1/Ud7/7XW3YsEE/+MEPwlipPU7HaGBgQIcOHdL3v/99JSQkKCEhQdu2bdMf//hHJSQk6Le//W2kSo+YqXyOznfttdfqrbfemu7yZoxQxsnr9eqyyy4L+nnw3NxcGWN08uTJsNZrw1Q+S8YY1dfX67bbblNiYmI4y7QqlDGqqanR4sWLde+99+pLX/qSli1bptraWtXX18vn80Wi7IgLZZySk5NVX1+vDz/8UG+//bY6Ozv12c9+VikpKZo7d24kyg6rGRtGEhMTVVhYqKampqD2pqYmlZaWTvrvGGM0NDQ03eXNCE7HKDU1VX/605/U2to6ulVWVmrhwoVqbW3VokWLIlV6xEzX5+jIkSPyer3TXd6MEco4LV68WKdOndIHH3ww2nbs2DHFxcUpMzMzrPXaMJXP0t69e/Xf//3fuvPOO8NZonWhjNGHH36ouLjgr6KRf+mbWfrTaVP5LF100UXKzMxUfHy8nn/+ef393//9mPGLSjaump2skVuftm/fbtrb28369evNJZdcMnol+ubNm81tt9022v/RRx81u3fvNseOHTPHjh0z9fX1JjU11WzdutXWWwg7p2P0cbFwN43TMfrJT35iXn75ZXPs2DHT1tZmNm/ebCSZF1980dZbiAin4zQwMGAyMzPNN77xDfPmm2+avXv3ms9//vPmrrvusvUWwi7U/99WrVplFi1aFOlyrXA6Rk8//bRJSEgwtbW15vjx42bfvn2mqKjIFBcX23oLEeF0nP7yl7+YX/ziF+bYsWPm97//vamoqDBpaWmmo6PD0juYXjM6jBhjzGOPPWays7NNYmKiKSgoMHv37h197vbbbzdf+cpXRh8/8sgj5qqrrjIXX3yxSU1NNV/+8pdNbW2tCQQCFiqPHCdj9HGxEEaMcTZGDzzwgFmwYIFxu93m05/+tLnuuuvMf/7nf1qoOvKcfpaOHj1qbrzxRpOcnGwyMzPNxo0bzYcffhjhqiPL6Ri9//77Jjk52Tz55JMRrtQep2P0yCOPmLy8PJOcnGy8Xq/59re/bU6ePBnhqiPPyTi1t7eba665xiQnJ5vU1FTzD//wD+bPf/6zharDw2XMLD0PBgAAosIsmGgCAADRjDACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAqv8PE9oHtjpnt7sAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "@single_value_node()\n", + "def noise(length: int = 1):\n", + " array = np.random.rand(length)\n", + " return array\n", + "\n", + "@function_node()\n", + "def plot(x, y):\n", + " fig = plt.scatter(x, y)\n", + " return fig\n", + "\n", + "x = noise(length=10)\n", + "y = noise(length=10)\n", + "f = plot(x=x, y=y)\n", + "x > y > f\n", + "x()" + ] + }, + { + "cell_type": "markdown", + "id": "5dc12164-b663-405b-872f-756996f628bd", + "metadata": {}, + "source": [ + "# Workflows\n", + "\n", + "The case where we have groups of connected nodes working together is our normal, intended use case.\n", + "We offer a formal way to group these objects together as a `Workflow(Node)` object.\n", + "`Workflow` also offers us a single point of entry to the codebase -- i.e. most of the time you shouldn't need the node imports used above, because the decorators are available right on the workflow class.\n", + "\n", + "We will also see here that we can our node output channels using the `output_labels: Optional[str | list[str] | tuple[str]` kwarg, in case they don't have a convenient name to start with.\n", + "This way we can always have convenient dot-based access (and tab completion) instead of having to access things by string-based keys.\n", + "\n", + "Finally, when a workflow is run, unless its `automate_execution` flag has been set to `False` or the data connections form a cyclic graph, it will _automatically_ build the necessary run signals! That means for all directed acyclic graph (DAG) workflows, all we typically need to worry about is the data connections." + ] + }, + { + "cell_type": "markdown", + "id": "9b9d3881-3584-4d6f-8068-5eed05760c36", + "metadata": {}, + "source": [ + "Here is an example showing how `Workflow` can be used as a single-point-of-import for defining new nodes:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "1cd000bd-9b24-4c39-9cac-70a3291d0660", + "metadata": {}, + "outputs": [], + "source": [ + "from pyiron_workflow import Workflow\n", + "\n", + "@Workflow.wrap_as.single_value_node(\"is_greater\")\n", + "def greater_than_half(x: int | float | bool = 0) -> bool:\n", + " \"\"\"The functionality doesn't matter here, it's just an example\"\"\"\n", + " return x > 0.5" + ] + }, + { + "cell_type": "markdown", + "id": "8f17751c-f5bf-4b13-8275-0685d8a1629e", + "metadata": {}, + "source": [ + "## Adding nodes to a workflow\n", + "\n", + "Each node can belong to exactly one workflow...but how to we create a workflow and add nodes to it\n", + "\n", + "All five of the approaches below are equivalent ways to add a node to a workflow. Note that when `create` is called from the workflow _class_ it just gives you access to the class being created; when it is called from a workflow _instance_, it wraps this class so that the created node has its parent value automatically set to the workflow instance that's creating it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "7964df3c-55af-4c25-afc5-9e07accb606a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "n1 == n1) 0.0 > 0.5 False\n", + "n2 == n2) 0.2 > 0.5 False\n", + "n3 == n3) 0.4 > 0.5 False\n", + "n4 == n4) 0.6 > 0.5 True\n", + "n5 == n5) 0.8 > 0.5 True\n" + ] + } + ], + "source": [ + "n1 = greater_than_half(label=\"n1\")\n", + "\n", + "wf = Workflow(\"my_wf\", n1) # As args at init\n", + "wf.create.SingleValue(n1.node_function, output_labels=\"p1\", label=\"n2\") # Instantiating from the class with a function\n", + "wf.add(greater_than_half(label=\"n3\")) # Instantiating then passing to node adder\n", + "wf.n4 = greater_than_half(label=\"will_get_overwritten_with_n4\") # Set attribute to instance\n", + "greater_than_half(label=\"n5\", parent=wf) # By passing the workflow to the node\n", + "\n", + "for i, (label, node) in enumerate(wf.nodes.items()):\n", + " x = i / len(wf)\n", + " node(x=x)\n", + " print(f\"{label} == {node.label}) {x} > 0.5 {node.single_value}\")" + ] + }, + { + "cell_type": "markdown", + "id": "dd5768a4-1810-4675-9389-bceb053cddfa", + "metadata": {}, + "source": [ + "Workflows have inputs and outputs just like function nodes, but these are dynamically created to map to all _unconnected_ input and output for their underlying graph. They automatically get named by connecting the node label and channel label with a double underscore, but this can be overriden by providing an `inputs_map` and/or an `outputs_map` -- these maps can also let you expose data channels that would otherwise be hidden because they have a connection!" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "809178a5-2e6b-471d-89ef-0797db47c5ad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['ax', 'b__x'] ['ay', 'a + b + 2']\n" + ] + } + ], + "source": [ + "wf = Workflow(\"simple\")\n", + "\n", + "@Workflow.wrap_as.single_value_node()\n", + "def add_one(x):\n", + " y = x + 1\n", + " return y\n", + "\n", + "@Workflow.wrap_as.single_value_node(\"sum\")\n", + "def add_node(x, y):\n", + " return x + y\n", + "\n", + "wf.a = add_one(0)\n", + "wf.b = add_one(0)\n", + "wf.sum = add_node(wf.a, wf.b) \n", + "wf.inputs_map = {\"a__x\": \"ax\"}\n", + "wf.outputs_map = {\"a__y\": \"ay\", \"sum__sum\": \"a + b + 2\"}\n", + "# Remember, with single value nodes we can pass the whole node instead of an output channel!\n", + "\n", + "print(wf.inputs.labels, wf.outputs.labels)" + ] + }, + { + "cell_type": "markdown", + "id": "848a45a9-dfcc-4b9e-aec5-e879d88325a2", + "metadata": {}, + "source": [ + "When `run()` is called on a workflow, it will call `run()` on each node in its `starting_nodes` list and rely on these to propagate the execution with their run signals. If your data flow is DAG-like, all of this gets handled automatically so you just need to call `run()` on the workflow.\n", + "\n", + "If you do have cyclic data flows, or just want more control, you are still free to set the `starting_nodes` and run signals yourself, just don't forget to set `automate_execution=False` on the workflow." + ] + }, + { + "cell_type": "markdown", + "id": "18ba07ca-f1f9-4f05-98db-d5612f9acbb6", + "metadata": {}, + "source": [ + "Unlike function nodes, workflow input has no intrinsic order. We can still update it by calling the workflow, but we _need_ to use keyword and not positional arguments. 
Runs of the workflow then return a dot-accessible dictionary based on the output channels:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "52c48d19-10a2-4c48-ae81-eceea4129a60", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ay': 3, 'a + b + 2': 7}" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = wf(ax=2, b__x=3)\n", + "out" + ] + }, + { + "cell_type": "markdown", + "id": "e3f4b51b-7c28-47f7-9822-b4755e12bd4d", + "metadata": {}, + "source": [ + "We can see now why we've been trying to givesuccinct string labels to our `Function` node outputs instead of just arbitrary expressions! The expressions are typically not dot-accessible:" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "bb35ba3e-602d-4c9c-b046-32da9401dd1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7, 3)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out[\"a + b + 2\"], out.ay" + ] + }, + { + "cell_type": "markdown", + "id": "c67ddcd9-cea0-4f3f-96aa-491da0a4c459", + "metadata": {}, + "source": [ + "We can also look at our graph:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "2b0d2c85-9049-417b-8739-8a8432a1efbe", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clustersimple\n", + "\n", + "simple: Workflow\n", + "\n", + "clustersimpleInputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clustersimpleOutputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "clustersimplea\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "a: AddOne\n", + "\n", + "\n", + "clustersimpleaInputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clustersimpleaOutputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "clustersimpleb\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "b: AddOne\n", + "\n", + "\n", + "clustersimplebInputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clustersimplebOutputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "clustersimplesum\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "sum: AddNode\n", + "\n", + "\n", + "clustersimplesumInputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clustersimplesumOutputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "\n", + "clustersimpleInputsrun\n", + "\n", + "run\n", + "\n", + "\n", + "\n", + "clustersimpleOutputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clustersimpleInputsx\n", + "\n", + "x\n", + "\n", + "\n", + "\n", + "clustersimpleaInputsx\n", + "\n", + "x\n", + "\n", + "\n", + "\n", + "clustersimpleInputsx->clustersimpleaInputsx\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clustersimplebInputsx\n", + "\n", + "x\n", + "\n", + "\n", + "\n", + "clustersimpleInputsx->clustersimplebInputsx\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clustersimpleOutputsy\n", + "\n", + "y\n", + "\n", + "\n", + "\n", + "clustersimpleOutputssum\n", + "\n", + "sum\n", + "\n", + "\n", + "\n", + "clustersimpleaInputsrun\n", + "\n", + "run\n", + 
"\n", + "\n", + "\n", + "clustersimpleaOutputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clustersimplebInputsrun\n", + "\n", + "run\n", + "\n", + "\n", + "\n", + "clustersimpleaOutputsran->clustersimplebInputsrun\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clustersimpleaOutputsy\n", + "\n", + "y\n", + "\n", + "\n", + "\n", + "clustersimpleaOutputsy->clustersimpleOutputsy\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clustersimplesumInputsx\n", + "\n", + "x\n", + "\n", + "\n", + "\n", + "clustersimpleaOutputsy->clustersimplesumInputsx\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clustersimplebOutputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clustersimplesumInputsrun\n", + "\n", + "run\n", + "\n", + "\n", + "\n", + "clustersimplebOutputsran->clustersimplesumInputsrun\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clustersimplebOutputsy\n", + "\n", + "y\n", + "\n", + "\n", + "\n", + "clustersimplesumInputsy\n", + "\n", + "y\n", + "\n", + "\n", + "\n", + "clustersimplebOutputsy->clustersimplesumInputsy\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clustersimplesumOutputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clustersimplesumOutputssum\n", + "\n", + "sum\n", + "\n", + "\n", + "\n", + "clustersimplesumOutputssum->clustersimpleOutputssum\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wf.draw()" + ] + }, + { + "cell_type": "markdown", + "id": "2671dc36-42a4-466b-848d-067ef7bd1d1d", + "metadata": {}, + "source": [ + "# Example with pre-built nodes\n", + "\n", + "Currently we have a handfull of pre-build nodes available for import from the `nodes` package. Let's use these to quickly put together a workflow for looking at some MD data." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "ae500d5e-e55b-432c-8b5f-d5892193cdf5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The job JUSTAJOBNAME was saved and received the ID: 9558\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       ""
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "<base64 PNG data elided: matplotlib scatter of temperature vs. step for the MD run below>",
+      "text/plain": [
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "wf = Workflow(\"with_prebuilt\")\n", + "\n", + "wf.structure = wf.create.atomistics.Bulk(cubic=True, name=\"Al\")\n", + "wf.engine = wf.create.atomistics.Lammps(structure=wf.structure)\n", + "wf.calc = wf.create.atomistics.CalcMd(job=wf.engine)\n", + "wf.plot = wf.create.standard.Scatter(\n", + " x=wf.calc.outputs.steps, \n", + " y=wf.calc.outputs.temperature\n", + ")\n", + "wf.structure > wf.engine > wf.calc > wf.plot\n", + "\n", + "out = wf.run()\n", + "out.plot__fig" + ] + }, + { + "cell_type": "markdown", + "id": "43c09aa8-8229-4636-aaeb-9214b723c2fc", + "metadata": {}, + "source": [ + "In case you want to see more or less of the inner workings of the nodes when visualizing a workflow, you can modify the `depth` parameter, which controls how deeply child nodes are decomposed. E.g. we can force our workflow to only show us it's basic IO by setting `depth=0`:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "2114d0c3-cdad-43c7-9ffa-50c36d56d18f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterwith_prebuilt\n", + "\n", + "with_prebuilt: Workflow\n", + "\n", + "clusterwith_prebuiltInputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clusterwith_prebuiltOutputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputsrun\n", + "\n", + "run\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputsname\n", + "\n", + "name\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputscrystalstructure\n", + "\n", + "crystalstructure\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputsa\n", + "\n", + "a\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputsc\n", + "\n", + "c\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputscovera\n", + "\n", + "covera\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputsu\n", + "\n", + "u\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputsorthorhombic\n", + "\n", + "orthorhombic\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputscubic\n", + "\n", + "cubic\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputsn_ionic_steps\n", + "\n", + "n_ionic_steps: int\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputsn_print\n", + "\n", + "n_print: int\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputstemperature\n", + "\n", + "temperature\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltInputspressure\n", + "\n", + "pressure\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputscells\n", + "\n", + "cells\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsdisplacements\n", + "\n", + "displacements\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsenergy_pot\n", + "\n", + "energy_pot\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsenergy_tot\n", + "\n", + "energy_tot\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsforce_max\n", + "\n", + "force_max\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsforces\n", + "\n", + "forces\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsindices\n", + "\n", + "indices\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputspositions\n", + "\n", + "positions\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputspressures\n", + "\n", + 
"pressures\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputstotal_displacements\n", + "\n", + "total_displacements\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsunwrapped_positions\n", + "\n", + "unwrapped_positions\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsvolume\n", + "\n", + "volume\n", + "\n", + "\n", + "\n", + "clusterwith_prebuiltOutputsfig\n", + "\n", + "fig\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wf.draw(depth=0)" + ] + }, + { + "cell_type": "markdown", + "id": "d1f3b308-28b2-466b-8cf5-6bfd806c08ca", + "metadata": {}, + "source": [ + "# Macros\n", + "\n", + "Once you have a workflow that you're happy with, you may want to store it as a macro so it can be stored in a human-readable way, reused, and shared. Automated conversion of an existing `Workflow` instance into a `Macro` subclass is still on the TODO list, but defining a new macro is pretty easy: they are just composite nodes that have a function defining their graph setup:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "c71a8308-f8a1-4041-bea0-1c841e072a6d", + "metadata": {}, + "outputs": [], + "source": [ + "from pyiron_workflow.macro import Macro" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "2b9bb21a-73cd-444e-84a9-100e202aa422", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@Workflow.wrap_as.single_value_node(\"result\")\n", + "def add_one(x):\n", + " return x + 1\n", + "\n", + "def add_three_macro(macro: Macro) -> None:\n", + " \"\"\"\n", + " The graph constructor a Macro expects must take the macro as its only argument\n", + " (i.e. \"self\" from the macro's perspective) and return nothing.\n", + " Inside, it should add nodes to the macro, wire their connections, etc.\n", + " \"\"\"\n", + " macro.add_one = add_one(0)\n", + " macro.add_two = add_one(macro.add_one)\n", + " macro.add_three = add_one(macro.add_two)\n", + " # Just like workflows, for simple DAG macros we don't _need_\n", + " # to set signals and starting nodes -- the macro will build them\n", + " # automatically. But, if you do set both then the macro will use them\n", + " macro.add_one > macro.add_two > macro.add_three\n", + " macro.starting_nodes = [macro.add_one] \n", + " \n", + "macro = Macro(add_three_macro)\n", + "macro(add_one__x=10).add_three__result" + ] + }, + { + "cell_type": "markdown", + "id": "bd5099c4-1c01-4a45-a5bb-e5087595db9f", + "metadata": {}, + "source": [ + "Of course, we can also use a decorator like for other node types. This is shown below, along with an example of how exploit label maps to give our macro IO easier-to-use names (and expose IO that would be skipped by default because it's internally connected):" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "3668f9a9-adca-48a4-84ea-13add965897c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'intermediate': 102, 'plus_three': 103}" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@Workflow.wrap_as.macro_node()\n", + "def add_three_macro(macro: Macro) -> None:\n", + " \"\"\"\n", + " The graph constructor a Macro expects must take the macro as its only argument\n", + " (i.e. 
\"self\" from the macro's perspective) and return nothing.\n", + " Inside, it should add nodes to the macro, wire their connections, etc.\n", + " \"\"\"\n", + " macro.add_one = add_one(0)\n", + " macro.add_two = add_one(macro.add_one)\n", + " macro.add_three = add_one(macro.add_two)\n", + " macro.inputs_map = {\"add_one__x\": \"x\"}\n", + " macro.outputs_map = {\"add_three__result\": \"plus_three\", \"add_two__result\": \"intermediate\"}\n", + " \n", + "macro = add_three_macro()\n", + "macro(x=100)\n", + "macro.outputs.to_value_dict()" + ] + }, + { + "cell_type": "markdown", + "id": "22d2fdcf-0206-497d-9344-a71e3472a2c0", + "metadata": {}, + "source": [ + "## Nesting\n", + "\n", + "Composite nodes can be nested to abstract workflows into simpler components -- i.e. macros can be added to workflows, and macros can be used inside of macros.\n", + "\n", + "For our final example, let's define a macro for doing Lammps minimizations, then use this in a workflow to compare energies between different phases." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "9aaeeec0-5f88-4c94-a6cc-45b56d2f0111", + "metadata": {}, + "outputs": [], + "source": [ + "@Workflow.wrap_as.macro_node()\n", + "def lammps_minimize(macro):\n", + " macro.structure = macro.create.atomistics.Bulk()\n", + " macro.engine = macro.create.atomistics.Lammps(structure=macro.structure)\n", + " macro.calc = macro.create.atomistics.CalcMin(job=macro.engine, pressure=0)\n", + " \n", + " macro.inputs_map = {\n", + " \"structure__name\": \"element\", \n", + " \"structure__crystalstructure\": \"crystalstructure\",\n", + " \"structure__a\": \"lattice_guess\",\n", + " }\n", + " macro.outputs_map = {\n", + " \"calc__energy_pot\": \"energy\",\n", + " \"structure__structure\": \"structure\",\n", + " }\n", + "\n", + "@Workflow.wrap_as.single_value_node()\n", + "def per_atom_energy_difference(structure1, energy1, structure2, energy2):\n", + " de = (energy2[-1]/len(structure2)) - (energy1[-1]/len(structure1))\n", + " return de" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "a832e552-b3cc-411a-a258-ef21574fc439", + "metadata": {}, + "outputs": [], + "source": [ + "wf = Workflow(\"phase_preference\")\n", + "wf.element = wf.create.standard.UserInput()\n", + "wf.min_phase1 = lammps_minimize(element=wf.element)\n", + "wf.min_phase2 = lammps_minimize(element=wf.element)\n", + "wf.compare = per_atom_energy_difference(\n", + " wf.min_phase1.outputs.structure,\n", + " wf.min_phase1.outputs.energy,\n", + " wf.min_phase2.outputs.structure,\n", + " wf.min_phase2.outputs.energy,\n", + ")\n", + "\n", + "wf.inputs_map = {\n", + " \"element__user_input\": \"element\",\n", + " \"min_phase1__crystalstructure\": \"phase1\",\n", + " \"min_phase2__crystalstructure\": \"phase2\",\n", + " \"min_phase1__lattice_guess\": \"lattice_guess1\",\n", + " \"min_phase2__lattice_guess\": \"lattice_guess2\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "b764a447-236f-4cb7-952a-7cba4855087d", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preference\n", + "\n", + "phase_preference: Workflow\n", + "\n", + "clusterphase_preferenceInputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clusterphase_preferenceOutputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "clusterphase_preferenceelement\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "element: UserInput\n", + "\n", + "\n", + "clusterphase_preferenceelementInputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clusterphase_preferenceelementOutputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "min_phase1: LammpsMinimize\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "min_phase2: LammpsMinimize\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "clusterphase_preferencecompare\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "compare: PerAtomEnergyDifference\n", + "\n", + "\n", + "clusterphase_preferencecompareInputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Inputs\n", + "\n", + "\n", + "clusterphase_preferencecompareOutputs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Outputs\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsrun\n", + "\n", + "run\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsuser_input\n", + "\n", + "user_input\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceelementInputsuser_input\n", + "\n", + "user_input\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsuser_input->clusterphase_preferenceelementInputsuser_input\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputscrystalstructure\n", + "\n", + "crystalstructure\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputscrystalstructure\n", + "\n", + "crystalstructure\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputscrystalstructure->clusterphase_preferencemin_phase1Inputscrystalstructure\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputscrystalstructure\n", + "\n", + "crystalstructure\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputscrystalstructure->clusterphase_preferencemin_phase2Inputscrystalstructure\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsa\n", + "\n", + "a\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputsa\n", + "\n", + "a\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsa->clusterphase_preferencemin_phase1Inputsa\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputsa\n", + "\n", + "a\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsa->clusterphase_preferencemin_phase2Inputsa\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsc\n", + "\n", + "c\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputsc\n", + "\n", + "c\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsc->clusterphase_preferencemin_phase1Inputsc\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "clusterphase_preferencemin_phase2Inputsc\n", + "\n", + "c\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsc->clusterphase_preferencemin_phase2Inputsc\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputscovera\n", + "\n", + "covera\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputscovera\n", + "\n", + "covera\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputscovera->clusterphase_preferencemin_phase1Inputscovera\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputscovera\n", + "\n", + "covera\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputscovera->clusterphase_preferencemin_phase2Inputscovera\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsu\n", + "\n", + "u\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputsu\n", + "\n", + "u\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsu->clusterphase_preferencemin_phase1Inputsu\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputsu\n", + "\n", + "u\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsu->clusterphase_preferencemin_phase2Inputsu\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsorthorhombic\n", + "\n", + "orthorhombic\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputsorthorhombic\n", + "\n", + "orthorhombic\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsorthorhombic->clusterphase_preferencemin_phase1Inputsorthorhombic\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputsorthorhombic\n", + "\n", + "orthorhombic\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsorthorhombic->clusterphase_preferencemin_phase2Inputsorthorhombic\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputscubic\n", + "\n", + "cubic\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputscubic\n", + "\n", + "cubic\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputscubic->clusterphase_preferencemin_phase1Inputscubic\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputscubic\n", + "\n", + "cubic\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputscubic->clusterphase_preferencemin_phase2Inputscubic\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsn_ionic_steps\n", + "\n", + "n_ionic_steps: int\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputsn_ionic_steps\n", + "\n", + "n_ionic_steps: int\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsn_ionic_steps->clusterphase_preferencemin_phase1Inputsn_ionic_steps\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputsn_ionic_steps\n", + "\n", + "n_ionic_steps: int\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsn_ionic_steps->clusterphase_preferencemin_phase2Inputsn_ionic_steps\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsn_print\n", + "\n", + "n_print: int\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputsn_print\n", + "\n", + "n_print: int\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputsn_print->clusterphase_preferencemin_phase1Inputsn_print\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputsn_print\n", + "\n", + "n_print: int\n", + "\n", 
+ "\n", + "\n", + "clusterphase_preferenceInputsn_print->clusterphase_preferencemin_phase2Inputsn_print\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputspressure\n", + "\n", + "pressure\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputspressure\n", + "\n", + "pressure\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputspressure->clusterphase_preferencemin_phase1Inputspressure\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputspressure\n", + "\n", + "pressure\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceInputspressure->clusterphase_preferencemin_phase2Inputspressure\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputscells\n", + "\n", + "cells\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputsdisplacements\n", + "\n", + "displacements\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputsenergy_tot\n", + "\n", + "energy_tot\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputsforce_max\n", + "\n", + "force_max\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputsforces\n", + "\n", + "forces\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputsindices\n", + "\n", + "indices\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputspositions\n", + "\n", + "positions\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputspressures\n", + "\n", + "pressures\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputssteps\n", + "\n", + "steps\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputstotal_displacements\n", + "\n", + "total_displacements\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputsunwrapped_positions\n", + "\n", + "unwrapped_positions\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputsvolume\n", + "\n", + "volume\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceOutputsde\n", + "\n", + "de\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceelementInputsrun\n", + "\n", + "run\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceelementOutputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceelementOutputsuser_input\n", + "\n", + "user_input\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputsname\n", + "\n", + "name\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceelementOutputsuser_input->clusterphase_preferencemin_phase1Inputsname\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputsname\n", + "\n", + "name\n", + "\n", + "\n", + "\n", + "clusterphase_preferenceelementOutputsuser_input->clusterphase_preferencemin_phase2Inputsname\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Inputsrun\n", + "\n", + "run\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsstructure\n", + "\n", + "structure\n", + "\n", + "\n", + "\n", + "clusterphase_preferencecompareInputsstructure1\n", + "\n", + "structure1\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsstructure->clusterphase_preferencecompareInputsstructure1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputscells\n", + "\n", + "cells\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputscells->clusterphase_preferenceOutputscells\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "clusterphase_preferencemin_phase1Outputsdisplacements\n", + "\n", + "displacements\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsdisplacements->clusterphase_preferenceOutputsdisplacements\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsenergy_pot\n", + "\n", + "energy_pot\n", + "\n", + "\n", + "\n", + "clusterphase_preferencecompareInputsenergy1\n", + "\n", + "energy1\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsenergy_pot->clusterphase_preferencecompareInputsenergy1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsenergy_tot\n", + "\n", + "energy_tot\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsenergy_tot->clusterphase_preferenceOutputsenergy_tot\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsforce_max\n", + "\n", + "force_max\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsforce_max->clusterphase_preferenceOutputsforce_max\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsforces\n", + "\n", + "forces\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsforces->clusterphase_preferenceOutputsforces\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsindices\n", + "\n", + "indices\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsindices->clusterphase_preferenceOutputsindices\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputspositions\n", + "\n", + "positions\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputspositions->clusterphase_preferenceOutputspositions\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputspressures\n", + "\n", + "pressures\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputspressures->clusterphase_preferenceOutputspressures\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputssteps\n", + "\n", + "steps\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputssteps->clusterphase_preferenceOutputssteps\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputstotal_displacements\n", + "\n", + "total_displacements\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputstotal_displacements->clusterphase_preferenceOutputstotal_displacements\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsunwrapped_positions\n", + "\n", + "unwrapped_positions\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsunwrapped_positions->clusterphase_preferenceOutputsunwrapped_positions\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsvolume\n", + "\n", + "volume\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase1Outputsvolume->clusterphase_preferenceOutputsvolume\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Inputsrun\n", + "\n", + "run\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsstructure\n", + "\n", + "structure\n", + "\n", + "\n", + "\n", + "clusterphase_preferencecompareInputsstructure2\n", + 
"\n", + "structure2\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsstructure->clusterphase_preferencecompareInputsstructure2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputscells\n", + "\n", + "cells\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputscells->clusterphase_preferenceOutputscells\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsdisplacements\n", + "\n", + "displacements\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsdisplacements->clusterphase_preferenceOutputsdisplacements\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsenergy_pot\n", + "\n", + "energy_pot\n", + "\n", + "\n", + "\n", + "clusterphase_preferencecompareInputsenergy2\n", + "\n", + "energy2\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsenergy_pot->clusterphase_preferencecompareInputsenergy2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsenergy_tot\n", + "\n", + "energy_tot\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsenergy_tot->clusterphase_preferenceOutputsenergy_tot\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsforce_max\n", + "\n", + "force_max\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsforce_max->clusterphase_preferenceOutputsforce_max\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsforces\n", + "\n", + "forces\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsforces->clusterphase_preferenceOutputsforces\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsindices\n", + "\n", + "indices\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsindices->clusterphase_preferenceOutputsindices\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputspositions\n", + "\n", + "positions\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputspositions->clusterphase_preferenceOutputspositions\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputspressures\n", + "\n", + "pressures\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputspressures->clusterphase_preferenceOutputspressures\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputssteps\n", + "\n", + "steps\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputssteps->clusterphase_preferenceOutputssteps\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputstotal_displacements\n", + "\n", + "total_displacements\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputstotal_displacements->clusterphase_preferenceOutputstotal_displacements\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsunwrapped_positions\n", + "\n", + "unwrapped_positions\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsunwrapped_positions->clusterphase_preferenceOutputsunwrapped_positions\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencemin_phase2Outputsvolume\n", + "\n", + "volume\n", + "\n", + "\n", + "\n", + 
"clusterphase_preferencemin_phase2Outputsvolume->clusterphase_preferenceOutputsvolume\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencecompareInputsrun\n", + "\n", + "run\n", + "\n", + "\n", + "\n", + "clusterphase_preferencecompareOutputsran\n", + "\n", + "ran\n", + "\n", + "\n", + "\n", + "\n", + "clusterphase_preferencecompareOutputsde\n", + "\n", + "de\n", + "\n", + "\n", + "\n", + "clusterphase_preferencecompareOutputsde->clusterphase_preferenceOutputsde\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wf.draw()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "b51bef25-86c5-4d57-80c1-ab733e703caf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The job JUSTAJOBNAME was saved and received the ID: 9558\n", + "The job JUSTAJOBNAME was saved and received the ID: 9558\n", + "Al: E(hcp) - E(fcc) = 1.17 eV/atom\n" + ] + } + ], + "source": [ + "out = wf(element=\"Al\", phase1=\"fcc\", phase2=\"hcp\", lattice_guess1=4, lattice_guess2=4)\n", + "print(f\"{wf.inputs.element.value}: E({wf.inputs.phase2.value}) - E({wf.inputs.phase1.value}) = {out.compare__de:.2f} eV/atom\")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "091e2386-0081-436c-a736-23d019bd9b91", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The job JUSTAJOBNAME was saved and received the ID: 9558\n", + "The job JUSTAJOBNAME was saved and received the ID: 9558\n", + "Mg: E(hcp) - E(fcc) = -4.54 eV/atom\n" + ] + } + ], + "source": [ + "out = wf(element=\"Mg\", phase1=\"fcc\", phase2=\"hcp\", lattice_guess1=3, lattice_guess2=3)\n", + "print(f\"{wf.inputs.element.value}: E({wf.inputs.phase2.value}) - E({wf.inputs.phase1.value}) = {out.compare__de:.2f} eV/atom\")" + ] + }, + { + "cell_type": "markdown", + "id": "f447531e-3e8c-4c7e-a579-5f9c56b75a5b", + "metadata": {}, + "source": [ + "# Here be dragons\n", + "\n", + "While everything in the workflows sub-module is under development, the following complex features are _even more likely_ to see substantial modifications to their interface and behaviour. Nonetheless, they're fun so let's look at them." + ] + }, + { + "cell_type": "markdown", + "id": "069cc8e8-f8b9-4382-a424-b3b2dd2bf739", + "metadata": {}, + "source": [ + "## Parallelization\n", + "\n", + "You can currently run _some_ nodes (namely, `Function` nodes that don't take `self` as an argument) in a background process by setting an `executor` of the right type.\n", + "Cf. the `Workflow` class tests in the source code for an example.\n", + "\n", + "Right now our treatment of DAGs is quite rudimentary, and the data flow is (unless cyclic) converted into a _linear_ execution pattern. \n", + "This is practical and robust, but highly inefficient when combined with nodes that can run in parallel, i.e. with \"executors\".\n", + "Going forward, we will exploit the same infrastructure of data flow DAGs and run signals to build up more sophisticated execution patterns which support parallelization." 
+  {
+   "cell_type": "markdown",
+   "id": "1f29fde8-1645-444e-99dc-3ec465461c7e",
+   "metadata": {},
+   "source": [
+    "## Serialization and node libraries\n",
+    "\n",
+    "Serialization doesn't exist yet.\n",
+    "\n",
+    "What you _can_ do is `register` new lists of nodes (including macros) with the workflow, so feel free to build up your own `.py` files containing nodes you like to use for easy re-use.\n",
+    "\n",
+    "Serialization of workflows is still forthcoming. For node registration, more flexibility and documentation are also forthcoming, but the basics are already here."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1f012460-19af-45f7-98aa-a0ad5b8e6faa",
+   "metadata": {},
+   "source": [
+    "## Meta-nodes and flow control\n",
+    "\n",
+    "A meta-node is a function that produces a node _class_ instead of a node _instance_.\n",
+    "Right now, these are used to produce parameterized flow-control nodes, which take a node class as input and return a new macro class that builds some graph using the passed node class, e.g. for- and while-loops.\n",
+    "\n",
+    "### For-loops\n",
+    "\n",
+    "One meta-node is a for-loop builder, which creates a macro with $n$ internal instances of the \"loop body\" node class, and a new IO interface.\n",
+    "The new input allows you to specify which input channels are being looped over -- the macro input for such a channel is interpreted as list-like and distributed across the copies of the body node -- and which are _not_ being looped over -- these are interpreted just as the loop body node normally would and passed to all copies equally.\n",
+    "All of the loop body outputs are then collected as a list of length $n$.\n",
+    "\n",
+    "We follow a convention that inputs and outputs being looped over are indicated by their channel labels being ALL CAPS.\n",
+    "\n",
+    "In the example below, we loop over the bulk structure node to create structures with different lattice constants:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "0b373764-b389-4c24-8086-f3d33a4f7fd7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[14.829749999999995,\n",
+       " 15.407468749999998,\n",
+       " 15.999999999999998,\n",
+       " 16.60753125,\n",
+       " 17.230249999999995]"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "n = 5\n",
+    "\n",
+    "bulk_loop = Workflow.create.meta.for_loop(\n",
+    "    Workflow.create.atomistics.Bulk,\n",
+    "    n,\n",
+    "    iterate_on=(\"a\",),\n",
+    ")()\n",
+    "\n",
+    "out = bulk_loop(\n",
+    "    name=\"Al\",  # Sent equally to each body node\n",
+    "    A=np.linspace(3.9, 4.1, n).tolist(),  # Distributed across body nodes\n",
+    ")\n",
+    "\n",
+    "[struct.cell.volume for struct in out.STRUCTURE]\n",
+    "# output is a list collected from copies of the body node, as indicated by CAPS label"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4e7ed210-dbc2-4afa-825e-b91168baff25",
+   "metadata": {},
+   "source": [
+    "### While-loops\n",
+    "\n",
+    "We can also create a while-loop, which takes both a body node and a condition node. The condition node must be a `SingleValue` returning a `bool` type. Instead of creating copies of the body node, the body node gets re-run until the condition node returns `False`.\n",
+    "\n",
+    "You _must_ specify the data connection so that the body node passes information to the condition node. You may optionally also loop output of the body node back to its own input to change the input at each iteration. 
Right now this is done with horribly ugly string tuples, but we're still working on it." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "0dd04b4c-e3e7-4072-ad34-58f2c1e4f596", + "metadata": {}, + "outputs": [], + "source": [ + "@Workflow.wrap_as.single_value_node()\n", + "def add(a, b):\n", + " print(f\"{a} + {b} = {a + b}\")\n", + " return a + b\n", + "\n", + "@Workflow.wrap_as.single_value_node()\n", + "def less_than_ten(value):\n", + " return value < 10\n", + "\n", + "AddWhile = Workflow.create.meta.while_loop(\n", + " loop_body_class=add,\n", + " condition_class=less_than_ten,\n", + " internal_connection_map=[\n", + " (\"Add\", \"a + b\", \"LessThanTen\", \"value\"),\n", + " (\"Add\", \"a + b\", \"Add\", \"a\")\n", + " ],\n", + " inputs_map={\"Add__a\": \"a\", \"Add__b\": \"b\"},\n", + " outputs_map={\"Add__a + b\": \"total\"}\n", + ")\n", + "\n", + "wf = Workflow(\"do_while\")\n", + "wf.add_while = AddWhile()\n", + "\n", + "wf.inputs_map = {\n", + " \"add_while__a\": \"a\",\n", + " \"add_while__b\": \"b\"\n", + "}\n", + "wf.outputs_map = {\n", + " \"add_while__total\": \"total\", # Rename this output\n", + " \"add_while__switch__truth\": None # Disable this output\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "eb810e1e-4d13-4cb1-94cc-6d191b8c568d", + "metadata": {}, + "source": [ + "Note that initializing the `a` and `b` input to numeric values when we call the workflow below does not destroy the connection made between the body node input and output -- so the first run of the body node uses the initial value passed, but then it updates its own input for subsequent calls!" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "2dfb967b-41ac-4463-b606-3e315e617f2a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 + 2 = 3\n", + "3 + 2 = 5\n", + "5 + 2 = 7\n", + "7 + 2 = 9\n", + "9 + 2 = 11\n", + "Finally {'total': 11}\n" + ] + } + ], + "source": [ + "response = wf(a=1, b=2)\n", + "print(\"Finally\", response)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "2e87f858-b327-4f6b-9237-c8a557f29aeb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.499 > 0.2\n", + "0.879 > 0.2\n", + "0.993 > 0.2\n", + "0.606 > 0.2\n", + "0.126 <= 0.2\n", + "Finally 0.126\n" + ] + } + ], + "source": [ + "@Workflow.wrap_as.single_value_node(\"random\")\n", + "def random(length: int | None = None):\n", + " return np.random.random(length)\n", + "\n", + "@Workflow.wrap_as.single_value_node()\n", + "def greater_than(x: float, threshold: float):\n", + " gt = x > threshold\n", + " symbol = \">\" if gt else \"<=\"\n", + " print(f\"{x:.3f} {symbol} {threshold}\")\n", + " return gt\n", + "\n", + "RandomWhile = Workflow.create.meta.while_loop(\n", + " loop_body_class=random,\n", + " condition_class=greater_than,\n", + " internal_connection_map=[(\"Random\", \"random\", \"GreaterThan\", \"x\")],\n", + " outputs_map={\"Random__random\": \"capped_result\"}\n", + ")\n", + "\n", + "# Define workflow\n", + "\n", + "wf = Workflow(\"random_until_small_enough\")\n", + "\n", + "## Wire together the while loop and its condition\n", + "\n", + "wf.random_while = RandomWhile()\n", + "\n", + "## Give convenient labels\n", + "wf.inputs_map = {\"random_while__GreaterThan__threshold\": \"threshold\"}\n", + "wf.outputs_map = {\"random_while__capped_result\": \"capped_result\"}\n", + "\n", + "print(f\"Finally {wf(threshold=0.2).capped_result:.3f}\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "f40bfd6f-3fbf-4c2b-aeee-534ed4bcc970", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyiron_module_template/__init__.py b/pyiron_module_template/__init__.py deleted file mode 100644 index 80edaf05..00000000 --- a/pyiron_module_template/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from ._version import get_versions - -__version__ = get_versions()["version"] -del get_versions diff --git a/pyiron_workflow/__init__.py b/pyiron_workflow/__init__.py new file mode 100644 index 00000000..b2636e88 --- /dev/null +++ b/pyiron_workflow/__init__.py @@ -0,0 +1 @@ +from pyiron_workflow.workflow import Workflow diff --git a/pyiron_module_template/_version.py b/pyiron_workflow/_version.py similarity index 99% rename from pyiron_module_template/_version.py rename to pyiron_workflow/_version.py index 85bcfddd..29bbb9ca 100644 --- a/pyiron_module_template/_version.py +++ b/pyiron_workflow/_version.py @@ -40,9 +40,9 @@ def get_config(): cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440-pre" - cfg.tag_prefix = "pyiron_module_template-" - cfg.parentdir_prefix = "pyiron_module_template" - cfg.versionfile_source = "pyiron_module_template/_version.py" + cfg.tag_prefix = "pyiron_workflow-" + cfg.parentdir_prefix = "pyiron_workflow" + cfg.versionfile_source = "pyiron_workflow/_version.py" cfg.verbose = False return cfg diff --git a/pyiron_workflow/channels.py b/pyiron_workflow/channels.py new file mode 100644 index 00000000..5fe20999 --- /dev/null +++ b/pyiron_workflow/channels.py @@ -0,0 +1,497 @@ +""" +Channels are access points for information to flow into and out of nodes. + +Data channels carry, unsurprisingly, data. +Output data channels will attempt to push their new value to all their connected input +data channels on update, while input data channels will reject any updates if their +parent node is running. +In this way, data channels facilitate forward propagation of data through a graph. +They hold data persistently. + +Signal channels are tools for procedurally exposing functionality on nodes. +Input signal channels are connected to a callback function which gets invoked when the +channel is updated. +Output signal channels must be accessed by the owning node directly, and then trigger +all the input signal channels to which they are connected. +In this way, signal channels can force behaviour (node method calls) to propagate +forwards through a graph. +They do not hold any data, but rather fire for an effect. 
+""" + +from __future__ import annotations + +import typing +from abc import ABC, abstractmethod +from warnings import warn + +from pyiron_workflow.has_channel import HasChannel +from pyiron_workflow.has_to_dict import HasToDict +from pyiron_workflow.type_hinting import ( + valid_value, + type_hint_is_as_or_more_specific_than, +) + +if typing.TYPE_CHECKING: + from pyiron_workflow.composite import Composite + from pyiron_workflow.node import Node + + +class Channel(HasChannel, HasToDict, ABC): + """ + Channels facilitate the flow of information (data or control signals) into and + out of nodes. + They must have a label and belong to a node. + + Input/output channels can be (dis)connected from other output/input channels, and + store all of their current connections in a list. + This connection information is duplicated in that it is stored on _both_ channels + that form the connection. + + Child classes must define a string representation, `__str__`, and what to do on an + attempted connection, `connect`. + + Attributes: + label (str): The name of the channel. + node (pyiron_workflow.node.Node): The node to which the channel + belongs. + connections (list[Channel]): Other channels to which this channel is connected. + """ + + def __init__( + self, + label: str, + node: Node, + ): + """ + Make a new channel. + + Args: + label (str): A name for the channel. + node (pyiron_workflow.node.Node): The node to which the + channel belongs. + """ + self.label: str = label + self.node: Node = node + self.connections: list[Channel] = [] + + @abstractmethod + def __str__(self): + pass + + @abstractmethod + def connect(self, *others: Channel) -> None: + """ + How to handle connections to other channels. + + Args: + *others (Channel): The other channel objects to attempt to connect with. + """ + pass + + def disconnect(self, *others: Channel) -> list[tuple[Channel, Channel]]: + """ + If currently connected to any others, removes this and the other from eachothers + respective connections lists. + + Args: + *others (Channel): The other channels to disconnect from. + + Returns: + [list[tuple[Channel, Channel]]]: A list of the pairs of channels that no + longer participate in a connection. + """ + destroyed_connections = [] + for other in others: + if other in self.connections: + self.connections.remove(other) + other.disconnect(self) + destroyed_connections.append((self, other)) + else: + warn( + f"The channel {self.label} was not connected to {other.label}, and" + f"thus could not disconnect from it." + ) + return destroyed_connections + + def disconnect_all(self) -> list[tuple[Channel, Channel]]: + """ + Disconnect from all other channels currently in the connections list. + """ + return self.disconnect(*self.connections) + + @property + def connected(self) -> bool: + """ + Has at least one connection. + """ + return len(self.connections) > 0 + + def _already_connected(self, other: Channel) -> bool: + return other in self.connections + + def __iter__(self): + return self.connections.__iter__() + + def __len__(self): + return len(self.connections) + + @property + def channel(self) -> Channel: + return self + + def to_dict(self) -> dict: + return { + "label": self.label, + "connected": self.connected, + "connections": [f"{c.node.label}.{c.label}" for c in self.connections], + } + + +class NotData: + """ + This class exists purely to initialize data channel values where no default value + is provided; it lets the channel know that it has _no data in it_ and thus should + not identify as ready. 
+ """ + + @classmethod + def __repr__(cls): + # We use the class directly (not instances of it) where there is not yet data + # So give it a decent repr, even as just a class + return cls.__name__ + + +class DataChannel(Channel, ABC): + """ + Data channels control the flow of data on the graph. + They store this data in a `value` attribute. + They may optionally have a type hint. + They have a `ready` attribute which tells whether their value matches their type + hint (if one is provided, else `True`). + (In the future they may optionally have a storage priority.) + (In the future they may optionally have a storage history limit.) + (In the future they may optionally have an ontological type.) + + The `value` held by a channel can be manually assigned, but should normally be set + by the `update` method. + In neither case is the type hint strictly enforced. + + Type hinting is strictly enforced in one situation: when making connections to + other channels and at least one data channel has a non-None value for its type hint. + In this case, we insist that the output type hint be _as or more more specific_ than + the input type hint, to ensure that the input always receives output of a type it + expects. This behaviour can be disabled and all connections allowed by setting + `strict_connections = False` on the relevant input channel. + + For simple type hints like `int` or `str`, type hint comparison is trivial. + However, some hints take arguments, e.g. `dict[str, int]` to specify key and value + types; `tuple[int, int, str]` to specify a tuple with certain values; + `typing.Literal['a', 'b', 'c']` to specify particular choices; + `typing.Callable[[float, float], str]` to specify a callable that takes particular + argument types and has a return type; etc. + For hints with the origin `dict`, `tuple`, and `typing.Callable`, the two hints must + have _exactly the same arguments_ for one two qualify as "as or more specific". + E.g. `tuple[int, int|float]` is as or more specific than + `tuple[int|float, int|float]`, but not `tuple[int, int|float, str]`. + For _all other hints_, we demand that the output hint arguments be a _subset_ of + the input. + E.g. `Literal[1, 2]` is as or more specific that both `Literal[1, 2]` and + `Literal[1, 2, "three"]`. + + The data `value` will initialize to an instance of `NotData` by default. + The channel will identify as `ready` when the value is _not_ an instance of + `NotData`, and when the value conforms to type hints (if any). + + Warning: + Type hinting in python is quite complex, and determining when a hint is + "more specific" can be tricky. For instance, in python 3.11 you can now type + hint a tuple with a mixture of fixed elements of fixed type, followed by an + arbitrary elements of arbitrary type. This and other complex scenarios are not + yet included in our test suite and behaviour is not guaranteed. + """ + + def __init__( + self, + label: str, + node: Node, + default: typing.Optional[typing.Any] = NotData, + type_hint: typing.Optional[typing.Any] = None, + ): + super().__init__(label=label, node=node) + self.default = default + self.value = default + self.type_hint = type_hint + + @property + def ready(self) -> bool: + """ + Check if the currently stored value satisfies the channel's type hint. + + Returns: + (bool): Whether the value matches the type hint. 
+        """
+        if self.type_hint is not None:
+            return self._value_is_data and valid_value(self.value, self.type_hint)
+        else:
+            return self._value_is_data
+
+    @property
+    def _value_is_data(self):
+        return self.value is not NotData
+
+    def update(self, value) -> None:
+        """
+        Store a new value and trigger before- and after-update routines.
+
+        Args:
+            value: The value to store.
+        """
+        self._before_update()
+        self.value = value
+        self._after_update()
+
+    def _before_update(self) -> None:
+        """
+        A tool for child classes to do things before the value is changed during an
+        update.
+        """
+        pass
+
+    def _after_update(self) -> None:
+        """
+        A tool for child classes to do things after the value has changed during an
+        update.
+        """
+        pass
+
+    def connect(self, *others: DataChannel) -> None:
+        """
+        For all others for which the connection is valid (one input, one output, both
+        data channels), adds this to the other's list of connections and the other to
+        this list of connections.
+        Then the input channel gets updated with the output channel's current value.
+
+        Args:
+            *others (DataChannel): The other data channels to attempt to connect with.
+
+        Raises:
+            TypeError: When one of others is not a `DataChannel`
+        """
+        for other in others:
+            if self._valid_connection(other):
+                self.connections.append(other)
+                other.connections.append(self)
+                out, inp = self._figure_out_who_is_who(other)
+                if out.value is not NotData:
+                    inp.update(out.value)
+            else:
+                if isinstance(other, DataChannel):
+                    warn(
+                        f"{self.label} ({self.__class__.__name__}) and {other.label} "
+                        f"({other.__class__.__name__}) were not a valid connection"
+                    )
+                else:
+                    raise TypeError(
+                        f"Can only connect two channels, but {self.label} "
+                        f"({self.__class__.__name__}) got a {other} ({type(other)})"
+                    )
+
+    def _valid_connection(self, other) -> bool:
+        if self._is_IO_pair(other) and not self._already_connected(other):
+            if self._both_typed(other):
+                out, inp = self._figure_out_who_is_who(other)
+                if not inp.strict_connections:
+                    return True
+                else:
+                    return type_hint_is_as_or_more_specific_than(
+                        out.type_hint, inp.type_hint
+                    )
+            else:
+                # If either is untyped, don't do type checking
+                return True
+        else:
+            return False
+
+    def _is_IO_pair(self, other: DataChannel) -> bool:
+        return isinstance(other, DataChannel) and not isinstance(other, self.__class__)
+
+    def _both_typed(self, other: DataChannel) -> bool:
+        return self.type_hint is not None and other.type_hint is not None
+
+    def _figure_out_who_is_who(self, other: DataChannel) -> tuple[OutputData, InputData]:
+        return (self, other) if isinstance(self, OutputData) else (other, self)
+
+    def __str__(self):
+        return str(self.value)
+
+    def to_dict(self) -> dict:
+        d = super().to_dict()
+        d["value"] = repr(self.value)
+        d["ready"] = self.ready
+        return d
+
+
+class InputData(DataChannel):
+    """
+    On `update`, input channels will only `update` if their parent node is not
+    `running`.
+
+    The `strict_connections` parameter controls whether connections are subject to
+    type checking requirements.
+    I.e., users may set `strict_connections` to `False` (default is `True`) at
+    instantiation, or later with `(de)activate_strict_connections()`, to disable
+    (enable) data type checking when making connections with `OutputData` channels.
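+
+    A rough sketch (illustrative; `inp` and `out` stand in for input and output
+    channels whose type hints are incompatible):
+
+    >>> inp.connect(out)  # only warns; the connection is refused
+    >>> inp.deactivate_strict_connections()
+    >>> inp.connect(out)  # type checking is now skipped, so this succeeds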
+    """
+
+    def __init__(
+        self,
+        label: str,
+        node: Node,
+        default: typing.Optional[typing.Any] = NotData,
+        type_hint: typing.Optional[typing.Any] = None,
+        strict_connections: bool = True,
+    ):
+        super().__init__(
+            label=label,
+            node=node,
+            default=default,
+            type_hint=type_hint,
+        )
+        self.strict_connections = strict_connections
+
+    def _before_update(self) -> None:
+        if self.node.running:
+            raise RuntimeError(
+                f"Parent node {self.node.label} of {self.label} is running, so value "
+                f"cannot be updated."
+            )
+
+    def activate_strict_connections(self) -> None:
+        self.strict_connections = True
+
+    def deactivate_strict_connections(self) -> None:
+        self.strict_connections = False
+
+
+class OutputData(DataChannel):
+    """
+    On `update`, output channels propagate their value (as long as it's actually data)
+    to all the input channels to which they are connected by invoking their `update`
+    method.
+    """
+
+    def _after_update(self) -> None:
+        if self._value_is_data:
+            for inp in self.connections:
+                inp.update(self.value)
+
+
+class SignalChannel(Channel, ABC):
+    """
+    Signal channels give the option to control execution flow by triggering callback
+    functions.
+
+    Output channels can be called to trigger the callback functions of all input
+    channels to which they are connected.
+
+    Signal channels support `>` as syntactic sugar for their connections, i.e.
+    `some_output > some_input` is equivalent to `some_input.connect(some_output)`.
+    (This is also interoperable with `Node` objects, cf. the `Node` docs.)
+    """
+
+    @abstractmethod
+    def __call__(self) -> None:
+        pass
+
+    def connect(self, *others: SignalChannel) -> None:
+        """
+        For all others for which the connection is valid (one input, one output, both
+        signal channels), adds this to the other's list of connections and the other
+        to this list of connections.
+
+        Args:
+            *others (SignalChannel): The other channels to attempt a connection to
+
+        Raises:
+            TypeError: When one of others is not a `SignalChannel`
+        """
+        for other in others:
+            if self._valid_connection(other):
+                self.connections.append(other)
+                other.connections.append(self)
+            else:
+                if isinstance(other, SignalChannel):
+                    warn(
+                        f"{self.label} ({self.__class__.__name__}) and {other.label} "
+                        f"({other.__class__.__name__}) were not a valid connection"
+                    )
+                else:
+                    raise TypeError(
+                        f"Can only connect two signal channels, but {self.label} "
+                        f"({self.__class__.__name__}) got a {other} ({type(other)})"
+                    )
+
+    def _valid_connection(self, other) -> bool:
+        return self._is_IO_pair(other) and not self._already_connected(other)
+
+    def _is_IO_pair(self, other) -> bool:
+        return isinstance(other, SignalChannel) and not isinstance(
+            other, self.__class__
+        )
+
+    def connect_output_signal(self, signal: OutputSignal):
+        self.connect(signal)
+
+
+class InputSignal(SignalChannel):
+    """
+    Invokes a callback when called.
+    """
+
+    def __init__(
+        self,
+        label: str,
+        node: Node,
+        callback: callable,
+    ):
+        """
+        Make a new input signal channel.
+
+        Args:
+            label (str): A name for the channel.
+            node (pyiron_workflow.node.Node): The node to which the
+                channel belongs.
+            callback (callable): An argument-free callback to invoke when calling this
+                object.
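+
+        A rough sketch (illustrative; `my_node` stands in for any node):
+
+        >>> signal = InputSignal("run", my_node, my_node.run)
+        >>> signal()  # equivalent to calling my_node.run()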
+        """
+        super().__init__(label=label, node=node)
+        self.callback: callable = callback
+
+    def __call__(self) -> None:
+        self.callback()
+
+    def __str__(self):
+        return f"{self.label} runs {self.callback.__name__}"
+
+    def to_dict(self) -> dict:
+        d = super().to_dict()
+        d["callback"] = self.callback.__name__
+        return d
+
+
+class OutputSignal(SignalChannel):
+    """
+    Calls all the input signal objects in its connections list when called.
+    """
+
+    def __call__(self) -> None:
+        for c in self.connections:
+            c()
+
+    def __str__(self):
+        return (
+            f"{self.label} activates "
+            f"{[f'{c.node.label}.{c.label}' for c in self.connections]}"
+        )
+
+    def __gt__(self, other: InputSignal | Node):
+        other.connect_output_signal(self)
+        return True
diff --git a/pyiron_workflow/composite.py b/pyiron_workflow/composite.py
new file mode 100644
index 00000000..59cb2a93
--- /dev/null
+++ b/pyiron_workflow/composite.py
@@ -0,0 +1,445 @@
+"""
+A base class for nodal objects that have internal structure -- i.e. they hold a
+sub-graph
+"""
+
+from __future__ import annotations
+
+from abc import ABC
+from functools import partial
+from typing import Literal, Optional, TYPE_CHECKING
+
+from bidict import bidict
+from toposort import toposort_flatten, CircularDependencyError
+
+from pyiron_workflow.interfaces import Creator, Wrappers
+from pyiron_workflow.io import Outputs, Inputs
+from pyiron_workflow.node import Node
+from pyiron_workflow.node_package import NodePackage
+from pyiron_workflow.util import logger, DotDict, SeabornColors
+
+if TYPE_CHECKING:
+    from pyiron_workflow.channels import Channel
+
+
+class Composite(Node, ABC):
+    """
+    A base class for nodes that have internal structure -- i.e. they hold a sub-graph.
+
+    Item and attribute access is modified to give access to owned nodes.
+    Adding a node with the `add` functionality or by direct attribute assignment sets
+    this object as the parent of that node.
+
+    Guarantees that each owned node is unique, and does not belong to any other parents.
+
+    Offers a class method (`wrap_as`) to give easy access to the node-creating
+    decorators.
+
+    Offers a creator (the `create` method) which allows instantiation of other workflow
+    objects.
+    This method behaves _differently_ on the composite class and its instances -- on
+    instances, any created nodes get their `parent` attribute automatically set to the
+    composite instance being used.
+
+    Specifies the required `on_run()` to call `run()` on a subset of owned
+    `starting_nodes` to kick-start computation on the owned sub-graph.
+    Both the specification of these starting nodes and specifying execution signals to
+    propagate execution through the graph are left to the user/child classes.
+    In the case of non-cyclic workflows (i.e. DAGs in terms of data flow), both
+    starting nodes and execution flow can be specified by invoking
+    `set_run_signals_to_dag_execution()`.
+
+    The `run()` method (and `update()`, and calling the workflow) returns a new
+    dot-accessible dictionary of keys and values created from the composite output IO
+    panel.
+
+    Does not specify `input` and `output` as demanded by the parent class; this
+    requirement is still passed on to children.
+
+    Attributes:
+        inputs/outputs_map (bidict|None): Maps in the form
+            `{"node_label__channel_label": "some_better_name"}` that expose canonically
+            named channels of child nodes under a new name. This can be used both for re-
+            naming regular IO (i.e. unconnected child channels), as well as forcing the
+            exposure of irregular IO (i.e.
child channels that are already internally + connected to some other child channel). Non-`None` values provided at input + can be in regular dictionary form, but get re-cast as a clean bidict to ensure + the bijective nature of the maps (i.e. there is a 1:1 connection between any + IO exposed at the `Composite` level and the underlying channels). + nodes (DotDict[pyiron_workflow.node.Node]): The owned nodes that + form the composite subgraph. + strict_naming (bool): When true, repeated assignment of a new node to an + existing node label will raise an error, otherwise the label gets appended + with an index and the assignment proceeds. (Default is true: disallow assigning + to existing labels.) + create (Creator): A tool for adding new nodes to this subgraph. + starting_nodes (None | list[pyiron_workflow.node.Node]): A subset + of the owned nodes to be used on running. Only necessary if the execution graph + has been manually specified with `run` signals. (Default is an empty list.) + wrap_as (Wrappers): A tool for accessing node-creating decorators + + Methods: + add(node: Node): Add the node instance to this subgraph. + remove(node: Node): Break all connections the node has, remove it from this + subgraph, and set its parent to `None`. + + TODO: + Wrap node registration at the class level so we don't need to do + `X.create.register` but can just do `X.register` + """ + + wrap_as = Wrappers() + create = Creator() + + def __init__( + self, + label: str, + *args, + parent: Optional[Composite] = None, + strict_naming: bool = True, + inputs_map: Optional[dict | bidict] = None, + outputs_map: Optional[dict | bidict] = None, + **kwargs, + ): + super().__init__(*args, label=label, parent=parent, **kwargs) + self.strict_naming: bool = strict_naming + self._inputs_map = None + self._outputs_map = None + self.inputs_map = inputs_map + self.outputs_map = outputs_map + self.nodes: DotDict[str:Node] = DotDict() + self.starting_nodes: list[Node] = [] + self._creator = self.create + self.create = self._owned_creator # Override the create method from the class + + @property + def inputs_map(self) -> bidict | None: + return self._inputs_map + + @inputs_map.setter + def inputs_map(self, new_map: dict | bidict | None): + new_map = new_map if new_map is None else bidict(new_map) + self._inputs_map = new_map + + @property + def outputs_map(self) -> bidict | None: + return self._outputs_map + + @outputs_map.setter + def outputs_map(self, new_map: dict | bidict | None): + new_map = new_map if new_map is None else bidict(new_map) + self._outputs_map = new_map + + @property + def _owned_creator(self): + """ + A misdirection so that the `create` method behaves differently on the class + and on instances (in the latter case, created nodes should get the instance as + their parent). 
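+
+        A rough sketch of the distinction (illustrative; `fnc` stands in for any
+        function suitable for wrapping):
+
+        >>> unowned = Workflow.create.Function(fnc)  # unowned.parent is None
+        >>> wf = Workflow("my_workflow")
+        >>> owned = wf.create.Function(fnc)  # owned.parent is wf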
+ """ + return OwnedCreator(self, self._creator) + + @property + def executor(self) -> None: + return None + + @executor.setter + def executor(self, new_executor): + if new_executor is not None: + raise NotImplementedError( + "Running composite nodes with an executor is not yet supported" + ) + + def to_dict(self): + return { + "label": self.label, + "nodes": {n.label: n.to_dict() for n in self.nodes.values()}, + } + + @property + def on_run(self): + return self.run_graph + + @staticmethod + def run_graph(self): + for node in self.starting_nodes: + node.run() + return DotDict(self.outputs.to_value_dict()) + + def disconnect_run(self) -> list[tuple[Channel, Channel]]: + """ + Disconnect all `signals.input.run` connections on all child nodes. + + Returns: + list[tuple[Channel, Channel]]: Any disconnected pairs. + """ + disconnected_pairs = [] + for node in self.nodes.values(): + disconnected_pairs.extend(node.signals.disconnect_run()) + return disconnected_pairs + + def set_run_signals_to_dag_execution(self): + """ + Disconnects all `signals.input.run` connections among children and attempts to + reconnect these according to the DAG flow of the data. + + Raises: + ValueError: When the data connections do not form a DAG. + """ + self.disconnect_run() + self._set_run_connections_and_starting_nodes_according_to_linear_dag() + # TODO: Replace this linear setup with something more powerful + + def _set_run_connections_and_starting_nodes_according_to_linear_dag(self): + # This is the most primitive sort of topological exploitation we can do + # It is not efficient if the nodes have executors and can run in parallel + try: + # Topological sorting ensures that all input dependencies have been + # executed before the node depending on them gets run + # The flattened part is just that we don't care about topological + # generations that are mutually independent (inefficient but easier for now) + execution_order = toposort_flatten(self.get_data_digraph()) + except CircularDependencyError as e: + raise ValueError( + f"Detected a cycle in the data flow topology, unable to automate the " + f"execution of non-DAGs: cycles found among {e.data}" + ) + + for i, label in enumerate(execution_order[:-1]): + next_node = execution_order[i + 1] + self.nodes[label] > self.nodes[next_node] + self.starting_nodes = [self.nodes[execution_order[0]]] + + def get_data_digraph(self) -> dict[str, set[str]]: + """ + Builds a directed graph of node labels based on data connections between nodes + directly owned by this composite -- i.e. does not worry about data connections + which are entirely internal to an owned sub-graph. + + Returns: + dict[str, set[str]]: A dictionary of nodes and the nodes they depend on for + data. + + Raises: + ValueError: When a node appears in its own input. + """ + digraph = {} + + for node in self.nodes.values(): + node_dependencies = [] + for channel in node.inputs: + locally_scoped_dependencies = [] + for upstream in channel.connections: + if upstream.node.parent is self: + locally_scoped_dependencies.append(upstream.node.label) + elif channel.node.get_first_shared_parent(upstream.node) is self: + locally_scoped_dependencies.append( + upstream.node.get_parent_proximate_to(self).label + ) + node_dependencies.extend(locally_scoped_dependencies) + node_dependencies = set(node_dependencies) + if node.label in node_dependencies: + # the toposort library has a + # [known issue](https://gitlab.com/ericvsmith/toposort/-/issues/3) + # That self-dependency isn't caught, so we catch it manually here. 
+                raise ValueError(
+                    f"Detected a cycle in the data flow topology, unable to automate "
+                    f"the execution of non-DAGs: {node.label} appears in its own input."
+                )
+            digraph[node.label] = node_dependencies
+
+        return digraph
+
+    @property
+    def run_args(self) -> dict:
+        return {"self": self}
+
+    def _build_io(
+        self,
+        io: Inputs | Outputs,
+        target: Literal["inputs", "outputs"],
+        key_map: dict[str, str] | None,
+    ) -> Inputs | Outputs:
+        key_map = {} if key_map is None else key_map
+        for node in self.nodes.values():
+            panel = getattr(node, target)
+            for channel_label in panel.labels:
+                channel = panel[channel_label]
+                default_key = f"{node.label}__{channel_label}"
+                try:
+                    if key_map[default_key] is not None:
+                        io[key_map[default_key]] = channel
+                except KeyError:
+                    if not channel.connected:
+                        io[default_key] = channel
+        return io
+
+    def _build_inputs(self) -> Inputs:
+        return self._build_io(Inputs(), "inputs", self.inputs_map)
+
+    def _build_outputs(self) -> Outputs:
+        return self._build_io(Outputs(), "outputs", self.outputs_map)
+
+    def add(self, node: Node, label: Optional[str] = None) -> Node:
+        """
+        Assign a node to the parent. Optionally provide a new label for that node.
+
+        Args:
+            node (pyiron_workflow.node.Node): The node to add.
+            label (Optional[str]): The label for this node.
+
+        Raises:
+            TypeError: If the supplied object is not a `Node` instance.
+        """
+        if not isinstance(node, Node):
+            raise TypeError(
+                f"Only new node instances may be added, but got {type(node)}."
+            )
+        self._ensure_node_has_no_other_parent(node)
+        label = self._get_unique_label(node.label if label is None else label)
+        self._ensure_node_is_not_duplicated(node, label)
+
+        self.nodes[label] = node
+        node.label = label
+        node.parent = self
+        return node
+
+    def _get_unique_label(self, label):
+        if label in self.__dir__():
+            if isinstance(getattr(self, label), Node):
+                if self.strict_naming:
+                    raise AttributeError(
+                        f"{label} is already the label for a node. Please remove it "
+                        f"before assigning another node to this label."
+                    )
+                else:
+                    label = self._add_suffix_to_label(label)
+            else:
+                raise AttributeError(
+                    f"{label} is an attribute or method of the {self.__class__} class, "
+                    f"and cannot be used as a node label."
+                )
+        return label
+
+    def _add_suffix_to_label(self, label):
+        i = 0
+        new_label = label
+        while new_label in self.nodes.keys():
+            new_label = f"{label}{i}"
+            i += 1
+        if new_label != label:
+            logger.info(
+                f"{label} is already a node; appending an index to the "
+                f"node label instead: {new_label}"
+            )
+        return new_label
+
+    def _ensure_node_has_no_other_parent(self, node: Node):
+        if node.parent is not None and node.parent is not self:
+            raise ValueError(
+                f"The node ({node.label}) already belongs to the parent "
+                f"{node.parent.label}. Please remove it there before trying to "
+                f"add it to this parent ({self.label})."
+            )
+
+    def _ensure_node_is_not_duplicated(self, node: Node, label: str):
+        if (
+            node.parent is self
+            and label != node.label
+            and self.nodes[node.label] is node
+        ):
+            logger.info(
+                f"Reassigning the node {node.label} to the label {label} when "
+                f"adding it to the parent {self.label}."
+ ) + del self.nodes[node.label] + + def remove(self, node: Node | str): + if isinstance(node, Node): + node.parent = None + node.disconnect() + del self.nodes[node.label] + else: + del self.nodes[node] + + def __setattr__(self, key: str, node: Node): + if isinstance(node, Node) and key != "parent": + self.add(node, label=key) + else: + super().__setattr__(key, node) + + def __getattr__(self, key): + try: + return self.nodes[key] + except KeyError: + # Raise an attribute error from getattr to make sure hasattr works well! + raise AttributeError( + f"Could not find attribute {key} on {self.label} " + f"({self.__class__.__name__}) or in its nodes ({self.nodes.keys()})" + ) + + def __getitem__(self, item): + return self.__getattr__(item) + + def __setitem__(self, key, value): + self.__setattr__(key, value) + + def __iter__(self): + return self.nodes.values().__iter__() + + def __len__(self): + return len(self.nodes) + + def __dir__(self): + return set(super().__dir__() + list(self.nodes.keys())) + + @property + def color(self) -> str: + """For drawing the graph""" + return SeabornColors.brown + + +class OwnedCreator: + """ + A creator that overrides the `parent` arg of all accessed nodes to its own parent. + + Necessary so that `Workflow.create.Function(...)` returns an unowned function node, + while `some_workflow_instance.create.Function(...)` returns a function node owned + by the workflow instance. + """ + + def __init__(self, parent: Composite, creator: Creator): + self._parent = parent + self._creator = creator + + def __getattr__(self, item): + value = getattr(self._creator, item) + + try: + is_node_class = issubclass(value, Node) + except TypeError: + # issubclass complains if the value isn't even a class + is_node_class = False + + if is_node_class: + value = partial(value, parent=self._parent) + elif isinstance(value, NodePackage): + value = OwnedNodePackage(self._parent, value) + + return value + + +class OwnedNodePackage: + """ + A wrapper for node packages so that accessed node classes can have their parent + value automatically filled. + """ + + def __init__(self, parent: Composite, node_package: NodePackage): + self._parent = parent + self._node_package = node_package + + def __getattr__(self, item): + value = getattr(self._node_package, item) + if issubclass(value, Node): + value = partial(value, parent=self._parent) + return value diff --git a/pyiron_workflow/draw.py b/pyiron_workflow/draw.py new file mode 100644 index 00000000..7906cddd --- /dev/null +++ b/pyiron_workflow/draw.py @@ -0,0 +1,370 @@ +""" +Functions for drawing the graph. 
+""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Literal, Optional, TYPE_CHECKING + +import graphviz +from matplotlib.colors import to_hex, to_rgb + +from pyiron_workflow.util import SeabornColors + +if TYPE_CHECKING: + from pyiron_workflow.channels import Channel as WorkflowChannel + from pyiron_workflow.io import DataIO, SignalIO + from pyiron_workflow.node import Node as WorkflowNode + + +def directed_graph(name, label, rankdir, color_start, color_end, gradient_angle): + """A shortcut method for instantiating the type of graphviz graph we want""" + digraph = graphviz.graphs.Digraph(name=name) + digraph.attr( + label=label, + compound="true", + rankdir=rankdir, + style="filled", + fillcolor=f"{color_start}:{color_end}", + gradientangle=gradient_angle, + ) + return digraph + + +def reverse_rankdir(rankdir: Literal["LR", "TB"]): + if rankdir == "LR": + return "TB" + elif rankdir == "TB": + return "LR" + else: + raise ValueError(f"Expected rankdir of 'LR' or 'TB' but got {rankdir}") + + +def blend_colours(color_a, color_b, fraction_a=0.5): + """Blends two hex code colours together""" + return to_hex( + tuple( + fraction_a * a + (1 - fraction_a) * b + for (a, b) in zip(to_rgb(color_a), to_rgb(color_b)) + ) + ) + + +def lighten_hex_color(color, lightness=0.7): + """Blends the given hex code color with pure white.""" + return blend_colours(SeabornColors.white, color, fraction_a=lightness) + + +class WorkflowGraphvizMap(ABC): + """ + A parent class defining the interface for the graphviz representation of all our + workflow objects. + """ + + @property + @abstractmethod + def parent(self) -> WorkflowGraphvizMap | None: + pass + + @property + @abstractmethod + def name(self) -> str: + pass + + @property + @abstractmethod + def label(self) -> str: + pass + + @property + @abstractmethod + def graph(self) -> graphviz.graphs.Digraph: + pass + + @property + @abstractmethod + def color(self) -> str: + pass + + +class _Channel(WorkflowGraphvizMap, ABC): + """ + An abstract representation for channel objects, which are "nodes" in graphviz + parlance. + """ + + def __init__(self, parent: _IO, channel: WorkflowChannel, local_name: str): + self.channel = channel + self._parent = parent + self._name = self.parent.name + local_name + self._label = local_name + self._build_label_suffix() + self.channel: WorkflowChannel = channel + + self.graph.node( + name=self.name, + label=self.label, + shape=self.shape, + color=self.color, + style="filled", + ) + + @property + @abstractmethod + def shape(self) -> str: + pass + + def _build_label_suffix(self): + suffix = "" + try: + if self.channel.type_hint is not None: + suffix += ": " + self.channel.type_hint.__name__ + except AttributeError: + pass # Signals have no type + return suffix + + @property + def parent(self) -> _IO | None: + return self._parent + + @property + def name(self) -> str: + return self._name + + @property + def label(self) -> str: + return self._label + + @property + def graph(self) -> graphviz.graphs.Digraph: + return self.parent.graph + + +class DataChannel(_Channel): + @property + def color(self) -> str: + return SeabornColors.orange + + @property + def shape(self) -> str: + return "oval" + + +class SignalChannel(_Channel): + @property + def color(self) -> str: + return SeabornColors.blue + + @property + def shape(self) -> str: + return "cds" + + +class _IO(WorkflowGraphvizMap, ABC): + """ + An abstract class for IO panels, which are represented as a "subgraph" in graphviz + parlance. 
+ """ + + def __init__(self, parent: Node): + self._parent = parent + self.node: WorkflowNode = self.parent.node + self.data_io, self.signals_io = self._get_node_io() + self._name = self.parent.name + self.data_io.__class__.__name__ + self._label = self.data_io.__class__.__name__ + self._graph = directed_graph( + self.name, + self.label, + rankdir=reverse_rankdir(self.parent.rankdir), + color_start=self.color, + color_end=lighten_hex_color(self.color), + gradient_angle=self.gradient_angle, + ) + + self.channels = [ + SignalChannel(self, channel, panel_label) + for panel_label, channel in self.signals_io.items() + ] + [ + DataChannel(self, channel, panel_label) + for panel_label, channel in self.data_io.items() + ] + + self.parent.graph.subgraph(self.graph) + + @abstractmethod + def _get_node_io(self) -> tuple[DataIO, SignalIO]: + pass + + @property + @abstractmethod + def gradient_angle(self) -> str: + """Background fill colour angle in degrees""" + + @property + def parent(self) -> Node: + return self._parent + + @property + def name(self) -> str: + return self._name + + @property + def label(self) -> str: + return self._label + + @property + def graph(self) -> graphviz.graphs.Digraph: + return self._graph + + @property + def color(self) -> str: + return SeabornColors.gray + + def __len__(self): + return len(self.channels) + + +class Inputs(_IO): + def _get_node_io(self) -> tuple[DataIO, SignalIO]: + return self.node.inputs, self.node.signals.input + + @property + def gradient_angle(self) -> str: + return "0" + + +class Outputs(_IO): + def _get_node_io(self) -> tuple[DataIO, SignalIO]: + return self.node.outputs, self.node.signals.output + + @property + def gradient_angle(self) -> str: + return "180" + + +class Node(WorkflowGraphvizMap): + """ + A wrapper class to connect graphviz to our workflow nodes. The nodes are + represented by a "graph" or "subgraph" in graphviz parlance (depending on whether + the node being visualized is the top-most node or not). + + Visualized nodes show their label and type, and IO panels with label and type. + Colors and shapes are exploited to differentiate various node classes, input/output, + and data/signal channels. + + If the node is composite in nature and the `depth` argument is at least `1`, owned + children are also visualized (recursively with `depth = depth - 1`) inside the scope + of this node. + + Args: + node (pyiron_workflow.node.Node): The node to visualize. + parent (Optional[pyiron_workflow.draw.Node]): The visualization that + owns this visualization (if any). + depth (int): How deeply to decompose any child nodes beyond showing their IO. + rankdir ("LR" | "TB"): Use left-right or top-bottom graphviz `rankdir`. 
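+
+    A rough sketch (illustrative; `macro` stands in for any composite node):
+
+    >>> drawing = Node(macro, depth=2, rankdir="TB")  # recurse two levels into macro
+    >>> digraph = drawing.graph  # the assembled graphviz.graphs.Digraph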
+    """
+
+    def __init__(
+        self,
+        node: WorkflowNode,
+        parent: Optional[Node] = None,
+        depth: int = 1,
+        rankdir: Literal["LR", "TB"] = "LR",
+    ):
+        self.node = node
+        self._parent = parent
+        self._name = self.build_node_name()
+        self._label = self.node.label + ": " + self.node.__class__.__name__
+        self.rankdir: Literal["LR", "TB"] = rankdir
+        self._graph = directed_graph(
+            self.name,
+            self.label,
+            rankdir=self.rankdir,
+            color_start=self.color,
+            color_end=lighten_hex_color(self.color),
+            gradient_angle="90",
+        )
+
+        self.inputs = Inputs(self)
+        self.outputs = Outputs(self)
+        self.graph.edge(
+            self.inputs.channels[0].name, self.outputs.channels[0].name, style="invis"
+        )
+
+        if depth > 0:
+            try:
+                self._connect_owned_nodes(depth)
+            except AttributeError:
+                # Only composite nodes have their own nodes attribute
+                pass
+
+        if self.parent is not None:
+            self.parent.graph.subgraph(self.graph)
+
+    def _channel_bicolor(self, start_channel, end_channel):
+        return f"{start_channel.color};0.5:{end_channel.color};0.5"
+
+    def _connect_owned_nodes(self, depth):
+        nodes = [Node(node, self, depth - 1) for node in self.node.nodes.values()]
+        internal_inputs = [
+            channel for node in nodes for channel in node.inputs.channels
+        ]
+        internal_outputs = [
+            channel for node in nodes for channel in node.outputs.channels
+        ]
+
+        # Loop to check for internal node output --> internal node input connections
+        for output_channel in internal_outputs:
+            for input_channel in internal_inputs:
+                if input_channel.channel in output_channel.channel.connections:
+                    self.graph.edge(
+                        output_channel.name,
+                        input_channel.name,
+                        color=self._channel_bicolor(output_channel, input_channel),
+                    )
+
+        # Loop to check for macro input --> internal node input connections
+        self._connect_matching(self.inputs.channels, internal_inputs)
+        # Loop to check for internal node output --> macro output connections
+        self._connect_matching(internal_outputs, self.outputs.channels)
+
+    def _connect_matching(self, sources: list[_Channel], destinations: list[_Channel]):
+        """
+        Draw an edge between two graph channels whose workflow channels are the same
+        """
+        for source in sources:
+            for destination in destinations:
+                if source.channel is destination.channel:
+                    self.graph.edge(
+                        source.name,
+                        destination.name,
+                        color=self._channel_bicolor(source, destination),
+                    )
+
+    def build_node_name(self, suffix=""):
+        if self.parent is not None:
+            # Recursively prepend parent labels to get a totally unique label string
+            # (inside the scope of this graph)
+            return self.parent.build_node_name(suffix=suffix + self.node.label)
+        else:
+            return "cluster" + self.node.label + suffix
+
+    @property
+    def parent(self) -> Node | None:
+        return self._parent
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def label(self) -> str:
+        return self._label
+
+    @property
+    def graph(self) -> graphviz.graphs.Digraph:
+        return self._graph
+
+    @property
+    def color(self) -> str:
+        return self.node.color
diff --git a/pyiron_workflow/executors/README.md b/pyiron_workflow/executors/README.md
new file mode 100644
index 00000000..e96e8b75
--- /dev/null
+++ b/pyiron_workflow/executors/README.md
@@ -0,0 +1,3 @@
+# Executors
+
+This sub-module holds custom children of `concurrent.futures.Executor` for use in other parts of pyiron (e.g. `pyiron_workflow`).
\ No newline at end of file
diff --git a/pyiron_workflow/executors/__init__.py b/pyiron_workflow/executors/__init__.py
new file mode 100644
index 00000000..83069e79
--- /dev/null
+++ b/pyiron_workflow/executors/__init__.py
@@ -0,0 +1,7 @@
+"""
+This module holds customized children of `concurrent.futures.Executor`.
+"""
+
+from pyiron_workflow.executors.cloudpickleprocesspool import (
+    CloudpickleProcessPoolExecutor,
+)
diff --git a/pyiron_workflow/executors/cloudpickleprocesspool.py b/pyiron_workflow/executors/cloudpickleprocesspool.py
new file mode 100644
index 00000000..7ef13cb7
--- /dev/null
+++ b/pyiron_workflow/executors/cloudpickleprocesspool.py
@@ -0,0 +1,216 @@
+from concurrent.futures import Future, ProcessPoolExecutor
+from concurrent.futures.process import _global_shutdown, _WorkItem, BrokenProcessPool
+from sys import version_info
+
+import cloudpickle
+
+
+class CloudLoadsFuture(Future):
+    def result(self, timeout=None):
+        result = super().result(timeout=timeout)
+        if isinstance(result, bytes):
+            result = cloudpickle.loads(result)
+        return result
+
+
+class _CloudPickledCallable:
+    def __init__(self, fnc: callable):
+        self.fnc_serial = cloudpickle.dumps(fnc)
+
+    def __call__(self, /, dumped_args, dumped_kwargs):
+        fnc = cloudpickle.loads(self.fnc_serial)
+        args = cloudpickle.loads(dumped_args)
+        kwargs = cloudpickle.loads(dumped_kwargs)
+        return cloudpickle.dumps(fnc(*args, **kwargs))
+
+    @classmethod
+    def dumps(cls, stuff):
+        return cloudpickle.dumps(stuff)
+
+
+class CloudpickleProcessPoolExecutor(ProcessPoolExecutor):
+    """
+    This class wraps `concurrent.futures.ProcessPoolExecutor` such that the submitted
+    callable, its arguments, and its return value are all pickled using `cloudpickle`.
+    In this way, the executor extends support to all objects which are cloud-pickleable,
+    e.g. dynamically defined or decorated classes.
+
+    To accomplish this, the underlying `concurrent.futures.Future` class used is
+    replaced with our `CloudLoadsFuture`, which is identical except that calls to
+    `result()` will first try to `cloudpickle.loads` any `bytes` results found.
+
+    Examples:
+        Consider a class created from a function dynamically with a decorator.
+        These are not normally pickleable, so in this example we show how this class
+        allows us to submit a method from such a class, that both takes as an argument
+        and returns such an unpicklable class.
+        Actions such as registering callbacks and waiting for results behave just like
+        normal.
+        >>> from functools import partialmethod
+        >>>
+        >>> from pyiron_workflow.executors import CloudpickleProcessPoolExecutor
+        >>>
+        >>> class Foo:
+        ...     '''
+        ...     A base class to be dynamically modified for testing our executor.
+        ...     '''
+        ...     def __init__(self, fnc: callable):
+        ...         self.fnc = fnc
+        ...         self.result = None
+        ...
+        ...     @property
+        ...     def run(self):
+        ...         return self.fnc
+        ...
+        ...     def process_result(self, future):
+        ...         self.result = future.result()
+        >>>
+        >>>
+        >>> def dynamic_foo():
+        ...     '''
+        ...     A decorator for dynamically modifying the Foo class.
+        ...
+        ...     Overrides the `fnc` input of `Foo` with the decorated function.
+        ...     '''
+        ...     def as_dynamic_foo(fnc: callable):
+        ...         return type(
+        ...             "DynamicFoo",
+        ...             (Foo,),  # Define parentage
+        ...             {
+        ...                 "__init__": partialmethod(
+        ...                     Foo.__init__,
+        ...                     fnc
+        ...                 )
+        ...             },
+        ...         )
+        ...
+        ...     return as_dynamic_foo
+        >>>
+        >>> @dynamic_foo()
+        ... def UnpicklableCallable(unpicklable_arg):
+        ...     unpicklable_arg.result = "This was an arg"
+        ...     return unpicklable_arg
+        >>>
+        >>>
+        >>> instance = UnpicklableCallable()
+        >>> arg = UnpicklableCallable()
+        >>> executor = CloudpickleProcessPoolExecutor()
+        >>> fs = executor.submit(instance.run, arg)
+        >>> fs.add_done_callback(instance.process_result)
+        >>> print(fs.done())
+        False

+        >>> print(fs.result().__class__.__name__)
+        DynamicFoo

+        >>> print(fs.done())
+        True

+        >>> print(instance.result.result)
+        This was an arg
+    """
+
+    def submit(self, fn, /, *args, **kwargs):
+        return self._submit(
+            _CloudPickledCallable(fn),
+            _CloudPickledCallable.dumps(args),
+            _CloudPickledCallable.dumps(kwargs),
+        )
+
+    submit.__doc__ = ProcessPoolExecutor.submit.__doc__
+
+    def _submit(self, fn, /, *args, **kwargs):
+        """
+        We override the regular `concurrent.futures.ProcessPoolExecutor` to use our
+        custom future that unpacks cloudpickled results.
+
+        This approach is simple, but the brute-force nature of it means we manually
+        accommodate different implementations of `ProcessPoolExecutor` in different
+        python versions.
+        """
+        if version_info.major != 3:
+            raise RuntimeError(
+                f"{self.__class__} is only built for python3, but got "
+                f"{version_info.major}"
+            )
+
+        if version_info.minor == 8:
+            return self._submit_3_8(fn, *args, **kwargs)
+        elif version_info.minor >= 9:
+            return self._submit_3_gt9(fn, *args, **kwargs)
+        else:
+            raise RuntimeError(
+                f"{self.__class__} is only built for python 3.8+, but got "
+                f"{version_info.major}.{version_info.minor}."
+            )
+
+    def _submit_3_gt9(self, fn, /, *args, **kwargs):
+        with self._shutdown_lock:
+            if self._broken:
+                raise BrokenProcessPool(self._broken)
+            if self._shutdown_thread:
+                raise RuntimeError("cannot schedule new futures after shutdown")
+            if _global_shutdown:
+                raise RuntimeError(
+                    "cannot schedule new futures after " "interpreter shutdown"
+                )
+
+            f = CloudLoadsFuture()
+            w = _WorkItem(f, fn, args, kwargs)
+
+            self._pending_work_items[self._queue_count] = w
+            self._work_ids.put(self._queue_count)
+            self._queue_count += 1
+            # Wake up queue management thread
+            self._executor_manager_thread_wakeup.wakeup()
+
+            if self._safe_to_dynamically_spawn_children:
+                self._adjust_process_count()
+            self._start_executor_manager_thread()
+            return f
+
+    def _submit_3_8(*args, **kwargs):
+        if len(args) >= 2:
+            self, fn, *args = args
+        elif not args:
+            raise TypeError(
+                "descriptor 'submit' of 'ProcessPoolExecutor' object "
+                "needs an argument"
+            )
+        elif "fn" in kwargs:
+            fn = kwargs.pop("fn")
+            self, *args = args
+            import warnings
+
+            warnings.warn(
+                "Passing 'fn' as keyword argument is deprecated",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+        else:
+            raise TypeError(
+                "submit expected at least 1 positional argument, "
+                "got %d" % (len(args) - 1)
+            )
+
+        with self._shutdown_lock:
+            if self._broken:
+                raise BrokenProcessPool(self._broken)
+            if self._shutdown_thread:
+                raise RuntimeError("cannot schedule new futures after shutdown")
+            if _global_shutdown:
+                raise RuntimeError(
+                    "cannot schedule new futures after " "interpreter shutdown"
+                )
+
+            f = CloudLoadsFuture()
+            w = _WorkItem(f, fn, args, kwargs)
+
+            self._pending_work_items[self._queue_count] = w
+            self._work_ids.put(self._queue_count)
+            self._queue_count += 1
+            # Wake up queue management thread
+            self._queue_management_thread_wakeup.wakeup()
+
+            self._start_queue_management_thread()
+            return f
diff --git a/pyiron_workflow/files.py b/pyiron_workflow/files.py
new file mode 100644
index 00000000..05c6af03
--- /dev/null
+++ b/pyiron_workflow/files.py
@@ -0,0 +1,99 @@
+from pathlib import Path
+
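+# A rough usage sketch (illustrative; file and directory names are arbitrary):
+#
+#   d = DirectoryObject("scratch")     # creates ./scratch if it doesn't already exist
+#   f = d.create_file("notes.txt")     # a FileObject handle; nothing written yet
+#   f.write("hello")                   # default mode "x" fails if the file exists
+#   f.read()                           # -> "hello"
+#   d.delete()                         # recursively removes the directory tree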
+ +def delete_files_and_directories_recursively(path): + if not path.exists(): + return + for item in path.rglob("*"): + if item.is_file(): + item.unlink() + else: + delete_files_and_directories_recursively(item) + path.rmdir() + + +def categorize_folder_items(folder_path): + types = [ + "dir", + "file", + "mount", + "symlink", + "block_device", + "char_device", + "fifo", + "socket", + ] + results = {t: [] for t in types} + + for item in folder_path.iterdir(): + for tt in types: + try: + if getattr(item, f"is_{tt}")(): + results[tt].append(str(item)) + except NotImplementedError: + pass + return results + + +class DirectoryObject: + def __init__(self, directory): + self.path = Path(directory) + self.create() + + def create(self): + self.path.mkdir(parents=True, exist_ok=True) + + def delete(self): + delete_files_and_directories_recursively(self.path) + + def list_content(self): + return categorize_folder_items(self.path) + + def __len__(self): + return sum([len(cc) for cc in self.list_content().values()]) + + def __repr__(self): + return f"DirectoryObject(directory='{self.path}')\n{self.list_content()}" + + def get_path(self, file_name): + return self.path / file_name + + def file_exists(self, file_name): + return self.get_path(file_name).is_file() + + def write(self, file_name, content, mode="w"): + with self.get_path(file_name).open(mode=mode) as f: + f.write(content) + + def create_subdirectory(self, path): + return DirectoryObject(self.path / path) + + def create_file(self, file_name): + return FileObject(file_name, self) + + +class FileObject: + def __init__(self, file_name: str, directory: DirectoryObject): + self.directory = directory + self._file_name = file_name + + @property + def file_name(self): + return self._file_name + + @property + def path(self): + return self.directory.path / Path(self._file_name) + + def write(self, content, mode="x"): + self.directory.write(file_name=self.file_name, content=content, mode=mode) + + def read(self, mode="r"): + with open(self.path, mode=mode) as f: + return f.read() + + def is_file(self): + return self.directory.file_exists(self.file_name) + + def delete(self): + self.path.unlink() diff --git a/pyiron_workflow/function.py b/pyiron_workflow/function.py new file mode 100644 index 00000000..25c2c9a5 --- /dev/null +++ b/pyiron_workflow/function.py @@ -0,0 +1,680 @@ +from __future__ import annotations + +import inspect +import warnings +from functools import partialmethod +from typing import get_args, get_type_hints, Optional, TYPE_CHECKING + +from pyiron_workflow.channels import InputData, OutputData, NotData +from pyiron_workflow.has_channel import HasChannel +from pyiron_workflow.io import Inputs, Outputs, Signals +from pyiron_workflow.node import Node +from pyiron_workflow.output_parser import ParseOutput +from pyiron_workflow.util import SeabornColors + +if TYPE_CHECKING: + from pyiron_workflow.composite import Composite + from pyiron_workflow.workflow import Workflow + + +class Function(Node): + """ + Function nodes wrap an arbitrary python function. + Node IO, including type hints, is generated automatically from the provided + function. + Input data for the wrapped function can be provided as any valid combination of + `*arg` and `**kwarg` at both initialization and on calling the node. + + On running, the function node executes this wrapped function with its current input + and uses the results to populate the node output. 
+
+    Function nodes must be instantiated with a callable to determine their function,
+    and a string to name each returned value of that callable. (If you really want to
+    return a tuple, just have multiple return values but only one output label -- there
+    is currently no way to mix-and-match, i.e. to have multiple return values at least
+    one of which is a tuple.)
+
+    The node label (unless otherwise provided), IO channel names, IO types, and input
+    defaults for the node are produced _automatically_ from introspection of the node
+    function.
+    Explicit output labels can be provided to modify the number of return values (from
+    $N$ to 1 in case you _want_ a tuple returned) and to dodge constraints on the
+    automatic scraping routine (namely, that there be _at most_ one `return`
+    expression).
+    (Additional properties like storage priority and ontological type are forthcoming
+    as kwarg dictionaries with keys corresponding to the channel labels (i.e. the node
+    arguments of the node function, or the output labels provided).)
+
+    Actual function node instances can either be instances of the base node class, in
+    which case the callable node function *must* be provided, OR they can be instances
+    of children of this class.
+    Those children may define some or all of the node behaviour at the class level, and
+    modify their signature accordingly so this is not available for alteration by the
+    user, e.g. the node function and output labels may be hard-wired.
+
+    Although not strictly enforced, it is a best-practice that where possible, function
+    nodes should be both functional (always returning the same output given the same
+    input) and idempotent (not modifying input data in-place, but creating copies where
+    necessary and returning new objects as output).
+    Further, functions with multiple return branches that return different types or
+    numbers of return values may or may not work smoothly, depending on the details.
+
+    Output is updated in the `process_run_result` inside the parent class `finish_run`
+    call, such that output data gets pushed after the node stops running but before
+    the `ran` signal fires: run, process and push result, ran.
+
+    After a node is instantiated, its input can be updated as `*args` and/or `**kwargs`
+    on call.
+    `run()` returns the output of the executed function, or a futures object if the
+    node is set to use an executor.
+    Calling the node or executing an `update()` returns the same thing as running, if
+    the node is run, or, in the case of `update()`, `None` if it is not `ready` to run.
+
+    Args:
+        node_function (callable): The function determining the behaviour of the node.
+        label (str): The node's label. (Defaults to the node function's name.)
+        output_labels (Optional[str | list[str] | tuple[str]]): A name for each return
+            value of the node function OR a single label. (Default is None, which
+            scrapes output labels automatically from the source code of the wrapped
+            function.) This can be useful when returned values are not well named, e.g.
+            to make the output channel dot-accessible if it would otherwise have a label
+            that requires item-string-based access. Additionally, specifying a _single_
+            label for a wrapped function that returns a tuple of values ensures that a
+            _single_ output channel (holding the tuple) is created, instead of one
+            channel for each return value. The default approach of extracting labels
+            from the function source code also requires that the function body contain
+            _at most_ one `return` expression, so providing explicit labels can be used
+            to circumvent this (at your own risk).
+        **kwargs: Any additional keyword arguments whose keyword matches the label of an
+            input channel will have their value assigned to that channel.
+
+    Attributes:
+        inputs (Inputs): A collection of input data channels.
+        outputs (Outputs): A collection of output data channels.
+        signals (Signals): A holder for input and output collections of signal channels.
+        ready (bool): All input reports ready, node is not running or failed.
+        running (bool): Currently running.
+        failed (bool): An exception was thrown when executing the node function.
+        connected (bool): Any IO channel has at least one connection.
+        fully_connected (bool): Every IO channel has at least one connection.
+
+    Methods:
+        update: If your input is ready, will run the engine.
+        run: Parse and process the input, execute the engine, process the results and
+            update the output.
+        disconnect: Disconnect all data and signal IO connections.
+        update_input: Allows input channels' values to be updated without any running.
+
+    Examples:
+        At the most basic level, to use nodes all we need to do is provide the
+        `Function` class with a function and labels for its output, like so:
+        >>> from pyiron_workflow.function import Function
+        >>>
+        >>> def mwe(x, y):
+        ...     return x+1, y-1
+        >>>
+        >>> plus_minus_1 = Function(mwe)
+        >>>
+        >>> print(plus_minus_1.outputs["x+1"])
+        <class 'pyiron_workflow.channels.NotData'>

+        There is no output because we haven't given our function any input, it has
+        no defaults, and we never ran it! So outputs have the channel default value of
+        `NotData` -- a special non-data class (since `None` is sometimes a meaningful
+        value in python).

+        We'll run into a hiccup if we try to set only one of the inputs and force the
+        run:
+        >>> plus_minus_1.inputs.x = 2
+        >>> plus_minus_1.run()
+        TypeError: unsupported operand type(s) for -: 'type' and 'int'

+        This is because the second input (`y`) still has no input value, so we can't
+        do the `y-1` subtraction while `y` still holds `NotData`.

+        Once we update `y`, all the input is ready and we will be allowed to proceed
+        to a `run()` call, which succeeds and updates the output.
+        The final thing we need to do is disable the `failed` status we got from our
+        last run call
+        >>> plus_minus_1.failed = False
+        >>> plus_minus_1.inputs.y = 3
+        >>> plus_minus_1.run()
+        >>> plus_minus_1.outputs.to_value_dict()
+        {'x+1': 3, 'y-1': 2}

+        We can also, optionally, provide initial values for some or all of the input and
+        labels for the output:
+        >>> plus_minus_1 = Function(mwe, output_labels=("p1", "m1"), x=1)
+        >>> plus_minus_1.inputs.y = 2
+        >>> out = plus_minus_1.run()
+        >>> out
+        (2, 1)

+        Input data can be provided to both initialization and on call as ordered args
+        or keyword kwargs.
+        When running, updating, or calling the node, the output of the wrapped function
+        (if it winds up getting run in the conditional cases of updating and calling) is
+        returned:
+        >>> plus_minus_1(2, y=3)
+        (3, 2)

+        We can make our node even more sensible by adding type
+        hints (and, optionally, default values) when defining the function that the node
+        wraps.
+        The node will automatically figure out defaults and type hints for the IO
+        channels from inspection of the wrapped function.
+
+        In this example, note the mixture of old-school (`typing.Union`) and new (`|`)
+        type hints as well as nested hinting with a union-type inside the tuple for the
+        return hint.
+        Our treatment of type hints is **not infinitely robust**, but covers a wide
+        variety of common use cases.
+        Note that getting "good" (i.e. dot-accessible) output labels can be achieved by
+        using good variable names and returning those variables instead of using
+        `output_labels`.
+        If we force the node to `run()` (or call it) with bad types, it will raise an
+        error.
+        But, if we use the gentler `update()`, it will check types first and simply
+        return `None` if the input is not all `ready`.
+        >>> from typing import Union
+        >>>
+        >>> def hinted_example(
+        ...     x: Union[int, float],
+        ...     y: int | float = 1
+        ... ) -> tuple[int, int | float]:
+        ...     p1, m1 = x+1, y-1
+        ...     return p1, m1
+        >>>
+        >>> plus_minus_1 = Function(hinted_example, x="not an int")
+        >>> plus_minus_1.update()
+        >>> plus_minus_1.outputs.to_value_dict()
+        {'p1': <class 'pyiron_workflow.channels.NotData'>,
+        'm1': <class 'pyiron_workflow.channels.NotData'>}

+        Here, even though all the input has data, the node sees that some of it is the
+        wrong type and so the automatic updates don't proceed all the way to a run.
+        Note that the type hinting doesn't actually prevent us from assigning bad values
+        directly to the channel (although it will, by default, prevent connections
+        _between_ type-hinted channels with incompatible hints), but it _does_ stop the
+        node from running and throwing an error because it sees that the channel (and
+        thus node) is not ready
+        >>> plus_minus_1.inputs.x.value
+        'not an int'

+        >>> plus_minus_1.ready, plus_minus_1.inputs.x.ready, plus_minus_1.inputs.y.ready
+        (False, False, True)

+        In these examples, we've instantiated nodes directly from the base `Function`
+        class, and populated their input directly with data.
+        In practice, these nodes are meant to be part of complex workflows; that means
+        both that you are likely to have particular nodes that get heavily re-used, and
+        that you need the nodes to pass data to each other.

+        For reusable nodes, we want to create a sub-class of `Function` that fixes some
+        of the node behaviour -- usually the `node_function` and `output_labels`.

+        This can be done most easily with the `function_node` decorator, which takes a
+        function and returns a node class:
+        >>> from pyiron_workflow.function import function_node
+        >>>
+        >>> @function_node(output_labels=("p1", "m1"))
+        ... def my_mwe_node(
+        ...     x: int | float, y: int | float = 1
+        ... ) -> tuple[int | float, int | float]:
+        ...     return x+1, y-1
+        >>>
+        >>> node_instance = my_mwe_node(x=0)
+        >>> node_instance(y=0)
+        (1, -1)

+        Where we've passed the output labels and class arguments to the decorator,
+        and initial values to the newly-created node class (`my_mwe_node`) at
+        instantiation.
+        Because we provided a good initial value for `x`, we get our result right away.

+        Using the decorator is the recommended way to create new node classes, but this
+        magic is just equivalent to these two more verbose ways of defining a new class.
+        The first is to override the `__init__` method directly:
+        >>> from typing import Literal, Optional
+        >>>
+        >>> class AlphabetModThree(Function):
+        ...     def __init__(
+        ...         self,
+        ...         label: Optional[str] = None,
+        ...         **kwargs
+        ...     ):
+        ...         super().__init__(
+        ...             self.alphabet_mod_three,
+        ...             label=label,
+        ...             **kwargs
+        ...         )
+        ...
+        ...     @staticmethod
+        ...     def alphabet_mod_three(i: int) -> Literal["a", "b", "c"]:
+        ...         letter = ["a", "b", "c"][i % 3]
+        ...         return letter
+
+        The second effectively does the same thing, but leverages python's
+        `functools.partialmethod` to do so much more succinctly.
+        In this example, note that the function is declared _before_ `__init__` is set,
+        so that it is available in the correct scope (above, we could place it
+        afterwards because we were accessing it through self).
+        >>> from functools import partialmethod
+        >>>
+        >>> class Adder(Function):
+        ...     @staticmethod
+        ...     def adder(x: int = 0, y: int = 0) -> int:
+        ...         sum = x + y
+        ...         return sum
+        ...
+        ...     __init__ = partialmethod(
+        ...         Function.__init__,
+        ...         adder,
+        ...     )

+        Finally, let's put it all together by using both of these nodes at once.
+        Instead of setting input to a particular data value, we'll set it to
+        be another node's output channel, thus forming a connection.
+        Then we need to define the corresponding execution flow, which can be done
+        by directly connecting `.signals.input.run` and `.signals.output.ran` channels
+        just like we connect data channels, but can also be accomplished with some
+        syntactic sugar using the `>` operator.
+        When we update the upstream node, we'll see the result passed downstream:
+        >>> adder = Adder()
+        >>> alpha = AlphabetModThree(i=adder.outputs.sum)
+        >>> adder > alpha
+        >>>
+        >>> adder(x=1)
+        >>> print(alpha.outputs.letter)
+        b
+        >>> adder(y=1)
+        >>> print(alpha.outputs.letter)
+        c
+        >>> adder.inputs.x = 0
+        >>> adder.inputs.y = 0
+        >>> adder()
+        >>> print(alpha.outputs.letter)
+        a

+        To see more details on how to use many nodes together, look at the
+        `Workflow` class.

+    Comments:

+        If you use the function argument `self` in the first position, the
+        whole node object is inserted there:

+        >>> def with_self(self, x):
+        ...     ...
+        ...     return x

+        For this function, you don't have the freedom to choose `self`, because
+        pyiron automatically sets the node object there (which is also the
+        reason why you do not see `self` in the list of inputs).
+    """
+
+    def __init__(
+        self,
+        node_function: callable,
+        *args,
+        label: Optional[str] = None,
+        parent: Optional[Composite] = None,
+        output_labels: Optional[str | list[str] | tuple[str]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            label=label if label is not None else node_function.__name__,
+            parent=parent,
+            # **kwargs,
+        )
+
+        self.node_function = node_function
+
+        self._inputs = None
+        self._outputs = None
+        self._output_labels = self._get_output_labels(output_labels)
+        # TODO: Parse output labels from the node function in case output_labels is None
+
+        self.signals = self._build_signal_channels()
+        self.update_input(*args, **kwargs)
+
+    def _get_output_labels(self, output_labels: str | list[str] | tuple[str] | None):
+        """
+        If output labels are provided, convert them to a list if passed as a string
+        and return them, else scrape them from the function's source code.
+
+        Note: When the user explicitly provides output channels, they are taking
+        responsibility that these are correct, e.g. in terms of quantity, order, etc.
+        """
+        if output_labels is None:
+            return self._scrape_output_labels()
+        elif isinstance(output_labels, str):
+            return [output_labels]
+        else:
+            return output_labels
+
+    def _scrape_output_labels(self):
+        """
+        Inspect the source code to scrape out strings representing the returned values.
+        _Only_ works for functions with a single `return` expression in their body.
+ + Will return expressions and function calls just fine, thus best practice is to + create well-named variables and return those so that the output labels stay + dot-accessible. + """ + parsed_outputs = ParseOutput(self.node_function).output + return [] if parsed_outputs is None else parsed_outputs + + @property + def _input_args(self): + return inspect.signature(self.node_function).parameters + + @property + def inputs(self) -> Inputs: + if self._inputs is None: + self._inputs = Inputs(*self._build_input_channels()) + return self._inputs + + @property + def outputs(self) -> Outputs: + if self._outputs is None: + self._outputs = Outputs(*self._build_output_channels(*self._output_labels)) + return self._outputs + + def _build_input_channels(self): + channels = [] + type_hints = get_type_hints(self.node_function) + + for ii, (label, value) in enumerate(self._input_args.items()): + is_self = False + if label == "self": # `self` is reserved for the node object + if ii == 0: + is_self = True + else: + warnings.warn( + "`self` is used as an argument but not in the first" + " position, so it is treated as a normal function" + " argument. If it is to be treated as the node object," + " use it as a first argument" + ) + if label in self._init_keywords: + # We allow users to parse arbitrary kwargs as channel initialization + # So don't let them choose bad channel names + raise ValueError( + f"The Input channel name {label} is not valid. Please choose a " + f"name _not_ among {self._init_keywords}" + ) + + try: + type_hint = type_hints[label] + if is_self: + warnings.warn("type hint for self ignored") + except KeyError: + type_hint = None + + default = NotData # The standard default in DataChannel + if value.default is not inspect.Parameter.empty: + if is_self: + warnings.warn("default value for self ignored") + else: + default = value.default + + if not is_self: + channels.append( + InputData( + label=label, + node=self, + default=default, + type_hint=type_hint, + ) + ) + return channels + + @property + def _init_keywords(self): + return list(inspect.signature(self.__init__).parameters.keys()) + + def _build_output_channels(self, *return_labels: str): + try: + type_hints = get_type_hints(self.node_function)["return"] + if len(return_labels) > 1: + type_hints = get_args(type_hints) + if not isinstance(type_hints, tuple): + raise TypeError( + f"With multiple return labels expected to get a tuple of type " + f"hints, but got type {type(type_hints)}" + ) + if len(type_hints) != len(return_labels): + raise ValueError( + f"Expected type hints and return labels to have matching " + f"lengths, but got {len(type_hints)} hints and " + f"{len(return_labels)} labels: {type_hints}, {return_labels}" + ) + else: + # If there's only one hint, wrap it in a tuple so we can zip it with + # *return_labels and iterate over both at once + type_hints = (type_hints,) + except KeyError: + type_hints = [None] * len(return_labels) + + channels = [] + for label, hint in zip(return_labels, type_hints): + channels.append( + OutputData( + label=label, + node=self, + type_hint=hint, + ) + ) + + return channels + + @property + def on_run(self): + return self.node_function + + @property + def run_args(self) -> dict: + kwargs = self.inputs.to_value_dict() + if "self" in self._input_args: + if self.executor is not None: + raise NotImplementedError( + f"The node {self.label} cannot be run on an executor because it " + f"uses the `self` argument and this functionality is not yet " + f"implemented" + ) + kwargs["self"] = self + return 
kwargs
+
+ def process_run_result(self, function_output):
+ """
+ Take the results of the node function, and use them to update the node output.
+
+ By extracting this as a separate method, we allow the node to pass the actual
+ execution off to another entity and release the python process to do other
+ things. In such a case, this function should be registered as a callback
+ so that the node can finish "running" and push its data forward when that
+ execution is finished.
+ """
+ if len(self.outputs) == 0:
+ return
+ elif len(self.outputs) == 1:
+ function_output = (function_output,)
+
+ for out, value in zip(self.outputs, function_output):
+ out.update(value)
+
+ def _convert_input_args_and_kwargs_to_input_kwargs(self, *args, **kwargs):
+ reverse_keys = list(self._input_args.keys())[::-1]
+ if len(args) > len(reverse_keys):
+ raise ValueError(
+ f"Received {len(args)} positional arguments, but the node {self.label} "
+ f"only accepts {len(reverse_keys)} inputs."
+ )
+
+ positional_keywords = reverse_keys[-len(args) :] if len(args) > 0 else [] # -0 would slice everything
+ if len(set(positional_keywords).intersection(kwargs.keys())) > 0:
+ raise ValueError(
+ f"Cannot use {set(positional_keywords).intersection(kwargs.keys())} "
+ f"as both positional _and_ keyword arguments; args {args}, kwargs "
+ f"{kwargs}, reverse_keys {reverse_keys}, positional_keywords "
+ f"{positional_keywords}"
+ )
+
+ for arg in args:
+ key = positional_keywords.pop()
+ kwargs[key] = arg
+
+ return kwargs
+
+ def update_input(self, *args, **kwargs) -> None:
+ """
+ Match positional and keyword arguments to input channels and update input
+ values.
+
+ Args:
+ *args: Interpreted in the same order as node function arguments.
+ **kwargs: input label - input value (including channels for connection)
+ pairs.
+ """
+ kwargs = self._convert_input_args_and_kwargs_to_input_kwargs(*args, **kwargs)
+ return super().update_input(**kwargs)
+
+ def __call__(self, *args, **kwargs) -> None:
+ kwargs = self._convert_input_args_and_kwargs_to_input_kwargs(*args, **kwargs)
+ return super().__call__(**kwargs)
+
+ def to_dict(self):
+ return {
+ "label": self.label,
+ "ready": self.ready,
+ "connected": self.connected,
+ "fully_connected": self.fully_connected,
+ "inputs": self.inputs.to_dict(),
+ "outputs": self.outputs.to_dict(),
+ "signals": self.signals.to_dict(),
+ }
+
+ @property
+ def color(self) -> str:
+ """For drawing the graph"""
+ return SeabornColors.green
+
+
+class SingleValue(Function, HasChannel):
+ """
+ A node that _must_ return only a single value.
+
+ Attribute and item access is modified to finally attempt access on the output value.
+ Note that this means any attributes/methods available on the output value become
+ available directly at the node level (at least those which don't conflict with the
+ existing node namespace).
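+
+ For example (a rough sketch, using the `single_value_node` decorator defined
+ later in this module):
+ >>> @single_value_node(output_labels="y")
+ ... def plus_one(x: int = 0) -> int:
+ ... y = x + 1
+ ... return y
+ >>> node = plus_one()
+ >>> node(x=1)
+ 2
+ >>> node.bit_length() # Attribute access falls back to the output value
+ 2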
+ """ + + def __init__( + self, + node_function: callable, + *args, + label: Optional[str] = None, + parent: Optional[Workflow] = None, + output_labels: Optional[str | list[str] | tuple[str]] = None, + **kwargs, + ): + super().__init__( + node_function, + *args, + label=label, + parent=parent, + output_labels=output_labels, + **kwargs, + ) + + def _get_output_labels(self, output_labels: str | list[str] | tuple[str] | None): + output_labels = super()._get_output_labels(output_labels) + if len(output_labels) > 1: + raise ValueError( + f"{self.__class__.__name__} must only have a single return value, but " + f"got multiple output labels: {output_labels}" + ) + return output_labels + + @property + def single_value(self): + return self.outputs[self.outputs.labels[0]].value + + @property + def channel(self) -> OutputData: + """The channel for the single output""" + return list(self.outputs.channel_dict.values())[0] + + @property + def color(self) -> str: + """For drawing the graph""" + return SeabornColors.cyan + + def __getitem__(self, item): + return self.single_value.__getitem__(item) + + def __getattr__(self, item): + return getattr(self.single_value, item) + + def __repr__(self): + return self.single_value.__repr__() + + def __str__(self): + return f"{self.label} ({self.__class__.__name__}) output single-value: " + str( + self.single_value + ) + + +def function_node(output_labels=None): + """ + A decorator for dynamically creating node classes from functions. + + Decorates a function. + Returns a `Function` subclass whose name is the camel-case version of the function + node, and whose signature is modified to exclude the node function and output labels + (which are explicitly defined in the process of using the decorator). + + Optionally takes any keyword arguments of `Function`. + """ + + def as_node(node_function: callable): + return type( + node_function.__name__.title().replace("_", ""), # fnc_name to CamelCase + (Function,), # Define parentage + { + "__init__": partialmethod( + Function.__init__, + node_function, + output_labels=output_labels, + ) + }, + ) + + return as_node + + +def single_value_node(output_labels=None): + """ + A decorator for dynamically creating fast node classes from functions. + + Unlike normal nodes, fast nodes _must_ have default values set for all their inputs. + + Optionally takes any keyword arguments of `SingleValueNode`. + """ + + def as_single_value_node(node_function: callable): + return type( + node_function.__name__.title().replace("_", ""), # fnc_name to CamelCase + (SingleValue,), # Define parentage + { + "__init__": partialmethod( + SingleValue.__init__, + node_function, + output_labels=output_labels, + ) + }, + ) + + return as_single_value_node diff --git a/pyiron_workflow/has_channel.py b/pyiron_workflow/has_channel.py new file mode 100644 index 00000000..69ff0a14 --- /dev/null +++ b/pyiron_workflow/has_channel.py @@ -0,0 +1,27 @@ +# coding: utf-8 +# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department +# Distributed under the terms of "New BSD License", see the LICENSE file. + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pyiron_workflow.channels import Channel + + +class HasChannel(ABC): + """ + A mix-in class for use with the `Channel` class. + A `Channel` is able to (attempt to) connect to any child instance of `HasConnection` + by looking at its `connection` attribute. 
+ + This is useful for letting channels attempt to connect to non-channel objects + directly by pointing them to some channel that object holds. + """ + + @property + @abstractmethod + def channel(self) -> Channel: + pass diff --git a/pyiron_workflow/has_to_dict.py b/pyiron_workflow/has_to_dict.py new file mode 100644 index 00000000..a78bc427 --- /dev/null +++ b/pyiron_workflow/has_to_dict.py @@ -0,0 +1,17 @@ +from abc import ABC, abstractmethod +from json import dumps + + +class HasToDict(ABC): + @abstractmethod + def to_dict(self): + pass + + def _repr_json_(self): + return self.to_dict() + + def info(self): + print(dumps(self.to_dict(), indent=2)) + + def __str__(self): + return str(self.to_dict()) diff --git a/pyiron_workflow/interfaces.py b/pyiron_workflow/interfaces.py new file mode 100644 index 00000000..36cd2c85 --- /dev/null +++ b/pyiron_workflow/interfaces.py @@ -0,0 +1,115 @@ +""" +Container classes for giving access to various workflow objects and tools +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pyiron_base.interfaces.singleton import Singleton + +# from pyiron_contrib.executors import CloudpickleProcessPoolExecutor as Executor +# from pympipool.mpi.executor import PyMPISingleTaskExecutor as Executor + +from pyiron_workflow.executors import CloudpickleProcessPoolExecutor as Executor + +from pyiron_workflow.function import ( + Function, + SingleValue, + function_node, + single_value_node, +) + +if TYPE_CHECKING: + from pyiron_workflow.node import Node + + +class Creator(metaclass=Singleton): + """ + A container class for providing access to various workflow objects. + Handles the registration of new node packages and, by virtue of being a singleton, + makes them available to all composite nodes holding a creator. + """ + + def __init__(self): + self.Executor = Executor + + self.Function = Function + self.SingleValue = SingleValue + + # Avoid circular imports by delaying import for children of Composite + self._macro = None + self._workflow = None + self._meta = None + + @property + def Macro(self): + if self._macro is None: + from pyiron_workflow.macro import Macro + + self._macro = Macro + return self._macro + + @property + def Workflow(self): + if self._workflow is None: + from pyiron_workflow.workflow import Workflow + + self._workflow = Workflow + return self._workflow + + @property + def standard(self): + try: + return self._standard + except AttributeError: + from pyiron_workflow.node_library.standard import nodes + + self.register("_standard", *nodes) + return self._standard + + @property + def atomistics(self): + try: + return self._atomistics + except AttributeError: + from pyiron_workflow.node_library.atomistics import nodes + + self.register("_atomistics", *nodes) + return self._atomistics + + @property + def meta(self): + if self._meta is None: + from pyiron_workflow.meta import meta_nodes + + self._meta = meta_nodes + return self._meta + + def register(self, domain: str, *nodes: list[type[Node]]): + if domain in self.__dir__(): + raise AttributeError(f"{domain} is already an attribute of {self}") + from pyiron_workflow.node_package import NodePackage + + setattr(self, domain, NodePackage(*nodes)) + + +class Wrappers(metaclass=Singleton): + """ + A container class giving access to the decorators that transform functions to nodes. 
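+
+ A rough usage sketch (assuming the `Workflow` class exposes this container as
+ `wrap_as`, as it does elsewhere in this package):
+ >>> from pyiron_workflow import Workflow
+ >>>
+ >>> @Workflow.wrap_as.single_value_node()
+ ... def add_one(x):
+ ... result = x + 1
+ ... return result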
+ """ + + def __init__(self): + self.function_node = function_node + self.single_value_node = single_value_node + + # Avoid circular imports by delaying import when wrapping children of Composite + self._macro_node = None + + @property + def macro_node(self): + if self._macro_node is None: + from pyiron_workflow.macro import macro_node + + self._macro_node = macro_node + return self._macro_node diff --git a/pyiron_workflow/io.py b/pyiron_workflow/io.py new file mode 100644 index 00000000..ceebf57e --- /dev/null +++ b/pyiron_workflow/io.py @@ -0,0 +1,272 @@ +""" +Collections of channel objects. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod + +from pyiron_workflow.channels import ( + Channel, + DataChannel, + InputData, + OutputData, + SignalChannel, + InputSignal, + OutputSignal, +) +from pyiron_workflow.has_channel import HasChannel +from pyiron_workflow.has_to_dict import HasToDict +from pyiron_workflow.util import DotDict, logger + + +class IO(HasToDict, ABC): + """ + IO is a convenience layer for holding and accessing multiple input/output channels. + It allows key and dot-based access to the underlying channels. + Channels can also be iterated over, and there are a number of helper functions to + alter the properties of or check the status of all the channels at once. + + A new channel can be assigned as an attribute of an IO collection, as long as it + matches the channel's type (e.g. `OutputChannel` for `Outputs`, `InputChannel` + for `Inputs`, etc...). + + When assigning something to an attribute holding an existing channel, if the + assigned object is a `Channel`, then an attempt is made to make a `connection` + between the two channels, otherwise we fall back on a value assignment that must + be defined in child classes under `_assign_value_to_existing_channel`, i.e. + >>> some_io.some_existing_channel = 5 + + is equivalent to + >>> some_io._assign_value_to_existing_channel( + ... some_io["some_existing_channel"], 5 + ... ) + + and + >>> some_io.some_existing_channel = some_other_channel + + is equivalent to + >>> some_io.some_existing_channel.connect(some_other_channel) + """ + + def __init__(self, *channels: Channel): + self.__dict__["channel_dict"] = DotDict( + { + channel.label: channel + for channel in channels + if isinstance(channel, self._channel_class) + } + ) + + @property + @abstractmethod + def _channel_class(self) -> type(Channel): + pass + + @abstractmethod + def _assign_a_non_channel_value(self, channel: Channel, value) -> None: + """What to do when some non-channel value gets assigned to a channel""" + pass + + def __getattr__(self, item) -> Channel: + try: + return self.channel_dict[item] + except KeyError: + # Raise an attribute error from getattr to make sure hasattr works well! 
raise AttributeError(
+ f"Could not find attribute {item} on {self.__class__.__name__} object "
+ f"nor in its channels ({self.labels})"
+ )
+
+ def __setattr__(self, key, value):
+ if key in self.channel_dict.keys():
+ self._assign_value_to_existing_channel(self.channel_dict[key], value)
+ elif isinstance(value, self._channel_class):
+ if key != value.label:
+ logger.info(
+ f"Assigning a channel with the label {value.label} to the io key "
+ f"{key}"
+ )
+ self.channel_dict[key] = value
+ else:
+ raise TypeError(
+ f"Can only set a Channel object or connect to existing channels, but the "
+ f"attribute {key} got assigned {value} of type {type(value)}"
+ )
+
+ def _assign_value_to_existing_channel(self, channel: Channel, value) -> None:
+ if isinstance(value, HasChannel):
+ channel.connect(value.channel)
+ else:
+ self._assign_a_non_channel_value(channel, value)
+
+ def __getitem__(self, item) -> Channel:
+ return self.__getattr__(item)
+
+ def __setitem__(self, key, value):
+ self.__setattr__(key, value)
+
+ @property
+ def connections(self) -> list[Channel]:
+ """All the unique connections across all channels"""
+ return list(
+ set([connection for channel in self for connection in channel.connections])
+ )
+
+ @property
+ def connected(self):
+ return any([c.connected for c in self])
+
+ @property
+ def fully_connected(self):
+ return all([c.connected for c in self])
+
+ def disconnect(self) -> list[tuple[Channel, Channel]]:
+ """
+ Disconnect all connections that owned channels have.
+
+ Returns:
+ [list[tuple[Channel, Channel]]]: A list of the pairs of channels that no
+ longer participate in a connection.
+ """
+ destroyed_connections = []
+ for c in self:
+ destroyed_connections.extend(c.disconnect_all())
+ return destroyed_connections
+
+ @property
+ def labels(self):
+ return list(self.channel_dict.keys())
+
+ def items(self):
+ return self.channel_dict.items()
+
+ def __iter__(self):
+ return self.channel_dict.values().__iter__()
+
+ def __len__(self):
+ return len(self.channel_dict)
+
+ def __dir__(self):
+ return set(super().__dir__() + self.labels)
+
+ def __str__(self):
+ return f"{self.__class__.__name__} {self.labels}"
+
+ def to_dict(self):
+ return {
+ "label": self.__class__.__name__,
+ "connected": self.connected,
+ "fully_connected": self.fully_connected,
+ "channels": {l: c.to_dict() for l, c in self.channel_dict.items()},
+ }
+
+
+class DataIO(IO, ABC):
+ """
+ Extends the base IO class with helper methods relevant to data channels.
+ """
+
+ def _assign_a_non_channel_value(self, channel: DataChannel, value) -> None:
+ channel.update(value)
+
+ def to_value_dict(self):
+ return {label: channel.value for label, channel in self.channel_dict.items()}
+
+ @property
+ def ready(self):
+ return all([c.ready for c in self])
+
+ def to_dict(self):
+ d = super().to_dict()
+ d["ready"] = self.ready
+ return d
+
+
+class Inputs(DataIO):
+ @property
+ def _channel_class(self) -> type(InputData):
+ return InputData
+
+ def activate_strict_connections(self):
+ [c.activate_strict_connections() for c in self]
+
+ def deactivate_strict_connections(self):
+ [c.deactivate_strict_connections() for c in self]
+
+
+class Outputs(DataIO):
+ @property
+ def _channel_class(self) -> type(OutputData):
+ return OutputData
+
+
+class SignalIO(IO, ABC):
+ def _assign_a_non_channel_value(self, channel: SignalChannel, value) -> None:
+ raise TypeError(
+ f"Tried to assign {value} ({type(value)}) to the {channel.label}, which is "
+ f"already a {type(channel)}. Only other signal channels may be connected "
+ f"in this way."
+ )
+
+
+class InputSignals(SignalIO):
+ @property
+ def _channel_class(self) -> type(InputSignal):
+ return InputSignal
+
+ def disconnect_run(self) -> list[tuple[Channel, Channel]]:
+ try:
+ return self.run.disconnect_all()
+ except AttributeError:
+ return []
+
+
+class OutputSignals(SignalIO):
+ @property
+ def _channel_class(self) -> type(OutputSignal):
+ return OutputSignal
+
+
+class Signals:
+ """
+ A meta-container for input and output signal IO containers.
+
+ Attributes:
+ input (InputSignals): An empty input signals IO container.
+ output (OutputSignals): An empty output signals IO container.
+ """
+
+ def __init__(self):
+ self.input = InputSignals()
+ self.output = OutputSignals()
+
+ def disconnect(self) -> list[tuple[Channel, Channel]]:
+ """
+ Disconnect all connections in input and output signals.
+
+ Returns:
+ [list[tuple[Channel, Channel]]]: A list of the pairs of channels that no
+ longer participate in a connection.
+ """
+ return self.input.disconnect() + self.output.disconnect()
+
+ def disconnect_run(self) -> list[tuple[Channel, Channel]]:
+ return self.input.disconnect_run()
+
+ @property
+ def connected(self):
+ return self.input.connected or self.output.connected
+
+ @property
+ def fully_connected(self):
+ return self.input.fully_connected and self.output.fully_connected
+
+ def to_dict(self):
+ return {
+ "input": self.input.to_dict(),
+ "output": self.output.to_dict(),
+ }
+
+ def __str__(self):
+ return f"{str(self.input)}\n{str(self.output)}"
diff --git a/pyiron_workflow/macro.py b/pyiron_workflow/macro.py
new file mode 100644
index 00000000..838ad578
--- /dev/null
+++ b/pyiron_workflow/macro.py
@@ -0,0 +1,227 @@
+"""
+A base class for macro nodes, which are composite like workflows but have a static
+interface and are not intended to be internally modified after instantiation.
+"""
+
+from __future__ import annotations
+
+from functools import partialmethod
+from typing import Optional, TYPE_CHECKING
+
+from pyiron_workflow.composite import Composite
+from pyiron_workflow.io import Outputs, Inputs
+
+if TYPE_CHECKING:
+ from bidict import bidict
+
+
+class Macro(Composite):
+ """
+ A macro is a composite node that holds a graph with a fixed interface, like a
+ pre-populated workflow that is the same every time you instantiate it.
+
+ At instantiation, the macro uses a provided callable to build and wire the graph,
+ then builds a static IO interface for this graph. (By default, unconnected IO is
+ passed using the same formalism as workflows to combine node and channel names, but
+ this can be overridden to rename the channels in the IO panel and/or to expose
+ channels that already have an internal connection.)
+
+ Like function nodes, initial values for input can be set using kwargs, and the node
+ will (by default) attempt to update at the end of the instantiation process.
+
+ It is intended that subclasses override the initialization signature and provide
+ the graph creation directly from their own method.
+
+ As with workflows, all DAG macros will determine their execution flow automatically.
+ If you have cycles in your data flow, or otherwise want more control over the
+ execution, all you need to do is specify the `node.signals.input.run` connections
+ and `starting_nodes` list yourself.
+ If only _one_ of these is specified, you'll get an error, but if you've provided
+ both then no further checks of their validity/reasonableness are performed, so be
+ careful. 
+
+ Examples:
+ Let's consider the simplest case of macros that just consecutively add 1 to
+ their input:
+ >>> from pyiron_workflow.macro import Macro
+ >>>
+ >>> def add_one(x):
+ ... result = x + 1
+ ... return result
+ >>>
+ >>> def add_three_macro(macro):
+ ... macro.one = macro.create.SingleValue(add_one)
+ ... macro.two = macro.create.SingleValue(add_one, macro.one)
+ ... macro.three = macro.create.SingleValue(add_one, macro.two)
+ ... macro.one > macro.two > macro.three
+ ... macro.starting_nodes = [macro.one]
+
+ In this case we had _no need_ to specify the execution order and starting nodes
+ -- it's just an extremely simple DAG after all! -- but it's done here to
+ demonstrate the syntax.
+
+ We can make a macro by passing this graph-building function (that takes a macro
+ as its first argument, i.e. `self` from the macro's perspective) to the `Macro`
+ class. Then, we can use it like a regular node! Just like a workflow, the
+ IO is constructed from unconnected owned-node IO by combining node and channel
+ labels.
+ >>> macro = Macro(add_three_macro)
+ >>> out = macro(one__x=3)
+ >>> out.three__result
+ 6
+
+ If there's a particular macro we're going to use again and again, we might want
+ to consider making a new child class of `Macro` that overrides the
+ `graph_creator` arg such that the same graph is always created. We could
+ override `__init__` the normal way, but it's even faster to just use
+ `partialmethod`:
+ >>> from functools import partialmethod
+ >>> class AddThreeMacro(Macro):
+ ... def build_graph(self):
+ ... add_three_macro(self)
+ ...
+ ... __init__ = partialmethod(
+ ... Macro.__init__,
+ ... build_graph,
+ ... )
+ >>>
+ >>> macro = AddThreeMacro()
+ >>> macro(one__x=0).three__result
+ 3
+
+ We can also nest macros, rename their IO, and provide access to
+ internally-connected IO by inputs and outputs maps:
+ >>> def nested_macro(macro):
+ ... macro.a = macro.create.SingleValue(add_one)
+ ... macro.b = macro.create.Macro(add_three_macro, one__x=macro.a)
+ ... macro.c = macro.create.SingleValue(
+ ... add_one, x=macro.b.outputs.three__result
+ ... )
+ >>>
+ >>> macro = Macro(
+ ... nested_macro,
+ ... inputs_map={"a__x": "inp"},
+ ... outputs_map={"c__result": "out", "b__three__result": "intermediate"},
+ ... )
+ >>> macro(inp=1)
+ {'intermediate': 5, 'out': 6}
+
+ Macros and workflows automatically generate execution flows when their data
+ is acyclic.
+ Let's build a simple macro with two independent tracks:
+ >>> def modified_flow_macro(macro):
+ ... macro.a = macro.create.SingleValue(add_one, x=0)
+ ... macro.b = macro.create.SingleValue(add_one, x=0)
+ ... macro.c = macro.create.SingleValue(add_one, x=0)
+ >>>
+ >>> m = Macro(modified_flow_macro)
+ >>> m(a__x=1, b__x=2, c__x=3)
+ {'a__result': 2, 'b__result': 3, 'c__result': 4}
+
+ We can override which nodes get used to start by specifying the `starting_nodes`
+ property.
+ If we do this we also need to provide at least one connection among the run
+ signals, but beyond that the code doesn't hold our hands.
+ Let's use this and then observe how the `a` sub-node no longer gets run:
+ >>> m.starting_nodes = [m.b] # At least one starting node
+ >>> m.b > m.c # At least one run signal
+ >>> m(a__x=1000, b__x=2000, c__x=3000)
+ {'a__result': 2, 'b__result': 2001, 'c__result': 3001}
+
+ Note how the `a` node is no longer getting run, so the output is not updated!
+ Manually controlling execution flow is necessary for cyclic graphs (cf. 
the
+ while loop meta-node), but best to avoid when possible as it's easy to miss
+ intended connections in complex graphs.
+ """
+
+ def __init__(
+ self,
+ graph_creator: callable[[Macro], None],
+ label: Optional[str] = None,
+ parent: Optional[Composite] = None,
+ strict_naming: bool = True,
+ inputs_map: Optional[dict | bidict] = None,
+ outputs_map: Optional[dict | bidict] = None,
+ **kwargs,
+ ):
+ self._parent = None
+ super().__init__(
+ label=label if label is not None else graph_creator.__name__,
+ parent=parent,
+ strict_naming=strict_naming,
+ inputs_map=inputs_map,
+ outputs_map=outputs_map,
+ )
+ graph_creator(self)
+ self._configure_graph_execution()
+
+ self._inputs: Inputs = self._build_inputs()
+ self._outputs: Outputs = self._build_outputs()
+
+ self.update_input(**kwargs)
+
+ @property
+ def inputs(self) -> Inputs:
+ return self._inputs
+
+ @property
+ def outputs(self) -> Outputs:
+ return self._outputs
+
+ def _configure_graph_execution(self):
+ run_signals = self.disconnect_run()
+
+ has_signals = len(run_signals) > 0
+ has_starters = len(self.starting_nodes) > 0
+
+ if has_signals and has_starters:
+ # Assume the user knows what they're doing
+ self._reconnect_run(run_signals)
+ elif not has_signals and not has_starters:
+ # Automate construction of the execution graph
+ self.set_run_signals_to_dag_execution()
+ else:
+ raise ValueError(
+ f"The macro '{self.label}' has {len(run_signals)} run signals "
+ f"internally and {len(self.starting_nodes)} starting nodes. Either "
+ f"the entire execution graph must be specified manually, or both run "
+ f"signals and starting nodes must be left entirely unspecified for "
+ f"automatic construction of the execution graph."
+ )
+
+ def _reconnect_run(self, run_signal_pairs_to_restore):
+ self.disconnect_run()
+ for pairs in run_signal_pairs_to_restore:
+ pairs[0].connect(pairs[1])
+
+ def to_workflow(self):
+ raise NotImplementedError
+
+
+def macro_node(**node_class_kwargs):
+ """
+ A decorator for dynamically creating macro classes from graph-creating functions.
+
+ Decorates a function.
+ Returns a `Macro` subclass whose name is the camel-case version of the
+ graph-creating function, and whose signature is modified to exclude this function
+ and provided kwargs.
+
+ Optionally takes any keyword arguments of `Macro`.
+ """
+
+ def as_node(graph_creator: callable[[Macro], None]):
+ return type(
+ graph_creator.__name__.title().replace("_", ""), # fnc_name to CamelCase
+ (Macro,), # Define parentage
+ {
+ "__init__": partialmethod(
+ Macro.__init__,
+ graph_creator,
+ **node_class_kwargs,
+ )
+ },
+ )
+
+ return as_node
diff --git a/pyiron_workflow/meta.py b/pyiron_workflow/meta.py
new file mode 100644
index 00000000..948d4bb1
--- /dev/null
+++ b/pyiron_workflow/meta.py
@@ -0,0 +1,326 @@
+"""
+Meta nodes are callables that create a node class instead of a node instance.
+"""
+
+from __future__ import annotations
+
+from typing import Optional
+
+from pyiron_workflow.function import (
+ Function,
+ SingleValue,
+ function_node,
+ single_value_node,
+)
+from pyiron_workflow.macro import Macro, macro_node
+from pyiron_workflow.node import Node
+from pyiron_workflow.util import DotDict
+
+
+def list_to_output(length: int, **node_class_kwargs) -> type[Function]:
+ """
+ A meta-node that returns a node class with a single list input channel and
+ `length` output channels, scattering the list entries across those outputs.
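+
+ For example (an illustrative sketch; the labels here are chosen freely):
+ >>> Scatter = list_to_output(2, output_labels=["first", "second"])
+ >>> node = Scatter(l=["a", "b"])
+ >>> node.run()
+ ['a', 'b']
+ >>> node.outputs.second.value
+ 'b'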
+ """ + + def _list_to_many(length: int): + template = f""" +def __list_to_many(l: list): + {"; ".join([f"out{i} = l[{i}]" for i in range(length)])} + return [{", ".join([f"out{i}" for i in range(length)])}] + """ + exec(template) + return locals()["__list_to_many"] + + return function_node(**node_class_kwargs)(_list_to_many(length=length)) + + +def input_to_list(length: int, **node_class_kwargs) -> type[SingleValue]: + """ + A meta-node that returns a node class with `length` output channels and + maps an input list to these. + """ + + def _many_to_list(length: int): + template = f""" +def __many_to_list({", ".join([f"inp{i}=None" for i in range(length)])}): + return [{", ".join([f"inp{i}" for i in range(length)])}] + """ + exec(template) + return locals()["__many_to_list"] + + return single_value_node(**node_class_kwargs)(_many_to_list(length=length)) + + +def for_loop( + loop_body_class: type[Node], + length: int, + iterate_on: str | tuple[str] | list[str], + # TODO: +) -> type[Macro]: + """ + An _extremely rough_ first draft of a for-loop meta-node. + + Takes a node class, how long the loop should be, and which input(s) of the provided + node class should be looped over (given as strings of the channel labels) and + builds a macro that + - Makes copies of the provided node class, i.e. the "body node" + - For each input channel specified to "loop over", creates a list-to-many node and + connects each of its outputs to their respective body node inputs + - For all other inputs, makes a 1:1 node and connects its output to _all_ of the + body nodes + - Relables the macro IO to match the passed node class IO so that list-ified IO + (i.e. the specified input and all output) is all caps + + Examples: + >>> import numpy as np + >>> from pyiron_workflow import Workflow + >>> + >>> bulk_loop = Workflow.create.meta.for_loop( + ... Workflow.create.atomistics.Bulk, + ... 5, + ... iterate_on = ("a",), + ... )() + >>> + >>> [ + ... struct.cell.volume for struct in bulk_loop( + ... name="Al", # Sent equally to each body node + ... A=np.linspace(3.9, 4.1, 5).tolist(), # Distributed across body nodes + ... ).STRUCTURE + ... ] + [14.829749999999995, + 15.407468749999998, + 15.999999999999998, + 16.60753125, + 17.230249999999995] + + TODO: + - Refactor like crazy, it's super hard to read and some stuff is too hard-coded + - Give some sort of access to flow control?? + - How to handle passing executors to the children? Maybe this is more + generically a Macro question? + - Is it possible to somehow dynamically adapt the held graph depending on the + length of the input values being iterated over? 
Tricky to keep IO well defined
+ - Allow a different mode, or make a different meta node, that makes all possible
+ pairs of body nodes given the input being looped over instead of just `length`
+ - Provide enter and exit magic methods so we can `for` or `with` this fancy-like
+ """
+ iterate_on = [iterate_on] if isinstance(iterate_on, str) else iterate_on
+
+ def make_loop(macro):
+ macro.inputs_map = {}
+ macro.outputs_map = {}
+ body_nodes = []
+
+ # Parallelize over body nodes
+ for n in range(length):
+ body_nodes.append(
+ macro.add(loop_body_class(label=f"{loop_body_class.__name__}_{n}"))
+ )
+
+ # Make input interface
+ for label, inp in body_nodes[0].inputs.items():
+ # Don't rely on inp.label directly, since inputs may be a Composite IO
+ # panel that has a different key for this input channel than its label
+
+ # Scatter a list of inputs to each node separately
+ if label in iterate_on:
+ interface = list_to_output(length)(
+ parent=macro,
+ label=label.upper(),
+ output_labels=[
+ f"{loop_body_class.__name__}__{inp.label}_{i}"
+ for i in range(length)
+ ],
+ l=[inp.default] * length,
+ )
+ # Connect each body node input to the input interface's respective output
+ for body_node, out in zip(body_nodes, interface.outputs):
+ body_node.inputs[label] = out
+ macro.inputs_map[f"{interface.label}__l"] = interface.label
+ # TODO: Don't hardcode __l
+ # Or distribute the same input to each node equally
+ else:
+ interface = macro.create.standard.UserInput(
+ label=label, output_labels=label, user_input=inp.default
+ )
+ for body_node in body_nodes:
+ body_node.inputs[label] = interface
+ macro.inputs_map[f"{interface.label}__user_input"] = interface.label
+ # TODO: Don't hardcode __user_input
+
+ # Make output interface: outputs to lists
+ for label, out in body_nodes[0].outputs.items():
+ interface = input_to_list(length)(
+ parent=macro,
+ label=label.upper(),
+ output_labels=f"{loop_body_class.__name__}__{label}",
+ )
+ # Connect each body node output to the output interface's respective input
+ for body_node, inp in zip(body_nodes, interface.inputs):
+ inp.connect(body_node.outputs[label])
+ if body_node.executor is not None:
+ raise NotImplementedError(
+ "Right now the output interface gets run after each body node,"
+ "if the body nodes can run asynchronously we need something "
+ "more clever than that!"
+ )
+ macro.outputs_map[
+ f"{interface.label}__{loop_body_class.__name__}__{label}"
+ ] = interface.label
+ # TODO: Don't manually copy the output label construction
+
+ return macro_node()(make_loop)
+
+
+def while_loop(
+ loop_body_class: type[Node],
+ condition_class: type[SingleValue],
+ internal_connection_map: list[tuple[str, str, str, str]],
+ inputs_map: Optional[dict[str, str]] = None,
+ outputs_map: Optional[dict[str, str]] = None,
+) -> type[Macro]:
+ """
+ An _extremely rough_ first draft of a while-loop meta-node.
+
+ Takes body and condition node classes and builds a macro that makes a cyclic signal
+ connection between them and an "if" switch, i.e. when the body node finishes it
+ runs the condition, which runs the switch, and as long as the condition result was
+ `True`, the switch loops back to run the body again.
+ We additionally allow four-tuples of (output node, output channel, input node,
+ input channel) labels to wire data connections inside the macro, e.g. to pass data
+ from the body to the condition. This is beastly syntax, but it will suffice for now.
+ Finally, you can set input and output maps as normal. 
+
+ Args:
+ loop_body_class (type[pyiron_workflow.node.Node]): The class for the
+ body of the while-loop.
+ condition_class (type[pyiron_workflow.function.SingleValue]): A single
+ value node returning a `bool` controlling the while loop exit condition
+ (exits on False)
+ internal_connection_map (list[tuple[str, str, str, str]]): String tuples
+ giving (output node, output channel, input node, input channel) labels
+ connecting channel pairs inside the macro.
+ inputs_map (Optional[dict[str, str]]): The inputs map as usual for a macro.
+ outputs_map (Optional[dict[str, str]]): The outputs map as usual for a macro.
+ Examples:
+ >>> from pyiron_workflow import Workflow
+ >>>
+ >>> @Workflow.wrap_as.single_value_node()
+ ... def add(a, b):
+ ... print(f"{a} + {b} = {a + b}")
+ ... return a + b
+ >>>
+ >>> @Workflow.wrap_as.single_value_node()
+ ... def less_than_ten(value):
+ ... return value < 10
+ >>>
+ >>> AddWhile = Workflow.create.meta.while_loop(
+ ... loop_body_class=add,
+ ... condition_class=less_than_ten,
+ ... internal_connection_map=[
+ ... ("Add", "a + b", "LessThanTen", "value"),
+ ... ("Add", "a + b", "Add", "a")
+ ... ],
+ ... inputs_map={"Add__a": "a", "Add__b": "b"},
+ ... outputs_map={"Add__a + b": "total"}
+ ... )
+ >>>
+ >>> wf = Workflow("do_while")
+ >>> wf.add_while = AddWhile()
+ >>>
+ >>> wf.inputs_map = {
+ ... "add_while__a": "a",
+ ... "add_while__b": "b"
+ ... }
+ >>> wf.outputs_map = {"add_while__total": "total"}
+ >>>
+ >>> print(f"Finally, {wf(a=1, b=2).total}")
+ 1 + 2 = 3
+ 3 + 2 = 5
+ 5 + 2 = 7
+ 7 + 2 = 9
+ 9 + 2 = 11
+ Finally, 11
+
+ >>> import numpy as np
+ >>> from pyiron_workflow import Workflow
+ >>>
+ >>> np.random.seed(0)
+ >>>
+ >>> @Workflow.wrap_as.single_value_node("random")
+ ... def random(length: int | None = None):
+ ... return np.random.random(length)
+ >>>
+ >>> @Workflow.wrap_as.single_value_node()
+ ... def greater_than(x: float, threshold: float):
+ ... gt = x > threshold
+ ... symbol = ">" if gt else "<="
+ ... print(f"{x:.3f} {symbol} {threshold}")
+ ... return gt
+ >>>
+ >>> RandomWhile = Workflow.create.meta.while_loop(
+ ... loop_body_class=random,
+ ... condition_class=greater_than,
+ ... internal_connection_map=[("Random", "random", "GreaterThan", "x")],
+ ... outputs_map={"Random__random": "capped_result"}
+ ... 
)
+ >>>
+ >>> # Define workflow
+ >>>
+ >>> wf = Workflow("random_until_small_enough")
+ >>>
+ >>> ## Wire together the while loop and its condition
+ >>>
+ >>> wf.random_while = RandomWhile()
+ >>>
+ >>> ## Give convenient labels
+ >>> wf.inputs_map = {"random_while__GreaterThan__threshold": "threshold"}
+ >>> wf.outputs_map = {"random_while__capped_result": "capped_result"}
+ >>>
+ >>> # Set a threshold and run
+ >>> print(f"Finally {wf(threshold=0.1).capped_result:.3f}")
+ 0.549 > 0.1
+ 0.715 > 0.1
+ 0.603 > 0.1
+ 0.545 > 0.1
+ 0.424 > 0.1
+ 0.646 > 0.1
+ 0.438 > 0.1
+ 0.892 > 0.1
+ 0.964 > 0.1
+ 0.383 > 0.1
+ 0.792 > 0.1
+ 0.529 > 0.1
+ 0.568 > 0.1
+ 0.926 > 0.1
+ 0.071 <= 0.1
+ Finally 0.071
+ """
+
+ def make_loop(macro):
+ body_node = macro.add(loop_body_class(label=loop_body_class.__name__))
+ condition_node = macro.add(condition_class(label=condition_class.__name__))
+ switch = macro.create.standard.If(label="switch")
+
+ switch.inputs.condition = condition_node
+ for out_n, out_c, in_n, in_c in internal_connection_map:
+ macro.nodes[in_n].inputs[in_c] = macro.nodes[out_n].outputs[out_c]
+
+ switch.signals.output.true > body_node > condition_node > switch
+ macro.starting_nodes = [body_node]
+
+ macro.inputs_map = {} if inputs_map is None else inputs_map
+ macro.outputs_map = {} if outputs_map is None else outputs_map
+
+ return macro_node()(make_loop)
+
+
+meta_nodes = DotDict(
+ {
+ for_loop.__name__: for_loop,
+ input_to_list.__name__: input_to_list,
+ list_to_output.__name__: list_to_output,
+ while_loop.__name__: while_loop,
+ }
+)
diff --git a/pyiron_workflow/node.py b/pyiron_workflow/node.py
new file mode 100644
index 00000000..3ebe134e
--- /dev/null
+++ b/pyiron_workflow/node.py
@@ -0,0 +1,390 @@
+"""
+A base class for objects that can form nodes in the graph representation of a
+computational workflow.
+"""
+
+from __future__ import annotations
+
+import warnings
+from abc import ABC, abstractmethod
+from concurrent.futures import Future
+from typing import Any, Literal, Optional, TYPE_CHECKING
+
+from pyiron_workflow.draw import Node as GraphvizNode
+from pyiron_workflow.files import DirectoryObject
+from pyiron_workflow.has_to_dict import HasToDict
+from pyiron_workflow.io import Signals, InputSignal, OutputSignal
+from pyiron_workflow.util import SeabornColors
+
+if TYPE_CHECKING:
+ import graphviz
+
+ from pyiron_base.jobs.job.extension.server.generic import Server
+
+ from pyiron_workflow.composite import Composite
+ from pyiron_workflow.io import Inputs, Outputs
+
+
+class Node(HasToDict, ABC):
+ """
+ Nodes are elements of a computational graph.
+ They have input and output data channels that interface with the outside
+ world, a callable that determines what they actually compute, and input and
+ output signal channels that can be used to customize the execution flow of their
+ graph.
+ Together these channels represent edges on the dual data and execution computational
+ graphs.
+
+ Nodes can be run to force their computation, or more gently updated, which will
+ trigger a run only if all of the input is ready (i.e. channel values conform to
+ any type hints provided).
+
+ Nodes may have a `parent` node that owns them as part of a sub-graph.
+
+ Every node must be named with a `label`, and may use this label to attempt to create
+ a working directory in memory for itself if requested.
+ These labels also help to identify nodes in the wider context of (potentially
+ nested) computational graphs. 
+
+ By default, nodes' signals input comes with `run` and `ran` IO ports which trigger
+ the `run()` method and which emit after `finish_run()` is completed, respectively.
+ These signal connections can be made manually by reference to the node signals
+ channel, or with the `>` symbol to indicate a flow of execution. This syntactic
+ sugar can be mixed between actual signal channels (output signal > input signal),
+ or nodes, but when referring to nodes it is always a shortcut to the `run`/`ran`
+ channels.
+
+ The `run()` method returns a representation of the node output (possibly a futures
+ object, if the node is running on an executor), and consequently `update()` also
+ returns this output if the node is `ready`.
+
+ Calling an already instantiated node allows its input channels to be updated using
+ keyword arguments corresponding to the channel labels, performing a batch-update of
+ all supplied input and then calling `run()`.
+ As such, calling the node _also_ returns a representation of the output (or `None`
+ if the node is not set to run on updates, or is otherwise unready to run).
+
+ Nodes have a status, which is currently represented by the `running` and `failed`
+ boolean flag attributes.
+ Their value is controlled automatically in the defined `run` and `finish_run`
+ methods.
+
+ Nodes can be run on the main python process that owns them, or by assigning an
+ appropriate executor to their `executor` attribute.
+ In case they are run with an executor, their `future` attribute will be populated
+ with the resulting future object.
+ WARNING: Executors are currently only working when the node executable function does
+ not use `self`.
+
+ This is an abstract class.
+ Children *must* define how `inputs` and `outputs` are constructed, and what will
+ happen in `on_run`.
+ They may also override the `run_args` property to specify input passed to the
+ defined `on_run` method, and may add additional signal channels to the signals IO.
+
+ # TODO: Everything with (de)serialization and executors for running on something
+ # other than the main python process.
+
+ Attributes:
+ connected (bool): Whether _any_ of the IO (including signals) are connected.
+ failed (bool): Whether the node raised an error calling `run`. (Default
+ is False.)
+ fully_connected (bool): Whether _all_ of the IO (including signals) are
+ connected.
+ future (concurrent.futures.Future | None): A futures object, if the node is
+ currently running or has already run using an executor.
+ inputs (pyiron_workflow.io.Inputs): **Abstract.** Children must define
+ a property returning an `Inputs` object.
+ label (str): A name for the node.
+ outputs (pyiron_workflow.io.Outputs): **Abstract.** Children must define
+ a property returning an `Outputs` object.
+ parent (pyiron_workflow.composite.Composite | None): The parent object
+ owning this, if any.
+ ready (bool): Whether the inputs are all ready and the node is neither
+ already running nor already failed.
+ running (bool): Whether the node has called `run` and has not yet
+ received output from this call. (Default is False.)
+ server (Optional[pyiron_base.jobs.job.extension.server.generic.Server]): A
+ server object for computing things somewhere else. Default (and currently
+ _only_) behaviour is to compute things on the main python process owning
+ the node.
+ signals (pyiron_workflow.io.Signals): A container for input and output
+ signals, which are channels for controlling execution flow. 
By default, has
+ a `signals.input.run` channel which has a callback to the `run` method,
+ and `signals.output.ran` which should be called when the `run` method
+ is finished.
+ Additional signal channels in derived classes can be added to
+ `signals.input` and `signals.output` after this mixin class is
+ initialized.
+
+ Methods:
+ disconnect: Remove all connections, including signals.
+ draw: Use graphviz to visualize the node, its IO and, if composite in nature,
+ its internal structure.
+ on_run: **Abstract.** Do the thing.
+ run: A wrapper to handle all the infrastructure around executing `on_run`.
+ """
+
+ def __init__(
+ self,
+ label: str,
+ *args,
+ parent: Optional[Composite] = None,
+ **kwargs,
+ ):
+ """
+ A mixin class for objects that can form nodes in the graph representation of a
+ computational workflow.
+
+ Args:
+ label (str): A name for this node.
+ *args: Arguments passed on with `super`.
+ **kwargs: Keyword arguments passed on with `super`.
+ """
+ super().__init__(*args, **kwargs)
+ self.label: str = label
+ self.parent = parent
+ if parent is not None:
+ parent.add(self)
+ self.running = False
+ self.failed = False
+ # TODO: Replace running and failed with a state object
+ self._server: Server | None = (
+ None # Or "task_manager" or "executor" -- we'll see what's best
+ )
+ # TODO: Move from a traditional "server" to a tinybase "executor"
+ # TODO: Provide support for actually computing stuff with the server/executor
+ self.signals = self._build_signal_channels()
+ self._working_directory = None
+ self.executor = None
+ self.future: None | Future = None
+
+ @property
+ @abstractmethod
+ def inputs(self) -> Inputs:
+ pass
+
+ @property
+ @abstractmethod
+ def outputs(self) -> Outputs:
+ pass
+
+ @property
+ @abstractmethod
+ def on_run(self) -> callable[..., Any | tuple]:
+ """
+ What the node actually does!
+ """
+ pass
+
+ @property
+ def run_args(self) -> dict:
+ """
+ Any data needed for `on_run`, will be passed as **kwargs.
+ """
+ return {}
+
+ def process_run_result(self, run_output: Any | tuple) -> None:
+ """
+ What to _do_ with the results of `on_run` once you have them.
+
+ Args:
+ run_output (tuple): The results of a `self.on_run(**self.run_args)` call.
+ """
+ pass
+
+ def run(self) -> Any | tuple | Future:
+ """
+ Executes the functionality of the node defined in `on_run`.
+ Handles the status of the node, and communicating with any remote
+ computing resources.
+ """
+ if self.running:
+ raise RuntimeError(f"{self.label} is already running")
+
+ self.running = True
+ self.failed = False
+
+ if self.executor is None:
+ try:
+ run_output = self.on_run(**self.run_args)
+ except Exception as e:
+ self.running = False
+ self.failed = True
+ raise e
+ return self.finish_run(run_output)
+ else:
+ # Just blindly try to execute -- as we nail down the executor interaction
+ # we'll want to fail more cleanly here.
+ self.future = self.executor.submit(self.on_run, **self.run_args)
+ self.future.add_done_callback(self.finish_run)
+ return self.future
+
+ def finish_run(self, run_output: tuple | Future) -> Any | tuple:
+ """
+ Switch the node status, process the run result, then fire the ran signal.
+
+ By extracting this as a separate method, we allow the node to pass the actual
+ execution off to another entity and release the python process to do other
+ things. In such a case, this function should be registered as a callback
+ so that the node can finish "running" and, e.g. push its data forward when that
+ execution is finished. 
In such a case, a `concurrent.futures.Future` object is + expected back and must be unpacked. + """ + if isinstance(run_output, Future): + run_output = run_output.result() + + self.running = False + try: + self.process_run_result(run_output) + self.signals.output.ran() + return run_output + except Exception as e: + self.failed = True + raise e + + def _build_signal_channels(self) -> Signals: + signals = Signals() + signals.input.run = InputSignal("run", self, self.run) + signals.output.ran = OutputSignal("ran", self) + return signals + + def update(self) -> Any | tuple | Future | None: + if self.ready: + return self.run() + + @property + def working_directory(self): + if self._working_directory is None: + if self.parent is not None and hasattr(self.parent, "working_directory"): + parent_dir = self.parent.working_directory + self._working_directory = parent_dir.create_subdirectory(self.label) + else: + self._working_directory = DirectoryObject(self.label) + return self._working_directory + + @property + def server(self) -> Server | None: + return self._server + + @server.setter + def server(self, server: Server | None): + self._server = server + + def disconnect(self): + """ + Disconnect all connections belonging to inputs, outputs, and signals channels. + + Returns: + [list[tuple[Channel, Channel]]]: A list of the pairs of channels that no + longer participate in a connection. + """ + destroyed_connections = [] + destroyed_connections.extend(self.inputs.disconnect()) + destroyed_connections.extend(self.outputs.disconnect()) + destroyed_connections.extend(self.signals.disconnect()) + return destroyed_connections + + @property + def ready(self) -> bool: + return not (self.running or self.failed) and self.inputs.ready + + @property + def connected(self) -> bool: + return self.inputs.connected or self.outputs.connected or self.signals.connected + + @property + def fully_connected(self): + return ( + self.inputs.fully_connected + and self.outputs.fully_connected + and self.signals.fully_connected + ) + + def update_input(self, **kwargs) -> None: + """ + Match keywords to input channel labels and update input values. + + Args: + **kwargs: input label - input value (including channels for connection) + pairs. + """ + for k, v in kwargs.items(): + if k in self.inputs.labels: + self.inputs[k] = v + else: + warnings.warn( + f"The keyword '{k}' was not found among input labels. If you are " + f"trying to update a node keyword, please use attribute assignment " + f"directly instead of calling" + ) + + def __call__(self, **kwargs) -> None: + self.update_input(**kwargs) + return self.run() + + @property + def color(self) -> str: + """A hex code color for use in drawing.""" + return SeabornColors.white + + def draw( + self, depth: int = 1, rankdir: Literal["LR", "TB"] = "LR" + ) -> graphviz.graphs.Digraph: + """ + Draw the node structure. + + Args: + depth (int): How deeply to decompose the representation of composite nodes + to reveal their inner structure. (Default is 1, which will show owned + nodes if _this_ is a composite node, but all children will be drawn + at the level of showing their IO only.) A depth value greater than the + max depth of the node will have no adverse side effects. + rankdir ("LR" | "TB"): Use left-right or top-bottom graphviz `rankdir` to + orient the flow of the graph. + + Returns: + (graphviz.graphs.Digraph): The resulting graph object. 
+ + Note: + The graphviz docs will elucidate all the possibilities of what to do with + the returned object, but the thing you are most likely to need is the + `render` method, which allows you to save the resulting graph as an image. + E.g. `self.draw().render(filename="my_node", format="png")`. + """ + return GraphvizNode(self, depth=depth, rankdir=rankdir).graph + + def __str__(self): + return ( + f"{self.label} ({self.__class__.__name__}):\n" + f"{str(self.inputs)}\n" + f"{str(self.outputs)}\n" + f"{str(self.signals)}" + ) + + def connect_output_signal(self, signal: OutputSignal): + self.signals.input.run.connect(signal) + + def __gt__(self, other: InputSignal | Node): + """ + Allows users to connect run and ran signals like: `first_node > second_node`. + """ + other.connect_output_signal(self.signals.output.ran) + return True + + def get_parent_proximate_to(self, composite: Composite) -> Composite | None: + parent = self.parent + while parent is not None and parent.parent is not composite: + parent = parent.parent + return parent + + def get_first_shared_parent(self, other: Node) -> Composite | None: + our, their = self, other + while our.parent is not None: + while their.parent is not None: + if our.parent is their.parent: + return our.parent + their = their.parent + our = our.parent + their = other + return None diff --git a/pyiron_workflow/node_library/__init__.py b/pyiron_workflow/node_library/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyiron_workflow/node_library/atomistics.py b/pyiron_workflow/node_library/atomistics.py new file mode 100644 index 00000000..045fdfe5 --- /dev/null +++ b/pyiron_workflow/node_library/atomistics.py @@ -0,0 +1,202 @@ +from __future__ import annotations + +from typing import Literal, Optional + +from pyiron_atomistics import Project, _StructureFactory +from pyiron_atomistics.atomistics.job.atomistic import AtomisticGenericJob +from pyiron_atomistics.atomistics.structure.atoms import Atoms +from pyiron_atomistics.lammps.lammps import Lammps as LammpsJob + +from pyiron_workflow.function import function_node, single_value_node + + +Bulk = single_value_node(output_labels="structure")(_StructureFactory().bulk) + + +@single_value_node(output_labels="job") +def lammps(structure: Optional[Atoms] = None) -> LammpsJob: + pr = Project(".") + job = pr.atomistics.job.Lammps("NOTAREALNAME") + job.structure = structure if structure is not None else _StructureFactory().bulk() + job.potential = job.list_potentials()[0] + return job + + +def _run_and_remove_job(job, modifier: Optional[callable] = None, **modifier_kwargs): + """ + Extracts the commonalities for all the "calc" methods for running a Lammps engine. + Will need to be extended/updated once we support other engines so that more output + can be parsed. Output may wind up more concretely packaged, e.g. as `CalcOutput` or + `MDOutput`, etc., ala Joerg's suggestion later, so for the time being we don't put + too much effort into this. + + Warning: + Jobs are created in a dummy project with a dummy name and are all removed at the + end; this works fine for serial workflows, but will need to be revisited -- + probably with naming based on the parantage of node/workflow labels -- once + other non-serial execution is introduced. 
+ """ + job_name = "JUSTAJOBNAME" + pr = Project("WORKFLOWNAMEPROJECT") + job = job.copy_to(project=pr, new_job_name=job_name, delete_existing_job=True) + if modifier is not None: + job = modifier(job, **modifier_kwargs) + job.run() + + cells = job.output.cells + displacements = job.output.displacements + energy_pot = job.output.energy_pot + energy_tot = job.output.energy_tot + force_max = job.output.force_max + forces = job.output.forces + indices = job.output.indices + positions = job.output.positions + pressures = job.output.pressures + steps = job.output.steps + temperature = job.output.temperature + total_displacements = job.output.total_displacements + unwrapped_positions = job.output.unwrapped_positions + volume = job.output.volume + + job.remove() + pr.remove(enable=True) + + return ( + cells, + displacements, + energy_pot, + energy_tot, + force_max, + forces, + indices, + positions, + pressures, + steps, + temperature, + total_displacements, + unwrapped_positions, + volume, + ) + + +@function_node( + output_labels=[ + "cells", + "displacements", + "energy_pot", + "energy_tot", + "force_max", + "forces", + "indices", + "positions", + "pressures", + "steps", + "temperature", + "total_displacements", + "unwrapped_positions", + "volume", + ] +) +def calc_static( + job: AtomisticGenericJob, +): + return _run_and_remove_job(job=job) + + +@function_node( + output_labels=[ + "cells", + "displacements", + "energy_pot", + "energy_tot", + "force_max", + "forces", + "indices", + "positions", + "pressures", + "steps", + "temperature", + "total_displacements", + "unwrapped_positions", + "volume", + ] +) +def calc_md( + job: AtomisticGenericJob, + n_ionic_steps: int = 1000, + n_print: int = 100, + temperature: int | float = 300.0, + pressure: float + | tuple[float, float, float] + | tuple[float, float, float, float, float, float] + | None = None, +): + def calc_md(job, n_ionic_steps, n_print, temperature, pressure): + job.calc_md( + n_ionic_steps=n_ionic_steps, + n_print=n_print, + temperature=temperature, + pressure=pressure, + ) + return job + + return _run_and_remove_job( + job=job, + modifier=calc_md, + n_ionic_steps=n_ionic_steps, + n_print=n_print, + temperature=temperature, + pressure=pressure, + ) + + +@function_node( + output_labels=[ + "cells", + "displacements", + "energy_pot", + "energy_tot", + "force_max", + "forces", + "indices", + "positions", + "pressures", + "steps", + "total_displacements", + "unwrapped_positions", + "volume", + ] +) +def calc_min( + job: AtomisticGenericJob, + n_ionic_steps: int = 1000, + n_print: int = 100, + pressure: float + | tuple[float, float, float] + | tuple[float, float, float, float, float, float] + | None = None, +): + def calc_min(job, n_ionic_steps, n_print, pressure): + job.calc_minimize( + max_iter=n_ionic_steps, # Calc minimize uses a different var than MD + n_print=n_print, + pressure=pressure, + ) + return job + + return _run_and_remove_job( + job=job, + modifier=calc_min, + n_ionic_steps=n_ionic_steps, + n_print=n_print, + pressure=pressure, + ) + + +nodes = [ + Bulk, + calc_md, + calc_min, + calc_static, + lammps, +] diff --git a/pyiron_workflow/node_library/standard.py b/pyiron_workflow/node_library/standard.py new file mode 100644 index 00000000..bf24fab5 --- /dev/null +++ b/pyiron_workflow/node_library/standard.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from inspect import isclass +from typing import Optional + +import numpy as np +from matplotlib import pyplot as plt + +from pyiron_workflow.channels import NotData, 
OutputSignal +from pyiron_workflow.function import SingleValue, single_value_node + + +@single_value_node(output_labels="fig") +def scatter( + x: Optional[list | np.ndarray] = None, y: Optional[list | np.ndarray] = None +): + return plt.scatter(x, y) + + +@single_value_node() +def user_input(user_input): + return user_input + + +class If(SingleValue): + """ + Has two extra signal channels: true and false. Evaluates the input as a boolean and + fires the corresponding output signal after running. + """ + + def __init__(self, **kwargs): + super().__init__(self.if_, output_labels="truth", **kwargs) + self.signals.output.true = OutputSignal("true", self) + self.signals.output.false = OutputSignal("false", self) + + @staticmethod + def if_(condition): + if isclass(condition) and issubclass(condition, NotData): + raise TypeError(f"Logic 'If' node expected data but got NotData as input.") + return bool(condition) + + def process_run_result(self, function_output): + """ + Process the output as usual, then fire signals accordingly. + """ + super().process_run_result(function_output) + + if self.outputs.truth.value: + self.signals.output.true() + else: + self.signals.output.false() + + +nodes = [ + scatter, + user_input, + If, +] diff --git a/pyiron_workflow/node_package.py b/pyiron_workflow/node_package.py new file mode 100644 index 00000000..45f4ab8b --- /dev/null +++ b/pyiron_workflow/node_package.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from pyiron_workflow.node import Node +from pyiron_workflow.util import DotDict + + +class NodePackage(DotDict): + """ + A collection of node classes. + + Node classes are accessible by their _class name_ by item or attribute access. + + Can be extended by adding node classes to new names with an item or attribute set, + but to update an existing node the `update` method must be used. + """ + + def __init__(self, *node_classes: Node): + super().__init__() + for node in node_classes: + self[node.__name__] = node + + def __setitem__(self, key, value): + if key in self.keys(): + raise KeyError(f"The name {key} is already a stored node class.") + elif key in self.__dir__(): + raise KeyError( + f"The name {key} is already an attribute of this " + f"{self.__class__.__name__} instance." + ) + if not isinstance(value, type) or not issubclass(value, Node): + raise TypeError( + f"Can only set members that are (sub)classes of {Node.__name__}, " + f"but got {type(value)}" + ) + super().__setitem__(key, value) + + def update(self, *node_classes): + replacing = set(self.keys()).intersection([n.__name__ for n in node_classes]) + for name in replacing: + del self[name] + + for node in node_classes: + self[node.__name__] = node diff --git a/pyiron_workflow/output_parser.py b/pyiron_workflow/output_parser.py new file mode 100644 index 00000000..2f88e71e --- /dev/null +++ b/pyiron_workflow/output_parser.py @@ -0,0 +1,98 @@ +""" +Inspects code to automatically parse return values as strings +""" + +import ast +import inspect +import re +from textwrap import dedent + + +def _remove_spaces_until_character(string): + pattern = r"\s+(?=\s)" + modified_string = re.sub(pattern, "", string) + return modified_string + + +class ParseOutput: + """ + Given a function with at most one `return` expression, inspects the source code and + parses a list of strings containing the returned values. + If the function returns `None`, the parsed value is also `None`. + This parsed value is evaluated at instantiation and stored in the `output` + attribute. 
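+
+ For example (an illustrative sketch):
+ >>> def plus_one(x):
+ ... return x + 1
+ >>> ParseOutput(plus_one).output
+ ['x + 1']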
+ In case more than one `return` expression is found, a `ValueError` is raised. + """ + + def __init__(self, function): + self._func = function + self._source = None + self._output = self.get_parsed_output() + + @property + def func(self): + return self._func + + @property + def dedented_source_string(self): + return dedent(inspect.getsource(self.func)) + + @property + def node_return(self): + tree = ast.parse(self.dedented_source_string) + returns = [] + for node in ast.walk(tree): + if isinstance(node, ast.Return): + returns.append(node) + + if len(returns) > 1: + raise ValueError( + f"{self.__class__.__name__} can only parse callables with at most one " + f"return value, but ast.walk found {len(returns)}." + ) + + try: + return returns[0] + except IndexError: + return None + + @property + def source(self): + if self._source is None: + self._source = self.dedented_source_string.split("\n")[:-1] + return self._source + + def get_string(self, node): + string = "" + for ll in range(node.lineno - 1, node.end_lineno): + if ll == node.lineno - 1 == node.end_lineno - 1: + string += _remove_spaces_until_character( + self.source[ll][node.col_offset : node.end_col_offset] + ) + elif ll == node.lineno - 1: + string += _remove_spaces_until_character( + self.source[ll][node.col_offset :] + ) + elif ll == node.end_lineno - 1: + string += _remove_spaces_until_character( + self.source[ll][: node.end_col_offset] + ) + else: + string += _remove_spaces_until_character(self.source[ll]) + return string + + @property + def output(self): + return self._output + + def get_parsed_output(self): + if self.node_return is None or self.node_return.value is None: + return + elif isinstance(self.node_return.value, ast.Tuple): + return [self.get_string(s) for s in self.node_return.value.dims] + else: + out = [self.get_string(self.node_return.value)] + if out == ["None"]: + return + else: + return out diff --git a/pyiron_workflow/type_hinting.py b/pyiron_workflow/type_hinting.py new file mode 100644 index 00000000..31efaa53 --- /dev/null +++ b/pyiron_workflow/type_hinting.py @@ -0,0 +1,94 @@ +""" +This module provides helper functions for evaluating data relative to type hints, and +type hints relative to each other. 
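+
+For example (an illustrative sketch of the main helpers defined here):
+
+>>> valid_value(1, int | float)
+True
+>>> type_hint_is_as_or_more_specific_than(int | float, object)
+True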
+""" + +import types +import typing +from collections.abc import Callable + +from typeguard import check_type, TypeCheckError + + +def valid_value(value, type_hint) -> bool: + try: + return isinstance(value, type_hint) + except TypeError: + # Subscripted generics cannot be used with class and instance checks + try: + # typeguard handles this case + check_type(value, type_hint) + return True + except TypeCheckError: + # typeguard raises an error on a failed check + return False + + +def type_hint_to_tuple(type_hint) -> tuple: + if isinstance(type_hint, (types.UnionType, typing._UnionGenericAlias)): + return typing.get_args(type_hint) + else: + return (type_hint,) + + +def type_hint_is_as_or_more_specific_than(hint, other) -> bool: + hint_origin = typing.get_origin(hint) + other_origin = typing.get_origin(other) + if set([hint_origin, other_origin]) & set([types.UnionType, typing.Union]): + # If either hint is a union, turn both into tuples and call recursively + return all( + [ + any( + [ + type_hint_is_as_or_more_specific_than(h, o) + for o in type_hint_to_tuple(other) + ] + ) + for h in type_hint_to_tuple(hint) + ] + ) + elif hint_origin is None and other_origin is None: + # Once both are raw classes, just do a subclass test + try: + return issubclass(hint, other) + except TypeError: + return hint == other + elif hint_origin == other_origin: + # If they both have an origin, break into arguments and treat cases + hint_args = typing.get_args(hint) + other_args = typing.get_args(other) + if len(hint_args) == 0 and len(other_args) > 0: + # Failing to specify anything is not being more specific + return False + elif hint_origin in [dict, tuple, Callable]: + # for these origins the order of arguments matters + if len(other_args) == 0: + # If the other doesn't specify _any_ arguments, we must be more specific + return True + elif len(other_args) == len(hint_args): + # If they both specify arguments, they should be more specific 1:1 + return all( + [ + type_hint_is_as_or_more_specific_than(h, o) + for o, h in zip(other_args, hint_args) + ] + ) + else: + # Otherwise they both specify but a mis-matching number of args + return False + else: + # Otherwise order doesn't matter so make sure the arguments are a subset + return all( + [ + any( + [ + type_hint_is_as_or_more_specific_than(h, o) + for o in other_args + ] + ) + for h in hint_args + ] + ) + else: + # Lastly, if they both have origins, but different ones, fail + return False diff --git a/pyiron_workflow/util.py b/pyiron_workflow/util.py new file mode 100644 index 00000000..61dae6c7 --- /dev/null +++ b/pyiron_workflow/util.py @@ -0,0 +1,34 @@ +from pyiron_base import state + +logger = state.logger + + +class DotDict(dict): + def __getattr__(self, item): + return self.__getitem__(item) + + def __setattr__(self, key, value): + self[key] = value + + def __dir__(self): + return set(super().__dir__() + list(self.keys())) + + +class SeabornColors: + """ + Hex codes for the ten `seaborn.color_palette()` colors (plus pure white and black), + recreated to avoid adding an entire dependency. 
+    """
+
+    blue = "#1f77b4"
+    orange = "#ff7f0e"
+    green = "#2ca02c"
+    red = "#d62728"
+    purple = "#9467bd"
+    brown = "#8c564b"
+    pink = "#e377c2"
+    gray = "#7f7f7f"
+    olive = "#bcbd22"
+    cyan = "#17becf"
+    white = "#ffffff"
+    black = "#000000"
diff --git a/pyiron_workflow/workflow.py b/pyiron_workflow/workflow.py
new file mode 100644
index 00000000..7177593b
--- /dev/null
+++ b/pyiron_workflow/workflow.py
@@ -0,0 +1,229 @@
+"""
+Provides the main workhorse class for creating and running workflows.
+
+This class is intended as the single point of entry for users making an import.
+"""
+
+from __future__ import annotations
+
+from typing import Optional, TYPE_CHECKING
+
+from pyiron_workflow.composite import Composite
+from pyiron_workflow.io import Inputs, Outputs
+
+
+if TYPE_CHECKING:
+    from bidict import bidict
+
+    from pyiron_workflow.node import Node
+
+
+class Workflow(Composite):
+    """
+    Workflows are dynamic composite nodes -- i.e. they hold and run a collection of
+    nodes (a subgraph) which can be dynamically modified (adding and removing nodes,
+    and modifying their connections).
+
+    Nodes can be added to the workflow at instantiation or with dot-assignment later on.
+    They are then accessible either under the `nodes` dot-dictionary, or just directly
+    by dot-access on the workflow object itself.
+
+    Using the `input` and `output` attributes, the workflow gives access to all the
+    IO channels among its nodes which are currently unconnected.
+
+    The `Workflow` class acts as a single point of import for us: directly from the
+    class we can use the `create` method to instantiate workflow objects.
+    When called from a workflow _instance_, any created nodes get their parent set to
+    the workflow instance being used.
+
+    Examples:
+        We allow adding nodes to workflows in five equivalent ways:
+        >>> from pyiron_workflow.workflow import Workflow
+        >>>
+        >>> def fnc(x=0):
+        ...     return x + 1
+        >>>
+        >>> # (1) As *args at instantiation
+        >>> n1 = Workflow.create.Function(fnc, label="n1")
+        >>> wf = Workflow("my_workflow", n1)
+        >>>
+        >>> # (2) Being passed to the `add` method
+        >>> wf.add(Workflow.create.Function(fnc, label="n2"))
+        >>>
+        >>> # (3) Calling `create` from the _workflow instance_ that will own the node
+        >>> wf.create.Function(fnc, label="n3")  # Instantiating from the instance
+        >>>
+        >>> # (4) By attribute assignment (here the node can be created from the
+        >>> # workflow class or instance and the end result is the same)
+        >>> wf.n4 = wf.create.Function(fnc, label="anyhow_n4_gets_used")
+        >>>
+        >>> # (5) By creating from the workflow class but specifying the parent kwarg
+        >>> Workflow.create.Function(fnc, label="n5", parent=wf)
+
+        By default, the node naming scheme is strict, so if you try to add a node to a
+        label that already exists, you will get an error. This behaviour can be changed
+        at instantiation with the `strict_naming` kwarg, or afterwards by assigning a
+        bool to this property. When deactivated, repeated assignments to the same label
+        just get appended with an index:
+        >>> wf.strict_naming = False
+        >>> wf.my_node = wf.create.Function(fnc, x=0)
+        >>> wf.my_node = wf.create.Function(fnc, x=1)
+        >>> wf.my_node = wf.create.Function(fnc, x=2)
+        >>> print(wf.my_node.inputs.x, wf.my_node0.inputs.x, wf.my_node1.inputs.x)
+        0 1 2
+
+        The `Workflow` class is designed as a single point of entry for workflows, so
+        you can also access decorators to define new node classes right from the
+        workflow (cf. the `Node` docs for more detail on the node types).
+        Let's use these to explore a workflow's input and output, which are dynamically
+        generated from the unconnected IO of its nodes:
+        >>> @Workflow.wrap_as.function_node("y")
+        ... def plus_one(x: int = 0):
+        ...     return x + 1
+        >>>
+        >>> wf = Workflow("io_workflow")
+        >>> wf.first = plus_one()
+        >>> wf.second = plus_one()
+        >>> print(len(wf.inputs), len(wf.outputs))
+        2 2
+
+        If we connect the output of one node to the input of the other, there are fewer
+        dangling channels for the workflow IO to find:
+        >>> wf.second.inputs.x = wf.first.outputs.y
+        >>> print(len(wf.inputs), len(wf.outputs))
+        1 1
+
+        Then we just run the workflow:
+        >>> out = wf.run()
+
+        The workflow joins node labels and channel labels with a `__` separator to
+        provide direct access to the output:
+        >>> print(wf.outputs.second__y.value)
+        2
+
+        These input keys can be used when calling the workflow to update the input. In
+        our example, calling the workflow runs it with the updated input, so all we
+        need to do to see updated workflow output is call it:
+        >>> out = wf(first__x=10)
+        >>> out
+        {'second__y': 12}
+
+        Note: this _looks_ like a dictionary, but it has the extra convenience that we
+        can dot-access the data:
+        >>> out.second__y
+        12
+
+        Workflows also give access to packages of pre-built nodes under different
+        namespaces, e.g.
+        >>> wf = Workflow("with_prebuilt")
+        >>>
+        >>> wf.structure = wf.create.atomistics.Bulk(
+        ...     cubic=True,
+        ...     name="Al"
+        ... )
+        >>> wf.engine = wf.create.atomistics.Lammps(structure=wf.structure)
+        >>> wf.calc = wf.create.atomistics.CalcMd(
+        ...     job=wf.engine,
+        ... )
+        >>> wf.plot = wf.create.standard.Scatter(
+        ...     x=wf.calc.outputs.steps,
+        ...     y=wf.calc.outputs.temperature
+        ... )
+
+        We can give more convenient names to IO, and even access IO that would normally
+        be hidden (because it's connected) by specifying an `inputs_map` and/or
+        `outputs_map`. In the example above, let's make the resulting figure a bit
+        easier to find:
+        >>> wf.outputs_map = {"plot__fig": "fig"}
+        >>> wf().fig
+
+        Workflows can be visualized in the notebook using graphviz:
+        >>> wf.draw()
+
+        The resulting object can be saved as an image, e.g.
+        >>> wf.draw().render(filename="demo", format="png")
+
+        When your workflow's data follows a directed-acyclic pattern, it will determine
+        the execution flow automatically.
+        If you want or need more control, you can set the `automate_execution` flag to
+        `False` and manually specify an execution flow; cf. the sketch at the end of
+        this docstring.
+
+    TODO: Workflows can be serialized.
+
+    TODO: Once you're satisfied with how a workflow is structured, you can export it
+    as a macro node for use in other workflows. (Maybe we should allow for nested
+    workflows without exporting to a node? I was concerned then what happens to the
+    nesting abstraction if, instead of accessing IO through the workflow's IO flags,
+    a user manually connects IO from individual nodes from two different, nested or
+    sibling workflows when those connections were _previously internal to their own
+    workflow_. This seems very unsafe. Maybe there is something like a lock we can
+    apply that falls short of a full export, but still guarantees the internal
+    integrity of workflows when they're used somewhere else?)
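+
+    As a minimal sketch (mirroring the integration tests), manually specified
+    execution might look like this:
+    >>> wf = Workflow("manual_flow", automate_execution=False)
+    >>> wf.first = plus_one()
+    >>> wf.second = plus_one(x=wf.first.outputs.y)
+    >>> wf.first > wf.second  # An execution signal: run `second` after `first`
+    >>> wf.starting_nodes = [wf.first]
+    >>> out = wf.run()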
+ """ + + def __init__( + self, + label: str, + *nodes: Node, + strict_naming: bool = True, + inputs_map: Optional[dict | bidict] = None, + outputs_map: Optional[dict | bidict] = None, + automate_execution: bool = True, + ): + super().__init__( + label=label, + parent=None, + strict_naming=strict_naming, + inputs_map=inputs_map, + outputs_map=outputs_map, + ) + self.automate_execution = automate_execution + + for node in nodes: + self.add(node) + + @property + def inputs(self) -> Inputs: + return self._build_inputs() + + @property + def outputs(self) -> Outputs: + return self._build_outputs() + + @staticmethod + def run_graph(self): + if self.automate_execution: + self.set_run_signals_to_dag_execution() + return super().run_graph(self) + + def to_node(self): + """ + Export the workflow to a macro node, with the currently exposed IO mapped to + new IO channels, and the workflow mapped into the node_function. + """ + raise NotImplementedError + + # (De)serialization is necessary throughout these classes, but not implemented here + def serialize(self): + raise NotImplementedError + + def deserialize(self, source): + raise NotImplementedError + + @property + def parent(self) -> None: + return None + + @parent.setter + def parent(self, new_parent: None): + # Currently workflows are not allowed to have a parent -- maybe we want to + # change our minds on this in the future? If we do, we can just expose `parent` + # as a kwarg and roll back this private var/property/setter protection and let + # the super call in init handle everything + if new_parent is not None: + raise TypeError( + f"{self.__class__} may only take None as a parent but got " + f"{type(new_parent)}" + ) diff --git a/setup.cfg b/setup.cfg index f64d2cce..a916a715 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,8 +6,8 @@ [versioneer] VCS = git style = pep440-pre -versionfile_source = pyiron_module_template/_version.py +versionfile_source = pyiron_workflow/_version.py #versionfile_build = -tag_prefix = pyiron_module_template- -parentdir_prefix = pyiron_module_template +tag_prefix = pyiron_workflow- +parentdir_prefix = pyiron_workflow diff --git a/setup.py b/setup.py index f69b856b..f4e7ba1c 100644 --- a/setup.py +++ b/setup.py @@ -5,14 +5,14 @@ import versioneer setup( - name='pyiron_module_template', + name='pyiron_workflow', version=versioneer.get_version(), - description='pyiron_module_template - module extension to pyiron.', + description='Graph-and-node based workflow tools.', long_description='http://pyiron.org', - url='https://github.com/pyiron/pyiron_module_template', + url='https://github.com/pyiron/pyiron_workflow', author='Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department', - author_email='@mpie.de', + author_email='liamhuber@greyhavensolutions.com', license='BSD', classifiers=[ @@ -21,7 +21,6 @@ 'License :: OSI Approved :: BSD License', 'Intended Audience :: Science/Research', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', ], @@ -29,7 +28,16 @@ keywords='pyiron', packages=find_packages(exclude=["*tests*", "*docs*", "*binder*", "*conda*", "*notebooks*", "*.ci_support*"]), install_requires=[ - 'pyiron_base' + 'bidict==0.22.1', + 'cloudpickle==2.2.1', + 'graphviz==0.20.1', + 'maggma==0.57.1', + 'matplotlib==3.8.0', + 'numpy==1.26.0', + 'pyiron_atomistics==0.3.4', + 'pyiron_base==0.6.7', + 'toposort==1.10', + 'typeguard==4.1.5', ], cmdclass=versioneer.get_cmdclass(), 
diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py deleted file mode 100644 index 9c25ae2d..00000000 --- a/tests/integration/test_integration.py +++ /dev/null @@ -1,6 +0,0 @@ -import unittest - - -class TestNothing(unittest.TestCase): - def test_nothing(self): - self.assertTrue(True) diff --git a/tests/integration/test_workflow.py b/tests/integration/test_workflow.py new file mode 100644 index 00000000..834f66bf --- /dev/null +++ b/tests/integration/test_workflow.py @@ -0,0 +1,173 @@ +import unittest + +import numpy as np + +from pyiron_workflow.channels import OutputSignal +from pyiron_workflow.function import Function +from pyiron_workflow.workflow import Workflow + + +class TestTopology(unittest.TestCase): + def test_manually_constructed_cyclic_graph(self): + """ + Check that cyclic graphs run. + """ + + @Workflow.wrap_as.single_value_node() + def numpy_randint(low=0, high=20): + rand = np.random.randint(low=low, high=high) + print(f"Generating random number between {low} and {high}...{rand}!") + return rand + + class GreaterThanLimitSwitch(Function): + """ + A switch class for sending signal output depending on a '>' check + applied to input + """ + + def __init__(self, **kwargs): + super().__init__( + self.greater_than, + output_labels="value_gt_limit", + **kwargs + ) + self.signals.output.true = OutputSignal("true", self) + self.signals.output.false = OutputSignal("false", self) + + @staticmethod + def greater_than(value, limit=10): + return value > limit + + def process_run_result(self, function_output): + """ + Process the output as usual, then fire signals accordingly. + """ + super().process_run_result(function_output) + + if self.outputs.value_gt_limit.value: + print(f"{self.inputs.value.value} > {self.inputs.limit.value}") + self.signals.output.true() + else: + print(f"{self.inputs.value.value} <= {self.inputs.limit.value}") + self.signals.output.false() + + @Workflow.wrap_as.single_value_node() + def numpy_sqrt(value=0): + sqrt = np.sqrt(value) + print(f"sqrt({value}) = {sqrt}") + return sqrt + + wf = Workflow("rand_until_big_then_sqrt", automate_execution=False) + + wf.rand = numpy_randint() + + wf.gt_switch = GreaterThanLimitSwitch() + wf.gt_switch.inputs.value = wf.rand + + wf.sqrt = numpy_sqrt() + wf.sqrt.inputs.value = wf.rand + + wf.gt_switch.signals.output.false > wf.rand > wf.gt_switch # Loop on false + wf.gt_switch.signals.output.true > wf.sqrt # On true break to sqrt node + wf.starting_nodes = [wf.rand] + + wf.run() + self.assertAlmostEqual( + np.sqrt(wf.rand.outputs.rand.value), wf.sqrt.outputs.sqrt.value, 6 + ) + + def test_for_loop(self): + n = 5 + + bulk_loop = Workflow.create.meta.for_loop( + Workflow.create.atomistics.Bulk, + n, + iterate_on=("a",), + )() + + out = bulk_loop( + name="Al", # Sent equally to each body node + A=np.linspace(3.9, 4.1, n).tolist(), # Distributed across body nodes + ) + + self.assertTrue( + np.allclose( + [struct.cell.volume for struct in out.STRUCTURE], + [ + 14.829749999999995, + 15.407468749999998, + 15.999999999999998, + 16.60753125, + 17.230249999999995 + ] + ) + ) + + def test_while_loop(self): + with self.subTest("Random"): + np.random.seed(0) + + @Workflow.wrap_as.single_value_node("random") + def random(length: int | None = None): + return np.random.random(length) + + @Workflow.wrap_as.single_value_node("gt") + def greater_than(x: float, threshold: float): + return x > threshold + + RandomWhile = Workflow.create.meta.while_loop( + loop_body_class=random, + condition_class=greater_than, 
+ internal_connection_map=[("Random", "random", "GreaterThan", "x")], + outputs_map={"Random__random": "capped_result"} + ) + + # Define workflow + + wf = Workflow("random_until_small_enough") + + ## Wire together the while loop and its condition + + wf.random_while = RandomWhile() + + ## Give convenient labels + wf.inputs_map = {"random_while__GreaterThan__threshold": "threshold"} + wf.outputs_map = {"random_while__capped_result": "capped_result"} + + self.assertAlmostEqual( + wf(threshold=0.1).capped_result, + 0.07103605819788694, # For this reason we set the random seed + ) + + with self.subTest("Self-data-loop"): + + @Workflow.wrap_as.single_value_node() + def add(a, b): + return a + b + + @Workflow.wrap_as.single_value_node() + def less_than_ten(value): + return value < 10 + + AddWhile = Workflow.create.meta.while_loop( + loop_body_class=add, + condition_class=less_than_ten, + internal_connection_map=[ + ("Add", "a + b", "LessThanTen", "value"), + ("Add", "a + b", "Add", "a") + ], + inputs_map={"Add__a": "a", "Add__b": "b"}, + outputs_map={"Add__a + b": "total"} + ) + + wf = Workflow("do_while") + wf.add_while = AddWhile() + + wf.inputs_map = { + "add_while__a": "a", + "add_while__b": "b" + } + wf.outputs_map = {"add_while__total": "total"} + + out = wf(a=1, b=2) + self.assertEqual(out.total, 11) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index cc7dda6f..e69de29b 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -1,3 +0,0 @@ -""" -Small tests run, relatively fast tests for checking individual bits of the code base. -""" \ No newline at end of file diff --git a/tests/unit/executors/__init__.py b/tests/unit/executors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/executors/test_cloudprocesspool.py b/tests/unit/executors/test_cloudprocesspool.py new file mode 100644 index 00000000..eb49333e --- /dev/null +++ b/tests/unit/executors/test_cloudprocesspool.py @@ -0,0 +1,169 @@ +from functools import partialmethod +from concurrent.futures import TimeoutError +from time import sleep +import unittest + +from pyiron_workflow.executors.cloudpickleprocesspool import ( + CloudpickleProcessPoolExecutor +) + + +class Foo: + """ + A base class to be dynamically modified for testing CloudpickleProcessPoolExecutor. + """ + def __init__(self, fnc: callable): + self.fnc = fnc + self.result = None + + @property + def run(self): + return self.fnc + + def process_result(self, future): + self.result = future.result() + + +def dynamic_foo(): + """ + A decorator for dynamically modifying the Foo class to test + CloudpickleProcessPoolExecutor. + + Overrides the `fnc` input of `Foo` with the decorated function. + """ + def as_dynamic_foo(fnc: callable): + return type( + "DynamicFoo", + (Foo,), # Define parentage + { + "__init__": partialmethod( + Foo.__init__, + fnc + ) + }, + ) + + return as_dynamic_foo + + +class TestCloudpickleProcessPoolExecutor(unittest.TestCase): + + def test_unpickleable_callable(self): + """ + We should be able to use an unpickleable callable -- in this case, a method of + a dynamically defined class. 
+ """ + fortytwo = 42 # No magic numbers; we use it in a couple places so give it a var + + @dynamic_foo() + def slowly_returns_42(): + sleep(0.1) + return fortytwo + + dynamic_42 = slowly_returns_42() # Instantiate the dynamically defined class + self.assertIsInstance( + dynamic_42, + Foo, + msg="Just a sanity check that the test is set up right" + ) + self.assertIsNone( + dynamic_42.result, + msg="Just a sanity check that the test is set up right" + ) + executor = CloudpickleProcessPoolExecutor() + fs = executor.submit(dynamic_42.run) + fs.add_done_callback(dynamic_42.process_result) + self.assertFalse(fs.done(), msg="Should be running on the executor") + self.assertEqual(fortytwo, fs.result(), msg="Future must complete") + self.assertEqual(fortytwo, dynamic_42.result, msg="Callback must get called") + + def test_unpickleable_return(self): + """ + We should be able to use an unpickleable return value -- in this case, a + method of a dynamically defined class. + """ + + @dynamic_foo() + def does_nothing(): + return + + @dynamic_foo() + def slowly_returns_unpickleable(): + """ + Returns a complex, dynamically defined variable + """ + sleep(0.1) + inside_variable = does_nothing() + inside_variable.result = "it was an inside job!" + return inside_variable + + dynamic_dynamic = slowly_returns_unpickleable() + executor = CloudpickleProcessPoolExecutor() + fs = executor.submit(dynamic_dynamic.run) + self.assertIsInstance( + fs.result(), + Foo, + msg="The custom future should be unpickling the result" + ) + self.assertEqual(fs.result().result, "it was an inside job!") + + def test_unpickleable_args(self): + """ + We should be able to use an unpickleable return value -- in this case, a + method of a dynamically defined class. + """ + + @dynamic_foo() + def does_nothing(): + return + + @dynamic_foo() + def slowly_returns_unpickleable(unpickleable_arg): + """ + Returns a complex, dynamically defined variable + """ + sleep(0.1) + unpickleable_arg.result = "input updated" + return unpickleable_arg + + dynamic_dynamic = slowly_returns_unpickleable() + executor = CloudpickleProcessPoolExecutor() + unpicklable_object = does_nothing() + fs = executor.submit(dynamic_dynamic.run, unpicklable_object) + self.assertEqual(fs.result().result, "input updated") + + def test_exception(self): + @dynamic_foo() + def raise_error(): + raise RuntimeError + + re = raise_error() + executor = CloudpickleProcessPoolExecutor() + fs = executor.submit(re.run) + with self.assertRaises(RuntimeError): + fs.result() + + def test_timeout(self): + fortytwo = 42 + + @dynamic_foo() + def slow(): + sleep(0.1) + return fortytwo + + f = slow() + executor = CloudpickleProcessPoolExecutor() + fs = executor.submit(f.run) + self.assertEqual( + fs.result(timeout=30), + fortytwo, + msg="waiting long enough should get the result" + ) + + with self.assertRaises(TimeoutError): + fs = executor.submit(f.run) + fs.result(timeout=0.0001) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/unit/test_channels.py b/tests/unit/test_channels.py new file mode 100644 index 00000000..4dfd1d7f --- /dev/null +++ b/tests/unit/test_channels.py @@ -0,0 +1,210 @@ +from unittest import TestCase, skipUnless +from sys import version_info + +from pyiron_workflow.channels import ( + InputData, OutputData, InputSignal, OutputSignal, NotData +) + + +class DummyNode: + def __init__(self): + self.foo = [0] + self.running = False + self.label = "node_label" + + def update(self): + self.foo.append(self.foo[-1] + 1) + + +@skipUnless(version_info[0] == 3 and 
version_info[1] >= 10, "Only supported for 3.10+") +class TestDataChannels(TestCase): + + def setUp(self) -> None: + self.ni1 = InputData(label="numeric", node=DummyNode(), default=1, type_hint=int | float) + self.ni2 = InputData(label="numeric", node=DummyNode(), default=1, type_hint=int | float) + self.no = OutputData(label="numeric", node=DummyNode(), default=0, type_hint=int | float) + + self.so1 = OutputData(label="list", node=DummyNode(), default=["foo"], type_hint=list) + self.so2 = OutputData(label="list", node=DummyNode(), default=["foo"], type_hint=list) + + def test_mutable_defaults(self): + self.so1.default.append("bar") + self.assertEqual( + len(self.so2.default), + len(self.so1.default) - 1, + msg="Mutable defaults should avoid sharing between instances" + ) + + def test_connections(self): + + with self.subTest("Test connection reflexivity and value updating"): + self.assertEqual(self.no.value, 0) + self.ni1.connect(self.no) + self.assertIn(self.no, self.ni1.connections) + self.assertIn(self.ni1, self.no.connections) + self.assertEqual(self.no.value, self.ni1.value) + + with self.subTest("Test disconnection"): + disconnected = self.ni2.disconnect(self.no) + self.assertEqual( + len(disconnected), + 0, + msg="There were no connections to begin with, nothing should be there" + ) + disconnected = self.ni1.disconnect(self.no) + self.assertEqual( + [], self.ni1.connections, msg="No connections should be left" + ) + self.assertEqual( + [], + self.no.connections, + msg="Disconnection should also have been reflexive" + ) + self.assertListEqual( + disconnected, + [(self.ni1, self.no)], + msg="Expected a list of the disconnected pairs." + ) + + with self.subTest("Test multiple connections"): + self.no.connect(self.ni1, self.ni2) + self.assertEqual(2, len(self.no.connections), msg="Should connect to all") + + with self.subTest("Test iteration"): + self.assertTrue(all([con in self.no.connections for con in self.no])) + + with self.subTest("Don't push NotData"): + self.no.disconnect_all() + self.no.value = NotData + self.ni1.value = 1 + self.ni1.connect(self.no) + self.assertEqual( + self.ni1.value, + 1, + msg="NotData should not be getting pushed on connection" + ) + self.ni2.value = 2 + self.no.value = 3 + self.ni2.connect(self.no) + self.assertEqual( + self.ni2.value, + 3, + msg="Actual data should be getting pushed" + ) + self.no.update(NotData) + self.assertEqual( + self.ni2.value, + 3, + msg="NotData should not be getting pushed on updates" + ) + + def test_connection_validity_tests(self): + self.ni1.type_hint = int | float | bool # Override with a larger set + self.ni2.type_hint = int # Override with a smaller set + + with self.assertRaises(TypeError): + self.ni1.connect("Not a channel at all") + + self.no.connect(self.ni1) + self.assertIn( + self.no, + self.ni1.connections, + "Input types should be allowed to be a super-set of output types" + ) + + self.no.connect(self.ni2) + self.assertNotIn( + self.no, + self.ni2.connections, + "Input types should not be allowed to be a sub-set of output types" + ) + + self.so1.connect(self.ni2) + self.assertNotIn( + self.so1, + self.ni2.connections, + "Totally different types should not allow connections" + ) + + self.ni2.strict_connections = False + self.so1.connect(self.ni2) + self.assertIn( + self.so1, + self.ni2.connections, + "With strict connections turned off, we should allow type-violations" + ) + + def test_ready(self): + with self.subTest("Test defaults and not-data"): + without_default = InputData(label="without_default", 
node=DummyNode()) + self.assertIs( + without_default.value, + NotData, + msg=f"Without a default, spec is to have a NotData value but got " + f"{type(without_default.value)}" + ) + self.assertFalse( + without_default.ready, + msg="Even without type hints, readiness should be false when the value" + "is NotData" + ) + + self.ni1.value = 1 + self.assertTrue(self.ni1.ready) + + self.ni1.value = "Not numeric at all" + self.assertFalse(self.ni1.ready) + + def test_update(self): + self.no.connect(self.ni1, self.ni2) + self.no.update(42) + for inp in self.no.connections: + self.assertEqual( + self.no.value, + inp.value, + msg="Value should have been passed downstream" + ) + + self.ni1.node.running = True + with self.assertRaises(RuntimeError): + self.no.update(42) + + +class TestSignalChannels(TestCase): + def setUp(self) -> None: + node = DummyNode() + self.inp = InputSignal(label="inp", node=node, callback=node.update) + self.out = OutputSignal(label="out", node=DummyNode()) + + def test_connections(self): + with self.subTest("Good connection"): + self.inp.connect(self.out) + self.assertEqual(self.inp.connections, [self.out]) + self.assertEqual(self.out.connections, [self.inp]) + + with self.subTest("Ignore repeated connection"): + self.out.connect(self.inp) + self.assertEqual(len(self.inp), 1) + self.assertEqual(len(self.out), 1) + + with self.subTest("Check disconnection"): + self.out.disconnect_all() + self.assertEqual(len(self.inp), 0) + self.assertEqual(len(self.out), 0) + + with self.subTest("No connections to non-SignalChannels"): + bad = InputData(label="numeric", node=DummyNode(), default=1, type_hint=int) + with self.assertRaises(TypeError): + self.inp.connect(bad) + + with self.subTest("Test syntactic sugar"): + self.out.disconnect_all() + self.out > self.inp + self.assertIn(self.out, self.inp.connections) + + def test_calls(self): + self.out.connect(self.inp) + self.out() + self.assertListEqual(self.inp.node.foo, [0, 1]) + self.inp() + self.assertListEqual(self.inp.node.foo, [0, 1, 2]) diff --git a/tests/unit/test_files.py b/tests/unit/test_files.py new file mode 100644 index 00000000..0093fea9 --- /dev/null +++ b/tests/unit/test_files.py @@ -0,0 +1,50 @@ +import unittest +from pyiron_workflow.files import DirectoryObject, FileObject +from pathlib import Path + + +class TestFiles(unittest.TestCase): + def setUp(cls): + cls.directory = DirectoryObject("test") + + def tearDown(cls): + cls.directory.delete() + + def test_directory_exists(self): + self.assertTrue(Path("test").exists() and Path("test").is_dir()) + + def test_write(self): + self.directory.write(file_name="test.txt", content="something") + self.assertTrue(self.directory.file_exists("test.txt")) + self.assertTrue( + "test/test.txt" in [ + ff.replace("\\", "/") + for ff in self.directory.list_content()['file'] + ] + ) + self.assertEqual(len(self.directory), 1) + + def test_create_subdirectory(self): + self.directory.create_subdirectory("another_test") + self.assertTrue(Path("test/another_test").exists()) + + def test_path(self): + f = FileObject("test.txt", self.directory) + self.assertEqual(str(f.path).replace("\\", "/"), "test/test.txt") + + def test_read_and_write(self): + f = FileObject("test.txt", self.directory) + f.write("something") + self.assertEqual(f.read(), "something") + + def test_is_file(self): + f = FileObject("test.txt", self.directory) + self.assertFalse(f.is_file()) + f.write("something") + self.assertTrue(f.is_file()) + f.delete() + self.assertFalse(f.is_file()) + + +if __name__ == '__main__': + 
unittest.main() diff --git a/tests/unit/test_function.py b/tests/unit/test_function.py new file mode 100644 index 00000000..1420d06d --- /dev/null +++ b/tests/unit/test_function.py @@ -0,0 +1,546 @@ +from concurrent.futures import Future +from sys import version_info +from typing import Optional, Union +import unittest +import warnings + +# from pyiron_contrib.executors import CloudpickleProcessPoolExecutor as Executor +# from pympipool.mpi.executor import PyMPISingleTaskExecutor as Executor + +from pyiron_workflow.executors import CloudpickleProcessPoolExecutor as Executor + +from pyiron_workflow.channels import NotData +from pyiron_workflow.files import DirectoryObject +from pyiron_workflow.function import ( + Function, SingleValue, function_node, single_value_node +) + + +def throw_error(x: Optional[int] = None): + raise RuntimeError + + +def plus_one(x=1) -> Union[int, float]: + y = x + 1 + return y + + +def no_default(x, y): + return x + y + 1 + + +def returns_multiple(x, y): + return x, y, x + y + + +def void(): + pass + + +def multiple_branches(x): + if x < 10: + return True + else: + return False + + +@unittest.skipUnless(version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+") +class TestFunction(unittest.TestCase): + def test_instantiation(self): + with self.subTest("Void function is allowable"): + void_node = Function(void) + self.assertEqual(len(void_node.outputs), 0) + + with self.subTest("Args and kwargs at initialization"): + node = Function(plus_one) + self.assertIs( + node.outputs.y.value, + NotData, + msg="Nodes should not run at instantiation", + ) + node.inputs.x = 10 + self.assertIs( + node.outputs.y.value, + NotData, + msg="Nodes should not run on input updates", + ) + node.run() + self.assertEqual( + node.outputs.y.value, + 11, + msg=f"Slow nodes should still run when asked! Expected 11 but got " + f"{node.outputs.y.value}" + ) + + node = Function(no_default, 1, y=2, output_labels="output") + node.run() + self.assertEqual( + no_default(1, 2), + node.outputs.output.value, + msg="Nodes should allow input initialization by arg and kwarg" + ) + node(2, y=3) + node.run() + self.assertEqual( + no_default(2, 3), + node.outputs.output.value, + msg="Nodes should allow input update on call by arg and kwarg" + ) + + with self.assertRaises(ValueError): + # Can't pass more args than the function takes + Function(returns_multiple, 1, 2, 3) + + with self.subTest("Initializing with connections"): + node = Function(plus_one, x=2) + node2 = Function(plus_one, x=node.outputs.y) + self.assertIs( + node2.inputs.x.connections[0], + node.outputs.y, + msg="Should be able to make a connection at initialization" + ) + node > node2 + node.run() + self.assertEqual(4, node2.outputs.y.value, msg="Initialize from connection") + + def test_defaults(self): + with_defaults = Function(plus_one) + self.assertEqual( + with_defaults.inputs.x.value, + 1, + msg=f"Expected to get the default provided in the underlying function but " + f"got {with_defaults.inputs.x.value}", + ) + without_defaults = Function(no_default) + self.assertIs( + without_defaults.inputs.x.value, + NotData, + msg=f"Expected values with no default specified to start as {NotData} but " + f"got {without_defaults.inputs.x.value}", + ) + self.assertFalse( + without_defaults.ready, + msg="I guess we should test for behaviour and not implementation... Without" + "defaults, the node should not be ready!" 
+ ) + + def test_label_choices(self): + with self.subTest("Automatically scrape output labels"): + n = Function(plus_one) + self.assertListEqual(n.outputs.labels, ["y"]) + + with self.subTest("Allow overriding them"): + n = Function(no_default, output_labels=("sum_plus_one",)) + self.assertListEqual(n.outputs.labels, ["sum_plus_one"]) + + with self.subTest("Allow forcing _one_ output channel"): + n = Function(returns_multiple, output_labels="its_a_tuple") + self.assertListEqual(n.outputs.labels, ["its_a_tuple"]) + + with self.subTest("Fail on multiple return values"): + with self.assertRaises(ValueError): + # Can't automatically parse output labels from a function with multiple + # return expressions + Function(multiple_branches) + + with self.subTest("Override output label scraping"): + switch = Function(multiple_branches, output_labels="bool") + self.assertListEqual(switch.outputs.labels, ["bool"]) + + def test_signals(self): + @function_node() + def linear(x): + return x + + @function_node() + def times_two(y): + return 2 * y + + l = linear(x=1) + t2 = times_two( + output_labels=["double"], + y=l.outputs.x + ) + self.assertIs( + t2.outputs.double.value, + NotData, + msg=f"Without updates, expected the output to be {NotData} but got " + f"{t2.outputs.double.value}" + ) + + # Nodes should _all_ have the run and ran signals + t2.signals.input.run = l.signals.output.ran + l.run() + self.assertEqual( + t2.outputs.double.value, 2, + msg="Running the upstream node should trigger a run here" + ) + + with self.subTest("Test syntactic sugar"): + t2.signals.input.run.disconnect_all() + l > t2 + self.assertIn( + l.signals.output.ran, + t2.signals.input.run.connections, + msg="> should be equivalent to run/ran connection" + ) + + t2.signals.input.run.disconnect_all() + l > t2.signals.input.run + self.assertIn( + l.signals.output.ran, + t2.signals.input.run.connections, + msg="> should allow us to mix and match nodes and signal channels" + ) + + t2.signals.input.run.disconnect_all() + l.signals.output.ran > t2 + self.assertIn( + l.signals.output.ran, + t2.signals.input.run.connections, + msg="Mixing and matching should work both directions" + ) + + t2.signals.input.run.disconnect_all() + l > t2 > l + self.assertTrue( + l.signals.input.run.connections[0] is t2.signals.output.ran + and t2.signals.input.run.connections[0] is l.signals.output.ran, + msg="> should allow chaining signal connections" + ) + + def test_statuses(self): + n = Function(plus_one) + self.assertTrue(n.ready) + self.assertFalse(n.running) + self.assertFalse(n.failed) + + # Can't really test "running" until we have a background executor, so fake a bit + n.running = True + with self.assertRaises(RuntimeError): + # Running nodes can't be run + n.run() + n.running = False + + n.inputs.x = "Can't be added together with an int" + with self.assertRaises(TypeError): + # The function error should get passed up + n.run() + self.assertFalse(n.ready) + # self.assertFalse(n.running) + self.assertTrue(n.failed) + + n.inputs.x = 1 + self.assertFalse( + n.ready, + msg="Should not be ready while it has failed status" + ) + + n.run() + self.assertTrue( + n.ready, + msg="A manual run() call bypasses checks, so readiness should reset" + ) + self.assertTrue(n.ready) + # self.assertFalse(n.running) + self.assertFalse(n.failed, msg="Re-running should reset failed status") + + def test_with_self(self): + def with_self(self, x: float) -> float: + # Note: Adding internal state to the node like this goes against the best + # practice of keeping nodes 
"functional". Following python's paradigm of + # giving users lots of power, we want to guarantee that this behaviour is + # _possible_. + # TODO: update this test with a better-conforming example of this power at + # a future date. + if hasattr(self, "some_counter"): + self.some_counter += 1 + else: + self.some_counter = 1 + return x + 0.1 + + node = Function(with_self, output_labels="output") + self.assertTrue( + "x" in node.inputs.labels, + msg=f"Expected to find function input 'x' in the node input but got " + f"{node.inputs.labels}" + ) + self.assertFalse( + "self" in node.inputs.labels, + msg="Expected 'self' to be filtered out of node input, but found it in the " + "input labels" + ) + node.inputs.x = 1 + node.run() + self.assertEqual( + node.outputs.output.value, + 1.1, + msg="Basic node functionality appears to have failed" + ) + self.assertEqual( + node.some_counter, + 1, + msg="Function functions should be able to modify attributes on the node object." + ) + + node.executor = Executor() + with self.assertRaises(NotImplementedError): + # Submitting node_functions that use self is still raising + # TypeError: cannot pickle '_thread.lock' object + # For now we just fail cleanly + node.run() + + def with_messed_self(x: float, self) -> float: + return x + 0.1 + + with warnings.catch_warnings(record=True) as warning_list: + node = Function(with_messed_self) + self.assertTrue("self" in node.inputs.labels) + + self.assertEqual(len(warning_list), 1) + + def test_call(self): + node = Function(no_default, output_labels="output") + + with self.subTest("Ensure desired failures occur"): + with self.assertRaises(ValueError): + # More input args than there are input channels + node(1, 2, 3) + + with self.assertRaises(ValueError): + # Using input as an arg _and_ a kwarg + node(1, y=2, x=3) + + with self.subTest("Make sure data updates work as planned"): + node(1, y=2) + self.assertEqual( + node.inputs.x.value, + 1, + msg="__call__ should accept args to update input" + ) + self.assertEqual( + node.inputs.y.value, + 2, + msg="__call__ should accept kwargs to update input" + ) + self.assertEqual( + node.outputs.output.value, 1 + 2 + 1, msg="__call__ should run things" + ) + + node(3) # Implicitly test partial update + self.assertEqual( + no_default(3, 2), + node.outputs.output.value, + msg="__call__ should allow updating only _some_ input before running" + ) + + with self.subTest("Check that bad kwargs don't stop good ones"): + with self.assertWarns(Warning): + original_label = node.label + node(4, label="won't get read", y=5, foobar="not a kwarg of any sort") + + self.assertEqual( + node.label, + original_label, + msg="You should only be able to update input on a call, that's " + "what the warning is for!" 
+ ) + self.assertTupleEqual( + (node.inputs.x.value, node.inputs.y.value), + (4, 5), + msg="The warning should not prevent other data from being parsed" + ) + + with self.assertWarns(Warning): + # It's also fine if you just have a typo in your kwarg or whatever, + # there should just be a warning that the data didn't get updated + node(some_randome_kwaaaaarg="foo") + + def test_return_value(self): + node = Function(plus_one) + + with self.subTest("Run on main process"): + return_on_call = node(1) + self.assertEqual( + return_on_call, + plus_one(1), + msg="Run output should be returned on call" + ) + + node.inputs.x = 2 + return_on_explicit_run = node.run() + self.assertEqual( + return_on_explicit_run, + plus_one(2), + msg="On explicit run, the most recent input data should be used and the " + "result should be returned" + ) + + with self.subTest("Run on executor"): + node.executor = Executor() + + return_on_explicit_run = node.run() + self.assertIsInstance( + return_on_explicit_run, + Future, + msg="Running with an executor should return the future" + ) + with self.assertRaises(RuntimeError): + # The executor run should take a second + # So we can double check that attempting to run while already running + # raises an error + node.run() + node.future.result() # Wait for the remote execution to finish + + +@unittest.skipUnless(version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+") +class TestSingleValue(unittest.TestCase): + def test_instantiation(self): + node = SingleValue(no_default, 1, y=2, output_labels="output") + node.run() + self.assertEqual( + no_default(1, 2), + node.outputs.output.value, + msg="Single value node should allow function input by arg and kwarg" + ) + + with self.assertRaises(ValueError): + # Too many labels + SingleValue(plus_one, output_labels=["z", "excess_label"]) + + def test_item_and_attribute_access(self): + class Foo: + some_attribute = "exists" + connected = True # Overlaps with an attribute of the node + + def __getitem__(self, item): + if item == 0: + return True + else: + return False + + def returns_foo() -> Foo: + return Foo() + + svn = SingleValue(returns_foo, output_labels="foo") + svn.run() + + self.assertEqual( + svn.some_attribute, + "exists", + msg="Should fall back to looking on the single value" + ) + + self.assertEqual( + svn.connected, + False, + msg="Should return the _node_ attribute, not the single value attribute" + ) + + with self.assertRaises(AttributeError): + svn.doesnt_exists_anywhere + + self.assertEqual( + svn[0], + True, + msg="Should fall back to looking on the single value" + ) + + self.assertEqual( + svn["some other key"], + False, + msg="Should fall back to looking on the single value" + ) + + def test_repr(self): + with self.subTest("Filled data"): + svn = SingleValue(plus_one) + svn.run() + self.assertEqual( + svn.__repr__(), svn.outputs.y.value.__repr__(), + msg="SingleValueNodes should have their output as their representation" + ) + + with self.subTest("Not data"): + svn = SingleValue(no_default, output_labels="output") + self.assertIs(svn.outputs.output.value, NotData) + self.assertTrue( + svn.__repr__().endswith(NotData.__name__), + msg="When the output is still not data, the representation should " + "indicate this" + ) + + def test_str(self): + svn = SingleValue(plus_one) + svn.run() + self.assertTrue( + str(svn).endswith(str(svn.single_value)), + msg="SingleValueNodes should have their output as a string in their string " + "representation (e.g., perhaps with a reminder note that this is " + 
"actually still a Function and not just the value you're seeing.)" + ) + + def test_easy_output_connection(self): + svn = SingleValue(plus_one) + regular = Function(plus_one) + + regular.inputs.x = svn + + self.assertIn( + svn.outputs.y, regular.inputs.x.connections, + msg="SingleValueNodes should be able to make connections between their " + "output and another node's input by passing themselves" + ) + + svn > regular + svn.run() + self.assertEqual( + regular.outputs.y.value, 3, + msg="SingleValue connections should pass data just like usual; in this " + "case default->plus_one->plus_one = 1 + 1 +1 = 3" + ) + + at_instantiation = Function(plus_one, x=svn) + self.assertIn( + svn.outputs.y, at_instantiation.inputs.x.connections, + msg="The parsing of SingleValue output as a connection should also work" + "from assignment at instantiation" + ) + + def test_working_directory(self): + n_f = Function(plus_one) + self.assertTrue(n_f._working_directory is None) + self.assertIsInstance(n_f.working_directory, DirectoryObject) + self.assertTrue(str(n_f.working_directory.path).endswith(n_f.label)) + n_f.working_directory.delete() + + def test_disconnection(self): + n1 = Function(no_default, output_labels="out") + n2 = Function(no_default, output_labels="out") + n3 = Function(no_default, output_labels="out") + n4 = Function(plus_one) + + n3.inputs.x = n1.outputs.out + n3.inputs.y = n2.outputs.out + n4.inputs.x = n3.outputs.out + n2 > n3 > n4 + disconnected = n3.disconnect() + self.assertListEqual( + disconnected, + [ + # Inputs + (n3.inputs.x, n1.outputs.out), + (n3.inputs.y, n2.outputs.out), + # Outputs + (n3.outputs.out, n4.inputs.x), + # Signals (inputs, then output) + (n3.signals.input.run, n2.signals.output.ran), + (n3.signals.output.ran, n4.signals.input.run), + ], + msg="Expected to find pairs (starting with the node disconnect was called " + "on) of all broken connections among input, output, and signals." 
+ ) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/unit/test_io.py b/tests/unit/test_io.py new file mode 100644 index 00000000..af213043 --- /dev/null +++ b/tests/unit/test_io.py @@ -0,0 +1,181 @@ +from unittest import TestCase, skipUnless +from sys import version_info + +from pyiron_workflow.channels import ( + InputData, InputSignal, OutputData, OutputSignal +) +from pyiron_workflow.io import Inputs, Outputs, Signals + + +class DummyNode: + def __init__(self): + self.running = False + self.label = "node_label" + + def update(self): + pass + + +@skipUnless(version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+") +class TestDataIO(TestCase): + + @classmethod + def setUp(self) -> None: + node = DummyNode() + self.inputs = [ + InputData(label="x", node=node, default=0, type_hint=float), + InputData(label="y", node=node, default=1, type_hint=float) + ] + outputs = [ + OutputData(label="a", node=node, type_hint=float), + ] + + self.post_facto_output = OutputData(label="b", node=node, type_hint=float) + + self.input = Inputs(*self.inputs) + self.output = Outputs(*outputs) + + def test_access(self): + self.assertEqual(self.input.x, self.input["x"]) + + def test_assignment(self): + with self.assertRaises(TypeError): + self.input.foo = "not an input channel" + + with self.assertRaises(TypeError): + # Right label, and a channel, but wrong type of channel + self.input.b = self.post_facto_output + + with self.subTest("Successful channel assignment"): + self.output.b = self.post_facto_output + + with self.subTest("Can assign to a key that is not the label"): + label_before_assignment = self.post_facto_output.label + self.output.not_this_channels_name = self.post_facto_output + self.assertIs( + self.output.not_this_channels_name, + self.post_facto_output, + msg="Expected channel to get assigned" + ) + self.assertEqual( + self.post_facto_output.label, + label_before_assignment, + msg="Labels should not get updated on assignment of channels to IO " + "collections" + ) + + def test_connection(self): + self.input.x = self.input.y + self.assertEqual( + 0, + len(self.input.x.connections), + msg="Shouldn't be allowed to connect two inputs, but only passes warning" + ) + + self.input.x = self.output.a + self.assertIn( + self.input.x, + self.output.a.connections, + msg="Should be able to create connections by assignment" + ) + + self.input.x = 7 + self.assertEqual(self.input.x.value, 7) + + self.input.y = self.output.a + disconnected = self.input.disconnect() + self.assertListEqual( + disconnected, + [ + (self.input.x, self.output.a), + (self.input.y, self.output.a) + ], + msg="Disconnecting the panel should disconnect all children" + ) + + def test_conversion(self): + converted = self.input.to_value_dict() + for template in self.inputs: + self.assertEqual(template.default, converted[template.label]) + self.assertEqual( + len(self.inputs), + len(converted), + msg="And it shouldn't have any extra items either" + ) + + def test_iteration(self): + self.assertTrue(all([c.label in self.input.labels for c in self.input])) + + def test_connections_property(self): + self.assertEqual( + len(self.input.connections), + 0, + msg="Sanity check expectations about self.input" + ) + self.assertEqual( + len(self.output.connections), + 0, + msg="Sanity check expectations about self.input" + ) + + for inp in self.input: + inp.connect(self.output.a) + + self.assertEqual( + len(self.output.connections), + len(self.input), + msg="Expected to find all the channels in the input" + ) + 
self.assertEqual(
+            len(self.input.connections),
+            1,
+            msg="Each unique connection should appear only once"
+        )
+        self.assertIs(
+            self.input.connections[0],
+            self.input.x.connections[0],
+            msg="The IO connection found should be the same object as the channel "
+                "connection"
+        )
+
+
+@skipUnless(version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+")
+class TestSignalIO(TestCase):
+    def setUp(self) -> None:
+        node = DummyNode()
+
+        def do_nothing():
+            pass
+
+        signals = Signals()
+        signals.input.run = InputSignal("run", node, do_nothing)
+        signals.input.foo = InputSignal("foo", node, do_nothing)
+        signals.output.ran = OutputSignal("ran", node)
+        signals.output.bar = OutputSignal("bar", node)
+
+        signals.output.ran > signals.input.run
+        signals.output.ran > signals.input.foo
+        signals.output.bar > signals.input.run
+        signals.output.bar > signals.input.foo
+
+        self.signals = signals
+
+    def test_disconnect(self):
+        self.assertEqual(
+            4,
+            len(self.signals.disconnect()),
+            msg="Disconnect should disconnect all on panels and the Signals super-panel"
+        )
+
+    def test_disconnect_run(self):
+        self.assertEqual(
+            2,
+            len(self.signals.disconnect_run()),
+            msg="Should disconnect exactly everything connected to run"
+        )
+
+        no_run_signals = Signals()
+        self.assertEqual(
+            0,
+            len(no_run_signals.disconnect_run()),
+            msg="If there is no run channel, the list of disconnections should be empty"
+        )
diff --git a/tests/unit/test_macro.py b/tests/unit/test_macro.py
new file mode 100644
index 00000000..c928d4be
--- /dev/null
+++ b/tests/unit/test_macro.py
@@ -0,0 +1,242 @@
+from functools import partialmethod
+import unittest
+from sys import version_info
+
+from pyiron_workflow.channels import NotData
+from pyiron_workflow.function import SingleValue
+from pyiron_workflow.macro import Macro
+
+
+def add_one(x):
+    result = x + 1
+    return result
+
+
+def add_three_macro(macro):
+    macro.one = SingleValue(add_one)
+    SingleValue(add_one, macro.one, label="two", parent=macro)
+    macro.add(SingleValue(add_one, macro.two, label="three"))
+    # Cover a handful of addition methods,
+    # although these are more thoroughly tested in Workflow tests
+
+
+@unittest.skipUnless(version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+")
+class TestMacro(unittest.TestCase):
+
+    def test_labels(self):
+        m = Macro(add_three_macro)
+        self.assertEqual(
+            m.label,
+            add_three_macro.__name__,
+            msg="Label should be automatically generated"
+        )
+        label = "custom_name"
+        m2 = Macro(add_three_macro, label=label)
+        self.assertEqual(m2.label, label, msg="Should be able to specify a label")
+
+    def test_wrapper_function(self):
+        m = Macro(add_three_macro)
+
+        self.assertIs(
+            m.outputs.three__result.value,
+            NotData,
+            msg="Output should be accessible with the usual naming convention, but we "
+                "have not run yet so there shouldn't be any data"
+        )
+
+        input_x = 1
+        expected_value = add_one(add_one(add_one(input_x)))
+        out = m(one__x=input_x)  # Take kwargs to set input at runtime
+
+        self.assertEqual(
+            out.three__result,
+            expected_value,
+            msg="Macros should return the output, just like other nodes"
+        )
+        self.assertEqual(
+            m.outputs.three__result.value,
+            expected_value,
+            msg="Macros should get output updated, just like other nodes"
+        )
+
+    def test_subclass(self):
+        class MyMacro(Macro):
+            def build_graph(self):
+                add_three_macro(self)
+
+            __init__ = partialmethod(
+                Macro.__init__,
+                build_graph,
+            )
+
+        x = 0
+        m = MyMacro(one__x=x)
+        m.run()
+        self.assertEqual(
+            m.outputs.three__result.value,
+            add_one(add_one(add_one(x))),
+            msg="Subclasses should be able to simply override the graph_creator arg"
+        )
+
+    def test_key_map(self):
+        m = Macro(
+            add_three_macro,
+            inputs_map={"one__x": "my_input"},
+            outputs_map={
+                "three__result": "my_output",
+                "two__result": "intermediate"
+            },
+        )
+        self.assertSetEqual(
+            set(m.inputs.labels),
+            set(("my_input",)),
+            msg="Input should be relabelled, but not added to or taken away from"
+        )
+        self.assertSetEqual(
+            set(m.outputs.labels),
+            set(("my_output", "intermediate")),
+            msg="Output should be relabelled and expanded"
+        )
+
+        with self.subTest("Make sure new names can be used as usual"):
+            x = 0
+            out = m(my_input=x)
+            self.assertEqual(
+                out.my_output,
+                add_one(add_one(add_one(x))),
+                msg="The output should be accessible under its new label"
+            )
+            self.assertEqual(
+                out.intermediate,
+                add_one(add_one(x)),
+                msg="New, internally connected output that was specifically requested "
+                    "should be accessible"
+            )
+
+        with self.subTest("IO can be disabled"):
+            m = Macro(
+                add_three_macro,
+                inputs_map={"one__x": None},
+                outputs_map={"three__result": None},
+            )
+            self.assertEqual(
+                len(m.inputs.labels),
+                0,
+                msg="Only inputs should have been disabled"
+            )
+            self.assertEqual(
+                len(m.outputs.labels),
+                0,
+                msg="Only outputs should have been disabled"
+            )
+
+    def test_nesting(self):
+        def nested_macro(macro):
+            macro.a = SingleValue(add_one)
+            macro.b = Macro(
+                add_three_macro,
+                one__x=macro.a,
+                outputs_map={"two__result": "intermediate_result"}
+            )
+            macro.c = Macro(
+                add_three_macro,
+                one__x=macro.b.outputs.three__result,
+                outputs_map={"two__result": "intermediate_result"}
+            )
+            macro.d = SingleValue(
+                add_one,
+                x=macro.c.outputs.three__result,
+            )
+            macro.a > macro.b > macro.c > macro.d
+            macro.starting_nodes = [macro.a]
+            # This definition of the execution graph is not strictly necessary in this
+            # simple DAG case; we just do it to make sure nesting defined/automatic
+            # macros works ok
+            macro.outputs_map = {"b__intermediate_result": "deep_output"}
+
+        m = Macro(nested_macro)
+        self.assertEqual(m(a__x=0).d__result, 8)
+
+        m2 = Macro(nested_macro)
+
+        with self.subTest("Test Node.get_parent_proximate_to"):
+            self.assertIs(
+                m.b,
+                m.b.two.get_parent_proximate_to(m),
+                msg="Should return parent closest to the passed composite"
+            )
+
+            self.assertIsNone(
+                m.b.two.get_parent_proximate_to(m2),
+                msg="Should return None when composite is not in parentage"
+            )
+
+        with self.subTest("Test Node.get_first_shared_parent"):
+            self.assertIs(
+                m.b,
+                m.b.two.get_first_shared_parent(m.b.three),
+                msg="Should get the parent when parents are the same"
+            )
+            self.assertIs(
+                m,
+                m.b.two.get_first_shared_parent(m.c.two),
+                msg="Should find first matching object in parentage"
+            )
+            self.assertIs(
+                m,
+                m.b.two.get_first_shared_parent(m.d),
+                msg="Should work when depth is not equal"
+            )
+            self.assertIsNone(
+                m.b.two.get_first_shared_parent(m2.b.two),
+                msg="Should return None when no shared parent exists"
+            )
+            self.assertIsNone(
+                m.get_first_shared_parent(m.b),
+                msg="Should return None when parent is None"
+            )
+
+    def test_execution_automation(self):
+        fully_automatic = add_three_macro
+
+        def fully_defined(macro):
+            add_three_macro(macro)
+            macro.one > macro.two > macro.three
+            macro.starting_nodes = [macro.one]
+
+        def only_order(macro):
+            add_three_macro(macro)
+            macro.two > macro.three
+
+        def only_starting(macro):
+            add_three_macro(macro)
+            macro.starting_nodes = [macro.one]
+
+        m_auto = Macro(fully_automatic)
+        m_user = 
+
+        x = 0
+        expected = add_one(add_one(add_one(x)))
+        self.assertEqual(
+            m_auto(one__x=x).three__result,
+            expected,
+            "DAG macros should run fine without user specification of execution."
+        )
+        self.assertEqual(
+            m_user(one__x=x).three__result,
+            expected,
+            "Macros should run fine if the user nicely specifies the execution graph."
+        )
+
+        with self.subTest("Partially specified execution should fail"):
+            # We don't yet check for _crappy_ user-defined execution,
+            # but we should make sure it's at least valid in principle
+            with self.assertRaises(ValueError):
+                Macro(only_order)
+
+            with self.assertRaises(ValueError):
+                Macro(only_starting)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/unit/test_node_package.py b/tests/unit/test_node_package.py
new file mode 100644
index 00000000..ae575e34
--- /dev/null
+++ b/tests/unit/test_node_package.py
@@ -0,0 +1,68 @@
+from unittest import TestCase, skipUnless
+from sys import version_info
+
+from pyiron_workflow.node_package import NodePackage
+from pyiron_workflow.function import function_node
+
+
+@function_node()
+def dummy(x: int = 0):
+    return x
+
+
+@skipUnless(version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+")
+class TestNodePackage(TestCase):
+    def setUp(self) -> None:
+        self.package = NodePackage(dummy)
+
+    def test_init(self):
+        self.assertTrue(
+            hasattr(self.package, dummy.__name__),
+            msg="Classes should be added at instantiation"
+        )
+
+    def test_access(self):
+        node = self.package.Dummy()
+        self.assertIsInstance(node, dummy)
+
+    def test_update(self):
+        with self.assertRaises(KeyError):
+            self.package.Dummy = "This is already a node class name"
+
+        with self.assertRaises(KeyError):
+            self.package.update = "This is already a method"
+
+        with self.assertRaises(TypeError):
+            self.package.available_name = "But we can still only assign node classes"
+
+        @function_node("y")
+        def add(x: int = 0):
+            return x + 1
+
+        self.package.node_class_and_free_key = add  # Should work!
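+        # The assignment above succeeds because ``add`` is a node class and the
+        # attribute name is unoccupied; as asserted above (and just below), occupied
+        # names raise KeyError and non-class values raise TypeError.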
+
+        with self.assertRaises(KeyError):
+            # This is already occupied by another node class
+            self.package.Dummy = add
+
+        old_dummy_instance = self.package.Dummy(label="old_dummy_instance")
+
+        @function_node()
+        def dummy(x: int = 0):
+            y = x + 1
+            return y
+
+        self.package.update(dummy)
+
+        self.assertEqual(len(self.package), 2, msg="Update should replace, not extend")
+
+        new_dummy_instance = self.package.Dummy(label="new_dummy_instance")
+
+        old_dummy_instance.run()
+        new_dummy_instance.run()
+        self.assertEqual(
+            old_dummy_instance.outputs.x.value, 0, msg="Should have old functionality"
+        )
+        self.assertEqual(
+            new_dummy_instance.outputs.y.value, 1, msg="Should have new functionality"
+        )
diff --git a/tests/unit/test_output_parser.py b/tests/unit/test_output_parser.py
new file mode 100644
index 00000000..c92c669b
--- /dev/null
+++ b/tests/unit/test_output_parser.py
@@ -0,0 +1,90 @@
+from sys import version_info
+import unittest
+
+import numpy as np
+
+from pyiron_workflow.output_parser import ParseOutput
+
+
+@unittest.skipUnless(
+    version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+"
+)
+class TestParseOutput(unittest.TestCase):
+    def test_parsing(self):
+        with self.subTest("Single return"):
+            def identity(x):
+                return x
+            self.assertListEqual(ParseOutput(identity).output, ["x"])
+
+        with self.subTest("Expression return"):
+            def add(x, y):
+                return x + y
+            self.assertListEqual(ParseOutput(add).output, ["x + y"])
+
+        with self.subTest("Weird whitespace"):
+            def add_with_whitespace(x, y):
+                return x    +    y
+            self.assertListEqual(ParseOutput(add_with_whitespace).output, ["x + y"])
+
+        with self.subTest("Multiple expressions"):
+            def add_and_subtract(x, y):
+                return x + y, x - y
+            self.assertListEqual(
+                ParseOutput(add_and_subtract).output,
+                ["x + y", "x - y"]
+            )
+
+        with self.subTest("Best-practice (well-named return vars)"):
+            def md(job):
+                temperature = job.output.temperature
+                energy = job.output.energy
+                return temperature, energy
+            self.assertListEqual(ParseOutput(md).output, ["temperature", "energy"])
+
+        with self.subTest("Function call returns"):
+            def function_return(i, j):
+                return (
+                    np.arange(
+                        i, dtype=int
+                    ),
+                    np.shape(i, j)
+                )
+            self.assertListEqual(
+                ParseOutput(function_return).output,
+                ["np.arange( i, dtype=int )", "np.shape(i, j)"]
+            )
+
+        with self.subTest("Methods too"):
+            class Foo:
+                def add(self, x, y):
+                    return x + y
+            self.assertListEqual(ParseOutput(Foo.add).output, ["x + y"])
+
+    def test_void(self):
+        with self.subTest("No return"):
+            def no_return():
+                pass
+            self.assertIsNone(ParseOutput(no_return).output)
+
+        with self.subTest("Empty return"):
+            def empty_return():
+                return
+            self.assertIsNone(ParseOutput(empty_return).output)
+
+        with self.subTest("Return None explicitly"):
+            def none_return():
+                return None
+            self.assertIsNone(ParseOutput(none_return).output)
+
+    def test_multiple_branches(self):
+        def bifurcating(x):
+            if x > 5:
+                return True
+            else:
+                return False
+        with self.assertRaises(ValueError):
+            ParseOutput(bifurcating)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/unit/test_tests.py b/tests/unit/test_tests.py
deleted file mode 100644
index 2d9f5a8f..00000000
--- a/tests/unit/test_tests.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import unittest
-import pyiron_module_template
-
-
-class TestVersion(unittest.TestCase):
-    def test_version(self):
-        version = pyiron_module_template.__version__
-        print(version)
-        self.assertTrue(version.startswith('0'))
diff --git a/tests/unit/test_type_hinting.py b/tests/unit/test_type_hinting.py
new file mode 100644
index 00000000..651247c2
--- /dev/null
+++ b/tests/unit/test_type_hinting.py
@@ -0,0 +1,82 @@
+import typing
+from unittest import TestCase, skipUnless
+from sys import version_info
+
+from pyiron_workflow.type_hinting import (
+    type_hint_is_as_or_more_specific_than, valid_value
+)
+
+
+@skipUnless(version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+")
+class TestTypeHinting(TestCase):
+    def test_value_validation(self):
+        class Foo:
+            pass
+
+        class Bar:
+            def __call__(self):
+                return None
+
+        for hint, good, bad in (
+            (int | float, 1, "foo"),
+            (typing.Union[int, float], 2.0, "bar"),
+            (typing.Literal[1, 2], 2, 3),
+            (typing.Literal[1, 2], 1, "baz"),
+            (Foo, Foo(), Foo),
+            (typing.Type[Bar], Bar, Bar()),
+            # (callable, Bar(), Foo()),  # Misses the bad!
+            # Can't hint args and returns without typing.Callable anyhow, so that's
+            # what people should be using regardless
+            (typing.Callable, Bar(), Foo()),
+            (tuple[int, float], (1, 1.1), ("fo", 0)),
+            (dict[str, int], {'a': 1}, {'a': 'b'}),
+        ):
+            with self.subTest(msg=f"Good {good} vs hint {hint}"):
+                self.assertTrue(valid_value(good, hint))
+            with self.subTest(msg=f"Bad {bad} vs hint {hint}"):
+                self.assertFalse(valid_value(bad, hint))
+
+    def test_hint_comparisons(self):
+        # Standard types and typing types should be interoperable
+        # tuple, dict, and typing.Callable care about the exact matching of args
+        # Everyone else just needs to have args be a subset (e.g. typing.Literal)
+
+        for target, reference, is_more_specific in [
+            (int, int | float, True),
+            (int | float, int, False),
+            (typing.Literal[1, 2], typing.Literal[1, 2, 3], True),
+            (typing.Literal[1, 2, 3], typing.Literal[1, 2], False),
+            (tuple[str, int], typing.Tuple[str, int], True),
+            (typing.Tuple[int, str], tuple[str, int], False),
+            (tuple[str, int], typing.Tuple[str, int | float], True),
+            (typing.Tuple[str, int | float], tuple[str, int], False),
+            (tuple[str, int], typing.Tuple, True),
+            (tuple[str, int], tuple[str, int, float], False),
+            (list[int], typing.List[int], True),
+            (typing.List, list[int], False),
+            (dict[str, int], typing.Dict[str, int], True),
+            (dict[int, str], typing.Dict[str, int], False),
+            (typing.Callable[[int, float], None], typing.Callable, True),
+            (
+                typing.Callable[[int, float], None],
+                typing.Callable[[float, int], None],
+                False
+            ),
+            (
+                typing.Callable[[int, float], float],
+                typing.Callable[[int, float], float | str],
+                True
+            ),
+            (
+                typing.Callable[[int, float, str], float],
+                typing.Callable[[int, float], float],
+                False
+            ),
+        ]:
+            with self.subTest(
+                target=target, reference=reference, expected=is_more_specific
+            ):
+                self.assertEqual(
+                    type_hint_is_as_or_more_specific_than(target, reference),
+                    is_more_specific
+                )
\ No newline at end of file
diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py
new file mode 100644
index 00000000..f25df9f8
--- /dev/null
+++ b/tests/unit/test_util.py
@@ -0,0 +1,14 @@
+from unittest import TestCase, skipUnless
+from sys import version_info
+
+import pyiron_workflow.util as util
+
+
+@skipUnless(version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+")
+class TestUtil(TestCase):
+    def test_dot_dict(self):
+        dd = util.DotDict({'foo': 42})
+
+        self.assertEqual(dd['foo'], dd.foo, msg="Dot access should be equivalent.")
+        dd.bar = "towel"
+        self.assertEqual("towel", dd["bar"], msg="Dot assignment should be equivalent.")
\ No newline at end of file
diff --git a/tests/unit/test_workflow.py b/tests/unit/test_workflow.py
new file mode 100644
index 00000000..a169989b
--- /dev/null
+++ b/tests/unit/test_workflow.py
@@ -0,0 +1,391 @@
+import unittest
+from sys import version_info
+from time import sleep
+
+from bidict import ValueDuplicationError
+
+from pyiron_workflow.channels import NotData
+from pyiron_workflow.files import DirectoryObject
+from pyiron_workflow.util import DotDict
+from pyiron_workflow.workflow import Workflow
+
+
+def plus_one(x=0):
+    y = x + 1
+    return y
+
+
+@unittest.skipUnless(version_info[0] == 3 and version_info[1] >= 10, "Only supported for 3.10+")
+class TestWorkflow(unittest.TestCase):
+
+    def test_node_addition(self):
+        wf = Workflow("my_workflow")
+
+        # Validate the four ways to add a node
+        wf.add(Workflow.create.Function(plus_one, label="foo"))
+        wf.create.Function(plus_one, label="bar")
+        wf.baz = wf.create.Function(plus_one, label="whatever_baz_gets_used")
+        Workflow.create.Function(plus_one, label="qux", parent=wf)
+        self.assertListEqual(list(wf.nodes.keys()), ["foo", "bar", "baz", "qux"])
+        wf.boa = wf.qux
+        self.assertListEqual(
+            list(wf.nodes.keys()),
+            ["foo", "bar", "baz", "boa"],
+            msg="Reassignment should remove the original instance"
+        )
+
+        wf.strict_naming = False
+        # Validate name incrementation
+        wf.add(Workflow.create.Function(plus_one, label="foo"))
+        wf.create.Function(plus_one, label="bar")
+        wf.baz = wf.create.Function(
+            plus_one,
+            label="without_strict_you_can_override_by_assignment"
+        )
+        Workflow.create.Function(plus_one, label="boa", parent=wf)
+        self.assertListEqual(
+            list(wf.nodes.keys()),
+            [
+                "foo", "bar", "baz", "boa",
+                "foo0", "bar0", "baz0", "boa0",
+            ]
+        )
+
+        with self.subTest("Make sure strict naming causes a bunch of attribute errors"):
+            wf.strict_naming = True
+            # Validate name preservation
+            with self.assertRaises(AttributeError):
+                wf.add(wf.create.Function(plus_one, label="foo"))
+
+            with self.assertRaises(AttributeError):
+                wf.create.Function(plus_one, label="bar")
+
+            with self.assertRaises(AttributeError):
+                wf.baz = wf.create.Function(plus_one, label="whatever_baz_gets_used")
+
+            with self.assertRaises(AttributeError):
+                Workflow.create.Function(plus_one, label="boa", parent=wf)
+
+    def test_node_packages(self):
+        wf = Workflow("my_workflow")
+
+        # Test invocation
+        wf.create.atomistics.Bulk(cubic=True, element="Al")
+        # Test invocation with attribute assignment
+        wf.engine = wf.create.atomistics.Lammps(structure=wf.bulk)
+
+        self.assertSetEqual(
+            set(wf.nodes.keys()),
+            set(["bulk", "engine"]),
+            msg=f"Expected one node label generated automatically from the class and "
+                f"the other from the attribute assignment, but got {wf.nodes.keys()}"
+        )
+
+    def test_double_workflow_and_node_removal(self):
+        wf1 = Workflow("one")
+        wf1.create.Function(plus_one, label="node1")
+        node2 = Workflow.create.Function(
+            plus_one, label="node2", parent=wf1, x=wf1.node1.outputs.y
+        )
+        self.assertTrue(node2.connected)
+
+        wf2 = Workflow("two")
+        with self.assertRaises(ValueError):
+            # Can't belong to two workflows at once
+            wf2.add(node2)
+        wf1.remove(node2)
+        wf2.add(node2)
+        self.assertEqual(node2.parent, wf2)
+        self.assertFalse(node2.connected)
+
+    def test_workflow_io(self):
+        wf = Workflow("wf")
+        wf.create.Function(plus_one, label="n1")
+        wf.create.Function(plus_one, label="n2")
+        wf.create.Function(plus_one, label="n3")
+
+        with self.subTest("Workflow IO should be drawn from its nodes"):
+            self.assertEqual(len(wf.inputs), 3)
+            self.assertEqual(len(wf.outputs), 3)
+
+        wf.n3.inputs.x = wf.n2.outputs.y
+        wf.n2.inputs.x = wf.n1.outputs.y
+
+        with self.subTest("Only unconnected channels should count"):
+            self.assertEqual(len(wf.inputs), 1)
+            self.assertEqual(len(wf.outputs), 1)
+
+        with self.subTest(
+            "IO should be re-mappable, including exposing internally connected "
+            "channels"
+        ):
+            wf.inputs_map = {"n1__x": "inp"}
+            wf.outputs_map = {"n3__y": "out", "n2__y": "intermediate"}
+            out = wf(inp=0)
+            self.assertEqual(out.out, 3)
+            self.assertEqual(out.intermediate, 2)
+
+    def test_node_decorator_access(self):
+        @Workflow.wrap_as.function_node("y")
+        def plus_one(x: int = 0) -> int:
+            return x + 1
+
+        self.assertEqual(plus_one().run(), 1)
+
+    def test_working_directory(self):
+        wf = Workflow("wf")
+        self.assertTrue(wf._working_directory is None)
+        self.assertIsInstance(wf.working_directory, DirectoryObject)
+        self.assertTrue(str(wf.working_directory.path).endswith(wf.label))
+        wf.create.Function(plus_one)
+        self.assertTrue(
+            str(wf.plus_one.working_directory.path).endswith(wf.plus_one.label)
+        )
+        wf.working_directory.delete()
+
+    def test_no_parents(self):
+        wf = Workflow("wf")
+        wf2 = Workflow("wf2")
+        wf2.parent = None  # Is already the value and should ignore this
+        with self.assertRaises(TypeError):
+            # We currently specify workflows shouldn't get parents, this just verifies
+            # the spec. If that spec changes, test instead that you _can_ set parents!
+            wf2.parent = "not None"
+
+        with self.assertRaises(TypeError):
+            # Setting a non-None value to parent raises the type error from the setter
+            wf2.parent = wf
+
+    def test_executor(self):
+        wf = Workflow("wf")
+        with self.assertRaises(NotImplementedError):
+            # Submitting callables that use self is still raising
+            # TypeError: cannot pickle '_thread.lock' object
+            # For now we just fail cleanly
+            wf.executor = "literally anything other than None should raise the error"
+
+    def test_parallel_execution(self):
+        wf = Workflow("wf")
+
+        @Workflow.wrap_as.single_value_node()
+        def five(sleep_time=0.):
+            sleep(sleep_time)
+            five = 5
+            return five
+
+        @Workflow.wrap_as.single_value_node("sum")
+        def sum(a, b):
+            return a + b
+
+        wf.slow = five(sleep_time=1)
+        wf.fast = five()
+        wf.sum = sum(a=wf.fast, b=wf.slow)
+
+        wf.slow.executor = wf.create.Executor()
+
+        wf.slow.run()
+        wf.fast.run()
+        self.assertTrue(
+            wf.slow.running,
+            msg="The slow node should still be running"
+        )
+        self.assertEqual(
+            wf.fast.outputs.five.value,
+            5,
+            msg="The slow node should not prohibit the completion of the fast node"
+        )
+        self.assertEqual(
+            wf.sum.outputs.sum.value,
+            NotData,
+            msg="The slow node _should_ hold up the downstream node it feeds"
+        )
+
+        while wf.slow.future.running():
+            sleep(0.1)
+
+        wf.sum.run()
+        self.assertEqual(
+            wf.sum.outputs.sum.value,
+            5 + 5,
+            msg="After the slow node completes, its output should be updated as a "
+                "callback, and downstream nodes should proceed"
+        )
+
+    def test_call(self):
+        wf = Workflow("wf")
+
+        wf.a = wf.create.SingleValue(plus_one)
+        wf.b = wf.create.SingleValue(plus_one)
+
+        @Workflow.wrap_as.single_value_node("sum")
+        def sum_(a, b):
+            return a + b
+
+        wf.sum = sum_(wf.a, wf.b)
+        wf.run()
+        self.assertEqual(
+            wf.a.outputs.y.value + wf.b.outputs.y.value,
+            wf.sum.outputs.sum.value,
+            msg="Sanity check"
+        )
+        wf(a__x=42, b__x=42)
+        self.assertEqual(
+            plus_one(42) + plus_one(42),
+            wf.sum.outputs.sum.value,
+            msg="Workflow should accept input channel kwargs and update inputs "
+                "accordingly"
+            # Since the nodes run automatically, there is no need for wf.run() here
+        )
+
+        with self.assertRaises(TypeError):
+            # IO is not ordered, so args make no sense for a workflow call
+            # We _must_ use kwargs
+            wf(42, 42)
+
+    def test_return_value(self):
+        wf = Workflow("wf")
+        wf.a = wf.create.SingleValue(plus_one)
+        wf.b = wf.create.SingleValue(plus_one, x=wf.a)
+
+        with self.subTest("Run on main process"):
+            return_on_call = wf(a__x=1)
+            self.assertEqual(
+                return_on_call,
+                DotDict({"b__y": 1 + 2}),
+                msg="Run output should be returned on call. Expecting a DotDict of "
+                    "output values"
+            )
+
+            wf.inputs.a__x = 2
+            return_on_explicit_run = wf.run()
+            self.assertEqual(
+                return_on_explicit_run["b__y"],
+                2 + 2,
+                msg="On explicit run, the most recent input data should be used and "
+                    "the result should be returned"
+            )
+
+        # Note: We don't need to test running on an executor, because Workflows can't
+        # do that yet
+
+    def test_execution_automation(self):
+        @Workflow.wrap_as.single_value_node("out")
+        def foo(x, y):
+            return x + y
+
+        def make_workflow():
+            wf = Workflow("dag")
+            wf.n1l = foo(0, 1)
+            wf.n1r = foo(2, 0)
+            wf.n2l = foo(-10, wf.n1l)
+            wf.n2m = foo(wf.n1l, wf.n1r)
+            wf.n2r = foo(wf.n1r, 10)
+            return wf
+
+        def matches_expectations(results):
+            expected = {'n2l__out': -9, 'n2m__out': 3, 'n2r__out': 12}
+            return all(expected[k] == v for k, v in results.items())
+
+        auto = make_workflow()
+        self.assertTrue(
+            matches_expectations(auto()),
+            msg="DAGs should run automatically"
+        )
+
+        user = make_workflow()
+        user.automate_execution = False
+        user.n1l > user.n1r > user.n2l
+        user.n1r > user.n2m
+        user.n1r > user.n2r
+        user.starting_nodes = [user.n1l]
+        self.assertTrue(
+            matches_expectations(user()),
+            msg="Users should be allowed to ask to run things manually"
+        )
+
+        self.assertIn(
+            user.n1r.signals.output.ran,
+            user.n2r.signals.input.run.connections,
+            msg="Expected execution signals as manually defined"
+        )
+        user.automate_execution = True
+        self.assertTrue(
+            matches_expectations(user()),
+            msg="Users should be able to switch back to automatic execution"
+        )
+        self.assertNotIn(
+            user.n1r.signals.output.ran,
+            user.n2r.signals.input.run.connections,
+            msg="Expected old execution signals to be overwritten"
+        )
+        self.assertIn(
+            user.n2m.signals.output.ran,
+            user.n2r.signals.input.run.connections,
+            msg="At time of writing tests, automation makes a linear execution flow "
+                "based on node topology and initialized by the order of appearance in "
+                "the nodes list, so for a simple DAG like this the final node should "
+                "be getting triggered by the penultimate node. "
+                "If this test failed, maybe you've written more sophisticated "
+                "automation."
+        )
+
+        with self.subTest("Make sure automated cyclic graphs throw an error"):
+            trivially_cyclic = make_workflow()
+            trivially_cyclic.n1l.inputs.y = trivially_cyclic.n1l
+            with self.assertRaises(ValueError):
+                trivially_cyclic()
+
+            cyclic = make_workflow()
+            cyclic.n1l.inputs.y = cyclic.n2l
+            with self.assertRaises(ValueError):
+                cyclic()
+
+    def test_io_label_maps_are_bijective(self):
+
+        with self.subTest("Null case"):
+            Workflow(
+                "my_workflow",
+                Workflow.create.Function(plus_one, label="foo1"),
+                Workflow.create.Function(plus_one, label="foo2"),
+                inputs_map={
+                    "foo1__x": "x1",
+                    "foo2__x": "x2"
+                },
+                outputs_map=None
+            )
+
+        with self.subTest("At instantiation"):
+            with self.assertRaises(ValueDuplicationError):
+                Workflow(
+                    "my_workflow",
+                    Workflow.create.Function(plus_one, label="foo1"),
+                    Workflow.create.Function(plus_one, label="foo2"),
+                    inputs_map={
+                        "foo1__x": "x",
+                        "foo2__x": "x"
+                    }
+                )
+
+        with self.subTest("Post-facto assignment"):
+            wf = Workflow(
+                "my_workflow",
+                Workflow.create.Function(plus_one, label="foo1"),
+                Workflow.create.Function(plus_one, label="foo2"),
+            )
+            wf.outputs_map = None
+            with self.assertRaises(ValueDuplicationError):
+                wf.inputs_map = {"foo1__x": "x", "foo2__x": "x"}
+
+        with self.subTest("Post-facto update"):
+            wf = Workflow(
+                "my_workflow",
+                Workflow.create.Function(plus_one, label="foo1"),
+                Workflow.create.Function(plus_one, label="foo2"),
+            )
+            wf.inputs_map = {"foo1__x": "x1", "foo2__x": "x2"}
+            with self.assertRaises(ValueDuplicationError):
+                wf.inputs_map["foo2__x"] = "x1"
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/update_module_name.sh b/update_module_name.sh
deleted file mode 100644
index 4255b499..00000000
--- a/update_module_name.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# Mac users: You [may first need to install gnu-sed](https://github.com/MigleSur/GenAPI/issues/8)
-
-module_name="pyiron_IntendedModuleName"
-rst_delimit="=========================" # This should be as many '=' as the name length.
-
-for file in .binder/postBuild \
-    .github/ISSUE_TEMPLATE/*.md \
-    docs/conf.py \
-    docs/index.rst \
-    notebooks/version.ipynb \
-    tests/unit/test_tests.py \
-    .coveragerc \
-    .gitattributes \
-    MANIFEST.in \
-    setup.cfg \
-    setup.py
-do
-    sed -i "s/pyiron_module_template/${module_name}/g" ${file}
-    sed -i "s/======================/${rst_delimit}/g" ${file}
-done
-
-
-mv pyiron_module_template ${module_name}
-
-python -m versioneer setup
-
-rm update_module_name.sh
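
For orientation, a minimal usage sketch distilled from the new tests above, assuming the pyiron_workflow API exactly as those tests exercise it; the workflow label and node names ("demo", "a", "b") are illustrative and not taken from the diff:

    from pyiron_workflow.workflow import Workflow

    # A single-value node whose one output channel is labelled "y"
    @Workflow.wrap_as.single_value_node("y")
    def plus_one(x: int = 0) -> int:
        return x + 1

    wf = Workflow("demo")
    wf.a = plus_one()              # attribute assignment registers the node as "a"
    wf.b = plus_one(x=wf.a)        # passing a node wires b's input to a's output
    wf.inputs_map = {"a__x": "x"}  # relabel the only unconnected input channel
    out = wf(x=1)                  # kwargs set inputs; DAG execution is automatic
    print(out.b__y)                # prints 3

As in test_return_value, calling the workflow returns its unconnected output channels as a DotDict keyed by node__channel labels.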