Testing out external scripts #99

Closed · wants to merge 6 commits
176 changes: 176 additions & 0 deletions cupid/ploomber.py
@@ -0,0 +1,176 @@
from __future__ import annotations

import os
import subprocess
import sys
import tempfile
from pathlib import Path
from subprocess import PIPE

from ploomber.tasks import ScriptRunner


def _python_bin():
"""
    Return the path to the current Python executable, falling back to
    'python' if it cannot be determined
"""
executable = sys.executable
return executable if executable else "python"


def _run_script_in_subprocess(interpreter, path, cwd, env):
    # normalize a single executable name to argv form
    cmd = [interpreter] if isinstance(interpreter, str) else list(interpreter)
    res = subprocess.run(cmd + [str(path)], cwd=cwd, env=env, stderr=PIPE)
if res.returncode:
stderr = res.stderr.decode()

if "SyntaxError" in stderr:
stderr += (
"(Note: IPython magics are not supported in "
"ScriptRunner, remove them or use the regular "
"NotebookRunner)"
)

        raise RuntimeError(f"Error while executing ScriptRunner:\n{stderr}")


class CUPiDScriptRunner(ScriptRunner):
"""
    Similar to NotebookRunner, except it runs the code with the Python
    interpreter instead of papermill, so it does not generate an output
    notebook. Like NotebookRunner, it works by injecting a cell into the
    source code. Source can be a ``.py`` script or an ``.ipynb`` notebook.
    **Does not support magics.**

Parameters
----------
source: str or pathlib.Path
Script source, if str, the content is interpreted as the actual
script, if pathlib.Path, the content of the file is loaded. When
loading from a str, ext_in must be passed
product: ploomber.File
The output file
dag: ploomber.DAG
A DAG to add this task to
    kernelspec_name: str, optional
        Name of a conda environment in which to run the script; when set,
        the script is executed via ``conda run -n <kernelspec_name> python``.
        Only used for Python sources; defaults to the current interpreter
    name: str, optional
        A str to identify this task. Should not already exist in the dag
    params: dict, optional
        Script parameters. These are passed as the "parameters" argument
        to the papermill.execute_notebook function; by default, "product"
        and "upstream" are included
ext_in: str, optional
Source extension. Required if loading from a str. If source is a
``pathlib.Path``, the extension from the file is used.
    static_analysis : ('disabled', 'regular', 'strict'), default='regular'
        Check for various errors in the script. In 'regular' mode, it aborts
        execution if the notebook has syntax issues or similar problems that
        would cause the code to break if executed. In 'strict' mode, it
        performs the same checks but raises an error before starting
        execution of any task; furthermore, it verifies that the parameters
        cell and the params passed to the notebook match, thus making the
        script behave like a function with a signature.
local_execution : bool, optional
Change working directory to be the parent of the script source.
Defaults to False.

Examples
--------

Spec API:

.. code-block:: yaml
:class: text-editor
:name: pipeline-yaml

tasks:
- source: script.py
              class: cupid.ploomber.CUPiDScriptRunner
product:
data: data.csv
another: another.csv

Python API:

>>> from pathlib import Path
>>> from ploomber import DAG
    >>> from cupid.ploomber import CUPiDScriptRunner
>>> from ploomber.products import File
>>> dag = DAG()
>>> product = {'data': File('data.csv'), 'another': File('another.csv')}
    >>> _ = CUPiDScriptRunner(Path('script.py'), product, dag=dag)
>>> _ = dag.build()
"""

def __init__(
self,
source,
product,
dag,
kernelspec_name=None,
name=None,
params=None,
ext_in=None,
static_analysis="regular",
local_execution=False,
):
self.kernelspec_name = kernelspec_name
self.ext_in = ext_in

kwargs = dict(hot_reload=dag._params.hot_reload)
self._source = ScriptRunner._init_source(
source,
kwargs,
ext_in,
static_analysis,
False,
False,
)
self.local_execution = local_execution
        # skip ScriptRunner.__init__: the source was already initialized above
        super(ScriptRunner, self).__init__(product, dag, name, params)

def run(self):
        # regular mode: raise here, but do not check the signature
        # strict mode: the check already ran at render time
if self.static_analysis == "regular":
self.source._check_notebook(raise_=True, check_signature=False)

fd, tmp = tempfile.mkstemp(".py")
os.close(fd)

code = "\n\n".join(
[
c["source"]
for c in self.source.nb_obj_rendered.cells
if c["cell_type"] == "code"
],
)

cwd = str(self.source.loc.parent.resolve())
orig_env = os.environ.copy()

if "PYTHONPATH" not in orig_env:
orig_env["PYTHONPATH"] = cwd
else:
orig_env["PYTHONPATH"] += os.pathsep + cwd

tmp = Path(tmp)
tmp.write_text(code)

if self.source.language == "python":
interpreter = _python_bin()
            if self.kernelspec_name:
                # run inside the requested conda environment instead of
                # the current interpreter
                interpreter = f"conda run -n {self.kernelspec_name} python".split()
elif self.source.language == "r":
interpreter = "Rscript"
else:
raise ValueError("ScriptRunner only works with Python and R scripts")

try:
_run_script_in_subprocess(interpreter, tmp, cwd, orig_env)
except Exception as e:
            raise RuntimeError(
                f"Error when executing task {self.name!r}.",
            ) from e  # should be TaskBuildError
finally:
tmp.unlink()
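
For illustration, a minimal usage sketch of the new class via the Python API — the script name, product filename, and `cupid-analysis` environment are assumptions, not part of this diff:

```python
from pathlib import Path

from ploomber import DAG
from ploomber.products import File

from cupid.ploomber import CUPiDScriptRunner

dag = DAG()
CUPiDScriptRunner(
    Path("script.py"),                 # source script (assumed to exist)
    File("data.csv"),                  # product the script is expected to write
    dag=dag,
    kernelspec_name="cupid-analysis",  # assumed conda env; omit to use sys.executable
    name="example-task",
)
dag.build()
```

With `kernelspec_name` unset, the task falls back to the current interpreter, matching `_python_bin()` above.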
5 changes: 4 additions & 1 deletion cupid/util.py
@@ -27,6 +27,8 @@
from jinja2 import Template
from papermill.engines import NBClientEngine

from cupid.ploomber import CUPiDScriptRunner


class MarkdownJinjaEngine(NBClientEngine):
"""Class for using the Jinja Engine to run notebooks"""
@@ -272,11 +274,12 @@ def create_ploomber_script_task(
if cat_path is not None:
parms_in["path_to_cat"] = cat_path

task = ploomber.tasks.ScriptRunner(
task = CUPiDScriptRunner(
Path(input_path),
ploomber.products.File(info["product"]),
dag,
params=parms_in,
kernelspec_name=info["kernel_name"],
name=output_name,
)
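
For context, a hedged sketch of the `info` mapping this call consumes — only `product` and `kernel_name` are read here, and `kernel_name` now needs to be present for every script entry; the values are illustrative:

```python
# Illustrative compute_scripts entry after parsing (values are examples):
info = {
    "product": "vector.png",          # wrapped in ploomber.products.File(...)
    "kernel_name": "cupid-analysis",  # forwarded as CUPiDScriptRunner's kernelspec_name
}
```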

163 changes: 163 additions & 0 deletions examples/coupled_model/config2.yml
@@ -0,0 +1,163 @@

################## SETUP ##################

################
# Data Sources #
################
data_sources:
  # sname is any string used as a nickname for this configuration. It will
  ### be used as the name of the folder your computed notebooks are placed in
sname: quick-run2

  # run_dir is the path to the folder where all the files associated
  ### with this configuration will be created
run_dir: .

  # nb_path_root is the path to the folder in which CUPiD will look
  ### for your template notebooks. It doesn't have to be inside run_dir,
  ### or be specific to this project, as long as the notebooks are there
nb_path_root: ../nblibrary

######################
# Computation Config #
######################

computation_config:

# default_kernel_name is the name of the environment that
### the notebooks in this configuration will be run in by default.
### It must already be installed on your machine. You can also
### specify a different environment than the default for any
### notebook in NOTEBOOK CONFIG

default_kernel_name: cupid-analysis


############# NOTEBOOK CONFIG #############

############################
# Notebooks and Parameters #
############################

# All parameters under global_params get passed to all the notebooks

global_params:
CESM_output_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing
lc_kwargs:
threads_per_worker: 1

timeseries:
num_procs: 8
ts_done: [False]
overwrite_ts: [False]
case_name: 'b.e23_alpha16b.BLT1850.ne30_t232.054'

atm:
vars: ['ACTNI', 'ACTNL', 'ACTREI','ACTREL','AODDUST']
derive_vars: [] # {'PRECT':['PRECL','PRECC'], 'RESTOM':['FLNT','FSNT']}
hist_str: 'h0'
start_years: [2]
end_years: [102]
level: 'lev'

lnd:
vars: ['ALTMAX', 'COST_NACTIVE', 'DENIT', 'EFLX_DYNBAL'] #['process_all']
derive_vars: []
hist_str: 'h0'
start_years: [2]
end_years: [102]
level: 'lev'

ocn:
vars: ['taux', 'tauy'] # ['process_all']
derive_vars: []
hist_str: 'h.frc'
start_years: [2]
end_years: [102]
level: 'lev'

ice:
vars: ['hi', 'hs', 'snowfrac', 'Tsfc'] #['process_all']
derive_vars: []
hist_str: 'h'
start_years: [2]
end_years: [102]
level: 'lev'

glc:
vars: ['usurf', 'topg'] #['process_all']
derive_vars: []
hist_str: 'initial_hist'
start_years: [2]
end_years: [102]
level: 'lev'

compute_notebooks:

# This is where all the notebooks you want run and their
### parameters are specified. Several examples of different
### types of notebooks are provided.

  # The first key under compute_notebooks (here infrastructure) is the
  ### component; each notebook beneath it (here index) is named after the
  ### template notebook from nb_path_root, minus the .ipynb
infrastructure:
index:
parameter_groups:
none: {}

atm:
adf_quick_run:
parameter_groups:
none:
adf_path: ../../../externals/ADF
config_path: .
config_fil_str: "config_f.cam6_3_119.FLTHIST_ne30.r328_gamma0.33_soae.001.yaml"

compute_scripts:

ice:
divergence:
kernel_name: cupid-analysis
parameter_groups:
none:
dummy: thpth
product: vector.png

########### JUPYTER BOOK CONFIG ###########

##################################
# Jupyter Book Table of Contents #
##################################
book_toc:

# See https://jupyterbook.org/en/stable/structure/configure.html for
## complete documentation of Jupyter book construction options

format: jb-book

  # All filenames are the notebook filename without the .ipynb, as above

root: index # root is the notebook that will be the homepage for the book
parts:

# Parts group notebooks into different sections in the Jupyter book
### table of contents, so you can organize different parts of your project.

- caption: Atmosphere

# Each chapter is the name of one of the notebooks that you executed
### in compute_notebooks above, also without .ipynb
chapters:
- file: adf_quick_run

#####################################
# Keys for Jupyter Book _config.yml #
#####################################
book_config_keys:

title: Example project # Title of your jupyter book

# Other keys can be added here, see https://jupyterbook.org/en/stable/customize/config.html
### for many more options
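
Putting the pieces together: for the `ice` / `divergence` entry above, the new runner concatenates the script's code cells into a temporary file and executes it in the configured kernel environment. A rough shell-level equivalent — the temp filename and script location are assumptions for illustration:

```python
import os
import subprocess

cwd = "../nblibrary/ice"  # assumed parent directory of divergence.py
env = os.environ.copy()
# run() puts the script's parent directory on PYTHONPATH
if "PYTHONPATH" not in env:
    env["PYTHONPATH"] = cwd
else:
    env["PYTHONPATH"] += os.pathsep + cwd

# kernel_name: cupid-analysis -> interpreter "conda run -n cupid-analysis python"
subprocess.run(
    ["conda", "run", "-n", "cupid-analysis", "python", "/tmp/tmp_divergence.py"],
    cwd=cwd,
    env=env,
    stderr=subprocess.PIPE,
)
```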