Testing out external scripts #99

Closed · wants to merge 6 commits
176 changes: 176 additions & 0 deletions cupid/ploomber.py
@@ -0,0 +1,176 @@
from __future__ import annotations

import os
import subprocess
import sys
import tempfile
from pathlib import Path
from subprocess import PIPE

from ploomber.tasks import ScriptRunner


def _python_bin():
"""
    Return the path to the current Python executable, falling back to
    'python' if it cannot be determined
"""
executable = sys.executable
return executable if executable else "python"


def _run_script_in_subprocess(interpreter, path, cwd, env):
    # normalize a single executable name to argv form
    cmd = [interpreter] if isinstance(interpreter, str) else list(interpreter)
    res = subprocess.run(cmd + [str(path)], cwd=cwd, env=env, stderr=PIPE)
if res.returncode:
stderr = res.stderr.decode()

if "SyntaxError" in stderr:
stderr += (
"(Note: IPython magics are not supported in "
"ScriptRunner, remove them or use the regular "
"NotebookRunner)"
)

        raise RuntimeError(f"Error while executing ScriptRunner:\n{stderr}")


class CUPiDScriptRunner(ScriptRunner):
"""
    Similar to NotebookRunner, except it runs the code with the Python
    interpreter instead of papermill, so it does not generate an output
    notebook. Like NotebookRunner, it works by injecting a cell into the
    source code. Source can be a ``.py`` script or an ``.ipynb`` notebook.
    **Does not support magics.**

Parameters
----------
source: str or pathlib.Path
Script source, if str, the content is interpreted as the actual
script, if pathlib.Path, the content of the file is loaded. When
loading from a str, ext_in must be passed
product: ploomber.File
The output file
dag: ploomber.DAG
A DAG to add this task to
    kernelspec_name: str, optional
        Name of a conda environment in which to run the script; when set,
        the script is executed via ``conda run -n <kernelspec_name> python``.
        Only used for Python sources; defaults to the current interpreter
    name: str, optional
        A str to identify this task. Should not already exist in the dag
    params: dict, optional
        Script parameters. These are passed as the "parameters" argument
        to the papermill.execute_notebook function; by default, "product"
        and "upstream" are included
ext_in: str, optional
Source extension. Required if loading from a str. If source is a
``pathlib.Path``, the extension from the file is used.
    static_analysis : ('disabled', 'regular', 'strict'), default='regular'
        Check for various errors in the script. In 'regular' mode, it aborts
        execution if the notebook has syntax issues or similar problems that
        would cause the code to break if executed. In 'strict' mode, it
        performs the same checks but raises an error before starting
        execution of any task; furthermore, it verifies that the parameters
        cell and the params passed to the notebook match, thus making the
        script behave like a function with a signature.
local_execution : bool, optional
Change working directory to be the parent of the script source.
Defaults to False.

Examples
--------

Spec API:

.. code-block:: yaml
:class: text-editor
:name: pipeline-yaml

tasks:
- source: script.py
              class: cupid.ploomber.CUPiDScriptRunner
product:
data: data.csv
another: another.csv

Python API:

>>> from pathlib import Path
>>> from ploomber import DAG
    >>> from cupid.ploomber import CUPiDScriptRunner
>>> from ploomber.products import File
>>> dag = DAG()
>>> product = {'data': File('data.csv'), 'another': File('another.csv')}
    >>> _ = CUPiDScriptRunner(Path('script.py'), product, dag=dag)
>>> _ = dag.build()
"""

def __init__(
self,
source,
product,
dag,
kernelspec_name=None,
name=None,
params=None,
ext_in=None,
static_analysis="regular",
local_execution=False,
):
self.kernelspec_name = kernelspec_name
self.ext_in = ext_in

kwargs = dict(hot_reload=dag._params.hot_reload)
self._source = ScriptRunner._init_source(
source,
kwargs,
ext_in,
static_analysis,
False,
False,
)
self.local_execution = local_execution
        # skip ScriptRunner.__init__: the source was already initialized above
        super(ScriptRunner, self).__init__(product, dag, name, params)

def run(self):
        # regular mode: raise here, but do not check the signature
        # strict mode: the check already ran at render time
if self.static_analysis == "regular":
self.source._check_notebook(raise_=True, check_signature=False)

fd, tmp = tempfile.mkstemp(".py")
os.close(fd)

code = "\n\n".join(
[
c["source"]
for c in self.source.nb_obj_rendered.cells
if c["cell_type"] == "code"
],
)

cwd = str(self.source.loc.parent.resolve())
orig_env = os.environ.copy()

if "PYTHONPATH" not in orig_env:
orig_env["PYTHONPATH"] = cwd
else:
orig_env["PYTHONPATH"] += os.pathsep + cwd

tmp = Path(tmp)
tmp.write_text(code)

if self.source.language == "python":
interpreter = _python_bin()
            if self.kernelspec_name:
                # run inside the requested conda environment instead of
                # the current interpreter
                interpreter = f"conda run -n {self.kernelspec_name} python".split()
elif self.source.language == "r":
interpreter = "Rscript"
else:
raise ValueError("ScriptRunner only works with Python and R scripts")

try:
_run_script_in_subprocess(interpreter, tmp, cwd, orig_env)
except Exception as e:
            raise RuntimeError(
                f"Error when executing task {self.name!r}.",
            ) from e  # should be TaskBuildError
finally:
tmp.unlink()
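
For illustration, a minimal usage sketch of the new class via the Python API — the script name, product filename, and `cupid-analysis` environment are assumptions, not part of this diff:

```python
from pathlib import Path

from ploomber import DAG
from ploomber.products import File

from cupid.ploomber import CUPiDScriptRunner

dag = DAG()
CUPiDScriptRunner(
    Path("script.py"),                 # source script (assumed to exist)
    File("data.csv"),                  # product the script is expected to write
    dag=dag,
    kernelspec_name="cupid-analysis",  # assumed conda env; omit to use sys.executable
    name="example-task",
)
dag.build()
```

With `kernelspec_name` unset, the task falls back to the current interpreter, matching `_python_bin()` above.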
5 changes: 4 additions & 1 deletion cupid/util.py
@@ -27,6 +27,8 @@
from jinja2 import Template
from papermill.engines import NBClientEngine

from cupid.ploomber import CUPiDScriptRunner


class MarkdownJinjaEngine(NBClientEngine):
"""Class for using the Jinja Engine to run notebooks"""
@@ -272,11 +274,12 @@ def create_ploomber_script_task(
if cat_path is not None:
parms_in["path_to_cat"] = cat_path

task = ploomber.tasks.ScriptRunner(
task = CUPiDScriptRunner(
Path(input_path),
ploomber.products.File(info["product"]),
dag,
params=parms_in,
kernelspec_name=info["kernel_name"],
name=output_name,
)
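
For context, a hedged sketch of the `info` mapping this call consumes — only `product` and `kernel_name` are read here, and `kernel_name` now needs to be present for every script entry; the values are illustrative:

```python
# Illustrative compute_scripts entry after parsing (values are examples):
info = {
    "product": "vector.png",          # wrapped in ploomber.products.File(...)
    "kernel_name": "cupid-analysis",  # forwarded as CUPiDScriptRunner's kernelspec_name
}
```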

163 changes: 163 additions & 0 deletions examples/coupled_model/config2.yml
@@ -0,0 +1,163 @@

################## SETUP ##################

################
# Data Sources #
################
data_sources:
  # sname is any string used as a nickname for this configuration. It will
  ### be used as the name of the folder your computed notebooks are placed in
sname: quick-run2

  # run_dir is the path to the folder where all the files associated
  ### with this configuration will be created
run_dir: .

  # nb_path_root is the path to the folder in which CUPiD will look
  ### for your template notebooks. It doesn't have to be inside run_dir,
  ### or be specific to this project, as long as the notebooks are there
nb_path_root: ../nblibrary

######################
# Computation Config #
######################

computation_config:

# default_kernel_name is the name of the environment that
### the notebooks in this configuration will be run in by default.
### It must already be installed on your machine. You can also
### specify a different environment than the default for any
### notebook in NOTEBOOK CONFIG

default_kernel_name: cupid-analysis


############# NOTEBOOK CONFIG #############

############################
# Notebooks and Parameters #
############################

# All parameters under global_params get passed to all the notebooks

global_params:
CESM_output_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing
lc_kwargs:
threads_per_worker: 1

timeseries:
num_procs: 8
ts_done: [False]
overwrite_ts: [False]
case_name: 'b.e23_alpha16b.BLT1850.ne30_t232.054'

atm:
vars: ['ACTNI', 'ACTNL', 'ACTREI','ACTREL','AODDUST']
derive_vars: [] # {'PRECT':['PRECL','PRECC'], 'RESTOM':['FLNT','FSNT']}
hist_str: 'h0'
start_years: [2]
end_years: [102]
level: 'lev'

lnd:
vars: ['ALTMAX', 'COST_NACTIVE', 'DENIT', 'EFLX_DYNBAL'] #['process_all']
derive_vars: []
hist_str: 'h0'
start_years: [2]
end_years: [102]
level: 'lev'

ocn:
vars: ['taux', 'tauy'] # ['process_all']
derive_vars: []
hist_str: 'h.frc'
start_years: [2]
end_years: [102]
level: 'lev'

ice:
vars: ['hi', 'hs', 'snowfrac', 'Tsfc'] #['process_all']
derive_vars: []
hist_str: 'h'
start_years: [2]
end_years: [102]
level: 'lev'

glc:
vars: ['usurf', 'topg'] #['process_all']
derive_vars: []
hist_str: 'initial_hist'
start_years: [2]
end_years: [102]
level: 'lev'

compute_notebooks:

# This is where all the notebooks you want run and their
### parameters are specified. Several examples of different
### types of notebooks are provided.

  # The first key under compute_notebooks (here infrastructure) is the
  ### component; each notebook beneath it (here index) is named after the
  ### template notebook from nb_path_root, minus the .ipynb
infrastructure:
index:
parameter_groups:
none: {}

atm:
adf_quick_run:
parameter_groups:
none:
adf_path: ../../../externals/ADF
config_path: .
config_fil_str: "config_f.cam6_3_119.FLTHIST_ne30.r328_gamma0.33_soae.001.yaml"

compute_scripts:

ice:
divergence:
kernel_name: cupid-analysis
parameter_groups:
none:
dummy: thpth
product: vector.png

########### JUPYTER BOOK CONFIG ###########

##################################
# Jupyter Book Table of Contents #
##################################
book_toc:

# See https://jupyterbook.org/en/stable/structure/configure.html for
## complete documentation of Jupyter book construction options

format: jb-book

  # All filenames are the notebook filename without the .ipynb, as above

root: index # root is the notebook that will be the homepage for the book
parts:

# Parts group notebooks into different sections in the Jupyter book
### table of contents, so you can organize different parts of your project.

- caption: Atmosphere

# Each chapter is the name of one of the notebooks that you executed
### in compute_notebooks above, also without .ipynb
chapters:
- file: adf_quick_run

#####################################
# Keys for Jupyter Book _config.yml #
#####################################
book_config_keys:

title: Example project # Title of your jupyter book

# Other keys can be added here, see https://jupyterbook.org/en/stable/customize/config.html
### for many more options
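
Putting the pieces together: for the `ice` / `divergence` entry above, the new runner concatenates the script's code cells into a temporary file and executes it in the configured kernel environment. A rough shell-level equivalent — the temp filename and script location are assumptions for illustration:

```python
import os
import subprocess

cwd = "../nblibrary/ice"  # assumed parent directory of divergence.py
env = os.environ.copy()
# run() puts the script's parent directory on PYTHONPATH
if "PYTHONPATH" not in env:
    env["PYTHONPATH"] = cwd
else:
    env["PYTHONPATH"] += os.pathsep + cwd

# kernel_name: cupid-analysis -> interpreter "conda run -n cupid-analysis python"
subprocess.run(
    ["conda", "run", "-n", "cupid-analysis", "python", "/tmp/tmp_divergence.py"],
    cwd=cwd,
    env=env,
    stderr=subprocess.PIPE,
)
```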