Skip to content

Commit

Permalink
Issue #191: Updated notebook Export_Experiment for nested run support
Browse files Browse the repository at this point in the history
  • Loading branch information
amesar committed Jul 21, 2024
1 parent 62818c8 commit f38a52d
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 7 deletions.
27 changes: 21 additions & 6 deletions databricks_notebooks/single/Export_Experiment.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Databricks notebook source
# MAGIC %md ### Export Experiment
# MAGIC
# MAGIC
# MAGIC #### Overview
# MAGIC * Exports an experiment and its runs (artifacts too) to a directory.
# MAGIC * Exports an experiment and its runs to a directory.
# MAGIC * Output file `experiment.json` contains top-level experiment metadata.
# MAGIC * Each run and its artifacts are stored as a sub-directory whose name is that of the run_id.
# MAGIC * Notebooks can be exported in several formats.
# MAGIC
# MAGIC
# MAGIC ##### Output folder
# MAGIC ```
# MAGIC
# MAGIC
# MAGIC +-experiment.json
# MAGIC +-d2309e6c74dc4679b576a37abf6b6af8/
# MAGIC | +-run.json
Expand All @@ -19,13 +19,17 @@
# MAGIC | | +-conda.yaml
# MAGIC | | +-MLmodel
# MAGIC ```
# MAGIC
# MAGIC
# MAGIC ##### Widgets
# MAGIC * `1. Experiment ID or name` - Either the experiment ID or experiment name.
# MAGIC * `2. Output base directory` - Base output directory of the exported experiment. All the experiment data will be saved here under the experiment ID sub-directory.
# MAGIC * `3. Run start date` - Export runs started on or after this UTC date (inclusive). Example: `2023-04-05`.
# MAGIC * `4. Export permissions` - Export Databricks permissions.
# MAGIC * `5. Notebook formats` - Standard Databricks notebook formats such as SOURCE, HTML, JUPYTER, DBC. See [Databricks Export Format](https://docs.databricks.com/dev-tools/api/latest/workspace.html#notebookexportformat) documentation.
# MAGIC * `6. Run IDs` - Comma-separated list of run IDs to export. Default is to export all runs.
# MAGIC * `7. Check nested runs` - If `yes`, also exports all the runs nested under any run specified in the `6. Run IDs` list above.
# MAGIC
# MAGIC See: https://github.com/mlflow/mlflow-export-import/blob/master/mlflow_export_import/experiment/export_experiment.py.

# COMMAND ----------

Expand All @@ -49,19 +53,28 @@

# Widget 3: optional run start date filter. A blank widget value is
# normalized to None so that no start-date filter is passed downstream.
dbutils.widgets.text("3. Run start date", "")
run_start_date = dbutils.widgets.get("3. Run start date")
if run_start_date == "":
    run_start_date = None

# Widget 4: yes/no dropdown converted to a boolean flag.
dbutils.widgets.dropdown("4. Export permissions","no",["yes","no"])
export_permissions = dbutils.widgets.get("4. Export permissions") == "yes"

# Widget 5: notebook formats are read by a helper defined elsewhere
# (not visible in this cell); the argument is the widget number.
notebook_formats = get_notebook_formats(5)

# Widget 6: optional comma-separated list of run IDs. A blank value is left
# as the empty string; otherwise it is split into a list of IDs.
dbutils.widgets.text("6. Run IDs", "")
run_ids = dbutils.widgets.get("6. Run IDs")
if run_ids:
    # Strip surrounding whitespace and drop empty entries so that input such
    # as "a, b," yields ["a", "b"] rather than ["a", " b", ""].
    run_ids = [run_id.strip() for run_id in run_ids.split(",") if run_id.strip()]

# Widget 7: yes/no dropdown converted to a boolean flag.
dbutils.widgets.dropdown("7. Check nested runs","no",["yes","no"])
check_nested_runs = dbutils.widgets.get("7. Check nested runs") == "yes"

# Echo the resolved settings so they are visible in the cell output.
print("experiment_id_or_name:", experiment_id_or_name)
print("output_dir:", output_dir)
print("run_start_date:", run_start_date)
print("export_permissions:", export_permissions)
print("run_ids:", run_ids)
print("notebook_formats:", notebook_formats)
print("check_nested_runs:", check_nested_runs)

# COMMAND ----------

Expand Down Expand Up @@ -99,6 +112,8 @@
experiment_id_or_name = experiment.experiment_id,
output_dir = output_dir,
run_start_time = run_start_date,
run_ids = run_ids,
check_nested_runs = check_nested_runs,
export_permissions = export_permissions,
notebook_formats = notebook_formats
)
Expand Down
2 changes: 1 addition & 1 deletion databricks_notebooks/single/_README.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@

# COMMAND ----------

# MAGIC %md ##### Last updated: _2024-04-12_
# MAGIC %md ##### Last updated: _2024-07-21_

0 comments on commit f38a52d

Please sign in to comment.