Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

generate ADF config file #142

Merged
merged 23 commits into from
Nov 18, 2024
Merged
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
fd4db19
really adding the script this time (??)
brianpm Oct 14, 2024
97565d0
added newline at end of file per linting suggestion
brianpm Oct 14, 2024
bd92dc1
just more linting/formatting on my end
brianpm Oct 14, 2024
49a0f99
add more details for ADF config
brianpm Oct 23, 2024
c9759dc
Merge branch 'NCAR:main' into gen_adf_config
brianpm Oct 23, 2024
13c0620
fix typo for sname
brianpm Oct 23, 2024
eb25a31
ran pre-commit
TeaganKing Nov 4, 2024
9fd4ca9
minor change & note sname portion
TeaganKing Nov 4, 2024
9ed14fd
linting suggestion from after commit
TeaganKing Nov 4, 2024
9ba9d6d
update path to adf
TeaganKing Nov 4, 2024
ab8d539
include case name and not just CESM output dir
TeaganKing Nov 4, 2024
db5a6f9
included hist_str for top level rather than case-specific
TeaganKing Nov 4, 2024
db47343
updates to config file generation to include full path to ADF output …
TeaganKing Nov 6, 2024
35a9ebb
include _build/html in path
TeaganKing Nov 8, 2024
8f79704
include option for overwriting vars in adf config file
TeaganKing Nov 13, 2024
0388b54
initial response to review comments
TeaganKing Nov 15, 2024
d8a563f
additional review updates
TeaganKing Nov 15, 2024
634c2a2
Improve how we get diag_var_list
mnlevy1981 Nov 15, 2024
605a59c
Pass CI tests
mnlevy1981 Nov 16, 2024
a2308d7
update hist_str line
TeaganKing Nov 18, 2024
677a668
remove .get from base_case_name
TeaganKing Nov 18, 2024
101e1ac
update if statements for base case name
TeaganKing Nov 18, 2024
750e005
remove if statement cam_case_name
TeaganKing Nov 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
258 changes: 258 additions & 0 deletions helper_scripts/generate_adf_config_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import os
import sys

import yaml


def _parse_args():
    """Build the command-line interface and parse the process arguments.

    Options:
        --cesm-root      (required) location of the CESM source code
        --cupid-example  CUPiD example used as template for config.yml
                         (default: "external_diag_packages")
        --adf-template   (required) ADF config file to use as a base
        --out-file       (required) output file to save

    Returns:
        argparse.Namespace with attributes ``cesm_root``, ``cupid_example``,
        ``adf_template`` and ``out_file``.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Generate cupid_adf_config.yml based on an existing CUPID YAML file",
    )

    # One row per option, in the order they should appear in --help.
    # argparse derives each destination from the flag (dashes become
    # underscores) and stores the value by default, so neither is spelled out.
    option_table = (
        # Location of CESM source code (required)
        ("--cesm-root", {"required": True, "help": "Location of CESM source code"}),
        # CUPiD example from which to get config.yml
        (
            "--cupid-example",
            {
                "default": "external_diag_packages",
                "help": "CUPiD example to use as template for config.yml",
            },
        ),
        (
            "--adf-template",
            {"required": True, "help": "an adf config file to use as a base"},
        ),
        ("--out-file", {"required": True, "help": "the output file to save"}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli.parse_args()


def generate_adf_config(cesm_root, cupid_example, adf_file, out_file):
"""Use cupid config file (YAML) from cupid_example and adf_file (YAML)
to produce out_file by modifying adf_file with data from cupid config file.
"""
sys.path.append(os.path.join(cesm_root, "cime"))

# Is cupid_example a valid value?
cupid_root = os.path.join(cesm_root, "tools", "CUPiD")
cupid_examples = os.path.join(cupid_root, "examples")
valid_examples = [
example
for example in next(os.walk(cupid_examples))[1]
if example not in ["ilamb", "nblibrary"]
]
if cupid_example not in valid_examples:
error_msg = f"argument --cupid-example: invalid choice '{cupid_example}'"
raise KeyError(
f"{error_msg} (choose from subdirectories of {cupid_examples}: {valid_examples})",
)

with open(os.path.join(cupid_root, "examples", cupid_example, "config.yml")) as c:
c_dict = yaml.safe_load(c)
with open(adf_file, encoding="UTF-8") as a:
a_dict = yaml.safe_load(a)

# read parameters from CUPID
# required keys are indexed directly (KeyError if missing); only optional keys use `get`
DOUT = c_dict["global_params"]["CESM_output_dir"]
base_case_name = c_dict["global_params"]["base_case_name"]
test_case_name = c_dict["global_params"]["case_name"]
c_ts = c_dict["timeseries"]
ts_case_names = c_ts.get("case_name")
if not ts_case_names:
raise ValueError("CUPiD file does not have timeseries case_name array.")

# Set case names for ADF config
a_dict["diag_cam_climo"]["cam_case_name"] = test_case_name
a_dict["diag_cam_baseline_climo"]["cam_case_name"] = base_case_name
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it okay for this value to be None? Do we need something like

if base_case_name:
  a_dict["diag_cam_baseline_climo"]["cam_case_name"] = base_case_name
elif "cam_case_name" in a_dict["diag_cam_baseline_climo"]:
  del a_dict["diag_cam_baseline_climo"]["cam_case_name"]

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Following up because I think the github formatting made it hard to see the context of my comment. This is specifically referring to a_dict["diag_cam_baseline_climo"]["cam_case_name"] = base_case_name, since base_case_name = c_dict["global_params"].get("base_case_name") -- what is the expected behavior if base_case_name is not in global_params?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the ADF config file, compare_obs determines whether the model run is compared to observations or another model run. If this variable is false in the ADF config file, then diag_cam_baseline_climo is used and is a required config file section. If not present, ADF fails.

I'm curious if we would want to support comparison to obs in addition to model-model comparison-- @brianpm and @justin-richling do you have thoughts on this?

With all that said, I think that we could just use base_case_name = c_dict["global_params"]["base_case_name"] instead of the .get() since it is an expected value for model-model comparisons. I'll update this now.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should allow for the model vs obs comparisons. In adf_info.py there is the variable data_name that gets set to the baseline case name if model vs model, but data_name gets set to "Obs" if it's a model vs obs case.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit hesitant to include this feature within this PR if we want to get this in before Wednesday. I think it will require an additional default configuration file from ADF as well as a number of additional parameters in the cupid configuration file (including compare_obs) as well as parameters that are added to the script to generate the ADF config file.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See issue #150


# TEST CASE HISTORY FILE PATH
a_dict["diag_cam_climo"]["cam_hist_loc"] = os.path.join(
DOUT,
test_case_name,
"atm",
"hist",
)
# TEST CASE TIME SERIES FILE PATH
a_dict["diag_cam_climo"]["cam_ts_loc"] = os.path.join(
DOUT,
test_case_name,
"atm",
"proc",
"tseries",
)
# TEST CASE CLIMO FILE PATH
a_dict["diag_cam_climo"]["cam_climo_loc"] = os.path.join(
DOUT,
test_case_name,
"atm",
"proc",
"climo",
)
# TEST CASE START / END YEARS
test_case_cupid_ts_index = (
ts_case_names.index(test_case_name) if test_case_name in ts_case_names else None
)
start_date = get_date_from_ts(c_ts["atm"], "start_years", test_case_cupid_ts_index)
end_date = get_date_from_ts(c_ts["atm"], "end_years", test_case_cupid_ts_index)
a_dict["diag_cam_climo"]["start_year"] = start_date
a_dict["diag_cam_climo"]["end_year"] = end_date

# Set values for BASELINE
base_case_cupid_ts_index = (
ts_case_names.index(base_case_name) if base_case_name in ts_case_names else None
)
mnlevy1981 marked this conversation as resolved.
Show resolved Hide resolved

base_case_output_dir = c_dict["global_params"].get(
"base_case_output_dir",
DOUT + "/" + base_case_name,
)
base_start_date = get_date_from_ts(
c_ts["atm"],
"start_years",
base_case_cupid_ts_index,
)
base_end_date = get_date_from_ts(
c_ts["atm"],
"end_years",
base_case_cupid_ts_index,
)
if base_start_date is None:
base_start_date = start_date
if base_end_date is None:
base_end_date = end_date

a_dict["diag_cam_baseline_climo"]["cam_hist_loc"] = os.path.join(
base_case_output_dir,
"atm",
"hist",
)
a_dict["diag_cam_baseline_climo"]["cam_ts_loc"] = os.path.join(
base_case_output_dir,
"atm",
"proc",
"tseries",
)
a_dict["diag_cam_baseline_climo"]["cam_climo_loc"] = os.path.join(
base_case_output_dir,
"atm",
"proc",
"climo",
)
a_dict["diag_cam_baseline_climo"]["start_year"] = base_start_date
a_dict["diag_cam_baseline_climo"]["end_year"] = base_end_date

a_dict["diag_basic_info"]["hist_str"] = c_dict["timeseries"]["atm"]["hist_str"]
a_dict["diag_basic_info"]["num_procs"] = c_dict["timeseries"].get("num_procs", 1)
a_dict["diag_basic_info"]["cam_regrid_loc"] = os.path.join(
DOUT,
base_case_name,
"atm",
"proc",
"regrid",
) # This is where ADF will make "regrid" files
a_dict["diag_basic_info"]["cam_diag_plot_loc"] = os.path.join(
cupid_root,
"examples",
cupid_example,
"computed_notebooks",
c_dict["data_sources"]["sname"],
"_build",
"html",
"ADF",
) # this is where ADF will put plots, and "website" directory
a_dict["user"] = os.path.join(
cupid_root,
"examples",
cupid_example,
"computed_notebooks",
c_dict["data_sources"]["sname"],
"_build",
"html",
"ADF",
)

diag_var_list = []
plotting_scripts = []
for component in c_dict["compute_notebooks"]:
for nb in c_dict["compute_notebooks"][component]:
if (
c_dict["compute_notebooks"][component][nb]
.get("external_tool", {})
.get("tool_name")
== "ADF"
):
for var in c_dict["compute_notebooks"][component][nb][
"external_tool"
].get("vars", []):
if var not in diag_var_list:
diag_var_list.append(var)
for script in c_dict["compute_notebooks"][component][nb][
"external_tool"
].get("plotting_scripts", []):
if script not in plotting_scripts:
plotting_scripts.append(script)
if diag_var_list:
a_dict["diag_var_list"] = diag_var_list
if plotting_scripts:
a_dict["plotting_scripts"] = plotting_scripts

# os.getenv("USER")

with open(out_file, "w") as f:
# Header of file is a comment logging provenance
f.write(
"# This file has been auto-generated using generate_adf_config_file.py\n",
)
f.write(f"# It is based off of examples/{cupid_example}/config.yml\n")
f.write("# Arguments:\n")
f.write(f"# {cesm_root=}\n")
f.write(f"# {cupid_example=}\n")
f.write(f"# {adf_file=}\n")
f.write(f"# Output: {out_file=}\n")
# enter in each element of the dictionary into the new file
yaml.dump(a_dict, f, sort_keys=False)


def get_date_from_ts(data: dict, keyname: str, listindex: int, default=None):
if type(data) is not dict:
raise TypeError(f"first argument needs to be dict, got {type(data)}")
if keyname not in data:
raise KeyError(f"no entry {keyname} in the dict")
x = data[keyname]
if isinstance(x, list):
return x[listindex]
elif isinstance(x, int):
return x
else:
return default


if __name__ == "__main__":
    # Parse the command line, echo the parsed options, then generate the
    # ADF config file from them.
    cli_options = vars(_parse_args())
    print(cli_options)
    # Positional order expected by generate_adf_config:
    # cesm_root, cupid_example, adf_file, out_file
    ordered_keys = ("cesm_root", "cupid_example", "adf_template", "out_file")
    generate_adf_config(*(cli_options[key] for key in ordered_keys))
Loading