diff --git a/README.md b/README.md
index 118b6b8..56ca5b9 100644
--- a/README.md
+++ b/README.md
@@ -100,6 +100,7 @@ Options:
-lnd, --land Run land component diagnostics
-ice, --seaice Run sea ice component diagnostics
-glc, --landice Run land ice component diagnostics
+ -rof, --river-runoff Run river runoff component diagnostics
--config_path Path to the YAML configuration file containing specifications for notebooks (default config.yml)
-h, --help Show this message and exit.
```
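+
+For example, to generate only the river runoff diagnostics (assuming the
+`cupid-run` entry point provided by this repository):
+
+```
+cupid-run -rof config.yml
+```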
diff --git a/cupid/run.py b/cupid/run.py
index d1e578a..d791669 100755
--- a/cupid/run.py
+++ b/cupid/run.py
@@ -17,6 +17,7 @@
-lnd, --land Run land component diagnostics
-ice, --seaice Run sea ice component diagnostics
-glc, --landice Run land ice component diagnostics
+ -rof, --river-runoff Run river runoff component diagnostics
-config_path Path to the YAML configuration file containing specifications for notebooks (default: config.yml)
-h, --help Show this message and exit.
"""
@@ -46,6 +47,7 @@
@click.option("--land", "-lnd", is_flag=True, help="Run land component diagnostics")
@click.option("--seaice", "-ice", is_flag=True, help="Run sea ice component diagnostics")
@click.option("--landice", "-glc", is_flag=True, help="Run land ice component diagnostics")
+@click.option("--river-runoff", "-rof", is_flag=True, help="Run river runoff component diagnostics")
@click.argument("config_path", default="config.yml")
def run(
config_path,
@@ -57,6 +59,7 @@ def run(
land=False,
seaice=False,
landice=False,
+ river_runoff=False,
):
"""
Main engine to set up running all the notebooks.
@@ -81,11 +84,12 @@ def run(
"lnd": land,
"ice": seaice,
"glc": landice,
+ "rof": river_runoff,
}
# Automatically run all if no components specified
- if True not in [atmosphere, ocean, land, seaice, landice]:
+ if True not in [atmosphere, ocean, land, seaice, landice, river_runoff]:
all = True
for key in component_options.keys():
component_options[key] = True
diff --git a/examples/coupled_model/config.yml b/examples/coupled_model/config.yml
index bf91a81..ac3e83a 100644
--- a/examples/coupled_model/config.yml
+++ b/examples/coupled_model/config.yml
@@ -95,13 +95,21 @@ timeseries:
end_years: [102]
level: 'lev'
+ rof:
+ vars: []
+ derive_vars: []
+ hist_str: 'initial_hist'
+ start_years: [2]
+ end_years: [102]
+ level: 'lev'
+
compute_notebooks:
# This is where all the notebooks you want run and their
# parameters are specified. Several examples of different
# types of notebooks are provided.
- # The first key (here simple_no_params_nb) is the name of the
+ # The second key (here adf_quick_run) is the name of the
# notebook from nb_path_root, minus the .ipynb
infrastructure:
diff --git a/examples/key_metrics/config.yml b/examples/key_metrics/config.yml
index 55dabbb..d6eb0a3 100644
--- a/examples/key_metrics/config.yml
+++ b/examples/key_metrics/config.yml
@@ -129,6 +129,24 @@ compute_notebooks:
obs_name: 'GrIS_MARv3.12_climo_1960_1999.nc'
climo_nyears: 40
+ rof:
+ global_discharge_gauge_compare_obs:
+ parameter_groups:
+ none:
+ analysis_name: ""
+ grid_name: 'f09_f09_mosart' # ROF grid name
+ rof_start_date: '0091-01-01'
+ rof_end_date: '0101-01-01'
+ figureSave: False
+ global_discharge_ocean_compare_obs:
+ parameter_groups:
+ none:
+ analysis_name: ""
+ grid_name: 'f09_f09_mosart' # ROF grid name
+ rof_start_date: '0091-01-01'
+ rof_end_date: '0101-01-01'
+ figureSave: False
+
# ice:
# seaice:
# parameter_groups:
diff --git a/examples/nblibrary/rof/global_discharge_gauge_compare_obs.ipynb b/examples/nblibrary/rof/global_discharge_gauge_compare_obs.ipynb
new file mode 100644
index 0000000..dc64a3c
--- /dev/null
+++ b/examples/nblibrary/rof/global_discharge_gauge_compare_obs.ipynb
@@ -0,0 +1,1217 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "source": [
+ "## ROF global monthly, annual, seasonal flows analysis \n",
+ "\n",
+ "Use the following datasets\n",
+ "\n",
+ "1. reach-D19 gauge link ascii\n",
+ "2. D19 flow site geopackage\n",
+ "3. D19 discharge netCDF\n",
+ "4. history netCD including river discharge\n",
+ "\n",
+ "[1. Setupt](#setup)\n",
+ "\n",
+ "[2. Loading data](#load_data)\n",
+ "\n",
+ "- Read monthly history files from archive. \n",
+ "- Reference data: monthly discharge estimates at 922 big river mouths from Dai et al. 2019 data (D19)\n",
+ "\n",
+ "[3. Large 24 river analysis](#24_large_rivers)\n",
+ "\n",
+ "- Plotting time seriese (annual, seasonal cycle).\n",
+ "- if D19 referece data is available, scatter plots at Large 24 selected rivers against D19 referece data\n",
+ "\n",
+ "[4. Large 50 rivers analysis](#50_large_rivers)\n",
+ "\n",
+ "- Annual flow summary table at large 50 selected rivers.\n",
+ "- if D19 referece data is available, Scatter plot of annual flow against D19 reference data.\n",
+ "\n",
+ "[5. 922 rivers analysis](#922_rivers)\n",
+ "\n",
+ "- run only if reference flow is available\n",
+ "- error statistics (%bias, rmse, correlation) at all 922 river sites.\n",
+ "- plot error statistic on the global map\n",
+ "- plot boxplots including case(s) for each error metric\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "\n",
+ "import os, sys\n",
+ "import glob\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib as mpl\n",
+ "from matplotlib.ticker import FormatStrFormatter\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "import xarray as xr\n",
+ "import cartopy.crs as ccrs\n",
+ "import cartopy.feature as cfeature\n",
+ "from dask.distributed import Client, LocalCluster\n",
+ "\n",
+ "import scripts.colors as colors\n",
+ "import scripts.metrics as metric\n",
+ "from scripts.utility import load_yaml\n",
+ "from scripts.utility import no_time_variable\n",
+ "\n",
+ "rivers_50m = cfeature.NaturalEarthFeature(\"physical\", \"rivers_lake_centerlines\", \"50m\")\n",
+ "land = cfeature.LAND\n",
+ "\n",
+ "print(\"\\nThe Python version: %s.%s.%s\" % sys.version_info[:3])\n",
+ "print(xr.__name__, xr.__version__)\n",
+ "print(pd.__name__, pd.__version__)\n",
+ "print(gpd.__name__, gpd.__version__)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-------------------------\n",
+ "## 1. Setup "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": [
+ "parameters"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "# Parameter Defaults\n",
+ "# parameters are set in CUPiD's config.yml file\n",
+ "\n",
+ "CESM_output_dir = \"\"\n",
+ "case_name = None # case name\n",
+ "base_case_name = None # base case name\n",
+ "start_date = \"\"\n",
+ "end_date = \"\"\n",
+ "serial = False # use dask LocalCluster\n",
+ "lc_kwargs = {}\n",
+ "\n",
+ "analysis_name = \"\" # Used for Figure png names\n",
+ "if analysis_name:\n",
+ " analysis_name = case_name\n",
+ "rof_start_date = start_date # specify if different starting yyyy-mm-dd is desired\n",
+ "rof_end_date = end_date # specify if different ending yyyy-mm-dd is desired\n",
+ "grid_name = \"f09_f09_mosart\" # ROF grid name used in case\n",
+ "base_grid_name = (\n",
+ " grid_name # spcify ROF grid name for base_case in config.yml if different than case\n",
+ ")\n",
+ "figureSave = False"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-------------------------\n",
+ "ROF ancillary data specification "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "setup = load_yaml(\"./setup/setup.yaml\")\n",
+ "\n",
+ "ancillary_dir = setup[\n",
+ " \"ancillary_dir\"\n",
+ "] # ancillary directory including ROF domain, river network data etc.\n",
+ "ref_flow_dir = setup[\"ref_flow_dir\"] # including observed or reference flow data\n",
+ "case_meta = setup[\"case_meta\"] # Case metadata\n",
+ "reach_gpkg = setup[\"reach_gpkg\"] # river reach geopackage meta"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "case_list = [case for case in [case_name, base_case_name] if case is not None]\n",
+ "grid_list = [grid for grid in [grid_name, base_grid_name] if grid is not None]\n",
+ "time_period = slice(f\"{rof_start_date}\", f\"{rof_end_date}\") # analysis time period\n",
+ "nyrs = int(rof_end_date[:4]) - int(rof_start_date[:4]) + 1 # number of years\n",
+ "nmons = nyrs * 12 # number of months\n",
+ "year_list = [\n",
+ " \"{:04d}\".format(yr)\n",
+ " for yr in np.arange(int(rof_start_date[0:4]), int(rof_end_date[0:4]) + 1)\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-----\n",
+ "### dasks (optional)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Spin up cluster (if running in parallel)\n",
+ "client = None\n",
+ "if not serial:\n",
+ " cluster = LocalCluster(**lc_kwargs)\n",
+ " client = Client(cluster)\n",
+ "\n",
+ "client"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Loading data "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.1. Monthly/annual flow netCDFs\n",
+ "- month_data (xr dataset) - read from archive\n",
+ "- year_data (xr dataset) - computed from monthly\n",
+ "- seas_data (xr dataset) - computed from monthly"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "reachID = {}\n",
+ "month_data = {}\n",
+ "year_data = {}\n",
+ "seas_data = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " in_dire = os.path.join(CESM_output_dir, case, \"rof/hist\")\n",
+ " model = case_meta[grid][\"model\"]\n",
+ " domain = case_meta[grid][\"domain_nc\"]\n",
+ " var_list = case_meta[grid][\"vars_read\"]\n",
+ "\n",
+ " def preprocess(ds):\n",
+ " return ds[var_list]\n",
+ "\n",
+ " # monthly\n",
+ " nc_list = []\n",
+ " for nc_path in sorted(glob.glob(f\"{in_dire}/{case}.{model}.h*.????-*.nc\")):\n",
+ " for yr in year_list:\n",
+ " if yr in os.path.basename(nc_path):\n",
+ " nc_list.append(nc_path)\n",
+ "\n",
+ " month_data[case] = (\n",
+ " xr.open_mfdataset(\n",
+ " nc_list,\n",
+ " data_vars=\"minimal\",\n",
+ " parallel=True,\n",
+ " preprocess=preprocess,\n",
+ " )\n",
+ " .sel(time=time_period)\n",
+ " .load()\n",
+ " )\n",
+ " # annual\n",
+ " year_data[case] = month_data[case].resample(time=\"YS\").mean(dim=\"time\")\n",
+ "\n",
+ " # seasonal (compute here instead of reading for conisistent analysis period)\n",
+ " seas_data[case] = month_data[case].groupby(\"time.month\").mean(\"time\")\n",
+ " vars_no_time = no_time_variable(month_data[case])\n",
+ " if vars_no_time:\n",
+ " seas_data[case][vars_no_time] = seas_data[case][vars_no_time].isel(\n",
+ " month=0, drop=True\n",
+ " )\n",
+ " time = month_data[case][\"time\"]\n",
+ " if domain == \"None\":\n",
+ " reachID[case] = month_data[case][\"reachID\"].values\n",
+ " else:\n",
+ " reachID[case] = xr.open_dataset(f\"{ancillary_dir}/{domain}\")[\"reachID\"].values\n",
+ " print(f\"Finished loading {case}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.2 Large river ID and name ascii\n",
+ "- big_river_50: dictionary {_site_id_:_river name_}\n",
+ "- big_river_24: dictionary {_site_id_:_river name_}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"./setup/large_river_50.txt\", index_col=\"river_name\")\n",
+ "big_river_50 = {key: values[\"site_id\"] for key, values in df.iterrows()}\n",
+ "big_river_24 = {\n",
+ " key: values[\"site_id\"] for ix, (key, values) in enumerate(df.iterrows()) if ix < 24\n",
+ "} # The first 24 is used for plots"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.3. reach-D19 gauge link csv\n",
+ "- gauge_reach_lnk (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gauge_reach_lnk = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " gauge_reach_lnk[case] = pd.read_csv(\n",
+ " \"%s/D09/D09_925.%s.asc\" % (ref_flow_dir, case_meta[grid][\"network\"])\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.4 D19 flow site shapefile\n",
+ "- gauge_shp (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "gauge_shp = gpd.read_file(\n",
+ " os.path.join(ref_flow_dir, \"D09\", \"geospatial\", \"D09_925.gpkg\")\n",
+ ")\n",
+ "gauge_shp = gauge_shp[gauge_shp[\"id\"] != 9999999]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.5 D19 discharge data\n",
+ "- ds_q_obs_mon (xr datasets)\n",
+ "- ds_q_obs_yr (xr datasets)\n",
+ "- dr_q_obs_seasonal (xr datasets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# read monthly data\n",
+ "ds_q = xr.open_dataset(\n",
+ " \"%s/D09/coastal-stns-Vol-monthly.updated-May2019.mod.nc\" % (ref_flow_dir),\n",
+ " decode_times=False,\n",
+ ")\n",
+ "ds_q[\"time\"] = xr.cftime_range(\n",
+ " start=\"1900-01-01\", end=\"2018-12-01\", freq=\"MS\", calendar=\"standard\"\n",
+ ")\n",
+ "\n",
+ "# monthly - if time_period is outside observation period, use the entire obs period\n",
+ "obs_available = True\n",
+ "if ds_q[\"time\"].sel(time=time_period).values.size == 0:\n",
+ " obs_available = False\n",
+ " ds_q_obs_mon = ds_q[\"FLOW\"]\n",
+ "else:\n",
+ " ds_q_obs_mon = ds_q[\"FLOW\"].sel(time=time_period)\n",
+ "# compute annual flow from monthly\n",
+ "ds_q_obs_yr = ds_q_obs_mon.resample(time=\"YE\").mean(dim=\"time\")\n",
+ "# compute annual cycle at monthly scale\n",
+ "dr_q_obs_seasonal = ds_q_obs_mon.groupby(\"time.month\").mean(\"time\")\n",
+ "# annual mean, max, and min flow during time_period\n",
+ "ds_q_obs_yr_min = ds_q_obs_yr.min(\"time\")\n",
+ "ds_q_obs_yr_max = ds_q_obs_yr.max(\"time\")\n",
+ "ds_q_obs_yr_mean = ds_q_obs_yr.mean(\"time\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.6 Get indices in observation and simulation for gauge name (processing)\n",
+ "- gauge_plot (dictionary)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "gauge_plot = {}\n",
+ "for gname, site_id in big_river_50.items():\n",
+ " gauge_plot[gname] = {}\n",
+ " for case in case_list:\n",
+ " gauge_ix = [\n",
+ " i for i, gid in enumerate(ds_q.id.values) if gid == site_id\n",
+ " ] # go through obs Dataset and get index matching to river (gauge) name\n",
+ " gauge_id = ds_q.id.values[gauge_ix][0] ## guage ID\n",
+ " seg_id = (\n",
+ " gauge_reach_lnk[case]\n",
+ " .loc[gauge_reach_lnk[case][\"gauge_id\"] == gauge_id][\"route_id\"]\n",
+ " .values\n",
+ " ) # matching reach ID in river network\n",
+ " seg_ix = np.argwhere(\n",
+ " reachID[case] == seg_id\n",
+ " ) # matching reach index in river network\n",
+ " if len(seg_ix) == 0:\n",
+ " seg_ix = -999\n",
+ " else:\n",
+ " seg_ix = seg_ix[0]\n",
+ " gauge_plot[gname][case] = [gauge_ix, seg_ix, seg_id]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "------\n",
+ "## 3. Analysis for 24 large rivers "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.1 Annual flow series"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "fig, axes = plt.subplots(8, 3, figsize=(7.25, 11.5))\n",
+ "plt.subplots_adjust(\n",
+ " top=0.975, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n",
+ ") # create some space below the plots by increasing the bottom-value\n",
+ "\n",
+ "for ix, river_name in enumerate(big_river_24.keys()):\n",
+ " row = ix // 3\n",
+ " col = ix % 3\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1:\n",
+ " year_data[case][q_name][:, net_idx].plot(\n",
+ " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n",
+ " )\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " year_data[case][q_name][:, net_idx[0], net_idx[1]].plot(\n",
+ " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n",
+ " )\n",
+ " if obs_available:\n",
+ " ds_q_obs_yr.loc[:, gaug_idx].plot(\n",
+ " ax=axes[row, col],\n",
+ " linestyle=\"None\",\n",
+ " marker=\"o\",\n",
+ " markersize=3,\n",
+ " c=\"k\",\n",
+ " label=\"D19\",\n",
+ " )\n",
+ " else:\n",
+ " qob_mean = ds_q_obs_yr_mean.loc[gaug_idx]\n",
+ " qob_max = ds_q_obs_yr_max.loc[gaug_idx]\n",
+ " qob_min = ds_q_obs_yr_min.loc[gaug_idx]\n",
+ " axes[row, col].axhline(\n",
+ " y=qob_mean, color=\"k\", linestyle=\"--\", lw=0.5, label=\"D19 mean (1900-2018)\"\n",
+ " )\n",
+ " axes[row, col].axhspan(\n",
+ " qob_min[0],\n",
+ " qob_max[0],\n",
+ " color=\"gray\",\n",
+ " alpha=0.1,\n",
+ " label=\"D19 min-max (1900-2018)\",\n",
+ " )\n",
+ "\n",
+ " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n",
+ "\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ " if row < 7:\n",
+ " axes[row, col].set_xticklabels(\"\")\n",
+ " if col == 0:\n",
+ " axes[row, col].set_ylabel(\"Annual flow [m$^3$/s]\", fontsize=8)\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n",
+ "\n",
+ "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n",
+ "axes.flatten()[-2].legend(\n",
+ " loc=\"upper center\",\n",
+ " bbox_to_anchor=(0.125, -0.35, 0.75, 0.1),\n",
+ " ncol=5,\n",
+ " fontsize=\"x-small\",\n",
+ ")\n",
+ "\n",
+ "if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig1_big_river_annual_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.2. Annual cycle at monthly step"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "fig, axes = plt.subplots(8, 3, figsize=(7.25, 11.5))\n",
+ "plt.subplots_adjust(\n",
+ " top=0.975, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n",
+ ") # create some space below the plots by increasing the bottom-value\n",
+ "\n",
+ "for ix, river_name in enumerate(big_river_24.keys()):\n",
+ " row = ix // 3\n",
+ " col = ix % 3\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ "\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1: # means vector\n",
+ " seas_data[case][q_name][:, net_idx].plot(\n",
+ " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n",
+ " )\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " seas_data[case][q_name][:, net_idx[0], net_idx[1]].plot(\n",
+ " ax=axes[row, col], linestyle=\"-\", lw=1.0, label=case\n",
+ " )\n",
+ "\n",
+ " if obs_available:\n",
+ " dr_q_obs_seasonal.loc[:, gaug_idx].plot(\n",
+ " ax=axes[row, col],\n",
+ " linestyle=\":\",\n",
+ " lw=0.5,\n",
+ " marker=\"o\",\n",
+ " markersize=2,\n",
+ " c=\"k\",\n",
+ " label=\"D19\",\n",
+ " )\n",
+ "\n",
+ " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ " if row < 7:\n",
+ " axes[row, col].set_xticklabels(\"\")\n",
+ " if col == 0:\n",
+ " axes[row, col].set_ylabel(\"Mon. flow [m$^3$/s]\", fontsize=8)\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n",
+ "\n",
+ "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n",
+ "axes.flatten()[-2].legend(\n",
+ " loc=\"upper center\",\n",
+ " bbox_to_anchor=(0.10, -0.30, 0.75, 0.1),\n",
+ " ncol=5,\n",
+ " fontsize=\"x-small\",\n",
+ ")\n",
+ "\n",
+ "if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig2_big_river_season_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.3. scatter plots of monthly flow - obs vs sim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "if obs_available:\n",
+ " # Monthly flow scatter plot\n",
+ " fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n",
+ " plt.subplots_adjust(\n",
+ " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n",
+ " )\n",
+ "\n",
+ " for ix, river_name in enumerate(big_river_24.keys()):\n",
+ " row = ix // 3\n",
+ " col = ix % 3\n",
+ " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ "\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1: # means vector\n",
+ " ds_sim = month_data[case][q_name][:, net_idx].sel(time=time_period)\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " ds_sim = (\n",
+ " month_data[case][q_name][:, net_idx[0], net_idx[1]]\n",
+ " .sel(time=time_period)\n",
+ " .squeeze()\n",
+ " )\n",
+ "\n",
+ " ds_obs = ds_q_obs_mon.sel(time=time_period).loc[:, gaug_idx]\n",
+ "\n",
+ " axes[row, col].plot(\n",
+ " ds_obs, ds_sim, \"o\", label=case, markersize=4.0, alpha=0.4\n",
+ " )\n",
+ " if jx == 0:\n",
+ " max_val = np.max(ds_obs)\n",
+ " min_val = np.min(ds_obs)\n",
+ " else:\n",
+ " max_val = np.max([np.max(ds_sim), max_val])\n",
+ " min_val = np.min([np.min(ds_sim), min_val])\n",
+ "\n",
+ " axes[row, col].plot(\n",
+ " [min_val * 0.98, max_val * 1.02],\n",
+ " [min_val * 0.98, max_val * 1.02],\n",
+ " \":k\",\n",
+ " linewidth=0.5,\n",
+ " )\n",
+ "\n",
+ " axes[row, col].annotate(\n",
+ " \"%d %s\" % (ix + 1, river_name),\n",
+ " xy=(0.05, 0.875),\n",
+ " xycoords=\"axes fraction\",\n",
+ " fontsize=8,\n",
+ " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n",
+ " )\n",
+ " if row == 7 and col == 1:\n",
+ " axes[row, col].set_xlabel(\n",
+ " \"Monthly reference discharge [m$^3$/s]\", fontsize=9\n",
+ " )\n",
+ " else:\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ "\n",
+ " if col == 0 and row == 5:\n",
+ " axes[row, col].set_ylabel(\n",
+ " \"Monthly simulated discharge [m$^3$/s]\", fontsize=10\n",
+ " )\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n",
+ "\n",
+ " axes.flatten()[-2].legend(\n",
+ " loc=\"upper center\",\n",
+ " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n",
+ " ncol=5,\n",
+ " fontsize=\"x-small\",\n",
+ " )\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig3_big_river_month_scatter_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.4. scatter plots of annual flow"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "if obs_available:\n",
+ " # annual flow scatter plot\n",
+ " fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n",
+ " plt.subplots_adjust(\n",
+ " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n",
+ " )\n",
+ "\n",
+ " for ix, river_name in enumerate(big_river_24.keys()):\n",
+ " row = ix // 3\n",
+ " col = ix % 3\n",
+ " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ "\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1: # means vector\n",
+ " ds_sim = year_data[case][q_name][:, net_idx].sel(time=time_period)\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " ds_sim = year_data[case][q_name][:, net_idx[0], net_idx[1]].sel(\n",
+ " time=time_period\n",
+ " )\n",
+ "\n",
+ " ds_obs = ds_q_obs_yr.sel(time=time_period).loc[:, gaug_idx]\n",
+ "\n",
+ " axes[row, col].plot(\n",
+ " ds_obs, ds_sim, \"o\", label=case, markersize=4.0, alpha=0.4\n",
+ " )\n",
+ " if jx == 0:\n",
+ " max_val = np.max(ds_obs)\n",
+ " min_val = np.min(ds_obs)\n",
+ " else:\n",
+ " max_val = np.max([np.max(ds_sim), max_val])\n",
+ " min_val = np.min([np.min(ds_sim), min_val])\n",
+ "\n",
+ " axes[row, col].plot(\n",
+ " [min_val * 0.98, max_val * 1.02],\n",
+ " [min_val * 0.98, max_val * 1.02],\n",
+ " \":k\",\n",
+ " linewidth=0.5,\n",
+ " )\n",
+ "\n",
+ " axes[row, col].annotate(\n",
+ " \"%d %s\" % (ix + 1, river_name),\n",
+ " xy=(0.05, 0.875),\n",
+ " xycoords=\"axes fraction\",\n",
+ " fontsize=8,\n",
+ " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n",
+ " )\n",
+ " if row == 7 and col == 1:\n",
+ " axes[row, col].set_xlabel(\n",
+ " \"Monthly reference discharge [m$^3$/s]\", fontsize=9\n",
+ " )\n",
+ " else:\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ "\n",
+ " if col == 0 and row == 5:\n",
+ " axes[row, col].set_ylabel(\n",
+ " \"Monthly simulated discharge [m$^3$/s]\", fontsize=10\n",
+ " )\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n",
+ "\n",
+ " axes.flatten()[-2].legend(\n",
+ " loc=\"upper center\",\n",
+ " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n",
+ " ncol=5,\n",
+ " fontsize=\"x-small\",\n",
+ " )\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig4_big_river_annual_scatter_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Anaysis for Large 50 rivers "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.1 Summary tables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "with open(f\"large50rivers_{analysis_name}.txt\", \"w\") as f:\n",
+ " # some meta\n",
+ " f.write(\"# obs: Dai et al.(2019)\\n\")\n",
+ " f.write(\"# vol: km^3/yr\\n\")\n",
+ " f.write(\"# area: km^2\\n\")\n",
+ "\n",
+ " # headers\n",
+ " f.write(\"No, river_name,\")\n",
+ " f.write(\"{0: >15}_vol,\".format(\"obs\"))\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ " f.write(\"{0: >15}_vol,\".format(grid))\n",
+ " f.write(\"{0: >15}_area\".format(\"obs\"))\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ " f.write(\",\")\n",
+ " f.write(\"{0: >15}_area\".format(grid))\n",
+ " f.write(\"\\n\")\n",
+ "\n",
+ " # data for each river\n",
+ " for ix, river_name in enumerate(big_river_50.keys()):\n",
+ "\n",
+ " f.write(\"%02d,\" % (ix + 1))\n",
+ " f.write(\"{0: >20}\".format(river_name))\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ " f.write(\",\")\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1: # means vector\n",
+ " qsim = (\n",
+ " np.nanmean(\n",
+ " year_data[case][q_name][:, net_idx].sel(time=time_period).values\n",
+ " )\n",
+ " * 60\n",
+ " * 60\n",
+ " * 24\n",
+ " * 365\n",
+ " / 10**9\n",
+ " )\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " qsim = (\n",
+ " np.nanmean(\n",
+ " year_data[case][q_name][:, net_idx[0], net_idx[1]]\n",
+ " .sel(time=time_period)\n",
+ " .values\n",
+ " )\n",
+ " * 60\n",
+ " * 60\n",
+ " * 24\n",
+ " * 365\n",
+ " / 10**9\n",
+ " )\n",
+ "\n",
+ " if jx == 0:\n",
+ " qobs = (\n",
+ " ds_q_obs_yr_mean.loc[gaug_idx].values[0]\n",
+ " * 60\n",
+ " * 60\n",
+ " * 24\n",
+ " * 365\n",
+ " / 10**9\n",
+ " )\n",
+ " f.write(\"{:19.1f},\".format(qobs))\n",
+ "\n",
+ " f.write(\"{:19.1f}\".format(qsim))\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ " f.write(\",\")\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " # just get gauge_id from qs_q for now\n",
+ " gauge_id = ds_q[\"id\"].loc[gaug_idx].values[0]\n",
+ " network_area = gauge_reach_lnk[case][\n",
+ " gauge_reach_lnk[case][\"gauge_id\"] == gauge_id\n",
+ " ][\"route_area\"].values[0]\n",
+ "\n",
+ " if jx == 0:\n",
+ " area = ds_q[\"area_stn\"].loc[gaug_idx].values[0]\n",
+ " f.write(\"{:20.0f},\".format(area))\n",
+ "\n",
+ " f.write(\"{:20.0f}\".format(network_area))\n",
+ "\n",
+ " f.write(\"\\n\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.2. scatter plot of annual mean flow"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if obs_available:\n",
+ "\n",
+ " df_yr_vol = pd.read_csv(\n",
+ " f\"./large50rivers_{analysis_name}.txt\",\n",
+ " skiprows=3,\n",
+ " index_col=[\"No\"],\n",
+ " skipinitialspace=True,\n",
+ " )\n",
+ "\n",
+ " fig, axes = plt.subplots(1, figsize=(5.50, 5.50))\n",
+ " regressor = LinearRegression()\n",
+ " bias_text = \"\"\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ "\n",
+ " # compute linear regression\n",
+ " df_reg = df_yr_vol[[\"obs_vol\", f\"{grid}_vol\"]].dropna()\n",
+ " regressor.fit(df_reg[[\"obs_vol\"]], df_reg[f\"{grid}_vol\"])\n",
+ " y_pred = regressor.predict(df_reg[[\"obs_vol\"]])\n",
+ "\n",
+ " # compute bias over 50 sites\n",
+ " diff = (df_yr_vol[f\"{grid}_vol\"] - df_yr_vol[\"obs_vol\"]).mean(\n",
+ " axis=0, skipna=True\n",
+ " )\n",
+ "\n",
+ " df_yr_vol.plot(\n",
+ " ax=axes,\n",
+ " kind=\"scatter\",\n",
+ " x=\"obs_vol\",\n",
+ " y=f\"{grid}_vol\",\n",
+ " s=30,\n",
+ " alpha=0.6,\n",
+ " label=grid,\n",
+ " )\n",
+ " # plt.plot(df_reg['obs_vol'], y_pred, color=color)\n",
+ " bias_text = bias_text + f\"\\n{grid}: {diff:.1f} \"\n",
+ "\n",
+ " plt.text(\n",
+ " 0.65, 0.30, \"bias [km3/yr]\", transform=axes.transAxes, verticalalignment=\"top\"\n",
+ " )\n",
+ " plt.text(\n",
+ " 0.65, 0.30, f\"{bias_text} \", transform=axes.transAxes, verticalalignment=\"top\"\n",
+ " )\n",
+ "\n",
+ " plt.axline((0, 0), slope=1, linestyle=\"--\", color=\"black\")\n",
+ " axes.set_xscale(\"log\")\n",
+ " axes.set_yscale(\"log\")\n",
+ " axes.set_xlabel(\"reference flow\")\n",
+ " axes.set_ylabel(\"Simulated flow\")\n",
+ " axes.set_title(\"River Flow at stations [km^3/yr]\")\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(\n",
+ " f\"./NB1_Fig5_50big_river_annual_flow_scatter_{analysis_name}.png\", dpi=200\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "------\n",
+ "## 5. Anaysis for all 922 sites "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.1 Compute metris at all the sites (no plots nor tables)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "if obs_available:\n",
+ " # Merge gauge_reach lnk (dataframe) into gauge shapefile\n",
+ " gauge_shp1 = {}\n",
+ " for case, df in gauge_reach_lnk.items():\n",
+ " # df = df.loc[(df['flag'] == 0)]\n",
+ " df1 = df.drop(columns=[\"riv_name\"])\n",
+ " gauge_shp1[case] = pd.merge(\n",
+ " gauge_shp, df1, how=\"inner\", left_on=\"id\", right_on=\"gauge_id\"\n",
+ " )\n",
+ "\n",
+ " # compute %bias, correlation, and RMSE at each site based on monthly\n",
+ " mon_pbias = {}\n",
+ " mon_corr = {}\n",
+ " mon_rmse = {}\n",
+ "\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ "\n",
+ " bias = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n",
+ " corr = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n",
+ " rmse = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n",
+ "\n",
+ " for ix, row in gauge_shp1[case].iterrows():\n",
+ " q_obs = np.full(\n",
+ " nmons, np.nan, dtype=float\n",
+ " ) # dummy q_sim that will be replaced by actual data if exist\n",
+ " q_sim = np.full(\n",
+ " nmons, np.nan, dtype=float\n",
+ " ) # dummy q_sim that will be replaced by actual data if exist\n",
+ "\n",
+ " route_id = row[\"route_id\"]\n",
+ " gauge_id = row[\"gauge_id\"]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " gauge_ix = np.argwhere(ds_q.id.values == gauge_id)\n",
+ " if len(gauge_ix) == 1:\n",
+ " gauge_ix = gauge_ix[0]\n",
+ " q_obs = ds_q_obs_mon[:, gauge_ix].squeeze().values\n",
+ "\n",
+ " route_ix = np.argwhere(reachID[case] == route_id)\n",
+ " if (\n",
+ " len(route_ix) == 1\n",
+ " ): # meaning there is flow site in network and simulation exist at this site\n",
+ " route_ix = route_ix[0]\n",
+ " if len(route_ix) == 1: # means vector\n",
+ " q_sim = month_data[case][q_name][:, route_ix].squeeze().values\n",
+ " elif len(route_ix) == 2: # means 2d grid\n",
+ " q_sim = (\n",
+ " month_data[case][q_name][:, route_ix[0], route_ix[1]]\n",
+ " .squeeze()\n",
+ " .values\n",
+ " )\n",
+ "\n",
+ " # compute %bias, correlation, RMSE\n",
+ " bias[ix] = metric.pbias(q_sim, q_obs)\n",
+ " corr[ix] = np.corrcoef(q_sim, q_obs)[0, 1]\n",
+ " rmse[ix] = metric.rmse(qsim, qobs)\n",
+ "\n",
+ " mon_pbias[case] = bias\n",
+ " mon_corr[case] = corr\n",
+ " mon_rmse[case] = rmse\n",
+ "\n",
+ " gauge_shp1[case][f\"bias_{grid}\"] = bias\n",
+ " gauge_shp1[case][f\"corr_{grid}\"] = corr\n",
+ " gauge_shp1[case][f\"rmse_{grid}\"] = rmse"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.2. Spatial metric map "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "if obs_available:\n",
+ " # some local plot setups\n",
+ " cbar_kwrgs = {\n",
+ " \"bias\": {\n",
+ " \"shrink\": 0.9,\n",
+ " \"pad\": 0.02,\n",
+ " \"orientation\": \"horizontal\",\n",
+ " \"extend\": \"both\",\n",
+ " },\n",
+ " \"corr\": {\n",
+ " \"shrink\": 0.9,\n",
+ " \"pad\": 0.02,\n",
+ " \"orientation\": \"horizontal\",\n",
+ " \"extend\": \"min\",\n",
+ " },\n",
+ " \"rmse\": {\n",
+ " \"shrink\": 0.9,\n",
+ " \"pad\": 0.02,\n",
+ " \"orientation\": \"horizontal\",\n",
+ " \"extend\": \"max\",\n",
+ " },\n",
+ " }\n",
+ "\n",
+ " meta = {\n",
+ " \"bias\": {\n",
+ " \"name\": \"%bias\",\n",
+ " \"vmin\": -100,\n",
+ " \"vmax\": 100,\n",
+ " \"cm\": colors.cmap_RedGrayBlue,\n",
+ " },\n",
+ " \"corr\": {\"name\": \"correlation\", \"vmin\": 0.2, \"vmax\": 1, \"cm\": colors.cmap_corr},\n",
+ " \"rmse\": {\"name\": \"RMSE\", \"vmin\": 0, \"vmax\": 1000, \"cm\": mpl.cm.turbo},\n",
+ " }\n",
+ "\n",
+ " for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ " fig, ax = plt.subplots(\n",
+ " 1, figsize=(7.5, 4.0), subplot_kw={\"projection\": ccrs.PlateCarree()}\n",
+ " )\n",
+ "\n",
+ " ax.add_feature(\n",
+ " rivers_50m, facecolor=\"None\", edgecolor=\"b\", lw=0.5, alpha=0.3\n",
+ " )\n",
+ " ax.add_feature(land, facecolor=\"white\", edgecolor=\"grey\")\n",
+ "\n",
+ " gauge_shp1[case].plot(\n",
+ " ax=ax,\n",
+ " column=f\"{error_metric}_{grid}\",\n",
+ " markersize=10,\n",
+ " cmap=meta[error_metric][\"cm\"],\n",
+ " vmin=meta[error_metric][\"vmin\"],\n",
+ " vmax=meta[error_metric][\"vmax\"],\n",
+ " )\n",
+ "\n",
+ " ax.set_title(\"%s %s\" % (case, meta[error_metric][\"name\"]))\n",
+ "\n",
+ " points = ax.collections[-1]\n",
+ " plt.colorbar(points, ax=ax, **cbar_kwrgs[error_metric])\n",
+ "\n",
+ " plt.tight_layout()\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig6_{error_metric}_{case}_map.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.4 Boxplots of Error metrics (RMSE, %bias, and correlation coefficient)\n",
+ "Boxplot distribution is based on metrics sampled at 922 sites.\n",
+ "\n",
+ "The box extends from the Q1 to Q3 quartile values of the data, with a line at the median (Q2). \n",
+ "The whiskers extend from the edges of box to show the range of the data. \n",
+ "By default, they extend no more than 1.5 * IQR (IQR = Q3 - Q1) from the edges of the box, ending at the farthest data point within that interval. \n",
+ "Outliers are plotted as separate dots."
+ ]
+ },
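+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative sketch only (made-up sample values, not part of the analysis):\n",
+ "# how the default whisker bounds described above follow from Q1, Q3, and the IQR.\n",
+ "sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 100.0])\n",
+ "q1, q3 = np.percentile(sample, [25, 75])\n",
+ "iqr = q3 - q1\n",
+ "print(\"whiskers end within:\", q1 - 1.5 * iqr, q3 + 1.5 * iqr) # 100.0 plots as an outlier\n"
+ ]
+ },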
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if obs_available:\n",
+ " boxprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"blue\"}\n",
+ " medianprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"red\"}\n",
+ "\n",
+ " for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n",
+ " column_stat = []\n",
+ " gauge_shp_all_case = gauge_shp.copy(deep=True)\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ " gauge_shp_all_case = gauge_shp_all_case.merge(\n",
+ " gauge_shp1[case][[\"id\", f\"{error_metric}_{grid}\"]],\n",
+ " left_on=\"id\",\n",
+ " right_on=\"id\",\n",
+ " )\n",
+ " column_stat.append(f\"{error_metric}_{grid}\")\n",
+ " fig, ax = plt.subplots(1, figsize=(6.5, 4))\n",
+ " gauge_shp_all_case.boxplot(\n",
+ " ax=ax,\n",
+ " column=column_stat,\n",
+ " boxprops=boxprops,\n",
+ " medianprops=medianprops,\n",
+ " sym=\".\",\n",
+ " )\n",
+ "\n",
+ " xticklabels = [label[len(error_metric) + 1 :] for label in column_stat]\n",
+ " ax.set_xticklabels(xticklabels)\n",
+ "\n",
+ " if error_metric == \"rmse\":\n",
+ " ax.set_ylim([0, 1000])\n",
+ " ax.set_title(\"RMSE [m3/s]\")\n",
+ " elif error_metric == \"bias\":\n",
+ " ax.set_ylim([-150, 250])\n",
+ " ax.set_title(\"%bias [%]\")\n",
+ " elif error_metric == \"corr\":\n",
+ " ax.set_ylim([-0.2, 1])\n",
+ " ax.set_title(\"correlation\")\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig7_{error_metric}_boxplot.png\", dpi=150)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "cupid-analysis",
+ "language": "python",
+ "name": "cupid-analysis"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/nblibrary/rof/global_discharge_ocean_compare_obs.ipynb b/examples/nblibrary/rof/global_discharge_ocean_compare_obs.ipynb
new file mode 100644
index 0000000..5d09da6
--- /dev/null
+++ b/examples/nblibrary/rof/global_discharge_ocean_compare_obs.ipynb
@@ -0,0 +1,603 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "source": [
+ "## ROF monthly, annual, seasonal discharge at ocean outlets \n",
+ "\n",
+ "Use the following datasets\n",
+ "\n",
+ "1. reach-D19 gauge link ascii\n",
+ "2. D19 flow site geopackage\n",
+ "3. D19 discharge netCDF\n",
+ "4. monthly and yearly flow netCD (history file)\n",
+ "\n",
+ "[1. Setupt](#setup)\n",
+ "\n",
+ "\n",
+ "[2. Loading discharge data](#load_discharge_data)\n",
+ "\n",
+ "- Read monthly history files from archive. \n",
+ "- Reference data: monthly discharge estimates at 922 big river mouths from Dai et al. 2019 data (D19)\n",
+ "\n",
+ "[3. Read river, catchment, gauge information](#read_ancillary)\n",
+ "\n",
+ "- catchment polygon (geopackage)\n",
+ "- gauge point (geopackage)\n",
+ "- gauge-catchment link (csv)\n",
+ "- outlet reach information (netCDF) including discharging ocean names\n",
+ "\n",
+ "[4. Ocean discharge line plots](#24_large_rivers)\n",
+ "\n",
+ "- total seasonal flow for oceans. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "\n",
+ "import os, sys\n",
+ "import glob\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "import xarray as xr\n",
+ "import cartopy.feature as cfeature\n",
+ "from dask.distributed import Client, LocalCluster\n",
+ "\n",
+ "from scripts.utility import load_yaml\n",
+ "from scripts.utility import no_time_variable\n",
+ "from scripts.utility import read_shps\n",
+ "from scripts.utility import get_index_array\n",
+ "\n",
+ "rivers_50m = cfeature.NaturalEarthFeature(\"physical\", \"rivers_lake_centerlines\", \"50m\")\n",
+ "land = cfeature.LAND\n",
+ "\n",
+ "print(\"\\nThe Python version: %s.%s.%s\" % sys.version_info[:3])\n",
+ "print(xr.__name__, xr.__version__)\n",
+ "print(pd.__name__, pd.__version__)\n",
+ "print(gpd.__name__, gpd.__version__)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-------------------------\n",
+ "## 1. Setup "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": [
+ "parameters"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "# Parameter Defaults\n",
+ "# parameters are set in CUPiD's config.yml file\n",
+ "\n",
+ "CESM_output_dir = \"\"\n",
+ "case_name = None # case name\n",
+ "base_case_name = None # base case name\n",
+ "start_date = \"\"\n",
+ "end_date = \"\"\n",
+ "serial = False # use dask LocalCluster\n",
+ "lc_kwargs = {}\n",
+ "\n",
+ "analysis_name = \"\" # Used for Figure png names\n",
+ "if analysis_name:\n",
+ " analysis_name = case_name\n",
+ "rof_start_date = start_date # specify if different starting yyyy-mm-dd is desired\n",
+ "rof_end_date = end_date # specify if different ending yyyy-mm-dd is desired\n",
+ "grid_name = \"f09_f09_mosart\" # ROF grid name used in case\n",
+ "base_grid_name = (\n",
+ " grid_name # spcify ROF grid name for base_case in config.yml if different than case\n",
+ ")\n",
+ "figureSave = False"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-------------------------\n",
+ "ROF ancillary data specification "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "setup = load_yaml(\"./setup/setup.yaml\")\n",
+ "\n",
+ "domain_dir = setup[\n",
+ " \"ancillary_dir\"\n",
+ "] # ancillary directory including such as ROF domain, river network data\n",
+ "geospatial_dir = setup[\"geospatial_dir\"] # including shapefiles or geopackages\n",
+ "ref_flow_dir = setup[\"ref_flow_dir\"] # including observed or reference flow data\n",
+ "case_meta = setup[\"case_meta\"] # RO grid meta\n",
+ "catch_gpkg = setup[\"catch_gpkg\"] # catchment geopackage meta\n",
+ "reach_gpkg = setup[\"reach_gpkg\"] # reach geopackage meta\n",
+ "network_nc = setup[\"river_network\"] # river network meta"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "oceans_list = [\n",
+ " \"arctic\",\n",
+ " \"atlantic\",\n",
+ " \"indian\",\n",
+ " \"mediterranean\",\n",
+ " \"pacific\",\n",
+ " \"south_china\",\n",
+ " \"global\",\n",
+ "]\n",
+ "case_list = [case for case in [case_name, base_case_name] if case is not None]\n",
+ "grid_list = [grid for grid in [grid_name, base_grid_name] if grid is not None]\n",
+ "time_period = slice(f\"{rof_start_date}\", f\"{rof_end_date}\") # analysis time period\n",
+ "nyrs = int(rof_end_date[:4]) - int(rof_start_date[:4]) + 1 # number of years\n",
+ "nmons = nyrs * 12 # number of months\n",
+ "year_list = [\n",
+ " \"{:04d}\".format(yr)\n",
+ " for yr in np.arange(int(rof_start_date[0:4]), int(rof_end_date[0:4]) + 1)\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-----\n",
+ "### dasks (optional)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Spin up cluster (if running in parallel)\n",
+ "client = None\n",
+ "if not serial:\n",
+ " cluster = LocalCluster(**lc_kwargs)\n",
+ " client = Client(cluster)\n",
+ "\n",
+ "client"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Loading discharge data "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.1. Monthly/annual flow netCDFs\n",
+ "- month_data (xr dataset)\n",
+ "- year_data (xr dataset)\n",
+ "- seas_data (xr dataset)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "reachID = {}\n",
+ "month_data = {}\n",
+ "year_data = {}\n",
+ "seas_data = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " in_dire = os.path.join(CESM_output_dir, case, \"rof/hist\")\n",
+ " model = case_meta[grid][\"model\"]\n",
+ " domain = case_meta[grid][\"domain_nc\"]\n",
+ " var_list = case_meta[grid][\"vars_read\"]\n",
+ "\n",
+ " def preprocess(ds):\n",
+ " return ds[var_list]\n",
+ "\n",
+ " # monthly\n",
+ " nc_list = []\n",
+ " for nc_path in sorted(glob.glob(f\"{in_dire}/{case}.{model}.h*.????-*.nc\")):\n",
+ " for yr in year_list:\n",
+ " if yr in os.path.basename(nc_path):\n",
+ " nc_list.append(nc_path)\n",
+ " month_data[case] = (\n",
+ " xr.open_mfdataset(\n",
+ " nc_list,\n",
+ " data_vars=\"minimal\",\n",
+ " parallel=True,\n",
+ " preprocess=preprocess,\n",
+ " )\n",
+ " .sel(time=time_period)\n",
+ " .load()\n",
+ " )\n",
+ " # annual\n",
+ " year_data[case] = month_data[case].resample(time=\"YS\").mean(dim=\"time\")\n",
+ "\n",
+ " # seasonal (compute here instead of reading for conisistent analysis period)\n",
+ " seas_data[case] = month_data[case].groupby(\"time.month\").mean(\"time\")\n",
+ " vars_no_time = no_time_variable(month_data[case])\n",
+ " if vars_no_time:\n",
+ " seas_data[case][vars_no_time] = seas_data[case][vars_no_time].isel(\n",
+ " month=0, drop=True\n",
+ " )\n",
+ " time = month_data[case][\"time\"]\n",
+ " if domain == \"None\":\n",
+ " reachID[case] = month_data[case][\"reachID\"].values\n",
+ " else:\n",
+ " reachID[case] = (\n",
+ " xr.open_dataset(f\"{domain_dir}/{domain}\")[\"reachID\"]\n",
+ " .stack(seg=(\"lat\", \"lon\"))\n",
+ " .values\n",
+ " )\n",
+ " print(case)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.2 D19 discharge data\n",
+ "- ds_q_obs_mon (xr datasets)\n",
+ "- ds_q_obs_yr (xr datasets)\n",
+ "- dr_q_obs_seasonal (xr datasets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# read monthly data\n",
+ "ds_q = xr.open_dataset(\n",
+ " \"%s/D09/coastal-stns-Vol-monthly.updated-May2019.mod.nc\" % (ref_flow_dir),\n",
+ " decode_times=False,\n",
+ ")\n",
+ "ds_q[\"time\"] = xr.cftime_range(\n",
+ " start=\"1900-01-01\", end=\"2018-12-01\", freq=\"MS\", calendar=\"standard\"\n",
+ ")\n",
+ "\n",
+ "# monthly- if time_period is outside observation period, use the entire obs period\n",
+ "obs_available = True\n",
+ "if ds_q[\"time\"].sel(time=time_period).values.size == 0:\n",
+ " obs_available = False\n",
+ " ds_q_obs_mon = ds_q[\"FLOW\"]\n",
+ "else:\n",
+ " ds_q_obs_mon = ds_q[\"FLOW\"].sel(time=time_period)\n",
+ "# compute annual flow from monthly\n",
+ "ds_q_obs_yr = ds_q_obs_mon.resample(time=\"YE\").mean(dim=\"time\")\n",
+ "# compute annual cycle at monthly scale\n",
+ "dr_q_obs_seasonal = ds_q_obs_mon.groupby(\"time.month\").mean(\"time\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Reading river, catchment, gauge infomation \n",
+ "\n",
+ "- catchment polygon (geopackage)\n",
+ "- gauge point (geopackage)\n",
+ "- gauge-catchment link (csv)\n",
+ "- outlet reach information (netCDF)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.1. reach-D19 gauge link csv\n",
+ "- gauge_reach_lnk (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gauge_reach_lnk = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " gauge_reach_lnk[case] = pd.read_csv(\n",
+ " \"%s/D09/D09_925.%s.asc\" % (ref_flow_dir, case_meta[grid][\"network\"])\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.2 D19 flow site shapefile\n",
+ "- gauge_shp (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "gauge_shp = gpd.read_file(\n",
+ " os.path.join(ref_flow_dir, \"D09\", \"geospatial\", \"D09_925.gpkg\")\n",
+ ")\n",
+ "gauge_shp = gauge_shp[gauge_shp[\"id\"] != 9999999]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "ocean_shp = gpd.read_file(os.path.join(geospatial_dir, \"oceans.gpkg\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.3 Read river network information\n",
+ "- riv_ocean (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "## read catchment geopackage\n",
+ "gdf_cat = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " cat_gpkg = os.path.join(\n",
+ " geospatial_dir, catch_gpkg[grid][\"file_name\"]\n",
+ " ) # geopackage name\n",
+ " id_name_cat = catch_gpkg[grid][\"id_name\"] # reach ID in geopackage\n",
+ " var_list = [id_name_cat]\n",
+ " if \"lk\" in grid_name:\n",
+ " var_list.append(\"lake\")\n",
+ " gdf_cat[case] = read_shps([cat_gpkg], var_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# read river outlet netcdf\n",
+ "riv_ocean = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " riv_ocean_file = os.path.join(\n",
+ " domain_dir, network_nc[grid][\"file_name\"].replace(\".aug.nc\", \".outlet.nc\")\n",
+ " ) # network netcdf name\n",
+ " ds_rn_ocean = xr.open_dataset(riv_ocean_file).set_index(seg=\"seg_id\")\n",
+ " df_tmp = ds_rn_ocean.to_dataframe()\n",
+ " riv_ocean[case] = pd.merge(\n",
+ " gdf_cat[case], df_tmp, left_on=catch_gpkg[grid][\"id_name\"], right_index=True\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.6 Merge gauge, outlet catchment dataframe\n",
+ "\n",
+ "- gauge_shp1 (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# Merge gauge_reach lnk (dataframe) into gauge shapefile\n",
+ "gauge_shp1 = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " df = gauge_reach_lnk[case]\n",
+ "\n",
+ " # df = df.loc[(df['flag'] == 0)]\n",
+ " df1 = df.drop(columns=[\"riv_name\"])\n",
+ " df2 = pd.merge(gauge_shp, df1, how=\"inner\", left_on=\"id\", right_on=\"gauge_id\")\n",
+ " gauge_shp1[case] = pd.merge(\n",
+ " df2,\n",
+ " riv_ocean[case],\n",
+ " how=\"inner\",\n",
+ " left_on=\"route_id\",\n",
+ " right_on=catch_gpkg[grid][\"id_name\"],\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "------\n",
+ "## 3. plot annual cycle for global oceans \n",
+ "\n",
+ "TODO: Referece flow plot should be independent from cases (network). Currently the last case plotted looks better matched with reference flow. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%time\n",
+ "\n",
+ "nrows = 4\n",
+ "ncols = 2\n",
+ "fig, axes = plt.subplots(nrows, ncols, figsize=(7.25, 6.5))\n",
+ "plt.subplots_adjust(\n",
+ " top=0.95, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n",
+ ") # create some space below the plots by increasing the bottom-value\n",
+ "\n",
+ "for ix, ocean_name in enumerate(oceans_list):\n",
+ " row = ix // 2\n",
+ " col = ix % 2\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if case_meta[grid][\"network_type\"] == \"vector\":\n",
+ " if ocean_name == \"global\":\n",
+ " id_list = gauge_shp1[case][\"route_id\"].values\n",
+ " else:\n",
+ " id_list = gauge_shp1[case][gauge_shp1[case][\"ocean\"] == ocean_name][\n",
+ " \"route_id\"\n",
+ " ].values\n",
+ " reach_index = get_index_array(reachID[case], id_list)\n",
+ " dr_flow = seas_data[case][q_name].isel(seg=reach_index).sum(dim=\"seg\")\n",
+ " dr_flow.plot(ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case)\n",
+ "\n",
+ " elif case_meta[grid_name][\"network_type\"] == \"grid\": # means 2d grid\n",
+ " if ocean_name == \"global\":\n",
+ " id_list = gauge_shp1[case][\"route_id\"].values\n",
+ " else:\n",
+ " id_list = gauge_shp1[case][gauge_shp1[case][\"ocean\"] == ocean_name][\n",
+ " \"route_id\"\n",
+ " ].values\n",
+ "\n",
+ " reach_index = get_index_array(reachID[case], id_list)\n",
+ " seas_data_vector = seas_data[case][q_name].stack(seg=(\"lat\", \"lon\"))\n",
+ " dr_flow = seas_data_vector.isel(seg=reach_index).sum(dim=\"seg\")\n",
+ " dr_flow.plot(ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case)\n",
+ "\n",
+ " # reference data\n",
+ " if obs_available:\n",
+ " if ocean_name == \"global\":\n",
+ " id_list = gauge_shp1[case][\"id\"].values\n",
+ " else:\n",
+ " id_list = gauge_shp1[case][gauge_shp1[case][\"ocean\"] == ocean_name][\n",
+ " \"id\"\n",
+ " ].values\n",
+ " gauge_index = get_index_array(ds_q[\"id\"].values, id_list)\n",
+ " dr_obs = dr_q_obs_seasonal.isel(station=gauge_index).sum(dim=\"station\")\n",
+ " dr_obs.plot(\n",
+ " ax=axes[row, col],\n",
+ " linestyle=\"None\",\n",
+ " marker=\"o\",\n",
+ " markersize=2,\n",
+ " c=\"k\",\n",
+ " label=\"D19\",\n",
+ " )\n",
+ "\n",
+ " axes[row, col].set_title(\"%d %s\" % (ix + 1, ocean_name), fontsize=9)\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ " if row < 7:\n",
+ " axes[row, col].set_xticklabels(\"\")\n",
+ " if col == 0:\n",
+ " axes[row, col].set_ylabel(\"Mon. flow [m$^3$/s]\", fontsize=9)\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"x-small\")\n",
+ "\n",
+ "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n",
+ "axes[row, col].legend(\n",
+ " loc=\"center left\", bbox_to_anchor=(1.10, 0.40, 0.75, 0.1), ncol=1, fontsize=\"small\"\n",
+ ")\n",
+ "\n",
+ "for jx in range(ix + 1, nrows * ncols):\n",
+ " row = jx // 2\n",
+ " col = jx % 2\n",
+ " fig.delaxes(axes[row][col])\n",
+ "\n",
+ "if figureSave:\n",
+ " plt.savefig(f\"./NB2_Fig1_ocean_discharge_season_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "cupid-analysis",
+ "language": "python",
+ "name": "cupid-analysis"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/nblibrary/rof/scripts/colors.py b/examples/nblibrary/rof/scripts/colors.py
new file mode 100644
index 0000000..71c5619
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/colors.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+import matplotlib as mpl
+from matplotlib.colors import LinearSegmentedColormap
+
+# create colormaps
+# ---------------
+cmap_RedGrayBlue = LinearSegmentedColormap.from_list(
+ "custom blue",
+ [
+ (0, "xkcd:red"),
+ (0.05, "xkcd:orange"),
+ (0.50, "xkcd:light grey"),
+ (0.65, "xkcd:sky blue"),
+ (1, "xkcd:blue"),
+ ],
+ N=15,
+)
+
+# %bias
+# boundaries must be monotonically increasing for BoundaryNorm
+vals_pbias = [-50, -40, -30, -20, -10, 10, 20, 30, 40, 50]
+cmap_pbias = LinearSegmentedColormap.from_list(
+ "custom1",
+ [(0.0, "xkcd:red"), (0.5, "xkcd:light grey"), (1.0, "xkcd:blue")],
+ N=11,
+)
+cmap_pbias.set_over("xkcd:dark blue")
+cmap_pbias.set_under("xkcd:dark red")
+norm_pbias = mpl.colors.BoundaryNorm(vals_pbias, cmap_pbias.N)
+
+
+# corr
+vals_corr = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+cmap_corr = LinearSegmentedColormap.from_list(
+ "custom1",
+ [(0.00, "xkcd:yellow"), (0.50, "xkcd:green"), (1.00, "xkcd:blue")],
+ N=9,
+)
+cmap_corr.set_over("xkcd:dark blue")
+cmap_corr.set_under("xkcd:brown")
+norm_corr = mpl.colors.BoundaryNorm(vals_corr, cmap_corr.N)
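+
+# Example usage (a sketch; lon, lat, and pbias_vals are placeholder arrays):
+#   import matplotlib.pyplot as plt
+#   fig, ax = plt.subplots()
+#   sc = ax.scatter(lon, lat, c=pbias_vals, cmap=cmap_pbias, norm=norm_pbias)
+#   fig.colorbar(sc, ax=ax, extend="both")  # set_over/set_under colors fill the arrows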
diff --git a/examples/nblibrary/rof/scripts/metrics.py b/examples/nblibrary/rof/scripts/metrics.py
new file mode 100644
index 0000000..263c28d
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/metrics.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+import numpy as np
+
+# ----- time series error
+# Many other error metrics (e.g., KGE, NSE, MAE) and flow metrics
+# (flow duration curve, annual maximum flow and its timing, etc.) exist;
+# only percentage bias and RMSE are implemented here.
+
+
+def remove_nan(qsim, qobs):
+ """
+    Compare simulated and observed time series, then drop every time step
+    at which either series (or both) has NaN.
+
+ Arguments
+ ---------
+ qsim: array-like
+ Simulated time series array.
+ qobs: array-like
+ Observed time series array.
+
+ Returns
+ -------
+    (qsim, qobs): tuple of arrays
+        Simulated and observed arrays with NaN time steps removed.
+ """
+
+ sim_obs = np.stack((qsim, qobs), axis=1)
+ sim_obs = sim_obs[~np.isnan(sim_obs).any(axis=1), :]
+ return sim_obs[:, 0], sim_obs[:, 1]
+
+
+def pbias(qsim, qobs):
+ """
+ Calculates percentage bias between two flow arrays.
+
+ Arguments
+ ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+ Observed time series array.
+
+ Returns
+ -------
+    pbias: float
+        Percentage bias between the two arrays.
+ """
+
+ qsim1, qobs1 = remove_nan(qsim, qobs)
+ return np.sum(qsim1 - qobs1) / np.sum(qobs1) * 100
+
+
+def rmse(qsim, qobs):
+ """
+ Calculates root mean squared of error (rmse) between two time series arrays.
+ Arguments
+ ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+ Observed time series array.
+
+ Returns
+ -------
+ rmse: float
+ rmse calculated between the two arrays.
+ """
+ qsim1, qobs1 = remove_nan(qsim, qobs)
+ return np.sqrt(np.mean((qsim1 - qobs1) ** 2))
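+
+
+# Example usage (a sketch with made-up arrays; NaN pairs are dropped first):
+#   >>> import numpy as np
+#   >>> qsim = np.array([1.0, 2.0, np.nan, 4.0])
+#   >>> qobs = np.array([1.5, np.nan, 3.0, 4.0])
+#   >>> pbias(qsim, qobs)   # compares (1.0, 4.0) against (1.5, 4.0)
+#   -9.0909...
+#   >>> rmse(qsim, qobs)
+#   0.3535...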
diff --git a/examples/nblibrary/rof/scripts/utility.py b/examples/nblibrary/rof/scripts/utility.py
new file mode 100644
index 0000000..bb13d19
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/utility.py
@@ -0,0 +1,88 @@
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import yaml
+from pyogrio import read_dataframe
+
+
+# tomli may not be installed; fall back to a stub that fails with a clear message
+try:
+    import tomli
+
+    def load_toml(toml_file) -> dict:
+        """Load TOML data from file"""
+        with open(toml_file, "rb") as f:
+            return tomli.load(f)
+
+except ImportError:
+
+    def load_toml(toml_file) -> dict:
+        """Fallback used when tomli is not installed"""
+        raise ImportError("load_toml requires the 'tomli' package")
+
+
+def load_yaml(yaml_file) -> dict:
+ """Load yaml data from file"""
+ with open(yaml_file) as ymlfile:
+ return yaml.load(ymlfile, Loader=yaml.FullLoader)
+
+
+class AutoVivification(dict):
+ """Implementation of perl's autovivification feature."""
+
+ def __getitem__(self, item):
+ try:
+ return dict.__getitem__(self, item)
+ except KeyError:
+ value = self[item] = type(self)()
+ return value
+
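+
+# Example (sketch): nested keys are created on first access, so
+#   d = AutoVivification(); d["caseA"]["Amazon"] = 1.0
+# works without pre-creating d["caseA"].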
+
+def read_shps(shp_list, usecols, **kwargs):
+ """Faster load shapefiles with selected attributes in dataframe"""
+ gdf_frame = []
+ for shp in shp_list:
+ gdf_frame.append(read_dataframe(shp, columns=usecols))
+ print("Finished reading %s" % shp.strip("\n"))
+ return pd.concat(gdf_frame)
+
+
+def get_index_array(a_array, b_array):
+ """
+    Get an index array whose elements point to locations in a_array, ordered
+    to correspond to b_array. Entries of b_array not found in a_array are
+    masked in the result.
+
+ e.g.,
+ a_array = [2, 4, 1, 8, 3, 10, 5, 9, 7, 6]
+ b_array = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ result = [2, 0, 4, 1, 6, 9, 8, 3, 7, 5]
+
+ https://stackoverflow.com/questions/8251541/numpy-for-every-element-in-one-array-find-the-index-in-another-array
+ """
+ index = np.argsort(a_array)
+ sorted_a_array = a_array[index]
+ sorted_index = np.searchsorted(sorted_a_array, b_array)
+
+ yindex = np.take(index, sorted_index, mode="clip")
+ mask = a_array[yindex] != b_array
+
+ result = np.ma.array(yindex, mask=mask)
+
+ return result
+
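+# Worked example (sketch): entries of b_array absent from a_array come back
+# masked, e.g. get_index_array(np.array([2, 4, 1]), np.array([1, 3]))
+# -> masked array [2, --], because 3 is not in a_array.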
+
+def reorder_index(ID_array_orig, ID_array_target):
+ x = ID_array_orig
+ # Find the indices of the reordered array. See link for more details:
+ # https://stackoverflow.com/questions/8251541/numpy-for-every-element-in-one-array-find-the-index-in-another-array
+ index = np.argsort(x)
+ sorted_x = x[index]
+ sorted_index = np.searchsorted(sorted_x, ID_array_target)
+
+ return np.take(index, sorted_index, mode="clip")
+
+
+def no_time_variable(ds):
+    """Return names of variables in ds that have no time dimension and are not coordinates."""
+    vars_without_time = []
+    for var in ds.variables:
+        if "time" not in ds[var].dims and var not in list(ds.coords):
+            vars_without_time.append(var)
+    return vars_without_time
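+
+
+# Example usage (a sketch; the relative path is illustrative):
+#   setup = load_yaml("../setup/setup.yaml")
+#   print(setup["case_meta"]["f09_f09_mosart"]["network_type"])  # -> "grid"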
diff --git a/examples/nblibrary/rof/setup/large_river_50.txt b/examples/nblibrary/rof/setup/large_river_50.txt
new file mode 100644
index 0000000..b2c777a
--- /dev/null
+++ b/examples/nblibrary/rof/setup/large_river_50.txt
@@ -0,0 +1,51 @@
+site_id,river_name
+81168,Amazon
+9894,Congo
+9910,Orinoco
+9306,Changjiang
+9270,Brahmaputra
+9598,Mississippi
+90973,Yenisey
+35021,Parana
+91050,Lena
+9903,Mekong
+81329,Tocantins
+91082,Ob
+9227,Ganges
+9900,Irrawaddy
+9550,St Lawrence
+90271,Amur
+10224,Mackenzie
+9301,Xijiang
+9560,Columbia
+9400,Magdalena
+9370,Uruguay
+10623,Yukon
+9763,Danube
+9052,Niger
+81217,Xingu
+9403,Atrato
+81192,Tapajos
+9038,Ogooue
+9360,Essequibo
+9504,Fraser
+91103,Pechora
+9483,Nelson
+95213,Khatanga
+9736,Sepik
+95067,Kolyma
+9114,Zambeze
+91128,Severnaya Dvina
+9275,Indus
+9079,Sanaga
+9226,Godavari
+10006,Rajang
+81748,Sao Francisco
+9626,Usumacinta
+9366,Maroni
+9909,Rhine
+9733,Purari
+9459,Caniapiscau
+9232,Mahanadi
+9558,Sacramento
+9789,Elbe
diff --git a/examples/nblibrary/rof/setup/setup.yaml b/examples/nblibrary/rof/setup/setup.yaml
new file mode 100644
index 0000000..32a0b46
--- /dev/null
+++ b/examples/nblibrary/rof/setup/setup.yaml
@@ -0,0 +1,145 @@
+## Configurations used for ctsm-mizuRoute
+
+# Directories
+geospatial_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/rof_data/geospatial
+ancillary_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/rof_data/ancillary_data
+ref_flow_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/rof_data/obs
+
+# case metadata dictionary (TODO: consider removing color)
+# top-level key -> grid name
+# network -> name of river network
+# model -> mosart or mizuroute
+# network_type -> grid or vector
+# domain_nc -> domain netCDF file (grid networks only; otherwise None)
+# vars_read -> history variables read in
+# flow_name -> history variable holding routed runoff
+# runoff_name -> history variable holding instantaneous runoff from the land model
+# color -> color used for plotting
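+# NOTE: notebook parameters such as grid_name: 'f09_f09_mosart' must match
+# one of the top-level keys below (the notebooks look up case_meta[grid_name])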
+case_meta:
+ rHDMAlk:
+ network: HDMA_lake
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:red'
+ rHDMAlk_h06:
+ network: HDMA_lake_h06
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:green'
+ rHDMA:
+ network: HDMA
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:blue'
+ rMERIT:
+ network: MERIT_Hydro
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:green'
+ f09_f09:
+ network: mosart
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:green'
+ f09_f09_mosart:
+ network: mosart
+ model: mosart
+ network_type: grid
+ domain_nc: mosart0.5_domain.nc
+ vars_read:
+ - RIVER_DISCHARGE_OVER_LAND_LIQ
+ - RIVER_DISCHARGE_OVER_LAND_ICE
+ flow_name: RIVER_DISCHARGE_OVER_LAND_LIQ
+ runoff_name: None
+ color: 'xkcd:grey'
+
+# river flowline geopackage metadata
+# sub-key is grid name
+reach_gpkg:
+ rHDMA:
+ file_name: hdma_global_stream.gpkg
+ id_name: seg_id
+ down_id_name: Tosegment
+ rHDMAlk:
+ file_name: hdma_global_stream.gpkg
+ id_name: seg_id
+ down_id_name: Tosegment
+ rMERIT:
+ file_name: rivEndMERIT_simplified_0003.gpkg
+ id_name: COMID
+ down_id_name: NextDownID
+ f09_f09_mosart:
+ file_name: MOSART_routing_Global_0.5x0.5_c170601_flowline.gpkg
+ id_name: seg_id
+ down_id_name: Tosegment
+ f09_f09:
+ file_name: MOSART_routing_Global_0.5x0.5_c170601_flowline.gpkg
+ id_name: seg_id
+ down_id_name: Tosegment
+
+# catchment geopackage metadata
+# sub-key is grid name
+catch_gpkg:
+ rHDMA:
+ file_name: hdma_global_catch_v2_0.01.gpkg
+ id_name: hruid
+ rHDMAlk:
+ file_name: hdma_hydrolake_global_catch_v1_0.01.gpkg
+ id_name: ID
+ rMERIT:
+ file_name: catEndoMERIT.gpkg
+ id_name: hruid
+ f09_f09_mosart:
+ file_name: MOSART_routing_Global_0.5x0.5_c170601_hru.gpkg
+ id_name: hruid
+ f09_f09:
+ file_name: MOSART_routing_Global_0.5x0.5_c170601_hru.gpkg
+ id_name: hruid
+
+# river network topology data (augmented mizuRoute format)
+# sub-key is grid name
+river_network:
+ rHDMA:
+ file_name: ntopo_HDMA.v2.aug.nc
+ reach_ID: seg_id
+ rHDMAlk:
+ file_name: Network_topology_HDMA_HydroLake_v3.aug.nc
+ reach_ID: seg_id
+ rMERIT:
+ file_name: ntopo_MERIT_Hydro_v1.aug.nc
+ reach_ID: seg_id
+ f09_f09_mosart:
+ file_name: mizuRoute_MOSART_routing_Global_0.5x0.5_c170601.aug.nc
+ reach_ID: seg_id
+ f09_f09:
+ file_name: mizuRoute_MOSART_routing_Global_0.5x0.5_c170601.aug.nc
+ reach_ID: seg_id