diff --git a/README.md b/README.md
index 118b6b8..56ca5b9 100644
--- a/README.md
+++ b/README.md
@@ -100,6 +100,7 @@ Options:
-lnd, --land Run land component diagnostics
-ice, --seaice Run sea ice component diagnostics
-glc, --landice Run land ice component diagnostics
+ -rof, --river-runoff Run river runoff component diagnostics
--config_path Path to the YAML configuration file containing specifications for notebooks (default config.yml)
-h, --help Show this message and exit.
```
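+
+For example, to generate only the river runoff diagnostics (assuming the
+`cupid-run` entry point provided by this repository):
+
+```
+cupid-run -rof config.yml
+```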
diff --git a/cupid/run.py b/cupid/run.py
index d1e578a..d791669 100755
--- a/cupid/run.py
+++ b/cupid/run.py
@@ -17,6 +17,7 @@
-lnd, --land Run land component diagnostics
-ice, --seaice Run sea ice component diagnostics
-glc, --landice Run land ice component diagnostics
+ -rof, --river-runoff Run river runoff component diagnostics
-config_path Path to the YAML configuration file containing specifications for notebooks (default: config.yml)
-h, --help Show this message and exit.
"""
@@ -46,6 +47,7 @@
@click.option("--land", "-lnd", is_flag=True, help="Run land component diagnostics")
@click.option("--seaice", "-ice", is_flag=True, help="Run sea ice component diagnostics")
@click.option("--landice", "-glc", is_flag=True, help="Run land ice component diagnostics")
+@click.option("--river-runoff", "-rof", is_flag=True, help="Run river runoff component diagnostics")
@click.argument("config_path", default="config.yml")
def run(
config_path,
@@ -57,6 +59,7 @@ def run(
land=False,
seaice=False,
landice=False,
+ river_runoff=False,
):
"""
Main engine to set up running all the notebooks.
@@ -81,11 +84,12 @@ def run(
"lnd": land,
"ice": seaice,
"glc": landice,
+ "rof": river_runoff,
}
# Automatically run all if no components specified
- if True not in [atmosphere, ocean, land, seaice, landice]:
+ if True not in [atmosphere, ocean, land, seaice, landice, river_runoff]:
all = True
for key in component_options.keys():
component_options[key] = True
diff --git a/examples/coupled_model/config.yml b/examples/coupled_model/config.yml
index bf91a81..ac3e83a 100644
--- a/examples/coupled_model/config.yml
+++ b/examples/coupled_model/config.yml
@@ -95,13 +95,21 @@ timeseries:
end_years: [102]
level: 'lev'
+ rof:
+ vars: []
+ derive_vars: []
+ hist_str: 'initial_hist'
+ start_years: [2]
+ end_years: [102]
+ level: 'lev'
+
compute_notebooks:
# This is where all the notebooks you want run and their
# parameters are specified. Several examples of different
# types of notebooks are provided.
- # The first key (here simple_no_params_nb) is the name of the
+ # The second key (here adf_quick_run) is the name of the
# notebook from nb_path_root, minus the .ipynb
infrastructure:
diff --git a/examples/key_metrics/config.yml b/examples/key_metrics/config.yml
index 55dabbb..d6eb0a3 100644
--- a/examples/key_metrics/config.yml
+++ b/examples/key_metrics/config.yml
@@ -129,6 +129,24 @@ compute_notebooks:
obs_name: 'GrIS_MARv3.12_climo_1960_1999.nc'
climo_nyears: 40
+ rof:
+ global_discharge_gauge_compare_obs:
+ parameter_groups:
+ none:
+ analysis_name: ""
+ grid_name: 'f09_f09_mosart' # ROF grid name
+ rof_start_date: '0091-01-01'
+ rof_end_date: '0101-01-01'
+ figureSave: False
+ global_discharge_ocean_compare_obs:
+ parameter_groups:
+ none:
+ analysis_name: ""
+ grid_name: 'f09_f09_mosart' # ROF grid name
+ rof_start_date: '0091-01-01'
+ rof_end_date: '0101-01-01'
+ figureSave: False
+
# ice:
# seaice:
# parameter_groups:
diff --git a/examples/nblibrary/rof/global_discharge_gauge_compare_obs.ipynb b/examples/nblibrary/rof/global_discharge_gauge_compare_obs.ipynb
new file mode 100644
index 0000000..dc64a3c
--- /dev/null
+++ b/examples/nblibrary/rof/global_discharge_gauge_compare_obs.ipynb
@@ -0,0 +1,1217 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "source": [
+ "## ROF global monthly, annual, seasonal flows analysis \n",
+ "\n",
+ "Use the following datasets\n",
+ "\n",
+ "1. reach-D19 gauge link ascii\n",
+ "2. D19 flow site geopackage\n",
+ "3. D19 discharge netCDF\n",
+ "4. history netCD including river discharge\n",
+ "\n",
+ "[1. Setupt](#setup)\n",
+ "\n",
+ "[2. Loading data](#load_data)\n",
+ "\n",
+ "- Read monthly history files from archive. \n",
+ "- Reference data: monthly discharge estimates at 922 big river mouths from Dai et al. 2019 data (D19)\n",
+ "\n",
+ "[3. Large 24 river analysis](#24_large_rivers)\n",
+ "\n",
+ "- Plotting time seriese (annual, seasonal cycle).\n",
+ "- if D19 referece data is available, scatter plots at Large 24 selected rivers against D19 referece data\n",
+ "\n",
+ "[4. Large 50 rivers analysis](#50_large_rivers)\n",
+ "\n",
+ "- Annual flow summary table at large 50 selected rivers.\n",
+ "- if D19 referece data is available, Scatter plot of annual flow against D19 reference data.\n",
+ "\n",
+ "[5. 922 rivers analysis](#922_rivers)\n",
+ "\n",
+ "- run only if reference flow is available\n",
+ "- error statistics (%bias, rmse, correlation) at all 922 river sites.\n",
+ "- plot error statistic on the global map\n",
+ "- plot boxplots including case(s) for each error metric\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "\n",
+ "import os, sys\n",
+ "import glob\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib as mpl\n",
+ "from matplotlib.ticker import FormatStrFormatter\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "import xarray as xr\n",
+ "import cartopy.crs as ccrs\n",
+ "import cartopy.feature as cfeature\n",
+ "from dask.distributed import Client, LocalCluster\n",
+ "\n",
+ "import scripts.colors as colors\n",
+ "import scripts.metrics as metric\n",
+ "from scripts.utility import load_yaml\n",
+ "from scripts.utility import no_time_variable\n",
+ "\n",
+ "rivers_50m = cfeature.NaturalEarthFeature(\"physical\", \"rivers_lake_centerlines\", \"50m\")\n",
+ "land = cfeature.LAND\n",
+ "\n",
+ "print(\"\\nThe Python version: %s.%s.%s\" % sys.version_info[:3])\n",
+ "print(xr.__name__, xr.__version__)\n",
+ "print(pd.__name__, pd.__version__)\n",
+ "print(gpd.__name__, gpd.__version__)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-------------------------\n",
+ "## 1. Setup "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": [
+ "parameters"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "# Parameter Defaults\n",
+ "# parameters are set in CUPiD's config.yml file\n",
+ "\n",
+ "CESM_output_dir = \"\"\n",
+ "case_name = None # case name\n",
+ "base_case_name = None # base case name\n",
+ "start_date = \"\"\n",
+ "end_date = \"\"\n",
+ "serial = False # use dask LocalCluster\n",
+ "lc_kwargs = {}\n",
+ "\n",
+ "analysis_name = \"\" # Used for Figure png names\n",
+ "if analysis_name:\n",
+ " analysis_name = case_name\n",
+ "rof_start_date = start_date # specify if different starting yyyy-mm-dd is desired\n",
+ "rof_end_date = end_date # specify if different ending yyyy-mm-dd is desired\n",
+ "grid_name = \"f09_f09_mosart\" # ROF grid name used in case\n",
+ "base_grid_name = (\n",
+ " grid_name # spcify ROF grid name for base_case in config.yml if different than case\n",
+ ")\n",
+ "figureSave = False"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-------------------------\n",
+ "ROF ancillary data specification "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "setup = load_yaml(\"./setup/setup.yaml\")\n",
+ "\n",
+ "ancillary_dir = setup[\n",
+ " \"ancillary_dir\"\n",
+ "] # ancillary directory including ROF domain, river network data etc.\n",
+ "ref_flow_dir = setup[\"ref_flow_dir\"] # including observed or reference flow data\n",
+ "case_meta = setup[\"case_meta\"] # Case metadata\n",
+ "reach_gpkg = setup[\"reach_gpkg\"] # river reach geopackage meta"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "case_list = [case for case in [case_name, base_case_name] if case is not None]\n",
+ "grid_list = [grid for grid in [grid_name, base_grid_name] if grid is not None]\n",
+ "time_period = slice(f\"{rof_start_date}\", f\"{rof_end_date}\") # analysis time period\n",
+ "nyrs = int(rof_end_date[:4]) - int(rof_start_date[:4]) + 1 # number of years\n",
+ "nmons = nyrs * 12 # number of months\n",
+ "year_list = [\n",
+ " \"{:04d}\".format(yr)\n",
+ " for yr in np.arange(int(rof_start_date[0:4]), int(rof_end_date[0:4]) + 1)\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-----\n",
+ "### dasks (optional)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Spin up cluster (if running in parallel)\n",
+ "client = None\n",
+ "if not serial:\n",
+ " cluster = LocalCluster(**lc_kwargs)\n",
+ " client = Client(cluster)\n",
+ "\n",
+ "client"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Loading data "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.1. Monthly/annual flow netCDFs\n",
+ "- month_data (xr dataset) - read from archive\n",
+ "- year_data (xr dataset) - computed from monthly\n",
+ "- seas_data (xr dataset) - computed from monthly"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "reachID = {}\n",
+ "month_data = {}\n",
+ "year_data = {}\n",
+ "seas_data = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " in_dire = os.path.join(CESM_output_dir, case, \"rof/hist\")\n",
+ " model = case_meta[grid][\"model\"]\n",
+ " domain = case_meta[grid][\"domain_nc\"]\n",
+ " var_list = case_meta[grid][\"vars_read\"]\n",
+ "\n",
+ " def preprocess(ds):\n",
+ " return ds[var_list]\n",
+ "\n",
+ " # monthly\n",
+ " nc_list = []\n",
+ " for nc_path in sorted(glob.glob(f\"{in_dire}/{case}.{model}.h*.????-*.nc\")):\n",
+ " for yr in year_list:\n",
+ " if yr in os.path.basename(nc_path):\n",
+ " nc_list.append(nc_path)\n",
+ "\n",
+ " month_data[case] = (\n",
+ " xr.open_mfdataset(\n",
+ " nc_list,\n",
+ " data_vars=\"minimal\",\n",
+ " parallel=True,\n",
+ " preprocess=preprocess,\n",
+ " )\n",
+ " .sel(time=time_period)\n",
+ " .load()\n",
+ " )\n",
+ " # annual\n",
+ " year_data[case] = month_data[case].resample(time=\"YS\").mean(dim=\"time\")\n",
+ "\n",
+ " # seasonal (compute here instead of reading for conisistent analysis period)\n",
+ " seas_data[case] = month_data[case].groupby(\"time.month\").mean(\"time\")\n",
+ " vars_no_time = no_time_variable(month_data[case])\n",
+ " if vars_no_time:\n",
+ " seas_data[case][vars_no_time] = seas_data[case][vars_no_time].isel(\n",
+ " month=0, drop=True\n",
+ " )\n",
+ " time = month_data[case][\"time\"]\n",
+ " if domain == \"None\":\n",
+ " reachID[case] = month_data[case][\"reachID\"].values\n",
+ " else:\n",
+ " reachID[case] = xr.open_dataset(f\"{ancillary_dir}/{domain}\")[\"reachID\"].values\n",
+ " print(f\"Finished loading {case}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.2 Large river ID and name ascii\n",
+ "- big_river_50: dictionary {_site_id_:_river name_}\n",
+ "- big_river_24: dictionary {_site_id_:_river name_}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"./setup/large_river_50.txt\", index_col=\"river_name\")\n",
+ "big_river_50 = {key: values[\"site_id\"] for key, values in df.iterrows()}\n",
+ "big_river_24 = {\n",
+ " key: values[\"site_id\"] for ix, (key, values) in enumerate(df.iterrows()) if ix < 24\n",
+ "} # The first 24 is used for plots"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.3. reach-D19 gauge link csv\n",
+ "- gauge_reach_lnk (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gauge_reach_lnk = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " gauge_reach_lnk[case] = pd.read_csv(\n",
+ " \"%s/D09/D09_925.%s.asc\" % (ref_flow_dir, case_meta[grid][\"network\"])\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.4 D19 flow site shapefile\n",
+ "- gauge_shp (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "gauge_shp = gpd.read_file(\n",
+ " os.path.join(ref_flow_dir, \"D09\", \"geospatial\", \"D09_925.gpkg\")\n",
+ ")\n",
+ "gauge_shp = gauge_shp[gauge_shp[\"id\"] != 9999999]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.5 D19 discharge data\n",
+ "- ds_q_obs_mon (xr datasets)\n",
+ "- ds_q_obs_yr (xr datasets)\n",
+ "- dr_q_obs_seasonal (xr datasets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# read monthly data\n",
+ "ds_q = xr.open_dataset(\n",
+ " \"%s/D09/coastal-stns-Vol-monthly.updated-May2019.mod.nc\" % (ref_flow_dir),\n",
+ " decode_times=False,\n",
+ ")\n",
+ "ds_q[\"time\"] = xr.cftime_range(\n",
+ " start=\"1900-01-01\", end=\"2018-12-01\", freq=\"MS\", calendar=\"standard\"\n",
+ ")\n",
+ "\n",
+ "# monthly - if time_period is outside observation period, use the entire obs period\n",
+ "obs_available = True\n",
+ "if ds_q[\"time\"].sel(time=time_period).values.size == 0:\n",
+ " obs_available = False\n",
+ " ds_q_obs_mon = ds_q[\"FLOW\"]\n",
+ "else:\n",
+ " ds_q_obs_mon = ds_q[\"FLOW\"].sel(time=time_period)\n",
+ "# compute annual flow from monthly\n",
+ "ds_q_obs_yr = ds_q_obs_mon.resample(time=\"YE\").mean(dim=\"time\")\n",
+ "# compute annual cycle at monthly scale\n",
+ "dr_q_obs_seasonal = ds_q_obs_mon.groupby(\"time.month\").mean(\"time\")\n",
+ "# annual mean, max, and min flow during time_period\n",
+ "ds_q_obs_yr_min = ds_q_obs_yr.min(\"time\")\n",
+ "ds_q_obs_yr_max = ds_q_obs_yr.max(\"time\")\n",
+ "ds_q_obs_yr_mean = ds_q_obs_yr.mean(\"time\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.6 Get indices in observation and simulation for gauge name (processing)\n",
+ "- gauge_plot (dictionary)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "gauge_plot = {}\n",
+ "for gname, site_id in big_river_50.items():\n",
+ " gauge_plot[gname] = {}\n",
+ " for case in case_list:\n",
+ " gauge_ix = [\n",
+ " i for i, gid in enumerate(ds_q.id.values) if gid == site_id\n",
+ " ] # go through obs Dataset and get index matching to river (gauge) name\n",
+ " gauge_id = ds_q.id.values[gauge_ix][0] ## guage ID\n",
+ " seg_id = (\n",
+ " gauge_reach_lnk[case]\n",
+ " .loc[gauge_reach_lnk[case][\"gauge_id\"] == gauge_id][\"route_id\"]\n",
+ " .values\n",
+ " ) # matching reach ID in river network\n",
+ " seg_ix = np.argwhere(\n",
+ " reachID[case] == seg_id\n",
+ " ) # matching reach index in river network\n",
+ " if len(seg_ix) == 0:\n",
+ " seg_ix = -999\n",
+ " else:\n",
+ " seg_ix = seg_ix[0]\n",
+ " gauge_plot[gname][case] = [gauge_ix, seg_ix, seg_id]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "------\n",
+ "## 3. Analysis for 24 large rivers "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.1 Annual flow series"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "fig, axes = plt.subplots(8, 3, figsize=(7.25, 11.5))\n",
+ "plt.subplots_adjust(\n",
+ " top=0.975, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n",
+ ") # create some space below the plots by increasing the bottom-value\n",
+ "\n",
+ "for ix, river_name in enumerate(big_river_24.keys()):\n",
+ " row = ix // 3\n",
+ " col = ix % 3\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1:\n",
+ " year_data[case][q_name][:, net_idx].plot(\n",
+ " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n",
+ " )\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " year_data[case][q_name][:, net_idx[0], net_idx[1]].plot(\n",
+ " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n",
+ " )\n",
+ " if obs_available:\n",
+ " ds_q_obs_yr.loc[:, gaug_idx].plot(\n",
+ " ax=axes[row, col],\n",
+ " linestyle=\"None\",\n",
+ " marker=\"o\",\n",
+ " markersize=3,\n",
+ " c=\"k\",\n",
+ " label=\"D19\",\n",
+ " )\n",
+ " else:\n",
+ " qob_mean = ds_q_obs_yr_mean.loc[gaug_idx]\n",
+ " qob_max = ds_q_obs_yr_max.loc[gaug_idx]\n",
+ " qob_min = ds_q_obs_yr_min.loc[gaug_idx]\n",
+ " axes[row, col].axhline(\n",
+ " y=qob_mean, color=\"k\", linestyle=\"--\", lw=0.5, label=\"D19 mean (1900-2018)\"\n",
+ " )\n",
+ " axes[row, col].axhspan(\n",
+ " qob_min[0],\n",
+ " qob_max[0],\n",
+ " color=\"gray\",\n",
+ " alpha=0.1,\n",
+ " label=\"D19 min-max (1900-2018)\",\n",
+ " )\n",
+ "\n",
+ " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n",
+ "\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ " if row < 7:\n",
+ " axes[row, col].set_xticklabels(\"\")\n",
+ " if col == 0:\n",
+ " axes[row, col].set_ylabel(\"Annual flow [m$^3$/s]\", fontsize=8)\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n",
+ "\n",
+ "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n",
+ "axes.flatten()[-2].legend(\n",
+ " loc=\"upper center\",\n",
+ " bbox_to_anchor=(0.125, -0.35, 0.75, 0.1),\n",
+ " ncol=5,\n",
+ " fontsize=\"x-small\",\n",
+ ")\n",
+ "\n",
+ "if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig1_big_river_annual_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.2. Annual cycle at monthly step"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "fig, axes = plt.subplots(8, 3, figsize=(7.25, 11.5))\n",
+ "plt.subplots_adjust(\n",
+ " top=0.975, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n",
+ ") # create some space below the plots by increasing the bottom-value\n",
+ "\n",
+ "for ix, river_name in enumerate(big_river_24.keys()):\n",
+ " row = ix // 3\n",
+ " col = ix % 3\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ "\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1: # means vector\n",
+ " seas_data[case][q_name][:, net_idx].plot(\n",
+ " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n",
+ " )\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " seas_data[case][q_name][:, net_idx[0], net_idx[1]].plot(\n",
+ " ax=axes[row, col], linestyle=\"-\", lw=1.0, label=case\n",
+ " )\n",
+ "\n",
+ " if obs_available:\n",
+ " dr_q_obs_seasonal.loc[:, gaug_idx].plot(\n",
+ " ax=axes[row, col],\n",
+ " linestyle=\":\",\n",
+ " lw=0.5,\n",
+ " marker=\"o\",\n",
+ " markersize=2,\n",
+ " c=\"k\",\n",
+ " label=\"D19\",\n",
+ " )\n",
+ "\n",
+ " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ " if row < 7:\n",
+ " axes[row, col].set_xticklabels(\"\")\n",
+ " if col == 0:\n",
+ " axes[row, col].set_ylabel(\"Mon. flow [m$^3$/s]\", fontsize=8)\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n",
+ "\n",
+ "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n",
+ "axes.flatten()[-2].legend(\n",
+ " loc=\"upper center\",\n",
+ " bbox_to_anchor=(0.10, -0.30, 0.75, 0.1),\n",
+ " ncol=5,\n",
+ " fontsize=\"x-small\",\n",
+ ")\n",
+ "\n",
+ "if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig2_big_river_season_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.3. scatter plots of monthly flow - obs vs sim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "if obs_available:\n",
+ " # Monthly flow scatter plot\n",
+ " fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n",
+ " plt.subplots_adjust(\n",
+ " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n",
+ " )\n",
+ "\n",
+ " for ix, river_name in enumerate(big_river_24.keys()):\n",
+ " row = ix // 3\n",
+ " col = ix % 3\n",
+ " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ "\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1: # means vector\n",
+ " ds_sim = month_data[case][q_name][:, net_idx].sel(time=time_period)\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " ds_sim = (\n",
+ " month_data[case][q_name][:, net_idx[0], net_idx[1]]\n",
+ " .sel(time=time_period)\n",
+ " .squeeze()\n",
+ " )\n",
+ "\n",
+ " ds_obs = ds_q_obs_mon.sel(time=time_period).loc[:, gaug_idx]\n",
+ "\n",
+ " axes[row, col].plot(\n",
+ " ds_obs, ds_sim, \"o\", label=case, markersize=4.0, alpha=0.4\n",
+ " )\n",
+ " if jx == 0:\n",
+ " max_val = np.max(ds_obs)\n",
+ " min_val = np.min(ds_obs)\n",
+ " else:\n",
+ " max_val = np.max([np.max(ds_sim), max_val])\n",
+ " min_val = np.min([np.min(ds_sim), min_val])\n",
+ "\n",
+ " axes[row, col].plot(\n",
+ " [min_val * 0.98, max_val * 1.02],\n",
+ " [min_val * 0.98, max_val * 1.02],\n",
+ " \":k\",\n",
+ " linewidth=0.5,\n",
+ " )\n",
+ "\n",
+ " axes[row, col].annotate(\n",
+ " \"%d %s\" % (ix + 1, river_name),\n",
+ " xy=(0.05, 0.875),\n",
+ " xycoords=\"axes fraction\",\n",
+ " fontsize=8,\n",
+ " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n",
+ " )\n",
+ " if row == 7 and col == 1:\n",
+ " axes[row, col].set_xlabel(\n",
+ " \"Monthly reference discharge [m$^3$/s]\", fontsize=9\n",
+ " )\n",
+ " else:\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ "\n",
+ " if col == 0 and row == 5:\n",
+ " axes[row, col].set_ylabel(\n",
+ " \"Monthly simulated discharge [m$^3$/s]\", fontsize=10\n",
+ " )\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n",
+ "\n",
+ " axes.flatten()[-2].legend(\n",
+ " loc=\"upper center\",\n",
+ " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n",
+ " ncol=5,\n",
+ " fontsize=\"x-small\",\n",
+ " )\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig3_big_river_month_scatter_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.4. scatter plots of annual flow"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "if obs_available:\n",
+ " # annual flow scatter plot\n",
+ " fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n",
+ " plt.subplots_adjust(\n",
+ " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n",
+ " )\n",
+ "\n",
+ " for ix, river_name in enumerate(big_river_24.keys()):\n",
+ " row = ix // 3\n",
+ " col = ix % 3\n",
+ " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ "\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1: # means vector\n",
+ " ds_sim = year_data[case][q_name][:, net_idx].sel(time=time_period)\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " ds_sim = year_data[case][q_name][:, net_idx[0], net_idx[1]].sel(\n",
+ " time=time_period\n",
+ " )\n",
+ "\n",
+ " ds_obs = ds_q_obs_yr.sel(time=time_period).loc[:, gaug_idx]\n",
+ "\n",
+ " axes[row, col].plot(\n",
+ " ds_obs, ds_sim, \"o\", label=case, markersize=4.0, alpha=0.4\n",
+ " )\n",
+ " if jx == 0:\n",
+ " max_val = np.max(ds_obs)\n",
+ " min_val = np.min(ds_obs)\n",
+ " else:\n",
+ " max_val = np.max([np.max(ds_sim), max_val])\n",
+ " min_val = np.min([np.min(ds_sim), min_val])\n",
+ "\n",
+ " axes[row, col].plot(\n",
+ " [min_val * 0.98, max_val * 1.02],\n",
+ " [min_val * 0.98, max_val * 1.02],\n",
+ " \":k\",\n",
+ " linewidth=0.5,\n",
+ " )\n",
+ "\n",
+ " axes[row, col].annotate(\n",
+ " \"%d %s\" % (ix + 1, river_name),\n",
+ " xy=(0.05, 0.875),\n",
+ " xycoords=\"axes fraction\",\n",
+ " fontsize=8,\n",
+ " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n",
+ " )\n",
+ " if row == 7 and col == 1:\n",
+ " axes[row, col].set_xlabel(\n",
+ " \"Monthly reference discharge [m$^3$/s]\", fontsize=9\n",
+ " )\n",
+ " else:\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ "\n",
+ " if col == 0 and row == 5:\n",
+ " axes[row, col].set_ylabel(\n",
+ " \"Monthly simulated discharge [m$^3$/s]\", fontsize=10\n",
+ " )\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n",
+ "\n",
+ " axes.flatten()[-2].legend(\n",
+ " loc=\"upper center\",\n",
+ " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n",
+ " ncol=5,\n",
+ " fontsize=\"x-small\",\n",
+ " )\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig4_big_river_annual_scatter_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Anaysis for Large 50 rivers "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.1 Summary tables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "with open(f\"large50rivers_{analysis_name}.txt\", \"w\") as f:\n",
+ " # some meta\n",
+ " f.write(\"# obs: Dai et al.(2019)\\n\")\n",
+ " f.write(\"# vol: km^3/yr\\n\")\n",
+ " f.write(\"# area: km^2\\n\")\n",
+ "\n",
+ " # headers\n",
+ " f.write(\"No, river_name,\")\n",
+ " f.write(\"{0: >15}_vol,\".format(\"obs\"))\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ " f.write(\"{0: >15}_vol,\".format(grid))\n",
+ " f.write(\"{0: >15}_area\".format(\"obs\"))\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ " f.write(\",\")\n",
+ " f.write(\"{0: >15}_area\".format(grid))\n",
+ " f.write(\"\\n\")\n",
+ "\n",
+ " # data for each river\n",
+ " for ix, river_name in enumerate(big_river_50.keys()):\n",
+ "\n",
+ " f.write(\"%02d,\" % (ix + 1))\n",
+ " f.write(\"{0: >20}\".format(river_name))\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ " f.write(\",\")\n",
+ " net_idx = gauge_plot[river_name][case][1]\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if len(net_idx) == 1: # means vector\n",
+ " qsim = (\n",
+ " np.nanmean(\n",
+ " year_data[case][q_name][:, net_idx].sel(time=time_period).values\n",
+ " )\n",
+ " * 60\n",
+ " * 60\n",
+ " * 24\n",
+ " * 365\n",
+ " / 10**9\n",
+ " )\n",
+ " elif len(net_idx) == 2: # means 2d grid\n",
+ " qsim = (\n",
+ " np.nanmean(\n",
+ " year_data[case][q_name][:, net_idx[0], net_idx[1]]\n",
+ " .sel(time=time_period)\n",
+ " .values\n",
+ " )\n",
+ " * 60\n",
+ " * 60\n",
+ " * 24\n",
+ " * 365\n",
+ " / 10**9\n",
+ " )\n",
+ "\n",
+ " if jx == 0:\n",
+ " qobs = (\n",
+ " ds_q_obs_yr_mean.loc[gaug_idx].values[0]\n",
+ " * 60\n",
+ " * 60\n",
+ " * 24\n",
+ " * 365\n",
+ " / 10**9\n",
+ " )\n",
+ " f.write(\"{:19.1f},\".format(qobs))\n",
+ "\n",
+ " f.write(\"{:19.1f}\".format(qsim))\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ " f.write(\",\")\n",
+ " gaug_idx = gauge_plot[river_name][case][0]\n",
+ "\n",
+ " # just get gauge_id from qs_q for now\n",
+ " gauge_id = ds_q[\"id\"].loc[gaug_idx].values[0]\n",
+ " network_area = gauge_reach_lnk[case][\n",
+ " gauge_reach_lnk[case][\"gauge_id\"] == gauge_id\n",
+ " ][\"route_area\"].values[0]\n",
+ "\n",
+ " if jx == 0:\n",
+ " area = ds_q[\"area_stn\"].loc[gaug_idx].values[0]\n",
+ " f.write(\"{:20.0f},\".format(area))\n",
+ "\n",
+ " f.write(\"{:20.0f}\".format(network_area))\n",
+ "\n",
+ " f.write(\"\\n\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.2. scatter plot of annual mean flow"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if obs_available:\n",
+ "\n",
+ " df_yr_vol = pd.read_csv(\n",
+ " f\"./large50rivers_{analysis_name}.txt\",\n",
+ " skiprows=3,\n",
+ " index_col=[\"No\"],\n",
+ " skipinitialspace=True,\n",
+ " )\n",
+ "\n",
+ " fig, axes = plt.subplots(1, figsize=(5.50, 5.50))\n",
+ " regressor = LinearRegression()\n",
+ " bias_text = \"\"\n",
+ "\n",
+ " for jx, (case, grid) in enumerate(zip(case_list, grid_list)):\n",
+ "\n",
+ " # compute linear regression\n",
+ " df_reg = df_yr_vol[[\"obs_vol\", f\"{grid}_vol\"]].dropna()\n",
+ " regressor.fit(df_reg[[\"obs_vol\"]], df_reg[f\"{grid}_vol\"])\n",
+ " y_pred = regressor.predict(df_reg[[\"obs_vol\"]])\n",
+ "\n",
+ " # compute bias over 50 sites\n",
+ " diff = (df_yr_vol[f\"{grid}_vol\"] - df_yr_vol[\"obs_vol\"]).mean(\n",
+ " axis=0, skipna=True\n",
+ " )\n",
+ "\n",
+ " df_yr_vol.plot(\n",
+ " ax=axes,\n",
+ " kind=\"scatter\",\n",
+ " x=\"obs_vol\",\n",
+ " y=f\"{grid}_vol\",\n",
+ " s=30,\n",
+ " alpha=0.6,\n",
+ " label=grid,\n",
+ " )\n",
+ " # plt.plot(df_reg['obs_vol'], y_pred, color=color)\n",
+ " bias_text = bias_text + f\"\\n{grid}: {diff:.1f} \"\n",
+ "\n",
+ " plt.text(\n",
+ " 0.65, 0.30, \"bias [km3/yr]\", transform=axes.transAxes, verticalalignment=\"top\"\n",
+ " )\n",
+ " plt.text(\n",
+ " 0.65, 0.30, f\"{bias_text} \", transform=axes.transAxes, verticalalignment=\"top\"\n",
+ " )\n",
+ "\n",
+ " plt.axline((0, 0), slope=1, linestyle=\"--\", color=\"black\")\n",
+ " axes.set_xscale(\"log\")\n",
+ " axes.set_yscale(\"log\")\n",
+ " axes.set_xlabel(\"reference flow\")\n",
+ " axes.set_ylabel(\"Simulated flow\")\n",
+ " axes.set_title(\"River Flow at stations [km^3/yr]\")\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(\n",
+ " f\"./NB1_Fig5_50big_river_annual_flow_scatter_{analysis_name}.png\", dpi=200\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "------\n",
+ "## 5. Anaysis for all 922 sites "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.1 Compute metris at all the sites (no plots nor tables)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "if obs_available:\n",
+ " # Merge gauge_reach lnk (dataframe) into gauge shapefile\n",
+ " gauge_shp1 = {}\n",
+ " for case, df in gauge_reach_lnk.items():\n",
+ " # df = df.loc[(df['flag'] == 0)]\n",
+ " df1 = df.drop(columns=[\"riv_name\"])\n",
+ " gauge_shp1[case] = pd.merge(\n",
+ " gauge_shp, df1, how=\"inner\", left_on=\"id\", right_on=\"gauge_id\"\n",
+ " )\n",
+ "\n",
+ " # compute %bias, correlation, and RMSE at each site based on monthly\n",
+ " mon_pbias = {}\n",
+ " mon_corr = {}\n",
+ " mon_rmse = {}\n",
+ "\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ "\n",
+ " bias = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n",
+ " corr = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n",
+ " rmse = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n",
+ "\n",
+ " for ix, row in gauge_shp1[case].iterrows():\n",
+ " q_obs = np.full(\n",
+ " nmons, np.nan, dtype=float\n",
+ " ) # dummy q_sim that will be replaced by actual data if exist\n",
+ " q_sim = np.full(\n",
+ " nmons, np.nan, dtype=float\n",
+ " ) # dummy q_sim that will be replaced by actual data if exist\n",
+ "\n",
+ " route_id = row[\"route_id\"]\n",
+ " gauge_id = row[\"gauge_id\"]\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " gauge_ix = np.argwhere(ds_q.id.values == gauge_id)\n",
+ " if len(gauge_ix) == 1:\n",
+ " gauge_ix = gauge_ix[0]\n",
+ " q_obs = ds_q_obs_mon[:, gauge_ix].squeeze().values\n",
+ "\n",
+ " route_ix = np.argwhere(reachID[case] == route_id)\n",
+ " if (\n",
+ " len(route_ix) == 1\n",
+ " ): # meaning there is flow site in network and simulation exist at this site\n",
+ " route_ix = route_ix[0]\n",
+ " if len(route_ix) == 1: # means vector\n",
+ " q_sim = month_data[case][q_name][:, route_ix].squeeze().values\n",
+ " elif len(route_ix) == 2: # means 2d grid\n",
+ " q_sim = (\n",
+ " month_data[case][q_name][:, route_ix[0], route_ix[1]]\n",
+ " .squeeze()\n",
+ " .values\n",
+ " )\n",
+ "\n",
+ " # compute %bias, correlation, RMSE\n",
+ " bias[ix] = metric.pbias(q_sim, q_obs)\n",
+ " corr[ix] = np.corrcoef(q_sim, q_obs)[0, 1]\n",
+ " rmse[ix] = metric.rmse(qsim, qobs)\n",
+ "\n",
+ " mon_pbias[case] = bias\n",
+ " mon_corr[case] = corr\n",
+ " mon_rmse[case] = rmse\n",
+ "\n",
+ " gauge_shp1[case][f\"bias_{grid}\"] = bias\n",
+ " gauge_shp1[case][f\"corr_{grid}\"] = corr\n",
+ " gauge_shp1[case][f\"rmse_{grid}\"] = rmse"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.2. Spatial metric map "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "if obs_available:\n",
+ " # some local plot setups\n",
+ " cbar_kwrgs = {\n",
+ " \"bias\": {\n",
+ " \"shrink\": 0.9,\n",
+ " \"pad\": 0.02,\n",
+ " \"orientation\": \"horizontal\",\n",
+ " \"extend\": \"both\",\n",
+ " },\n",
+ " \"corr\": {\n",
+ " \"shrink\": 0.9,\n",
+ " \"pad\": 0.02,\n",
+ " \"orientation\": \"horizontal\",\n",
+ " \"extend\": \"min\",\n",
+ " },\n",
+ " \"rmse\": {\n",
+ " \"shrink\": 0.9,\n",
+ " \"pad\": 0.02,\n",
+ " \"orientation\": \"horizontal\",\n",
+ " \"extend\": \"max\",\n",
+ " },\n",
+ " }\n",
+ "\n",
+ " meta = {\n",
+ " \"bias\": {\n",
+ " \"name\": \"%bias\",\n",
+ " \"vmin\": -100,\n",
+ " \"vmax\": 100,\n",
+ " \"cm\": colors.cmap_RedGrayBlue,\n",
+ " },\n",
+ " \"corr\": {\"name\": \"correlation\", \"vmin\": 0.2, \"vmax\": 1, \"cm\": colors.cmap_corr},\n",
+ " \"rmse\": {\"name\": \"RMSE\", \"vmin\": 0, \"vmax\": 1000, \"cm\": mpl.cm.turbo},\n",
+ " }\n",
+ "\n",
+ " for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ " fig, ax = plt.subplots(\n",
+ " 1, figsize=(7.5, 4.0), subplot_kw={\"projection\": ccrs.PlateCarree()}\n",
+ " )\n",
+ "\n",
+ " ax.add_feature(\n",
+ " rivers_50m, facecolor=\"None\", edgecolor=\"b\", lw=0.5, alpha=0.3\n",
+ " )\n",
+ " ax.add_feature(land, facecolor=\"white\", edgecolor=\"grey\")\n",
+ "\n",
+ " gauge_shp1[case].plot(\n",
+ " ax=ax,\n",
+ " column=f\"{error_metric}_{grid}\",\n",
+ " markersize=10,\n",
+ " cmap=meta[error_metric][\"cm\"],\n",
+ " vmin=meta[error_metric][\"vmin\"],\n",
+ " vmax=meta[error_metric][\"vmax\"],\n",
+ " )\n",
+ "\n",
+ " ax.set_title(\"%s %s\" % (case, meta[error_metric][\"name\"]))\n",
+ "\n",
+ " points = ax.collections[-1]\n",
+ " plt.colorbar(points, ax=ax, **cbar_kwrgs[error_metric])\n",
+ "\n",
+ " plt.tight_layout()\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig6_{error_metric}_{case}_map.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.4 Boxplots of Error metrics (RMSE, %bias, and correlation coefficient)\n",
+ "Boxplot distribution is based on metrics sampled at 922 sites.\n",
+ "\n",
+ "The box extends from the Q1 to Q3 quartile values of the data, with a line at the median (Q2). \n",
+ "The whiskers extend from the edges of box to show the range of the data. \n",
+ "By default, they extend no more than 1.5 * IQR (IQR = Q3 - Q1) from the edges of the box, ending at the farthest data point within that interval. \n",
+ "Outliers are plotted as separate dots."
+ ]
+ },
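+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative sketch only (made-up sample values, not part of the analysis):\n",
+ "# how the default whisker bounds described above follow from Q1, Q3, and the IQR.\n",
+ "sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 100.0])\n",
+ "q1, q3 = np.percentile(sample, [25, 75])\n",
+ "iqr = q3 - q1\n",
+ "print(\"whiskers end within:\", q1 - 1.5 * iqr, q3 + 1.5 * iqr) # 100.0 plots as an outlier\n"
+ ]
+ },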
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if obs_available:\n",
+ " boxprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"blue\"}\n",
+ " medianprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"red\"}\n",
+ "\n",
+ " for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n",
+ " column_stat = []\n",
+ " gauge_shp_all_case = gauge_shp.copy(deep=True)\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ " gauge_shp_all_case = gauge_shp_all_case.merge(\n",
+ " gauge_shp1[case][[\"id\", f\"{error_metric}_{grid}\"]],\n",
+ " left_on=\"id\",\n",
+ " right_on=\"id\",\n",
+ " )\n",
+ " column_stat.append(f\"{error_metric}_{grid}\")\n",
+ " fig, ax = plt.subplots(1, figsize=(6.5, 4))\n",
+ " gauge_shp_all_case.boxplot(\n",
+ " ax=ax,\n",
+ " column=column_stat,\n",
+ " boxprops=boxprops,\n",
+ " medianprops=medianprops,\n",
+ " sym=\".\",\n",
+ " )\n",
+ "\n",
+ " xticklabels = [label[len(error_metric) + 1 :] for label in column_stat]\n",
+ " ax.set_xticklabels(xticklabels)\n",
+ "\n",
+ " if error_metric == \"rmse\":\n",
+ " ax.set_ylim([0, 1000])\n",
+ " ax.set_title(\"RMSE [m3/s]\")\n",
+ " elif error_metric == \"bias\":\n",
+ " ax.set_ylim([-150, 250])\n",
+ " ax.set_title(\"%bias [%]\")\n",
+ " elif error_metric == \"corr\":\n",
+ " ax.set_ylim([-0.2, 1])\n",
+ " ax.set_title(\"correlation\")\n",
+ "\n",
+ " if figureSave:\n",
+ " plt.savefig(f\"./NB1_Fig7_{error_metric}_boxplot.png\", dpi=150)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "cupid-analysis",
+ "language": "python",
+ "name": "cupid-analysis"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/nblibrary/rof/global_discharge_ocean_compare_obs.ipynb b/examples/nblibrary/rof/global_discharge_ocean_compare_obs.ipynb
new file mode 100644
index 0000000..5d09da6
--- /dev/null
+++ b/examples/nblibrary/rof/global_discharge_ocean_compare_obs.ipynb
@@ -0,0 +1,603 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "source": [
+ "## ROF monthly, annual, seasonal discharge at ocean outlets \n",
+ "\n",
+ "Use the following datasets\n",
+ "\n",
+ "1. reach-D19 gauge link ascii\n",
+ "2. D19 flow site geopackage\n",
+ "3. D19 discharge netCDF\n",
+ "4. monthly and yearly flow netCD (history file)\n",
+ "\n",
+ "[1. Setupt](#setup)\n",
+ "\n",
+ "\n",
+ "[2. Loading discharge data](#load_discharge_data)\n",
+ "\n",
+ "- Read monthly history files from archive. \n",
+ "- Reference data: monthly discharge estimates at 922 big river mouths from Dai et al. 2019 data (D19)\n",
+ "\n",
+ "[3. Read river, catchment, gauge information](#read_ancillary)\n",
+ "\n",
+ "- catchment polygon (geopackage)\n",
+ "- gauge point (geopackage)\n",
+ "- gauge-catchment link (csv)\n",
+ "- outlet reach information (netCDF) including discharging ocean names\n",
+ "\n",
+ "[4. Ocean discharge line plots](#24_large_rivers)\n",
+ "\n",
+ "- total seasonal flow for oceans. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "\n",
+ "import os, sys\n",
+ "import glob\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "import xarray as xr\n",
+ "import cartopy.feature as cfeature\n",
+ "from dask.distributed import Client, LocalCluster\n",
+ "\n",
+ "from scripts.utility import load_yaml\n",
+ "from scripts.utility import no_time_variable\n",
+ "from scripts.utility import read_shps\n",
+ "from scripts.utility import get_index_array\n",
+ "\n",
+ "rivers_50m = cfeature.NaturalEarthFeature(\"physical\", \"rivers_lake_centerlines\", \"50m\")\n",
+ "land = cfeature.LAND\n",
+ "\n",
+ "print(\"\\nThe Python version: %s.%s.%s\" % sys.version_info[:3])\n",
+ "print(xr.__name__, xr.__version__)\n",
+ "print(pd.__name__, pd.__version__)\n",
+ "print(gpd.__name__, gpd.__version__)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-------------------------\n",
+ "## 1. Setup "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": [
+ "parameters"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "# Parameter Defaults\n",
+ "# parameters are set in CUPiD's config.yml file\n",
+ "\n",
+ "CESM_output_dir = \"\"\n",
+ "case_name = None # case name\n",
+ "base_case_name = None # base case name\n",
+ "start_date = \"\"\n",
+ "end_date = \"\"\n",
+ "serial = False # use dask LocalCluster\n",
+ "lc_kwargs = {}\n",
+ "\n",
+ "analysis_name = \"\" # Used for Figure png names\n",
+ "if analysis_name:\n",
+ " analysis_name = case_name\n",
+ "rof_start_date = start_date # specify if different starting yyyy-mm-dd is desired\n",
+ "rof_end_date = end_date # specify if different ending yyyy-mm-dd is desired\n",
+ "grid_name = \"f09_f09_mosart\" # ROF grid name used in case\n",
+ "base_grid_name = (\n",
+ " grid_name # spcify ROF grid name for base_case in config.yml if different than case\n",
+ ")\n",
+ "figureSave = False"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-------------------------\n",
+ "ROF ancillary data specification "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "setup = load_yaml(\"./setup/setup.yaml\")\n",
+ "\n",
+ "domain_dir = setup[\n",
+ " \"ancillary_dir\"\n",
+ "] # ancillary directory including such as ROF domain, river network data\n",
+ "geospatial_dir = setup[\"geospatial_dir\"] # including shapefiles or geopackages\n",
+ "ref_flow_dir = setup[\"ref_flow_dir\"] # including observed or reference flow data\n",
+ "case_meta = setup[\"case_meta\"] # RO grid meta\n",
+ "catch_gpkg = setup[\"catch_gpkg\"] # catchment geopackage meta\n",
+ "reach_gpkg = setup[\"reach_gpkg\"] # reach geopackage meta\n",
+ "network_nc = setup[\"river_network\"] # river network meta"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "oceans_list = [\n",
+ " \"arctic\",\n",
+ " \"atlantic\",\n",
+ " \"indian\",\n",
+ " \"mediterranean\",\n",
+ " \"pacific\",\n",
+ " \"south_china\",\n",
+ " \"global\",\n",
+ "]\n",
+ "case_list = [case for case in [case_name, base_case_name] if case is not None]\n",
+ "grid_list = [grid for grid in [grid_name, base_grid_name] if grid is not None]\n",
+ "time_period = slice(f\"{rof_start_date}\", f\"{rof_end_date}\") # analysis time period\n",
+ "nyrs = int(rof_end_date[:4]) - int(rof_start_date[:4]) + 1 # number of years\n",
+ "nmons = nyrs * 12 # number of months\n",
+ "year_list = [\n",
+ " \"{:04d}\".format(yr)\n",
+ " for yr in np.arange(int(rof_start_date[0:4]), int(rof_end_date[0:4]) + 1)\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "-----\n",
+ "### dasks (optional)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Spin up cluster (if running in parallel)\n",
+ "client = None\n",
+ "if not serial:\n",
+ " cluster = LocalCluster(**lc_kwargs)\n",
+ " client = Client(cluster)\n",
+ "\n",
+ "client"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Loading discharge data "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.1. Monthly/annual flow netCDFs\n",
+ "- month_data (xr dataset)\n",
+ "- year_data (xr dataset)\n",
+ "- seas_data (xr dataset)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "reachID = {}\n",
+ "month_data = {}\n",
+ "year_data = {}\n",
+ "seas_data = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " in_dire = os.path.join(CESM_output_dir, case, \"rof/hist\")\n",
+ " model = case_meta[grid][\"model\"]\n",
+ " domain = case_meta[grid][\"domain_nc\"]\n",
+ " var_list = case_meta[grid][\"vars_read\"]\n",
+ "\n",
+ " def preprocess(ds):\n",
+ " return ds[var_list]\n",
+ "\n",
+ " # monthly\n",
+ " nc_list = []\n",
+ " for nc_path in sorted(glob.glob(f\"{in_dire}/{case}.{model}.h*.????-*.nc\")):\n",
+ " for yr in year_list:\n",
+ " if yr in os.path.basename(nc_path):\n",
+ " nc_list.append(nc_path)\n",
+ " month_data[case] = (\n",
+ " xr.open_mfdataset(\n",
+ " nc_list,\n",
+ " data_vars=\"minimal\",\n",
+ " parallel=True,\n",
+ " preprocess=preprocess,\n",
+ " )\n",
+ " .sel(time=time_period)\n",
+ " .load()\n",
+ " )\n",
+ " # annual\n",
+ " year_data[case] = month_data[case].resample(time=\"YS\").mean(dim=\"time\")\n",
+ "\n",
+ " # seasonal (compute here instead of reading for conisistent analysis period)\n",
+ " seas_data[case] = month_data[case].groupby(\"time.month\").mean(\"time\")\n",
+ " vars_no_time = no_time_variable(month_data[case])\n",
+ " if vars_no_time:\n",
+ " seas_data[case][vars_no_time] = seas_data[case][vars_no_time].isel(\n",
+ " month=0, drop=True\n",
+ " )\n",
+ " time = month_data[case][\"time\"]\n",
+ " if domain == \"None\":\n",
+ " reachID[case] = month_data[case][\"reachID\"].values\n",
+ " else:\n",
+ " reachID[case] = (\n",
+ " xr.open_dataset(f\"{domain_dir}/{domain}\")[\"reachID\"]\n",
+ " .stack(seg=(\"lat\", \"lon\"))\n",
+ " .values\n",
+ " )\n",
+ " print(case)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.2 D19 discharge data\n",
+ "- ds_q_obs_mon (xr datasets)\n",
+ "- ds_q_obs_yr (xr datasets)\n",
+ "- dr_q_obs_seasonal (xr datasets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# read monthly data\n",
+ "ds_q = xr.open_dataset(\n",
+ " \"%s/D09/coastal-stns-Vol-monthly.updated-May2019.mod.nc\" % (ref_flow_dir),\n",
+ " decode_times=False,\n",
+ ")\n",
+ "ds_q[\"time\"] = xr.cftime_range(\n",
+ " start=\"1900-01-01\", end=\"2018-12-01\", freq=\"MS\", calendar=\"standard\"\n",
+ ")\n",
+ "\n",
+ "# monthly- if time_period is outside observation period, use the entire obs period\n",
+ "obs_available = True\n",
+ "if ds_q[\"time\"].sel(time=time_period).values.size == 0:\n",
+ " obs_available = False\n",
+ " ds_q_obs_mon = ds_q[\"FLOW\"]\n",
+ "else:\n",
+ " ds_q_obs_mon = ds_q[\"FLOW\"].sel(time=time_period)\n",
+ "# compute annual flow from monthly\n",
+ "ds_q_obs_yr = ds_q_obs_mon.resample(time=\"YE\").mean(dim=\"time\")\n",
+ "# compute annual cycle at monthly scale\n",
+ "dr_q_obs_seasonal = ds_q_obs_mon.groupby(\"time.month\").mean(\"time\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Reading river, catchment, gauge infomation \n",
+ "\n",
+ "- catchment polygon (geopackage)\n",
+ "- gauge point (geopackage)\n",
+ "- gauge-catchment link (csv)\n",
+ "- outlet reach information (netCDF)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.1. reach-D19 gauge link csv\n",
+ "- gauge_reach_lnk (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gauge_reach_lnk = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " gauge_reach_lnk[case] = pd.read_csv(\n",
+ " \"%s/D09/D09_925.%s.asc\" % (ref_flow_dir, case_meta[grid][\"network\"])\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.2 D19 flow site shapefile\n",
+ "- gauge_shp (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "gauge_shp = gpd.read_file(\n",
+ " os.path.join(ref_flow_dir, \"D09\", \"geospatial\", \"D09_925.gpkg\")\n",
+ ")\n",
+ "gauge_shp = gauge_shp[gauge_shp[\"id\"] != 9999999]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "ocean_shp = gpd.read_file(os.path.join(geospatial_dir, \"oceans.gpkg\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.3 Read river network information\n",
+ "- riv_ocean (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "## read catchment geopackage\n",
+ "gdf_cat = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " cat_gpkg = os.path.join(\n",
+ " geospatial_dir, catch_gpkg[grid][\"file_name\"]\n",
+ " ) # geopackage name\n",
+ " id_name_cat = catch_gpkg[grid][\"id_name\"] # reach ID in geopackage\n",
+ " var_list = [id_name_cat]\n",
+ " if \"lk\" in grid_name:\n",
+ " var_list.append(\"lake\")\n",
+ " gdf_cat[case] = read_shps([cat_gpkg], var_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# read river outlet netcdf\n",
+ "riv_ocean = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " riv_ocean_file = os.path.join(\n",
+ " domain_dir, network_nc[grid][\"file_name\"].replace(\".aug.nc\", \".outlet.nc\")\n",
+ " ) # network netcdf name\n",
+ " ds_rn_ocean = xr.open_dataset(riv_ocean_file).set_index(seg=\"seg_id\")\n",
+ " df_tmp = ds_rn_ocean.to_dataframe()\n",
+ " riv_ocean[case] = pd.merge(\n",
+ " gdf_cat[case], df_tmp, left_on=catch_gpkg[grid][\"id_name\"], right_index=True\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.6 Merge gauge, outlet catchment dataframe\n",
+ "\n",
+ "- gauge_shp1 (dataframe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# Merge gauge_reach lnk (dataframe) into gauge shapefile\n",
+ "gauge_shp1 = {}\n",
+ "for case, grid in zip(case_list, grid_list):\n",
+ " df = gauge_reach_lnk[case]\n",
+ "\n",
+ " # df = df.loc[(df['flag'] == 0)]\n",
+ " df1 = df.drop(columns=[\"riv_name\"])\n",
+ " df2 = pd.merge(gauge_shp, df1, how=\"inner\", left_on=\"id\", right_on=\"gauge_id\")\n",
+ " gauge_shp1[case] = pd.merge(\n",
+ " df2,\n",
+ " riv_ocean[case],\n",
+ " how=\"inner\",\n",
+ " left_on=\"route_id\",\n",
+ " right_on=catch_gpkg[grid][\"id_name\"],\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "------\n",
+ "## 3. plot annual cycle for global oceans \n",
+ "\n",
+ "TODO: Referece flow plot should be independent from cases (network). Currently the last case plotted looks better matched with reference flow. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%time\n",
+ "\n",
+ "nrows = 4\n",
+ "ncols = 2\n",
+ "fig, axes = plt.subplots(nrows, ncols, figsize=(7.25, 6.5))\n",
+ "plt.subplots_adjust(\n",
+ " top=0.95, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n",
+ ") # create some space below the plots by increasing the bottom-value\n",
+ "\n",
+ "for ix, ocean_name in enumerate(oceans_list):\n",
+ " row = ix // 2\n",
+ " col = ix % 2\n",
+ " for case, grid in zip(case_list, grid_list):\n",
+ "\n",
+ " q_name = case_meta[grid][\"flow_name\"]\n",
+ "\n",
+ " if case_meta[grid][\"network_type\"] == \"vector\":\n",
+ " if ocean_name == \"global\":\n",
+ " id_list = gauge_shp1[case][\"route_id\"].values\n",
+ " else:\n",
+ " id_list = gauge_shp1[case][gauge_shp1[case][\"ocean\"] == ocean_name][\n",
+ " \"route_id\"\n",
+ " ].values\n",
+ " reach_index = get_index_array(reachID[case], id_list)\n",
+ " dr_flow = seas_data[case][q_name].isel(seg=reach_index).sum(dim=\"seg\")\n",
+ " dr_flow.plot(ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case)\n",
+ "\n",
+ " elif case_meta[grid_name][\"network_type\"] == \"grid\": # means 2d grid\n",
+ " if ocean_name == \"global\":\n",
+ " id_list = gauge_shp1[case][\"route_id\"].values\n",
+ " else:\n",
+ " id_list = gauge_shp1[case][gauge_shp1[case][\"ocean\"] == ocean_name][\n",
+ " \"route_id\"\n",
+ " ].values\n",
+ "\n",
+ " reach_index = get_index_array(reachID[case], id_list)\n",
+ " seas_data_vector = seas_data[case][q_name].stack(seg=(\"lat\", \"lon\"))\n",
+ " dr_flow = seas_data_vector.isel(seg=reach_index).sum(dim=\"seg\")\n",
+ " dr_flow.plot(ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case)\n",
+ "\n",
+ " # reference data\n",
+ " if obs_available:\n",
+ " if ocean_name == \"global\":\n",
+ " id_list = gauge_shp1[case][\"id\"].values\n",
+ " else:\n",
+ " id_list = gauge_shp1[case][gauge_shp1[case][\"ocean\"] == ocean_name][\n",
+ " \"id\"\n",
+ " ].values\n",
+ " gauge_index = get_index_array(ds_q[\"id\"].values, id_list)\n",
+ " dr_obs = dr_q_obs_seasonal.isel(station=gauge_index).sum(dim=\"station\")\n",
+ " dr_obs.plot(\n",
+ " ax=axes[row, col],\n",
+ " linestyle=\"None\",\n",
+ " marker=\"o\",\n",
+ " markersize=2,\n",
+ " c=\"k\",\n",
+ " label=\"D19\",\n",
+ " )\n",
+ "\n",
+ " axes[row, col].set_title(\"%d %s\" % (ix + 1, ocean_name), fontsize=9)\n",
+ " axes[row, col].set_xlabel(\"\")\n",
+ " if row < 7:\n",
+ " axes[row, col].set_xticklabels(\"\")\n",
+ " if col == 0:\n",
+ " axes[row, col].set_ylabel(\"Mon. flow [m$^3$/s]\", fontsize=9)\n",
+ " else:\n",
+ " axes[row, col].set_ylabel(\"\")\n",
+ " axes[row, col].tick_params(\"both\", labelsize=\"x-small\")\n",
+ "\n",
+ "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n",
+ "axes[row, col].legend(\n",
+ " loc=\"center left\", bbox_to_anchor=(1.10, 0.40, 0.75, 0.1), ncol=1, fontsize=\"small\"\n",
+ ")\n",
+ "\n",
+ "for jx in range(ix + 1, nrows * ncols):\n",
+ " row = jx // 2\n",
+ " col = jx % 2\n",
+ " fig.delaxes(axes[row][col])\n",
+ "\n",
+ "if figureSave:\n",
+ " plt.savefig(f\"./NB2_Fig1_ocean_discharge_season_{analysis_name}.png\", dpi=200)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "cupid-analysis",
+ "language": "python",
+ "name": "cupid-analysis"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/nblibrary/rof/scripts/colors.py b/examples/nblibrary/rof/scripts/colors.py
new file mode 100644
index 0000000..71c5619
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/colors.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+import matplotlib as mpl
+from matplotlib.colors import LinearSegmentedColormap
+
+# create colormaps
+# ---------------
+cmap_RedGrayBlue = LinearSegmentedColormap.from_list(
+ "custom blue",
+ [
+ (0, "xkcd:red"),
+ (0.05, "xkcd:orange"),
+ (0.50, "xkcd:light grey"),
+ (0.65, "xkcd:sky blue"),
+ (1, "xkcd:blue"),
+ ],
+ N=15,
+)
+
+# %bias
+# boundaries must be monotonically increasing for BoundaryNorm
+vals_pbias = [-50, -40, -30, -20, -10, 10, 20, 30, 40, 50]
+cmap_pbias = LinearSegmentedColormap.from_list(
+ "custom1",
+ [(0.0, "xkcd:red"), (0.5, "xkcd:light grey"), (1.0, "xkcd:blue")],
+ N=11,
+)
+cmap_pbias.set_over("xkcd:dark blue")
+cmap_pbias.set_under("xkcd:dark red")
+norm_pbias = mpl.colors.BoundaryNorm(vals_pbias, cmap_pbias.N)
+
+
+# corr
+vals_corr = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+cmap_corr = LinearSegmentedColormap.from_list(
+ "custom1",
+ [(0.00, "xkcd:yellow"), (0.50, "xkcd:green"), (1.00, "xkcd:blue")],
+ N=9,
+)
+cmap_corr.set_over("xkcd:dark blue")
+cmap_corr.set_under("xkcd:brown")
+norm_corr = mpl.colors.BoundaryNorm(vals_corr, cmap_corr.N)
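+
+# Example usage (a sketch; lon, lat, and pbias_vals are placeholder arrays):
+#   import matplotlib.pyplot as plt
+#   fig, ax = plt.subplots()
+#   sc = ax.scatter(lon, lat, c=pbias_vals, cmap=cmap_pbias, norm=norm_pbias)
+#   fig.colorbar(sc, ax=ax, extend="both")  # set_over/set_under colors fill the arrows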
diff --git a/examples/nblibrary/rof/scripts/metrics.py b/examples/nblibrary/rof/scripts/metrics.py
new file mode 100644
index 0000000..263c28d
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/metrics.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+import numpy as np
+
+# ----- time series error
+# Many other error metrics (e.g., KGE, NSE, MAE) and flow metrics
+# (flow duration curve, annual maximum flow and its timing, etc.) exist;
+# only percentage bias and RMSE are implemented here.
+
+
+def remove_nan(qsim, qobs):
+ """
+    Compare simulated and observed time series, then drop every time step
+    at which either series (or both) has NaN.
+
+ Arguments
+ ---------
+ qsim: array-like
+ Simulated time series array.
+ qobs: array-like
+ Observed time series array.
+
+ Returns
+ -------
+    (qsim, qobs): tuple of arrays
+        Simulated and observed arrays with NaN time steps removed.
+ """
+
+ sim_obs = np.stack((qsim, qobs), axis=1)
+ sim_obs = sim_obs[~np.isnan(sim_obs).any(axis=1), :]
+ return sim_obs[:, 0], sim_obs[:, 1]
+
+
+def pbias(qsim, qobs):
+ """
+ Calculates percentage bias between two flow arrays.
+
+ Arguments
+ ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+ Observed time series array.
+
+ Returns
+ -------
+    pbias: float
+        Percentage bias between the two arrays.
+ """
+
+ qsim1, qobs1 = remove_nan(qsim, qobs)
+ return np.sum(qsim1 - qobs1) / np.sum(qobs1) * 100
+
+
+def rmse(qsim, qobs):
+ """
+ Calculates root mean squared of error (rmse) between two time series arrays.
+ Arguments
+ ---------
+    qsim: array-like
+        Simulated time series array.
+    qobs: array-like
+ Observed time series array.
+
+ Returns
+ -------
+ rmse: float
+ rmse calculated between the two arrays.
+ """
+ qsim1, qobs1 = remove_nan(qsim, qobs)
+ return np.sqrt(np.mean((qsim1 - qobs1) ** 2))
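+
+
+# Example usage (a sketch with made-up arrays; NaN pairs are dropped first):
+#   >>> import numpy as np
+#   >>> qsim = np.array([1.0, 2.0, np.nan, 4.0])
+#   >>> qobs = np.array([1.5, np.nan, 3.0, 4.0])
+#   >>> pbias(qsim, qobs)   # compares (1.0, 4.0) against (1.5, 4.0)
+#   -9.0909...
+#   >>> rmse(qsim, qobs)
+#   0.3535...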
diff --git a/examples/nblibrary/rof/scripts/utility.py b/examples/nblibrary/rof/scripts/utility.py
new file mode 100644
index 0000000..bb13d19
--- /dev/null
+++ b/examples/nblibrary/rof/scripts/utility.py
@@ -0,0 +1,88 @@
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import yaml
+from pyogrio import read_dataframe
+
+
+# tomli may not be installed; fall back to a stub that fails with a clear message
+try:
+    import tomli
+
+    def load_toml(toml_file) -> dict:
+        """Load TOML data from file"""
+        with open(toml_file, "rb") as f:
+            return tomli.load(f)
+
+except ImportError:
+
+    def load_toml(toml_file) -> dict:
+        """Fallback used when tomli is not installed"""
+        raise ImportError("load_toml requires the 'tomli' package")
+
+
+def load_yaml(yaml_file) -> dict:
+ """Load yaml data from file"""
+ with open(yaml_file) as ymlfile:
+ return yaml.load(ymlfile, Loader=yaml.FullLoader)
+
+
+class AutoVivification(dict):
+ """Implementation of perl's autovivification feature."""
+
+ def __getitem__(self, item):
+ try:
+ return dict.__getitem__(self, item)
+ except KeyError:
+ value = self[item] = type(self)()
+ return value
+
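+
+# Example (sketch): nested keys are created on first access, so
+#   d = AutoVivification(); d["caseA"]["Amazon"] = 1.0
+# works without pre-creating d["caseA"].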
+
+def read_shps(shp_list, usecols, **kwargs):
+ """Faster load shapefiles with selected attributes in dataframe"""
+ gdf_frame = []
+ for shp in shp_list:
+ gdf_frame.append(read_dataframe(shp, columns=usecols))
+ print("Finished reading %s" % shp.strip("\n"))
+ return pd.concat(gdf_frame)
+
+
+def get_index_array(a_array, b_array):
+ """
+    Get an index array whose elements point to locations in a_array, ordered
+    to correspond to b_array. Entries of b_array not found in a_array are
+    masked in the result.
+
+ e.g.,
+ a_array = [2, 4, 1, 8, 3, 10, 5, 9, 7, 6]
+ b_array = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ result = [2, 0, 4, 1, 6, 9, 8, 3, 7, 5]
+
+ https://stackoverflow.com/questions/8251541/numpy-for-every-element-in-one-array-find-the-index-in-another-array
+ """
+ index = np.argsort(a_array)
+ sorted_a_array = a_array[index]
+ sorted_index = np.searchsorted(sorted_a_array, b_array)
+
+ yindex = np.take(index, sorted_index, mode="clip")
+ mask = a_array[yindex] != b_array
+
+ result = np.ma.array(yindex, mask=mask)
+
+ return result
+
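+# Worked example (sketch): entries of b_array absent from a_array come back
+# masked, e.g. get_index_array(np.array([2, 4, 1]), np.array([1, 3]))
+# -> masked array [2, --], because 3 is not in a_array.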
+
+def reorder_index(ID_array_orig, ID_array_target):
+ x = ID_array_orig
+ # Find the indices of the reordered array. See link for more details:
+ # https://stackoverflow.com/questions/8251541/numpy-for-every-element-in-one-array-find-the-index-in-another-array
+ index = np.argsort(x)
+ sorted_x = x[index]
+ sorted_index = np.searchsorted(sorted_x, ID_array_target)
+
+ return np.take(index, sorted_index, mode="clip")
+
+
+def no_time_variable(ds):
+    """Return names of variables in ds that have no time dimension and are not coordinates."""
+    vars_without_time = []
+    for var in ds.variables:
+        if "time" not in ds[var].dims and var not in list(ds.coords):
+            vars_without_time.append(var)
+    return vars_without_time
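+
+
+# Example usage (a sketch; the relative path is illustrative):
+#   setup = load_yaml("../setup/setup.yaml")
+#   print(setup["case_meta"]["f09_f09_mosart"]["network_type"])  # -> "grid"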
diff --git a/examples/nblibrary/rof/setup/large_river_50.txt b/examples/nblibrary/rof/setup/large_river_50.txt
new file mode 100644
index 0000000..b2c777a
--- /dev/null
+++ b/examples/nblibrary/rof/setup/large_river_50.txt
@@ -0,0 +1,51 @@
+site_id,river_name
+81168,Amazon
+9894,Congo
+9910,Orinoco
+9306,Changjiang
+9270,Brahmaputra
+9598,Mississippi
+90973,Yenisey
+35021,Parana
+91050,Lena
+9903,Mekong
+81329,Tocantins
+91082,Ob
+9227,Ganges
+9900,Irrawaddy
+9550,St Lawrence
+90271,Amur
+10224,Mackenzie
+9301,Xijiang
+9560,Columbia
+9400,Magdalena
+9370,Uruguay
+10623,Yukon
+9763,Danube
+9052,Niger
+81217,Xingu
+9403,Atrato
+81192,Tapajos
+9038,Ogooue
+9360,Essequibo
+9504,Fraser
+91103,Pechora
+9483,Nelson
+95213,Khatanga
+9736,Sepik
+95067,Kolyma
+9114,Zambeze
+91128,Severnaya Dvina
+9275,Indus
+9079,Sanaga
+9226,Godavari
+10006,Rajang
+81748,Sao Francisco
+9626,Usumacinta
+9366,Maroni
+9909,Rhine
+9733,Purari
+9459,Caniapiscau
+9232,Mahanadi
+9558,Sacramento
+9789,Elbe
diff --git a/examples/nblibrary/rof/setup/setup.yaml b/examples/nblibrary/rof/setup/setup.yaml
new file mode 100644
index 0000000..32a0b46
--- /dev/null
+++ b/examples/nblibrary/rof/setup/setup.yaml
@@ -0,0 +1,145 @@
+## Configurations used for ctsm-mizuRoute
+
+# Directories
+geospatial_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/rof_data/geospatial
+ancillary_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/rof_data/ancillary_data
+ref_flow_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/rof_data/obs
+
+# case metadata dictionary (TODO: consider removing color)
+# top-level key -> grid name
+# network -> name of river network
+# model -> mosart or mizuroute
+# network_type -> grid or vector
+# domain_nc -> domain netCDF file (grid networks only; otherwise None)
+# vars_read -> history variables read in
+# flow_name -> history variable holding routed runoff
+# runoff_name -> history variable holding instantaneous runoff from the land model
+# color -> color used for plotting
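+# NOTE: notebook parameters such as grid_name: 'f09_f09_mosart' must match
+# one of the top-level keys below (the notebooks look up case_meta[grid_name])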
+case_meta:
+ rHDMAlk:
+ network: HDMA_lake
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:red'
+ rHDMAlk_h06:
+ network: HDMA_lake_h06
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:green'
+ rHDMA:
+ network: HDMA
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:blue'
+ rMERIT:
+ network: MERIT_Hydro
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:green'
+ f09_f09:
+ network: mosart
+ model: mizuroute
+ network_type: vector
+ domain_nc: None
+ vars_read:
+ - DWroutedRunoff
+ - basRunoff
+ flow_name: DWroutedRunoff
+ runoff_name: basRunoff
+ color: 'xkcd:green'
+ f09_f09_mosart:
+ network: mosart
+ model: mosart
+ network_type: grid
+ domain_nc: mosart0.5_domain.nc
+ vars_read:
+ - RIVER_DISCHARGE_OVER_LAND_LIQ
+ - RIVER_DISCHARGE_OVER_LAND_ICE
+ flow_name: RIVER_DISCHARGE_OVER_LAND_LIQ
+ runoff_name: None
+ color: 'xkcd:grey'
+
+# river flowline geopackage metadata
+# sub-key is grid name
+reach_gpkg:
+ rHDMA:
+ file_name: hdma_global_stream.gpkg
+ id_name: seg_id
+ down_id_name: Tosegment
+ rHDMAlk:
+ file_name: hdma_global_stream.gpkg
+ id_name: seg_id
+ down_id_name: Tosegment
+ rMERIT:
+ file_name: rivEndMERIT_simplified_0003.gpkg
+ id_name: COMID
+ down_id_name: NextDownID
+ f09_f09_mosart:
+ file_name: MOSART_routing_Global_0.5x0.5_c170601_flowline.gpkg
+ id_name: seg_id
+ down_id_name: Tosegment
+ f09_f09:
+ file_name: MOSART_routing_Global_0.5x0.5_c170601_flowline.gpkg
+ id_name: seg_id
+ down_id_name: Tosegment
+
+# catchment geopackage metadata
+# sub-key is grid name
+catch_gpkg:
+ rHDMA:
+ file_name: hdma_global_catch_v2_0.01.gpkg
+ id_name: hruid
+ rHDMAlk:
+ file_name: hdma_hydrolake_global_catch_v1_0.01.gpkg
+ id_name: ID
+ rMERIT:
+ file_name: catEndoMERIT.gpkg
+ id_name: hruid
+ f09_f09_mosart:
+ file_name: MOSART_routing_Global_0.5x0.5_c170601_hru.gpkg
+ id_name: hruid
+ f09_f09:
+ file_name: MOSART_routing_Global_0.5x0.5_c170601_hru.gpkg
+ id_name: hruid
+
+# river network topology data (augmented mizuRoute format)
+# sub-key is grid name
+river_network:
+ rHDMA:
+ file_name: ntopo_HDMA.v2.aug.nc
+ reach_ID: seg_id
+ rHDMAlk:
+ file_name: Network_topology_HDMA_HydroLake_v3.aug.nc
+ reach_ID: seg_id
+ rMERIT:
+ file_name: ntopo_MERIT_Hydro_v1.aug.nc
+ reach_ID: seg_id
+ f09_f09_mosart:
+ file_name: mizuRoute_MOSART_routing_Global_0.5x0.5_c170601.aug.nc
+ reach_ID: seg_id
+ f09_f09:
+ file_name: mizuRoute_MOSART_routing_Global_0.5x0.5_c170601.aug.nc
+ reach_ID: seg_id