diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..5286e6d --- /dev/null +++ b/.pylintrc @@ -0,0 +1,4 @@ +[MESSAGES CONTROL] +disable=invalid-name,bad-continuation,too-many-lines,superfluous-parens,too-many-arguments,too-many-branches,too-many-statements,too-many-locals,too-few-public-methods,too-many-instance-attributes +[TYPECHECK] +ignored-modules=numpy,netCDF4,scipy,scipy.spatial \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 279d620..f8c742d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -64,8 +64,6 @@ before_install: #----------------------------------------------------------------------------- - conda config --add channels conda-forge - conda install --yes cmake gdal future netcdf4 numpy pandas pangaea pyproj pytz requests rtree scipy shapely -- conda install --yes pytest-cov -- pip install coveralls - source deactivate rapid #------------------------------------------------------------------------------- @@ -81,9 +79,8 @@ before_install: - git clone https://github.com/c-h-david/rapid.git --branch 20161221 - cd rapid # Installing RAPID Prereqs -- chmod +x rapid_install_prereqs.sh -- ./rapid_install_prereqs.sh -i=$INSTALLZ_DIR -- source ./rapid_specify_varpath.sh $INSTALLZ_DIR +- bash rapid_install_prereqs.sh -i=$INSTALLZ_DIR +- source rapid_specify_varpath.sh $INSTALLZ_DIR # Building RAPID - cd src - make rapid @@ -92,7 +89,6 @@ before_install: #Installing TauDEM #------------------------------------------------------------------------------- - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - wget http://repo.continuum.io/miniconda/Miniconda${TRAVIS_PYTHON_VERSION:0:1}-latest-Linux-x86_64.sh -O miniconda.sh ; cd $TRAVIS_BUILD_DIR/.. ; git clone https://github.com/dtarb/TauDEM.git ; cd TauDEM/src ; @@ -107,13 +103,14 @@ before_install: #******************************************************************************* install: - cd $TRAVIS_BUILD_DIR -- python setup.py install +- pip install -e .[tests] #******************************************************************************* #Testing RAPIDpy #******************************************************************************* -- cd $TRAVIS_BUILD_DIR/tests script: - py.test --cov-report term-missing --cov=RAPIDpy +- flake8 RAPIDpy +- pylint RAPIDpy ##ADD Coveralls stats for code coverage after_success: - coveralls diff --git a/RAPIDpy/__init__.py b/RAPIDpy/__init__.py index f2e54c7..f68246a 100644 --- a/RAPIDpy/__init__.py +++ b/RAPIDpy/__init__.py @@ -1,3 +1,9 @@ # -*- coding: utf-8 -*- -from .rapid import RAPID -from .dataset import RAPIDDataset \ No newline at end of file +""" + RAPIDpy + + Created by Alan D Snow, 2015. + License: BSD 3-Clause +""" +from .rapid import RAPID # noqa +from .dataset import RAPIDDataset # noqa diff --git a/RAPIDpy/dataset.py b/RAPIDpy/dataset.py index d2d6ecd..a6afe8b 100644 --- a/RAPIDpy/dataset.py +++ b/RAPIDpy/dataset.py @@ -1,31 +1,28 @@ # -*- coding: utf-8 -*- -'''RAPIDDataset docstrings - -''' -## -# dataset.py -# RAPIDpy -# -# Created by Alan D Snow. -# Copyright © 2016 Alan D Snow. All rights reserved. -# BSD 3-Clause +""" + dataset.py + RAPIDpy + Created by Alan D Snow, 2016. 
+ License: BSD-3-Clause +""" from csv import writer as csv_writer import datetime + from netCDF4 import Dataset, num2date import numpy as np from numpy.ma import is_masked import pandas as pd -from past.builtins import xrange +from past.builtins import xrange # pylint: disable=redefined-builtin from pytz import utc -# local import + from .helper_functions import log, open_csv -# ------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- # Helper Function -# ------------------------------------------------------------------------------ -def compare_qout_files(dataset1_path, dataset2_path, Qout_var="Qout"): +# ----------------------------------------------------------------------------- +def compare_qout_files(dataset1_path, dataset2_path): """ This function compares the output of RAPID Qout and tells you where they are different. @@ -71,9 +68,9 @@ def compare_qout_files(dataset1_path, dataset2_path, Qout_var="Qout"): decimal_test = -1 except AssertionError as ex: if decimal_test <= 1: - print(ex) + log(ex, "WARNING") decimal_test -= 1 - pass + log("Number of different timeseries: {0}".format(len(un_where_diff)), "INFO") log("COMID idexes where different: {0}".format(un_where_diff), @@ -106,14 +103,27 @@ class RAPIDDataset(object): This class is designed to access data from the RAPID Qout NetCDF file. - Attributes: - filename(str): Path to the RAPID Qout NetCDF file. - river_id_dimension(Optional[str]): Name of the river ID dimension. Default is to search through a standard list. - river_id_variable(Optional[str]): Name of the river ID variable. Default is to search through a standard list. - streamflow_variable(Optional[str]): Name of the streamflow varaible. Default is to search through a standard list. - datetime_simulation_start(Optional[datetime]): This is a datetime object with the date of the simulation start time. - simulation_time_step_seconds(Optional[integer]): This is the time step of the simulation output in seconds. - out_tzinfo(Optional[tzinfo]): Time zone to output data as. The dates will be converted from UTC to the time zone input. Default is UTC. + Attributes + ---------- + filename: str + Path to the RAPID Qout NetCDF file. + river_id_dimension: str, optional + Name of the river ID dimension. Default is to search through + a pre-defined list. + river_id_variable: str, optional + Name of the river ID variable. Default is to search through + a pre-defined list. + streamflow_variable: str, optional + Name of the streamflow varaible. Default is to search through + a pre-defined list. + datetime_simulation_start: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the simulation start time. + simulation_time_step_seconds: int, optional + This is the time step of the simulation output in seconds. + out_tzinfo: tzinfo, optional + Time zone to output data as. The dates will be converted from UTC + to the time zone input. Default is UTC. 
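As a supplementary sketch of the optional attributes documented above (the file path, field names, and time zone below are placeholders, not values taken from this changeset), a Qout file that does not follow the default naming conventions can be opened roughly like this:

.. code:: python

    from datetime import datetime

    from pytz import timezone

    from RAPIDpy import RAPIDDataset

    # hypothetical legacy file with non-standard dimension/variable names
    with RAPIDDataset('/path/to/Qout_legacy.nc',
                      river_id_dimension='COMID',
                      river_id_variable='COMID',
                      streamflow_variable='Qout',
                      datetime_simulation_start=datetime(1980, 1, 1),
                      simulation_time_step_seconds=3 * 3600,
                      out_tzinfo=timezone('US/Central')) as qout_nc:
        # use the access functions documented below
        river_ids = qout_nc.get_river_id_array()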
+ Example:: @@ -124,7 +134,7 @@ class RAPIDDataset(object): #USE FUNCTIONS TO ACCESS DATA HERE """ - + # pylint: disable=too-many-instance-attributes def __init__(self, filename, river_id_dimension="", river_id_variable="", @@ -151,9 +161,9 @@ def __init__(self, filename, elif 'FEATUREID' in self.qout_nc.dimensions: self.river_id_dimension = 'FEATUREID' else: - raise IndexError('ERROR: Could not find river ID dimension.') + raise IndexError('Could not find river ID dimension.') elif river_id_dimension not in self.qout_nc.dimensions: - raise IndexError('ERROR: Could not find river ID dimension:' + raise IndexError('Could not find river ID dimension:' ' {0}.'.format(river_id_dimension)) self.size_river_id = len(self.qout_nc @@ -174,7 +184,7 @@ def __init__(self, filename, raise IndexError('ERROR: Could not find flow variable.' ' Looked for Qout, streamflow, and m3_riv.') elif streamflow_variable not in variable_keys: - raise IndexError('ERROR: Could not find flow variable.' + raise IndexError('Could not find flow variable.' ' Looked for {0}.'.format(streamflow_variable)) self.size_q_var = len(self.qout_nc.variables[self.q_var_name]) @@ -185,7 +195,7 @@ def __init__(self, filename, elif 'Time' in self.qout_nc.dimensions: self.size_time = len(self.qout_nc.dimensions['Time']) else: - raise IndexError('ERROR: Could not find time dimension.') + raise IndexError('Could not find time dimension.') # determine river ID variable self.river_id_variable = river_id_variable @@ -201,11 +211,13 @@ def __init__(self, filename, elif 'FEATUREID' in variable_keys: self.river_id_variable = 'FEATUREID' else: - print('WARNING: Could not find river ID variable' - ' in {0}.'.format(variable_keys)) + log('Could not find river ID variable' + ' in {0}.'.format(variable_keys), + "WARNING") elif river_id_variable not in variable_keys: - print('WARNING: Could not find river ID variable:' - ' {0}.'.format(river_id_variable)) + log('Could not find river ID variable:' + ' {0}.'.format(river_id_variable), + "WARNING") self.out_tzinfo = out_tzinfo self.datetime_simulation_start = datetime_simulation_start @@ -218,11 +230,17 @@ def __exit__(self, exc_type, exc_value, traceback): self.close() def close(self): + """Close the dataset.""" self.qout_nc.close() def _is_legacy_time_valid(self): """ - This determines whether or not legacy time is set correctly + This determines whether or not legacy time is set correctly. + + Returns + ------- + boolean: + True if the legacy time is setup correctly, otherwise false. """ return self.datetime_simulation_start is not None and \ self.simulation_time_step_seconds is not None @@ -232,8 +250,11 @@ def is_time_variable_valid(self): This function returns whether or not the time variable is valid. - Returns: - boolean: True if the time variable is valid, otherwise false. + Returns + ------- + boolean + True if the time variable is valid, otherwise false. + Example:: @@ -243,7 +264,9 @@ def is_time_variable_valid(self): with RAPIDDataset(path_to_rapid_qout) as qout_nc: if qout_nc.is_time_variable_valid(): #DO WORK HERE + """ + # pylint: disable=len-as-condition time_var_valid = False if 'time' in self.qout_nc.variables.keys(): if len(self.qout_nc.dimensions['time']) > 0: @@ -264,6 +287,12 @@ def is_time_variable_valid(self): return time_var_valid + def raise_time_valid(self): + """Raise ValueError if time not valid""" + if not (self.is_time_variable_valid() or self._is_legacy_time_valid()): + raise IndexError("Valid time variable not found. 
Valid time" + " variable required in Qout file to proceed ...") + def get_time_array(self, datetime_simulation_start=None, simulation_time_step_seconds=None, @@ -275,16 +304,31 @@ def get_time_array(self, However, the old version requires the user to know when the simulation began and the time step of the output. - Parameters: - return_datetime(Optional[boolean]): If true, it converts the data to a list of datetime objects. Default is False. - time_index_array(Optional[list or np.array]): This is used to extract the datetime vales. This can be from the *get_time_index_range* function. - - Returns: - list: An array of integers representing seconds since Jan 1, 1970 UTC or datetime objects if return_datetime is set to True. + Parameters + ---------- + datetime_simulation_start: :obj:`datetime.datetime`, optional + The start datetime of the simulation. Only required if the time + variable is not included in the file. + simulation_time_step_seconds: int, optional + The time step of the file in seconds. Only required if the time + variable is not included in the file. + return_datetime: bool, optional + If true, it converts the data to a list of datetime objects. + Default is False. + time_index_array: list or :obj:`numpy.array`, optional + This is used to extract the datetime values by index from the main + list. This can be from the *get_time_index_range* function. + + Returns + ------- + list: + An array of integers representing seconds since Jan 1, 1970 UTC + or datetime objects if *return_datetime* is set to True. These examples demonstrates how to retrieve or generate a time array to go along with your RAPID streamflow series. + CF-Compliant Qout File Example: .. code:: python @@ -308,8 +352,9 @@ def get_time_array(self, path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout, - datetime_simulation_start=datetime_simulation_start, - simulation_time_step_seconds=simulation_time_step_seconds) as qout_nc: + datetime_simulation_start=datetime(1980, 1, 1), + simulation_time_step_seconds=3 * 3600)\ + as qout_nc: #retrieve integer timestamp array time_array = qout_nc.get_time_array() @@ -324,8 +369,6 @@ def get_time_array(self, if simulation_time_step_seconds is not None: self.simulation_time_step_seconds = simulation_time_step_seconds - time_array = [] - epoch = datetime.datetime(1970, 1, 1, tzinfo=utc) time_units = "seconds since {0}".format(epoch) @@ -348,10 +391,10 @@ def get_time_array(self, final_time_seconds, self.simulation_time_step_seconds) else: - raise Exception("ERROR: This file does not contain the time" - " variable. To get time array, add" - " datetime_simulation_start and" - " simulation_time_step_seconds") + raise ValueError("This file does not contain the time" + " variable. To get time array, add" + " datetime_simulation_start and" + " simulation_time_step_seconds") if time_index_array is not None: time_array = time_array[time_index_array] @@ -378,15 +421,29 @@ def get_time_index_range(self, Generates a time index range based on time bounds given. This is useful for subset data extraction. - Parameters: - date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. - date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. - time_index_start(Optional[int]): This is the index of the start of the time array subset. Useful for the old file version. - time_index_end(Optional[int]): This is the index of the end of the time array subset. 
Useful for the old file version. - time_index(Optional[int]): This is the index of time to return in the case that your code only wants one index. Used internally. + Parameters + ---------- + date_search_start: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the minimum date for + starting. + date_search_end: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the maximum date + for ending. + time_index_start: int, optional + This is the index of the start of the time array subset. + Useful for the old file version. + time_index_end: int, optional + This is the index of the end of the time array subset. + Useful for the old file version. + time_index: int, optional + This is the index of time to return in the case that your + code only wants one index. Used internally. + + Returns + ------- + :obj:`numpy.array`: + This is an array of time indices used to extract a subset of data. - Returns: - index_array: This is an array used to extract a subset of data. CF-Compliant Qout File Example: @@ -397,8 +454,9 @@ def get_time_index_range(self, path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: - time_index_range = qout_nc.get_time_index_range(date_search_start=datetime(1980, 1, 1), - date_search_end=datetime(1980, 12, 11)) + time_index_range = qout_nc.get_time_index_range( + date_search_start=datetime(1980, 1, 1), + date_search_end=datetime(1980, 12, 11)) Legacy Qout File Example: @@ -413,8 +471,10 @@ def get_time_index_range(self, datetime_simulation_start=datetime(1980, 1, 1), simulation_time_step_seconds=3600) as qout_nc: - time_index_range = qout_nc.get_time_index_range(date_search_start=datetime(1980, 1, 1), - date_search_end=datetime(1980, 12, 11)) + time_index_range = qout_nc.get_time_index_range( + date_search_start=datetime(1980, 1, 1), + date_search_end=datetime(1980, 12, 11)) + """ # get the range of time based on datetime range time_range = None @@ -422,8 +482,9 @@ def get_time_index_range(self, (date_search_start is not None or date_search_end is not None)): - print("Determining time range ({0} to {1})" - "...".format(date_search_start, date_search_end)) + log("Determining time range ({0} to {1})" + "...".format(date_search_start, date_search_end), + "INFO") time_array = self.get_time_array() if date_search_start is not None: date_search_start_utc = date_search_start @@ -474,8 +535,11 @@ def get_river_id_array(self): """ This method returns the river ID array for this file. - Returns: - numpy.array: An array of the river ID's + Returns + ------- + :obj:`numpy.array`: + An array of the river ID's + Example:: @@ -493,8 +557,16 @@ def get_river_index(self, river_id): This method retrieves the river index in the netCDF dataset corresponding to the river ID. - Returns: - int: The index of the river ID's in the file + Parameters + ---------- + river_id: int + The ID of the river segment. + + Returns + ------- + int: + The index of the river ID's in the file. 
+ Example:: @@ -505,6 +577,7 @@ def get_river_index(self, river_id): with RAPIDDataset(path_to_rapid_qout) as qout_nc: river_index = qout_nc.get_river_index(river_id) + """ try: return np.where(self.get_river_id_array() == river_id)[0][0] @@ -517,6 +590,22 @@ def get_subset_riverid_index_list(self, river_id_list): Gets the subset riverid_list from the netcdf file Optional returns include the list of valid river ids in the dataset as well as a list of missing rive rids + + Parameters + ---------- + river_id_list: list or :obj:`numpy.array` + Array of river ID's for the river segments you want the index of. + + Returns + ------- + :obj:`numpy.array` + A sorted array of the river index in the NetCDF file that + were found. + :obj:`numpy.array` + A sorted array of the river IDs that were found. + list + An array of the missing river ids. + """ netcdf_river_indices_list = [] valid_river_ids = [] @@ -528,10 +617,10 @@ def get_subset_riverid_index_list(self, river_id_list): .append(self.get_river_index(river_id)) valid_river_ids.append(river_id) except IndexError: - print("WARNING: ReachID {0} not found in netCDF dataset." - " Skipping ...".format(river_id)) + log("ReachID {0} not found in netCDF dataset." + " Skipping ...".format(river_id), + "WARNING") missing_river_ids.append(river_id) - pass np_valid_river_indices_list = np.array(netcdf_river_indices_list) np_valid_river_ids = np.array(valid_river_ids) @@ -541,7 +630,8 @@ def get_subset_riverid_index_list(self, river_id_list): np_valid_river_ids[sorted_indexes], np.array(missing_river_ids)) - def get_qout(self, river_id_array=None, + def get_qout(self, + river_id_array=None, date_search_start=None, date_search_end=None, time_index_start=None, @@ -550,26 +640,52 @@ def get_qout(self, river_id_array=None, time_index_array=None, daily=False, pd_filter=None, - daily_mode="mean"): + filter_mode="mean", + as_dataframe=False): """ This method extracts streamflow data by a single river ID or by a river ID array. It has options to extract by date or by date index. - Parameters: - river_id_array(Optional[list or int]): A single river ID or an array of river IDs. - date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. - date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. - time_index_start(Optional[int]): This is the index of the start of the time array subset. Useful for the old file version. - time_index_end(Optional[int]): This is the index of the end of the time array subset. Useful for the old file version. - time_index(Optional[int]): This is the index of time to return in the case that your code only wants one index. Used internally. - time_index_array(Optional[list or np.array]): This is used to extract the vales only for particular dates. This can be from the *get_time_index_range* function. - daily(Optional[bool]): If true, this will convert qout to daily average. - pd_filter(Optional[str]): This is a valid pandas resample frequency filter. - filter_mode(Optional[str]): You can get the daily average "mean" or the maximum "max". Default is "mean". - - Returns: - numpy.array: This is a 1D or 2D array or a single value depending on your input search. + Parameters + ---------- + river_id_array: :obj:`numpy.array` or list or int, optional + A single river ID or an array of river IDs. + date_search_start: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the minimum date + for starting. 
+ date_search_end: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the maximum date + for ending. + time_index_start: int, optional + This is the index of the start of the time array subset. + Useful for the old file version. + time_index_end: int, optional + This is the index of the end of the time array subset. + Useful for the old file version. + time_index: int, optional + This is the index of time to return in the case that your + code only wants one index. Used internally. + time_index_array: list or :obj:`numpy.array`, optional + This is used to extract the vales only for particular dates. + This can be from the *get_time_index_range* function. + daily: bool, optional + If true, this will convert qout to daily average. + pd_filter: str, optional + This is a valid pandas resample frequency filter. + filter_mode: str, optional + You can get the daily average "mean" or the maximum "max". + Default is "mean". + as_dataframe: bool, optional + Return as a pandas dataframe object. Default is False. + + + Returns + ------- + qout_array: :obj:`numpy.array` + This is a 1D or 2D array or a single value depending on your + input search. + This example demonstrates how to retrieve the streamflow associated with the reach you are interested in:: @@ -589,9 +705,10 @@ def get_qout(self, river_id_array=None, path_to_rapid_qout = '/path/to/Qout.nc' river_id = 500 with RAPIDDataset(path_to_rapid_qout) as qout_nc: - streamflow_array = qout_nc.get_qout(river_id, - date_search_start=datetime(1985,1,1), - date_search_end=datetime(1985,2,4)) + streamflow_array = qout_nc.get_qout( + river_id, + date_search_start=datetime(1985,1,1), + date_search_end=datetime(1985,2,4)) """ # get indices of where the streamflow data is @@ -611,9 +728,11 @@ def get_qout(self, river_id_array=None, time_index_array, daily, pd_filter, - daily_mode) + filter_mode, + as_dataframe) - def get_qout_index(self, river_index_array=None, + def get_qout_index(self, + river_index_array=None, date_search_start=None, date_search_end=None, time_index_start=None, @@ -622,11 +741,14 @@ def get_qout_index(self, river_index_array=None, time_index_array=None, daily=False, pd_filter=None, - filter_mode="mean"): + filter_mode="mean", + as_dataframe=False): """ - This method extracts streamflow data by river index + This method extracts streamflow data by river index. It allows for extracting single or multiple river streamflow arrays - It has options to extract by date or by date index + It has options to extract by date or by date index. 
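Because *as_dataframe* and *filter_mode* are new in this changeset and are not exercised by the surrounding examples, here is a minimal hedged sketch of pulling daily mean flows as a pandas DataFrame (the path and river ID are placeholders):

.. code:: python

    from RAPIDpy import RAPIDDataset

    with RAPIDDataset('/path/to/Qout.nc') as qout_nc:
        # returns a pandas DataFrame indexed by datetime
        qout_df = qout_nc.get_qout(river_id_array=500,
                                   daily=True,
                                   filter_mode="mean",
                                   as_dataframe=True)
        print(qout_df.head())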
+ + See: :meth:`RAPIDpy.RAPIDDataset.get_qout` """ if river_index_array is not None: if hasattr(river_index_array, "__len__"): @@ -642,7 +764,6 @@ def get_qout_index(self, river_index_array=None, qout_variable = self.qout_nc.variables[self.q_var_name] qout_dimensions = qout_variable.dimensions - streamflow_array = [] if qout_dimensions[0].lower() == 'time' and \ qout_dimensions[1].lower() == self.river_id_dimension.lower(): if time_index_array is not None and river_index_array is not None: @@ -673,19 +794,24 @@ def get_qout_index(self, river_index_array=None, if daily: pd_filter = "D" - if pd_filter is not None: + if pd_filter is not None or as_dataframe: time_array = self.get_time_array(return_datetime=True, time_index_array=time_index_array) - df = pd.DataFrame(streamflow_array.T, index=time_array) \ - .resample(pd_filter) - if filter_mode == "mean": - df = df.mean() - elif filter_mode == "max": - df = df.max() - else: - raise Exception("Invalid filter_mode ...") + qout_df = pd.DataFrame(streamflow_array.T, index=time_array) + + if pd_filter is not None: + qout_df = qout_df.resample(pd_filter) + if filter_mode == "mean": + qout_df = qout_df.mean() + elif filter_mode == "max": + qout_df = qout_df.max() + else: + raise Exception("Invalid filter_mode ...") - streamflow_array = df.as_matrix().T + if as_dataframe: + return qout_df + + streamflow_array = qout_df.as_matrix().T if streamflow_array.ndim > 0 and streamflow_array.shape[0] == 1: streamflow_array = streamflow_array[0] @@ -698,21 +824,34 @@ def write_flows_to_csv(self, path_to_output_file, date_search_start=None, date_search_end=None, daily=False, - mode="mean"): + filter_mode="mean"): """ Write out RAPID output to CSV file. - .. note:: Need either *reach\_id* or *reach\_index* parameter, + .. note:: Need either *reach_id* or *reach_index* parameter, but either can be used. - Parameters: - path_to_output_file(str): Path to the output csv file. - river_index(Optional[datetime]): This is the index of the river in the file you want the streamflow for. - river_id(Optional[datetime]): This is the river ID that you want the streamflow for. - date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. - date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. - daily(Optional[boolean]): If True and the file is CF-Compliant, write out daily flows. - mode(Optional[str]): You can get the daily average "mean" or the maximum "max". Defauls is "mean". + Parameters + ---------- + path_to_output_file: str + Path to the output csv file. + river_index: :obj:`datetime.datetime`, optional + This is the index of the river in the file you want the + streamflow for. + river_id: :obj:`datetime.datetime`, optional + This is the river ID that you want the streamflow for. + date_search_start: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the minimum date + for starting. + date_search_end: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the maximum date + for ending. + daily: bool, optional + If True and the file is CF-Compliant, write out daily flows. + filter_mode: str, optional + You can get the daily average "mean" or the maximum "max". + Default is "mean". 
+ Example writing entire time series to file: @@ -772,45 +911,34 @@ def write_flows_to_csv(self, path_to_output_file, with RAPIDDataset(path_to_rapid_qout) as qout_nc: # if file is CF compliant, you can filter by date - qout_nc.write_flows_to_csv('/timeseries/Qout_daily_date_filter.csv', - river_id=river_id, - daily=True, - date_search_start=datetime(2002, 8, 31), - date_search_end=datetime(2002, 9, 15), - mode="max" - ) + qout_nc.write_flows_to_csv( + '/timeseries/Qout_daily_date_filter.csv', + river_id=river_id, + daily=True, + date_search_start=datetime(2002, 8, 31), + date_search_end=datetime(2002, 9, 15), + filter_mode="max" + ) """ if river_id is not None: river_index = self.get_river_index(river_id) elif river_id is None and river_index is None: - raise Exception("ERROR: Need reach id or reach index ...") + raise ValueError("Need reach id or reach index ...") # analyze and write if self.is_time_variable_valid() or self._is_legacy_time_valid(): - time_index_range = \ - self.get_time_index_range(date_search_start=date_search_start, - date_search_end=date_search_end) - - qout_arr = self.get_qout_index(river_index, - time_index_array=time_index_range) - time_array = self.get_time_array(time_index_array=time_index_range, - return_datetime=True) - - df = pd.DataFrame(qout_arr.T, index=time_array) - - if daily: - df = df.resample('D') - if mode == "mean": - df = df.mean() - elif mode == "max": - df = df.max() - else: - raise Exception("Invalid mode ...") + qout_df = self.get_qout_index(river_index, + date_search_start=date_search_start, + date_search_end=date_search_end, + daily=daily, + filter_mode=filter_mode, + as_dataframe=True) - df.to_csv(path_to_output_file, header=False) + qout_df.to_csv(path_to_output_file, header=False) else: - print("Valid time variable not found. Printing values only ...") + log("Valid time variable not found. Printing values only ...", + "WARNING") qout_arr = self.get_qout_index(river_index) with open_csv(path_to_output_file, 'w') as outcsv: writer = csv_writer(outcsv) @@ -826,20 +954,35 @@ def write_flows_to_gssha_time_series_xys(self, date_search_start=None, date_search_end=None, daily=False, - mode="mean"): + filter_mode="mean"): """ Write out RAPID output to GSSHA WMS time series xys file. - Parameters: - path_to_output_file(str): Path to the output xys file. - series_name(str): The name for the series. - series_id(int): The ID to give the series. - river_index(Optional[datetime]): This is the index of the river in the file you want the streamflow for. - river_id(Optional[datetime]): This is the river ID that you want the streamflow for. - date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. - date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. - daily(Optional[boolean]): If True and the file is CF-Compliant, write out daily flows. - mode(Optional[str]): You can get the daily average "mean" or the maximum "max". Defauls is "mean". + Parameters + ---------- + path_to_output_file: str + Path to the output xys file. + series_name: str + The name for the series. + series_id: int + The ID to give the series. + river_index: :obj:`datetime.datetime`, optional + This is the index of the river in the file you want the + streamflow for. + river_id: :obj:`datetime.datetime`, optional + This is the river ID that you want the streamflow for. 
+ date_search_start: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the minimum date for + starting. + date_search_end: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the maximum date for + ending. + daily: bool, optional + If True and the file is CF-Compliant, write out daily flows. + filter_mode: str, optional + You can get the daily average "mean" or the maximum "max". + Defauls is "mean". + Example writing entire time series to file: @@ -851,11 +994,12 @@ def write_flows_to_gssha_time_series_xys(self, path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: - qout_nc.write_flows_to_gssha_time_series_xys('/timeseries/Qout_3624735.xys', - series_name="RAPID_TO_GSSHA_{0}".format(river_id), - series_id=34, - river_id=river_id, - ) + qout_nc.write_flows_to_gssha_time_series_xys( + '/timeseries/Qout_{0}.xys'.format(river_id), + series_name="RAPID_TO_GSSHA_{0}".format(river_id), + series_id=34, + river_id=river_id) + Example writing entire time series as daily average to file: @@ -872,12 +1016,13 @@ def write_flows_to_gssha_time_series_xys(self, river_index = qout_nc.get_river_index(river_id) # if file is CF compliant, you can write out daily average - qout_nc.write_flows_to_gssha_time_series_xys('/timeseries/Qout_daily.xys', - series_name="RAPID_TO_GSSHA_{0}".format(river_id), - series_id=34, - river_index=river_index, - daily=True, - ) + qout_nc.write_flows_to_gssha_time_series_xys( + '/timeseries/Qout_daily.xys', + series_name="RAPID_TO_GSSHA_{0}".format(river_id), + series_id=34, + river_index=river_index, + daily=True) + Example writing subset of time series as daily maximum to file: @@ -896,47 +1041,40 @@ def write_flows_to_gssha_time_series_xys(self, # if file is CF compliant, you can filter by date and # get daily values - qout_nc.write_flows_to_gssha_time_series_xys('/timeseries/Qout_daily_date_filter.xys', - series_name="RAPID_TO_GSSHA_{0}".format(river_id), - series_id=34, - river_index=river_index, - date_search_start=datetime(2002, 8, 31), - date_search_end=datetime(2002, 9, 15), - daily=True, - mode="max" - ) + qout_nc.write_flows_to_gssha_time_series_xys( + '/timeseries/Qout_daily_date_filter.xys', + series_name="RAPID_TO_GSSHA_{0}".format(river_id), + series_id=34, + river_index=river_index, + date_search_start=datetime(2002, 8, 31), + date_search_end=datetime(2002, 9, 15), + daily=True, + filter_mode="max") + """ if river_id is not None: river_index = self.get_river_index(river_id) elif river_id is None and river_index is None: - raise Exception("ERROR: Need reach id or reach index ...") + raise ValueError(" Need reach id or reach index ...") + + self.raise_time_valid() # analyze and write - if self.is_time_variable_valid() or self._is_legacy_time_valid(): - time_index_range = \ - self.get_time_index_range(date_search_start=date_search_start, - date_search_end=date_search_end) - - qout_arr = self.get_qout_index(river_index, - time_index_array=time_index_range) - time_array = self.get_time_array(time_index_array=time_index_range, - return_datetime=True) - - df = pd.DataFrame(qout_arr.T, index=time_array) - if daily: - df = df.resample('D').mean() - - with open_csv(path_to_output_file, 'w') as out_ts: - out_ts.write("XYS {0} {1} \"{2}\"\r\n".format(series_id, - len(df.index), - series_name)) - for index, pd_row in df.iterrows(): - date_str = index.strftime("%m/%d/%Y %I:%M:%S %p") - out_ts.write("\"{0}\" {1:.5f}\n".format(date_str, - pd_row[0])) - else: - raise IndexError("Valid 
time variable not found. Valid time" - " variable required in Qout file to proceed ...") + qout_df = self.get_qout_index(river_index, + date_search_start=date_search_start, + date_search_end=date_search_end, + daily=daily, + filter_mode=filter_mode, + as_dataframe=True) + + with open_csv(path_to_output_file, 'w') as out_ts: + out_ts.write("XYS {0} {1} \"{2}\"\r\n".format(series_id, + len(qout_df.index), + series_name)) + for index, pd_row in qout_df.iterrows(): + date_str = index.strftime("%m/%d/%Y %I:%M:%S %p") + out_ts.write("\"{0}\" {1:.5f}\n".format(date_str, + pd_row[0])) def write_flows_to_gssha_time_series_ihg(self, path_to_output_file, @@ -944,7 +1082,8 @@ def write_flows_to_gssha_time_series_ihg(self, date_search_start=None, date_search_end=None, daily=False, - mode="mean"): + filter_mode="mean"): + # pylint: disable=line-too-long """ Write out RAPID output to GSSHA time series ihg file @@ -952,14 +1091,24 @@ def write_flows_to_gssha_time_series_ihg(self, .. note:: GSSHA project card is CHAN_POINT_INPUT - Parameters: - path_to_output_file(str): Path to the output xys file. - connection_list_file(list): CSV file with link_id, node_id, baseflow, and rapid_rivid header and rows with data. - date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. - date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. - out_tzinfo(Optional[tzinfo]): Timezone object with output time zone for GSSHA. Default is the native RAPID output timezone (UTC). - daily(Optional[boolean]): If True and the file is CF-Compliant, write out daily flows. - mode(Optional[str]): You can get the daily average "mean" or the maximum "max". Defauls is "mean". + Parameters + ---------- + path_to_output_file: str + Path to the output xys file. + connection_list_file: str + CSV file with link_id, node_id, baseflow, and rapid_rivid header + and rows with data. + date_search_start: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the minimum date + for starting. + date_search_end: :obj:`datetime.datetime`, optional + This is a datetime object with the date of the maximum date + for ending. + daily: bool, optional + If True and the file is CF-Compliant, write out daily flows. + filter_mode: str, optional + You can get the daily average "mean" or the maximum "max". + Defauls is "mean". Example connection list file:: @@ -968,6 +1117,7 @@ def write_flows_to_gssha_time_series_ihg(self, 599, 1, 0.0, 80968 603, 1, 0.0, 80967 + Example writing entire time series to file: .. 
code:: python @@ -979,9 +1129,10 @@ def write_flows_to_gssha_time_series_ihg(self, with RAPIDDataset(path_to_rapid_qout) as qout_nc: #for writing entire time series to file - qout_nc.write_flows_to_gssha_time_series_ihg('/timeseries/Qout_3624735.ihg', - connection_list_file, - ) + qout_nc.write_flows_to_gssha_time_series_ihg( + '/timeseries/Qout_3624735.ihg', + connection_list_file) + Example writing entire time series as daily average to file: @@ -994,10 +1145,10 @@ def write_flows_to_gssha_time_series_ihg(self, with RAPIDDataset(path_to_rapid_qout) as qout_nc: # if file is CF compliant, you can write out daily average - qout_nc.write_flows_to_gssha_time_series_ihg('/timeseries/Qout_3624735.ihg', - connection_list_file, - daily=True, - ) + qout_nc.write_flows_to_gssha_time_series_ihg( + '/timeseries/Qout_3624735.ihg', + connection_list_file, + daily=True) Example writing subset of time series as daily maximum to file: @@ -1011,79 +1162,71 @@ def write_flows_to_gssha_time_series_ihg(self, connection_list_file = '/path/to/connection_list_file.csv' with RAPIDDataset(path_to_rapid_qout) as qout_nc: - # if file is CF compliant, you can filter by date and get daily values - qout_nc.write_flows_to_gssha_time_series_ihg('/timeseries/Qout_daily_date_filter.ihg', - connection_list_file, - date_search_start=datetime(2002, 8, 31), - date_search_end=datetime(2002, 9, 15), - daily=True, - mode="max" - ) - """ + # if file is CF compliant, you can filter by + # date and get daily values + qout_nc.write_flows_to_gssha_time_series_ihg( + '/timeseries/Qout_daily_date_filter.ihg', + connection_list_file, + date_search_start=datetime(2002, 8, 31), + date_search_end=datetime(2002, 9, 15), + daily=True, + filter_mode="max") + """ # noqa + self.raise_time_valid() + # analyze and write - if self.is_time_variable_valid() or self._is_legacy_time_valid(): - time_index_range = \ - self.get_time_index_range(date_search_start=date_search_start, - date_search_end=date_search_end) - - with open_csv(path_to_output_file, 'w') as out_ts: - # HEADER SECTION EXAMPLE: - # NUMPT 3 - # POINT 1 599 0.0 - # POINT 1 603 0.0 - # POINT 1 605 0.0 - - connection_list = np.loadtxt(connection_list_file, skiprows=1, - ndmin=1, delimiter=',', - usecols=(0, 1, 2, 3), - dtype={'names': ('link_id', - 'node_id', - 'baseflow', - 'rapid_rivid'), - 'formats': ('i8', 'i8', - 'f4', 'i8') - }, - ) - - out_ts.write("NUMPT {0}\n".format(connection_list.size)) - - river_idx_list = [] - for connection in connection_list: - out_ts.write("POINT {0} {1} {2}\n" - "".format(connection['node_id'], - connection['link_id'], - connection['baseflow'], - ), - ) - river_idx_list.append( - self.get_river_index(connection['rapid_rivid']) - ) - - # INFLOW SECTION EXAMPLE: - # NRPDS 54 - # INPUT 2002 01 01 00 00 15.551210 12.765090 0.000000 - # INPUT 2002 01 02 00 00 15.480830 12.765090 0.000000 - # INPUT 2002 01 03 00 00 16.078910 12.765090 0.000000 - # ... 
- qout_2d_array = \ - self.get_qout_index(river_idx_list, - time_index_array=time_index_range) - time_array = \ - self.get_time_array(time_index_array=time_index_range, - return_datetime=True) - - df = pd.DataFrame(qout_2d_array.T, index=time_array) - - if daily: - df = df.resample('D').mean() - - out_ts.write("NRPDS {0}\n".format(len(df.index))) - - for index, pd_row in df.iterrows(): - date_str = index.strftime("%Y %m %d %H %M") - qout_str = " ".join(["{0:.5f}".format(pd_row[column]) - for column in df]) - out_ts.write("INPUT {0} {1}\n".format(date_str, qout_str)) - else: - raise IndexError("Valid time variable not found. Valid time " - "variable required in Qout file to proceed ...") + with open_csv(path_to_output_file, 'w') as out_ts: + # HEADER SECTION EXAMPLE: + # NUMPT 3 + # POINT 1 599 0.0 + # POINT 1 603 0.0 + # POINT 1 605 0.0 + + connection_list = np.loadtxt(connection_list_file, + skiprows=1, ndmin=1, + delimiter=',', + usecols=(0, 1, 2, 3), + dtype={'names': ('link_id', + 'node_id', + 'baseflow', + 'rapid_rivid'), + 'formats': ('i8', 'i8', + 'f4', 'i8') + }, + ) + + out_ts.write("NUMPT {0}\n".format(connection_list.size)) + + river_idx_list = [] + for connection in connection_list: + out_ts.write("POINT {0} {1} {2}\n" + "".format(connection['node_id'], + connection['link_id'], + connection['baseflow'], + ), + ) + river_idx_list.append( + self.get_river_index(connection['rapid_rivid']) + ) + + # INFLOW SECTION EXAMPLE: + # NRPDS 54 + # INPUT 2002 01 01 00 00 15.551210 12.765090 0.000000 + # INPUT 2002 01 02 00 00 15.480830 12.765090 0.000000 + # INPUT 2002 01 03 00 00 16.078910 12.765090 0.000000 + # ... + qout_df = self.get_qout_index( + river_idx_list, + date_search_start=date_search_start, + date_search_end=date_search_end, + daily=daily, + filter_mode=filter_mode, + as_dataframe=True) + + out_ts.write("NRPDS {0}\n".format(len(qout_df.index))) + + for index, pd_row in qout_df.iterrows(): + date_str = index.strftime("%Y %m %d %H %M") + qout_str = " ".join(["{0:.5f}".format(pd_row[column]) + for column in qout_df]) + out_ts.write("INPUT {0} {1}\n".format(date_str, qout_str)) diff --git a/RAPIDpy/gis/__init__.py b/RAPIDpy/gis/__init__.py index 40a96af..603dbe2 100644 --- a/RAPIDpy/gis/__init__.py +++ b/RAPIDpy/gis/__init__.py @@ -1 +1,23 @@ # -*- coding: utf-8 -*- +""" + RAPIDpy.gis + + Created by Alan D Snow, 2016. + Based on RAPID_Toolbox for ArcMap + License: BSD 3-Clause +""" +from osgeo import ogr + + +def open_shapefile(shapefile_path, file_geodatabase=None): + """Opens a shapefile using either a shapefile path + or a file geodatabase + """ + if file_geodatabase: + gdb_driver = ogr.GetDriverByName("OpenFileGDB") + ogr_shapefile = gdb_driver.Open(file_geodatabase) + ogr_shapefile_lyr = ogr_shapefile.GetLayer(shapefile_path) + else: + ogr_shapefile = ogr.Open(shapefile_path) + ogr_shapefile_lyr = ogr_shapefile.GetLayer() + return ogr_shapefile_lyr, ogr_shapefile diff --git a/RAPIDpy/gis/centroid.py b/RAPIDpy/gis/centroid.py index 32eb15b..9083c5f 100644 --- a/RAPIDpy/gis/centroid.py +++ b/RAPIDpy/gis/centroid.py @@ -1,74 +1,83 @@ # -*- coding: utf-8 -*- -## -## centroid.py -## RAPIDpy -## -## Created by Alan D Snow. -## -## Copyright © 2016 Alan D Snow. All rights reserved. -## License: BSD 3-Clause +""" + centroid.py + RAPIDpy + Created by Alan D Snow, 2016. 
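A short hedged note on the new ``open_shapefile`` helper introduced above (the path and field name below are placeholders): it returns the OGR data source along with the layer because, in the GDAL/OGR Python bindings, a layer becomes unusable once its parent data source is garbage collected, so the caller must hold a reference until iteration is finished, which is also why the refactored functions below end with ``del ogr_drainage_line_shapefile``.

.. code:: python

    from RAPIDpy.gis import open_shapefile

    # placeholder shapefile path and river ID field
    layer, datasource = open_shapefile('/path/to/drainageline.shp')
    for feature in layer:
        print(feature.GetField('LINKNO'))
    # release the data source only after the layer is no longer needed
    del datasource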
+ License: BSD 3-Clause +""" from csv import writer as csv_writer -try: - from osgeo import gdal, ogr, osr -except Exception: - raise Exception("You need the gdal python package to run this tool ...") + +from osgeo import gdal, osr + +# local +from . import open_shapefile +from ..helper_functions import open_csv + # Enable GDAL/OGR exceptions gdal.UseExceptions() -#local -from ..helper_functions import open_csv def FlowlineToPoint(in_drainage_line, river_id, out_csv_file, file_geodatabase=None): """ - Converts flowline feature to a list of centroid points with their comid in EPSG:4326. + Converts flowline feature to a list of centroid points with their rivid + in EPSG:4326. + + Parameters + ---------- + in_drainage_line: str + Path to the stream network (i.e. Drainage Line) shapefile. + river_id: str + The name of the field with the river ID + (Ex. 'HydroID', 'COMID', or 'LINKNO'). + out_csv_file: str + Path to the output csv file with the centroid points. + file_geodatabase: str, optional + Path to the file geodatabase. If you use this option, in_drainage_line + is the name of the stream network feature class + (WARNING: Not always stable with GDAL). + - Args: - in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile. - river_id(str): The name of the field with the river ID (Ex. 'HydroID', 'COMID', or 'LINKNO'). - out_csv_file(str): Path to the output csv file with the centroid points. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - Example:: - + from RAPIDpy.gis.centroid import FlowlineToPoint - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__ == "__main__": - FlowlineToPoint(in_drainage_line='/path/to/drainageline.shp', - river_id='LINKNO', - out_csv_file='/path/to/comid_lat_lon_z.csv', - ) - + + FlowlineToPoint( + in_drainage_line='/path/to/drainageline.shp', + river_id='LINKNO', + out_csv_file='/path/to/comid_lat_lon_z.csv') + """ + ogr_drainage_line_shapefile_lyr, ogr_drainage_line_shapefile = \ + open_shapefile(in_drainage_line, file_geodatabase) - if file_geodatabase: - gdb_driver = ogr.GetDriverByName("OpenFileGDB") - ogr_file_geodatabase = gdb_driver.Open(file_geodatabase, 0) - ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(in_drainage_line) - else: - ogr_drainage_line_shapefile = ogr.Open(in_drainage_line) - ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer() - - ogr_drainage_line_shapefile_lyr_proj = ogr_drainage_line_shapefile_lyr.GetSpatialRef() + ogr_drainage_line_shapefile_lyr_proj = \ + ogr_drainage_line_shapefile_lyr.GetSpatialRef() osr_geographic_proj = osr.SpatialReference() osr_geographic_proj.ImportFromEPSG(4326) proj_transform = None if ogr_drainage_line_shapefile_lyr_proj != osr_geographic_proj: - proj_transform = osr.CoordinateTransformation(ogr_drainage_line_shapefile_lyr_proj, osr_geographic_proj) + proj_transform = osr.CoordinateTransformation( + ogr_drainage_line_shapefile_lyr_proj, osr_geographic_proj) - #print valid field names to table + # print valid field names to table with open_csv(out_csv_file, 'w') as outfile: writer = csv_writer(outfile) - writer.writerow(['rivid','lat','lon','z']) + writer.writerow(['rivid', 'lat', 'lon', 'z']) for feature in ogr_drainage_line_shapefile_lyr: feat_geom = 
feature.GetGeometryRef() if proj_transform: feat_geom.Transform(proj_transform) centroid = feat_geom.Centroid() centroid_pt = centroid.GetPoint(0) - writer.writerow([feature.GetField(river_id), centroid_pt[1], centroid_pt[0], centroid_pt[2]]) \ No newline at end of file + writer.writerow([ + feature.GetField(river_id), + centroid_pt[1], + centroid_pt[0], + centroid_pt[2] + ]) + + del ogr_drainage_line_shapefile diff --git a/RAPIDpy/gis/merge.py b/RAPIDpy/gis/merge.py deleted file mode 100644 index fcfd150..0000000 --- a/RAPIDpy/gis/merge.py +++ /dev/null @@ -1,131 +0,0 @@ -# -*- coding: utf-8 -*- -## -## merge.py -## RAPIDpy -## -## Created by Alan D Snow. -## -## Copyright © 2016 Alan D Snow. All rights reserved. -## License: BSD 3-Clause - -import csv -import numpy as np - -from ..helper_functions import csv_to_list - -def MergeWeightTables(weight_table_file, - connectivity_file, - new_weight_table_file): - """ - This function merges multiple weight tables combined into one file - with duplicate headers removed - """ - - weight_table = csv_to_list(weight_table_file) - weight_comid_list = np.array([row[0] for row in weight_table[1:]], dtype=int) - #FEATUREID,area_sqm,lon_index,lat_index,npoints,weight,Lon,Lat - new_weight_table = weight_table[0:1] - replacement_row = weight_table[1][1:] - #set area_sqm to zero - replacement_row[0] = 0 - #set npoints to one - replacement_row[3] = 1 - - print("Looping ...") - with open(connectivity_file, "rb") as fconnect: - for row in fconnect: - connect_rivid = int(float(row.split(",")[0])) - try: - #find all occurences - comid_indicies = np.where(weight_comid_list==connect_rivid)[0] - weight_indices = [int(d)+1 for d in comid_indicies] - #if num occurences don't match what table says - if len(weight_indices) > int(weight_table[weight_indices[0]][4]): - #print weight_table[weight_indices[0]] - for weight_index in weight_indices: - #remove if it has an area of zero - if float(weight_table[weight_index][1]) == 0.0: - #print "REMOVED:", weight_table[weight_index] - weight_indices.remove(weight_index) - - if len(weight_indices) != int(weight_table[weight_indices[0]][4]): - for weight_index in weight_indices: - print("ERROR: {0} {1}".format(weight_index, weight_table[weight_index])) - - for weight_index in weight_indices: - new_weight_table.append(weight_table[weight_index]) - except IndexError: - print("{0} not found ...".format(connect_rivid)) - #skip if not found - continue - - print("Writing ...") - with open(new_weight_table_file, 'wb') as outfile: - writer = csv.writer(outfile) - writer.writerows(new_weight_table) - -def MergeNetworkConnectFiles(old_connectivity_file, - new_connectivity_file): - - """ - This function merges multiple rapid_connect files combined into one file - """ - connectivity_table = csv_to_list(old_connectivity_file) - - max_num_upstream = max([int(float(row[2])) for row in connectivity_table]) - - print("Maximum number of upstream reaches: {0}".format(max_num_upstream)) - print("Looping ...") - new_comid_list = np.zeros(len(connectivity_table), dtype=np.int32) - new_connectivity_table = [] - index = 0 - for row in connectivity_table: - try: - comid_index = np.where(new_comid_list==int(float(row[0])))[0][0] - if int(float(new_connectivity_table[comid_index][2]))= 2: # find the slope - stream_slope = slope_list[streamIDindex] - + stream_slope = slope_list[stream_id_index] + if stream_slope <= 0: - #if no slope, take average of upstream and downstream to get it - nextDownID = int(float(row[1])) + # if no slope, take average of upstream 
+ # and downstream to get it + next_down_id = int(float(row[1])) next_down_slope = 0 try: - next_down_index = np.where(river_id_list==nextDownID)[0][0] + next_down_index = \ + np.where(river_id_list == next_down_id)[0][0] next_down_slope = slope_list[next_down_index] except IndexError: pass - - nextUpID = int(float(row[3])) + + next_up_id = int(float(row[3])) next_up_slope = 0 try: - next_up_index = np.where(river_id_list==nextUpID)[0][0] + next_up_index = \ + np.where(river_id_list == next_up_id)[0][0] next_up_slope = slope_list[next_up_index] except IndexError: pass - - stream_slope = (next_down_slope+next_up_slope)/2.0 - if stream_slope <=0: - #if still no slope, set to 0.001 + + stream_slope = (next_down_slope + next_up_slope) / 2.0 + if stream_slope <= 0: + # if still no slope, set to 0.001 stream_slope = 0.001 - - length_slope_array.append(stream_length/stream_slope**0.5) - kfac2_array.append(stream_length/celerity) + + length_slope_array.append(stream_length / stream_slope**0.5) + kfac2_array.append(stream_length / celerity) else: - kfac = stream_length/celerity + kfac = stream_length / celerity kfac_writer.writerow(kfac) - + if formula_type >= 2: if formula_type == 3: - print("Filtering Data by 5th and 95th Percentiles ...") + log("Filtering Data by 5th and 95th Percentiles ...") length_slope_array = np.array(length_slope_array) percentile_5 = np.percentile(length_slope_array, 5) percentile_95 = np.percentile(length_slope_array, 95) - - length_slope_array[length_slope_arraypercentile_95] = percentile_95 - + + length_slope_array[length_slope_array < percentile_5] = \ + percentile_5 + length_slope_array[length_slope_array > percentile_95] = \ + percentile_95 + eta = np.mean(kfac2_array) / np.mean(length_slope_array) - print("Kfac2_Avg {0}".format(np.mean(kfac2_array))) - print("Length_Slope Avg {0}".format( np.mean(length_slope_array))) - print("Eta {0}".format(eta)) - print("Writing Data ...") + log("Kfac2_Avg {0}".format(np.mean(kfac2_array))) + log("Length_Slope Avg {0}".format(np.mean(length_slope_array))) + log("Eta {0}".format(eta)) + log("Writing Data ...") for len_slope in length_slope_array: kfac_writer.writerow(eta*len_slope) + def CreateMuskingumKFile(lambda_k, in_kfac_file, out_k_file): """ Creates muskingum k file from kfac file. - - Args: - lambda_k(float): The value for lambda given from RAPID after the calibration process. If no calibration has been performed, 0.35 is reasonable. - in_kfac_file(str): The path to the input kfac file. - out_k_file(str): The path to the output k file. - + + Parameters + ---------- + lambda_k: float + The value for lambda given from RAPID after the calibration process. + If no calibration has been performed, 0.35 is reasonable. + in_kfac_file: str + The path to the input kfac file. + out_k_file: str + The path to the output k file. 
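In other words, each k value written by ``CreateMuskingumKFile`` is simply *lambda_k* times the corresponding kfac value (kfac being a travel time, stream length divided by celerity). A tiny sketch of the relationship with illustrative numbers only:

.. code:: python

    lambda_k = 0.35
    kfac_values = [1800.0, 2400.0, 3600.0]  # illustrative kfac entries
    k_values = [lambda_k * kfac for kfac in kfac_values]
    # k_values == [630.0, 840.0, 1260.0]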
+ + Example:: - + from RAPIDpy.gis.muskingum import CreateMuskingumKFile - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__ == "__main__": - CreateMuskingumKFile(lambda_k=0.35, - in_kfac_file='/path/to/kfac.csv', - out_k_file='/path/to/k.csv', - ) + + CreateMuskingumKFile( + lambda_k=0.35, + in_kfac_file='/path/to/kfac.csv', + out_k_file='/path/to/k.csv') + """ kfac_table = csv_to_list(in_kfac_file) - - with open_csv(out_k_file,'w') as kfile: + + with open_csv(out_k_file, 'w') as kfile: k_writer = csv_writer(kfile) for row in kfac_table: - k_writer.writerow([lambda_k*float(row[0])]) + k_writer.writerow([lambda_k * float(row[0])]) + def CreateMuskingumXFileFromDranageLine(in_drainage_line, x_id, @@ -227,67 +266,75 @@ def CreateMuskingumXFileFromDranageLine(in_drainage_line, """ Create muskingum X file from drainage line. - Args: - in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile. - x_id(str): The name of the muksingum X field (i.e. 'Musk_x'). - out_x_file(str): The path to the output x file. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - + Parameters + ---------- + in_drainage_line: str + Path to the stream network (i.e. Drainage Line) shapefile. + x_id: str + The name of the muksingum X field (i.e. 'Musk_x'). + out_x_file: str + The path to the output x file. + file_geodatabase: str, optional + Path to the file geodatabase. If you use this option, + in_drainage_line is the name of the stream network feature class + (WARNING: Not always stable with GDAL). + + Example:: - + from RAPIDpy.gis.muskingum import CreateMuskingumXFileFromDranageLine - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__ == "__main__": - CreateMuskingumXFileFromDranageLine(in_drainage_line='/path/to/drainageline.shp', - x_id='Musk_x', - out_x_file='/path/to/x.csv', - ) + + CreateMuskingumXFileFromDranageLine( + in_drainage_line='/path/to/drainageline.shp', + x_id='Musk_x', + out_x_file='/path/to/x.csv') + """ - if file_geodatabase: - gdb_driver = ogr.GetDriverByName("OpenFileGDB") - ogr_file_geodatabase = gdb_driver.Open(file_geodatabase) - ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(in_drainage_line) - else: - ogr_drainage_line_shapefile = ogr.Open(in_drainage_line) - ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer() + ogr_drainage_line_shapefile_lyr, ogr_drainage_line_shapefile = \ + open_shapefile(in_drainage_line, file_geodatabase) - with open_csv(out_x_file,'w') as kfile: + with open_csv(out_x_file, 'w') as kfile: x_writer = csv_writer(kfile) for drainage_line_feature in ogr_drainage_line_shapefile_lyr: - x_writer.writerow([drainage_line_feature.GetField(x_id)]) + x_writer.writerow([drainage_line_feature.GetField(x_id)]) + + del ogr_drainage_line_shapefile + def CreateConstMuskingumXFile(x_value, in_connectivity_file, out_x_file): """ - Create muskingum X file from value that is constant all the way through for each river segment. - - Args: - x_value(float): Value for the muskingum X parameter [0-0.5]. - in_connectivity_file(str): The path to the RAPID connectivity file. 
- out_x_file(str): The path to the output x file. - + Create muskingum X file from value that is constant all the way through + for each river segment. + + Parameters + ---------- + x_value: float + Value for the muskingum X parameter [0-0.5]. + in_connectivity_file: str + The path to the RAPID connectivity file. + out_x_file: str + The path to the output x file. + + Example:: - + from RAPIDpy.gis.muskingum import CreateConstMuskingumXFile - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__ == "__main__": - CreateConstMuskingumXFile(x_value=0.3, - in_connectivity_file='/path/to/rapid_connect.csv', - out_x_file='/path/to/x.csv', - ) + + CreateConstMuskingumXFile( + x_value=0.3, + in_connectivity_file='/path/to/rapid_connect.csv', + out_x_file='/path/to/x.csv') + """ num_rivers = 0 with open_csv(in_connectivity_file, "r") as csvfile: reader = csv_reader(csvfile) - for row in reader: - num_rivers+=1 + for _ in reader: + num_rivers += 1 - with open_csv(out_x_file,'w') as kfile: + with open_csv(out_x_file, 'w') as kfile: x_writer = csv_writer(kfile) - for idx in xrange(num_rivers): - x_writer.writerow([x_value]) + for _ in xrange(num_rivers): + x_writer.writerow([x_value]) diff --git a/RAPIDpy/gis/network.py b/RAPIDpy/gis/network.py index b100202..d659bac 100644 --- a/RAPIDpy/gis/network.py +++ b/RAPIDpy/gis/network.py @@ -1,27 +1,26 @@ # -*- coding: utf-8 -*- -## -## centroid.py -## RAPIDpy -## -## Created by Alan D Snow. -## Based on RAPID_Toolbox for ArcMap -## Copyright © 2016 Alan D Snow. All rights reserved. -## License: BSD 3-Clause +""" + centroid.py + RAPIDpy + Created by Alan D Snow, 2016. + Based on RAPID_Toolbox for ArcMap + License: BSD 3-Clause +""" from csv import writer as csv_writer + + import numpy as np -from past.builtins import xrange +from osgeo import gdal +from past.builtins import xrange # pylint: disable=redefined-builtin -try: - from osgeo import gdal, ogr -except Exception: - raise Exception("You need the gdal python package to run this tool ...") +# local +from ..helper_functions import log, open_csv +from . 
import open_shapefile # Enable GDAL/OGR exceptions gdal.UseExceptions() -#local -from ..helper_functions import open_csv def StreamIDNextDownIDToConnectivity(stream_id_array, next_down_id_array, @@ -34,25 +33,30 @@ def StreamIDNextDownIDToConnectivity(stream_id_array, for hydroid in np.sort(stream_id_array): # find the HydroID of the upstreams - list_upstreamID = stream_id_array[next_down_id_array==hydroid] + list_upstreamID = stream_id_array[next_down_id_array == hydroid] # count the total number of the upstreams count_upstream = len(list_upstreamID) if count_upstream > max_count_upstream: max_count_upstream = count_upstream - nextDownID = next_down_id_array[stream_id_array==hydroid][0] -#THIS IS REMOVED DUE TO THE FACT THAT THERE ARE STREAMS WITH ID OF ZERO -# # replace the nextDownID with 0 if it equals to -1 (no next downstream) -# if nextDownID == -1: -# nextDownID = 0 - # append the list of Stream HydroID, NextDownID, Count of Upstream ID, and HydroID of each Upstream into a larger list - list_all.append(np.concatenate([np.array([hydroid,nextDownID,count_upstream]),list_upstreamID]).astype(int)) - - with open_csv(out_csv_file,'w') as csvfile: + nextDownID = next_down_id_array[stream_id_array == hydroid][0] + # append the list of Stream HydroID, NextDownID, Count of Upstream ID, + # and HydroID of each Upstream into a larger list + list_all.append( + np.concatenate( + [np.array([hydroid, nextDownID, count_upstream]), + list_upstreamID] + ).astype(int)) + + with open_csv(out_csv_file, 'w') as csvfile: connectwriter = csv_writer(csvfile) for row_list in list_all: - out = np.concatenate([row_list, np.array([0 for i in xrange(max_count_upstream - row_list[2])])]) + out = np.concatenate([ + row_list, + np.array([0 for _ in xrange(max_count_upstream - row_list[2])]) + ]) connectwriter.writerow(out.astype(int)) + def CreateNetworkConnectivity(in_drainage_line, river_id, next_down_id, @@ -60,62 +64,72 @@ def CreateNetworkConnectivity(in_drainage_line, file_geodatabase=None): """ Creates Network Connectivity input CSV file for RAPID - based on the Drainage Line shapefile with river ID and next downstream ID fields - - Args: - in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile. - river_id(str): The name of the field with the river ID (Ex. 'HydroID', 'COMID', or 'LINKNO'). - next_down_id(str): The name of the field with the river ID of the next downstream river segment (Ex. 'NextDownID' or 'DSLINKNO'). - out_connectivity_file(str): The path to the output connectivity file. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - + based on the Drainage Line shapefile with river ID and + next downstream ID fields. + + Parameters + ---------- + in_drainage_line: str + Path to the stream network (i.e. Drainage Line) shapefile. + river_id: str + The name of the field with the river ID + (Ex. 'HydroID', 'COMID', or 'LINKNO'). + next_down_id: str + The name of the field with the river ID of the next downstream + river segment (Ex. 'NextDownID' or 'DSLINKNO'). + out_connectivity_file: str + The path to the output connectivity file. + file_geodatabase + Path to the file geodatabase. If you use this option, in_drainage_line + is the name of the stream network feature class. + (WARNING: Not always stable with GDAL.) 
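To make the connectivity layout concrete: each row written by StreamIDNextDownIDToConnectivity is [river_id, next_down_id, upstream_count, upstream_ids...], zero-padded so every row has the same width as the reach with the most upstream segments. A small hand-worked sketch with invented river IDs (not taken from this changeset):

.. code:: python

    import numpy as np

    stream_ids = np.array([10, 20, 30, 40], dtype=np.int32)
    next_down = np.array([30, 30, 40, -1], dtype=np.int32)  # -1 marks the outlet

    rows = []
    for rivid in np.sort(stream_ids):
        upstream = stream_ids[next_down == rivid]        # reaches draining into rivid
        downstream = next_down[stream_ids == rivid][0]   # next downstream reach
        rows.append([rivid, downstream, len(upstream)] + upstream.tolist())

    max_upstream = max(row[2] for row in rows)
    padded = [row + [0] * (max_upstream - row[2]) for row in rows]
    # padded ->
    # [[10, 30, 0, 0, 0],
    #  [20, 30, 0, 0, 0],
    #  [30, 40, 2, 10, 20],
    #  [40, -1, 1, 30, 0]]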
+ + Example:: - + from RAPIDpy.gis.network import CreateNetworkConnectivity - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__ == "__main__": - CreateNetworkConnectivity(in_drainage_line='/path/to/drainageline.shp', - river_id='LINKNO', - next_down_id='DSLINKNO', - out_connectivity_file='/path/to/rapid_connect.csv', - ) - """ - if file_geodatabase: - gdb_driver = ogr.GetDriverByName("OpenFileGDB") - ogr_file_geodatabase = gdb_driver.Open(file_geodatabase, 0) - ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(in_drainage_line) - else: - ogr_drainage_line_shapefile = ogr.Open(in_drainage_line) - ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer() + + CreateNetworkConnectivity( + in_drainage_line='/path/to/drainageline.shp', + river_id='LINKNO', + next_down_id='DSLINKNO', + out_connectivity_file='/path/to/rapid_connect.csv') + + """ + ogr_drainage_line_shapefile_lyr, ogr_drainage_line_shapefile = \ + open_shapefile(in_drainage_line, file_geodatabase) stream_id_array = [] next_down_id_array = [] for drainage_line_feature in ogr_drainage_line_shapefile_lyr: stream_id_array.append(drainage_line_feature.GetField(river_id)) next_down_id_array.append(drainage_line_feature.GetField(next_down_id)) - + stream_id_array = np.array(stream_id_array, dtype=np.int32) next_down_id_array = np.array(next_down_id_array, dtype=np.int32) StreamIDNextDownIDToConnectivity(stream_id_array, next_down_id_array, out_connectivity_file) - + + del ogr_drainage_line_shapefile + + def CreateNetworkConnectivityTauDEMTree(network_connectivity_tree_file, out_csv_file): """ Creates Network Connectivity input CSV file for RAPID based on the TauDEM network connectivity tree file - """ + """ stream_id_array = [] next_down_id_array = [] with open_csv(network_connectivity_tree_file, "r") as csvfile: for row in csvfile: split_row = row.split() - stream_id_array.append(split_row[0].strip()) #link number - next_down_id_array.append(split_row[3].strip()) #next downstream link number + # link number + stream_id_array.append(split_row[0].strip()) + # next downstream link number + next_down_id_array.append(split_row[3].strip()) stream_id_array = np.array(stream_id_array, dtype=np.int32) next_down_id_array = np.array(next_down_id_array, dtype=np.int32) @@ -123,157 +137,181 @@ def CreateNetworkConnectivityTauDEMTree(network_connectivity_tree_file, StreamIDNextDownIDToConnectivity(stream_id_array, next_down_id_array, out_csv_file) - + + def CreateNetworkConnectivityNHDPlus(in_drainage_line, out_connectivity_file, file_geodatabase=None): """ Creates Network Connectivity input CSV file for RAPID - based on the NHDPlus drainage lines with COMID, FROMNODE, TONODE, and DIVERGENCE fields. - - Args: - in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile. - out_connectivity_file(str): The path to the output connectivity file. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - + based on the NHDPlus drainage lines with + COMID, FROMNODE, TONODE, and DIVERGENCE fields. + + Parameters + ---------- + in_drainage_line: str + Path to the stream network (i.e. Drainage Line) shapefile. + out_connectivity_file: str + The path to the output connectivity file. 
+ file_geodatabase: str, optional + Path to the file geodatabase. If you use this option, + in_drainage_line is the name of the stream network feature class + (WARNING: Not always stable with GDAL). + + Example:: - + from RAPIDpy.gis.network import CreateNetworkConnectivityNHDPlus - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__ == "__main__": - CreateNetworkConnectivityNHDPlus(in_drainage_line='/path/to/drainageline.shp', - out_connectivity_file='/path/to/rapid_connect.csv', - ) - """ - if file_geodatabase: - gdb_driver = ogr.GetDriverByName("OpenFileGDB") - ogr_file_geodatabase = gdb_driver.Open(file_geodatabase, 0) - ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(in_drainage_line) - else: - ogr_drainage_line_shapefile = ogr.Open(in_drainage_line) - ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer() - - ogr_drainage_line_definition = ogr_drainage_line_shapefile_lyr.GetLayerDefn() - + + CreateNetworkConnectivityNHDPlus( + in_drainage_line='/path/to/drainageline.shp', + out_connectivity_file='/path/to/rapid_connect.csv') + + """ + ogr_drainage_line_shapefile_lyr, ogr_drainage_line_shapefile = \ + open_shapefile(in_drainage_line, file_geodatabase) + + ogr_drainage_line_definition = \ + ogr_drainage_line_shapefile_lyr.GetLayerDefn() + orig_field_names = [] for idx in xrange(ogr_drainage_line_definition.GetFieldCount()): - orig_field_names.append(ogr_drainage_line_definition.GetFieldDefn(idx).GetName()) - + orig_field_names.append( + ogr_drainage_line_definition.GetFieldDefn(idx).GetName()) + upper_field_names = [field.upper() for field in orig_field_names] - def get_field_name_index(upper_field_name, upper_field_names): + def get_field_name_index(upper_field_name, _upper_field_names): """ returns index of field name """ try: - return upper_field_names.index(upper_field_name) + return _upper_field_names.index(upper_field_name) except ValueError: - raise IndexError("{0} not found in shapefile ..".format(upper_field_name)) + raise IndexError("{0} not found in shapefile .." 
+                             .format(upper_field_name))
+
+    rivid_field = \
+        orig_field_names[get_field_name_index('COMID', upper_field_names)]
+    fromnode_field = \
+        orig_field_names[get_field_name_index('FROMNODE', upper_field_names)]
+    tonode_field = \
+        orig_field_names[get_field_name_index('TONODE', upper_field_names)]
+    divergence_field =\
+        orig_field_names[get_field_name_index('DIVERGENCE', upper_field_names)]
-    rivid_field = orig_field_names[get_field_name_index('COMID', upper_field_names)]
-    fromnode_field = orig_field_names[get_field_name_index('FROMNODE', upper_field_names)]
-    tonode_field = orig_field_names[get_field_name_index('TONODE', upper_field_names)]
-    divergence_field = orig_field_names[get_field_name_index('DIVERGENCE', upper_field_names)]
-
     number_of_features = ogr_drainage_line_shapefile_lyr.GetFeatureCount()
     rivid_list = np.zeros(number_of_features, dtype=np.int32)
     fromnode_list = np.zeros(number_of_features, dtype=np.int32)
     tonode_list = np.zeros(number_of_features, dtype=np.int32)
     divergence_list = np.zeros(number_of_features, dtype=np.int32)
-    for feature_idx, catchment_feature in enumerate(ogr_drainage_line_shapefile_lyr):
+    for feature_idx, catchment_feature in \
+            enumerate(ogr_drainage_line_shapefile_lyr):
         rivid_list[feature_idx] = catchment_feature.GetField(rivid_field)
         fromnode_list[feature_idx] = catchment_feature.GetField(fromnode_field)
         tonode_list[feature_idx] = catchment_feature.GetField(tonode_field)
-        divergence_list[feature_idx] = catchment_feature.GetField(divergence_field)
-
-    #-------------------------------------------------------------------------------
-    #Compute connectivity (based on: https://github.com/c-h-david/rrr/blob/master/src/rrr_riv_tot_gen_all_nhdplus.py)
-    #-------------------------------------------------------------------------------
-    fromnode_list[fromnode_list==0] = -9999
-    #Some NHDPlus v1 reaches have FLOWDIR='With Digitized' but no info in VAA table
-
-    fromnode_list[divergence_list==2] = -9999
-    #Virtually disconnect the upstream node of all minor divergences
-    divergence_list = [] #don't need this anymore
+        divergence_list[feature_idx] = \
+            catchment_feature.GetField(divergence_field)
+
+    del ogr_drainage_line_shapefile
+    # -------------------------------------------------------------------------
+    # Compute connectivity, based on:
+    # https://github.com/c-h-david/rrr/blob/master/src/rrr_riv_tot_gen_all_nhdplus.py
+    # -------------------------------------------------------------------------
+    fromnode_list[fromnode_list == 0] = -9999
+    # Some NHDPlus v1 reaches have FLOWDIR='With Digitized'
+    # but no info in VAA table
+
+    fromnode_list[divergence_list == 2] = -9999
+    # Virtually disconnect the upstream node of all minor divergences
+    del divergence_list  # no longer needed
     next_down_id_list = np.zeros(number_of_features, dtype=np.int32)
-    for rivid_index, rivid in enumerate(rivid_list):
+    for rivid_index in xrange(len(rivid_list)):
         try:
-            next_down_id_list[rivid_index] = rivid_list[np.where(fromnode_list==tonode_list[rivid_index])[0][0]]
+            next_down_id_list[rivid_index] = \
+                rivid_list[
+                    np.where(fromnode_list == tonode_list[rivid_index])[0][0]]
         except IndexError:
-            next_down_id_list[rivid_index] = -1 #this is an outlet
-            pass
-    #determine the downstream reach for each reach
-
-    #empty unecessary lists
-    fromnode_list = []
-    tonode_list = []
-
+            # this is an outlet
+            next_down_id_list[rivid_index] = -1
+
+    # determine the downstream reach for each reach
+
+    # empty unnecessary lists
+    del fromnode_list
+    del tonode_list
+
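The next-downstream lookup above follows one rule: a reach's downstream neighbour is the reach whose FROMNODE equals this reach's TONODE, with the -9999 sentinels preventing matches for missing nodes and minor divergences, and no match meaning the reach is an outlet (-1). A toy illustration with invented COMIDs and node numbers:

.. code:: python

    import numpy as np

    comids = np.array([101, 102, 103], dtype=np.int32)
    fromnodes = np.array([1, 2, 3], dtype=np.int32)
    tonodes = np.array([3, 3, 9], dtype=np.int32)  # node 9 leaves the network

    next_down = np.zeros(len(comids), dtype=np.int32)
    for idx in range(len(comids)):
        matches = np.where(fromnodes == tonodes[idx])[0]
        # the downstream reach starts at the node this reach ends on;
        # no match means this reach is an outlet
        next_down[idx] = comids[matches[0]] if matches.size else -1

    # next_down -> array([103, 103,  -1], dtype=int32)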
StreamIDNextDownIDToConnectivity(rivid_list, next_down_id_list, out_connectivity_file) - + + def CreateSubsetFile(in_drainage_line, - river_id, + river_id, out_riv_bas_id_file, file_geodatabase=None): """ Creates River Basin ID subset input CSV file for RAPID - based on the Drainage Line shapefile with river ID and next downstream ID fields - - Args: - in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile. - river_id(str): The name of the field with the river ID (Ex. 'HydroID', 'COMID', or 'LINKNO'). - out_riv_bas_id_file(str): The path to the output river basin ID subset file. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - + based on the Drainage Line shapefile with river ID and + next downstream ID fields + + Parameters + ---------- + in_drainage_line: str + Path to the stream network (i.e. Drainage Line) shapefile. + river_id: str + The name of the field with the river ID + (Ex. 'HydroID', 'COMID', or 'LINKNO'). + out_riv_bas_id_file: str + The path to the output river basin ID subset file. + file_geodatabase: str, optional + Path to the file geodatabase. If you use this option, + in_drainage_line is the name of the stream network feature class + (WARNING: Not always stable with GDAL). + + Example:: - + from RAPIDpy.gis.network import CreateSubsetFile - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__ == "__main__": - CreateSubsetFile(in_drainage_line='/path/to/drainageline.shp', - river_id='LINKNO', - out_riv_bas_id_file='/path/to/riv_bas_id.csv', - ) - """ - - if file_geodatabase: - gdb_driver = ogr.GetDriverByName("OpenFileGDB") - ogr_file_geodatabase = gdb_driver.Open(file_geodatabase, 0) - ogr_drainage_line_shapefile_lyr = ogr_file_geodatabase.GetLayer(in_drainage_line) - else: - ogr_drainage_line_shapefile = ogr.Open(in_drainage_line) - ogr_drainage_line_shapefile_lyr = ogr_drainage_line_shapefile.GetLayer() - ogr_drainage_line_definition = ogr_drainage_line_shapefile_lyr.GetLayerDefn() - + CreateSubsetFile( + in_drainage_line='/path/to/drainageline.shp', + river_id='LINKNO', + out_riv_bas_id_file='/path/to/riv_bas_id.csv') + + """ + ogr_drainage_line_shapefile_lyr, ogr_drainage_line_shapefile = \ + open_shapefile(in_drainage_line, file_geodatabase) + + ogr_drainage_line_definition = \ + ogr_drainage_line_shapefile_lyr.GetLayerDefn() + orig_field_names = [] for idx in xrange(ogr_drainage_line_definition.GetFieldCount()): - orig_field_names.append(ogr_drainage_line_definition.GetFieldDefn(idx).GetName()) - + orig_field_names.append( + ogr_drainage_line_definition.GetFieldDefn(idx).GetName()) + upper_field_names = [field.upper() for field in orig_field_names] sort_field = None - - #Sort by HYDROSEQ order if the option exists + + # Sort by HYDROSEQ order if the option exists if 'HYDROSEQ' in upper_field_names: - #with this method, smaller is downstream + # with this method, smaller is downstream sort_field = orig_field_names[upper_field_names.index('HYDROSEQ')] - print("Sorting by {0}".format(sort_field)) + log("Sorting by {0}".format(sort_field)) hydroseq_list = [] hydroid_list = [] - '''The script line below makes sure that rows in the subset file are - arranged in descending order of NextDownID of stream segements''' + # The script line below makes sure that rows in 
the subset file are + # arranged in descending order of NextDownID of stream segements for drainage_line_feature in ogr_drainage_line_shapefile_lyr: hydroid_list.append(drainage_line_feature.GetField(river_id)) if sort_field: hydroseq_list.append(drainage_line_feature.GetField(sort_field)) + del ogr_drainage_line_shapefile + hydroid_list = np.array(hydroid_list, dtype=np.int32) if hydroseq_list: hydroseq_list = np.array(hydroseq_list, dtype=np.int32) @@ -281,9 +319,8 @@ def CreateSubsetFile(in_drainage_line, hydroid_list = hydroid_list[sort_order] else: hydroid_list = np.sort(hydroid_list) - - with open_csv(out_riv_bas_id_file,'w') as csvfile: + with open_csv(out_riv_bas_id_file, 'w') as csvfile: connectwriter = csv_writer(csvfile) for hydroid in hydroid_list: connectwriter.writerow([hydroid]) diff --git a/RAPIDpy/gis/taudem.py b/RAPIDpy/gis/taudem.py index 61f081c..2957c76 100644 --- a/RAPIDpy/gis/taudem.py +++ b/RAPIDpy/gis/taudem.py @@ -1,44 +1,50 @@ # -*- coding: utf-8 -*- -## -## taudem.py -## RAPIDpy -## -## Created by Alan D Snow. -## Command line function based on ArcGIS python scripts by David Tarboton (https://github.com/dtarb/TauDEM) -## Copyright © 2016 Alan D Snow. All rights reserved. -## License: BSD 3-Clause - +""" + taudem.py + RAPIDpy + + Created by Alan D Snow, 2016. + Command line function based on ArcGIS python scripts by David Tarboton + (https://github.com/dtarb/TauDEM) + License: BSD 3-Clause +""" from datetime import datetime from multiprocessing import cpu_count -import numpy as np import os from subprocess import PIPE, Popen from sys import getrecursionlimit, setrecursionlimit -from past.builtins import xrange - -try: - from pyproj import Geod - from shapely.wkb import loads as shapely_loads - from shapely.ops import cascaded_union - from osgeo import gdal, ogr, osr -except ImportError: - print("WARNING: Several GIS functions will not work. " \ - "You need to install the gdal, pyproj, and shapely " \ - "python packages for these functions to work ...") - pass - -#------------------------------------------------------------------------------ + +from gazar.grid import GDALGrid +import numpy as np +from past.builtins import xrange # pylint: disable=redefined-builtin +from pyproj import Geod +from shapely.wkb import loads as shapely_loads +from shapely.ops import cascaded_union +from osgeo import gdal, ogr, osr + +from ..helper_functions import log + + +# ----------------------------------------------------------------------------- # MAIN CLASS -#------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- class TauDEM(object): """ TauDEM process manager. - Attributes: - taudem_exe_path(Optional[str]): Path to TauDEM directory containing executables. This is requred to use TauDEM functionality. - num_processors(Optional[int]): Number of proessors to use with TauDEM. It only works if use_all_processors=False. - use_all_processors(Optional[bool]): If True, the TauDEM processes will use all avaialble processors. - mpiexec_path(Optional[str]): Path to mpiexec command. Default is 'mpiexec'. + Attributes + ---------- + taudem_exe_path: str, optional + Path to TauDEM directory containing executables. This is requred to + use TauDEM functionality. + num_processors: int, optional + Number of proessors to use with TauDEM. It only works if + use_all_processors=False. + use_all_processors: bool, optional + If True, the TauDEM processes will use all avaialble processors. 
+ mpiexec_path: str, optional + Path to mpiexec command. Default is 'mpiexec'. + Initialization Example: @@ -64,35 +70,36 @@ def __init__(self, self.num_processors = num_processors self.mpiexec_path = mpiexec_path + # other attributes + self.pit_filled_elevation_grid = None + self.flow_dir_grid = None + self.contributing_area_grid = None + self.stream_raster_grid = None + def _run_mpi_cmd(self, cmd): """ This runs the command you send in """ - # Get and describe the first argument - # - - print("Number of Processes: {0}".format(self.num_processors)) + log("Number of Processes: {0}".format(self.num_processors)) time_start = datetime.utcnow() # Construct the taudem command line. cmd = [self.mpiexec_path, '-n', str(self.num_processors)] + cmd - print("Command Line: {0}".format(" ".join(cmd))) + log("Command Line: {0}".format(" ".join(cmd))) process = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False) out, err = process.communicate() if out: - print("OUTPUT:") + log("OUTPUT:") for line in out.split(b'\n'): - print(line) + log(line) if err: - print("ERROR:") - print(err) - #raise Exception(err) - print("Time to complete: {0}".format(datetime.utcnow()-time_start)) + log(err, severity="WARNING") + log("Time to complete: {0}".format(datetime.utcnow()-time_start)) - def _add_prj_file(self, original_gis_file, - new_gis_file): + @staticmethod + def _add_prj_file(original_gis_file, new_gis_file): """ - Adds prj file + Adds projection file """ out_prj_file = "{0}.prj".format(os.path.splitext(new_gis_file)[0]) if original_gis_file.endswith(".shp"): @@ -108,8 +115,8 @@ def _add_prj_file(self, original_gis_file, with open(out_prj_file, 'w') as prj_file: prj_file.write(spatial_ref_str) - def extractSubNetwork(self, - network_file, + @staticmethod + def extractSubNetwork(network_file, out_subset_network_file, outlet_ids, river_id_field, @@ -120,14 +127,27 @@ def extractSubNetwork(self, Extracts a subset river network from the main river network based on the outlet IDs. - Parameters: - network_file(str): Path to the stream network shapefile. - out_subset_network_file(str): Path to the output subset stream network shapefile. - outlet_ids(list): List if integers reperesenting the outlet IDs to be included in the subset stream network. - river_id_field(str): Name of the river ID field in the stream network shapefile. - next_down_id_field(str): Name if the field with the river ID of the next downstream river segment in the stream network shapefile. - river_magnitude_field(str): Name of the river magnitude field in the stream network shapefile. - safe_mode(Optional[bool]): If True, it will kill the simulation early before over taxing your computer. If you are confident your computer can handle it, set it to False. + Parameters + ---------- + network_file: str + Path to the stream network shapefile. + out_subset_network_file: str + Path to the output subset stream network shapefile. + outlet_ids: list + List of integers reperesenting the outlet IDs to be included in + the subset stream network. + river_id_field: str + Name of the river ID field in the stream network shapefile. + next_down_id_field: str + Name if the field with the river ID of the next downstream + river segment in the stream network shapefile. + river_magnitude_field: str + Name of the river magnitude field in the stream network shapefile. + safe_mode: bool, optional + If True, it will kill the simulation early before over taxing + your computer. If you are confident your computer can handle it, + set it to False. 
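The upstream walk this method relies on (the recursive getSubNetworkIDList defined further down) can exceed Python's recursion limit on large basins, which is what safe_mode guards against. For comparison only, and not how the changeset implements it, the same traversal can be written iteratively with an explicit stack, assuming the same river-ID and next-downstream arrays:

.. code:: python

    import numpy as np

    def upstream_indices(outlet_id, rivid_list, next_down_rivid_list):
        """Collect array indices of every reach upstream of outlet_id."""
        found = []
        stack = [outlet_id]
        while stack:
            current_id = stack.pop()
            for idx in np.where(next_down_rivid_list == current_id)[0]:
                found.append(idx)
                stack.append(rivid_list[idx])
        return found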
+ Here is an example of how to use this: @@ -136,16 +156,21 @@ def extractSubNetwork(self, import os from RAPIDpy.gis.taudem import TauDEM - td = TauDEM() output_directory = '/path/to/output/files' - td.extractSubNetwork(network_file=os.path.join(output_directory,"stream_reach_file.shp"), - out_subset_network_file=os.path.join(output_directory,"stream_reach_file_subset.shp"), - outlet_ids=[60830], - river_id_field="LINKNO", - next_down_id_field="DSLINKNO", - river_magnitude_field="Magnitude", - ) + network_shp = os.path.join(output_directory, + "stream_reach_file.shp") + out_shp = os.path.join(output_directory, + "stream_reach_file_subset.shp") + + TauDEM.extractSubNetwork( + network_file=network_shp, + out_subset_network_file=out_shp, + outlet_ids=[60830], + river_id_field="LINKNO", + next_down_id_field="DSLINKNO", + river_magnitude_field="Magnitude", + ) """ network_shapefile = ogr.Open(network_file) @@ -155,22 +180,26 @@ def extractSubNetwork(self, rivid_list = np.zeros(number_of_features, dtype=np.int32) next_down_rivid_list = np.zeros(number_of_features, dtype=np.int32) for feature_idx, drainage_line_feature in enumerate(network_layer): - rivid_list[feature_idx] = drainage_line_feature.GetField(river_id_field) - next_down_rivid_list[feature_idx] = drainage_line_feature.GetField(next_down_id_field) + rivid_list[feature_idx] = \ + drainage_line_feature.GetField(river_id_field) + next_down_rivid_list[feature_idx] = \ + drainage_line_feature.GetField(next_down_id_field) def getSubNetworkIDList(outlet_river_id, - rivid_list, - next_down_rivid_list): + _rivid_list, + _next_down_rivid_list): """ Adds ids upstream of the outlet to a list """ sub_network_index_list = [] try: - for feature_index in np.where(next_down_rivid_list==outlet_river_id)[0]: - sub_network_index_list.append(feature_index) - sub_network_index_list += getSubNetworkIDList(rivid_list[feature_index], - rivid_list, - next_down_rivid_list) + for feature_ii in \ + np.where(_next_down_rivid_list == outlet_river_id)[0]: + sub_network_index_list.append(feature_ii) + sub_network_index_list += \ + getSubNetworkIDList(_rivid_list[feature_ii], + _rivid_list, + _next_down_rivid_list) except IndexError: pass return sub_network_index_list @@ -179,50 +208,58 @@ def getSubNetworkIDList(outlet_river_id, try: main_sub_network_index_list = [] for outlet_id in outlet_ids: - outlet_index = np.where(rivid_list==outlet_id)[0][0] + outlet_index = np.where(rivid_list == outlet_id)[0][0] outlet_feature = network_layer.GetFeature(outlet_index) - outlet_magnitude = outlet_feature.GetField(river_magnitude_field) + outlet_magnitude = \ + outlet_feature.GetField(river_magnitude_field) if outlet_magnitude > original_recursion_limit: if not safe_mode: setrecursionlimit(outlet_magnitude) else: - raise Exception("Current recursion limit {0} will not allow" - " extraction for stream magnitude {1}. To override," - " set safe_mode to False ...".format(original_recursion_limit, - outlet_magnitude)) + raise Exception("Current recursion limit {0} will not " + "allow extraction for stream magnitude" + " {1}. To override, set safe_mode to " + "False ..." 
+ .format(original_recursion_limit, + outlet_magnitude)) main_sub_network_index_list.append(outlet_index) - main_sub_network_index_list += getSubNetworkIDList(outlet_id, - rivid_list, - next_down_rivid_list) + main_sub_network_index_list += \ + getSubNetworkIDList(outlet_id, + rivid_list, + next_down_rivid_list) except Exception: setrecursionlimit(original_recursion_limit) raise setrecursionlimit(original_recursion_limit) - #Write out subset to new shapefile + # Write out subset to new shapefile shp_drv = ogr.GetDriverByName('ESRI Shapefile') # Remove output shapefile if it already exists if os.path.exists(out_subset_network_file): shp_drv.DeleteDataSource(out_subset_network_file) network_subset_shp = shp_drv.CreateDataSource(out_subset_network_file) - network_subset_layer = network_subset_shp.CreateLayer('', network_layer.GetSpatialRef(), - ogr.wkbLineString) + network_subset_layer = \ + network_subset_shp.CreateLayer('', + network_layer.GetSpatialRef(), + ogr.wkbLineString) # Add input Layer Fields to the output Layer if it is the one we want - for i in xrange(network_layer_defn.GetFieldCount()): - network_subset_layer.CreateField(network_layer_defn.GetFieldDefn(i)) + for iii in xrange(network_layer_defn.GetFieldCount()): + network_subset_layer.CreateField( + network_layer_defn.GetFieldDefn(iii)) network_subset_layer_defn = network_subset_layer.GetLayerDefn() for feature_index in main_sub_network_index_list: subset_feature = network_layer.GetFeature(feature_index) - #add to list + # add to list new_feat = ogr.Feature(network_subset_layer_defn) # Add field values from input Layer - for i in xrange(network_layer_defn.GetFieldCount()): - new_feat.SetField(network_subset_layer_defn.GetFieldDefn(i).GetNameRef(), - subset_feature.GetField(i)) + for iii in xrange(network_layer_defn.GetFieldCount()): + new_feat.SetField( + network_subset_layer_defn.GetFieldDefn(iii).GetNameRef(), + subset_feature.GetField(iii)) # Set geometry as centroid geom = subset_feature.GetGeometryRef() @@ -230,7 +267,8 @@ def getSubNetworkIDList(outlet_river_id, # Add new feature to output Layer network_subset_layer.CreateFeature(new_feat) - def extractLargestSubNetwork(self, + @classmethod + def extractLargestSubNetwork(cls, network_file, out_subset_network_file, river_id_field, @@ -238,15 +276,27 @@ def extractLargestSubNetwork(self, river_magnitude_field, safe_mode=True): """ - Extracts the larges sub network from the watershed based on the magnitude parameter. + Extracts the larges sub network from the watershed based on the + magnitude parameter. + + Parameters + ---------- + network_file: str + Path to the stream network shapefile. + out_subset_network_file: str + Path to the output subset stream network shapefile. + river_id_field: str + Name of the river ID field in the stream network shapefile. + next_down_id_field: str + Name of the field with the river ID of the next downstream river + segment in the stream network shapefile. + river_magnitude_field: str + Name of the river magnitude field in the stream network shapefile. + safe_mode: bool, optional + If True, it will kill the simulation early before over taxing + your computer. If you are confident your computer can handle it, + set it to False. - Parameters: - network_file(str): Path to the stream network shapefile. - out_subset_network_file(str): Path to the output subset stream network shapefile. - river_id_field(str): Name of the river ID field in the stream network shapefile. 
- next_down_id_field(str): Name if the field with the river ID of the next downstream river segment in the stream network shapefile. - river_magnitude_field(str): Name of the river magnitude field in the stream network shapefile. - safe_mode(Optional[bool]): If True, it will kill the simulation early before over taxing your computer. If you are confident your computer can handle it, set it to False. Here is an example of how to use this: @@ -255,49 +305,63 @@ def extractLargestSubNetwork(self, import os from RAPIDpy.gis.taudem import TauDEM - td = TauDEM() - output_directory = '/path/to/output/files' - td.extractLargestSubNetwork(network_file=os.path.join(output_directory,"stream_reach_file.shp"), - out_subset_network_file=os.path.join(output_directory,"stream_reach_file_subset.shp"), - river_id_field="LINKNO", - next_down_id_field="DSLINKNO", - river_magnitude_field="Magnitude", - ) + network_shp = os.path.join(output_directory, + "stream_reach_file.shp") + out_shp = os.path.join(output_directory, + "stream_reach_file_subset.shp") + + TauDEM.extractLargestSubNetwork( + network_file=network_shp, + out_subset_network_file=out_shp, + river_id_field="LINKNO", + next_down_id_field="DSLINKNO", + river_magnitude_field="Magnitude", + ) """ network_shapefile = ogr.Open(network_file) network_layer = network_shapefile.GetLayer() number_of_features = network_layer.GetFeatureCount() riv_magnuitude_list = np.zeros(number_of_features, dtype=np.int32) for feature_idx, drainage_line_feature in enumerate(network_layer): - riv_magnuitude_list[feature_idx] = drainage_line_feature.GetField(river_magnitude_field) - - max_magnitude_feature = network_layer.GetFeature(np.argmax(riv_magnuitude_list)) - self.extractSubNetwork(network_file, - out_subset_network_file, - [max_magnitude_feature.GetField(river_id_field)], - river_id_field, - next_down_id_field, - river_magnitude_field, - safe_mode) - - def extractSubsetFromWatershed(self, - subset_network_file, + riv_magnuitude_list[feature_idx] =\ + drainage_line_feature.GetField(river_magnitude_field) + + max_magnitude_feature = \ + network_layer.GetFeature(np.argmax(riv_magnuitude_list)) + cls.extractSubNetwork(network_file, + out_subset_network_file, + [max_magnitude_feature.GetField(river_id_field)], + river_id_field, + next_down_id_field, + river_magnitude_field, + safe_mode) + + @staticmethod + def extractSubsetFromWatershed(subset_network_file, subset_network_river_id_field, watershed_file, watershed_network_river_id_field, out_watershed_subset_file): """ - Extract catchment by using subset network file. Use this after using either :func:`~RAPIDpy.gis.taudem.TauDEM.extractSubNetwork()` + Extract catchment by using subset network file. + Use this after using either + :func:`~RAPIDpy.gis.taudem.TauDEM.extractSubNetwork()` or :func:`~RAPIDpy.gis.taudem.TauDEM.extractLargestSubNetwork()`. - Parameters: - network_file(str): Path to the stream network shapefile. - out_subset_network_file(str): Path to the output subset stream network shapefile. - river_id_field(str): Name of the river ID field in the stream network shapefile. - next_down_id_field(str): Name if the field with the river ID of the next downstream river segment in the stream network shapefile. - river_magnitude_field(str): Name of the river magnitude field in the stream network shapefile. - safe_mode(Optional[bool]): If True, it will kill the simulation early before over taxing your computer. If you are confident your computer can handle it, set it to False. 
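The catchment subsetting this method performs reduces to an ID join: keep only the watershed polygons whose river ID also appears in the already-subset stream network. A stripped-down sketch of that filter with plain OGR, assuming hypothetical shapefile paths and a LINKNO field:

.. code:: python

    from osgeo import ogr

    network = ogr.Open('stream_reach_file_subset.shp')
    keep_ids = {feat.GetField('LINKNO') for feat in network.GetLayer()}

    watershed = ogr.Open('watershed_shapefile.shp')
    kept = [feat for feat in watershed.GetLayer()
            if feat.GetField('LINKNO') in keep_ids]
    print("{0} catchments match the subset network".format(len(kept)))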
+ Parameters + ---------- + subset_network_file: str + Path to the pre-subsetted stream network shapefile. + subset_network_river_id_field: str + The field name with the river ID in the stream network shapefile. + watershed_file: str + Path to the watershed shapefile. + watershed_network_river_id_field: str + Name of the field with the river ID in the watershed shapefile. + out_watershed_subset_file: str + The path to output the subset watershed shapefile. + Here is an example of how to use this: @@ -306,14 +370,19 @@ def extractSubsetFromWatershed(self, import os from RAPIDpy.gis.taudem import TauDEM - td = TauDEM() - output_directory = '/path/to/output/files' - td.extractSubsetFromWatershed(subset_network_file=os.path.join(output_directory,"stream_reach_file_subset.shp"), - subset_network_river_id_field="LINKNO", - watershed_file=os.path.join(output_directory,"watershed_shapefile.shp"), - watershed_network_river_id_field="LINKNO", - out_watershed_subset_file=os.path.join(output_directory,"watershed_shapefile_subset.shp")) + network_shp = os.path.join(output_directory, + "stream_reach_file.shp") + water_shp = os.path.join(output_directory, + "watershed_shapefile.shp") + out_shp = os.path.join(output_directory, + "watershed_shapefile_subset.shp") + TauDEM.extractSubsetFromWatershed( + subset_network_filenetwork_shp, + subset_network_river_id_field="LINKNO", + watershed_file=water_shp, + watershed_network_river_id_field="LINKNO", + out_watershed_subset_file=out_shp) """ subset_network_shapefile = ogr.Open(subset_network_file) @@ -321,43 +390,58 @@ def extractSubsetFromWatershed(self, ogr_watershed_shapefile = ogr.Open(watershed_file) ogr_watershed_shapefile_lyr = ogr_watershed_shapefile.GetLayer() - ogr_watershed_shapefile_lyr_defn = ogr_watershed_shapefile_lyr.GetLayerDefn() + ogr_watershed_shapefile_lyr_defn = \ + ogr_watershed_shapefile_lyr.GetLayerDefn() number_of_features = ogr_watershed_shapefile_lyr.GetFeatureCount() watershed_rivid_list = np.zeros(number_of_features, dtype=np.int32) - for feature_idx, watershed_feature in enumerate(ogr_watershed_shapefile_lyr): - watershed_rivid_list[feature_idx] = watershed_feature.GetField(watershed_network_river_id_field) - + for feature_idx, watershed_feature in \ + enumerate(ogr_watershed_shapefile_lyr): + watershed_rivid_list[feature_idx] = \ + watershed_feature.GetField(watershed_network_river_id_field) shp_drv = ogr.GetDriverByName('ESRI Shapefile') # Remove output shapefile if it already exists if os.path.exists(out_watershed_subset_file): shp_drv.DeleteDataSource(out_watershed_subset_file) - subset_watershed_shapefile = shp_drv.CreateDataSource(out_watershed_subset_file) - subset_watershed_layer = subset_watershed_shapefile.CreateLayer('', ogr_watershed_shapefile_lyr.GetSpatialRef(), - ogr.wkbPolygon) + subset_watershed_shapefile = \ + shp_drv.CreateDataSource(out_watershed_subset_file) + subset_watershed_layer = \ + subset_watershed_shapefile.CreateLayer( + '', + ogr_watershed_shapefile_lyr.GetSpatialRef(), + ogr.wkbPolygon) # Add input Layer Fields to the output Layer if it is the one we want - for i in xrange(ogr_watershed_shapefile_lyr_defn.GetFieldCount()): - subset_watershed_layer.CreateField(ogr_watershed_shapefile_lyr_defn.GetFieldDefn(i)) + for iii in xrange(ogr_watershed_shapefile_lyr_defn.GetFieldCount()): + subset_watershed_layer.CreateField( + ogr_watershed_shapefile_lyr_defn.GetFieldDefn(iii)) subset_watershed_layer_defn = subset_watershed_layer.GetLayerDefn() for drainage_line_feature in subset_network_layer: try: - 
watershed_feature_index = np.where(watershed_rivid_list==drainage_line_feature.GetField(subset_network_river_id_field))[0][0] + watershed_feature_index = \ + np.where(watershed_rivid_list == + drainage_line_feature.GetField( + subset_network_river_id_field))[0][0] except IndexError: - print("{0} {1} not found ...".format(subset_network_river_id_field, - drainage_line_feature.GetField(subset_network_river_id_field))) + log("{0} {1} not found ...".format( + subset_network_river_id_field, + drainage_line_feature.GetField( + subset_network_river_id_field))) continue - subset_feature = ogr_watershed_shapefile_lyr.GetFeature(watershed_feature_index) - #add to list + subset_feature = \ + ogr_watershed_shapefile_lyr.GetFeature(watershed_feature_index) + # add to list new_feat = ogr.Feature(subset_watershed_layer_defn) # Add field values from input Layer - for i in xrange(ogr_watershed_shapefile_lyr_defn.GetFieldCount()): - new_feat.SetField(subset_watershed_layer_defn.GetFieldDefn(i).GetNameRef(), - subset_feature.GetField(i)) + for iii in \ + xrange(ogr_watershed_shapefile_lyr_defn.GetFieldCount()): + new_feat.SetField( + subset_watershed_layer_defn.GetFieldDefn(iii).GetNameRef(), + subset_feature.GetField(iii)) # Set geometry as centroid geom = subset_feature.GetGeometryRef() @@ -365,39 +449,35 @@ def extractSubsetFromWatershed(self, # Add new feature to output Layer subset_watershed_layer.CreateFeature(new_feat) - def rasterToPolygon(self, raster_file, polygon_file): + @staticmethod + def rasterToPolygon(raster_file, polygon_file): """ - Converts raster to polygon and then dissolves it + Converts watershed raster to polygon and then dissolves it. + It dissolves features based on the LINKNO attribute. """ - print("Process: Raster to Polygon ...") + log("Process: Raster to Polygon ...") time_start = datetime.utcnow() - temp_polygon_file = "{0}_temp.shp".format(os.path.splitext(os.path.basename(polygon_file))[0]) - cmd = ["gdal_polygonize.py", raster_file, - "-f", "ESRI Shapefile", temp_polygon_file, - os.path.splitext(os.path.basename(temp_polygon_file))[0], - "LINKNO"] + temp_polygon_file = \ + "{0}_temp.shp".format( + os.path.splitext(os.path.basename(polygon_file))[0]) - process = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False) - out, err = process.communicate() - if out: - print("OUTPUT:") - for line in out.split(b'\n'): - print(line) - if err: - print("ERROR:") - print(err) - #raise Exception(err) - print("Time to convert to polygon: {0}".format(datetime.utcnow()-time_start)) + GDALGrid(raster_file).to_polygon(out_shapefile=temp_polygon_file, + fieldname="LINKNO", + self_mask=True) - print("Dissolving ...") + log("Time to convert to polygon: {0}" + .format(datetime.utcnow()-time_start)) + + log("Dissolving ...") time_start_dissolve = datetime.utcnow() ogr_polygin_shapefile = ogr.Open(temp_polygon_file) ogr_polygon_shapefile_lyr = ogr_polygin_shapefile.GetLayer() number_of_features = ogr_polygon_shapefile_lyr.GetFeatureCount() polygon_rivid_list = np.zeros(number_of_features, dtype=np.int32) - for feature_idx, catchment_feature in enumerate(ogr_polygon_shapefile_lyr): - polygon_rivid_list[feature_idx] = catchment_feature.GetField('LINKNO') - + for feature_idx, catchment_feature in \ + enumerate(ogr_polygon_shapefile_lyr): + polygon_rivid_list[feature_idx] = \ + catchment_feature.GetField('LINKNO') shp_drv = ogr.GetDriverByName('ESRI Shapefile') # Remove output shapefile if it already exists @@ -405,44 +485,59 @@ def rasterToPolygon(self, raster_file, polygon_file): 
shp_drv.DeleteDataSource(polygon_file) dissolve_shapefile = shp_drv.CreateDataSource(polygon_file) - dissolve_layer = dissolve_shapefile.CreateLayer('', ogr_polygon_shapefile_lyr.GetSpatialRef(), - ogr.wkbPolygon) + dissolve_layer = \ + dissolve_shapefile.CreateLayer( + '', + ogr_polygon_shapefile_lyr.GetSpatialRef(), + ogr.wkbPolygon) dissolve_layer.CreateField(ogr.FieldDefn('LINKNO', ogr.OFTInteger)) dissolve_layer_defn = dissolve_layer.GetLayerDefn() for unique_rivid in np.unique(polygon_rivid_list): - #get indices where it is in the polygon - feature_indices = np.where(polygon_rivid_list==unique_rivid)[0] + # get indices where it is in the polygon + feature_indices = np.where(polygon_rivid_list == unique_rivid)[0] new_feat = ogr.Feature(dissolve_layer_defn) new_feat.SetField('LINKNO', int(unique_rivid)) if len(feature_indices) == 1: - ##write feature to file - feature = ogr_polygon_shapefile_lyr.GetFeature(feature_indices[0]) + # write feature to file + feature = \ + ogr_polygon_shapefile_lyr.GetFeature(feature_indices[0]) new_feat.SetGeometry(feature.GetGeometryRef()) else: - ##dissolve + # dissolve dissolve_poly_list = [] for feature_index in feature_indices: - feature = ogr_polygon_shapefile_lyr.GetFeature(feature_index) + feature = \ + ogr_polygon_shapefile_lyr.GetFeature(feature_index) feat_geom = feature.GetGeometryRef() - dissolve_poly_list.append(shapely_loads(feat_geom.ExportToWkb())) + dissolve_poly_list.append( + shapely_loads(feat_geom.ExportToWkb())) dissolve_polygon = cascaded_union(dissolve_poly_list) - new_feat.SetGeometry(ogr.CreateGeometryFromWkb(dissolve_polygon.wkb)) + new_feat.SetGeometry( + ogr.CreateGeometryFromWkb(dissolve_polygon.wkb)) dissolve_layer.CreateFeature(new_feat) - #clean up + # clean up shp_drv.DeleteDataSource(temp_polygon_file) - print("Time to dissolve: {0}".format(datetime.utcnow()-time_start_dissolve)) - print("Total time to convert: {0}".format(datetime.utcnow()-time_start)) + log("Time to dissolve: {0}".format(datetime.utcnow() - + time_start_dissolve)) + log("Total time to convert: {0}".format(datetime.utcnow() - + time_start)) - def addLengthMeters(self, stream_network): + @staticmethod + def addLengthMeters(stream_network): """ - Adds length field in meters to network (The added field name will be 'LENGTH_M'). + Adds length field in meters to network + (The added field name will be 'LENGTH_M'). - .. note:: This may be needed for generating the kfac file depending on the units of your raster. See: :doc:`gis_tools`. + .. note:: This may be needed for generating the kfac file + depending on the units of your raster. See: :doc:`gis_tools`. + + Parameters + ---------- + stream_network: str + Path to stream network file. - Parameters: - stream_network(str): Path to stream network file. 
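The LENGTH_M values this method adds come from pyproj's geodesic inverse: Geod.inv returns the forward azimuth, back azimuth, and distance for each consecutive vertex pair, and the distances are summed per reach (see the refactored loop further down in this hunk). A standalone sketch on made-up WGS84 coordinates:

.. code:: python

    from pyproj import Geod

    # vertices of one stream reach, in lon/lat (EPSG:4326)
    lons = [-95.30, -95.28, -95.25]
    lats = [29.70, 29.72, 29.75]

    geod = Geod(ellps="WGS84")
    _, _, segment_lengths = geod.inv(lons[:-1], lats[:-1],
                                     lons[1:], lats[1:])
    length_m = sum(segment_lengths)  # total reach length in metres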
Here is an example of how to use this: @@ -451,30 +546,30 @@ def addLengthMeters(self, stream_network): import os from RAPIDpy.gis.taudem import TauDEM - td = TauDEM() - output_directory = '/path/to/output/files' - td.addLengthMeters(os.path.join(output_directory,"stream_reach_file.shp")) + TauDEM.addLengthMeters(os.path.join(output_directory, + "stream_reach_file.shp")) """ network_shapefile = ogr.Open(stream_network, 1) network_layer = network_shapefile.GetLayer() network_layer_defn = network_layer.GetLayerDefn() - #make sure projection EPSG:4326 + # make sure projection EPSG:4326 network_layer_proj = network_layer.GetSpatialRef() geographic_proj = osr.SpatialReference() geographic_proj.ImportFromEPSG(4326) proj_transform = None if network_layer_proj != geographic_proj: - proj_transform = osr.CoordinateTransformation(network_layer_proj, geographic_proj) + proj_transform = osr.CoordinateTransformation(network_layer_proj, + geographic_proj) - #check for field - create_field=True + # check for field + create_field = True for i in xrange(network_layer_defn.GetFieldCount()): field_name = network_layer_defn.GetFieldDefn(i).GetName() if field_name == 'LENGTH_M': - create_field=False + create_field = False break if create_field: @@ -483,13 +578,14 @@ def addLengthMeters(self, stream_network): geo_manager = Geod(ellps="WGS84") for network_feature in network_layer: feat_geom = network_feature.GetGeometryRef() - #make sure coordinates are geographic + # make sure coordinates are geographic if proj_transform: feat_geom.Transform(proj_transform) line = shapely_loads(feat_geom.ExportToWkb()) lon_list, lat_list = line.xy - az1, az2, dist = geo_manager.inv(lon_list[:-1], lat_list[:-1], lon_list[1:], lat_list[1:]) + dist = geo_manager.inv(lon_list[:-1], lat_list[:-1], + lon_list[1:], lat_list[1:])[2] network_feature.SetField('LENGTH_M', sum(dist)) network_layer.SetFeature(network_feature) @@ -500,9 +596,9 @@ def pitRemove(self, consider4way=False, ): """ - Remove low spots from DEM + Remove low spots from DEM. """ - print("PROCESS: PitRemove") + log("PROCESS: PitRemove") self.pit_filled_elevation_grid = pit_filled_elevation_grid # Construct the taudem command line. @@ -518,7 +614,7 @@ def pitRemove(self, self._run_mpi_cmd(cmd) - #create projection file + # create projection file self._add_prj_file(elevation_grid, self.pit_filled_elevation_grid) @@ -529,7 +625,7 @@ def dinfFlowDirection(self, """ Calculates flow direction with Dinf method """ - print("PROCESS: DinfFlowDirection") + log("PROCESS: DinfFlowDirection") if pit_filled_elevation_grid: self.pit_filled_elevation_grid = pit_filled_elevation_grid @@ -542,7 +638,7 @@ def dinfFlowDirection(self, self._run_mpi_cmd(cmd) - #create projection files + # create projection files self._add_prj_file(self.pit_filled_elevation_grid, flow_dir_grid) self._add_prj_file(self.pit_filled_elevation_grid, @@ -553,9 +649,9 @@ def d8FlowDirection(self, slope_grid, pit_filled_elevation_grid=None): """ - Calculates flow direction with D8 method + Calculates flow direction with D8 method. 
""" - print("PROCESS: D8FlowDirection") + log("PROCESS: D8FlowDirection") if pit_filled_elevation_grid: self.pit_filled_elevation_grid = pit_filled_elevation_grid @@ -570,7 +666,7 @@ def d8FlowDirection(self, self._run_mpi_cmd(cmd) - #create projection files + # create projection files self._add_prj_file(self.pit_filled_elevation_grid, self.flow_dir_grid) self._add_prj_file(self.pit_filled_elevation_grid, @@ -584,9 +680,9 @@ def dinfContributingArea(self, edge_contamination=False, ): """ - Calculates contributing area with Dinf method + Calculates contributing area with Dinf method. """ - print("PROCESS: DinfContributingArea") + log("PROCESS: DinfContributingArea") # Construct the taudem command line. cmd = [os.path.join(self.taudem_exe_path, 'areadinf'), @@ -603,7 +699,7 @@ def dinfContributingArea(self, self._run_mpi_cmd(cmd) - #create projection file + # create projection file self._add_prj_file(flow_dir_grid, contributing_area_grid) @@ -614,9 +710,9 @@ def d8ContributingArea(self, edge_contamination=False, flow_dir_grid=None): """ - Calculates contributing area with D8 method + Calculates contributing area with D8 method. """ - print("PROCESS: D8ContributingArea") + log("PROCESS: D8ContributingArea") if flow_dir_grid: self.flow_dir_grid = flow_dir_grid @@ -636,7 +732,7 @@ def d8ContributingArea(self, self._run_mpi_cmd(cmd) - #create projection file + # create projection file self._add_prj_file(self.flow_dir_grid, self.contributing_area_grid) @@ -647,9 +743,9 @@ def streamDefByThreshold(self, mask_grid=None, ): """ - Calculates the stream definition by threshold + Calculates the stream definition by threshold. """ - print("PROCESS: StreamDefByThreshold") + log("PROCESS: StreamDefByThreshold") self.stream_raster_grid = stream_raster_grid # Construct the taudem command line. @@ -664,7 +760,7 @@ def streamDefByThreshold(self, self._run_mpi_cmd(cmd) - #create projection file + # create projection file self._add_prj_file(contributing_area_grid, self.stream_raster_grid) @@ -684,7 +780,7 @@ def streamReachAndWatershed(self, """ Creates vector network and shapefile from stream raster grid """ - print("PROCESS: StreamReachAndWatershed") + log("PROCESS: StreamReachAndWatershed") if pit_filled_elevation_grid: self.pit_filled_elevation_grid = pit_filled_elevation_grid if flow_dir_grid: @@ -714,7 +810,7 @@ def streamReachAndWatershed(self, self._run_mpi_cmd(cmd) - #create projection file + # create projection file self._add_prj_file(self.pit_filled_elevation_grid, out_stream_order_grid) self._add_prj_file(self.pit_filled_elevation_grid, @@ -743,17 +839,35 @@ def demToStreamNetwork(self, .. note:: For information about the *threshold* parameter, see: http://hydrology.usu.edu/taudem/taudem5/help53/StreamDefinitionByThreshold.html - Parameters: - output_directory(str): Path to output generated files to. - raw_elevation_dem(Optional[str]): Path to original elevation DEM file. Required if *pit_filled_elevation_grid* is not used. - pit_filled_elevation_grid(Optional[str]): Path to pit filled elevation DEM file. Required if *raw_elevation_dem* is not used. - flow_dir_grid_d8(Optional[str]): Path to flow direction grid generated using TauDEM's D8 method. - contributing_area_grid_d8(Optional[str]): Path to contributing area grid generated using TauDEM's D8 method. - flow_dir_grid_dinf(Optional[str]): Path to flow direction grid generated using TauDEM's D-Infinity method (EXPERIMENTAL). 
- contributing_area_grid_dinf(Optional[str]): Path to contributing area grid generated using TauDEM's D-Infinity method (EXPERIMENTAL). - use_dinf(Optional[bool]): Use the D-Infinity method to get stream definition (EXPERIMENTAL). - threshold(Optional[int]): The stream threshold or maximum number of upstream grid cells. See above note. - delineate(Optional[bool]): If True, this will use the delineate option for theis method using TauDEM. Default is False. + Parameters + ---------- + output_directory: str + Path to output generated files to. + raw_elevation_dem: str, optional + Path to original elevation DEM file. Required if + *pit_filled_elevation_grid* is not used. + pit_filled_elevation_grid: str, optional + Path to pit filled elevation DEM file. Required if + *raw_elevation_dem* is not used. + flow_dir_grid_d8: str, optional + Path to flow direction grid generated using TauDEM's D8 method. + contributing_area_grid_d8: str, optional + Path to contributing area grid generated using TauDEM's D8 method. + flow_dir_grid_dinf: str, optional + Path to flow direction grid generated using TauDEM's + D-Infinity method (EXPERIMENTAL). + contributing_area_grid_dinf: str, optional + Path to contributing area grid generated using TauDEM's + D-Infinity method (EXPERIMENTAL). + use_dinf: bool, optional + Use the D-Infinity method to get stream definition (EXPERIMENTAL). + threshold: int, optional + The stream threshold or maximum number of upstream grid cells. + See above note. + delineate: bool, optional + If True, this will use the delineate option for theis method + using TauDEM. Default is False. + Here is an example of how to use this: @@ -761,48 +875,56 @@ def demToStreamNetwork(self, from RAPIDpy.gis.taudem import TauDEM - td = TauDEM("/path/to/scripts/TauDEM") elevation_dem = '/path/to/dem.tif' output_directory = '/path/to/output/files' + + td = TauDEM("/path/to/scripts/TauDEM") td.demToStreamNetwork(output_directory, elevation_dem, threshold=1000) """ - time_start = datetime.utcnow() - #FILL PITS IF NEEDED + # FILL PITS IF NEEDED self.pit_filled_elevation_grid = pit_filled_elevation_grid if not pit_filled_elevation_grid: - pit_filled_elevation_grid = os.path.join(output_directory, 'pit_filled_elevation_grid.tif') + pit_filled_elevation_grid = \ + os.path.join(output_directory, 'pit_filled_elevation_grid.tif') self.pitRemove(raw_elevation_dem, pit_filled_elevation_grid) - #GENERATE D8 RASTERS + # GENERATE D8 RASTERS self.flow_dir_grid = flow_dir_grid_d8 if not flow_dir_grid_d8: - flow_dir_grid_d8 = os.path.join(output_directory, 'flow_dir_grid_d8.tif') + flow_dir_grid_d8 = \ + os.path.join(output_directory, 'flow_dir_grid_d8.tif') slope_grid_d8 = os.path.join(output_directory, 'slope_grid_d8.tif') self.d8FlowDirection(flow_dir_grid_d8, slope_grid_d8) self.contributing_area_grid = contributing_area_grid_d8 if not contributing_area_grid_d8: - contributing_area_grid_d8 = os.path.join(output_directory, 'contributing_area_grid_d8.tif') + contributing_area_grid_d8 = \ + os.path.join(output_directory, 'contributing_area_grid_d8.tif') self.d8ContributingArea(contributing_area_grid_d8) - stream_raster_grid = os.path.join(output_directory, 'stream_raster_grid.tif') + stream_raster_grid = \ + os.path.join(output_directory, 'stream_raster_grid.tif') if use_dinf: - print("USING DINF METHOD TO GET STREAM DEFINITION ...") + log("USING DINF METHOD TO GET STREAM DEFINITION ...") if not flow_dir_grid_dinf: - flow_dir_grid_dinf = os.path.join(output_directory, 'flow_dir_grid_dinf.tif') - slope_grid_dinf = 
os.path.join(output_directory, 'slope_grid_dinf.tif') + flow_dir_grid_dinf = \ + os.path.join(output_directory, 'flow_dir_grid_dinf.tif') + slope_grid_dinf = \ + os.path.join(output_directory, 'slope_grid_dinf.tif') self.dinfFlowDirection(flow_dir_grid_dinf, slope_grid_dinf) if not contributing_area_grid_dinf: - contributing_area_grid_dinf = os.path.join(output_directory, 'contributing_area_grid_dinf.tif') + contributing_area_grid_dinf = \ + os.path.join(output_directory, + 'contributing_area_grid_dinf.tif') self.dinfContributingArea(contributing_area_grid_dinf, flow_dir_grid_dinf) @@ -810,17 +932,22 @@ def demToStreamNetwork(self, threshold, contributing_area_grid_dinf) else: - print("USING D8 METHOD TO GET STREAM DEFINITION ...") + log("USING D8 METHOD TO GET STREAM DEFINITION ...") self.streamDefByThreshold(stream_raster_grid, threshold, contributing_area_grid_d8) - #GENERATE STREAM NETWORK - out_stream_order_grid = os.path.join(output_directory, 'stream_order_grid.tif') - out_network_connectivity_tree = os.path.join(output_directory, 'network_connectivity_tree.txt') - out_network_coordinates = os.path.join(output_directory, 'network_coordinates.txt') - out_stream_reach_file = os.path.join(output_directory, 'stream_reach_file.shp') - out_watershed_grid = os.path.join(output_directory, 'watershed_grid.tif') + # GENERATE STREAM NETWORK + out_stream_order_grid = \ + os.path.join(output_directory, 'stream_order_grid.tif') + out_network_connectivity_tree = \ + os.path.join(output_directory, 'network_connectivity_tree.txt') + out_network_coordinates = \ + os.path.join(output_directory, 'network_coordinates.txt') + out_stream_reach_file = \ + os.path.join(output_directory, 'stream_reach_file.shp') + out_watershed_grid = \ + os.path.join(output_directory, 'watershed_grid.tif') self.streamReachAndWatershed(delineate, out_stream_order_grid, out_network_connectivity_tree, @@ -828,7 +955,9 @@ def demToStreamNetwork(self, out_stream_reach_file, out_watershed_grid) - #convert watersed grid to shapefile - out_watershed_shapefile = os.path.join(output_directory, 'watershed_shapefile.shp') + # convert watersed grid to shapefile + out_watershed_shapefile = \ + os.path.join(output_directory, 'watershed_shapefile.shp') self.rasterToPolygon(out_watershed_grid, out_watershed_shapefile) - print("Total time to complete: {0}".format(datetime.utcnow()-time_start)) + log("Total time to complete: {0}".format(datetime.utcnow() - + time_start)) diff --git a/RAPIDpy/gis/voronoi.py b/RAPIDpy/gis/voronoi.py index c725f72..3633794 100644 --- a/RAPIDpy/gis/voronoi.py +++ b/RAPIDpy/gis/voronoi.py @@ -1,21 +1,20 @@ # -*- coding: utf-8 -*- -## -## voronoi.py -## RAPIDpy -## -## Created by Alan D Snow. -## -## Copyright © 2016 Alan D Snow. All rights reserved. -## License: BSD 3-Clause +""" + voronoi.py + RAPIDpy -import numpy as np + Created by Alan D Snow, 2016. 
+ License: BSD 3-Clause +""" import os -try: - from osgeo import ogr, osr - from scipy.spatial import Voronoi - from shapely.geometry import Polygon -except Exception: - raise Exception("You need scipy, gdal, and shapely python packages to run these tools ...") + +import numpy as np +from osgeo import ogr, osr +from scipy.spatial import Voronoi +from shapely.geometry import Polygon + +from ..helper_functions import log + def _get_voronoi_centroid_array(lsm_lat_array, lsm_lon_array, extent): """ @@ -31,60 +30,76 @@ def _get_voronoi_centroid_array(lsm_lat_array, lsm_lon_array, extent): if (lsm_lat_array.ndim == 2) and (lsm_lon_array.ndim == 2): # generate point list with 2D lat lon lists if extent: - #exctract subset within extent + # exctract subset within extent lsm_dx = np.max(np.absolute(np.diff(lsm_lon_array))) lsm_dy = np.max(np.absolute(np.diff(lsm_lat_array, axis=0))) - - #remove values with NaN + + # remove values with NaN lsm_lat_array = np.ma.filled(lsm_lat_array, fill_value=-9999) lsm_lon_array = np.ma.filled(lsm_lon_array, fill_value=-9999) - - lsm_lat_indices_from_lat, lsm_lon_indices_from_lat = np.where((lsm_lat_array >= (YMin - 2*lsm_dy)) & (lsm_lat_array <= (YMax + 2*lsm_dy))) - lsm_lat_indices_from_lon, lsm_lon_indices_from_lon = np.where((lsm_lon_array >= (XMin - 2*lsm_dx)) & (lsm_lon_array <= (XMax + 2*lsm_dx))) - lsm_lat_indices = np.intersect1d(lsm_lat_indices_from_lat, lsm_lat_indices_from_lon) - lsm_lon_indices = np.intersect1d(lsm_lon_indices_from_lat, lsm_lon_indices_from_lon) + lsm_lat_indices_from_lat, lsm_lon_indices_from_lat = \ + np.where((lsm_lat_array >= (YMin - 2*lsm_dy)) & + (lsm_lat_array <= (YMax + 2*lsm_dy))) + lsm_lat_indices_from_lon, lsm_lon_indices_from_lon = \ + np.where((lsm_lon_array >= (XMin - 2*lsm_dx)) & + (lsm_lon_array <= (XMax + 2*lsm_dx))) + + lsm_lat_indices = np.intersect1d(lsm_lat_indices_from_lat, + lsm_lat_indices_from_lon) + lsm_lon_indices = np.intersect1d(lsm_lon_indices_from_lat, + lsm_lon_indices_from_lon) - lsm_lat_list = lsm_lat_array[lsm_lat_indices,:][:,lsm_lon_indices] - lsm_lon_list = lsm_lon_array[lsm_lat_indices,:][:,lsm_lon_indices] + lsm_lat_list = \ + lsm_lat_array[lsm_lat_indices, :][:, lsm_lon_indices] + lsm_lon_list = \ + lsm_lon_array[lsm_lat_indices, :][:, lsm_lon_indices] # Create a list of geographic coordinate pairs for i in range(len(lsm_lat_indices)): for j in range(len(lsm_lon_indices)): ptList.append([lsm_lon_list[i][j], lsm_lat_list[i][j]]) - + elif lsm_lat_array.ndim == 1 and lsm_lon_array.ndim == 1: - #generate point list with 1D lat lon lists + # generate point list with 1D lat lon lists if extent: Ybuffer = 2 * abs(lsm_lat_array[0]-lsm_lat_array[1]) Xbuffer = 2 * abs(lsm_lon_array[0]-lsm_lon_array[1]) - # Extract the lat and lon within buffered extent (buffer with 2* interval degree) - lsm_lat_list = lsm_lat_array[(lsm_lat_array >= (YMin - Ybuffer)) & (lsm_lat_array <= (YMax + Ybuffer))] - lsm_lon_list = lsm_lon_array[(lsm_lon_array >= (XMin - Xbuffer)) & (lsm_lon_array <= (XMax + Xbuffer))] - + # Extract the lat and lon within buffered extent + # (buffer with 2* interval degree) + lsm_lat_list = lsm_lat_array[(lsm_lat_array >= (YMin - Ybuffer)) & + (lsm_lat_array <= (YMax + Ybuffer))] + lsm_lon_list = lsm_lon_array[(lsm_lon_array >= (XMin - Xbuffer)) & + (lsm_lon_array <= (XMax + Xbuffer))] + # Create a list of geographic coordinate pairs for ptX in lsm_lon_list: for ptY in lsm_lat_list: ptList.append([ptX, ptY]) else: - raise IndexError("Lat/Lon lists have invalid dimensions. 
Only 1D or 2D arrays allowed ...") + raise IndexError("Lat/Lon lists have invalid dimensions. " + "Only 1D or 2D arrays allowed ...") + + if len(ptList) <= 0: + raise IndexError("The watershed is outside of the bounds of the" + " land surface model grid ...") - if len(ptList) <=0: - raise IndexError("The watershed is outside of the bounds of the land surface model grid ...") + return np.array(ptList) # set-up for input to Delaunay - return np.array(ptList) # set-up for input to Delaunay -def _get_voronoi_poly_points(vert_index_list, voronoi_vertices, voronoi_centroid): +def _get_voronoi_poly_points(vert_index_list, voronoi_vertices, + voronoi_centroid): """ - This function returns the corner points for a polygon from scipy voronoi information + This function returns the corner points for a + polygon from scipy voronoi information """ voronoi_poly_points = [] if -1 not in vert_index_list and len(vert_index_list) > 3: voronoi_poly_points = voronoi_vertices[vert_index_list] - elif vert_index_list.size>0: - #ASSUME RECTANGLE - vert_index_list = vert_index_list[vert_index_list>=0] + elif vert_index_list.size > 0: + # ASSUME RECTANGLE + vert_index_list = vert_index_list[vert_index_list >= 0] voronoi_poly_points = voronoi_vertices[vert_index_list] - #CASE 1: 2 valid voronoi vertices + # CASE 1: 2 valid voronoi vertices if vert_index_list.size == 2: center_lon = voronoi_centroid[0] center_lat = voronoi_centroid[1] @@ -92,23 +107,27 @@ def _get_voronoi_poly_points(vert_index_list, voronoi_vertices, voronoi_centroid corner_lat1 = voronoi_poly_points[0][1] corner_lon2 = voronoi_poly_points[1][0] corner_lat2 = voronoi_poly_points[1][1] - - #check if need to add points in lon or lat + + # check if need to add points in lon or lat if abs(corner_lon1-corner_lon2) > abs(corner_lat1-corner_lat2): dLat = center_lat - corner_lat1 - #append the corners in order - voronoi_poly_points = np.array([[corner_lon1, corner_lat1], - [corner_lon2, corner_lat2], - [corner_lon2, center_lat + dLat], - [corner_lon1, center_lat + dLat]]) + # append the corners in order + voronoi_poly_points = np.array([ + [corner_lon1, corner_lat1], + [corner_lon2, corner_lat2], + [corner_lon2, center_lat + dLat], + [corner_lon1, center_lat + dLat] + ]) else: dLon = center_lon - corner_lon1 - #append the corners in order - voronoi_poly_points = np.array([[corner_lon1, corner_lat1], - [corner_lon2, corner_lat2], - [center_lon+dLon, corner_lat2], - [center_lon+dLon, corner_lat1]]) - #CADE 2: 1 valid voronoi vertex + # append the corners in order + voronoi_poly_points = np.array([ + [corner_lon1, corner_lat1], + [corner_lon2, corner_lat2], + [center_lon + dLon, corner_lat2], + [center_lon + dLon, corner_lat1] + ]) + # CASE 2: 1 valid voronoi vertex elif vert_index_list.size == 1: center_lon = voronoi_centroid[0] center_lat = voronoi_centroid[1] @@ -116,15 +135,17 @@ def _get_voronoi_poly_points(vert_index_list, voronoi_vertices, voronoi_centroid corner_lat = voronoi_poly_points[0][1] dLat = center_lat - corner_lat dLon = center_lon - corner_lon - #append the corners in order - voronoi_poly_points = np.array([[corner_lon, corner_lat], - [center_lon + dLon, corner_lat], - [center_lon + dLon, center_lat + dLat], - [corner_lon, center_lat + dLat]]) - + # append the corners in order + voronoi_poly_points = np.array([ + [corner_lon, corner_lat], + [center_lon + dLon, corner_lat], + [center_lon + dLon, center_lat + dLat], + [corner_lon, center_lat + dLat] + ]) return voronoi_poly_points + def pointsToVoronoiGridShapefile(lat, lon, vor_shp_path, 
extent=None): """ Converts points to shapefile grid via voronoi @@ -132,8 +153,10 @@ def pointsToVoronoiGridShapefile(lat, lon, vor_shp_path, extent=None): voronoi_centroids = _get_voronoi_centroid_array(lat, lon, extent) # set-up output polygon shp - print("Creating output polygon shp {0}".format(os.path.basename(vor_shp_path))) - if os.path.exists(vor_shp_path): os.remove(vor_shp_path) + log("Creating output polygon shp {0}" + .format(os.path.basename(vor_shp_path))) + if os.path.exists(vor_shp_path): + os.remove(vor_shp_path) drv = ogr.GetDriverByName('ESRI Shapefile') outShp = drv.CreateDataSource(vor_shp_path) osr_geographic_proj = osr.SpatialReference() @@ -142,64 +165,62 @@ def pointsToVoronoiGridShapefile(lat, lon, vor_shp_path, extent=None): layer.CreateField(ogr.FieldDefn('GRID_LAT', ogr.OFTReal)) layer.CreateField(ogr.FieldDefn('GRID_LON', ogr.OFTReal)) layerDefn = layer.GetLayerDefn() - + # find nodes surrounding polygon centroid # sort nodes in counterclockwise order # create polygon perimeter through nodes - print("Building Voronoi polygons...") - #compute voronoi - voronoi_manager = Voronoi(voronoi_centroids) + log("Building Voronoi polygons...") + # compute voronoi + voronoi_manager = Voronoi(voronoi_centroids) voronoi_vertices = voronoi_manager.vertices voronoi_regions = voronoi_manager.regions for point_id, region_index in enumerate(voronoi_manager.point_region): vert_index_list = np.array(voronoi_regions[region_index]) voronoi_centroid = voronoi_centroids[point_id] - voronoi_poly_points = _get_voronoi_poly_points(vert_index_list, - voronoi_vertices, + voronoi_poly_points = _get_voronoi_poly_points(vert_index_list, + voronoi_vertices, voronoi_centroid) if len(voronoi_poly_points) == 4: poly = ogr.Geometry(ogr.wkbPolygon) ring = ogr.Geometry(ogr.wkbLinearRing) - for i, node in enumerate(voronoi_poly_points): - if i==0: - loopLon = node[0] # grab first node to close ring - loopLat = node[1] - ring.AddPoint(node[0],node[1]) - - ring.AddPoint(loopLon,loopLat) + for node in voronoi_poly_points: + ring.AddPoint(node[0], node[1]) + + # grab first node to close ring + ring.AddPoint(voronoi_poly_points[0][0], voronoi_poly_points[0][1]) + poly.AddGeometry(ring) feat = ogr.Feature(layerDefn) feat.SetField('GRID_LON', float(voronoi_centroid[0])) feat.SetField('GRID_LAT', float(voronoi_centroid[1])) - feat.SetGeometry(poly) + feat.SetGeometry(poly) layer.CreateFeature(feat) - feat = poly = ring = None + def pointsToVoronoiGridArray(lat, lon, extent=None): """ Converts points to grid array via voronoi """ voronoi_centroids = _get_voronoi_centroid_array(lat, lon, extent) - # find nodes surrounding polygon centroid # sort nodes in counterclockwise order # create polygon perimeter through nodes - print("Building Voronoi polygons...") - #compute voronoi - voronoi_manager = Voronoi(voronoi_centroids) + log("Building Voronoi polygons...") + # compute voronoi + voronoi_manager = Voronoi(voronoi_centroids) voronoi_vertices = voronoi_manager.vertices voronoi_regions = voronoi_manager.regions feature_list = [] for point_id, region_index in enumerate(voronoi_manager.point_region): vert_index_list = np.array(voronoi_regions[region_index]) voronoi_centroid = voronoi_centroids[point_id] - voronoi_poly_points = _get_voronoi_poly_points(vert_index_list, - voronoi_vertices, + voronoi_poly_points = _get_voronoi_poly_points(vert_index_list, + voronoi_vertices, voronoi_centroid) - + if len(voronoi_poly_points) == 4: feature_list.append({'polygon': Polygon(voronoi_poly_points), - 'lon' : 
voronoi_centroid[0], - 'lat' : voronoi_centroid[1]}) - + 'lon': voronoi_centroid[0], + 'lat': voronoi_centroid[1]}) + return feature_list diff --git a/RAPIDpy/gis/weight.py b/RAPIDpy/gis/weight.py index 8d690cc..f17c020 100644 --- a/RAPIDpy/gis/weight.py +++ b/RAPIDpy/gis/weight.py @@ -1,45 +1,44 @@ # -*- coding: utf-8 -*- -## -## weight.py -## RAPIDpy -## -## Created by Alan D Snow. -## Based on RAPID_Toolbox for ArcMap -## Copyright © 2016 Alan D Snow. All rights reserved. -## License: BSD 3-Clause +""" + weight.py + RAPIDpy + Created by Alan D Snow, 2016. + Based on RAPID_Toolbox for ArcMap + License: BSD 3-Clause +""" import csv from datetime import datetime from functools import partial + from netCDF4 import Dataset import numpy as np +from pyproj import Proj, transform +from shapely.wkb import loads as shapely_loads +from shapely.ops import transform as shapely_transform +from shapely.geos import TopologicalError +import rtree # http://toblerity.org/rtree/install.html +from osgeo import gdal, ogr, osr -try: - from pyproj import Proj, transform - from shapely.wkb import loads as shapely_loads - from shapely.ops import transform as shapely_transform - from shapely.geos import TopologicalError - import rtree #http://toblerity.org/rtree/install.html - from osgeo import gdal, ogr, osr -except Exception: - raise Exception("You need the gdal, pyproj, shapely, and rtree python package to run these tools ...") - -#local +# local from .voronoi import pointsToVoronoiGridArray -from ..helper_functions import open_csv +from ..helper_functions import log, open_csv gdal.UseExceptions() + def get_poly_area_geo(poly): """ Calculates the area in meters squared of the individual polygon """ minx, miny, maxx, maxy = poly.bounds - #reproject polygon to get area - reprojected_for_area = Proj("+proj=aea +lat_1={0} +lat_1={1} +lat_0={2} +lon_0={3}".format(miny, - maxy, - (miny+maxy)/2.0, - (minx+maxx)/2.0)) + # reproject polygon to get area + reprojected_for_area = Proj("+proj=aea +lat_1={0} +lat_1={1} " + "+lat_0={2} +lon_0={3}" + .format(miny, + maxy, + (miny + maxy) / 2.0, + (minx + maxx) / 2.0)) geographic_proj = Proj(init='epsg:4326') project_func = partial(transform, geographic_proj, @@ -47,230 +46,284 @@ def get_poly_area_geo(poly): reprojected_poly = shapely_transform(project_func, poly) return reprojected_poly.area + def _get_lat_lon_indices(lsm_lat_array, lsm_lon_array, lat, lon): """ - Determines the index in the array (1D or 2D) where the + Determines the index in the array (1D or 2D) where the lat/lon point is - """ + """ if lsm_lat_array.ndim == 2 and lsm_lon_array.ndim == 2: - lsm_lat_indices_from_lat, lsm_lon_indices_from_lat = np.where((lsm_lat_array == lat)) - lsm_lat_indices_from_lon, lsm_lon_indices_from_lon = np.where((lsm_lon_array == lon)) + lsm_lat_indices_from_lat, lsm_lon_indices_from_lat = \ + np.where((lsm_lat_array == lat)) + lsm_lat_indices_from_lon, lsm_lon_indices_from_lon = \ + np.where((lsm_lon_array == lon)) + + index_lsm_grid_lat = np.intersect1d(lsm_lat_indices_from_lat, + lsm_lat_indices_from_lon)[0] + index_lsm_grid_lon = np.intersect1d(lsm_lon_indices_from_lat, + lsm_lon_indices_from_lon)[0] - index_lsm_grid_lat = np.intersect1d(lsm_lat_indices_from_lat, lsm_lat_indices_from_lon)[0] - index_lsm_grid_lon = np.intersect1d(lsm_lon_indices_from_lat, lsm_lon_indices_from_lon)[0] - elif lsm_lat_array.ndim == 1 and lsm_lon_array.ndim == 1: index_lsm_grid_lon = np.where(lsm_lon_array == lon)[0][0] index_lsm_grid_lat = np.where(lsm_lat_array == lat)[0][0] else: - raise 
IndexError("Lat/Lon lists have invalid dimensions. Only 1D or 2D arrays allowed ...") - - return index_lsm_grid_lat, index_lsm_grid_lon - + raise IndexError("Lat/Lon lists have invalid dimensions. " + "Only 1D or 2D arrays allowed ...") + + return index_lsm_grid_lat, index_lsm_grid_lon + def find_nearest(array, value): """ Get the nearest index to value searching for """ return (np.abs(array-value)).argmin() - -def RTreeCreateWeightTable(lsm_grid_lat, lsm_grid_lon, - in_catchment_shapefile, river_id, - in_rapid_connect, out_weight_table, - file_geodatabase=None, area_id=None): - + + +def rtree_create_weight_table(lsm_grid_lat, lsm_grid_lon, + in_catchment_shapefile, river_id, + in_rapid_connect, out_weight_table, + file_geodatabase=None, area_id=None): """ Create Weight Table for Land Surface Model Grids """ - time_start_all = datetime.utcnow() + time_start_all = datetime.utcnow() if lsm_grid_lat.ndim == 3 and lsm_grid_lon.ndim == 3: - #assume first dimension is time + # assume first dimension is time lsm_grid_lat = lsm_grid_lat[0] lsm_grid_lon = lsm_grid_lon[0] - - print("Generating LSM Grid Thiessen Array ...") + + log("Generating LSM Grid Thiessen Array ...") if file_geodatabase: gdb_driver = ogr.GetDriverByName("OpenFileGDB") ogr_file_geodatabase = gdb_driver.Open(file_geodatabase, 0) - ogr_catchment_shapefile_lyr = ogr_file_geodatabase.GetLayer(in_catchment_shapefile) + ogr_catchment_shapefile_lyr = \ + ogr_file_geodatabase.GetLayer(in_catchment_shapefile) else: ogr_catchment_shapefile = ogr.Open(in_catchment_shapefile) ogr_catchment_shapefile_lyr = ogr_catchment_shapefile.GetLayer() - - ogr_catchment_shapefile_lyr_proj = ogr_catchment_shapefile_lyr.GetSpatialRef() - original_catchment_proj = Proj(ogr_catchment_shapefile_lyr_proj.ExportToProj4()) - geographic_proj = Proj(init='EPSG:4326') #geographic + + ogr_catchment_shapefile_lyr_proj = \ + ogr_catchment_shapefile_lyr.GetSpatialRef() + original_catchment_proj = \ + Proj(ogr_catchment_shapefile_lyr_proj.ExportToProj4()) + geographic_proj = Proj(init='EPSG:4326') extent = ogr_catchment_shapefile_lyr.GetExtent() if original_catchment_proj != geographic_proj: x, y = transform(original_catchment_proj, geographic_proj, - [extent[0], extent[1]], + [extent[0], extent[1]], [extent[2], extent[3]]) extent = [min(x), max(x), min(y), max(y)] - - lsm_grid_feature_list = pointsToVoronoiGridArray(lsm_grid_lat, lsm_grid_lon, extent) - - ##COMMENTED LINES FOR TESTING + + lsm_grid_feature_list = \ + pointsToVoronoiGridArray(lsm_grid_lat, lsm_grid_lon, extent) + +# ##COMMENTED LINES FOR TESTING # import os # from .voronoi import pointsToVoronoiGridShapefile -# vor_shp_path = os.path.join(os.path.dirname(in_catchment_shapefile), "test_grid.shp") -# pointsToVoronoiGridShapefile(lsm_grid_lat, lsm_grid_lon, vor_shp_path, extent) - +# vor_shp_path = \ +# os.path.join(os.path.dirname(in_catchment_shapefile), "test_grid.shp") +# pointsToVoronoiGridShapefile(lsm_grid_lat, lsm_grid_lon, +# vor_shp_path, extent) + time_end_lsm_grid_thiessen = datetime.utcnow() - print(time_end_lsm_grid_thiessen - time_start_all) - - print("Generating LSM Grid Rtree ...") + log(time_end_lsm_grid_thiessen - time_start_all) + + log("Generating LSM Grid Rtree ...") rtree_idx = rtree.index.Index() # Populate R-tree index with bounds of ECMWF grid cells for lsm_grid_pos, lsm_grid_feature in enumerate(lsm_grid_feature_list): rtree_idx.insert(lsm_grid_pos, lsm_grid_feature['polygon'].bounds) - + time_end_lsm_grid_rtree = datetime.utcnow() - print(time_end_lsm_grid_rtree - 
time_end_lsm_grid_thiessen) - - print("Retrieving catchment river id list ...") - number_of_catchment_features = ogr_catchment_shapefile_lyr.GetFeatureCount() - catchment_rivid_list = np.zeros(number_of_catchment_features, dtype=np.int32) - for feature_idx, catchment_feature in enumerate(ogr_catchment_shapefile_lyr): - catchment_rivid_list[feature_idx] = catchment_feature.GetField(river_id) - - print("Reading in RAPID connect file ...") - rapid_connect_rivid_list = np.loadtxt(in_rapid_connect, - delimiter=",", + log(time_end_lsm_grid_rtree - time_end_lsm_grid_thiessen) + + log("Retrieving catchment river id list ...") + number_of_catchment_features = \ + ogr_catchment_shapefile_lyr.GetFeatureCount() + catchment_rivid_list = \ + np.zeros(number_of_catchment_features, dtype=np.int32) + for feature_idx, catchment_feature in \ + enumerate(ogr_catchment_shapefile_lyr): + catchment_rivid_list[feature_idx] = \ + catchment_feature.GetField(river_id) + + log("Reading in RAPID connect file ...") + rapid_connect_rivid_list = np.loadtxt(in_rapid_connect, + delimiter=",", usecols=(0,), ndmin=1, dtype=int) - print("Find LSM grid cells that intersect with each catchment") - print("and write out weight table ...") - - dummy_lat_index, dummy_lon_index = _get_lat_lon_indices(lsm_grid_lat, lsm_grid_lon, - lsm_grid_feature_list[0]['lat'], - lsm_grid_feature_list[0]['lon']) - dummy_row_end = [0, - dummy_lon_index, - dummy_lat_index, - 1, - lsm_grid_feature_list[0]['lon'], - lsm_grid_feature_list[0]['lat'] - ] - + log("Find LSM grid cells that intersect with each catchment") + log("and write out weight table ...") + + dummy_lat_index, dummy_lon_index = \ + _get_lat_lon_indices(lsm_grid_lat, + lsm_grid_lon, + lsm_grid_feature_list[0]['lat'], + lsm_grid_feature_list[0]['lon']) + dummy_row_end = [ + 0, + dummy_lon_index, + dummy_lat_index, + 1, + lsm_grid_feature_list[0]['lon'], + lsm_grid_feature_list[0]['lat'] + ] + with open_csv(out_weight_table, 'w') as csvfile: connectwriter = csv.writer(csvfile) - connectwriter.writerow(['rivid', 'area_sqm', 'lon_index', 'lat_index', + connectwriter.writerow(['rivid', 'area_sqm', 'lon_index', 'lat_index', 'npoints', 'lsm_grid_lon', 'lsm_grid_lat']) - geographic_proj = Proj(init='EPSG:4326') #geographic + geographic_proj = Proj(init='EPSG:4326') osr_geographic_proj = osr.SpatialReference() osr_geographic_proj.ImportFromEPSG(4326) proj_transform = None if original_catchment_proj != geographic_proj: - proj_transform = osr.CoordinateTransformation(ogr_catchment_shapefile_lyr_proj, osr_geographic_proj) - + proj_transform = \ + osr.CoordinateTransformation(ogr_catchment_shapefile_lyr_proj, + osr_geographic_proj) + for rapid_connect_rivid in rapid_connect_rivid_list: intersect_grid_info_list = [] try: - catchment_pos = np.where(catchment_rivid_list==rapid_connect_rivid)[0][0] + catchment_pos = \ + np.where(catchment_rivid_list == rapid_connect_rivid)[0][0] except IndexError: - #if it is not in the catchment, add dummy row in its place + # if it is not in the catchment, add dummy row in its place connectwriter.writerow([rapid_connect_rivid] + dummy_row_end) continue - pass - get_catchment_feature = ogr_catchment_shapefile_lyr.GetFeature(catchment_pos) + + get_catchment_feature = \ + ogr_catchment_shapefile_lyr.GetFeature(catchment_pos) feat_geom = get_catchment_feature.GetGeometryRef() - #make sure coordinates are geographic + # make sure coordinates are geographic if proj_transform: feat_geom.Transform(proj_transform) catchment_polygon = shapely_loads(feat_geom.ExportToWkb()) - for 
sub_lsm_grid_pos in rtree_idx.intersection(catchment_polygon.bounds): - lsm_grid_polygon = lsm_grid_feature_list[sub_lsm_grid_pos]['polygon'] + for sub_lsm_grid_pos in \ + rtree_idx.intersection(catchment_polygon.bounds): + lsm_grid_polygon = \ + lsm_grid_feature_list[sub_lsm_grid_pos]['polygon'] if catchment_polygon.intersects(lsm_grid_polygon): try: - intersect_poly = catchment_polygon.intersection(lsm_grid_polygon) + intersect_poly = \ + catchment_polygon.intersection(lsm_grid_polygon) except TopologicalError: - print('INFO: The catchment polygon with id {0} was invalid. Attempting to self clean...'.format(rapid_connect_rivid)) + log('The catchment polygon with id {0} was ' + 'invalid. Attempting to self clean...' + .format(rapid_connect_rivid)) original_area = catchment_polygon.area catchment_polygon = catchment_polygon.buffer(0) area_ratio = original_area/catchment_polygon.area - print('AREA_RATIO', area_ratio) + log('AREA_RATIO: {0}'.format(area_ratio)) msg_level = "INFO" if round(area_ratio, 5) != 1: msg_level = "WARNING" - print('{0}: The cleaned catchment polygon area differs from the' - ' original area by {1}%.'.format(msg_level, abs(area_ratio - 1))) - intersect_poly = catchment_polygon.intersection(lsm_grid_polygon) + log('The cleaned catchment polygon area ' + 'differs from the original area by {0}%.' + .format(abs(area_ratio - 1)), severity=msg_level) + intersect_poly = \ + catchment_polygon.intersection(lsm_grid_polygon) if not area_id: - #attempt to calculate AREA + # attempt to calculate AREA poly_area = get_poly_area_geo(intersect_poly) else: - poly_area = float(get_catchment_feature.GetField(area_id))*intersect_poly.area/catchment_polygon.area - - index_lsm_grid_lat, index_lsm_grid_lon = _get_lat_lon_indices(lsm_grid_lat, lsm_grid_lon, - lsm_grid_feature_list[sub_lsm_grid_pos]['lat'], - lsm_grid_feature_list[sub_lsm_grid_pos]['lon']) - intersect_grid_info_list.append({'rivid' : rapid_connect_rivid, - 'area' : poly_area, - 'lsm_grid_lat': lsm_grid_feature_list[sub_lsm_grid_pos]['lat'], - 'lsm_grid_lon': lsm_grid_feature_list[sub_lsm_grid_pos]['lon'], - 'index_lsm_grid_lon': index_lsm_grid_lon, - 'index_lsm_grid_lat': index_lsm_grid_lat}) + poly_area = \ + float(get_catchment_feature.GetField(area_id)) * \ + intersect_poly.area/catchment_polygon.area + + index_lsm_grid_lat, index_lsm_grid_lon = \ + _get_lat_lon_indices( + lsm_grid_lat, + lsm_grid_lon, + lsm_grid_feature_list[sub_lsm_grid_pos]['lat'], + lsm_grid_feature_list[sub_lsm_grid_pos]['lon']) + intersect_grid_info_list.append({ + 'rivid': rapid_connect_rivid, + 'area': poly_area, + 'lsm_grid_lat': + lsm_grid_feature_list[sub_lsm_grid_pos]['lat'], + 'lsm_grid_lon': + lsm_grid_feature_list[sub_lsm_grid_pos]['lon'], + 'index_lsm_grid_lon': index_lsm_grid_lon, + 'index_lsm_grid_lat': index_lsm_grid_lat + }) npoints = len(intersect_grid_info_list) - #If no intersection found, add dummy row - if(npoints <=0): + # If no intersection found, add dummy row + if npoints <= 0: connectwriter.writerow([rapid_connect_rivid] + dummy_row_end) - + for intersect_grid_info in intersect_grid_info_list: - connectwriter.writerow([intersect_grid_info['rivid'], - intersect_grid_info['area'], - intersect_grid_info['index_lsm_grid_lon'], - intersect_grid_info['index_lsm_grid_lat'], - npoints, - intersect_grid_info['lsm_grid_lon'], - intersect_grid_info['lsm_grid_lat']]) - - time_end_all = datetime.utcnow() - print(time_end_all - time_end_lsm_grid_rtree) - print("TOTAL TIME: {0}".format(time_end_all - time_start_all)) - - -def
CreateWeightTableECMWF(in_ecmwf_nc, - in_catchment_shapefile, + connectwriter.writerow([ + intersect_grid_info['rivid'], + intersect_grid_info['area'], + intersect_grid_info['index_lsm_grid_lon'], + intersect_grid_info['index_lsm_grid_lat'], + npoints, + intersect_grid_info['lsm_grid_lon'], + intersect_grid_info['lsm_grid_lat'] + ]) + + time_end_all = datetime.utcnow() + log(time_end_all - time_end_lsm_grid_rtree) + log("TOTAL TIME: {0}".format(time_end_all - time_start_all)) + + +def CreateWeightTableECMWF(in_ecmwf_nc, + in_catchment_shapefile, river_id, - in_connectivity_file, + in_connectivity_file, out_weight_table, - area_id=None, - file_geodatabase=None, - ): - + area_id=None, + file_geodatabase=None): """ Create Weight Table for ECMWF Grids - - .. note:: The grids are in the RAPIDpy package under the gis/lsm_grids folder. - - Args: - in_ecmwf_nc(str): Path to the ECMWF NetCDF grid. - in_catchment_shapefile(str): Path to the Catchment shapefile. - river_id(str): The name of the field with the river ID (Ex. 'DrainLnID' or 'LINKNO'). - in_connectivity_file(str): The path to the RAPID connectivity file. - out_weight_table(str): The path to the output weight table file. - area_id(Optional[str]): The name of the field with the area of each catchment stored in meters squared. Default is it calculate the area. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - + + .. note:: The grids are in the RAPIDpy package under + the gis/lsm_grids folder. + + Parameters + ---------- + in_ecmwf_nc: str + Path to the ECMWF NetCDF grid. + in_catchment_shapefile: str + Path to the Catchment shapefile. + river_id: str + The name of the field with the river ID (Ex. 'DrainLnID' or 'LINKNO'). + in_connectivity_file: str + The path to the RAPID connectivity file. + out_weight_table: str + The path to the output weight table file. + area_id: str, optional + The name of the field with the area of each catchment stored in meters + squared. Default is it calculate the area. + file_geodatabase: str, optional + Path to the file geodatabase. If you use this option, in_drainage_line + is the name of the stream network feature class. + (WARNING: Not always stable with GDAL.) + + Example: - + .. 
code:: python - + from RAPIDpy.gis.weight import CreateWeightTableECMWF - CreateWeightTableECMWF(in_ecmwf_nc='/path/to/runoff_ecmwf_grid.nc' - in_catchment_shapefile='/path/to/catchment.shp', - river_id='LINKNO', - in_connectivity_file='/path/to/rapid_connect.csv', - out_weight_table='/path/to/ecmwf_weight.csv', - ) + CreateWeightTableECMWF( + in_ecmwf_nc='/path/to/runoff_ecmwf_grid.nc' + in_catchment_shapefile='/path/to/catchment.shp', + river_id='LINKNO', + in_connectivity_file='/path/to/rapid_connect.csv', + out_weight_table='/path/to/ecmwf_weight.csv', + ) + """ - #extract ECMWF GRID + # extract ECMWF GRID data_ecmwf_nc = Dataset(in_ecmwf_nc) variables_list = data_ecmwf_nc.variables.keys() in_ecmwf_lat_var = 'lat' @@ -280,66 +333,87 @@ def CreateWeightTableECMWF(in_ecmwf_nc, if 'longitude' in variables_list: in_ecmwf_lon_var = 'longitude' - ecmwf_lon = (data_ecmwf_nc.variables[in_ecmwf_lon_var][:] + 180) % 360 - 180 # convert [0, 360] to [-180, 180] - ecmwf_lat = data_ecmwf_nc.variables[in_ecmwf_lat_var][:] #assume [-90,90] + # convert [0, 360] to [-180, 180] + ecmwf_lon = \ + (data_ecmwf_nc.variables[in_ecmwf_lon_var][:] + 180) % 360 - 180 + # assume [-90, 90] + ecmwf_lat = data_ecmwf_nc.variables[in_ecmwf_lat_var][:] data_ecmwf_nc.close() - - RTreeCreateWeightTable(ecmwf_lat, ecmwf_lon, - in_catchment_shapefile, river_id, - in_connectivity_file, out_weight_table, - file_geodatabase, area_id) + + rtree_create_weight_table(ecmwf_lat, ecmwf_lon, + in_catchment_shapefile, river_id, + in_connectivity_file, out_weight_table, + file_geodatabase, area_id) + def CreateWeightTableLDAS(in_ldas_nc, in_nc_lon_var, in_nc_lat_var, - in_catchment_shapefile, + in_catchment_shapefile, river_id, - in_connectivity_file, + in_connectivity_file, out_weight_table, - area_id=None, + area_id=None, file_geodatabase=None): - """ - Create Weight Table for NLDAS, GLDAS grids as well as for 2D Joules, or LIS Grids - - Args: - in_ldas_nc(str): Path to the land surface model NetCDF grid. - in_nc_lon_var(str): The variable name in the NetCDF file for the longitude. - in_nc_lat_var(str): The variable name in the NetCDF file for the latitude. - in_catchment_shapefile(str): Path to the Catchment shapefile. - river_id(str): The name of the field with the river ID (Ex. 'DrainLnID' or 'LINKNO'). - in_connectivity_file(str): The path to the RAPID connectivity file. - out_weight_table(str): The path to the output weight table file. - area_id(Optional[str]): The name of the field with the area of each catchment stored in meters squared. Default is it calculate the area. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - + Create Weight Table for NLDAS, GLDAS grids as well as for 2D Joules, + or LIS Grids + + Parameters + ---------- + in_ldas_nc: str + Path to the land surface model NetCDF grid. + in_nc_lon_var: str + The variable name in the NetCDF file for the longitude. + in_nc_lat_var: str + The variable name in the NetCDF file for the latitude. + in_catchment_shapefile: str + Path to the Catchment shapefile. + river_id: str + The name of the field with the river ID (Ex. 'DrainLnID' or 'LINKNO'). + in_connectivity_file: str + The path to the RAPID connectivity file. + out_weight_table: str + The path to the output weight table file. + area_id: str, optional + The name of the field with the area of each catchment stored in meters + squared. Default is it calculate the area. 
+ file_geodatabase: str, optional + Path to the file geodatabase. If you use this option, in_drainage_line + is the name of the stream network feature class. + (WARNING: Not always stable with GDAL.) + + Example: - + .. code:: python - + from RAPIDpy.gis.weight import CreateWeightTableLDAS - CreateWeightTableLDAS(in_ldas_nc='/path/to/runoff_grid.nc', - in_nc_lon_var="lon_110", - in_nc_lat_var="lat_110", - in_catchment_shapefile='/path/to/catchment.shp', - river_id='LINKNO', - in_connectivity_file='/path/to/rapid_connect.csv', - out_weight_table='/path/to/ldas_weight.csv', - ) + CreateWeightTableLDAS( + in_ldas_nc='/path/to/runoff_grid.nc', + in_nc_lon_var="lon_110", + in_nc_lat_var="lat_110", + in_catchment_shapefile='/path/to/catchment.shp', + river_id='LINKNO', + in_connectivity_file='/path/to/rapid_connect.csv', + out_weight_table='/path/to/ldas_weight.csv', + ) """ - #extract ECMWF GRID + # extract LDAS GRID data_ldas_nc = Dataset(in_ldas_nc) variables_list = data_ldas_nc.variables.keys() if in_nc_lon_var not in variables_list: - raise Exception("Invalid longitude variable. Choose from: {0}".format(variables_list)) + raise Exception("Invalid longitude variable. Choose from: {0}" + .format(variables_list)) if in_nc_lat_var not in variables_list: - raise Exception("Invalid latitude variable. Choose from: {0}".format(variables_list)) - ldas_lon = data_ldas_nc.variables[in_nc_lon_var][:] #assume [-180, 180] - ldas_lat = data_ldas_nc.variables[in_nc_lat_var][:] #assume [-90,90] + raise Exception("Invalid latitude variable. Choose from: {0}" + .format(variables_list)) + ldas_lon = data_ldas_nc.variables[in_nc_lon_var][:] # assume [-180, 180] + ldas_lat = data_ldas_nc.variables[in_nc_lat_var][:] # assume [-90,90] data_ldas_nc.close() - - RTreeCreateWeightTable(ldas_lat, ldas_lon, - in_catchment_shapefile, river_id, - in_connectivity_file, out_weight_table, - file_geodatabase, area_id) + + rtree_create_weight_table(ldas_lat, ldas_lon, + in_catchment_shapefile, river_id, + in_connectivity_file, out_weight_table, + file_geodatabase, area_id) diff --git a/RAPIDpy/gis/workflow.py b/RAPIDpy/gis/workflow.py index e693ab3..110ca0f 100644 --- a/RAPIDpy/gis/workflow.py +++ b/RAPIDpy/gis/workflow.py @@ -1,22 +1,24 @@ # -*- coding: utf-8 -*- -## -## workflow.py -## RAPIDpy -## -## Created by Alan D Snow. -## Based on RAPID_Toolbox for ArcMap -## Copyright © 2016 Alan D Snow. All rights reserved. -## License: BSD 3-Clause +""" + workflow.py + RAPIDpy + Created by Alan D Snow, 2016. + Based on RAPID_Toolbox for ArcMap + License: BSD 3-Clause +""" import os -from .network import (CreateNetworkConnectivity, CreateNetworkConnectivityTauDEMTree, - CreateNetworkConnectivityNHDPlus, CreateSubsetFile) -from .muskingum import (CreateMuskingumKfacFile, CreateMuskingumKFile, +from .network import (CreateNetworkConnectivity, + CreateNetworkConnectivityTauDEMTree, + CreateNetworkConnectivityNHDPlus, + CreateSubsetFile) +from .muskingum import (CreateMuskingumKfacFile, CreateMuskingumKFile, CreateConstMuskingumXFile) from .weight import CreateWeightTableECMWF from .centroid import FlowlineToPoint + def CreateAllStaticRAPIDFiles(in_drainage_line, river_id, length_id, @@ -30,66 +32,92 @@ def CreateAllStaticRAPIDFiles(in_drainage_line, x_value=0.3, nhdplus=False, taudem_network_connectivity_tree_file=None, - file_geodatabase=None - ): + file_geodatabase=None): """ To generate the static RAPID files (rapid_connect.csv, riv_bas_id.csv, kfac.csv, k.csv, x.csv, comid_lat_lon_z.csv) with default values. 
- - Args: - in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile. - river_id(str): The name of the field with the river ID (Ex. 'HydroID', 'COMID', or 'LINKNO'). - length_id(str): The field name containging the length of the river segment (Ex. 'LENGTHKM' or 'Length'). - slope_id(str): The field name containging the slope of the river segment (Ex. 'Avg_Slope' or 'Slope'). - next_down_id(str): The name of the field with the river ID of the next downstream river segment (Ex. 'NextDownID' or 'DSLINKNO'). - rapid_output_folder(str): The path to the folder where all of the RAPID output will be generated. - kfac_celerity(Optional[float]): The flow wave celerity for the watershed in meters per second. 1 km/hr or 1000.0/3600.0 m/s is a reasonable value if unknown. - kfac_formula_type(Optional[int]): An integer representing the formula type to use when calculating kfac. Default is 3. - kfac_length_units(Optional[str]): The units for the length_id field. Supported types are "m" for meters and "km" for kilometers. Default is "km". - lambda_k(Optional[float]): The value for lambda given from RAPID after the calibration process. Default is 0.35. - x_value(Optional[float]): Value for the muskingum X parameter [0-0.5]. Default is 0.3. - nhdplus(Optional[bool]): If True, the drainage line is from the NHDPlus dataset with the VAA fields COMID, FROMNODE, TONODE, and DIVERGENCE. Default is False. - taudem_network_connectivity_tree_file(Optional[str]): If set, the connectivity file will be generated from the TauDEM connectivity tree file. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - + + Parameters + ---------- + in_drainage_line: str + Path to the stream network (i.e. Drainage Line) shapefile. + river_id: str + The name of the field with the river ID + (Ex. 'HydroID', 'COMID', or 'LINKNO'). + length_id: str + The field name containing the length of the river segment + (Ex. 'LENGTHKM' or 'Length'). + slope_id: str + The field name containing the slope of the river segment + (Ex. 'Avg_Slope' or 'Slope'). + next_down_id: str + The name of the field with the river ID of the next downstream river + segment (Ex. 'NextDownID' or 'DSLINKNO'). + rapid_output_folder: str + The path to the folder where all of the RAPID output will be generated. + kfac_celerity: float, optional + The flow wave celerity for the watershed in meters per second. + 1 km/hr or 1000.0/3600.0 m/s is a reasonable value if unknown. + kfac_formula_type: int, optional + An integer representing the formula type to use when calculating kfac. + Default is 3. + kfac_length_units: str, optional + The units for the length_id field. Supported types are "m" for meters + and "km" for kilometers. Default is "km". + lambda_k: float, optional + The value for lambda given from RAPID after the calibration process. + Default is 0.35. + x_value: float, optional + Value for the muskingum X parameter [0-0.5]. Default is 0.3. + nhdplus: bool, optional + If True, the drainage line is from the NHDPlus dataset with the VAA + fields COMID, FROMNODE, TONODE, and DIVERGENCE. Default is False. + taudem_network_connectivity_tree_file: str, optional + If set, the connectivity file will be generated from the TauDEM + connectivity tree file. + file_geodatabase: str, optional + Path to the file geodatabase. If you use this option, + in_drainage_line is the name of the stream network feature class.
+ (WARNING: Not always stable with GDAL.) + + Example:: - + from RAPIDpy.gis.workflow import CreateAllStaticRAPIDFiles - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__=="__main__": - CreateAllStaticRAPIDFiles(in_drainage_line="/path/to/drainage_line.shp", - river_id="HydroID", - length_id="LENGTHKM", - slope_id="SLOPE", - next_down_river_id="NextDownID", - rapid_output_folder="/path/to/rapid/output", - ) + + CreateAllStaticRAPIDFiles( + in_drainage_line="/path/to/drainage_line.shp", + river_id="HydroID", + length_id="LENGTHKM", + slope_id="SLOPE", + next_down_river_id="NextDownID", + rapid_output_folder="/path/to/rapid/output", + ) """ - #RAPID connect file + # RAPID connect file rapid_connect_file = os.path.join(rapid_output_folder, 'rapid_connect.csv') if nhdplus: CreateNetworkConnectivityNHDPlus(in_drainage_line, rapid_connect_file, file_geodatabase) elif taudem_network_connectivity_tree_file: - CreateNetworkConnectivityTauDEMTree(taudem_network_connectivity_tree_file, - rapid_connect_file) + CreateNetworkConnectivityTauDEMTree( + taudem_network_connectivity_tree_file, + rapid_connect_file) else: CreateNetworkConnectivity(in_drainage_line, river_id, next_down_id, rapid_connect_file, file_geodatabase) - - #river basin id file + + # river basin id file riv_bas_id_file = os.path.join(rapid_output_folder, 'riv_bas_id.csv') CreateSubsetFile(in_drainage_line, - river_id, + river_id, riv_bas_id_file, file_geodatabase) - #kfac file + # kfac file kfac_file = os.path.join(rapid_output_folder, 'kfac.csv') CreateMuskingumKfacFile(in_drainage_line, river_id, @@ -101,23 +129,25 @@ def CreateAllStaticRAPIDFiles(in_drainage_line, kfac_file, length_units=kfac_length_units, file_geodatabase=file_geodatabase) - #k file + # k file k_file = os.path.join(rapid_output_folder, 'k.csv') CreateMuskingumKFile(lambda_k, kfac_file, k_file) - #x file + # x file x_file = os.path.join(rapid_output_folder, 'x.csv') CreateConstMuskingumXFile(x_value, rapid_connect_file, x_file) - #comid lat lon z file - comid_lat_lon_z_file = os.path.join(rapid_output_folder, 'comid_lat_lon_z.csv') + # comid lat lon z file + comid_lat_lon_z_file = \ + os.path.join(rapid_output_folder, 'comid_lat_lon_z.csv') FlowlineToPoint(in_drainage_line, river_id, comid_lat_lon_z_file, file_geodatabase) - + + def CreateAllStaticECMWFFiles(in_catchment, catchment_river_id, rapid_output_folder, @@ -125,60 +155,77 @@ def CreateAllStaticECMWFFiles(in_catchment, file_geodatabase=None ): """ - This creates all of the ECMWF grid weight tables using an area weighted method based on Esri's RAPID_Toolbox. - - Args: - in_catchment(str): Path to the Catchment shapefile. - catchment_river_id(str): The name of the field with the river ID (Ex. 'DrainLnID' or 'LINKNO'). - rapid_output_folder(str): The path to the folder where all of the RAPID output will be generated. - rapid_connect_file(str): The path to the RAPID connectivity file. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - + This creates all of the ECMWF grid weight tables using an area + weighted method based on Esri's RAPID_Toolbox. + + Parameters + ---------- + in_catchment: str + Path to the Catchment shapefile. + catchment_river_id: str + The name of the field with the river ID (Ex. 'DrainLnID' or 'LINKNO'). 
+ rapid_output_folder: str + The path to the folder where all of the RAPID output will be generated. + rapid_connect_file: str + The path to the RAPID connectivity file. + file_geodatabase: str, optional + Path to the file geodatabase. If you use this option, + in_drainage_line is the name of the stream network feature class. + (WARNING: Not always stable with GDAL.) + + Example:: - + from RAPIDpy.gis.workflow import CreateAllStaticECMWFFiles - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__=="__main__": - CreateAllStaticECMWFFiles(in_catchment="/path/to/catchment.shp", - catchment_river_id="DrainLnID", - rapid_output_folder="/path/to/rapid/output", - rapid_connect_file="/path/to/rapid_connect.csv", - ) + + CreateAllStaticECMWFFiles( + in_catchment="/path/to/catchment.shp", + catchment_river_id="DrainLnID", + rapid_output_folder="/path/to/rapid/output", + rapid_connect_file="/path/to/rapid_connect.csv", + ) + """ - lsm_grid_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lsm_grids') + lsm_grid_folder = \ + os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lsm_grids') - #create from ECMWF high reslution grid - ecmwf_t1279_grid_file = os.path.join(lsm_grid_folder, 'runoff_ecmwf_t1279_grid.nc') - weight_ecmwf_t1279_file = os.path.join(rapid_output_folder, 'weight_ecmwf_t1279.csv') - CreateWeightTableECMWF(ecmwf_t1279_grid_file, - in_catchment, + # create from ECMWF high reslution grid + ecmwf_t1279_grid_file = \ + os.path.join(lsm_grid_folder, 'runoff_ecmwf_t1279_grid.nc') + weight_ecmwf_t1279_file = \ + os.path.join(rapid_output_folder, 'weight_ecmwf_t1279.csv') + CreateWeightTableECMWF(ecmwf_t1279_grid_file, + in_catchment, catchment_river_id, - rapid_connect_file, + rapid_connect_file, weight_ecmwf_t1279_file, file_geodatabase=file_geodatabase) - #create from ECMWF low reslution grid - ecmwf_tco639_grid_file = os.path.join(lsm_grid_folder, 'runoff_ecmwf_tco639_grid.nc') - weight_ecmwf_tco639_file = os.path.join(rapid_output_folder, 'weight_ecmwf_tco639.csv') - CreateWeightTableECMWF(ecmwf_tco639_grid_file, - in_catchment, + # create from ECMWF low reslution grid + ecmwf_tco639_grid_file = \ + os.path.join(lsm_grid_folder, 'runoff_ecmwf_tco639_grid.nc') + weight_ecmwf_tco639_file = \ + os.path.join(rapid_output_folder, 'weight_ecmwf_tco639.csv') + CreateWeightTableECMWF(ecmwf_tco639_grid_file, + in_catchment, catchment_river_id, - rapid_connect_file, + rapid_connect_file, weight_ecmwf_tco639_file, file_geodatabase=file_geodatabase) - #create from ERA Interim grid - era_t511_grid_file = os.path.join(lsm_grid_folder, 'runoff_era_t511_grid.nc') - weight_era_t511_file = os.path.join(rapid_output_folder, 'weight_era_t511.csv') - CreateWeightTableECMWF(era_t511_grid_file, - in_catchment, + # create from ERA Interim grid + era_t511_grid_file = \ + os.path.join(lsm_grid_folder, 'runoff_era_t511_grid.nc') + weight_era_t511_file = \ + os.path.join(rapid_output_folder, 'weight_era_t511.csv') + CreateWeightTableECMWF(era_t511_grid_file, + in_catchment, catchment_river_id, - rapid_connect_file, + rapid_connect_file, weight_era_t511_file, file_geodatabase=file_geodatabase) + def CreateAllStaticECMWFRAPIDFiles(in_drainage_line, river_id, length_id, @@ -194,47 +241,73 @@ def CreateAllStaticECMWFRAPIDFiles(in_drainage_line, x_value=0.3, nhdplus=False, taudem_network_connectivity_tree_file=None, - file_geodatabase=None - ): + 
file_geodatabase=None): """ This creates all of the static RAPID files and ECMWF grid weight tables. - - Args: - in_drainage_line(str): Path to the stream network (i.e. Drainage Line) shapefile. - river_id(str): The name of the field with the river ID (Ex. 'HydroID', 'COMID', or 'LINKNO'). - length_id(str): The field name containging the length of the river segment (Ex. 'LENGTHKM' or 'Length'). - slope_id(str): The field name containging the slope of the river segment (Ex. 'Avg_Slope' or 'Slope'). - next_down_id(str): The name of the field with the river ID of the next downstream river segment (Ex. 'NextDownID' or 'DSLINKNO'). - in_catchment(str): Path to the Catchment shapefile. - catchment_river_id(str): The name of the field with the river ID (Ex. 'DrainLnID' or 'LINKNO'). - rapid_output_folder(str): The path to the folder where all of the RAPID output will be generated. - kfac_celerity(Optional[float]): The flow wave celerity for the watershed in meters per second. 1 km/hr or 1000.0/3600.0 m/s is a reasonable value if unknown. - kfac_formula_type(Optional[int]): An integer representing the formula type to use when calculating kfac. Default is 3. - kfac_length_units(Optional[str]): The units for the length_id field. Supported types are "m" for meters and "km" for kilometers. Default is "km". - lambda_k(Optional[float]): The value for lambda given from RAPID after the calibration process. Default is 0.35. - x_value(Optional[float]): Value for the muskingum X parameter [0-0.5]. Default is 0.3. - nhdplus(Optional[bool]): If True, the drainage line is from the NHDPlus dataset with the VAA fields COMID, FROMNODE, TONODE, and DIVERGENCE. Default is False. - taudem_network_connectivity_tree_file(Optional[str]): If set, the connectivity file will be generated from the TauDEM connectivity tree file. - file_geodatabase(Optional[str]): Path to the file geodatabase. If you use this option, in_drainage_line is the name of the stream network feature class. (WARNING: Not always stable with GDAL.) - + + Parameters + ---------- + in_drainage_line: str + Path to the stream network (i.e. Drainage Line) shapefile. + river_id: str + The name of the field with the river ID + (Ex. 'HydroID', 'COMID', or 'LINKNO'). + length_id: str + The field name containing the length of the river segment + (Ex. 'LENGTHKM' or 'Length'). + slope_id: str + The field name containing the slope of the river segment + (Ex. 'Avg_Slope' or 'Slope'). + next_down_id: str + The name of the field with the river ID of the next downstream + river segment (Ex. 'NextDownID' or 'DSLINKNO'). + in_catchment: str + Path to the Catchment shapefile. + catchment_river_id: str + The name of the field with the river ID (Ex. 'DrainLnID' or 'LINKNO'). + rapid_output_folder: str + The path to the folder where all of the RAPID output will be generated. + kfac_celerity: float, optional + The flow wave celerity for the watershed in meters per second. + 1 km/hr or 1000.0/3600.0 m/s is a reasonable value if unknown. + kfac_formula_type: int, optional + An integer representing the formula type to use when calculating kfac. + Default is 3. + kfac_length_units: str, optional + The units for the length_id field. Supported types are "m" for meters + and "km" for kilometers. Default is "km". + lambda_k: float, optional + The value for lambda given from RAPID after the calibration process. + Default is 0.35. + x_value: float, optional + Value for the muskingum X parameter [0-0.5]. Default is 0.3.
+ nhdplus: bool, optional + If True, the drainage line is from the NHDPlus dataset with the + VAA fields COMID, FROMNODE, TONODE, and DIVERGENCE. Default is False. + taudem_network_connectivity_tree_file: str, optional + If set, the connectivity file will be generated from the + TauDEM connectivity tree file. + file_geodatabase: str, optional + Path to the file geodatabase. If you use this option, + in_drainage_line is the name of the stream network feature class + (WARNING: Not always stable with GDAL). + + Example:: - + from RAPIDpy.gis.workflow import CreateAllStaticECMWFRAPIDFiles - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__=="__main__": - CreateAllStaticECMWFRAPIDFiles(in_drainage_line="/path/to/drainage_line.shp", - river_id="HydroID", - length_id="LENGTHKM", - slope_id="SLOPE", - next_down_id="NextDownID", - in_catchment="/path/to/catchment.shp", - catchment_river_id="DrainLnID", - rapid_output_folder="/path/to/rapid/output", - ) + + CreateAllStaticECMWFRAPIDFiles( + in_drainage_line="/path/to/drainage_line.shp", + river_id="HydroID", + length_id="LENGTHKM", + slope_id="SLOPE", + next_down_id="NextDownID", + in_catchment="/path/to/catchment.shp", + catchment_river_id="DrainLnID", + rapid_output_folder="/path/to/rapid/output", + ) """ - #create all RAPID files CreateAllStaticRAPIDFiles(in_drainage_line, river_id, length_id, @@ -249,7 +322,6 @@ def CreateAllStaticECMWFRAPIDFiles(in_drainage_line, nhdplus, taudem_network_connectivity_tree_file, file_geodatabase) - rapid_connect_file = os.path.join(rapid_output_folder, 'rapid_connect.csv') @@ -257,4 +329,4 @@ def CreateAllStaticECMWFRAPIDFiles(in_drainage_line, catchment_river_id, rapid_output_folder, rapid_connect_file, - file_geodatabase) \ No newline at end of file + file_geodatabase) diff --git a/RAPIDpy/helper_functions.py b/RAPIDpy/helper_functions.py index 6c632e5..f21179a 100644 --- a/RAPIDpy/helper_functions.py +++ b/RAPIDpy/helper_functions.py @@ -1,36 +1,39 @@ # -*- coding: utf-8 -*- -## -## helper_functions.py -## RAPIDpy -## -## Created by Alan D Snow, 2015. -## Copyright © 2015 Alan D Snow. All rights reserved. -## +""" + helper_functions.py + RAPIDpy + + Created by Alan D Snow, 2015. +""" import csv +from os import remove +from sys import version_info + from numpy.testing import assert_almost_equal from numpy import array as np_array from numpy import float32 as np_float32 -from os import remove -from sys import version_info -#------------------------------------------------------------------------------ + +# ----------------------------------------------------------------------------- # HELPER FUNCTIONS -#------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- +# pylint: disable=line-too-long def open_csv(csv_file, mode='r'): """ Get mode depending on Python version Based on: http://stackoverflow.com/questions/29840849/writing-a-csv-file-in-python-that-works-for-both-python-2-7-and-python-3-3-in - """ + """ # noqa if version_info[0] == 2: # Not named on 2.6 access = '{0}b'.format(mode) kwargs = {} else: access = '{0}t'.format(mode) - kwargs = {'newline':''} - + kwargs = {'newline': ''} + return open(csv_file, access, **kwargs) - -def log(message, severity, print_debug=True): + + +def log(message, severity="INFO", print_debug=True): """Logs, prints, or raises a message. 
Arguments: @@ -45,10 +48,11 @@ def log(message, severity, print_debug=True): if print_debug: print("{0}: {1}".format(severity, message)) else: - print("{0}: {1}".format(severity, message)) + print("{0}: {1}".format(severity, message)) else: raise Exception("{0}: {1}".format(severity, message)) + def csv_to_list(csv_file, delimiter=','): """ Reads in a CSV file and returns the contents as list, @@ -57,53 +61,57 @@ def csv_to_list(csv_file, delimiter=','): """ with open_csv(csv_file) as csv_con: if len(delimiter) > 1: - dialect = csv.Sniffer().sniff(csv_con.read(1024), delimiters=delimiter) + dialect = csv.Sniffer().sniff(csv_con.read(1024), + delimiters=delimiter) csv_con.seek(0) reader = csv.reader(csv_con, dialect) else: reader = csv.reader(csv_con, delimiter=delimiter) return list(reader) + def compare_csv_decimal_files(file1, file2, header=True, timeseries=False): """ This function compares two csv files """ - #CHECK NUM LINES + # CHECK NUM LINES with open_csv(file1) as fh1, \ - open_csv(file2) as fh2: - assert sum(1 for line1 in fh1) == sum(1 for line2 in fh2) - + open_csv(file2) as fh2: + assert sum(1 for _ in fh1) == sum(1 for _ in fh2) + with open_csv(file1) as fh1, \ - open_csv(file2) as fh2: + open_csv(file2) as fh2: csv1 = csv.reader(fh1) csv2 = csv.reader(fh2) - + if header: - assert next(csv1) == next(csv2) #header - + assert next(csv1) == next(csv2) # header + while True: try: row1 = next(csv1) row2 = next(csv2) compare_start_index = 0 if timeseries: - assert row1[0] == row2[0] #check dates - compare_start_index=1 - - assert_almost_equal(np_array(row1[compare_start_index:], dtype=np_float32), - np_array(row2[compare_start_index:], dtype=np_float32), - decimal=2) + assert row1[0] == row2[0] # check dates + compare_start_index = 1 + + assert_almost_equal( + np_array(row1[compare_start_index:], dtype=np_float32), + np_array(row2[compare_start_index:], dtype=np_float32), + decimal=2) except StopIteration: break - pass return True - + + def compare_csv_timeseries_files(file1, file2, header=True): """ This function compares two csv files """ return compare_csv_decimal_files(file1, file2, header, True) + def remove_files(*args): """ This function removes all files input as arguments @@ -114,3 +122,15 @@ def remove_files(*args): except OSError: pass + +def add_latlon_metadata(lat_var, lon_var): + """Adds latitude and longitude metadata""" + lat_var.long_name = 'latitude' + lat_var.standard_name = 'latitude' + lat_var.units = 'degrees_north' + lat_var.axis = 'Y' + + lon_var.long_name = 'longitude' + lon_var.standard_name = 'longitude' + lon_var.units = 'degrees_east' + lon_var.axis = 'X' diff --git a/RAPIDpy/inflow/CreateInflowFileFromERAInterimRunoff.py b/RAPIDpy/inflow/CreateInflowFileFromERAInterimRunoff.py index 0c16c53..bbfb609 100644 --- a/RAPIDpy/inflow/CreateInflowFileFromERAInterimRunoff.py +++ b/RAPIDpy/inflow/CreateInflowFileFromERAInterimRunoff.py @@ -1,185 +1,54 @@ # -*- coding: utf-8 -*- -# -# CreateInflowFileFromERAInterimRunoff.py -# RAPIDpy -# -# Created by Alan D. Snow (adapted from CreateInflowFileFromECMWFRunoff.py). -# Copyright © 2015-2016 Alan D Snow. All rights reserved. -# License: BSD-3 Clause +""" + CreateInflowFileFromERAInterimRunoff.py + RAPIDpy -import netCDF4 as NET -import numpy as NUM -import os -from sys import version_info -from past.builtins import xrange + Created by Alan D. Snow, 2015 + Adapted from CreateInflowFileFromECMWFRunoff.py. 
+ License: BSD-3-Clause +""" +from netCDF4 import Dataset -from .CreateInflowFileFromGriddedRunoff import CreateInflowFileFromGriddedRunoff +from .CreateInflowFileFromGriddedRunoff import \ + CreateInflowFileFromGriddedRunoff class CreateInflowFileFromERAInterimRunoff(CreateInflowFileFromGriddedRunoff): + """Create Inflow File From ERA Interim Runoff + + Creates RAPID NetCDF input of water inflow based on + ERA Interim runoff and previously created weight table. + """ + land_surface_model_name = "ERA Interim" + header_wt = ['rivid', 'area_sqm', 'lon_index', 'lat_index', 'npoints'] + dims_oi = [['lon', 'lat', 'time'], ['longitude', 'latitude', 'time']] + vars_oi = [["lon", "lat", "time", "RO"], + ['longitude', 'latitude', 'time', 'ro']] + length_time = {"Daily": 1, "3-Hourly": 8} + def __init__(self): - """Define the tool (tool name is the name of the class).""" - self.label = "Create Inflow File From ERA Interim Runoff" - self.description = ("Creates RAPID NetCDF input of water inflow " - "based on ERA Interim runoff results and " - "previously created weight table.") - self.header_wt = ['rivid', 'area_sqm', 'lon_index', 'lat_index', 'npoints'] - self.dims_oi = [['lon', 'lat', 'time'], ['longitude', 'latitude', 'time']] - self.vars_oi = [["lon", "lat", "time", "RO"], ['longitude', 'latitude', 'time', 'ro']] - self.length_time = {"Daily": 1, "3-Hourly": 8} - self.errorMessages = ["Missing Variable 'time'", - "Incorrect dimensions in the input ERA Interim runoff file.", - "Incorrect variables in the input ERA Interim runoff file.", - "Incorrect time variable in the input ERA Interim runoff file", - "Incorrect number of columns in the weight table", - "No or incorrect header in the weight table", - "Incorrect sequence of rows in the weight table"] + """Define the attributes to look for""" + self.runoff_vars = ['ro'] + super(CreateInflowFileFromERAInterimRunoff, self).__init__() - def dataValidation(self, in_nc): - """Check the necessary dimensions and variables in the input netcdf data""" + def data_validation(self, in_nc): + """Check the necessary dimensions and variables in the input + netcdf data""" + data_nc = Dataset(in_nc) - data_nc = NET.Dataset(in_nc) - - dims = data_nc.dimensions - if version_info[0] == 2: # Python 2 - dims = dims.keys() - if version_info[0] == 3: # Python 3 - dims = list(dims) - - if dims not in self.dims_oi: - raise Exception("{0} {1}".format(self.errorMessages[1],dims)) + dims = list(data_nc.dimensions) - vars = data_nc.variables - if version_info[0] == 2: # Python 2 - vars = vars.keys() - if version_info[0] == 3: # Python 3 - vars = list(vars) - - if vars == self.vars_oi[0]: - vars_oi_index = 0 - elif vars == self.vars_oi[1]: - vars_oi_index = 1 - else: - raise Exception("{0} {1}".format(self.errorMessages[2],vars)) + if dims not in self.dims_oi: + data_nc.close() + raise Exception("{0} {1}".format(self.error_messages[1], dims)) - return vars_oi_index + nc_vars = list(data_nc.variables) - def dataIdentify(self, in_nc, vars_oi_index): - """Check if the data is daily (one value) or 3 hourly""" - data_nc = NET.Dataset(in_nc) - name_time = self.vars_oi[vars_oi_index][2] - time = data_nc.variables[name_time][:] - if len(time) == self.length_time["Daily"]: - return "Daily" - - diff = NUM.unique(NUM.diff(time)) - data_nc.close() - time_interval_3hr = NUM.array([3.0],dtype=float) - if (diff == time_interval_3hr).all(): - return "3-Hourly" + if nc_vars == self.vars_oi[0]: + self.runoff_vars = [self.vars_oi[0][-1]] + elif nc_vars == self.vars_oi[1]: + self.runoff_vars = 
[self.vars_oi[1][-1]] else: - return None - - def execute(self, nc_file_list, index_list, in_weight_table, - out_nc, grid_type, mp_lock): - - """The source code of the tool.""" - if not os.path.exists(out_nc): - print("ERROR: Outfile has not been created. You need to run: generateOutputInflowFile function ...") - raise Exception("ERROR: Outfile has not been created. You need to run: generateOutputInflowFile function ...") - - if len(nc_file_list) != len(index_list): - print("ERROR: Number of runoff files not equal to number of indices ...") - raise Exception("ERROR: Number of runoff files not equal to number of indices ...") - - self.readInWeightTable(in_weight_table) - - lon_ind_all = [int(i) for i in self.dict_list[self.header_wt[2]]] - lat_ind_all = [int(j) for j in self.dict_list[self.header_wt[3]]] - - # Obtain a subset of runoff data based on the indices in the weight table - min_lon_ind_all = min(lon_ind_all) - max_lon_ind_all = max(lon_ind_all) - min_lat_ind_all = min(lat_ind_all) - max_lat_ind_all = max(lat_ind_all) - - index_new = [] - - # Validate the netcdf dataset - vars_oi_index = self.dataValidation(nc_file_list[0]) - - id_data = self.dataIdentify(nc_file_list[0], vars_oi_index) - if id_data is None: - raise Exception(self.errorMessages[3]) - - # combine inflow data - for nc_file_array_index, nc_file in enumerate(nc_file_list): - index = index_list[nc_file_array_index] - - '''Calculate water inflows''' - # print("Calculating water inflows for {0} {1} ...".format(os.path.basename(nc_file) , grid_type)) - - ''' Read the netcdf dataset''' - data_in_nc = NET.Dataset(nc_file) - time = data_in_nc.variables[self.vars_oi[vars_oi_index][2]][:] - - # Check the size of time variable in the netcdf data - size_time = len(time) - if size_time != self.length_time[id_data]: - raise Exception(self.errorMessages[3]) - - data_subset_all = data_in_nc.variables[self.vars_oi[vars_oi_index][3]][:, min_lat_ind_all:max_lat_ind_all+1, min_lon_ind_all:max_lon_ind_all+1] - data_in_nc.close() - - len_time_subset_all = data_subset_all.shape[0] - len_lat_subset_all = data_subset_all.shape[1] - len_lon_subset_all = data_subset_all.shape[2] - data_subset_all = data_subset_all.reshape(len_time_subset_all, (len_lat_subset_all * len_lon_subset_all)) - - # compute new indices based on the data_subset_all - if not index_new: - for r in xrange(self.count): - ind_lat_orig = lat_ind_all[r] - ind_lon_orig = lon_ind_all[r] - index_new.append((ind_lat_orig - min_lat_ind_all)*len_lon_subset_all + (ind_lon_orig - min_lon_ind_all)) - - # obtain a new subset of data - data_subset_new = data_subset_all[:,index_new] - - inflow_data = NUM.zeros((size_time, self.size_streamID)) - # start compute inflow - pointer = 0 - for stream_index in xrange(self.size_streamID): - npoints = int(self.dict_list[self.header_wt[4]][pointer]) - # Check if all npoints points correspond to the same streamID - if len(set(self.dict_list[self.header_wt[0]][pointer : (pointer + npoints)])) != 1: - print("ROW INDEX {0}".format(pointer)) - print("COMID {0}".format(self.dict_list[self.header_wt[0]][pointer])) - raise Exception(self.errorMessages[2]) - - area_sqm_npoints = [float(k) for k in self.dict_list[self.header_wt[1]][pointer : (pointer + npoints)]] - area_sqm_npoints = NUM.array(area_sqm_npoints) - area_sqm_npoints = area_sqm_npoints.reshape(1, npoints) - data_goal = data_subset_new[:, pointer:(pointer + npoints)] - - if grid_type == 't255': - # A) ERA Interim Low Res (T255) - data is cumulative - data_goal = data_goal.astype(NUM.float32) - # 
from time 3/6/9/12 (time zero not included, so assumed to be zero) - ro_first_half = NUM.concatenate([data_goal[0:1,], NUM.subtract(data_goal[1:4,], data_goal[0:3,])]) - # from time 15/18/21/24 (time restarts at time 12, assumed to be zero) - ro_second_half = NUM.concatenate([data_goal[4:5,], NUM.subtract(data_goal[5:,], data_goal[4:7,])]) - ro_stream = NUM.multiply(NUM.concatenate([ro_first_half, ro_second_half]), area_sqm_npoints) - else: - # A) ERA Interim High Res (T511) - data is incremental - # from time 3/6/9/12/15/18/21/24 - ro_stream = NUM.multiply(data_goal, area_sqm_npoints) - inflow_data[:,stream_index] = ro_stream.sum(axis=1) - pointer += npoints - - # only one process is allowed to write at a time to netcdf file - mp_lock.acquire() - data_out_nc = NET.Dataset(out_nc, "a", format="NETCDF3_CLASSIC") - data_out_nc.variables['m3_riv'][index*size_time:(index+1)*size_time,:] = inflow_data - data_out_nc.close() - mp_lock.release() + data_nc.close() + raise Exception("{0} {1}".format(self.error_messages[2], nc_vars)) + data_nc.close() diff --git a/RAPIDpy/inflow/CreateInflowFileFromGriddedRunoff.py b/RAPIDpy/inflow/CreateInflowFileFromGriddedRunoff.py index 33f33e4..b299ab7 100644 --- a/RAPIDpy/inflow/CreateInflowFileFromGriddedRunoff.py +++ b/RAPIDpy/inflow/CreateInflowFileFromGriddedRunoff.py @@ -1,29 +1,55 @@ # -*- coding: utf-8 -*- -## -## CreateInflowFileFromGriddedRunoff.py -## RAPIDpy -## -## Created by Alan D Snow. -## Copyright © 2016 Alan D Snow. All rights reserved. -## BSD 3-Clause +""" + CreateInflowFileFromGriddedRunoff.py + RAPIDpy + Created by Alan D. Snow, 2016 + License: BSD-3-Clause +""" +from abc import abstractmethod import csv from datetime import datetime -import netCDF4 as NET -import numpy as np import os + +from netCDF4 import Dataset +import numpy as np from pytz import utc +from past.builtins import xrange # pylint: disable=redefined-builtin -#local +# local from ..helper_functions import open_csv class CreateInflowFileFromGriddedRunoff(object): + """Create Inflow File From Gridded Runoff + + Base class for creating RAPID NetCDF input + of water inflow based on land surface model + runoff and previously created weight table. + """ + land_surface_model_name = "land surface model" + header_wt = ['rivid', 'area_sqm', 'lon_index', 'lat_index', 'npoints'] + runoff_vars = [] + def __init__(self): - """Define the tool (tool name is the name of the class).""" - self.header_wt = ['rivid', 'area_sqm', 'lon_index', 'lat_index', 'npoints'] + self.dict_list = [] + self.count = 0 + self.size_stream_id = 0 + self.simulation_time_step_seconds = 0 + self.error_messages = [ + "Missing Variable 'time'", + "Incorrect dimensions in the input {} runoff file." + .format(self.land_surface_model_name), + "Incorrect variables in the input {} runoff file." 
+ .format(self.land_surface_model_name), + "Incorrect time variable in the input {} runoff file" + .format(self.land_surface_model_name), + "Incorrect number of columns in the weight table", + "No or incorrect header in the weight table", + "Incorrect sequence of rows in the weight table" + ] - def readInWeightTable(self, in_weight_table): + def read_in_weight_table(self, in_weight_table): """ Read in weight table """ @@ -33,74 +59,90 @@ def readInWeightTable(self, in_weight_table): header_row = next(reader) # check number of columns in the weight table if len(header_row) < len(self.header_wt): - raise Exception(self.errorMessages[4]) + raise Exception(self.error_messages[4]) # check header if header_row[1:len(self.header_wt)] != self.header_wt[1:]: - raise Exception(self.errorMessages[5]) - - self.dict_list = np.loadtxt(in_weight_table, - delimiter=",", - usecols=(0, 1, 2, 3, 4), - skiprows=1, - dtype={'names': (self.header_wt[0], self.header_wt[1], self.header_wt[2], self.header_wt[3], self.header_wt[4]), - 'formats': ('i8', 'f8', 'i8', 'i8', 'i8')}, - ) - + raise Exception(self.error_messages[5]) + + self.dict_list = \ + np.loadtxt( + in_weight_table, + delimiter=",", + usecols=(0, 1, 2, 3, 4), + skiprows=1, + dtype={ + 'names': (self.header_wt[0], + self.header_wt[1], + self.header_wt[2], + self.header_wt[3], + self.header_wt[4]), + 'formats': ('i8', 'f8', 'i8', 'i8', 'i8') + }, + ) + self.count = self.dict_list.shape[0] - self.size_streamID = len(np.unique(np.array(self.dict_list[self.header_wt[0]], dtype=np.int32))) - - def _write_lat_lon(self, data_out_nc, rivid_lat_lon_z_file): + self.size_stream_id = \ + len(np.unique(np.array(self.dict_list[self.header_wt[0]], + dtype=np.int32))) + + @staticmethod + def _write_lat_lon(data_out_nc, rivid_lat_lon_z_file): """Add latitude and longitude each netCDF feature Lookup table is a CSV file with rivid, Lat, Lon, columns. - Columns must be in that order and these must be the first three columns. + Columns must be in that order and these must be the first + three columns. 
""" - #only add if user adds + # only add if user adds if rivid_lat_lon_z_file and os.path.exists(rivid_lat_lon_z_file): - #get list of COMIDS - lookup_table = np.loadtxt(rivid_lat_lon_z_file, - delimiter=",", - usecols=(0,1,2), - skiprows=1, - dtype={'names': ('rivid', 'lat', 'lon'), - 'formats': ('i8', 'f8', 'f8'), - }, - ) - + # get list of COMIDS + lookup_table = np.loadtxt( + rivid_lat_lon_z_file, + delimiter=",", + usecols=(0, 1, 2), + skiprows=1, + dtype={ + 'names': ('rivid', 'lat', 'lon'), + 'formats': ('i8', 'f8', 'f8'), + }, + ) + # Get relevant arrays while we update them nc_rivids = data_out_nc.variables['rivid'][:] lats = data_out_nc.variables['lat'][:] lons = data_out_nc.variables['lon'][:] - + lat_min = None lat_max = None lon_min = None lon_max = None - + # Process each row in the lookup table for nc_index, nc_rivid in enumerate(nc_rivids): try: - lookup_index = np.where(lookup_table['rivid'] == nc_rivid)[0][0] + lookup_index = \ + np.where(lookup_table['rivid'] == nc_rivid)[0][0] except Exception: - raise Exception('rivid {0} misssing in comid_lat_lon_z file'.format(nc_rivid)) - + raise Exception('rivid {0} misssing in ' + 'comid_lat_lon_z file'.format(nc_rivid)) + lat = float(lookup_table['lat'][lookup_index]) lats[nc_index] = lat - if (lat_min) is None or lat < lat_min: + if lat_min is None or lat < lat_min: lat_min = lat - if (lat_max) is None or lat > lat_max: + if lat_max is None or lat > lat_max: lat_max = lat - + lon = float(lookup_table['lon'][lookup_index]) lons[nc_index] = lon - if (lon_min) is None or lon < lon_min: + if lon_min is None or lon < lon_min: lon_min = lon - if (lon_max) is None or lon > lon_max: + if lon_max is None or lon > lon_max: lon_max = lon - + # Overwrite netCDF variable values data_out_nc.variables['lat'][:] = lats data_out_nc.variables['lon'][:] = lons - + # Update metadata if lat_min is not None: data_out_nc.geospatial_lat_min = lat_min @@ -113,8 +155,8 @@ def _write_lat_lon(self, data_out_nc, rivid_lat_lon_z_file): else: print('No comid_lat_lon_z file. 
Not adding values ...') - def generateOutputInflowFile(self, - out_nc, #file generated for inflows + def generateOutputInflowFile(self, + out_nc, start_datetime_utc, number_of_timesteps, simulation_time_step_seconds, @@ -130,40 +172,40 @@ def generateOutputInflowFile(self, # Create output inflow netcdf data print("Generating inflow file ...") - data_out_nc = NET.Dataset(out_nc, "w", format="NETCDF3_CLASSIC") - rivid_list = np.loadtxt(in_rapid_connect_file, + data_out_nc = Dataset(out_nc, "w", format="NETCDF3_CLASSIC") + rivid_list = np.loadtxt(in_rapid_connect_file, delimiter=",", - ndmin=1, - usecols=(0,), + ndmin=1, + usecols=(0,), dtype=int) - #create dimensions + # create dimensions data_out_nc.createDimension('time', number_of_timesteps) data_out_nc.createDimension('rivid', len(rivid_list)) data_out_nc.createDimension('nv', 2) - #create variables - #m3_riv - m3_riv_var = data_out_nc.createVariable('m3_riv', 'f4', + # create variables + # m3_riv + m3_riv_var = data_out_nc.createVariable('m3_riv', 'f4', ('time', 'rivid'), fill_value=0) - m3_riv_var.long_name = 'accumulated external water volume inflow upstream of each river reach' + m3_riv_var.long_name = 'accumulated external water volume ' \ + 'inflow upstream of each river reach' m3_riv_var.units = 'm3' m3_riv_var.coordinates = 'lon lat' m3_riv_var.grid_mapping = 'crs' m3_riv_var.cell_methods = "time: sum" data_out_nc.close() - + try: - data_out_nc = NET.Dataset(out_nc, "a", format="NETCDF3_CLASSIC") - #rivid - rivid_var = data_out_nc.createVariable('rivid', 'i4', - ('rivid',)) + data_out_nc = Dataset(out_nc, "a", format="NETCDF3_CLASSIC") + # rivid + rivid_var = data_out_nc.createVariable('rivid', 'i4', + ('rivid',)) rivid_var.long_name = 'unique identifier for each river reach' rivid_var.units = '1' rivid_var.cf_role = 'timeseries_id' - rivid_var[:] = rivid_list - - #time + + # time time_var = data_out_nc.createVariable('time', 'i4', ('time',)) time_var.long_name = 'time' @@ -172,54 +214,296 @@ def generateOutputInflowFile(self, time_var.axis = 'T' time_var.calendar = 'gregorian' time_var.bounds = 'time_bnds' - - initial_time_seconds = (start_datetime_utc.replace(tzinfo=utc)- - datetime(1970,1,1, tzinfo=utc)).total_seconds() - final_time_seconds = initial_time_seconds + number_of_timesteps*simulation_time_step_seconds - time_array = np.arange(initial_time_seconds, final_time_seconds, simulation_time_step_seconds) + + initial_time_seconds = \ + (start_datetime_utc.replace(tzinfo=utc) - + datetime(1970, 1, 1, tzinfo=utc)).total_seconds() + final_time_seconds = \ + initial_time_seconds + number_of_timesteps\ + * simulation_time_step_seconds + time_array = np.arange(initial_time_seconds, final_time_seconds, + simulation_time_step_seconds) time_var[:] = time_array - - #time_bnds + + # time_bnds time_bnds_var = data_out_nc.createVariable('time_bnds', 'i4', - ('time', 'nv',)) + ('time', 'nv',)) for time_index, time_element in enumerate(time_array): time_bnds_var[time_index, 0] = time_element - time_bnds_var[time_index, 1] = time_element+simulation_time_step_seconds - - #longitude + time_bnds_var[time_index, 1] = \ + time_element + simulation_time_step_seconds + + # longitude lon_var = data_out_nc.createVariable('lon', 'f8', ('rivid',), - fill_value=-9999.0) - lon_var.long_name = 'longitude of a point related to each river reach' + fill_value=-9999.0) + lon_var.long_name = \ + 'longitude of a point related to each river reach' lon_var.standard_name = 'longitude' lon_var.units = 'degrees_east' lon_var.axis = 'X' - - #latitude + + # latitude 
lat_var = data_out_nc.createVariable('lat', 'f8', ('rivid',), - fill_value=-9999.0) - lat_var.long_name = 'latitude of a point related to each river reach' + fill_value=-9999.0) + lat_var.long_name = \ + 'latitude of a point related to each river reach' lat_var.standard_name = 'latitude' lat_var.units = 'degrees_north' lat_var.axis = 'Y' - + crs_var = data_out_nc.createVariable('crs', 'i4') crs_var.grid_mapping_name = 'latitude_longitude' crs_var.epsg_code = 'EPSG:4326' # WGS 84 crs_var.semi_major_axis = 6378137.0 crs_var.inverse_flattening = 298.257223563 - - #add global attributes + + # add global attributes data_out_nc.Conventions = 'CF-1.6' - data_out_nc.title = 'RAPID Inflow from {0}'.format(land_surface_model_description) - data_out_nc.history = 'date_created: {0}'.format(datetime.utcnow().replace(tzinfo=utc)) + data_out_nc.title = 'RAPID Inflow from {0}'\ + .format(land_surface_model_description) + data_out_nc.history = 'date_created: {0}'\ + .format(datetime.utcnow().replace(tzinfo=utc)) data_out_nc.featureType = 'timeSeries' data_out_nc.institution = modeling_institution - - #write lat lon data + + # write lat lon data self._write_lat_lon(data_out_nc, in_rivid_lat_lon_z_file) - - #close file + + # close file data_out_nc.close() except RuntimeError: - print("File size too big to add data beforehand. Performing conversion after ...") - pass \ No newline at end of file + print("File size too big to add data beforehand." + " Performing conversion after ...") + + def get_conversion_factor(self, in_nc, num_nc_files): + """get conversion_factor""" + data_in_nc = Dataset(in_nc) + + # convert from kg/m^2 (i.e. mm) to m + conversion_factor = 0.001 + + # ECMWF units are in m + if data_in_nc.variables[self.runoff_vars[0]] \ + .getncattr("units") == "m": + conversion_factor = 1 + + # ftp://hydro1.sci.gsfc.nasa.gov/data/s4pa/GLDAS_V1/README.GLDAS.pdf + if "s" in data_in_nc.variables[self.runoff_vars[0]] \ + .getncattr("units"): + # that means kg/m^2/s in GLDAS v1 that is 3-hr avg, + # so multiply by 3 hr (ex. 3*3600). Assumed same + # for others (ex. 1*3600). + # If combining files, need to take average of these, + # so divide by number of files + conversion_factor *= \ + self.simulation_time_step_seconds / \ + num_nc_files + data_in_nc.close() + + return conversion_factor + + @abstractmethod + def data_validation(self, in_nc): + """Check the necessary dimensions and variables in the input + netcdf data""" + pass + + def execute(self, nc_file_list, index_list, in_weight_table, + out_nc, grid_type, mp_lock): + + """The source code of the tool.""" + if not os.path.exists(out_nc): + raise Exception("Outfile has not been created. 
" + "You need to run: generateOutputInflowFile " + "function ...") + + if len(nc_file_list) != len(index_list): + raise Exception("ERROR: Number of runoff files not equal to " + "number of indices ...") + + demo_file_list = nc_file_list[0] + if not isinstance(nc_file_list[0], list): + demo_file_list = [demo_file_list] + + self.data_validation(demo_file_list[0]) + self.read_in_weight_table(in_weight_table) + + conversion_factor = self.get_conversion_factor(demo_file_list[0], + len(demo_file_list)) + + # get indices of subset of data + lon_ind_all = [int(i) for i in self.dict_list[self.header_wt[2]]] + lat_ind_all = [int(j) for j in self.dict_list[self.header_wt[3]]] + + # Obtain a subset of runoff data based on the indices in the + # weight table + min_lon_ind_all = min(lon_ind_all) + max_lon_ind_all = max(lon_ind_all) + min_lat_ind_all = min(lat_ind_all) + max_lat_ind_all = max(lat_ind_all) + lon_slice = slice(min_lon_ind_all, max_lon_ind_all + 1) + lat_slice = slice(min_lat_ind_all, max_lat_ind_all + 1) + index_new = [] + + # combine inflow data + for nc_file_array_index, nc_file_array in enumerate(nc_file_list): + + index = index_list[nc_file_array_index] + + if not isinstance(nc_file_array, list): + nc_file_array = [nc_file_array] + + data_subset_all = None + for nc_file in nc_file_array: + # Validate the netcdf dataset + self.data_validation(nc_file) + + # Read the netcdf dataset + data_in_nc = Dataset(nc_file) + + # Calculate water inflows + runoff_dimension_size = \ + len(data_in_nc.variables[self.runoff_vars[0]].dimensions) + if runoff_dimension_size == 2: + # obtain subset of surface and subsurface runoff + data_subset_runoff = \ + data_in_nc.variables[self.runoff_vars[0]][ + lat_slice, lon_slice] + for var_name in self.runoff_vars[1:]: + data_subset_runoff += \ + data_in_nc.variables[var_name][ + lat_slice, lon_slice] + + # get runoff dims + len_time_subset = 1 + len_lat_subset = data_subset_runoff.shape[0] + len_lon_subset = data_subset_runoff.shape[1] + + # reshape the runoff + data_subset_runoff = data_subset_runoff.reshape( + len_lat_subset * len_lon_subset) + + elif runoff_dimension_size == 3: + # obtain subset of surface and subsurface runoff + data_subset_runoff = \ + data_in_nc.variables[self.runoff_vars[0]][ + :, lat_slice, lon_slice] + for var_name in self.runoff_vars[1:]: + data_subset_runoff += \ + data_in_nc.variables[var_name][ + :, lat_slice, lon_slice] + + # get runoff dims + len_time_subset = data_subset_runoff.shape[0] + len_lat_subset = data_subset_runoff.shape[1] + len_lon_subset = data_subset_runoff.shape[2] + # reshape the runoff + data_subset_runoff = \ + data_subset_runoff.reshape( + len_time_subset, + (len_lat_subset * len_lon_subset)) + + data_in_nc.close() + + if not index_new: + # compute new indices based on the data_subset_surface + for r in range(0, self.count): + ind_lat_orig = lat_ind_all[r] + ind_lon_orig = lon_ind_all[r] + index_new.append( + (ind_lat_orig - min_lat_ind_all) * len_lon_subset + + (ind_lon_orig - min_lon_ind_all)) + + # obtain a new subset of data + if runoff_dimension_size == 2: + data_subset_new = data_subset_runoff[index_new] + elif runoff_dimension_size == 3: + data_subset_new = data_subset_runoff[:, index_new] + + # FILTER DATA + try: + # set masked values to zero + data_subset_new = data_subset_new.filled(fill_value=0) + except AttributeError: + pass + # set negative values to zero + data_subset_new[data_subset_new < 0] = 0 + + # combine data + if data_subset_all is None: + data_subset_all = data_subset_new + else: + 
data_subset_all = np.add(data_subset_all, data_subset_new) + + if runoff_dimension_size == 3 and len_time_subset > 1: + inflow_data = np.zeros((len_time_subset, self.size_stream_id)) + else: + inflow_data = np.zeros(self.size_stream_id) + + pointer = 0 + for stream_index in xrange(self.size_stream_id): + npoints = int(self.dict_list[self.header_wt[4]][pointer]) + # Check if all npoints points correspond to the same streamID + if len(set(self.dict_list[self.header_wt[0]][ + pointer: (pointer + npoints)])) != 1: + print("ROW INDEX {0}".format(pointer)) + print("COMID {0}".format( + self.dict_list[self.header_wt[0]][pointer])) + raise Exception(self.error_messages[2]) + + area_sqm_npoints = \ + np.array([float(k) for k in + self.dict_list[self.header_wt[1]][ + pointer: (pointer + npoints)]]) + + # assume data is incremental + if runoff_dimension_size == 3: + data_goal = data_subset_all[:, pointer:(pointer + npoints)] + else: + data_goal = data_subset_all[pointer:(pointer + npoints)] + + if grid_type == 't255': + # A) ERA Interim Low Res (T255) - data is cumulative + # from time 3/6/9/12 + # (time zero not included, so assumed to be zero) + ro_first_half = \ + np.concatenate([data_goal[0:1, ], + np.subtract(data_goal[1:4, ], + data_goal[0:3, ])]) + # from time 15/18/21/24 + # (time restarts at time 12, assumed to be zero) + ro_second_half = \ + np.concatenate([data_goal[4:5, ], + np.subtract(data_goal[5:, ], + data_goal[4:7, ])]) + ro_stream = \ + np.multiply( + np.concatenate([ro_first_half, ro_second_half]), + area_sqm_npoints) + + else: + ro_stream = data_goal * area_sqm_npoints * \ + conversion_factor + + # filter nan + ro_stream[np.isnan(ro_stream)] = 0 + + if ro_stream.any(): + if runoff_dimension_size == 3 and len_time_subset > 1: + inflow_data[:, stream_index] = ro_stream.sum(axis=1) + else: + inflow_data[stream_index] = ro_stream.sum() + + pointer += npoints + + # only one process is allowed to write at a time to netcdf file + mp_lock.acquire() + data_out_nc = Dataset(out_nc, "a", format="NETCDF3_CLASSIC") + if runoff_dimension_size == 3 and len_time_subset > 1: + data_out_nc.variables['m3_riv'][ + index*len_time_subset:(index+1)*len_time_subset, :] = \ + inflow_data + else: + data_out_nc.variables['m3_riv'][index] = inflow_data + data_out_nc.close() + mp_lock.release() diff --git a/RAPIDpy/inflow/CreateInflowFileFromLDASRunoff.py b/RAPIDpy/inflow/CreateInflowFileFromLDASRunoff.py index 3105a26..014bd16 100644 --- a/RAPIDpy/inflow/CreateInflowFileFromLDASRunoff.py +++ b/RAPIDpy/inflow/CreateInflowFileFromLDASRunoff.py @@ -1,220 +1,54 @@ # -*- coding: utf-8 -*- -# -# CreateInflowFileFromLDASRunoff.py -# RAPIDpy -# -# Created by Alan D. Snow (adapted from CreateInflowFileFromECMWFRunoff.py). -# Copyright © 2015-2016 Alan D Snow. All rights reserved. -# License: BSD-3 Clause +""" + CreateInflowFileFromLDASRunoff.py + RAPIDpy -import netCDF4 as NET -import numpy as NUM -import os -from past.builtins import xrange + Created by Alan D. Snow, 2015 + Adapted from CreateInflowFileFromECMWFRunoff.py. 
+ License: BSD-3-Clause +""" +from netCDF4 import Dataset -from .CreateInflowFileFromGriddedRunoff import CreateInflowFileFromGriddedRunoff +from .CreateInflowFileFromGriddedRunoff import \ + CreateInflowFileFromGriddedRunoff class CreateInflowFileFromLDASRunoff(CreateInflowFileFromGriddedRunoff): - def __init__(self, lat_dim, # "g0_lat_0", - lon_dim, # "g0_lon_1", - lat_var, # "g0_lat_0", - lon_var, # "g0_lon_1", - runoff_vars, # ["Qsb_GDS0_SFC_ave1h", "Qs_GDS0_SFC_ave1h"], - ): + """Create Inflow File From LDAS Runoff + + Base class for creating RAPID NetCDF input + of water inflow based on LDAS land surface model + runoff and previously created weight table. + """ + land_surface_model_name = "LDAS" + + def __init__(self, + lat_dim, # "g0_lat_0", + lon_dim, # "g0_lon_1", + lat_var, # "g0_lat_0", + lon_var, # "g0_lon_1", + runoff_vars): # ["Qsb_GDS0_SFC_ave1h", "Qs_GDS0_SFC_ave1h"], """Define the attributes to look for""" self.dims_oi = [lon_dim, lat_dim] self.vars_oi = [lon_var, lat_var] + runoff_vars self.runoff_vars = runoff_vars - self.length_time = {"Hourly": 1} - self.errorMessages = ["Missing Variable 'time'", - "Incorrect dimensions in the input runoff file.", - "Incorrect variables in the input runoff file.", - "Incorrect time variable in the input runoff file", - "Incorrect number of columns in the weight table", - "No or incorrect header in the weight table", - "Incorrect sequence of rows in the weight table"] super(CreateInflowFileFromLDASRunoff, self).__init__() - def dataValidation(self, in_nc): - """Check the necessary dimensions and variables in the input netcdf data""" - data_nc = NET.Dataset(in_nc) + def data_validation(self, in_nc): + """Check the necessary dimensions and variables in the + input netcdf data""" + data_nc = Dataset(in_nc) for dim in self.dims_oi: if dim not in data_nc.dimensions.keys(): data_nc.close() - raise Exception(self.errorMessages[1]) + raise Exception(self.error_messages[1]) for var in self.vars_oi: if var not in data_nc.variables.keys(): data_nc.close() - raise Exception(self.errorMessages[2]) + raise Exception(self.error_messages[2]) data_nc.close() return - - def execute(self, nc_file_list, index_list, in_weight_table, - out_nc, grid_type, mp_lock): - - """The source code of the tool.""" - if not os.path.exists(out_nc): - print("ERROR: Outfile has not been created. You need to run: " - "generateOutputInflowFile function ...") - raise Exception("ERROR: Outfile has not been created. 
" - "You need to run: generateOutputInflowFile function ...") - - if len(nc_file_list) != len(index_list): - print("ERROR: Number of runoff files not equal to number of indices ...") - raise Exception("ERROR: Number of runoff files not equal to number of indices ...") - - self.readInWeightTable(in_weight_table) - - # get indices of subset of data - lon_ind_all = [int(i) for i in self.dict_list[self.header_wt[2]]] - lat_ind_all = [int(j) for j in self.dict_list[self.header_wt[3]]] - - # Obtain a subset of runoff data based on the indices in the weight table - min_lon_ind_all = min(lon_ind_all) - max_lon_ind_all = max(lon_ind_all) - min_lat_ind_all = min(lat_ind_all) - max_lat_ind_all = max(lat_ind_all) - lon_slice = slice(min_lon_ind_all, max_lon_ind_all + 1) - lat_slice = slice(min_lat_ind_all, max_lat_ind_all + 1) - index_new = [] - conversion_factor = None - - # combine inflow data - for nc_file_array_index, nc_file_array in enumerate(nc_file_list): - - index = index_list[nc_file_array_index] - - if not isinstance(nc_file_array, list): - nc_file_array = [nc_file_array] - else: - nc_file_array = nc_file_array - - data_subset_all = None - - for nc_file in nc_file_array: - # Validate the netcdf dataset - self.dataValidation(nc_file) - - ''' Read the netcdf dataset''' - data_in_nc = NET.Dataset(nc_file) - - '''Calculate water inflows''' - # print("Calculating water inflows for {0} {1} ...".format(os.path.basename(nc_file) , grid_type)) - runoff_dimension_size = len(data_in_nc.variables[self.vars_oi[2]].dimensions) - if runoff_dimension_size == 2: - # obtain subset of surface and subsurface runoff - data_subset_runoff = data_in_nc.variables[self.runoff_vars[0]][lat_slice, lon_slice] - for var_name in self.runoff_vars[1:]: - data_subset_runoff += data_in_nc.variables[var_name][lat_slice, lon_slice] - - # get runoff dims - len_time_subset = 1 - len_lat_subset = data_subset_runoff.shape[0] - len_lon_subset = data_subset_runoff.shape[1] - - # reshape the runoff - data_subset_runoff = data_subset_runoff.reshape(len_lat_subset * len_lon_subset) - - elif runoff_dimension_size == 3: - # obtain subset of surface and subsurface runoff - data_subset_runoff = data_in_nc.variables[self.runoff_vars[0]][:, lat_slice, lon_slice] - for var_name in self.runoff_vars[1:]: - data_subset_runoff += data_in_nc.variables[var_name][:, lat_slice, lon_slice] - - # get runoff dims - len_time_subset = data_subset_runoff.shape[0] - len_lat_subset = data_subset_runoff.shape[1] - len_lon_subset = data_subset_runoff.shape[2] - # reshape the runoff - data_subset_runoff = data_subset_runoff.reshape(len_time_subset, - (len_lat_subset * len_lon_subset)) - - if conversion_factor == None: - # get conversion_factor - conversion_factor = 0.001 #convert from kg/m^2 (i.e. mm) to m - if "s" in data_in_nc.variables[self.vars_oi[2]].getncattr("units"): - # that means kg/m^2/s in GLDAS v1 that is 3-hr avg, so multiply - # by 3 hr (ex. 3*3600). Assumed same for others (ex. 1*3600). 
- # ftp://hydro1.sci.gsfc.nasa.gov/data/s4pa/GLDAS_V1/README.GLDAS.pdf - # If combining files, need to take average of these, so divide by number of files - conversion_factor *= self.simulation_time_step_seconds/len(nc_file_array) - data_in_nc.close() - - if not index_new: - # compute new indices based on the data_subset_surface - for r in range(0,self.count): - ind_lat_orig = lat_ind_all[r] - ind_lon_orig = lon_ind_all[r] - index_new.append((ind_lat_orig - min_lat_ind_all)*len_lon_subset +\ - (ind_lon_orig - min_lon_ind_all)) - - # obtain a new subset of data - if runoff_dimension_size == 2: - data_subset_new = data_subset_runoff[index_new] - elif runoff_dimension_size == 3: - data_subset_new = data_subset_runoff[:, index_new] - - # FILTER DATA - try: - # set masked values to zero - data_subset_new = data_subset_new.filled(fill_value=0) - except AttributeError: - pass - # set negative values to zero - data_subset_new[data_subset_new<0] = 0 - - # combine data - if data_subset_all is None: - data_subset_all = data_subset_new - else: - data_subset_all = NUM.add(data_subset_all, data_subset_new) - - if runoff_dimension_size == 3 and len_time_subset > 1: - inflow_data = NUM.zeros((len_time_subset, self.size_streamID)) - else: - inflow_data = NUM.zeros(self.size_streamID) - - pointer = 0 - for stream_index in xrange(self.size_streamID): - npoints = int(self.dict_list[self.header_wt[4]][pointer]) - # Check if all npoints points correspond to the same streamID - if len(set(self.dict_list[self.header_wt[0]][pointer : (pointer + npoints)])) != 1: - print("ROW INDEX {0}".format(pointer)) - print("COMID {0}".format(self.dict_list[self.header_wt[0]][pointer])) - raise Exception(self.errorMessages[2]) - - area_sqm_npoints = \ - NUM.array([float(k) for k in \ - self.dict_list[self.header_wt[1]][pointer : (pointer + npoints)]]) - - # assume data is incremental - if runoff_dimension_size == 3: - data_goal = data_subset_all[:, pointer:(pointer + npoints)] - else: - data_goal = data_subset_all[pointer:(pointer + npoints)] - - ro_stream = data_goal * area_sqm_npoints * conversion_factor - # filter nan - ro_stream[NUM.isnan(ro_stream)] = 0 - - if ro_stream.any(): - if runoff_dimension_size == 3 and len_time_subset > 1: - inflow_data[:,stream_index] = ro_stream.sum(axis=1) - else: - inflow_data[stream_index] = ro_stream.sum() - - pointer += npoints - # only one process is allowed to write at a time to netcdf file - mp_lock.acquire() - data_out_nc = NET.Dataset(out_nc, "a", format = "NETCDF3_CLASSIC") - if runoff_dimension_size == 3 and len_time_subset > 1: - data_out_nc.variables['m3_riv'][index*len_time_subset:(index+1)*len_time_subset,:] = inflow_data - else: - data_out_nc.variables['m3_riv'][index] = inflow_data - data_out_nc.close() - mp_lock.release() diff --git a/RAPIDpy/inflow/CreateInflowFileFromWRFHydroRunoff.py b/RAPIDpy/inflow/CreateInflowFileFromWRFHydroRunoff.py index 94965b0..035af0b 100644 --- a/RAPIDpy/inflow/CreateInflowFileFromWRFHydroRunoff.py +++ b/RAPIDpy/inflow/CreateInflowFileFromWRFHydroRunoff.py @@ -1,15 +1,27 @@ -'''------------------------------------------------------------------------------- - Source Name: CreateInflowFileFromWRFHydroRunoff.py - Author: Environmental Systems Research Institute Inc. - Updated by: Alan D. Snow, US Army ERDC - Description: Creates RAPID inflow file based on the WRF_Hydro land model output - and the weight table previously created. 
-   History: Initial coding - 10/17/2014, version 1.0
- ------------------------------------------------------------------------------'''
+"""
+   CreateInflowFileFromWRFHydroRunoff.py
+   RAPIDpy
+
+   Created by Alan D. Snow, 2016
+   Adapted from CreateInflowFileFromLDASRunoff.py.
+   License: BSD-3-Clause
+"""
 from .CreateInflowFileFromLDASRunoff import CreateInflowFileFromLDASRunoff


 class CreateInflowFileFromWRFHydroRunoff(CreateInflowFileFromLDASRunoff):
+    """Create Inflow File From WRF-Hydro Runoff
+
+    Base class for creating RAPID NetCDF input
+    of water inflow based on WRF-Hydro
+    runoff and previously created weight table.
+
+    According to David Gochis, underground runoff is
+    "a major fraction of total river flow in most places"
+    """
+    land_surface_model_name = "WRF-Hydro"
+    header_wt = ['rivid', 'area_sqm', 'west_east', 'south_north', 'npoints']
+
     def __init__(self, lat_dim="south_north",
                  lon_dim="west_east",
                  lat_var="XLAT",
@@ -18,10 +30,8 @@ def __init__(self, lat_dim="south_north",
                  subsurface_runoff_var="UDROFF"):
         """Define the tool (tool name is the name of the class)."""
-
-        super(CreateInflowFileFromWRFHydroRunoff, self).__init__(lat_dim, lon_dim, lat_var, lon_var,
-                                                                 [surface_runoff_var, subsurface_runoff_var])
-
-        self.header_wt = ['rivid', 'area_sqm', 'west_east', 'south_north', 'npoints']
-        # According to David Gochis, underground runoff is "a major fraction of total river flow in most places"
         self.dims_oi = ['Time', lat_dim, lon_dim]
+
+        super(CreateInflowFileFromWRFHydroRunoff, self).\
+            __init__(lat_dim, lon_dim, lat_var, lon_var,
+                     [surface_runoff_var, subsurface_runoff_var])
diff --git a/RAPIDpy/inflow/__init__.py b/RAPIDpy/inflow/__init__.py
index 9df5de6..a9dea21 100644
--- a/RAPIDpy/inflow/__init__.py
+++ b/RAPIDpy/inflow/__init__.py
@@ -1 +1,11 @@
-from .lsm_rapid_process import run_lsm_rapid_process
\ No newline at end of file
+# -*- coding: utf-8 -*-
+"""
+    inflow
+    RAPIDpy
+
+    This module is for generating inflow files for RAPID simulations.
+
+    Created by: Alan D. Snow, 2015.
+    License: BSD 3-Clause
+"""
+from .lsm_rapid_process import run_lsm_rapid_process  # noqa: F401
diff --git a/RAPIDpy/inflow/lsm_rapid_process.py b/RAPIDpy/inflow/lsm_rapid_process.py
index d4418ad..7208bbb 100644
--- a/RAPIDpy/inflow/lsm_rapid_process.py
+++ b/RAPIDpy/inflow/lsm_rapid_process.py
@@ -1,12 +1,11 @@
 # -*- coding: utf-8 -*-
-#
-# lsm_rapid_process.py
-# RAPIDpy
-#
-# Created by Alan D. Snow.
-# Copyright © 2015-2016 Alan D Snow. All rights reserved.
-# License: BSD 3-Clause
+"""
+    lsm_rapid_process.py
+    RAPIDpy
+
+    Created by Alan D. Snow, 2015.
+ License: BSD 3-Clause +""" from datetime import datetime, timedelta import multiprocessing import os @@ -21,9 +20,11 @@ # local imports from ..rapid import RAPID -from .CreateInflowFileFromERAInterimRunoff import CreateInflowFileFromERAInterimRunoff +from .CreateInflowFileFromERAInterimRunoff import \ + CreateInflowFileFromERAInterimRunoff from .CreateInflowFileFromLDASRunoff import CreateInflowFileFromLDASRunoff -from .CreateInflowFileFromWRFHydroRunoff import CreateInflowFileFromWRFHydroRunoff +from .CreateInflowFileFromWRFHydroRunoff import \ + CreateInflowFileFromWRFHydroRunoff from ..postprocess.generate_return_periods import generate_return_periods from ..postprocess.generate_seasonal_averages import generate_seasonal_averages from ..utilities import (case_insensitive_file_search, @@ -31,9 +32,9 @@ partition) -# ------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- # MULTIPROCESSING FUNCTION -# ------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- def generate_inflows_from_runoff(args): """ prepare runoff inflow file for rapid @@ -43,7 +44,7 @@ def generate_inflows_from_runoff(args): weight_table_file = args[2] grid_type = args[3] rapid_inflow_file = args[4] - RAPID_Inflow_Tool = args[5] + rapid_inflow_tool = args[5] mp_lock = args[6] time_start_all = datetime.utcnow() @@ -69,13 +70,12 @@ def generate_inflows_from_runoff(args): print(runoff_string) print("Converting inflow ...") try: - RAPID_Inflow_Tool.execute(nc_file_list=runoff_file_list, + rapid_inflow_tool.execute(nc_file_list=runoff_file_list, index_list=file_index_list, in_weight_table=weight_table_file, out_nc=rapid_inflow_file, grid_type=grid_type, - mp_lock=mp_lock, - ) + mp_lock=mp_lock) except Exception: # This prints the type, value, and stack trace of the # current exception being handled. 
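
For reference, the worker above hands each batch of runoff files to rapid_inflow_tool.execute(), which (per the CreateInflowFileFromGriddedRunoff hunks earlier in this patch) reduces gridded runoff to per-reach inflow volumes using the weight table. The snippet below is a minimal, self-contained sketch of that area-weighted accumulation for a single time step; accumulate_inflow, runoff_depth, wt, and the literal values are illustrative names and numbers, not identifiers from the patch, and the 0.001 factor assumes runoff given in kg/m^2 (i.e. mm), as in get_conversion_factor.

import numpy as np


def accumulate_inflow(runoff_depth, weight_table, conversion_factor=0.001):
    """Return {rivid: inflow volume in m^3} for one time step.

    runoff_depth is the flattened runoff subset ordered to match the
    weight-table rows; weight_table is a structured array with the
    'rivid', 'area_sqm', and 'npoints' columns described in the patch.
    """
    inflow = {}
    pointer = 0
    while pointer < len(weight_table):
        npoints = int(weight_table['npoints'][pointer])
        rivid = int(weight_table['rivid'][pointer])
        areas = weight_table['area_sqm'][pointer:pointer + npoints]
        depths = runoff_depth[pointer:pointer + npoints]
        # volume = runoff depth (m) * contributing cell area (m^2),
        # summed over the npoints cells draining to this reach
        inflow[rivid] = float(np.nansum(depths * areas * conversion_factor))
        pointer += npoints
    return inflow


# Example with two reaches, each fed by two grid cells:
wt = np.array([(101, 1.0e6, 0, 0, 2), (101, 2.0e6, 1, 0, 2),
               (202, 5.0e5, 0, 1, 2), (202, 1.5e6, 1, 1, 2)],
              dtype=[('rivid', 'i8'), ('area_sqm', 'f8'),
                     ('lon_index', 'i8'), ('lat_index', 'i8'),
                     ('npoints', 'i8')])
print(accumulate_inflow(np.array([1.0, 2.0, 0.5, 1.0]), wt))
# -> {101: 5000.0, 202: 1750.0}
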
@@ -83,12 +83,13 @@ def generate_inflows_from_runoff(args): raise time_finish_ecmwf = datetime.utcnow() - print("Time to convert inflows: {0}".format(time_finish_ecmwf-time_start_all)) + print("Time to convert inflows: {0}" + .format(time_finish_ecmwf-time_start_all)) -# ------------------------------------------------------------------------------ -# UTILITY FUNCTIONS -# ------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- +# UTILITY FUNCTIONS +# ----------------------------------------------------------------------------- DEFAULT_LSM_INPUTS = { 't255': { 'file_datetime_re_pattern': r'\d{8}', @@ -191,7 +192,6 @@ def identify_lsm_grid(lsm_grid_path): elif 'times' in dim_list: time_dim = 'times' - lat_dim_size = len(lsm_example_file.dimensions[latitude_dim]) lon_dim_size = len(lsm_example_file.dimensions[longitude_dim]) @@ -331,7 +331,7 @@ def identify_lsm_grid(lsm_grid_path): # lon = 1024 ; # lat = 512 ; lsm_file_data["description"] = "ERA Interim (T511 Grid)" - lsm_file_data["weight_file_name"]= r'weight_era_t511\.csv' + lsm_file_data["weight_file_name"] = r'weight_era_t511\.csv' lsm_file_data["model_name"] = "erai" lsm_file_data["grid_type"] = 't511' elif lat_dim_size == 161 and lon_dim_size == 320: @@ -399,9 +399,11 @@ def identify_lsm_grid(lsm_grid_path): # g0_lat_0 = 600 ; # g0_lon_1 = 1440 ; # variables - # SSRUN_GDS0_SFC_ave1h (surface), BGRUN_GDS0_SFC_ave1h (subsurface) + # SSRUN_GDS0_SFC_ave1h (surface) + # BGRUN_GDS0_SFC_ave1h (subsurface) # or - # SSRUNsfc_GDS0_SFC_ave1h (surface), BGRUNsfc_GDS0_SFC_ave1h (subsurface) + # SSRUNsfc_GDS0_SFC_ave1h (surface) + # BGRUNsfc_GDS0_SFC_ave1h (subsurface) lsm_file_data["description"] = "GLDAS" lsm_file_data["weight_file_name"] = r'weight_gldas\.csv' lsm_file_data["grid_type"] = 'gldas' @@ -466,48 +468,58 @@ def identify_lsm_grid(lsm_grid_path): return lsm_file_data -def determine_start_end_timestep(lsm_file_list, file_re_match=None, file_datetime_pattern=None, - expected_time_step=None, lsm_grid_info=None): +def determine_start_end_timestep(lsm_file_list, + file_re_match=None, + file_datetime_pattern=None, + expected_time_step=None, + lsm_grid_info=None): """ Determine the start and end date from LSM input files """ - if lsm_grid_info is None: lsm_grid_info = identify_lsm_grid(lsm_file_list[0]) if None in (lsm_grid_info['time_var'], lsm_grid_info['time_dim'])\ or lsm_grid_info['model_name'] in ('era_20cm', 'erai'): - # NOTE: the ERA20CM and ERA 24hr time variables in the tests are erroneous + # NOTE: the ERA20CM and ERA 24hr time variables + # in the tests are erroneous if None in (file_re_match, file_datetime_pattern): - raise ValueError("LSM files missing time dimension and/or variable." 
-                             "To mitigate this, add the 'file_re_match' and "
-                             "'file_datetime_pattern' arguments.")
+            raise ValueError("LSM files missing time dimension and/or "
+                             "variable. To mitigate this, add the "
+                             "'file_re_match' and 'file_datetime_pattern' "
+                             "arguments.")

         if lsm_grid_info['time_dim'] is None:
             print("Assuming time dimension is 1")
             file_size_time = 1
         else:
             lsm_example_file = Dataset(lsm_file_list[0])
-            file_size_time = len(lsm_example_file.dimensions[lsm_grid_info['time_dim']])
+            file_size_time = \
+                len(lsm_example_file.dimensions[lsm_grid_info['time_dim']])
             lsm_example_file.close()

         total_num_time_steps = int(file_size_time * len(lsm_file_list))

         # determine the start time from the existing files
-        actual_simulation_start_datetime = datetime.strptime(file_re_match.search(lsm_file_list[0]).group(0),
-                                                             file_datetime_pattern)
+        actual_simulation_start_datetime = \
+            datetime.strptime(file_re_match.search(lsm_file_list[0]).group(0),
+                              file_datetime_pattern)

         # check to see if the time step matches expected
         if len(lsm_file_list) > 1:
-            time_step = int((datetime.strptime(file_re_match.search(lsm_file_list[1]).group(0), file_datetime_pattern) -
-                             actual_simulation_start_datetime).total_seconds()
-                            / float(file_size_time))
+            time_step = \
+                int((datetime.strptime(
+                    file_re_match.search(lsm_file_list[1]).group(0),
+                    file_datetime_pattern) -
+                    actual_simulation_start_datetime).total_seconds()
+                    / float(file_size_time))
         elif expected_time_step is not None:
             time_step = int(expected_time_step)
         else:
             raise ValueError("Only one LSM file with one timestep present. "
-                             "'expected_time_step' parameter required to continue.")
+                             "'expected_time_step' parameter required to "
+                             "continue.")

         # determine the end datetime
         actual_simulation_end_datetime = \
@@ -523,7 +535,8 @@ def determine_start_end_timestep(lsm_file_list, file_re_match=None, file_datetim
                           lon_dim=lsm_grid_info['longitude_dim'],
                           time_dim=lsm_grid_info['time_dim']) as xds:

-            datetime_arr = [pd.to_datetime(dval) for dval in xds.lsm.datetime.values]
+            datetime_arr = [pd.to_datetime(dval) for dval in
+                            xds.lsm.datetime.values]
             actual_simulation_start_datetime = datetime_arr[0]
             actual_simulation_end_datetime = datetime_arr[-1]
             total_num_time_steps = len(datetime_arr)
@@ -532,18 +545,21 @@ def determine_start_end_timestep(lsm_file_list, file_re_match=None, file_datetim
         if expected_time_step is not None:
             time_step = int(expected_time_step)
         else:
-            raise ValueError("Only one LSM file with one timestep present. "
-                             "'expected_time_step' parameter required to continue.")
-
+            raise ValueError("Only one LSM file with one timestep "
+                             "present. 
'expected_time_step' parameter " + "required to continue.") else: - time_step = int(np.diff(xds.lsm.datetime.values)[0] / np.timedelta64(1, 's')) + time_step = int(np.diff(xds.lsm.datetime.values)[0] + / np.timedelta64(1, 's')) if expected_time_step is not None: if time_step != int(expected_time_step): - print("WARNING: The time step used {0} is different than expected {1}".format(time_step, - expected_time_step)) + print("WARNING: The time step used {0} is different than " + "expected {1}".format(time_step, expected_time_step)) + + return (actual_simulation_start_datetime, actual_simulation_end_datetime, + time_step, total_num_time_steps) - return actual_simulation_start_datetime, actual_simulation_end_datetime, time_step, total_num_time_steps # ------------------------------------------------------------------------------ # MAIN PROCESS @@ -558,7 +574,7 @@ def run_lsm_rapid_process(rapid_executable_location, file_datetime_pattern=None, file_datetime_re_pattern=None, initial_flows_file=None, - ensemble_list=[None], + ensemble_list=(None,), generate_rapid_namelist_file=True, run_rapid_simulation=True, generate_return_periods_file=False, @@ -570,42 +586,97 @@ def run_lsm_rapid_process(rapid_executable_location, num_processors=1, mpiexec_command="mpiexec", cygwin_bin_location="", - modeling_institution="US Army Engineer Research and Development Center", + modeling_institution="US Army Engineer Research " + "and Development Center", convert_one_hour_to_three=False, - expected_time_step=None, - ): + expected_time_step=None): + # pylint: disable=anomalous-backslash-in-string """ This is the main process to generate inflow for RAPID and to run RAPID. - Args: - rapid_executable_location(str): Path to the RAPID executable. - lsm_data_location(str): Path to the directory containing the Land Surface Model output files. - rapid_io_files_location(Optional[str]): Path to the directory containing the input and output folders for RAPID. This is for running multiple watersheds. - rapid_input_location(Optional[str]): Path to directory with RAPID simulation input data. Required if `rapid_io_files_location` is not set. - rapid_output_location(Optional[str]): Path to directory to put output. Required if `rapid_io_files_location` is not set. - simulation_start_datetime(Optional[datetime]): Datetime object with date bound of earliest simulation start. - simulation_end_datetime(Optional[datetime]): Datetime object with date bound of latest simulation end. Defaults to datetime.utcnow(). - file_datetime_pattern(Optional[str]): Datetime pattern for files (Ex. '%Y%m%d%H'). If set, file_datetime_re_pattern is required. Various defaults used by each model. - file_datetime_re_pattern(Optional[raw str]): Regex pattern to extract datetime (Ex. r'\d{10}'). If set, file_datetime_pattern is required. Various defaults used by each model. - initial_flows_file(Optional[str]): If given, this is the path to a file with initial flows for the simulaion. - ensemble_list(Optional[list]): This is the expexted ensemble name appended to the end of the file name. - generate_rapid_namelist_file(Optional[bool]): If True, this will create a RAPID namelist file for the run in your RAPID input directory. Default is True. - run_rapid_simulation(Optional[bool]): If True, the RAPID simulation will run after generating the inflow file. Default is True. - generate_return_periods_file(Optional[bool]): If True, the return period file will be generated in the output. Default is False. 
-        return_period_method(Optional[str]): If True, the return period file will be generated in the output. Default is False.
-        generate_seasonal_averages_file(Optional[bool]): If True, the season average file will be generated. Default is False.
-        generate_seasonal_initialization_file(Optional[bool]): If True, an intialization based on the seasonal average for the current day of the year will be created. Default is False.
-        generate_initialization_file(Optional[bool]): If True, an initialization file from the last time step of the simulation willl be created. Default is False.
-        use_all_processors(Optional[bool]): If True, it will use all available processors to perform this operation. Default is True.
-        num_processors(Optional[int]): If use_all_processors is False, this argument will determine the number of processors to use. Default is 1.
-        mpiexec_command(Optional[str]): This is the command to execute RAPID. Default is "mpiexec".
-        cygwin_bin_location(Optional[str]): If using Windows, this is the path to the Cygwin bin location. Default is "".
-        modeling_institution(Optional[str]): This is the institution performing the modeling and is in the output files. Default is "US Army Engineer Research and Development Center".
-        convert_one_hour_to_three(Optional[bool]): If the time step is expected to be 1-hr it will convert to 3. Set to False if the LIS, NLDAS, or Joules grid time step is greater than 1-hr.
-        expected_time_step(Optional[int]): The time step in seconds of your LSM input data if only one file is given. Required if only one file is present.
-
-    Returns:
-        list: A list of output file information.
+    Parameters
+    ----------
+    rapid_executable_location: str
+        Path to the RAPID executable.
+    lsm_data_location: str
+        Path to the directory containing the Land Surface Model output files.
+    rapid_io_files_location: str, optional
+        Path to the directory containing the input and output folders for
+        RAPID. This is for running multiple watersheds.
+    rapid_input_location: str, optional
+        Path to directory with RAPID simulation input data.
+        Required if `rapid_io_files_location` is not set.
+    rapid_output_location: str, optional
+        Path to directory to put output. Required if
+        `rapid_io_files_location` is not set.
+    simulation_start_datetime: datetime, optional
+        Datetime object with date bound of earliest simulation start.
+    simulation_end_datetime: datetime, optional
+        Datetime object with date bound of latest simulation end.
+        Defaults to :obj:`datetime.utcnow`.
+    file_datetime_pattern: str, optional
+        Datetime pattern for files (Ex. '%Y%m%d%H'). If set,
+        `file_datetime_re_pattern` is required.
+        Various defaults used by each model.
+    file_datetime_re_pattern: raw str, optional
+        Regex pattern to extract datetime (Ex. r'\d{10}').
+        If set, `file_datetime_pattern` is required.
+        Various defaults used by each model.
+    initial_flows_file: str, optional
+        If given, this is the path to a file with initial flows
+        for the simulation.
+    ensemble_list: list, optional
+        This is the expected ensemble name appended to the end of the
+        file name.
+    generate_rapid_namelist_file: bool, optional
+        If True, this will create a RAPID namelist file for the run in
+        your RAPID input directory. Default is True.
+    run_rapid_simulation: bool, optional
+        If True, the RAPID simulation will run after generating the
+        inflow file. Default is True.
+    generate_return_periods_file: bool, optional
+        If True, the return period file will be generated in the output.
+        Default is False.
+    return_period_method: str, optional
+        Name of the method used to compute the return periods. Only used
+        when `generate_return_periods_file` is True.
+    generate_seasonal_averages_file: bool, optional
+        If True, the seasonal average file will be generated. Default is False.
+    generate_seasonal_initialization_file: bool, optional
+        If True, an initialization based on the seasonal average for the
+        current day of the year will be created. Default is False.
+    generate_initialization_file: bool, optional
+        If True, an initialization file from the last time step of the
+        simulation will be created. Default is False.
+    use_all_processors: bool, optional
+        If True, it will use all available processors to perform this
+        operation. Default is True.
+    num_processors: int, optional
+        If use_all_processors is False, this argument will determine the
+        number of processors to use. Default is 1.
+    mpiexec_command: str, optional
+        This is the command to execute RAPID. Default is "mpiexec".
+    cygwin_bin_location: str, optional
+        If using Windows, this is the path to the Cygwin bin location.
+        Default is "".
+    modeling_institution: str, optional
+        This is the institution performing the modeling and is in the
+        output files.
+        Default is "US Army Engineer Research and Development Center".
+    convert_one_hour_to_three: bool, optional
+        If the time step is expected to be 1-hr it will convert to 3.
+        Set to False if the LIS, NLDAS, or Joules grid time step is
+        greater than 1-hr.
+    expected_time_step: int, optional
+        The time step in seconds of your LSM input data if only one file
+        is given. Required if only one file is present.
+
+
+    Returns
+    -------
+    list:
+        A list of output file information.
+
     Example of regular run:
@@ -613,15 +684,12 @@ def run_lsm_rapid_process(rapid_executable_location,
         from datetime import datetime
         from RAPIDpy.inflow import run_lsm_rapid_process

-        #------------------------------------------------------------------------------
-        #main process
-        #------------------------------------------------------------------------------
-        if __name__ == "__main__":
-            run_lsm_rapid_process(
-                rapid_executable_location='/home/alan/rapid/src/rapid',
-                rapid_io_files_location='/home/alan/rapid-io',
-                lsm_data_location='/home/alan/era_data',
-            )
+
+        run_lsm_rapid_process(
+            rapid_executable_location='/home/alan/rapid/src/rapid',
+            rapid_io_files_location='/home/alan/rapid-io',
+            lsm_data_location='/home/alan/era_data',
+        )

     Example of single input/output run:
@@ -629,16 +697,13 @@ def run_lsm_rapid_process(rapid_executable_location,
         from datetime import datetime
         from RAPIDpy.inflow import run_lsm_rapid_process

-        #------------------------------------------------------------------------------
-        #main process
-        #------------------------------------------------------------------------------
-        if __name__ == "__main__":
-            run_lsm_rapid_process(
-                rapid_executable_location='/home/alan/rapid/src/rapid',
-                rapid_input_location='/home/alan/rapid-io/input/provo_watershed',
-                rapid_output_location='/home/alan/rapid-io/output/provo_watershed',
-                lsm_data_location='/home/alan/era_data',
-            )
+
+        run_lsm_rapid_process(
+            rapid_executable_location='/home/alan/rapid/src/rapid',
+            rapid_input_location='/home/alan/rapid-io/input/provo_watershed',
+            rapid_output_location='/home/alan/rapid-io/output/provo_watershed',
+            lsm_data_location='/home/alan/era_data',
+        )

     Example of run with FLDAS and datetime filter:
@@ -649,18 +714,15 @@ def run_lsm_rapid_process(rapid_executable_location,
         from datetime import datetime
         from RAPIDpy.inflow import 
run_lsm_rapid_process - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__ == "__main__": - run_lsm_rapid_process( - rapid_executable_location='/home/alan/rapid/src/rapid', - rapid_io_files_location='/home/alan/rapid-io', - lsm_data_location='/home/alan/lsm_data', - simulation_start_datetime=datetime(1980, 1, 1), - file_datetime_re_pattern = r'\d{8}', - file_datetime_pattern = "%Y%m%d", - ) + + run_lsm_rapid_process( + rapid_executable_location='/home/alan/rapid/src/rapid', + rapid_io_files_location='/home/alan/rapid-io', + lsm_data_location='/home/alan/lsm_data', + simulation_start_datetime=datetime(1980, 1, 1), + file_datetime_re_pattern = r'\d{8}', + file_datetime_pattern = "%Y%m%d", + ) Example of run with CMIP5: @@ -672,67 +734,70 @@ def run_lsm_rapid_process(rapid_executable_location, from datetime import datetime from RAPIDpy.inflow import run_lsm_rapid_process - #------------------------------------------------------------------------------ - #main process - #------------------------------------------------------------------------------ - if __name__ == "__main__": - run_lsm_rapid_process( - rapid_executable_location='/home/jimwlewis/rapid/src/rapid', - rapid_io_files_location='/data/rapid-io4', - lsm_data_location='/data/rapid-io4/input/cmip5-jun01', - simulation_start_datetime=datetime(2001, 1, 1), - simulation_end_datetime=datetime(2002, 12, 31), - file_datetime_pattern="%Y", - file_datetime_re_pattern=r'\d{4}', - ) - """ + + run_lsm_rapid_process( + rapid_executable_location='/home/jimwlewis/rapid/src/rapid', + rapid_io_files_location='/data/rapid-io4', + lsm_data_location='/data/rapid-io4/input/cmip5-jun01', + simulation_start_datetime=datetime(2001, 1, 1), + simulation_end_datetime=datetime(2002, 12, 31), + file_datetime_pattern="%Y", + file_datetime_re_pattern=r'\d{4}', + ) + """ # noqa time_begin_all = datetime.utcnow() # use all processors makes precedent over num_processors arg - if use_all_processors == True: - NUM_CPUS = multiprocessing.cpu_count() + if use_all_processors is True: + num_cpus = multiprocessing.cpu_count() elif num_processors > multiprocessing.cpu_count(): print("WARNING: Num processors requested exceeded max. 
Set to max ...") - NUM_CPUS = multiprocessing.cpu_count() + num_cpus = multiprocessing.cpu_count() else: - NUM_CPUS = num_processors - - # get list of correclty formatted rapid input directories in rapid directory + num_cpus = num_processors + # get list of correctly formatted rapid input directories in + # rapid directory rapid_directories = [] if rapid_io_files_location is not None: - main_rapid_input_directory = os.path.join(rapid_io_files_location, 'input') - for watershed_directory in get_valid_directory_list(main_rapid_input_directory): + main_rapid_input_directory = os.path.join(rapid_io_files_location, + 'input') + for watershed_directory in \ + get_valid_directory_list(main_rapid_input_directory): watershed_input_path = os.path.join(main_rapid_input_directory, watershed_directory) watershed_output_path = os.path.join(rapid_io_files_location, 'output', watershed_directory) - rapid_directories.append((watershed_input_path, watershed_output_path)) + rapid_directories.append( + (watershed_input_path, watershed_output_path)) elif None not in (rapid_input_location, rapid_output_location): rapid_directories = [(rapid_input_location, rapid_output_location)] else: - raise ValueError("Need 'rapid_io_files_location' or 'rapid_input_location' " - "and 'rapid_output_location' set to continue.") + raise ValueError("Need 'rapid_io_files_location' or " + "'rapid_input_location' and 'rapid_output_location'" + " set to continue.") all_output_file_information = [] - for ensemble in ensemble_list: output_file_information = { - 'ensemble' : ensemble, + 'ensemble': ensemble, } ensemble_file_ending = ".nc" ensemble_file_ending4 = ".nc4" - if ensemble != None: + if ensemble is not None: ensemble_file_ending = "_{0}.nc".format(ensemble) ensemble_file_ending4 = "_{0}.nc4".format(ensemble) # get list of files lsm_file_list = [] - for subdir, dirs, files in os.walk(lsm_data_location, followlinks=True): - for lsm_file in files: - if lsm_file.endswith(ensemble_file_ending) or lsm_file.endswith(ensemble_file_ending4): - lsm_file_list.append(os.path.join(subdir, lsm_file)) + for walkdir_info in os.walk(lsm_data_location, + followlinks=True): + for lsm_file in walkdir_info[2]: + if lsm_file.endswith(ensemble_file_ending) or \ + lsm_file.endswith(ensemble_file_ending4): + lsm_file_list.append( + os.path.join(walkdir_info[0], lsm_file)) lsm_file_list = sorted(lsm_file_list) # IDENTIFY THE GRID @@ -740,8 +805,12 @@ def run_lsm_rapid_process(rapid_executable_location, # load in the datetime pattern if file_datetime_pattern is None or file_datetime_re_pattern is None: - file_datetime_re_pattern = DEFAULT_LSM_INPUTS[lsm_file_data['grid_type']]['file_datetime_re_pattern'] - file_datetime_pattern = DEFAULT_LSM_INPUTS[lsm_file_data['grid_type']]['file_datetime_pattern'] + file_datetime_re_pattern = \ + DEFAULT_LSM_INPUTS[lsm_file_data['grid_type']][ + 'file_datetime_re_pattern'] + file_datetime_pattern = \ + DEFAULT_LSM_INPUTS[lsm_file_data['grid_type']][ + 'file_datetime_pattern'] file_re_match = re.compile(file_datetime_re_pattern) # get subset based on time bounds @@ -750,11 +819,12 @@ def run_lsm_rapid_process(rapid_executable_location, lsm_file_list_subset = [] for lsm_file in lsm_file_list: match = file_re_match.search(lsm_file) - file_date = datetime.strptime(match.group(0), file_datetime_pattern) + file_date = datetime.strptime(match.group(0), + file_datetime_pattern) if file_date > simulation_end_datetime: break if file_date >= simulation_start_datetime: - lsm_file_list_subset.append(os.path.join(subdir, 
lsm_file)) + lsm_file_list_subset.append(lsm_file) lsm_file_list = sorted(lsm_file_list_subset) @@ -762,33 +832,38 @@ def run_lsm_rapid_process(rapid_executable_location, lsm_file_list[-1])) # get number of time steps in file - actual_simulation_start_datetime, actual_simulation_end_datetime, time_step, total_num_time_steps = \ - determine_start_end_timestep(lsm_file_list, - file_re_match=file_re_match, - file_datetime_pattern=file_datetime_pattern, - expected_time_step=expected_time_step, - lsm_grid_info=lsm_file_data) + actual_simulation_start_datetime, actual_simulation_end_datetime, \ + time_step, total_num_time_steps = \ + determine_start_end_timestep( + lsm_file_list, + file_re_match=file_re_match, + file_datetime_pattern=file_datetime_pattern, + expected_time_step=expected_time_step, + lsm_grid_info=lsm_file_data) # VALIDATING INPUT IF DIVIDING BY 3 - time_step_multiply_factor = 1 - if (lsm_file_data['grid_type'] in ('nldas', 'lis', 'joules')) and convert_one_hour_to_three: + if (lsm_file_data['grid_type'] in ('nldas', 'lis', 'joules')) \ + and convert_one_hour_to_three: num_extra_files = total_num_time_steps % 3 if num_extra_files != 0: - print("WARNING: Number of files needs to be divisible by 3. Remainder is {0}".format(num_extra_files)) + print("WARNING: Number of files needs to be divisible by 3. " + "Remainder is {0}".format(num_extra_files)) print("This means your simulation will be truncated") total_num_time_steps /= 3 time_step *= 3 # compile the file ending - out_file_ending = "{0}_{1}_{2}hr_{3:%Y%m%d}to{4:%Y%m%d}{5}".format(lsm_file_data['model_name'], - lsm_file_data['grid_type'], - int(time_step/3600), - actual_simulation_start_datetime, - actual_simulation_end_datetime, - ensemble_file_ending) + out_file_ending = "{0}_{1}_{2}hr_{3:%Y%m%d}to{4:%Y%m%d}{5}"\ + .format(lsm_file_data['model_name'], + lsm_file_data['grid_type'], + int(time_step/3600), + actual_simulation_start_datetime, + actual_simulation_end_datetime, + ensemble_file_ending) # run LSM processes - for master_watershed_input_directory, master_watershed_output_directory in rapid_directories: + for master_watershed_input_directory, \ + master_watershed_output_directory in rapid_directories: print("Running from: {0}".format(master_watershed_input_directory)) try: os.makedirs(master_watershed_output_directory) @@ -796,78 +871,92 @@ def run_lsm_rapid_process(rapid_executable_location, pass # create inflow to dump data into - master_rapid_runoff_file = os.path.join(master_watershed_output_directory, - 'm3_riv_bas_{0}'.format(out_file_ending)) + master_rapid_runoff_file = \ + os.path.join(master_watershed_output_directory, + 'm3_riv_bas_{0}'.format(out_file_ending)) - weight_table_file = case_insensitive_file_search(master_watershed_input_directory, - lsm_file_data['weight_file_name']) + weight_table_file = \ + case_insensitive_file_search(master_watershed_input_directory, + lsm_file_data['weight_file_name']) try: - in_rivid_lat_lon_z_file = case_insensitive_file_search(master_watershed_input_directory, - r'comid_lat_lon_z\.csv') - except Exception: + in_rivid_lat_lon_z_file = \ + case_insensitive_file_search( + master_watershed_input_directory, + r'comid_lat_lon_z\.csv') + except IndexError: in_rivid_lat_lon_z_file = "" - print("WARNING: comid_lat_lon_z file not found. The lat/lon will not be added ...") - pass + print("WARNING: comid_lat_lon_z file not found." 
+ " The lat/lon will not be added ...") - print("Writing inflow file to: {0}".format(master_rapid_runoff_file)) + print("Writing inflow file to: {0}" + .format(master_rapid_runoff_file)) lsm_file_data['rapid_inflow_tool'].generateOutputInflowFile( out_nc=master_rapid_runoff_file, start_datetime_utc=actual_simulation_start_datetime, number_of_timesteps=total_num_time_steps, simulation_time_step_seconds=time_step, - in_rapid_connect_file=case_insensitive_file_search(master_watershed_input_directory, - r'rapid_connect\.csv'), + in_rapid_connect_file=case_insensitive_file_search( + master_watershed_input_directory, + r'rapid_connect\.csv'), in_rivid_lat_lon_z_file=in_rivid_lat_lon_z_file, land_surface_model_description=lsm_file_data['description'], modeling_institution=modeling_institution ) job_combinations = [] - if (lsm_file_data['grid_type'] in ('nldas', 'lis', 'joules')) and convert_one_hour_to_three: - print("Grouping {0} in threes".format(lsm_file_data['grid_type'])) + if (lsm_file_data['grid_type'] in ('nldas', 'lis', 'joules')) \ + and convert_one_hour_to_three: + print("Grouping {0} in threes" + .format(lsm_file_data['grid_type'])) lsm_file_list = [lsm_file_list[nldas_index:nldas_index+3] - for nldas_index in range(0, len(lsm_file_list), 3) - if len(lsm_file_list[nldas_index:nldas_index+3]) == 3] - - if len(lsm_file_list) < NUM_CPUS: - NUM_CPUS = len(lsm_file_list) + for nldas_index in + range(0, len(lsm_file_list), 3) + if len(lsm_file_list[ + nldas_index:nldas_index+3]) == 3] + + if len(lsm_file_list) < num_cpus: + num_cpus = len(lsm_file_list) + # pylint: disable=no-member mp_lock = multiprocessing.Manager().Lock() - partition_list, partition_index_list = partition(lsm_file_list, NUM_CPUS) + partition_list, partition_index_list = \ + partition(lsm_file_list, num_cpus) for loop_index, cpu_grouped_file_list in enumerate(partition_list): if cpu_grouped_file_list and partition_index_list[loop_index]: - job_combinations.append((cpu_grouped_file_list, - partition_index_list[loop_index], - weight_table_file, - lsm_file_data['grid_type'], - master_rapid_runoff_file, - lsm_file_data['rapid_inflow_tool'], - mp_lock)) - # COMMENTED CODE IS FOR DEBUGGING -# generate_inflows_from_runoff((cpu_grouped_file_list, -# partition_index_list[loop_index], -# lsm_file_data['weight_table_file'], -# lsm_file_data['grid_type'], -# master_rapid_runoff_file, -# lsm_file_data['rapid_inflow_tool'], -# mp_lock)) - pool = multiprocessing.Pool(NUM_CPUS) + job_combinations.append(( + cpu_grouped_file_list, + partition_index_list[loop_index], + weight_table_file, + lsm_file_data['grid_type'], + master_rapid_runoff_file, + lsm_file_data['rapid_inflow_tool'], + mp_lock)) +# # COMMENTED CODE IS FOR DEBUGGING +# generate_inflows_from_runoff(( +# cpu_grouped_file_list, +# partition_index_list[loop_index], +# lsm_file_data['weight_table_file'], +# lsm_file_data['grid_type'], +# master_rapid_runoff_file, +# lsm_file_data['rapid_inflow_tool'], +# mp_lock)) + pool = multiprocessing.Pool(num_cpus) pool.map(generate_inflows_from_runoff, job_combinations) pool.close() pool.join() # set up RAPID manager - rapid_manager = RAPID(rapid_executable_location=rapid_executable_location, - cygwin_bin_location=cygwin_bin_location, - num_processors=NUM_CPUS, - mpiexec_command=mpiexec_command, - ZS_TauR=time_step, # duration of routing procedure (time step of runoff data) - ZS_dtR=15 * 60, # internal routing time step - ZS_TauM=total_num_time_steps * time_step, # total simulation time - ZS_dtM=time_step # RAPID recommended internal time 
step (1 day) - ) + rapid_manager = RAPID( + rapid_executable_location=rapid_executable_location, + cygwin_bin_location=cygwin_bin_location, + num_processors=num_cpus, + mpiexec_command=mpiexec_command, + ZS_TauR=time_step, + ZS_dtR=15 * 60, + ZS_TauM=total_num_time_steps * time_step, + ZS_dtM=time_step) if initial_flows_file and os.path.exists(initial_flows_file): rapid_manager.update_parameters( @@ -876,73 +965,94 @@ def run_lsm_rapid_process(rapid_executable_location, ) # run RAPID for the watershed - lsm_rapid_output_file = os.path.join(master_watershed_output_directory, - 'Qout_{0}'.format(out_file_ending)) + lsm_rapid_output_file = \ + os.path.join(master_watershed_output_directory, + 'Qout_{0}'.format(out_file_ending)) rapid_manager.update_parameters( - rapid_connect_file=case_insensitive_file_search(master_watershed_input_directory, - r'rapid_connect\.csv'), + rapid_connect_file=case_insensitive_file_search( + master_watershed_input_directory, + r'rapid_connect\.csv'), Vlat_file=master_rapid_runoff_file, - riv_bas_id_file=case_insensitive_file_search(master_watershed_input_directory, - r'riv_bas_id\.csv'), - k_file=case_insensitive_file_search(master_watershed_input_directory, - r'k\.csv'), - x_file=case_insensitive_file_search(master_watershed_input_directory, - r'x\.csv'), + riv_bas_id_file=case_insensitive_file_search( + master_watershed_input_directory, + r'riv_bas_id\.csv'), + k_file=case_insensitive_file_search( + master_watershed_input_directory, + r'k\.csv'), + x_file=case_insensitive_file_search( + master_watershed_input_directory, + r'x\.csv'), Qout_file=lsm_rapid_output_file ) rapid_manager.update_reach_number_data() - - output_file_information[os.path.basename(master_watershed_input_directory)] = { - 'm3_riv': master_rapid_runoff_file, - 'qout': lsm_rapid_output_file - } - + output_file_information[ + os.path.basename(master_watershed_input_directory)] = { + 'm3_riv': master_rapid_runoff_file, + 'qout': lsm_rapid_output_file + } if generate_rapid_namelist_file: - rapid_manager.generate_namelist_file(os.path.join(master_watershed_input_directory, - "rapid_namelist_{}".format(out_file_ending[:-3]))) + rapid_manager.generate_namelist_file( + os.path.join(master_watershed_input_directory, + "rapid_namelist_{}" + .format(out_file_ending[:-3]))) if run_rapid_simulation: rapid_manager.run() - rapid_manager.make_output_CF_compliant( + rapid_manager.make_output_cf_compliant( simulation_start_datetime=actual_simulation_start_datetime, comid_lat_lon_z_file=in_rivid_lat_lon_z_file, - project_name="{0} Based Historical flows by {1}".format(lsm_file_data['description'], - modeling_institution) + project_name="{0} Based Historical flows by {1}" + .format(lsm_file_data['description'], + modeling_institution) ) # generate return periods - if generate_return_periods_file and os.path.exists(lsm_rapid_output_file) and lsm_rapid_output_file: - return_periods_file = os.path.join(master_watershed_output_directory, - 'return_periods_{0}'.format(out_file_ending)) + if generate_return_periods_file and \ + os.path.exists(lsm_rapid_output_file) and \ + lsm_rapid_output_file: + return_periods_file = os.path.join( + master_watershed_output_directory, + 'return_periods_{0}'.format(out_file_ending)) # assume storm has 3 day length storm_length_days = 3 - generate_return_periods(qout_file=lsm_rapid_output_file, - return_period_file=return_periods_file, - num_cpus=NUM_CPUS, - storm_duration_days=storm_length_days, - method=return_period_method) - + generate_return_periods( + 
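
# A small worked example of the routing time parameters passed to RAPID above,
# using assumed numbers (3-hourly runoff for 10 days); only the arithmetic is
# shown, not an actual model run.
time_step = 3 * 3600                          # seconds per runoff interval
total_num_time_steps = 80                     # 10 days of 3-hourly data
ZS_TauR = time_step                           # routing duration per interval
ZS_dtR = 15 * 60                              # internal routing sub-step (s)
ZS_TauM = total_num_time_steps * time_step    # total simulation time (s)
ZS_dtM = time_step                            # outer step, set to the interval
assert ZS_TauM == 10 * 24 * 3600              # 864000 s, i.e. 10 days
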
qout_file=lsm_rapid_output_file, + return_period_file=return_periods_file, + num_cpus=num_cpus, + storm_duration_days=storm_length_days, + method=return_period_method) + # generate seasonal averages file - if generate_seasonal_averages_file and os.path.exists(lsm_rapid_output_file) and lsm_rapid_output_file: - seasonal_averages_file = os.path.join(master_watershed_output_directory, - 'seasonal_averages_{0}'.format(out_file_ending)) + if generate_seasonal_averages_file and \ + os.path.exists(lsm_rapid_output_file) and \ + lsm_rapid_output_file: + seasonal_averages_file = os.path.join( + master_watershed_output_directory, + 'seasonal_averages_{0}'.format(out_file_ending)) generate_seasonal_averages(lsm_rapid_output_file, seasonal_averages_file, - NUM_CPUS) + num_cpus) # generate seasonal initialization file - if generate_seasonal_initialization_file and os.path.exists(lsm_rapid_output_file) and lsm_rapid_output_file: - seasonal_qinit_file = os.path.join(master_watershed_input_directory, - 'seasonal_qinit_{0}.csv'.format(out_file_ending[:-3])) - rapid_manager.generate_seasonal_intitialization(seasonal_qinit_file) + if generate_seasonal_initialization_file and \ + os.path.exists(lsm_rapid_output_file) and \ + lsm_rapid_output_file: + seasonal_qinit_file = os.path.join( + master_watershed_input_directory, + 'seasonal_qinit_{0}.csv'.format(out_file_ending[:-3])) + rapid_manager.generate_seasonal_intitialization( + seasonal_qinit_file) # generate initialization file - if generate_initialization_file and os.path.exists(lsm_rapid_output_file) and lsm_rapid_output_file: - qinit_file = os.path.join(master_watershed_input_directory, - 'qinit_{0}.csv'.format(out_file_ending[:-3])) + if generate_initialization_file and \ + os.path.exists(lsm_rapid_output_file) and \ + lsm_rapid_output_file: + qinit_file = os.path.join( + master_watershed_input_directory, + 'qinit_{0}.csv'.format(out_file_ending[:-3])) rapid_manager.generate_qinit_from_past_qout(qinit_file) all_output_file_information.append(output_file_information) @@ -953,4 +1063,4 @@ def run_lsm_rapid_process(rapid_executable_location, print("Time Finish All: {0}".format(time_end)) print("TOTAL TIME: {0}".format(time_end-time_begin_all)) - return all_output_file_information \ No newline at end of file + return all_output_file_information diff --git a/RAPIDpy/postprocess/__init__.py b/RAPIDpy/postprocess/__init__.py index 74911ff..d7bb95b 100644 --- a/RAPIDpy/postprocess/__init__.py +++ b/RAPIDpy/postprocess/__init__.py @@ -1,3 +1,10 @@ # -*- coding: utf-8 -*- -from .goodness_of_fit import find_goodness_of_fit, find_goodness_of_fit_csv -from .merge import ConvertRAPIDOutputToCF +""" + RAPIDpy.postprocess + + Created by Alan D Snow, 2016. + Based on RAPID_Toolbox for ArcMap + License: BSD 3-Clause +""" +from .goodness_of_fit import find_goodness_of_fit, find_goodness_of_fit_csv # noqa +from .merge import ConvertRAPIDOutputToCF # noqa diff --git a/RAPIDpy/postprocess/generate_return_periods.py b/RAPIDpy/postprocess/generate_return_periods.py index 59bfe56..44a6094 100644 --- a/RAPIDpy/postprocess/generate_return_periods.py +++ b/RAPIDpy/postprocess/generate_return_periods.py @@ -1,51 +1,55 @@ # -*- coding: utf-8 -*- -## -## generate_return_periods.py -## RAPIDpy -## -## Created by Alan D. Snow and Scott D. Christensen. -## Copyright © 2015-2016 Alan D Snow and Scott D. Christensen. All rights reserved. -## License: BSD-3 Clause +""" + generate_return_periods.py + RAPIDpy + Created by: Alan D. Snow and Scott D. Christensen, 2015-2016. 
+ License: BSD 3-Clause +""" from datetime import datetime import multiprocessing -import netCDF4 as nc +from netCDF4 import Dataset import numpy as np -#local +# local from ..dataset import RAPIDDataset +from ..helper_functions import add_latlon_metadata, log from ..utilities import partition + def generate_single_return_period(args): """ This function calculates a single return period for a single reach """ - qout_file=args[0] - return_period_file=args[1] - rivid_index_list=args[2] - step=args[3] - num_years=args[4] - method=args[5] - mp_lock=args[6] - - skewvals = [-3.0, -2.8, -2.6, -2.4, -2.2, -2.0, -1.8, -1.6, -1.4, -1.2, -1.0, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, - 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0] - kfac2 = [0.396, 0.384, 0.368, 0.351, 0.33, 0.307, 0.282, 0.254, 0.225, 0.195, 0.164, 0.132, 0.099, 0.066, 0.033, 0, - -0.033, -0.066, -0.099, -0.132, -0.164, -0.195, -0.225, -0.254, -0.282, -0.307, -0.33, -0.351, -0.368, - -0.384, -0.396] - kfac10 = [0.66, 0.702, 0.747, 0.795, 0.844, 0.895, 0.945, 0.994, 1.041, 1.086, 1.128, 1.166, 1.2, 1.231, 1.258, - 1.282, 1.301, 1.317, 1.328, 1.336, 1.34, 1.34, 1.337, 1.329, 1.318, 1.302, 1.284, 1.262, 1.238, 1.21, - 1.18] - kfac25 = [.666, .712, .764, .823, .888, .959, 1.035, 1.116, 1.198, 1.282, 1.366, 1.448, 1.528, 1.606, 1.680, 1.751, - 1.818, 1.880, 1.939, 1.993, 2.043, 2.087, 2.128, 2.163, 2.193, 2.219, 2.240, 2.256, 2.267, 2.275, 2.278] - kfac50 = [0.666, 0.714, 0.768, 0.83, 0.9, 0.98, 1.069, 1.166, 1.27, 1.379, 1.492, 1.606, 1.72, 1.834, 1.945, 2.054, - 2.159, 2.261, 2.359, 2.453, 2.542, 2.626, 2.706, 2.78, 2.848, 2.912, 2.97, 3.023, 3.071, 3.114, 3.152] - kfac100 = [0.667, 0.714, 0.769, 0.832, 0.905, 0.99, 1.087, 1.197, 1.318, 1.499, 1.588, 1.733, 1.88, 2.029, 2.178, - 2.326, 2.472, 2.615, 2.755, 2.891, 3.022, 3.149, 3.271, 3.388, 3.499, 3.605, 3.705, 3.8, 3.889, 3.973, - 4.051] - - with RAPIDDataset(qout_file) as qout_nc_file: - #get index of return period data + qout_file, return_period_file, rivid_index_list, step, num_years, \ + method, mp_lock = args + + skewvals = [-3.0, -2.8, -2.6, -2.4, -2.2, -2.0, -1.8, -1.6, -1.4, -1.2, + -1.0, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1.0, + 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0] + kfac2 = [0.396, 0.384, 0.368, 0.351, 0.33, 0.307, 0.282, 0.254, 0.225, + 0.195, 0.164, 0.132, 0.099, 0.066, 0.033, 0, -0.033, -0.066, + -0.099, -0.132, -0.164, -0.195, -0.225, -0.254, -0.282, -0.307, + -0.33, -0.351, -0.368, -0.384, -0.396] + kfac10 = [0.66, 0.702, 0.747, 0.795, 0.844, 0.895, 0.945, 0.994, 1.041, + 1.086, 1.128, 1.166, 1.2, 1.231, 1.258, 1.282, 1.301, 1.317, + 1.328, 1.336, 1.34, 1.34, 1.337, 1.329, 1.318, 1.302, 1.284, + 1.262, 1.238, 1.21, 1.18] + kfac25 = [.666, .712, .764, .823, .888, .959, 1.035, 1.116, 1.198, 1.282, + 1.366, 1.448, 1.528, 1.606, 1.680, 1.751, 1.818, 1.880, 1.939, + 1.993, 2.043, 2.087, 2.128, 2.163, 2.193, 2.219, 2.240, 2.256, + 2.267, 2.275, 2.278] + kfac50 = [0.666, 0.714, 0.768, 0.83, 0.9, 0.98, 1.069, 1.166, 1.27, 1.379, + 1.492, 1.606, 1.72, 1.834, 1.945, 2.054, 2.159, 2.261, 2.359, + 2.453, 2.542, 2.626, 2.706, 2.78, 2.848, 2.912, 2.97, 3.023, + 3.071, 3.114, 3.152] + kfac100 = [0.667, 0.714, 0.769, 0.832, 0.905, 0.99, 1.087, 1.197, 1.318, + 1.499, 1.588, 1.733, 1.88, 2.029, 2.178, 2.326, 2.472, 2.615, + 2.755, 2.891, 3.022, 3.149, 3.271, 3.388, 3.499, 3.605, 3.705, + 3.8, 3.889, 3.973, 4.051] + + with RAPIDDataset(qout_file) as qout_nc_file: + # get index of return period data if method == 'weibull': rp_index_20 = 
int((num_years + 1)/20.0) rp_index_10 = int((num_years + 1)/10.0) @@ -65,16 +69,19 @@ def generate_single_return_period(args): return_2_array = np.zeros(len(rivid_index_list)) max_flow_array = np.zeros(len(rivid_index_list)) - #iterate through rivids to generate return periods + # iterate through rivids to generate return periods for iter_idx, rivid_index in enumerate(rivid_index_list): - filtered_flow_data = qout_nc_file.get_qout_index(rivid_index, - pd_filter="{0}D".format(step), - filter_mode="max") + filtered_flow_data = qout_nc_file.get_qout_index( + rivid_index, + pd_filter="{0}D".format(step), + filter_mode="max") sorted_flow_data = np.sort(filtered_flow_data)[:num_years:-1] max_flow = sorted_flow_data[0] if max_flow < 0.01: - print("WARNING: Return period data < 0.01 generated for rivid {0}" - .format(qout_nc_file.qout_nc.variables[qout_nc_file.river_id_dimension][rivid_index])) + log("Return period data < 0.01 generated for rivid {0}" + .format(qout_nc_file.qout_nc.variables[ + qout_nc_file.river_id_dimension][rivid_index]), + "WARNING") max_flow_array[iter_idx] = max_flow if method == 'weibull': @@ -92,121 +99,157 @@ def generate_single_return_period(args): return_2_array[iter_idx] = mean_flow - .164*stddev elif method == 'log_pearson': - log_flow = np.log10(filtered_flow_data[filtered_flow_data>0]) - if len(log_flow)<= 0: + log_flow = np.log10(filtered_flow_data[filtered_flow_data > 0]) + if len(log_flow) <= 0: continue mean_log_flow = np.mean(log_flow) std_log_flow = np.std(log_flow) log_flow_array = np.array(log_flow) - skew = (num_years * (np.sum(np.power((log_flow_array - mean_log_flow), 3)))) / ( - (num_years - 1) * (num_years - 2) * (std_log_flow) ** 3) + skew = (num_years * (np.sum( + np.power((log_flow_array - mean_log_flow), 3)))) / \ + ((num_years - 1) * (num_years - 2) * std_log_flow ** 3) k2 = np.interp(skew, skewvals, kfac2) k10 = np.interp(skew, skewvals, kfac10) k25 = np.interp(skew, skewvals, kfac25) k50 = np.interp(skew, skewvals, kfac50) k100 = np.interp(skew, skewvals, kfac100) - return_100_array[iter_idx] = np.power(10, (mean_log_flow + k100*std_log_flow)) - return_50_array[iter_idx] = np.power(10, (mean_log_flow + k50*std_log_flow)) - return_25_array[iter_idx] = np.power(10, (mean_log_flow + k25*std_log_flow)) - return_10_array[iter_idx] = np.power(10, (mean_log_flow + k10*std_log_flow)) - return_2_array[iter_idx] = np.power(10, (mean_log_flow + k2*std_log_flow)) + return_100_array[iter_idx] = \ + np.power(10, (mean_log_flow + k100*std_log_flow)) + return_50_array[iter_idx] = \ + np.power(10, (mean_log_flow + k50*std_log_flow)) + return_25_array[iter_idx] = \ + np.power(10, (mean_log_flow + k25*std_log_flow)) + return_10_array[iter_idx] = \ + np.power(10, (mean_log_flow + k10*std_log_flow)) + return_2_array[iter_idx] = \ + np.power(10, (mean_log_flow + k2*std_log_flow)) mp_lock.acquire() - return_period_nc = nc.Dataset(return_period_file, 'a') - return_period_nc.variables['max_flow'][rivid_index_list] = max_flow_array + return_period_nc = Dataset(return_period_file, 'a') + return_period_nc.variables['max_flow'][rivid_index_list] = \ + max_flow_array if method == 'weibull': - return_period_nc.variables['return_period_20'][rivid_index_list] = return_20_array + return_period_nc.variables['return_period_20'][ + rivid_index_list] = return_20_array elif method in 'gumble': - return_period_nc.variables['return_period_100'][rivid_index_list] = return_100_array - return_period_nc.variables['return_period_50'][rivid_index_list] = return_50_array - 
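
# A condensed sketch of the log-Pearson Type III estimate coded above, applied
# to one synthetic series of annual maxima. The two short lists are abbreviated
# stand-ins for the full skewvals/kfac100 tables defined earlier in this
# function; the flow values are illustrative only.
import numpy as np

annual_max = np.array([120.0, 95.0, 150.0, 80.0, 110.0, 135.0, 90.0, 160.0])
num_years = annual_max.size

log_flow = np.log10(annual_max[annual_max > 0])
mean_log_flow = np.mean(log_flow)
std_log_flow = np.std(log_flow)
skew = (num_years * np.sum((log_flow - mean_log_flow) ** 3)) / \
    ((num_years - 1) * (num_years - 2) * std_log_flow ** 3)

skewvals_stub = [-3.0, 0.0, 3.0]
kfac100_stub = [0.667, 2.326, 4.051]
k100 = np.interp(skew, skewvals_stub, kfac100_stub)
return_period_100 = np.power(10, mean_log_flow + k100 * std_log_flow)
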
return_period_nc.variables['return_period_20'][rivid_index_list] = return_20_array + return_period_nc.variables['return_period_100'][ + rivid_index_list] = return_100_array + return_period_nc.variables['return_period_50'][ + rivid_index_list] = return_50_array + return_period_nc.variables['return_period_20'][ + rivid_index_list] = return_20_array elif method == 'log_pearson': - return_period_nc.variables['return_period_100'][rivid_index_list] = return_100_array - return_period_nc.variables['return_period_50'][rivid_index_list] = return_50_array - return_period_nc.variables['return_period_25'][rivid_index_list] = return_25_array - return_period_nc.variables['return_period_10'][rivid_index_list] = return_10_array - return_period_nc.variables['return_period_2'][rivid_index_list] = return_2_array + return_period_nc.variables['return_period_100'][ + rivid_index_list] = return_100_array + return_period_nc.variables['return_period_50'][ + rivid_index_list] = return_50_array + return_period_nc.variables['return_period_25'][ + rivid_index_list] = return_25_array + return_period_nc.variables['return_period_10'][ + rivid_index_list] = return_10_array + return_period_nc.variables['return_period_2'][ + rivid_index_list] = return_2_array return_period_nc.close() mp_lock.release() -def generate_return_periods(qout_file, return_period_file, num_cpus=multiprocessing.cpu_count(), storm_duration_days=7, method='weibull'): + +def generate_return_periods(qout_file, + return_period_file, + num_cpus=multiprocessing.cpu_count(), + storm_duration_days=7, + method='weibull'): """ Generate return period from RAPID Qout file """ - - #get ERA Interim Data Analyzed + # get ERA Interim Data Analyzed with RAPIDDataset(qout_file) as qout_nc_file: print("Setting up Return Periods File ...") - return_period_nc = nc.Dataset(return_period_file, 'w') - + return_period_nc = Dataset(return_period_file, 'w') + return_period_nc.createDimension('rivid', qout_nc_file.size_river_id) - timeSeries_var = return_period_nc.createVariable('rivid', 'i4', ('rivid',)) + timeSeries_var = \ + return_period_nc.createVariable('rivid', 'i4', ('rivid',)) timeSeries_var.long_name = ( 'unique identifier for each river reach') - max_flow_var = return_period_nc.createVariable('max_flow', 'f8', ('rivid',)) + max_flow_var = \ + return_period_nc.createVariable('max_flow', 'f8', ('rivid',)) max_flow_var.long_name = 'maximum streamflow' max_flow_var.units = 'm3/s' if method == 'weibull': - return_period_20_var = return_period_nc.createVariable('return_period_20', 'f8', ('rivid',)) + return_period_20_var = \ + return_period_nc.createVariable('return_period_20', + 'f8', ('rivid',)) return_period_20_var.long_name = '20 year return period flow' return_period_20_var.units = 'm3/s' if method == 'gumble': - return_period_100_var = return_period_nc.createVariable('return_period_100', 'f8', ('rivid',)) + return_period_100_var = \ + return_period_nc.createVariable('return_period_100', + 'f8', ('rivid',)) return_period_100_var.long_name = '100 year return period flow' return_period_100_var.units = 'm3/s' - return_period_50_var = return_period_nc.createVariable('return_period_50', 'f8', ('rivid',)) + return_period_50_var = \ + return_period_nc.createVariable('return_period_50', + 'f8', ('rivid',)) return_period_50_var.long_name = '50 year return period flow' return_period_50_var.units = 'm3/s' - return_period_20_var = return_period_nc.createVariable('return_period_20', 'f8', ('rivid',)) + return_period_20_var = \ + return_period_nc.createVariable('return_period_20', + 
'f8', ('rivid',)) return_period_20_var.long_name = '20 year return period flow' return_period_20_var.units = 'm3/s' if method == 'log_pearson': - return_period_100_var = return_period_nc.createVariable('return_period_100', 'f8', ('rivid',)) + return_period_100_var = \ + return_period_nc.createVariable('return_period_100', + 'f8', ('rivid',)) return_period_100_var.long_name = '100 year return period flow' return_period_100_var.units = 'm3/s' - return_period_50_var = return_period_nc.createVariable('return_period_50', 'f8', ('rivid',)) + return_period_50_var = \ + return_period_nc.createVariable('return_period_50', + 'f8', ('rivid',)) return_period_50_var.long_name = '50 year return period flow' return_period_50_var.units = 'm3/s' - return_period_25_var = return_period_nc.createVariable('return_period_25', 'f8', ('rivid',)) + return_period_25_var = \ + return_period_nc.createVariable('return_period_25', + 'f8', ('rivid',)) return_period_25_var.long_name = '25 year return period flow' return_period_25_var.units = 'm3/s' - - return_period_10_var = return_period_nc.createVariable('return_period_10', 'f8', ('rivid',)) + + return_period_10_var = \ + return_period_nc.createVariable('return_period_10', + 'f8', ('rivid',)) return_period_10_var.long_name = '10 year return period flow' return_period_10_var.units = 'm3/s' - - return_period_2_var = return_period_nc.createVariable('return_period_2', 'f8', ('rivid',)) + + return_period_2_var = \ + return_period_nc.createVariable('return_period_2', + 'f8', ('rivid',)) return_period_2_var.long_name = '2 year return period flow' return_period_2_var.units = 'm3/s' lat_var = return_period_nc.createVariable('lat', 'f8', ('rivid',), fill_value=-9999.0) - lat_var.long_name = 'latitude' - lat_var.standard_name = 'latitude' - lat_var.units = 'degrees_north' - lat_var.axis = 'Y' lon_var = return_period_nc.createVariable('lon', 'f8', ('rivid',), fill_value=-9999.0) - lon_var.long_name = 'longitude' - lon_var.standard_name = 'longitude' - lon_var.units = 'degrees_east' - lon_var.axis = 'X' - return_period_nc.variables['lat'][:] = qout_nc_file.qout_nc.variables['lat'][:] - return_period_nc.variables['lon'][:] = qout_nc_file.qout_nc.variables['lon'][:] + add_latlon_metadata(lat_var, lon_var) + + return_period_nc.variables['lat'][:] = \ + qout_nc_file.qout_nc.variables['lat'][:] + return_period_nc.variables['lon'][:] = \ + qout_nc_file.qout_nc.variables['lon'][:] river_id_list = qout_nc_file.get_river_id_array() return_period_nc.variables['rivid'][:] = river_id_list @@ -216,21 +259,26 @@ def generate_return_periods(qout_file, return_period_file, num_cpus=multiprocess return_period_nc.close() time_array = qout_nc_file.get_time_array() - - print("Extracting Data and Generating Return Periods ...") - num_years = int((datetime.utcfromtimestamp(time_array[-1])-datetime.utcfromtimestamp(time_array[0])).days/365.2425) - time_steps_per_day = (24*3600)/float((datetime.utcfromtimestamp(time_array[1])-datetime.utcfromtimestamp(time_array[0])).total_seconds()) - step = max(1,int(time_steps_per_day * storm_duration_days)) - - #generate multiprocessing jobs + + log("Extracting Data and Generating Return Periods ...") + num_years = int((datetime.utcfromtimestamp(time_array[-1]) - + datetime.utcfromtimestamp(time_array[0])).days/365.2425) + time_steps_per_day = (24 * 3600) / float( + (datetime.utcfromtimestamp(time_array[1]) - + datetime.utcfromtimestamp(time_array[0])).total_seconds()) + step = max(1, int(time_steps_per_day * storm_duration_days)) + + # generate multiprocessing jobs + 
# pylint: disable=no-member mp_lock = multiprocessing.Manager().Lock() job_combinations = [] - partition_list, partition_index_list = partition(river_id_list, num_cpus*2) + partition_index_list = partition(river_id_list, num_cpus*2)[1] for sub_partition_index_list in partition_index_list: + # pylint: disable=len-as-condition if len(sub_partition_index_list) > 0: job_combinations.append((qout_file, return_period_file, - sub_partition_index_list, + sub_partition_index_list, step, num_years, method, @@ -241,4 +289,4 @@ def generate_return_periods(qout_file, return_period_file, num_cpus=multiprocess pool.map(generate_single_return_period, job_combinations) pool.close() - pool.join() \ No newline at end of file + pool.join() diff --git a/RAPIDpy/postprocess/generate_seasonal_averages.py b/RAPIDpy/postprocess/generate_seasonal_averages.py index 2dea9c9..33340ce 100644 --- a/RAPIDpy/postprocess/generate_seasonal_averages.py +++ b/RAPIDpy/postprocess/generate_seasonal_averages.py @@ -1,18 +1,21 @@ # -*- coding: utf-8 -*- -## -## generate_seasonal_averages.py -## RAPIDpy -## -## Created by Alan D. Snow -## License: BSD-3 Clause +""" + generate_seasonal_averages.py + RAPIDpy + Created by: Alan D. Snow, 2016. + License: BSD 3-Clause +""" from calendar import isleap import multiprocessing +from time import gmtime + from netCDF4 import Dataset import numpy as np -from time import gmtime from ..dataset import RAPIDDataset +from ..helper_functions import add_latlon_metadata + def generate_single_seasonal_average(args): """ @@ -27,22 +30,22 @@ def generate_single_seasonal_average(args): min_day = day_of_year - 3 max_day = day_of_year + 3 - with RAPIDDataset(qout_file) as qout_nc_file: + with RAPIDDataset(qout_file) as qout_nc_file: time_indices = [] for idx, t in enumerate(qout_nc_file.get_time_array()): var_time = gmtime(t) compare_yday = var_time.tm_yday - #move day back one past because of leap year adds - #a day after feb 29 (day 60) + # move day back one past because of leap year adds + # a day after feb 29 (day 60) if isleap(var_time.tm_year) and compare_yday > 60: compare_yday -= 1 - #check if date within range of season - if compare_yday >= min_day and compare_yday < max_day: + # check if date within range of season + if max_day > compare_yday >= min_day: time_indices.append(idx) - + if not time_indices: raise IndexError("No time steps found within range ...") - + streamflow_array = qout_nc_file.get_qout(time_index_array=time_indices) avg_streamflow_array = np.mean(streamflow_array, axis=1) @@ -52,76 +55,84 @@ def generate_single_seasonal_average(args): mp_lock.acquire() seasonal_avg_nc = Dataset(seasonal_average_file, 'a') - seasonal_avg_nc.variables['average_flow'][:, day_of_year-1] = avg_streamflow_array - seasonal_avg_nc.variables['std_dev_flow'][:, day_of_year-1] = std_streamflow_array - seasonal_avg_nc.variables['max_flow'][:, day_of_year-1] = max_streamflow_array - seasonal_avg_nc.variables['min_flow'][:, day_of_year-1] = min_streamflow_array + seasonal_avg_nc.variables['average_flow'][:, day_of_year-1] = \ + avg_streamflow_array + seasonal_avg_nc.variables['std_dev_flow'][:, day_of_year-1] = \ + std_streamflow_array + seasonal_avg_nc.variables['max_flow'][:, day_of_year-1] = \ + max_streamflow_array + seasonal_avg_nc.variables['min_flow'][:, day_of_year-1] = \ + min_streamflow_array seasonal_avg_nc.close() mp_lock.release() -def generate_seasonal_averages(qout_file, seasonal_average_file, + +def generate_seasonal_averages(qout_file, seasonal_average_file, 
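
# A short sketch of the +/-3 day seasonal window used in
# generate_single_seasonal_average above, including the leap-year shift, for
# one assumed timestamp (2016-06-29 00:00 UTC, epoch seconds 1467158400).
from calendar import isleap
from time import gmtime

day_of_year = 180
min_day = day_of_year - 3
max_day = day_of_year + 3

var_time = gmtime(1467158400)          # 2016-06-29, tm_yday == 181
compare_yday = var_time.tm_yday
# leap years add a day after 29 Feb (day 60), so later days shift back by one
if isleap(var_time.tm_year) and compare_yday > 60:
    compare_yday -= 1                  # 181 -> 180
in_window = max_day > compare_yday >= min_day   # True
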
num_cpus=multiprocessing.cpu_count()): """ This function loops through a CF compliant rapid streamflow file to produce a netCDF file with a seasonal average for 365 days a year """ - with RAPIDDataset(qout_file) as qout_nc_file: print("Generating seasonal average file ...") seasonal_avg_nc = Dataset(seasonal_average_file, 'w') - + seasonal_avg_nc.createDimension('rivid', qout_nc_file.size_river_id) seasonal_avg_nc.createDimension('day_of_year', 365) - timeSeries_var = seasonal_avg_nc.createVariable('rivid', 'i4', ('rivid',)) - timeSeries_var.long_name = ( + time_series_var = seasonal_avg_nc.createVariable('rivid', 'i4', + ('rivid',)) + time_series_var.long_name = ( 'unique identifier for each river reach') - average_flow_var = seasonal_avg_nc.createVariable('average_flow', 'f8', ('rivid','day_of_year')) + average_flow_var = \ + seasonal_avg_nc.createVariable('average_flow', 'f8', + ('rivid', 'day_of_year')) average_flow_var.long_name = 'seasonal average streamflow' average_flow_var.units = 'm3/s' - - std_dev_flow_var = seasonal_avg_nc.createVariable('std_dev_flow', 'f8', ('rivid','day_of_year')) + + std_dev_flow_var = \ + seasonal_avg_nc.createVariable('std_dev_flow', 'f8', + ('rivid', 'day_of_year')) std_dev_flow_var.long_name = 'seasonal std. dev. streamflow' std_dev_flow_var.units = 'm3/s' - std_dev_flow_var = seasonal_avg_nc.createVariable('max_flow', 'f8', ('rivid','day_of_year')) + std_dev_flow_var = \ + seasonal_avg_nc.createVariable('max_flow', 'f8', + ('rivid', 'day_of_year')) std_dev_flow_var.long_name = 'seasonal max streamflow' std_dev_flow_var.units = 'm3/s' - std_dev_flow_var = seasonal_avg_nc.createVariable('min_flow', 'f8', ('rivid','day_of_year')) + std_dev_flow_var = \ + seasonal_avg_nc.createVariable('min_flow', 'f8', + ('rivid', 'day_of_year')) std_dev_flow_var.long_name = 'seasonal min streamflow' std_dev_flow_var.units = 'm3/s' lat_var = seasonal_avg_nc.createVariable('lat', 'f8', ('rivid',), - fill_value=-9999.0) - lat_var.long_name = 'latitude' - lat_var.standard_name = 'latitude' - lat_var.units = 'degrees_north' - lat_var.axis = 'Y' + fill_value=-9999.0) lon_var = seasonal_avg_nc.createVariable('lon', 'f8', ('rivid',), - fill_value=-9999.0) - lon_var.long_name = 'longitude' - lon_var.standard_name = 'longitude' - lon_var.units = 'degrees_east' - lon_var.axis = 'X' + fill_value=-9999.0) + add_latlon_metadata(lat_var, lon_var) - seasonal_avg_nc.variables['lat'][:] = qout_nc_file.qout_nc.variables['lat'][:] - seasonal_avg_nc.variables['lon'][:] = qout_nc_file.qout_nc.variables['lon'][:] + seasonal_avg_nc.variables['lat'][:] = \ + qout_nc_file.qout_nc.variables['lat'][:] + seasonal_avg_nc.variables['lon'][:] = \ + qout_nc_file.qout_nc.variables['lon'][:] river_id_list = qout_nc_file.get_river_id_array() seasonal_avg_nc.variables['rivid'][:] = river_id_list seasonal_avg_nc.close() - - #generate multiprocessing jobs - mp_lock = multiprocessing.Manager().Lock() + + # generate multiprocessing jobs + mp_lock = multiprocessing.Manager().Lock() # pylint: disable=no-member job_combinations = [] for day_of_year in range(1, 366): job_combinations.append((qout_file, seasonal_average_file, - day_of_year, + day_of_year, mp_lock )) diff --git a/RAPIDpy/postprocess/goodness_of_fit.py b/RAPIDpy/postprocess/goodness_of_fit.py index b53d5a7..a7c7870 100644 --- a/RAPIDpy/postprocess/goodness_of_fit.py +++ b/RAPIDpy/postprocess/goodness_of_fit.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# -# goodnessOfFit.py -# RAPIDpy -# -# Created by Alan D Snow 2015. 
-# License: BSD 3-Clause -# - +""" + goodness_of_fit.py + RAPIDpy + + Created by Alan D Snow, 2015. + Based on RAPID_Toolbox for ArcMap + License: BSD 3-Clause +""" from __future__ import print_function from csv import writer as csvwriter import numpy as np @@ -22,14 +22,14 @@ def filter_nan(s, o): """ this functions removed the data from simulated and observed data whereever the observed data contains nan - + this is used by all other functions, otherwise they will produce nan as output """ - data = np.array([s.flatten(),o.flatten()]) + data = np.array([s.flatten(), o.flatten()]) data = np.transpose(data) data = data[~np.isnan(data).any(1)] - return data[:,0],data[:,1] + return data[:, 0], data[:, 1] def pc_bias(s, o): @@ -70,6 +70,7 @@ def rmse(s, o): # s,o = filter_nan(s,o) return np.sqrt(np.mean((s-o)**2)) + def mae(s, o): """ Mean Absolute Error @@ -135,8 +136,7 @@ def correlation(s, o): if s.size == 0: corr = np.NaN else: - corr = np.corrcoef(o, s)[0,1] - + corr = np.corrcoef(o, s)[0, 1] return corr @@ -150,7 +150,8 @@ def index_agreement(s, o): ia: index of agreement """ # s,o = filter_nan(s,o) - ia = 1 -(np.sum((o-s)**2))/(np.sum((np.abs(s-np.mean(o))+np.abs(o-np.mean(o)))**2)) + ia = 1 - (np.sum((o-s)**2)) /\ + (np.sum((np.abs(s-np.mean(o))+np.abs(o-np.mean(o)))**2)) return ia @@ -166,11 +167,11 @@ def KGE(s, o): alpha: ratio of the standard deviation beta: ratio of the mean """ - # s,o = filter_nan(s,o) - cc = correlation(s,o) + # s,o = filter_nan(s, o) + cc = correlation(s, o) alpha = np.std(s)/np.std(o) beta = np.sum(s)/np.sum(o) - kge = 1- np.sqrt( (cc-1)**2 + (alpha-1)**2 + (beta-1)**2 ) + kge = 1 - np.sqrt((cc-1)**2 + (alpha-1)**2 + (beta-1)**2) return kge, cc, alpha, beta # END FUNCTIONS FROM http://pydoc.net/Python/ambhas/0.4.0/ambhas.errlib/ @@ -180,41 +181,61 @@ def KGE(s, o): # Time Series comparison functions # ------------------------------------------------------------------------------ def find_goodness_of_fit(rapid_qout_file, reach_id_file, observed_file, - out_analysis_file, daily=False, steps_per_group=1): + out_analysis_file, daily=False): """ - Finds the goodness of fit comparing observed streamflow in a rapid Qout file - with simulated flows in a csv file. - - Args: - rapid_qout_file(str): Path to the RAPID Qout file. - reach_id_file(str): Path to file with river reach ID's associate with the RAPID Qout file. It is in the format of the RAPID observed flows reach ID file. - observed_file(str): Path to input csv with with observed flows corresponding to the RAPID Qout. It is in the format of the RAPID observed flows file. - out_analysis_file(str): Path to the analysis output csv file. - daily(Optional[bool]): If True and the file is CF-Compliant, it will compare the *observed_file* with daily average flow from Qout. Default is False. + Finds the goodness of fit comparing observed streamflow in a rapid Qout + file with simulated flows in a csv file. + + Parameters + ---------- + rapid_qout_file: str + Path to the RAPID Qout file. + reach_id_file: str + ath to file with river reach ID's associate with the RAPID Qout file. + It is in the format of the RAPID observed flows reach ID file. + observed_file: str + Path to input csv with with observed flows corresponding to the + RAPID Qout. It is in the format of the RAPID observed flows file. + out_analysis_file: str + Path to the analysis output csv file. + daily: bool, optional + If True and the file is CF-Compliant, it will compare the + *observed_file* with daily average flow from Qout. Default is False. 
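
# A tiny numerical example of the Kling-Gupta Efficiency defined in KGE()
# above, using made-up simulated/observed series; np.std (ddof=0) is used,
# matching that function.
import numpy as np

simulated = np.array([10.0, 12.0, 14.0, 9.0, 11.0])
observed = np.array([11.0, 13.0, 15.0, 8.0, 10.0])

cc = np.corrcoef(observed, simulated)[0, 1]
alpha = np.std(simulated) / np.std(observed)
beta = np.sum(simulated) / np.sum(observed)
kge = 1 - np.sqrt((cc - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)
# kge is roughly 0.71 here; values near 1 indicate close agreement
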
+ Example with CF-Compliant RAPID Qout file: - + .. code:: python - + import os from RAPIDpy.postprocess import find_goodness_of_fit - + INPUT_DATA_PATH = '/path/to/data' - reach_id_file = os.path.join(INPUT_DATA_PATH, 'obs_reach_id.csv') - observed_file = os.path.join(INPUT_DATA_PATH, 'obs_flow.csv') - - cf_input_qout_file = os.path.join(COMPARE_DATA_PATH, 'Qout_nasa_lis_3hr_20020830_CF.nc') - cf_out_analysis_file = os.path.join(OUTPUT_DATA_PATH, 'cf_goodness_of_fit_results-daily.csv') - find_goodness_of_fit(cf_input_qout_file, reach_id_file, observed_file, - cf_out_analysis_file, daily=True) - + reach_id_file = os.path.join(INPUT_DATA_PATH, 'obs_reach_id.csv') + observed_file = os.path.join(INPUT_DATA_PATH, 'obs_flow.csv') + + cf_input_qout_file = os.path.join(COMPARE_DATA_PATH, + 'Qout_nasa_lis_3hr_20020830_CF.nc') + cf_out_analysis_file = \ + os.path.join(OUTPUT_DATA_PATH, + 'cf_goodness_of_fit_results-daily.csv') + find_goodness_of_fit(cf_input_qout_file, + reach_id_file, + observed_file, + cf_out_analysis_file, + daily=True) + """ - reach_id_list = np.loadtxt(reach_id_file, delimiter=",", usecols=(0,), ndmin=1, dtype=np.int32) - + reach_id_list = np.loadtxt(reach_id_file, + delimiter=",", usecols=(0,), + ndmin=1, dtype=np.int32) + data_nc = RAPIDDataset(rapid_qout_file) - + # analyze and write - observed_table = np.loadtxt(observed_file, ndmin=2, delimiter=",", usecols=tuple(range(reach_id_list.size))) + observed_table = np.loadtxt(observed_file, + ndmin=2, delimiter=",", + usecols=tuple(range(reach_id_list.size))) with open(out_analysis_file, 'w') as outcsv: writer = csvwriter(outcsv) writer.writerow(["reach_id", @@ -228,14 +249,15 @@ def find_goodness_of_fit(rapid_qout_file, reach_id_file, observed_file, "correlation_coeff", "index_agreement", "KGE"]) - + for index, reach_id in enumerate(reach_id_list): observed_array = observed_table[:, index] simulated_array = data_nc.get_qout(reach_id, daily=daily) # make sure they are the same length simulated_array = simulated_array[:len(observed_array)] observed_array = observed_array[:len(simulated_array)] - simulated_array,observed_array = filter_nan(simulated_array,observed_array) + simulated_array, observed_array = \ + filter_nan(simulated_array, observed_array) writer.writerow([reach_id, pc_bias(simulated_array, observed_array), apb(simulated_array, observed_array), @@ -252,51 +274,68 @@ def find_goodness_of_fit(rapid_qout_file, reach_id_file, observed_file, def find_goodness_of_fit_csv(observed_simulated_file, out_file=None): """ Finds the goodness of fit comparing observed and simulated flows - In the file, the first column is the observed flows and the + In the file, the first column is the observed flows and the second column is the simulated flows. - + Example:: - + 33.5, 77.2 34.7, 73.0 - - Args: - observed_simulated_file(str): Path to the csv file with the observed and simulated flows. - out_file(Optional[str]): Path to output file. If not provided, it will print to console. + + Parameters + ---------- + observed_simulated_file: str + Path to the csv file with the observed and simulated flows. + out_file: str, optional + Path to output file. If not provided, it will print to console. + Example: - + .. 
code:: python - + from RAPIDpy.postprocess import find_goodness_of_fit_csv - - find_goodness_of_fit_csv('/united_kingdom-thames/flows_kingston_gage_noah.csv') - + + find_goodness_of_fit_csv(' + /united_kingdom-thames/flows_kingston_gage_noah.csv') + """ - observed_simulated_table = np.loadtxt(observed_simulated_file, ndmin=2, delimiter=",", usecols=(0,1)) + observed_simulated_table = np.loadtxt(observed_simulated_file, + ndmin=2, delimiter=",", + usecols=(0, 1)) - observed_array, simulated_array = filter_nan(observed_simulated_table[:, 0], - observed_simulated_table[:, 1]) + observed_array, simulated_array = \ + filter_nan(observed_simulated_table[:, 0], + observed_simulated_table[:, 1]) # print error indices if out_file: print_file = open(out_file, 'w') else: print_file = None - + print("\n".join([ - "Percent Bias: {0}".format(pc_bias(simulated_array, observed_array)), - "Absolute Percent Bias: {0}".format(apb(simulated_array, observed_array)), - "Root Mean Squared Error: {0}".format(rmse(simulated_array, observed_array)), - "Mean Absolute Error: {0}".format(mae(simulated_array, observed_array)), + "Percent Bias: {0:.4f}" + .format(pc_bias(simulated_array, observed_array)), + "Absolute Percent Bias: {0:.4f}" + .format(apb(simulated_array, observed_array)), + "Root Mean Squared Error: {0:.4f}" + .format(rmse(simulated_array, observed_array)), + "Mean Absolute Error: {0:.4f}" + .format(mae(simulated_array, observed_array)), "Bias: {0}".format(bias(simulated_array, observed_array)), - "Nash Sutcliffe efficiency coefficient: {0}".format(NS(simulated_array, observed_array)), - "Likelihood: {0}".format(L(simulated_array, observed_array)), - "correlation coefficient: {0}".format(correlation(simulated_array, observed_array)), - "index of agreement: {0}".format(index_agreement(simulated_array, observed_array)), - "Kling-Gupta Efficiency: {0}".format(KGE(simulated_array, observed_array)[0]), - ]), + "Nash Sutcliffe efficiency coefficient: {0:.4f}" + .format(NS(simulated_array, observed_array)), + "Likelihood: {0:.4f}" + .format(L(simulated_array, observed_array)), + "correlation coefficient: {0:.4f}" + .format(correlation(simulated_array, observed_array)), + "index of agreement: {0:.4f}" + .format(index_agreement(simulated_array, observed_array)), + "Kling-Gupta Efficiency: {0:.4f}" + .format(KGE(simulated_array, observed_array)[0]), + ]), file=print_file) - + if print_file: - print_file.close() \ No newline at end of file + print_file.close() diff --git a/RAPIDpy/postprocess/merge.py b/RAPIDpy/postprocess/merge.py index 50dcc7f..22ee5ba 100644 --- a/RAPIDpy/postprocess/merge.py +++ b/RAPIDpy/postprocess/merge.py @@ -1,16 +1,15 @@ # -*- coding: utf-8 -*- -## -## merge.py -## RAPIDpy -## -## Created by Tim Whitaker, 2015. -## Modified by Alan D Snow, 2015-2016 -## - -"""Copies data from RAPID netCDF output to a CF-compliant netCDF file. +""" +merge.py +RAPIDpy + +Created by Tim Whitaker, 2015. +Modified by Alan D Snow, 2015-2016 + +Copies data from RAPID netCDF output to a CF-compliant netCDF file. Code originated from Tim Whitaker at University of Texas. The code was modified by Alan Snow at US Army ERDC. - + Remarks: A new netCDF file is created with data from RAPID [1] simulation model output. 
The result follows CF conventions [2] with additional metadata @@ -54,145 +53,166 @@ [2] http://cfconventions.org/ [3] http://www.nodc.noaa.gov/data/formats/netcdf/v1.1/ """ - - from datetime import datetime import os + from netCDF4 import Dataset import numpy as np +from past.builtins import xrange # pylint: disable=redefined-builtin from pytz import utc -#local +# local from ..dataset import RAPIDDataset -from ..helper_functions import csv_to_list, remove_files, log +from ..helper_functions import (add_latlon_metadata, csv_to_list, + remove_files, log) -#in Python 3 xrange is now range -try: - xrange -except NameError: - xrange = range - pass class ConvertRAPIDOutputToCF(object): """ - Class to convert RAPID output to be CF compliant. You can also use this to + Class to convert RAPID output to be CF compliant. You can also use this to combine consecutive RAPID output files into one file. - Attributes: - rapid_output_file(str or list): Path to a single RAPID Qout file or a list of RAPID Qout files. - start_datetime(datetime): Datetime object with the time of the start of the simulation. - time_step(int or list): Time step of simulation in seconds if single Qout file or a list of time steps corresponding to each Qout file in the *rapid_output_file*. - qinit_file(Optional[str]): Path to the Qinit file for the simulation. If used, it will use the values in the file for the flow at simulation time zero. - comid_lat_lon_z_file(Optional[str]): Path to comid_lat_lon_z file. If included, the spatial information will be added to the output NetCDF file. - rapid_connect_file(Optional[str]): Path to RAPID connect file. This is required if *qinit_file* is added. - project_name(Optional[str]): Name of your project in the output file. Default is "Default RAPID Project". - output_id_dim_name(Optional[str]): Name of the output river ID dimension name. Default is 'rivid'. - output_flow_var_name(Optional[str]): Name of streamflow variable in output file, typically 'Qout' or 'm3_riv'. Default is 'Qout'. - print_debug(Optional[bool]): If True, the debug output will be printed to the console. Default is False. + Parameters + ---------- + rapid_output_file: str or list + Path to a single RAPID Qout file or a list of RAPID Qout files. + start_datetime: :obj:`datetime.datetime` + Datetime object with the time of the start of the simulation. + time_step: int or list + Time step of simulation in seconds if single Qout file or a list of + time steps corresponding to each Qout file in the *rapid_output_file*. + qinit_file: str, optional + Path to the Qinit file for the simulation. If used, it will use the + values in the file for the flow at simulation time zero. + comid_lat_lon_z_file: str, optional + Path to comid_lat_lon_z file. If included, the spatial information + will be added to the output NetCDF file. + rapid_connect_file: str, optional + Path to RAPID connect file. This is required if *qinit_file* is added. + project_name: str, optional + Name of your project in the output file. Default is + "Default RAPID Project". + output_id_dim_name: str, optional + Name of the output river ID dimension name. Default is 'rivid'. + output_flow_var_name: str, optional + Name of streamflow variable in output file, typically + 'Qout' or 'm3_riv'. Default is 'Qout'. + print_debug: bool, optional + If True, the debug output will be printed to the console. + Default is False. + .. warning:: This code replaces the first file with the combined output and deletes the second file. BACK UP YOUR FILES!!!! - + + Example: - + .. 
code:: python - + import datetime from RAPIDpy.postprocess import ConvertRAPIDOutputToCF - + file1 = "/path/to/Qout_1980to1981.nc" file2 = "/path/to/Qout_1981to1982.nc" - - cv = ConvertRAPIDOutputToCF(rapid_output_file=[file1, file2], - start_datetime=datetime.datetime(2005,1,1), - time_step=[3*3600, 3*3600], - project_name="NLDAS(VIC)-RAPID historical flows by US Army ERDC", - ) + + cv = ConvertRAPIDOutputToCF( + rapid_output_file=[file1, file2], + start_datetime=datetime.datetime(2005,1,1), + time_step=[3*3600, 3*3600], + project_name="NLDAS(VIC)-RAPID historical flows by US Army ERDC") cv.convert() + """ - def __init__(self, rapid_output_file, - start_datetime, - time_step, - qinit_file="", - comid_lat_lon_z_file="", - rapid_connect_file="", - project_name="Default RAPID Project", - output_id_dim_name='rivid', - output_flow_var_name='Qout', - print_debug=False - ): - - if not isinstance(rapid_output_file, list): - self.rapid_output_file_list = [rapid_output_file] - else: - self.rapid_output_file_list = rapid_output_file - self.start_datetime = start_datetime.replace(tzinfo=utc) - - if not isinstance(time_step, list): - self.time_step_array = [time_step] - else: - self.time_step_array = time_step - - self.qinit_file = qinit_file - self.comid_lat_lon_z_file = comid_lat_lon_z_file - self.rapid_connect_file = rapid_connect_file - self.project_name = project_name - self.output_id_dim_name = output_id_dim_name - self.output_flow_var_name = output_flow_var_name - self.print_debug = print_debug - self.cf_compliant_file = '%s_CF.nc' % os.path.splitext(self.rapid_output_file_list[0])[0] + + # pylint: disable= + def __init__(self, + rapid_output_file, + start_datetime, + time_step, + qinit_file="", + comid_lat_lon_z_file="", + rapid_connect_file="", + project_name="Default RAPID Project", + output_id_dim_name='rivid', + output_flow_var_name='Qout', + print_debug=False): + if not isinstance(rapid_output_file, list): + self.rapid_output_file_list = [rapid_output_file] + else: + self.rapid_output_file_list = rapid_output_file + self.start_datetime = start_datetime.replace(tzinfo=utc) + + if not isinstance(time_step, list): + self.time_step_array = [time_step] + else: + self.time_step_array = time_step + + self.qinit_file = qinit_file + self.comid_lat_lon_z_file = comid_lat_lon_z_file + self.rapid_connect_file = rapid_connect_file + self.project_name = project_name + self.output_id_dim_name = output_id_dim_name + self.output_flow_var_name = output_flow_var_name + self.print_debug = print_debug + self.cf_compliant_file = '%s_CF.nc' % os.path.splitext( + self.rapid_output_file_list[0])[0] + self.cf_nc = None + self.raw_nc_list = [] def _validate_raw_nc(self): """Checks that raw netCDF file has the right dimensions and variables. - - Arguments: - nc -- netCDF dataset object representing raw RAPID output - - Returns: - name of ID dimension, - length of time dimension, - name of flow variable - + + Returns + ------- + int: + Length of rivid dimension. + int: + Length of time dimension. + Remarks: Raises exception if file doesn't validate. 
""" - self.raw_nc_list = [] - total_time_len = 1 #add one for the first flow value RAPID - #does not include + # add one for the first flow value RAPID + # does not include + total_time_len = 1 id_len_list = [] for rapid_output_file in self.rapid_output_file_list: qout_nc = RAPIDDataset(rapid_output_file) id_len_list.append(qout_nc.size_river_id) total_time_len += qout_nc.size_time self.raw_nc_list.append(qout_nc) - - #make sure river id lists are the same + + # make sure river id lists are the same for id_len_undex in range(1, len(id_len_list)): if id_len_list[id_len_undex] != id_len_list[0]: - raise Exception("ERROR: River ID size is different in one of the files ...") - + raise Exception("River ID size is different in " + "one of the files ...") + for raw_nc_index in range(1, len(self.raw_nc_list)): - if not (self.raw_nc_list[raw_nc_index].get_river_id_array() == self.raw_nc_list[0].get_river_id_array()).all(): - raise Exception("ERROR: River IDs are different in files ...") + if not (self.raw_nc_list[raw_nc_index].get_river_id_array() == + self.raw_nc_list[0].get_river_id_array()).all(): + raise Exception("River IDs are different in " + "files ...") return id_len_list[0], total_time_len - def _initialize_output(self, time_len, id_len): """Creates netCDF file with CF dimensions and variables, but no data. - - Arguments: - filename -- full path and filename for output netCDF file - id_dim_name -- name of Id dimension and variable, e.g., COMID - time_len -- (integer) length of time dimension (number of time steps) - id_len -- (integer) length of Id dimension (number of time series) - time_step_seconds -- (integer) number of seconds per time step - """ + Arguments + --------- + time_len: int + Length of time dimension (number of time steps). + id_len: int + Length of Id dimension (number of time series). + + """ log('Initializing new file %s' % self.cf_compliant_file, 'INFO') - - self.cf_nc = Dataset(self.cf_compliant_file, 'w', format='NETCDF3_CLASSIC') - + + self.cf_nc = Dataset(self.cf_compliant_file, 'w', + format='NETCDF3_CLASSIC') + # Create global attributes log(' globals', 'DEBUG', self.print_debug) self.cf_nc.featureType = 'timeSeries' @@ -201,14 +221,16 @@ def _initialize_output(self, time_len, id_len): self.cf_nc.cdm_data_type = 'Station' self.cf_nc.nodc_template_version = ( 'NODC_NetCDF_TimeSeries_Orthogonal_Template_v1.1') - self.cf_nc.standard_name_vocabulary = ('NetCDF Climate and Forecast (CF) ' + - 'Metadata Convention Standard Name ' + - 'Table v28') + self.cf_nc.standard_name_vocabulary = \ + ('NetCDF Climate and Forecast (CF) ' + 'Metadata Convention Standard Name ' + 'Table v28') self.cf_nc.title = 'RAPID Result' - self.cf_nc.summary = ("Results of RAPID river routing simulation. Each river " + - "reach (i.e., feature) is represented by a point " + - "feature at its midpoint, and is identified by the " + - "reach's unique NHDPlus COMID identifier.") + self.cf_nc.summary =\ + ("Results of RAPID river routing simulation. 
Each river " + "reach (i.e., feature) is represented by a point " + "feature at its midpoint, and is identified by the " + "reach's unique NHDPlus COMID identifier.") self.cf_nc.time_coverage_resolution = 'point' self.cf_nc.geospatial_lat_min = 0.0 self.cf_nc.geospatial_lat_max = 0.0 @@ -221,70 +243,73 @@ def _initialize_output(self, time_len, id_len): self.cf_nc.geospatial_vertical_min = 0.0 self.cf_nc.geospatial_vertical_max = 0.0 self.cf_nc.geospatial_vertical_units = 'm' - self.cf_nc.geospatial_vertical_resolution = 'midpoint of stream feature' + self.cf_nc.geospatial_vertical_resolution = \ + 'midpoint of stream feature' self.cf_nc.geospatial_vertical_positive = 'up' self.cf_nc.project = self.project_name self.cf_nc.processing_level = 'Raw simulation result' - self.cf_nc.keywords_vocabulary = ('NASA/Global Change Master Directory ' + - '(GCMD) Earth Science Keywords. Version ' + - '8.0.0.0.0') + self.cf_nc.keywords_vocabulary = \ + ('NASA/Global Change Master Directory ' + '(GCMD) Earth Science Keywords. Version ' + '8.0.0.0.0') self.cf_nc.keywords = 'DISCHARGE/FLOW' - self.cf_nc.comment = 'Result time step(s) (seconds): ' + str(self.time_step_array) - + self.cf_nc.comment = \ + 'Result time step(s) (seconds): ' + str(self.time_step_array) + timestamp = datetime.utcnow().isoformat() + 'Z' self.cf_nc.date_created = timestamp - self.cf_nc.history = (timestamp + '; added time, lat, lon, z, crs variables; ' + - 'added metadata to conform to NODC_NetCDF_TimeSeries_' + - 'Orthogonal_Template_v1.1') - + self.cf_nc.history = \ + (timestamp + '; added time, lat, lon, z, crs variables; ' + 'added metadata to conform to NODC_NetCDF_TimeSeries_' + 'Orthogonal_Template_v1.1') + # Create dimensions log(' dimming', 'DEBUG', self.print_debug) self.cf_nc.createDimension('time', time_len) self.cf_nc.createDimension(self.output_id_dim_name, id_len) - + # Create variables - log(' timeSeries_var', 'DEBUG', self.print_debug) - timeSeries_var = self.cf_nc.createVariable(self.output_id_dim_name, 'i4', - (self.output_id_dim_name,)) - timeSeries_var.long_name = ( + log(' time_series_var', 'DEBUG', self.print_debug) + time_series_var = \ + self.cf_nc.createVariable(self.output_id_dim_name, 'i4', + (self.output_id_dim_name,)) + time_series_var.long_name = ( 'Unique NHDPlus COMID identifier for each river reach feature') - timeSeries_var.cf_role = 'timeseries_id' - + time_series_var.cf_role = 'timeseries_id' + log(' time_var', 'DEBUG', self.print_debug) time_var = self.cf_nc.createVariable('time', 'i4', ('time',)) time_var.long_name = 'time' time_var.standard_name = 'time' time_var.units = 'seconds since 1970-01-01 00:00:00 0:00' time_var.axis = 'T' - - #only add if user adds - if self.comid_lat_lon_z_file and os.path.exists(self.comid_lat_lon_z_file): + + # only add if user adds + if self.comid_lat_lon_z_file and \ + os.path.exists(self.comid_lat_lon_z_file): log(' lat_var', 'DEBUG', self.print_debug) - lat_var = self.cf_nc.createVariable('lat', 'f8', (self.output_id_dim_name,), - fill_value=-9999.0) - lat_var.long_name = 'latitude' - lat_var.standard_name = 'latitude' - lat_var.units = 'degrees_north' - lat_var.axis = 'Y' - + lat_var = self.cf_nc.createVariable('lat', 'f8', + (self.output_id_dim_name,), + fill_value=-9999.0) + log(' lon_var', 'DEBUG', self.print_debug) - lon_var = self.cf_nc.createVariable('lon', 'f8', (self.output_id_dim_name,), - fill_value=-9999.0) - lon_var.long_name = 'longitude' - lon_var.standard_name = 'longitude' - lon_var.units = 'degrees_east' - lon_var.axis = 'X' - + lon_var = 
self.cf_nc.createVariable('lon', 'f8', + (self.output_id_dim_name,), + fill_value=-9999.0) + + add_latlon_metadata(lat_var, lon_var) + log(' z_var', 'DEBUG', self.print_debug) - z_var = self.cf_nc.createVariable('z', 'f8', (self.output_id_dim_name,), - fill_value=-9999.0) - z_var.long_name = ('Elevation referenced to the North American ' + + z_var = self.cf_nc.createVariable('z', 'f8', + (self.output_id_dim_name,), + fill_value=-9999.0) + z_var.long_name = ('Elevation referenced to the North American ' 'Vertical Datum of 1988 (NAVD88)') z_var.standard_name = 'surface_altitude' z_var.units = 'm' z_var.axis = 'Z' z_var.positive = 'up' - + log(' crs_var', 'DEBUG', self.print_debug) crs_var = self.cf_nc.createVariable('crs', 'i4') crs_var.grid_mapping_name = 'latitude_longitude' @@ -294,85 +319,85 @@ def _initialize_output(self, time_len, id_len): def _write_comid_lat_lon_z(self): """Add latitude, longitude, and z values for each netCDF feature - - Arguments: - cf_nc -- netCDF Dataset object to be modified - lookup_filename -- full path and filename for lookup table - id_var_name -- name of Id variable - + Remarks: - Lookup table is a CSV file with COMID, Lat, Lon, and Elev_m columns. - Columns must be in that order and these must be the first four columns. + Lookup table is a CSV file with COMID, Lat, Lon, + and Elev_m columns. + Columns must be in that order and these must be the first + four columns. """ - #only add if user adds - if self.comid_lat_lon_z_file and os.path.exists(self.comid_lat_lon_z_file): - #get list of COMIDS - lookup_table = csv_to_list(self.comid_lat_lon_z_file ) - lookup_comids = np.array([int(float(row[0])) for row in lookup_table[1:]]) - + # only add if user adds + if self.comid_lat_lon_z_file and \ + os.path.exists(self.comid_lat_lon_z_file): + # get list of COMIDS + lookup_table = csv_to_list(self.comid_lat_lon_z_file) + lookup_comids = np.array([int(float(row[0])) for row in + lookup_table[1:]]) + # Get relevant arrays while we update them nc_comids = self.cf_nc.variables[self.output_id_dim_name][:] lats = self.cf_nc.variables['lat'][:] lons = self.cf_nc.variables['lon'][:] zs = self.cf_nc.variables['z'][:] - - lat_min = None - lat_max = None - lon_min = None - lon_max = None + + min_lat = None + max_lat = None + min_lon = None + max_lon = None z_min = None z_max = None - + # Process each row in the lookup table for nc_index, nc_comid in enumerate(nc_comids): try: - lookup_index = np.where(lookup_comids == nc_comid)[0][0] + 1 - except Exception: - log('COMID %s misssing in comid_lat_lon_z file' % nc_comid, + lookup_index = \ + np.where(lookup_comids == nc_comid)[0][0] + 1 + except IndexError: + log('rivid %s missing in comid_lat_lon_z file' % nc_comid, 'ERROR') - + lat = float(lookup_table[lookup_index][1]) lats[nc_index] = lat - if (lat_min) is None or lat < lat_min: - lat_min = lat - if (lat_max) is None or lat > lat_max: - lat_max = lat - + if min_lat is None or lat < min_lat: + min_lat = lat + if max_lat is None or lat > max_lat: + max_lat = lat + lon = float(lookup_table[lookup_index][2]) lons[nc_index] = lon - if (lon_min) is None or lon < lon_min: - lon_min = lon - if (lon_max) is None or lon > lon_max: - lon_max = lon - + if min_lon is None or lon < min_lon: + min_lon = lon + if max_lon is None or lon > max_lon: + max_lon = lon + z = float(lookup_table[lookup_index][3]) zs[nc_index] = z - if (z_min) is None or z < z_min: + if z_min is None or z < z_min: z_min = z - if (z_max) is None or z > z_max: + if z_max is None or z > z_max: z_max = z - + # Overwrite 
netCDF variable values self.cf_nc.variables['lat'][:] = lats self.cf_nc.variables['lon'][:] = lons self.cf_nc.variables['z'][:] = zs - + # Update metadata - if lat_min is not None: - self.cf_nc.geospatial_lat_min = lat_min - if lat_max is not None: - self.cf_nc.geospatial_lat_max = lat_max - if lon_min is not None: - self.cf_nc.geospatial_lon_min = lon_min - if lon_max is not None: - self.cf_nc.geospatial_lon_max = lon_max + if min_lat is not None: + self.cf_nc.geospatial_lat_min = min_lat + if max_lat is not None: + self.cf_nc.geospatial_lat_max = max_lat + if min_lon is not None: + self.cf_nc.geospatial_lon_min = min_lon + if max_lon is not None: + self.cf_nc.geospatial_lon_max = max_lon if z_min is not None: self.cf_nc.geospatial_vertical_min = z_min if z_max is not None: self.cf_nc.geospatial_vertical_max = z_max else: log('No comid_lat_lon_z file. Not adding values ...', 'INFO') - + def _generate_time_values(self): """ Generates time values for out nc file @@ -381,16 +406,18 @@ def _generate_time_values(self): log('writing times', 'INFO') d1970 = datetime(1970, 1, 1, tzinfo=utc) time_array = [[int((self.start_datetime - d1970).total_seconds())]] - + datetime_nc_start_simulation = self.start_datetime for raw_nc_index, raw_nc in enumerate(self.raw_nc_list): - - raw_nc_time = raw_nc.get_time_array(datetime_simulation_start=datetime_nc_start_simulation, - simulation_time_step_seconds=self.time_step_array[raw_nc_index]) - + raw_nc_time = raw_nc.get_time_array( + datetime_simulation_start=datetime_nc_start_simulation, + simulation_time_step_seconds=self.time_step_array[ + raw_nc_index]) + time_array.append(raw_nc_time) - datetime_nc_start_simulation = datetime.utcfromtimestamp(raw_nc_time[-1]) - + datetime_nc_start_simulation = \ + datetime.utcfromtimestamp(raw_nc_time[-1]) + self.cf_nc.variables['time'][:] = np.concatenate(time_array) end_date = datetime.utcfromtimestamp(self.cf_nc.variables['time'][-1]) self.cf_nc.time_coverage_start = self.start_datetime.isoformat() + 'Z' @@ -407,60 +434,70 @@ def _copy_streamflow_values(self): q_var.units = 'm^3/s' q_var.coordinates = 'time lat lon z' q_var.grid_mapping = 'crs' - q_var.source = ('Generated by the Routing Application for Parallel ' + - 'computatIon of Discharge (RAPID) river routing model.') + q_var.source = ('Generated by the Routing Application for Parallel ' + 'computatIon of Discharge (RAPID) river routing ' + 'model.') q_var.references = 'http://rapid-hub.org/' - q_var.comment = ('lat, lon, and z values taken at midpoint of river ' + + q_var.comment = ('lat, lon, and z values taken at midpoint of river ' 'reach feature') log('Copying streamflow values', 'INFO') master_begin_time_step_index = 1 - master_end_time_step_index = len(self.cf_nc.dimensions['time']) - - #to reduce RAM, copy by chunks - max_2d_dimension = 1000000000 #~8GB Max - for raw_nc_index, raw_nc in enumerate(self.raw_nc_list): - max_time_step_size = min(raw_nc.size_time, max(1, int(float(max_2d_dimension)/float(raw_nc.size_river_id)))) + # to reduce RAM, copy by chunks + max_2d_dimension = 1000000000 # ~8GB Max + for raw_nc in self.raw_nc_list: + max_time_step_size = min(raw_nc.size_time, + max(1, int(float(max_2d_dimension) / + float(raw_nc.size_river_id)))) raw_nc_begin_time_step_index = 0 - raw_nc_end_time_step_index = raw_nc.size_time - for raw_nc_time_index in xrange(0, raw_nc.size_time, max_time_step_size): - time_interval_size = max(1, min(raw_nc.size_time-raw_nc_time_index, max_time_step_size)) - - raw_nc_end_time_step_index = raw_nc_begin_time_step_index + 
time_interval_size - master_end_time_step_index = master_begin_time_step_index + time_interval_size - - q_var[:,master_begin_time_step_index:master_end_time_step_index] = raw_nc.get_qout(time_index_start=raw_nc_begin_time_step_index, - time_index_end=raw_nc_end_time_step_index) - + for raw_nc_time_index in \ + xrange(0, raw_nc.size_time, max_time_step_size): + time_interval_size = \ + max(1, min(raw_nc.size_time - raw_nc_time_index, + max_time_step_size)) + + raw_nc_end_time_step_index = \ + raw_nc_begin_time_step_index + time_interval_size + master_end_time_step_index = \ + master_begin_time_step_index + time_interval_size + + q_var[:, + master_begin_time_step_index:master_end_time_step_index]\ + = raw_nc.get_qout( + time_index_start=raw_nc_begin_time_step_index, + time_index_end=raw_nc_end_time_step_index) + master_begin_time_step_index = master_end_time_step_index raw_nc_begin_time_step_index = raw_nc_end_time_step_index log('Adding initial streamflow values', 'INFO') - #add initial flow to RAPID output file + # add initial flow to RAPID output file if self.qinit_file and self.rapid_connect_file: lookup_table = csv_to_list(self.rapid_connect_file) - lookup_comids = np.array([int(float(row[0])) for row in lookup_table]) - + lookup_comids = np.array([int(float(row[0])) for row + in lookup_table]) + init_flow_table = csv_to_list(self.qinit_file) - - for index, comid in enumerate(self.cf_nc.variables[self.output_id_dim_name][:]): + + for index, comid in enumerate( + self.cf_nc.variables[self.output_id_dim_name][:]): try: lookup_index = np.where(lookup_comids == comid)[0][0] - except Exception: + except IndexError: log('COMID %s misssing in rapid_connect file' % comid, 'ERROR') - q_var[index,0] = float(init_flow_table[lookup_index][0]) + q_var[index, 0] = float(init_flow_table[lookup_index][0]) else: - for index, comid in enumerate(self.cf_nc.variables[self.output_id_dim_name][:]): - q_var[index,0] = 0 + for index, comid in enumerate( + self.cf_nc.variables[self.output_id_dim_name][:]): + q_var[index, 0] = 0 def convert(self): """ Copies data from RAPID netCDF output to a CF-compliant netCDF file. """ - try: - log('Processing %s ...' % self.rapid_output_file_list[0], 'INFO') + log('Processing %s ...' % self.rapid_output_file_list[0]) time_start_conversion = datetime.utcnow() # Validate the raw netCDF file @@ -472,34 +509,35 @@ def convert(self): self._initialize_output(time_len, id_len) self._generate_time_values() - - #copy river ids over - self.cf_nc.variables[self.output_id_dim_name][:] = self.raw_nc_list[0].get_river_id_array() + + # copy river ids over + self.cf_nc.variables[self.output_id_dim_name][:] = \ + self.raw_nc_list[0].get_river_id_array() # Populate comid, lat, lon, z - log('writing comid lat lon z', 'INFO') + log('writing comid lat lon z') lookup_start = datetime.now() self._write_comid_lat_lon_z() duration = str((datetime.now() - lookup_start).total_seconds()) - log('Lookup Duration (s): ' + duration, 'INFO') + log('Lookup Duration (s): ' + duration) # Create a variable for streamflow. This is big, and slows down # previous steps if we do it earlier. 
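# Illustrative sketch (not part of this patch): the time-chunked copy that
# _copy_streamflow_values() performs above, shown standalone so the memory
# cap is easier to follow. The file paths are placeholders, the 'Qout'
# variable name matches the default used in this module, and both files are
# assumed to store the array as (rivid, time).
from netCDF4 import Dataset

def copy_qout_in_chunks(src_path, dst_path, max_cells=1000000000):
    """Copy a large 'Qout' array between netCDF files in time chunks."""
    with Dataset(src_path) as src, Dataset(dst_path, 'a') as dst:
        q_in = src.variables['Qout']
        q_out = dst.variables['Qout']
        num_rivers, num_times = q_in.shape
        # largest time slab that keeps rivers * times under the cell budget
        chunk = min(num_times, max(1, int(max_cells / num_rivers)))
        for start in range(0, num_times, chunk):
            end = min(start + chunk, num_times)
            q_out[:, start:end] = q_in[:, start:end]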
self._copy_streamflow_values() - - #close files + + # close files for raw_nc in self.raw_nc_list: raw_nc.close() self.cf_nc.close() - - #delete original RAPID output + + # delete original RAPID output remove_files(*self.rapid_output_file_list) - #rename nc compliant file to original name + # rename nc compliant file to original name os.rename(self.cf_compliant_file, self.rapid_output_file_list[0]) - log('Time to process %s' % (datetime.utcnow()-time_start_conversion), 'INFO') + log('Time to process %s' % + (datetime.utcnow()-time_start_conversion)) except Exception: - #delete cf RAPID output + # delete cf RAPID output remove_files(self.cf_compliant_file) - #log('Conversion Error %s' % e, 'ERROR') raise diff --git a/RAPIDpy/rapid.py b/RAPIDpy/rapid.py index cdcaa2a..87574df 100644 --- a/RAPIDpy/rapid.py +++ b/RAPIDpy/rapid.py @@ -1,100 +1,118 @@ # -*- coding: utf-8 -*- -## -## rapid.py -## RAPIDpy -## -## Created by Alan D Snow, 2015. -## Copyright © 2015 Alan D Snow. All rights reserved. -## +""" + rapid.py + RAPIDpy + Created by Alan D Snow, 2015. + License: BSD-3-Clause +""" from calendar import isleap from csv import writer as csvwriter import datetime -from dateutil.parser import parse -#from dateutil.tz import tzoffset from multiprocessing import cpu_count -import numpy as np import os -#USGS not returning tzinfo, so this is no longer needed -#from pytz import utc -from requests import get from subprocess import Popen, PIPE from time import gmtime + +from dateutil.parser import parse +import numpy as np +from requests import get import xarray -#local + from .dataset import RAPIDDataset from .helper_functions import csv_to_list, log, open_csv from .postprocess import ConvertRAPIDOutputToCF -#------------------------------------------------------------------------------ -#Main RAPID Manager Class -#------------------------------------------------------------------------------ + +# ----------------------------------------------------------------------------- +# Main RAPID Manager Class +# ----------------------------------------------------------------------------- class RAPID(object): """ - This class is designed to prepare the rapid_namelist file and run + This class is designed to prepare the rapid_namelist file and run the RAPID program. There are also other utilities added. - Attributes: - rapid_executable_location(Optional[str]): Path to the RAPID executable location. - num_processors(Optional[int]): Number of procesors to use. Default is 1. Overridden if *use_all_processors* is True. - use_all_processors(Optional[bool]): If set to True, the RAPID program will use all available processors. Default is False. - cygwin_bin_location(Optional[str]): If using Windows, this is the path to the Cygwin 'bin' directory. - mpiexec_command(Optional[str]): This is the mpi execute commmand. Default is "mpiexec". - ksp_type(Optional[str]): This is the solver type. Default is "richardson". - **kwargs(Optional[str]): Keyword arguments matching the input parameters in the RAPID namelist. + Attributes + ---------- + rapid_executable_location: str, optional + Path to the RAPID executable location. + num_processors: int, optional + Number of procesors to use. Default is 1. + Overridden if *use_all_processors* is True. + use_all_processors: bool, optional + If set to True, the RAPID program will use all available processors. + Default is False. + cygwin_bin_location: str, optional + If using Windows, this is the path to the Cygwin 'bin' directory. 
+ mpiexec_command: str, optional
+ This is the mpi execute command. Default is "mpiexec".
+ ksp_type: str, optional
+ This is the solver type. Default is "richardson".
+ **kwargs: str, optional
+ Keyword arguments matching the input parameters in the RAPID namelist.
+ Linux Example:
-
+ .. code:: python
-
+ from RAPIDpy import RAPID
-
- rapid_manager = RAPID(rapid_executable_location='~/work/rapid/run/rapid'
- use_all_processors=True,
- ZS_TauR=24*3600, #duration of routing procedure (time step of runoff data)
- ZS_dtR=15*60, #internal routing time step
- ZS_TauM=365*24*3600, #total simulation time
- ZS_dtM=24*3600 #input time step
- )
-
+
+ rapid_manager = RAPID(
+ rapid_executable_location='~/work/rapid/run/rapid',
+ use_all_processors=True,
+ ZS_TauR=24 * 3600,
+ ZS_dtR=15 * 60,
+ ZS_TauM=365 * 24 * 3600,
+ ZS_dtM=24 * 3600
+ )
+
+
+ Windows with Cygwin Example:
-
+ .. code:: python
from RAPIDpy import RAPID
-
- rapid_manager = RAPID(rapid_executable_location='C:/cygwin64/home/username/work/rapid/run/rapid',
- cygwin_bin_location='C:/cygwin64/bin',
- use_all_processors=True,
- ZS_TauR=24*3600, #duration of routing procedure (time step of runoff data)
- ZS_dtR=15*60, #internal routing time step
- ZS_TauM=365*24*3600, #total simulation time
- ZS_dtM=24*3600 #input time step
- )
+
+ cygwin_exe = 'C:/cygwin64/home/username/work/rapid/run/rapid'
+ rapid_manager = RAPID(
+ rapid_executable_location=cygwin_exe,
+ cygwin_bin_location='C:/cygwin64/bin',
+ use_all_processors=True,
+ ZS_TauR=24 * 3600,
+ ZS_dtR=15 * 60,
+ ZS_TauM=365 * 24 * 3600,
+ ZS_dtM=24 * 3600
+ )
+
+ """
- def __init__(self,
- rapid_executable_location="",
- num_processors=1,
- use_all_processors=False,
+ # pylint: disable=too-many-instance-attributes
+ def __init__(self,
+ rapid_executable_location="",
+ num_processors=1,
+ use_all_processors=False,
cygwin_bin_location="",
- mpiexec_command="mpiexec",
+ mpiexec_command="mpiexec",
ksp_type="richardson", **kwargs): """ Initialize the class with variables given by the user """
- if os.name == "nt" and (not cygwin_bin_location or not os.path.exists(cygwin_bin_location))\
- and rapid_executable_location:
- raise Exception("Required to have cygwin_bin_location set if using windows!")
-
+ if os.name == "nt" and not \
+ (cygwin_bin_location and os.path.exists(cygwin_bin_location))\
+ and rapid_executable_location:
+ raise Exception("Required to have cygwin_bin_location set "
+ "if using windows!")
+
self._rapid_executable_location = rapid_executable_location self._cygwin_bin_location = cygwin_bin_location
- self._cygwin_bash_exe_location = os.path.join(cygwin_bin_location, "bash.exe")
+ self._cygwin_bash_exe_location = \
+ os.path.join(cygwin_bin_location, "bash.exe")
self._mpiexec_command = mpiexec_command self._ksp_type = ksp_type
-
- #use all processors makes precedent over num_processors arg
- if use_all_processors == True:
+
+ # use_all_processors takes precedence over the num_processors arg
+ if use_all_processors is True:
self._num_processors = cpu_count() elif num_processors > cpu_count(): log("Num processors requested exceeded max. 
Set to max ...", @@ -102,121 +120,128 @@ def __init__(self, self._num_processors = cpu_count() else: self._num_processors = num_processors - - #******************************************************************************* - #Runtime options - #******************************************************************************* + + # --------------------------------------------------------------------- + # Runtime options + # --------------------------------------------------------------------- self.BS_opt_Qinit = False - #!.false. --> no read initial flow .true. --> read initial flow + # !.false. --> no read initial flow .true. --> read initial flow self.BS_opt_Qfinal = False - #!.false. --> no write final flow .true. --> write final flow + # !.false. --> no write final flow .true. --> write final flow self.BS_opt_dam = False - #!.false. --> no dam model used .true. --> dam model used + # !.false. --> no dam model used .true. --> dam model used self.BS_opt_for = False - #!.false. --> no forcing .true. --> forcing + # !.false. --> no forcing .true. --> forcing self.BS_opt_influence = False - #!.false. --> no output influence .true. --> output influence + # !.false. --> no output influence .true. --> output influence self.IS_opt_routing = 1 - #!1 --> matrix-based Muskingum 2 --> traditional Muskingum - #!3 --> Transbnd. matrix-based + # !1 --> matrix-based Muskingum 2 --> traditional Muskingum + # !3 --> Transbnd. matrix-based self.IS_opt_run = 1 - #!1 --> regular run 2 --> parameter optimization + # !1 --> regular run 2 --> parameter optimization self.IS_opt_phi = 1 - #!1 --> phi1 2 --> phi2 - - #******************************************************************************* - #Temporal information - #******************************************************************************* - #NOTE: ALL TIME IN SECONDS! 
- #ALWAYS USED - self.ZS_TauR = 0 #duration of routing procedure (time step of runoff data) - self.ZS_dtR = 0 #internal routing time step - #ONLY FOR REGULAR RUN - self.ZS_TauM = 0 #total simulation time - self.ZS_dtM = 0 #input time step - #ONLY FOR OPTIMIZATION RUN - self.ZS_TauO = 0 #total optimization time - self.ZS_dtO = 0 #observation time step - #FORCING MODE (replace some values with observations) - self.ZS_dtF = 0 #time step of forcing data - - #******************************************************************************* - #Domain in which input data is available - #******************************************************************************* - self.IS_riv_tot = 0 #number of river reaches in rapid connect file - self.rapid_connect_file = '' #path to rapid_connect file - self.IS_max_up = 0 #maximum number of ustream segments - self.Vlat_file = '' #path to runoff file - - #******************************************************************************* - #Domain in which model runs - #******************************************************************************* - self.IS_riv_bas = 0 #number of river reaches in subbasin - self.riv_bas_id_file = '' #subbasin reach id file - - #******************************************************************************* - #Initial instantaneous flow file - #******************************************************************************* - self.Qinit_file = '' #initial flow file (same order as rapid_connect) - - #******************************************************************************* - #Final instantaneous flow file - #******************************************************************************* - self.Qfinal_file = '' #path to output final flow file - - #******************************************************************************* - #Available dam data - #******************************************************************************* - self.IS_dam_tot = 0 #number of dams - self.dam_tot_id_file = '' #ids of dam location - - #******************************************************************************* - #Dam data used - #******************************************************************************* - self.IS_dam_use = 0 #number in subset of dam data to use - self.dam_use_id_file = '' #ids of subset of dams - - #******************************************************************************* - #Available forcing data - #******************************************************************************* + # !1 --> phi1 2 --> phi2 + # --------------------------------------------------------------------- + # Temporal information + # --------------------------------------------------------------------- + # NOTE: ALL TIME IN SECONDS! 
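# For example, the examples in this module's docstrings route daily runoff
# data over a one-year simulation with ZS_TauR=24*3600, ZS_dtR=15*60,
# ZS_TauM=365*24*3600 and ZS_dtM=24*3600.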
+ # ALWAYS USED + self.ZS_TauR = 0 + # duration of routing procedure (time step of runoff data) + self.ZS_dtR = 0 + # internal routing time step + # ONLY FOR REGULAR RUN + self.ZS_TauM = 0 + # total simulation time + self.ZS_dtM = 0 + # input time step + # ONLY FOR OPTIMIZATION RUN + self.ZS_TauO = 0 + # total optimization time + self.ZS_dtO = 0 + # observation time step + # FORCING MODE (replace some values with observations) + self.ZS_dtF = 0 + # time step of forcing data + # --------------------------------------------------------------------- + # Domain in which input data is available + # --------------------------------------------------------------------- + self.IS_riv_tot = 0 + # number of river reaches in rapid connect file + self.rapid_connect_file = '' + # path to rapid_connect file + self.IS_max_up = 0 + # maximum number of ustream segments + self.Vlat_file = '' + # path to runoff file + # --------------------------------------------------------------------- + # Domain in which model runs + # --------------------------------------------------------------------- + self.IS_riv_bas = 0 + # number of river reaches in subbasin + self.riv_bas_id_file = '' + # subbasin reach id file + # --------------------------------------------------------------------- + # Initial instantaneous flow file + # --------------------------------------------------------------------- + self.Qinit_file = '' + # initial flow file (same order as rapid_connect) + # --------------------------------------------------------------------- + # Final instantaneous flow file + # --------------------------------------------------------------------- + self.Qfinal_file = '' + # path to output final flow file + # --------------------------------------------------------------------- + # Available dam data + # --------------------------------------------------------------------- + self.IS_dam_tot = 0 + # number of dams + self.dam_tot_id_file = '' + # ids of dam location + # --------------------------------------------------------------------- + # Dam data used + # --------------------------------------------------------------------- + self.IS_dam_use = 0 + # number in subset of dam data to use + self.dam_use_id_file = '' + # ids of subset of dams + # --------------------------------------------------------------------- + # Available forcing data + # --------------------------------------------------------------------- self.IS_for_tot = 0 self.for_tot_id_file = '' self.Qfor_file = '' - - #******************************************************************************* - #Forcing data used as model runs - #******************************************************************************* + # --------------------------------------------------------------------- + # Forcing data used as model runs + # --------------------------------------------------------------------- self.IS_for_use = 0 self.for_use_id_file = '' - - #******************************************************************************* - #File where max (min) of absolute values of b (QoutR) are stored - #******************************************************************************* + # --------------------------------------------------------------------- + # File where max (min) of absolute values of b (QoutR) are stored + # --------------------------------------------------------------------- self.babsmax_file = '' self.QoutRabsmin_file = '' self.QoutRabsmax_file = '' - - #******************************************************************************* - #Regular 
model run - #******************************************************************************* + # --------------------------------------------------------------------- + # Regular model run + # --------------------------------------------------------------------- self.k_file = '' self.x_file = '' self.Qout_file = '' - - #******************************************************************************* - #Optimization - #******************************************************************************* + # --------------------------------------------------------------------- + # Optimization + # --------------------------------------------------------------------- self.ZS_phifac = 0 - #------------------------------------------------------------------------------ - #Routing parameters - #------------------------------------------------------------------------------ + # --------------------------------------------------------------------- + # Routing parameters + # --------------------------------------------------------------------- self.kfac_file = '' - self.xfac_file = '' + self.xfac_file = '' self.ZS_knorm_init = 0 self.ZS_xnorm_init = 0 - #------------------------------------------------------------------------------ - #Gage observations - #------------------------------------------------------------------------------ + # --------------------------------------------------------------------- + # Gage observations + # --------------------------------------------------------------------- self.IS_obs_tot = 0 self.obs_tot_id_file = '' self.Qobs_file = '' @@ -224,11 +249,8 @@ def __init__(self, self.IS_obs_use = 0 self.obs_use_id_file = '' self.IS_strt_opt = 0 - - - + self.update_parameters(**kwargs) - def _get_cygwin_path(self, windows_path): """ @@ -236,13 +258,13 @@ def _get_cygwin_path(self, windows_path): """ conv_cmd = [os.path.join(self._cygwin_bin_location, "cygpath.exe"), "-u", windows_path] - process = Popen(conv_cmd, + process = Popen(conv_cmd, stdout=PIPE, stderr=PIPE, shell=False) out, err = process.communicate() if err: print(err) raise Exception(err) - + return out.strip() def _create_symlink_cygwin(self, initial_path, final_path): @@ -250,81 +272,92 @@ def _create_symlink_cygwin(self, initial_path, final_path): Use cygqin to generate symbolic link """ symlink_cmd = [os.path.join(self._cygwin_bin_location, "ln.exe"), - "-s", self._get_cygwin_path(initial_path), + "-s", self._get_cygwin_path(initial_path), self._get_cygwin_path(final_path)] - process = Popen(symlink_cmd, + process = Popen(symlink_cmd, stdout=PIPE, stderr=PIPE, shell=False) out, err = process.communicate() if err: print(err) raise Exception(err) - + return out.strip() def _dos2unix_cygwin(self, file_path): """ Use cygwin to convert file to unix format """ - dos2unix_cmd = [os.path.join(self._cygwin_bin_location, "dos2unix.exe"), - self._get_cygwin_path(file_path)] - process = Popen(dos2unix_cmd, + dos2unix_cmd = \ + [os.path.join(self._cygwin_bin_location, "dos2unix.exe"), + self._get_cygwin_path(file_path)] + process = Popen(dos2unix_cmd, stdout=PIPE, stderr=PIPE, shell=False) process.communicate() - + def update_parameters(self, **kwargs): """ - You can add or update rapid namelist parameters by using the name of + You can add or update rapid namelist parameters by using the name of the variable in the rapid namelist file (this is case sensitive). - Parameters: - **kwargs(Optional[str]): Keyword arguments matching the input parameters in the RAPID namelist. 
+ Parameters + ---------- + **kwargs: str, optional + Keyword arguments matching the input parameters + in the RAPID namelist. + Example: - + .. code:: python from RAPIDpy import RAPID - + rapid_manager = RAPID( - #ADD PARAMETERS - ) - - rapid_manager.update_parameters(rapid_connect_file='../rapid-io/input/rapid_connect.csv', - Vlat_file='../rapid-io/input/m3_riv.nc', - riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', - k_file='../rapid-io/input/k.csv', - x_file='../rapid-io/input/x.csv', - Qout_file='../rapid-io/output/Qout.nc', - ) - - + rapid_executable_location='~/work/rapid/run/rapid' + use_all_processors=True, + ZS_TauR=24 * 3600, + ZS_dtR=15 * 60, + ZS_TauM=365 * 24 * 3600, + ZS_dtM=24 * 3600 + ) + + rapid_manager.update_parameters( + rapid_connect_file='../rapid-io/input/rapid_connect.csv', + Vlat_file='../rapid-io/input/m3_riv.nc', + riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', + k_file='../rapid-io/input/k.csv', + x_file='../rapid-io/input/x.csv', + Qout_file='../rapid-io/output/Qout.nc', + ) + """ - #set arguments based off of user input + # set arguments based off of user input for key, value in list(kwargs.items()): if key in dir(self) and not key.startswith('_'): setattr(self, key, value) else: log("Invalid RAPID parameter %s." % key, "ERROR") - + def update_reach_number_data(self): """ Update the reach number data for the namelist based on input files. - .. warning:: You need to make sure you set *rapid_connect_file* and *riv_bas_id_file* before running this function. + .. warning:: You need to make sure you set *rapid_connect_file* + and *riv_bas_id_file* before running this function. + Example: - + .. code:: python - + from RAPIDpy import RAPID - + rapid_manager = RAPID( - #ADD PARAMETERS - rapid_connect_file='../rapid-io/input/rapid_connect.csv', - riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', - ) - + rapid_connect_file='../rapid-io/input/rapid_connect.csv', + riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', + ) + rapid_manager.update_reach_number_data() @@ -335,37 +368,39 @@ def update_reach_number_data(self): from RAPIDpy import RAPID rapid_manager = RAPID( - #ADD PARAMETERS - rapid_connect_file='../rapid-io/input/rapid_connect.csv', - riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', - ) - - rapid_manager.update_parameters(Qfor_file=qfor_file, - for_tot_id_file=for_tot_id_file, - for_use_id_file=for_use_id_file, - ZS_dtF=3*60*60, - BS_opt_for=True) + rapid_connect_file='../rapid-io/input/rapid_connect.csv', + riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', + Qfor_file='../rapid-io/input/qfor_file.csv', + for_tot_id_file='../rapid-io/input/for_tot_id_file.csv', + for_use_id_file='../rapid-io/input/for_use_id_file.csv', + ZS_dtF=3*60*60, + BS_opt_for=True + ) rapid_manager.update_reach_number_data() """ - if not self.rapid_connect_file: - log("Missing rapid_connect_file. Please set before running this function ...", + log("Missing rapid_connect_file. " + "Please set before running this function ...", "ERROR") if not self.riv_bas_id_file: - log("Missing riv_bas_id_file. Please set before running this function ...", + log("Missing riv_bas_id_file. 
" + "Please set before running this function ...", "ERROR") - #get rapid connect info - rapid_connect_table = np.loadtxt(self.rapid_connect_file, ndmin=2, delimiter=",", dtype=int) + # get rapid connect info + rapid_connect_table = np.loadtxt(self.rapid_connect_file, + ndmin=2, delimiter=",", dtype=int) self.IS_riv_tot = int(rapid_connect_table.shape[0]) - self.IS_max_up = int(rapid_connect_table[:,2].max()) - - #get riv_bas_id info - riv_bas_id_table = np.loadtxt(self.riv_bas_id_file, ndmin=1, delimiter=",", usecols=(0,), dtype=int) + self.IS_max_up = int(rapid_connect_table[:, 2].max()) + + # get riv_bas_id info + riv_bas_id_table = np.loadtxt(self.riv_bas_id_file, + ndmin=1, delimiter=",", + usecols=(0,), dtype=int) self.IS_riv_bas = int(riv_bas_id_table.size) # add the forcing files @@ -375,9 +410,9 @@ def update_reach_number_data(self): "WARNING") else: # get riv_bas_id info - for_tot_id_table = np.loadtxt(self.for_tot_id_file, ndmin=1, - delimiter=",", usecols=(0,), - dtype=int) + for_tot_id_table = np.loadtxt(self.for_tot_id_file, + ndmin=1, delimiter=",", + usecols=(0,), dtype=int) self.IS_for_tot = int(for_tot_id_table.size) if not self.for_use_id_file: @@ -386,32 +421,31 @@ def update_reach_number_data(self): "WARNING") else: # get riv_bas_id info - for_use_id_table = np.loadtxt(self.for_use_id_file, ndmin=1, - delimiter=",", usecols=(0,), - dtype=int) + for_use_id_table = np.loadtxt(self.for_use_id_file, + ndmin=1, delimiter=",", + usecols=(0,), dtype=int) self.IS_for_use = int(for_use_id_table.size) - def update_simulation_runtime(self): """ Updates the total simulation duration from the m3 file (Vlat_file) and the time step (ZS_TauR). - - .. warning:: You need to set the m3 file (Vlat_file) and the + + .. warning:: You need to set the m3 file (Vlat_file) and the time step (ZS_TauR) before runnning this function. - + + Example: - + .. code:: python - + from RAPIDpy import RAPID - + rapid_manager = RAPID( - #ADD PARAMETERS - Vlat_file='../rapid-io/input/m3_riv.csv', - ZS_TauR=3*3600, - ) - + Vlat_file='../rapid-io/input/m3_riv.csv', + ZS_TauR=3*3600, + ) + rapid_manager.update_simulation_runtime() """ if not self.Vlat_file or not os.path.exists(self.Vlat_file): @@ -421,10 +455,10 @@ def update_simulation_runtime(self): if self.ZS_TauR <= 0: log("Missing routing time step ...", "ERROR") - + try: self.ZS_TauR = int(self.ZS_TauR) - except Exception: + except ValueError: log("Invalid routing time step: {0} ...".format(self.ZS_TauR), "ERROR") @@ -436,9 +470,11 @@ def generate_namelist_file(self, rapid_namelist_file): """ Generate rapid_namelist file. - Parameters: - rapid_namelist_file(str): Path of namelist file to generate from - parameters added to the RAPID manager. + Parameters + ---------- + rapid_namelist_file: str + Path of namelist file to generate from + parameters added to the RAPID manager. 
""" log("Generating RAPID namelist file ...", "INFO") @@ -446,33 +482,36 @@ def generate_namelist_file(self, rapid_namelist_file): os.remove(rapid_namelist_file) except OSError: pass - - with open(rapid_namelist_file,'w') as new_file: + + with open(rapid_namelist_file, 'w') as new_file: new_file.write('&NL_namelist\n') for attr, value in sorted(list(self.__dict__.items())): if not attr.startswith('_'): if attr.startswith('BS'): - new_file.write("%s = .%s.\n" % (attr, str(value).lower())) + new_file.write("{0} = .{1}.\n" + .format(attr, str(value).lower())) elif isinstance(value, int): new_file.write("%s = %s\n" % (attr, value)) else: if value: - #file path if os.name == "nt": - #if windows generate file with cygpath + # if windows generate file with cygpath value = self._get_cygwin_path(value) new_file.write("%s = \'%s\'\n" % (attr, value)) new_file.write("/\n") - - def update_namelist_file(self, rapid_namelist_file, new_namelist_file=None): + + def update_namelist_file(self, rapid_namelist_file, + new_namelist_file=None): """ Update existing namelist file with new parameters - Parameters: - rapid_namelist_file(str): Path of namelist file to use in the simulation. - It will be updated with any parameters added to - the RAPID manager. - new_namelist_file(Optional[str]): Path to output the updated namelist file. + Parameters + ---------- + rapid_namelist_file: str + Path of namelist file to use in the simulation. It will be + updated with any parameters added to the RAPID manager. + new_namelist_file: str, optional + Path to output the updated namelist file. """ if os.path.exists(rapid_namelist_file) and rapid_namelist_file: log("Adding missing inputs from RAPID input file ...", @@ -485,22 +524,20 @@ def update_namelist_file(self, rapid_namelist_file, new_namelist_file=None): line_split = line.split("=") attr = line_split[0].strip() value = None - if len(line_split)>1: - value = line_split[1].strip().replace("'", "").replace('"', "") - #convert integers to integers + if len(line_split) > 1: + value = line_split[1].strip()\ + .replace("'", "").replace('"', "") + # convert integers to integers try: value = int(value) - except Exception: + except ValueError: pass - #remove dots from beginning & end of value + # remove dots from beginning & end of value if attr.startswith('BS'): - value = value.replace(".", "") - elif attr in self._no_value_attr_list: - value = True - #add attribute if exists - if attr in dir(self) \ - and not attr.startswith('_'): - #set attribute if not set already + value = value.replace(".", "") + # add attribute if exists + if attr in dir(self) and not attr.startswith('_'): + # set attribute if not set already if not getattr(self, attr): setattr(self, attr, value) else: @@ -509,303 +546,328 @@ def update_namelist_file(self, rapid_namelist_file, new_namelist_file=None): if new_namelist_file is None: new_namelist_file = rapid_namelist_file - + self.generate_namelist_file(new_namelist_file) else: log("RAPID namelist file to update not found.", "ERROR") - - def make_output_CF_compliant(self, + + def make_output_cf_compliant(self, simulation_start_datetime, comid_lat_lon_z_file="", project_name="Normal RAPID project"): """ - This function converts the RAPID output to be CF compliant. This will require - a *comid_lat_lon_z.csv* file (See: :func:`~RAPIDpy.gis.centroid.FlowlineToPoint` - to generate the file). - - .. note:: It prepends time an initial flow to your simulation from the *qinit_file*. - If no qinit file is given, an initial value of zero is added. 
-
+ This function converts the RAPID output to be CF compliant.
+ This will require a *comid_lat_lon_z.csv* file
+ (See: :func:`~RAPIDpy.gis.centroid.FlowlineToPoint` to
+ generate the file).
+
+ .. note:: It prepends a time step with the initial flow to your
+ simulation, taken from the *qinit_file*. If no qinit file is
+ given, an initial value of zero is added.
+
+ .. warning:: This will delete your original Qout file.
+
+ Parameters
+ ----------
+ simulation_start_datetime: datetime
+ Datetime object with the start date of the simulation.
+ comid_lat_lon_z_file: str, optional
+ Path to the *comid_lat_lon_z.csv* file. If none given,
+ spatial information will be skipped.
+ project_name: str, optional
+ Name of project to add to the RAPID output file.
+ Example:
-
+ .. code:: python
- from RAPIDpy import RAPID
- rapid_manager = RAPID(rapid_executable_location='~/work/rapid/run/rapid'
- use_all_processors=True,
- ZS_TauR=24*3600,
- ZS_dtR=15*60,
- ZS_TauM=365*24*3600,
- ZS_dtM=24*3600
- rapid_connect_file='../rapid-io/input/rapid_connect.csv',
- Vlat_file='../rapid-io/input/m3_riv.nc',
- riv_bas_id_file='../rapid-io/input/riv_bas_id.csv',
- k_file='../rapid-io/input/k.csv',
- x_file='../rapid-io/input/x.csv',
- Qout_file='../rapid-io/output/Qout.nc',
- )
+ rapid_manager = RAPID(
+ rapid_executable_location='~/work/rapid/run/rapid',
+ use_all_processors=True,
+ ZS_TauR=24*3600,
+ ZS_dtR=15*60,
+ ZS_TauM=365*24*3600,
+ ZS_dtM=24*3600,
+ rapid_connect_file='../rapid-io/input/rapid_connect.csv',
+ Vlat_file='../rapid-io/input/m3_riv.nc',
+ riv_bas_id_file='../rapid-io/input/riv_bas_id.csv',
+ k_file='../rapid-io/input/k.csv',
+ x_file='../rapid-io/input/x.csv',
+ Qout_file='../rapid-io/output/Qout.nc'
+ )
rapid_manager.run()
-
- rapid_manager.make_output_CF_compliant(simulation_start_datetime=datetime.datetime(1980, 1, 1),
- comid_lat_lon_z_file='../rapid-io/input/comid_lat_lon_z.csv',
- project_name="ERA Interim Historical flows by US Army ERDC")
+
+ rapid_manager.make_output_cf_compliant(
+ simulation_start_datetime=datetime.datetime(1980, 1, 1),
+ comid_lat_lon_z_file='../rapid-io/input/comid_lat_lon_z.csv',
+ project_name="ERA Interim Historical flows by US Army ERDC"
+ )
+
"""
- need_to_convert = True
with RAPIDDataset(self.Qout_file) as qout_nc:
- need_to_convert = not qout_nc.is_time_variable_valid()
- if not need_to_convert:
- log("RAPID Qout file already CF compliant ...",
- "INFO")
- else:
- cv = ConvertRAPIDOutputToCF(rapid_output_file=self.Qout_file, #location of timeseries output file
- start_datetime=simulation_start_datetime, #time of the start of the simulation time
- time_step=self.ZS_TauR, #time step of simulation in seconds
- qinit_file=self.Qinit_file, #RAPID qinit file
- comid_lat_lon_z_file=comid_lat_lon_z_file, #path to comid_lat_lon_z file
- rapid_connect_file=self.rapid_connect_file, #path to RAPID connect file
- project_name=project_name, #name of your project
- output_id_dim_name='rivid', #name of ID dimension in output file, typically COMID or FEATUREID
- output_flow_var_name='Qout', #name of streamflow variable in output file, typically Qout or m3_riv
- print_debug=False)
- cv.convert()
-
+ if qout_nc.is_time_variable_valid():
+ log("RAPID Qout file already CF compliant 
...", + "INFO") + return + + crv = ConvertRAPIDOutputToCF( + rapid_output_file=self.Qout_file, + start_datetime=simulation_start_datetime, + time_step=self.ZS_TauR, + qinit_file=self.Qinit_file, + comid_lat_lon_z_file=comid_lat_lon_z_file, + rapid_connect_file=self.rapid_connect_file, + project_name=project_name, + output_id_dim_name='rivid', + output_flow_var_name='Qout', + print_debug=False + ) + crv.convert() + def run(self, rapid_namelist_file=""): """ Run RAPID program and generate file based on inputs This will generate your rapid_namelist file and run RAPID from wherever you call this script (your working directory). - - Parameters: - rapid_namelist_file(Optional(str)): Path of namelist file to use in the simulation. - It will be updated with any parameters added to - the RAPID manager. - + + Parameters + ---------- + rapid_namelist_file: str, optional + Path of namelist file to use in the simulation. + It will be updated with any parameters added to the RAPID manager. + + Linux Example: - + .. code:: python - + from RAPIDpy import RAPID - - rapid_manager = RAPID(rapid_executable_location='~/work/rapid/src/rapid' - use_all_processors=True, - ) - - rapid_manager.update_parameters(rapid_connect_file='../rapid-io/input/rapid_connect.csv', - Vlat_file='../rapid-io/input/m3_riv.nc', - riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', - k_file='../rapid-io/input/k.csv', - x_file='../rapid-io/input/x.csv', - Qout_file='../rapid-io/output/Qout.nc', - ) - + + rapid_manager = RAPID( + rapid_executable_location='~/work/rapid/src/rapid' + use_all_processors=True, + ) + + rapid_manager.update_parameters( + rapid_connect_file='../rapid-io/input/rapid_connect.csv', + Vlat_file='../rapid-io/input/m3_riv.nc', + riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', + k_file='../rapid-io/input/k.csv', + x_file='../rapid-io/input/x.csv', + Qout_file='../rapid-io/output/Qout.nc', + ) + rapid_manager.update_reach_number_data() rapid_manager.update_simulation_runtime() - rapid_manager.run(rapid_namelist_file='../rapid-io/input/rapid_namelist') - + rapid_manager.run( + rapid_namelist_file='../rapid-io/input/rapid_namelist') + + Linux Reservoir Forcing Flows Example: - + .. code:: python - + from RAPIDpy import RAPID - rapid_manager = RAPID(rapid_executable_location='~/work/rapid/src/rapid', - num_processors=4, - IS_for_tot=4, - IS_for_use=4, - for_tot_id_file='../rapid-io/input/dam_id.csv', - for_use_id_file='../rapid-io/input/dam_id.csv', - Qfor_file='../rapid-io/input/qout_dams.csv', - ZS_dtF=86400, - BS_opt_for=True, - ) - - rapid_manager.run(rapid_namelist_file='../rapid-io/input/rapid_namelist_regular_run') - + rapid_manager = RAPID( + rapid_executable_location='~/work/rapid/src/rapid', + num_processors=4, + IS_for_tot=4, + IS_for_use=4, + for_tot_id_file='../rapid-io/input/dam_id.csv', + for_use_id_file='../rapid-io/input/dam_id.csv', + Qfor_file='../rapid-io/input/qout_dams.csv', + ZS_dtF=86400, + BS_opt_for=True, + ) + + rapid_manager.run( + rapid_namelist_file='../rapid-io/input/rapid_namelist_regular') + Windows with Cygwin Example: - + .. 
code:: python - + from RAPIDpy import RAPID from os import path - - rapid_manager = RAPID(rapid_executable_location='C:/cygwin64/home/username/work/rapid/run/rapid', - cygwin_bin_location='C:/cygwin64/bin', - use_all_processors=True, - ZS_TauR=24*3600, - ZS_dtR=15*60, - ZS_TauM=365*24*3600, - ZS_dtM=24*3600 - ) - - rapid_input_folder = 'C:/cygwin64/home/username/work/rapid-io/input' - rapid_output_folder = 'C:/cygwin64/home/username/work/rapid-io/output' - rapid_manager.update_parameters(rapid_connect_file=path.join(rapid_input_folder, 'rapid_connect.csv'), - Vlat_file=path.join(rapid_input_folder, 'm3_riv.nc'), - riv_bas_id_file=path.join(rapid_input_folder, 'riv_bas_id.csv'), - k_file=path.join(rapid_input_folder, 'k.csv'), - x_file=path.join(rapid_input_folder, 'x.csv'), - Qout_file=path.join(rapid_output_folder, 'Qout.nc'), - ) - + + rapid_exe_path = 'C:/cygwin64/home/username/rapid/run/rapid', + rapid_manager = RAPID( + rapid_executable_location=rapid_exe_path, + cygwin_bin_location='C:/cygwin64/bin', + use_all_processors=True, + ZS_TauR=24*3600, + ZS_dtR=15*60, + ZS_TauM=365*24*3600, + ZS_dtM=24*3600 + ) + + rapid_input = 'C:/cygwin64/home/username/rapid-io/input' + rapid_output = 'C:/cygwin64/home/username/rapid-io/output' + rapid_manager.update_parameters( + rapid_connect_file=path.join(rapid_input, 'rapid_connect.csv'), + Vlat_file=path.join(rapid_input, 'm3_riv.nc'), + riv_bas_id_file=path.join(rapid_input, 'riv_bas_id.csv'), + k_file=path.join(rapid_input, 'k.csv'), + x_file=path.join(rapid_input, 'x.csv'), + Qout_file=path.join(rapid_output, 'Qout.nc'), + ) + rapid_manager.update_reach_number_data() rapid_manager.update_simulation_runtime() rapid_manager.run() """ - if not self._rapid_executable_location or not self._rapid_executable_location: - log("Missing rapid_executable_location. Please set before running this function ...", + if not self._rapid_executable_location: + log("Missing rapid_executable_location. 
" + "Please set before running this function ...", "ERROR") time_start = datetime.datetime.utcnow() temp_rapid_namelist_file = os.path.join(os.getcwd(), "rapid_namelist") - + if not rapid_namelist_file or not os.path.exists(rapid_namelist_file): - #generate input file if it does not exist + # generate input file if it does not exist self.generate_namelist_file(temp_rapid_namelist_file) else: - #update existing file - self.update_namelist_file(rapid_namelist_file, temp_rapid_namelist_file) + # update existing file + self.update_namelist_file(rapid_namelist_file, + temp_rapid_namelist_file) - local_rapid_executable_location = os.path.join(os.path.dirname(temp_rapid_namelist_file), "rapid_exe_symlink") + local_rapid_executable_location = \ + os.path.join(os.path.dirname(temp_rapid_namelist_file), + "rapid_exe_symlink") def rapid_cleanup(*args): """ Cleans up the rapid files generated by the process """ for arg in args: - #remove files + # remove files try: os.remove(arg) except OSError: pass - #create link to RAPID if needed + # create link to RAPID if needed temp_link_to_rapid = "" - if not self._rapid_executable_location == local_rapid_executable_location: + # pylint: disable=no-member + if self._rapid_executable_location != \ + local_rapid_executable_location: rapid_cleanup(local_rapid_executable_location) if os.name == "nt": - self._create_symlink_cygwin(self._rapid_executable_location, + self._create_symlink_cygwin(self._rapid_executable_location, local_rapid_executable_location) else: - os.symlink(self._rapid_executable_location, local_rapid_executable_location) + os.symlink(self._rapid_executable_location, + local_rapid_executable_location) temp_link_to_rapid = local_rapid_executable_location - - #run RAPID + # run RAPID log("Running RAPID ...", "INFO") - run_rapid_script = "" if os.name == "nt": - run_rapid_script = os.path.join(os.getcwd(), "run_rapid.sh") - with open(run_rapid_script, "w") as run_rapid: - run_rapid.write("#!/bin/sh\n") - run_rapid.write("cd {}\n".format(self._get_cygwin_path(os.getcwd()))) - if self._num_processors > 1: - run_rapid.write("{0} -np {1} {2} -ksp_type {3}\n".format(self._mpiexec_command, - self._num_processors, - self._get_cygwin_path(local_rapid_executable_location), - self._ksp_type)) - else: - #htcondor will not allow mpiexec for single processor jobs - #this was added for that purpose - run_rapid.write("{0} -ksp_type {1}\n".format(self._get_cygwin_path(local_rapid_executable_location), - self._ksp_type)) - - - self._dos2unix_cygwin(run_rapid_script) - run_rapid_command = [self._cygwin_bash_exe_location, "-l", "-c", - self._get_cygwin_path(run_rapid_script)] + local_rapid_executable_location = \ + self._get_cygwin_path(local_rapid_executable_location) - else: - #htcondor will not allow mpiexec for single processor jobs - #this was added for that purpose - run_rapid_command = [local_rapid_executable_location, - "-ksp_type", self._ksp_type] - - if self._num_processors > 1: - run_rapid_command = [self._mpiexec_command, "-n", str(self._num_processors), - local_rapid_executable_location, - "-ksp_type", self._ksp_type] - - process = Popen(run_rapid_command, + # htcondor will not allow mpiexec for single processor jobs + # this was added for that purpose + run_rapid_command = [local_rapid_executable_location, + "-ksp_type", self._ksp_type] + + if self._num_processors > 1: + run_rapid_command = [self._mpiexec_command, + "-n", str(self._num_processors)] \ + + run_rapid_command + + process = Popen(run_rapid_command, stdout=PIPE, stderr=PIPE, shell=False) out, 
err = process.communicate() if err: - rapid_cleanup(temp_link_to_rapid, temp_rapid_namelist_file, run_rapid_script) + rapid_cleanup(temp_link_to_rapid, temp_rapid_namelist_file) raise Exception(err) else: log('RAPID output:', "INFO") for line in out.split(b'\n'): print(line) - rapid_cleanup(temp_link_to_rapid, temp_rapid_namelist_file, run_rapid_script) + rapid_cleanup(temp_link_to_rapid, temp_rapid_namelist_file) log("Time to run RAPID: %s" % (datetime.datetime.utcnow()-time_start), "INFO") - def generate_qinit_from_past_qout(self, qinit_file, time_index=-1, out_datetime=None): + def generate_qinit_from_past_qout(self, qinit_file, time_index=-1, + out_datetime=None): """ Generate qinit from a RAPID qout file - Parameters: - qinit_file(str): Path to output qinit_file. - time_index(Optional[int]): Index of simulation to generate initial flow file. Default is the end. - out_datetime(Optional[datetime]): Datetime object containing time of initialization. + Parameters + ---------- + qinit_file: str + Path to output qinit_file. + time_index: int, optional + Index of simulation to generate initial flow file. + Default is the last index. + out_datetime: :obj:`datetime.datetime`, optional + Datetime object containing time of initialization. + Example: - + .. code:: python - + from RAPIDpy import RAPID - - rapid_manager = RAPID(Qout_file='/output_mississippi-nfie/Qout_k2v1_2005to2009.nc', - rapid_connect_file='/input_mississippi_nfie/rapid_connect_ECMWF.csv' - ) - - rapid_manager.generate_qinit_from_past_qout(qinit_file='/input_mississippi_nfie/Qinit_2008_flood.csv', - time_index=10162) - """ + rapid_manager = RAPID( + Qout_file='/output_mississippi-nfie/Qout_k2v1_2005to2009.nc', + rapid_connect_file='/input_mississippi_nfie/rapid_connect.csv' + ) + + rapid_manager.generate_qinit_from_past_qout( + qinit_file='/input_mississippi_nfie/Qinit_2008_flood.csv', + time_index=10162 + ) + + """ if not self.Qout_file or not os.path.exists(self.Qout_file): - log('Missing Qout_file. Please set before running this function ...', + log('Missing Qout_file. ' + 'Please set before running this function ...', "ERROR") if not self.rapid_connect_file or not self.rapid_connect_file: - log('Missing rapid_connect file. Please set before running this function ...', + log('Missing rapid_connect file. ' + 'Please set before running this function ...', "ERROR") - + log("Generating qinit file from qout file ...", "INFO") - #get information from dataset + # get information from dataset with xarray.open_dataset(self.Qout_file) as qds: rivid_array = qds.rivid.values if out_datetime is None: streamflow_values = qds.isel(time=time_index).Qout.values else: streamflow_values = qds.sel(time=str(out_datetime)).Qout.values - + log("Reordering data ...", "INFO") - stream_id_array = np.loadtxt(self.rapid_connect_file, ndmin=1, delimiter=",", usecols=(0,), dtype=int) + stream_id_array = np.loadtxt(self.rapid_connect_file, + ndmin=1, delimiter=",", + usecols=(0,), dtype=int) init_flows_array = np.zeros(stream_id_array.size) for riv_bas_index, riv_bas_id in enumerate(rivid_array): try: - data_index = np.where(stream_id_array==riv_bas_id)[0][0] + data_index = np.where(stream_id_array == riv_bas_id)[0][0] init_flows_array[data_index] = streamflow_values[riv_bas_index] - except Exception: - log('riv bas id {0} not found in connectivity list.'.format(riv_bas_id), + except IndexError: + log('riv bas id {0} not found in connectivity list.' 
+ .format(riv_bas_id), "WARNING") - + log("Writing to file ...", "INFO") with open_csv(qinit_file, 'w') as qinit_out: @@ -817,89 +879,104 @@ def generate_qinit_from_past_qout(self, qinit_file, time_index=-1, out_datetime= log("Initialization Complete!", "INFO") - def generate_seasonal_intitialization(self, - qinit_file, - datetime_start_initialization=datetime.datetime.utcnow()): - """ - This creates a seasonal qinit file from a RAPID qout file. This + def generate_seasonal_intitialization( + self, + qinit_file, + datetime_start_initialization=datetime.datetime.utcnow() + ): + """This creates a seasonal qinit file from a RAPID qout file. This requires a simulation Qout file with a longer time period of record and to be CF compliant. It takes the average of the current date +- 3 days - and goes back as far as possible. - - Parameters: - qinit_file(str): Path to output qinit_file. - datetime_start_initialization(Optional[datetime]): Datetime object with date of simulation to - go back through the years and get a running average - to generate streamflow initialization. Default is utcnow. + and goes back as far as possible. + + Parameters + ---------- + qinit_file: str + Path to output qinit_file. + datetime_start_initialization: :obj:`datetime.datetime`, optional + Datetime object with date of simulation to go back through the + years and get a running average to generate streamflow + initialization. Default is utcnow. - This example shows how to use it: - + Example: + .. code:: python - + from RAPIDpy.rapid import RAPID - - rapid_manager = RAPID(Qout_file='/output_mississippi-nfie/Qout_2000to2015.nc', - rapid_connect_file='/input_mississippi_nfie/rapid_connect_ECMWF.csv' - ) - - rapid_manager.generate_seasonal_intitialization(qinit_file='/input_mississippi_nfie/Qinit_seasonal_avg_jan_1.csv') + + rapid_manager = RAPID( + Qout_file='/output_mississippi-nfie/Qout_2000to2015.nc', + rapid_connect_file='/input_mississippi_nfie/rapid_connect.csv' + ) + + rapid_manager.generate_seasonal_intitialization( + qinit_file='/input_mississippi_nfie/Qinit_seasonal_avg.csv' + ) """ - #get information from datasets if not self.Qout_file or not os.path.exists(self.Qout_file): - log("Missing Qout_file. Please set before running this function ...", + log("Missing Qout_file. " + "Please set before running this function ...", "ERROR") if not self.rapid_connect_file or not self.rapid_connect_file: - log("Missing rapid_connect file. Please set before running this function ...", + log("Missing rapid_connect file. 
" + "Please set before running this function ...", "ERROR") - + day_of_year = datetime_start_initialization.timetuple().tm_yday min_day = day_of_year - 3 max_day = day_of_year + 3 with RAPIDDataset(self.Qout_file) as qout_hist_nc: if not qout_hist_nc.is_time_variable_valid(): - log("File must be CF 1.6 compliant with valid time variable ...", + log("File must be CF 1.6 compliant " + "with valid time variable ...", "ERROR") log("Generating seasonal average qinit file from qout file ...", "INFO") - + log("Determining dates with streamflows of interest ...", "INFO") - + time_indices = [] - for idx, t in enumerate(qout_hist_nc.get_time_array()): - var_time = gmtime(t) + for idx, ttt in enumerate(qout_hist_nc.get_time_array()): + var_time = gmtime(ttt) compare_yday = var_time.tm_yday - #move day back one past because of leap year adds - #a day after feb 29 (day 60) + # move day back one past because of leap year adds + # a day after feb 29 (day 60) if isleap(var_time.tm_year) and compare_yday > 60: compare_yday -= 1 - #check if date within range of season - if compare_yday >= min_day and compare_yday < max_day: + # check if date within range of season + if min_day <= compare_yday < max_day: time_indices.append(idx) if not time_indices: log("No time steps found within range ...", "ERROR") - + log("Extracting data ...", "INFO") - - streamflow_array = qout_hist_nc.get_qout(time_index_array=time_indices) + + streamflow_array = \ + qout_hist_nc.get_qout(time_index_array=time_indices) log("Reordering data...", "INFO") - stream_id_array = np.loadtxt(self.rapid_connect_file, ndmin=1, delimiter=",", usecols=(0,), dtype=int) + stream_id_array = np.loadtxt(self.rapid_connect_file, + ndmin=1, delimiter=",", + usecols=(0,), dtype=int) init_flows_array = np.zeros(stream_id_array.size) - for riv_bas_index, riv_bas_id in enumerate(qout_hist_nc.get_river_id_array()): + for riv_bas_index, riv_bas_id in enumerate( + qout_hist_nc.get_river_id_array()): try: - data_index = np.where(stream_id_array==riv_bas_id)[0][0] - init_flows_array[data_index] = np.mean(streamflow_array[riv_bas_index]) - except Exception: - log('riv_bas_id {0} not found in connectivity list.'.format(riv_bas_id), + data_index = np.where(stream_id_array == riv_bas_id)[0][0] + init_flows_array[data_index] = \ + np.mean(streamflow_array[riv_bas_index]) + except IndexError: + log('riv_bas_id {0} not found in connectivity list.' + .format(riv_bas_id), "WARNING") log("Writing to file ...", @@ -911,105 +988,121 @@ def generate_seasonal_intitialization(self, log("Initialization Complete!", "INFO") - def generate_usgs_avg_daily_flows_opt(self, + def generate_usgs_avg_daily_flows_opt(self, reach_id_gage_id_file, - start_datetime, + start_datetime, end_datetime, - out_streamflow_file, + out_streamflow_file, out_stream_id_file): """ - Generate daily streamflow file and stream id file required for calibration - or for substituting flows based on USGS gage ids associated with stream ids. - - Parameters: - reach_id_gage_id_file(str): Path to reach_id_gage_id file. - start_datetime(datetime): A datetime object with the start date to download data. - end_datetime(datetime): A datetime object with the end date to download data. - out_streamflow_file(str): The path to output the streamflow file for RAPID. - out_stream_id_file(str): The path to output the stream ID file associated with the streamflow file for RAPID. 
+ Generate daily streamflow file and stream id file required for + calibration or for substituting flows based on USGS gage ids + associated with stream ids. + + Parameters + ---------- + reach_id_gage_id_file: str + Path to reach_id_gage_id file. + start_datetime: datetime + A datetime object with the start date to download data. + end_datetime: datetime + A datetime object with the end date to download data. + out_streamflow_file: str + The path to output the streamflow file for RAPID. + out_stream_id_file: str + The path to output the stream ID file associated with the + streamflow file for RAPID. + Example *reach_id_gage_id_file*:: - + COMID, USGS_GAGE_ID 2000, 503944 ... - .. warning:: Overuse will get you blocked from downloading data from USGS. - + .. warning:: Overuse will get you blocked from downloading data from + USGS. + .. warning:: This code does not clean the data in any way. Thus, you - are likely to run into issues if you simply use the raw data. - + are likely to run into issues if you simply use the raw + data. + .. warning:: The code skips gages that do not have data for the entire time period. - + + Simple Example: - + .. code:: python - + import datetime from os.path import join from RAPIDpy import RAPID - + main_path = "/home/username/data" rapid_manager = RAPID() - rapid_manager.generate_usgs_avg_daily_flows_opt(reach_id_gage_id_file=join(main_path,"mississippi_usgsgage_id_comid.csv"), - start_datetime=datetime.datetime(2000,1,1), - end_datetime=datetime.datetime(2014,12,31), - out_streamflow_file=join(main_path,"streamflow_2000_2014.csv"), - out_stream_id_file=join(main_path,"streamid_2000_2014.csv")) - - + rapid_manager.generate_usgs_avg_daily_flows_opt( + reach_id_gage_id_file=join(main_path, "usgsgage_id_comid.csv"), + start_datetime=datetime.datetime(2000,1,1), + end_datetime=datetime.datetime(2014,12,31), + out_streamflow_file=join(main_path,"streamflow_2000_2014.csv"), + out_stream_id_file=join(main_path,"streamid_2000_2014.csv") + ) + + Complex Example: - + .. 
code:: python - + import datetime from os.path import join from RAPIDpy import RAPID - + main_path = "/home/username/data" - - rapid_manager = RAPID(rapid_executable_location='~/work/rapid/run/rapid' - use_all_processors=True, - ZS_TauR=24*3600, - ZS_dtR=15*60, - ZS_TauM=365*24*3600, - ZS_dtM=24*3600 - ) - - rapid_manager.update_parameters(rapid_connect_file='../rapid-io/input/rapid_connect.csv', - Vlat_file='../rapid-io/input/m3_riv.nc', - riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', - k_file='../rapid-io/input/k.csv', - x_file='../rapid-io/input/x.csv', - Qout_file='../rapid-io/output/Qout.nc', - ) - + + rapid_manager = RAPID( + rapid_executable_location='~/work/rapid/run/rapid' + use_all_processors=True, + ZS_TauR=24*3600, + ZS_dtR=15*60, + ZS_TauM=365*24*3600, + ZS_dtM=24*3600 + ) + + rapid_manager.update_parameters( + rapid_connect_file='../rapid-io/input/rapid_connect.csv', + Vlat_file='../rapid-io/input/m3_riv.nc', + riv_bas_id_file='../rapid-io/input/riv_bas_id.csv', + k_file='../rapid-io/input/k.csv', + x_file='../rapid-io/input/x.csv', + Qout_file='../rapid-io/output/Qout.nc', + ) + rapid_manager.update_reach_number_data() rapid_manager.update_simulation_runtime() - rapid_manager.generate_usgs_avg_daily_flows_opt(reach_id_gage_id_file=join(main_path,"mississippi_usgsgage_id_comid.csv"), - start_datetime=datetime.datetime(2000,1,1), - end_datetime=datetime.datetime(2014,12,31), - out_streamflow_file=join(main_path,"streamflow_2000_2014.csv"), - out_stream_id_file=join(main_path,"streamid_2000_2014.csv")) + rapid_manager.generate_usgs_avg_daily_flows_opt( + reach_id_gage_id_file=join(main_path, "usgsgage_id_comid.csv"), + start_datetime=datetime.datetime(2000,1,1), + end_datetime=datetime.datetime(2014,12,31), + out_streamflow_file=join(main_path,"streamflow_2000_2014.csv"), + out_stream_id_file=join(main_path,"streamid_2000_2014.csv") + ) rapid_manager.run() """ - log("Generating avg streamflow file and stream id file required for calibration ...", + log("Generating avg streamflow file and stream id file " + "required for calibration ...", "INFO") - log("Generating avg streamflow file and stream id file required for calibration ...", + log("Generating avg streamflow file and stream id file " + "required for calibration ...", "INFO") reach_id_gage_id_list = csv_to_list(reach_id_gage_id_file) -# USGS not returning tzinfo anymore, so removed tzinfo operations -# if start_datetime.tzinfo is None or start_datetime.tzinfo.utcoffset(start_datetime) is None: -# start_datetime = start_datetime.replace(tzinfo=utc) -# if end_datetime.tzinfo is None or end_datetime.tzinfo.utcoffset(end_datetime) is None: -# end_datetime = end_datetime.replace(tzinfo=utc) gage_data_matrix = [] valid_comid_list = [] - - #add extra day as it includes the start date (e.g. 7-5 is 2 days, but have data for 5,6,7, so +1) + + # add extra day as it includes the start date + # (e.g. 7-5 is 2 days, but have data for 5,6,7, so +1) num_days_needed = (end_datetime-start_datetime).days + 1 gage_id_list = [] @@ -1018,89 +1111,96 @@ def generate_usgs_avg_daily_flows_opt(self, if len(row[1]) == 7: station_id = '0' + row[1] gage_id_list.append(station_id) - + num_gage_id_list = np.array(gage_id_list, dtype=np.int32) - log("Querying Server for Data ..." 
, + log("Querying Server for Data ...", "INFO") - + query_params = { 'format': 'json', 'sites': ",".join(gage_id_list), -# USGS not returning tzinfo anymore, so removed tzinfo operations -# 'startDT': start_datetime.astimezone(tzoffset(None, -18000)).strftime("%Y-%m-%d"), -# 'endDT': end_datetime.astimezone(tzoffset(None, -18000)).strftime("%Y-%m-%d"), 'startDT': start_datetime.strftime("%Y-%m-%d"), 'endDT': end_datetime.strftime("%Y-%m-%d"), - 'parameterCd': '00060', #streamflow - 'statCd': '00003' #average + 'parameterCd': '00060', # streamflow + 'statCd': '00003' # average } - response = get("http://waterservices.usgs.gov/nwis/dv", params=query_params) - if response.ok: - data_valid = True - try: - requested_data = response.json()['value']['timeSeries'] - except IndexError: - data_valid = False - pass - - if data_valid: - for time_series in enumerate(requested_data): - usgs_station_full_name = time_series[1]['name'] - usgs_station_id = usgs_station_full_name.split(":")[1] - gage_data = [] - for time_step in time_series[1]['values'][0]['value']: - local_datetime = parse(time_step['dateTime']) - if local_datetime > end_datetime: - break - - if local_datetime >= start_datetime: - if not time_step['value']: - log("MISSING DATA for USGS Station {0} {1} {2}".format(station_id, - local_datetime, - time_step['value']), - "WARNING") - gage_data.append(float(time_step['value'])/35.3146667) - - try: - #get where streamids assocated with USGS sation id is - streamid_index = np.where(num_gage_id_list==int(float(usgs_station_id)))[0][0]+1 - except Exception: - log("USGS Station {0} not found in list ...".format(usgs_station_id), - "WARNING") - raise - - if len(gage_data) == num_days_needed: - gage_data_matrix.append(gage_data) - valid_comid_list.append(reach_id_gage_id_list[streamid_index][0]) - else: - log("StreamID {0} USGS Station {1} MISSING {2} DATA VALUES".format(reach_id_gage_id_list[streamid_index][0], - usgs_station_id, - num_days_needed-len(gage_data)), - "WARNING") + response = get("http://waterservices.usgs.gov/nwis/dv", + params=query_params) - if gage_data_matrix and valid_comid_list: - log("Writing Output ...", - "INFO") - np_array = np.array(gage_data_matrix).transpose() - with open_csv(out_streamflow_file, 'w') as gage_data: - wf = csvwriter(gage_data) - for row in np_array: - wf.writerow(row) - - with open_csv(out_stream_id_file, 'w') as comid_data: - cf = csvwriter(comid_data) - for row in valid_comid_list: - cf.writerow([int(float(row))]) - - #set parameters for RAPID run - self.IS_obs_tot = len(valid_comid_list) - self.obs_tot_id_file = out_stream_id_file - self.Qobs_file = out_streamflow_file - self.IS_obs_use = len(valid_comid_list) - self.obs_use_id_file = out_stream_id_file - else: - log("No valid data returned ...", - "WARNING") + if not response.ok: + log("USGS query error ...", + "WARNING") + return + + requested_data = None + try: + requested_data = response.json()['value']['timeSeries'] + except IndexError: + pass + + if requested_data is not None: + for time_series in enumerate(requested_data): + usgs_station_full_name = time_series[1]['name'] + usgs_station_id = usgs_station_full_name.split(":")[1] + gage_data = [] + for time_step in time_series[1]['values'][0]['value']: + local_datetime = parse(time_step['dateTime']) + if local_datetime > end_datetime: + break + + if local_datetime >= start_datetime: + if not time_step['value']: + log("MISSING DATA for USGS Station {0} {1} {2}" + .format(usgs_station_id, + local_datetime, + time_step['value']), + "WARNING") + 
gage_data.append( + float(time_step['value']) / 35.3146667) + + try: + # get where streamids associated with USGS station ID + streamid_index = \ + np.where(num_gage_id_list == + int(float(usgs_station_id)))[0][0]+1 + except (IndexError, ValueError): + log("USGS Station {0} not found in list ..." + .format(usgs_station_id), + "WARNING") + raise + + if len(gage_data) == num_days_needed: + gage_data_matrix.append(gage_data) + valid_comid_list.append( + reach_id_gage_id_list[streamid_index][0]) + else: + log("StreamID {0} USGS Station {1} MISSING {2} " + "DATA VALUES".format( + reach_id_gage_id_list[streamid_index][0], + usgs_station_id, + num_days_needed-len(gage_data)), + "WARNING") + + if gage_data_matrix and valid_comid_list: + log("Writing Output ...", + "INFO") + np_array = np.array(gage_data_matrix).transpose() + with open_csv(out_streamflow_file, 'w') as gage_data: + wgd = csvwriter(gage_data) + for row in np_array: + wgd.writerow(row) + + with open_csv(out_stream_id_file, 'w') as comid_data: + wcd = csvwriter(comid_data) + for row in valid_comid_list: + wcd.writerow([int(float(row))]) + + # set parameters for RAPID run + self.IS_obs_tot = len(valid_comid_list) + self.obs_tot_id_file = out_stream_id_file + self.Qobs_file = out_streamflow_file + self.IS_obs_use = len(valid_comid_list) + self.obs_use_id_file = out_stream_id_file else: - log("USGS query error ...", - "WARNING") \ No newline at end of file + log("No valid data returned ...", + "WARNING") diff --git a/RAPIDpy/utilities.py b/RAPIDpy/utilities.py index 116d782..6b9b162 100644 --- a/RAPIDpy/utilities.py +++ b/RAPIDpy/utilities.py @@ -1,28 +1,29 @@ # -*- coding: utf-8 -*- -# -# utilities.py -# RAPIDpy -# -# Created by Alan D. Snow. -# Copyright © 2016 Alan D Snow. All rights reserved. -# +""" + utilities.py + RAPIDpy + Created by Alan D. Snow, 2016. + License BSD-3-Clause +""" import os -from past.builtins import xrange import re +from past.builtins import xrange # pylint: disable=redefined-builtin -# ---------------------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- # HELPER FUNCTIONS -# ---------------------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- def case_insensitive_file_search(directory, pattern): """ Looks for file with pattern with case insensitive search """ try: - return os.path.join(directory, - [filename for filename in os.listdir(directory) \ - if re.search(pattern, filename, re.IGNORECASE)][0]) + return os.path.join( + directory, + [filename for filename in os.listdir(directory) + if re.search(pattern, filename, re.IGNORECASE)][0]) except IndexError: print("{0} not found".format(pattern)) raise @@ -33,7 +34,7 @@ def partition(lst, n): Divide list into n equal parts """ q, r = divmod(len(lst), n) - indices = [q*i + min(i,r) for i in xrange(n+1)] + indices = [q*i + min(i, r) for i in xrange(n+1)] return [lst[indices[i]:indices[i+1]] for i in xrange(n)], \ [list(xrange(indices[i], indices[i+1])) for i in xrange(n)] @@ -49,5 +50,3 @@ def get_valid_directory_list(input_directory): else: print("{0} not a directory. 
Skipping ...".format(directory)) return valid_input_directories - - diff --git a/appveyor.yml b/appveyor.yml index 3d91b8f..b8f1e81 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,7 +16,7 @@ environment: install: - 'SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%' - #create environment in conda + # create environment in conda - conda config --set always_yes yes - conda update -q conda python - "conda create --name rapid python=%PYTHON_VERSION%" @@ -24,28 +24,24 @@ install: # Install required python packages - conda config --add channels conda-forge - conda install --yes cmake gdal future netcdf4 numpy pandas pangaea pyproj pytz requests rtree scipy shapely - - conda install --yes pytest-cov - - pip install coveralls - deactivate rapid #install cygwin - - '%CYG_ROOT%\setup-x86_64.exe -qnNdO -R "%CYG_ROOT%" -s "%CYG_MIRROR%" -l "%CYG_CACHE%" -P dos2unix,gcc-core,gcc-g++,gcc-fortran,gdb,git,make,time,wget,gdal,libgdal-devel,openmpi > NULL' + - '%CYG_ROOT%\setup-x86_64.exe -qnNdO -R "%CYG_ROOT%" -s "%CYG_MIRROR%" -l "%CYG_CACHE%" -P dos2unix,gcc-core,gcc-g++,gcc-fortran,gdb,git,make,time,wget,gdal,libgdal-devel > NULL' #Creating directory for installation of libraries used by RAPID #- 'cd %APPVEYOR_BUILD_FOLDER%' #- 'SET INSTALLZ_DIR=%APPVEYOR_BUILD_FOLDER%\..\installz' #- 'mkdir %INSTALLZ_DIR%' #- 'cd %INSTALLZ_DIR%' #Installing RAPID Prereqs - #- 'appveyor DownloadFile "https://raw.githubusercontent.com/snowman2/rapid/master/rapid_install_prereqs.sh"' - #- '%CYG_ROOT%/bin/bash -lc "cd $(cygpath "$INSTALLZ_DIR"); chmod +x rapid_install_prereqs.sh"' - #- '%CYG_ROOT%/bin/bash -lc "cd $(cygpath "$INSTALLZ_DIR"); ./rapid_install_prereqs.sh -i=$(cygpath "$INSTALLZ_DIR") -b"' + #- '%CYG_ROOT%/bin/bash -lc "cd $(cygpath "$APPVEYOR_BUILD_FOLDER\.."); git clone https://github.com/c-h-david/rapid.git; cd rapid; bash rapid_install_prereqs.sh -i=$(cygpath "$INSTALLZ_DIR")' #Installing RAPID - #- '%CYG_ROOT%/bin/bash -lc "cd $(cygpath "$APPVEYOR_BUILD_FOLDER\..); git clone https://github.com/c-h-david/rapid.git; cd rapid; source ./rapid_specify_varpath.sh $(cygpath "$INSTALLZ_DIR")"; cd src; make rapid"' + #- '%CYG_ROOT%/bin/bash -lc "cd $(cygpath "$APPVEYOR_BUILD_FOLDER\..\rapid"); source ./rapid_specify_varpath.sh $(cygpath "$INSTALLZ_DIR")"; cd src; make rapid"' #Installing TauDEM #- '%CYG_ROOT%/bin/bash -lc "cd $(cygpath "$APPVEYOR_BUILD_FOLDER\..); git clone https://github.com/dtarb/TauDEM.git; cd TauDEM/src; make"' #Installing RAPIDpy - activate rapid - 'cd %APPVEYOR_BUILD_FOLDER%' - - python setup.py install + - pip install -e .[tests] build: false diff --git a/docs/conf.py b/docs/conf.py index 6bfe00f..94e6b83 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -50,6 +50,9 @@ def __getattr__(cls, name): 'sphinxcontrib.napoleon', ] +# Napoleon settings +napoleon_numpy_docstring = True + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -76,9 +79,9 @@ def __getattr__(cls, name): # built documents. # # The short X.Y version. -version = u'2.5.2' +version = u'2.6.0' # The full version, including alpha/beta/rc tags. -release = u'2.5.2' +release = u'2.6.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index 2b3af3d..daaefe4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -17,8 +17,8 @@ https://github.com/c-h-david/rapid. |License (3-Clause BSD)| -.. 
|DOI| image:: https://zenodo.org/badge/19918/erdc-cm/RAPIDpy.svg - :target: https://zenodo.org/badge/latestdoi/19918/erdc-cm/RAPIDpy +.. |DOI| image:: https://zenodo.org/badge/19918/erdc/RAPIDpy.svg + :target: https://zenodo.org/badge/latestdoi/19918/erdc/RAPIDpy .. |PyPI version| image:: https://badge.fury.io/py/RAPIDpy.svg :target: https://badge.fury.io/py/RAPIDpy .. |Build Status| image:: https://travis-ci.org/erdc-cm/RAPIDpy.svg?branch=master diff --git a/docs/running_rapid.rst b/docs/running_rapid.rst index 3af6169..5a86399 100644 --- a/docs/running_rapid.rst +++ b/docs/running_rapid.rst @@ -39,7 +39,7 @@ Step 5: Run RAPID Step 6 (optional): Convert RAPID output to be CF Compliant ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automethod:: RAPIDpy.rapid.RAPID.make_output_CF_compliant +.. automethod:: RAPIDpy.rapid.RAPID.make_output_cf_compliant Full API Description diff --git a/setup.py b/setup.py index 43e92b2..8c77289 100644 --- a/setup.py +++ b/setup.py @@ -2,17 +2,17 @@ setup( name='RAPIDpy', - version='2.5.2', + version='2.6.0', description='Python interface for RAPID (rapid-hub.org)', long_description='RAPIDpy is a python interface for RAPID that assists ' - 'to prepare inputs, runs the RAPID program, and provides ' - 'post-processing utilities (http://rapidpy.readthedocs.io). ' - 'More information about installation and the input ' - 'parameters for RAPID can be found at http://rapid-hub.org.' - ' The source code for RAPID is located at ' - 'https://github.com/c-h-david/rapid/. \n\n' - '.. image:: https://zenodo.org/badge/19918/erdc-cm/RAPIDpy.svg \n' - ' :target: https://zenodo.org/badge/latestdoi/19918/erdc-cm/RAPIDpy', + 'to prepare inputs, runs the RAPID program, and provides ' + 'post-processing utilities (http://rapidpy.readthedocs.io). ' + 'More information about installation and the input ' + 'parameters for RAPID can be found at http://rapid-hub.org.' + ' The source code for RAPID is located at ' + 'https://github.com/c-h-david/rapid/. \n\n' + '.. 
image:: https://zenodo.org/badge/19918/erdc-cm/RAPIDpy.svg \n' + ' :target: https://zenodo.org/badge/latestdoi/19918/erdc-cm/RAPIDpy', keywords='RAPID', author='Alan Dee Snow', author_email='alan.d.snow@usace.army.mil', @@ -21,9 +21,16 @@ packages=find_packages(), package_data={'': ['gis/lsm_grids/*.nc']}, install_requires=[ - 'future', 'numpy', 'netcdf4', 'pandas', - 'pangaea', 'python-dateutil', 'pytz', + 'future', + 'numpy', + 'netcdf4', + 'pandas', + 'pangaea', + 'python-dateutil', + 'pytz', 'requests', + 'rtree', + 'shapely', ], classifiers=[ 'Intended Audience :: Developers', @@ -40,6 +47,9 @@ 'tests': [ 'pytest', 'pytest-cov', + 'coveralls', + 'pylint', + 'flake8', ], 'docs': [ 'mock', diff --git a/tests/compare/cf_timeseries_daily_date.ihg b/tests/compare/cf_timeseries_daily_date.ihg index 03fdf1a..3d99e97 100644 --- a/tests/compare/cf_timeseries_daily_date.ihg +++ b/tests/compare/cf_timeseries_daily_date.ihg @@ -1,4 +1,4 @@ NUMPT 1 POINT 1 599 0.0 NRPDS 1 -INPUT 2002 08 31 00 00 12.96322 +INPUT 2002 08 31 00 00 18.36924 diff --git a/tests/compare/cf_timeseries_daily_date.xys b/tests/compare/cf_timeseries_daily_date.xys index d31d379..f958dc3 100644 --- a/tests/compare/cf_timeseries_daily_date.xys +++ b/tests/compare/cf_timeseries_daily_date.xys @@ -1,2 +1,2 @@ -XYS 25 1 "RAPID_TO_GSSHA" -"08/31/2002 12:00:00 AM" 12.96322 +XYS 25 1 "RAPID_TO_GSSHA" +"08/31/2002 12:00:00 AM" 18.36924 diff --git a/tests/compare/cf_timeseries_daily_date_tz.ihg b/tests/compare/cf_timeseries_daily_date_tz.ihg index 3f97740..416bc6b 100644 --- a/tests/compare/cf_timeseries_daily_date_tz.ihg +++ b/tests/compare/cf_timeseries_daily_date_tz.ihg @@ -1,4 +1,4 @@ NUMPT 1 POINT 1 599 0.0 NRPDS 1 -INPUT 2002 08 31 00 00 15.19825 +INPUT 2002 08 31 00 00 19.49250 diff --git a/tests/compare/goodness_of_fit_obs_sim.csv b/tests/compare/goodness_of_fit_obs_sim.csv new file mode 100644 index 0000000..2779a89 --- /dev/null +++ b/tests/compare/goodness_of_fit_obs_sim.csv @@ -0,0 +1,5 @@ +10, 7 +20, 6 +7, 8 +19, 19 +100, 50 \ No newline at end of file diff --git a/tests/compare/goodness_of_fit_obs_sim.txt b/tests/compare/goodness_of_fit_obs_sim.txt new file mode 100644 index 0000000..be43377 --- /dev/null +++ b/tests/compare/goodness_of_fit_obs_sim.txt @@ -0,0 +1,10 @@ +Percent Bias: -42.3077 +Absolute Percent Bias: 43.5897 +Root Mean Squared Error: 23.2637 +Mean Absolute Error: 13.6000 +Bias: -13.2 +Nash Sutcliffe efficiency coefficient: 0.5522 +Likelihood: 0.1066 +correlation coefficient: 0.9658 +index of agreement: 0.8054 +Kling-Gupta Efficiency: 0.3285 diff --git a/tests/compare/raw_goodness_of_fit_analysis.csv b/tests/compare/raw_goodness_of_fit_analysis.csv deleted file mode 100644 index 5d4962a..0000000 --- a/tests/compare/raw_goodness_of_fit_analysis.csv +++ /dev/null @@ -1,3 +0,0 @@ -reach_id,percent_bias,abs_percent_bias,rmse,mae,bias,NSE,likelihood,correlation_coeff,index_agreement,KGE -75224,-98.425120719511412,98.425120719511412,30.480209943223549,30.462574862688783,-30.462574862688783,-2197.918812267415,0.0,-1.0,0.040385246766002525,-1.2655788121180445 -75224,-98.425120719511412,98.425120719511412,30.480209943223549,30.462574862688783,-30.462574862688783,-2197.918812267415,0.0,-1.0,0.040385246766002525,-1.2655788121180445 diff --git a/tests/test_gis.py b/tests/test_gis.py index 21c6711..e991ff5 100644 --- a/tests/test_gis.py +++ b/tests/test_gis.py @@ -382,98 +382,132 @@ def test_extract_sub_network_taudem(): """ Checks extracting sub network from larger network """ - print("TEST 10: TEST EXTRACTING SUB 
NETWORK FROM LARGER NETWORK") - td = TauDEM() - - subset_network_file = os.path.join(OUTPUT_DATA_PATH, "DrainageLineSubset2.shp") - #to extract a specific network - td.extractSubNetwork(network_file=os.path.join(GIS_INPUT_DATA_PATH, 'u-k', "DrainageLineSubset.shp"), - out_subset_network_file=subset_network_file, - outlet_ids=[42911], #list of outlet ids - river_id_field="HydroID", - next_down_id_field="NextDownID", - river_magnitude_field="HydroID", - safe_mode=False, - ) - - #to extract the subset watersheds using subset river network - subset_watershed_file = os.path.join(OUTPUT_DATA_PATH,"CatchmentSubset2.shp") - td.extractSubsetFromWatershed(subset_network_file=subset_network_file, - subset_network_river_id_field="HydroID", - watershed_file=os.path.join(GIS_INPUT_DATA_PATH, 'u-k', 'CatchmentSubset.shp'), - watershed_network_river_id_field="DrainLnID", - out_watershed_subset_file=subset_watershed_file) - - - #Test results + subset_network_file = os.path.join(OUTPUT_DATA_PATH, + "DrainageLineSubset2.shp") + # to extract a specific network + TauDEM.extractSubNetwork( + network_file=os.path.join(GIS_INPUT_DATA_PATH, + 'u-k', + "DrainageLineSubset.shp"), + out_subset_network_file=subset_network_file, + outlet_ids=[42911], + river_id_field="HydroID", + next_down_id_field="NextDownID", + river_magnitude_field="HydroID", + safe_mode=False) + + # to extract the subset watersheds using subset river network + subset_watershed_file = os.path.join(OUTPUT_DATA_PATH, + "CatchmentSubset2.shp") + TauDEM.extractSubsetFromWatershed( + subset_network_file=subset_network_file, + subset_network_river_id_field="HydroID", + watershed_file=os.path.join(GIS_INPUT_DATA_PATH, 'u-k', + 'CatchmentSubset.shp'), + watershed_network_river_id_field="DrainLnID", + out_watershed_subset_file=subset_watershed_file) + + largest_subset_network_file = \ + os.path.join(OUTPUT_DATA_PATH, + "DrainageLineSubsetLargest.shp") + # to extract a specific network + TauDEM.extractLargestSubNetwork( + network_file=os.path.join(GIS_INPUT_DATA_PATH, 'u-k', + "DrainageLineSubset.shp"), + out_subset_network_file=largest_subset_network_file, + river_id_field="HydroID", + next_down_id_field="NextDownID", + river_magnitude_field="HydroID", + safe_mode=False) + + # Test results subset_network_shapefile = ogr.Open(subset_network_file) subset_network_layer = subset_network_shapefile.GetLayer() ogr_watershed_shapefile = ogr.Open(subset_watershed_file) ogr_watershed_shapefile_lyr = ogr_watershed_shapefile.GetLayer() - number_of_network_features = subset_network_layer.GetFeatureCount() - number_of_watershed_features = ogr_watershed_shapefile_lyr.GetFeatureCount() - - #count number of features - assert (number_of_network_features==7) - assert (number_of_watershed_features==7) + largest_subset_network_shapefile = ogr.Open(subset_network_file) + largest_subset_network_layer = largest_subset_network_shapefile.GetLayer() - #make sure IDs correct - network_id_list = [42911,42891,42747,42748,42892,42841,42846] - for feature_idx, network_feature in enumerate(subset_network_layer): + number_of_network_features = subset_network_layer.GetFeatureCount() + number_of_watershed_features = \ + ogr_watershed_shapefile_lyr.GetFeatureCount() + number_of_largest_network_features = \ + largest_subset_network_layer.GetFeatureCount() + # count number of features + assert (number_of_network_features == 7) + assert (number_of_watershed_features == 7) + assert (number_of_largest_network_features == 7) + + # make sure IDs correct + network_id_list = [42911, 42891, 42747, 
42748, 42892, 42841, 42846] + for network_feature in subset_network_layer: assert (network_feature.GetField("HydroID") in network_id_list) - for feature_idx, watershed_feature in enumerate(ogr_watershed_shapefile_lyr): + for watershed_feature in ogr_watershed_shapefile_lyr: assert (watershed_feature.GetField("DrainLnID") in network_id_list) + for network_feature in largest_subset_network_layer: + assert (network_feature.GetField("HydroID") in network_id_list) - #make sure all fields are there + # make sure all fields are there - #TEST WATERSHED + # TEST WATERSHED subset_watershed_layer_defn = ogr_watershed_shapefile_lyr.GetLayerDefn() num_watershed_fields = subset_watershed_layer_defn.GetFieldCount() - watershed_field_names = ['Shape_Leng','Shape_Area','HydroID','GridID','DrainLnID'] - assert (num_watershed_fields==len(watershed_field_names)) + watershed_field_names = ['Shape_Leng', 'Shape_Area', 'HydroID', 'GridID', + 'DrainLnID'] + assert (num_watershed_fields == len(watershed_field_names)) for i in range(num_watershed_fields): - assert (subset_watershed_layer_defn.GetFieldDefn(i).GetNameRef() in watershed_field_names) + assert (subset_watershed_layer_defn.GetFieldDefn(i).GetNameRef() in + watershed_field_names) - #TEST NETWORK + # TEST NETWORK subset_network_layer_defn = subset_network_layer.GetLayerDefn() num_network_fields = subset_network_layer_defn.GetFieldCount() - - network_field_names = ['arcid','from_node','to_node','HydroID','GridID', - 'NextDownID','SLength','Avg_Slope','LENGTHKM', - 'Shape_Leng','Musk_x','watershed','subbasin'] - - assert (num_network_fields==len(network_field_names)) - - for i in range(num_network_fields): - assert (subset_network_layer_defn.GetFieldDefn(i).GetNameRef() in network_field_names) - - #cleanup - remove_files(*glob(os.path.join(OUTPUT_DATA_PATH,"DrainageLineSubset2.*"))) - remove_files(*glob(os.path.join(OUTPUT_DATA_PATH,"CatchmentSubset2.*"))) + largest_subset_network_layer_defn = \ + largest_subset_network_layer.GetLayerDefn() + num_largest_network_fields = \ + largest_subset_network_layer_defn.GetFieldCount() + + network_field_names = ['arcid', 'from_node', 'to_node', 'HydroID', + 'GridID', 'NextDownID', 'SLength', 'Avg_Slope', + 'LENGTHKM', 'Shape_Leng', 'Musk_x', 'watershed', + 'subbasin'] + + assert (num_network_fields == len(network_field_names)) + assert (num_largest_network_fields == len(network_field_names)) + + for iii in range(num_network_fields): + assert (subset_network_layer_defn.GetFieldDefn(iii).GetNameRef() + in network_field_names) + assert ( + largest_subset_network_layer_defn.GetFieldDefn(iii).GetNameRef() + in network_field_names) + + # cleanup + remove_files(*glob(os.path.join(OUTPUT_DATA_PATH, "DrainageLineSubset2.*"))) + remove_files(*glob(os.path.join(OUTPUT_DATA_PATH, "CatchmentSubset2.*"))) + remove_files(*glob(os.path.join(OUTPUT_DATA_PATH, "DrainageLineSubsetLargest.*"))) def test_add_length_to_network_taudem(): """ Checks adding length to network """ print("TEST 11: TEST ADD LENGTH TO NETWORK") - td = TauDEM() - subset_network_file = os.path.join(OUTPUT_DATA_PATH, "DrainageLineSubset2.shp") #to extract a specific network - td.extractSubNetwork(network_file=os.path.join(GIS_INPUT_DATA_PATH, 'u-k', "DrainageLineSubset.shp"), - out_subset_network_file=subset_network_file, - outlet_ids=[42911], #list of outlet ids - river_id_field="HydroID", - next_down_id_field="NextDownID", - river_magnitude_field="HydroID", - safe_mode=False, - ) + TauDEM.extractSubNetwork( + network_file=os.path.join(GIS_INPUT_DATA_PATH, 'u-k', 
"DrainageLineSubset.shp"), + out_subset_network_file=subset_network_file, + outlet_ids=[42911], #list of outlet ids + river_id_field="HydroID", + next_down_id_field="NextDownID", + river_magnitude_field="HydroID", + safe_mode=False) #add length m field - td.addLengthMeters(subset_network_file) + TauDEM.addLengthMeters(subset_network_file) #Test results subset_network_shapefile = ogr.Open(subset_network_file) diff --git a/tests/test_inflow.py b/tests/test_inflow.py index 2ff952e..56f3b58 100644 --- a/tests/test_inflow.py +++ b/tests/test_inflow.py @@ -33,6 +33,7 @@ "..", "..", "rapid", "src", "rapid") + def compare_array_nan(a, b): # based on https://stackoverflow.com/questions/23810370/python-numpy-comparing-arrays-with-nan return ((a == b) | (np.isnan(a) & np.isnan(b))).all() @@ -49,6 +50,36 @@ def setUp(self): self.CYGWIN_BIN_PATH = 'C:\\cygwin64\\bin' + try: + self.tearDown() + except OSError: + pass + + def tearDown(self): + rmtree(os.path.join(self.OUTPUT_DATA_PATH, "input")) + rmtree(os.path.join(self.OUTPUT_DATA_PATH, "output")) + + @staticmethod + def _compare_m3(generated_m3_file, generated_m3_file_solution): + + # check other info in netcdf file + d1 = Dataset(generated_m3_file) + d2 = Dataset(generated_m3_file_solution) + try: + assert_almost_equal(d1.variables['m3_riv'][:], d2.variables['m3_riv'][:], decimal=4) + if 'rivid' in d2.variables.keys(): + compare_array_nan(d1.variables['rivid'][:], d2.variables['rivid'][:]) + if 'lat' in d2.variables.keys(): + compare_array_nan(d1.variables['lat'][:], d2.variables['lat'][:]) + if 'lon' in d2.variables.keys(): + compare_array_nan(d1.variables['lon'][:], d2.variables['lon'][:]) + except AssertionError: + d1.close() + d2.close() + raise + d1.close() + d2.close() + def _setup_automated(self, directory_name): """ setup for automated method @@ -759,7 +790,6 @@ def test_generate_wrf_inflow2(self): generated_m3_file_solution = os.path.join(self.INFLOW_COMPARE_DATA_PATH, m3_file_name) self._compare_m3(generated_m3_file,generated_m3_file_solution) - def test_generate_cmip5_inflow(self): """ Checks generating inflow file from CMIP5 LSM @@ -780,21 +810,3 @@ def test_generate_cmip5_inflow(self): # check output file info assert output_file_info[0]['ark-ms']['m3_riv'] == generated_m3_file - def _compare_m3(self, generated_m3_file, generated_m3_file_solution): - - # check other info in netcdf file - d1 = Dataset(generated_m3_file) - d2 = Dataset(generated_m3_file_solution) - assert_almost_equal(d1.variables['m3_riv'][:], d2.variables['m3_riv'][:], decimal=5) - if 'rivid' in d2.variables.keys(): - compare_array_nan(d1.variables['rivid'][:], d2.variables['rivid'][:]) - if 'lat' in d2.variables.keys(): - compare_array_nan(d1.variables['lat'][:], d2.variables['lat'][:]) - if 'lon' in d2.variables.keys(): - compare_array_nan(d1.variables['lon'][:], d2.variables['lon'][:]) - d1.close() - d2.close() - - def tearDown(self): - rmtree(os.path.join(self.OUTPUT_DATA_PATH, "input")) - rmtree(os.path.join(self.OUTPUT_DATA_PATH, "output")) diff --git a/tests/test_rapidpy.py b/tests/test_rapidpy.py index 6ffab90..d5b59e9 100644 --- a/tests/test_rapidpy.py +++ b/tests/test_rapidpy.py @@ -23,7 +23,7 @@ compare_csv_timeseries_files, remove_files) -from RAPIDpy.postprocess import find_goodness_of_fit +from RAPIDpy.postprocess import find_goodness_of_fit, find_goodness_of_fit_csv from RAPIDpy.postprocess import ConvertRAPIDOutputToCF #GLOBAL VARIABLES @@ -325,7 +325,7 @@ def test_convert_file_to_be_cf_compliant_new_format_comid_lat_lon_z(): 
rapid_connect_file=os.path.join(INPUT_DATA_PATH, 'rapid_connect.csv'), ZS_TauR=3*3600) - rapid_manager.make_output_CF_compliant(simulation_start_datetime=datetime(2002, 8, 30), + rapid_manager.make_output_cf_compliant(simulation_start_datetime=datetime(2002, 8, 30), comid_lat_lon_z_file=os.path.join(INPUT_DATA_PATH, 'comid_lat_lon_z.csv'), project_name="ERA Interim (T511 Grid) 3 Hourly Runoff Based Historical flows by US Army ERDC") @@ -365,7 +365,7 @@ def test_convert_file_to_be_cf_compliant_new_format(): rapid_connect_file=os.path.join(INPUT_DATA_PATH, 'rapid_connect.csv'), ZS_TauR=3*3600) - rapid_manager.make_output_CF_compliant(simulation_start_datetime=datetime(2002, 8, 30), + rapid_manager.make_output_cf_compliant(simulation_start_datetime=datetime(2002, 8, 30), comid_lat_lon_z_file="", project_name="ERA Interim (T511 Grid) 3 Hourly Runoff Based Historical flows by US Army ERDC") @@ -403,7 +403,7 @@ def test_convert_file_to_be_cf_compliant_original_format(): rapid_connect_file=os.path.join(INPUT_DATA_PATH, 'rapid_connect.csv'), ZS_TauR=3*3600) - rapid_manager.make_output_CF_compliant(simulation_start_datetime=datetime(2002, 8, 30), + rapid_manager.make_output_cf_compliant(simulation_start_datetime=datetime(2002, 8, 30), comid_lat_lon_z_file=os.path.join(INPUT_DATA_PATH, 'comid_lat_lon_z.csv'), project_name="ERA Interim (T511 Grid) 3 Hourly Runoff Based Historical flows by US Army ERDC") @@ -508,6 +508,10 @@ def test_extract_timeseries(): copy(input_qout_file, new_qout_file) new_timeseries_file = os.path.join(OUTPUT_DATA_PATH, 'new_timeseries_file.csv') + with pytest.raises(ValueError): + with RAPIDDataset(new_qout_file) as qout_nc: + qout_nc.write_flows_to_csv(new_timeseries_file) + with RAPIDDataset(new_qout_file) as qout_nc: qout_nc.write_flows_to_csv(new_timeseries_file, river_id=75224) @@ -564,7 +568,7 @@ def test_extract_timeseries(): date_search_start=datetime(2002, 8, 31), date_search_end=datetime(2002, 8, 31, 23, 59, 59), daily=True, - mode='max') + filter_mode='max') cf_timeseries_daily_date_file_solution = os.path.join(COMPARE_DATA_PATH, 'cf_timeseries_daily_date.csv') assert (compare_csv_timeseries_files(cf_timeseries_daily_date_file, cf_timeseries_daily_date_file_solution, header=False)) @@ -608,22 +612,6 @@ def test_goodness_of_fit(): cf_goodness_of_fit_file_solution = os.path.join(COMPARE_DATA_PATH, 'cf_goodness_of_fit_analysis.csv') assert (compare_csv_decimal_files(cf_out_analysis_file, cf_goodness_of_fit_file_solution)) - #using original RAPID file - raw_goodness_of_fit_file_solution = os.path.join(COMPARE_DATA_PATH, 'raw_goodness_of_fit_analysis.csv') - original_input_qout_file = os.path.join(COMPARE_DATA_PATH, 'Qout_nasa_lis_3hr_20020830_original.nc') - original_out_analysis_file = os.path.join(OUTPUT_DATA_PATH, 'original_goodness_of_fit_results-daily.csv') - find_goodness_of_fit(original_input_qout_file, reach_id_file, observed_file, - original_out_analysis_file, steps_per_group=8) - - assert (compare_csv_decimal_files(original_out_analysis_file, raw_goodness_of_fit_file_solution)) - - #using new RAPID file - new_input_qout_file = os.path.join(COMPARE_DATA_PATH, 'Qout_nasa_lis_3hr_20020830.nc') - new_out_analysis_file = os.path.join(OUTPUT_DATA_PATH, 'goodness_of_fit_results-daily.csv') - find_goodness_of_fit(new_input_qout_file, reach_id_file, observed_file, - new_out_analysis_file, steps_per_group=8) - - assert (compare_csv_decimal_files(new_out_analysis_file, raw_goodness_of_fit_file_solution)) reach_id_file = os.path.join(INPUT_DATA_PATH, 'obs_reach_id_1.csv') 
observed_file = os.path.join(INPUT_DATA_PATH, 'obs_flow_1.csv') @@ -635,9 +623,20 @@ def test_goodness_of_fit(): cf_goodness_of_fit_file_solution_1 = os.path.join(COMPARE_DATA_PATH, 'cf_goodness_of_fit_analysis_1.csv') assert (compare_csv_decimal_files(cf_out_analysis_file_1, cf_goodness_of_fit_file_solution_1)) + observed_simulated_file = os.path.join(COMPARE_DATA_PATH, + 'goodness_of_fit_obs_sim.csv') + goodness_obs_sim_solution = os.path.join(OUTPUT_DATA_PATH, + 'goodness_of_fit_obs_sim.txt') + # test print goodness of fit to file + find_goodness_of_fit_csv(observed_simulated_file, + out_file=goodness_obs_sim_solution) + goodness_obs_sim = os.path.join(COMPARE_DATA_PATH, + 'goodness_of_fit_obs_sim.txt') + assert (fcmp(goodness_obs_sim, goodness_obs_sim_solution)) + # test print goodness of fit to console + find_goodness_of_fit_csv(observed_simulated_file) + remove_files(cf_out_analysis_file, - original_out_analysis_file, - new_out_analysis_file, cf_out_analysis_file_1) def test_cf_merge(): @@ -731,7 +730,7 @@ def test_extract_timeseries_to_gssha_xys(): date_search_start=datetime(2002, 8, 31), date_search_end=datetime(2002, 8, 31, 23, 59, 59), daily=True, - mode='max') + filter_mode='max') cf_timeseries_daily_date_file_solution = os.path.join(COMPARE_DATA_PATH, 'cf_timeseries_daily_date.xys') assert (compare_csv_timeseries_files(cf_timeseries_daily_date_file, cf_timeseries_daily_date_file_solution)) @@ -800,7 +799,7 @@ def test_extract_timeseries_to_gssha_ihg(): date_search_start=datetime(2002, 8, 31), date_search_end=datetime(2002, 8, 31, 23, 59, 59), daily=True, - mode='max') + filter_mode='max') cf_timeseries_daily_date_file_solution = os.path.join(COMPARE_DATA_PATH, 'cf_timeseries_daily_date.ihg') assert (compare_csv_timeseries_files(cf_timeseries_daily_date_file, cf_timeseries_daily_date_file_solution, header=False)) @@ -817,7 +816,6 @@ def test_extract_timeseries_to_gssha_ihg(): cf_timeseries_date_file_solution = os.path.join(COMPARE_DATA_PATH, 'cf_timeseries_date.ihg') assert (compare_csv_timeseries_files(cf_timeseries_date_file, cf_timeseries_date_file_solution, header=False)) - remove_files(cf_timeseries_file, cf_qout_file, cf_timeseries_daily_file, @@ -872,7 +870,7 @@ def test_extract_timeseries_to_gssha_ihg_tzinfo(): date_search_start=datetime(2002, 8, 31), date_search_end=datetime(2002, 8, 31, 23, 59, 59), daily=True, - mode='max') + filter_mode='max') cf_timeseries_daily_date_file_solution = os.path.join(COMPARE_DATA_PATH, 'cf_timeseries_daily_date_tz.ihg') assert (compare_csv_timeseries_files(cf_timeseries_daily_date_file, cf_timeseries_daily_date_file_solution, header=False)) @@ -895,3 +893,75 @@ def test_extract_timeseries_to_gssha_ihg_tzinfo(): cf_timeseries_daily_date_file, cf_timeseries_date_file, ) + + +def test_dataset_exceptions(): + """This tests RAPIDDataset exceptions""" + dummy_file = os.path.join(OUTPUT_DATA_PATH, + 'dummy_file.txt') + cf_input_qout_file = os.path.join(COMPARE_DATA_PATH, + 'Qout_nasa_lis_3hr_20020830_CF.nc') + cf_qout_file = os.path.join(OUTPUT_DATA_PATH, + 'Qout_nasa_lis_3hr_20020830_CF.nc') + copy(cf_input_qout_file, cf_qout_file) + + with pytest.raises(IndexError): + with RAPIDDataset(cf_qout_file, + river_id_dimension='fake_rivid') as qout_nc: + print(qout_nc) + + # this only prints a warning + with RAPIDDataset(cf_qout_file, + river_id_variable='fake_rivid') as qout_nc: + print(qout_nc) + + with pytest.raises(IndexError): + with RAPIDDataset(cf_qout_file, + streamflow_variable='fake_qout') as qout_nc: + print(qout_nc) + + with 
pytest.raises(IndexError): + with RAPIDDataset(cf_qout_file) as qout_nc: + print(qout_nc.get_qout(49876539)) + + with RAPIDDataset(cf_qout_file) as qout_nc: + aaa, bbb, ccc = qout_nc.get_subset_riverid_index_list([49876539]) + assert not aaa + assert not bbb + assert ccc[0] == 49876539 + + with pytest.raises(ValueError): + with RAPIDDataset(cf_qout_file) as qout_nc: + qout_nc.write_flows_to_gssha_time_series_xys( + dummy_file, + series_name="RAPID_TO_GSSHA", + series_id=34) + + with pytest.raises(ValueError): + with RAPIDDataset(cf_qout_file) as qout_nc: + qout_nc.write_flows_to_csv(dummy_file) + + # for writing entire time series to file from original rapid output + input_qout_file = os.path.join(COMPARE_DATA_PATH, + 'Qout_nasa_lis_3hr_20020830_original.nc') + original_qout_file = os.path.join(OUTPUT_DATA_PATH, + 'Qout_nasa_lis_3hr_20020830_original.nc') + copy(input_qout_file, original_qout_file) + + with pytest.raises(ValueError): + with RAPIDDataset(original_qout_file) as qout_nc: + print(qout_nc.get_time_array()) + + with pytest.raises(IndexError): + with RAPIDDataset(original_qout_file) as qout_nc: + qout_nc.write_flows_to_gssha_time_series_xys( + dummy_file, + series_name="RAPID_TO_GSSHA", + series_id=34, + river_index=0) + + with pytest.raises(IndexError): + with RAPIDDataset(original_qout_file) as qout_nc: + qout_nc.write_flows_to_gssha_time_series_ihg( + dummy_file, + dummy_file)
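
The test_dataset_exceptions() cases above pin down the error contract that RAPIDDataset now guarantees: an unknown river ID dimension or streamflow variable, or a river ID that is not in the file, raises IndexError (an unknown river ID variable only logs a warning), while operations that need a river ID or a valid CF time variable raise ValueError when those are missing. A minimal sketch of how calling code might rely on that contract follows; it is illustrative only, the Qout path is a placeholder, and 75224 is simply the river ID used elsewhere in these tests.

.. code:: python

    # Minimal sketch, assuming RAPIDpy is installed and a CF-compliant Qout
    # file exists at the placeholder path below. It mirrors the exception
    # behaviour exercised by test_dataset_exceptions().
    import numpy as np

    from RAPIDpy import RAPIDDataset


    def mean_flow_or_none(qout_file, river_id):
        """Return the mean streamflow for river_id, or None if it is absent."""
        with RAPIDDataset(qout_file) as qout_nc:
            try:
                # get_qout(river_id) raises IndexError for an unknown river ID
                return np.mean(qout_nc.get_qout(river_id))
            except IndexError:
                return None


    print(mean_flow_or_none("Qout_example.nc", 75224))

The same contract covers write_flows_to_csv, which the new tests show raising ValueError when it is called without a river_id.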